KinoSearch::Store::OutStream - filehandles for writing invindexes
void
lu_write (outstream, template_sv, ...)
    OutStream *outstream;
    SV        *template_sv;
PREINIT:
    STRLEN  tpt_len;      /* bytelength of template */
    char   *template;     /* ptr to a spot in the template */
    char   *tpt_end;      /* ptr to the end of the template */
    int     repeat_count; /* number of times to repeat sym */
    int     item_count;   /* current place in @_ */
    char    sym;          /* the current symbol in the template */
    char    countsym;     /* used when calculating repeat counts */
    I32     aI32;
    U32     aU32;
    double  aDouble;
    SV     *aSV;
    char   *string;
    STRLEN  string_len;
PPCODE:
{
    /* Write a list of items to the stream under control of a pack-like
     * TEMPLATE.  Each template symbol may be followed by a decimal repeat
     * count; spaces between symbols are skipped.  Symbols handled below:
     *
     *   a    raw bytes; the count must equal the item's byte length
     *   b B  one byte
     *   i I  32-bit integer (signed / unsigned source value)
     *   Q    "64-bit" integer, passed around as a double
     *   V    VInt
     *   W    VLong, passed around as a double
     *   T    string (written via Kino_OutStream_write_string)
     *
     * Every error path calls Kino_confess, which is assumed not to return.
     */

    /* require an object and a template; whether the number of items
     * matches the template is checked inside the loop */
    if (items < 2) {
        Kino_confess("lu_write error: too few arguments");
    }

    /* prepare the template and get pointers */
    template = SvPV(template_sv, tpt_len);
    tpt_end  = template + tpt_len;

    /* reject an empty template */
    if (tpt_len == 0) {
        Kino_confess("lu_write error: TEMPLATE cannot be empty string");
    }

    /* init counters */
    repeat_count = 0;
    item_count   = 2;

    while (1) {
        /* only process template if we're not in the midst of a repeat */
        if (repeat_count == 0) {
            /* fast-forward past space characters; test the bound before
             * dereferencing */
            while (template < tpt_end && *template == ' ') {
                template++;
            }

            /* if we're done, return or throw error */
            if (template == tpt_end || item_count == items) {
                if (item_count != items) {
                    Kino_confess(
                      "lu_write error: Too many ITEMS, not enough TEMPLATE");
                }
                else if (template != tpt_end) {
                    Kino_confess(
                      "lu_write error: Too much TEMPLATE, not enough ITEMS");
                }
                else { /* success! */
                    break;
                }
            }

            /* derive the current symbol and a possible digit repeat sym */
            sym = *template++;

            if (template == tpt_end) {
                /* sym is last char in template */
                repeat_count = 1;
            }
            else {
                countsym = *template;
                if (countsym >= '0' && countsym <= '9') {
                    /* calculate numerical repeat count, never reading at
                     * or beyond tpt_end */
                    repeat_count = countsym - KINO_NUM_CHAR_OFFSET;
                    template++;
                    while (template < tpt_end) {
                        countsym = *template;
                        if (countsym < '0' || countsym > '9') {
                            break;
                        }
                        repeat_count = (repeat_count * 10)
                            + (countsym - KINO_NUM_CHAR_OFFSET);
                        template++;
                    }
                }
                else {
                    /* no numeric repeat count, so process sym only once */
                    repeat_count = 1;
                }
            }

            /* 'a' resets its own count after writing, so "a0" with an
             * empty string is fine.  For any other symbol a zero count
             * would be decremented below zero and the loop would never
             * return to the template. */
            if (repeat_count == 0 && sym != 'a') {
                Kino_confess(
                    "lu_write error: repeat count of 0 for '%c'", sym);
            }
        }

        /* In the midst of a repeat the balance check above is skipped, so
         * make sure there is still an item on the stack to consume. */
        if (item_count >= items) {
            Kino_confess(
                "lu_write error: Too much TEMPLATE, not enough ITEMS");
        }

        switch(sym) {

        case 'a': /* arbitrary binary data */
            aSV = ST(item_count);
            if (!SvOK(aSV)) {
                Kino_confess("Internal error: undef at lu_write 'a'");
            }
            string = SvPV(aSV, string_len);
            if ((STRLEN)repeat_count != string_len) {
                Kino_confess(
                    "lu_write error: repeat_count != string_len: %d %"UVuf,
                    repeat_count, (UV)string_len);
            }
            Kino_OutStream_write_bytes(outstream, string, string_len);
            /* the count was a byte length, not a number of items:
             * trigger next sym */
            repeat_count = 1;
            break;

        case 'b': /* signed byte */
        case 'B': /* unsigned byte */
            aI32 = SvIV( ST(item_count) );
            Kino_OutStream_write_byte(outstream, (char)(aI32 & 0xff));
            break;

        case 'i': /* signed 32-bit integer */
            aI32 = SvIV( ST(item_count) );
            Kino_OutStream_write_int(outstream, (U32)aI32);
            break;

        case 'I': /* unsigned 32-bit integer */
            aU32 = SvUV( ST(item_count) );
            Kino_OutStream_write_int(outstream, aU32);
            break;

        case 'Q': /* unsigned "64-bit" integer */
            aDouble = SvNV( ST(item_count) );
            Kino_OutStream_write_long(outstream, aDouble);
            break;

        case 'V': /* VInt */
            aU32 = SvUV( ST(item_count) );
            Kino_OutStream_write_vint(outstream, aU32);
            break;

        case 'W': /* VLong */
            aDouble = SvNV( ST(item_count) );
            Kino_OutStream_write_vlong(outstream, aDouble);
            break;

        case 'T': /* string */
            aSV    = ST(item_count);
            string = SvPV(aSV, string_len);
            Kino_OutStream_write_string(outstream, string, string_len);
            break;

        default:
            Kino_confess("Illegal character in template: %c", sym);
        }

        /* use up one repeat_count and one item from the stack */
        repeat_count--;
        item_count++;
    }
}
void
DESTROY(outstream)
OutStream *outstream;
PPCODE:
/* Perl-side destructor: hand the struct to Kino_OutStream_destroy, which
 * flushes pending output, releases the filehandle SV, and frees the
 * buffer and the struct itself. */
Kino_OutStream_destroy(outstream);
__H__
#ifndef H_KINOIO #define H_KINOIO 1
#include ``EXTERN.h'' #include ``perl.h'' #include ``XSUB.h'' #include ``KinoSearchStoreInStream.h'' #include ``KinoSearchUtilCarp.h'' #include ``KinoSearchUtilMathUtils.h''
typedef struct outstream { PerlIO *fh; SV *fh_sv; char *buf; Off_t buf_start; int buf_pos; void (*seek) (struct outstream*, double); double (*tell) (struct outstream*); void (*write_byte) (struct outstream*, char); void (*write_bytes) (struct outstream*, char*, STRLEN); void (*write_int) (struct outstream*, U32); void (*write_long) (struct outstream*, double); void (*write_vint) (struct outstream*, U32); void (*write_vlong) (struct outstream*, double); void (*write_string)(struct outstream*, char*, STRLEN); } OutStream;
OutStream* Kino_OutStream_new (char*, SV*); void Kino_OutStream_seek (OutStream*, double); double Kino_OutStream_tell (OutStream*); double Kino_OutStream_length (OutStream*); void Kino_OutStream_flush (OutStream*); void Kino_OutStream_absorb (OutStream*, InStream*); void Kino_OutStream_write_byte (OutStream*, char); void Kino_OutStream_write_bytes (OutStream*, char*, STRLEN); void Kino_OutStream_write_int (OutStream*, U32); void Kino_OutStream_write_long (OutStream*, double); void Kino_OutStream_write_vint (OutStream*, U32); int Kino_OutStream_encode_vint (U32, char*); void Kino_OutStream_write_vlong (OutStream*, double); void Kino_OutStream_write_string (OutStream*, char*, STRLEN); void Kino_OutStream_destroy (OutStream*);
#endif /* include guard */
__C__
#include "KinoSearchStoreOutStream.h"
/* Constructor.  Builds an OutStream around the filehandle held in fh_sv:
 * the struct keeps its own copy of the SV plus the PerlIO output handle
 * extracted from it, an empty write buffer, and the method table.  The
 * class argument is not used here.
 */
OutStream*
Kino_OutStream_new(char* class, SV* fh_sv) {
    OutStream *self;

    Kino_New(0, self, 1, OutStream);

    /* filehandle: copy the SV, then dig out the output PerlIO* */
    self->fh_sv = newSVsv(fh_sv);
    self->fh    = IoOFP( sv_2io(fh_sv) );

    /* empty buffer positioned at the start of the file */
    Kino_New(0, self->buf, KINO_IO_STREAM_BUF_SIZE, char);
    self->buf_pos   = 0;
    self->buf_start = 0;

    /* method table */
    self->write_string = Kino_OutStream_write_string;
    self->write_vlong  = Kino_OutStream_write_vlong;
    self->write_vint   = Kino_OutStream_write_vint;
    self->write_long   = Kino_OutStream_write_long;
    self->write_int    = Kino_OutStream_write_int;
    self->write_bytes  = Kino_OutStream_write_bytes;
    self->write_byte   = Kino_OutStream_write_byte;
    self->tell         = Kino_OutStream_tell;
    self->seek         = Kino_OutStream_seek;

    return self;
}
/* Move the stream to an absolute position.  Pending bytes are flushed
 * at the old position first; afterwards the (now empty) buffer is
 * considered to start at target.
 */
void
Kino_OutStream_seek(OutStream *outstream, double target) {
    Kino_OutStream_flush(outstream);

    outstream->buf_start = target;

    /* whence 0: offset is measured from the start of the file */
    PerlIO_seek(outstream->fh, target, 0);
}
/* Report the logical write position: where the buffer starts in the
 * file plus however many bytes are waiting in it.
 */
double
Kino_OutStream_tell(OutStream *outstream) {
    Off_t logical_pos = outstream->buf_start + outstream->buf_pos;

    return logical_pos;
}
/* Return the position of the end of the file.  The buffer is flushed
 * first; the handle is then sent to the end, its position noted, and
 * finally returned to buf_start.
 */
double
Kino_OutStream_length(OutStream *outstream) {
    double  file_len;
    PerlIO *fh;

    Kino_OutStream_flush(outstream);
    fh = outstream->fh;

    /* whence 2: offset is measured from the end of the file */
    PerlIO_seek(fh, 0, 2);
    file_len = PerlIO_tell(fh);

    /* whence 0: go back to the bookmark */
    PerlIO_seek(fh, outstream->buf_start, 0);

    return file_len;
}
/* Hand the buffered bytes to the PerlIO handle and reset the buffer.
 *
 * A short or failed write is reported through Kino_confess, matching
 * Kino_OutStream_write_bytes and Kino_OutStream_absorb; previously the
 * result of PerlIO_write was ignored here and the data silently lost.
 * Nothing is written (and so nothing can fail) when the buffer is empty.
 */
void
Kino_OutStream_flush(OutStream *outstream) {
    if (outstream->buf_pos > 0) {
        int check_val = PerlIO_write(outstream->fh, outstream->buf,
                                     outstream->buf_pos);
        if (check_val != outstream->buf_pos) {
            Kino_confess("Write error: tried to write %d, got %d",
                outstream->buf_pos, check_val);
        }
    }

    /* the buffer now begins where the flushed bytes ended */
    outstream->buf_start += outstream->buf_pos;
    outstream->buf_pos    = 0;
}
/* Copy instream->len bytes from instream straight to the output handle.
 * The stream's own buffer is flushed and then reused as scratch space,
 * so the copy proceeds in chunks of at most KINO_IO_STREAM_BUF_SIZE.
 * buf_start is advanced by every chunk written.
 */
void
Kino_OutStream_absorb(OutStream *outstream, InStream *instream) {
    double  remaining;
    double  chunk;
    char   *scratch;
    int     written;

    /* flush, then "borrow" the buffer */
    Kino_OutStream_flush(outstream);
    scratch = outstream->buf;

    for (remaining = instream->len; remaining > 0; remaining -= chunk) {
        if (remaining < KINO_IO_STREAM_BUF_SIZE) {
            chunk = remaining;
        }
        else {
            chunk = KINO_IO_STREAM_BUF_SIZE;
        }

        instream->read_bytes(instream, scratch, chunk);

        written = PerlIO_write(outstream->fh, scratch, chunk);
        if (written != chunk) {
            Kino_confess("outstream->absorb error: %"UVuf", %d",
                (UV)chunk, written);
        }

        outstream->buf_start += chunk;
    }
}
/* Append a single byte to the buffer, flushing first if it is full. */
void
Kino_OutStream_write_byte(OutStream *outstream, char aChar) {
    if (outstream->buf_pos >= KINO_IO_STREAM_BUF_SIZE) {
        Kino_OutStream_flush(outstream);
    }

    outstream->buf[outstream->buf_pos] = aChar;
    outstream->buf_pos++;
}
/* Write len bytes.
 *
 * Data at least as large as the buffer bypasses it: pending bytes are
 * flushed and the data goes directly to the handle, with a short write
 * reported through Kino_confess.  Smaller data is copied into the buffer,
 * which is flushed beforehand if the new bytes would fill it.
 */
void
Kino_OutStream_write_bytes(OutStream *outstream, char *bytes, STRLEN len) {
    if (len >= KINO_IO_STREAM_BUF_SIZE) {
        int check_val;

        Kino_OutStream_flush(outstream);
        check_val = PerlIO_write(outstream->fh, bytes, len);
        if (check_val != len) {
            Kino_confess("Write error: tried to write %"UVuf", got %d",
                (UV)len, check_val);
        }
        /* nothing is buffered, so the buffer start tracks the file pos */
        outstream->buf_start += len;
    }
    else {
        /* if there's not enough room in the buffer, flush first */
        if (outstream->buf_pos + len >= KINO_IO_STREAM_BUF_SIZE) {
            Kino_OutStream_flush(outstream);
        }
        Copy(bytes, (outstream->buf + outstream->buf_pos), len, char);
        outstream->buf_pos += len;
    }
}
/* Write a U32 as the 4 bytes produced by Kino_encode_bigend_U32. */
void
Kino_OutStream_write_int(OutStream *outstream, U32 aU32) {
    unsigned char encoded[4];

    Kino_encode_bigend_U32(aU32, encoded);
    outstream->write_bytes(outstream, (char*)encoded, sizeof(encoded));
}
/* Write a "64-bit" integer, supplied as a double, as 8 bytes: the upper
 * 32 bits followed by the lower 32 bits, each run through
 * Kino_encode_bigend_U32.
 */
void
Kino_OutStream_write_long(OutStream *outstream, double aDouble) {
    unsigned char encoded[8];
    U32 upper;
    U32 lower;

    /* upper half: the quotient by 2**32, truncated */
    upper = floor( ldexp(aDouble, -32) );
    /* lower half: the remainder modulo 2**32 */
    lower = fmod(aDouble, (pow(2.0, 32.0)));

    Kino_encode_bigend_U32(upper, encoded);
    Kino_encode_bigend_U32(lower, encoded + 4);

    outstream->write_bytes(outstream, (char*)encoded, 8);
}
/* Encode a U32 as a VInt and write the resulting bytes. */
void
Kino_OutStream_write_vint(OutStream *outstream, U32 aU32) {
    char encoded[5];
    int  encoded_len = Kino_OutStream_encode_vint(aU32, encoded);

    outstream->write_bytes(outstream, encoded, encoded_len);
}
/* Encode a VInt: seven bits per byte, least significant group first,
 * with the high bit set on every byte except the last.  buf must have
 * room for at least 5 bytes.  Returns the number of bytes written.
 */
int
Kino_OutStream_encode_vint(U32 aU32, char *buf) {
    char *cursor = buf;

    /* emit continuation bytes while more than 7 bits remain */
    for ( ; aU32 > 0x7f; aU32 >>= 7) {
        *cursor++ = (char)( (aU32 & 0x7f) | 0x80 );
    }

    /* final byte, high bit clear */
    *cursor++ = (char)(aU32 & 0x7f);

    return (int)(cursor - buf);
}
/* Write a VLong supplied as a double: seven bits per byte, least
 * significant group first, high bit set on every byte except the last.
 *
 * The 10-byte scratch buffer holds 70 payload bits, so only values in
 * [0, 2**70) can be encoded.  Anything else is rejected up front via
 * Kino_confess (assumed not to return): larger values and infinity would
 * otherwise run the loop past the end of buf, and negative or NaN input
 * would hit an undefined double-to-integer conversion.
 */
void
Kino_OutStream_write_vlong(OutStream *outstream, double aDouble) {
    unsigned char buf[10];
    int num_bytes = 0;
    U32 aU32;

    /* written as a negated range test so that NaN is rejected too */
    if ( !(aDouble >= 0.0 && aDouble < ldexp(1.0, 70)) ) {
        Kino_confess("Can't write VLong: %f is out of range", aDouble);
    }

    while (aDouble > 127.0) {
        /* take modulus of num against 128 */
        aU32 = fmod(aDouble, 128);
        buf[num_bytes++] = ( (aU32 & 0x7f) | 0x80 );
        /* right shift for floating point! */
        aDouble = floor( ldexp( aDouble, -7 ) );
    }
    buf[num_bytes++] = aDouble;

    outstream->write_bytes(outstream, (char*)buf, num_bytes);
}
/* Write a length-prefixed string: len as a VInt (after conversion to
 * U32), then the len bytes themselves.
 */
void
Kino_OutStream_write_string(OutStream *outstream, char *string, STRLEN len) {
    U32 prefix = (U32)len;

    Kino_OutStream_write_vint(outstream, prefix);
    Kino_OutStream_write_bytes(outstream, string, len);
}
/* Tear the stream down.  Pending bytes are flushed before anything is
 * released; then the reference to the filehandle SV is dropped and the
 * buffer and the struct are freed, in that order.
 */
void
Kino_OutStream_destroy(OutStream *outstream) {
    /* must come first: needs both the handle and the buffer */
    Kino_OutStream_flush(outstream);

    SvREFCNT_dec(outstream->fh_sv);

    Kino_Safefree(outstream->buf);
    Kino_Safefree(outstream);
}
__POD__
KinoSearch::Store::OutStream - filehandles for writing invindexes