diff --git a/ChangeLog b/ChangeLog index 439280d822..7146e9dadf 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,17 @@ +Sun Aug 10 11:15:55 2008 Tanaka Akira + + * transcode_data.h (rb_transcoding): add feedlen field. + + * transcode.c (transcode_restartable0): renamed from + transcode_restartable. + save input buffer into feed buffer if next character is started the + point before input buffer. for example, "\x00\xd8\x01" then "\x02" + in UTF-16LE. \x02 causes invalid and next character is started from + \x01. + (transcode_restartable): new function to call + transcode_restartable0. if feed buffer is not empty, convert it at + first. + Sun Aug 10 11:02:58 2008 Nobuyoshi Nakada * common.mk (extconf): use MAKEDIRS. diff --git a/transcode.c b/transcode.c index f4c14398ec..63de574b79 100644 --- a/transcode.c +++ b/transcode.c @@ -355,7 +355,7 @@ typedef enum { } transcode_result_t; static transcode_result_t -transcode_restartable(const unsigned char **in_pos, unsigned char **out_pos, +transcode_restartable0(const unsigned char **in_pos, unsigned char **out_pos, const unsigned char *in_stop, unsigned char *out_stop, rb_transcoding *my_transcoding, const int opt) @@ -363,6 +363,7 @@ transcode_restartable(const unsigned char **in_pos, unsigned char **out_pos, { const rb_transcoder *my_transcoder = my_transcoding->transcoder; int unitlen = my_transcoder->input_unit_length; + int feedlen = 0; const unsigned char *inchar_start; const unsigned char *in_p; @@ -396,11 +397,15 @@ transcode_restartable(const unsigned char **in_pos, unsigned char **out_pos, do { \ my_transcoding->resume_position = (num); \ if (0 < in_p - inchar_start) \ - MEMCPY(TRANSCODING_READBUF(my_transcoding)+my_transcoding->readlen, \ + MEMMOVE(TRANSCODING_READBUF(my_transcoding)+my_transcoding->readlen, \ inchar_start, unsigned char, in_p - inchar_start); \ *in_pos = in_p; \ *out_pos = out_p; \ my_transcoding->readlen += in_p - inchar_start; \ + if (feedlen) { \ + my_transcoding->readlen -= feedlen; \ + 
my_transcoding->feedlen = feedlen; \ + } \ my_transcoding->next_table = next_table; \ my_transcoding->next_info = next_info; \ my_transcoding->next_byte = next_byte; \ @@ -524,12 +529,23 @@ transcode_restartable(const unsigned char **in_pos, unsigned char **out_pos, } } else { + int found_len; /* readlen + bytes consumed from input, including the last byte which causes invalid */ + int invalid_len; int step; - /* xxx: step may be negative. - * possibly in_p is lesser than *in_pos. - * caller may want to access readbuf. */ - step = (((my_transcoding->readlen + (in_p - inchar_start)) - 1) / unitlen) * unitlen - (my_transcoding->readlen + (in_p - inchar_start)); - in_p += step; + found_len = my_transcoding->readlen + (in_p - inchar_start); + invalid_len = ((found_len - 1) / unitlen) * unitlen; + step = invalid_len - found_len; + if (step < -1) { + if (-step <= in_p - *in_pos) { + in_p += step; + } + else { + feedlen = -step; + } + } + else { + in_p += step; + } } goto invalid; } @@ -559,6 +575,32 @@ transcode_restartable(const unsigned char **in_pos, unsigned char **out_pos, #undef SUSPEND } +static transcode_result_t +transcode_restartable(const unsigned char **in_pos, unsigned char **out_pos, + const unsigned char *in_stop, unsigned char *out_stop, + rb_transcoding *my_transcoding, + const int opt) +{ + if (my_transcoding->feedlen) { + unsigned char *feed_buf = ALLOCA_N(unsigned char, my_transcoding->feedlen); + const unsigned char *feed_pos = feed_buf; + const unsigned char *feed_stop = feed_buf + my_transcoding->feedlen; + transcode_result_t res; + + MEMCPY(feed_buf, TRANSCODING_READBUF(my_transcoding) + my_transcoding->readlen, + unsigned char, my_transcoding->feedlen); + my_transcoding->feedlen = 0; + res = transcode_restartable0(&feed_pos, out_pos, feed_stop, out_stop, my_transcoding, opt); + if (res != transcode_ibuf_empty) { + MEMCPY(TRANSCODING_READBUF(my_transcoding) + my_transcoding->readlen + my_transcoding->feedlen, + feed_pos, unsigned char, feed_stop - feed_pos); + my_transcoding->feedlen 
+= feed_stop - feed_pos; + return res; + } + } + return transcode_restartable0(in_pos, out_pos, in_stop, out_stop, my_transcoding, opt); +} + static void more_output_buffer( VALUE destination, @@ -590,6 +632,7 @@ transcode_loop(const unsigned char **in_pos, unsigned char **out_pos, my_transcoding->resume_position = 0; my_transcoding->readlen = 0; + my_transcoding->feedlen = 0; if (sizeof(my_transcoding->readbuf.ary) < my_transcoder->max_input) { my_transcoding->readbuf.ptr = xmalloc(my_transcoder->max_input); @@ -648,7 +691,7 @@ static void transcode_loop(const unsigned char **in_pos, unsigned char **out_pos, const unsigned char *in_stop, unsigned char *out_stop, VALUE destination, - unsigned char *(*resize_destination)(VALUE, struct rb_transcoding*, int, int), + unsigned char *(*resize_destination)(VALUE, int, int), rb_transcoding *my_transcoding, const int opt) { @@ -659,6 +702,7 @@ transcode_loop(const unsigned char **in_pos, unsigned char **out_pos, my_transcoding->resume_position = 0; my_transcoding->readlen = 0; + my_transcoding->feedlen = 0; if (sizeof(my_transcoding->readbuf.ary) < my_transcoder->max_input) { my_transcoding->readbuf.ptr = xmalloc(my_transcoder->max_input); diff --git a/transcode_data.h b/transcode_data.h index ad20a0b9df..42c3b2dc89 100644 --- a/transcode_data.h +++ b/transcode_data.h @@ -65,11 +65,12 @@ typedef struct rb_transcoding { const BYTE_LOOKUP *next_table; VALUE next_info; unsigned char next_byte; - int readlen; + int readlen; /* bytes at the start of readbuf, already interpreted */ + int feedlen; /* bytes in readbuf after the first readlen, not yet interpreted */ union { unsigned char ary[8]; /* max_input <= sizeof(ary) */ unsigned char *ptr; /* length is max_input */ - } readbuf; + } readbuf; /* first readlen + feedlen bytes are in use */ unsigned char stateful[256]; /* opaque data for stateful encoding */ } rb_transcoding;