diff --git a/ChangeLog b/ChangeLog index 5621d6da23..e5f6dcf128 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,15 @@ +Mon Jan 21 06:40:03 2008 Nobuyoshi Nakada + + * transcode.c (transcode_dispatch): constified return value. + + * transcode_data.h (rb_transcoding): include pointer to rb_transcoder + and auxiliary data. + + * transcode_data.h (rb_transcoder): all callback functions should have + their own parameters. + + * enc/trans/{japanese,single_byte}.c: constified. + Mon Jan 21 03:45:49 2008 Nobuyoshi Nakada * string.c (rb_str_each_char): advance offset before get next char diff --git a/enc/trans/japanese.c b/enc/trans/japanese.c index 8b08f81f5d..d065d18240 100644 --- a/enc/trans/japanese.c +++ b/enc/trans/japanese.c @@ -4422,7 +4422,7 @@ from_SHIFT_JIS = { from_SHIFT_JIS_offsets, from_SHIFT_JIS_infos }; -static rb_transcoder +static const rb_transcoder rb_from_SHIFT_JIS = { "SHIFT_JIS", "UTF-8", &from_SHIFT_JIS, 3, 0, NULL, NULL, @@ -13352,7 +13352,7 @@ to_SHIFT_JIS = { to_SHIFT_JIS_offsets, to_SHIFT_JIS_infos }; -static rb_transcoder +static const rb_transcoder rb_to_SHIFT_JIS = { "UTF-8", "SHIFT_JIS", &to_SHIFT_JIS, 2, 1, NULL, NULL, @@ -18126,7 +18126,7 @@ from_EUC_JP = { from_EUC_JP_offsets, from_EUC_JP_infos }; -static rb_transcoder +static const rb_transcoder rb_from_EUC_JP = { "EUC-JP", "UTF-8", &from_EUC_JP, 3, 0, NULL, NULL, @@ -23631,7 +23631,7 @@ to_EUC_JP = { to_SHIFT_JIS_offsets, to_EUC_JP_infos }; -static rb_transcoder +static const rb_transcoder rb_to_EUC_JP = { "UTF-8", "EUC-JP", &to_EUC_JP, 2, 1, NULL, NULL, @@ -23721,9 +23721,9 @@ get_iso_2022_mode(char **in_pos) static void from_iso_2022_jp_transcoder_preprocessor(char **in_pos, char **out_pos, char *in_stop, char *out_stop, - rb_transcoder *my_transcoder, rb_transcoding *my_transcoding) { + const rb_transcoder *my_transcoder = my_transcoding->transcoder; char *in_p = *in_pos, *out_p = *out_pos; int cur_mode = ISO_2022_GZ_ASCII; unsigned char c1; @@ -23801,9 +23801,9 @@ 
select_iso_2022_mode(char **out_pos, int new_mode) static void to_iso_2022_jp_transcoder_postprocessor(char **in_pos, char **out_pos, char *in_stop, char *out_stop, - rb_transcoder *my_transcoder, rb_transcoding *my_transcoding) { + const rb_transcoder *my_transcoder = my_transcoding->transcoder; char *in_p = *in_pos, *out_p = *out_pos; int cur_mode = ISO_2022_GZ_ASCII, new_mode = 0; unsigned char next_byte; @@ -23844,13 +23844,13 @@ to_iso_2022_jp_transcoder_postprocessor(char **in_pos, char **out_pos, *out_pos = out_p; } -static rb_transcoder +static const rb_transcoder rb_from_ISO_2022_JP = { "ISO-2022-JP", "UTF-8", &from_EUC_JP, 8, 0, &from_iso_2022_jp_transcoder_preprocessor, NULL, }; -static rb_transcoder +static const rb_transcoder rb_to_ISO_2022_JP = { "UTF-8", "ISO-2022-JP", &to_EUC_JP, 8, 1, NULL, &to_iso_2022_jp_transcoder_postprocessor, diff --git a/enc/trans/single_byte.c b/enc/trans/single_byte.c index baa523447e..9e132b8bed 100644 --- a/enc/trans/single_byte.c +++ b/enc/trans/single_byte.c @@ -33,25 +33,25 @@ from_US_ASCII = { from_US_ASCII_infos }; -static rb_transcoder +static const rb_transcoder rb_from_US_ASCII = { "US-ASCII", "UTF-8", &from_US_ASCII, 1, 0, NULL, NULL, }; -static rb_transcoder +static const rb_transcoder rb_to_US_ASCII = { "UTF-8", "US-ASCII", &from_US_ASCII, 1, 1, NULL, NULL, }; -static rb_transcoder +static const rb_transcoder rb_from_ASCII_8BIT = { "ASCII-8BIT", "UTF-8", &from_US_ASCII, 1, 0, NULL, NULL, }; -static rb_transcoder +static const rb_transcoder rb_to_ASCII_8BIT = { "UTF-8", "ASCII-8BIT", &from_US_ASCII, 1, 1, NULL, NULL, @@ -127,7 +127,7 @@ from_ISO_8859_1 = { from_ISO_8859_1_infos }; -static rb_transcoder +static const rb_transcoder rb_from_ISO_8859_1 = { "ISO-8859-1", "UTF-8", &from_ISO_8859_1, 2, 0, NULL, NULL, @@ -226,7 +226,7 @@ to_ISO_8859_1 = { to_ISO_8859_1_infos }; -static rb_transcoder +static const rb_transcoder rb_to_ISO_8859_1 = { "UTF-8", "ISO-8859-1", &to_ISO_8859_1, 1, 1, NULL, NULL, @@ -274,7 
+274,7 @@ from_ISO_8859_2 = { from_ISO_8859_2_infos }; -static rb_transcoder +static const rb_transcoder rb_from_ISO_8859_2 = { "ISO-8859-2", "UTF-8", &from_ISO_8859_2, 2, 0, NULL, NULL, @@ -431,7 +431,7 @@ to_ISO_8859_2 = { to_ISO_8859_2_infos }; -static rb_transcoder +static const rb_transcoder rb_to_ISO_8859_2 = { "UTF-8", "ISO-8859-2", &to_ISO_8859_2, 1, 1, NULL, NULL, @@ -496,7 +496,7 @@ from_ISO_8859_3 = { from_ISO_8859_3_infos }; -static rb_transcoder +static const rb_transcoder rb_from_ISO_8859_3 = { "ISO-8859-3", "UTF-8", &from_ISO_8859_3, 2, 0, NULL, NULL, @@ -628,7 +628,7 @@ to_ISO_8859_3 = { to_ISO_8859_3_infos }; -static rb_transcoder +static const rb_transcoder rb_to_ISO_8859_3 = { "UTF-8", "ISO-8859-3", &to_ISO_8859_3, 1, 1, NULL, NULL, @@ -676,7 +676,7 @@ from_ISO_8859_4 = { from_ISO_8859_4_infos }; -static rb_transcoder +static const rb_transcoder rb_from_ISO_8859_4 = { "ISO-8859-4", "UTF-8", &from_ISO_8859_4, 2, 0, NULL, NULL, @@ -812,7 +812,7 @@ to_ISO_8859_4 = { to_ISO_8859_4_infos }; -static rb_transcoder +static const rb_transcoder rb_to_ISO_8859_4 = { "UTF-8", "ISO-8859-4", &to_ISO_8859_4, 1, 1, NULL, NULL, @@ -892,7 +892,7 @@ from_ISO_8859_5 = { from_ISO_8859_5_infos }; -static rb_transcoder +static const rb_transcoder rb_from_ISO_8859_5 = { "ISO-8859-5", "UTF-8", &from_ISO_8859_5, 3, 0, NULL, NULL, @@ -1044,7 +1044,7 @@ to_ISO_8859_5 = { to_ISO_8859_5_infos }; -static rb_transcoder +static const rb_transcoder rb_to_ISO_8859_5 = { "UTF-8", "ISO-8859-5", &to_ISO_8859_5, 1, 1, NULL, NULL, @@ -1100,7 +1100,7 @@ from_ISO_8859_6 = { from_ISO_8859_6_infos }; -static rb_transcoder +static const rb_transcoder rb_from_ISO_8859_6 = { "ISO-8859-6", "UTF-8", &from_ISO_8859_6, 2, 0, NULL, NULL, @@ -1207,7 +1207,7 @@ to_ISO_8859_6 = { to_ISO_8859_6_infos }; -static rb_transcoder +static const rb_transcoder rb_to_ISO_8859_6 = { "UTF-8", "ISO-8859-6", &to_ISO_8859_6, 1, 1, NULL, NULL, @@ -1305,7 +1305,7 @@ from_ISO_8859_7 = { from_ISO_8859_7_infos }; 
-static rb_transcoder +static const rb_transcoder rb_from_ISO_8859_7 = { "ISO-8859-7", "UTF-8", &from_ISO_8859_7, 3, 0, NULL, NULL, @@ -1492,7 +1492,7 @@ to_ISO_8859_7 = { to_ISO_8859_7_infos }; -static rb_transcoder +static const rb_transcoder rb_to_ISO_8859_7 = { "UTF-8", "ISO-8859-7", &to_ISO_8859_7, 1, 1, NULL, NULL, @@ -1573,7 +1573,7 @@ from_ISO_8859_8 = { from_ISO_8859_8_infos }; -static rb_transcoder +static const rb_transcoder rb_from_ISO_8859_8 = { "ISO-8859-8", "UTF-8", &from_ISO_8859_8, 3, 0, NULL, NULL, @@ -1719,7 +1719,7 @@ to_ISO_8859_8 = { to_ISO_8859_8_infos }; -static rb_transcoder +static const rb_transcoder rb_to_ISO_8859_8 = { "UTF-8", "ISO-8859-8", &to_ISO_8859_8, 1, 1, NULL, NULL, @@ -1767,7 +1767,7 @@ from_ISO_8859_9 = { from_ISO_8859_9_infos }; -static rb_transcoder +static const rb_transcoder rb_from_ISO_8859_9 = { "ISO-8859-9", "UTF-8", &from_ISO_8859_9, 2, 0, NULL, NULL, @@ -1870,7 +1870,7 @@ to_ISO_8859_9 = { to_ISO_8859_9_infos }; -static rb_transcoder +static const rb_transcoder rb_to_ISO_8859_9 = { "UTF-8", "ISO-8859-9", &to_ISO_8859_9, 1, 1, NULL, NULL, @@ -1950,7 +1950,7 @@ from_ISO_8859_10 = { from_ISO_8859_10_infos }; -static rb_transcoder +static const rb_transcoder rb_from_ISO_8859_10 = { "ISO-8859-10", "UTF-8", &from_ISO_8859_10, 3, 0, NULL, NULL, @@ -2108,7 +2108,7 @@ to_ISO_8859_10 = { to_ISO_8859_10_infos }; -static rb_transcoder +static const rb_transcoder rb_to_ISO_8859_10 = { "UTF-8", "ISO-8859-10", &to_ISO_8859_10, 1, 1, NULL, NULL, @@ -2203,7 +2203,7 @@ from_ISO_8859_11 = { from_ISO_8859_11_infos }; -static rb_transcoder +static const rb_transcoder rb_from_ISO_8859_11 = { "ISO-8859-11", "UTF-8", &from_ISO_8859_11, 3, 0, NULL, NULL, @@ -2337,7 +2337,7 @@ to_ISO_8859_11 = { to_ISO_8859_11_infos }; -static rb_transcoder +static const rb_transcoder rb_to_ISO_8859_11 = { "UTF-8", "ISO-8859-11", &to_ISO_8859_11, 1, 1, NULL, NULL, @@ -2417,7 +2417,7 @@ from_ISO_8859_13 = { from_ISO_8859_13_infos }; -static rb_transcoder 
+static const rb_transcoder rb_from_ISO_8859_13 = { "ISO-8859-13", "UTF-8", &from_ISO_8859_13, 3, 0, NULL, NULL, @@ -2562,7 +2562,7 @@ to_ISO_8859_13 = { to_ISO_8859_13_infos }; -static rb_transcoder +static const rb_transcoder rb_to_ISO_8859_13 = { "UTF-8", "ISO-8859-13", &to_ISO_8859_13, 1, 1, NULL, NULL, @@ -2642,7 +2642,7 @@ from_ISO_8859_14 = { from_ISO_8859_14_infos }; -static rb_transcoder +static const rb_transcoder rb_from_ISO_8859_14 = { "ISO-8859-14", "UTF-8", &from_ISO_8859_14, 3, 0, NULL, NULL, @@ -2864,7 +2864,7 @@ to_ISO_8859_14 = { to_ISO_8859_14_infos }; -static rb_transcoder +static const rb_transcoder rb_to_ISO_8859_14 = { "UTF-8", "ISO-8859-14", &to_ISO_8859_14, 1, 1, NULL, NULL, @@ -2944,7 +2944,7 @@ from_ISO_8859_15 = { from_ISO_8859_15_infos }; -static rb_transcoder +static const rb_transcoder rb_from_ISO_8859_15 = { "ISO-8859-15", "UTF-8", &from_ISO_8859_15, 3, 0, NULL, NULL, @@ -3064,7 +3064,7 @@ to_ISO_8859_15 = { to_ISO_8859_15_infos }; -static rb_transcoder +static const rb_transcoder rb_to_ISO_8859_15 = { "UTF-8", "ISO-8859-15", &to_ISO_8859_15, 1, 1, NULL, NULL, diff --git a/transcode.c b/transcode.c index 86d5c6ad0e..ae3c0841aa 100644 --- a/transcode.c +++ b/transcode.c @@ -110,7 +110,7 @@ init_transcoder_table(void) #define encoding_equal(enc1, enc2) (STRCASECMP(enc1, enc2) == 0) -static rb_transcoder * +static const rb_transcoder * transcode_dispatch(const char* from_encoding, const char* to_encoding) { char *const key = transcoder_key(from_encoding, to_encoding); @@ -214,17 +214,17 @@ transcode_loop(char **in_pos, char **out_pos, *out_p++ = getBT3(next_info); continue; case FUNii: - next_info = (VALUE)(*my_transcoder->func_ii)(next_info); + next_info = (VALUE)(*my_transcoder->func_ii)(next_info, my_transcoding); goto follow_info; case FUNsi: - next_info = (VALUE)(*my_transcoder->func_si)(char_start); + next_info = (VALUE)(*my_transcoder->func_si)(char_start, my_transcoding); goto follow_info; break; case FUNio: - out_p += 
(VALUE)(*my_transcoder->func_io)(next_info, out_p); + out_p += (VALUE)(*my_transcoder->func_io)(next_info, out_p, my_transcoding); break; case FUNso: - out_p += (VALUE)(*my_transcoder->func_so)(char_start, out_p); + out_p += (VALUE)(*my_transcoder->func_so)(char_start, out_p, my_transcoding); break; case INVALID: goto invalid; @@ -269,7 +269,7 @@ str_transcode(int argc, VALUE *argv, VALUE *self) const char *from_e, *to_e; int from_encidx, to_encidx; VALUE from_encval, to_encval; - rb_transcoder *my_transcoder; + const rb_transcoder *my_transcoder; rb_transcoding my_transcoding; int final_encoding = 0; @@ -307,7 +307,7 @@ str_transcode(int argc, VALUE *argv, VALUE *self) return to_encidx; } } - if (STRCASECMP(from_e, to_e) == 0) { + if (encoding_equal(from_e, to_e)) { return -1; } @@ -324,7 +324,7 @@ str_transcode(int argc, VALUE *argv, VALUE *self) dest = rb_str_tmp_new(blen); bp = RSTRING_PTR(dest); my_transcoding.ruby_string_dest = dest; - (*my_transcoder->preprocessor)(&fromp, &bp, (sp+slen), (bp+blen), my_transcoder, &my_transcoding); + (*my_transcoder->preprocessor)(&fromp, &bp, (sp+slen), (bp+blen), &my_transcoding); if (fromp != sp+slen) { rb_raise(rb_eArgError, "not fully converted, %d bytes left", sp+slen-fromp); } @@ -356,7 +356,7 @@ str_transcode(int argc, VALUE *argv, VALUE *self) dest = rb_str_tmp_new(blen); bp = RSTRING_PTR(dest); my_transcoding.ruby_string_dest = dest; - (*my_transcoder->postprocessor)(&fromp, &bp, (sp+slen), (bp+blen), my_transcoder, &my_transcoding); + (*my_transcoder->postprocessor)(&fromp, &bp, (sp+slen), (bp+blen), &my_transcoding); if (fromp != sp+slen) { rb_raise(rb_eArgError, "not fully converted, %d bytes left", sp+slen-fromp); } diff --git a/transcode_data.h b/transcode_data.h index 8d0c8ccf33..96f5eea7ca 100644 --- a/transcode_data.h +++ b/transcode_data.h @@ -59,9 +59,11 @@ typedef struct byte_lookup { /* dynamic structure, one per conversion (similar to iconv_t) */ /* may carry conversion state (e.g. 
for iso-2022-jp) */ typedef struct rb_transcoding { + struct rb_transcoder *transcoder; VALUE ruby_string_dest; /* the String used as the conversion destination, or NULL if something else is being converted */ char *(*flush_func)(struct rb_transcoding*, int, int); + VALUE auxiliary_data; } rb_transcoding; /* static structure, one per supported encoding pair */ @@ -71,14 +73,12 @@ typedef struct rb_transcoder { const BYTE_LOOKUP *conv_tree_start; int max_output; int from_utf8; - void (*preprocessor)(char**, char**, char*, char*, - struct rb_transcoder *, struct rb_transcoding *); - void (*postprocessor)(char**, char**, char*, char*, - struct rb_transcoder *, struct rb_transcoding *); - VALUE (*func_ii)(VALUE); /* info -> info */ - VALUE (*func_si)(const unsigned char* const); /* start -> info */ - int (*func_io)(VALUE, const unsigned char*); /* info -> output */ - int (*func_so)(const unsigned char*, unsigned char*); /* start -> output */ + void (*preprocessor)(char**, char**, char*, char*, struct rb_transcoding *); + void (*postprocessor)(char**, char**, char*, char*, struct rb_transcoding *); + VALUE (*func_ii)(VALUE, struct rb_transcoding *); /* info -> info */ + VALUE (*func_si)(const char *, struct rb_transcoding *); /* start -> info */ + int (*func_io)(VALUE, const char*, struct rb_transcoding *); /* info -> output */ + int (*func_so)(const char*, char*, struct rb_transcoding *); /* start -> output */ } rb_transcoder; void rb_declare_transcoder(const char *enc1, const char *enc2, const char *lib);