mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
* common.mk (COMMONOBJS): transcode_data_*.c moved under enc/trans.
* transcode_data.h (rb_transcoding, rb_transcoder): prefixed. * transcode.c (rb_register_transcoder, rb_declare_transcoder): split declaration and registration. [ruby-dev:32704] * transcode.c (transcode_dispatch): autoload pre-declared transcoder. * transcode.c (str_transcode): use rb_define_dummy_encoding(). * transcode.c (Init_transcode): initialize transcoder tables. * enc/trans/single_byte.c, enc/trans/japanese.c: moved from top. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@14666 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
parent
df914b2fef
commit
b7db9036be
6 changed files with 422 additions and 238 deletions
17
ChangeLog
17
ChangeLog
|
@ -1,3 +1,20 @@
|
|||
Tue Dec 25 14:57:00 2007 Nobuyoshi Nakada <nobu@ruby-lang.org>
|
||||
|
||||
* common.mk (COMMONOBJS): transcode_data_*.c moved under enc/trans.
|
||||
|
||||
* transcode_data.h (rb_transcoding, rb_transcoder): prefixed.
|
||||
|
||||
* transcode.c (rb_register_transcoder, rb_declare_transcoder): split
|
||||
declaration and registration. [ruby-dev:32704]
|
||||
|
||||
* transcode.c (transcode_dispatch): autoload pre-declared transcoder.
|
||||
|
||||
* transcode.c (str_transcode): use rb_define_dummy_encoding().
|
||||
|
||||
* transcode.c (Init_transcode): initialize transcoder tables.
|
||||
|
||||
* enc/trans/single_byte.c, enc/trans/japanese.c: moved from top.
|
||||
|
||||
Tue Dec 25 14:20:13 2007 Yukihiro Matsumoto <matz@ruby-lang.org>
|
||||
|
||||
* lib/mkmf.rb (map_dir): should generate path including $top_srcdir.
|
||||
|
|
|
@ -60,8 +60,6 @@ COMMONOBJS = array.$(OBJEXT) \
|
|||
struct.$(OBJEXT) \
|
||||
time.$(OBJEXT) \
|
||||
transcode.$(OBJEXT) \
|
||||
transcode_data_one_byte.$(OBJEXT) \
|
||||
transcode_data_japanese.$(OBJEXT) \
|
||||
util.$(OBJEXT) \
|
||||
variable.$(OBJEXT) \
|
||||
version.$(OBJEXT) \
|
||||
|
@ -548,8 +546,6 @@ thread.$(OBJEXT): {$(VPATH)}thread.c {$(VPATH)}eval_intern.h \
|
|||
{$(VPATH)}signal.h {$(VPATH)}st.h {$(VPATH)}dln.h
|
||||
transcode.$(OBJEXT): {$(VPATH)}transcode.c {$(VPATH)}transcode_data.h {$(VPATH)}ruby.h {$(VPATH)}config.h \
|
||||
{$(VPATH)}defines.h {$(VPATH)}intern.h {$(VPATH)}missing.h {$(VPATH)}encoding.h
|
||||
transcode_data_one_byte.$(OBJEXT): {$(VPATH)}transcode_data_one_byte.c {$(VPATH)}transcode_data.h
|
||||
transcode_data_japanese.$(OBJEXT): {$(VPATH)}transcode_data_japanese.c {$(VPATH)}transcode_data.h
|
||||
cont.$(OBJEXT): {$(VPATH)}cont.c {$(VPATH)}eval_intern.h \
|
||||
{$(VPATH)}ruby.h {$(VPATH)}vm_core.h {$(VPATH)}id.h {$(VPATH)}config.h \
|
||||
{$(VPATH)}defines.h {$(VPATH)}intern.h {$(VPATH)}missing.h \
|
||||
|
|
|
@ -1,4 +1,3 @@
|
|||
#define TRANSCODE_DATA
|
||||
#include "transcode_data.h"
|
||||
|
||||
static const unsigned char
|
||||
|
@ -4418,11 +4417,16 @@ from_SHIFT_JIS_infos[108] = {
|
|||
&from_SHIFT_JIS_FA, &from_SHIFT_JIS_FB,
|
||||
&from_SHIFT_JIS_FC, UNDEF,
|
||||
};
|
||||
const BYTE_LOOKUP
|
||||
rb_from_SHIFT_JIS = {
|
||||
static const BYTE_LOOKUP
|
||||
from_SHIFT_JIS = {
|
||||
from_SHIFT_JIS_offsets,
|
||||
from_SHIFT_JIS_infos
|
||||
};
|
||||
static rb_transcoder
|
||||
rb_from_SHIFT_JIS = {
|
||||
"UTF-8", "SHIFT_JIS", &from_SHIFT_JIS, 3, 0,
|
||||
NULL, NULL,
|
||||
};
|
||||
|
||||
static const unsigned char
|
||||
to_SHIFT_JIS_C2_offsets[64] = {
|
||||
|
@ -13343,11 +13347,16 @@ to_SHIFT_JIS_infos[17] = {
|
|||
&to_SHIFT_JIS_E7, &to_SHIFT_JIS_E8, &to_SHIFT_JIS_E9, &to_SHIFT_JIS_EF,
|
||||
UNDEF,
|
||||
};
|
||||
const BYTE_LOOKUP
|
||||
rb_to_SHIFT_JIS = {
|
||||
static const BYTE_LOOKUP
|
||||
to_SHIFT_JIS = {
|
||||
to_SHIFT_JIS_offsets,
|
||||
to_SHIFT_JIS_infos
|
||||
};
|
||||
static rb_transcoder
|
||||
rb_to_SHIFT_JIS = {
|
||||
"SHIFT_JIS", "UTF-8", &to_SHIFT_JIS, 2, 1,
|
||||
NULL, NULL,
|
||||
};
|
||||
|
||||
static const unsigned char
|
||||
from_EUC_JP_8E_offsets[256] = {
|
||||
|
@ -18112,11 +18121,16 @@ from_EUC_JP_infos[85] = {
|
|||
&from_EUC_JP_F9, &from_EUC_JP_FA, &from_EUC_JP_FB, &from_EUC_JP_FC,
|
||||
UNDEF,
|
||||
};
|
||||
const BYTE_LOOKUP
|
||||
rb_from_EUC_JP = {
|
||||
static const BYTE_LOOKUP
|
||||
from_EUC_JP = {
|
||||
from_EUC_JP_offsets,
|
||||
from_EUC_JP_infos
|
||||
};
|
||||
static rb_transcoder
|
||||
rb_from_EUC_JP = {
|
||||
"UTF-8", "EUC-JP", &from_EUC_JP, 3, 0,
|
||||
NULL, NULL,
|
||||
};
|
||||
|
||||
static const struct byte_lookup* const
|
||||
to_EUC_JP_C2_infos[14] = {
|
||||
|
@ -23612,11 +23626,16 @@ to_EUC_JP_infos[17] = {
|
|||
&to_EUC_JP_E7, &to_EUC_JP_E8, &to_EUC_JP_E9, &to_EUC_JP_EF,
|
||||
UNDEF,
|
||||
};
|
||||
const BYTE_LOOKUP
|
||||
rb_to_EUC_JP = {
|
||||
static const BYTE_LOOKUP
|
||||
to_EUC_JP = {
|
||||
to_SHIFT_JIS_offsets,
|
||||
to_EUC_JP_infos
|
||||
};
|
||||
static rb_transcoder
|
||||
rb_to_EUC_JP = {
|
||||
"EUC_JP", "UTF-8", &to_EUC_JP, 2, 1,
|
||||
NULL, NULL,
|
||||
};
|
||||
|
||||
#define ISO_2022_ENCODING(escseq, byte) ((escseq<<8)|byte)
|
||||
enum ISO_2022_ESCSEQ {
|
||||
|
@ -23699,11 +23718,11 @@ get_iso_2022_mode(char **in_pos)
|
|||
return new_mode;
|
||||
}
|
||||
|
||||
void
|
||||
static void
|
||||
from_iso_2022_jp_transcoder_preprocessor(char **in_pos, char **out_pos,
|
||||
char *in_stop, char *out_stop,
|
||||
transcoder *my_transcoder,
|
||||
transcoding *my_transcoding)
|
||||
char *in_stop, char *out_stop,
|
||||
rb_transcoder *my_transcoder,
|
||||
rb_transcoding *my_transcoding)
|
||||
{
|
||||
char *in_p = *in_pos, *out_p = *out_pos;
|
||||
int cur_mode = ISO_2022_GZ_ASCII;
|
||||
|
@ -23779,11 +23798,11 @@ select_iso_2022_mode(char **out_pos, int new_mode)
|
|||
return new_mode;
|
||||
}
|
||||
|
||||
void
|
||||
static void
|
||||
to_iso_2022_jp_transcoder_postprocessor(char **in_pos, char **out_pos,
|
||||
char *in_stop, char *out_stop,
|
||||
transcoder *my_transcoder,
|
||||
transcoding *my_transcoding)
|
||||
char *in_stop, char *out_stop,
|
||||
rb_transcoder *my_transcoder,
|
||||
rb_transcoding *my_transcoding)
|
||||
{
|
||||
char *in_p = *in_pos, *out_p = *out_pos;
|
||||
int cur_mode = ISO_2022_GZ_ASCII, new_mode = 0;
|
||||
|
@ -23824,3 +23843,26 @@ to_iso_2022_jp_transcoder_postprocessor(char **in_pos, char **out_pos,
|
|||
*in_pos = in_p;
|
||||
*out_pos = out_p;
|
||||
}
|
||||
|
||||
/*
 * Transcoder descriptor for ISO-2022-JP -> UTF-8.  It reuses the from_EUC_JP
 * conversion tree and installs from_iso_2022_jp_transcoder_preprocessor as
 * its preprocessor hook; no postprocessor is set.
 * NOTE(review): max_output is 8 here versus 3 for the plain EUC-JP
 * descriptor -- presumably head-room for the hook; confirm against
 * transcode.c.
 */
static rb_transcoder
|
||||
rb_from_ISO_2022_JP = {
|
||||
"ISO-2022-JP", "UTF-8", &from_EUC_JP, 8, 0,
|
||||
&from_iso_2022_jp_transcoder_preprocessor, NULL,
|
||||
};
|
||||
|
||||
/*
 * Transcoder descriptor for UTF-8 -> ISO-2022-JP.  It reuses the to_EUC_JP
 * conversion tree and installs to_iso_2022_jp_transcoder_postprocessor as
 * its postprocessor hook; no preprocessor is set.
 * NOTE(review): max_output is 8 here versus 2 for the plain EUC-JP
 * descriptor -- presumably head-room for escape sequences written by the
 * hook; confirm against transcode.c.
 */
static rb_transcoder
|
||||
rb_to_ISO_2022_JP = {
|
||||
"UTF-8", "ISO-2022-JP", &to_EUC_JP, 8, 1,
|
||||
NULL, &to_iso_2022_jp_transcoder_postprocessor,
|
||||
};
|
||||
|
||||
void
|
||||
Init_japanese(void)
|
||||
{
|
||||
rb_register_transcoder(&rb_from_SHIFT_JIS);
|
||||
rb_register_transcoder(&rb_from_EUC_JP);
|
||||
rb_register_transcoder(&rb_to_SHIFT_JIS);
|
||||
rb_register_transcoder(&rb_to_EUC_JP);
|
||||
rb_register_transcoder(&rb_from_ISO_2022_JP);
|
||||
rb_register_transcoder(&rb_to_ISO_2022_JP);
|
||||
}
|
|
@ -1,4 +1,3 @@
|
|||
#define TRANSCODE_DATA
|
||||
#include "transcode_data.h"
|
||||
|
||||
static const unsigned char
|
||||
|
@ -65,11 +64,16 @@ from_ISO_8859_1_infos[129] = {
|
|||
o2(0xC3,0xBB), o2(0xC3,0xBC), o2(0xC3,0xBD), o2(0xC3,0xBE),
|
||||
o2(0xC3,0xBF),
|
||||
};
|
||||
const BYTE_LOOKUP
|
||||
rb_from_ISO_8859_1 = {
|
||||
static const BYTE_LOOKUP
|
||||
from_ISO_8859_1 = {
|
||||
from_ISO_8859_1_offsets,
|
||||
from_ISO_8859_1_infos
|
||||
};
|
||||
static rb_transcoder
|
||||
rb_from_ISO_8859_1 = {
|
||||
"ISO-8859-1", "UTF-8", &from_ISO_8859_1, 2, 0,
|
||||
NULL, NULL,
|
||||
};
|
||||
|
||||
static const unsigned char
|
||||
to_ISO_8859_1_C2_offsets[64] = {
|
||||
|
@ -158,11 +162,16 @@ to_ISO_8859_1_infos[4] = {
|
|||
NOMAP, &to_ISO_8859_1_C2,
|
||||
&to_ISO_8859_1_C3, UNDEF,
|
||||
};
|
||||
const BYTE_LOOKUP
|
||||
rb_to_ISO_8859_1 = {
|
||||
static const BYTE_LOOKUP
|
||||
to_ISO_8859_1 = {
|
||||
to_ISO_8859_1_offsets,
|
||||
to_ISO_8859_1_infos
|
||||
};
|
||||
static rb_transcoder
|
||||
rb_to_ISO_8859_1 = {
|
||||
"UTF-8", "ISO-8859-1", &to_ISO_8859_1, 1, 1,
|
||||
NULL, NULL,
|
||||
};
|
||||
|
||||
static const struct byte_lookup* const
|
||||
from_ISO_8859_2_infos[129] = {
|
||||
|
@ -200,11 +209,16 @@ from_ISO_8859_2_infos[129] = {
|
|||
o2(0xC5,0xB1), o2(0xC3,0xBC), o2(0xC3,0xBD), o2(0xC5,0xA3),
|
||||
o2(0xCB,0x99),
|
||||
};
|
||||
const BYTE_LOOKUP
|
||||
rb_from_ISO_8859_2 = {
|
||||
static const BYTE_LOOKUP
|
||||
from_ISO_8859_2 = {
|
||||
from_ISO_8859_1_offsets,
|
||||
from_ISO_8859_2_infos
|
||||
};
|
||||
static rb_transcoder
|
||||
rb_from_ISO_8859_2 = {
|
||||
"ISO-8859-2", "UTF-8", &from_ISO_8859_2, 2, 0,
|
||||
NULL, NULL,
|
||||
};
|
||||
|
||||
static const unsigned char
|
||||
to_ISO_8859_2_C2_offsets[64] = {
|
||||
|
@ -351,11 +365,16 @@ to_ISO_8859_2_infos[7] = {
|
|||
&to_ISO_8859_2_C5, &to_ISO_8859_2_CB,
|
||||
UNDEF,
|
||||
};
|
||||
const BYTE_LOOKUP
|
||||
rb_to_ISO_8859_2 = {
|
||||
static const BYTE_LOOKUP
|
||||
to_ISO_8859_2 = {
|
||||
to_ISO_8859_2_offsets,
|
||||
to_ISO_8859_2_infos
|
||||
};
|
||||
static rb_transcoder
|
||||
rb_to_ISO_8859_2 = {
|
||||
"UTF-8", "ISO-8859-2", &to_ISO_8859_2, 1, 1,
|
||||
NULL, NULL,
|
||||
};
|
||||
|
||||
static const unsigned char
|
||||
from_ISO_8859_3_offsets[256] = {
|
||||
|
@ -410,11 +429,16 @@ from_ISO_8859_3_infos[123] = {
|
|||
o2(0xC3,0xBA), o2(0xC3,0xBB), o2(0xC3,0xBC), o2(0xC5,0xAD),
|
||||
o2(0xC5,0x9D), o2(0xCB,0x99), UNDEF,
|
||||
};
|
||||
const BYTE_LOOKUP
|
||||
rb_from_ISO_8859_3 = {
|
||||
static const BYTE_LOOKUP
|
||||
from_ISO_8859_3 = {
|
||||
from_ISO_8859_3_offsets,
|
||||
from_ISO_8859_3_infos
|
||||
};
|
||||
static rb_transcoder
|
||||
rb_from_ISO_8859_3 = {
|
||||
"ISO-8859-3", "UTF-8", &from_ISO_8859_3, 2, 0,
|
||||
NULL, NULL,
|
||||
};
|
||||
|
||||
static const unsigned char
|
||||
to_ISO_8859_3_C2_offsets[64] = {
|
||||
|
@ -536,11 +560,16 @@ to_ISO_8859_3_infos[7] = {
|
|||
&to_ISO_8859_3_C5, &to_ISO_8859_3_CB,
|
||||
UNDEF,
|
||||
};
|
||||
const BYTE_LOOKUP
|
||||
rb_to_ISO_8859_3 = {
|
||||
static const BYTE_LOOKUP
|
||||
to_ISO_8859_3 = {
|
||||
to_ISO_8859_2_offsets,
|
||||
to_ISO_8859_3_infos
|
||||
};
|
||||
static rb_transcoder
|
||||
rb_to_ISO_8859_3 = {
|
||||
"UTF-8", "ISO-8859-3", &to_ISO_8859_3, 1, 1,
|
||||
NULL, NULL,
|
||||
};
|
||||
|
||||
static const struct byte_lookup* const
|
||||
from_ISO_8859_4_infos[129] = {
|
||||
|
@ -578,11 +607,16 @@ from_ISO_8859_4_infos[129] = {
|
|||
o2(0xC3,0xBB), o2(0xC3,0xBC), o2(0xC5,0xA9), o2(0xC5,0xAB),
|
||||
o2(0xCB,0x99),
|
||||
};
|
||||
const BYTE_LOOKUP
|
||||
rb_from_ISO_8859_4 = {
|
||||
static const BYTE_LOOKUP
|
||||
from_ISO_8859_4 = {
|
||||
from_ISO_8859_1_offsets,
|
||||
from_ISO_8859_4_infos
|
||||
};
|
||||
static rb_transcoder
|
||||
rb_from_ISO_8859_4 = {
|
||||
"ISO-8859-4", "UTF-8", &from_ISO_8859_4, 2, 0,
|
||||
NULL, NULL,
|
||||
};
|
||||
|
||||
static const unsigned char
|
||||
to_ISO_8859_4_C2_offsets[64] = {
|
||||
|
@ -708,11 +742,16 @@ to_ISO_8859_4_infos[7] = {
|
|||
&to_ISO_8859_4_C5, &to_ISO_8859_4_CB,
|
||||
UNDEF,
|
||||
};
|
||||
const BYTE_LOOKUP
|
||||
rb_to_ISO_8859_4 = {
|
||||
static const BYTE_LOOKUP
|
||||
to_ISO_8859_4 = {
|
||||
to_ISO_8859_2_offsets,
|
||||
to_ISO_8859_4_infos
|
||||
};
|
||||
static rb_transcoder
|
||||
rb_to_ISO_8859_4 = {
|
||||
"UTF-8", "ISO-8859-4", &to_ISO_8859_4, 1, 1,
|
||||
NULL, NULL,
|
||||
};
|
||||
|
||||
static const struct byte_lookup* const
|
||||
from_ISO_8859_5_infos[129] = {
|
||||
|
@ -782,11 +821,16 @@ from_ISO_8859_5_infos[129] = {
|
|||
o2(0xC2,0xA7), o2(0xD1,0x9E),
|
||||
o2(0xD1,0x9F),
|
||||
};
|
||||
const BYTE_LOOKUP
|
||||
rb_from_ISO_8859_5 = {
|
||||
static const BYTE_LOOKUP
|
||||
from_ISO_8859_5 = {
|
||||
from_ISO_8859_1_offsets,
|
||||
from_ISO_8859_5_infos
|
||||
};
|
||||
static rb_transcoder
|
||||
rb_from_ISO_8859_5 = {
|
||||
"ISO-8859-5", "UTF-8", &from_ISO_8859_5, 3, 0,
|
||||
NULL, NULL,
|
||||
};
|
||||
|
||||
static const unsigned char
|
||||
to_ISO_8859_5_C2_offsets[64] = {
|
||||
|
@ -928,11 +972,16 @@ to_ISO_8859_5_infos[6] = {
|
|||
&to_ISO_8859_5_D0, &to_ISO_8859_5_D1,
|
||||
&to_ISO_8859_5_E2, UNDEF,
|
||||
};
|
||||
const BYTE_LOOKUP
|
||||
rb_to_ISO_8859_5 = {
|
||||
static const BYTE_LOOKUP
|
||||
to_ISO_8859_5 = {
|
||||
to_ISO_8859_5_offsets,
|
||||
to_ISO_8859_5_infos
|
||||
};
|
||||
static rb_transcoder
|
||||
rb_to_ISO_8859_5 = {
|
||||
"UTF-8", "ISO-8859-5", &to_ISO_8859_5, 1, 1,
|
||||
NULL, NULL,
|
||||
};
|
||||
|
||||
static const unsigned char
|
||||
from_ISO_8859_6_offsets[256] = {
|
||||
|
@ -978,11 +1027,16 @@ from_ISO_8859_6_infos[85] = {
|
|||
o2(0xD9,0x8F), o2(0xD9,0x90), o2(0xD9,0x91), o2(0xD9,0x92),
|
||||
UNDEF,
|
||||
};
|
||||
const BYTE_LOOKUP
|
||||
rb_from_ISO_8859_6 = {
|
||||
static const BYTE_LOOKUP
|
||||
from_ISO_8859_6 = {
|
||||
from_ISO_8859_6_offsets,
|
||||
from_ISO_8859_6_infos
|
||||
};
|
||||
static rb_transcoder
|
||||
rb_from_ISO_8859_6 = {
|
||||
"ISO-8859-6", "UTF-8", &from_ISO_8859_6, 2, 0,
|
||||
NULL, NULL,
|
||||
};
|
||||
|
||||
static const unsigned char
|
||||
to_ISO_8859_6_C2_offsets[64] = {
|
||||
|
@ -1079,11 +1133,16 @@ to_ISO_8859_6_infos[5] = {
|
|||
&to_ISO_8859_6_D8, &to_ISO_8859_6_D9,
|
||||
UNDEF,
|
||||
};
|
||||
const BYTE_LOOKUP
|
||||
rb_to_ISO_8859_6 = {
|
||||
static const BYTE_LOOKUP
|
||||
to_ISO_8859_6 = {
|
||||
to_ISO_8859_6_offsets,
|
||||
to_ISO_8859_6_infos
|
||||
};
|
||||
static rb_transcoder
|
||||
rb_to_ISO_8859_6 = {
|
||||
"UTF-8", "ISO-8859-6", &to_ISO_8859_6, 1, 1,
|
||||
NULL, NULL,
|
||||
};
|
||||
|
||||
static const unsigned char
|
||||
from_ISO_8859_7_offsets[256] = {
|
||||
|
@ -1171,11 +1230,16 @@ from_ISO_8859_7_infos[127] = {
|
|||
o2(0xCF,0x8D), o2(0xCF,0x8E),
|
||||
UNDEF,
|
||||
};
|
||||
const BYTE_LOOKUP
|
||||
rb_from_ISO_8859_7 = {
|
||||
static const BYTE_LOOKUP
|
||||
from_ISO_8859_7 = {
|
||||
from_ISO_8859_7_offsets,
|
||||
from_ISO_8859_7_infos
|
||||
};
|
||||
static rb_transcoder
|
||||
rb_from_ISO_8859_7 = {
|
||||
"ISO-8859-7", "UTF-8", &from_ISO_8859_7, 3, 0,
|
||||
NULL, NULL,
|
||||
};
|
||||
|
||||
static const unsigned char
|
||||
to_ISO_8859_7_C2_offsets[64] = {
|
||||
|
@ -1352,11 +1416,16 @@ to_ISO_8859_7_infos[7] = {
|
|||
&to_ISO_8859_7_CF, &to_ISO_8859_7_E2,
|
||||
UNDEF,
|
||||
};
|
||||
const BYTE_LOOKUP
|
||||
rb_to_ISO_8859_7 = {
|
||||
static const BYTE_LOOKUP
|
||||
to_ISO_8859_7 = {
|
||||
to_ISO_8859_7_offsets,
|
||||
to_ISO_8859_7_infos
|
||||
};
|
||||
static rb_transcoder
|
||||
rb_to_ISO_8859_7 = {
|
||||
"UTF-8", "ISO-8859-7", &to_ISO_8859_7, 1, 1,
|
||||
NULL, NULL,
|
||||
};
|
||||
|
||||
static const unsigned char
|
||||
from_ISO_8859_8_offsets[256] = {
|
||||
|
@ -1427,11 +1496,16 @@ from_ISO_8859_8_infos[94] = {
|
|||
o2(0xD7,0xAA), o3(0xE2,0x80,0x8E),
|
||||
o3(0xE2,0x80,0x8F), UNDEF,
|
||||
};
|
||||
const BYTE_LOOKUP
|
||||
rb_from_ISO_8859_8 = {
|
||||
static const BYTE_LOOKUP
|
||||
from_ISO_8859_8 = {
|
||||
from_ISO_8859_8_offsets,
|
||||
from_ISO_8859_8_infos
|
||||
};
|
||||
static rb_transcoder
|
||||
rb_from_ISO_8859_8 = {
|
||||
"ISO-8859-8", "UTF-8", &from_ISO_8859_8, 3, 0,
|
||||
NULL, NULL,
|
||||
};
|
||||
|
||||
static const unsigned char
|
||||
to_ISO_8859_8_C2_offsets[64] = {
|
||||
|
@ -1567,11 +1641,16 @@ to_ISO_8859_8_infos[6] = {
|
|||
&to_ISO_8859_8_C3, &to_ISO_8859_8_D7,
|
||||
&to_ISO_8859_8_E2, UNDEF,
|
||||
};
|
||||
const BYTE_LOOKUP
|
||||
rb_to_ISO_8859_8 = {
|
||||
static const BYTE_LOOKUP
|
||||
to_ISO_8859_8 = {
|
||||
to_ISO_8859_8_offsets,
|
||||
to_ISO_8859_8_infos
|
||||
};
|
||||
static rb_transcoder
|
||||
rb_to_ISO_8859_8 = {
|
||||
"UTF-8", "ISO-8859-8", &to_ISO_8859_8, 1, 1,
|
||||
NULL, NULL,
|
||||
};
|
||||
|
||||
static const struct byte_lookup* const
|
||||
from_ISO_8859_9_infos[129] = {
|
||||
|
@ -1609,11 +1688,16 @@ from_ISO_8859_9_infos[129] = {
|
|||
o2(0xC3,0xBB), o2(0xC3,0xBC), o2(0xC4,0xB1), o2(0xC5,0x9F),
|
||||
o2(0xC3,0xBF),
|
||||
};
|
||||
const BYTE_LOOKUP
|
||||
rb_from_ISO_8859_9 = {
|
||||
static const BYTE_LOOKUP
|
||||
from_ISO_8859_9 = {
|
||||
from_ISO_8859_1_offsets,
|
||||
from_ISO_8859_9_infos
|
||||
};
|
||||
static rb_transcoder
|
||||
rb_from_ISO_8859_9 = {
|
||||
"ISO-8859-9", "UTF-8", &from_ISO_8859_9, 2, 0,
|
||||
NULL, NULL,
|
||||
};
|
||||
|
||||
static const unsigned char
|
||||
to_ISO_8859_9_C3_offsets[64] = {
|
||||
|
@ -1706,11 +1790,16 @@ to_ISO_8859_9_infos[6] = {
|
|||
&to_ISO_8859_9_C3, &to_ISO_8859_9_C4,
|
||||
&to_ISO_8859_9_C5, UNDEF,
|
||||
};
|
||||
const BYTE_LOOKUP
|
||||
rb_to_ISO_8859_9 = {
|
||||
static const BYTE_LOOKUP
|
||||
to_ISO_8859_9 = {
|
||||
to_ISO_8859_9_offsets,
|
||||
to_ISO_8859_9_infos
|
||||
};
|
||||
static rb_transcoder
|
||||
rb_to_ISO_8859_9 = {
|
||||
"UTF-8", "ISO-8859-9", &to_ISO_8859_9, 1, 1,
|
||||
NULL, NULL,
|
||||
};
|
||||
|
||||
static const struct byte_lookup* const
|
||||
from_ISO_8859_10_infos[129] = {
|
||||
|
@ -1780,11 +1869,16 @@ from_ISO_8859_10_infos[129] = {
|
|||
o2(0xC3,0xBD), o2(0xC3,0xBE),
|
||||
o2(0xC4,0xB8),
|
||||
};
|
||||
const BYTE_LOOKUP
|
||||
rb_from_ISO_8859_10 = {
|
||||
static const BYTE_LOOKUP
|
||||
from_ISO_8859_10 = {
|
||||
from_ISO_8859_1_offsets,
|
||||
from_ISO_8859_10_infos
|
||||
};
|
||||
static rb_transcoder
|
||||
rb_from_ISO_8859_10 = {
|
||||
"ISO-8859-10", "UTF-8", &from_ISO_8859_10, 3, 0,
|
||||
NULL, NULL,
|
||||
};
|
||||
|
||||
static const unsigned char
|
||||
to_ISO_8859_10_C2_offsets[64] = {
|
||||
|
@ -1932,11 +2026,16 @@ to_ISO_8859_10_infos[7] = {
|
|||
&to_ISO_8859_10_C5, &to_ISO_8859_10_E2,
|
||||
UNDEF,
|
||||
};
|
||||
const BYTE_LOOKUP
|
||||
rb_to_ISO_8859_10 = {
|
||||
static const BYTE_LOOKUP
|
||||
to_ISO_8859_10 = {
|
||||
to_ISO_8859_10_offsets,
|
||||
to_ISO_8859_10_infos
|
||||
};
|
||||
static rb_transcoder
|
||||
rb_to_ISO_8859_10 = {
|
||||
"UTF-8", "ISO-8859-10", &to_ISO_8859_10, 1, 1,
|
||||
NULL, NULL,
|
||||
};
|
||||
|
||||
static const unsigned char
|
||||
from_ISO_8859_11_offsets[256] = {
|
||||
|
@ -2021,11 +2120,16 @@ from_ISO_8859_11_infos[122] = {
|
|||
o3(0xE0,0xB9,0x99), o3(0xE0,0xB9,0x9A),
|
||||
o3(0xE0,0xB9,0x9B), UNDEF,
|
||||
};
|
||||
const BYTE_LOOKUP
|
||||
rb_from_ISO_8859_11 = {
|
||||
static const BYTE_LOOKUP
|
||||
from_ISO_8859_11 = {
|
||||
from_ISO_8859_11_offsets,
|
||||
from_ISO_8859_11_infos
|
||||
};
|
||||
static rb_transcoder
|
||||
rb_from_ISO_8859_11 = {
|
||||
"ISO-8859-11", "UTF-8", &from_ISO_8859_11, 3, 0,
|
||||
NULL, NULL,
|
||||
};
|
||||
|
||||
static const unsigned char
|
||||
to_ISO_8859_11_C2_offsets[64] = {
|
||||
|
@ -2149,11 +2253,16 @@ to_ISO_8859_11_infos[4] = {
|
|||
NOMAP, &to_ISO_8859_11_C2,
|
||||
&to_ISO_8859_11_E0, UNDEF,
|
||||
};
|
||||
const BYTE_LOOKUP
|
||||
rb_to_ISO_8859_11 = {
|
||||
static const BYTE_LOOKUP
|
||||
to_ISO_8859_11 = {
|
||||
to_ISO_8859_11_offsets,
|
||||
to_ISO_8859_11_infos
|
||||
};
|
||||
static rb_transcoder
|
||||
rb_to_ISO_8859_11 = {
|
||||
"UTF-8", "ISO-8859-11", &to_ISO_8859_11, 1, 1,
|
||||
NULL, NULL,
|
||||
};
|
||||
|
||||
static const struct byte_lookup* const
|
||||
from_ISO_8859_13_infos[129] = {
|
||||
|
@ -2223,11 +2332,16 @@ from_ISO_8859_13_infos[129] = {
|
|||
o2(0xC5,0xBC), o2(0xC5,0xBE),
|
||||
o3(0xE2,0x80,0x99),
|
||||
};
|
||||
const BYTE_LOOKUP
|
||||
rb_from_ISO_8859_13 = {
|
||||
static const BYTE_LOOKUP
|
||||
from_ISO_8859_13 = {
|
||||
from_ISO_8859_1_offsets,
|
||||
from_ISO_8859_13_infos
|
||||
};
|
||||
static rb_transcoder
|
||||
rb_from_ISO_8859_13 = {
|
||||
"ISO-8859-13", "UTF-8", &from_ISO_8859_13, 3, 0,
|
||||
NULL, NULL,
|
||||
};
|
||||
|
||||
static const unsigned char
|
||||
to_ISO_8859_13_C2_offsets[64] = {
|
||||
|
@ -2362,11 +2476,16 @@ to_ISO_8859_13_infos[7] = {
|
|||
&to_ISO_8859_13_C5, &to_ISO_8859_13_E2,
|
||||
UNDEF,
|
||||
};
|
||||
const BYTE_LOOKUP
|
||||
rb_to_ISO_8859_13 = {
|
||||
static const BYTE_LOOKUP
|
||||
to_ISO_8859_13 = {
|
||||
to_ISO_8859_10_offsets,
|
||||
to_ISO_8859_13_infos
|
||||
};
|
||||
static rb_transcoder
|
||||
rb_to_ISO_8859_13 = {
|
||||
"UTF-8", "ISO-8859-13", &to_ISO_8859_13, 1, 1,
|
||||
NULL, NULL,
|
||||
};
|
||||
|
||||
static const struct byte_lookup* const
|
||||
from_ISO_8859_14_infos[129] = {
|
||||
|
@ -2436,11 +2555,16 @@ from_ISO_8859_14_infos[129] = {
|
|||
o2(0xC3,0xBD), o2(0xC5,0xB7),
|
||||
o2(0xC3,0xBF),
|
||||
};
|
||||
const BYTE_LOOKUP
|
||||
rb_from_ISO_8859_14 = {
|
||||
static const BYTE_LOOKUP
|
||||
from_ISO_8859_14 = {
|
||||
from_ISO_8859_1_offsets,
|
||||
from_ISO_8859_14_infos
|
||||
};
|
||||
static rb_transcoder
|
||||
rb_from_ISO_8859_14 = {
|
||||
"ISO-8859-14", "UTF-8", &from_ISO_8859_14, 3, 0,
|
||||
NULL, NULL,
|
||||
};
|
||||
|
||||
static const unsigned char
|
||||
to_ISO_8859_14_C2_offsets[64] = {
|
||||
|
@ -2652,11 +2776,16 @@ to_ISO_8859_14_infos[7] = {
|
|||
&to_ISO_8859_14_C5, &to_ISO_8859_14_E1,
|
||||
UNDEF,
|
||||
};
|
||||
const BYTE_LOOKUP
|
||||
rb_to_ISO_8859_14 = {
|
||||
static const BYTE_LOOKUP
|
||||
to_ISO_8859_14 = {
|
||||
to_ISO_8859_14_offsets,
|
||||
to_ISO_8859_14_infos
|
||||
};
|
||||
static rb_transcoder
|
||||
rb_to_ISO_8859_14 = {
|
||||
"UTF-8", "ISO-8859-14", &to_ISO_8859_14, 1, 1,
|
||||
NULL, NULL,
|
||||
};
|
||||
|
||||
static const struct byte_lookup* const
|
||||
from_ISO_8859_15_infos[129] = {
|
||||
|
@ -2726,11 +2855,16 @@ from_ISO_8859_15_infos[129] = {
|
|||
o2(0xC3,0xBD), o2(0xC3,0xBE),
|
||||
o2(0xC3,0xBF),
|
||||
};
|
||||
const BYTE_LOOKUP
|
||||
rb_from_ISO_8859_15 = {
|
||||
static const BYTE_LOOKUP
|
||||
from_ISO_8859_15 = {
|
||||
from_ISO_8859_1_offsets,
|
||||
from_ISO_8859_15_infos
|
||||
};
|
||||
static rb_transcoder
|
||||
rb_from_ISO_8859_15 = {
|
||||
"ISO-8859-15", "UTF-8", &from_ISO_8859_15, 3, 0,
|
||||
NULL, NULL,
|
||||
};
|
||||
|
||||
static const unsigned char
|
||||
to_ISO_8859_15_C2_offsets[64] = {
|
||||
|
@ -2840,10 +2974,47 @@ to_ISO_8859_15_infos[6] = {
|
|||
&to_ISO_8859_1_C3, &to_ISO_8859_15_C5,
|
||||
&to_ISO_8859_15_E2, UNDEF,
|
||||
};
|
||||
const BYTE_LOOKUP
|
||||
rb_to_ISO_8859_15 = {
|
||||
static const BYTE_LOOKUP
|
||||
to_ISO_8859_15 = {
|
||||
to_ISO_8859_15_offsets,
|
||||
to_ISO_8859_15_infos
|
||||
};
|
||||
static rb_transcoder
|
||||
rb_to_ISO_8859_15 = {
|
||||
"UTF-8", "ISO-8859-15", &to_ISO_8859_15, 1, 1,
|
||||
NULL, NULL,
|
||||
};
|
||||
|
||||
void
|
||||
Init_one_byte(void)
|
||||
{
|
||||
rb_register_transcoder(&rb_from_ISO_8859_1);
|
||||
rb_register_transcoder(&rb_from_ISO_8859_2);
|
||||
rb_register_transcoder(&rb_from_ISO_8859_3);
|
||||
rb_register_transcoder(&rb_from_ISO_8859_4);
|
||||
rb_register_transcoder(&rb_from_ISO_8859_5);
|
||||
rb_register_transcoder(&rb_from_ISO_8859_6);
|
||||
rb_register_transcoder(&rb_from_ISO_8859_7);
|
||||
rb_register_transcoder(&rb_from_ISO_8859_8);
|
||||
rb_register_transcoder(&rb_from_ISO_8859_9);
|
||||
rb_register_transcoder(&rb_from_ISO_8859_10);
|
||||
rb_register_transcoder(&rb_from_ISO_8859_11);
|
||||
rb_register_transcoder(&rb_from_ISO_8859_13);
|
||||
rb_register_transcoder(&rb_from_ISO_8859_14);
|
||||
rb_register_transcoder(&rb_from_ISO_8859_15);
|
||||
rb_register_transcoder(&rb_to_ISO_8859_1);
|
||||
rb_register_transcoder(&rb_to_ISO_8859_2);
|
||||
rb_register_transcoder(&rb_to_ISO_8859_3);
|
||||
rb_register_transcoder(&rb_to_ISO_8859_4);
|
||||
rb_register_transcoder(&rb_to_ISO_8859_5);
|
||||
rb_register_transcoder(&rb_to_ISO_8859_6);
|
||||
rb_register_transcoder(&rb_to_ISO_8859_7);
|
||||
rb_register_transcoder(&rb_to_ISO_8859_8);
|
||||
rb_register_transcoder(&rb_to_ISO_8859_9);
|
||||
rb_register_transcoder(&rb_to_ISO_8859_10);
|
||||
rb_register_transcoder(&rb_to_ISO_8859_11);
|
||||
rb_register_transcoder(&rb_to_ISO_8859_13);
|
||||
rb_register_transcoder(&rb_to_ISO_8859_14);
|
||||
rb_register_transcoder(&rb_to_ISO_8859_15);
|
||||
}
|
||||
/* Footprint (bytes): gross: 26788, saved: 3728, net: 23060 */
|
254
transcode.c
254
transcode.c
|
@ -12,9 +12,9 @@
|
|||
|
||||
#include "ruby/ruby.h"
|
||||
#include "ruby/encoding.h"
|
||||
|
||||
#define PType (int)
|
||||
#include "transcode_data.h"
|
||||
|
||||
#include <ctype.h>
|
||||
|
||||
VALUE rb_str_tmp_new(long);
|
||||
VALUE rb_str_shared_replace(VALUE, VALUE);
|
||||
|
@ -23,168 +23,122 @@ VALUE rb_str_shared_replace(VALUE, VALUE);
|
|||
* Dispatch data and logic
|
||||
*/
|
||||
|
||||
/* extern declarations, should use some include file here */
|
||||
extern const BYTE_LOOKUP rb_from_ISO_8859_1;
|
||||
extern const BYTE_LOOKUP rb_from_ISO_8859_2;
|
||||
extern const BYTE_LOOKUP rb_from_ISO_8859_3;
|
||||
extern const BYTE_LOOKUP rb_from_ISO_8859_4;
|
||||
extern const BYTE_LOOKUP rb_from_ISO_8859_5;
|
||||
extern const BYTE_LOOKUP rb_from_ISO_8859_6;
|
||||
extern const BYTE_LOOKUP rb_from_ISO_8859_7;
|
||||
extern const BYTE_LOOKUP rb_from_ISO_8859_8;
|
||||
extern const BYTE_LOOKUP rb_from_ISO_8859_9;
|
||||
extern const BYTE_LOOKUP rb_from_ISO_8859_10;
|
||||
extern const BYTE_LOOKUP rb_from_ISO_8859_11;
|
||||
extern const BYTE_LOOKUP rb_from_ISO_8859_13;
|
||||
extern const BYTE_LOOKUP rb_from_ISO_8859_14;
|
||||
extern const BYTE_LOOKUP rb_from_ISO_8859_15;
|
||||
static st_table *transcoder_table, *transcoder_lib_table;
|
||||
|
||||
extern const BYTE_LOOKUP rb_to_ISO_8859_1;
|
||||
extern const BYTE_LOOKUP rb_to_ISO_8859_2;
|
||||
extern const BYTE_LOOKUP rb_to_ISO_8859_3;
|
||||
extern const BYTE_LOOKUP rb_to_ISO_8859_4;
|
||||
extern const BYTE_LOOKUP rb_to_ISO_8859_5;
|
||||
extern const BYTE_LOOKUP rb_to_ISO_8859_6;
|
||||
extern const BYTE_LOOKUP rb_to_ISO_8859_7;
|
||||
extern const BYTE_LOOKUP rb_to_ISO_8859_8;
|
||||
extern const BYTE_LOOKUP rb_to_ISO_8859_9;
|
||||
extern const BYTE_LOOKUP rb_to_ISO_8859_10;
|
||||
extern const BYTE_LOOKUP rb_to_ISO_8859_11;
|
||||
extern const BYTE_LOOKUP rb_to_ISO_8859_13;
|
||||
extern const BYTE_LOOKUP rb_to_ISO_8859_14;
|
||||
extern const BYTE_LOOKUP rb_to_ISO_8859_15;
|
||||
#define TRANSCODER_INTERNAL_SEPARATOR '\t'
|
||||
|
||||
extern const BYTE_LOOKUP rb_from_SHIFT_JIS;
|
||||
extern const BYTE_LOOKUP rb_from_EUC_JP;
|
||||
|
||||
extern const BYTE_LOOKUP rb_to_SHIFT_JIS;
|
||||
extern const BYTE_LOOKUP rb_to_EUC_JP;
|
||||
|
||||
extern void from_iso_2022_jp_transcoder_preprocessor(char**, char**, char*, char*,
|
||||
struct transcoder_st *transcoder, struct transcoding*);
|
||||
extern void to_iso_2022_jp_transcoder_postprocessor(char**, char**, char*, char*,
|
||||
struct transcoder_st *transcoder, struct transcoding*);
|
||||
|
||||
/* declarations probably need to go into separate header file, e.g. transcode.h */
|
||||
|
||||
/* todo: dynamic structure, one per conversion (stream) */
|
||||
|
||||
/* in the future, add some mechanism for dynamically adding stuff here */
|
||||
#define MAX_TRANSCODERS 35 /* todo: fix: this number has to be adjusted by hand */
|
||||
static transcoder transcoder_table[MAX_TRANSCODERS];
|
||||
/* variable to work across register_transcoder and register_functional_transcoder */
|
||||
static int next_transcoder_position = 0;
|
||||
|
||||
/* not sure why it's not possible to do relocatable initializations */
|
||||
/* maybe the code here can be removed (changed to simple initialization) */
|
||||
/* if we move this to another file???? */
|
||||
static void
|
||||
register_transcoder(const char *from_e, const char *to_e,
|
||||
const BYTE_LOOKUP *tree_start, int max_output, int from_utf8)
|
||||
static char *
|
||||
transcoder_key(const char *from_e, const char *to_e)
|
||||
{
|
||||
if (next_transcoder_position >= MAX_TRANSCODERS) {
|
||||
/* we are initializing, is it okay to use rb_raise here? */
|
||||
rb_raise(rb_eRuntimeError /*change exception*/, "not enough transcoder slots");
|
||||
}
|
||||
transcoder_table[next_transcoder_position].from_encoding = from_e;
|
||||
transcoder_table[next_transcoder_position].to_encoding = to_e;
|
||||
transcoder_table[next_transcoder_position].conv_tree_start = tree_start;
|
||||
transcoder_table[next_transcoder_position].max_output = max_output;
|
||||
transcoder_table[next_transcoder_position].from_utf8 = from_utf8;
|
||||
int to_len = strlen(to_e);
|
||||
int from_len = strlen(from_e);
|
||||
char *const key = xmalloc(to_len + from_len + 2);
|
||||
|
||||
next_transcoder_position++;
|
||||
memcpy(key, to_e, to_len);
|
||||
memcpy(key + to_len + 1, from_e, from_len + 1);
|
||||
key[to_len] = TRANSCODER_INTERNAL_SEPARATOR;
|
||||
return key;
|
||||
}
|
||||
|
||||
void
|
||||
rb_register_transcoder(const rb_transcoder *tr)
|
||||
{
|
||||
st_data_t k, val = 0;
|
||||
const char *const from_e = tr->from_encoding;
|
||||
const char *const to_e = tr->to_encoding;
|
||||
char *const key = transcoder_key(from_e, to_e);
|
||||
|
||||
if (st_lookup(transcoder_table, (st_data_t)key, &val)) {
|
||||
xfree(key);
|
||||
rb_raise(rb_eArgError, "transcoder from %s to %s has been already registered",
|
||||
from_e, to_e);
|
||||
}
|
||||
k = (st_data_t)key;
|
||||
if (st_delete(transcoder_lib_table, &k, &val)) {
|
||||
xfree((char *)k);
|
||||
}
|
||||
st_insert(transcoder_table, (st_data_t)key, (st_data_t)tr);
|
||||
}
|
||||
|
||||
static void
|
||||
register_functional_transcoder(const char *from_e, const char *to_e,
|
||||
const BYTE_LOOKUP *tree_start, int max_output, int from_utf8,
|
||||
void (*preprocessor)(char**, char**, char*, char*, transcoder*, transcoding*),
|
||||
void (*postprocessor)(char**, char**, char*, char*, transcoder*, transcoding*))
|
||||
declare_transcoder(const char *to, const char *from, const char *lib)
|
||||
{
|
||||
if (next_transcoder_position >= MAX_TRANSCODERS) {
|
||||
/* we are initializing, is it okay to use rb_raise here? */
|
||||
rb_raise(rb_eRuntimeError /*change exception*/, "not enough transcoder slots");
|
||||
}
|
||||
transcoder_table[next_transcoder_position].from_encoding = from_e;
|
||||
transcoder_table[next_transcoder_position].to_encoding = to_e;
|
||||
transcoder_table[next_transcoder_position].conv_tree_start = tree_start;
|
||||
transcoder_table[next_transcoder_position].max_output = max_output;
|
||||
transcoder_table[next_transcoder_position].from_utf8 = from_utf8;
|
||||
transcoder_table[next_transcoder_position].conv_tree_start = tree_start;
|
||||
transcoder_table[next_transcoder_position].preprocessor = preprocessor;
|
||||
transcoder_table[next_transcoder_position].postprocessor = postprocessor;
|
||||
const char *const key = transcoder_key(to, from);
|
||||
st_data_t k = (st_data_t)key, val;
|
||||
|
||||
next_transcoder_position++;
|
||||
if (st_delete(transcoder_lib_table, &k, &val)) {
|
||||
xfree((char *)k);
|
||||
}
|
||||
st_insert(transcoder_lib_table, (st_data_t)key, (st_data_t)lib);
|
||||
}
|
||||
|
||||
#define MAX_TRANSCODER_LIBNAME_LEN 64
|
||||
static const char transcoder_lib_prefix[] = "enc/trans/";
|
||||
|
||||
void
|
||||
rb_declare_transcoder(const char *enc1, const char *enc2, const char *lib)
|
||||
{
|
||||
if (!lib || strlen(lib) > MAX_TRANSCODER_LIBNAME_LEN) {
|
||||
rb_raise(rb_eArgError, "invalid library name - %s",
|
||||
lib ? lib : "(null)");
|
||||
}
|
||||
declare_transcoder(enc1, enc2, lib);
|
||||
declare_transcoder(enc2, enc1, lib);
|
||||
}
|
||||
|
||||
static void
|
||||
init_transcoder_table(void)
|
||||
{
|
||||
register_transcoder("ISO-8859-1", "UTF-8", &rb_from_ISO_8859_1, 2, 0);
|
||||
register_transcoder("ISO-8859-2", "UTF-8", &rb_from_ISO_8859_2, 2, 0);
|
||||
register_transcoder("ISO-8859-3", "UTF-8", &rb_from_ISO_8859_3, 2, 0);
|
||||
register_transcoder("ISO-8859-4", "UTF-8", &rb_from_ISO_8859_4, 2, 0);
|
||||
register_transcoder("ISO-8859-5", "UTF-8", &rb_from_ISO_8859_5, 3, 0);
|
||||
register_transcoder("ISO-8859-6", "UTF-8", &rb_from_ISO_8859_6, 2, 0);
|
||||
register_transcoder("ISO-8859-7", "UTF-8", &rb_from_ISO_8859_7, 3, 0);
|
||||
register_transcoder("ISO-8859-8", "UTF-8", &rb_from_ISO_8859_8, 3, 0);
|
||||
register_transcoder("ISO-8859-9", "UTF-8", &rb_from_ISO_8859_9, 2, 0);
|
||||
register_transcoder("ISO-8859-10", "UTF-8", &rb_from_ISO_8859_10, 3, 0);
|
||||
register_transcoder("ISO-8859-11", "UTF-8", &rb_from_ISO_8859_11, 3, 0);
|
||||
register_transcoder("ISO-8859-13", "UTF-8", &rb_from_ISO_8859_13, 3, 0);
|
||||
register_transcoder("ISO-8859-14", "UTF-8", &rb_from_ISO_8859_14, 3, 0);
|
||||
register_transcoder("ISO-8859-15", "UTF-8", &rb_from_ISO_8859_15, 3, 0);
|
||||
register_transcoder("UTF-8", "ISO-8859-1", &rb_to_ISO_8859_1, 1, 1);
|
||||
register_transcoder("UTF-8", "ISO-8859-2", &rb_to_ISO_8859_2, 1, 1);
|
||||
register_transcoder("UTF-8", "ISO-8859-3", &rb_to_ISO_8859_3, 1, 1);
|
||||
register_transcoder("UTF-8", "ISO-8859-4", &rb_to_ISO_8859_4, 1, 1);
|
||||
register_transcoder("UTF-8", "ISO-8859-5", &rb_to_ISO_8859_5, 1, 1);
|
||||
register_transcoder("UTF-8", "ISO-8859-6", &rb_to_ISO_8859_6, 1, 1);
|
||||
register_transcoder("UTF-8", "ISO-8859-7", &rb_to_ISO_8859_7, 1, 1);
|
||||
register_transcoder("UTF-8", "ISO-8859-8", &rb_to_ISO_8859_8, 1, 1);
|
||||
register_transcoder("UTF-8", "ISO-8859-9", &rb_to_ISO_8859_9, 1, 1);
|
||||
register_transcoder("UTF-8", "ISO-8859-10", &rb_to_ISO_8859_10, 1, 1);
|
||||
register_transcoder("UTF-8", "ISO-8859-11", &rb_to_ISO_8859_11, 1, 1);
|
||||
register_transcoder("UTF-8", "ISO-8859-13", &rb_to_ISO_8859_13, 1, 1);
|
||||
register_transcoder("UTF-8", "ISO-8859-14", &rb_to_ISO_8859_14, 1, 1);
|
||||
register_transcoder("UTF-8", "ISO-8859-15", &rb_to_ISO_8859_15, 1, 1);
|
||||
|
||||
register_transcoder("SHIFT_JIS", "UTF-8", &rb_from_SHIFT_JIS, 3, 0);
|
||||
register_transcoder("EUC-JP", "UTF-8", &rb_from_EUC_JP, 3, 0);
|
||||
register_transcoder("UTF-8", "SHIFT_JIS", &rb_to_SHIFT_JIS, 2, 1);
|
||||
register_transcoder("UTF-8", "EUC-JP", &rb_to_EUC_JP, 2, 1);
|
||||
register_functional_transcoder("ISO-2022-JP", "UTF-8", &rb_from_EUC_JP,
|
||||
8, 0, &from_iso_2022_jp_transcoder_preprocessor, NULL);
|
||||
register_functional_transcoder("UTF-8", "ISO-2022-JP", &rb_to_EUC_JP,
|
||||
8, 1, NULL, &to_iso_2022_jp_transcoder_postprocessor);
|
||||
|
||||
register_transcoder(NULL, NULL, NULL, 0, 0);
|
||||
rb_declare_transcoder("ISO-8859-1", "UTF-8", "single_byte");
|
||||
rb_declare_transcoder("ISO-8859-2", "UTF-8", "single_byte");
|
||||
rb_declare_transcoder("ISO-8859-3", "UTF-8", "single_byte");
|
||||
rb_declare_transcoder("ISO-8859-4", "UTF-8", "single_byte");
|
||||
rb_declare_transcoder("ISO-8859-5", "UTF-8", "single_byte");
|
||||
rb_declare_transcoder("ISO-8859-6", "UTF-8", "single_byte");
|
||||
rb_declare_transcoder("ISO-8859-7", "UTF-8", "single_byte");
|
||||
rb_declare_transcoder("ISO-8859-8", "UTF-8", "single_byte");
|
||||
rb_declare_transcoder("ISO-8859-9", "UTF-8", "single_byte");
|
||||
rb_declare_transcoder("ISO-8859-10", "UTF-8", "single_byte");
|
||||
rb_declare_transcoder("ISO-8859-11", "UTF-8", "single_byte");
|
||||
rb_declare_transcoder("ISO-8859-13", "UTF-8", "single_byte");
|
||||
rb_declare_transcoder("ISO-8859-14", "UTF-8", "single_byte");
|
||||
rb_declare_transcoder("ISO-8859-15", "UTF-8", "single_byte");
|
||||
rb_declare_transcoder("SHIFT_JIS", "UTF-8", "japanese");
|
||||
rb_declare_transcoder("EUC-JP", "UTF-8", "japanese");
|
||||
rb_declare_transcoder("ISO-2022-JP", "UTF-8", "japanese");
|
||||
}
|
||||
|
||||
static int
|
||||
encoding_equal(const char* encoding1, const char* encoding2)
|
||||
{
|
||||
return 0==strcasecmp(encoding1, encoding2);
|
||||
}
|
||||
#define encoding_equal(enc1, enc2) (strcasecmp(enc1, enc2) == 0)
|
||||
|
||||
static transcoder*
|
||||
static rb_transcoder *
|
||||
transcode_dispatch(const char* from_encoding, const char* to_encoding)
|
||||
{
|
||||
transcoder *candidate = transcoder_table;
|
||||
char *const key = transcoder_key(from_encoding, to_encoding);
|
||||
st_data_t k, val = 0;
|
||||
|
||||
for (candidate = transcoder_table; candidate->from_encoding; candidate++) {
|
||||
if (encoding_equal(from_encoding, candidate->from_encoding)
|
||||
&& encoding_equal(to_encoding, candidate->to_encoding)) {
|
||||
return candidate;
|
||||
k = (st_data_t)key;
|
||||
if (!st_lookup(transcoder_table, k, &val) &&
|
||||
st_delete(transcoder_lib_table, &k, &val)) {
|
||||
const char *const lib = (const char *)val;
|
||||
int len = strlen(lib);
|
||||
char path[sizeof(transcoder_lib_prefix) + MAX_TRANSCODER_LIBNAME_LEN];
|
||||
|
||||
xfree((char *)k);
|
||||
if (len > MAX_TRANSCODER_LIBNAME_LEN) return NULL;
|
||||
memcpy(path, transcoder_lib_prefix, sizeof(transcoder_lib_prefix) - 1);
|
||||
memcpy(path + sizeof(transcoder_lib_prefix) - 1, lib, len + 1);
|
||||
if (!rb_require(path)) return NULL;
|
||||
if (!st_lookup(transcoder_table, (st_data_t)key, &val)) {
|
||||
/* multistep logic, via UTF-8 */
|
||||
if (!encoding_equal(from_encoding, "UTF-8") &&
|
||||
!encoding_equal(to_encoding, "UTF-8") &&
|
||||
transcode_dispatch("UTF-8", to_encoding)) { /* check that we have a second step */
|
||||
return transcode_dispatch(from_encoding, "UTF-8"); /* return first step */
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
/* multistep logic, via UTF-8 */
|
||||
if (!encoding_equal(from_encoding, "UTF-8")
|
||||
&& !encoding_equal(to_encoding, "UTF-8")
|
||||
&& transcode_dispatch("UTF-8", to_encoding)) { /* check that we have a second step */
|
||||
return transcode_dispatch(from_encoding, "UTF-8"); /* return first step */
|
||||
}
|
||||
return NULL;
|
||||
return (rb_transcoder *)val;
|
||||
}
|
||||
|
||||
|
||||
|
@ -194,8 +148,8 @@ transcode_dispatch(const char* from_encoding, const char* to_encoding)
|
|||
static void
|
||||
transcode_loop(char **in_pos, char **out_pos,
|
||||
char *in_stop, char *out_stop,
|
||||
transcoder *my_transcoder,
|
||||
transcoding *my_transcoding)
|
||||
const rb_transcoder *my_transcoder,
|
||||
rb_transcoding *my_transcoding)
|
||||
{
|
||||
char *in_p = *in_pos, *out_p = *out_pos;
|
||||
const BYTE_LOOKUP *conv_tree_start = my_transcoder->conv_tree_start;
|
||||
|
@ -280,7 +234,7 @@ transcode_loop(char **in_pos, char **out_pos,
|
|||
*/
|
||||
|
||||
static char *
|
||||
str_transcoding_resize(transcoding *my_transcoding, int len, int new_len)
|
||||
str_transcoding_resize(rb_transcoding *my_transcoding, int len, int new_len)
|
||||
{
|
||||
VALUE dest_string = my_transcoding->ruby_string_dest;
|
||||
rb_str_resize(dest_string, new_len);
|
||||
|
@ -298,8 +252,8 @@ str_transcode(int argc, VALUE *argv, VALUE *self)
|
|||
const char *from_e, *to_e;
|
||||
int from_encidx, to_encidx;
|
||||
VALUE from_encval, to_encval;
|
||||
transcoder *my_transcoder;
|
||||
transcoding my_transcoding;
|
||||
rb_transcoder *my_transcoder;
|
||||
rb_transcoding my_transcoding;
|
||||
int final_encoding = 0;
|
||||
|
||||
if (argc<1 || argc>2) {
|
||||
|
@ -307,6 +261,7 @@ str_transcode(int argc, VALUE *argv, VALUE *self)
|
|||
}
|
||||
if ((to_encidx = rb_to_encoding_index(to_encval = argv[0])) < 0) {
|
||||
to_enc = 0;
|
||||
to_encidx = 0;
|
||||
to_e = StringValueCStr(to_encval);
|
||||
}
|
||||
else {
|
||||
|
@ -405,7 +360,7 @@ str_transcode(int argc, VALUE *argv, VALUE *self)
|
|||
}
|
||||
/* set encoding */
|
||||
if (!to_enc) {
|
||||
to_encidx = rb_enc_replicate(to_e, rb_ascii8bit_encoding());
|
||||
to_encidx = rb_define_dummy_encoding(to_e);
|
||||
}
|
||||
*self = dest;
|
||||
|
||||
|
@ -467,7 +422,10 @@ rb_str_transcode(int argc, VALUE *argv, VALUE str)
|
|||
/*
 * Initialize the transcoding subsystem: create the transcoder lookup
 * tables, run init_transcoder_table(), and define String#encode and
 * String#encode!.
 */
void
|
||||
Init_transcode(void)
|
||||
{
|
||||
/*
 * Both tables must exist before init_transcoder_table() runs, because
 * the declarations it makes insert into transcoder_lib_table.
 * transcoder_table is looked up by key to get an rb_transcoder;
 * transcoder_lib_table maps a key to the name of the library to load.
 * NOTE(review): st_init_strcasetable presumably gives case-insensitive
 * string keys -- confirm against st.h.
 */
transcoder_table = st_init_strcasetable();
|
||||
transcoder_lib_table = st_init_strcasetable();
|
||||
init_transcoder_table();
|
||||
|
||||
/* arity -1: the methods receive their arguments as (argc, argv, self) */
rb_define_method(rb_cString, "encode", rb_str_transcode, -1);
|
||||
rb_define_method(rb_cString, "encode!", rb_str_transcode_bang, -1);
|
||||
}
|
||||
|
|
|
@ -22,12 +22,9 @@ typedef struct byte_lookup {
|
|||
const struct byte_lookup *const *info;
|
||||
} BYTE_LOOKUP;
|
||||
|
||||
#ifdef TRANSCODE_DATA
|
||||
#ifndef PType
|
||||
/* data file needs to treat this as a pointer, to remove warnings */
|
||||
#define PType (const BYTE_LOOKUP *)
|
||||
#else
|
||||
/* in code, this is treated as just an integer */
|
||||
#define PType (int)
|
||||
#endif
|
||||
|
||||
#define NOMAP (PType 0x01) /* single byte direct map */
|
||||
|
@ -56,23 +53,26 @@ typedef struct byte_lookup {
|
|||
|
||||
/* dynamic structure, one per conversion (similar to iconv_t) */
|
||||
/* may carry conversion state (e.g. for iso-2022-jp) */
|
||||
typedef struct transcoding {
|
||||
typedef struct rb_transcoding {
|
||||
VALUE ruby_string_dest; /* the String used as the conversion destination,
|
||||
or NULL if something else is being converted */
|
||||
char *(*flush_func)(struct transcoding*, int, int);
|
||||
} transcoding;
|
||||
char *(*flush_func)(struct rb_transcoding*, int, int);
|
||||
} rb_transcoding;
|
||||
|
||||
/* static structure, one per supported encoding pair */
|
||||
typedef struct transcoder_st{
|
||||
typedef struct rb_transcoder {
|
||||
const char *from_encoding;
|
||||
const char *to_encoding;
|
||||
const BYTE_LOOKUP *conv_tree_start;
|
||||
int max_output;
|
||||
int from_utf8;
|
||||
void (*preprocessor)(char**, char**, char*, char*,
|
||||
struct transcoder_st *transcoder, struct transcoding*);
|
||||
struct rb_transcoder *, struct rb_transcoding *);
|
||||
void (*postprocessor)(char**, char**, char*, char*,
|
||||
struct transcoder_st *transcoder, struct transcoding*);
|
||||
} transcoder;
|
||||
struct rb_transcoder *, struct rb_transcoding *);
|
||||
} rb_transcoder;
|
||||
|
||||
void rb_declare_transcoder(const char *enc1, const char *enc2, const char *lib);
|
||||
void rb_register_transcoder(const rb_transcoder *);
|
||||
|
||||
#endif /* RUBY_TRANSCODE_DATA_H */
|
||||
|
|
Loading…
Reference in a new issue