1
0
Fork 0
mirror of https://github.com/ruby/ruby.git synced 2022-11-09 12:17:21 -05:00

Sat Dec 22 15:45:45 2007 Martin Duerst <duerst@it.aoyama.ac.jp>

	* transcode_data_one_byte.c: slightly optimized

	* transcode_data_japanese.c: new data file for EUC-JP and SHIFT_JIS
	  (not yet optimized; tests to follow; data from
	   http://nkf.sourceforge.jp/ucm/{SJIS|eucJP}-nkf.ucm)

	* common.mk, transcode.c: Adjusted for transcode_data_japanese



git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@14472 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
duerst 2007-12-22 06:45:55 +00:00
parent c9bf419684
commit 5ad8c5566d
4 changed files with 27475 additions and 73 deletions

View file

@ -61,6 +61,7 @@ COMMONOBJS = array.$(OBJEXT) \
time.$(OBJEXT) \
transcode.$(OBJEXT) \
transcode_data_one_byte.$(OBJEXT) \
transcode_data_japanese.$(OBJEXT) \
util.$(OBJEXT) \
variable.$(OBJEXT) \
version.$(OBJEXT) \
@ -548,6 +549,7 @@ thread.$(OBJEXT): {$(VPATH)}thread.c {$(VPATH)}eval_intern.h \
transcode.$(OBJEXT): {$(VPATH)}transcode.c {$(VPATH)}transcode_data.h {$(VPATH)}ruby.h {$(VPATH)}config.h \
{$(VPATH)}defines.h {$(VPATH)}intern.h {$(VPATH)}missing.h {$(VPATH)}encoding.h
transcode_data_one_byte.$(OBJEXT): {$(VPATH)}transcode_data_one_byte.c {$(VPATH)}transcode_data.h
transcode_data_japanese.$(OBJEXT): {$(VPATH)}transcode_data_japanese.c {$(VPATH)}transcode_data.h
cont.$(OBJEXT): {$(VPATH)}cont.c {$(VPATH)}eval_intern.h \
{$(VPATH)}ruby.h {$(VPATH)}vm_core.h {$(VPATH)}id.h {$(VPATH)}config.h \
{$(VPATH)}defines.h {$(VPATH)}intern.h {$(VPATH)}missing.h \

View file

@ -54,6 +54,12 @@ extern const BYTE_LOOKUP to_ISO_8859_13;
extern const BYTE_LOOKUP to_ISO_8859_14;
extern const BYTE_LOOKUP to_ISO_8859_15;
extern const BYTE_LOOKUP from_SHIFT_JIS;
extern const BYTE_LOOKUP from_EUC_JP;
extern const BYTE_LOOKUP to_SHIFT_JIS;
extern const BYTE_LOOKUP to_EUC_JP;
/* these declarations probably belong in a separate header file, e.g. transcode.h */
@ -69,7 +75,7 @@ typedef struct {
/* todo: dynamic structure, one per conversion (stream) */
/* in the future, add some mechanism for dynamically adding stuff here */
#define MAX_TRANSCODERS 29 /* todo: fix: this number has to be adjusted by hand */
#define MAX_TRANSCODERS 33 /* todo: fix: this number has to be adjusted by hand */
static transcoder transcoder_table[MAX_TRANSCODERS];
/* not sure why it's not possible to do relocatable initializations */
@ -124,6 +130,12 @@ init_transcoder_table(void)
register_transcoder("UTF-8", "ISO-8859-13", &to_ISO_8859_13, 1, 1);
register_transcoder("UTF-8", "ISO-8859-14", &to_ISO_8859_14, 1, 1);
register_transcoder("UTF-8", "ISO-8859-15", &to_ISO_8859_15, 1, 1);
register_transcoder("SHIFT_JIS", "UTF-8", &from_SHIFT_JIS, 3, 0);
register_transcoder("EUC-JP", "UTF-8", &from_EUC_JP, 3, 0);
register_transcoder("UTF-8", "SHIFT_JIS", &to_SHIFT_JIS, 2, 1);
register_transcoder("UTF-8", "EUC-JP", &to_EUC_JP, 2, 1);
register_transcoder(NULL, NULL, NULL, 0, 0);
}

27416
transcode_data_japanese.c Normal file

File diff suppressed because it is too large Load diff

View file

@ -4,7 +4,8 @@
static const unsigned char
from_ISO_8859_1_offsets[256] = {
/* used from from_ISO_8859_1 */
/* used from from_ISO_8859_9 */
/* used from from_ISO_8859_5 */
/* used from from_ISO_8859_13 */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
@ -75,8 +76,6 @@ to_ISO_8859_1_C2_offsets[64] = {
};
static const struct byte_lookup* const
to_ISO_8859_1_C2_infos[64] = {
/* used from to_ISO_8859_1_C2 */
/* used from to_ISO_8859_9_C2 */
o1(0x80), o1(0x81), o1(0x82), o1(0x83),
o1(0x84), o1(0x85), o1(0x86), o1(0x87),
o1(0x88), o1(0x89), o1(0x8A), o1(0x8B),
@ -96,14 +95,14 @@ to_ISO_8859_1_C2_infos[64] = {
};
/*
 * Lookup node pairing to_ISO_8859_1_C2_offsets with to_ISO_8859_1_C2_infos
 * (both are declared with 64 entries).
 * NOTE(review): presumably consulted for the byte that follows a 0xC2 lead
 * byte in UTF-8 input -- confirm against the lookup code in transcode.c.
 */
static const BYTE_LOOKUP
to_ISO_8859_1_C2 = {
/* used from to_ISO_8859_1 */
/* used from to_ISO_8859_9 */
to_ISO_8859_1_C2_offsets,
to_ISO_8859_1_C2_infos
};
static const struct byte_lookup* const
to_ISO_8859_1_C3_infos[64] = {
/* used from to_ISO_8859_1_C3 */
/* used from to_ISO_8859_15_C3 */
o1(0xC0), o1(0xC1), o1(0xC2), o1(0xC3),
o1(0xC4), o1(0xC5), o1(0xC6), o1(0xC7),
o1(0xC8), o1(0xC9), o1(0xCA), o1(0xCB),
@ -123,6 +122,8 @@ to_ISO_8859_1_C3_infos[64] = {
};
/*
 * Lookup node pairing to_ISO_8859_1_C3_infos (64 entries) with an offsets
 * table.  Note that the offsets member is to_ISO_8859_1_C2_offsets, i.e. the
 * table of the C2 node is reused here; no separate C3 offsets table is
 * referenced.
 * NOTE(review): this looks like deliberate table sharing by the generator
 * (the file's footprint comment reports "saved" bytes) -- confirm before
 * "fixing" the name mismatch.
 */
static const BYTE_LOOKUP
to_ISO_8859_1_C3 = {
/* used from to_ISO_8859_1 */
/* used from to_ISO_8859_15 */
to_ISO_8859_1_C2_offsets,
to_ISO_8859_1_C3_infos
};
@ -158,9 +159,11 @@ to_ISO_8859_1 = {
};
static const unsigned char
from_ISO_8859_15_offsets[256] = {
from_ISO_8859_10_offsets[256] = {
/* used from from_ISO_8859_10 */
/* used from from_ISO_8859_15 */
/* used from from_ISO_8859_2 */
/* used from from_ISO_8859_9 */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
@ -216,7 +219,7 @@ from_ISO_8859_2_infos[129] = {
};
const BYTE_LOOKUP
from_ISO_8859_2 = {
from_ISO_8859_15_offsets,
from_ISO_8859_10_offsets,
from_ISO_8859_2_infos
};
@ -339,7 +342,6 @@ to_ISO_8859_2_CB = {
static const unsigned char
to_ISO_8859_2_offsets[256] = {
/* used from to_ISO_8859_2 */
/* used from to_ISO_8859_3 */
/* used from to_ISO_8859_4 */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
@ -543,6 +545,25 @@ to_ISO_8859_3_CB = {
to_ISO_8859_3_CB_infos
};
/*
 * 256-entry table with one value per possible byte (16 entries per row, so
 * row r covers bytes r*16 .. r*16+15).  It is the offsets member of the
 * to_ISO_8859_3 lookup, whose infos member is to_ISO_8859_3_infos[7]; the
 * values stored here are all in the range 0..6.
 *
 *   bytes 0x00-0x7F                 -> 0
 *   bytes 0xC2, 0xC3, 0xC4, 0xC5    -> 1, 2, 3, 4
 *   byte  0xCB                      -> 5
 *   every other byte >= 0x80        -> 6
 *
 * NOTE(review): presumably each value is an index into to_ISO_8859_3_infos
 * -- confirm against the lookup code in transcode.c.
 */
static const unsigned char
to_ISO_8859_3_offsets[256] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
/* 0xC0 row: the only row that selects values 1-5 */
6, 6, 1, 2, 3, 4, 6, 6, 6, 6, 6, 5, 6, 6, 6, 6,
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
};
static const struct byte_lookup* const
to_ISO_8859_3_infos[7] = {
NOMAP, &to_ISO_8859_3_C2,
@ -552,13 +573,12 @@ to_ISO_8859_3_infos[7] = {
};
const BYTE_LOOKUP
to_ISO_8859_3 = {
to_ISO_8859_2_offsets,
to_ISO_8859_3_offsets,
to_ISO_8859_3_infos
};
static const unsigned char
from_ISO_8859_13_offsets[256] = {
/* used from from_ISO_8859_13 */
from_ISO_8859_14_offsets[256] = {
/* used from from_ISO_8859_14 */
/* used from from_ISO_8859_4 */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
@ -616,7 +636,7 @@ from_ISO_8859_4_infos[129] = {
};
const BYTE_LOOKUP
from_ISO_8859_4 = {
from_ISO_8859_13_offsets,
from_ISO_8859_14_offsets,
from_ISO_8859_4_infos
};
@ -750,27 +770,6 @@ to_ISO_8859_4 = {
to_ISO_8859_4_infos
};
/*
 * 256-entry table with one value per possible byte (16 entries per row).
 * Bytes 0x00-0x7F all hold 0; byte 0x80+k holds k+1, so the upper half
 * counts 1..128.  The from_ISO_8859_5 lookup pairs this table with
 * from_ISO_8859_5_infos[129].
 * NOTE(review): presumably each value is an index into the paired infos
 * array -- confirm against the lookup code in transcode.c.
 */
static const unsigned char
from_ISO_8859_10_offsets[256] = {
/* used from from_ISO_8859_10 */
/* used from from_ISO_8859_5 */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/* 0x80 and above: consecutive values starting at 1 */
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64,
65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80,
81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96,
97, 98, 99,100,101,102,103,104, 105,106,107,108,109,110,111,112,
113,114,115,116,117,118,119,120, 121,122,123,124,125,126,127,128,
};
static const struct byte_lookup* const
from_ISO_8859_5_infos[129] = {
NOMAP, o2(0xC2,0x80),
@ -841,7 +840,7 @@ from_ISO_8859_5_infos[129] = {
};
const BYTE_LOOKUP
from_ISO_8859_5 = {
from_ISO_8859_10_offsets,
from_ISO_8859_1_offsets,
from_ISO_8859_5_infos
};
@ -1580,7 +1579,8 @@ to_ISO_8859_8_E2_80 = {
};
static const unsigned char
to_ISO_8859_13_E2_offsets[64] = {
to_ISO_8859_10_E2_offsets[64] = {
/* used from to_ISO_8859_10_E2 */
/* used from to_ISO_8859_13_E2 */
/* used from to_ISO_8859_8_E2 */
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
@ -1594,7 +1594,7 @@ to_ISO_8859_8_E2_infos[2] = {
};
static const BYTE_LOOKUP
to_ISO_8859_8_E2 = {
to_ISO_8859_13_E2_offsets,
to_ISO_8859_10_E2_offsets,
to_ISO_8859_8_E2_infos
};
@ -1667,25 +1667,10 @@ from_ISO_8859_9_infos[129] = {
};
const BYTE_LOOKUP
from_ISO_8859_9 = {
from_ISO_8859_1_offsets,
from_ISO_8859_10_offsets,
from_ISO_8859_9_infos
};
/*
 * 64-entry identity table: entry i holds the value i for i = 0..63.
 * It is used as the offsets member of lookups whose infos arrays have 64
 * entries (see the "used from" notes below).
 */
static const unsigned char
to_ISO_8859_15_C3_offsets[64] = {
/* used from to_ISO_8859_15_C3 */
/* used from to_ISO_8859_9_C2 */
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
};
/*
 * Lookup node for ISO-8859-9 that defines no tables of its own: it pairs
 * to_ISO_8859_15_C3_offsets (entry i holds i) with the ISO-8859-1 table
 * to_ISO_8859_1_C2_infos.
 * NOTE(review): presumably the 0xC2 branch of the UTF-8 -> ISO-8859-9
 * lookup -- confirm against to_ISO_8859_9_infos.
 */
static const BYTE_LOOKUP
to_ISO_8859_9_C2 = {
to_ISO_8859_15_C3_offsets,
to_ISO_8859_1_C2_infos
};
static const unsigned char
to_ISO_8859_9_C3_offsets[64] = {
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
@ -1773,7 +1758,7 @@ to_ISO_8859_9_offsets[256] = {
};
static const struct byte_lookup* const
to_ISO_8859_9_infos[6] = {
NOMAP, &to_ISO_8859_9_C2,
NOMAP, &to_ISO_8859_1_C2,
&to_ISO_8859_9_C3, &to_ISO_8859_9_C4,
&to_ISO_8859_9_C5, UNDEF,
};
@ -1965,13 +1950,6 @@ to_ISO_8859_10_E2_80 = {
to_ISO_8859_10_E2_80_infos
};
/*
 * 64-entry table: entry 0 holds 0 and entries 1..63 all hold 1.
 * The two values match the two slots of to_ISO_8859_10_E2_infos[2], which
 * starts with &to_ISO_8859_10_E2_80 followed by UNDEF.
 * NOTE(review): presumably each value is an index into that infos array,
 * so only the first entry leads to a further lookup -- confirm against the
 * lookup code in transcode.c.
 */
static const unsigned char
to_ISO_8859_10_E2_offsets[64] = {
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
};
static const struct byte_lookup* const
to_ISO_8859_10_E2_infos[2] = {
&to_ISO_8859_10_E2_80, UNDEF,
@ -2303,7 +2281,7 @@ from_ISO_8859_13_infos[129] = {
};
const BYTE_LOOKUP
from_ISO_8859_13 = {
from_ISO_8859_13_offsets,
from_ISO_8859_1_offsets,
from_ISO_8859_13_infos
};
@ -2429,7 +2407,7 @@ to_ISO_8859_13_E2_infos[2] = {
};
static const BYTE_LOOKUP
to_ISO_8859_13_E2 = {
to_ISO_8859_13_E2_offsets,
to_ISO_8859_10_E2_offsets,
to_ISO_8859_13_E2_infos
};
@ -2516,7 +2494,7 @@ from_ISO_8859_14_infos[129] = {
};
const BYTE_LOOKUP
from_ISO_8859_14 = {
from_ISO_8859_13_offsets,
from_ISO_8859_14_offsets,
from_ISO_8859_14_infos
};
@ -2806,7 +2784,7 @@ from_ISO_8859_15_infos[129] = {
};
const BYTE_LOOKUP
from_ISO_8859_15 = {
from_ISO_8859_15_offsets,
from_ISO_8859_10_offsets,
from_ISO_8859_15_infos
};
@ -2841,12 +2819,6 @@ to_ISO_8859_15_C2 = {
to_ISO_8859_15_C2_infos
};
/*
 * Lookup node for ISO-8859-15 that pairs to_ISO_8859_15_C3_offsets
 * (entry i holds i) with the ISO-8859-1 table to_ISO_8859_1_C3_infos; it
 * has no infos table of its own.
 * NOTE(review): presumably the 0xC3 branch of the UTF-8 -> ISO-8859-15
 * lookup -- confirm against to_ISO_8859_15_infos.
 */
static const BYTE_LOOKUP
to_ISO_8859_15_C3 = {
to_ISO_8859_15_C3_offsets,
to_ISO_8859_1_C3_infos
};
static const unsigned char
to_ISO_8859_15_C5_offsets[64] = {
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
@ -2921,7 +2893,7 @@ to_ISO_8859_15_offsets[256] = {
static const struct byte_lookup* const
to_ISO_8859_15_infos[6] = {
NOMAP, &to_ISO_8859_15_C2,
&to_ISO_8859_15_C3, &to_ISO_8859_15_C5,
&to_ISO_8859_1_C3, &to_ISO_8859_15_C5,
&to_ISO_8859_15_E2, UNDEF,
};
const BYTE_LOOKUP
@ -2930,4 +2902,4 @@ to_ISO_8859_15 = {
to_ISO_8859_15_infos
};
/* Footprint (bytes): gross: 27684, saved: 3712, net: 23972 */
/* Footprint (bytes): gross: 27556, saved: 3728, net: 23828 */