mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
follow CVS Head of original nkf.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@7213 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
parent
27425a52ba
commit
8b60d65bcb
2 changed files with 131 additions and 19 deletions
|
@ -46,7 +46,7 @@ static char *CopyRight =
|
|||
static char *Version =
|
||||
"2.0";
|
||||
static char *Patchlevel =
|
||||
"4/0401/Shinji Kono";
|
||||
"4/0410/Shinji Kono";
|
||||
|
||||
/*
|
||||
**
|
||||
|
@ -198,7 +198,7 @@ static char *Patchlevel =
|
|||
|
||||
#define UTF8 12
|
||||
#define UTF8_INPUT 13
|
||||
#define UTF16_INPUT 14
|
||||
#define UTF16LE_INPUT 14
|
||||
#define UTF16BE_INPUT 15
|
||||
|
||||
#define WISH_TRUE 15
|
||||
|
@ -376,8 +376,9 @@ static int x0201_f = NO_X0201; /* Assume NO JISX0201 */
|
|||
#endif
|
||||
static int iso2022jp_f = FALSE; /* convert ISO-2022-JP */
|
||||
#ifdef UTF8_OUTPUT_ENABLE
|
||||
static int w_oconv16_begin_f= 0; /* utf-16 header */
|
||||
static int unicode_bom_f= 0; /* Output Unicode BOM */
|
||||
static int w_oconv16_LE = 0; /* utf-16 little endian */
|
||||
static int ms_ucs_map_f = FALSE; /* Microsoft UCS Mapping Compatible */
|
||||
#endif
|
||||
|
||||
|
||||
|
@ -443,7 +444,7 @@ STATIC void s_status PROTO((struct input_code *, int));
|
|||
#ifdef UTF8_INPUT_ENABLE
|
||||
STATIC void w_status PROTO((struct input_code *, int));
|
||||
STATIC void w16_status PROTO((struct input_code *, int));
|
||||
static int utf16_mode = UTF16_INPUT;
|
||||
static int utf16_mode = UTF16LE_INPUT;
|
||||
#endif
|
||||
|
||||
struct input_code input_code_list[] = {
|
||||
|
@ -892,6 +893,7 @@ struct {
|
|||
#ifdef UTF8_OUTPUT_ENABLE
|
||||
{"utf8", "w"},
|
||||
{"utf16", "w16"},
|
||||
{"ms-ucs-map", ""},
|
||||
#endif
|
||||
#ifdef UTF8_INPUT_ENABLE
|
||||
{"utf8-input", "W"},
|
||||
|
@ -1007,6 +1009,12 @@ options(cp)
|
|||
exec_f = -1;
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
#ifdef UTF8_OUTPUT_ENABLE
|
||||
if (strcmp(long_option[i].name, "ms-ucs-map") == 0){
|
||||
ms_ucs_map_f = TRUE;
|
||||
continue;
|
||||
}
|
||||
#endif
|
||||
if (strcmp(long_option[i].name, "prefix=") == 0){
|
||||
if (*p == '=' && ' ' < p[1] && p[1] < 128){
|
||||
|
@ -1082,17 +1090,23 @@ options(cp)
|
|||
if ('1'== cp[0] && '6'==cp[1]) {
|
||||
output_conv = w_oconv16; cp+=2;
|
||||
if (cp[0]=='L') {
|
||||
w_oconv16_begin_f=2; cp++;
|
||||
unicode_bom_f=2; cp++;
|
||||
w_oconv16_LE = 1;
|
||||
if (cp[0] == '0'){
|
||||
w_oconv16_begin_f=1; cp++;
|
||||
unicode_bom_f=1; cp++;
|
||||
}
|
||||
} else if (cp[0] == 'B') {
|
||||
w_oconv16_begin_f=2; cp++;
|
||||
unicode_bom_f=2; cp++;
|
||||
if (cp[0] == '0'){
|
||||
w_oconv16_begin_f=1; cp++;
|
||||
unicode_bom_f=1; cp++;
|
||||
}
|
||||
}
|
||||
} else if (cp[0] == '8') {
|
||||
output_conv = w_oconv; cp++;
|
||||
unicode_bom_f=2;
|
||||
if (cp[0] == '0'){
|
||||
unicode_bom_f=1; cp++;
|
||||
}
|
||||
} else
|
||||
output_conv = w_oconv;
|
||||
continue;
|
||||
|
@ -1100,7 +1114,16 @@ options(cp)
|
|||
#ifdef UTF8_INPUT_ENABLE
|
||||
case 'W': /* UTF-8 input */
|
||||
if ('1'== cp[0] && '6'==cp[1]) {
|
||||
input_f = UTF16_INPUT;
|
||||
input_f = UTF16LE_INPUT;
|
||||
if (cp[0]=='L') {
|
||||
cp++;
|
||||
} else if (cp[0] == 'B') {
|
||||
cp++;
|
||||
input_f = UTF16BE_INPUT;
|
||||
}
|
||||
} else if (cp[0] == '8') {
|
||||
cp++;
|
||||
input_f = UTF8_INPUT;
|
||||
} else
|
||||
input_f = UTF8_INPUT;
|
||||
continue;
|
||||
|
@ -1760,7 +1783,7 @@ module_connection()
|
|||
#ifdef UTF8_INPUT_ENABLE
|
||||
} else if (input_f == UTF8_INPUT) {
|
||||
set_iconv(-TRUE, w_iconv);
|
||||
} else if (input_f == UTF16_INPUT) {
|
||||
} else if (input_f == UTF16LE_INPUT) {
|
||||
set_iconv(-TRUE, w_iconv16);
|
||||
#endif
|
||||
} else {
|
||||
|
@ -2364,7 +2387,7 @@ w_iconv16(c2, c1, c0)
|
|||
int ret;
|
||||
|
||||
if (c2==0376 && c1==0377){
|
||||
utf16_mode = UTF16_INPUT;
|
||||
utf16_mode = UTF16LE_INPUT;
|
||||
return 0;
|
||||
} else if (c2==0377 && c1==0376){
|
||||
utf16_mode = UTF16BE_INPUT;
|
||||
|
@ -2424,6 +2447,7 @@ e2w_conv(c2, c1)
|
|||
{
|
||||
extern unsigned short euc_to_utf8_1byte[];
|
||||
extern unsigned short * euc_to_utf8_2bytes[];
|
||||
extern unsigned short * euc_to_utf8_2bytes_ms[];
|
||||
unsigned short *p;
|
||||
|
||||
if (c2 == X0201) {
|
||||
|
@ -2432,7 +2456,7 @@ e2w_conv(c2, c1)
|
|||
c2 &= 0x7f;
|
||||
c2 = (c2&0x7f) - 0x21;
|
||||
if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
|
||||
p = euc_to_utf8_2bytes[c2];
|
||||
p = ms_ucs_map_f ? euc_to_utf8_2bytes_ms[c2] : euc_to_utf8_2bytes[c2];
|
||||
else
|
||||
return 0;
|
||||
}
|
||||
|
@ -2462,7 +2486,16 @@ w_oconv(c2, c1)
|
|||
if (c2 == EOF) {
|
||||
(*o_putc)(EOF);
|
||||
return;
|
||||
} else if (c2 == 0) {
|
||||
}
|
||||
|
||||
if (unicode_bom_f==2) {
|
||||
(*o_putc)('\357');
|
||||
(*o_putc)('\273');
|
||||
(*o_putc)('\277');
|
||||
unicode_bom_f=1;
|
||||
}
|
||||
|
||||
if (c2 == 0) {
|
||||
output_mode = ASCII;
|
||||
(*o_putc)(c1);
|
||||
} else if (c2 == ISO8859_1) {
|
||||
|
@ -2489,7 +2522,7 @@ w_oconv16(c2, c1)
|
|||
return;
|
||||
}
|
||||
|
||||
if (w_oconv16_begin_f==2) {
|
||||
if (unicode_bom_f==2) {
|
||||
if (w_oconv16_LE){
|
||||
(*o_putc)((unsigned char)'\377');
|
||||
(*o_putc)('\376');
|
||||
|
@ -2497,7 +2530,7 @@ w_oconv16(c2, c1)
|
|||
(*o_putc)('\376');
|
||||
(*o_putc)((unsigned char)'\377');
|
||||
}
|
||||
w_oconv16_begin_f=1;
|
||||
unicode_bom_f=1;
|
||||
}
|
||||
|
||||
if (c2 == ISO8859_1) {
|
||||
|
@ -3930,8 +3963,8 @@ reinit()
|
|||
}
|
||||
}
|
||||
#ifdef UTF8_OUTPUT_ENABLE
|
||||
if (w_oconv16_begin_f) {
|
||||
w_oconv16_begin_f = 2;
|
||||
if (unicode_bom_f) {
|
||||
unicode_bom_f = 2;
|
||||
}
|
||||
#endif
|
||||
f_line = 0;
|
||||
|
@ -3989,8 +4022,14 @@ usage()
|
|||
#endif
|
||||
#ifdef DEFAULT_CODE_UTF8
|
||||
fprintf(stderr,"j,s,e,w Outout code is JIS 7 bit, Shift JIS, AT&T JIS (EUC), UTF-8 (DEFAULT)\n");
|
||||
#endif
|
||||
#ifdef UTF8_OUTPUT_ENABLE
|
||||
fprintf(stderr," After 'w' you can add more options. (80?|16((B|L)0?)?) \n");
|
||||
#endif
|
||||
fprintf(stderr,"J,S,E,W Input assumption is JIS 7 bit , Shift JIS, AT&T JIS (EUC), UTF-8\n");
|
||||
#ifdef UTF8_INPUT_ENABLE
|
||||
fprintf(stderr," After 'W' you can add more options. (8|16(B|L)?) \n");
|
||||
#endif
|
||||
fprintf(stderr,"t no conversion\n");
|
||||
fprintf(stderr,"i_/o_ Output sequence to designate JIS-kanji/ASCII (DEFAULT B)\n");
|
||||
fprintf(stderr,"r {de/en}crypt ROT13/47\n");
|
||||
|
@ -4012,8 +4051,21 @@ usage()
|
|||
fprintf(stderr,"I Convert non ISO-2022-JP charactor to GETA\n");
|
||||
fprintf(stderr,"-L[uwm] line mode u:LF w:CRLF m:CR (DEFAULT noconversion)\n");
|
||||
fprintf(stderr,"long name options\n");
|
||||
fprintf(stderr," --fj,--unix,--mac,--windows convert for the system\n");
|
||||
fprintf(stderr," --fj,--unix,--mac,--windows convert for the system\n");
|
||||
fprintf(stderr," --jis,--euc,--sjis,--utf8,--utf16,--mime,--base64 convert for the code\n");
|
||||
fprintf(stderr," --hiragana, --katakana Hiragana/Katakana Conversion\n");
|
||||
#ifdef INPUT_OPTION
|
||||
fprintf(stderr," --cap-input, --url-input Convert hex after ':' or '%'\n");
|
||||
#endif
|
||||
#ifdef NUMCHAR_OPTION
|
||||
fprintf(stderr," --numchar-input Convert Unicode Character Reference\n");
|
||||
#endif
|
||||
#ifdef SHIFTJIS_CP932
|
||||
fprintf(stderr," --no-cp932 Don't convert Shift_JIS FAxx-FCxx to equivalnet CP932\n");
|
||||
#endif
|
||||
#ifdef UTF8_OUTPUT_ENABLE
|
||||
fprintf(stderr," --ms-ucs-map Microsoft UCS Mapping Compatible\n");
|
||||
#endif
|
||||
#ifdef OVERWRITE
|
||||
fprintf(stderr," --overwrite Overwrite original listed files by filtered result\n");
|
||||
#endif
|
||||
|
|
|
@ -15,6 +15,23 @@ unsigned short euc_to_utf8_A1[] = {
|
|||
0xFF04, 0x00A2, 0x00A3, 0xFF05, 0xFF03, 0xFF06, 0xFF0A, 0xFF20,
|
||||
0x00A7, 0x2606, 0x2605, 0x25CB, 0x25CF, 0x25CE, 0x25C7,
|
||||
};
|
||||
|
||||
/* Microsoft UCS mapping compatible variant of euc_to_utf8_A1; referenced by euc_to_utf8_2bytes_ms. */
|
||||
unsigned short euc_to_utf8_A1_ms[] = {
|
||||
0x3000, 0x3001, 0x3002, 0xFF0C, 0xFF0E, 0x30FB, 0xFF1A,
|
||||
0xFF1B, 0xFF1F, 0xFF01, 0x309B, 0x309C, 0x00B4, 0xFF40, 0x00A8,
|
||||
0xFF3E, 0xFFE3, 0xFF3F, 0x30FD, 0x30FE, 0x309D, 0x309E, 0x3003,
|
||||
0x4EDD, 0x3005, 0x3006, 0x3007, 0x30FC, 0x2015, 0x2010, 0xFF0F,
|
||||
0xFF3C, 0xFF5E, 0x2225, 0xFF5C, 0x2026, 0x2025, 0x2018, 0x2019,
|
||||
0x201C, 0x201D, 0xFF08, 0xFF09, 0x3014, 0x3015, 0xFF3B, 0xFF3D,
|
||||
0xFF5B, 0xFF5D, 0x3008, 0x3009, 0x300A, 0x300B, 0x300C, 0x300D,
|
||||
0x300E, 0x300F, 0x3010, 0x3011, 0xFF0B, 0xFF0D, 0x00B1, 0x00D7,
|
||||
0x00F7, 0xFF1D, 0x2260, 0xFF1C, 0xFF1E, 0x2266, 0x2267, 0x221E,
|
||||
0x2234, 0x2642, 0x2640, 0x00B0, 0x2032, 0x2033, 0x2103, 0xFFE5,
|
||||
0xFF04, 0xFFE0, 0xFFE1, 0xFF05, 0xFF03, 0xFF06, 0xFF0A, 0xFF20,
|
||||
0x00A7, 0x2606, 0x2605, 0x25CB, 0x25CF, 0x25CE, 0x25C7,
|
||||
};
|
||||
|
||||
unsigned short euc_to_utf8_A2[] = {
|
||||
0x25C6, 0x25A1, 0x25A0, 0x25B3, 0x25B2, 0x25BD, 0x25BC,
|
||||
0x203B, 0x3012, 0x2192, 0x2190, 0x2191, 0x2193, 0x3013, 0,
|
||||
|
@ -29,6 +46,22 @@ unsigned short euc_to_utf8_A2[] = {
|
|||
0, 0, 0x212B, 0x2030, 0x266F, 0x266D, 0x266A, 0x2020,
|
||||
0x2021, 0x00B6, 0, 0, 0, 0, 0x25EF,
|
||||
};
|
||||
|
||||
/* Microsoft UCS mapping compatible variant of euc_to_utf8_A2; referenced by euc_to_utf8_2bytes_ms. */
|
||||
unsigned short euc_to_utf8_A2_ms[] = {
|
||||
0x25C6, 0x25A1, 0x25A0, 0x25B3, 0x25B2, 0x25BD, 0x25BC,
|
||||
0x203B, 0x3012, 0x2192, 0x2190, 0x2191, 0x2193, 0x3013, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0x2208, 0x220B, 0x2286, 0x2287, 0x2282, 0x2283,
|
||||
0x222A, 0x2229, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0x2227, 0x2228, 0xFFE2, 0x21D2, 0x21D4, 0x2200,
|
||||
0x2203, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0x2220, 0x22A5, 0x2312, 0x2202,
|
||||
0x2207, 0x2261, 0x2252, 0x226A, 0x226B, 0x221A, 0x223D, 0x221D,
|
||||
0x2235, 0x222B, 0x222C, 0, 0, 0, 0, 0,
|
||||
0, 0, 0x212B, 0x2030, 0x266F, 0x266D, 0x266A, 0x2020,
|
||||
0x2021, 0x00B6, 0, 0, 0, 0, 0x25EF,
|
||||
};
|
||||
unsigned short euc_to_utf8_A3[] = {
|
||||
0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
|
@ -1287,6 +1320,33 @@ unsigned short * euc_to_utf8_2bytes[] = {
|
|||
0, euc_to_utf8_F9, euc_to_utf8_FA, euc_to_utf8_FB,
|
||||
euc_to_utf8_FC, 0, 0,
|
||||
};
|
||||
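/* Microsoft UCS mapping compatible row table: e2w_conv() indexes this instead of euc_to_utf8_2bytes when ms_ucs_map_f is set; its first two rows are euc_to_utf8_A1_ms and euc_to_utf8_A2_ms. */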
|
||||
unsigned short * euc_to_utf8_2bytes_ms[] = {
|
||||
euc_to_utf8_A1_ms,euc_to_utf8_A2_ms, euc_to_utf8_A3,
|
||||
euc_to_utf8_A4, euc_to_utf8_A5, euc_to_utf8_A6, euc_to_utf8_A7,
|
||||
euc_to_utf8_A8, euc_to_utf8_A9, euc_to_utf8_AA, euc_to_utf8_AB,
|
||||
euc_to_utf8_AC, euc_to_utf8_AD, euc_to_utf8_AE, euc_to_utf8_AF,
|
||||
euc_to_utf8_B0, euc_to_utf8_B1, euc_to_utf8_B2, euc_to_utf8_B3,
|
||||
euc_to_utf8_B4, euc_to_utf8_B5, euc_to_utf8_B6, euc_to_utf8_B7,
|
||||
euc_to_utf8_B8, euc_to_utf8_B9, euc_to_utf8_BA, euc_to_utf8_BB,
|
||||
euc_to_utf8_BC, euc_to_utf8_BD, euc_to_utf8_BE, euc_to_utf8_BF,
|
||||
euc_to_utf8_C0, euc_to_utf8_C1, euc_to_utf8_C2, euc_to_utf8_C3,
|
||||
euc_to_utf8_C4, euc_to_utf8_C5, euc_to_utf8_C6, euc_to_utf8_C7,
|
||||
euc_to_utf8_C8, euc_to_utf8_C9, euc_to_utf8_CA, euc_to_utf8_CB,
|
||||
euc_to_utf8_CC, euc_to_utf8_CD, euc_to_utf8_CE, euc_to_utf8_CF,
|
||||
euc_to_utf8_D0, euc_to_utf8_D1, euc_to_utf8_D2, euc_to_utf8_D3,
|
||||
euc_to_utf8_D4, euc_to_utf8_D5, euc_to_utf8_D6, euc_to_utf8_D7,
|
||||
euc_to_utf8_D8, euc_to_utf8_D9, euc_to_utf8_DA, euc_to_utf8_DB,
|
||||
euc_to_utf8_DC, euc_to_utf8_DD, euc_to_utf8_DE, euc_to_utf8_DF,
|
||||
euc_to_utf8_E0, euc_to_utf8_E1, euc_to_utf8_E2, euc_to_utf8_E3,
|
||||
euc_to_utf8_E4, euc_to_utf8_E5, euc_to_utf8_E6, euc_to_utf8_E7,
|
||||
euc_to_utf8_E8, euc_to_utf8_E9, euc_to_utf8_EA, euc_to_utf8_EB,
|
||||
euc_to_utf8_EC, euc_to_utf8_ED, euc_to_utf8_EE, euc_to_utf8_EF,
|
||||
euc_to_utf8_F0, euc_to_utf8_F1, euc_to_utf8_F2, euc_to_utf8_F3,
|
||||
euc_to_utf8_F4, euc_to_utf8_F5, 0, 0,
|
||||
0, euc_to_utf8_F9, euc_to_utf8_FA, euc_to_utf8_FB,
|
||||
euc_to_utf8_FC, 0, 0,
|
||||
};
|
||||
#endif /* UTF8_OUTPUT_ENABLE */
|
||||
|
||||
#ifdef UTF8_INPUT_ENABLE
|
||||
|
|
Loading…
Reference in a new issue