mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
* enc/iso_2022_jp.h: add CP50220.
* enc/trans/iso2022.trans: add converter for CP50220. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@27860 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
parent
6b15bccfdf
commit
f8d97b0026
4 changed files with 161 additions and 3 deletions
|
@ -1,3 +1,9 @@
|
|||
Mon Apr 5 09:20:08 2010 NARUSE, Yui <naruse@ruby-lang.org>
|
||||
|
||||
* enc/iso_2022_jp.h: add CP50220.
|
||||
|
||||
* enc/trans/iso2022.trans: add converter for CP50220.
|
||||
|
||||
Mon May 17 09:37:25 2010 NAKAMURA Usaku <usa@ruby-lang.org>
|
||||
|
||||
* lib/fileutils.rb (FileUtils::Entry_#entries): returns pathname in
|
||||
|
|
|
@ -5,16 +5,34 @@ ENC_ALIAS("ISO2022-JP", "ISO-2022-JP");
|
|||
ENC_REPLICATE("ISO-2022-JP-2", "ISO-2022-JP");
|
||||
ENC_ALIAS("ISO2022-JP2", "ISO-2022-JP-2");
|
||||
|
||||
/* Windows Codepage 50221
|
||||
/* Windows Codepage 50220
|
||||
* an ISO-2022-JP variant.
|
||||
* This includes
|
||||
* * US-ASCII
|
||||
* * JIS X 0201 Latin
|
||||
* * JIS X 0201 Katakana
|
||||
* * JIS X 0208
|
||||
* * NEC special characters
|
||||
* * NEC selected IBM extended characters
|
||||
* and this implementation doesn't include
|
||||
* * User Defined Characters
|
||||
*
|
||||
* So this CP50220 has the same characters as CP51932.
|
||||
*
|
||||
* See http://legacy-encoding.sourceforge.jp/wiki/index.php?cp50220
|
||||
*/
|
||||
ENC_REPLICATE("CP50220", "ISO-2022-JP");
|
||||
|
||||
/* Windows Codepage 50221
|
||||
* an ISO-2022-JP variant.
|
||||
* This includes
|
||||
* * US-ASCII
|
||||
* * JIS X 0201 Latin
|
||||
* * JIS X 0201 Katakana
|
||||
* * JIS X 0208
|
||||
* * NEC special characters
|
||||
* * NEC selected IBM extended characters
|
||||
* and this implementation doesn't include
|
||||
* * User Defined Characters
|
||||
*
|
||||
* So this CP50221 has the same characters as CP51932.
|
||||
|
|
|
@ -357,6 +357,18 @@ fun_so_cp50221_decoder(void *statep, const unsigned char *s, size_t l, unsigned
|
|||
}
|
||||
}
|
||||
|
||||
/*
 * Transcoder descriptor for the pair "CP50220" / "cp51932" (presumably in
 * source, destination order -- confirm against the rb_transcoder
 * definition).
 *
 * It reuses the conversion table (cp50221_decoder) and the
 * fun_si_cp50221_decoder / fun_so_cp50221_decoder callbacks that carry the
 * CP50221 name; only the first encoding name differs from the values
 * visible in rb_cp50221_decoder.
 *
 * One byte of state is requested, and iso2022jp_init is installed both as
 * the state initializer and as the state finalizer.
 */
static const rb_transcoder
|
||||
rb_cp50220_decoder = {
|
||||
"CP50220", "cp51932", cp50221_decoder,
|
||||
TRANSCODE_TABLE_INFO,
|
||||
1, /* input_unit_length */
|
||||
3, /* max_input */
|
||||
3, /* max_output */
|
||||
asciicompat_decoder, /* asciicompat_type */
|
||||
1, iso2022jp_init, iso2022jp_init, /* state_size, state_init, state_fini */
|
||||
NULL, fun_si_cp50221_decoder, NULL, fun_so_cp50221_decoder
|
||||
};
|
||||
|
||||
static const rb_transcoder
|
||||
rb_cp50221_decoder = {
|
||||
"CP50221", "cp51932", cp50221_decoder,
|
||||
|
@ -370,7 +382,8 @@ rb_cp50221_decoder = {
|
|||
};
|
||||
|
||||
static ssize_t
|
||||
fun_so_cp50221_encoder(void *statep, const unsigned char *s, size_t l, unsigned char *o, size_t osize)
|
||||
fun_so_cp5022x_encoder(void *statep, const unsigned char *s, size_t l,
|
||||
unsigned char *o, size_t osize)
|
||||
{
|
||||
unsigned char *sp = statep;
|
||||
unsigned char *output0 = o;
|
||||
|
@ -425,11 +438,121 @@ rb_cp50221_encoder = {
|
|||
5, /* max_output */
|
||||
asciicompat_encoder, /* asciicompat_type */
|
||||
1, iso2022jp_init, iso2022jp_init, /* state_size, state_init, state_fini */
|
||||
NULL, NULL, NULL, fun_so_cp50221_encoder,
|
||||
NULL, NULL, NULL, fun_so_cp5022x_encoder,
|
||||
finish_iso2022jp_encoder,
|
||||
iso2022jp_encoder_reset_sequence_size, finish_iso2022jp_encoder
|
||||
};
|
||||
|
||||
/*
 * Replacement codes used by the CP50220 encoder: 63 consecutive two-byte
 * entries (126 bytes plus the terminating NUL).
 *
 * fun_so_cp50220_encoder and finish_cp50220_encoder index it with
 * (byte - 0xA1) * 2, or equivalently ((byte & 0x7F) - 0x21) * 2, so entry 0
 * belongs to input byte 0xA1 and the last entry to 0xDF.
 *
 * The rows are joined by ordinary adjacent string literal concatenation;
 * no line continuation is required.
 */
static const char *tbl0208 =
    "\x21\x23\x21\x56\x21\x57\x21\x22\x21\x26\x25\x72\x25\x21\x25\x23"
    "\x25\x25\x25\x27\x25\x29\x25\x63\x25\x65\x25\x67\x25\x43\x21\x3C"
    "\x25\x22\x25\x24\x25\x26\x25\x28\x25\x2A\x25\x2B\x25\x2D\x25\x2F"
    "\x25\x31\x25\x33\x25\x35\x25\x37\x25\x39\x25\x3B\x25\x3D\x25\x3F"
    "\x25\x41\x25\x44\x25\x46\x25\x48\x25\x4A\x25\x4B\x25\x4C\x25\x4D"
    "\x25\x4E\x25\x4F\x25\x52\x25\x55\x25\x58\x25\x5B\x25\x5E\x25\x5F"
    "\x25\x60\x25\x61\x25\x62\x25\x64\x25\x66\x25\x68\x25\x69\x25\x6A"
    "\x25\x6B\x25\x6C\x25\x6D\x25\x6F\x25\x73\x21\x2B\x21\x2C";
|
||||
|
||||
static ssize_t
|
||||
fun_so_cp50220_encoder(void *statep, const unsigned char *s, size_t l,
|
||||
unsigned char *o, size_t osize)
|
||||
{
|
||||
unsigned char *output0 = o;
|
||||
unsigned char *sp = statep;
|
||||
|
||||
if (sp[0] == G0_JISX0201_KATAKANA) {
|
||||
int c = sp[2] & 0x7F;
|
||||
const char *p = tbl0208 + (c - 0x21) * 2;
|
||||
if (sp[1] != G0_JISX0208_1983) {
|
||||
*o++ = 0x1b;
|
||||
*o++ = '$';
|
||||
*o++ = 'B';
|
||||
}
|
||||
sp[0] = G0_JISX0208_1983;
|
||||
*o++ = *p++;
|
||||
if (l == 2 && s[0] == 0x8E) {
|
||||
if (s[1] == 0xDE) {
|
||||
*o++ = *p + 1;
|
||||
return o - output0;
|
||||
}
|
||||
else if (s[1] == 0xDF && (0x4A <= c && c <= 0x4E)) {
|
||||
*o++ = *p + 2;
|
||||
return o - output0;
|
||||
}
|
||||
}
|
||||
*o++ = *p;
|
||||
}
|
||||
|
||||
if (l == 2 && s[0] == 0x8E) {
|
||||
const char *p = tbl0208 + (s[1] - 0xA1) * 2;
|
||||
if ((0xA1 <= s[1] && s[1] <= 0xB5) ||
|
||||
(0xC5 <= s[1] && s[1] <= 0xC9) ||
|
||||
(0xCF <= s[1] && s[1] <= 0xDF)) {
|
||||
if (*sp != G0_JISX0208_1983) {
|
||||
*o++ = 0x1b;
|
||||
*o++ = '$';
|
||||
*o++ = 'B';
|
||||
*sp = G0_JISX0208_1983;
|
||||
}
|
||||
*o++ = *p++;
|
||||
*o++ = *p;
|
||||
return o - output0;
|
||||
}
|
||||
|
||||
sp[2] = s[1];
|
||||
sp[1] = sp[0];
|
||||
sp[0] = G0_JISX0201_KATAKANA;
|
||||
return o - output0;
|
||||
}
|
||||
|
||||
o += fun_so_cp5022x_encoder(statep, s, l, o, osize);
|
||||
return o - output0;
|
||||
}
|
||||
|
||||
static ssize_t
|
||||
finish_cp50220_encoder(void *statep, unsigned char *o, size_t osize)
|
||||
{
|
||||
unsigned char *sp = statep;
|
||||
unsigned char *output0 = o;
|
||||
|
||||
if (*sp == G0_ASCII)
|
||||
return 0;
|
||||
|
||||
if (sp[0] == G0_JISX0201_KATAKANA) {
|
||||
int c = sp[2] & 0x7F;
|
||||
const char *p = tbl0208 + (c - 0x21) * 2;
|
||||
if (sp[1] != G0_JISX0208_1983) {
|
||||
*o++ = 0x1b;
|
||||
*o++ = '$';
|
||||
*o++ = 'B';
|
||||
}
|
||||
sp[0] = G0_JISX0208_1983;
|
||||
*o++ = *p++;
|
||||
*o++ = *p;
|
||||
}
|
||||
|
||||
*o++ = 0x1b;
|
||||
*o++ = '(';
|
||||
*o++ = 'B';
|
||||
*sp = G0_ASCII;
|
||||
|
||||
return o - output0;
|
||||
}
|
||||
|
||||
/*
 * Transcoder descriptor for the pair "CP51932" / "CP50220" (presumably in
 * source, destination order -- confirm against the rb_transcoder
 * definition).  It reuses the cp50221_encoder conversion table and installs
 * fun_so_cp50220_encoder as output callback and finish_cp50220_encoder as
 * finisher.
 *
 * state_size is 3 because those two functions use statep[0], statep[1] and
 * statep[2].
 *
 * NOTE(review): max_output is 5, but fun_so_cp50220_encoder can write
 * 3 + 2 bytes for a buffered character and then 2 more for the current one
 * in a single call (7 bytes), plus whatever fun_so_cp5022x_encoder writes
 * when it falls through.  If max_output is used to size the output buffer,
 * this value needs to be raised -- confirm.
 *
 * NOTE(review): iso2022jp_encoder_reset_sequence_size is shared with other
 * encoders, while finish_cp50220_encoder can write up to 3 + 2 + 3 bytes
 * when a character is still buffered -- confirm that the reported size
 * covers that case.
 */
static const rb_transcoder
|
||||
rb_cp50220_encoder = {
|
||||
"CP51932", "CP50220", cp50221_encoder,
|
||||
TRANSCODE_TABLE_INFO,
|
||||
1, /* input_unit_length */
|
||||
3, /* max_input */
|
||||
5, /* max_output */
|
||||
asciicompat_encoder, /* asciicompat_type */
|
||||
3, iso2022jp_init, iso2022jp_init, /* state_size, state_init, state_fini */
|
||||
NULL, NULL, NULL, fun_so_cp50220_encoder,
|
||||
finish_cp50220_encoder,
|
||||
iso2022jp_encoder_reset_sequence_size, finish_cp50220_encoder
|
||||
};
|
||||
|
||||
void
|
||||
Init_iso2022(void)
|
||||
{
|
||||
|
@ -437,7 +560,9 @@ Init_iso2022(void)
|
|||
rb_register_transcoder(&rb_iso2022jp_encoder);
|
||||
rb_register_transcoder(&rb_stateless_iso2022jp_to_eucjp);
|
||||
rb_register_transcoder(&rb_eucjp_to_stateless_iso2022jp);
|
||||
rb_register_transcoder(&rb_cp50220_decoder);
|
||||
rb_register_transcoder(&rb_cp50221_decoder);
|
||||
rb_register_transcoder(&rb_cp50220_encoder);
|
||||
rb_register_transcoder(&rb_cp50221_encoder);
|
||||
}
|
||||
|
||||
|
|
|
@ -1373,6 +1373,15 @@ class TestTranscode < Test::Unit::TestCase
|
|||
assert_equal("\u5fde", "\e$B\x7A\x21".encode("utf-8", "cp50221"))
|
||||
assert_equal("\u72be", "\e$B\x7B\x21".encode("utf-8", "cp50221"))
|
||||
assert_equal("\u91d7", "\e$B\x7C\x21".encode("utf-8", "cp50221"))
|
||||
assert_equal("\e(I!_\e(B", "\xA1\xDF".encode("cp50220","sjis"))
|
||||
end
|
||||
|
||||
# Encoding Shift_JIS text to CP50220: every assertion here targets
# "cp50220", so the method carries that name.  Under the former name
# test_cp50221 it would replace any earlier method of the same name in this
# class, and that method's assertions would never run.
def test_cp50220
  # Two single-byte Shift_JIS characters become two-byte codes after ESC $ B,
  # and the output ends with ESC ( B.
  assert_equal("\e$B!#!,\e(B".force_encoding("cp50220"),
               "\xA1\xDF".encode("cp50220","sjis"))
  # \xDE / \xDF following a character are either merged into it or emitted
  # as a separate "!+" code, depending on the preceding character.
  assert_equal("\e$B%*!+%,%I%J!+%N!+%P%\\%^!+%Q%]%\"\e(B".force_encoding("cp50220"),
               "\xB5\xDE\xB6\xDE\xC4\xDE\xC5\xDE\xC9\xDE\xCA\xDE\xCE\xDE\xCF\xDE\xCA\xDF\xCE\xDF\xB1".
               encode("cp50220", "sjis"))
end
|
||||
|
||||
def test_iso_2022_jp_1
|
||||
|
|
Loading…
Reference in a new issue