mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
* enc/trans/gb18030.trans, gb18030-tbl.rb:
new Chinese GB18030 transcoding (from Yoshihiro Kambayashi) * test/ruby/test_transcode.rb: added tests for the above (from Yoshihiro Kambayashi) * transcode_data.h, transcode.c, tool/transcode_tblgen.rb: added support for GB18030-specific 4-byte sequences (with Yoshihiro Kambayashi) git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@21509 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
parent
b949be82cf
commit
82c673d3a1
7 changed files with 63613 additions and 6 deletions
12
ChangeLog
12
ChangeLog
|
@ -1,3 +1,15 @@
|
|||
Wed Jan 14 20:05:05 2009 Martin Duerst <duerst@it.aoyama.ac.jp>
|
||||
|
||||
* enc/trans/gb18030.trans, gb18030-tbl.rb:
|
||||
new Chinese GB18030 transcoding (from Yoshihiro Kambayashi)
|
||||
|
||||
* test/ruby/test_transcode.rb: added tests for the above
|
||||
(from Yoshihiro Kambayashi)
|
||||
|
||||
* transcode_data.h, transcode.c, tool/transcode_tblgen.rb:
|
||||
added support for GB18030-specific 4-byte sequences
|
||||
(with Yoshihiro Kambayashi)
|
||||
|
||||
Wed Jan 14 16:16:19 2009 Yukihiro Matsumoto <matz@ruby-lang.org>
|
||||
|
||||
* ext/curses/extconf.rb: check ncursesw earlier than ncurses to
|
||||
|
|
63330
enc/trans/gb18030-tbl.rb
Normal file
63330
enc/trans/gb18030-tbl.rb
Normal file
File diff suppressed because it is too large
Load diff
85
enc/trans/gb18030.trans
Normal file
85
enc/trans/gb18030.trans
Normal file
|
@ -0,0 +1,85 @@
|
|||
#include "transcode_data.h"
|
||||
|
||||
<%
|
||||
# GB18030_TO_UCS_TBL used below comes from this generated table file.
require "gb18030-tbl"
|
||||
|
||||
# GB18030 -> UTF-8 table.  {00-7f} is marked :nomap, the explicit pairs come
# from GB18030_TO_UCS_TBL, and the remaining four-byte ranges (90 30 81 30
# through e3 32 9a 35) are marked :func_so, i.e. left to a C function --
# presumably fun_so_from_gb18030 defined later in this file (confirm against
# the rb_from_GB18030 initializer).
transcode_tbl_only "GB18030", "UTF-8", [["{00-7f}", :nomap],
|
||||
*GB18030_TO_UCS_TBL,
|
||||
["{90-e2}{30-39}{81-fe}{30-39}", :func_so],
|
||||
["e3{30-31}{81-fe}{30-39}", :func_so],
|
||||
["e332{81-99}{30-39}", :func_so],
|
||||
["e3329a{30-35}", :func_so], # "E3329A35" is U+10FFFF
|
||||
]
|
||||
# UTF-8 -> GB18030 table: the same pairs with source and target swapped, plus
# the four-byte UTF-8 forms (lead bytes f0-f4) marked :func_so -- presumably
# handled by fun_so_to_gb18030 (confirm against the rb_to_GB18030 initializer).
transcode_tbl_only "UTF-8", "GB18030", [["{00-7f}", :nomap],
|
||||
*GB18030_TO_UCS_TBL.map {|a,b| [b,a] },
|
||||
["f0{90-bf}{80-bf}{80-bf}", :func_so],
|
||||
["{f1-f3}{80-bf}{80-bf}{80-bf}", :func_so],
|
||||
["f4{80-8f}{80-bf}{80-bf}", :func_so]
|
||||
]
|
||||
%>
|
||||
|
||||
<%= transcode_generated_code %>
|
||||
|
||||
/*
 * GB18030 -> UTF-8 for characters outside the BMP.
 *
 * s points at a four-byte GB18030 sequence; the four UTF-8 bytes of the
 * matching Unicode scalar value are stored at o[0]..o[3].  statep, l and
 * osize are not consulted.  Always returns 4, the number of bytes stored.
 */
static ssize_t
fun_so_from_gb18030(void *statep, const unsigned char *s, size_t l, unsigned char *o, size_t osize)
{
    unsigned int scalar;

    /*
     * Offset of the sequence counted from 90 30 81 30: the second and fourth
     * bytes are base-10 digits starting at 0x30, the third byte is a
     * base-126 digit starting at 0x81.
     */
    scalar = (unsigned int)s[0] - 0x90;
    scalar = scalar * 10 + ((unsigned int)s[1] - 0x30);
    scalar = scalar * 126 + ((unsigned int)s[2] - 0x81);
    scalar = scalar * 10 + ((unsigned int)s[3] - 0x30);
    scalar += 0x10000;          /* offset 0 corresponds to U+10000 */

    /* Emit the UTF-8 bytes from last to first, six payload bits at a time. */
    o[3] = 0x80 | (scalar & 0x3F);
    scalar >>= 6;
    o[2] = 0x80 | (scalar & 0x3F);
    scalar >>= 6;
    o[1] = 0x80 | (scalar & 0x3F);
    scalar >>= 6;
    o[0] = 0xF0 | scalar;
    return 4;
}
|
||||
|
||||
/*
 * UTF-8 -> GB18030 for characters outside the BMP.
 *
 * s points at a four-byte UTF-8 sequence; the four-byte GB18030 form of the
 * same Unicode scalar value is stored at o[0]..o[3].  statep, l and osize
 * are not consulted.  Always returns 4, the number of bytes stored.
 */
static ssize_t
fun_so_to_gb18030(void *statep, const unsigned char *s, size_t l, unsigned char *o, size_t osize)
{
    unsigned int rest;
    unsigned int low_digit, mid_digit, high_digit;

    /* Reassemble the scalar value: 3 payload bits, then 6 + 6 + 6. */
    rest = s[0] & 0x07;
    rest = (rest << 6) | (s[1] & 0x3F);
    rest = (rest << 6) | (s[2] & 0x3F);
    rest = (rest << 6) | (s[3] & 0x3F);

    /* Offset from U+10000, split into mixed-radix digits (10, 126, 10). */
    rest -= 0x10000;
    low_digit = rest % 10;
    rest /= 10;
    mid_digit = rest % 126;
    rest /= 126;
    high_digit = rest % 10;
    rest /= 10;

    o[0] = 0x90 + rest;
    o[1] = 0x30 + high_digit;
    o[2] = 0x81 + mid_digit;
    o[3] = 0x30 + low_digit;
    return 4;
}
|
||||
|
||||
|
||||
static const rb_transcoder
|
||||
rb_from_GB18030 = {
|
||||
"GB18030", "UTF-8", from_GB18030,
|
||||
TRANSCODE_TABLE_INFO,
|
||||
1, /* input_unit_length */
|
||||
4, /* max_input */
|
||||
3, /* max_output */
|
||||
asciicompat_converter, /* asciicompat_type */
|
||||
0, NULL, NULL, /* state_size, state_init, state_fini */
|
||||
NULL, NULL, NULL, fun_so_from_gb18030,
|
||||
NULL, NULL, NULL
|
||||
};
|
||||
static const rb_transcoder
|
||||
rb_to_GB18030 = {
|
||||
"UTF-8", "GB18030", to_GB18030,
|
||||
TRANSCODE_TABLE_INFO,
|
||||
1, /* input_unit_length */
|
||||
4, /* max_input */
|
||||
4, /* max_output */
|
||||
asciicompat_converter, /* asciicompat_type */
|
||||
0, NULL, NULL, /* state_size, state_init, state_fini */
|
||||
NULL, NULL, NULL, fun_so_to_gb18030,
|
||||
NULL, NULL, NULL
|
||||
};
|
||||
|
||||
|
||||
void
|
||||
Init_gb18030(void)
|
||||
{
|
||||
rb_register_transcoder(&rb_from_GB18030);
|
||||
rb_register_transcoder(&rb_to_GB18030);
|
||||
|
||||
}
|
|
@ -1562,6 +1562,162 @@ class TestTranscode < Test::Unit::TestCase
|
|||
check_both_ways("\u795E\u6797\u7FA9\u535A", "\xC9\xF1\xC1\xD6\xC1\x78\xB2\xA9", 'GBK') # 神林義博
|
||||
end
|
||||
|
||||
def test_gb18030
|
||||
# test from GBK
|
||||
check_both_ways("\u4E02", "\x81\x40", 'GB18030') #
|
||||
check_both_ways("\u4E8A", "\x81\x7E", 'GB18030') #
|
||||
check_both_ways("\u4E90", "\x81\x80", 'GB18030') #
|
||||
check_both_ways("\u4FA2", "\x81\xFE", 'GB18030') # 侢
|
||||
check_both_ways("\u5EC6", "\x8F\x40", 'GB18030') #
|
||||
check_both_ways("\u5F24", "\x8F\x7E", 'GB18030') # 弤
|
||||
check_both_ways("\u5F28", "\x8F\x80", 'GB18030') # 弨
|
||||
check_both_ways("\u6007", "\x8F\xFE", 'GB18030') #
|
||||
check_both_ways("\u6008", "\x90\x40", 'GB18030') #
|
||||
check_both_ways("\u6080", "\x90\x7E", 'GB18030') # 悀
|
||||
check_both_ways("\u6081", "\x90\x80", 'GB18030') #
|
||||
check_both_ways("\u6146", "\x90\xFE", 'GB18030') #
|
||||
check_both_ways("\u70DC", "\x9F\x40", 'GB18030') #
|
||||
check_both_ways("\u7134", "\x9F\x7E", 'GB18030') # 焴
|
||||
check_both_ways("\u7135", "\x9F\x80", 'GB18030') # 焵
|
||||
check_both_ways("\u71D3", "\x9F\xFE", 'GB18030') #
|
||||
check_both_ways("\u71D6", "\xA0\x40", 'GB18030') #
|
||||
check_both_ways("\u721A", "\xA0\x7E", 'GB18030') #
|
||||
check_both_ways("\u721B", "\xA0\x80", 'GB18030') #
|
||||
check_both_ways("\u72DB", "\xA0\xFE", 'GB18030') #
|
||||
check_both_ways("\u3000", "\xA1\xA1", 'GB18030') # full-width space
|
||||
check_both_ways("\u3001", "\xA1\xA2", 'GB18030') #
|
||||
check_both_ways("\u3013", "\xA1\xFE", 'GB18030') #
|
||||
#assert_raise(Encoding::UndefinedConversionError) { "\xA2\xA0".encode("utf-8", 'GB18030') }
|
||||
check_both_ways("\u2170", "\xA2\xA1", 'GB18030') # ⅰ
|
||||
#assert_raise(Encoding::UndefinedConversionError) { "\xA2\xB0".encode("utf-8", 'GB18030') }
|
||||
check_both_ways("\u2488", "\xA2\xB1", 'GB18030') #
|
||||
#assert_raise(Encoding::UndefinedConversionError) { "\xA2\xE4".encode("utf-8", 'GB18030') }
|
||||
check_both_ways("\u3220", "\xA2\xE5", 'GB18030') # ㈠
|
||||
#assert_raise(Encoding::UndefinedConversionError) { "\xA2\xF0".encode("utf-8", 'GB18030') }
|
||||
check_both_ways("\u2160", "\xA2\xF1", 'GB18030') # Ⅰ
|
||||
#assert_raise(Encoding::UndefinedConversionError) { "\xA3\xA0".encode("utf-8", 'GB18030') }
|
||||
check_both_ways("\uFF01", "\xA3\xA1", 'GB18030') # E
|
||||
check_both_ways("\uFFE3", "\xA3\xFE", 'GB18030') # E
|
||||
#assert_raise(Encoding::UndefinedConversionError) { "\xA4\xA0".encode("utf-8", 'GB18030') }
|
||||
check_both_ways("\u3041", "\xA4\xA1", 'GB18030') #
|
||||
#assert_raise(Encoding::UndefinedConversionError) { "\xA5\xA0".encode("utf-8", 'GB18030') }
|
||||
check_both_ways("\u30A1", "\xA5\xA1", 'GB18030') # ァ
|
||||
check_both_ways("\u0391", "\xA6\xA1", 'GB18030') #
|
||||
check_both_ways("\u03B1", "\xA6\xC1", 'GB18030') # α
|
||||
#assert_raise(Encoding::UndefinedConversionError) { "\xA6\xED".encode("utf-8", 'GB18030') }
|
||||
check_both_ways("\uFE3B", "\xA6\xEE", 'GB18030') # E
|
||||
check_both_ways("\u0410", "\xA7\xA1", 'GB18030') #
|
||||
check_both_ways("\u0430", "\xA7\xD1", 'GB18030') # а
|
||||
check_both_ways("\u02CA", "\xA8\x40", 'GB18030') #
|
||||
check_both_ways("\u2587", "\xA8\x7E", 'GB18030') #
|
||||
#assert_raise(Encoding::UndefinedConversionError) { "\xA8\x96".encode("utf-8", 'GB18030') }
|
||||
check_both_ways("\u0101", "\xA8\xA1", 'GB18030') #
|
||||
#assert_raise(Encoding::UndefinedConversionError) { "\xA8\xBC".encode("utf-8", 'GB18030') }
|
||||
#assert_raise(Encoding::UndefinedConversionError) { "\xA8\xBF".encode("utf-8", 'GB18030') }
|
||||
#assert_raise(Encoding::UndefinedConversionError) { "\xA8\xC4".encode("utf-8", 'GB18030') }
|
||||
check_both_ways("\u3105", "\xA8\xC5", 'GB18030') #
|
||||
check_both_ways("\u3021", "\xA9\x40", 'GB18030') # 〡
|
||||
#assert_raise(Encoding::UndefinedConversionError) { "\xA9\x58".encode("utf-8", 'GB18030') }
|
||||
#assert_raise(Encoding::UndefinedConversionError) { "\xA9\x5B".encode("utf-8", 'GB18030') }
|
||||
#assert_raise(Encoding::UndefinedConversionError) { "\xA9\x5D".encode("utf-8", 'GB18030') }
|
||||
check_both_ways("\u3007", "\xA9\x96", 'GB18030') #
|
||||
#assert_raise(Encoding::UndefinedConversionError) { "\xA9\xA3".encode("utf-8", 'GB18030') }
|
||||
check_both_ways("\u2500", "\xA9\xA4", 'GB18030') # ─
|
||||
#assert_raise(Encoding::UndefinedConversionError) { "\xA9\xF0".encode("utf-8", 'GB18030') }
|
||||
check_both_ways("\u7588", "\xAF\x40", 'GB18030') #
|
||||
check_both_ways("\u7607", "\xAF\x7E", 'GB18030') #
|
||||
check_both_ways("\u7608", "\xAF\x80", 'GB18030') #
|
||||
check_both_ways("\u7644", "\xAF\xA0", 'GB18030') #
|
||||
#assert_raise(Encoding::UndefinedConversionError) { "\xAF\xA1".encode("utf-8", 'GB18030') }
|
||||
check_both_ways("\u7645", "\xB0\x40", 'GB18030') #
|
||||
check_both_ways("\u769B", "\xB0\x7E", 'GB18030') #
|
||||
check_both_ways("\u769C", "\xB0\x80", 'GB18030') #
|
||||
check_both_ways("\u5265", "\xB0\xFE", 'GB18030') # 剥
|
||||
check_both_ways("\u7DFB", "\xBF\x40", 'GB18030') # 緻
|
||||
check_both_ways("\u7E39", "\xBF\x7E", 'GB18030') # 縹
|
||||
check_both_ways("\u7E3A", "\xBF\x80", 'GB18030') # 縺
|
||||
check_both_ways("\u5080", "\xBF\xFE", 'GB18030') # 傀
|
||||
check_both_ways("\u7E5E", "\xC0\x40", 'GB18030') #
|
||||
check_both_ways("\u7E9E", "\xC0\x7E", 'GB18030') #
|
||||
check_both_ways("\u7EAE", "\xC0\x80", 'GB18030') # 纮
|
||||
check_both_ways("\u4FD0", "\xC0\xFE", 'GB18030') #
|
||||
check_both_ways("\u87A5", "\xCF\x40", 'GB18030') # 螥
|
||||
check_both_ways("\u87F8", "\xCF\x7E", 'GB18030') # 蟸
|
||||
check_both_ways("\u87FA", "\xCF\x80", 'GB18030') # 蟺
|
||||
check_both_ways("\u6653", "\xCF\xFE", 'GB18030') #
|
||||
check_both_ways("\u8824", "\xD0\x40", 'GB18030') # 蠤
|
||||
check_both_ways("\u887A", "\xD0\x7E", 'GB18030') # 衺
|
||||
check_both_ways("\u887B", "\xD0\x80", 'GB18030') # 衻
|
||||
check_both_ways("\u7384", "\xD0\xFE", 'GB18030') #
|
||||
check_both_ways("\u9019", "\xDF\x40", 'GB18030') #
|
||||
check_both_ways("\u9081", "\xDF\x7E", 'GB18030') #
|
||||
check_both_ways("\u9084", "\xDF\x80", 'GB18030') #
|
||||
check_both_ways("\u553C", "\xDF\xFE", 'GB18030') # 唼
|
||||
check_both_ways("\u90C2", "\xE0\x40", 'GB18030') #
|
||||
check_both_ways("\u911C", "\xE0\x7E", 'GB18030') #
|
||||
check_both_ways("\u911D", "\xE0\x80", 'GB18030') #
|
||||
check_both_ways("\u5E3C", "\xE0\xFE", 'GB18030') # 帼
|
||||
check_both_ways("\u986F", "\xEF\x40", 'GB18030') # 顯
|
||||
check_both_ways("\u98E4", "\xEF\x7E", 'GB18030') # 飤
|
||||
check_both_ways("\u98E5", "\xEF\x80", 'GB18030') # 飥
|
||||
check_both_ways("\u7A14", "\xEF\xFE", 'GB18030') #
|
||||
check_both_ways("\u9908", "\xF0\x40", 'GB18030') #
|
||||
check_both_ways("\u9949", "\xF0\x7E", 'GB18030') #
|
||||
check_both_ways("\u994A", "\xF0\x80", 'GB18030') #
|
||||
check_both_ways("\u7619", "\xF0\xFE", 'GB18030') #
|
||||
check_both_ways("\u9F32", "\xFD\x40", 'GB18030') # 鼲
|
||||
check_both_ways("\u9F78", "\xFD\x7E", 'GB18030') # 齸
|
||||
check_both_ways("\u9F79", "\xFD\x80", 'GB18030') # 齹
|
||||
check_both_ways("\uF9F1", "\xFD\xA0", 'GB18030') # E
|
||||
#assert_raise(Encoding::UndefinedConversionError) { "\xFD\xA1".encode("utf-8", 'GB18030') }
|
||||
check_both_ways("\uFA0C", "\xFE\x40", 'GB18030') # E
|
||||
check_both_ways("\uFA29", "\xFE\x4F", 'GB18030') # E
|
||||
#assert_raise(Encoding::UndefinedConversionError) { "\xFE\x50".encode("utf-8", 'GB18030') }
|
||||
check_both_ways("\u9752\u5C71\u5B66\u9662\u5927\u5B66", "\xC7\xE0\xC9\xBD\xD1\xA7\xD4\xBA\xB4\xF3\xD1\xA7", 'GB18030') # 青山学院大学
|
||||
check_both_ways("\u795E\u6797\u7FA9\u535A", "\xC9\xF1\xC1\xD6\xC1\x78\xB2\xA9", 'GB18030') # 神林義博
|
||||
|
||||
# new tests for GB18030
|
||||
check_both_ways("\u9FA6", "\x82\x35\x8F\x33", 'GB18030') # 龦
|
||||
check_both_ways("\uD7FF", "\x83\x36\xC7\x38", 'GB18030') # No name ()
|
||||
|
||||
check_both_ways("\u0452", "\x81\x30\xD3\x30", 'GB18030') #
|
||||
check_both_ways("\u200F", "\x81\x36\xA5\x31", 'GB18030') # RIGHT-TO-LEFT MARK
|
||||
|
||||
check_both_ways("\uE865", "\x83\x36\xD0\x30", 'GB18030') # No name (Private Use Area)
|
||||
check_both_ways("\uF92B", "\x84\x30\x85\x34", 'GB18030') # E
|
||||
|
||||
check_both_ways("\u2643", "\x81\x37\xA8\x39", 'GB18030') #
|
||||
check_both_ways("\u2E80", "\x81\x38\xFD\x38", 'GB18030') # ⺀
|
||||
|
||||
check_both_ways("\uFA2A", "\x84\x30\x9C\x38", 'GB18030') # E
|
||||
check_both_ways("\uFE2F", "\x84\x31\x85\x37", 'GB18030') # No name (Combining Half Marks)
|
||||
|
||||
check_both_ways("\u3CE1", "\x82\x31\xD4\x38", 'GB18030') # 㳡
|
||||
check_both_ways("\u4055", "\x82\x32\xAF\x32", 'GB18030') #
|
||||
|
||||
check_both_ways("\u361B", "\x82\x30\xA6\x33", 'GB18030') #
|
||||
check_both_ways("\u3917", "\x82\x30\xF2\x37", 'GB18030') #
|
||||
|
||||
check_both_ways("\u49B8", "\x82\x34\xA1\x31", 'GB18030') # 䦸
|
||||
check_both_ways("\u4C76", "\x82\x34\xE7\x33", 'GB18030') # 䱶
|
||||
|
||||
check_both_ways("\u4160", "\x82\x32\xC9\x37", 'GB18030') # 䅠
|
||||
check_both_ways("\u4336", "\x82\x32\xF8\x37", 'GB18030') # 䌶
|
||||
|
||||
check_both_ways("\u478E", "\x82\x33\xE8\x38", 'GB18030') #
|
||||
check_both_ways("\u4946", "\x82\x34\x96\x38", 'GB18030') #
|
||||
|
||||
check_both_ways("\u44D7", "\x82\x33\xA3\x39", 'GB18030') #
|
||||
check_both_ways("\u464B", "\x82\x33\xC9\x31", 'GB18030') #
|
||||
|
||||
check_both_ways("\uFFE6", "\x84\x31\xA2\x34", 'GB18030') # E
|
||||
check_both_ways("\uFFFF", "\x84\x31\xA4\x39", 'GB18030') # not a character
|
||||
|
||||
check_both_ways("\u{10000}", "\x90\x30\x81\x30", 'GB18030') # 𐀀
|
||||
check_both_ways("\u{10FFFE}", "\xE3\x32\x9A\x34", 'GB18030') # No name (Not a character)
|
||||
check_both_ways("\u{10FFFF}", "\xE3\x32\x9A\x35", 'GB18030') # No name (Not a character)
|
||||
end
|
||||
|
||||
def test_Big5
|
||||
check_both_ways("\u3000", "\xA1\x40", 'Big5') # full-width space
|
||||
check_both_ways("\uFE5A", "\xA1\x7E", 'Big5') # ﹚
|
||||
|
|
|
@ -334,6 +334,8 @@ class ActionMap
|
|||
"o2(0x#$1,0x#$2)"
|
||||
when /\A([0-9a-f][0-9a-f])([0-9a-f][0-9a-f])([0-9a-f][0-9a-f])\z/i
|
||||
"o3(0x#$1,0x#$2,0x#$3)"
|
||||
when /\A([0-9a-f][0-9a-f])(3[0-9])([0-9a-f][0-9a-f])(3[0-9])\z/i
|
||||
"g4(0x#$1,0x#$2,0x#$3,0x#$4)"
|
||||
when /\A(f[0-7])([0-9a-f][0-9a-f])([0-9a-f][0-9a-f])([0-9a-f][0-9a-f])\z/i
|
||||
"o4(0x#$1,0x#$2,0x#$3,0x#$4)"
|
||||
when /\A([0-9a-f][0-9a-f]){4,259}\z/i
|
||||
|
@ -605,7 +607,7 @@ end
|
|||
TRANSCODERS = []
|
||||
TRANSCODE_GENERATED_TRANSCODER_CODE = ''
|
||||
|
||||
def transcode_tblgen(from, to, map)
|
||||
def transcode_tbl_only (from, to, map)
|
||||
if VERBOSE_MODE
|
||||
if from.empty? || to.empty?
|
||||
STDERR.puts "converter for #{from.empty? ? to : from}"
|
||||
|
@ -624,6 +626,11 @@ def transcode_tblgen(from, to, map)
|
|||
end
|
||||
map = encode_utf8(map)
|
||||
real_tree_name, max_input = transcode_compile_tree(tree_name, from, map)
|
||||
return map, tree_name, real_tree_name, max_input
|
||||
end
|
||||
|
||||
def transcode_tblgen(from, to, map)
|
||||
map, tree_name, real_tree_name, max_input = transcode_tbl_only(from, to, map)
|
||||
transcoder_name = "rb_#{tree_name}"
|
||||
TRANSCODERS << transcoder_name
|
||||
input_unit_length = UnitLength[from]
|
||||
|
|
18
transcode.c
18
transcode.c
|
@ -501,6 +501,10 @@ transcode_restartable0(const unsigned char **in_pos, unsigned char **out_pos,
|
|||
case 26: goto resume_label26;
|
||||
case 27: goto resume_label27;
|
||||
case 28: goto resume_label28;
|
||||
case 29: goto resume_label29;
|
||||
case 30: goto resume_label30;
|
||||
case 31: goto resume_label31;
|
||||
case 32: goto resume_label32;
|
||||
}
|
||||
|
||||
while (1) {
|
||||
|
@ -569,6 +573,12 @@ transcode_restartable0(const unsigned char **in_pos, unsigned char **out_pos,
|
|||
SUSPEND_OBUF(18); *out_p++ = getBT2(next_info);
|
||||
SUSPEND_OBUF(19); *out_p++ = getBT3(next_info);
|
||||
continue;
|
||||
case GB4bt:
|
||||
SUSPEND_OBUF(29); *out_p++ = getGB4bt0(next_info);
|
||||
SUSPEND_OBUF(30); *out_p++ = getGB4bt1(next_info);
|
||||
SUSPEND_OBUF(31); *out_p++ = getGB4bt2(next_info);
|
||||
SUSPEND_OBUF(32); *out_p++ = getGB4bt3(next_info);
|
||||
continue;
|
||||
case STR1:
|
||||
tc->output_index = 0;
|
||||
while (tc->output_index < STR1_LENGTH(BYTE_ADDR(STR1_BYTEINDEX(next_info)))) {
|
||||
|
@ -2686,9 +2696,9 @@ make_encobj(const char *name)
|
|||
* Encoding::Converter.asciicompat_encoding(string) => encoding or nil
|
||||
* Encoding::Converter.asciicompat_encoding(encoding) => encoding or nil
|
||||
*
|
||||
* returns the corresponding ASCII compatible encoding.
|
||||
* Returns the corresponding ASCII compatible encoding.
|
||||
*
|
||||
* It returns nil if the argument is an ASCII compatible encoding.
|
||||
* Returns nil if the argument is an ASCII compatible encoding.
|
||||
*
|
||||
* "corresponding ASCII compatible encoding" is an ASCII compatible encoding which
|
||||
* can represent exactly the same characters as the given ASCII incompatible encoding.
|
||||
|
@ -3997,7 +4007,7 @@ ecerr_error_bytes(VALUE self)
|
|||
* call-seq:
|
||||
* ecerr.readagain_bytes -> string
|
||||
*
|
||||
* returns the bytes to be read again when Encoding::InvalidByteSequenceError occur.
|
||||
* Returns the bytes to be read again when Encoding::InvalidByteSequenceError occurs.
|
||||
*/
|
||||
static VALUE
|
||||
ecerr_readagain_bytes(VALUE self)
|
||||
|
@ -4009,7 +4019,7 @@ ecerr_readagain_bytes(VALUE self)
|
|||
* call-seq:
|
||||
* ecerr.incomplete_input? -> true or false
|
||||
*
|
||||
* returns true if the invalid byte sequence error is caused by
|
||||
* Returns true if the invalid byte sequence error is caused by
|
||||
* premature end of string.
|
||||
*
|
||||
* ec = Encoding::Converter.new("EUC-JP", "ISO-8859-1")
|
||||
|
|
|
@ -35,6 +35,7 @@
|
|||
#define FUNio (PType 0x0E) /* function from info to output */
|
||||
#define FUNso (PType 0x0F) /* function from start to output */
|
||||
#define STR1 (PType 0x11) /* string 4 <= len <= 259 bytes: 1byte length + content */
|
||||
#define GB4bt (PType 0x12) /* GB18030 four bytes payload */
|
||||
|
||||
#define STR1_LENGTH(byte_addr) (*(byte_addr) + 4)
|
||||
#define STR1_BYTEINDEX(w) ((w) >> 6)
|
||||
|
@ -44,13 +45,19 @@
|
|||
#define o1(b1) (PType((((unsigned char)(b1))<<8)|ONEbt))
|
||||
#define o2(b1,b2) (PType((((unsigned char)(b1))<<8)|(((unsigned char)(b2))<<16)|TWObt))
|
||||
#define o3(b1,b2,b3) (PType(((((unsigned char)(b1))<<8)|(((unsigned char)(b2))<<16)|(((unsigned char)(b3))<<24)|THREEbt)&0xffffffffU))
|
||||
#define o4(b0,b1,b2,b3) (PType(((((unsigned char)(b1))<< 8)|(((unsigned char)(b2))<<16)|(((unsigned char)(b3))<<24)|((((unsigned char)(b0))&0x07)<<5)|FOURbt)&0xffffffffU))
|
||||
#define o4(b0,b1,b2,b3) (PType(((((unsigned char)(b1))<<8)|(((unsigned char)(b2))<<16)|(((unsigned char)(b3))<<24)|((((unsigned char)(b0))&0x07)<<5)|FOURbt)&0xffffffffU))
|
||||
/*
 * Pack a GB18030 four-byte sequence into one GB4bt word:
 *   b0 -> bits 8..15, b2 -> bits 16..23,
 *   low nibble of b1 -> bits 24..27, low nibble of b3 -> bits 28..31.
 * Only the low nibbles of b1 and b3 are kept; getGB4bt1/getGB4bt3 OR 0x30
 * back in.  Each operand is widened to unsigned int before shifting so that
 * a nibble of 8 or 9 shifted by 28 does not overflow a signed int.
 */
#define g4(b0,b1,b2,b3) (PType(((((unsigned int)(unsigned char)(b0))<<8)|(((unsigned int)(unsigned char)(b2))<<16)|((((unsigned int)(unsigned char)(b1))&0x0fU)<<24)|((((unsigned int)(unsigned char)(b3))&0x0fU)<<28)|GB4bt)&0xffffffffU))
|
||||
|
||||
#define getBT1(a) (((a)>> 8)&0xFF)
|
||||
#define getBT2(a) (((a)>>16)&0xFF)
|
||||
#define getBT3(a) (((a)>>24)&0xFF)
|
||||
#define getBT0(a) ((((a)>> 5)&0x07)|0xF0) /* for UTF-8 only!!! */
|
||||
|
||||
/*
 * Extract the four output bytes from a GB4bt word (layout as packed by g4):
 * bytes 0 and 2 are stored whole in bits 8..15 and 16..23; bytes 1 and 3 are
 * stored as nibbles in bits 24..27 and 28..31 and get 0x30 OR-ed back in.
 * The AND is grouped explicitly so the result does not rely on the relative
 * precedence of & and |.
 */
#define getGB4bt0(a) (((a)>> 8)&0xFF)
#define getGB4bt1(a) ((((a)>>24)&0x0F)|0x30)
#define getGB4bt2(a) (((a)>>16)&0xFF)
#define getGB4bt3(a) ((((a)>>28)&0x0F)|0x30)
|
||||
|
||||
#define o2FUNii(b1,b2) (PType((((unsigned char)(b1))<<8)|(((unsigned char)(b2))<<16)|FUNii))
|
||||
|
||||
/* do we need these??? maybe not, can be done with simple tables */
|
||||
|
|
Loading…
Reference in a new issue