mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
* enc/trans/big5.trans, big5-hkscs-tbl.rb:
new Chinese BIG5-HKSCS transcoding (with Tatsuya Mizuno)
* test/ruby/test_transcode.rb: added tests for the above (with Tatsuya Mizuno)
* enc/big5.c: Added BIG5-HKSCS as a replicate encoding of BIG5 (short term solution, needs more work; with Tatsuya Mizuno)
* tool/transcode-tblgen.rb: made 'pat' directly accessible in class StrSet
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@24264 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
parent
9914a16ca9
commit
2886207584
6 changed files with 18474 additions and 1 deletions
14
ChangeLog
14
ChangeLog
|
@ -1,3 +1,17 @@
|
|||
Fri Jul 24 19:19:19 2009 Martin Duerst <duerst@it.aoyama.ac.jp>
|
||||
|
||||
* enc/trans/big5.trans, big5-hkscs-tbl.rb:
|
||||
new Chinese BIG5-HKSCS transcoding (with Tatsuya Mizuno)
|
||||
|
||||
* test/ruby/test_transcode.rb: added tests for the above
|
||||
(with Tatsuya Mizuno)
|
||||
|
||||
* enc/big5.c: Added BIG5-HKSCS as a replicate encoding of BIG5
|
||||
(short term solution, needs more work; with Tatsuya Mizuno)
|
||||
|
||||
* tool/transcode-tblgen.rb: made 'pat' directly accessible in
|
||||
class StrSet
|
||||
|
||||
Fri Jul 24 18:03:01 2009 Shugo Maeda <shugo@ruby-lang.org>
|
||||
|
||||
* lib/net/imap.rb (idle): leaves IDLE in an ensure clause. a patch
|
||||
|
|
|
@ -214,3 +214,4 @@ OnigEncodingDefine(big5, BIG5) = {
|
|||
big5_is_allowed_reverse_match
|
||||
};
|
||||
ENC_ALIAS("CP950", "BIG5")
|
||||
ENC_REPLICATE("BIG5-HKSCS", "BIG5")
|
||||
|
|
18385
enc/trans/big5-hkscs-tbl.rb
Normal file
18385
enc/trans/big5-hkscs-tbl.rb
Normal file
File diff suppressed because it is too large
Load diff
|
@ -2,9 +2,13 @@
|
|||
|
||||
<%
|
||||
require "big5-tbl"
|
||||
require "big5-hkscs-tbl"
|
||||
|
||||
transcode_tblgen "Big5", "UTF-8", [["{00-7f}", :nomap], *BIG5_TO_UCS_TBL]
|
||||
transcode_tblgen "UTF-8", "Big5", [["{00-7f}", :nomap], *BIG5_TO_UCS_TBL.map {|a,b| [b,a] }]
|
||||
|
||||
transcode_tblgen "Big5-HKSCS", "UTF-8", [["{00-7f}", :nomap], *BIG5_HKSCS_TO_UCS_TBL]
|
||||
transcode_tblgen "UTF-8", "Big5-HKSCS", [["{00-7f}", :nomap], *BIG5_HKSCS_TO_UCS_TBL.map {|a,b| [b,a] }]
|
||||
%>
|
||||
|
||||
<%= transcode_generated_code %>
|
||||
|
|
|
@ -1798,6 +1798,74 @@ class TestTranscode < Test::Unit::TestCase
|
|||
check_both_ways("\u795E\u6797\u7FA9\u535A", "\xAF\xAB\xAA\x4C\xB8\x71\xB3\xD5", 'Big5') # 神林義博
|
||||
end
|
||||
|
||||
def test_Big5_Hkscs
|
||||
check_both_ways("\u3000", "\xA1\x40", 'Big5-HKSCS') # full-width space
|
||||
check_both_ways("\uFE5A", "\xA1\x7E", 'Big5-HKSCS') # ﹚
|
||||
check_both_ways("\uFE5B", "\xA1\xA1", 'Big5-HKSCS') # ﹛
|
||||
#check_both_ways("\uFF0F", "\xA1\xFE", 'Big5-HKSCS') # /
|
||||
check_both_ways("\uFF57", "\xA3\x40", 'Big5-HKSCS') # w
|
||||
check_both_ways("\u310F", "\xA3\x7E", 'Big5-HKSCS') # ㄏ
|
||||
check_both_ways("\u3110", "\xA3\xA1", 'Big5-HKSCS') # ㄐ
|
||||
check_both_ways("\u02CB", "\xA3\xBF", 'Big5-HKSCS') # ˋ
|
||||
#assert_raise(Encoding::UndefinedConversionError) { "\xA3\xC0".encode("utf-8", 'Big5-HKSCS') }
|
||||
check_both_ways("\u6D6C", "\xAF\x40", 'Big5-HKSCS') # 浬
|
||||
check_both_ways("\u7837", "\xAF\x7E", 'Big5-HKSCS') # 砷
|
||||
check_both_ways("\u7825", "\xAF\xA1", 'Big5-HKSCS') # 砥
|
||||
check_both_ways("\u8343", "\xAF\xFE", 'Big5-HKSCS') # 荃
|
||||
check_both_ways("\u8654", "\xB0\x40", 'Big5-HKSCS') # 虔
|
||||
check_both_ways("\u9661", "\xB0\x7E", 'Big5-HKSCS') # 陡
|
||||
check_both_ways("\u965B", "\xB0\xA1", 'Big5-HKSCS') # 陛
|
||||
check_both_ways("\u5A40", "\xB0\xFE", 'Big5-HKSCS') # 婀
|
||||
check_both_ways("\u6FC3", "\xBF\x40", 'Big5-HKSCS') # 濃
|
||||
check_both_ways("\u7E0A", "\xBF\x7E", 'Big5-HKSCS') # 縊
|
||||
check_both_ways("\u7E11", "\xBF\xA1", 'Big5-HKSCS') # 縑
|
||||
check_both_ways("\u931A", "\xBF\xFE", 'Big5-HKSCS') # 錚
|
||||
check_both_ways("\u9310", "\xC0\x40", 'Big5-HKSCS') # 錐
|
||||
check_both_ways("\u5687", "\xC0\x7E", 'Big5-HKSCS') # 嚇
|
||||
check_both_ways("\u568F", "\xC0\xA1", 'Big5-HKSCS') # 嚏
|
||||
check_both_ways("\u77AC", "\xC0\xFE", 'Big5-HKSCS') # 瞬
|
||||
check_both_ways("\u8B96", "\xC6\x40", 'Big5-HKSCS') # 讖
|
||||
check_both_ways("\u7C72", "\xC6\x7E", 'Big5-HKSCS') # 籲
|
||||
#assert_raise(Encoding::UndefinedConversionError) { "\xC6\xA1".encode("utf-8", 'Big5-HKSCS') }
|
||||
#assert_raise(Encoding::UndefinedConversionError) { "\xC7\x40".encode("utf-8", 'Big5-HKSCS') }
|
||||
#assert_raise(Encoding::UndefinedConversionError) { "\xC8\x40".encode("utf-8", 'Big5-HKSCS') }
|
||||
check_both_ways("\u4E42", "\xC9\x40", 'Big5-HKSCS') # 乂
|
||||
check_both_ways("\u6C15", "\xC9\x7E", 'Big5-HKSCS') # 氕
|
||||
check_both_ways("\u6C36", "\xC9\xA1", 'Big5-HKSCS') # 氶
|
||||
check_both_ways("\u6C4B", "\xC9\xFE", 'Big5-HKSCS') # 汋
|
||||
check_both_ways("\u67DC", "\xCF\x40", 'Big5-HKSCS') # 柜
|
||||
check_both_ways("\u6D42", "\xCF\x7E", 'Big5-HKSCS') # 浂
|
||||
check_both_ways("\u6D01", "\xCF\xA1", 'Big5-HKSCS') # 洁
|
||||
check_both_ways("\u7A80", "\xCF\xFE", 'Big5-HKSCS') # 窀
|
||||
check_both_ways("\u7A7E", "\xD0\x40", 'Big5-HKSCS') # 穾
|
||||
check_both_ways("\u82EA", "\xD0\x7E", 'Big5-HKSCS') # 苪
|
||||
check_both_ways("\u82E4", "\xD0\xA1", 'Big5-HKSCS') # 苤
|
||||
check_both_ways("\u54F1", "\xD0\xFE", 'Big5-HKSCS') # 哱
|
||||
check_both_ways("\u7A1B", "\xDF\x40", 'Big5-HKSCS') # 稛
|
||||
check_both_ways("\u816F", "\xDF\x7E", 'Big5-HKSCS') # 腯
|
||||
check_both_ways("\u8144", "\xDF\xA1", 'Big5-HKSCS') # 腄
|
||||
check_both_ways("\u89E4", "\xDF\xFE", 'Big5-HKSCS') # 觤
|
||||
check_both_ways("\u89E1", "\xE0\x40", 'Big5-HKSCS') # 觡
|
||||
check_both_ways("\u903F", "\xE0\x7E", 'Big5-HKSCS') # 逿
|
||||
check_both_ways("\u9044", "\xE0\xA1", 'Big5-HKSCS') # 遄
|
||||
check_both_ways("\u50E0", "\xE0\xFE", 'Big5-HKSCS') # 僠
|
||||
check_both_ways("\u979E", "\xEF\x40", 'Big5-HKSCS') # 鞞
|
||||
check_both_ways("\u9D30", "\xEF\x7E", 'Big5-HKSCS') # 鴰
|
||||
check_both_ways("\u9D45", "\xEF\xA1", 'Big5-HKSCS') # 鵅
|
||||
check_both_ways("\u7376", "\xEF\xFE", 'Big5-HKSCS') # 獶
|
||||
check_both_ways("\u74B8", "\xF0\x40", 'Big5-HKSCS') # 璸
|
||||
check_both_ways("\u81D2", "\xF0\x7E", 'Big5-HKSCS') # 臒
|
||||
check_both_ways("\u81D0", "\xF0\xA1", 'Big5-HKSCS') # 臐
|
||||
check_both_ways("\u8E67", "\xF0\xFE", 'Big5-HKSCS') # 蹧
|
||||
check_both_ways("\u7E98", "\xF9\x40", 'Big5-HKSCS') # 纘
|
||||
check_both_ways("\u9F0A", "\xF9\x7E", 'Big5-HKSCS') # 鼊
|
||||
check_both_ways("\u9FA4", "\xF9\xA1", 'Big5-HKSCS') # 龤
|
||||
check_both_ways("\u9F98", "\xF9\xD5", 'Big5-HKSCS') # 龘
|
||||
check_both_ways("\u{23ED7}", "\x8E\x40", 'Big5-HKSCS') # 𣻗
|
||||
#assert_raise(Encoding::UndefinedConversionError) { "\xF9\xD6".encode("utf-8", 'Big5-HKSCS') }
|
||||
check_both_ways("\u795E\u6797\u7FA9\u535A", "\xAF\xAB\xAA\x4C\xB8\x71\xB3\xD5", 'Big5-HKSCS') # 神林義博
|
||||
end
|
||||
|
||||
def test_nothing_changed
|
||||
a = "James".force_encoding("US-ASCII")
|
||||
b = a.encode("Shift_JIS")
|
||||
|
|
|
@ -19,6 +19,7 @@ def c_esc(str)
|
|||
end
|
||||
|
||||
class StrSet
|
||||
attr_reader :pat
|
||||
def self.parse(pattern)
|
||||
if /\A\s*(([0-9a-f][0-9a-f]|\{([0-9a-f][0-9a-f]|[0-9a-f][0-9a-f]-[0-9a-f][0-9a-f])(,([0-9a-f][0-9a-f]|[0-9a-f][0-9a-f]-[0-9a-f][0-9a-f]))*\})+(\s+|\z))*\z/i !~ pattern
|
||||
raise ArgumentError, "invalid pattern: #{pattern.inspect}"
|
||||
|
@ -68,7 +69,7 @@ class StrSet
|
|||
|
||||
def eql?(other)
|
||||
self.class == other.class &&
|
||||
@pat == other.instance_eval { @pat }
|
||||
@pat == other.pat
|
||||
end
|
||||
|
||||
alias == eql?
|
||||
|
|
Loading…
Reference in a new issue