1
0
Fork 0
mirror of https://github.com/ruby/ruby.git synced 2022-11-09 12:17:21 -05:00

* test/ruby/test_transcode.rb: added tests for GB2312

(from Yoshihiro Kambayashi)

* enc/trans/chinese.trans: set valid byte patterns for
  GB2312 and GB12345


git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@21314 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
duerst 2009-01-04 08:55:04 +00:00
parent 3bc30f0b73
commit fecce9e5e5
3 changed files with 60 additions and 1 deletions

View file

@ -1,3 +1,11 @@
Sun Jan 4 17:39:39 2009 Martin Duerst <duerst@it.aoyama.ac.jp>
* test/ruby/test_transcode.rb: added tests for GB2312
(from Yoshihiro Kambayashi)
* enc/trans/chinese.trans: set valid byte patterns for
GB2312 and GB12345
Sun Jan 4 17:39:39 2009 Martin Duerst <duerst@it.aoyama.ac.jp>
* enc/trans/big5.trans, big5-tbl.rb:

View file

@ -1,6 +1,9 @@
#include "transcode_data.h"
<%
set_valid_byte_pattern 'GB2312', 'EUC-KR'
set_valid_byte_pattern 'GB12345', 'EUC-KR'
transcode_tblgen "GB2312", "UTF-8",
[["{00-7f}", :nomap]] +
citrus_decode_mapsrc("euc", 0x8080, "GB2312/UCS")

View file

@ -1400,7 +1400,55 @@ class TestTranscode < Test::Unit::TestCase
assert_raise(Encoding::UndefinedConversionError) { "\u203E".encode("CP51932") }
end
def test_Big5
# Exercises the GB2312 <-> UTF-8 transcoder on a sample of code points.
# Each check_both_ways call pairs a Unicode string with the GB2312 byte
# sequence it should correspond to (helper defined elsewhere in this file;
# presumably it asserts the conversion in both directions -- confirm there).
# Each assert_raise call expects Encoding::UndefinedConversionError when the
# given byte pair is encoded to UTF-8 with GB2312 as the source encoding.
# The trailing comment on a line shows the character being tested.
def test_gb2312
# Lead bytes 0xA1-0xA9: non-Han characters (symbols, numerals, kana, Greek,
# Cyrillic, pinyin letters, bopomofo, box drawing). Several checks are
# preceded by an assert_raise on the byte pair just before them.
check_both_ways("\u3000", "\xA1\xA1", 'GB2312') # full-width space
check_both_ways("\u3013", "\xA1\xFE", 'GB2312') # 〓
assert_raise(Encoding::UndefinedConversionError) { "\xA2\xB0".encode("utf-8", 'GB2312') }
check_both_ways("\u2488", "\xA2\xB1", 'GB2312') # ⒈
assert_raise(Encoding::UndefinedConversionError) { "\xA2\xE4".encode("utf-8", 'GB2312') }
check_both_ways("\u3220", "\xA2\xE5", 'GB2312') # ㈠
assert_raise(Encoding::UndefinedConversionError) { "\xA2\xF0".encode("utf-8", 'GB2312') }
check_both_ways("\u2160", "\xA2\xF1", 'GB2312') # Ⅰ
check_both_ways("\uFF01", "\xA3\xA1", 'GB2312') # ！
check_both_ways("\uFFE3", "\xA3\xFE", 'GB2312') # ￣
check_both_ways("\u3041", "\xA4\xA1", 'GB2312') # ぁ
check_both_ways("\u30A1", "\xA5\xA1", 'GB2312') # ァ
check_both_ways("\u0391", "\xA6\xA1", 'GB2312') # Α
check_both_ways("\u03B1", "\xA6\xC1", 'GB2312') # α
check_both_ways("\u0410", "\xA7\xA1", 'GB2312') # А
check_both_ways("\u0430", "\xA7\xD1", 'GB2312') # а
check_both_ways("\u0101", "\xA8\xA1", 'GB2312') # ā
assert_raise(Encoding::UndefinedConversionError) { "\xA8\xC4".encode("utf-8", 'GB2312') }
check_both_ways("\u3105", "\xA8\xC5", 'GB2312') # ㄅ
assert_raise(Encoding::UndefinedConversionError) { "\xA9\xA3".encode("utf-8", 'GB2312') }
check_both_ways("\u2500", "\xA9\xA4", 'GB2312') # ─
# Lead bytes 0xB0-0xF7: Han characters, sampled at second bytes 0xA1 and
# 0xFE for each chosen lead byte (0xD7 is the exception, see below).
check_both_ways("\u554A", "\xB0\xA1", 'GB2312') # 啊
check_both_ways("\u5265", "\xB0\xFE", 'GB2312') # 剥
check_both_ways("\u4FCA", "\xBF\xA1", 'GB2312') # 俊
check_both_ways("\u5080", "\xBF\xFE", 'GB2312') # 傀
check_both_ways("\u9988", "\xC0\xA1", 'GB2312') # 馈
check_both_ways("\u4FD0", "\xC0\xFE", 'GB2312') # 俐
check_both_ways("\u7A00", "\xCF\xA1", 'GB2312') # 稀
check_both_ways("\u6653", "\xCF\xFE", 'GB2312') # 晓
check_both_ways("\u5C0F", "\xD0\xA1", 'GB2312') # 小
check_both_ways("\u7384", "\xD0\xFE", 'GB2312') # 玄
check_both_ways("\u4F4F", "\xD7\xA1", 'GB2312') # 住
# For lead byte 0xD7 the upper sample is 0xF9; the next byte pair (0xD7 0xFA)
# is expected to be undefined.
check_both_ways("\u5EA7", "\xD7\xF9", 'GB2312') # 座
assert_raise(Encoding::UndefinedConversionError) { "\xD7\xFA".encode("utf-8", 'GB2312') }
check_both_ways("\u647A", "\xDF\xA1", 'GB2312') # 摺
check_both_ways("\u553C", "\xDF\xFE", 'GB2312') # 唼
check_both_ways("\u5537", "\xE0\xA1", 'GB2312') # 唷
check_both_ways("\u5E3C", "\xE0\xFE", 'GB2312') # 帼
check_both_ways("\u94E9", "\xEF\xA1", 'GB2312') # 铩
check_both_ways("\u7A14", "\xEF\xFE", 'GB2312') # 稔
check_both_ways("\u7A39", "\xF0\xA1", 'GB2312') # 稹
check_both_ways("\u7619", "\xF0\xFE", 'GB2312') # 瘙
check_both_ways("\u9CCC", "\xF7\xA1", 'GB2312') # 鳌
check_both_ways("\u9F44", "\xF7\xFE", 'GB2312') # 齄
# A multi-character string: six characters, twelve GB2312 bytes.
check_both_ways("\u9752\u5C71\u5B66\u9662\u5927\u5B66", "\xC7\xE0\xC9\xBD\xD1\xA7\xD4\xBA\xB4\xF3\xD1\xA7", 'GB2312') # 青山学院大学
end
def test_Big5
check_both_ways("\u3000", "\xA1\x40", 'Big5') # full-width space
check_both_ways("\uFE5A", "\xA1\x7E", 'Big5') # ﹚
check_both_ways("\uFE5B", "\xA1\xA1", 'Big5') # ﹛