mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
* enc/trans/single_byte.trans, maccroatian-tbl.rb,
maccyrillic-tbl.rb, maciceland-tbl.rb: new single-byte transcodings (from Yoshihiro Kambayashi) * test/ruby/test_transcode.rb: added tests for the above (from Yoshihiro Kambayashi) git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@20075 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
parent
ec280ecbe6
commit
d37df9fb13
6 changed files with 473 additions and 0 deletions
|
@ -1,3 +1,12 @@
|
|||
Fri Oct 31 18:05:05 2008 Martin Duerst <duerst@it.aoyama.ac.jp>
|
||||
|
||||
* enc/trans/single_byte.trans, maccroatian-tbl.rb,
|
||||
maccyrillic-tbl.rb, maciceland-tbl.rb: new single-byte
|
||||
transcodings (from Yoshihiro Kambayashi)
|
||||
|
||||
* test/ruby/test_transcode.rb: added tests for the above
|
||||
(from Yoshihiro Kambayashi)
|
||||
|
||||
Fri Oct 31 12:51:25 2008 Yukihiro Matsumoto <matz@ruby-lang.org>
|
||||
|
||||
* dir.c (dir_globs): need taint check. reported by steve
|
||||
|
|
129
enc/trans/maccroatian-tbl.rb
Normal file
129
enc/trans/maccroatian-tbl.rb
Normal file
|
@ -0,0 +1,129 @@
|
|||
MACCROATIAN_TO_UCS_TBL = [
|
||||
["CA",0xA0],
|
||||
["C1",0xA1],
|
||||
["A2",0xA2],
|
||||
["A3",0xA3],
|
||||
["DB",0xA4],
|
||||
["A4",0xA7],
|
||||
["AC",0xA8],
|
||||
["D9",0xA9],
|
||||
["BB",0xAA],
|
||||
["C7",0xAB],
|
||||
["C2",0xAC],
|
||||
["A8",0xAE],
|
||||
["F8",0xAF],
|
||||
["A1",0xB0],
|
||||
["B1",0xB1],
|
||||
["AB",0xB4],
|
||||
["B5",0xB5],
|
||||
["A6",0xB6],
|
||||
["E1",0xB7],
|
||||
["FC",0xB8],
|
||||
["BC",0xBA],
|
||||
["DF",0xBB],
|
||||
["C0",0xBF],
|
||||
["CB",0xC0],
|
||||
["E7",0xC1],
|
||||
["E5",0xC2],
|
||||
["CC",0xC3],
|
||||
["80",0xC4],
|
||||
["81",0xC5],
|
||||
["DE",0xC6],
|
||||
["82",0xC7],
|
||||
["E9",0xC8],
|
||||
["83",0xC9],
|
||||
["FD",0xCA],
|
||||
["FA",0xCB],
|
||||
["ED",0xCC],
|
||||
["EA",0xCD],
|
||||
["EB",0xCE],
|
||||
["EC",0xCF],
|
||||
["84",0xD1],
|
||||
["F1",0xD2],
|
||||
["EE",0xD3],
|
||||
["EF",0xD4],
|
||||
["CD",0xD5],
|
||||
["85",0xD6],
|
||||
["AF",0xD8],
|
||||
["F4",0xD9],
|
||||
["F2",0xDA],
|
||||
["F3",0xDB],
|
||||
["86",0xDC],
|
||||
["A7",0xDF],
|
||||
["88",0xE0],
|
||||
["87",0xE1],
|
||||
["89",0xE2],
|
||||
["8B",0xE3],
|
||||
["8A",0xE4],
|
||||
["8C",0xE5],
|
||||
["FE",0xE6],
|
||||
["8D",0xE7],
|
||||
["8F",0xE8],
|
||||
["8E",0xE9],
|
||||
["90",0xEA],
|
||||
["91",0xEB],
|
||||
["93",0xEC],
|
||||
["92",0xED],
|
||||
["94",0xEE],
|
||||
["95",0xEF],
|
||||
["96",0xF1],
|
||||
["98",0xF2],
|
||||
["97",0xF3],
|
||||
["99",0xF4],
|
||||
["9B",0xF5],
|
||||
["9A",0xF6],
|
||||
["D6",0xF7],
|
||||
["BF",0xF8],
|
||||
["9D",0xF9],
|
||||
["9C",0xFA],
|
||||
["9E",0xFB],
|
||||
["9F",0xFC],
|
||||
["C6",0x106],
|
||||
["E6",0x107],
|
||||
["C8",0x10C],
|
||||
["E8",0x10D],
|
||||
["D0",0x110],
|
||||
["F0",0x111],
|
||||
["F5",0x131],
|
||||
["CE",0x152],
|
||||
["CF",0x153],
|
||||
["A9",0x160],
|
||||
["B9",0x161],
|
||||
["AE",0x17D],
|
||||
["BE",0x17E],
|
||||
["C4",0x192],
|
||||
["F6",0x2C6],
|
||||
["FF",0x2C7],
|
||||
["FB",0x2DA],
|
||||
["F7",0x2DC],
|
||||
["F9",0x3C0],
|
||||
["E0",0x2013],
|
||||
["D1",0x2014],
|
||||
["D4",0x2018],
|
||||
["D5",0x2019],
|
||||
["E2",0x201A],
|
||||
["D2",0x201C],
|
||||
["D3",0x201D],
|
||||
["E3",0x201E],
|
||||
["A0",0x2020],
|
||||
["A5",0x2022],
|
||||
["C9",0x2026],
|
||||
["E4",0x2030],
|
||||
["DC",0x2039],
|
||||
["DD",0x203A],
|
||||
["DA",0x2044],
|
||||
["AA",0x2122],
|
||||
["BD",0x2126],
|
||||
["B6",0x2202],
|
||||
["B4",0x2206],
|
||||
["B8",0x220F],
|
||||
["B7",0x2211],
|
||||
["C3",0x221A],
|
||||
["B0",0x221E],
|
||||
["BA",0x222B],
|
||||
["C5",0x2248],
|
||||
["AD",0x2260],
|
||||
["B2",0x2264],
|
||||
["B3",0x2265],
|
||||
["D7",0x25CA],
|
||||
]
|
130
enc/trans/maccyrillic-tbl.rb
Normal file
130
enc/trans/maccyrillic-tbl.rb
Normal file
|
@ -0,0 +1,130 @@
|
|||
MACCYRILLIC_TO_UCS_TBL = [
|
||||
["CA",0xA0],
|
||||
["A2",0xA2],
|
||||
["A3",0xA3],
|
||||
["FF",0xA4],
|
||||
["A4",0xA7],
|
||||
["A9",0xA9],
|
||||
["C7",0xAB],
|
||||
["C2",0xAC],
|
||||
["A8",0xAE],
|
||||
["A1",0xB0],
|
||||
["B1",0xB1],
|
||||
["B5",0xB5],
|
||||
["A6",0xB6],
|
||||
["C8",0xBB],
|
||||
["D6",0xF7],
|
||||
["C4",0x192],
|
||||
["DD",0x401],
|
||||
["AB",0x402],
|
||||
["AE",0x403],
|
||||
["B8",0x404],
|
||||
["C1",0x405],
|
||||
["A7",0x406],
|
||||
["BA",0x407],
|
||||
["B7",0x408],
|
||||
["BC",0x409],
|
||||
["BE",0x40A],
|
||||
["CB",0x40B],
|
||||
["CD",0x40C],
|
||||
["D8",0x40E],
|
||||
["DA",0x40F],
|
||||
["80",0x410],
|
||||
["81",0x411],
|
||||
["82",0x412],
|
||||
["83",0x413],
|
||||
["84",0x414],
|
||||
["85",0x415],
|
||||
["86",0x416],
|
||||
["87",0x417],
|
||||
["88",0x418],
|
||||
["89",0x419],
|
||||
["8A",0x41A],
|
||||
["8B",0x41B],
|
||||
["8C",0x41C],
|
||||
["8D",0x41D],
|
||||
["8E",0x41E],
|
||||
["8F",0x41F],
|
||||
["90",0x420],
|
||||
["91",0x421],
|
||||
["92",0x422],
|
||||
["93",0x423],
|
||||
["94",0x424],
|
||||
["95",0x425],
|
||||
["96",0x426],
|
||||
["97",0x427],
|
||||
["98",0x428],
|
||||
["99",0x429],
|
||||
["9A",0x42A],
|
||||
["9B",0x42B],
|
||||
["9C",0x42C],
|
||||
["9D",0x42D],
|
||||
["9E",0x42E],
|
||||
["9F",0x42F],
|
||||
["E0",0x430],
|
||||
["E1",0x431],
|
||||
["E2",0x432],
|
||||
["E3",0x433],
|
||||
["E4",0x434],
|
||||
["E5",0x435],
|
||||
["E6",0x436],
|
||||
["E7",0x437],
|
||||
["E8",0x438],
|
||||
["E9",0x439],
|
||||
["EA",0x43A],
|
||||
["EB",0x43B],
|
||||
["EC",0x43C],
|
||||
["ED",0x43D],
|
||||
["EE",0x43E],
|
||||
["EF",0x43F],
|
||||
["F0",0x440],
|
||||
["F1",0x441],
|
||||
["F2",0x442],
|
||||
["F3",0x443],
|
||||
["F4",0x444],
|
||||
["F5",0x445],
|
||||
["F6",0x446],
|
||||
["F7",0x447],
|
||||
["F8",0x448],
|
||||
["F9",0x449],
|
||||
["FA",0x44A],
|
||||
["FB",0x44B],
|
||||
["FC",0x44C],
|
||||
["FD",0x44D],
|
||||
["FE",0x44E],
|
||||
["DF",0x44F],
|
||||
["DE",0x451],
|
||||
["AC",0x452],
|
||||
["AF",0x453],
|
||||
["B9",0x454],
|
||||
["CF",0x455],
|
||||
["B4",0x456],
|
||||
["BB",0x457],
|
||||
["C0",0x458],
|
||||
["BD",0x459],
|
||||
["BF",0x45A],
|
||||
["CC",0x45B],
|
||||
["CE",0x45C],
|
||||
["D9",0x45E],
|
||||
["DB",0x45F],
|
||||
["D0",0x2013],
|
||||
["D1",0x2014],
|
||||
["D4",0x2018],
|
||||
["D5",0x2019],
|
||||
["D2",0x201C],
|
||||
["D3",0x201D],
|
||||
["D7",0x201E],
|
||||
["A0",0x2020],
|
||||
["A5",0x2022],
|
||||
["C9",0x2026],
|
||||
["DC",0x2116],
|
||||
["AA",0x2122],
|
||||
["B6",0x2202],
|
||||
["C6",0x2206],
|
||||
["C3",0x221A],
|
||||
["B0",0x221E],
|
||||
["C5",0x2248],
|
||||
["AD",0x2260],
|
||||
["B2",0x2264],
|
||||
["B3",0x2265],
|
||||
]
|
129
enc/trans/maciceland-tbl.rb
Normal file
129
enc/trans/maciceland-tbl.rb
Normal file
|
@ -0,0 +1,129 @@
|
|||
MACICELAND_TO_UCS_TBL = [
|
||||
["CA",0xA0],
|
||||
["C1",0xA1],
|
||||
["A2",0xA2],
|
||||
["A3",0xA3],
|
||||
["DB",0xA4],
|
||||
["B4",0xA5],
|
||||
["A4",0xA7],
|
||||
["AC",0xA8],
|
||||
["A9",0xA9],
|
||||
["BB",0xAA],
|
||||
["C7",0xAB],
|
||||
["C2",0xAC],
|
||||
["A8",0xAE],
|
||||
["F8",0xAF],
|
||||
["A1",0xB0],
|
||||
["B1",0xB1],
|
||||
["AB",0xB4],
|
||||
["B5",0xB5],
|
||||
["A6",0xB6],
|
||||
["E1",0xB7],
|
||||
["FC",0xB8],
|
||||
["BC",0xBA],
|
||||
["C8",0xBB],
|
||||
["C0",0xBF],
|
||||
["CB",0xC0],
|
||||
["E7",0xC1],
|
||||
["E5",0xC2],
|
||||
["CC",0xC3],
|
||||
["80",0xC4],
|
||||
["81",0xC5],
|
||||
["AE",0xC6],
|
||||
["82",0xC7],
|
||||
["E9",0xC8],
|
||||
["83",0xC9],
|
||||
["E6",0xCA],
|
||||
["E8",0xCB],
|
||||
["ED",0xCC],
|
||||
["EA",0xCD],
|
||||
["EB",0xCE],
|
||||
["EC",0xCF],
|
||||
["DC",0xD0],
|
||||
["84",0xD1],
|
||||
["F1",0xD2],
|
||||
["EE",0xD3],
|
||||
["EF",0xD4],
|
||||
["CD",0xD5],
|
||||
["85",0xD6],
|
||||
["AF",0xD8],
|
||||
["F4",0xD9],
|
||||
["F2",0xDA],
|
||||
["F3",0xDB],
|
||||
["86",0xDC],
|
||||
["A0",0xDD],
|
||||
["DE",0xDE],
|
||||
["A7",0xDF],
|
||||
["88",0xE0],
|
||||
["87",0xE1],
|
||||
["89",0xE2],
|
||||
["8B",0xE3],
|
||||
["8A",0xE4],
|
||||
["8C",0xE5],
|
||||
["BE",0xE6],
|
||||
["8D",0xE7],
|
||||
["8F",0xE8],
|
||||
["8E",0xE9],
|
||||
["90",0xEA],
|
||||
["91",0xEB],
|
||||
["93",0xEC],
|
||||
["92",0xED],
|
||||
["94",0xEE],
|
||||
["95",0xEF],
|
||||
["DD",0xF0],
|
||||
["96",0xF1],
|
||||
["98",0xF2],
|
||||
["97",0xF3],
|
||||
["99",0xF4],
|
||||
["9B",0xF5],
|
||||
["9A",0xF6],
|
||||
["D6",0xF7],
|
||||
["BF",0xF8],
|
||||
["9D",0xF9],
|
||||
["9C",0xFA],
|
||||
["9E",0xFB],
|
||||
["9F",0xFC],
|
||||
["E0",0xFD],
|
||||
["DF",0xFE],
|
||||
["D8",0xFF],
|
||||
["F5",0x131],
|
||||
["CE",0x152],
|
||||
["CF",0x153],
|
||||
["D9",0x178],
|
||||
["C4",0x192],
|
||||
["F6",0x2C6],
|
||||
["FF",0x2C7],
|
||||
["F9",0x2D8],
|
||||
["FA",0x2D9],
|
||||
["FB",0x2DA],
|
||||
["FE",0x2DB],
|
||||
["F7",0x2DC],
|
||||
["FD",0x2DD],
|
||||
["B9",0x3C0],
|
||||
["D0",0x2013],
|
||||
["D1",0x2014],
|
||||
["D4",0x2018],
|
||||
["D5",0x2019],
|
||||
["E2",0x201A],
|
||||
["D2",0x201C],
|
||||
["D3",0x201D],
|
||||
["E3",0x201E],
|
||||
["A5",0x2022],
|
||||
["C9",0x2026],
|
||||
["E4",0x2030],
|
||||
["DA",0x2044],
|
||||
["AA",0x2122],
|
||||
["BD",0x2126],
|
||||
["B6",0x2202],
|
||||
["C6",0x2206],
|
||||
["B8",0x220F],
|
||||
["B7",0x2211],
|
||||
["C3",0x221A],
|
||||
["B0",0x221E],
|
||||
["BA",0x222B],
|
||||
["C5",0x2248],
|
||||
["AD",0x2260],
|
||||
["B2",0x2264],
|
||||
["B3",0x2265],
|
||||
["D7",0x25CA],
|
||||
]
|
|
@ -53,6 +53,9 @@
|
|||
transcode_tblgen_singlebyte "WINDOWS-1255"
|
||||
transcode_tblgen_singlebyte "WINDOWS-1256"
|
||||
transcode_tblgen_singlebyte "WINDOWS-1257"
|
||||
transcode_tblgen_singlebyte "MACCROATIAN"
|
||||
transcode_tblgen_singlebyte "MACCYRILLIC"
|
||||
transcode_tblgen_singlebyte "MACICELAND"
|
||||
%>
|
||||
|
||||
<%= transcode_generated_code %>
|
||||
|
|
|
@ -417,6 +417,79 @@ class TestTranscode < Test::Unit::TestCase
|
|||
check_both_ways("\u02D9", "\xFF", 'windows-1257') # ˙
|
||||
end
|
||||
|
||||
def test_macCroatian
|
||||
check_both_ways("\u00C4", "\x80", 'macCroatian') # Ä
|
||||
check_both_ways("\u00E8", "\x8F", 'macCroatian') # è
|
||||
check_both_ways("\u00EA", "\x90", 'macCroatian') # ê
|
||||
check_both_ways("\u00FC", "\x9F", 'macCroatian') # ü
|
||||
check_both_ways("\u2020", "\xA0", 'macCroatian') # †
|
||||
check_both_ways("\u00D8", "\xAF", 'macCroatian') # Ø
|
||||
check_both_ways("\u221E", "\xB0", 'macCroatian') # ∞
|
||||
check_both_ways("\u00F8", "\xBF", 'macCroatian') # ø
|
||||
check_both_ways("\u00BF", "\xC0", 'macCroatian') # ¿
|
||||
check_both_ways("\u0153", "\xCF", 'macCroatian') # œ
|
||||
check_both_ways("\u0110", "\xD0", 'macCroatian') # Đ
|
||||
check_both_ways("\u00A9", "\xD9", 'macCroatian') # ©
|
||||
check_both_ways("\u2044", "\xDA", 'macCroatian') # ⁄
|
||||
check_both_ways("\u203A", "\xDD", 'macCroatian') # ›
|
||||
check_both_ways("\u00C6", "\xDE", 'macCroatian') # Æ
|
||||
check_both_ways("\u00BB", "\xDF", 'macCroatian') # »
|
||||
check_both_ways("\u2013", "\xE0", 'macCroatian') # –
|
||||
check_both_ways("\u00B7", "\xE1", 'macCroatian') # ·
|
||||
check_both_ways("\u00C2", "\xE5", 'macCroatian') # Â
|
||||
check_both_ways("\u0107", "\xE6", 'macCroatian') # ć
|
||||
check_both_ways("\u00C1", "\xE7", 'macCroatian') # Á
|
||||
check_both_ways("\u010D", "\xE8", 'macCroatian') # č
|
||||
check_both_ways("\u00C8", "\xE9", 'macCroatian') # È
|
||||
check_both_ways("\u00D4", "\xEF", 'macCroatian') # Ô
|
||||
check_both_ways("\u0111", "\xF0", 'macCroatian') # đ
|
||||
check_both_ways("\u00D2", "\xF1", 'macCroatian') # Ò
|
||||
check_both_ways("\u00AF", "\xF8", 'macCroatian') # ¯
|
||||
check_both_ways("\u03C0", "\xF9", 'macCroatian') # π
|
||||
check_both_ways("\u00CB", "\xFA", 'macCroatian') # Ë
|
||||
check_both_ways("\u00CA", "\xFD", 'macCroatian') # Ê
|
||||
check_both_ways("\u00E6", "\xFE", 'macCroatian') # æ
|
||||
check_both_ways("\u02C7", "\xFF", 'macCroatian') # ˇ
|
||||
end
|
||||
|
||||
def test_macCyrillic
|
||||
check_both_ways("\u0410", "\x80", 'macCyrillic') # А
|
||||
check_both_ways("\u041F", "\x8F", 'macCyrillic') # П
|
||||
check_both_ways("\u0420", "\x90", 'macCyrillic') # Р
|
||||
check_both_ways("\u042F", "\x9F", 'macCyrillic') # Я
|
||||
check_both_ways("\u2020", "\xA0", 'macCyrillic') # †
|
||||
check_both_ways("\u0453", "\xAF", 'macCyrillic') # ѓ
|
||||
check_both_ways("\u221E", "\xB0", 'macCyrillic') # ∞
|
||||
check_both_ways("\u045A", "\xBF", 'macCyrillic') # њ
|
||||
check_both_ways("\u0458", "\xC0", 'macCyrillic') # ј
|
||||
check_both_ways("\u0455", "\xCF", 'macCyrillic') # ѕ
|
||||
check_both_ways("\u2013", "\xD0", 'macCyrillic') # –
|
||||
check_both_ways("\u044F", "\xDF", 'macCyrillic') # я
|
||||
check_both_ways("\u0430", "\xE0", 'macCyrillic') # а
|
||||
check_both_ways("\u043F", "\xEF", 'macCyrillic') # п
|
||||
check_both_ways("\u0440", "\xF0", 'macCyrillic') # р
|
||||
check_both_ways("\u00A4", "\xFF", 'macCyrillic') # ¤
|
||||
end
|
||||
|
||||
def test_macIceland
|
||||
check_both_ways("\u00C4", "\x80", 'macIceland') # Ä
|
||||
check_both_ways("\u00E8", "\x8F", 'macIceland') # è
|
||||
check_both_ways("\u00EA", "\x90", 'macIceland') # ê
|
||||
check_both_ways("\u00FC", "\x9F", 'macIceland') # ü
|
||||
check_both_ways("\u00DD", "\xA0", 'macIceland') # Ý
|
||||
check_both_ways("\u00D8", "\xAF", 'macIceland') # Ø
|
||||
check_both_ways("\u221E", "\xB0", 'macIceland') # ∞
|
||||
check_both_ways("\u00F8", "\xBF", 'macIceland') # ø
|
||||
check_both_ways("\u00BF", "\xC0", 'macIceland') # ¿
|
||||
check_both_ways("\u0153", "\xCF", 'macIceland') # œ
|
||||
check_both_ways("\u2013", "\xD0", 'macIceland') # –
|
||||
check_both_ways("\u00FE", "\xDF", 'macIceland') # þ
|
||||
check_both_ways("\u00FD", "\xE0", 'macIceland') # ý
|
||||
check_both_ways("\u00D4", "\xEF", 'macIceland') # Ô
|
||||
#check_both_ways("\uF8FF", "\xF0", 'macIceland') # Apple logo
|
||||
check_both_ways("\u02C7", "\xFF", 'macIceland') # ˇ
|
||||
end
|
||||
|
||||
def check_utf_16_both_ways(utf8, raw)
|
||||
copy = raw.dup
|
||||
0.step(copy.length-1, 2) { |i| copy[i+1], copy[i] = copy[i], copy[i+1] }
|
||||
|
|
Loading…
Reference in a new issue