1
0
Fork 0
mirror of https://github.com/ruby/ruby.git synced 2022-11-09 12:17:21 -05:00

* enc/trans/single_byte.trans: adding WINDOWS-wwww encodings

(wwww = 874/1250/1251/1253/1254/1255/1256/1257)
  (contributed by Yoshihiro Kambayashi)

* enc/trans/windows-wwww-tbl.rb: 8 new files
  (contributed by Yoshihiro Kambayashi)

* test/ruby/test_transcode.rb: added test_windows_wwww
  (contributed by Yoshihiro Kambayashi)


git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@19846 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
duerst 2008-10-19 09:15:37 +00:00
parent 81b1fef56c
commit b014f1bc02
11 changed files with 1265 additions and 0 deletions

View file

@ -1,3 +1,15 @@
Sun Oct 19 18:15:15 2008 Martin Duerst <duerst@it.aoyama.ac.jp>
* enc/trans/single_byte.trans: adding WINDOWS-wwww encodings
(wwww = 874/1250/1251/1253/1254/1255/1256/1257)
(contributed by Yoshihiro Kambayashi)
* enc/trans/windows-wwww-tbl.rb: 8 new files
(contributed by Yoshihiro Kambayashi)
* test/ruby/test_transcode.rb: added test_windows_wwww
(contributed by Yoshihiro Kambayashi)
Sun Oct 19 07:37:13 2008 Tadayoshi Funaba <tadf@dotrb.org>
* lib/cmath.rb (log2,cbrt,frexp,ldexp,hypot,erf,erfc,gamma,lgamma):

View file

@ -19,7 +19,15 @@
require 'iso-8859-13-tbl'
require 'iso-8859-14-tbl'
require 'iso-8859-15-tbl'
require 'windows-874-tbl'
require 'windows-1250-tbl'
require 'windows-1251-tbl'
require 'windows-1252-tbl'
require 'windows-1253-tbl'
require 'windows-1254-tbl'
require 'windows-1255-tbl'
require 'windows-1256-tbl'
require 'windows-1257-tbl'
transcode_tblgen "US-ASCII", "UTF-8", us_ascii_map
transcode_tblgen "UTF-8", "US-ASCII", us_ascii_map
@ -53,7 +61,15 @@
transcode_tblgen_iso8859("ISO-8859-13", ISO_8859_13_TO_UCS_TBL)
transcode_tblgen_iso8859("ISO-8859-14", ISO_8859_14_TO_UCS_TBL)
transcode_tblgen_iso8859("ISO-8859-15", ISO_8859_15_TO_UCS_TBL)
transcode_tblgen_singlebyte("WINDOWS-874", WINDOWS_874_TO_UCS_TBL)
transcode_tblgen_singlebyte("WINDOWS-1250", WINDOWS_1250_TO_UCS_TBL)
transcode_tblgen_singlebyte("WINDOWS-1251", WINDOWS_1251_TO_UCS_TBL)
transcode_tblgen_singlebyte("WINDOWS-1252", WINDOWS_1252_TO_UCS_TBL)
transcode_tblgen_singlebyte("WINDOWS-1253", WINDOWS_1253_TO_UCS_TBL)
transcode_tblgen_singlebyte("WINDOWS-1254", WINDOWS_1254_TO_UCS_TBL)
transcode_tblgen_singlebyte("WINDOWS-1255", WINDOWS_1255_TO_UCS_TBL)
transcode_tblgen_singlebyte("WINDOWS-1256", WINDOWS_1256_TO_UCS_TBL)
transcode_tblgen_singlebyte("WINDOWS-1257", WINDOWS_1257_TO_UCS_TBL)
%>
<%= transcode_generated_code %>

View file

@ -0,0 +1,125 @@
# Mapping table for the WINDOWS-1250 encoding.
# Each entry is [byte, code_point]: the byte is written as a two-digit
# uppercase hex string ("80".."FF") and the code point as an Integer.
# Entries are listed in ascending code-point order. Bytes that do not
# appear as a key (0x81, 0x83, 0x88, 0x90, 0x98) have no entry here.
WINDOWS_1250_TO_UCS_TBL = [
["A0",0xA0],
["A4",0xA4],
["A6",0xA6],
["A7",0xA7],
["A8",0xA8],
["A9",0xA9],
["AB",0xAB],
["AC",0xAC],
["AD",0xAD],
["AE",0xAE],
["B0",0xB0],
["B1",0xB1],
["B4",0xB4],
["B5",0xB5],
["B6",0xB6],
["B7",0xB7],
["B8",0xB8],
["BB",0xBB],
["C1",0xC1],
["C2",0xC2],
["C4",0xC4],
["C7",0xC7],
["C9",0xC9],
["CB",0xCB],
["CD",0xCD],
["CE",0xCE],
["D3",0xD3],
["D4",0xD4],
["D6",0xD6],
["D7",0xD7],
["DA",0xDA],
["DC",0xDC],
["DD",0xDD],
["DF",0xDF],
["E1",0xE1],
["E2",0xE2],
["E4",0xE4],
["E7",0xE7],
["E9",0xE9],
["EB",0xEB],
["ED",0xED],
["EE",0xEE],
["F3",0xF3],
["F4",0xF4],
["F6",0xF6],
["F7",0xF7],
["FA",0xFA],
["FC",0xFC],
["FD",0xFD],
["C3",0x102],
["E3",0x103],
["A5",0x104],
["B9",0x105],
["C6",0x106],
["E6",0x107],
["C8",0x10C],
["E8",0x10D],
["CF",0x10E],
["EF",0x10F],
["D0",0x110],
["F0",0x111],
["CA",0x118],
["EA",0x119],
["CC",0x11A],
["EC",0x11B],
["C5",0x139],
["E5",0x13A],
["BC",0x13D],
["BE",0x13E],
["A3",0x141],
["B3",0x142],
["D1",0x143],
["F1",0x144],
["D2",0x147],
["F2",0x148],
["D5",0x150],
["F5",0x151],
["C0",0x154],
["E0",0x155],
["D8",0x158],
["F8",0x159],
["8C",0x15A],
["9C",0x15B],
["AA",0x15E],
["BA",0x15F],
["8A",0x160],
["9A",0x161],
["DE",0x162],
["FE",0x163],
["8D",0x164],
["9D",0x165],
["D9",0x16E],
["F9",0x16F],
["DB",0x170],
["FB",0x171],
["8F",0x179],
["9F",0x17A],
["AF",0x17B],
["BF",0x17C],
["8E",0x17D],
["9E",0x17E],
["A1",0x2C7],
["A2",0x2D8],
["FF",0x2D9],
["B2",0x2DB],
["BD",0x2DD],
["96",0x2013],
["97",0x2014],
["91",0x2018],
["92",0x2019],
["82",0x201A],
["93",0x201C],
["94",0x201D],
["84",0x201E],
["86",0x2020],
["87",0x2021],
["95",0x2022],
["85",0x2026],
["89",0x2030],
["8B",0x2039],
["9B",0x203A],
["80",0x20AC],
["99",0x2122],
]

View file

@ -0,0 +1,129 @@
# Mapping table for the WINDOWS-1251 encoding.
# Each entry is [byte, code_point]: the byte is written as a two-digit
# uppercase hex string ("80".."FF") and the code point as an Integer.
# Entries are listed in ascending code-point order. Byte 0x98 is the only
# value in 0x80..0xFF without an entry.
WINDOWS_1251_TO_UCS_TBL = [
["A0",0xA0],
["A4",0xA4],
["A6",0xA6],
["A7",0xA7],
["A9",0xA9],
["AB",0xAB],
["AC",0xAC],
["AD",0xAD],
["AE",0xAE],
["B0",0xB0],
["B1",0xB1],
["B5",0xB5],
["B6",0xB6],
["B7",0xB7],
["BB",0xBB],
["A8",0x401],
["80",0x402],
["81",0x403],
["AA",0x404],
["BD",0x405],
["B2",0x406],
["AF",0x407],
["A3",0x408],
["8A",0x409],
["8C",0x40A],
["8E",0x40B],
["8D",0x40C],
["A1",0x40E],
["8F",0x40F],
["C0",0x410],
["C1",0x411],
["C2",0x412],
["C3",0x413],
["C4",0x414],
["C5",0x415],
["C6",0x416],
["C7",0x417],
["C8",0x418],
["C9",0x419],
["CA",0x41A],
["CB",0x41B],
["CC",0x41C],
["CD",0x41D],
["CE",0x41E],
["CF",0x41F],
["D0",0x420],
["D1",0x421],
["D2",0x422],
["D3",0x423],
["D4",0x424],
["D5",0x425],
["D6",0x426],
["D7",0x427],
["D8",0x428],
["D9",0x429],
["DA",0x42A],
["DB",0x42B],
["DC",0x42C],
["DD",0x42D],
["DE",0x42E],
["DF",0x42F],
["E0",0x430],
["E1",0x431],
["E2",0x432],
["E3",0x433],
["E4",0x434],
["E5",0x435],
["E6",0x436],
["E7",0x437],
["E8",0x438],
["E9",0x439],
["EA",0x43A],
["EB",0x43B],
["EC",0x43C],
["ED",0x43D],
["EE",0x43E],
["EF",0x43F],
["F0",0x440],
["F1",0x441],
["F2",0x442],
["F3",0x443],
["F4",0x444],
["F5",0x445],
["F6",0x446],
["F7",0x447],
["F8",0x448],
["F9",0x449],
["FA",0x44A],
["FB",0x44B],
["FC",0x44C],
["FD",0x44D],
["FE",0x44E],
["FF",0x44F],
["B8",0x451],
["90",0x452],
["83",0x453],
["BA",0x454],
["BE",0x455],
["B3",0x456],
["BF",0x457],
["BC",0x458],
["9A",0x459],
["9C",0x45A],
["9E",0x45B],
["9D",0x45C],
["A2",0x45E],
["9F",0x45F],
["A5",0x490],
["B4",0x491],
["96",0x2013],
["97",0x2014],
["91",0x2018],
["92",0x2019],
["82",0x201A],
["93",0x201C],
["94",0x201D],
["84",0x201E],
["86",0x2020],
["87",0x2021],
["95",0x2022],
["85",0x2026],
["89",0x2030],
["8B",0x2039],
["9B",0x203A],
["88",0x20AC],
["B9",0x2116],
["99",0x2122],
]

View file

@ -0,0 +1,113 @@
# Mapping table for the WINDOWS-1253 encoding.
# Each entry is [byte, code_point]: the byte is written as a two-digit
# uppercase hex string ("80".."FF") and the code point as an Integer.
# Entries are listed in ascending code-point order. Bytes that do not
# appear as a key (for example 0x81, 0xAA, 0xD2, 0xFF) have no entry here.
WINDOWS_1253_TO_UCS_TBL = [
["A0",0xA0],
["A3",0xA3],
["A4",0xA4],
["A5",0xA5],
["A6",0xA6],
["A7",0xA7],
["A8",0xA8],
["A9",0xA9],
["AB",0xAB],
["AC",0xAC],
["AD",0xAD],
["AE",0xAE],
["B0",0xB0],
["B1",0xB1],
["B2",0xB2],
["B3",0xB3],
["B5",0xB5],
["B6",0xB6],
["B7",0xB7],
["BB",0xBB],
["BD",0xBD],
["83",0x192],
["B4",0x384],
["A1",0x385],
["A2",0x386],
["B8",0x388],
["B9",0x389],
["BA",0x38A],
["BC",0x38C],
["BE",0x38E],
["BF",0x38F],
["C0",0x390],
["C1",0x391],
["C2",0x392],
["C3",0x393],
["C4",0x394],
["C5",0x395],
["C6",0x396],
["C7",0x397],
["C8",0x398],
["C9",0x399],
["CA",0x39A],
["CB",0x39B],
["CC",0x39C],
["CD",0x39D],
["CE",0x39E],
["CF",0x39F],
["D0",0x3A0],
["D1",0x3A1],
["D3",0x3A3],
["D4",0x3A4],
["D5",0x3A5],
["D6",0x3A6],
["D7",0x3A7],
["D8",0x3A8],
["D9",0x3A9],
["DA",0x3AA],
["DB",0x3AB],
["DC",0x3AC],
["DD",0x3AD],
["DE",0x3AE],
["DF",0x3AF],
["E0",0x3B0],
["E1",0x3B1],
["E2",0x3B2],
["E3",0x3B3],
["E4",0x3B4],
["E5",0x3B5],
["E6",0x3B6],
["E7",0x3B7],
["E8",0x3B8],
["E9",0x3B9],
["EA",0x3BA],
["EB",0x3BB],
["EC",0x3BC],
["ED",0x3BD],
["EE",0x3BE],
["EF",0x3BF],
["F0",0x3C0],
["F1",0x3C1],
["F2",0x3C2],
["F3",0x3C3],
["F4",0x3C4],
["F5",0x3C5],
["F6",0x3C6],
["F7",0x3C7],
["F8",0x3C8],
["F9",0x3C9],
["FA",0x3CA],
["FB",0x3CB],
["FC",0x3CC],
["FD",0x3CD],
["FE",0x3CE],
["96",0x2013],
["97",0x2014],
["AF",0x2015],
["91",0x2018],
["92",0x2019],
["82",0x201A],
["93",0x201C],
["94",0x201D],
["84",0x201E],
["86",0x2020],
["87",0x2021],
["95",0x2022],
["85",0x2026],
["89",0x2030],
["8B",0x2039],
["9B",0x203A],
["80",0x20AC],
["99",0x2122],
]

View file

@ -0,0 +1,123 @@
# Mapping table for the WINDOWS-1254 encoding.
# Each entry is [byte, code_point]: the byte is written as a two-digit
# uppercase hex string ("80".."FF") and the code point as an Integer.
# Entries are listed in ascending code-point order. Bytes that do not
# appear as a key (0x81, 0x8D, 0x8E, 0x8F, 0x90, 0x9D, 0x9E) have no
# entry here.
WINDOWS_1254_TO_UCS_TBL = [
["A0",0xA0],
["A1",0xA1],
["A2",0xA2],
["A3",0xA3],
["A4",0xA4],
["A5",0xA5],
["A6",0xA6],
["A7",0xA7],
["A8",0xA8],
["A9",0xA9],
["AA",0xAA],
["AB",0xAB],
["AC",0xAC],
["AD",0xAD],
["AE",0xAE],
["AF",0xAF],
["B0",0xB0],
["B1",0xB1],
["B2",0xB2],
["B3",0xB3],
["B4",0xB4],
["B5",0xB5],
["B6",0xB6],
["B7",0xB7],
["B8",0xB8],
["B9",0xB9],
["BA",0xBA],
["BB",0xBB],
["BC",0xBC],
["BD",0xBD],
["BE",0xBE],
["BF",0xBF],
["C0",0xC0],
["C1",0xC1],
["C2",0xC2],
["C3",0xC3],
["C4",0xC4],
["C5",0xC5],
["C6",0xC6],
["C7",0xC7],
["C8",0xC8],
["C9",0xC9],
["CA",0xCA],
["CB",0xCB],
["CC",0xCC],
["CD",0xCD],
["CE",0xCE],
["CF",0xCF],
["D1",0xD1],
["D2",0xD2],
["D3",0xD3],
["D4",0xD4],
["D5",0xD5],
["D6",0xD6],
["D7",0xD7],
["D8",0xD8],
["D9",0xD9],
["DA",0xDA],
["DB",0xDB],
["DC",0xDC],
["DF",0xDF],
["E0",0xE0],
["E1",0xE1],
["E2",0xE2],
["E3",0xE3],
["E4",0xE4],
["E5",0xE5],
["E6",0xE6],
["E7",0xE7],
["E8",0xE8],
["E9",0xE9],
["EA",0xEA],
["EB",0xEB],
["EC",0xEC],
["ED",0xED],
["EE",0xEE],
["EF",0xEF],
["F1",0xF1],
["F2",0xF2],
["F3",0xF3],
["F4",0xF4],
["F5",0xF5],
["F6",0xF6],
["F7",0xF7],
["F8",0xF8],
["F9",0xF9],
["FA",0xFA],
["FB",0xFB],
["FC",0xFC],
["FF",0xFF],
["D0",0x11E],
["F0",0x11F],
["DD",0x130],
["FD",0x131],
["8C",0x152],
["9C",0x153],
["DE",0x15E],
["FE",0x15F],
["8A",0x160],
["9A",0x161],
["9F",0x178],
["83",0x192],
["88",0x2C6],
["98",0x2DC],
["96",0x2013],
["97",0x2014],
["91",0x2018],
["92",0x2019],
["82",0x201A],
["93",0x201C],
["94",0x201D],
["84",0x201E],
["86",0x2020],
["87",0x2021],
["95",0x2022],
["85",0x2026],
["89",0x2030],
["8B",0x2039],
["9B",0x203A],
["80",0x20AC],
["99",0x2122],
]

View file

@ -0,0 +1,141 @@
# Mapping table for the WINDOWS-1255 encoding.
# Each entry is [bytes, code_point]: the bytes are written as an uppercase
# hex string and the code point as an Integer. Entries are listed in
# ascending code-point order.
#
# Most keys are a single byte ("80".."FF"). The entries at the end of the
# table (code points U+FB1D..U+FB4E) are different: their keys are two or
# three bytes long.
# NOTE(review): these look like base-letter + point sequences standing in
# for precomposed Hebrew presentation forms, presumably meant only for the
# UTF-8 -> WINDOWS-1255 direction -- confirm how
# transcode_tblgen_singlebyte treats keys longer than one byte.
WINDOWS_1255_TO_UCS_TBL = [
["A0",0xA0],
["A1",0xA1],
["A2",0xA2],
["A3",0xA3],
["A5",0xA5],
["A6",0xA6],
["A7",0xA7],
["A8",0xA8],
["A9",0xA9],
["AB",0xAB],
["AC",0xAC],
["AD",0xAD],
["AE",0xAE],
["AF",0xAF],
["B0",0xB0],
["B1",0xB1],
["B2",0xB2],
["B3",0xB3],
["B4",0xB4],
["B5",0xB5],
["B6",0xB6],
["B7",0xB7],
["B8",0xB8],
["B9",0xB9],
["BB",0xBB],
["BC",0xBC],
["BD",0xBD],
["BE",0xBE],
["BF",0xBF],
["AA",0xD7],
["BA",0xF7],
["83",0x192],
["88",0x2C6],
["98",0x2DC],
["C0",0x5B0],
["C1",0x5B1],
["C2",0x5B2],
["C3",0x5B3],
["C4",0x5B4],
["C5",0x5B5],
["C6",0x5B6],
["C7",0x5B7],
["C8",0x5B8],
["C9",0x5B9],
["CB",0x5BB],
["CC",0x5BC],
["CD",0x5BD],
["CE",0x5BE],
["CF",0x5BF],
["D0",0x5C0],
["D1",0x5C1],
["D2",0x5C2],
["D3",0x5C3],
["E0",0x5D0],
["E1",0x5D1],
["E2",0x5D2],
["E3",0x5D3],
["E4",0x5D4],
["E5",0x5D5],
["E6",0x5D6],
["E7",0x5D7],
["E8",0x5D8],
["E9",0x5D9],
["EA",0x5DA],
["EB",0x5DB],
["EC",0x5DC],
["ED",0x5DD],
["EE",0x5DE],
["EF",0x5DF],
["F0",0x5E0],
["F1",0x5E1],
["F2",0x5E2],
["F3",0x5E3],
["F4",0x5E4],
["F5",0x5E5],
["F6",0x5E6],
["F7",0x5E7],
["F8",0x5E8],
["F9",0x5E9],
["FA",0x5EA],
["D4",0x5F0],
["D5",0x5F1],
["D6",0x5F2],
["D7",0x5F3],
["D8",0x5F4],
["FD",0x200E],
["FE",0x200F],
["96",0x2013],
["97",0x2014],
["91",0x2018],
["92",0x2019],
["82",0x201A],
["93",0x201C],
["94",0x201D],
["84",0x201E],
["86",0x2020],
["87",0x2021],
["95",0x2022],
["85",0x2026],
["89",0x2030],
["8B",0x2039],
["9B",0x203A],
["A4",0x20AA],
["80",0x20AC],
["99",0x2122],
["E9C4",0xFB1D],
["D6C7",0xFB1F],
["F9D1",0xFB2A],
["F9D2",0xFB2B],
["F9CCD1",0xFB2C],
["F9CCD2",0xFB2D],
["E0C7",0xFB2E],
["E0C8",0xFB2F],
["E0CC",0xFB30],
["E1CC",0xFB31],
["E2CC",0xFB32],
["E3CC",0xFB33],
["E4CC",0xFB34],
["E5CC",0xFB35],
["E6CC",0xFB36],
["E8CC",0xFB38],
["E9CC",0xFB39],
["EACC",0xFB3A],
["EBCC",0xFB3B],
["ECCC",0xFB3C],
["EECC",0xFB3E],
["F0CC",0xFB40],
["F1CC",0xFB41],
["F3CC",0xFB43],
["F4CC",0xFB44],
["F6CC",0xFB46],
["F7CC",0xFB47],
["F8CC",0xFB48],
["F9CC",0xFB49],
["FACC",0xFB4A],
["E5C9",0xFB4B],
["E1CF",0xFB4C],
["EBCF",0xFB4D],
["F4CF",0xFB4E],
]

View file

@ -0,0 +1,130 @@
# Mapping table for the WINDOWS-1256 encoding.
# Each entry is [byte, code_point]: the byte is written as a two-digit
# uppercase hex string ("80".."FF") and the code point as an Integer.
# Entries are listed in ascending code-point order. Every byte in
# 0x80..0xFF has an entry (128 entries).
WINDOWS_1256_TO_UCS_TBL = [
["A0",0xA0],
["A2",0xA2],
["A3",0xA3],
["A4",0xA4],
["A5",0xA5],
["A6",0xA6],
["A7",0xA7],
["A8",0xA8],
["A9",0xA9],
["AB",0xAB],
["AC",0xAC],
["AD",0xAD],
["AE",0xAE],
["AF",0xAF],
["B0",0xB0],
["B1",0xB1],
["B2",0xB2],
["B3",0xB3],
["B4",0xB4],
["B5",0xB5],
["B6",0xB6],
["B7",0xB7],
["B8",0xB8],
["B9",0xB9],
["BB",0xBB],
["BC",0xBC],
["BD",0xBD],
["BE",0xBE],
["D7",0xD7],
["E0",0xE0],
["E2",0xE2],
["E7",0xE7],
["E8",0xE8],
["E9",0xE9],
["EA",0xEA],
["EB",0xEB],
["EE",0xEE],
["EF",0xEF],
["F4",0xF4],
["F7",0xF7],
["F9",0xF9],
["FB",0xFB],
["FC",0xFC],
["8C",0x152],
["9C",0x153],
["83",0x192],
["88",0x2C6],
["A1",0x60C],
["BA",0x61B],
["BF",0x61F],
["C1",0x621],
["C2",0x622],
["C3",0x623],
["C4",0x624],
["C5",0x625],
["C6",0x626],
["C7",0x627],
["C8",0x628],
["C9",0x629],
["CA",0x62A],
["CB",0x62B],
["CC",0x62C],
["CD",0x62D],
["CE",0x62E],
["CF",0x62F],
["D0",0x630],
["D1",0x631],
["D2",0x632],
["D3",0x633],
["D4",0x634],
["D5",0x635],
["D6",0x636],
["D8",0x637],
["D9",0x638],
["DA",0x639],
["DB",0x63A],
["DC",0x640],
["DD",0x641],
["DE",0x642],
["DF",0x643],
["E1",0x644],
["E3",0x645],
["E4",0x646],
["E5",0x647],
["E6",0x648],
["EC",0x649],
["ED",0x64A],
["F0",0x64B],
["F1",0x64C],
["F2",0x64D],
["F3",0x64E],
["F5",0x64F],
["F6",0x650],
["F8",0x651],
["FA",0x652],
["8A",0x679],
["81",0x67E],
["8D",0x686],
["8F",0x688],
["9A",0x691],
["8E",0x698],
["98",0x6A9],
["90",0x6AF],
["9F",0x6BA],
["AA",0x6BE],
["C0",0x6C1],
["FF",0x6D2],
["9D",0x200C],
["9E",0x200D],
["FD",0x200E],
["FE",0x200F],
["96",0x2013],
["97",0x2014],
["91",0x2018],
["92",0x2019],
["82",0x201A],
["93",0x201C],
["94",0x201D],
["84",0x201E],
["86",0x2020],
["87",0x2021],
["95",0x2022],
["85",0x2026],
["89",0x2030],
["8B",0x2039],
["9B",0x203A],
["80",0x20AC],
["99",0x2122],
]

View file

@ -0,0 +1,118 @@
# Mapping table for the WINDOWS-1257 encoding.
# Each entry is [byte, code_point]: the byte is written as a two-digit
# uppercase hex string ("80".."FF") and the code point as an Integer.
# Entries are listed in ascending code-point order, so the byte column is
# not sorted (e.g. byte 0x8D maps to U+00A8 and sits among the 0xA_ keys).
# Bytes that do not appear as a key (for example 0x81, 0xA1, 0xA5) have no
# entry here.
WINDOWS_1257_TO_UCS_TBL = [
["A0",0xA0],
["A2",0xA2],
["A3",0xA3],
["A4",0xA4],
["A6",0xA6],
["A7",0xA7],
["8D",0xA8],
["A9",0xA9],
["AB",0xAB],
["AC",0xAC],
["AD",0xAD],
["AE",0xAE],
["9D",0xAF],
["B0",0xB0],
["B1",0xB1],
["B2",0xB2],
["B3",0xB3],
["B4",0xB4],
["B5",0xB5],
["B6",0xB6],
["B7",0xB7],
["8F",0xB8],
["B9",0xB9],
["BB",0xBB],
["BC",0xBC],
["BD",0xBD],
["BE",0xBE],
["C4",0xC4],
["C5",0xC5],
["AF",0xC6],
["C9",0xC9],
["D3",0xD3],
["D5",0xD5],
["D6",0xD6],
["D7",0xD7],
["A8",0xD8],
["DC",0xDC],
["DF",0xDF],
["E4",0xE4],
["E5",0xE5],
["BF",0xE6],
["E9",0xE9],
["F3",0xF3],
["F5",0xF5],
["F6",0xF6],
["F7",0xF7],
["B8",0xF8],
["FC",0xFC],
["C2",0x100],
["E2",0x101],
["C0",0x104],
["E0",0x105],
["C3",0x106],
["E3",0x107],
["C8",0x10C],
["E8",0x10D],
["C7",0x112],
["E7",0x113],
["CB",0x116],
["EB",0x117],
["C6",0x118],
["E6",0x119],
["CC",0x122],
["EC",0x123],
["CE",0x12A],
["EE",0x12B],
["C1",0x12E],
["E1",0x12F],
["CD",0x136],
["ED",0x137],
["CF",0x13B],
["EF",0x13C],
["D9",0x141],
["F9",0x142],
["D1",0x143],
["F1",0x144],
["D2",0x145],
["F2",0x146],
["D4",0x14C],
["F4",0x14D],
["AA",0x156],
["BA",0x157],
["DA",0x15A],
["FA",0x15B],
["D0",0x160],
["F0",0x161],
["DB",0x16A],
["FB",0x16B],
["D8",0x172],
["F8",0x173],
["CA",0x179],
["EA",0x17A],
["DD",0x17B],
["FD",0x17C],
["DE",0x17D],
["FE",0x17E],
["8E",0x2C7],
["FF",0x2D9],
["9E",0x2DB],
["96",0x2013],
["97",0x2014],
["91",0x2018],
["92",0x2019],
["82",0x201A],
["93",0x201C],
["94",0x201D],
["84",0x201E],
["86",0x2020],
["87",0x2021],
["95",0x2022],
["85",0x2026],
["89",0x2030],
["8B",0x2039],
["9B",0x203A],
["80",0x20AC],
["99",0x2122],
]

View file

@ -0,0 +1,99 @@
# Mapping table for the WINDOWS-874 encoding.
# Each entry is [byte, code_point]: the byte is written as a two-digit
# uppercase hex string ("80".."FF") and the code point as an Integer.
# Entries are listed in ascending code-point order: U+00A0 first, then the
# U+0E01..U+0E5B block (bytes 0xA1..0xDA and 0xDF..0xFB), then the
# punctuation and euro sign held in the 0x80..0x97 byte range.
# Bytes that do not appear as a key (for example 0x81, 0xDB..0xDE,
# 0xFC..0xFF) have no entry here.
WINDOWS_874_TO_UCS_TBL = [
["A0",0xA0],
["A1",0xE01],
["A2",0xE02],
["A3",0xE03],
["A4",0xE04],
["A5",0xE05],
["A6",0xE06],
["A7",0xE07],
["A8",0xE08],
["A9",0xE09],
["AA",0xE0A],
["AB",0xE0B],
["AC",0xE0C],
["AD",0xE0D],
["AE",0xE0E],
["AF",0xE0F],
["B0",0xE10],
["B1",0xE11],
["B2",0xE12],
["B3",0xE13],
["B4",0xE14],
["B5",0xE15],
["B6",0xE16],
["B7",0xE17],
["B8",0xE18],
["B9",0xE19],
["BA",0xE1A],
["BB",0xE1B],
["BC",0xE1C],
["BD",0xE1D],
["BE",0xE1E],
["BF",0xE1F],
["C0",0xE20],
["C1",0xE21],
["C2",0xE22],
["C3",0xE23],
["C4",0xE24],
["C5",0xE25],
["C6",0xE26],
["C7",0xE27],
["C8",0xE28],
["C9",0xE29],
["CA",0xE2A],
["CB",0xE2B],
["CC",0xE2C],
["CD",0xE2D],
["CE",0xE2E],
["CF",0xE2F],
["D0",0xE30],
["D1",0xE31],
["D2",0xE32],
["D3",0xE33],
["D4",0xE34],
["D5",0xE35],
["D6",0xE36],
["D7",0xE37],
["D8",0xE38],
["D9",0xE39],
["DA",0xE3A],
["DF",0xE3F],
["E0",0xE40],
["E1",0xE41],
["E2",0xE42],
["E3",0xE43],
["E4",0xE44],
["E5",0xE45],
["E6",0xE46],
["E7",0xE47],
["E8",0xE48],
["E9",0xE49],
["EA",0xE4A],
["EB",0xE4B],
["EC",0xE4C],
["ED",0xE4D],
["EE",0xE4E],
["EF",0xE4F],
["F0",0xE50],
["F1",0xE51],
["F2",0xE52],
["F3",0xE53],
["F4",0xE54],
["F5",0xE55],
["F6",0xE56],
["F7",0xE57],
["F8",0xE58],
["F9",0xE59],
["FA",0xE5A],
["FB",0xE5B],
["96",0x2013],
["97",0x2014],
["91",0x2018],
["92",0x2019],
["93",0x201C],
["94",0x201D],
["95",0x2022],
["85",0x2026],
["80",0x20AC],
]

View file

@ -131,6 +131,87 @@ class TestTranscode < Test::Unit::TestCase
end
end
# Samples WINDOWS-874 bytes across 0x80..0xFF: mapped bytes must convert to
# and from the expected UTF-8 character, and bytes without a mapping must
# raise Encoding::UndefinedConversionError when converted to UTF-8.
def test_windows_874
  enc = 'windows-874'
  # Asserts that converting the given byte to UTF-8 is rejected.
  no_mapping = lambda do |byte|
    assert_raise(Encoding::UndefinedConversionError) { byte.encode("utf-8", enc) }
  end

  check_both_ways("\u20AC", "\x80", enc) # €
  no_mapping.call("\x81")
  no_mapping.call("\x84")
  check_both_ways("\u2026", "\x85", enc) # …
  no_mapping.call("\x86")
  no_mapping.call("\x8F")
  no_mapping.call("\x90")
  check_both_ways("\u2018", "\x91", enc) # ‘
  check_both_ways("\u2014", "\x97", enc) # —
  no_mapping.call("\x98")
  no_mapping.call("\x9F")
  check_both_ways("\u00A0", "\xA0", enc) # non-breaking space
  check_both_ways("\u0E0F", "\xAF", enc) # ฏ
  check_both_ways("\u0E10", "\xB0", enc) # ฐ
  check_both_ways("\u0E1F", "\xBF", enc) # ฟ
  check_both_ways("\u0E20", "\xC0", enc) # ภ
  check_both_ways("\u0E2F", "\xCF", enc) # ฯ
  check_both_ways("\u0E30", "\xD0", enc) # ะ
  check_both_ways("\u0E3A", "\xDA", enc) # ฺ
  no_mapping.call("\xDB")
  no_mapping.call("\xDE")
  check_both_ways("\u0E3F", "\xDF", enc) # ฿
  check_both_ways("\u0E40", "\xE0", enc) # เ
  check_both_ways("\u0E4F", "\xEF", enc) # ๏
  check_both_ways("\u0E50", "\xF0", enc) # ๐
  check_both_ways("\u0E5B", "\xFB", enc) # ๛
  no_mapping.call("\xFC")
  no_mapping.call("\xFF")
end
# Samples WINDOWS-1250 bytes across 0x80..0xFF: mapped bytes must convert to
# and from the expected UTF-8 character, and bytes without a mapping must
# raise Encoding::UndefinedConversionError when converted to UTF-8.
def test_windows_1250
  enc = 'windows-1250'
  # Asserts that converting the given byte to UTF-8 is rejected.
  no_mapping = lambda do |byte|
    assert_raise(Encoding::UndefinedConversionError) { byte.encode("utf-8", enc) }
  end

  check_both_ways("\u20AC", "\x80", enc) # €
  no_mapping.call("\x81")
  check_both_ways("\u201A", "\x82", enc) # ‚
  no_mapping.call("\x83")
  check_both_ways("\u201E", "\x84", enc) # „
  check_both_ways("\u2021", "\x87", enc) # ‡
  no_mapping.call("\x88")
  check_both_ways("\u2030", "\x89", enc) # ‰
  check_both_ways("\u0179", "\x8F", enc) # Ź
  no_mapping.call("\x90")
  check_both_ways("\u2018", "\x91", enc) # ‘
  check_both_ways("\u2014", "\x97", enc) # —
  no_mapping.call("\x98")
  check_both_ways("\u2122", "\x99", enc) # ™
  check_both_ways("\u00A0", "\xA0", enc) # non-breaking space
  check_both_ways("\u017B", "\xAF", enc) # Ż
  check_both_ways("\u00B0", "\xB0", enc) # °
  check_both_ways("\u017C", "\xBF", enc) # ż
  check_both_ways("\u0154", "\xC0", enc) # Ŕ
  check_both_ways("\u010E", "\xCF", enc) # Ď
  check_both_ways("\u0110", "\xD0", enc) # Đ
  check_both_ways("\u00DF", "\xDF", enc) # ß
  check_both_ways("\u0155", "\xE0", enc) # ŕ
  check_both_ways("\u010F", "\xEF", enc) # ď
  check_both_ways("\u0111", "\xF0", enc) # đ
  check_both_ways("\u02D9", "\xFF", enc) # ˙
end
# Samples WINDOWS-1251 bytes across 0x80..0xFF: mapped bytes must convert to
# and from the expected UTF-8 character; 0x98 has no mapping and must raise
# Encoding::UndefinedConversionError when converted to UTF-8.
def test_windows_1251
  enc = 'windows-1251'

  check_both_ways("\u0402", "\x80", enc) # Ђ
  check_both_ways("\u20AC", "\x88", enc) # €
  check_both_ways("\u040F", "\x8F", enc) # Џ
  check_both_ways("\u0452", "\x90", enc) # ђ
  assert_raise(Encoding::UndefinedConversionError) do
    "\x98".encode("utf-8", enc)
  end
  check_both_ways("\u045F", "\x9F", enc) # џ
  check_both_ways("\u00A0", "\xA0", enc) # non-breaking space
  check_both_ways("\u0407", "\xAF", enc) # Ї
  check_both_ways("\u00B0", "\xB0", enc) # °
  check_both_ways("\u0457", "\xBF", enc) # ї
  check_both_ways("\u0410", "\xC0", enc) # А
  check_both_ways("\u041F", "\xCF", enc) # П
  check_both_ways("\u0420", "\xD0", enc) # Р
  check_both_ways("\u042F", "\xDF", enc) # Я
  check_both_ways("\u0430", "\xE0", enc) # а
  check_both_ways("\u043F", "\xEF", enc) # п
  check_both_ways("\u0440", "\xF0", enc) # р
  check_both_ways("\u044F", "\xFF", enc) # я
end
def test_windows_1252
check_both_ways("\u20AC", "\x80", 'windows-1252') # €
assert_raise(Encoding::UndefinedConversionError) { "\x81".encode("utf-8", 'windows-1252') }
@ -158,6 +239,184 @@ class TestTranscode < Test::Unit::TestCase
check_both_ways("\u00FF", "\xFF", 'windows-1252') # ÿ
end
# Samples WINDOWS-1253 bytes across 0x80..0xFF: mapped bytes must convert to
# and from the expected UTF-8 character, and bytes without a mapping must
# raise Encoding::UndefinedConversionError when converted to UTF-8.
def test_windows_1253
  enc = 'windows-1253'
  # Asserts that converting the given byte to UTF-8 is rejected.
  no_mapping = lambda do |byte|
    assert_raise(Encoding::UndefinedConversionError) { byte.encode("utf-8", enc) }
  end

  check_both_ways("\u20AC", "\x80", enc) # €
  no_mapping.call("\x81")
  check_both_ways("\u201A", "\x82", enc) # ‚
  check_both_ways("\u2021", "\x87", enc) # ‡
  no_mapping.call("\x88")
  check_both_ways("\u2030", "\x89", enc) # ‰
  no_mapping.call("\x8A")
  check_both_ways("\u2039", "\x8B", enc) # ‹
  no_mapping.call("\x8C")
  no_mapping.call("\x8F")
  no_mapping.call("\x90")
  check_both_ways("\u2018", "\x91", enc) # ‘
  check_both_ways("\u2014", "\x97", enc) # —
  no_mapping.call("\x98")
  check_both_ways("\u2122", "\x99", enc) # ™
  no_mapping.call("\x9A")
  check_both_ways("\u203A", "\x9B", enc) # ›
  no_mapping.call("\x9C")
  no_mapping.call("\x9F")
  check_both_ways("\u00A0", "\xA0", enc) # non-breaking space
  check_both_ways("\u2015", "\xAF", enc) # ―
  check_both_ways("\u00B0", "\xB0", enc) # °
  check_both_ways("\u038F", "\xBF", enc) # Ώ
  check_both_ways("\u0390", "\xC0", enc) # ΐ
  check_both_ways("\u039F", "\xCF", enc) # Ο
  check_both_ways("\u03A0", "\xD0", enc) # Π
  check_both_ways("\u03A1", "\xD1", enc) # Ρ
  no_mapping.call("\xD2")
  check_both_ways("\u03A3", "\xD3", enc) # Σ
  check_both_ways("\u03AF", "\xDF", enc) # ί
  check_both_ways("\u03B0", "\xE0", enc) # ΰ
  check_both_ways("\u03BF", "\xEF", enc) # ο
  check_both_ways("\u03C0", "\xF0", enc) # π
  check_both_ways("\u03CE", "\xFE", enc) # ώ
  no_mapping.call("\xFF")
end
# Samples WINDOWS-1254 bytes across 0x80..0xFF: mapped bytes must convert to
# and from the expected UTF-8 character, and bytes without a mapping must
# raise Encoding::UndefinedConversionError when converted to UTF-8.
def test_windows_1254
  enc = 'windows-1254'
  # Asserts that converting the given byte to UTF-8 is rejected.
  no_mapping = lambda do |byte|
    assert_raise(Encoding::UndefinedConversionError) { byte.encode("utf-8", enc) }
  end

  check_both_ways("\u20AC", "\x80", enc) # €
  no_mapping.call("\x81")
  check_both_ways("\u201A", "\x82", enc) # ‚
  check_both_ways("\u0152", "\x8C", enc) # Œ
  no_mapping.call("\x8D")
  no_mapping.call("\x8F")
  no_mapping.call("\x90")
  check_both_ways("\u2018", "\x91", enc) # ‘
  check_both_ways("\u0153", "\x9C", enc) # œ
  no_mapping.call("\x9D")
  no_mapping.call("\x9E")
  check_both_ways("\u0178", "\x9F", enc) # Ÿ
  check_both_ways("\u00A0", "\xA0", enc) # non-breaking space
  check_both_ways("\u00AF", "\xAF", enc) # ¯
  check_both_ways("\u00B0", "\xB0", enc) # °
  check_both_ways("\u00BF", "\xBF", enc) # ¿
  check_both_ways("\u00C0", "\xC0", enc) # À
  check_both_ways("\u00CF", "\xCF", enc) # Ï
  check_both_ways("\u011E", "\xD0", enc) # Ğ
  check_both_ways("\u00DF", "\xDF", enc) # ß
  check_both_ways("\u00E0", "\xE0", enc) # à
  check_both_ways("\u00EF", "\xEF", enc) # ï
  check_both_ways("\u011F", "\xF0", enc) # ğ
  check_both_ways("\u00FF", "\xFF", enc) # ÿ
end
# Samples WINDOWS-1255 bytes across 0x80..0xFF: mapped bytes must convert to
# and from the expected UTF-8 character, and bytes without a mapping must
# raise Encoding::UndefinedConversionError when converted to UTF-8.
def test_windows_1255
  enc = 'windows-1255'
  # Asserts that converting the given byte to UTF-8 is rejected.
  no_mapping = lambda do |byte|
    assert_raise(Encoding::UndefinedConversionError) { byte.encode("utf-8", enc) }
  end

  check_both_ways("\u20AC", "\x80", enc) # €
  no_mapping.call("\x81")
  check_both_ways("\u201A", "\x82", enc) # ‚
  check_both_ways("\u2030", "\x89", enc) # ‰
  no_mapping.call("\x8A")
  check_both_ways("\u2039", "\x8B", enc) # ‹
  no_mapping.call("\x8C")
  no_mapping.call("\x8F")
  no_mapping.call("\x90")
  check_both_ways("\u2018", "\x91", enc) # ‘
  check_both_ways("\u2122", "\x99", enc) # ™
  no_mapping.call("\x9A")
  check_both_ways("\u203A", "\x9B", enc) # ›
  no_mapping.call("\x9C")
  no_mapping.call("\x9F")
  check_both_ways("\u00A0", "\xA0", enc) # non-breaking space
  check_both_ways("\u00A1", "\xA1", enc) # ¡
  check_both_ways("\u00D7", "\xAA", enc) # ×
  check_both_ways("\u00AF", "\xAF", enc) # ¯
  check_both_ways("\u00B0", "\xB0", enc) # °
  check_both_ways("\u00B8", "\xB8", enc) # ¸
  check_both_ways("\u00F7", "\xBA", enc) # ÷
  check_both_ways("\u00BF", "\xBF", enc) # ¿
  check_both_ways("\u05B0", "\xC0", enc) # ְ
  check_both_ways("\u05B9", "\xC9", enc) # ֹ
  no_mapping.call("\xCA")
  check_both_ways("\u05BB", "\xCB", enc) # ֻ
  check_both_ways("\u05BF", "\xCF", enc) # ֿ
  check_both_ways("\u05C0", "\xD0", enc) # ׀
  check_both_ways("\u05F3", "\xD7", enc) # ׳
  check_both_ways("\u05F4", "\xD8", enc) # ״
  no_mapping.call("\xD9")
  no_mapping.call("\xDF")
  check_both_ways("\u05D0", "\xE0", enc) # א
  check_both_ways("\u05DF", "\xEF", enc) # ן
  check_both_ways("\u05E0", "\xF0", enc) # נ
  check_both_ways("\u05EA", "\xFA", enc) # ת
  no_mapping.call("\xFB")
  no_mapping.call("\xFC")
  check_both_ways("\u200E", "\xFD", enc) # left-to-right mark
  check_both_ways("\u200F", "\xFE", enc) # right-to-left mark
  no_mapping.call("\xFF")
end
# Samples WINDOWS-1256 bytes across 0x80..0xFF and checks that each one
# converts to and from the expected UTF-8 character. No undefined-byte
# checks appear here: every sampled byte has a mapping.
def test_windows_1256
  enc = 'windows-1256'
  # [UTF-8 character, WINDOWS-1256 byte] pairs, checked in this order.
  samples = [
    ["\u20AC", "\x80"], # €
    ["\u0679", "\x8A"], # ٹ
    ["\u0688", "\x8F"], # ڈ
    ["\u06AF", "\x90"], # گ
    ["\u06A9", "\x98"], # ک
    ["\u0691", "\x9A"], # ڑ
    ["\u06BA", "\x9F"], # ں
    ["\u00A0", "\xA0"], # non-breaking space
    ["\u06BE", "\xAA"], # ھ
    ["\u00AF", "\xAF"], # ¯
    ["\u00B0", "\xB0"], # °
    ["\u061F", "\xBF"], # ؟
    ["\u06C1", "\xC0"], # ہ
    ["\u062F", "\xCF"], # د
    ["\u0630", "\xD0"], # ذ
    ["\u0643", "\xDF"], # ك
    ["\u00E0", "\xE0"], # à
    ["\u00EF", "\xEF"], # ï
    ["\u064B", "\xF0"], # ً
    ["\u06D2", "\xFF"], # ے
  ]
  samples.each { |utf8, raw| check_both_ways(utf8, raw, enc) }
end
# Samples WINDOWS-1257 bytes across 0x80..0xFF: mapped bytes must convert to
# and from the expected UTF-8 character, and bytes without a mapping must
# raise Encoding::UndefinedConversionError when converted to UTF-8.
def test_windows_1257
  enc = 'windows-1257'
  # Asserts that converting the given byte to UTF-8 is rejected.
  no_mapping = lambda do |byte|
    assert_raise(Encoding::UndefinedConversionError) { byte.encode("utf-8", enc) }
  end

  check_both_ways("\u20AC", "\x80", enc) # €
  no_mapping.call("\x81")
  check_both_ways("\u201A", "\x82", enc) # ‚
  no_mapping.call("\x83")
  check_both_ways("\u201E", "\x84", enc) # „
  check_both_ways("\u2021", "\x87", enc) # ‡
  no_mapping.call("\x88")
  check_both_ways("\u2030", "\x89", enc) # ‰
  no_mapping.call("\x8A")
  check_both_ways("\u2039", "\x8B", enc) # ‹
  no_mapping.call("\x8C")
  check_both_ways("\u00A8", "\x8D", enc) # ¨
  check_both_ways("\u02C7", "\x8E", enc) # ˇ
  check_both_ways("\u00B8", "\x8F", enc) # ¸
  no_mapping.call("\x90")
  check_both_ways("\u2018", "\x91", enc) # ‘
  check_both_ways("\u2014", "\x97", enc) # —
  no_mapping.call("\x98")
  check_both_ways("\u2122", "\x99", enc) # ™
  no_mapping.call("\x9A")
  check_both_ways("\u203A", "\x9B", enc) # ›
  no_mapping.call("\x9C")
  check_both_ways("\u00AF", "\x9D", enc) # ¯
  check_both_ways("\u02DB", "\x9E", enc) # ˛
  no_mapping.call("\x9F")
  check_both_ways("\u00A0", "\xA0", enc) # non-breaking space
  no_mapping.call("\xA1")
  check_both_ways("\u00A2", "\xA2", enc) # ¢
  check_both_ways("\u00A4", "\xA4", enc) # ¤
  no_mapping.call("\xA5")
  check_both_ways("\u00A6", "\xA6", enc) # ¦
  check_both_ways("\u00C6", "\xAF", enc) # Æ
  check_both_ways("\u00B0", "\xB0", enc) # °
  check_both_ways("\u00E6", "\xBF", enc) # æ
  check_both_ways("\u0104", "\xC0", enc) # Ą
  check_both_ways("\u013B", "\xCF", enc) # Ļ
  check_both_ways("\u0160", "\xD0", enc) # Š
  check_both_ways("\u00DF", "\xDF", enc) # ß
  check_both_ways("\u0105", "\xE0", enc) # ą
  check_both_ways("\u013C", "\xEF", enc) # ļ
  check_both_ways("\u0161", "\xF0", enc) # š
  check_both_ways("\u02D9", "\xFF", enc) # ˙
end
def check_utf_16_both_ways(utf8, raw)
copy = raw.dup
0.step(copy.length-1, 2) { |i| copy[i+1], copy[i] = copy[i], copy[i+1] }