mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
* tool/build-transcode: new file.
* tool/transcode-tblgen.rb: new file. * enc/trans/make_transdb.rb: exclude *.erb.c. * enc/depend: exclude *.erb.c. * enc/trans/utf_16_32.erb.c: new file. * enc/trans/single_byte.erb.c: new file. * enc/trans/japanese.erb.c: new file. * enc/trans/korean.erb.c: new file. * enc/trans/iso-8859-2-tbl.rb: new file. * enc/trans/iso-8859-3-tbl.rb: new file. * enc/trans/iso-8859-4-tbl.rb: new file. * enc/trans/iso-8859-5-tbl.rb: new file. * enc/trans/iso-8859-6-tbl.rb: new file. * enc/trans/iso-8859-7-tbl.rb: new file. * enc/trans/iso-8859-8-tbl.rb: new file. * enc/trans/iso-8859-9-tbl.rb: new file. * enc/trans/iso-8859-10-tbl.rb: new file. * enc/trans/iso-8859-11-tbl.rb: new file. * enc/trans/iso-8859-13-tbl.rb: new file. * enc/trans/iso-8859-14-tbl.rb: new file. * enc/trans/iso-8859-15-tbl.rb: new file. * enc/trans/eucjp-tbl.rb: new file. * enc/trans/sjis-tbl.rb: new file. * enc/trans/euckr-tbl.rb: new file. * enc/trans/utf_16_32.c: regenerated. * enc/trans/single_byte.c: regenerated. * enc/trans/japanese.c: regenerated. * enc/trans/korean.c: regenerated. [ruby-dev:35730] git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@18373 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
parent
81577c26ee
commit
f694ec83e8
30 changed files with 63539 additions and 33174 deletions
60
ChangeLog
60
ChangeLog
|
@ -1,3 +1,63 @@
|
|||
Tue Aug 5 20:46:20 2008 Tanaka Akira <akr@fsij.org>
|
||||
|
||||
* tool/build-transcode: new file.
|
||||
|
||||
* tool/transcode-tblgen.rb: new file.
|
||||
|
||||
* enc/trans/make_transdb.rb: exclude *.erb.c.
|
||||
|
||||
* enc/depend: exclude *.erb.c.
|
||||
|
||||
* enc/trans/utf_16_32.erb.c: new file.
|
||||
|
||||
* enc/trans/single_byte.erb.c: new file.
|
||||
|
||||
* enc/trans/japanese.erb.c: new file.
|
||||
|
||||
* enc/trans/korean.erb.c: new file.
|
||||
|
||||
* enc/trans/iso-8859-2-tbl.rb: new file.
|
||||
|
||||
* enc/trans/iso-8859-3-tbl.rb: new file.
|
||||
|
||||
* enc/trans/iso-8859-4-tbl.rb: new file.
|
||||
|
||||
* enc/trans/iso-8859-5-tbl.rb: new file.
|
||||
|
||||
* enc/trans/iso-8859-6-tbl.rb: new file.
|
||||
|
||||
* enc/trans/iso-8859-7-tbl.rb: new file.
|
||||
|
||||
* enc/trans/iso-8859-8-tbl.rb: new file.
|
||||
|
||||
* enc/trans/iso-8859-9-tbl.rb: new file.
|
||||
|
||||
* enc/trans/iso-8859-10-tbl.rb: new file.
|
||||
|
||||
* enc/trans/iso-8859-11-tbl.rb: new file.
|
||||
|
||||
* enc/trans/iso-8859-13-tbl.rb: new file.
|
||||
|
||||
* enc/trans/iso-8859-14-tbl.rb: new file.
|
||||
|
||||
* enc/trans/iso-8859-15-tbl.rb: new file.
|
||||
|
||||
* enc/trans/eucjp-tbl.rb: new file.
|
||||
|
||||
* enc/trans/sjis-tbl.rb: new file.
|
||||
|
||||
* enc/trans/euckr-tbl.rb: new file.
|
||||
|
||||
* enc/trans/utf_16_32.c: regenerated.
|
||||
|
||||
* enc/trans/single_byte.c: regenerated.
|
||||
|
||||
* enc/trans/japanese.c: regenerated.
|
||||
|
||||
* enc/trans/korean.c: regenerated.
|
||||
|
||||
[ruby-dev:35730]
|
||||
|
||||
Tue Aug 5 18:02:53 2008 Kazuhiro NISHIYAMA <zn@mbf.nifty.com>
|
||||
|
||||
* test/io/nonblock/test_flush.rb (TestIONonblock#test_flush):
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
% encs.each {|e| e.chomp!(".c")}
|
||||
% alphanumeric_order = proc {|e| e.scan(/(\d+)|(\D+)/).map {|n,a| a||[n.size,n.to_i]}.flatten}
|
||||
% encs = encs.sort_by(&alphanumeric_order)
|
||||
% trans = Dir.open($srcdir+"/trans") {|d| d.select {|e| e.chomp!('.c')}}
|
||||
% trans = Dir.open($srcdir+"/trans") {|d| d.select {|e| e.chomp!('.c') && /\.erb\z/ !~ e }}
|
||||
% trans = trans.sort_by(&alphanumeric_order)
|
||||
% trans.map! {|e| "trans/#{e}"}
|
||||
% dependencies = encs + trans
|
||||
|
|
8831
enc/trans/cp949-tbl.rb
Normal file
8831
enc/trans/cp949-tbl.rb
Normal file
File diff suppressed because it is too large
Load diff
14803
enc/trans/eucjp-tbl.rb
Normal file
14803
enc/trans/eucjp-tbl.rb
Normal file
File diff suppressed because it is too large
Load diff
8228
enc/trans/euckr-tbl.rb
Normal file
8228
enc/trans/euckr-tbl.rb
Normal file
File diff suppressed because it is too large
Load diff
98
enc/trans/iso-8859-10-tbl.rb
Normal file
98
enc/trans/iso-8859-10-tbl.rb
Normal file
|
@ -0,0 +1,98 @@
|
|||
ISO_8859_10_TO_UCS_TBL = [
|
||||
["A0",0xA0],
|
||||
["A1",0x104],
|
||||
["A2",0x112],
|
||||
["A3",0x122],
|
||||
["A4",0x12A],
|
||||
["A5",0x128],
|
||||
["A6",0x136],
|
||||
["A7",0xA7],
|
||||
["A8",0x13B],
|
||||
["A9",0x110],
|
||||
["AA",0x160],
|
||||
["AB",0x166],
|
||||
["AC",0x17D],
|
||||
["AD",0xAD],
|
||||
["AE",0x16A],
|
||||
["AF",0x14A],
|
||||
["B0",0xB0],
|
||||
["B1",0x105],
|
||||
["B2",0x113],
|
||||
["B3",0x123],
|
||||
["B4",0x12B],
|
||||
["B5",0x129],
|
||||
["B6",0x137],
|
||||
["B7",0xB7],
|
||||
["B8",0x13C],
|
||||
["B9",0x111],
|
||||
["BA",0x161],
|
||||
["BB",0x167],
|
||||
["BC",0x17E],
|
||||
["BD",0x2015],
|
||||
["BE",0x16B],
|
||||
["BF",0x14B],
|
||||
["C0",0x100],
|
||||
["C1",0xC1],
|
||||
["C2",0xC2],
|
||||
["C3",0xC3],
|
||||
["C4",0xC4],
|
||||
["C5",0xC5],
|
||||
["C6",0xC6],
|
||||
["C7",0x12E],
|
||||
["C8",0x10C],
|
||||
["C9",0xC9],
|
||||
["CA",0x118],
|
||||
["CB",0xCB],
|
||||
["CC",0x116],
|
||||
["CD",0xCD],
|
||||
["CE",0xCE],
|
||||
["CF",0xCF],
|
||||
["D0",0xD0],
|
||||
["D1",0x145],
|
||||
["D2",0x14C],
|
||||
["D3",0xD3],
|
||||
["D4",0xD4],
|
||||
["D5",0xD5],
|
||||
["D6",0xD6],
|
||||
["D7",0x168],
|
||||
["D8",0xD8],
|
||||
["D9",0x172],
|
||||
["DA",0xDA],
|
||||
["DB",0xDB],
|
||||
["DC",0xDC],
|
||||
["DD",0xDD],
|
||||
["DE",0xDE],
|
||||
["DF",0xDF],
|
||||
["E0",0x101],
|
||||
["E1",0xE1],
|
||||
["E2",0xE2],
|
||||
["E3",0xE3],
|
||||
["E4",0xE4],
|
||||
["E5",0xE5],
|
||||
["E6",0xE6],
|
||||
["E7",0x12F],
|
||||
["E8",0x10D],
|
||||
["E9",0xE9],
|
||||
["EA",0x119],
|
||||
["EB",0xEB],
|
||||
["EC",0x117],
|
||||
["ED",0xED],
|
||||
["EE",0xEE],
|
||||
["EF",0xEF],
|
||||
["F0",0xF0],
|
||||
["F1",0x146],
|
||||
["F2",0x14D],
|
||||
["F3",0xF3],
|
||||
["F4",0xF4],
|
||||
["F5",0xF5],
|
||||
["F6",0xF6],
|
||||
["F7",0x169],
|
||||
["F8",0xF8],
|
||||
["F9",0x173],
|
||||
["FA",0xFA],
|
||||
["FB",0xFB],
|
||||
["FC",0xFC],
|
||||
["FD",0xFD],
|
||||
["FE",0xFE],
|
||||
["FF",0x138],
|
||||
]
|
90
enc/trans/iso-8859-11-tbl.rb
Normal file
90
enc/trans/iso-8859-11-tbl.rb
Normal file
|
@ -0,0 +1,90 @@
|
|||
ISO_8859_11_TO_UCS_TBL = [
|
||||
["A0",0xA0],
|
||||
["A1",0xE01],
|
||||
["A2",0xE02],
|
||||
["A3",0xE03],
|
||||
["A4",0xE04],
|
||||
["A5",0xE05],
|
||||
["A6",0xE06],
|
||||
["A7",0xE07],
|
||||
["A8",0xE08],
|
||||
["A9",0xE09],
|
||||
["AA",0xE0A],
|
||||
["AB",0xE0B],
|
||||
["AC",0xE0C],
|
||||
["AD",0xE0D],
|
||||
["AE",0xE0E],
|
||||
["AF",0xE0F],
|
||||
["B0",0xE10],
|
||||
["B1",0xE11],
|
||||
["B2",0xE12],
|
||||
["B3",0xE13],
|
||||
["B4",0xE14],
|
||||
["B5",0xE15],
|
||||
["B6",0xE16],
|
||||
["B7",0xE17],
|
||||
["B8",0xE18],
|
||||
["B9",0xE19],
|
||||
["BA",0xE1A],
|
||||
["BB",0xE1B],
|
||||
["BC",0xE1C],
|
||||
["BD",0xE1D],
|
||||
["BE",0xE1E],
|
||||
["BF",0xE1F],
|
||||
["C0",0xE20],
|
||||
["C1",0xE21],
|
||||
["C2",0xE22],
|
||||
["C3",0xE23],
|
||||
["C4",0xE24],
|
||||
["C5",0xE25],
|
||||
["C6",0xE26],
|
||||
["C7",0xE27],
|
||||
["C8",0xE28],
|
||||
["C9",0xE29],
|
||||
["CA",0xE2A],
|
||||
["CB",0xE2B],
|
||||
["CC",0xE2C],
|
||||
["CD",0xE2D],
|
||||
["CE",0xE2E],
|
||||
["CF",0xE2F],
|
||||
["D0",0xE30],
|
||||
["D1",0xE31],
|
||||
["D2",0xE32],
|
||||
["D3",0xE33],
|
||||
["D4",0xE34],
|
||||
["D5",0xE35],
|
||||
["D6",0xE36],
|
||||
["D7",0xE37],
|
||||
["D8",0xE38],
|
||||
["D9",0xE39],
|
||||
["DA",0xE3A],
|
||||
["DF",0xE3F],
|
||||
["E0",0xE40],
|
||||
["E1",0xE41],
|
||||
["E2",0xE42],
|
||||
["E3",0xE43],
|
||||
["E4",0xE44],
|
||||
["E5",0xE45],
|
||||
["E6",0xE46],
|
||||
["E7",0xE47],
|
||||
["E8",0xE48],
|
||||
["E9",0xE49],
|
||||
["EA",0xE4A],
|
||||
["EB",0xE4B],
|
||||
["EC",0xE4C],
|
||||
["ED",0xE4D],
|
||||
["EE",0xE4E],
|
||||
["EF",0xE4F],
|
||||
["F0",0xE50],
|
||||
["F1",0xE51],
|
||||
["F2",0xE52],
|
||||
["F3",0xE53],
|
||||
["F4",0xE54],
|
||||
["F5",0xE55],
|
||||
["F6",0xE56],
|
||||
["F7",0xE57],
|
||||
["F8",0xE58],
|
||||
["F9",0xE59],
|
||||
["FA",0xE5A],
|
||||
["FB",0xE5B],
|
||||
]
|
98
enc/trans/iso-8859-13-tbl.rb
Normal file
98
enc/trans/iso-8859-13-tbl.rb
Normal file
|
@ -0,0 +1,98 @@
|
|||
ISO_8859_13_TO_UCS_TBL = [
|
||||
["A0",0xA0],
|
||||
["A1",0x201D],
|
||||
["A2",0xA2],
|
||||
["A3",0xA3],
|
||||
["A4",0xA4],
|
||||
["A5",0x201E],
|
||||
["A6",0xA6],
|
||||
["A7",0xA7],
|
||||
["A8",0xD8],
|
||||
["A9",0xA9],
|
||||
["AA",0x156],
|
||||
["AB",0xAB],
|
||||
["AC",0xAC],
|
||||
["AD",0xAD],
|
||||
["AE",0xAE],
|
||||
["AF",0xC6],
|
||||
["B0",0xB0],
|
||||
["B1",0xB1],
|
||||
["B2",0xB2],
|
||||
["B3",0xB3],
|
||||
["B4",0x201C],
|
||||
["B5",0xB5],
|
||||
["B6",0xB6],
|
||||
["B7",0xB7],
|
||||
["B8",0xF8],
|
||||
["B9",0xB9],
|
||||
["BA",0x157],
|
||||
["BB",0xBB],
|
||||
["BC",0xBC],
|
||||
["BD",0xBD],
|
||||
["BE",0xBE],
|
||||
["BF",0xE6],
|
||||
["C0",0x104],
|
||||
["C1",0x12E],
|
||||
["C2",0x100],
|
||||
["C3",0x106],
|
||||
["C4",0xC4],
|
||||
["C5",0xC5],
|
||||
["C6",0x118],
|
||||
["C7",0x112],
|
||||
["C8",0x10C],
|
||||
["C9",0xC9],
|
||||
["CA",0x179],
|
||||
["CB",0x116],
|
||||
["CC",0x122],
|
||||
["CD",0x136],
|
||||
["CE",0x12A],
|
||||
["CF",0x13B],
|
||||
["D0",0x160],
|
||||
["D1",0x143],
|
||||
["D2",0x145],
|
||||
["D3",0xD3],
|
||||
["D4",0x14C],
|
||||
["D5",0xD5],
|
||||
["D6",0xD6],
|
||||
["D7",0xD7],
|
||||
["D8",0x172],
|
||||
["D9",0x141],
|
||||
["DA",0x15A],
|
||||
["DB",0x16A],
|
||||
["DC",0xDC],
|
||||
["DD",0x17B],
|
||||
["DE",0x17D],
|
||||
["DF",0xDF],
|
||||
["E0",0x105],
|
||||
["E1",0x12F],
|
||||
["E2",0x101],
|
||||
["E3",0x107],
|
||||
["E4",0xE4],
|
||||
["E5",0xE5],
|
||||
["E6",0x119],
|
||||
["E7",0x113],
|
||||
["E8",0x10D],
|
||||
["E9",0xE9],
|
||||
["EA",0x17A],
|
||||
["EB",0x117],
|
||||
["EC",0x123],
|
||||
["ED",0x137],
|
||||
["EE",0x12B],
|
||||
["EF",0x13C],
|
||||
["F0",0x161],
|
||||
["F1",0x144],
|
||||
["F2",0x146],
|
||||
["F3",0xF3],
|
||||
["F4",0x14D],
|
||||
["F5",0xF5],
|
||||
["F6",0xF6],
|
||||
["F7",0xF7],
|
||||
["F8",0x173],
|
||||
["F9",0x142],
|
||||
["FA",0x15B],
|
||||
["FB",0x16B],
|
||||
["FC",0xFC],
|
||||
["FD",0x17C],
|
||||
["FE",0x17E],
|
||||
["FF",0x2019],
|
||||
]
|
98
enc/trans/iso-8859-14-tbl.rb
Normal file
98
enc/trans/iso-8859-14-tbl.rb
Normal file
|
@ -0,0 +1,98 @@
|
|||
ISO_8859_14_TO_UCS_TBL = [
|
||||
["A0",0xA0],
|
||||
["A1",0x1E02],
|
||||
["A2",0x1E03],
|
||||
["A3",0xA3],
|
||||
["A4",0x10A],
|
||||
["A5",0x10B],
|
||||
["A6",0x1E0A],
|
||||
["A7",0xA7],
|
||||
["A8",0x1E80],
|
||||
["A9",0xA9],
|
||||
["AA",0x1E82],
|
||||
["AB",0x1E0B],
|
||||
["AC",0x1EF2],
|
||||
["AD",0xAD],
|
||||
["AE",0xAE],
|
||||
["AF",0x178],
|
||||
["B0",0x1E1E],
|
||||
["B1",0x1E1F],
|
||||
["B2",0x120],
|
||||
["B3",0x121],
|
||||
["B4",0x1E40],
|
||||
["B5",0x1E41],
|
||||
["B6",0xB6],
|
||||
["B7",0x1E56],
|
||||
["B8",0x1E81],
|
||||
["B9",0x1E57],
|
||||
["BA",0x1E83],
|
||||
["BB",0x1E60],
|
||||
["BC",0x1EF3],
|
||||
["BD",0x1E84],
|
||||
["BE",0x1E85],
|
||||
["BF",0x1E61],
|
||||
["C0",0xC0],
|
||||
["C1",0xC1],
|
||||
["C2",0xC2],
|
||||
["C3",0xC3],
|
||||
["C4",0xC4],
|
||||
["C5",0xC5],
|
||||
["C6",0xC6],
|
||||
["C7",0xC7],
|
||||
["C8",0xC8],
|
||||
["C9",0xC9],
|
||||
["CA",0xCA],
|
||||
["CB",0xCB],
|
||||
["CC",0xCC],
|
||||
["CD",0xCD],
|
||||
["CE",0xCE],
|
||||
["CF",0xCF],
|
||||
["D0",0x174],
|
||||
["D1",0xD1],
|
||||
["D2",0xD2],
|
||||
["D3",0xD3],
|
||||
["D4",0xD4],
|
||||
["D5",0xD5],
|
||||
["D6",0xD6],
|
||||
["D7",0x1E6A],
|
||||
["D8",0xD8],
|
||||
["D9",0xD9],
|
||||
["DA",0xDA],
|
||||
["DB",0xDB],
|
||||
["DC",0xDC],
|
||||
["DD",0xDD],
|
||||
["DE",0x176],
|
||||
["DF",0xDF],
|
||||
["E0",0xE0],
|
||||
["E1",0xE1],
|
||||
["E2",0xE2],
|
||||
["E3",0xE3],
|
||||
["E4",0xE4],
|
||||
["E5",0xE5],
|
||||
["E6",0xE6],
|
||||
["E7",0xE7],
|
||||
["E8",0xE8],
|
||||
["E9",0xE9],
|
||||
["EA",0xEA],
|
||||
["EB",0xEB],
|
||||
["EC",0xEC],
|
||||
["ED",0xED],
|
||||
["EE",0xEE],
|
||||
["EF",0xEF],
|
||||
["F0",0x175],
|
||||
["F1",0xF1],
|
||||
["F2",0xF2],
|
||||
["F3",0xF3],
|
||||
["F4",0xF4],
|
||||
["F5",0xF5],
|
||||
["F6",0xF6],
|
||||
["F7",0x1E6B],
|
||||
["F8",0xF8],
|
||||
["F9",0xF9],
|
||||
["FA",0xFA],
|
||||
["FB",0xFB],
|
||||
["FC",0xFC],
|
||||
["FD",0xFD],
|
||||
["FE",0x177],
|
||||
["FF",0xFF],
|
||||
]
|
98
enc/trans/iso-8859-15-tbl.rb
Normal file
98
enc/trans/iso-8859-15-tbl.rb
Normal file
|
@ -0,0 +1,98 @@
|
|||
ISO_8859_15_TO_UCS_TBL = [
|
||||
["A0",0xA0],
|
||||
["A1",0xA1],
|
||||
["A2",0xA2],
|
||||
["A3",0xA3],
|
||||
["A4",0x20AC],
|
||||
["A5",0xA5],
|
||||
["A6",0x160],
|
||||
["A7",0xA7],
|
||||
["A8",0x161],
|
||||
["A9",0xA9],
|
||||
["AA",0xAA],
|
||||
["AB",0xAB],
|
||||
["AC",0xAC],
|
||||
["AD",0xAD],
|
||||
["AE",0xAE],
|
||||
["AF",0xAF],
|
||||
["B0",0xB0],
|
||||
["B1",0xB1],
|
||||
["B2",0xB2],
|
||||
["B3",0xB3],
|
||||
["B4",0x17D],
|
||||
["B5",0xB5],
|
||||
["B6",0xB6],
|
||||
["B7",0xB7],
|
||||
["B8",0x17E],
|
||||
["B9",0xB9],
|
||||
["BA",0xBA],
|
||||
["BB",0xBB],
|
||||
["BC",0x152],
|
||||
["BD",0x153],
|
||||
["BE",0x178],
|
||||
["BF",0xBF],
|
||||
["C0",0xC0],
|
||||
["C1",0xC1],
|
||||
["C2",0xC2],
|
||||
["C3",0xC3],
|
||||
["C4",0xC4],
|
||||
["C5",0xC5],
|
||||
["C6",0xC6],
|
||||
["C7",0xC7],
|
||||
["C8",0xC8],
|
||||
["C9",0xC9],
|
||||
["CA",0xCA],
|
||||
["CB",0xCB],
|
||||
["CC",0xCC],
|
||||
["CD",0xCD],
|
||||
["CE",0xCE],
|
||||
["CF",0xCF],
|
||||
["D0",0xD0],
|
||||
["D1",0xD1],
|
||||
["D2",0xD2],
|
||||
["D3",0xD3],
|
||||
["D4",0xD4],
|
||||
["D5",0xD5],
|
||||
["D6",0xD6],
|
||||
["D7",0xD7],
|
||||
["D8",0xD8],
|
||||
["D9",0xD9],
|
||||
["DA",0xDA],
|
||||
["DB",0xDB],
|
||||
["DC",0xDC],
|
||||
["DD",0xDD],
|
||||
["DE",0xDE],
|
||||
["DF",0xDF],
|
||||
["E0",0xE0],
|
||||
["E1",0xE1],
|
||||
["E2",0xE2],
|
||||
["E3",0xE3],
|
||||
["E4",0xE4],
|
||||
["E5",0xE5],
|
||||
["E6",0xE6],
|
||||
["E7",0xE7],
|
||||
["E8",0xE8],
|
||||
["E9",0xE9],
|
||||
["EA",0xEA],
|
||||
["EB",0xEB],
|
||||
["EC",0xEC],
|
||||
["ED",0xED],
|
||||
["EE",0xEE],
|
||||
["EF",0xEF],
|
||||
["F0",0xF0],
|
||||
["F1",0xF1],
|
||||
["F2",0xF2],
|
||||
["F3",0xF3],
|
||||
["F4",0xF4],
|
||||
["F5",0xF5],
|
||||
["F6",0xF6],
|
||||
["F7",0xF7],
|
||||
["F8",0xF8],
|
||||
["F9",0xF9],
|
||||
["FA",0xFA],
|
||||
["FB",0xFB],
|
||||
["FC",0xFC],
|
||||
["FD",0xFD],
|
||||
["FE",0xFE],
|
||||
["FF",0xFF],
|
||||
]
|
98
enc/trans/iso-8859-2-tbl.rb
Normal file
98
enc/trans/iso-8859-2-tbl.rb
Normal file
|
@ -0,0 +1,98 @@
|
|||
ISO_8859_2_TO_UCS_TBL = [
|
||||
["A0",0xA0],
|
||||
["A1",0x104],
|
||||
["A2",0x2D8],
|
||||
["A3",0x141],
|
||||
["A4",0xA4],
|
||||
["A5",0x13D],
|
||||
["A6",0x15A],
|
||||
["A7",0xA7],
|
||||
["A8",0xA8],
|
||||
["A9",0x160],
|
||||
["AA",0x15E],
|
||||
["AB",0x164],
|
||||
["AC",0x179],
|
||||
["AD",0xAD],
|
||||
["AE",0x17D],
|
||||
["AF",0x17B],
|
||||
["B0",0xB0],
|
||||
["B1",0x105],
|
||||
["B2",0x2DB],
|
||||
["B3",0x142],
|
||||
["B4",0xB4],
|
||||
["B5",0x13E],
|
||||
["B6",0x15B],
|
||||
["B7",0x2C7],
|
||||
["B8",0xB8],
|
||||
["B9",0x161],
|
||||
["BA",0x15F],
|
||||
["BB",0x165],
|
||||
["BC",0x17A],
|
||||
["BD",0x2DD],
|
||||
["BE",0x17E],
|
||||
["BF",0x17C],
|
||||
["C0",0x154],
|
||||
["C1",0xC1],
|
||||
["C2",0xC2],
|
||||
["C3",0x102],
|
||||
["C4",0xC4],
|
||||
["C5",0x139],
|
||||
["C6",0x106],
|
||||
["C7",0xC7],
|
||||
["C8",0x10C],
|
||||
["C9",0xC9],
|
||||
["CA",0x118],
|
||||
["CB",0xCB],
|
||||
["CC",0x11A],
|
||||
["CD",0xCD],
|
||||
["CE",0xCE],
|
||||
["CF",0x10E],
|
||||
["D0",0x110],
|
||||
["D1",0x143],
|
||||
["D2",0x147],
|
||||
["D3",0xD3],
|
||||
["D4",0xD4],
|
||||
["D5",0x150],
|
||||
["D6",0xD6],
|
||||
["D7",0xD7],
|
||||
["D8",0x158],
|
||||
["D9",0x16E],
|
||||
["DA",0xDA],
|
||||
["DB",0x170],
|
||||
["DC",0xDC],
|
||||
["DD",0xDD],
|
||||
["DE",0x162],
|
||||
["DF",0xDF],
|
||||
["E0",0x155],
|
||||
["E1",0xE1],
|
||||
["E2",0xE2],
|
||||
["E3",0x103],
|
||||
["E4",0xE4],
|
||||
["E5",0x13A],
|
||||
["E6",0x107],
|
||||
["E7",0xE7],
|
||||
["E8",0x10D],
|
||||
["E9",0xE9],
|
||||
["EA",0x119],
|
||||
["EB",0xEB],
|
||||
["EC",0x11B],
|
||||
["ED",0xED],
|
||||
["EE",0xEE],
|
||||
["EF",0x10F],
|
||||
["F0",0x111],
|
||||
["F1",0x144],
|
||||
["F2",0x148],
|
||||
["F3",0xF3],
|
||||
["F4",0xF4],
|
||||
["F5",0x151],
|
||||
["F6",0xF6],
|
||||
["F7",0xF7],
|
||||
["F8",0x159],
|
||||
["F9",0x16F],
|
||||
["FA",0xFA],
|
||||
["FB",0x171],
|
||||
["FC",0xFC],
|
||||
["FD",0xFD],
|
||||
["FE",0x163],
|
||||
["FF",0x2D9],
|
||||
]
|
91
enc/trans/iso-8859-3-tbl.rb
Normal file
91
enc/trans/iso-8859-3-tbl.rb
Normal file
|
@ -0,0 +1,91 @@
|
|||
ISO_8859_3_TO_UCS_TBL = [
|
||||
["A0",0xA0],
|
||||
["A1",0x126],
|
||||
["A2",0x2D8],
|
||||
["A3",0xA3],
|
||||
["A4",0xA4],
|
||||
["A6",0x124],
|
||||
["A7",0xA7],
|
||||
["A8",0xA8],
|
||||
["A9",0x130],
|
||||
["AA",0x15E],
|
||||
["AB",0x11E],
|
||||
["AC",0x134],
|
||||
["AD",0xAD],
|
||||
["AF",0x17B],
|
||||
["B0",0xB0],
|
||||
["B1",0x127],
|
||||
["B2",0xB2],
|
||||
["B3",0xB3],
|
||||
["B4",0xB4],
|
||||
["B5",0xB5],
|
||||
["B6",0x125],
|
||||
["B7",0xB7],
|
||||
["B8",0xB8],
|
||||
["B9",0x131],
|
||||
["BA",0x15F],
|
||||
["BB",0x11F],
|
||||
["BC",0x135],
|
||||
["BD",0xBD],
|
||||
["BF",0x17C],
|
||||
["C0",0xC0],
|
||||
["C1",0xC1],
|
||||
["C2",0xC2],
|
||||
["C4",0xC4],
|
||||
["C5",0x10A],
|
||||
["C6",0x108],
|
||||
["C7",0xC7],
|
||||
["C8",0xC8],
|
||||
["C9",0xC9],
|
||||
["CA",0xCA],
|
||||
["CB",0xCB],
|
||||
["CC",0xCC],
|
||||
["CD",0xCD],
|
||||
["CE",0xCE],
|
||||
["CF",0xCF],
|
||||
["D1",0xD1],
|
||||
["D2",0xD2],
|
||||
["D3",0xD3],
|
||||
["D4",0xD4],
|
||||
["D5",0x120],
|
||||
["D6",0xD6],
|
||||
["D7",0xD7],
|
||||
["D8",0x11C],
|
||||
["D9",0xD9],
|
||||
["DA",0xDA],
|
||||
["DB",0xDB],
|
||||
["DC",0xDC],
|
||||
["DD",0x16C],
|
||||
["DE",0x15C],
|
||||
["DF",0xDF],
|
||||
["E0",0xE0],
|
||||
["E1",0xE1],
|
||||
["E2",0xE2],
|
||||
["E4",0xE4],
|
||||
["E5",0x10B],
|
||||
["E6",0x109],
|
||||
["E7",0xE7],
|
||||
["E8",0xE8],
|
||||
["E9",0xE9],
|
||||
["EA",0xEA],
|
||||
["EB",0xEB],
|
||||
["EC",0xEC],
|
||||
["ED",0xED],
|
||||
["EE",0xEE],
|
||||
["EF",0xEF],
|
||||
["F1",0xF1],
|
||||
["F2",0xF2],
|
||||
["F3",0xF3],
|
||||
["F4",0xF4],
|
||||
["F5",0x121],
|
||||
["F6",0xF6],
|
||||
["F7",0xF7],
|
||||
["F8",0x11D],
|
||||
["F9",0xF9],
|
||||
["FA",0xFA],
|
||||
["FB",0xFB],
|
||||
["FC",0xFC],
|
||||
["FD",0x16D],
|
||||
["FE",0x15D],
|
||||
["FF",0x2D9],
|
||||
]
|
98
enc/trans/iso-8859-4-tbl.rb
Normal file
98
enc/trans/iso-8859-4-tbl.rb
Normal file
|
@ -0,0 +1,98 @@
|
|||
ISO_8859_4_TO_UCS_TBL = [
|
||||
["A0",0xA0],
|
||||
["A1",0x104],
|
||||
["A2",0x138],
|
||||
["A3",0x156],
|
||||
["A4",0xA4],
|
||||
["A5",0x128],
|
||||
["A6",0x13B],
|
||||
["A7",0xA7],
|
||||
["A8",0xA8],
|
||||
["A9",0x160],
|
||||
["AA",0x112],
|
||||
["AB",0x122],
|
||||
["AC",0x166],
|
||||
["AD",0xAD],
|
||||
["AE",0x17D],
|
||||
["AF",0xAF],
|
||||
["B0",0xB0],
|
||||
["B1",0x105],
|
||||
["B2",0x2DB],
|
||||
["B3",0x157],
|
||||
["B4",0xB4],
|
||||
["B5",0x129],
|
||||
["B6",0x13C],
|
||||
["B7",0x2C7],
|
||||
["B8",0xB8],
|
||||
["B9",0x161],
|
||||
["BA",0x113],
|
||||
["BB",0x123],
|
||||
["BC",0x167],
|
||||
["BD",0x14A],
|
||||
["BE",0x17E],
|
||||
["BF",0x14B],
|
||||
["C0",0x100],
|
||||
["C1",0xC1],
|
||||
["C2",0xC2],
|
||||
["C3",0xC3],
|
||||
["C4",0xC4],
|
||||
["C5",0xC5],
|
||||
["C6",0xC6],
|
||||
["C7",0x12E],
|
||||
["C8",0x10C],
|
||||
["C9",0xC9],
|
||||
["CA",0x118],
|
||||
["CB",0xCB],
|
||||
["CC",0x116],
|
||||
["CD",0xCD],
|
||||
["CE",0xCE],
|
||||
["CF",0x12A],
|
||||
["D0",0x110],
|
||||
["D1",0x145],
|
||||
["D2",0x14C],
|
||||
["D3",0x136],
|
||||
["D4",0xD4],
|
||||
["D5",0xD5],
|
||||
["D6",0xD6],
|
||||
["D7",0xD7],
|
||||
["D8",0xD8],
|
||||
["D9",0x172],
|
||||
["DA",0xDA],
|
||||
["DB",0xDB],
|
||||
["DC",0xDC],
|
||||
["DD",0x168],
|
||||
["DE",0x16A],
|
||||
["DF",0xDF],
|
||||
["E0",0x101],
|
||||
["E1",0xE1],
|
||||
["E2",0xE2],
|
||||
["E3",0xE3],
|
||||
["E4",0xE4],
|
||||
["E5",0xE5],
|
||||
["E6",0xE6],
|
||||
["E7",0x12F],
|
||||
["E8",0x10D],
|
||||
["E9",0xE9],
|
||||
["EA",0x119],
|
||||
["EB",0xEB],
|
||||
["EC",0x117],
|
||||
["ED",0xED],
|
||||
["EE",0xEE],
|
||||
["EF",0x12B],
|
||||
["F0",0x111],
|
||||
["F1",0x146],
|
||||
["F2",0x14D],
|
||||
["F3",0x137],
|
||||
["F4",0xF4],
|
||||
["F5",0xF5],
|
||||
["F6",0xF6],
|
||||
["F7",0xF7],
|
||||
["F8",0xF8],
|
||||
["F9",0x173],
|
||||
["FA",0xFA],
|
||||
["FB",0xFB],
|
||||
["FC",0xFC],
|
||||
["FD",0x169],
|
||||
["FE",0x16B],
|
||||
["FF",0x2D9],
|
||||
]
|
98
enc/trans/iso-8859-5-tbl.rb
Normal file
98
enc/trans/iso-8859-5-tbl.rb
Normal file
|
@ -0,0 +1,98 @@
|
|||
ISO_8859_5_TO_UCS_TBL = [
|
||||
["A0",0xA0],
|
||||
["A1",0x401],
|
||||
["A2",0x402],
|
||||
["A3",0x403],
|
||||
["A4",0x404],
|
||||
["A5",0x405],
|
||||
["A6",0x406],
|
||||
["A7",0x407],
|
||||
["A8",0x408],
|
||||
["A9",0x409],
|
||||
["AA",0x40A],
|
||||
["AB",0x40B],
|
||||
["AC",0x40C],
|
||||
["AD",0xAD],
|
||||
["AE",0x40E],
|
||||
["AF",0x40F],
|
||||
["B0",0x410],
|
||||
["B1",0x411],
|
||||
["B2",0x412],
|
||||
["B3",0x413],
|
||||
["B4",0x414],
|
||||
["B5",0x415],
|
||||
["B6",0x416],
|
||||
["B7",0x417],
|
||||
["B8",0x418],
|
||||
["B9",0x419],
|
||||
["BA",0x41A],
|
||||
["BB",0x41B],
|
||||
["BC",0x41C],
|
||||
["BD",0x41D],
|
||||
["BE",0x41E],
|
||||
["BF",0x41F],
|
||||
["C0",0x420],
|
||||
["C1",0x421],
|
||||
["C2",0x422],
|
||||
["C3",0x423],
|
||||
["C4",0x424],
|
||||
["C5",0x425],
|
||||
["C6",0x426],
|
||||
["C7",0x427],
|
||||
["C8",0x428],
|
||||
["C9",0x429],
|
||||
["CA",0x42A],
|
||||
["CB",0x42B],
|
||||
["CC",0x42C],
|
||||
["CD",0x42D],
|
||||
["CE",0x42E],
|
||||
["CF",0x42F],
|
||||
["D0",0x430],
|
||||
["D1",0x431],
|
||||
["D2",0x432],
|
||||
["D3",0x433],
|
||||
["D4",0x434],
|
||||
["D5",0x435],
|
||||
["D6",0x436],
|
||||
["D7",0x437],
|
||||
["D8",0x438],
|
||||
["D9",0x439],
|
||||
["DA",0x43A],
|
||||
["DB",0x43B],
|
||||
["DC",0x43C],
|
||||
["DD",0x43D],
|
||||
["DE",0x43E],
|
||||
["DF",0x43F],
|
||||
["E0",0x440],
|
||||
["E1",0x441],
|
||||
["E2",0x442],
|
||||
["E3",0x443],
|
||||
["E4",0x444],
|
||||
["E5",0x445],
|
||||
["E6",0x446],
|
||||
["E7",0x447],
|
||||
["E8",0x448],
|
||||
["E9",0x449],
|
||||
["EA",0x44A],
|
||||
["EB",0x44B],
|
||||
["EC",0x44C],
|
||||
["ED",0x44D],
|
||||
["EE",0x44E],
|
||||
["EF",0x44F],
|
||||
["F0",0x2116],
|
||||
["F1",0x451],
|
||||
["F2",0x452],
|
||||
["F3",0x453],
|
||||
["F4",0x454],
|
||||
["F5",0x455],
|
||||
["F6",0x456],
|
||||
["F7",0x457],
|
||||
["F8",0x458],
|
||||
["F9",0x459],
|
||||
["FA",0x45A],
|
||||
["FB",0x45B],
|
||||
["FC",0x45C],
|
||||
["FD",0xA7],
|
||||
["FE",0x45E],
|
||||
["FF",0x45F],
|
||||
]
|
53
enc/trans/iso-8859-6-tbl.rb
Normal file
53
enc/trans/iso-8859-6-tbl.rb
Normal file
|
@ -0,0 +1,53 @@
|
|||
ISO_8859_6_TO_UCS_TBL = [
|
||||
["A0",0xA0],
|
||||
["A4",0xA4],
|
||||
["AC",0x60C],
|
||||
["AD",0xAD],
|
||||
["BB",0x61B],
|
||||
["BF",0x61F],
|
||||
["C1",0x621],
|
||||
["C2",0x622],
|
||||
["C3",0x623],
|
||||
["C4",0x624],
|
||||
["C5",0x625],
|
||||
["C6",0x626],
|
||||
["C7",0x627],
|
||||
["C8",0x628],
|
||||
["C9",0x629],
|
||||
["CA",0x62A],
|
||||
["CB",0x62B],
|
||||
["CC",0x62C],
|
||||
["CD",0x62D],
|
||||
["CE",0x62E],
|
||||
["CF",0x62F],
|
||||
["D0",0x630],
|
||||
["D1",0x631],
|
||||
["D2",0x632],
|
||||
["D3",0x633],
|
||||
["D4",0x634],
|
||||
["D5",0x635],
|
||||
["D6",0x636],
|
||||
["D7",0x637],
|
||||
["D8",0x638],
|
||||
["D9",0x639],
|
||||
["DA",0x63A],
|
||||
["E0",0x640],
|
||||
["E1",0x641],
|
||||
["E2",0x642],
|
||||
["E3",0x643],
|
||||
["E4",0x644],
|
||||
["E5",0x645],
|
||||
["E6",0x646],
|
||||
["E7",0x647],
|
||||
["E8",0x648],
|
||||
["E9",0x649],
|
||||
["EA",0x64A],
|
||||
["EB",0x64B],
|
||||
["EC",0x64C],
|
||||
["ED",0x64D],
|
||||
["EE",0x64E],
|
||||
["EF",0x64F],
|
||||
["F0",0x650],
|
||||
["F1",0x651],
|
||||
["F2",0x652],
|
||||
]
|
95
enc/trans/iso-8859-7-tbl.rb
Normal file
95
enc/trans/iso-8859-7-tbl.rb
Normal file
|
@ -0,0 +1,95 @@
|
|||
ISO_8859_7_TO_UCS_TBL = [
|
||||
["A0",0xA0],
|
||||
["A1",0x2018],
|
||||
["A2",0x2019],
|
||||
["A3",0xA3],
|
||||
["A4",0x20AC],
|
||||
["A5",0x20AF],
|
||||
["A6",0xA6],
|
||||
["A7",0xA7],
|
||||
["A8",0xA8],
|
||||
["A9",0xA9],
|
||||
["AA",0x37A],
|
||||
["AB",0xAB],
|
||||
["AC",0xAC],
|
||||
["AD",0xAD],
|
||||
["AF",0x2015],
|
||||
["B0",0xB0],
|
||||
["B1",0xB1],
|
||||
["B2",0xB2],
|
||||
["B3",0xB3],
|
||||
["B4",0x384],
|
||||
["B5",0x385],
|
||||
["B6",0x386],
|
||||
["B7",0xB7],
|
||||
["B8",0x388],
|
||||
["B9",0x389],
|
||||
["BA",0x38A],
|
||||
["BB",0xBB],
|
||||
["BC",0x38C],
|
||||
["BD",0xBD],
|
||||
["BE",0x38E],
|
||||
["BF",0x38F],
|
||||
["C0",0x390],
|
||||
["C1",0x391],
|
||||
["C2",0x392],
|
||||
["C3",0x393],
|
||||
["C4",0x394],
|
||||
["C5",0x395],
|
||||
["C6",0x396],
|
||||
["C7",0x397],
|
||||
["C8",0x398],
|
||||
["C9",0x399],
|
||||
["CA",0x39A],
|
||||
["CB",0x39B],
|
||||
["CC",0x39C],
|
||||
["CD",0x39D],
|
||||
["CE",0x39E],
|
||||
["CF",0x39F],
|
||||
["D0",0x3A0],
|
||||
["D1",0x3A1],
|
||||
["D3",0x3A3],
|
||||
["D4",0x3A4],
|
||||
["D5",0x3A5],
|
||||
["D6",0x3A6],
|
||||
["D7",0x3A7],
|
||||
["D8",0x3A8],
|
||||
["D9",0x3A9],
|
||||
["DA",0x3AA],
|
||||
["DB",0x3AB],
|
||||
["DC",0x3AC],
|
||||
["DD",0x3AD],
|
||||
["DE",0x3AE],
|
||||
["DF",0x3AF],
|
||||
["E0",0x3B0],
|
||||
["E1",0x3B1],
|
||||
["E2",0x3B2],
|
||||
["E3",0x3B3],
|
||||
["E4",0x3B4],
|
||||
["E5",0x3B5],
|
||||
["E6",0x3B6],
|
||||
["E7",0x3B7],
|
||||
["E8",0x3B8],
|
||||
["E9",0x3B9],
|
||||
["EA",0x3BA],
|
||||
["EB",0x3BB],
|
||||
["EC",0x3BC],
|
||||
["ED",0x3BD],
|
||||
["EE",0x3BE],
|
||||
["EF",0x3BF],
|
||||
["F0",0x3C0],
|
||||
["F1",0x3C1],
|
||||
["F2",0x3C2],
|
||||
["F3",0x3C3],
|
||||
["F4",0x3C4],
|
||||
["F5",0x3C5],
|
||||
["F6",0x3C6],
|
||||
["F7",0x3C7],
|
||||
["F8",0x3C8],
|
||||
["F9",0x3C9],
|
||||
["FA",0x3CA],
|
||||
["FB",0x3CB],
|
||||
["FC",0x3CC],
|
||||
["FD",0x3CD],
|
||||
["FE",0x3CE],
|
||||
]
|
62
enc/trans/iso-8859-8-tbl.rb
Normal file
62
enc/trans/iso-8859-8-tbl.rb
Normal file
|
@ -0,0 +1,62 @@
|
|||
ISO_8859_8_TO_UCS_TBL = [
|
||||
["A0",0xA0],
|
||||
["A2",0xA2],
|
||||
["A3",0xA3],
|
||||
["A4",0xA4],
|
||||
["A5",0xA5],
|
||||
["A6",0xA6],
|
||||
["A7",0xA7],
|
||||
["A8",0xA8],
|
||||
["A9",0xA9],
|
||||
["AA",0xD7],
|
||||
["AB",0xAB],
|
||||
["AC",0xAC],
|
||||
["AD",0xAD],
|
||||
["AE",0xAE],
|
||||
["AF",0xAF],
|
||||
["B0",0xB0],
|
||||
["B1",0xB1],
|
||||
["B2",0xB2],
|
||||
["B3",0xB3],
|
||||
["B4",0xB4],
|
||||
["B5",0xB5],
|
||||
["B6",0xB6],
|
||||
["B7",0xB7],
|
||||
["B8",0xB8],
|
||||
["B9",0xB9],
|
||||
["BA",0xF7],
|
||||
["BB",0xBB],
|
||||
["BC",0xBC],
|
||||
["BD",0xBD],
|
||||
["BE",0xBE],
|
||||
["DF",0x2017],
|
||||
["E0",0x5D0],
|
||||
["E1",0x5D1],
|
||||
["E2",0x5D2],
|
||||
["E3",0x5D3],
|
||||
["E4",0x5D4],
|
||||
["E5",0x5D5],
|
||||
["E6",0x5D6],
|
||||
["E7",0x5D7],
|
||||
["E8",0x5D8],
|
||||
["E9",0x5D9],
|
||||
["EA",0x5DA],
|
||||
["EB",0x5DB],
|
||||
["EC",0x5DC],
|
||||
["ED",0x5DD],
|
||||
["EE",0x5DE],
|
||||
["EF",0x5DF],
|
||||
["F0",0x5E0],
|
||||
["F1",0x5E1],
|
||||
["F2",0x5E2],
|
||||
["F3",0x5E3],
|
||||
["F4",0x5E4],
|
||||
["F5",0x5E5],
|
||||
["F6",0x5E6],
|
||||
["F7",0x5E7],
|
||||
["F8",0x5E8],
|
||||
["F9",0x5E9],
|
||||
["FA",0x5EA],
|
||||
["FD",0x200E],
|
||||
["FE",0x200F],
|
||||
]
|
98
enc/trans/iso-8859-9-tbl.rb
Normal file
98
enc/trans/iso-8859-9-tbl.rb
Normal file
|
@ -0,0 +1,98 @@
|
|||
ISO_8859_9_TO_UCS_TBL = [
|
||||
["A0",0xA0],
|
||||
["A1",0xA1],
|
||||
["A2",0xA2],
|
||||
["A3",0xA3],
|
||||
["A4",0xA4],
|
||||
["A5",0xA5],
|
||||
["A6",0xA6],
|
||||
["A7",0xA7],
|
||||
["A8",0xA8],
|
||||
["A9",0xA9],
|
||||
["AA",0xAA],
|
||||
["AB",0xAB],
|
||||
["AC",0xAC],
|
||||
["AD",0xAD],
|
||||
["AE",0xAE],
|
||||
["AF",0xAF],
|
||||
["B0",0xB0],
|
||||
["B1",0xB1],
|
||||
["B2",0xB2],
|
||||
["B3",0xB3],
|
||||
["B4",0xB4],
|
||||
["B5",0xB5],
|
||||
["B6",0xB6],
|
||||
["B7",0xB7],
|
||||
["B8",0xB8],
|
||||
["B9",0xB9],
|
||||
["BA",0xBA],
|
||||
["BB",0xBB],
|
||||
["BC",0xBC],
|
||||
["BD",0xBD],
|
||||
["BE",0xBE],
|
||||
["BF",0xBF],
|
||||
["C0",0xC0],
|
||||
["C1",0xC1],
|
||||
["C2",0xC2],
|
||||
["C3",0xC3],
|
||||
["C4",0xC4],
|
||||
["C5",0xC5],
|
||||
["C6",0xC6],
|
||||
["C7",0xC7],
|
||||
["C8",0xC8],
|
||||
["C9",0xC9],
|
||||
["CA",0xCA],
|
||||
["CB",0xCB],
|
||||
["CC",0xCC],
|
||||
["CD",0xCD],
|
||||
["CE",0xCE],
|
||||
["CF",0xCF],
|
||||
["D0",0x11E],
|
||||
["D1",0xD1],
|
||||
["D2",0xD2],
|
||||
["D3",0xD3],
|
||||
["D4",0xD4],
|
||||
["D5",0xD5],
|
||||
["D6",0xD6],
|
||||
["D7",0xD7],
|
||||
["D8",0xD8],
|
||||
["D9",0xD9],
|
||||
["DA",0xDA],
|
||||
["DB",0xDB],
|
||||
["DC",0xDC],
|
||||
["DD",0x130],
|
||||
["DE",0x15E],
|
||||
["DF",0xDF],
|
||||
["E0",0xE0],
|
||||
["E1",0xE1],
|
||||
["E2",0xE2],
|
||||
["E3",0xE3],
|
||||
["E4",0xE4],
|
||||
["E5",0xE5],
|
||||
["E6",0xE6],
|
||||
["E7",0xE7],
|
||||
["E8",0xE8],
|
||||
["E9",0xE9],
|
||||
["EA",0xEA],
|
||||
["EB",0xEB],
|
||||
["EC",0xEC],
|
||||
["ED",0xED],
|
||||
["EE",0xEE],
|
||||
["EF",0xEF],
|
||||
["F0",0x11F],
|
||||
["F1",0xF1],
|
||||
["F2",0xF2],
|
||||
["F3",0xF3],
|
||||
["F4",0xF4],
|
||||
["F5",0xF5],
|
||||
["F6",0xF6],
|
||||
["F7",0xF7],
|
||||
["F8",0xF8],
|
||||
["F9",0xF9],
|
||||
["FA",0xFA],
|
||||
["FB",0xFB],
|
||||
["FC",0xFC],
|
||||
["FD",0x131],
|
||||
["FE",0x15F],
|
||||
["FF",0xFF],
|
||||
]
|
6302
enc/trans/japanese.c
6302
enc/trans/japanese.c
File diff suppressed because it is too large
Load diff
251
enc/trans/japanese.erb.c
Normal file
251
enc/trans/japanese.erb.c
Normal file
|
@ -0,0 +1,251 @@
|
|||
#include "transcode_data.h"
|
||||
|
||||
<%
|
||||
require 'sjis-tbl'
|
||||
require 'eucjp-tbl'
|
||||
%>
|
||||
|
||||
/*
 * Conversion tables between Japanese legacy encodings and UTF-8.
 *
 * Each ERB output tag below is replaced by whatever transcode_tblgen
 * returns for (source encoding, destination encoding, mapping list).
 * Every mapping list starts with ["{00-7f}", :nomap] and then splats one
 * of the *_TBL constants.  Windows-31J is generated from the same tables
 * as Shift_JIS, and CP51932 from the same tables as EUC-JP.
 *
 * NOTE(review): :nomap on 00-7f presumably means "pass ASCII bytes
 * through unchanged" -- confirm in tool/transcode-tblgen.rb.
 */
<%= transcode_tblgen "Shift_JIS", "UTF-8", [["{00-7f}", :nomap], *SJIS_TO_UCS_TBL] %>
|
||||
<%= transcode_tblgen "Windows-31J", "UTF-8", [["{00-7f}", :nomap], *SJIS_TO_UCS_TBL] %>
|
||||
|
||||
<%= transcode_tblgen "UTF-8", "Shift_JIS", [["{00-7f}", :nomap], *UCS_TO_SJIS_TBL] %>
|
||||
<%= transcode_tblgen "UTF-8", "Windows-31J", [["{00-7f}", :nomap], *UCS_TO_SJIS_TBL] %>
|
||||
|
||||
<%= transcode_tblgen "EUC-JP", "UTF-8", [["{00-7f}", :nomap], *EUCJP_TO_UCS_TBL] %>
|
||||
<%= transcode_tblgen "CP51932", "UTF-8", [["{00-7f}", :nomap], *EUCJP_TO_UCS_TBL] %>
|
||||
|
||||
<%= transcode_tblgen "UTF-8", "EUC-JP", [["{00-7f}", :nomap], *UCS_TO_EUCJP_TBL] %>
|
||||
<%= transcode_tblgen "UTF-8", "CP51932", [["{00-7f}", :nomap], *UCS_TO_EUCJP_TBL] %>
|
||||
|
||||
/*
 * Pack an escape-sequence prefix and one further byte into a single int:
 * `escseq` is shifted above the low 8 bits and `byte` fills the low 8 bits.
 * The macro may be nested to pack a two-byte prefix plus a final byte.
 *
 * Both parameters are parenthesized so that compound arguments such as
 * `a | b` or `cond ? x : y` expand with the intended precedence.
 */
#define ISO_2022_ENCODING(escseq, byte) (((escseq)<<8)|(byte))
|
||||
enum ISO_2022_ESCSEQ {
|
||||
ISO_2022_CZD = '!',
|
||||
ISO_2022_C1D = '"',
|
||||
ISO_2022_GZD4 = '(',
|
||||
ISO_2022_G1D4 = ')',
|
||||
ISO_2022_G2D4 = '*',
|
||||
ISO_2022_G3D4 = '+',
|
||||
ISO_2022_G1D6 = '-',
|
||||
ISO_2022_G2D6 = '.',
|
||||
ISO_2022_G3D6 = '/',
|
||||
ISO_2022_GZDM4 = ISO_2022_ENCODING('$','('),
|
||||
ISO_2022_G1DM4 = ISO_2022_ENCODING('$',')'),
|
||||
ISO_2022_G2DM4 = ISO_2022_ENCODING('$','*'),
|
||||
ISO_2022_G3DM4 = ISO_2022_ENCODING('$','+'),
|
||||
ISO_2022_G1DM6 = ISO_2022_ENCODING('$','-'),
|
||||
ISO_2022_G2DM6 = ISO_2022_ENCODING('$','.'),
|
||||
ISO_2022_G3DM6 = ISO_2022_ENCODING('$','/'),
|
||||
ISO_2022_DOCS = ISO_2022_ENCODING('%','I'),
|
||||
ISO_2022_IRR = '&'
|
||||
};
|
||||
|
||||
|
||||
/*
 * Complete designations: an ISO_2022_ESCSEQ introducer packed together
 * with the final byte that selects a character set.  The part of each name
 * after ISO_2022_GZ_ identifies the character set the value stands for.
 */

/* introduced by ISO_2022_GZD4 */
#define ISO_2022_GZ_ASCII               ISO_2022_ENCODING(ISO_2022_GZD4, 'B')
#define ISO_2022_GZ_JIS_X_0201_Katakana ISO_2022_ENCODING(ISO_2022_GZD4, 'I')
#define ISO_2022_GZ_JIS_X_0201_Roman    ISO_2022_ENCODING(ISO_2022_GZD4, 'J')

/* introduced by ISO_2022_GZDM4 */
#define ISO_2022_GZ_JIS_C_6226_1978     ISO_2022_ENCODING(ISO_2022_GZDM4, '@')
#define ISO_2022_GZ_JIS_X_0208_1983     ISO_2022_ENCODING(ISO_2022_GZDM4, 'B')
#define ISO_2022_GZ_JIS_X_0212_1990     ISO_2022_ENCODING(ISO_2022_GZDM4, 'D')
#define ISO_2022_GZ_JIS_X_0213_2000_1   ISO_2022_ENCODING(ISO_2022_GZDM4, 'O')
#define ISO_2022_GZ_JIS_X_0213_2000_2   ISO_2022_ENCODING(ISO_2022_GZDM4, 'P')
#define ISO_2022_GZ_JIS_X_0213_2004_1   ISO_2022_ENCODING(ISO_2022_GZDM4, 'Q')
|
||||
|
||||
#define UNSUPPORTED_MODE TRANSCODE_ERROR
|
||||
|
||||
static int
|
||||
get_iso_2022_mode(const unsigned char **in_pos)
|
||||
{
|
||||
int new_mode;
|
||||
const unsigned char *in_p = *in_pos;
|
||||
switch (*in_p++) {
|
||||
case '(':
|
||||
switch (*in_p++) {
|
||||
case 'B': case 'I': case 'J':
|
||||
new_mode = ISO_2022_ENCODING(ISO_2022_GZD4, *(in_p-1));
|
||||
break;
|
||||
default:
|
||||
rb_raise(UNSUPPORTED_MODE, "this mode is not supported (ESC ( %c)", *(in_p-1));
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case '$':
|
||||
switch (*in_p++) {
|
||||
case '@': case 'A': case 'B':
|
||||
new_mode = ISO_2022_ENCODING(ISO_2022_GZDM4, *(in_p-1));
|
||||
break;
|
||||
case '(':
|
||||
switch (*in_p++) {
|
||||
case 'D': case 'O': case 'P': case 'Q':
|
||||
new_mode = ISO_2022_ENCODING(ISO_2022_GZDM4, *(in_p-1));
|
||||
break;
|
||||
default:
|
||||
rb_raise(UNSUPPORTED_MODE, "this mode is not supported (ESC $ ( %c)", *(in_p-1));
|
||||
break;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
rb_raise(UNSUPPORTED_MODE, "this mode is not supported (ESC $ %c)", *(in_p-1));
|
||||
break;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
rb_raise(UNSUPPORTED_MODE, "this mode is not supported (ESC %c)", *(in_p-1));
|
||||
break;
|
||||
}
|
||||
*in_pos = in_p;
|
||||
return new_mode;
|
||||
}
|
||||
|
||||
static void
|
||||
from_iso_2022_jp_transcoder_preprocessor(const unsigned char **in_pos, unsigned char **out_pos,
|
||||
const unsigned char *in_stop, unsigned char *out_stop,
|
||||
rb_transcoding *my_transcoding)
|
||||
{
|
||||
const rb_transcoder *my_transcoder = my_transcoding->transcoder;
|
||||
const unsigned char *in_p = *in_pos;
|
||||
unsigned char *out_p = *out_pos;
|
||||
int cur_mode = ISO_2022_GZ_ASCII;
|
||||
unsigned char c1;
|
||||
unsigned char *out_s = out_stop - my_transcoder->max_output + 1;
|
||||
while (in_p < in_stop) {
|
||||
if (out_p >= out_s) {
|
||||
int len = (out_p - *out_pos);
|
||||
int new_len = (len + my_transcoder->max_output) * 2;
|
||||
*out_pos = (*my_transcoding->flush_func)(my_transcoding, len, new_len);
|
||||
out_p = *out_pos + len;
|
||||
out_s = *out_pos + new_len - my_transcoder->max_output;
|
||||
}
|
||||
c1 = *in_p++;
|
||||
if (c1 == 0x1B) {
|
||||
cur_mode = get_iso_2022_mode(&in_p);
|
||||
}
|
||||
else if (c1 == 0x1E || c1 == 0x1F) {
|
||||
/* SHIFT */
|
||||
rb_raise(UNSUPPORTED_MODE, "shift is not supported");
|
||||
}
|
||||
else if (c1 >= 0x80) {
|
||||
rb_raise(TRANSCODE_ERROR, "invalid byte sequence");
|
||||
}
|
||||
else {
|
||||
switch (cur_mode) {
|
||||
case ISO_2022_GZ_ASCII:
|
||||
case ISO_2022_GZ_JIS_X_0201_Roman:
|
||||
*out_p++ = c1;
|
||||
break;
|
||||
case ISO_2022_GZ_JIS_X_0201_Katakana:
|
||||
*out_p++ = 0x8E;
|
||||
*out_p++ = c1 | 0x80;
|
||||
break;
|
||||
case ISO_2022_GZ_JIS_X_0212_1990:
|
||||
*out_p++ = 0x8F;
|
||||
case ISO_2022_GZ_JIS_C_6226_1978:
|
||||
case ISO_2022_GZ_JIS_X_0208_1983:
|
||||
*out_p++ = c1 | 0x80;
|
||||
*out_p++ = *in_p++ | 0x80;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
/* cleanup */
|
||||
*in_pos = in_p;
|
||||
*out_pos = out_p;
|
||||
}
|
||||
|
||||
static int
|
||||
select_iso_2022_mode(unsigned char **out_pos, int new_mode)
|
||||
{
|
||||
unsigned char *out_p = *out_pos;
|
||||
*out_p++ = '\x1b';
|
||||
switch (new_mode>>8) {
|
||||
case ISO_2022_GZD4:
|
||||
*out_p++ = new_mode >> 8;
|
||||
*out_p++ = new_mode & 0x7F;
|
||||
break;
|
||||
case ISO_2022_GZDM4:
|
||||
*out_p++ = new_mode >> 16;
|
||||
if ((new_mode & 0x7F) != '@' &&
|
||||
(new_mode & 0x7F) != 'A' &&
|
||||
(new_mode & 0x7F) != 'B')
|
||||
{
|
||||
*out_p++ = (new_mode>>8) & 0x7F;
|
||||
}
|
||||
*out_p++ = new_mode & 0x7F;
|
||||
break;
|
||||
default:
|
||||
rb_raise(UNSUPPORTED_MODE, "this mode is not supported.");
|
||||
break;
|
||||
}
|
||||
*out_pos = out_p;
|
||||
return new_mode;
|
||||
}
|
||||
|
||||
static void
|
||||
to_iso_2022_jp_transcoder_postprocessor(const unsigned char **in_pos, unsigned char **out_pos,
|
||||
const unsigned char *in_stop, unsigned char *out_stop,
|
||||
rb_transcoding *my_transcoding)
|
||||
{
|
||||
const rb_transcoder *my_transcoder = my_transcoding->transcoder;
|
||||
const unsigned char *in_p = *in_pos;
|
||||
unsigned char *out_p = *out_pos;
|
||||
int cur_mode = ISO_2022_GZ_ASCII, new_mode = 0;
|
||||
unsigned char next_byte;
|
||||
unsigned char *out_s = out_stop - my_transcoder->max_output + 1;
|
||||
while (in_p < in_stop) {
|
||||
if (out_p >= out_s) {
|
||||
int len = (out_p - *out_pos);
|
||||
int new_len = (len + my_transcoder->max_output) * 2;
|
||||
*out_pos = (*my_transcoding->flush_func)(my_transcoding, len, new_len);
|
||||
out_p = *out_pos + len;
|
||||
out_s = *out_pos + new_len - my_transcoder->max_output;
|
||||
}
|
||||
next_byte = *in_p++;
|
||||
if (next_byte < 0x80) {
|
||||
new_mode = ISO_2022_GZ_ASCII;
|
||||
}
|
||||
else if (next_byte == 0x8E) {
|
||||
new_mode = ISO_2022_GZ_JIS_X_0201_Katakana;
|
||||
next_byte = *in_p++;
|
||||
}
|
||||
else if (next_byte == 0x8F) {
|
||||
new_mode = ISO_2022_GZ_JIS_X_0212_1990;
|
||||
next_byte = *in_p++;
|
||||
}
|
||||
else {
|
||||
new_mode = ISO_2022_GZ_JIS_X_0208_1983;
|
||||
}
|
||||
if (cur_mode != new_mode)
|
||||
cur_mode = select_iso_2022_mode(&out_p, new_mode);
|
||||
if (cur_mode < 0xFFFF) {
|
||||
*out_p++ = next_byte & 0x7F;
|
||||
}
|
||||
else {
|
||||
*out_p++ = next_byte & 0x7F;
|
||||
*out_p++ = *in_p++ & 0x7F;
|
||||
}
|
||||
}
|
||||
if (cur_mode != ISO_2022_GZ_ASCII)
|
||||
cur_mode = select_iso_2022_mode(&out_p, ISO_2022_GZ_ASCII);
|
||||
/* cleanup */
|
||||
*in_pos = in_p;
|
||||
*out_pos = out_p;
|
||||
}
|
||||
|
||||
/*
 * Transcoder descriptor for ISO-2022-JP -> UTF-8.
 * It names the from_EUC_JP table and installs
 * from_iso_2022_jp_transcoder_preprocessor, which strips the escape
 * sequences and emits 8-bit bytes (0x8E/0x8F prefixes, high bit set).
 * NOTE(review): the positional values 8 and 0 are presumably max_output and
 * a "source is UTF-8" flag -- confirm against rb_transcoder in
 * transcode_data.h.
 */
static const rb_transcoder
|
||||
rb_from_ISO_2022_JP = {
|
||||
"ISO-2022-JP", "UTF-8", &from_EUC_JP, 8, 0,
|
||||
&from_iso_2022_jp_transcoder_preprocessor, NULL,
|
||||
};
|
||||
|
||||
/*
 * Transcoder descriptor for UTF-8 -> ISO-2022-JP.
 * It names the to_EUC_JP table and installs
 * to_iso_2022_jp_transcoder_postprocessor, which inserts the escape
 * sequences and clears the high bit of each character byte.
 * NOTE(review): the positional values 8 and 1 are presumably max_output and
 * a "source is UTF-8" flag -- confirm against rb_transcoder in
 * transcode_data.h.
 */
static const rb_transcoder
|
||||
rb_to_ISO_2022_JP = {
|
||||
"UTF-8", "ISO-2022-JP", &to_EUC_JP, 8, 1,
|
||||
NULL, &to_iso_2022_jp_transcoder_postprocessor,
|
||||
};
|
||||
|
||||
/*
 * Initialization entry point for the Japanese transcoders.
 * The ERB tag below is replaced at generation time by the output of
 * transcode_register_code (presumably one rb_register_transcoder call per
 * transcode_tblgen invocation in this template -- confirm in
 * tool/transcode-tblgen.rb); the two hand-written ISO-2022-JP transcoders
 * are then registered explicitly.
 */
void
|
||||
Init_japanese(void)
|
||||
{
|
||||
<%= transcode_register_code %>
|
||||
rb_register_transcoder(&rb_from_ISO_2022_JP);
|
||||
rb_register_transcoder(&rb_to_ISO_2022_JP);
|
||||
}
|
40943
enc/trans/korean.c
40943
enc/trans/korean.c
File diff suppressed because it is too large
Load diff
17
enc/trans/korean.erb.c
Normal file
17
enc/trans/korean.erb.c
Normal file
|
@ -0,0 +1,17 @@
|
|||
#include "transcode_data.h"
|
||||
|
||||
<%
|
||||
require "euckr-tbl"
|
||||
require "cp949-tbl"
|
||||
%>
|
||||
|
||||
<%= transcode_tblgen "UTF-8", "EUC-KR", [["{00-7f}", :nomap], *UCS_TO_EUCKR_TBL] %>
|
||||
<%= transcode_tblgen "EUC-KR", "UTF-8", [["{00-7f}", :nomap], *EUCKR_TO_UCS_TBL] %>
|
||||
<%= transcode_tblgen "UTF-8", "CP949", [["{00-7f}", :nomap], *UCS_TO_CP949_TBL] %>
|
||||
<%= transcode_tblgen "CP949", "UTF-8", [["{00-7f}", :nomap], *CP949_TO_UCS_TBL] %>
|
||||
|
||||
/*
 * Initialization entry point for the Korean transcoders.
 * The body consists solely of the ERB tag below, which is replaced at
 * generation time by the output of transcode_register_code (presumably the
 * registration calls for the EUC-KR and CP949 transcoders generated by this
 * template -- confirm in tool/transcode-tblgen.rb).
 */
void
|
||||
Init_korean(void)
|
||||
{
|
||||
<%= transcode_register_code %>
|
||||
}
|
|
@ -10,7 +10,7 @@ count = 0
|
|||
converters = {}
|
||||
transdir = ARGV[0]
|
||||
outhdr = ARGV[1] || 'transdb.h'
|
||||
Dir.open(transdir) {|d| d.grep(/.+\.[ch]\z/)}.sort_by {|e|
|
||||
Dir.open(transdir) {|d| d.grep(/.+\.[ch]\z/).reject {|n| /\.erb\.c\z/ =~ n }}.sort_by {|e|
|
||||
e.scan(/(\d+)|(\D+)/).map {|n,a| a||[n.size,n.to_i]}.flatten
|
||||
}.each do |fn|
|
||||
open(File.join(transdir,fn)) do |f|
|
||||
|
|
|
@ -1,5 +1,23 @@
|
|||
/* autogenerated. */
|
||||
/* src="single_byte.erb.c", len=2228, checksum=35690 */
|
||||
/* src="iso-8859-2-tbl.rb", len=1525, checksum=18386 */
|
||||
/* src="iso-8859-3-tbl.rb", len=1391, checksum=11560 */
|
||||
/* src="iso-8859-4-tbl.rb", len=1518, checksum=18050 */
|
||||
/* src="iso-8859-5-tbl.rb", len=1562, checksum=19680 */
|
||||
/* src="iso-8859-6-tbl.rb", len=841, checksum=46155 */
|
||||
/* src="iso-8859-7-tbl.rb", len=1505, checksum=17611 */
|
||||
/* src="iso-8859-8-tbl.rb", len=961, checksum=53500 */
|
||||
/* src="iso-8859-9-tbl.rb", len=1474, checksum=16589 */
|
||||
/* src="iso-8859-10-tbl.rb", len=1516, checksum=18011 */
|
||||
/* src="iso-8859-11-tbl.rb", len=1436, checksum=14115 */
|
||||
/* src="iso-8859-13-tbl.rb", len=1525, checksum=18280 */
|
||||
/* src="iso-8859-14-tbl.rb", len=1522, checksum=18993 */
|
||||
/* src="iso-8859-15-tbl.rb", len=1478, checksum=16787 */
|
||||
|
||||
#include "transcode_data.h"
|
||||
|
||||
|
||||
|
||||
static const unsigned char
|
||||
from_US_ASCII_offsets[256] = {
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
|
@ -25,49 +43,43 @@ from_US_ASCII_infos[2] = {
|
|||
};
|
||||
static const BYTE_LOOKUP
|
||||
from_US_ASCII = {
|
||||
/* used from from_US_ASCII */
|
||||
/* used from to_US_ASCII */
|
||||
/* used from to_ASCII_8BIT */
|
||||
/* used from from_ASCII_8BIT */
|
||||
from_US_ASCII_offsets,
|
||||
from_US_ASCII_infos
|
||||
};
|
||||
|
||||
|
||||
static const rb_transcoder
|
||||
rb_from_US_ASCII = {
|
||||
"US-ASCII", "UTF-8", &from_US_ASCII, 1, 0,
|
||||
NULL, NULL,
|
||||
};
|
||||
|
||||
|
||||
static const rb_transcoder
|
||||
rb_to_US_ASCII = {
|
||||
"UTF-8", "US-ASCII", &from_US_ASCII, 1, 1,
|
||||
NULL, NULL,
|
||||
};
|
||||
|
||||
|
||||
static const rb_transcoder
|
||||
rb_from_ASCII_8BIT = {
|
||||
"ASCII-8BIT", "UTF-8", &from_US_ASCII, 1, 0,
|
||||
NULL, NULL,
|
||||
};
|
||||
|
||||
|
||||
static const rb_transcoder
|
||||
rb_to_ASCII_8BIT = {
|
||||
"UTF-8", "ASCII-8BIT", &from_US_ASCII, 1, 1,
|
||||
NULL, NULL,
|
||||
};
|
||||
|
||||
|
||||
|
||||
|
||||
static const unsigned char
|
||||
from_ISO_8859_1_offsets[256] = {
|
||||
/* used from from_ISO_8859_1 */
|
||||
/* used from from_ISO_8859_2 */
|
||||
/* used from from_ISO_8859_4 */
|
||||
/* used from from_ISO_8859_5 */
|
||||
/* used from from_ISO_8859_9 */
|
||||
/* used from from_ISO_8859_10 */
|
||||
/* used from from_ISO_8859_13 */
|
||||
/* used from from_ISO_8859_14 */
|
||||
/* used from from_ISO_8859_15 */
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
|
@ -127,6 +139,7 @@ from_ISO_8859_1 = {
|
|||
from_ISO_8859_1_infos
|
||||
};
|
||||
|
||||
|
||||
static const rb_transcoder
|
||||
rb_from_ISO_8859_1 = {
|
||||
"ISO-8859-1", "UTF-8", &from_ISO_8859_1, 2, 0,
|
||||
|
@ -135,8 +148,6 @@ rb_from_ISO_8859_1 = {
|
|||
|
||||
static const unsigned char
|
||||
to_ISO_8859_1_C2_offsets[64] = {
|
||||
/* used from to_ISO_8859_1_C2 */
|
||||
/* used from to_ISO_8859_1_C3 */
|
||||
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
|
||||
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
|
||||
32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
|
||||
|
@ -163,8 +174,6 @@ to_ISO_8859_1_C2_infos[64] = {
|
|||
};
|
||||
static const BYTE_LOOKUP
|
||||
to_ISO_8859_1_C2 = {
|
||||
/* used from to_ISO_8859_1 */
|
||||
/* used from to_ISO_8859_9 */
|
||||
to_ISO_8859_1_C2_offsets,
|
||||
to_ISO_8859_1_C2_infos
|
||||
};
|
||||
|
@ -190,8 +199,6 @@ to_ISO_8859_1_C3_infos[64] = {
|
|||
};
|
||||
static const BYTE_LOOKUP
|
||||
to_ISO_8859_1_C3 = {
|
||||
/* used from to_ISO_8859_1 */
|
||||
/* used from to_ISO_8859_15 */
|
||||
to_ISO_8859_1_C2_offsets,
|
||||
to_ISO_8859_1_C3_infos
|
||||
};
|
||||
|
@ -226,6 +233,7 @@ to_ISO_8859_1 = {
|
|||
to_ISO_8859_1_infos
|
||||
};
|
||||
|
||||
|
||||
static const rb_transcoder
|
||||
rb_to_ISO_8859_1 = {
|
||||
"UTF-8", "ISO-8859-1", &to_ISO_8859_1, 1, 1,
|
||||
|
@ -274,6 +282,7 @@ from_ISO_8859_2 = {
|
|||
from_ISO_8859_2_infos
|
||||
};
|
||||
|
||||
|
||||
static const rb_transcoder
|
||||
rb_from_ISO_8859_2 = {
|
||||
"ISO-8859-2", "UTF-8", &from_ISO_8859_2, 2, 0,
|
||||
|
@ -398,9 +407,6 @@ to_ISO_8859_2_CB = {
|
|||
|
||||
static const unsigned char
|
||||
to_ISO_8859_2_offsets[256] = {
|
||||
/* used from to_ISO_8859_2 */
|
||||
/* used from to_ISO_8859_3 */
|
||||
/* used from to_ISO_8859_4 */
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
|
@ -431,6 +437,7 @@ to_ISO_8859_2 = {
|
|||
to_ISO_8859_2_infos
|
||||
};
|
||||
|
||||
|
||||
static const rb_transcoder
|
||||
rb_to_ISO_8859_2 = {
|
||||
"UTF-8", "ISO-8859-2", &to_ISO_8859_2, 1, 1,
|
||||
|
@ -496,6 +503,7 @@ from_ISO_8859_3 = {
|
|||
from_ISO_8859_3_infos
|
||||
};
|
||||
|
||||
|
||||
static const rb_transcoder
|
||||
rb_from_ISO_8859_3 = {
|
||||
"ISO-8859-3", "UTF-8", &from_ISO_8859_3, 2, 0,
|
||||
|
@ -628,6 +636,7 @@ to_ISO_8859_3 = {
|
|||
to_ISO_8859_3_infos
|
||||
};
|
||||
|
||||
|
||||
static const rb_transcoder
|
||||
rb_to_ISO_8859_3 = {
|
||||
"UTF-8", "ISO-8859-3", &to_ISO_8859_3, 1, 1,
|
||||
|
@ -676,6 +685,7 @@ from_ISO_8859_4 = {
|
|||
from_ISO_8859_4_infos
|
||||
};
|
||||
|
||||
|
||||
static const rb_transcoder
|
||||
rb_from_ISO_8859_4 = {
|
||||
"ISO-8859-4", "UTF-8", &from_ISO_8859_4, 2, 0,
|
||||
|
@ -737,8 +747,6 @@ to_ISO_8859_4_C3 = {
|
|||
|
||||
static const unsigned char
|
||||
to_ISO_8859_4_C4_offsets[64] = {
|
||||
/* used from to_ISO_8859_4_C4 */
|
||||
/* used from to_ISO_8859_10_C4 */
|
||||
0, 1, 27, 27, 2, 3, 27, 27, 27, 27, 27, 27, 4, 5, 27, 27,
|
||||
6, 7, 8, 9, 27, 27, 10, 11, 12, 13, 27, 27, 27, 27, 27, 27,
|
||||
27, 27, 14, 15, 27, 27, 27, 27, 16, 17, 18, 19, 27, 27, 20, 21,
|
||||
|
@ -812,6 +820,7 @@ to_ISO_8859_4 = {
|
|||
to_ISO_8859_4_infos
|
||||
};
|
||||
|
||||
|
||||
static const rb_transcoder
|
||||
rb_to_ISO_8859_4 = {
|
||||
"UTF-8", "ISO-8859-4", &to_ISO_8859_4, 1, 1,
|
||||
|
@ -892,6 +901,7 @@ from_ISO_8859_5 = {
|
|||
from_ISO_8859_5_infos
|
||||
};
|
||||
|
||||
|
||||
static const rb_transcoder
|
||||
rb_from_ISO_8859_5 = {
|
||||
"ISO-8859-5", "UTF-8", &from_ISO_8859_5, 3, 0,
|
||||
|
@ -1044,6 +1054,7 @@ to_ISO_8859_5 = {
|
|||
to_ISO_8859_5_infos
|
||||
};
|
||||
|
||||
|
||||
static const rb_transcoder
|
||||
rb_to_ISO_8859_5 = {
|
||||
"UTF-8", "ISO-8859-5", &to_ISO_8859_5, 1, 1,
|
||||
|
@ -1100,6 +1111,7 @@ from_ISO_8859_6 = {
|
|||
from_ISO_8859_6_infos
|
||||
};
|
||||
|
||||
|
||||
static const rb_transcoder
|
||||
rb_from_ISO_8859_6 = {
|
||||
"ISO-8859-6", "UTF-8", &from_ISO_8859_6, 2, 0,
|
||||
|
@ -1207,6 +1219,7 @@ to_ISO_8859_6 = {
|
|||
to_ISO_8859_6_infos
|
||||
};
|
||||
|
||||
|
||||
static const rb_transcoder
|
||||
rb_to_ISO_8859_6 = {
|
||||
"UTF-8", "ISO-8859-6", &to_ISO_8859_6, 1, 1,
|
||||
|
@ -1305,6 +1318,7 @@ from_ISO_8859_7 = {
|
|||
from_ISO_8859_7_infos
|
||||
};
|
||||
|
||||
|
||||
static const rb_transcoder
|
||||
rb_from_ISO_8859_7 = {
|
||||
"ISO-8859-7", "UTF-8", &from_ISO_8859_7, 3, 0,
|
||||
|
@ -1492,6 +1506,7 @@ to_ISO_8859_7 = {
|
|||
to_ISO_8859_7_infos
|
||||
};
|
||||
|
||||
|
||||
static const rb_transcoder
|
||||
rb_to_ISO_8859_7 = {
|
||||
"UTF-8", "ISO-8859-7", &to_ISO_8859_7, 1, 1,
|
||||
|
@ -1573,6 +1588,7 @@ from_ISO_8859_8 = {
|
|||
from_ISO_8859_8_infos
|
||||
};
|
||||
|
||||
|
||||
static const rb_transcoder
|
||||
rb_from_ISO_8859_8 = {
|
||||
"ISO-8859-8", "UTF-8", &from_ISO_8859_8, 3, 0,
|
||||
|
@ -1670,9 +1686,6 @@ to_ISO_8859_8_E2_80 = {
|
|||
|
||||
static const unsigned char
|
||||
to_ISO_8859_8_E2_offsets[64] = {
|
||||
/* used from to_ISO_8859_8_E2 */
|
||||
/* used from to_ISO_8859_10_E2 */
|
||||
/* used from to_ISO_8859_13_E2 */
|
||||
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
|
@ -1719,6 +1732,7 @@ to_ISO_8859_8 = {
|
|||
to_ISO_8859_8_infos
|
||||
};
|
||||
|
||||
|
||||
static const rb_transcoder
|
||||
rb_to_ISO_8859_8 = {
|
||||
"UTF-8", "ISO-8859-8", &to_ISO_8859_8, 1, 1,
|
||||
|
@ -1767,6 +1781,7 @@ from_ISO_8859_9 = {
|
|||
from_ISO_8859_9_infos
|
||||
};
|
||||
|
||||
|
||||
static const rb_transcoder
|
||||
rb_from_ISO_8859_9 = {
|
||||
"ISO-8859-9", "UTF-8", &from_ISO_8859_9, 2, 0,
|
||||
|
@ -1870,6 +1885,7 @@ to_ISO_8859_9 = {
|
|||
to_ISO_8859_9_infos
|
||||
};
|
||||
|
||||
|
||||
static const rb_transcoder
|
||||
rb_to_ISO_8859_9 = {
|
||||
"UTF-8", "ISO-8859-9", &to_ISO_8859_9, 1, 1,
|
||||
|
@ -1950,6 +1966,7 @@ from_ISO_8859_10 = {
|
|||
from_ISO_8859_10_infos
|
||||
};
|
||||
|
||||
|
||||
static const rb_transcoder
|
||||
rb_from_ISO_8859_10 = {
|
||||
"ISO-8859-10", "UTF-8", &from_ISO_8859_10, 3, 0,
|
||||
|
@ -2076,8 +2093,6 @@ to_ISO_8859_10_E2 = {
|
|||
|
||||
static const unsigned char
|
||||
to_ISO_8859_10_offsets[256] = {
|
||||
/* used from to_ISO_8859_10 */
|
||||
/* used from to_ISO_8859_13 */
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
|
@ -2108,6 +2123,7 @@ to_ISO_8859_10 = {
|
|||
to_ISO_8859_10_infos
|
||||
};
|
||||
|
||||
|
||||
static const rb_transcoder
|
||||
rb_to_ISO_8859_10 = {
|
||||
"UTF-8", "ISO-8859-10", &to_ISO_8859_10, 1, 1,
|
||||
|
@ -2203,6 +2219,7 @@ from_ISO_8859_11 = {
|
|||
from_ISO_8859_11_infos
|
||||
};
|
||||
|
||||
|
||||
static const rb_transcoder
|
||||
rb_from_ISO_8859_11 = {
|
||||
"ISO-8859-11", "UTF-8", &from_ISO_8859_11, 3, 0,
|
||||
|
@ -2337,6 +2354,7 @@ to_ISO_8859_11 = {
|
|||
to_ISO_8859_11_infos
|
||||
};
|
||||
|
||||
|
||||
static const rb_transcoder
|
||||
rb_to_ISO_8859_11 = {
|
||||
"UTF-8", "ISO-8859-11", &to_ISO_8859_11, 1, 1,
|
||||
|
@ -2417,6 +2435,7 @@ from_ISO_8859_13 = {
|
|||
from_ISO_8859_13_infos
|
||||
};
|
||||
|
||||
|
||||
static const rb_transcoder
|
||||
rb_from_ISO_8859_13 = {
|
||||
"ISO-8859-13", "UTF-8", &from_ISO_8859_13, 3, 0,
|
||||
|
@ -2562,6 +2581,7 @@ to_ISO_8859_13 = {
|
|||
to_ISO_8859_13_infos
|
||||
};
|
||||
|
||||
|
||||
static const rb_transcoder
|
||||
rb_to_ISO_8859_13 = {
|
||||
"UTF-8", "ISO-8859-13", &to_ISO_8859_13, 1, 1,
|
||||
|
@ -2642,6 +2662,7 @@ from_ISO_8859_14 = {
|
|||
from_ISO_8859_14_infos
|
||||
};
|
||||
|
||||
|
||||
static const rb_transcoder
|
||||
rb_from_ISO_8859_14 = {
|
||||
"ISO-8859-14", "UTF-8", &from_ISO_8859_14, 3, 0,
|
||||
|
@ -2864,6 +2885,7 @@ to_ISO_8859_14 = {
|
|||
to_ISO_8859_14_infos
|
||||
};
|
||||
|
||||
|
||||
static const rb_transcoder
|
||||
rb_to_ISO_8859_14 = {
|
||||
"UTF-8", "ISO-8859-14", &to_ISO_8859_14, 1, 1,
|
||||
|
@ -2944,6 +2966,7 @@ from_ISO_8859_15 = {
|
|||
from_ISO_8859_15_infos
|
||||
};
|
||||
|
||||
|
||||
static const rb_transcoder
|
||||
rb_from_ISO_8859_15 = {
|
||||
"ISO-8859-15", "UTF-8", &from_ISO_8859_15, 3, 0,
|
||||
|
@ -3064,12 +3087,14 @@ to_ISO_8859_15 = {
|
|||
to_ISO_8859_15_infos
|
||||
};
|
||||
|
||||
|
||||
static const rb_transcoder
|
||||
rb_to_ISO_8859_15 = {
|
||||
"UTF-8", "ISO-8859-15", &to_ISO_8859_15, 1, 1,
|
||||
NULL, NULL,
|
||||
};
|
||||
|
||||
|
||||
void
|
||||
Init_single_byte(void)
|
||||
{
|
||||
|
@ -3105,5 +3130,7 @@ Init_single_byte(void)
|
|||
rb_register_transcoder(&rb_to_ISO_8859_14);
|
||||
rb_register_transcoder(&rb_from_ISO_8859_15);
|
||||
rb_register_transcoder(&rb_to_ISO_8859_15);
|
||||
|
||||
}
|
||||
/* Footprint (bytes): gross: 27876, saved: 4544, net: 23332 */
|
||||
|
||||
|
||||
|
|
62
enc/trans/single_byte.erb.c
Normal file
62
enc/trans/single_byte.erb.c
Normal file
|
@ -0,0 +1,62 @@
|
|||
#include "transcode_data.h"
|
||||
|
||||
<%
|
||||
us_ascii_map = [["{00-7f}", :nomap], ["{80-ff}", :undef]]
|
||||
|
||||
ISO_8859_1_TO_UCS_TBL = (0x80..0xff).map {|c| ["%02X" % c, c] }
|
||||
CONTROL1_TO_UCS_TBL = (0x80..0x9f).map {|c| ["%02X" % c, c] }
|
||||
|
||||
require 'iso-8859-2-tbl'
|
||||
require 'iso-8859-3-tbl'
|
||||
require 'iso-8859-4-tbl'
|
||||
require 'iso-8859-5-tbl'
|
||||
require 'iso-8859-6-tbl'
|
||||
require 'iso-8859-7-tbl'
|
||||
require 'iso-8859-8-tbl'
|
||||
require 'iso-8859-9-tbl'
|
||||
require 'iso-8859-10-tbl'
|
||||
require 'iso-8859-11-tbl'
|
||||
require 'iso-8859-13-tbl'
|
||||
require 'iso-8859-14-tbl'
|
||||
require 'iso-8859-15-tbl'
|
||||
|
||||
%>
|
||||
|
||||
<%= transcode_tblgen "US-ASCII", "UTF-8", us_ascii_map %>
|
||||
<%= transcode_tblgen "UTF-8", "US-ASCII", us_ascii_map %>
|
||||
<%= transcode_tblgen "ASCII-8BIT", "UTF-8", us_ascii_map %>
|
||||
<%= transcode_tblgen "UTF-8", "ASCII-8BIT", us_ascii_map %>
|
||||
|
||||
<%
|
||||
# Generates the C source for both conversion directions of one ISO-8859
# encoding and returns it as a single string (decoder, newline, encoder).
#
# name       - encoding name as passed to transcode_tblgen, e.g. "ISO-8859-2".
# tbl_to_ucs - array of [byte, code point] pairs for this encoding; the
#              pairs of CONTROL1_TO_UCS_TBL are prepended, and bytes 00-7f
#              are mapped with :nomap in both directions.
#
# The decoder is generated before the encoder; the reverse table is the
# forward table with every pair swapped.
def transcode_tblgen_iso8859(name, tbl_to_ucs)
  tbl_to_ucs = CONTROL1_TO_UCS_TBL + tbl_to_ucs
  decoder = transcode_tblgen(name, "UTF-8", [["{00-7f}", :nomap], *tbl_to_ucs])
  tbl_from_ucs = tbl_to_ucs.map {|byte, ucs| [ucs, byte] }
  encoder = transcode_tblgen("UTF-8", name, [["{00-7f}", :nomap], *tbl_from_ucs])
  "#{decoder}\n#{encoder}"
end
|
||||
%>
|
||||
|
||||
<%= transcode_tblgen_iso8859("ISO-8859-1", ISO_8859_1_TO_UCS_TBL) %>
|
||||
<%= transcode_tblgen_iso8859("ISO-8859-2", ISO_8859_2_TO_UCS_TBL) %>
|
||||
<%= transcode_tblgen_iso8859("ISO-8859-3", ISO_8859_3_TO_UCS_TBL) %>
|
||||
<%= transcode_tblgen_iso8859("ISO-8859-4", ISO_8859_4_TO_UCS_TBL) %>
|
||||
<%= transcode_tblgen_iso8859("ISO-8859-5", ISO_8859_5_TO_UCS_TBL) %>
|
||||
<%= transcode_tblgen_iso8859("ISO-8859-6", ISO_8859_6_TO_UCS_TBL) %>
|
||||
<%= transcode_tblgen_iso8859("ISO-8859-7", ISO_8859_7_TO_UCS_TBL) %>
|
||||
<%= transcode_tblgen_iso8859("ISO-8859-8", ISO_8859_8_TO_UCS_TBL) %>
|
||||
<%= transcode_tblgen_iso8859("ISO-8859-9", ISO_8859_9_TO_UCS_TBL) %>
|
||||
<%= transcode_tblgen_iso8859("ISO-8859-10", ISO_8859_10_TO_UCS_TBL) %>
|
||||
<%= transcode_tblgen_iso8859("ISO-8859-11", ISO_8859_11_TO_UCS_TBL) %>
|
||||
<%= transcode_tblgen_iso8859("ISO-8859-13", ISO_8859_13_TO_UCS_TBL) %>
|
||||
<%= transcode_tblgen_iso8859("ISO-8859-14", ISO_8859_14_TO_UCS_TBL) %>
|
||||
<%= transcode_tblgen_iso8859("ISO-8859-15", ISO_8859_15_TO_UCS_TBL) %>
|
||||
|
||||
/*
 * Initialization entry point for the single-byte transcoders.
 * The body consists solely of the ERB tag below, which is replaced at
 * generation time by the output of transcode_register_code; in the
 * regenerated single_byte.c this function contains rb_register_transcoder
 * calls for the generated transcoders.
 */
void
|
||||
Init_single_byte(void)
|
||||
{
|
||||
<%= transcode_register_code %>
|
||||
}
|
||||
|
14803
enc/trans/sjis-tbl.rb
Normal file
14803
enc/trans/sjis-tbl.rb
Normal file
File diff suppressed because it is too large
Load diff
|
@ -1,5 +1,5 @@
|
|||
/* Autogenerated, do not change */
|
||||
/* Report bugs to Martin Duerst (duerst@it.aoyama.ac.jp) */
|
||||
/* autogenerated. */
|
||||
/* src="utf_16_32.erb.c", len=8014, checksum=26811 */
|
||||
|
||||
#include "transcode_data.h"
|
||||
|
||||
|
@ -181,16 +181,9 @@ fun_so_to_utf_32le(const unsigned char* s, unsigned char* o)
|
|||
{
|
||||
return 4;
|
||||
}
|
||||
|
||||
static const unsigned char
|
||||
from_UTF_16BE_00_offsets[256] = {
|
||||
/* used by from_UTF_16BE_00 */
|
||||
/* used by from_UTF_32BE_00_00_D8 */
|
||||
/* used by from_UTF_32BE_00_01 */
|
||||
/* used by from_UTF_32BE_00_11 */
|
||||
/* used by from_UTF_16BE_D8 */
|
||||
/* used by from_UTF_16LE */
|
||||
/* used by from_UTF_32LE */
|
||||
/* used by from_UTF_16LE_00_D8 */
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
|
@ -210,42 +203,26 @@ from_UTF_16BE_00_offsets[256] = {
|
|||
};
|
||||
static const struct byte_lookup* const
|
||||
from_UTF_16BE_00_infos[1] = {
|
||||
/* used by from_UTF_16BE_00 */
|
||||
/* used by to_UTF_32BE_C2 */
|
||||
FUNso,
|
||||
};
|
||||
static const BYTE_LOOKUP
|
||||
from_UTF_16BE_00 = {
|
||||
/* used as from_UTF_16BE */
|
||||
/* used as from_UTF_32BE_00_00 */
|
||||
/* used as from_UTF_32BE_00_01 */
|
||||
/* used as from_UTF_16BE_D8_00 */
|
||||
/* used as from_UTF_32LE_00_00 */
|
||||
/* used as from_UTF_32LE_00_D8 */
|
||||
from_UTF_16BE_00_offsets,
|
||||
from_UTF_16BE_00_infos
|
||||
};
|
||||
|
||||
static const struct byte_lookup* const
|
||||
from_UTF_32BE_00_00_D8_infos[1] = {
|
||||
from_UTF_16BE_D8_00_00_infos[1] = {
|
||||
INVALID,
|
||||
};
|
||||
static const BYTE_LOOKUP
|
||||
from_UTF_32BE_00_00_D8 = {
|
||||
/* used as from_UTF_32BE_00_00 */
|
||||
/* used as from_UTF_32BE_00_11 */
|
||||
/* used as from_UTF_16BE_D8_00 */
|
||||
/* used as from_UTF_16BE */
|
||||
/* used as from_UTF_32LE_00_00 */
|
||||
/* used as from_UTF_32LE_00_D8 */
|
||||
from_UTF_16BE_D8_00_00 = {
|
||||
from_UTF_16BE_00_offsets,
|
||||
from_UTF_32BE_00_00_D8_infos
|
||||
from_UTF_16BE_D8_00_00_infos
|
||||
};
|
||||
|
||||
static const unsigned char
|
||||
from_UTF_16BE_D8_00_offsets[256] = {
|
||||
/* used by from_UTF_16BE_D8_00 */
|
||||
/* used by from_UTF_16LE_00_D8_00 */
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
|
@ -265,9 +242,7 @@ from_UTF_16BE_D8_00_offsets[256] = {
|
|||
};
|
||||
static const struct byte_lookup* const
|
||||
from_UTF_16BE_D8_00_infos[2] = {
|
||||
/* used by from_UTF_16BE_D8_00 */
|
||||
/* used by from_UTF_32LE_00_D8 */
|
||||
&from_UTF_32BE_00_00_D8, &from_UTF_16BE_00,
|
||||
&from_UTF_16BE_D8_00_00, &from_UTF_16BE_00,
|
||||
};
|
||||
static const BYTE_LOOKUP
|
||||
from_UTF_16BE_D8_00 = {
|
||||
|
@ -287,8 +262,6 @@ from_UTF_16BE_D8 = {
|
|||
|
||||
static const unsigned char
|
||||
from_UTF_16BE_offsets[256] = {
|
||||
/* used by from_UTF_16BE */
|
||||
/* used by from_UTF_16LE_00 */
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
|
@ -309,7 +282,7 @@ from_UTF_16BE_offsets[256] = {
|
|||
static const struct byte_lookup* const
|
||||
from_UTF_16BE_infos[3] = {
|
||||
&from_UTF_16BE_00, &from_UTF_16BE_D8,
|
||||
&from_UTF_32BE_00_00_D8,
|
||||
&from_UTF_16BE_D8_00_00,
|
||||
};
|
||||
static const BYTE_LOOKUP
|
||||
from_UTF_16BE = {
|
||||
|
@ -317,6 +290,8 @@ from_UTF_16BE = {
|
|||
from_UTF_16BE_infos
|
||||
};
|
||||
|
||||
|
||||
|
||||
static const rb_transcoder
|
||||
rb_from_UTF_16BE = {
|
||||
"UTF-16BE", "UTF-8", &from_UTF_16BE, 4, 0,
|
||||
|
@ -324,165 +299,94 @@ rb_from_UTF_16BE = {
|
|||
};
|
||||
|
||||
static const unsigned char
|
||||
to_UTF_32BE_C2_offsets[64] = {
|
||||
/* used by to_UTF_32BE_C2 */
|
||||
/* used by to_UTF_32BE_E1 */
|
||||
/* used by to_UTF_32BE_F1 */
|
||||
to_UTF_16BE_C2_offsets[64] = {
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
};
|
||||
static const BYTE_LOOKUP
|
||||
to_UTF_32BE_C2 = {
|
||||
/* used as to_UTF_32BE */
|
||||
/* used as to_UTF_16BE */
|
||||
/* used as to_UTF_32BE_E0 */
|
||||
/* used as to_UTF_16BE_E0 */
|
||||
/* used as to_UTF_16BE_E1 */
|
||||
/* used as to_UTF_32BE_E1 */
|
||||
/* used as to_UTF_32BE_ED */
|
||||
/* used as to_UTF_16BE_ED */
|
||||
/* used as to_UTF_16BE_F0_90 */
|
||||
/* used as to_UTF_32BE_F0_90 */
|
||||
/* used as to_UTF_16BE_F1_80 */
|
||||
/* used as to_UTF_32BE_F1_80 */
|
||||
/* used as to_UTF_32BE_F4_80 */
|
||||
/* used as to_UTF_16BE_F4_80 */
|
||||
/* used as to_UTF_16LE */
|
||||
/* used as to_UTF_32LE */
|
||||
/* used as to_UTF_16LE_E0 */
|
||||
/* used as to_UTF_32LE_E0 */
|
||||
/* used as to_UTF_32LE_E1 */
|
||||
/* used as to_UTF_16LE_E1 */
|
||||
/* used as to_UTF_32LE_ED */
|
||||
/* used as to_UTF_16LE_ED */
|
||||
/* used as to_UTF_16LE_F0_90 */
|
||||
/* used as to_UTF_32LE_F0_90 */
|
||||
/* used as to_UTF_16LE_F1_80 */
|
||||
/* used as to_UTF_32LE_F1_80 */
|
||||
/* used as to_UTF_16LE_F4_80 */
|
||||
/* used as to_UTF_32LE_F4_80 */
|
||||
to_UTF_32BE_C2_offsets,
|
||||
to_UTF_16BE_C2 = {
|
||||
to_UTF_16BE_C2_offsets,
|
||||
from_UTF_16BE_00_infos
|
||||
};
|
||||
|
||||
static const unsigned char
|
||||
to_UTF_32BE_E0_offsets[64] = {
|
||||
/* used by to_UTF_32BE_E0 */
|
||||
/* used by to_UTF_32BE_ED */
|
||||
to_UTF_16BE_E0_offsets[64] = {
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
};
|
||||
static const struct byte_lookup* const
|
||||
to_UTF_32BE_E0_infos[2] = {
|
||||
INVALID, &to_UTF_32BE_C2,
|
||||
to_UTF_16BE_E0_infos[2] = {
|
||||
INVALID, &to_UTF_16BE_C2,
|
||||
};
|
||||
static const BYTE_LOOKUP
|
||||
to_UTF_32BE_E0 = {
|
||||
/* used as to_UTF_32BE */
|
||||
/* used as to_UTF_16BE */
|
||||
/* used as to_UTF_16LE */
|
||||
/* used as to_UTF_32LE */
|
||||
to_UTF_32BE_E0_offsets,
|
||||
to_UTF_32BE_E0_infos
|
||||
to_UTF_16BE_E0 = {
|
||||
to_UTF_16BE_E0_offsets,
|
||||
to_UTF_16BE_E0_infos
|
||||
};
|
||||
|
||||
static const struct byte_lookup* const
|
||||
to_UTF_32BE_E1_infos[1] = {
|
||||
&to_UTF_32BE_C2,
|
||||
to_UTF_16BE_E1_infos[1] = {
|
||||
&to_UTF_16BE_C2,
|
||||
};
|
||||
static const BYTE_LOOKUP
|
||||
to_UTF_32BE_E1 = {
|
||||
/* used as to_UTF_32BE */
|
||||
/* used as to_UTF_16BE */
|
||||
/* used as to_UTF_32BE_F0 */
|
||||
/* used as to_UTF_16BE_F0 */
|
||||
/* used as to_UTF_16BE_F1 */
|
||||
/* used as to_UTF_32BE_F1 */
|
||||
/* used as to_UTF_16BE_F4 */
|
||||
/* used as to_UTF_32BE_F4 */
|
||||
/* used as to_UTF_32LE */
|
||||
/* used as to_UTF_16LE */
|
||||
/* used as to_UTF_16LE_F0 */
|
||||
/* used as to_UTF_32LE_F0 */
|
||||
/* used as to_UTF_16LE_F1 */
|
||||
/* used as to_UTF_32LE_F1 */
|
||||
/* used as to_UTF_16LE_F4 */
|
||||
/* used as to_UTF_32LE_F4 */
|
||||
to_UTF_32BE_C2_offsets,
|
||||
to_UTF_32BE_E1_infos
|
||||
to_UTF_16BE_E1 = {
|
||||
to_UTF_16BE_C2_offsets,
|
||||
to_UTF_16BE_E1_infos
|
||||
};
|
||||
|
||||
static const struct byte_lookup* const
|
||||
to_UTF_32BE_ED_infos[2] = {
|
||||
&to_UTF_32BE_C2, INVALID,
|
||||
to_UTF_16BE_ED_infos[2] = {
|
||||
&to_UTF_16BE_C2, INVALID,
|
||||
};
|
||||
static const BYTE_LOOKUP
|
||||
to_UTF_32BE_ED = {
|
||||
/* used as to_UTF_32BE */
|
||||
/* used as to_UTF_16BE */
|
||||
/* used as to_UTF_16LE */
|
||||
/* used as to_UTF_32LE */
|
||||
to_UTF_32BE_E0_offsets,
|
||||
to_UTF_32BE_ED_infos
|
||||
to_UTF_16BE_ED = {
|
||||
to_UTF_16BE_E0_offsets,
|
||||
to_UTF_16BE_ED_infos
|
||||
};
|
||||
|
||||
static const unsigned char
|
||||
to_UTF_32BE_F0_offsets[64] = {
|
||||
/* used by to_UTF_32BE_F0 */
|
||||
/* used by to_UTF_32BE_F4 */
|
||||
to_UTF_16BE_F0_offsets[64] = {
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
};
|
||||
static const struct byte_lookup* const
|
||||
to_UTF_32BE_F0_infos[2] = {
|
||||
INVALID, &to_UTF_32BE_E1,
|
||||
to_UTF_16BE_F0_infos[2] = {
|
||||
INVALID, &to_UTF_16BE_E1,
|
||||
};
|
||||
static const BYTE_LOOKUP
|
||||
to_UTF_32BE_F0 = {
|
||||
/* used as to_UTF_32BE */
|
||||
/* used as to_UTF_16BE */
|
||||
/* used as to_UTF_16LE */
|
||||
/* used as to_UTF_32LE */
|
||||
to_UTF_32BE_F0_offsets,
|
||||
to_UTF_32BE_F0_infos
|
||||
to_UTF_16BE_F0 = {
|
||||
to_UTF_16BE_F0_offsets,
|
||||
to_UTF_16BE_F0_infos
|
||||
};
|
||||
|
||||
static const struct byte_lookup* const
|
||||
to_UTF_32BE_F1_infos[1] = {
|
||||
&to_UTF_32BE_E1,
|
||||
to_UTF_16BE_F1_infos[1] = {
|
||||
&to_UTF_16BE_E1,
|
||||
};
|
||||
static const BYTE_LOOKUP
|
||||
to_UTF_32BE_F1 = {
|
||||
/* used as to_UTF_32BE */
|
||||
/* used as to_UTF_16BE */
|
||||
/* used as to_UTF_16LE */
|
||||
/* used as to_UTF_32LE */
|
||||
to_UTF_32BE_C2_offsets,
|
||||
to_UTF_32BE_F1_infos
|
||||
to_UTF_16BE_F1 = {
|
||||
to_UTF_16BE_C2_offsets,
|
||||
to_UTF_16BE_F1_infos
|
||||
};
|
||||
|
||||
static const struct byte_lookup* const
|
||||
to_UTF_32BE_F4_infos[2] = {
|
||||
&to_UTF_32BE_E1, INVALID,
|
||||
to_UTF_16BE_F4_infos[2] = {
|
||||
&to_UTF_16BE_E1, INVALID,
|
||||
};
|
||||
static const BYTE_LOOKUP
|
||||
to_UTF_32BE_F4 = {
|
||||
/* used as to_UTF_32BE */
|
||||
/* used as to_UTF_16BE */
|
||||
/* used as to_UTF_16LE */
|
||||
/* used as to_UTF_32LE */
|
||||
to_UTF_32BE_F0_offsets,
|
||||
to_UTF_32BE_F4_infos
|
||||
to_UTF_16BE_F4 = {
|
||||
to_UTF_16BE_F0_offsets,
|
||||
to_UTF_16BE_F4_infos
|
||||
};
|
||||
|
||||
static const unsigned char
|
||||
to_UTF_32BE_offsets[256] = {
|
||||
to_UTF_16BE_offsets[256] = {
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
|
@ -501,24 +405,22 @@ to_UTF_32BE_offsets[256] = {
|
|||
6, 7, 7, 7, 8, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
};
|
||||
static const struct byte_lookup* const
|
||||
to_UTF_32BE_infos[9] = {
|
||||
FUNso, INVALID, &to_UTF_32BE_C2, &to_UTF_32BE_E0,
|
||||
&to_UTF_32BE_E1, &to_UTF_32BE_ED, &to_UTF_32BE_F0, &to_UTF_32BE_F1,
|
||||
&to_UTF_32BE_F4,
|
||||
to_UTF_16BE_infos[9] = {
|
||||
FUNso, INVALID, &to_UTF_16BE_C2, &to_UTF_16BE_E0,
|
||||
&to_UTF_16BE_E1, &to_UTF_16BE_ED, &to_UTF_16BE_F0, &to_UTF_16BE_F1,
|
||||
&to_UTF_16BE_F4,
|
||||
};
|
||||
static const BYTE_LOOKUP
|
||||
to_UTF_32BE = {
|
||||
/* used as to_UTF_32BE */
|
||||
/* used as to_UTF_16BE */
|
||||
/* used as to_UTF_16LE */
|
||||
/* used as to_UTF_32LE */
|
||||
to_UTF_32BE_offsets,
|
||||
to_UTF_32BE_infos
|
||||
to_UTF_16BE = {
|
||||
to_UTF_16BE_offsets,
|
||||
to_UTF_16BE_infos
|
||||
};
|
||||
|
||||
|
||||
|
||||
static const rb_transcoder
|
||||
rb_to_UTF_16BE = {
|
||||
"UTF-8", "UTF-16BE", &to_UTF_32BE, 4, 1,
|
||||
"UTF-8", "UTF-16BE", &to_UTF_16BE, 4, 1,
|
||||
NULL, NULL, NULL, NULL, NULL, &fun_so_to_utf_16be
|
||||
};
|
||||
|
||||
|
@ -563,6 +465,8 @@ from_UTF_16LE = {
|
|||
from_UTF_16LE_infos
|
||||
};
|
||||
|
||||
|
||||
|
||||
static const rb_transcoder
|
||||
rb_from_UTF_16LE = {
|
||||
"UTF-16LE", "UTF-8", &from_UTF_16LE, 4, 0,
|
||||
|
@ -571,14 +475,12 @@ rb_from_UTF_16LE = {
|
|||
|
||||
static const rb_transcoder
|
||||
rb_to_UTF_16LE = {
|
||||
"UTF-8", "UTF-16LE", &to_UTF_32BE, 4, 1,
|
||||
"UTF-8", "UTF-16LE", &to_UTF_16BE, 4, 1,
|
||||
NULL, NULL, NULL, NULL, NULL, &fun_so_to_utf_16le
|
||||
};
|
||||
|
||||
static const unsigned char
|
||||
from_UTF_32BE_00_00_offsets[256] = {
|
||||
/* used by from_UTF_32BE_00_00 */
|
||||
/* used by from_UTF_32LE_00 */
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
|
@ -598,9 +500,7 @@ from_UTF_32BE_00_00_offsets[256] = {
|
|||
};
|
||||
static const struct byte_lookup* const
|
||||
from_UTF_32BE_00_00_infos[2] = {
|
||||
/* used by from_UTF_32BE_00_00 */
|
||||
/* used by from_UTF_32LE_00_00 */
|
||||
&from_UTF_16BE_00, &from_UTF_32BE_00_00_D8,
|
||||
&from_UTF_16BE_00, &from_UTF_16BE_D8_00_00,
|
||||
};
|
||||
static const BYTE_LOOKUP
|
||||
from_UTF_32BE_00_00 = {
|
||||
|
@ -620,7 +520,7 @@ from_UTF_32BE_00_01 = {
|
|||
|
||||
static const struct byte_lookup* const
|
||||
from_UTF_32BE_00_11_infos[1] = {
|
||||
&from_UTF_32BE_00_00_D8,
|
||||
&from_UTF_16BE_D8_00_00,
|
||||
};
|
||||
static const BYTE_LOOKUP
|
||||
from_UTF_32BE_00_11 = {
|
||||
|
@ -658,6 +558,16 @@ from_UTF_32BE_00 = {
|
|||
from_UTF_32BE_00_infos
|
||||
};
|
||||
|
||||
static const struct byte_lookup* const
|
||||
from_UTF_32BE_01_infos[1] = {
|
||||
&from_UTF_32BE_00_11,
|
||||
};
|
||||
static const BYTE_LOOKUP
|
||||
from_UTF_32BE_01 = {
|
||||
from_UTF_16BE_00_offsets,
|
||||
from_UTF_32BE_01_infos
|
||||
};
|
||||
|
||||
static const unsigned char
|
||||
from_UTF_32BE_offsets[256] = {
|
||||
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
|
@ -679,7 +589,7 @@ from_UTF_32BE_offsets[256] = {
|
|||
};
|
||||
static const struct byte_lookup* const
|
||||
from_UTF_32BE_infos[2] = {
|
||||
&from_UTF_32BE_00, INVALID,
|
||||
&from_UTF_32BE_00, &from_UTF_32BE_01,
|
||||
};
|
||||
static const BYTE_LOOKUP
|
||||
from_UTF_32BE = {
|
||||
|
@ -687,6 +597,8 @@ from_UTF_32BE = {
|
|||
from_UTF_32BE_infos
|
||||
};
|
||||
|
||||
|
||||
|
||||
static const rb_transcoder
|
||||
rb_from_UTF_32BE = {
|
||||
"UTF-32BE", "UTF-8", &from_UTF_32BE, 4, 0,
|
||||
|
@ -695,10 +607,20 @@ rb_from_UTF_32BE = {
|
|||
|
||||
static const rb_transcoder
|
||||
rb_to_UTF_32BE = {
|
||||
"UTF-8", "UTF-32BE", &to_UTF_32BE, 4, 1,
|
||||
"UTF-8", "UTF-32BE", &to_UTF_16BE, 4, 1,
|
||||
NULL, NULL, NULL, NULL, NULL, &fun_so_to_utf_32be
|
||||
};
|
||||
|
||||
static const struct byte_lookup* const
|
||||
from_UTF_32LE_00_00_00_infos[2] = {
|
||||
FUNso, INVALID,
|
||||
};
|
||||
static const BYTE_LOOKUP
|
||||
from_UTF_32LE_00_00_00 = {
|
||||
from_UTF_32BE_offsets,
|
||||
from_UTF_32LE_00_00_00_infos
|
||||
};
|
||||
|
||||
static const unsigned char
|
||||
from_UTF_32LE_00_00_offsets[256] = {
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
|
@ -718,10 +640,14 @@ from_UTF_32LE_00_00_offsets[256] = {
|
|||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
};
|
||||
static const struct byte_lookup* const
|
||||
from_UTF_32LE_00_00_infos[2] = {
|
||||
&from_UTF_32LE_00_00_00, &from_UTF_16BE_D8_00_00,
|
||||
};
|
||||
static const BYTE_LOOKUP
|
||||
from_UTF_32LE_00_00 = {
|
||||
from_UTF_32LE_00_00_offsets,
|
||||
from_UTF_32BE_00_00_infos
|
||||
from_UTF_32LE_00_00_infos
|
||||
};
|
||||
|
||||
static const unsigned char
|
||||
|
@ -743,10 +669,14 @@ from_UTF_32LE_00_D8_offsets[256] = {
|
|||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
};
|
||||
static const struct byte_lookup* const
|
||||
from_UTF_32LE_00_D8_infos[2] = {
|
||||
&from_UTF_16BE_D8_00_00, &from_UTF_32LE_00_00_00,
|
||||
};
|
||||
static const BYTE_LOOKUP
|
||||
from_UTF_32LE_00_D8 = {
|
||||
from_UTF_32LE_00_D8_offsets,
|
||||
from_UTF_16BE_D8_00_infos
|
||||
from_UTF_32LE_00_D8_infos
|
||||
};
|
||||
|
||||
static const struct byte_lookup* const
|
||||
|
@ -769,6 +699,8 @@ from_UTF_32LE = {
|
|||
from_UTF_32LE_infos
|
||||
};
|
||||
|
||||
|
||||
|
||||
static const rb_transcoder
|
||||
rb_from_UTF_32LE = {
|
||||
"UTF-32LE", "UTF-8", &from_UTF_32LE, 4, 0,
|
||||
|
@ -777,7 +709,7 @@ rb_from_UTF_32LE = {
|
|||
|
||||
static const rb_transcoder
|
||||
rb_to_UTF_32LE = {
|
||||
"UTF-8", "UTF-32LE", &to_UTF_32BE, 4, 1,
|
||||
"UTF-8", "UTF-32LE", &to_UTF_16BE, 4, 1,
|
||||
NULL, NULL, NULL, NULL, NULL, &fun_so_to_utf_32le
|
||||
};
|
||||
|
||||
|
@ -793,4 +725,4 @@ Init_utf_16_32(void)
|
|||
rb_register_transcoder(&rb_from_UTF_32LE);
|
||||
rb_register_transcoder(&rb_to_UTF_32LE);
|
||||
}
|
||||
/* Footprint (bytes): gross: 13684, saved: 10796, net: 2888 */
|
||||
|
||||
|
|
310
enc/trans/utf_16_32.erb.c
Normal file
310
enc/trans/utf_16_32.erb.c
Normal file
|
@ -0,0 +1,310 @@
|
|||
#include "transcode_data.h"
|
||||
|
||||
/*
 * Convert one UTF-16BE character starting at s into UTF-8 stored at o.
 *
 * s: 2 input bytes, or 4 when s[0] is in 0xD8..0xDF (handled as a
 *    surrogate pair).
 * o: output buffer; up to 4 bytes are written.
 * Returns the number of bytes written to o (1, 2, 3 or 4).
 *
 * This function performs no validation of its own.
 * NOTE(review): presumably the from_UTF_16BE lookup tree rejects lone or
 * misordered surrogates before this is called -- confirm.
 */
static int
fun_so_from_utf_16be(const unsigned char* s, unsigned char* o)
{
    const unsigned char hi = s[0];
    const unsigned char lo = s[1];

    /* U+0000..U+007F: a single ASCII byte. */
    if (hi == 0 && lo < 0x80) {
        o[0] = lo;
        return 1;
    }

    /* Up to U+07FF: two UTF-8 bytes. */
    if (hi < 0x08) {
        o[0] = 0xC0 | (hi << 2) | (lo >> 6);
        o[1] = 0x80 | (lo & 0x3F);
        return 2;
    }

    /* Any other unit whose high byte is not 0xD8..0xDF: three bytes. */
    if ((hi & 0xF8) != 0xD8) {
        o[0] = 0xE0 | (hi >> 4);
        o[1] = 0x80 | ((hi & 0x0F) << 2) | (lo >> 6);
        o[2] = 0x80 | (lo & 0x3F);
        return 3;
    }

    /* Surrogate pair: the second 16-bit unit is only read on this path. */
    {
        const unsigned char hi2 = s[2];
        const unsigned char lo2 = s[3];
        /* The high surrogate stores (plane - 1); add 1 to get the plane. */
        unsigned int u = (((hi & 0x03) << 2) | (lo >> 6)) + 1;

        o[0] = 0xF0 | (u >> 2);
        o[1] = 0x80 | ((u & 0x03) << 4) | ((lo >> 2) & 0x0F);
        o[2] = 0x80 | ((lo & 0x03) << 4) | ((hi2 & 0x03) << 2) | (lo2 >> 6);
        o[3] = 0x80 | (lo2 & 0x3F);
        return 4;
    }
}
|
||||
|
||||
/*
 * Convert one UTF-8 character starting at s into UTF-16BE stored at o.
 *
 * s: 1 to 4 input bytes; the length is chosen from the lead byte s[0].
 * o: output buffer; 2 bytes are written, or 4 for a surrogate pair.
 * Returns the number of bytes written to o (2 or 4).
 *
 * The bit tricks below assume every trailing byte has the form 10xxxxxx.
 * NOTE(review): presumably the to_UTF_16BE lookup tree guarantees
 * well-formed input -- confirm.
 */
static int
fun_so_to_utf_16be(const unsigned char* s, unsigned char* o)
{
    const unsigned char lead = s[0];

    /* 0xxxxxxx: ASCII. */
    if ((lead & 0x80) == 0) {
        o[0] = 0x00;
        o[1] = lead;
        return 2;
    }

    /* 110xxxxx: two-byte sequence. */
    if ((lead & 0xE0) == 0xC0) {
        o[0] = (lead >> 2) & 0x07;
        o[1] = ((lead & 0x03) << 6) | (s[1] & 0x3F);
        return 2;
    }

    /* 1110xxxx: three-byte sequence.  The XORs strip the 10...... marker. */
    if ((lead & 0xF0) == 0xE0) {
        o[0] = (lead << 4) | ((s[1] >> 2) ^ 0x20);
        o[1] = (s[1] << 6) | (s[2] ^ 0x80);
        return 2;
    }

    /* Everything else is treated as a four-byte sequence -> surrogate pair. */
    {
        /* The high surrogate carries (plane - 1). */
        int w = (((lead & 0x07) << 2) | ((s[1] >> 4) & 0x03)) - 1;

        o[0] = 0xD8 | (w >> 2);
        o[1] = (w << 6) | ((s[1] & 0x0F) << 2) | ((s[2] >> 4) - 8);
        o[2] = 0xDC | ((s[2] >> 2) & 0x03);
        o[3] = (s[2] << 6) | (s[3] & ~0x80);
        return 4;
    }
}
|
||||
|
||||
/*
 * Convert one UTF-16LE character starting at s into UTF-8 stored at o.
 *
 * Same algorithm as fun_so_from_utf_16be, with the two bytes of every
 * 16-bit unit swapped: s[1] is the high byte, s[0] the low byte.
 *
 * s: 2 input bytes, or 4 when s[1] is in 0xD8..0xDF (surrogate pair).
 * o: output buffer; up to 4 bytes are written.
 * Returns the number of bytes written to o (1, 2, 3 or 4).
 */
static int
fun_so_from_utf_16le(const unsigned char* s, unsigned char* o)
{
    const unsigned char lo = s[0];
    const unsigned char hi = s[1];

    /* U+0000..U+007F: a single ASCII byte. */
    if (hi == 0 && lo < 0x80) {
        o[0] = lo;
        return 1;
    }

    /* Up to U+07FF: two UTF-8 bytes. */
    if (hi < 0x08) {
        o[0] = 0xC0 | (hi << 2) | (lo >> 6);
        o[1] = 0x80 | (lo & 0x3F);
        return 2;
    }

    /* Any other unit whose high byte is not 0xD8..0xDF: three bytes. */
    if ((hi & 0xF8) != 0xD8) {
        o[0] = 0xE0 | (hi >> 4);
        o[1] = 0x80 | ((hi & 0x0F) << 2) | (lo >> 6);
        o[2] = 0x80 | (lo & 0x3F);
        return 3;
    }

    /* Surrogate pair: the second 16-bit unit is only read on this path. */
    {
        const unsigned char lo2 = s[2];
        const unsigned char hi2 = s[3];
        /* The high surrogate stores (plane - 1); add 1 to get the plane. */
        unsigned int u = (((hi & 0x03) << 2) | (lo >> 6)) + 1;

        o[0] = 0xF0 | (u >> 2);
        o[1] = 0x80 | ((u & 0x03) << 4) | ((lo >> 2) & 0x0F);
        o[2] = 0x80 | ((lo & 0x03) << 4) | ((hi2 & 0x03) << 2) | (lo2 >> 6);
        o[3] = 0x80 | (lo2 & 0x3F);
        return 4;
    }
}
|
||||
|
||||
/*
 * Convert one UTF-8 character starting at s into UTF-16LE stored at o.
 *
 * Same algorithm as fun_so_to_utf_16be, with the two bytes of every
 * 16-bit output unit swapped (low byte first).
 *
 * s: 1 to 4 input bytes; the length is chosen from the lead byte s[0].
 * o: output buffer; 2 bytes are written, or 4 for a surrogate pair.
 * Returns the number of bytes written to o (2 or 4).
 *
 * The bit tricks below assume every trailing byte has the form 10xxxxxx.
 */
static int
fun_so_to_utf_16le(const unsigned char* s, unsigned char* o)
{
    const unsigned char lead = s[0];

    /* 0xxxxxxx: ASCII. */
    if ((lead & 0x80) == 0) {
        o[1] = 0x00;
        o[0] = lead;
        return 2;
    }

    /* 110xxxxx: two-byte sequence. */
    if ((lead & 0xE0) == 0xC0) {
        o[1] = (lead >> 2) & 0x07;
        o[0] = ((lead & 0x03) << 6) | (s[1] & 0x3F);
        return 2;
    }

    /* 1110xxxx: three-byte sequence.  The XORs strip the 10...... marker. */
    if ((lead & 0xF0) == 0xE0) {
        o[1] = (lead << 4) | ((s[1] >> 2) ^ 0x20);
        o[0] = (s[1] << 6) | (s[2] ^ 0x80);
        return 2;
    }

    /* Everything else is treated as a four-byte sequence -> surrogate pair. */
    {
        /* The high surrogate carries (plane - 1). */
        int w = (((lead & 0x07) << 2) | ((s[1] >> 4) & 0x03)) - 1;

        o[1] = 0xD8 | (w >> 2);
        o[0] = (w << 6) | ((s[1] & 0x0F) << 2) | ((s[2] >> 4) - 8);
        o[3] = 0xDC | ((s[2] >> 2) & 0x03);
        o[2] = (s[2] << 6) | (s[3] & ~0x80);
        return 4;
    }
}
|
||||
|
||||
/*
 * Convert one UTF-32BE character (4 bytes at s) into UTF-8 stored at o.
 *
 * s[0] is never inspected; s[1] selects between the BMP (zero) and the
 * supplementary planes (non-zero).
 *
 * o: output buffer; up to 4 bytes are written.
 * Returns the number of bytes written to o (1, 2, 3 or 4).
 */
static int
fun_so_from_utf_32be(const unsigned char* s, unsigned char* o)
{
    const unsigned char plane = s[1];
    const unsigned char mid = s[2];
    const unsigned char low = s[3];

    /* Supplementary planes always need four UTF-8 bytes. */
    if (plane != 0) {
        o[0] = 0xF0 | (plane >> 2);
        o[1] = 0x80 | ((plane & 0x03) << 4) | (mid >> 4);
        o[2] = 0x80 | ((mid & 0x0F) << 2) | (low >> 6);
        o[3] = 0x80 | (low & 0x3F);
        return 4;
    }

    /* U+0000..U+007F: a single ASCII byte. */
    if (mid == 0 && low < 0x80) {
        o[0] = low;
        return 1;
    }

    /* Up to U+07FF: two UTF-8 bytes. */
    if (mid < 0x08) {
        o[0] = 0xC0 | (mid << 2) | (low >> 6);
        o[1] = 0x80 | (low & 0x3F);
        return 2;
    }

    /* Rest of the BMP: three UTF-8 bytes. */
    o[0] = 0xE0 | (mid >> 4);
    o[1] = 0x80 | ((mid & 0x0F) << 2) | (low >> 6);
    o[2] = 0x80 | (low & 0x3F);
    return 3;
}
|
||||
|
||||
/*
 * Convert one UTF-8 character starting at s into UTF-32BE stored at o.
 *
 * s: 1 to 4 input bytes; the length is chosen from the lead byte s[0].
 * o: output buffer; exactly 4 bytes are written.
 * Returns 4 in every case.
 *
 * The three-byte branch assumes trailing bytes of the form 10xxxxxx (the
 * XORs strip that marker).
 */
static int
fun_so_to_utf_32be(const unsigned char* s, unsigned char* o)
{
    const unsigned char lead = s[0];

    /* The most significant byte is zero for every code point. */
    o[0] = 0;

    if ((lead & 0x80) == 0) {
        /* 0xxxxxxx: ASCII. */
        o[1] = 0x00;
        o[2] = 0x00;
        o[3] = lead;
    }
    else if ((lead & 0xE0) == 0xC0) {
        /* 110xxxxx: two-byte sequence. */
        o[1] = 0x00;
        o[2] = (lead >> 2) & 0x07;
        o[3] = ((lead & 0x03) << 6) | (s[1] & 0x3F);
    }
    else if ((lead & 0xF0) == 0xE0) {
        /* 1110xxxx: three-byte sequence. */
        o[1] = 0x00;
        o[2] = (lead << 4) | ((s[1] >> 2) ^ 0x20);
        o[3] = (s[1] << 6) | (s[2] ^ 0x80);
    }
    else {
        /* Everything else is treated as a four-byte sequence. */
        o[1] = ((lead & 0x07) << 2) | ((s[1] >> 4) & 0x03);
        o[2] = ((s[1] & 0x0F) << 4) | ((s[2] >> 2) & 0x0F);
        o[3] = ((s[2] & 0x03) << 6) | (s[3] & 0x3F);
    }

    return 4;
}
|
||||
|
||||
/*
 * Convert one UTF-32LE character (4 bytes at s) into UTF-8 stored at o.
 *
 * This is fun_so_from_utf_32be with the input byte order reversed: s[0] is
 * the least significant byte, s[2] selects between the BMP (zero) and the
 * supplementary planes (non-zero), and s[3] is never inspected.
 *
 * The previous body was a stub that returned 1 without writing anything to
 * o, so the registered UTF-32LE -> UTF-8 transcoder produced no real output.
 *
 * o: output buffer; up to 4 bytes are written.
 * Returns the number of bytes written to o (1, 2, 3 or 4).
 */
static int
fun_so_from_utf_32le(const unsigned char* s, unsigned char* o)
{
    if (!s[2]) {
        if (s[1]==0 && s[0]<0x80) {
            /* U+0000..U+007F: a single ASCII byte. */
            o[0] = s[0];
            return 1;
        }
        else if (s[1]<0x08) {
            /* Up to U+07FF: two UTF-8 bytes. */
            o[0] = 0xC0 | (s[1]<<2) | (s[0]>>6);
            o[1] = 0x80 | (s[0]&0x3F);
            return 2;
        }
        else {
            /* Rest of the BMP: three UTF-8 bytes. */
            o[0] = 0xE0 | (s[1]>>4);
            o[1] = 0x80 | ((s[1]&0x0F)<<2) | (s[0]>>6);
            o[2] = 0x80 | (s[0]&0x3F);
            return 3;
        }
    }
    else {
        /* Supplementary planes: four UTF-8 bytes. */
        o[0] = 0xF0 | (s[2]>>2);
        o[1] = 0x80 | ((s[2]&0x03)<<4) | (s[1]>>4);
        o[2] = 0x80 | ((s[1]&0x0F)<<2) | (s[0]>>6);
        o[3] = 0x80 | (s[0]&0x3F);
        return 4;
    }
}
|
||||
|
||||
/*
 * Convert one UTF-8 character starting at s into UTF-32LE stored at o.
 *
 * This is fun_so_to_utf_32be with the output byte order reversed: o[0]
 * receives the least significant byte and o[3] is always zero.
 *
 * The previous body was a stub that returned 4 without writing anything to
 * o, so the registered UTF-8 -> UTF-32LE transcoder produced no real output.
 *
 * s: 1 to 4 input bytes; the length is chosen from the lead byte s[0].
 * o: output buffer; exactly 4 bytes are written.
 * Returns 4 in every case.
 *
 * The three-byte branch assumes trailing bytes of the form 10xxxxxx (the
 * XORs strip that marker).
 */
static int
fun_so_to_utf_32le(const unsigned char* s, unsigned char* o)
{
    /* The most significant byte is zero for every code point. */
    o[3] = 0;
    if (!(s[0]&0x80)) {
        /* 0xxxxxxx: ASCII. */
        o[2] = o[1] = 0x00;
        o[0] = s[0];
    }
    else if ((s[0]&0xE0)==0xC0) {
        /* 110xxxxx: two-byte sequence. */
        o[2] = 0x00;
        o[1] = (s[0]>>2)&0x07;
        o[0] = ((s[0]&0x03)<<6) | (s[1]&0x3F);
    }
    else if ((s[0]&0xF0)==0xE0) {
        /* 1110xxxx: three-byte sequence. */
        o[2] = 0x00;
        o[1] = (s[0]<<4) | ((s[1]>>2)^0x20);
        o[0] = (s[1]<<6) | (s[2]^0x80);
    }
    else {
        /* Everything else is treated as a four-byte sequence. */
        o[2] = ((s[0]&0x07)<<2) | ((s[1]>>4)&0x03);
        o[1] = ((s[1]&0x0F)<<4) | ((s[2]>>2)&0x0F);
        o[0] = ((s[2]&0x03)<<6) | (s[3]&0x3F);
    }
    return 4;
}
|
||||
|
||||
<%=
|
||||
map = {}
|
||||
map["{00-d7,e0-ff}{00-ff}"] = :func_so
|
||||
map["{d8-db}{00-ff}{dc-df}{00-ff}"] = :func_so
|
||||
map["{dc-df}{00-ff}"] = :invalid
|
||||
map["{d8-db}{00-ff}{00-db,e0-ff}{00-ff}"] = :invalid
|
||||
code = ''
|
||||
ActionMap.parse(map).generate_node(code, "from_UTF_16BE", [])
|
||||
code
|
||||
%>
|
||||
|
||||
static const rb_transcoder
|
||||
rb_from_UTF_16BE = {
|
||||
"UTF-16BE", "UTF-8", &from_UTF_16BE, 4, 0,
|
||||
NULL, NULL, NULL, NULL, NULL, &fun_so_from_utf_16be
|
||||
};
|
||||
|
||||
<%=
|
||||
map = {}
|
||||
map["{00-7f}"] = :func_so
|
||||
map["{c2-df}{80-bf}"] = :func_so
|
||||
map["e0{a0-bf}{80-bf}"] = :func_so
|
||||
map["{e1-ec}{80-bf}{80-bf}"] = :func_so
|
||||
map["ed{80-9f}{80-bf}"] = :func_so
|
||||
map["{ee-ef}{80-bf}{80-bf}"] = :func_so
|
||||
map["f0{90-bf}{80-bf}{80-bf}"] = :func_so
|
||||
map["{f1-f3}{80-bf}{80-bf}{80-bf}"] = :func_so
|
||||
map["f4{80-8f}{80-bf}{80-bf}"] = :func_so
|
||||
map["{80-c1,f5-ff}"] = :invalid
|
||||
map["e0{80-9f}"] = :invalid
|
||||
map["ed{a0-bf}"] = :invalid
|
||||
map["f0{80-8f}"] = :invalid
|
||||
map["f4{90-bf}"] = :invalid
|
||||
code = ''
|
||||
am = ActionMap.parse(map)
|
||||
am.generate_node(code, "to_UTF_16BE", [0x00..0xff, 0x80..0xbf, 0x80..0xbf, 0x80..0xbf])
|
||||
code
|
||||
%>
|
||||
|
||||
static const rb_transcoder
|
||||
rb_to_UTF_16BE = {
|
||||
"UTF-8", "UTF-16BE", &to_UTF_16BE, 4, 1,
|
||||
NULL, NULL, NULL, NULL, NULL, &fun_so_to_utf_16be
|
||||
};
|
||||
|
||||
<%=
|
||||
map = {}
|
||||
map["{00-ff}{00-d7,e0-ff}"] = :func_so
|
||||
map["{00-ff}{d8-db}{00-ff}{dc-df}"] = :func_so
|
||||
map["{00-ff}{dc-df}"] = :invalid
|
||||
map["{00-ff}{d8-db}{00-ff}{00-db,e0-ff}"] = :invalid
|
||||
code = ''
|
||||
ActionMap.parse(map).generate_node(code, "from_UTF_16LE", [])
|
||||
code
|
||||
%>
|
||||
|
||||
static const rb_transcoder
|
||||
rb_from_UTF_16LE = {
|
||||
"UTF-16LE", "UTF-8", &from_UTF_16LE, 4, 0,
|
||||
NULL, NULL, NULL, NULL, NULL, &fun_so_from_utf_16le
|
||||
};
|
||||
|
||||
static const rb_transcoder
|
||||
rb_to_UTF_16LE = {
|
||||
"UTF-8", "UTF-16LE", &to_UTF_16BE, 4, 1,
|
||||
NULL, NULL, NULL, NULL, NULL, &fun_so_to_utf_16le
|
||||
};
|
||||
|
||||
<%=
|
||||
map = {}
|
||||
map["0000{00-d7,e0-ff}{00-ff}"] = :func_so
|
||||
map["00{01-10}{00-ff}{00-ff}"] = :func_so
|
||||
map["00{11-ff}{00-ff}{00-ff}"] = :invalid
|
||||
map["0000{d8-df}{00-ff}"] = :invalid
|
||||
#map["{01-ff}"] = :invalid
|
||||
map["{01-ff}{00-ff}{00-ff}{00-ff}"] = :invalid
|
||||
code = ''
|
||||
ActionMap.parse(map).generate_node(code, "from_UTF_32BE", [])
|
||||
code
|
||||
%>
|
||||
|
||||
static const rb_transcoder
|
||||
rb_from_UTF_32BE = {
|
||||
"UTF-32BE", "UTF-8", &from_UTF_32BE, 4, 0,
|
||||
NULL, NULL, NULL, NULL, NULL, &fun_so_from_utf_32be
|
||||
};
|
||||
|
||||
static const rb_transcoder
|
||||
rb_to_UTF_32BE = {
|
||||
"UTF-8", "UTF-32BE", &to_UTF_16BE, 4, 1,
|
||||
NULL, NULL, NULL, NULL, NULL, &fun_so_to_utf_32be
|
||||
};
|
||||
|
||||
<%=
|
||||
map = {}
|
||||
map["{00-ff}{00-d7,e0-ff}0000"] = :func_so
|
||||
map["{00-ff}{00-ff}{01-10}00"] = :func_so
|
||||
map["{00-ff}{00-ff}{00-ff}{01-ff}"] = :invalid
|
||||
map["{00-ff}{00-ff}{11-ff}00"] = :invalid
|
||||
map["{00-ff}{d8-df}0000"] = :invalid
|
||||
code = ''
|
||||
ActionMap.parse(map).generate_node(code, "from_UTF_32LE", [])
|
||||
code
|
||||
%>
|
||||
|
||||
static const rb_transcoder
|
||||
rb_from_UTF_32LE = {
|
||||
"UTF-32LE", "UTF-8", &from_UTF_32LE, 4, 0,
|
||||
NULL, NULL, NULL, NULL, NULL, &fun_so_from_utf_32le
|
||||
};
|
||||
|
||||
static const rb_transcoder
|
||||
rb_to_UTF_32LE = {
|
||||
"UTF-8", "UTF-32LE", &to_UTF_16BE, 4, 1,
|
||||
NULL, NULL, NULL, NULL, NULL, &fun_so_to_utf_32le
|
||||
};
|
||||
|
||||
void
|
||||
Init_utf_16_32(void)
|
||||
{
|
||||
rb_register_transcoder(&rb_from_UTF_16BE);
|
||||
rb_register_transcoder(&rb_to_UTF_16BE);
|
||||
rb_register_transcoder(&rb_from_UTF_16LE);
|
||||
rb_register_transcoder(&rb_to_UTF_16LE);
|
||||
rb_register_transcoder(&rb_from_UTF_32BE);
|
||||
rb_register_transcoder(&rb_to_UTF_32BE);
|
||||
rb_register_transcoder(&rb_from_UTF_32LE);
|
||||
rb_register_transcoder(&rb_to_UTF_32LE);
|
||||
}
|
6
tool/build-transcode
Executable file
6
tool/build-transcode
Executable file
|
@ -0,0 +1,6 @@
|
|||
#!/bin/sh
|
||||
|
||||
ruby tool/transcode-tblgen.rb -vo enc/trans/single_byte.c enc/trans/single_byte.erb.c
|
||||
ruby tool/transcode-tblgen.rb -vo enc/trans/utf_16_32.c enc/trans/utf_16_32.erb.c
|
||||
ruby tool/transcode-tblgen.rb -vo enc/trans/japanese.c enc/trans/japanese.erb.c
|
||||
ruby tool/transcode-tblgen.rb -vo enc/trans/korean.c enc/trans/korean.erb.c
|
565
tool/transcode-tblgen.rb
Normal file
565
tool/transcode-tblgen.rb
Normal file
|
@ -0,0 +1,565 @@
|
|||
require 'optparse'
|
||||
require 'erb'
|
||||
|
||||
# Maps a single byte (as a one-character String) to its escaped spelling
# inside a C string literal.  Backslash, double quote and newline get their
# symbolic escapes; the loops below add 3-digit octal escapes.
C_ESC = {
  "\\" => "\\\\",
  '"' => '\"',
  "\n" => '\n',
}

# Control bytes 0x00..0x1f: octal escape unless a symbolic escape already
# exists (this keeps '\n' for newline).
(0x00..0x1f).each {|byte| C_ESC[[byte].pack("C")] ||= "\\%03o" % byte }
# DEL and every byte with the high bit set: always an octal escape.
(0x7f..0xff).each {|byte| C_ESC[[byte].pack("C")] = "\\%03o" % byte }

# Matches any single byte that has an entry in C_ESC.
C_ESC_PAT = Regexp.union(*C_ESC.keys)

# Returns +str+ rendered as a C string literal: wrapped in double quotes,
# with every byte listed in C_ESC replaced by its escape sequence.
def c_esc(str)
  escaped = str.gsub(C_ESC_PAT) {|ch| C_ESC[ch] }
  '"' + escaped + '"'
end
|
||||
|
||||
# A set of byte strings, described as a list of alternative sequences.
#
# Internal representation (@pat): an Array of sequences; each sequence is an
# Array of byte sets; each byte set is an Array of Integer Ranges.  For
# example "8e{a1-fe}" is stored as [[[0x8e..0x8e], [0xa1..0xfe]]].
class StrSet
  # Parses +pattern+ into a StrSet.
  #
  # +pattern+ is a whitespace-separated list of sequences.  Each sequence is
  # a run of items, where an item is either two hex digits ("8e") or a
  # brace-enclosed, comma-separated list of hex bytes and hex ranges
  # ("{81-9f,e0-fc}").  Hex digits are accepted in either case everywhere;
  # previously only the bare two-digit form was case-insensitive, so
  # "{A1-FE}" was rejected while "A1" was accepted.
  #
  # Raises RuntimeError ("invalid sequence" / "invalid range") on malformed
  # input.
  def self.parse(pattern)
    result = []
    pattern.scan(/\S+/) {|seq|
      seq_result = []
      until seq.empty?
        if /\A([0-9a-f][0-9a-f])/i =~ seq
          byte = $1.to_i(16)
          seq_result << [byte..byte]
          seq = $'
        elsif /\A\{([^\}]+)\}/ =~ seq
          set = $1
          # Capture the remainder now: the inner matches overwrite $'.
          seq = $'
          set_result = []
          set.scan(/[^,]+/) {|range|
            if /\A([0-9a-f][0-9a-f])-([0-9a-f][0-9a-f])\z/i =~ range
              b = $1.to_i(16)
              e = $2.to_i(16)
              set_result << (b..e)
            elsif /\A([0-9a-f][0-9a-f])\z/i =~ range
              byte = $1.to_i(16)
              set_result << (byte..byte)
            else
              raise "invalid range: #{range.inspect}"
            end
          }
          seq_result << set_result
        else
          raise "invalid sequence: #{seq.inspect}"
        end
      end
      result << seq_result
    }
    self.new(result)
  end

  # +pat+ is the nested Array representation described on the class.
  def initialize(pat)
    @pat = pat
  end

  # Hash and equality are delegated to the pattern so that StrSet objects
  # can be used as Hash keys.
  def hash
    @pat.hash
  end

  def eql?(other)
    self.class == other.class &&
    @pat == other.instance_eval { @pat }
  end

  alias == eql?

  # Renders the set back into the pattern syntax accepted by StrSet.parse
  # (lowercase hex).  The empty set prints as "(empset)" and the set holding
  # only the empty string as "(empstr)".
  def to_s
    if @pat.empty?
      "(empset)"
    elsif @pat == [[]]
      "(empstr)"
    else
      @pat.map {|seq|
        seq.map {|byteset|
          if byteset.length == 1 && byteset[0].begin == byteset[0].end
            "%02x" % byteset[0].begin
          else
            "{" +
            byteset.map {|range|
              if range.begin == range.end
                "%02x" % range.begin
              else
                "%02x-%02x" % [range.begin, range.end]
              end
            }.join(',') +
            "}"
          end
        }.join('')
      }.join(' ')
    end
  end

  def inspect
    "\#<#{self.class}: #{self.to_s}>"
  end

  # True when the set contains the empty string, i.e. some alternative is
  # an empty sequence.
  def emptyable?
    @pat.any? {|seq|
      seq.empty?
    }
  end

  # Returns the sorted list of all bytes that can start a string in the set.
  def first_bytes
    result = {}
    @pat.each {|seq|
      next if seq.empty?
      seq.first.each {|range|
        range.each {|byte|
          result[byte] = true
        }
      }
    }
    result.keys.sort
  end

  # Yields (byte, rest) for every possible first byte in ascending order,
  # where +rest+ is the StrSet of what may follow that byte.  Empty
  # alternatives are skipped.
  def each_firstbyte
    h = {}
    @pat.each {|seq|
      next if seq.empty?
      seq.first.each {|range|
        range.each {|byte|
          (h[byte] ||= []) << seq[1..-1]
        }
      }
    }
    h.keys.sort.each {|byte|
      yield byte, StrSet.new(h[byte])
    }
  end
end
|
||||
|
||||
# Maps sets of byte strings (StrSet) to actions and emits the C lookup
# tables that implement the mapping byte by byte.
#
# An action is one of the symbols :nomap, :undef, :invalid, :func_so, a hex
# string of 1 to 4 bytes (emitted as o1(..) .. o4(..)), or any other object,
# which is emitted via to_s (used for "&node_name" references).
class ActionMap
  # Builds an ActionMap from a Hash of pattern String => action, parsing
  # each pattern with StrSet.parse.
  def self.parse(hash)
    h = {}
    hash.each {|pat, action|
      h[StrSet.parse(pat)] = action
    }
    self.new(h)
  end

  # +h+ maps StrSet => action.  Bytes not covered by any pattern get
  # +default_action+ (initially :undef) in the generated tables.
  def initialize(h)
    @map = h
    @default_action = :undef
  end
  attr_accessor :default_action

  # Order-independent hash over the entries, so equal maps can share a
  # memoized node.
  def hash
    hash = 0
    @map.each {|k,v|
      hash ^= k.hash ^ v.hash
    }
    hash
  end

  def eql?(other)
    self.class == other.class &&
    @map.eql?(other.instance_eval { @map })
  end

  alias == eql?

  def inspect
    "\#<#{self.class}:" +
    @map.map {|k, v| " [" + k.to_s + "]=>" + v.inspect }.join('') +
    ">"
  end

  # Returns the action of the first pattern that matches the empty string,
  # or nil when there is none.
  def empty_action
    @map.each {|ss, action|
      return action if ss.emptyable?
    }
    nil
  end

  # Yields (byte, ActionMap) for every possible first byte in ascending
  # order; the yielded map describes what may follow that byte and inherits
  # this map's default action.
  #
  # Raises RuntimeError when a pattern matches the empty string
  # ("emptyable pattern") or when two patterns leave the same remainder
  # after the same first byte ("ambiguous").
  def each_firstbyte
    h = {}
    @map.each {|ss, action|
      if ss.emptyable?
        raise "emptyable pattern"
      else
        ss.each_firstbyte {|byte, rest|
          h[byte] ||= {}
          if h[byte][rest]
            raise "ambiguous"
          else
            h[byte][rest] = action
          end
        }
      end
    }
    h.keys.sort.each {|byte|
      am = ActionMap.new(h[byte])
      am.default_action = @default_action
      yield byte, am
    }
  end

  # Generated tables are shared between nodes: contents => C identifier.
  OffsetsMemo = {}
  InfosMemo = {}

  # Formats +offsets+ (an Array of Integers) as a C array initializer, 16
  # values per row in two groups of 8.
  #
  # A final row shorter than 8 entries used to raise NoMethodError, because
  # Array#[start, len] returns nil when start is past the end; missing
  # halves are now treated as empty.
  def format_offsets(offsets)
    code = "{\n".dup
    0.step(offsets.length-1,16) {|i|
      low = offsets[i,8] || []
      high = offsets[i+8,8] || []
      code << " "
      code << low.map {|off| "%3d," % off }.join('')
      code << " "
      code << high.map {|off| "%3d," % off }.join('')
      code << "\n"
    }
    code << '}'
    code
  end

  # Returns the C expression for one action (see the class comment for the
  # accepted forms).
  def generate_info(info)
    case info
    when :nomap
      "NOMAP"
    when :undef
      "UNDEF"
    when :invalid
      "INVALID"
    when :func_so
      "FUNso"
    when /\A([0-9a-f][0-9a-f])\z/i
      "o1(0x#{$1})"
    when /\A([0-9a-f][0-9a-f])([0-9a-f][0-9a-f])\z/i
      "o2(0x#{$1},0x#{$2})"
    when /\A([0-9a-f][0-9a-f])([0-9a-f][0-9a-f])([0-9a-f][0-9a-f])\z/i
      "o3(0x#{$1},0x#{$2},0x#{$3})"
    when /\A([0-9a-f][0-9a-f])([0-9a-f][0-9a-f])([0-9a-f][0-9a-f])([0-9a-f][0-9a-f])\z/i
      "o4(0x#{$1},0x#{$2},0x#{$3},0x#{$4})"
    else
      info.to_s
    end
  end

  # Formats +infos+ (an Array of actions) as a C array initializer with
  # right-aligned columns: 4 per row when every entry is at most 16
  # characters wide, otherwise 2.
  def format_infos(infos)
    infos = infos.map {|info| generate_info(info) }
    maxlen = infos.map {|info| info.length }.max
    columns = maxlen <= 16 ? 4 : 2
    code = "{\n".dup
    0.step(infos.length-1, columns) {|i|
      code << " "
      is = infos[i,columns]
      is.each {|info|
        code << sprintf(" %#{maxlen}s,", info)
      }
      code << "\n"
    }
    code << "}"
    code
  end

  # Returns the C source for one BYTE_LOOKUP node called +name+.
  #
  # +table+ holds one action per byte of the node's range; nil entries get
  # the default action.  The offsets and infos arrays are only emitted when
  # no earlier node produced identical ones; otherwise the earlier arrays
  # are referenced by name.
  def generate_lookup_node(name, table)
    offsets = []
    infos = []
    infomap = {}
    noaction_bytes = []
    table.each_with_index {|action, byte|
      if !action
        noaction_bytes << byte
        next
      end
      unless o = infomap[action]
        infomap[action] = o = infos.length
        infos[o] = action
      end
      offsets[byte] = o
    }
    if !noaction_bytes.empty?
      # All bytes without an action share one trailing slot.
      noaction_bytes.each {|byte|
        offsets[byte] = infos.length
      }
      infos << @default_action
    end

    if n = OffsetsMemo[offsets]
      offsets_name = n
      offsets_code = ''
    else
      offsets_name = "#{name}_offsets"
      offsets_code = <<"End"
static const unsigned char
#{offsets_name}[#{offsets.length}] = #{format_offsets(offsets)};
End
      OffsetsMemo[offsets] = offsets_name
    end

    if n = InfosMemo[infos]
      infos_name = n
      infos_code = ''
    else
      infos_name = "#{name}_infos"
      infos_code = <<"End"
static const struct byte_lookup* const
#{infos_name}[#{infos.length}] = #{format_infos(infos)};
End
      InfosMemo[infos] = infos_name
    end

    r = offsets_code + infos_code + <<"End"
static const BYTE_LOOKUP
#{name} = {
#{offsets_name},
#{infos_name}
};

End
    r
  end

  # Node memoization: PreMemo is keyed by the ActionMap itself, PostMemo by
  # the finished per-byte table.  NextName supplies "fun_a", "fun_b", ...
  # for nodes generated without a name hint and is advanced in place.
  PreMemo = {}
  PostMemo = {}
  NextName = "a".dup

  # Appends the C source for this map's lookup tree to +code+ and returns
  # the name of the root node.
  #
  # +name_hint+ names the root; child nodes get the hint plus "_XX" for the
  # byte that leads to them.  +ranges+ lists the byte range covered by the
  # table at each depth; an empty list means 0x00..0xff.  Raises
  # RuntimeError ("byte not in range") when a pattern uses a byte outside
  # the range for its depth.
  def generate_node(code, name_hint=nil, ranges=[])
    ranges = [0x00..0xff] if ranges.empty?
    range = ranges.first
    if n = PreMemo[self]
      return n
    end

    table = Array.new(range.end - range.begin + 1)
    each_firstbyte {|byte, rest|
      unless range === byte
        raise "byte not in range"
      end
      if a = rest.empty_action
        table[byte-range.begin] = a
      else
        name_hint2 = nil
        name_hint2 = "#{name_hint}_#{'%02X' % byte}" if name_hint
        table[byte-range.begin] = "&" + rest.generate_node(code, name_hint2, ranges[1..-1])
      end
    }

    if n = PostMemo[table]
      return n
    end

    if !name_hint
      name_hint = "fun_" + NextName.dup
      NextName.succ!
    end

    PreMemo[self] = PostMemo[table] = name_hint

    code << generate_lookup_node(name_hint, table)
    name_hint
  end
end
|
||||
|
||||
# Normalizes a mapping for table generation.
#
# +map+ yields (key, value) pairs.  Any Integer key or value is taken as a
# Unicode code point and replaced by the uppercase hex string of its UTF-8
# encoding; everything else is passed through unchanged.
#
# Returns an Array of [key, value] pairs in iteration order.
def encode_utf8(map)
  # integer means UTF-8 encoded sequence.
  as_hex = lambda {|item|
    Integer === item ? [item].pack("U").unpack("H*")[0].upcase : item
  }
  pairs = []
  map.each {|from, to|
    pairs << [as_hex.call(from), as_hex.call(to)]
  }
  pairs
end
|
||||
|
||||
# Compiles +map+ into a C lookup tree.
#
# name:: name hint for the root node.
# from:: source encoding name; "UTF-8" restricts the second to fourth bytes
#        of the tree to 0x80..0xbf, anything else uses full-range tables.
# map::  (pattern, action) pairs; Integer entries are converted by
#        encode_utf8 first.
#
# Returns [root_node_name, c_source].
def transcode_compile_tree(name, from, map)
  actions = {}
  encode_utf8(map).each {|pattern, action|
    actions[pattern] = action
  }

  ranges = []
  if from == "UTF-8"
    ranges = [0x00..0xff, 0x80..0xbf, 0x80..0xbf, 0x80..0xbf]
  end

  # generate_node appends to this buffer in place.
  code = ''.dup
  defined_name = ActionMap.parse(actions).generate_node(code, name, ranges)
  return defined_name, code
end
|
||||
|
||||
# Names of all rb_transcoder variables generated so far, in generation
# order; read back by transcode_register_code.
TRANSCODERS = []

# Generates the C source for one transcoder from encoding +from+ to
# encoding +to+.
#
# +map+ yields (pattern, action) pairs and is normalized with encode_utf8.
# The lookup tree is named "to_X" when converting from UTF-8, "from_X" when
# converting to UTF-8 and "from_X_to_Y" otherwise, where X and Y are the
# encoding names with every non-alphanumeric character replaced by "_".
#
# Side effect: appends the transcoder's variable name to TRANSCODERS.
# Returns the tree source followed by the rb_transcoder definition.
def transcode_tblgen(from, to, map)
  id_from = from.tr('^0-9A-Za-z', '_')
  id_to = to.tr('^0-9A-Za-z', '_')
  tree_name =
    if from == "UTF-8"
      "to_#{id_to}"
    elsif to == "UTF-8"
      "from_#{id_from}"
    else
      "from_#{id_from}_to_#{id_to}"
    end

  map = encode_utf8(map)
  # The tree may be shared with an earlier identical one, so the name that
  # actually got defined can differ from tree_name.
  real_tree_name, tree_code = transcode_compile_tree(tree_name, from, map)

  transcoder_name = "rb_#{tree_name}"
  TRANSCODERS << transcoder_name

  from_utf8 = (from == 'UTF-8') ? 1 : 0
  # Hex-string actions occupy two characters per output byte; every other
  # action is counted as one byte.
  max_output = map.map {|k,v| String === v ? v.length/2 : 1 }.max

  transcoder_code = <<"End"
static const rb_transcoder
#{transcoder_name} = {
#{c_esc from}, #{c_esc to}, &#{real_tree_name}, #{max_output}, #{from_utf8},
NULL, NULL,
};
End
  tree_code + "\n" + transcoder_code
end
|
||||
|
||||
# Returns the C statements that register every transcoder recorded in
# TRANSCODERS, one rb_register_transcoder call per line, in generation
# order.  Returns an empty string when nothing has been generated.
def transcode_register_code
  TRANSCODERS.inject(''.dup) {|code, transcoder_name|
    code << " rb_register_transcoder(&#{transcoder_name});\n"
  }
end
|
||||
|
||||
Universe = {
|
||||
"singlebyte" => "{00-ff}",
|
||||
"doublebyte" => "{00-ff}{00-ff}",
|
||||
"quadruplebyte" => "{00-ff}{00-ff}{00-ff}{00-ff}",
|
||||
"US-ASCII" => "{00-7f}",
|
||||
"EUC-JP" => <<-End,
|
||||
{00-7f}
|
||||
{a1-fe}{a1-fe}
|
||||
8e{a1-fe}
|
||||
8f{a1-fe}{a1-fe}
|
||||
End
|
||||
"EUC-KR" => <<-End,
|
||||
{00-7f}
|
||||
{a1-fe}{a1-fe}
|
||||
End
|
||||
"EUC-TW" => <<-End,
|
||||
{00-7f}
|
||||
{a1-fe}{a1-fe}
|
||||
8e{a1-b0}{a1-fe}{a1-fe}
|
||||
End
|
||||
"Shift_JIS" => <<-End,
|
||||
{00-7f}
|
||||
{81-9f,e0-fc}{40-7e,80-fc}
|
||||
{a1-df}
|
||||
End
|
||||
"Big5" => <<-End,
|
||||
{00-7f}
|
||||
{a1-fe}{40-7e,a1-fe}
|
||||
End
|
||||
"GBK" => <<-End,
|
||||
{00-80}
|
||||
{81-fe}{40-7e,80-fe}
|
||||
End
|
||||
"CP949" => <<-End,
|
||||
{00-80}
|
||||
{81-fe}{41-5a,61-7a,81-fe}
|
||||
End
|
||||
"UTF-8" => <<-End,
|
||||
{00-7f}
|
||||
{c2-df}{80-bf}
|
||||
e0{a0-bf}{80-bf}
|
||||
{e1-ec}{80-bf}{80-bf}
|
||||
ed{80-9f}{80-bf}
|
||||
{ee-ef}{80-bf}{80-bf}
|
||||
f0{90-bf}{80-bf}{80-bf}
|
||||
{f1-f3}{80-bf}{80-bf}{80-bf}
|
||||
f4{80-8f}{80-bf}{80-bf}
|
||||
End
|
||||
"GB18030" => <<-End,
|
||||
{00-7f}
|
||||
{81-fe}{40-7e,80-fe}
|
||||
{81-fe}{30-93}{81-fe}{30-93}
|
||||
End
|
||||
"UTF-16BE" => <<-End,
|
||||
{00-d7,e0-ff}{00-ff}
|
||||
{d8-db}{00-ff}{dc-df}{00-ff}
|
||||
End
|
||||
"UTF-16LE" => <<-End,
|
||||
{00-ff}{00-d7,e0-ff}
|
||||
{00-ff}{d8-db}{00-ff}{dc-df}
|
||||
End
|
||||
"UTF-32BE" => <<-End,
|
||||
0000{00-d7,e0-ff}{00-ff}
|
||||
00{01-10}{00-ff}{00-ff}
|
||||
End
|
||||
"UTF-32LE" => <<-End,
|
||||
{00-ff}{00-d7,e0-ff}0000
|
||||
{00-ff}{00-ff}{01-10}00
|
||||
End
|
||||
}
|
||||
|
||||
# Returns the signature text recorded in generated files for one source
# file: its quoted name (String#dump), the length of its contents and
# their String#sum checksum.
def make_signature(filename, src)
  fields = [
    "src=#{filename.dump}",
    "len=#{src.length}",
    "checksum=#{src.sum}",
  ]
  fields.join(", ")
end
|
||||
|
||||
# ---- command line driver -------------------------------------------------
#
# Usage: transcode-tblgen.rb [--verbose] [--force] [--output=FILE] TEMPLATE
#
# Expands the ERB template TEMPLATE and writes the result, prefixed by a
# signature comment, to FILE (or to standard output).  Unless --force is
# given, an existing FILE whose recorded signatures still match the current
# sources is only touched, not regenerated.

output_filename = nil
verbose_mode = false
force_mode = false

op = OptionParser.new
op.def_option("--help", "show help message") { puts op; exit 0 }
op.def_option("--verbose", "verbose mode") { verbose_mode = true }
op.def_option("--force", "force table generation") { force_mode = true }
op.def_option("--output=FILE", "specify output file") {|arg| output_filename = arg }
op.parse!

arg = ARGV.shift
# Without a template there is nothing to do; previously this fell through
# to File.dirname(nil) and died with a TypeError.
unless arg
  STDERR.puts op
  exit 1
end
dir = File.dirname(arg)
# Let the template require table files that live next to it.
$:.unshift dir unless $:.include? dir
src = File.read(arg)
src.force_encoding("ascii-8bit") if src.respond_to? :force_encoding
base_signature = "/* autogenerated. */\n"
base_signature << "/* #{make_signature(File.basename(arg), src)} */\n"

if !force_mode && output_filename && File.readable?(output_filename)
  # The signature is the first paragraph of the generated file.  gets
  # returns nil for an empty file, which is treated as "no signature".
  old_signature = File.open(output_filename) {|f| f.gets("").to_s.chomp }
  chk_signature = base_signature.dup
  # Recompute the signature of every other source the old output lists.
  old_signature.each_line {|line|
    if %r{/\* src="([0-9a-z_.-]+)",} =~ line
      name = $1
      next if name == File.basename(arg)
      path = File.join(dir, name)
      if File.readable? path
        chk_signature << "/* #{make_signature(name, File.read(path))} */\n"
      end
    end
  }
  if old_signature == chk_signature
    now = Time.now
    File.utime(now, now, output_filename)
    STDERR.puts "#{output_filename} is already up-to-date." if verbose_mode
    exit
  end
end

if verbose_mode
  if output_filename
    STDERR.print "generate #{output_filename} ..."
  end
end

# Newer erb releases take the trim mode as a keyword argument and no longer
# accept the positional form; older ones only know the positional form.
erb_init = ERB.instance_method(:initialize)
if erb_init.respond_to?(:parameters) && erb_init.parameters.assoc(:key)
  erb = ERB.new(src, :trim_mode => '%')
else
  erb = ERB.new(src, nil, '%')
end

# Libraries loaded while the template runs are the table files it required.
libs1 = $".dup
erb_result = erb.result(binding)
libs2 = $".dup

libs = libs2 - libs1
lib_sigs = ''.dup
libs.each {|lib|
  lib = File.basename(lib)
  path = File.join(dir, lib)
  if File.readable? path
    lib_sigs << "/* #{make_signature(lib, File.read(path))} */\n"
  end
}

result = ''.dup
result << base_signature
result << lib_sigs
result << "\n"
result << erb_result
result << "\n"

if output_filename
  # Write to a temporary name and rename, so a failed run never leaves a
  # truncated output file behind.
  new_filename = output_filename + ".new"
  File.open(new_filename, "w") {|f| f << result }
  File.rename(new_filename, output_filename)
  STDERR.puts " done." if verbose_mode
else
  print result
end
|
Loading…
Reference in a new issue