mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
Add string encoding IBM720 alias CP720 (#3803)
The mapping table is generated from the ICU project: https://github.com/unicode-org/icu/blob/master/icu4c/source/data/mappings/ibm-720_P100-1997.ucm Fixes bug 16233 : https://bugs.ruby-lang.org/issues/16233
This commit is contained in:
parent
2d112c346a
commit
d403591b34
Notes:
git
2020-11-22 22:24:04 +09:00
Merged-By: nurse <naruse@airemix.jp>
6 changed files with 154 additions and 1 deletions
|
@ -61,6 +61,8 @@ OnigEncodingDefine(ascii, ASCII) = {
|
|||
ENC_ALIAS("BINARY", "ASCII-8BIT")
|
||||
ENC_REPLICATE("IBM437", "ASCII-8BIT")
|
||||
ENC_ALIAS("CP437", "IBM437")
|
||||
ENC_REPLICATE("IBM720", "ASCII-8BIT")
|
||||
ENC_ALIAS("CP720", "IBM720")
|
||||
ENC_REPLICATE("IBM737", "ASCII-8BIT")
|
||||
ENC_ALIAS("CP737", "IBM737")
|
||||
ENC_REPLICATE("IBM775", "ASCII-8BIT")
|
||||
|
|
122
enc/trans/ibm720-tbl.rb
Normal file
122
enc/trans/ibm720-tbl.rb
Normal file
|
@ -0,0 +1,122 @@
|
|||
IBM720_TO_UCS_TBL = [
|
||||
["FF",0xA0],
|
||||
["9C",0xA3],
|
||||
["94",0xA4],
|
||||
["AE",0xAB],
|
||||
["F8",0xB0],
|
||||
["FD",0xB2],
|
||||
["E6",0xB5],
|
||||
["FA",0xB7],
|
||||
["AF",0xBB],
|
||||
["85",0xE0],
|
||||
["83",0xE2],
|
||||
["87",0xE7],
|
||||
["8A",0xE8],
|
||||
["82",0xE9],
|
||||
["88",0xEA],
|
||||
["89",0xEB],
|
||||
["8C",0xEE],
|
||||
["8B",0xEF],
|
||||
["93",0xF4],
|
||||
["97",0xF9],
|
||||
["96",0xFB],
|
||||
["98",0x621],
|
||||
["99",0x622],
|
||||
["9A",0x623],
|
||||
["9B",0x624],
|
||||
["9D",0x625],
|
||||
["9E",0x626],
|
||||
["9F",0x627],
|
||||
["A0",0x628],
|
||||
["A1",0x629],
|
||||
["A2",0x62A],
|
||||
["A3",0x62B],
|
||||
["A4",0x62C],
|
||||
["A5",0x62D],
|
||||
["A6",0x62E],
|
||||
["A7",0x62F],
|
||||
["A8",0x630],
|
||||
["A9",0x631],
|
||||
["AA",0x632],
|
||||
["AB",0x633],
|
||||
["AC",0x634],
|
||||
["AD",0x635],
|
||||
["E0",0x636],
|
||||
["E1",0x637],
|
||||
["E2",0x638],
|
||||
["E3",0x639],
|
||||
["E4",0x63A],
|
||||
["95",0x640],
|
||||
["E5",0x641],
|
||||
["E7",0x642],
|
||||
["E8",0x643],
|
||||
["E9",0x644],
|
||||
["EA",0x645],
|
||||
["EB",0x646],
|
||||
["EC",0x647],
|
||||
["ED",0x648],
|
||||
["EE",0x649],
|
||||
["EF",0x64A],
|
||||
["F1",0x64B],
|
||||
["F2",0x64C],
|
||||
["F3",0x64D],
|
||||
["F4",0x64E],
|
||||
["F5",0x64F],
|
||||
["F6",0x650],
|
||||
["91",0x651],
|
||||
["92",0x652],
|
||||
["FC",0x207F],
|
||||
["F9",0x2219],
|
||||
["FB",0x221A],
|
||||
["F7",0x2248],
|
||||
["F0",0x2261],
|
||||
["C4",0x2500],
|
||||
["B3",0x2502],
|
||||
["DA",0x250C],
|
||||
["BF",0x2510],
|
||||
["C0",0x2514],
|
||||
["D9",0x2518],
|
||||
["C3",0x251C],
|
||||
["B4",0x2524],
|
||||
["C2",0x252C],
|
||||
["C1",0x2534],
|
||||
["C5",0x253C],
|
||||
["CD",0x2550],
|
||||
["BA",0x2551],
|
||||
["D5",0x2552],
|
||||
["D6",0x2553],
|
||||
["C9",0x2554],
|
||||
["B8",0x2555],
|
||||
["B7",0x2556],
|
||||
["BB",0x2557],
|
||||
["D4",0x2558],
|
||||
["D3",0x2559],
|
||||
["C8",0x255A],
|
||||
["BE",0x255B],
|
||||
["BD",0x255C],
|
||||
["BC",0x255D],
|
||||
["C6",0x255E],
|
||||
["C7",0x255F],
|
||||
["CC",0x2560],
|
||||
["B5",0x2561],
|
||||
["B6",0x2562],
|
||||
["B9",0x2563],
|
||||
["D1",0x2564],
|
||||
["D2",0x2565],
|
||||
["CB",0x2566],
|
||||
["CF",0x2567],
|
||||
["D0",0x2568],
|
||||
["CA",0x2569],
|
||||
["D8",0x256A],
|
||||
["D7",0x256B],
|
||||
["CE",0x256C],
|
||||
["DF",0x2580],
|
||||
["DC",0x2584],
|
||||
["DB",0x2588],
|
||||
["DD",0x258C],
|
||||
["DE",0x2590],
|
||||
["B0",0x2591],
|
||||
["B1",0x2592],
|
||||
["B2",0x2593],
|
||||
["FE",0x25A0],
|
||||
]
|
|
@ -51,8 +51,9 @@
|
|||
transcode_tblgen_singlebyte "WINDOWS-1256"
|
||||
transcode_tblgen_singlebyte "WINDOWS-1257"
|
||||
transcode_tblgen_singlebyte "IBM437"
|
||||
transcode_tblgen_singlebyte "IBM775"
|
||||
transcode_tblgen_singlebyte "IBM720"
|
||||
transcode_tblgen_singlebyte "IBM737"
|
||||
transcode_tblgen_singlebyte "IBM775"
|
||||
transcode_tblgen_singlebyte "IBM852"
|
||||
transcode_tblgen_singlebyte "IBM855"
|
||||
transcode_tblgen_singlebyte "IBM857"
|
||||
|
|
|
@ -507,6 +507,7 @@ static UINT ole_encoding2cp(rb_encoding *enc)
|
|||
ENC_MACHING_CP(enc, "GB2312", 20936);
|
||||
ENC_MACHING_CP(enc, "GBK", 936);
|
||||
ENC_MACHING_CP(enc, "IBM437", 437);
|
||||
ENC_MACHING_CP(enc, "IBM720", 720);
|
||||
ENC_MACHING_CP(enc, "IBM737", 737);
|
||||
ENC_MACHING_CP(enc, "IBM775", 775);
|
||||
ENC_MACHING_CP(enc, "IBM852", 852);
|
||||
|
|
|
@ -100,6 +100,14 @@ describe "String#valid_encoding?" do
|
|||
str.force_encoding('UTF8-MAC').valid_encoding?.should be_true
|
||||
end
|
||||
|
||||
ruby_version_is '3.0' do
|
||||
it "returns true for IBM720 encoding self is valid in" do
|
||||
str = "\u{6754}"
|
||||
str.force_encoding('IBM720').valid_encoding?.should be_true
|
||||
str.force_encoding('CP720').valid_encoding?.should be_true
|
||||
end
|
||||
end
|
||||
|
||||
it "returns false if self is valid in one encoding, but invalid in the one it's tagged with" do
|
||||
str = "\u{8765}"
|
||||
str.valid_encoding?.should be_true
|
||||
|
|
|
@ -469,6 +469,25 @@ class TestTranscode < Test::Unit::TestCase
|
|||
check_both_ways("\u00A0", "\xFF", 'IBM437') # non-breaking space
|
||||
end
|
||||
|
||||
def test_IBM720
|
||||
assert_raise(Encoding::UndefinedConversionError) { "\x80".encode("utf-8", 'IBM720') }
|
||||
assert_raise(Encoding::UndefinedConversionError) { "\x8F".encode("utf-8", 'IBM720') }
|
||||
assert_raise(Encoding::UndefinedConversionError) { "\x90".encode("utf-8", 'IBM720') }
|
||||
check_both_ways("\u0627", "\x9F", 'IBM720') # ا
|
||||
check_both_ways("\u0628", "\xA0", 'IBM720') # ب
|
||||
check_both_ways("\u00BB", "\xAF", 'IBM720') # »
|
||||
check_both_ways("\u2591", "\xB0", 'IBM720') # ░
|
||||
check_both_ways("\u2510", "\xBF", 'IBM720') # ┐
|
||||
check_both_ways("\u2514", "\xC0", 'IBM720') # └
|
||||
check_both_ways("\u2567", "\xCF", 'IBM720') # ╧
|
||||
check_both_ways("\u2568", "\xD0", 'IBM720') # ╨
|
||||
check_both_ways("\u2580", "\xDF", 'IBM720') # ▀
|
||||
check_both_ways("\u0636", "\xE0", 'IBM720') # ض
|
||||
check_both_ways("\u064A", "\xEF", 'IBM720') # ي
|
||||
check_both_ways("\u2261", "\xF0", 'IBM720') # ≡
|
||||
check_both_ways("\u00A0", "\xFF", 'IBM720') # non-breaking space
|
||||
end
|
||||
|
||||
def test_IBM775
|
||||
check_both_ways("\u0106", "\x80", 'IBM775') # Ć
|
||||
check_both_ways("\u00C5", "\x8F", 'IBM775') # Å
|
||||
|
|
Loading…
Add table
Reference in a new issue