1
0
Fork 0
mirror of https://github.com/ruby/ruby.git synced 2022-11-09 12:17:21 -05:00

Add string encoding IBM720 alias CP720 (#3803)

The mapping table is generated from the ICU project:
  https://github.com/unicode-org/icu/blob/master/icu4c/source/data/mappings/ibm-720_P100-1997.ucm

Fixes bug 16233: https://bugs.ruby-lang.org/issues/16233
This commit is contained in:
Lars Kanis 2020-11-22 14:23:40 +01:00 committed by GitHub
parent 2d112c346a
commit d403591b34
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
Notes: git 2020-11-22 22:24:04 +09:00
Merged-By: nurse <naruse@airemix.jp>
6 changed files with 154 additions and 1 deletions

View file

@ -61,6 +61,8 @@ OnigEncodingDefine(ascii, ASCII) = {
ENC_ALIAS("BINARY", "ASCII-8BIT")
ENC_REPLICATE("IBM437", "ASCII-8BIT")
ENC_ALIAS("CP437", "IBM437")
ENC_REPLICATE("IBM720", "ASCII-8BIT")
ENC_ALIAS("CP720", "IBM720")
ENC_REPLICATE("IBM737", "ASCII-8BIT")
ENC_ALIAS("CP737", "IBM737")
ENC_REPLICATE("IBM775", "ASCII-8BIT")

122
enc/trans/ibm720-tbl.rb Normal file
View file

@ -0,0 +1,122 @@
# Mapping table for the single-byte IBM720 encoding.
#
# Each entry is a pair [byte, code_point]: the byte is a two-digit uppercase
# hex string, the code point an Integer literal.  Entries are listed in
# ascending code point order, not in byte order.
#
# Only bytes in the range 0x82-0xFF appear here.  Bytes 0x80, 0x81, 0x84,
# 0x86 and 0x8D-0x90 have no entry.  Bytes 0x00-0x7F are not listed either.
# NOTE(review): presumably the 0x00-0x7F range is supplied as plain ASCII by
# whatever consumes this table -- confirm against the table generator.
#
# The data is generated from the ICU project's ibm-720_P100-1997.ucm mapping.
IBM720_TO_UCS_TBL = [
# U+00A0-U+00FB: Latin-1 symbols and accented Latin small letters
["FF",0xA0],
["9C",0xA3],
["94",0xA4],
["AE",0xAB],
["F8",0xB0],
["FD",0xB2],
["E6",0xB5],
["FA",0xB7],
["AF",0xBB],
["85",0xE0],
["83",0xE2],
["87",0xE7],
["8A",0xE8],
["82",0xE9],
["88",0xEA],
["89",0xEB],
["8C",0xEE],
["8B",0xEF],
["93",0xF4],
["97",0xF9],
["96",0xFB],
# U+0621-U+0652: Arabic letters and marks
["98",0x621],
["99",0x622],
["9A",0x623],
["9B",0x624],
["9D",0x625],
["9E",0x626],
["9F",0x627],
["A0",0x628],
["A1",0x629],
["A2",0x62A],
["A3",0x62B],
["A4",0x62C],
["A5",0x62D],
["A6",0x62E],
["A7",0x62F],
["A8",0x630],
["A9",0x631],
["AA",0x632],
["AB",0x633],
["AC",0x634],
["AD",0x635],
["E0",0x636],
["E1",0x637],
["E2",0x638],
["E3",0x639],
["E4",0x63A],
["95",0x640],
["E5",0x641],
["E7",0x642],
["E8",0x643],
["E9",0x644],
["EA",0x645],
["EB",0x646],
["EC",0x647],
["ED",0x648],
["EE",0x649],
["EF",0x64A],
["F1",0x64B],
["F2",0x64C],
["F3",0x64D],
["F4",0x64E],
["F5",0x64F],
["F6",0x650],
["91",0x651],
["92",0x652],
# U+207F-U+2261: superscript n and mathematical symbols
["FC",0x207F],
["F9",0x2219],
["FB",0x221A],
["F7",0x2248],
["F0",0x2261],
# U+2500-U+256C: box-drawing characters
["C4",0x2500],
["B3",0x2502],
["DA",0x250C],
["BF",0x2510],
["C0",0x2514],
["D9",0x2518],
["C3",0x251C],
["B4",0x2524],
["C2",0x252C],
["C1",0x2534],
["C5",0x253C],
["CD",0x2550],
["BA",0x2551],
["D5",0x2552],
["D6",0x2553],
["C9",0x2554],
["B8",0x2555],
["B7",0x2556],
["BB",0x2557],
["D4",0x2558],
["D3",0x2559],
["C8",0x255A],
["BE",0x255B],
["BD",0x255C],
["BC",0x255D],
["C6",0x255E],
["C7",0x255F],
["CC",0x2560],
["B5",0x2561],
["B6",0x2562],
["B9",0x2563],
["D1",0x2564],
["D2",0x2565],
["CB",0x2566],
["CF",0x2567],
["D0",0x2568],
["CA",0x2569],
["D8",0x256A],
["D7",0x256B],
["CE",0x256C],
# U+2580-U+2593: block elements and shades; U+25A0: black square
["DF",0x2580],
["DC",0x2584],
["DB",0x2588],
["DD",0x258C],
["DE",0x2590],
["B0",0x2591],
["B1",0x2592],
["B2",0x2593],
["FE",0x25A0],
]

View file

@ -51,8 +51,9 @@
transcode_tblgen_singlebyte "WINDOWS-1256"
transcode_tblgen_singlebyte "WINDOWS-1257"
transcode_tblgen_singlebyte "IBM437"
transcode_tblgen_singlebyte "IBM775"
transcode_tblgen_singlebyte "IBM720"
transcode_tblgen_singlebyte "IBM737"
transcode_tblgen_singlebyte "IBM775"
transcode_tblgen_singlebyte "IBM852"
transcode_tblgen_singlebyte "IBM855"
transcode_tblgen_singlebyte "IBM857"

View file

@ -507,6 +507,7 @@ static UINT ole_encoding2cp(rb_encoding *enc)
ENC_MACHING_CP(enc, "GB2312", 20936);
ENC_MACHING_CP(enc, "GBK", 936);
ENC_MACHING_CP(enc, "IBM437", 437);
ENC_MACHING_CP(enc, "IBM720", 720);
ENC_MACHING_CP(enc, "IBM737", 737);
ENC_MACHING_CP(enc, "IBM775", 775);
ENC_MACHING_CP(enc, "IBM852", 852);

View file

@ -100,6 +100,14 @@ describe "String#valid_encoding?" do
str.force_encoding('UTF8-MAC').valid_encoding?.should be_true
end
ruby_version_is '3.0' do
  # The same string is re-tagged under each name in turn (IBM720 first, then
  # CP720) and must report a valid encoding both times.
  it "returns true for IBM720 encoding self is valid in" do
    sample = "\u{6754}"
    %w[IBM720 CP720].each do |encoding_name|
      sample.force_encoding(encoding_name).valid_encoding?.should be_true
    end
  end
end
it "returns false if self is valid in one encoding, but invalid in the one it's tagged with" do
str = "\u{8765}"
str.valid_encoding?.should be_true

View file

@ -469,6 +469,25 @@ class TestTranscode < Test::Unit::TestCase
check_both_ways("\u00A0", "\xFF", 'IBM437') # non-breaking space
end
def test_IBM720
assert_raise(Encoding::UndefinedConversionError) { "\x80".encode("utf-8", 'IBM720') }
assert_raise(Encoding::UndefinedConversionError) { "\x8F".encode("utf-8", 'IBM720') }
assert_raise(Encoding::UndefinedConversionError) { "\x90".encode("utf-8", 'IBM720') }
check_both_ways("\u0627", "\x9F", 'IBM720') # ا
check_both_ways("\u0628", "\xA0", 'IBM720') # ب
check_both_ways("\u00BB", "\xAF", 'IBM720') # »
check_both_ways("\u2591", "\xB0", 'IBM720') # ░
check_both_ways("\u2510", "\xBF", 'IBM720') # ┐
check_both_ways("\u2514", "\xC0", 'IBM720') # └
check_both_ways("\u2567", "\xCF", 'IBM720') # ╧
check_both_ways("\u2568", "\xD0", 'IBM720') # ╨
check_both_ways("\u2580", "\xDF", 'IBM720') # ▀
check_both_ways("\u0636", "\xE0", 'IBM720') # ض
check_both_ways("\u064A", "\xEF", 'IBM720') # ي
check_both_ways("\u2261", "\xF0", 'IBM720') # ≡
check_both_ways("\u00A0", "\xFF", 'IBM720') # non-breaking space
end
def test_IBM775
check_both_ways("\u0106", "\x80", 'IBM775') # Ć
check_both_ways("\u00C5", "\x8F", 'IBM775') # Å