mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
* enc/iso_8859_5.c: Large omicron should lowercase to small omicron.
* test/ruby/test_big5.rb, test/ruby/test_cp949.rb, test/ruby/test_euc_jp.rb, test/ruby/test_euc_kr.rb, test/ruby/test_euc_tw.rb, test/ruby/test_gb18030.rb, test/ruby/test_gbk.rb, test/ruby/test_iso_8859.rb, test/ruby/test_koi8.rb, test/ruby/test_shift_jis.rb, test/ruby/test_windows_1251.rb: new tests for encoding. * test/ruby/test_utf16.rb, test/ruby/test_utf32.rb, test/ruby/test_regexp.rb: add tests. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@16759 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
parent
e58dc6a968
commit
65670f9400
16 changed files with 699 additions and 3 deletions
14
ChangeLog
14
ChangeLog
|
@ -1,3 +1,17 @@
|
|||
Mon Jun 2 22:27:57 2008 Yusuke Endoh <mame@tsg.ne.jp>
|
||||
|
||||
* enc/iso_8859_5.c: Large omicron (Cyrillic capital letter O, 0xbe) should lowercase to small omicron (Cyrillic small letter o, 0xde), not to 0xdf.
|
||||
|
||||
* test/ruby/test_big5.rb, test/ruby/test_cp949.rb,
|
||||
test/ruby/test_euc_jp.rb, test/ruby/test_euc_kr.rb,
|
||||
test/ruby/test_euc_tw.rb, test/ruby/test_gb18030.rb,
|
||||
test/ruby/test_gbk.rb, test/ruby/test_iso_8859.rb,
|
||||
test/ruby/test_koi8.rb, test/ruby/test_shift_jis.rb,
|
||||
test/ruby/test_windows_1251.rb: new tests for encoding.
|
||||
|
||||
* test/ruby/test_utf16.rb, test/ruby/test_utf32.rb,
|
||||
test/ruby/test_regexp.rb: add tests.
|
||||
|
||||
Mon Jun 2 21:56:47 2008 Yusuke Endoh <mame@tsg.ne.jp>
|
||||
|
||||
* test/ruby/test_file.rb: add tests for uninitialized object.
|
||||
|
|
|
@ -167,7 +167,7 @@ static const OnigPairCaseFoldCodes CaseFoldMap[] = {
|
|||
{ 0xbb, 0xdb },
|
||||
{ 0xbc, 0xdc },
|
||||
{ 0xbd, 0xdd },
|
||||
{ 0xbe, 0xdf },
|
||||
{ 0xbe, 0xde },
|
||||
{ 0xbf, 0xdf },
|
||||
|
||||
{ 0xc0, 0xe0 },
|
||||
|
|
28
test/ruby/test_big5.rb
Normal file
28
test/ruby/test_big5.rb
Normal file
|
@ -0,0 +1,28 @@
|
|||
require "test/unit"
|
||||
|
||||
# Exercises the Big5 encoding through String and Regexp: character length,
# code point <-> bytes conversion, case-insensitive back-references and
# character-boundary detection, all with the two-byte sequence "\xa1\xa1".
class TestBig5 < Test::Unit::TestCase
  # Tags the given string as Big5 (in place) and returns it.
  def s(s)
    s.force_encoding("big5")
  end

  # Two bytes must count as a single character.
  def test_mbc_enc_len
    assert_equal(1, s("\xa1\xa1").size)
  end

  # The code point is the big-endian value of the two bytes.
  def test_mbc_to_code
    assert_equal(0xa1a1, s("\xa1\xa1").ord)
  end

  # Integer#chr must produce the same two bytes back.
  def test_code_to_mbc
    assert_equal(s("\xa1\xa1"), 0xa1a1.chr("big5"))
  end

  # A case-insensitive back-reference must still match the identical character.
  def test_mbc_case_fold
    r = Regexp.new(s("(\xa1\xa1)\\1"), "i")
    assert_match(r, s("\xa1\xa1\xa1\xa1"))
  end

  # chop has to find the head of the last character, removing two bytes.
  def test_left_adjust_char_head
    assert_equal(s("\xa1\xa1"), s("\xa1\xa1\xa1\xa1").chop)
  end
end
|
28
test/ruby/test_cp949.rb
Normal file
28
test/ruby/test_cp949.rb
Normal file
|
@ -0,0 +1,28 @@
|
|||
require "test/unit"
|
||||
|
||||
# Exercises the CP949 encoding through String and Regexp: character length,
# code point <-> bytes conversion, case-insensitive back-references and
# character-boundary detection, all with the two-byte sequence "\xa1\xa1".
class TestCP949 < Test::Unit::TestCase
  # Tags the given string as CP949 (in place) and returns it.
  def s(s)
    s.force_encoding("cp949")
  end

  # Two bytes must count as a single character.
  def test_mbc_enc_len
    assert_equal(1, s("\xa1\xa1").size)
  end

  # The code point is the big-endian value of the two bytes.
  def test_mbc_to_code
    assert_equal(0xa1a1, s("\xa1\xa1").ord)
  end

  # Integer#chr must produce the same two bytes back.
  def test_code_to_mbc
    assert_equal(s("\xa1\xa1"), 0xa1a1.chr("cp949"))
  end

  # A case-insensitive back-reference must still match the identical character.
  def test_mbc_case_fold
    r = Regexp.new(s("(\xa1\xa1)\\1"), "i")
    assert_match(r, s("\xa1\xa1\xa1\xa1"))
  end

  # chop has to find the head of the last character, removing two bytes.
  def test_left_adjust_char_head
    assert_equal(s("\xa1\xa1"), s("\xa1\xa1\xa1\xa1").chop)
  end
end
|
20
test/ruby/test_euc_jp.rb
Normal file
20
test/ruby/test_euc_jp.rb
Normal file
|
@ -0,0 +1,20 @@
|
|||
# vim: set fileencoding=euc-jp
|
||||
|
||||
require "test/unit"
|
||||
|
||||
# Regexp tests for EUC-JP source text: case folding with back-references and
# the Hiragana/Katakana character properties.
class TestEUC_JP < Test::Unit::TestCase
  def test_mbc_case_fold
    # NOTE(review): with plain ASCII "a" as written here, the second assertion
    # cannot hold (/i lets \1 = "a" match "A"). The first group and its
    # counterparts in the subject strings were presumably multibyte (fullwidth)
    # letters that got normalised to ASCII -- confirm against the original
    # EUC-JP encoded file before relying on this test.
    assert_match(/(a)(a)\1\2/i, "aaaA")
    assert_no_match(/(a)(a)\1\2/i, "aaAA")
  end

  # Each pattern starts with a zero-width multibyte atom (repeated 0 times);
  # presumably this keeps the regexp in the source encoding rather than ASCII.
  def test_property
    assert_match(/あ{0}\p{Hiragana}{4}/, "ひらがな")
    assert_no_match(/あ{0}\p{Hiragana}{4}/, "カタカナ")
    assert_no_match(/あ{0}\p{Hiragana}{4}/, "漢字漢字")
    assert_no_match(/あ{0}\p{Katakana}{4}/, "ひらがな")
    assert_match(/あ{0}\p{Katakana}{4}/, "カタカナ")
    assert_no_match(/あ{0}\p{Katakana}{4}/, "漢字漢字")
    # An unknown property name must be rejected when the regexp is compiled.
    assert_raise(RegexpError) { Regexp.new('あ{0}\p{foobarbaz}') }
  end
end
|
28
test/ruby/test_euc_kr.rb
Normal file
28
test/ruby/test_euc_kr.rb
Normal file
|
@ -0,0 +1,28 @@
|
|||
require "test/unit"
|
||||
|
||||
# Exercises the EUC-KR encoding through String and Regexp: character length,
# code point <-> bytes conversion, case-insensitive back-references and
# character-boundary detection, all with the two-byte sequence "\xa1\xa1".
class TestEucKr < Test::Unit::TestCase
  # Tags the given string as EUC-KR (in place) and returns it.
  def s(s)
    s.force_encoding("euc-kr")
  end

  # Two bytes must count as a single character.
  def test_mbc_enc_len
    assert_equal(1, s("\xa1\xa1").size)
  end

  # The code point is the big-endian value of the two bytes.
  def test_mbc_to_code
    assert_equal(0xa1a1, s("\xa1\xa1").ord)
  end

  # Integer#chr must produce the same two bytes back.
  def test_code_to_mbc
    assert_equal(s("\xa1\xa1"), 0xa1a1.chr("euc-kr"))
  end

  # A case-insensitive back-reference must still match the identical character.
  def test_mbc_case_fold
    r = Regexp.new(s("(\xa1\xa1)\\1"), "i")
    assert_match(r, s("\xa1\xa1\xa1\xa1"))
  end

  # chop has to find the head of the last character, removing two bytes.
  def test_left_adjust_char_head
    assert_equal(s("\xa1\xa1"), s("\xa1\xa1\xa1\xa1").chop)
  end
end
|
28
test/ruby/test_euc_tw.rb
Normal file
28
test/ruby/test_euc_tw.rb
Normal file
|
@ -0,0 +1,28 @@
|
|||
require "test/unit"
|
||||
|
||||
# Exercises the EUC-TW encoding through String and Regexp: character length,
# code point <-> bytes conversion, case-insensitive back-references and
# character-boundary detection, all with the two-byte sequence "\xa1\xa1".
class TestEucTw < Test::Unit::TestCase
  # Tags the given string as EUC-TW (in place) and returns it.
  def s(s)
    s.force_encoding("euc-tw")
  end

  # Two bytes must count as a single character.
  def test_mbc_enc_len
    assert_equal(1, s("\xa1\xa1").size)
  end

  # The code point is the big-endian value of the two bytes.
  def test_mbc_to_code
    assert_equal(0xa1a1, s("\xa1\xa1").ord)
  end

  # Integer#chr must produce the same two bytes back.
  def test_code_to_mbc
    assert_equal(s("\xa1\xa1"), 0xa1a1.chr("euc-tw"))
  end

  # A case-insensitive back-reference must still match the identical character.
  def test_mbc_case_fold
    r = Regexp.new(s("(\xa1\xa1)\\1"), "i")
    assert_match(r, s("\xa1\xa1\xa1\xa1"))
  end

  # chop has to find the head of the last character, removing two bytes.
  def test_left_adjust_char_head
    assert_equal(s("\xa1\xa1"), s("\xa1\xa1\xa1\xa1").chop)
  end
end
|
125
test/ruby/test_gb18030.rb
Normal file
125
test/ruby/test_gb18030.rb
Normal file
|
@ -0,0 +1,125 @@
|
|||
require "test/unit"
|
||||
|
||||
# Exercises the GB18030 encoding through String and Regexp. Besides the basic
# length / code point / case-fold checks, test_left_adjust_char_head walks the
# backward-scanning state machine that locates the head of the last character.
class TestGB18030 < Test::Unit::TestCase
  # Tags the given string as GB18030 (in place) and returns it.
  def s(s)
    s.force_encoding("gb18030")
  end

  # Both a two-byte and a four-byte sequence must count as one character.
  def test_mbc_enc_len
    assert_equal(1, s("\x81\x40").size)
    assert_equal(1, s("\x81\x30\x81\x30").size)
  end

  def test_mbc_to_code
    assert_equal(0x8140, s("\x81\x40").ord)
  end

  def test_code_to_mbc
    assert_equal(s("\x81\x40"), 0x8140.chr("gb18030"))
  end

  # A case-insensitive back-reference must still match the identical character.
  def test_mbc_case_fold
    r = Regexp.new(s("(\x81\x40)\\1"), "i")
    assert_match(r, s("\x81\x40\x81\x40"))
  end

  # +c+ lists the bytes of the string from its END backwards (the order in
  # which a backward scan meets them). Asserts that chop on the resulting
  # string removes exactly the last +i+ bytes.
  def scheck(c, i)
    assert_equal(s(c.reverse.take(c.size - i).join), s(c.reverse.join).chop)
  end

  # Same byte ordering as scheck; asserts that chop raises ArgumentError.
  def fcheck(c)
    assert_raise(ArgumentError) { s(c.reverse.join).chop }
  end

  def test_left_adjust_char_head
    # C1: 00-2f, 3a-3f, 7f, ff
    # C2: 40-7e, 80
    # C4: 30-39
    # CM: 81-fe
    c1 = "\x2f"
    c2 = "\x40"
    c4 = "\x30"
    cm = "\x81"

    # S_START-c1
    # S_START-c2-S_one_C2-0
    # S_START-c2-S_one_C2-c1
    # S_START-c2-S_one_C2-cm-S_odd_CM_one_CX-c1
    # S_START-c2-S_one_C2-cm-S_odd_CM_one_CX-cm-S_even_CM_one_CX-c1
    # S_START-c2-S_one_C2-cm-S_odd_CM_one_CX-cm-S_even_CM_one_CX-cm-S_odd_CM_one_CX(rec)
    # S_START-c4-S_one_C4-c1
    # S_START-c4-S_one_C4-cm-S_one_CMC4-c1
    # S_START-c4-S_one_C4-cm-S_one_CMC4-c4-S_one_C4_odd_CMC4-c1
    # S_START-c4-S_one_C4-cm-S_one_CMC4-c4-S_one_C4_odd_CMC4-cm-S_even_CMC4-c1
    # S_START-c4-S_one_C4-cm-S_one_CMC4-c4-S_one_C4_odd_CMC4-cm-S_even_CMC4-c4-S_one_C4_even_CMC4-c1
    # S_START-c4-S_one_C4-cm-S_one_CMC4-c4-S_one_C4_odd_CMC4-cm-S_even_CMC4-c4-S_one_C4_even_CMC4-cm-S_odd_CMC4-c1
    # S_START-c4-S_one_C4-cm-S_one_CMC4-c4-S_one_C4_odd_CMC4-cm-S_even_CMC4-c4-S_one_C4_even_CMC4-cm-S_odd_CMC4-c4-S_one_C4_odd_CMC4(rec)
    # S_START-c4-S_one_C4-cm-S_one_CMC4-c4-S_one_C4_odd_CMC4-cm-S_even_CMC4-c4-S_one_C4_even_CMC4-cm-S_odd_CMC4-cm-S_odd_CM_odd_CMC4-c1
    # S_START-c4-S_one_C4-cm-S_one_CMC4-c4-S_one_C4_odd_CMC4-cm-S_even_CMC4-c4-S_one_C4_even_CMC4-cm-S_odd_CMC4-cm-S_odd_CM_odd_CMC4-cm-S_even_CM_odd_CMC4-c1
    # S_START-c4-S_one_C4-cm-S_one_CMC4-c4-S_one_C4_odd_CMC4-cm-S_even_CMC4-c4-S_one_C4_even_CMC4-cm-S_odd_CMC4-cm-S_odd_CM_odd_CMC4-cm-S_even_CM_odd_CMC4-cm-S_odd_CM_odd_CMC4(rec)
    # S_START-c4-S_one_C4-cm-S_one_CMC4-c4-S_one_C4_odd_CMC4-cm-S_even_CMC4-cm-S_odd_CM_even_CMC4-c1
    # S_START-c4-S_one_C4-cm-S_one_CMC4-c4-S_one_C4_odd_CMC4-cm-S_even_CMC4-cm-S_odd_CM_even_CMC4-cm-S_even_CM_even_CMC4-c1
    # S_START-c4-S_one_C4-cm-S_one_CMC4-c4-S_one_C4_odd_CMC4-cm-S_even_CMC4-cm-S_odd_CM_even_CMC4-cm-S_even_CM_even_CMC4-cm-S_odd_CM_even_CMC4(rec)
    # S_START-c4-S_one_C4-cm-S_one_CMC4-cm-S_even_CM_one_CX(rec)
    # S_START-cm-S_one_CM-c1
    # S_START-cm-S_one_CM-c4-S_odd_C4CM-c1
    # S_START-cm-S_one_CM-c4-S_odd_C4CM-cm-S_one_CM_odd_C4CM-c1
    # S_START-cm-S_one_CM-c4-S_odd_C4CM-cm-S_one_CM_odd_C4CM-c4-S_even_C4CM-c1
    # S_START-cm-S_one_CM-c4-S_odd_C4CM-cm-S_one_CM_odd_C4CM-c4-S_even_C4CM-cm-S_one_CM_even_C4CM-c1
    # S_START-cm-S_one_CM-c4-S_odd_C4CM-cm-S_one_CM_odd_C4CM-c4-S_even_C4CM-cm-S_one_CM_even_C4CM-c4-S_odd_C4CM(rec)
    # S_START-cm-S_one_CM-c4-S_odd_C4CM-cm-S_one_CM_odd_C4CM-c4-S_even_C4CM-cm-S_one_CM_even_C4CM-cm-S_even_CM_even_C4CM-c1
    # S_START-cm-S_one_CM-c4-S_odd_C4CM-cm-S_one_CM_odd_C4CM-c4-S_even_C4CM-cm-S_one_CM_even_C4CM-cm-S_even_CM_even_C4CM-cm-S_odd_CM_even_C4CM-c1
    # S_START-cm-S_one_CM-c4-S_odd_C4CM-cm-S_one_CM_odd_C4CM-c4-S_even_C4CM-cm-S_one_CM_even_C4CM-cm-S_even_CM_even_C4CM-cm-S_odd_CM_even_C4CM-cm-S_even_CM_even_C4CM(rec)
    # S_START-cm-S_one_CM-c4-S_odd_C4CM-cm-S_one_CM_odd_C4CM-cm-S_even_CM_odd_C4CM-c1
    # S_START-cm-S_one_CM-c4-S_odd_C4CM-cm-S_one_CM_odd_C4CM-cm-S_even_CM_odd_C4CM-cm-S_odd_CM_odd_C4CM-c1
    # S_START-cm-S_one_CM-c4-S_odd_C4CM-cm-S_one_CM_odd_C4CM-cm-S_even_CM_odd_C4CM-cm-S_odd_CM_odd_C4CM-cm-S_even_CM_odd_C4CM(rec)
    # S_START-cm-S_one_CM-cm-S_odd_CM_one_CX(rec)

    scheck([c1], 1)
    scheck([c2], 1)
    scheck([c2, c1], 1)
    scheck([c2, cm, c1], 2)
    scheck([c2, cm, cm, c1], 1)
    scheck([c2, cm, cm, cm], 2)
    scheck([c4], 1)
    scheck([c4, c1], 1)
    scheck([c4, cm], 2)
    fcheck([c4, cm, c1])
    fcheck([c4, cm, c4, c1])
    scheck([c4, cm, c4, cm], 4)
    scheck([c4, cm, c4, cm, c1], 4)
    scheck([c4, cm, c4, cm, c4], 4)
    scheck([c4, cm, c4, cm, c4, c1], 4)
    fcheck([c4, cm, c4, cm, c4, cm])
    fcheck([c4, cm, c4, cm, c4, cm, c1])
    fcheck([c4, cm, c4, cm, c4, cm, c4])
    scheck([c4, cm, c4, cm, c4, cm, cm, c1], 4)
    fcheck([c4, cm, c4, cm, c4, cm, cm, cm])
    fcheck([c4, cm, c4, cm, c4, cm, cm, cm, c1])
    scheck([c4, cm, c4, cm, c4, cm, cm, cm, cm], 4)
    fcheck([c4, cm, c4, cm, cm, c1])
    scheck([c4, cm, c4, cm, cm, cm], 4)
    scheck([c4, cm, c4, cm, cm, cm, c1], 4)
    fcheck([c4, cm, c4, cm, cm, cm, cm])
    scheck([c4, cm, cm], 1)
    scheck([cm], 1)
    fcheck([cm, c1])
    fcheck([cm, c4, c1])
    scheck([cm, c4, cm], 3)
    fcheck([cm, c4, cm, c1])
    fcheck([cm, c4, cm, c4])
    fcheck([cm, c4, cm, c4, c1])
    fcheck([cm, c4, cm, c4, cm])
    fcheck([cm, c4, cm, c4, cm, c1])
    fcheck([cm, c4, cm, c4, cm, c4])
    fcheck([cm, c4, cm, c4, cm, cm, c1])
    fcheck([cm, c4, cm, c4, cm, cm, cm])
    fcheck([cm, c4, cm, c4, cm, cm, cm, c1])
    fcheck([cm, c4, cm, c4, cm, cm, cm, cm])
    fcheck([cm, c4, cm, cm, c1])
    fcheck([cm, c4, cm, cm, cm])
    fcheck([cm, c4, cm, cm, cm, c1])
    fcheck([cm, c4, cm, cm, cm, cm])
    scheck([cm, cm], 2)
  end
end
|
28
test/ruby/test_gbk.rb
Normal file
28
test/ruby/test_gbk.rb
Normal file
|
@ -0,0 +1,28 @@
|
|||
require "test/unit"
|
||||
|
||||
# Exercises the GBK encoding through String and Regexp: character length,
# code point <-> bytes conversion, case-insensitive back-references and
# character-boundary detection, all with the two-byte sequence "\x81\x40".
class TestGBK < Test::Unit::TestCase
  # Tags the given string as GBK (in place) and returns it.
  def s(s)
    s.force_encoding("gbk")
  end

  # Two bytes must count as a single character.
  def test_mbc_enc_len
    assert_equal(1, s("\x81\x40").size)
  end

  # The code point is the big-endian value of the two bytes.
  def test_mbc_to_code
    assert_equal(0x8140, s("\x81\x40").ord)
  end

  # Integer#chr must produce the same two bytes back.
  def test_code_to_mbc
    assert_equal(s("\x81\x40"), 0x8140.chr("gbk"))
  end

  # A case-insensitive back-reference must still match the identical character.
  def test_mbc_case_fold
    r = Regexp.new(s("(\x81\x40)\\1"), "i")
    assert_match(r, s("\x81\x40\x81\x40"))
  end

  # chop has to find the head of the last character, removing two bytes.
  def test_left_adjust_char_head
    assert_equal(s("\x81\x40"), s("\x81\x40\x81\x40").chop)
  end
end
|
163
test/ruby/test_iso_8859.rb
Normal file
163
test/ruby/test_iso_8859.rb
Normal file
|
@ -0,0 +1,163 @@
|
|||
require 'test/unit'
|
||||
|
||||
# Case-folding tests for the ISO-8859 family. Every test evaluates a snippet of
# source under an "# encoding:" magic comment so that the regexp literals in
# the snippet are compiled in the encoding being tested.
class TestISO8859 < Test::Unit::TestCase
  # Assertion source shared by the parts whose upper/lower pairs are 0x20
  # apart in 0xc0..0xde (except 0xd7) and where 0xdf folds to "ss". The
  # placeholder ENCODING is replaced with the encoding name before eval.
  ASSERTS = %q(
      assert_match(/^(\xdf)\1$/i, "\xdf\xdf")
      assert_match(/^(\xdf)\1$/i, "ssss")
      # assert_match(/^(\xdf)\1$/i, "\xdfss") # this must be bug...
      assert_match(/^[\xdfz]+$/i, "sszzsszz")
      assert_match(/^SS$/i, "\xdf")
      assert_match(/^Ss$/i, "\xdf")
      ((0xc0..0xde).to_a - [0xd7]).each do |c|
        c1 = c.chr("ENCODING")
        c2 = (c + 0x20).chr("ENCODING")
        assert_match(/^(#{ c1 })\1$/i, c2 + c1)
        assert_match(/^(#{ c2 })\1$/i, c1 + c2)
        assert_match(/^[#{ c1 }]+$/i, c2 + c1)
        assert_match(/^[#{ c2 }]+$/i, c1 + c2)
      end
      assert_match(/^\xff$/i, "\xff")
  )

  def test_iso_8859_1
    eval("# encoding: iso8859-1\n" + ASSERTS.gsub(/ENCODING/m, "iso8859-1"))
  end

  def test_iso_8859_2
    eval("# encoding: iso8859-2\n" + ASSERTS.gsub(/ENCODING/m, "iso8859-2"))
  end

  # Part 3 has its own table: pairs 0x10 apart in the 0xa1..0xaf area, and the
  # usual 0x20 offset for 0xc0..0xde minus 0xc3, 0xd0 and 0xd7.
  def test_iso_8859_3
    eval(%q(# encoding: iso8859-3
      assert_match(/^(\xdf)\1$/i, "\xdf\xdf")
      assert_match(/^(\xdf)\1$/i, "ssss")
      assert_match(/^[\xdfz]+$/i, "sszzsszz")
      assert_match(/^SS$/i, "\xdf")
      assert_match(/^Ss$/i, "\xdf")
      [0xa1, 0xa6, *(0xa9..0xac), 0xaf].each do |c|
        c1 = c.chr("iso8859-3")
        c2 = (c + 0x10).chr("iso8859-3")
        assert_match(/^(#{ c1 })\1$/i, c2 + c1)
        assert_match(/^(#{ c2 })\1$/i, c1 + c2)
        assert_match(/^[#{ c1 }]+$/i, c2 + c1)
        assert_match(/^[#{ c2 }]+$/i, c1 + c2)
      end
      ([*(0xc0..0xde)] - [0xc3, 0xd0, 0xd7]).each do |c|
        c1 = c.chr("iso8859-3")
        c2 = (c + 0x20).chr("iso8859-3")
        assert_match(/^(#{ c1 })\1$/i, c2 + c1)
        assert_match(/^(#{ c2 })\1$/i, c1 + c2)
        assert_match(/^[#{ c1 }]+$/i, c2 + c1)
        assert_match(/^[#{ c2 }]+$/i, c1 + c2)
      end
    ))
  end

  def test_iso_8859_4
    eval("# encoding: iso8859-4\n" + ASSERTS.gsub(/ENCODING/m, "iso8859-4"))
  end

  # Part 5: 0xb0..0xcf pair with the byte 0x20 above, and 0xa1..0xaf
  # (minus 0xad) pair with the byte 0x50 above.
  def test_iso_8859_5
    eval(%q(# encoding: iso8859-5
      (0xb0..0xcf).each do |c|
        c1 = c.chr("iso8859-5")
        c2 = (c + 0x20).chr("iso8859-5")
        assert_match(/^(#{ c1 })\1$/i, c2 + c1)
        assert_match(/^(#{ c2 })\1$/i, c1 + c2)
        assert_match(/^[#{ c1 }]+$/i, c2 + c1)
        assert_match(/^[#{ c2 }]+$/i, c1 + c2)
      end
      ((0xa1..0xaf).to_a - [0xad]).each do |c|
        c1 = c.chr("iso8859-5")
        c2 = (c + 0x50).chr("iso8859-5")
        assert_match(/^(#{ c1 })\1$/i, c2 + c1)
        assert_match(/^(#{ c2 })\1$/i, c1 + c2)
        assert_match(/^[#{ c1 }]+$/i, c2 + c1)
        assert_match(/^[#{ c2 }]+$/i, c1 + c2)
      end
    ))
  end

  # Part 6: only checks that each listed byte matches itself under /i.
  def test_iso_8859_6
    eval(%q(# encoding: iso8859-6
      [0xa4, 0xac, 0xbb, 0xbf, *(0xc1..0xda), *(0xe0..0xf2)].each do |c|
        c1 = c.chr("iso8859-6")
        assert_match(/^(#{ c1 })\1$/i, c1 * 2)
      end
    ))
  end

  # Part 7: every listed byte matches itself, and 0xc1..0xd9 (minus 0xd2)
  # pair with the byte 0x20 above.
  def test_iso_8859_7
    eval(%q(# encoding: iso8859-7
      ((0xa0..0xfe).to_a - [0xae, 0xd2]).each do |c|
        c1 = c.chr("iso8859-7")
        assert_match(/^(#{ c1 })\1$/i, c1 * 2)
      end
      ((0xc1..0xd9).to_a - [0xd2]).each do |c|
        c1 = c.chr("iso8859-7")
        c2 = (c + 0x20).chr("iso8859-7")
        assert_match(/^(#{ c1 })\1$/i, c2 + c1)
        assert_match(/^(#{ c2 })\1$/i, c1 + c2)
        assert_match(/^[#{ c1 }]+$/i, c2 + c1)
        assert_match(/^[#{ c2 }]+$/i, c1 + c2)
      end
    ))
  end

  # Part 8: only checks that each listed byte matches itself under /i.
  def test_iso_8859_8
    eval(%q(# encoding: iso8859-8
      [0xa0, *(0xa2..0xbe), *(0xdf..0xfa), 0xfc, 0xfd].each do |c|
        c1 = c.chr("iso8859-9")
        assert_match(/^(#{ c1 })\1$/i, c1 * 2)
      end
    ).sub('c.chr("iso8859-9")', 'c.chr("iso8859-8")'))
  end

  # Part 9: like the shared assertions, but the paired range stops at 0xdc.
  def test_iso_8859_9
    eval(%q(# encoding: iso8859-9
      assert_match(/^(\xdf)\1$/i, "\xdf\xdf")
      assert_match(/^(\xdf)\1$/i, "ssss")
      assert_match(/^[\xdfz]+$/i, "sszzsszz")
      assert_match(/^SS$/i, "\xdf")
      assert_match(/^Ss$/i, "\xdf")
      ([*(0xc0..0xdc)] - [0xd7]).each do |c|
        c1 = c.chr("iso8859-9")
        c2 = (c + 0x20).chr("iso8859-9")
        assert_match(/^(#{ c1 })\1$/i, c2 + c1)
        assert_match(/^(#{ c2 })\1$/i, c1 + c2)
        assert_match(/^[#{ c1 }]+$/i, c2 + c1)
        assert_match(/^[#{ c2 }]+$/i, c1 + c2)
      end
    ))
  end

  def test_iso_8859_10
    eval("# encoding: iso8859-10\n" + ASSERTS.gsub(/ENCODING/m, "iso8859-10"))
  end

  # Part 11: only checks that each listed byte matches itself under /i.
  def test_iso_8859_11
    eval(%q(# encoding: iso8859-11
      [*(0xa0..0xda), *(0xdf..0xfb)].each do |c|
        c1 = c.chr("iso8859-11")
        assert_match(/^(#{ c1 })\1$/i, c1 * 2)
      end
    ))
  end

  def test_iso_8859_13
    eval("# encoding: iso8859-13\n" + ASSERTS.gsub(/ENCODING/m, "iso8859-13"))
  end

  def test_iso_8859_14
    eval("# encoding: iso8859-14\n" + ASSERTS.gsub(/ENCODING/m, "iso8859-14"))
  end

  def test_iso_8859_15
    eval("# encoding: iso8859-15\n" + ASSERTS.gsub(/ENCODING/m, "iso8859-15"))
  end

  def test_iso_8859_16
    eval("# encoding: iso8859-16\n" + ASSERTS.gsub(/ENCODING/m, "iso8859-16"))
  end
end
|
||||
|
22
test/ruby/test_koi8.rb
Normal file
22
test/ruby/test_koi8.rb
Normal file
|
@ -0,0 +1,22 @@
|
|||
require "test/unit"
|
||||
|
||||
# Case-folding tests for KOI8-R and KOI8-U. The shared snippet is evaluated
# under an "# encoding:" magic comment so its regexp literals are compiled in
# the encoding being tested.
class TestKOI8 < Test::Unit::TestCase
  # Assertion source: each byte in 0xc0..0xdf must case-fold with the byte
  # 0x20 above it, both via back-references and inside character classes.
  # The placeholder ENCODING is replaced with the encoding name before eval.
  ASSERTS = %q(
      (0xc0..0xdf).each do |c|
        c1 = c.chr("ENCODING")
        c2 = (c + 0x20).chr("ENCODING")
        assert_match(/^(#{ c1 })\1$/i, c2 + c1)
        assert_match(/^(#{ c2 })\1$/i, c1 + c2)
        assert_match(/^[#{ c1 }]+$/i, c2 + c1)
        assert_match(/^[#{ c2 }]+$/i, c1 + c2)
      end
  )

  def test_koi8_r
    eval("# encoding: koi8-r\n" + ASSERTS.gsub("ENCODING", "koi8-r"))
  end

  def test_koi8_u
    eval("# encoding: koi8-u\n" + ASSERTS.gsub("ENCODING", "koi8-u"))
  end
end
|
|
@ -542,8 +542,8 @@ class TestRegexp < Test::Unit::TestCase
|
|||
check(/^(A+|B(?>\g<1>)*)[AC]$/, %w(AAAC BBBAAAAC), %w(BBBAAA))
|
||||
check(/^()(?>\g<1>)*$/, "", "a")
|
||||
check(/^(?>(?=a)(#{ "a" * 1000 }|))++$/, ["a" * 1000, "a" * 2000, "a" * 3000], ["", "a" * 500, "b" * 1000])
|
||||
check(/^(?:a?)?$/, ["", "a"], ["aa"])
|
||||
check(/^(?:a+)?$/, ["", "a", "aa"], ["ab"])
|
||||
check(eval('/^(?:a?)?$/'), ["", "a"], ["aa"])
|
||||
check(eval('/^(?:a+)?$/'), ["", "a", "aa"], ["ab"])
|
||||
check(/^(?:a?)+?$/, ["", "a", "aa"], ["ab"])
|
||||
check(/^a??[ab]/, [["a", "a"], ["a", "aa"], ["b", "b"], ["a", "ab"]], ["c"])
|
||||
check(/^(?:a*){3,5}$/, ["", "a", "aa", "aaa", "aaaa", "aaaaa", "aaaaaa"], ["b"])
|
||||
|
@ -717,4 +717,42 @@ class TestRegexp < Test::Unit::TestCase
|
|||
assert_raise(TypeError) { $' }
|
||||
assert_raise(TypeError) { $+ }
|
||||
end
|
||||
|
||||
# Unicode regexp behaviour: the \p{Any} property, rejection of unknown
# property names, and case folding including multi-character foldings.
def test_unicode
  # \p{Any} matches exactly one character, whatever it is.
  assert_match(/^\u3042{0}\p{Any}$/, "a")
  assert_match(/^\u3042{0}\p{Any}$/, "\u3041")
  assert_match(/^\u3042{0}\p{Any}$/, "\0")
  assert_no_match(/^\u3042{0}\p{Any}$/, "\0\0")
  assert_no_match(/^\u3042{0}\p{Any}$/, "")
  # Non-ASCII, over-long and unknown property names are syntax errors in a literal.
  assert_raise(SyntaxError) { eval('/^\u3042{0}\p{' + "\u3042" + '}$/') }
  assert_raise(SyntaxError) { eval('/^\u3042{0}\p{' + 'a' * 1000 + '}$/') }
  assert_raise(SyntaxError) { eval('/^\u3042{0}\p{foobarbazqux}$/') }
  # Fullwidth and ASCII letters fold within their own kind, not across kinds.
  assert_match(/^(\uff21)(a)\1\2$/i, "\uff21A\uff41a")
  assert_no_match(/^(\uff21)\1$/i, "\uff21A")
  assert_no_match(/^(\uff41)\1$/i, "\uff41a")
  # U+00DF folds to "ss".
  assert_match(/^\u00df$/i, "\u00df")
  assert_match(/^\u00df$/i, "ss")
  #assert_match(/^(\u00df)\1$/i, "\u00dfss") # this must be bug...
  assert_match(/^\u00df{2}$/i, "\u00dfss")
  # U+00C5, U+00E5 and U+212B are all equivalent under /i.
  assert_match(/^\u00c5$/i, "\u00c5")
  assert_match(/^\u00c5$/i, "\u00e5")
  assert_match(/^\u00c5$/i, "\u212b")
  assert_match(/^(\u00c5)\1\1$/i, "\u00c5\u00e5\u212b")
  # U+0149 folds to the two characters U+02BC + "n".
  assert_match(/^\u0149$/i, "\u0149")
  assert_match(/^\u0149$/i, "\u02bcn")
  #assert_match(/^(\u0149)\1$/i, "\u0149\u02bcn") # this must be bug...
  assert_match(/^\u0149{2}$/i, "\u0149\u02bcn")
  # U+0390 folds to the three characters U+03B9 U+0308 U+0301.
  assert_match(/^\u0390$/i, "\u0390")
  assert_match(/^\u0390$/i, "\u03b9\u0308\u0301")
  #assert_match(/^(\u0390)\1$/i, "\u0390\u03b9\u0308\u0301") # this must be bug...
  assert_match(/^\u0390{2}$/i, "\u0390\u03b9\u0308\u0301")
  # U+FB05 and U+FB06 both fold to "st".
  assert_match(/^\ufb05$/i, "\ufb05")
  assert_match(/^\ufb05$/i, "\ufb06")
  assert_match(/^\ufb05$/i, "st")
  #assert_match(/^(\ufb05)\1\1$/i, "\ufb05\ufb06st") # this must be bug...
  assert_match(/^\ufb05{3}$/i, "\ufb05\ufb06st")
  assert_match(/^\u03b9\u0308\u0301$/i, "\u0390")
  # Very large code points must be convertible without raising.
  assert_nothing_raised { 0x03ffffff.chr("utf-8").size }
  assert_nothing_raised { 0x7fffffff.chr("utf-8").size }
end
|
||||
end
|
||||
|
|
27
test/ruby/test_shift_jis.rb
Normal file
27
test/ruby/test_shift_jis.rb
Normal file
|
@ -0,0 +1,27 @@
|
|||
# vim: set fileencoding=shift_jis
|
||||
|
||||
require "test/unit"
|
||||
|
||||
# Regexp and String tests for Shift_JIS source text: case folding with
# back-references, the Hiragana/Katakana character properties, and appending
# a character given as an integer code.
class TestShiftJIS < Test::Unit::TestCase
  def test_mbc_case_fold
    # NOTE(review): with plain ASCII "a" as written here, the second assertion
    # cannot hold (/i lets \1 = "a" match "A"). The first group and its
    # counterparts in the subject strings were presumably multibyte (fullwidth)
    # letters that got normalised to ASCII -- confirm against the original
    # Shift_JIS encoded file before relying on this test.
    assert_match(/(a)(a)\1\2/i, "aaaA")
    assert_no_match(/(a)(a)\1\2/i, "aaAA")
  end

  # Each pattern starts with a zero-width multibyte atom (repeated 0 times);
  # presumably this keeps the regexp in the source encoding rather than ASCII.
  def test_property
    assert_match(/あ{0}\p{Hiragana}{4}/, "ひらがな")
    assert_no_match(/あ{0}\p{Hiragana}{4}/, "カタカナ")
    assert_no_match(/あ{0}\p{Hiragana}{4}/, "漢字漢字")
    assert_no_match(/あ{0}\p{Katakana}{4}/, "ひらがな")
    assert_match(/あ{0}\p{Katakana}{4}/, "カタカナ")
    assert_no_match(/あ{0}\p{Katakana}{4}/, "漢字漢字")
    # An unknown property name must be rejected when the regexp is compiled.
    assert_raise(RegexpError) { Regexp.new('あ{0}\p{foobarbaz}') }
  end

  # String#<< with an Integer appends the character for that code in the
  # string's encoding; 0x82a9 is expected to add the sixth character below,
  # while the lone byte value 0x82 must be rejected.
  def test_code_to_mbclen
    s = "あいうえお"
    s << 0x82a9
    assert_equal("あいうえおか", s)
    assert_raise(ArgumentError) { s << 0x82 }
  end
end
|
|
@ -290,4 +290,69 @@ EOT
|
|||
assert_equal(e, "abc".encode("utf-16be").count("^b".encode("utf-16be")))
|
||||
assert_equal(e, "abc".encode("utf-16le").count("^b".encode("utf-16le")))
|
||||
end
|
||||
|
||||
# Source text carrying a UTF-16 magic comment must be rejected with
# ArgumentError when evaluated.
def test_header
  assert_raise(ArgumentError) { eval("# encoding:utf-16le\nfoo") }
  assert_raise(ArgumentError) { eval("# encoding:utf-16be\nfoo") }
end
|
||||
|
||||
|
||||
# Newline detection in UTF-16: String#lines must split after each two-byte
# newline, and String#chomp must strip only a complete two-byte newline.
def test_is_mbc_newline
  sl = "f\0o\0o\0\n\0b\0a\0r\0\n\0b\0a\0z\0\n\0".force_encoding("utf-16le")
  sb = "\0f\0o\0o\0\n\0b\0a\0r\0\n\0b\0a\0z\0\n".force_encoding("utf-16be")
  al = sl.lines.to_a
  ab = sb.lines.to_a
  assert_equal("f\0o\0o\0\n\0".force_encoding("utf-16le"), al.shift)
  assert_equal("b\0a\0r\0\n\0".force_encoding("utf-16le"), al.shift)
  assert_equal("b\0a\0z\0\n\0".force_encoding("utf-16le"), al.shift)
  assert_equal("\0f\0o\0o\0\n".force_encoding("utf-16be"), ab.shift)
  assert_equal("\0b\0a\0r\0\n".force_encoding("utf-16be"), ab.shift)
  assert_equal("\0b\0a\0z\0\n".force_encoding("utf-16be"), ab.shift)

  # chomp removes one trailing newline; a second chomp changes nothing.
  sl = "f\0o\0o\0\n\0".force_encoding("utf-16le")
  sb = "\0f\0o\0o\0\n".force_encoding("utf-16be")
  sl2 = "f\0o\0o\0".force_encoding("utf-16le")
  sb2 = "\0f\0o\0o".force_encoding("utf-16be")
  assert_equal(sl2, sl.chomp)
  assert_equal(sl2, sl.chomp.chomp)
  assert_equal(sb2, sb.chomp)
  assert_equal(sb2, sb.chomp.chomp)

  # A trailing single "\n" byte is not a whole UTF-16 code unit and must be
  # left untouched by chomp.
  sl = "f\0o\0o\0\n".force_encoding("utf-16le")
  sb = "\0f\0o\0o\n".force_encoding("utf-16be")
  assert_equal(sl, sl.chomp)
  assert_equal(sb, sb.chomp)
end
|
||||
|
||||
# Integer#chr with a UTF-16 encoding must emit the code unit in the right
# byte order.
def test_code_to_mbc
  assert_equal("a\0".force_encoding("utf-16le"), "a".ord.chr("utf-16le"))
  assert_equal("\0a".force_encoding("utf-16be"), "a".ord.chr("utf-16be"))
end
|
||||
|
||||
# Re-encodes +s+ character by character into the encoding named +e+ by
# round-tripping each character through its code point (no transcoder is
# involved). Returns the concatenation of the converted characters.
def utf8_to_utf16(s, e)
  s.chars.map { |ch| ch.ord.chr(e) }.join
end
|
||||
|
||||
# A case-insensitive regexp built from UTF-16 text must keep that encoding
# and match through back-references to both a multibyte and an ASCII group.
def test_mbc_case_fold
  rl = Regexp.new(utf8_to_utf16("^(\u3042)(a)\\1\\2$", "utf-16le"), "i")
  rb = Regexp.new(utf8_to_utf16("^(\u3042)(a)\\1\\2$", "utf-16be"), "i")
  assert_equal(Encoding.find("utf-16le"), rl.encoding)
  assert_equal(Encoding.find("utf-16be"), rb.encoding)
  assert_match(rl, utf8_to_utf16("\u3042a\u3042a", "utf-16le"))
  assert_match(rb, utf8_to_utf16("\u3042a\u3042a", "utf-16be"))
end
|
||||
|
||||
# The four bytes below are the surrogate pair D842 DFB7. They must behave as
# the single character U+20BB7 for size, ord, chr and chop.
def test_surrogate_pair
  sl = "\x42\xd8\xb7\xdf".force_encoding("utf-16le")
  sb = "\xd8\x42\xdf\xb7".force_encoding("utf-16be")

  assert_equal(1, sl.size)
  assert_equal(1, sb.size)
  assert_equal(0x20bb7, sl.ord)
  assert_equal(0x20bb7, sb.ord)
  assert_equal(sl, 0x20bb7.chr("utf-16le"))
  assert_equal(sb, 0x20bb7.chr("utf-16be"))
  # chop must drop the whole pair, not just its second half.
  assert_equal("", sl.chop)
  assert_equal("", sb.chop)
end
|
||||
end
|
||||
|
|
|
@ -19,9 +19,75 @@ EOT
|
|||
end
|
||||
|
||||
# Eight bytes tagged as UTF-32 form two characters, so asking for up to three
# characters from index 0 must return the whole string.
def test_substr
  assert_str_equal(
    "abcdefgh".force_encoding("utf-32le"),
    "abcdefgh".force_encoding("utf-32le")[0,3])
  assert_str_equal(
    "abcdefgh".force_encoding("utf-32be"),
    "abcdefgh".force_encoding("utf-32be")[0,3])
end
|
||||
|
||||
# each_char on UTF-32 data must yield consecutive four-byte chunks.
def test_mbc_len
  al = "abcdefghijkl".force_encoding("utf-32le").each_char.to_a
  ab = "abcdefghijkl".force_encoding("utf-32be").each_char.to_a
  assert_equal("abcd".force_encoding("utf-32le"), al.shift)
  assert_equal("efgh".force_encoding("utf-32le"), al.shift)
  assert_equal("ijkl".force_encoding("utf-32le"), al.shift)
  assert_equal("abcd".force_encoding("utf-32be"), ab.shift)
  assert_equal("efgh".force_encoding("utf-32be"), ab.shift)
  assert_equal("ijkl".force_encoding("utf-32be"), ab.shift)
end
|
||||
|
||||
# Widens every byte of +s+ to a little-endian 32-bit unit and tags the result
# as UTF-32LE. Despite the "utf16" in its name this helper produces UTF-32;
# the name is kept because other tests call it.
def ascii_to_utf16le(s)
  # "V" packs each value as 32-bit little-endian, i.e. [byte, 0, 0, 0].
  s.unpack("C*").pack("V*").force_encoding("utf-32le")
end
|
||||
|
||||
# Widens every byte of +s+ to a big-endian 32-bit unit and tags the result as
# UTF-32BE. Despite the "utf16" in its name this helper produces UTF-32; the
# name is kept because other tests call it.
def ascii_to_utf16be(s)
  # "N" packs each value as 32-bit big-endian, i.e. [0, 0, 0, byte].
  s.unpack("C*").pack("N*").force_encoding("utf-32be")
end
|
||||
|
||||
# Newline detection in UTF-32: String#lines must split after each four-byte
# newline, and chomp must leave a string without a newline unchanged.
def test_mbc_newline
  al = ascii_to_utf16le("foo\nbar\nbaz\n").lines.to_a
  ab = ascii_to_utf16be("foo\nbar\nbaz\n").lines.to_a

  assert_equal(ascii_to_utf16le("foo\n"), al.shift)
  assert_equal(ascii_to_utf16le("bar\n"), al.shift)
  assert_equal(ascii_to_utf16le("baz\n"), al.shift)
  assert_equal(ascii_to_utf16be("foo\n"), ab.shift)
  assert_equal(ascii_to_utf16be("bar\n"), ab.shift)
  assert_equal(ascii_to_utf16be("baz\n"), ab.shift)

  # Only two bytes long, i.e. shorter than one UTF-32 unit.
  sl = "a\0".force_encoding("utf-32le")
  sb = "a\0".force_encoding("utf-32be")
  assert_equal(sl, sl.chomp)
  assert_equal(sb, sb.chomp)
end
|
||||
|
||||
# String#ord on a single UTF-32 unit must honour the byte order.
def test_mbc_to_code
  sl = "a\0\0\0".force_encoding("utf-32le")
  sb = "\0\0\0a".force_encoding("utf-32be")
  assert_equal("a".ord, sl.ord)
  assert_equal("a".ord, sb.ord)
end
|
||||
|
||||
# Re-encodes +s+ character by character into the encoding named +e+ by
# round-tripping each character through its code point (no transcoder is
# involved). Returns the concatenation of the converted characters.
def utf8_to_utf32(s, e)
  s.chars.map { |ch| ch.ord.chr(e) }.join
end
|
||||
|
||||
# A case-insensitive regexp built from UTF-32 text must keep that encoding
# and match through back-references to both a multibyte and an ASCII group.
def test_mbc_case_fold
  rl = Regexp.new(utf8_to_utf32("^(\u3042)(a)\\1\\2$", "utf-32le"), "i")
  rb = Regexp.new(utf8_to_utf32("^(\u3042)(a)\\1\\2$", "utf-32be"), "i")
  assert_equal(Encoding.find("utf-32le"), rl.encoding)
  assert_equal(Encoding.find("utf-32be"), rb.encoding)
  assert_match(rl, utf8_to_utf32("\u3042a\u3042a", "utf-32le"))
  assert_match(rb, utf8_to_utf32("\u3042a\u3042a", "utf-32be"))
end
|
||||
|
||||
# Integer#chr with a UTF-32 encoding must emit the unit in the right byte order.
def test_code_to_mbc
  sl = "a\0\0\0".force_encoding("utf-32le")
  sb = "\0\0\0a".force_encoding("utf-32be")
  assert_equal(sl, "a".ord.chr("utf-32le"))
  assert_equal(sb, "a".ord.chr("utf-32be"))
end
|
||||
end
|
||||
|
||||
|
|
16
test/ruby/test_windows_1251.rb
Normal file
16
test/ruby/test_windows_1251.rb
Normal file
|
@ -0,0 +1,16 @@
|
|||
# encoding:windows-1251
|
||||
|
||||
require "test/unit"
|
||||
|
||||
# Case-folding test for Windows-1251: every byte in 0xc0..0xdf must fold with
# the byte 0x20 above it.
class TestWindows1251 < Test::Unit::TestCase
  def test_windows_1251
    (0xc0..0xdf).each do |c|
      c1 = c.chr("windows-1251")
      c2 = (c + 0x20).chr("windows-1251")
      # Back-reference form: the captured letter must match its other case.
      assert_match(/^(#{ c1 })\1$/i, c2 + c1)
      assert_match(/^(#{ c2 })\1$/i, c1 + c2)
      # Character-class form: a class holding one case must accept both.
      assert_match(/^[#{ c1 }]+$/i, c2 + c1)
      assert_match(/^[#{ c2 }]+$/i, c1 + c2)
    end
  end
end
|
Loading…
Reference in a new issue