2007-11-23 01:30:26 -05:00
|
|
|
require 'test/unit'
|
|
|
|
|
|
|
|
class TestM17N < Test::Unit::TestCase
|
|
|
|
# Asserts that +actual+ is the Encoding object that Encoding.find returns
# for the name +encname+. +message+ is handed through to assert_equal.
def assert_encoding(encname, actual, message=nil)
  expected = Encoding.find(encname)
  assert_equal(expected, actual, message)
end
|
|
|
|
|
|
|
|
# Re-tags +str+ in place as ASCII-8BIT (bytes untouched) and returns it.
def a(str)
  str.force_encoding("ASCII-8BIT")
end
|
|
|
|
# Re-tags +str+ in place as EUC-JP (bytes untouched) and returns it.
def e(str)
  str.force_encoding("EUC-JP")
end
|
|
|
|
# Re-tags +str+ in place as Shift_JIS (bytes untouched) and returns it.
def s(str)
  str.force_encoding("Shift_JIS")
end
|
|
|
|
# Re-tags +str+ in place as UTF-8 (bytes untouched) and returns it.
def u(str)
  str.force_encoding("UTF-8")
end
|
|
|
|
|
|
|
|
# String literals evaluated from ASCII-8BIT source text are expected to
# produce ASCII-8BIT strings, for both the empty and a one-letter literal.
def test_string_ascii_literal
  [%{""}, %{"a"}].each do |src|
    assert_encoding("ASCII-8BIT", eval(a(src)).encoding)
  end
end
|
|
|
|
|
2007-12-01 21:53:46 -05:00
|
|
|
# Evaluates string literals from EUC-JP source text and checks the encoding
# of the resulting string. Each row is [expected encoding name, source].
def test_string_eucjp_literal
  cases = [
    ["ASCII-8BIT", %{""}],
    ["ASCII-8BIT", %{"a"}],
    ["EUC-JP",     %{"\xa1\xa1"}],      # raw non-ASCII bytes in the source
    ["EUC-JP",     %{"\\xa1\\xa1"}],    # the same bytes written as escapes
    ["ASCII-8BIT", %{"\\x20"}],
    ["ASCII-8BIT", %{"\\n"}],
    ["EUC-JP",     %{"\\x80"}],
  ]
  cases.each do |encname, src|
    assert_encoding(encname, eval(e(src)).encoding)
  end
end
|
|
|
|
|
2007-12-01 11:56:19 -05:00
|
|
|
# A string literal that combines raw bytes "\xc2\xa0" with a \u{6666} escape
# is expected to be a SyntaxError in ASCII-8BIT, EUC-JP and Shift_JIS source
# and to be accepted in UTF-8 source, whichever part comes first.
def test_string_mixed_unicode
  sources = [
    %{"\xc2\xa0\\u{6666}"},
    %{"\\u{6666}\xc2\xa0"},
  ]
  sources.each do |src|
    # Each helper re-tags its argument in place, so give every eval a copy.
    assert_raise(SyntaxError) { eval(a(src.dup)) }
    assert_raise(SyntaxError) { eval(e(src.dup)) }
    assert_raise(SyntaxError) { eval(s(src.dup)) }
    assert_nothing_raised { eval(u(src.dup)) }
  end
end
|
|
|
|
|
|
|
|
# Checks String#inspect on strings holding truncated or otherwise incomplete
# multibyte sequences. Each row is [expected inspect output, subject string];
# the single-quoted expectations contain literal backslash-x text.
def test_string_inspect
  cases = [
    # incomplete sequence at the end of the string
    ['"\xFE"', e("\xfe")],
    ['"\x8E"', e("\x8e")],
    ['"\x8F"', e("\x8f")],
    ['"\x8F\xA1"', e("\x8f\xa1")],
    ['"\xEF"', s("\xef")],
    ['"\xC2"', u("\xc2")],
    ['"\xE0\x80"', u("\xe0\x80")],
    ['"\xF0\x80\x80"', u("\xf0\x80\x80")],
    ['"\xF8\x80\x80\x80"', u("\xf8\x80\x80\x80")],
    ['"\xFC\x80\x80\x80\x80"', u("\xfc\x80\x80\x80\x80")],

    # the same sequences followed by a space
    ['"\xFE "', e("\xfe ")],
    ['"\x8E "', e("\x8e ")],
    ['"\x8F "', e("\x8f ")],
    ['"\x8F\xA1 "', e("\x8f\xa1 ")],
    ['"\xEF "', s("\xef ")],
    ['"\xC2 "', u("\xc2 ")],
    ['"\xE0\x80 "', u("\xe0\x80 ")],
    ['"\xF0\x80\x80 "', u("\xf0\x80\x80 ")],
    ['"\xF8\x80\x80\x80 "', u("\xf8\x80\x80\x80 ")],
    ['"\xFC\x80\x80\x80\x80 "', u("\xfc\x80\x80\x80\x80 ")],

    # mixed cases: here part of the expected output stays as raw bytes
    [e("\"\\xA1\x8f\xA1\xA1\""), e("\xa1\x8f\xa1\xa1")],
    ['"\x81."', s("\x81.")],
    [s("\"\x81@\""), s("\x81@")],
    ['"\xFC"', u("\xfc")],
  ]
  cases.each do |expected, str|
    assert_equal(expected, str.inspect)
  end
end
|
|
|
|
|
2007-12-07 23:31:26 -05:00
|
|
|
# Builds byte strings from bit templates of 1- to 8-byte UTF-8-shaped
# sequences and checks String#valid_encoding? on them.
#
# In a template, "x" bits are filled with all ones or all zeros. "X" bits
# are filled three ways: only the last X set, every X set, and (for
# multi-byte templates) every X cleared. The first two are expected valid
# exactly when the bit text compares <= the bit text of U+10FFFF; the
# all-cleared form is always expected invalid (presumably the redundant,
# i.e. overlong, form the test name refers to).
def test_validate_redundant_utf8
  max_bits = "11110100 10001111 10111111 10111111"
  templates = [
    "0xxxxxxx",
    "110XXXXx 10xxxxxx",
    "1110XXXX 10Xxxxxx 10xxxxxx",
    "11110XXX 10XXxxxx 10xxxxxx 10xxxxxx",
    "111110XX 10XXXxxx 10xxxxxx 10xxxxxx 10xxxxxx",
    "1111110X 10XXXXxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx",
    "11111110 10XXXXXx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx",
    "11111111 10XXXXXX 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx",
  ]
  # Turns "0101 ..." bit text into a UTF-8-tagged byte string.
  to_utf8 = lambda do |bits|
    [bits.gsub(/ /, "")].pack("B*").force_encoding("utf-8")
  end

  templates.each do |template|
    ['1', '0'].each do |fill|
      filled = template.gsub(/x/, fill)

      last_x_only = filled.sub(/X([^X]*)\z/, '1\1').gsub(/X/, "0")
      every_x = filled.gsub(/X/, "1")
      [last_x_only, every_x].each do |bits|
        str = to_utf8.call(bits)
        if bits <= max_bits
          assert(str.valid_encoding?, "#{bits}")
        else
          assert(!str.valid_encoding?, "#{bits}")
        end
      end

      # Only templates containing a space (more than one byte) get the
      # all-X-cleared check.
      next unless / / =~ template
      no_x = filled.gsub(/X/, "0")
      str = to_utf8.call(no_x)
      assert(!str.valid_encoding?, "#{no_x}")
    end
  end
end
|
|
|
|
|
|
|
|
# Checks String#valid_encoding? around the surrogate range in 3-byte UTF-8.
# Layout of a 3-byte sequence: 1110XXXX 10Xxxxxx 10xxxxxx
def test_validate_surrogate
  before_high = "11101101 10011111 10111111" # just before surrogate high
  high        = "11101101 1010xxxx 10xxxxxx" # surrogate high
  low         = "11101101 1011xxxx 10xxxxxx" # surrogate low
  after_low   = "11101110 10000000 10000000" # just after surrogate low

  # Turns "0101 ..." bit text into a UTF-8-tagged byte string.
  to_utf8 = lambda do |bits|
    [bits.gsub(/ /, "")].pack("B*").force_encoding("utf-8")
  end

  # Both ends of each surrogate block (x bits all 0, then all 1) must be
  # rejected.
  [high, low].each do |template|
    ['0', '1'].each do |fill|
      bits = template.gsub(/x/, fill)
      str = to_utf8.call(bits)
      assert(!str.valid_encoding?, "#{bits}")
    end
  end

  # The immediate neighbours of the surrogate range must be accepted.
  [before_high, after_low].each do |bits|
    str = to_utf8.call(bits)
    assert(str.valid_encoding?, "#{bits}")
  end
end
|
|
|
|
|
2007-11-23 01:30:26 -05:00
|
|
|
# Every regexp source below contains a multibyte lead byte without a
# complete, valid continuation for the encoding selected by its flag, and
# evaluating it is expected to raise SyntaxError.
def test_regexp_too_short_multibyte_character
  # bytes written as \x escapes inside the regexp source
  escaped = [
    '/\xfe/e',
    '/\x8e/e',
    '/\x8f/e',
    '/\x8f\xa1/e',
    '/\xef/s',
    '/\xc2/u',
    '/\xe0\x80/u',
    '/\xf0\x80\x80/u',
    '/\xf8\x80\x80\x80/u',
    '/\xfc\x80\x80\x80\x80/u',
  ]

  # raw 8bit
  raw = [
    "/\xfe/e",
    "/\xc2/u",
  ]

  # invalid suffix
  bad_suffix = [
    '/\xc2\xff/u',
    '/\xc2 /u',
    '/\xc2\x20/u',
  ]

  (escaped + raw + bad_suffix).each do |src|
    assert_raise(SyntaxError) { eval(src) }
  end
end
|
|
|
|
|
2007-11-25 08:25:34 -05:00
|
|
|
def assert_regexp_generic_encoding(r)
|
2007-11-26 08:05:08 -05:00
|
|
|
assert(!r.fixed_encoding?)
|
2007-11-25 08:25:34 -05:00
|
|
|
%w[ASCII-8BIT EUC-JP Shift_JIS UTF-8].each {|ename|
|
2007-12-06 04:28:26 -05:00
|
|
|
# "\xc2\xa1" is a valid sequence for ASCII-8BIT, EUC-JP, Shift_JIS and UTF-8.
|
|
|
|
assert_nothing_raised { r =~ "\xc2\xa1".force_encoding(ename) }
|
2007-11-25 08:25:34 -05:00
|
|
|
}
|
|
|
|
end
|
|
|
|
|
|
|
|
def assert_regexp_fixed_encoding(r)
|
2007-11-26 08:05:08 -05:00
|
|
|
assert(r.fixed_encoding?)
|
2007-11-25 08:25:34 -05:00
|
|
|
%w[ASCII-8BIT EUC-JP Shift_JIS UTF-8].each {|ename|
|
|
|
|
enc = Encoding.find(ename)
|
|
|
|
if enc == r.encoding
|
2007-12-06 04:28:26 -05:00
|
|
|
assert_nothing_raised { r =~ "\xc2\xa1".force_encoding(enc) }
|
2007-11-25 08:25:34 -05:00
|
|
|
else
|
2007-12-06 04:28:26 -05:00
|
|
|
assert_raise(ArgumentError) { r =~ "\xc2\xa1".force_encoding(enc) }
|
2007-11-25 08:25:34 -05:00
|
|
|
end
|
|
|
|
}
|
|
|
|
end
|
|
|
|
|
|
|
|
# Asserts that +r+ reports ASCII-8BIT as its encoding and passes
# assert_regexp_generic_encoding.
def assert_regexp_generic_ascii(r)
  actual = r.encoding
  assert_encoding("ASCII-8BIT", actual)
  assert_regexp_generic_encoding(r)
end
|
|
|
|
|
|
|
|
# Asserts that +r+ reports ASCII-8BIT as its encoding and passes
# assert_regexp_fixed_encoding.
def assert_regexp_fixed_ascii8bit(r)
  actual = r.encoding
  assert_encoding("ASCII-8BIT", actual)
  assert_regexp_fixed_encoding(r)
end
|
|
|
|
|
|
|
|
# Asserts that +r+ reports EUC-JP as its encoding and passes
# assert_regexp_fixed_encoding.
def assert_regexp_fixed_eucjp(r)
  actual = r.encoding
  assert_encoding("EUC-JP", actual)
  assert_regexp_fixed_encoding(r)
end
|
|
|
|
|
|
|
|
# Asserts that +r+ reports Shift_JIS as its encoding and passes
# assert_regexp_fixed_encoding.
def assert_regexp_fixed_sjis(r)
  actual = r.encoding
  assert_encoding("Shift_JIS", actual)
  assert_regexp_fixed_encoding(r)
end
|
|
|
|
|
|
|
|
# Asserts that +r+ reports UTF-8 as its encoding and passes
# assert_regexp_fixed_encoding.
def assert_regexp_fixed_utf8(r)
  actual = r.encoding
  assert_encoding("UTF-8", actual)
  assert_regexp_fixed_encoding(r)
end
|
|
|
|
|
2007-11-23 01:30:26 -05:00
|
|
|
# ASCII-only regexps without an encoding flag are expected to be generic
# ASCII-8BIT regexps, whatever encoding their source string carried. They
# match "a" in every test encoding and return nil (rather than raising) on
# a non-ASCII string in every test encoding.
def test_regexp_generic
  assert_regexp_generic_ascii(/a/)
  [a("a"), e("a"), s("a"), u("a")].each do |src|
    assert_regexp_generic_ascii(Regexp.new(src))
  end

  [/a/, Regexp.new(a("a"))].each do |re|
    [a("a"), e("a"), s("a"), u("a")].each do |str|
      assert_equal(0, re =~ str)
    end
    [a("\xc2\xa1"), e("\xc2\xa1"), s("\xc2\xa1"), u("\xc2\xa1")].each do |str|
      assert_equal(nil, re =~ str)
    end
  end
end
|
|
|
|
|
|
|
|
# Regexps with the /n flag, or evaluated from ASCII-8BIT source containing
# non-ASCII bytes, are expected to be fixed to ASCII-8BIT. They may match
# ASCII-only strings of any test encoding, but matching a non-ASCII string
# of another encoding raises ArgumentError.
def test_regexp_ascii
  assert_regexp_fixed_ascii8bit(/a/n)
  assert_regexp_fixed_ascii8bit(/\xc2\xa1/n)
  [%{/\xc2\xa1/}, %{/\xc2\xa1/n}, %q{/\xc2\xa1/}].each do |src|
    assert_regexp_fixed_ascii8bit(eval(a(src)))
  end

  # ASCII-only pattern
  ascii_re = /a/n
  [a("a"), e("a"), s("a"), u("a")].each do |str|
    assert_equal(0, ascii_re =~ str)
  end
  assert_equal(nil, ascii_re =~ a("\xc2\xa1"))
  [e("\xc2\xa1"), s("\xc2\xa1"), u("\xc2\xa1")].each do |str|
    assert_raise(ArgumentError) { ascii_re =~ str }
  end

  # non-ASCII pattern
  non_ascii = [/\xc2\xa1/n] + [%{/\xc2\xa1/}, %{/\xc2\xa1/n}].map { |src| eval(a(src)) }
  non_ascii.each do |re|
    [a("a"), e("a"), s("a"), u("a")].each do |str|
      assert_equal(nil, re =~ str)
    end
    assert_equal(0, re =~ a("\xc2\xa1"))
    [e("\xc2\xa1"), s("\xc2\xa1"), u("\xc2\xa1")].each do |str|
      assert_raise(ArgumentError) { re =~ str }
    end
  end
end
|
|
|
|
|
|
|
|
# Regexps with the /e flag, or evaluated from EUC-JP source containing
# non-ASCII bytes, are expected to be fixed to EUC-JP. They may match
# ASCII-only strings of any test encoding, but matching a non-ASCII string
# of another encoding raises ArgumentError.
def test_regexp_euc
  assert_regexp_fixed_eucjp(/a/e)
  assert_regexp_fixed_eucjp(/\xc2\xa1/e)
  [%{/\xc2\xa1/}, %q{/\xc2\xa1/}].each do |src|
    assert_regexp_fixed_eucjp(eval(e(src)))
  end

  # ASCII-only pattern
  ascii_re = /a/e
  [a("a"), e("a"), s("a"), u("a")].each do |str|
    assert_equal(0, ascii_re =~ str)
  end
  assert_raise(ArgumentError) { ascii_re =~ a("\xc2\xa1") }
  assert_equal(nil, ascii_re =~ e("\xc2\xa1"))
  [s("\xc2\xa1"), u("\xc2\xa1")].each do |str|
    assert_raise(ArgumentError) { ascii_re =~ str }
  end

  # non-ASCII pattern
  non_ascii = [/\xc2\xa1/e] + [%{/\xc2\xa1/}, %q{/\xc2\xa1/}].map { |src| eval(e(src)) }
  non_ascii.each do |re|
    [a("a"), e("a"), s("a"), u("a")].each do |str|
      assert_equal(nil, re =~ str)
    end
    assert_raise(ArgumentError) { re =~ a("\xc2\xa1") }
    assert_equal(0, re =~ e("\xc2\xa1"))
    [s("\xc2\xa1"), u("\xc2\xa1")].each do |str|
      assert_raise(ArgumentError) { re =~ str }
    end
  end
end
|
|
|
|
|
|
|
|
# Regexps with the /s flag, or evaluated from Shift_JIS source containing
# non-ASCII bytes, are expected to be fixed to Shift_JIS.
def test_regexp_sjis
  [/a/s, /\xc2\xa1/s].each do |re|
    assert_regexp_fixed_sjis(re)
  end
  [%{/\xc2\xa1/}, %q{/\xc2\xa1/}].each do |src|
    assert_regexp_fixed_sjis(eval(s(src)))
  end
end
|
|
|
|
|
2007-12-08 02:21:05 -05:00
|
|
|
# Interpolating an EUC-JP regexp into a Shift_JIS (/s) regexp that also has
# non-ASCII bytes of its own is expected to raise ArgumentError.
#
# The local must be called +r+: the evaluated source text refers to it by
# name through the escaped "\#{r}".
def test_regexp_embed
  r = eval(e("/\xc2\xa1/"))
  ["/\xc2\xa1\#{r}/s", "/\#{r}\xc2\xa1/s"].each do |src|
    assert_raise(ArgumentError) { eval(s(src)) }
  end

  # The remaining variants only build the regexp; their assertions are
  # disabled in the source and kept here for reference.
  r = /\xc2\xa1/e
  #assert_raise(ArgumentError) { eval(s("/\xc2\xa1\#{r}/s")) }
  #assert_raise(ArgumentError) { eval(s("/\#{r}\xc2\xa1/s")) }

  r = eval(e("/\xc2\xa1/"))
  #assert_raise(ArgumentError) { /\xc2\xa1#{r}/s }

  r = /\xc2\xa1/e
  #assert_raise(ArgumentError) { /\xc2\xa1#{r}/s }
end
|
|
|
|
|
2007-11-23 01:30:26 -05:00
|
|
|
# The subject is five two-byte sequences followed by "a", tagged EUC-JP.
# The match positions for "a" are expected to be 5..6 for both the whole
# match and the first group.
def test_begin_end_offset
  str = e("\244\242\244\244\244\246\244\250\244\252a")
  assert(/(a)/ =~ str)
  md = $~
  assert_equal("a", md[0])
  [0, 1].each do |group|
    assert_equal(5, md.begin(group))
    assert_equal(6, md.end(group))
    assert_equal([5,6], md.offset(group))
  end
end
|
|
|
|
|
|
|
|
# In Shift_JIS, "\x81@" is one two-byte character whose second byte is the
# ASCII code of "@". /@/ is expected to skip it and match the standalone
# "@" that follows, at character offset 1..2.
def test_begin_end_offset_sjis
  str = s("\x81@@")
  assert(/@/ =~ str)
  md = $~
  assert_equal(s("\x81@"), md.pre_match)
  assert_equal("@", md[0])
  assert_equal("", md.post_match)
  assert_equal([1,2], md.offset(0))
end
|
|
|
|
|
2007-11-25 08:25:34 -05:00
|
|
|
# Checks the encoding of Regexp.quote results and that ASCII-only quoted
# fragments from different encodings combine into a generic regexp.
def test_quote
  assert_regexp_generic_ascii(/#{Regexp.quote(a("a"))}#{Regexp.quote(e("e"))}/)

  # Regexp.quote returns ASCII-8BIT string for ASCII only string
  # to make generic regexp if possible.
  ["", "a"].each do |text|
    # The helpers re-tag in place, so each one gets its own copy.
    [a(text.dup), e(text.dup), s(text.dup), u(text.dup)].each do |str|
      assert_encoding("ASCII-8BIT", Regexp.quote(str).encoding)
    end
  end

  # A non-ASCII string keeps its own encoding when quoted.
  [
    ["ASCII-8BIT", a("\xc2\xa1")],
    ["EUC-JP",     e("\xc2\xa1")],
    ["Shift_JIS",  s("\xc2\xa1")],
    ["UTF-8",      u("\xc2\xa1")],
  ].each do |encname, str|
    assert_encoding(encname, Regexp.quote(str).encoding)
  end
end
|
|
|
|
|
|
|
|
# Regexp.union with no arguments is expected to be a generic ASCII-8BIT
# regexp that does not match the empty string in any test encoding.
def test_union_0
  empty_union = Regexp.union
  assert_regexp_generic_ascii(empty_union)
  [a(""), e(""), s(""), u("")].each do |str|
    assert(empty_union !~ str)
  end
end
|
|
|
|
|
|
|
|
# Regexp.union of one ASCII-only string is expected to be a generic
# ASCII-8BIT regexp whatever encoding the string is tagged with.
def test_union_1_asciionly_string
  ["", "a", "\t"].each do |text|
    # The helpers re-tag in place, so each one gets its own copy.
    [a(text.dup), e(text.dup), s(text.dup), u(text.dup)].each do |str|
      assert_regexp_generic_ascii(Regexp.union(str))
    end
  end
end
|
|
|
|
|
|
|
|
# Regexp.union of one non-ASCII string is expected to be fixed to that
# string's encoding.
def test_union_1_nonascii_string
  bytes = "\xc2\xa1"
  # The helpers re-tag in place, so each one gets its own copy.
  assert_regexp_fixed_ascii8bit(Regexp.union(a(bytes.dup)))
  assert_regexp_fixed_eucjp(Regexp.union(e(bytes.dup)))
  assert_regexp_fixed_sjis(Regexp.union(s(bytes.dup)))
  assert_regexp_fixed_utf8(Regexp.union(u(bytes.dup)))
end
|
|
|
|
|
|
|
|
# Regexp.union of one empty regexp is expected to keep that regexp's
# encoding behaviour: generic without a flag, fixed with /n, /e, /s or /u.
# Each row is [assertion helper to call, regexp to wrap].
def test_union_1_regexp
  [
    [:assert_regexp_generic_ascii,   //],
    [:assert_regexp_fixed_ascii8bit, //n],
    [:assert_regexp_fixed_eucjp,     //e],
    [:assert_regexp_fixed_sjis,      //s],
    [:assert_regexp_fixed_utf8,      //u],
  ].each do |checker, re|
    send(checker, Regexp.union(re))
  end
end
|
|
|
|
|
2007-11-26 08:05:08 -05:00
|
|
|
# Regexp.union of every ordered pair drawn from empty and non-ASCII strings
# in the four test encodings:
# * both empty                          -> generic ASCII-8BIT regexp
# * both non-ASCII, different encodings -> ArgumentError
# * otherwise                           -> regexp fixed to the encoding of
#                                          the non-ASCII operand
def test_union_2
  ary = [
    a(""), e(""), s(""), u(""),
    a("\xc2\xa1"), e("\xc2\xa1"), s("\xc2\xa1"), u("\xc2\xa1")
  ]
  ary.product(ary).each do |lhs, rhs|
    if lhs.empty? && rhs.empty?
      assert_regexp_generic_ascii(Regexp.union(lhs, rhs))
    elsif !lhs.empty? && !rhs.empty? && lhs.encoding != rhs.encoding
      assert_raise(ArgumentError) { Regexp.union(lhs, rhs) }
    else
      re = Regexp.union(lhs, rhs)
      assert_regexp_fixed_encoding(re)
      # When both operands are non-ASCII they agree here, so the left one
      # serves as the expectation.
      expected = lhs.empty? ? rhs.encoding : lhs.encoding
      assert_equal(expected, re.encoding)
    end
  end
end
|
2007-12-01 19:16:24 -05:00
|
|
|
|
2007-12-01 21:53:46 -05:00
|
|
|
# Regexp literals with the /n flag that contain an (empty) interpolation are
# expected to be fixed to ASCII-8BIT, with or without non-ASCII bytes around
# the interpolation.
def test_dynamic_ascii_regexp
  only_interp = /#{}/n
  assert_regexp_fixed_ascii8bit(only_interp)

  interp_first = /#{}\xc2\xa1/n
  assert_regexp_fixed_ascii8bit(interp_first)

  interp_last = /\xc2\xa1#{}/n
  assert_regexp_fixed_ascii8bit(interp_last)

  # Disabled in the source; kept for reference:
  #assert_raise(SyntaxError) { s1, s2 = s('\xc2'), s('\xa1'); /#{s1}#{s2}/ }
end
|
|
|
|
|
|
|
|
# Regexp literals with the /e flag that contain an (empty) interpolation are
# expected to be fixed to EUC-JP. A lone 0xc2 byte, or 0xc2 and 0xa1 split
# apart by the interpolation, is expected to raise SyntaxError.
def test_dynamic_eucjp_regexp
  only_interp = /#{}/e
  assert_regexp_fixed_eucjp(only_interp)

  interp_first = /#{}\xc2\xa1/e
  assert_regexp_fixed_eucjp(interp_first)

  interp_last = /\xc2\xa1#{}/e
  assert_regexp_fixed_eucjp(interp_last)

  # Single-quoted, so the "#{}" reaches eval untouched.
  ['/\xc2#{}/e', '/#{}\xc2/e', '/\xc2#{}\xa1/e'].each do |src|
    assert_raise(SyntaxError) { eval(src) }
  end

  # Disabled in the source; kept for reference:
  #assert_raise(SyntaxError) { s1, s2 = e('\xc2'), e('\xa1'); /#{s1}#{s2}/ }
end
|
|
|
|
|
|
|
|
# Regexp literals with the /s flag that contain an (empty) interpolation are
# expected to be fixed to Shift_JIS. A lone 0x81 lead byte, or 0x81 and 0xa1
# split apart by the interpolation, is expected to raise SyntaxError.
def test_dynamic_sjis_regexp
  only_interp = /#{}/s
  assert_regexp_fixed_sjis(only_interp)

  interp_first = /#{}\xc2\xa1/s
  assert_regexp_fixed_sjis(interp_first)

  interp_last = /\xc2\xa1#{}/s
  assert_regexp_fixed_sjis(interp_last)

  # Single-quoted, so the "#{}" reaches eval untouched.
  ['/\x81#{}/s', '/#{}\x81/s', '/\x81#{}\xa1/s'].each do |src|
    assert_raise(SyntaxError) { eval(src) }
  end

  # Disabled in the source; kept for reference:
  #assert_raise(SyntaxError) { s1, s2 = s('\x81'), s('\xa1'); /#{s1}#{s2}/ }
end
|
|
|
|
|
|
|
|
# Regexp literals with the /u flag that contain an (empty) interpolation are
# expected to be fixed to UTF-8. A lone 0xc2 lead byte, or 0xc2 and 0xa1
# split apart by the interpolation, is expected to raise SyntaxError.
def test_dynamic_utf8_regexp
  only_interp = /#{}/u
  assert_regexp_fixed_utf8(only_interp)

  interp_first = /#{}\xc2\xa1/u
  assert_regexp_fixed_utf8(interp_first)

  interp_last = /\xc2\xa1#{}/u
  assert_regexp_fixed_utf8(interp_last)

  # Single-quoted, so the "#{}" reaches eval untouched.
  ['/\xc2#{}/u', '/#{}\xc2/u', '/\xc2#{}\xa1/u'].each do |src|
    assert_raise(SyntaxError) { eval(src) }
  end

  # Disabled in the source; kept for reference:
  #assert_raise(SyntaxError) { s1, s2 = u('\xc2'), u('\xa1'); /#{s1}#{s2}/ }
end
|
|
|
|
|
2007-12-08 22:50:11 -05:00
|
|
|
# Boundary checks for \u{...} escapes in /u regexps. Each row is
# [regexp source, whether evaluating it is expected to succeed]; the
# rejected values are the surrogate range ends and the first value past
# 10FFFF.
def test_regexp_unicode
  [
    ['/\u{0}/u',      true],
    ['/\u{D7FF}/u',   true],
    ['/\u{D800}/u',   false],
    ['/\u{DFFF}/u',   false],
    ['/\u{E000}/u',   true],
    ['/\u{10FFFF}/u', true],
    ['/\u{110000}/u', false],
  ].each do |src, accepted|
    if accepted
      assert_nothing_raised { eval src }
    else
      assert_raise(SyntaxError) { eval src }
    end
  end
end
|
|
|
|
|
2007-12-01 21:53:46 -05:00
|
|
|
# A regexp that combines the bytes 0xc2 0xa0 (raw or as \x escapes) with a
# \u{6666} escape is expected to be a SyntaxError in ASCII-8BIT, EUC-JP and
# Shift_JIS source and to be accepted in UTF-8 source.
#
# NOTE(review): the last four sources contain "#{}" inside a %{...}
# literal. That literal interpolates, so the "#{}" is replaced by an empty
# string before eval ever sees it, and those four sources end up identical
# to the first four. If the intent was to exercise regexps with an
# interpolation, the marker would have to be written "\#{}". Left unchanged
# because the expected outcome of the corrected cases is unconfirmed.
def test_regexp_mixed_unicode
  sources = [
    %{/\xc2\xa0\\u{6666}/},
    %{/\\u{6666}\xc2\xa0/},

    %{/\\xc2\\xa0\\u{6666}/},
    %{/\\u{6666}\\xc2\\xa0/},

    %{/\xc2\xa0#{}\\u{6666}/},
    %{/\\u{6666}#{}\xc2\xa0/},

    %{/\\xc2\\xa0#{}\\u{6666}/},
    %{/\\u{6666}#{}\\xc2\\xa0/},
  ]
  sources.each do |src|
    # Each helper re-tags its argument in place, so give every eval a copy.
    assert_raise(SyntaxError) { eval(a(src.dup)) }
    assert_raise(SyntaxError) { eval(e(src.dup)) }
    assert_raise(SyntaxError) { eval(s(src.dup)) }
    assert_nothing_raised { eval(u(src.dup)) }
  end
end
|
2007-12-08 21:29:24 -05:00
|
|
|
|
|
|
|
# "\x81\x41" tagged as Shift_JIS is a single two-byte character whose second
# byte has the same value as ASCII "A". String#tr and String#tr_s asked to
# replace "A" must therefore leave the string unchanged.
#
# The untouched fixture is passed as the expected value and the tr/tr_s
# result as the actual value, so a failure message labels the two the right
# way round. The local is not called +s+, to avoid shadowing the s() helper.
def test_tr
  str = "\x81\x41".force_encoding("shift_jis")
  assert_equal(str, str.tr("A", "B"))
  assert_equal(str, str.tr_s("A", "B"))
end
|
2007-12-08 22:12:25 -05:00
|
|
|
|
|
|
|
# The EUC-JP subject contains the two-byte character "\xa3\xb1" twice in a
# row; String#squeeze is expected to collapse that run into one occurrence
# and leave the other characters alone.
def test_squeeze
  subject = "\xa3\xb0\xa3\xb1\xa3\xb1\xa3\xb3\xa3\xb4".force_encoding("euc-jp")
  expected = "\xa3\xb0\xa3\xb1\xa3\xb3\xa3\xb4".force_encoding("euc-jp")
  assert_equal(expected, subject.squeeze)
end
|
2007-11-23 01:30:26 -05:00
|
|
|
end
|