2008-01-23 02:58:48 -05:00
|
|
|
require 'test/unit'
|
|
|
|
|
2008-01-23 02:59:56 -05:00
|
|
|
class TestUTF16 < Test::Unit::TestCase
|
2008-09-15 12:24:39 -04:00
|
|
|
# Renders +obj+ as Ruby source text that makes its encoding explicit, for use
# in assertion failure messages.
#
# * String: its +dump+ form. When +dump+ did not already end in a
#   +.force_encoding("...")+ call, one naming the string's encoding is appended.
# * Regexp: a +Regexp.new(...)+ expression built from the encdump of its
#   source and its integer options.
#
# Raises ArgumentError for any other kind of object.
def encdump(obj)
  case obj
  when String
    dumped = obj.dump
    # dump may already carry the encoding suffix; never add a second one.
    return dumped if /\.force_encoding\("[A-Za-z0-9.:_+-]*"\)\z/ =~ dumped

    "#{dumped}.force_encoding(#{obj.encoding.name.dump})"
  when Regexp
    "Regexp.new(#{encdump(obj.source)}, #{obj.options})"
  else
    # This used to read `raise Argument, ...`; Argument is not a defined
    # constant, so the branch raised NameError instead of the intended error.
    raise ArgumentError, "unexpected: #{obj.inspect}"
  end
end
|
|
|
|
|
2008-01-23 04:25:17 -05:00
|
|
|
# Calls +meth+ on +recv+ with +args+ through +send+, asserting that nothing is
# raised, and returns the call's result.
#
# The assertion message is a textual rendering of the call: String receivers
# and arguments are rendered with encdump, everything else with +inspect+.
def enccall(recv, meth, *args)
  render = lambda { |value| String === value ? encdump(value) : value.inspect }

  desc = ''
  desc << render.call(recv)
  desc << '.' << meth.to_s
  unless args.empty?
    desc << '('
    args.each_with_index do |arg, idx|
      desc << ',' if idx > 0
      desc << render.call(arg)
    end
    desc << ')'
  end

  result = nil
  assert_nothing_raised(desc) { result = recv.send(meth, *args) }
  result
end
|
|
|
|
|
2008-01-26 16:23:59 -05:00
|
|
|
# Asserts +expected+ == +actual+ via assert_equal. The failure message is
# produced by build_message from the optional +message+ plus a two-line text
# showing both values rendered with encdump.
def assert_str_equal(expected, actual, message=nil)
  detail = "#{encdump expected} expected but not equal to\n" \
           "#{encdump actual}.\n"
  assert_equal(expected, actual, build_message(message, detail))
end
|
|
|
|
|
2008-01-23 02:58:48 -05:00
|
|
|
# tests start
|
|
|
|
|
|
|
|
# String#valid_encoding? on raw byte strings tagged as UTF-16BE.
def test_utf16be_valid_encoding
  # Byte sequences that must be reported as valid.
  accepted = [
    "\x00\x00",
    "\xd7\xff",
    "\xd8\x00\xdc\x00",
    "\xdb\xff\xdf\xff",
    "\xe0\x00",
    "\xff\xff",
  ]
  accepted.each do |s|
    s.force_encoding("utf-16be")
    assert_equal(true, s.valid_encoding?, "#{encdump s}.valid_encoding?")
  end

  # Byte sequences that must be reported as invalid.
  rejected = [
    "\x00",
    "\xd7",
    "\xd8\x00",
    "\xd8\x00\xd8\x00",
    "\xdc\x00",
    "\xdc\x00\xd8\x00",
    "\xdc\x00\xdc\x00",
    "\xe0",
    "\xff",
  ]
  rejected.each do |s|
    s.force_encoding("utf-16be")
    assert_equal(false, s.valid_encoding?, "#{encdump s}.valid_encoding?")
  end
end
|
|
|
|
|
|
|
|
# String#valid_encoding? on raw byte strings tagged as UTF-16LE.
def test_utf16le_valid_encoding
  # Byte sequences that must be reported as valid.
  accepted = [
    "\x00\x00",
    "\xff\xd7",
    "\x00\xd8\x00\xdc",
    "\xff\xdb\xff\xdf",
    "\x00\xe0",
    "\xff\xff",
  ]
  accepted.each do |s|
    s.force_encoding("utf-16le")
    assert_equal(true, s.valid_encoding?, "#{encdump s}.valid_encoding?")
  end

  # Byte sequences that must be reported as invalid.
  rejected = [
    "\x00",
    "\xd7",
    "\x00\xd8",
    "\x00\xd8\x00\xd8",
    "\x00\xdc",
    "\x00\xdc\x00\xd8",
    "\x00\xdc\x00\xdc",
    "\xe0",
    "\xff",
  ]
  rejected.each do |s|
    s.force_encoding("utf-16le")
    assert_equal(false, s.valid_encoding?, "#{encdump s}.valid_encoding?")
  end
end
|
|
|
|
|
|
|
|
# Time#strftime is expected to raise ArgumentError for a UTF-16BE format.
def test_strftime
  format = "aa".force_encoding("utf-16be")
  assert_raise(ArgumentError, "Time.now.strftime(#{encdump format})") do
    Time.now.strftime(format)
  end
end
|
|
|
|
|
|
|
|
# A UTF-16BE string must keep its encoding through intern followed by to_s.
def test_intern
  s = "aaaa".force_encoding("utf-16be")
  wanted = s.encoding
  round_tripped = s.intern.to_s
  assert_equal(wanted, round_tripped.encoding, "#{encdump s}.intern.to_s.encoding")
end
|
|
|
|
|
2008-03-06 03:04:18 -05:00
|
|
|
# The symbol interned from the bytes "aa" tagged UTF-16LE must differ from :aa.
def test_sym_eq
  s = "aa".force_encoding("utf-16le")
  differs = s.intern != :aa
  assert(differs, "#{encdump s}.intern != :aa")
end
|
|
|
|
|
2008-01-23 02:58:48 -05:00
|
|
|
# Encoding.compatible? must return nil for a UTF-16BE and a US-ASCII string.
def test_compatible
  s1 = "aa".force_encoding("utf-16be")
  s2 = "z".force_encoding("us-ascii")
  verdict = Encoding.compatible?(s1, s2)
  assert_nil(verdict, "Encoding.compatible?(#{encdump s1}, #{encdump s2})")
end
|
|
|
|
|
2008-01-30 11:42:15 -05:00
|
|
|
# casecmp between the bytes "aa" tagged UTF-16BE and a plain "AA" literal
# must not report equality (0).
def test_casecmp
  s1 = "aa".force_encoding("utf-16be")
  s2 = "AA"
  outcome = s1.casecmp(s2)
  assert_not_equal(0, outcome, "#{encdump s1}.casecmp(#{encdump s2})")
end
|
|
|
|
|
2008-01-23 02:58:48 -05:00
|
|
|
# end_with? must be false here: the suffix is the single byte "b" tagged
# UTF-16BE, while the receiver is the two bytes "ab" tagged UTF-16BE.
def test_end_with
  s1 = "ab".force_encoding("utf-16be")
  s2 = "b".force_encoding("utf-16be")
  outcome = s1.end_with?(s2)
  assert_equal(false, outcome, "#{encdump s1}.end_with?(#{encdump s2})")
end
|
|
|
|
|
|
|
|
def test_hex
|
2008-09-18 04:57:33 -04:00
|
|
|
assert_raise(Encoding::CompatibilityError) {
|
2008-02-29 04:19:15 -05:00
|
|
|
"ff".encode("utf-16le").hex
|
|
|
|
}
|
2008-09-18 04:57:33 -04:00
|
|
|
assert_raise(Encoding::CompatibilityError) {
|
2008-02-29 04:19:15 -05:00
|
|
|
"ff".encode("utf-16be").hex
|
|
|
|
}
|
2008-01-23 02:58:48 -05:00
|
|
|
end
|
|
|
|
|
2008-02-17 11:50:49 -05:00
|
|
|
def test_oct
|
2008-09-18 04:57:33 -04:00
|
|
|
assert_raise(Encoding::CompatibilityError) {
|
2008-02-29 04:19:15 -05:00
|
|
|
"77".encode("utf-16le").oct
|
|
|
|
}
|
2008-09-18 04:57:33 -04:00
|
|
|
assert_raise(Encoding::CompatibilityError) {
|
2008-02-29 04:19:15 -05:00
|
|
|
"77".encode("utf-16be").oct
|
|
|
|
}
|
2008-02-17 11:50:49 -05:00
|
|
|
end
|
|
|
|
|
2008-01-23 02:58:48 -05:00
|
|
|
# String#count on a UTF-16BE receiver with a plain "aa" literal as the
# character set must raise Encoding::CompatibilityError.
def test_count
  s1 = "aa".force_encoding("utf-16be")
  s2 = "aa"
  label = "#{encdump s1}.count(#{encdump s2})"
  assert_raise(Encoding::CompatibilityError, label) do
    s1.count(s2)
  end
end
|
|
|
|
|
|
|
|
# Adding a non-empty US-ASCII string and a non-empty UTF-16BE string must
# raise Encoding::CompatibilityError.
def test_plus
  s1 = "a".force_encoding("us-ascii")
  s2 = "aa".force_encoding("utf-16be")
  label = "#{encdump s1} + #{encdump s2}"
  assert_raise(Encoding::CompatibilityError, label) do
    s1 + s2
  end
end
|
|
|
|
|
|
|
|
# Encoding.find must raise ArgumentError when the name is passed as a string
# tagged UTF-16BE.
def test_encoding_find
  assert_raise(ArgumentError) do
    name = "utf-8".force_encoding("utf-16be")
    Encoding.find(name)
  end
end
|
2008-01-23 03:14:30 -05:00
|
|
|
|
|
|
|
# Interpolating a UTF-16BE string into a literal that starts with "a" must
# raise Encoding::CompatibilityError.
def test_interpolation
  s = "aa".force_encoding("utf-16be")
  # The label spells out the interpolation expression itself, so the "#{"
  # is escaped to stay literal in the message.
  label = %Q("a\#{#{encdump s}}")
  assert_raise(Encoding::CompatibilityError, label) do
    "a#{s}"
  end
end
|
2008-01-23 04:25:17 -05:00
|
|
|
|
|
|
|
# slice!(-1) on a UTF-16BE string must not raise (checked through enccall).
def test_slice!
  target = "aa".force_encoding("UTF-16BE")
  enccall(target, :slice!, -1)
end
|
2008-01-23 21:17:26 -05:00
|
|
|
|
2008-02-16 04:02:12 -05:00
|
|
|
# An empty plain literal + a non-empty UTF-16BE string must not raise.
def test_plus_empty1
  s1 = ""
  s2 = "aa".force_encoding("utf-16be")
  # The label now names the operator actually exercised; it previously said
  # "<<", which describes the concat tests, not this one.
  assert_nothing_raised("#{encdump s1} + #{encdump s2}") {
    s1 + s2
  }
end
|
|
|
|
|
|
|
|
# A non-empty plain literal + an empty UTF-16BE string must not raise.
def test_plus_empty2
  s1 = "aa"
  s2 = "".force_encoding("utf-16be")
  # The label now names the operator actually exercised; it previously said
  # "<<", which describes the concat tests, not this one.
  assert_nothing_raised("#{encdump s1} + #{encdump s2}") {
    s1 + s2
  }
end
|
|
|
|
|
|
|
|
# A non-empty plain literal + a non-empty UTF-16BE string must raise
# Encoding::CompatibilityError.
def test_plus_nonempty
  s1 = "aa"
  s2 = "bb".force_encoding("utf-16be")
  # The label now names the operator actually exercised; it previously said
  # "<<", which describes the concat tests, not this one.
  assert_raise(Encoding::CompatibilityError, "#{encdump s1} + #{encdump s2}") {
    s1 + s2
  }
end
|
|
|
|
|
|
|
|
# Appending (<<) a non-empty UTF-16BE string to an empty plain literal must
# not raise.
def test_concat_empty1
  s1 = ""
  s2 = "aa".force_encoding("utf-16be")
  label = "#{encdump s1} << #{encdump s2}"
  assert_nothing_raised(label) do
    s1 << s2
  end
end
|
|
|
|
|
|
|
|
# Appending (<<) an empty UTF-16BE string to a non-empty plain literal must
# not raise.
def test_concat_empty2
  s1 = "aa"
  s2 = "".force_encoding("utf-16be")
  label = "#{encdump s1} << #{encdump s2}"
  assert_nothing_raised(label) do
    s1 << s2
  end
end
|
|
|
|
|
|
|
|
# Appending (<<) a non-empty UTF-16BE string to a non-empty plain literal
# must raise Encoding::CompatibilityError.
def test_concat_nonempty
  s1 = "aa"
  s2 = "bb".force_encoding("utf-16be")
  label = "#{encdump s1} << #{encdump s2}"
  assert_raise(Encoding::CompatibilityError, label) do
    s1 << s2
  end
end
|
2008-01-24 22:58:32 -05:00
|
|
|
|
2008-01-26 16:23:59 -05:00
|
|
|
# String#chomp on UTF-16BE strings.
def test_chomp
  # Bytes 0x01 0x0A form one 16-bit unit (0x010A), so chomp must leave the
  # string as it is.
  intact = "\1\n".force_encoding("utf-16be")
  assert_equal(intact, intact.chomp, "#{encdump intact}.chomp")

  # Bytes 0x00 0x0A, alone or preceded by 0x00 0x0D, must be removed
  # entirely, leaving a string equal to "".
  ["\0\n", "\0\r\0\n"].each do |bytes|
    s = bytes.force_encoding("utf-16be")
    assert_equal("", s.chomp, "#{encdump s}.chomp")
  end
end
|
|
|
|
|
2008-01-30 00:29:37 -05:00
|
|
|
# String#succ on these UTF-16BE inputs must yield a validly encoded string.
def test_succ
  ["\xff\xff", "\xdb\xff\xdf\xff"].each do |bytes|
    s = bytes.force_encoding("utf-16be")
    assert(s.succ.valid_encoding?, "#{encdump s}.succ.valid_encoding?")
  end
end
|
|
|
|
|
2008-01-24 22:58:32 -05:00
|
|
|
# Regexp.union of two UTF-16BE strings must not raise (checked via enccall).
def test_regexp_union
  first = "aa".force_encoding("utf-16be")
  second = "bb".force_encoding("utf-16be")
  enccall(Regexp, :union, first, second)
end
|
2008-01-26 16:01:52 -05:00
|
|
|
|
|
|
|
# A Regexp built from an empty UTF-16BE string must report UTF-16BE as its
# encoding.
def test_empty_regexp
  s = "".force_encoding("utf-16be")
  wanted = Encoding.find("utf-16be")
  assert_equal(wanted, Regexp.new(s).encoding, "Regexp.new(#{encdump s}).encoding")
end
|
2008-02-17 11:50:49 -05:00
|
|
|
|
2008-05-13 13:16:03 -04:00
|
|
|
# Matching a Regexp built from a UTF-16BE string against a plain "aa"
# literal must raise Encoding::CompatibilityError.
def test_regexp_match
  assert_raise(Encoding::CompatibilityError) do
    pattern = Regexp.new("aa".force_encoding("utf-16be"))
    pattern =~ "aa"
  end
end
|
|
|
|
|
2008-02-17 11:50:49 -05:00
|
|
|
# String#gsub with a UTF-16BE "." pattern and a plain "xy" replacement.
def test_gsub
  # Built fresh inside each assertion block, as any error from constructing
  # the pattern belongs to that assertion.
  any_char = lambda { Regexp.new(".".encode("utf-16be")) }

  # First input: the call must not raise.
  s = "abcd".force_encoding("utf-16be")
  assert_nothing_raised do
    s.gsub(any_char.call, "xy")
  end

  # Second input: the call must raise Encoding::CompatibilityError.
  s = "ab\0\ncd".force_encoding("utf-16be")
  assert_raise(Encoding::CompatibilityError) do
    s.gsub(any_char.call, "xy")
  end
end
|
|
|
|
|
|
|
|
# Splitting " ab cd " on a single space, both encoded as UTF-16BE, must give
# exactly the two fields "ab" and "cd".
def test_split_awk
  utf16be = lambda { |text| text.encode("utf-16be") }

  fields = utf16be.call(" ab cd ").split(utf16be.call(" "))
  assert_equal(2, fields.length)
  assert_str_equal(utf16be.call("ab"), fields[0])
  assert_str_equal(utf16be.call("cd"), fields[1])
end
|
2008-02-17 19:08:01 -05:00
|
|
|
|
2008-05-13 11:57:22 -04:00
|
|
|
# count("^b") on "abc" must give the same result when both receiver and
# argument are encoded as UTF-16BE or UTF-16LE as it does for plain literals.
def test_count2
  baseline = "abc".count("^b")
  %w[utf-16be utf-16le].each do |enc|
    assert_equal(baseline, "abc".encode(enc).count("^b".encode(enc)))
  end
end
|
* enc/iso_8859_5.c: Large omicron should lowercase to small omicron.
* test/ruby/test_big5.rb, test/ruby/test_cp949.rb,
test/ruby/test_euc_jp.rb, test/ruby/test_euc_kr.rb,
test/ruby/test_euc_tw.rb, test/ruby/test_gb18030.rb,
test/ruby/test_gbk.rb, test/ruby/test_iso_8859.rb,
test/ruby/test_koi8.rb, test/ruby/test_shift_jis.rb,
test/ruby/test_windows_1251.rb: new tests for encoding.
* test/ruby/test_utf16.rb, test/ruby/test_utf32.rb,
test/ruby/test_regexp.rb: add tests.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@16759 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2008-06-02 09:30:38 -04:00
|
|
|
|
|
|
|
# Evaluating source whose magic comment names UTF-16LE or UTF-16BE must
# raise ArgumentError. The eval input is a fixed literal, not external data.
def test_header
  sources = [
    "# encoding:utf-16le\nfoo",
    "# encoding:utf-16be\nfoo",
  ]
  sources.each do |src|
    assert_raise(ArgumentError) { eval(src) }
  end
end
|
|
|
|
|
|
|
|
|
|
|
|
# Newline recognition in UTF-16LE/BE, exercised through lines and chomp.
def test_is_mbc_newline
  le = lambda { |bytes| bytes.force_encoding("utf-16le") }
  be = lambda { |bytes| bytes.force_encoding("utf-16be") }

  # lines must split after each 16-bit newline unit and keep it.
  le_lines = le.call("f\0o\0o\0\n\0b\0a\0r\0\n\0b\0a\0z\0\n\0").lines.to_a
  be_lines = be.call("\0f\0o\0o\0\n\0b\0a\0r\0\n\0b\0a\0z\0\n").lines.to_a
  ["f\0o\0o\0\n\0", "b\0a\0r\0\n\0", "b\0a\0z\0\n\0"].each_with_index do |bytes, i|
    assert_equal(le.call(bytes), le_lines[i])
  end
  ["\0f\0o\0o\0\n", "\0b\0a\0r\0\n", "\0b\0a\0z\0\n"].each_with_index do |bytes, i|
    assert_equal(be.call(bytes), be_lines[i])
  end

  # chomp removes one trailing newline unit; a second chomp changes nothing.
  le_line = le.call("f\0o\0o\0\n\0")
  be_line = be.call("\0f\0o\0o\0\n")
  le_bare = le.call("f\0o\0o\0")
  be_bare = be.call("\0f\0o\0o")
  assert_equal(le_bare, le_line.chomp)
  assert_equal(le_bare, le_line.chomp.chomp)
  assert_equal(be_bare, be_line.chomp)
  assert_equal(be_bare, be_line.chomp.chomp)

  # Here the trailing "\n" is a lone byte (odd byte count), and chomp must
  # return the string unchanged.
  le_odd = le.call("f\0o\0o\0\n")
  be_odd = be.call("\0f\0o\0o\n")
  assert_equal(le_odd, le_odd.chomp)
  assert_equal(be_odd, be_odd.chomp)
end
|
|
|
|
|
|
|
|
# Integer#chr with a UTF-16 encoding name must produce the two-byte form of
# the code point of "a" in the matching byte order.
def test_code_to_mbc
  code = "a".ord
  assert_equal("a\0".force_encoding("utf-16le"), code.chr("utf-16le"))
  assert_equal("\0a".force_encoding("utf-16be"), code.chr("utf-16be"))
end
|
|
|
|
|
|
|
|
# Re-encodes +s+ character by character: each character's code point (+ord+)
# is turned back into a string in encoding +e+ (+chr+), and the pieces are
# joined into one string.
def utf8_to_utf16(s, e)
  pieces = s.chars.map do |ch|
    code_point = ch.ord
    code_point.chr(e)
  end
  pieces.join
end
|
|
|
|
|
|
|
|
# A Regexp with back-references, built with the "i" option from a UTF-16
# pattern, must carry the pattern's encoding and match a UTF-16 subject.
def test_mbc_case_fold
  pattern = "^(\u3042)(a)\\1\\2$"
  subject = "\u3042a\u3042a"

  rl = Regexp.new(utf8_to_utf16(pattern, "utf-16le"), "i")
  rb = Regexp.new(utf8_to_utf16(pattern, "utf-16be"), "i")

  assert_equal(Encoding.find("utf-16le"), rl.encoding)
  assert_equal(Encoding.find("utf-16be"), rb.encoding)

  assert_match(rl, utf8_to_utf16(subject, "utf-16le"))
  assert_match(rb, utf8_to_utf16(subject, "utf-16be"))
end
|
|
|
|
|
|
|
|
# A four-byte UTF-16 sequence must behave as one character with code point
# 0x20bb7 for size, ord, Integer#chr and chop, in both byte orders.
def test_surrogate_pair
  code_point = 0x20bb7
  sl = "\x42\xd8\xb7\xdf".force_encoding("utf-16le")
  sb = "\xd8\x42\xdf\xb7".force_encoding("utf-16be")

  # Four bytes, but a single character.
  assert_equal(1, sl.size)
  assert_equal(1, sb.size)

  # Bytes -> code point and code point -> bytes.
  assert_equal(code_point, sl.ord)
  assert_equal(code_point, sb.ord)
  assert_equal(sl, code_point.chr("utf-16le"))
  assert_equal(sb, code_point.chr("utf-16be"))

  # chop must drop all four bytes at once.
  assert_equal("", sl.chop)
  assert_equal("", sb.chop)
end
|
2008-09-15 12:01:08 -04:00
|
|
|
|
|
|
|
# A Regexp built from Regexp.escape of a UTF-16BE string must match that
# same string.
def test_regexp_escape
  s = "\0*".force_encoding("UTF-16BE")
  escaped = Regexp.escape(s)
  r = Regexp.new(escaped)
  assert(r =~ s, "#{encdump(r)} =~ #{encdump(s)}")
end
|
2008-09-23 08:27:35 -04:00
|
|
|
|
2010-02-13 22:09:53 -05:00
|
|
|
# casecmp on UTF-16 strings: equality depends on where the zero byte sits
# for the given byte order, and sorting by casecmp must agree with <=>.
def test_casecmp2
  be = lambda { |bytes| bytes.force_encoding("UTF-16BE") }
  le = lambda { |bytes| bytes.force_encoding("UTF-16LE") }

  # Zero byte first: equal (0) as UTF-16BE, not equal as UTF-16LE.
  assert_equal(0, be.call("\0A").casecmp(be.call("\0a")))
  assert_not_equal(0, le.call("\0A").casecmp(le.call("\0a")))
  # Zero byte second: not equal as UTF-16BE, equal (0) as UTF-16LE.
  assert_not_equal(0, be.call("A\0").casecmp(be.call("a\0")))
  assert_equal(0, le.call("A\0").casecmp(le.call("a\0")))

  # Both comparators must produce the same ordering.
  ary = [le.call("01"), le.call("10")]
  by_spaceship = ary.sort { |x, y| x <=> y }
  by_casecmp = ary.sort { |x, y| x.casecmp(y) }
  assert_equal(by_spaceship, by_casecmp)
end
|
2008-01-23 02:58:48 -05:00
|
|
|
end
|