ruby--ruby/test/ruby/test_utf16.rb

require 'test/unit'

class TestUTF16 < Test::Unit::TestCase
  # Renders +str+ as Ruby source text that also spells out its encoding.
  #
  # When the String#dump output already ends in a +.force_encoding("...")+
  # call it is returned untouched; otherwise such a call, naming the encoding
  # of +str+, is appended to the dump.
  def encdump(str)
    dumped = str.dump
    return dumped if dumped =~ /\.force_encoding\("[A-Za-z0-9.:_+-]*"\)\z/

    "#{dumped}.force_encoding(#{str.encoding.name.dump})"
  end

  # Sends +meth+ with +args+ to +recv+ inside assert_nothing_raised and
  # returns whatever the call returned.
  #
  # The assertion message is the call written out as source text: a String
  # receiver or argument is rendered with encdump, anything else with
  # #inspect; arguments, if any, are comma-separated inside parentheses.
  def enccall(recv, meth, *args)
    render = lambda {|obj| String === obj ? encdump(obj) : obj.inspect }

    desc = ''
    desc << render.call(recv) << '.' << meth.to_s
    unless args.empty?
      # The first argument follows the opening paren, later ones a comma.
      lead = '('
      args.each do |arg|
        desc << lead << render.call(arg)
        lead = ','
      end
      desc << ')'
    end

    result = nil
    assert_nothing_raised(desc) do
      result = recv.send(meth, *args)
    end
    result
  end

  # Asserts that +expected+ == +actual+.
  #
  # On failure the message shows both strings through encdump, so their
  # encodings are visible; +message+ is passed to build_message as the
  # optional leading text.
  def assert_str_equal(expected, actual, message=nil)
    template = "#{encdump expected} expected but not equal to\n#{encdump actual}.\n"
    assert_block(build_message(message, template)) do
      expected == actual
    end
  end

  # tests start

  # String#valid_encoding? is expected to accept well-formed UTF-16BE byte
  # sequences and to reject truncated code units and unpaired or misordered
  # surrogates.
  def test_utf16be_valid_encoding
    samples = {
      true => [
        "\x00\x00",          # U+0000
        "\xd7\xff",          # U+D7FF, just below the surrogate range
        "\xd8\x00\xdc\x00",  # surrogate pair for U+10000
        "\xdb\xff\xdf\xff",  # surrogate pair for U+10FFFF
        "\xe0\x00",          # U+E000, just above the surrogate range
        "\xff\xff",          # U+FFFF
      ],
      false => [
        "\x00",              # a single byte is half a code unit
        "\xd7",
        "\xd8\x00",          # high surrogate with nothing after it
        "\xd8\x00\xd8\x00",  # high surrogate followed by a high surrogate
        "\xdc\x00",          # low surrogate on its own
        "\xdc\x00\xd8\x00",  # low surrogate before a high surrogate
        "\xdc\x00\xdc\x00",  # two low surrogates
        "\xe0",
        "\xff",
      ],
    }
    samples.each do |expected, byte_strings|
      byte_strings.each do |bytes|
        bytes.force_encoding("utf-16be")
        assert_equal(expected, bytes.valid_encoding?, "#{encdump bytes}.valid_encoding?")
      end
    end
  end

  # String#valid_encoding? is expected to accept well-formed UTF-16LE byte
  # sequences and to reject truncated code units and unpaired or misordered
  # surrogates.
  def test_utf16le_valid_encoding
    samples = {
      true => [
        "\x00\x00",          # U+0000
        "\xff\xd7",          # U+D7FF, just below the surrogate range
        "\x00\xd8\x00\xdc",  # surrogate pair for U+10000
        "\xff\xdb\xff\xdf",  # surrogate pair for U+10FFFF
        "\x00\xe0",          # U+E000, just above the surrogate range
        "\xff\xff",          # U+FFFF
      ],
      false => [
        "\x00",              # a single byte is half a code unit
        "\xd7",
        "\x00\xd8",          # high surrogate with nothing after it
        "\x00\xd8\x00\xd8",  # high surrogate followed by a high surrogate
        "\x00\xdc",          # low surrogate on its own
        "\x00\xdc\x00\xd8",  # low surrogate before a high surrogate
        "\x00\xdc\x00\xdc",  # two low surrogates
        "\xe0",
        "\xff",
      ],
    }
    samples.each do |expected, byte_strings|
      byte_strings.each do |bytes|
        bytes.force_encoding("utf-16le")
        assert_equal(expected, bytes.valid_encoding?, "#{encdump bytes}.valid_encoding?")
      end
    end
  end

  # Expects Time#strftime to raise ArgumentError for a UTF-16BE format string.
  def test_strftime
    format = "aa".force_encoding("utf-16be")
    message = "Time.now.strftime(#{encdump format})"
    assert_raise(ArgumentError, message) do
      Time.now.strftime(format)
    end
  end

  # Expects a UTF-16BE string to keep its encoding through intern.to_s.
  def test_intern
    str = "aaaa".force_encoding("utf-16be")
    round_trip_encoding = str.intern.to_s.encoding
    assert_equal(str.encoding, round_trip_encoding, "#{encdump str}.intern.to_s.encoding")
  end

  # Expects the symbol interned from UTF-16LE "aa" bytes to differ from :aa.
  def test_sym_eq
    str = "aa".force_encoding("utf-16le")
    differs = (str.intern != :aa)
    assert(differs, "#{encdump str}.intern != :aa")
  end

  # Expects Encoding.compatible? to return nil for a UTF-16BE string paired
  # with a US-ASCII string.
  def test_compatible
    wide = "aa".force_encoding("utf-16be")
    narrow = "z".force_encoding("us-ascii")
    verdict = Encoding.compatible?(wide, narrow)
    assert_nil(verdict, "Encoding.compatible?(#{encdump wide}, #{encdump narrow})")
  end

  # Expects casecmp between UTF-16BE "aa" bytes and a plain "AA" literal not
  # to report equality (0).
  def test_casecmp
    wide = "aa".force_encoding("utf-16be")
    plain = "AA"
    message = "#{encdump wide}.casecmp(#{encdump plain})"
    assert_not_equal(0, wide.casecmp(plain), message)
  end

  # Expects end_with? to be false when the suffix only matches the trailing
  # byte of a two-byte UTF-16BE code unit.
  def test_end_with
    whole = "ab".force_encoding("utf-16be")
    tail_byte = "b".force_encoding("utf-16be")
    message = "#{encdump whole}.end_with?(#{encdump tail_byte})"
    assert_equal(false, whole.end_with?(tail_byte), message)
  end

  # Expects String#hex to raise ArgumentError on UTF-16LE and UTF-16BE input.
  def test_hex
    %w[utf-16le utf-16be].each do |enc|
      assert_raise(ArgumentError) do
        "ff".encode(enc).hex
      end
    end
  end

  # Expects String#oct to raise ArgumentError on UTF-16LE and UTF-16BE input.
  def test_oct
    %w[utf-16le utf-16be].each do |enc|
      assert_raise(ArgumentError) do
        "77".encode(enc).oct
      end
    end
  end

  # Expects String#count to raise ArgumentError when a UTF-16BE receiver is
  # given a character set written as a plain literal.
  def test_count
    wide = "aa".force_encoding("utf-16be")
    char_set = "aa"
    message = "#{encdump wide}.count(#{encdump char_set})"
    assert_raise(ArgumentError, message) do
      wide.count(char_set)
    end
  end

  # Expects String#+ to raise ArgumentError when a US-ASCII string is joined
  # with a UTF-16BE string.
  def test_plus
    narrow = "a".force_encoding("us-ascii")
    wide = "aa".force_encoding("utf-16be")
    message = "#{encdump narrow} + #{encdump wide}"
    assert_raise(ArgumentError, message) do
      narrow + wide
    end
  end

  # Expects Encoding.find to raise ArgumentError when the encoding name is
  # itself a UTF-16BE-tagged string.
  def test_encoding_find
    wide_name = "utf-8".force_encoding("utf-16be")
    assert_raise(ArgumentError) do
      Encoding.find(wide_name)
    end
  end

  # Expects interpolating a UTF-16BE string into a plain string literal to
  # raise ArgumentError.
  def test_interpolation
    wide = "aa".force_encoding("utf-16be")
    assert_raise(ArgumentError, "\"a\#{#{encdump wide}}\"") do
      "a#{wide}"
    end
  end

  # slice!(-1) on a UTF-16BE string, run through enccall (which wraps the
  # call in assert_nothing_raised).
  def test_slice!
    subject = "aa".force_encoding("UTF-16BE")
    enccall(subject, :slice!, -1)
  end

  # Expects String#+ not to raise when an empty plain string is the left
  # operand and a UTF-16BE string is the right operand.
  def test_plus_empty1
    s1 = ""
    s2 = "aa".force_encoding("utf-16be")
    # The message names "+", the operator exercised here (it previously said
    # "<<", which belongs to the concat tests).
    assert_nothing_raised("#{encdump s1} + #{encdump s2}") {
      s1 + s2
    }
  end

  # Expects String#+ not to raise when a plain string is the left operand
  # and an empty UTF-16BE string is the right operand.
  def test_plus_empty2
    s1 = "aa"
    s2 = "".force_encoding("utf-16be")
    # The message names "+", the operator exercised here (it previously said
    # "<<", which belongs to the concat tests).
    assert_nothing_raised("#{encdump s1} + #{encdump s2}") {
      s1 + s2
    }
  end

  # Expects String#+ to raise ArgumentError when a non-empty plain string is
  # joined with a non-empty UTF-16BE string.
  def test_plus_nonempty
    s1 = "aa"
    s2 = "bb".force_encoding("utf-16be")
    # The message names "+", the operator exercised here (it previously said
    # "<<", which belongs to the concat tests).
    assert_raise(ArgumentError, "#{encdump s1} + #{encdump s2}") {
      s1 + s2
    }
  end

  # Expects String#<< not to raise when a UTF-16BE string is appended to an
  # empty plain string.
  def test_concat_empty1
    target = ""
    addition = "aa".force_encoding("utf-16be")
    message = "#{encdump target} << #{encdump addition}"
    assert_nothing_raised(message) do
      target << addition
    end
  end

  # Expects String#<< not to raise when an empty UTF-16BE string is appended
  # to a plain string.
  def test_concat_empty2
    target = "aa"
    addition = "".force_encoding("utf-16be")
    message = "#{encdump target} << #{encdump addition}"
    assert_nothing_raised(message) do
      target << addition
    end
  end

  # Expects String#<< to raise ArgumentError when a non-empty UTF-16BE
  # string is appended to a non-empty plain string.
  def test_concat_nonempty
    target = "aa"
    addition = "bb".force_encoding("utf-16be")
    message = "#{encdump target} << #{encdump addition}"
    assert_raise(ArgumentError, message) do
      target << addition
    end
  end

  # String#chomp on UTF-16BE data: the bytes "\1\n" are expected to come back
  # unchanged, while "\0\n" and "\0\r\0\n" are expected to chomp to "".
  def test_chomp
    untouched = "\1\n".force_encoding("utf-16be")
    assert_equal(untouched, untouched.chomp, "#{encdump untouched}.chomp")

    ["\0\n", "\0\r\0\n"].each do |bytes|
      line_end = bytes.force_encoding("utf-16be")
      assert_equal("", line_end.chomp, "#{encdump line_end}.chomp")
    end
  end

  # Expects String#succ to yield validly encoded UTF-16BE for U+FFFF and for
  # the surrogate pair DBFF DFFF.
  def test_succ
    ["\xff\xff", "\xdb\xff\xdf\xff"].each do |bytes|
      str = bytes.force_encoding("utf-16be")
      assert(str.succ.valid_encoding?, "#{encdump str}.succ.valid_encoding?")
    end
  end

  # Regexp.union of two UTF-16BE strings, run through enccall (which wraps
  # the call in assert_nothing_raised).
  def test_regexp_union
    first = "aa".force_encoding("utf-16be")
    second = "bb".force_encoding("utf-16be")
    enccall(Regexp, :union, first, second)
  end

  # Expects a Regexp built from an empty UTF-16BE string to report UTF-16BE
  # as its encoding.
  def test_empty_regexp
    source = "".force_encoding("utf-16be")
    wanted = Encoding.find("utf-16be")
    assert_equal(wanted, Regexp.new(source).encoding,
                 "Regexp.new(#{encdump source}).encoding")
  end

  # Expects matching a UTF-16BE regexp against a plain string literal to
  # raise ArgumentError.
  def test_regexp_match
    source = "aa".force_encoding("utf-16be")
    assert_raise(ArgumentError) do
      Regexp.new(source) =~ "aa"
    end
  end

  # gsub with a UTF-16BE "." pattern and an "xy" replacement: expected not to
  # raise for the "abcd" bytes, and expected to raise ArgumentError for the
  # subject that contains "\0\n".
  def test_gsub
    # Built afresh inside each assertion block, so a failure while building
    # the pattern is seen by that assertion.
    any_char = lambda { Regexp.new(".".encode("utf-16be")) }

    plain = "abcd".force_encoding("utf-16be")
    assert_nothing_raised do
      plain.gsub(any_char.call, "xy")
    end

    with_newline = "ab\0\ncd".force_encoding("utf-16be")
    assert_raise(ArgumentError) do
      with_newline.gsub(any_char.call, "xy")
    end
  end

  # Splitting " ab cd " on a single space (both encoded as UTF-16BE) is
  # expected to give exactly the two fields "ab" and "cd".
  def test_split_awk
    space = " ".encode("utf-16be")
    fields = " ab cd ".encode("utf-16be").split(space)
    assert_equal(2, fields.length)
    %w[ab cd].each_with_index do |word, index|
      assert_str_equal(word.encode("utf-16be"), fields[index])
    end
  end

  # Expects a negated character set ("^b") to count the same way on
  # UTF-16BE and UTF-16LE strings as on the plain "abc" literal.
  def test_count2
    baseline = "abc".count("^b")
    %w[utf-16be utf-16le].each do |enc|
      assert_equal(baseline, "abc".encode(enc).count("^b".encode(enc)))
    end
  end

  # Expects eval of source whose magic comment selects UTF-16LE or UTF-16BE
  # to raise ArgumentError.
  def test_header
    %w[utf-16le utf-16be].each do |enc|
      assert_raise(ArgumentError) do
        eval("# encoding:#{enc}\nfoo")
      end
    end
  end


  # Newline recognition in UTF-16LE/BE, exercised through String#lines and
  # String#chomp.
  def test_is_mbc_newline
    le = lambda {|bytes| bytes.force_encoding("utf-16le") }
    be = lambda {|bytes| bytes.force_encoding("utf-16be") }

    # "foo\nbar\nbaz\n" is expected to split into three newline-terminated lines.
    le_lines = le.call("f\0o\0o\0\n\0b\0a\0r\0\n\0b\0a\0z\0\n\0").lines.to_a
    be_lines = be.call("\0f\0o\0o\0\n\0b\0a\0r\0\n\0b\0a\0z\0\n").lines.to_a
    ["f\0o\0o\0\n\0", "b\0a\0r\0\n\0", "b\0a\0z\0\n\0"].each do |line|
      assert_equal(le.call(line), le_lines.shift)
    end
    ["\0f\0o\0o\0\n", "\0b\0a\0r\0\n", "\0b\0a\0z\0\n"].each do |line|
      assert_equal(be.call(line), be_lines.shift)
    end

    # One trailing newline is removed; chomping the result again changes nothing.
    le_text = le.call("f\0o\0o\0\n\0")
    be_text = be.call("\0f\0o\0o\0\n")
    le_bare = le.call("f\0o\0o\0")
    be_bare = be.call("\0f\0o\0o")
    [[le_bare, le_text], [be_bare, be_text]].each do |bare, text|
      assert_equal(bare, text.chomp)
      assert_equal(bare, text.chomp.chomp)
    end

    # Here the final "\n" is a dangling single byte (half a code unit), so
    # chomp is expected to return the string as it is.
    [le.call("f\0o\0o\0\n"), be.call("\0f\0o\0o\n")].each do |text|
      assert_equal(text, text.chomp)
    end
  end

  # Expects Integer#chr with a UTF-16 encoding to produce the two-byte code
  # unit for "a" in the matching byte order.
  def test_code_to_mbc
    expected_bytes = { "utf-16le" => "a\0", "utf-16be" => "\0a" }
    expected_bytes.each do |enc, bytes|
      assert_equal(bytes.force_encoding(enc), "a".ord.chr(enc))
    end
  end

  # Re-encodes +s+ character by character: every character's code point is
  # turned into a one-character string in encoding +e+ (Integer#chr), and the
  # pieces are joined.
  def utf8_to_utf16(s, e)
    pieces = s.chars.map do |char|
      code_point = char.ord
      code_point.chr(e)
    end
    pieces.join
  end

  # Case-insensitive UTF-16LE/BE regexps with back-references: each pattern
  # is expected to carry its UTF-16 encoding and to match the same text
  # converted to that encoding.
  def test_mbc_case_fold
    encodings = %w[utf-16le utf-16be]
    patterns = encodings.map do |enc|
      Regexp.new(utf8_to_utf16("^(\u3042)(a)\\1\\2$", enc), "i")
    end

    encodings.zip(patterns) do |enc, pattern|
      assert_equal(Encoding.find(enc), pattern.encoding)
    end
    encodings.zip(patterns) do |enc, pattern|
      assert_match(pattern, utf8_to_utf16("\u3042a\u3042a", enc))
    end
  end

  # U+20BB7 stored as a surrogate pair: expected to count as one character,
  # to round-trip through ord/chr, and to vanish entirely on chop.
  def test_surrogate_pair
    pairs = [
      ["\x42\xd8\xb7\xdf".force_encoding("utf-16le"), "utf-16le"],
      ["\xd8\x42\xdf\xb7".force_encoding("utf-16be"), "utf-16be"],
    ]

    pairs.each {|str, _enc| assert_equal(1, str.size) }
    pairs.each {|str, _enc| assert_equal(0x20bb7, str.ord) }
    pairs.each {|str, enc| assert_equal(str, 0x20bb7.chr(enc)) }
    pairs.each {|str, _enc| assert_equal("", str.chop) }
  end
end
split UTF-16 tests. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@15183 b2dd03c8-39d4-4d8f-98ff-823fe69b080e 2008-01-23 02:58:48 -05:00			`require 'test/unit'`

change class name. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@15184 b2dd03c8-39d4-4d8f-98ff-823fe69b080e 2008-01-23 02:59:56 -05:00			`class TestUTF16 < Test::Unit::TestCase`
split UTF-16 tests. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@15183 b2dd03c8-39d4-4d8f-98ff-823fe69b080e 2008-01-23 02:58:48 -05:00			`def encdump(str)`
			`d = str.dump`
			`if /\.force_encoding\("[A-Za-z0-9.:_+-]*"\)\z/ =~ d`
			`d`
			`else`
			`"#{d}.force_encoding(#{str.encoding.name.dump})"`
			`end`
			`end`

add a test. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@15187 b2dd03c8-39d4-4d8f-98ff-823fe69b080e 2008-01-23 04:25:17 -05:00			`def enccall(recv, meth, *args)`
			`desc = ''`
			`if String === recv`
			`desc << encdump(recv)`
			`else`
			`desc << recv.inspect`
			`end`
			`desc << '.' << meth.to_s`
			`if !args.empty?`
			`desc << '('`
			`args.each_with_index {\|a, i\|`
			`desc << ',' if 0 < i`
			`if String === a`
			`desc << encdump(a)`
			`else`
			`desc << a.inspect`
			`end`
			`}`
			`desc << ')'`
			`end`
			`result = nil`
			`assert_nothing_raised(desc) {`
			`result = recv.send(meth, *args)`
			`}`
			`result`
			`end`

add a test for chomp. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@15261 b2dd03c8-39d4-4d8f-98ff-823fe69b080e 2008-01-26 16:23:59 -05:00			`def assert_str_equal(expected, actual, message=nil)`
			`full_message = build_message(message, <<EOT)`
			`#{encdump expected} expected but not equal to`
			`#{encdump actual}.`
			`EOT`
			`assert_block(full_message) { expected == actual }`
			`end`

split UTF-16 tests. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@15183 b2dd03c8-39d4-4d8f-98ff-823fe69b080e 2008-01-23 02:58:48 -05:00			`# tests start`

			`def test_utf16be_valid_encoding`
* enc/utf_16be.c (UTF16_IS_SURROGATE_FIRST): avoid branch. (UTF16_IS_SURROGATE_SECOND): ditto. (UTF16_IS_SURROGATE): defined. (utf16be_mbc_enc_len): validation implemented. * enc/utf_16le.c (UTF16_IS_SURROGATE_FIRST): avoid branch. (UTF16_IS_SURROGATE_SECOND): ditto. (UTF16_IS_SURROGATE): defined. (utf16le_mbc_enc_len): validation implemented. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@15338 b2dd03c8-39d4-4d8f-98ff-823fe69b080e 2008-01-29 22:49:54 -05:00			`[`
			`"\x00\x00",`
			`"\xd7\xff",`
			`"\xd8\x00\xdc\x00",`
			`"\xdb\xff\xdf\xff",`
			`"\xe0\x00",`
			`"\xff\xff",`
			`].each {\|s\|`
			`s.force_encoding("utf-16be")`
			`assert_equal(true, s.valid_encoding?, "#{encdump s}.valid_encoding?")`
			`}`
			`[`
			`"\x00",`
			`"\xd7",`
			`"\xd8\x00",`
			`"\xd8\x00\xd8\x00",`
			`"\xdc\x00",`
			`"\xdc\x00\xd8\x00",`
			`"\xdc\x00\xdc\x00",`
			`"\xe0",`
			`"\xff",`
			`].each {\|s\|`
			`s.force_encoding("utf-16be")`
			`assert_equal(false, s.valid_encoding?, "#{encdump s}.valid_encoding?")`
			`}`
			`end`

			`def test_utf16le_valid_encoding`
			`[`
			`"\x00\x00",`
			`"\xff\xd7",`
			`"\x00\xd8\x00\xdc",`
			`"\xff\xdb\xff\xdf",`
			`"\x00\xe0",`
			`"\xff\xff",`
			`].each {\|s\|`
			`s.force_encoding("utf-16le")`
			`assert_equal(true, s.valid_encoding?, "#{encdump s}.valid_encoding?")`
			`}`
			`[`
			`"\x00",`
			`"\xd7",`
			`"\x00\xd8",`
			`"\x00\xd8\x00\xd8",`
			`"\x00\xdc",`
			`"\x00\xdc\x00\xd8",`
			`"\x00\xdc\x00\xdc",`
			`"\xe0",`
			`"\xff",`
			`].each {\|s\|`
			`s.force_encoding("utf-16le")`
			`assert_equal(false, s.valid_encoding?, "#{encdump s}.valid_encoding?")`
			`}`
split UTF-16 tests. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@15183 b2dd03c8-39d4-4d8f-98ff-823fe69b080e 2008-01-23 02:58:48 -05:00			`end`

			`def test_strftime`
			`s = "aa".force_encoding("utf-16be")`
			`assert_raise(ArgumentError, "Time.now.strftime(#{encdump s})") { Time.now.strftime(s) }`
			`end`

			`def test_intern`
			`s = "aaaa".force_encoding("utf-16be")`
			`assert_equal(s.encoding, s.intern.to_s.encoding, "#{encdump s}.intern.to_s.encoding")`
			`end`

add a test. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@15719 b2dd03c8-39d4-4d8f-98ff-823fe69b080e 2008-03-06 03:04:18 -05:00			`def test_sym_eq`
			`s = "aa".force_encoding("utf-16le")`
			`assert(s.intern != :aa, "#{encdump s}.intern != :aa")`
			`end`

split UTF-16 tests. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@15183 b2dd03c8-39d4-4d8f-98ff-823fe69b080e 2008-01-23 02:58:48 -05:00			`def test_compatible`
			`s1 = "aa".force_encoding("utf-16be")`
			`s2 = "z".force_encoding("us-ascii")`
			`assert_nil(Encoding.compatible?(s1, s2), "Encoding.compatible?(#{encdump s1}, #{encdump s2})")`
			`end`

add a test. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@15348 b2dd03c8-39d4-4d8f-98ff-823fe69b080e 2008-01-30 11:42:15 -05:00			`def test_casecmp`
			`s1 = "aa".force_encoding("utf-16be")`
			`s2 = "AA"`
			`assert_not_equal(0, s1.casecmp(s2), "#{encdump s1}.casecmp(#{encdump s2})")`
			`end`

split UTF-16 tests. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@15183 b2dd03c8-39d4-4d8f-98ff-823fe69b080e 2008-01-23 02:58:48 -05:00			`def test_end_with`
			`s1 = "ab".force_encoding("utf-16be")`
			`s2 = "b".force_encoding("utf-16be")`
			`assert_equal(false, s1.end_with?(s2), "#{encdump s1}.end_with?(#{encdump s2})")`
			`end`

			`def test_hex`
* time.c (time_strftime): format should be ascii compatible. * parse.y (rb_intern3): non ASCII compatible symbols. * re.c (rb_reg_regsub): add encoding check. * string.c (rb_str_chomp_bang): ditto. * test/ruby/test_utf16.rb (TestUTF16::test_chomp): raises exception. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@15640 b2dd03c8-39d4-4d8f-98ff-823fe69b080e 2008-02-29 04:19:15 -05:00			`assert_raise(ArgumentError) {`
			`"ff".encode("utf-16le").hex`
			`}`
			`assert_raise(ArgumentError) {`
			`"ff".encode("utf-16be").hex`
			`}`
split UTF-16 tests. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@15183 b2dd03c8-39d4-4d8f-98ff-823fe69b080e 2008-01-23 02:58:48 -05:00			`end`

add tests. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@15528 b2dd03c8-39d4-4d8f-98ff-823fe69b080e 2008-02-17 11:50:49 -05:00			`def test_oct`
* time.c (time_strftime): format should be ascii compatible. * parse.y (rb_intern3): non ASCII compatible symbols. * re.c (rb_reg_regsub): add encoding check. * string.c (rb_str_chomp_bang): ditto. * test/ruby/test_utf16.rb (TestUTF16::test_chomp): raises exception. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@15640 b2dd03c8-39d4-4d8f-98ff-823fe69b080e 2008-02-29 04:19:15 -05:00			`assert_raise(ArgumentError) {`
			`"77".encode("utf-16le").oct`
			`}`
			`assert_raise(ArgumentError) {`
			`"77".encode("utf-16be").oct`
			`}`
add tests. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@15528 b2dd03c8-39d4-4d8f-98ff-823fe69b080e 2008-02-17 11:50:49 -05:00			`end`

split UTF-16 tests. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@15183 b2dd03c8-39d4-4d8f-98ff-823fe69b080e 2008-01-23 02:58:48 -05:00			`def test_count`
			`s1 = "aa".force_encoding("utf-16be")`
			`s2 = "aa"`
			`assert_raise(ArgumentError, "#{encdump s1}.count(#{encdump s2})") {`
			`s1.count(s2)`
			`}`
			`end`

			`def test_plus`
			`s1 = "a".force_encoding("us-ascii")`
			`s2 = "aa".force_encoding("utf-16be")`
			`assert_raise(ArgumentError, "#{encdump s1} + #{encdump s2}") {`
			`s1 + s2`
			`}`
			`end`

			`def test_encoding_find`
			`assert_raise(ArgumentError) {`
			`Encoding.find("utf-8".force_encoding("utf-16be"))`
			`}`
			`end`
add a test. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@15186 b2dd03c8-39d4-4d8f-98ff-823fe69b080e 2008-01-23 03:14:30 -05:00
			`def test_interpolation`
			`s = "aa".force_encoding("utf-16be")`
			`assert_raise(ArgumentError, "\"a\#{#{encdump s}}\"") {`
			`"a#{s}"`
			`}`
			`end`
add a test. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@15187 b2dd03c8-39d4-4d8f-98ff-823fe69b080e 2008-01-23 04:25:17 -05:00
			`def test_slice!`
			`enccall("aa".force_encoding("UTF-16BE"), :slice!, -1)`
			`end`
* string.c (rb_enc_cr_str_buf_cat): ASCII incompatible encoding is not compatible with any other encoding. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@15202 b2dd03c8-39d4-4d8f-98ff-823fe69b080e 2008-01-23 21:17:26 -05:00
* encoding.c (rb_enc_compatible): empty strings are always compatible. * string.c (rb_enc_cr_str_buf_cat): ditto. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@15506 b2dd03c8-39d4-4d8f-98ff-823fe69b080e 2008-02-16 04:02:12 -05:00			`def test_plus_empty1`
* string.c (rb_enc_cr_str_buf_cat): ASCII incompatible encoding is not compatible with any other encoding. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@15202 b2dd03c8-39d4-4d8f-98ff-823fe69b080e 2008-01-23 21:17:26 -05:00			`s1 = ""`
			`s2 = "aa".force_encoding("utf-16be")`
* encoding.c (rb_enc_compatible): empty strings are always compatible. * string.c (rb_enc_cr_str_buf_cat): ditto. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@15506 b2dd03c8-39d4-4d8f-98ff-823fe69b080e 2008-02-16 04:02:12 -05:00			`assert_nothing_raised("#{encdump s1} << #{encdump s2}") {`
			`s1 + s2`
			`}`
			`end`

			`def test_plus_empty2`
			`s1 = "aa"`
			`s2 = "".force_encoding("utf-16be")`
			`assert_nothing_raised("#{encdump s1} << #{encdump s2}") {`
			`s1 + s2`
			`}`
			`end`

			`def test_plus_nonempty`
			`s1 = "aa"`
			`s2 = "bb".force_encoding("utf-16be")`
			`assert_raise(ArgumentError, "#{encdump s1} << #{encdump s2}") {`
			`s1 + s2`
			`}`
			`end`

			`def test_concat_empty1`
			`s1 = ""`
			`s2 = "aa".force_encoding("utf-16be")`
			`assert_nothing_raised("#{encdump s1} << #{encdump s2}") {`
			`s1 << s2`
			`}`
			`end`

			`def test_concat_empty2`
			`s1 = "aa"`
			`s2 = "".force_encoding("utf-16be")`
			`assert_nothing_raised("#{encdump s1} << #{encdump s2}") {`
			`s1 << s2`
			`}`
			`end`

			`def test_concat_nonempty`
			`s1 = "aa"`
			`s2 = "bb".force_encoding("utf-16be")`
* string.c (rb_enc_cr_str_buf_cat): ASCII incompatible encoding is not compatible with any other encoding. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@15202 b2dd03c8-39d4-4d8f-98ff-823fe69b080e 2008-01-23 21:17:26 -05:00			`assert_raise(ArgumentError, "#{encdump s1} << #{encdump s2}") {`
			`s1 << s2`
			`}`
			`end`
add a test for Regexp.union. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@15225 b2dd03c8-39d4-4d8f-98ff-823fe69b080e 2008-01-24 22:58:32 -05:00
add a test for chomp. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@15261 b2dd03c8-39d4-4d8f-98ff-823fe69b080e 2008-01-26 16:23:59 -05:00			`def test_chomp`
			`s = "\1\n".force_encoding("utf-16be")`
* string.c (rb_str_chomp_bang): now works on UTF-16. * string.c (tr_setup_table): negation should work on non ASCII compatible strings as well. * string.c (rb_str_split_m): awk split should work on non ASCII compatible strings as well. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@15641 b2dd03c8-39d4-4d8f-98ff-823fe69b080e 2008-02-29 08:33:26 -05:00			`assert_equal(s, s.chomp, "#{encdump s}.chomp")`
			`s = "\0\n".force_encoding("utf-16be")`
			`assert_equal("", s.chomp, "#{encdump s}.chomp")`
			`s = "\0\r\0\n".force_encoding("utf-16be")`
			`assert_equal("", s.chomp, "#{encdump s}.chomp")`
add a test for chomp. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@15261 b2dd03c8-39d4-4d8f-98ff-823fe69b080e 2008-01-26 16:23:59 -05:00			`end`

* string.c (rb_str_succ): use wrapped character as a carry for ASCII incompatible encoding. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@15339 b2dd03c8-39d4-4d8f-98ff-823fe69b080e 2008-01-30 00:29:37 -05:00			`def test_succ`
			`s = "\xff\xff".force_encoding("utf-16be")`
			`assert(s.succ.valid_encoding?, "#{encdump s}.succ.valid_encoding?")`

			`s = "\xdb\xff\xdf\xff".force_encoding("utf-16be")`
			`assert(s.succ.valid_encoding?, "#{encdump s}.succ.valid_encoding?")`
			`end`

add a test for Regexp.union. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@15225 b2dd03c8-39d4-4d8f-98ff-823fe69b080e 2008-01-24 22:58:32 -05:00			`def test_regexp_union`
			`enccall(Regexp, :union, "aa".force_encoding("utf-16be"), "bb".force_encoding("utf-16be"))`
			`end`
* re.c (rb_reg_preprocess): force fixed encoding when ASCII incompatible source string. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@15260 b2dd03c8-39d4-4d8f-98ff-823fe69b080e 2008-01-26 16:01:52 -05:00
			`def test_empty_regexp`
			`s = "".force_encoding("utf-16be")`
			`assert_equal(Encoding.find("utf-16be"), Regexp.new(s).encoding,`
			`"Regexp.new(#{encdump s}).encoding")`
			`end`
add tests. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@15528 b2dd03c8-39d4-4d8f-98ff-823fe69b080e 2008-02-17 11:50:49 -05:00
add a test. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@16407 b2dd03c8-39d4-4d8f-98ff-823fe69b080e 2008-05-13 13:16:03 -04:00			`def test_regexp_match`
			`assert_raise(ArgumentError) { Regexp.new("aa".force_encoding("utf-16be")) =~ "aa" }`
			`end`

add tests. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@15528 b2dd03c8-39d4-4d8f-98ff-823fe69b080e 2008-02-17 11:50:49 -05:00			`def test_gsub`
			`s = "abcd".force_encoding("utf-16be")`
test updated. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@15675 b2dd03c8-39d4-4d8f-98ff-823fe69b080e 2008-03-03 03:54:31 -05:00			`assert_nothing_raised {`
			`s.gsub(Regexp.new(".".encode("utf-16be")), "xy")`
			`}`
			`s = "ab\0\ncd".force_encoding("utf-16be")`
add tests. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@15528 b2dd03c8-39d4-4d8f-98ff-823fe69b080e 2008-02-17 11:50:49 -05:00			`assert_raise(ArgumentError) {`
			`s.gsub(Regexp.new(".".encode("utf-16be")), "xy")`
			`}`
			`end`

			`def test_split_awk`
			`s = " ab cd ".encode("utf-16be")`
			`r = s.split(" ".encode("utf-16be"))`
			`assert_equal(2, r.length)`
			`assert_str_equal("ab".encode("utf-16be"), r[0])`
			`assert_str_equal("cd".encode("utf-16be"), r[1])`
			`end`
add a test. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@15530 b2dd03c8-39d4-4d8f-98ff-823fe69b080e 2008-02-17 19:08:01 -05:00
* test/ruby/test_io_m17n.rb: remove a duplicative method. * test/ruby/test_utf16.rb: rename a conflicting method name. * test/ruby/test_array.rb: ditto. * test/ruby/test_file_exhaustive.rb: ditto. * test/ruby/test_hash.rb: ditto. * test/ruby/test_env.rb: ditto. * test/ruby/test_fixnum.rb: ditto. * test/ruby/test_rational.rb: ditto. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@16406 b2dd03c8-39d4-4d8f-98ff-823fe69b080e 2008-05-13 11:57:22 -04:00			`def test_count2`
add a test. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@15530 b2dd03c8-39d4-4d8f-98ff-823fe69b080e 2008-02-17 19:08:01 -05:00			`e = "abc".count("^b")`
			`assert_equal(e, "abc".encode("utf-16be").count("^b".encode("utf-16be")))`
			`assert_equal(e, "abc".encode("utf-16le").count("^b".encode("utf-16le")))`
			`end`
* enc/iso_8859_5.c: Large omicron should lowercase to small omicron. * test/ruby/test_big5.rb, test/ruby/test_cp949.rb, test/ruby/test_euc_jp.rb, test/ruby/test_euc_kr.rb, test/ruby/test_euc_tw.rb, test/ruby/test_gb18030.rb, test/ruby/test_gbk.rb, test/ruby/test_iso_8859.rb, test/ruby/test_koi8.rb, test/ruby/test_shift_jis.rb, test/ruby/test_windows_1251.rb: new tests for encoding. * test/ruby/test_utf16.rb, test/ruby/test_utf32.rb, test/ruby/test_regexp.rb: add tests. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@16759 b2dd03c8-39d4-4d8f-98ff-823fe69b080e 2008-06-02 09:30:38 -04:00
			`def test_header`
			`assert_raise(ArgumentError) { eval("# encoding:utf-16le\nfoo") }`
			`assert_raise(ArgumentError) { eval("# encoding:utf-16be\nfoo") }`
			`end`


			`def test_is_mbc_newline`
			`sl = "f\0o\0o\0\n\0b\0a\0r\0\n\0b\0a\0z\0\n\0".force_encoding("utf-16le")`
			`sb = "\0f\0o\0o\0\n\0b\0a\0r\0\n\0b\0a\0z\0\n".force_encoding("utf-16be")`
			`al = sl.lines.to_a`
			`ab = sb.lines.to_a`
			`assert_equal("f\0o\0o\0\n\0".force_encoding("utf-16le"), al.shift)`
			`assert_equal("b\0a\0r\0\n\0".force_encoding("utf-16le"), al.shift)`
			`assert_equal("b\0a\0z\0\n\0".force_encoding("utf-16le"), al.shift)`
			`assert_equal("\0f\0o\0o\0\n".force_encoding("utf-16be"), ab.shift)`
			`assert_equal("\0b\0a\0r\0\n".force_encoding("utf-16be"), ab.shift)`
			`assert_equal("\0b\0a\0z\0\n".force_encoding("utf-16be"), ab.shift)`

			`sl = "f\0o\0o\0\n\0".force_encoding("utf-16le")`
			`sb = "\0f\0o\0o\0\n".force_encoding("utf-16be")`
			`sl2 = "f\0o\0o\0".force_encoding("utf-16le")`
			`sb2 = "\0f\0o\0o".force_encoding("utf-16be")`
			`assert_equal(sl2, sl.chomp)`
			`assert_equal(sl2, sl.chomp.chomp)`
			`assert_equal(sb2, sb.chomp)`
			`assert_equal(sb2, sb.chomp.chomp)`

			`sl = "f\0o\0o\0\n".force_encoding("utf-16le")`
			`sb = "\0f\0o\0o\n".force_encoding("utf-16be")`
			`assert_equal(sl, sl.chomp)`
			`assert_equal(sb, sb.chomp)`
			`end`

			`def test_code_to_mbc`
			`assert_equal("a\0".force_encoding("utf-16le"), "a".ord.chr("utf-16le"))`
			`assert_equal("\0a".force_encoding("utf-16be"), "a".ord.chr("utf-16be"))`
			`end`

			`def utf8_to_utf16(s, e)`
			`s.chars.map {\|c\| c.ord.chr(e) }.join`
			`end`

			`def test_mbc_case_fold`
			`rl = Regexp.new(utf8_to_utf16("^(\u3042)(a)\\1\\2$", "utf-16le"), "i")`
			`rb = Regexp.new(utf8_to_utf16("^(\u3042)(a)\\1\\2$", "utf-16be"), "i")`
			`assert_equal(Encoding.find("utf-16le"), rl.encoding)`
			`assert_equal(Encoding.find("utf-16be"), rb.encoding)`
			`assert_match(rl, utf8_to_utf16("\u3042a\u3042a", "utf-16le"))`
			`assert_match(rb, utf8_to_utf16("\u3042a\u3042a", "utf-16be"))`
			`end`

			`def test_surrogate_pair`
			`sl = "\x42\xd8\xb7\xdf".force_encoding("utf-16le")`
			`sb = "\xd8\x42\xdf\xb7".force_encoding("utf-16be")`

			`assert_equal(1, sl.size)`
			`assert_equal(1, sb.size)`
			`assert_equal(0x20bb7, sl.ord)`
			`assert_equal(0x20bb7, sb.ord)`
			`assert_equal(sl, 0x20bb7.chr("utf-16le"))`
			`assert_equal(sb, 0x20bb7.chr("utf-16be"))`
			`assert_equal("", sl.chop)`
			`assert_equal("", sb.chop)`
			`end`
split UTF-16 tests. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@15183 b2dd03c8-39d4-4d8f-98ff-823fe69b080e 2008-01-23 02:58:48 -05:00			`end`