1
0
Fork 0
mirror of https://github.com/ruby/ruby.git synced 2022-11-09 12:17:21 -05:00
ruby--ruby/test/ruby/test_econv.rb

944 lines
41 KiB
Ruby
Raw Normal View History

# frozen_string_literal: false
require 'test/unit'
class TestEncodingConverter < Test::Unit::TestCase
def check_ec(edst, esrc, eres, dst, src, ec, off, len, opts=nil)
Make rb_scan_args handle keywords more similar to Ruby methods (#2460) Cfuncs that use rb_scan_args with the : entry suffer similar keyword argument separation issues that Ruby methods suffer if the cfuncs accept optional or variable arguments. This makes the following changes to : handling. * Treats as **kw, prompting keyword argument separation warnings if called with a positional hash. * Do not look for an option hash if empty keywords are provided. For backwards compatibility, treat an empty keyword splat as a empty mandatory positional hash argument, but emit a a warning, as this behavior will be removed in Ruby 3. The argument number check needs to be moved lower so it can correctly handle an empty positional argument being added. * If the last argument is nil and it is necessary to treat it as an option hash in order to make sure all arguments are processed, continue to treat the last argument as the option hash. Emit a warning in this case, as this behavior will be removed in Ruby 3. * If splitting the keyword hash into two hashes, issue a warning, as we will not be splitting hashes in Ruby 3. * If the keyword argument is required to fill a mandatory positional argument, continue to do so, but emit a warning as this behavior will be going away in Ruby 3. * If keyword arguments are provided and the last argument is not a hash, that indicates something wrong. This can happen if a cfunc is calling rb_scan_args multiple times, and providing arguments that were not passed to it from Ruby. Callers need to switch to the new rb_scan_args_kw function, which allows passing of whether keywords were provided. This commit fixes all warnings caused by the changes above. It switches some function calls to *_kw versions with appropriate kw_splat flags. If delegating arguments, RB_PASS_CALLED_KEYWORDS is used. If creating new arguments, RB_PASS_KEYWORDS is used if the last argument is a hash to be treated as keywords. In open_key_args in io.c, use rb_scan_args_kw. In this case, the arguments provided come from another C function, not Ruby. The last argument may or may not be a hash, so we can't set keyword argument mode. However, if it is a hash, we don't want to warn when treating it as keywords. In Ruby files, make sure to appropriately use keyword splats or literal keywords when calling Cfuncs that now issue keyword argument separation warnings through rb_scan_args. Also, make sure not to pass nil in place of an option hash. Work around Kernel#warn warnings due to problems in the Rubygems override of the method. There is an open pull request to fix these issues in Rubygems, but part of the Rubygems tests for their override fail on ruby-head due to rb_scan_args not recognizing empty keyword splats, which this commit fixes. Implementation wise, adding rb_scan_args_kw is kind of a pain, because rb_scan_args takes a variable number of arguments. In order to not duplicate all the code, the function internals need to be split into two functions taking a va_list, and to avoid passing in a ton of arguments, a single struct argument is used to handle the variables previously local to the function.
2019-09-25 14:18:49 -04:00
case opts
when Hash
res = ec.primitive_convert(src, dst, off, len, **opts)
else
res = ec.primitive_convert(src, dst, off, len, opts)
end
assert_equal([edst.b, esrc.b, eres],
[dst.b, src.b, res])
end
def assert_econv(converted, eres, obuf_bytesize, ec, consumed, rest, opts=nil)
ec = Encoding::Converter.new(*ec) if Array === ec
i = consumed + rest
o = ""
ret = ec.primitive_convert(i, o, 0, obuf_bytesize, opts)
assert_equal([converted, eres, rest],
[o, ret, i])
end
def assert_errinfo(e_res, e_enc1, e_enc2, e_error_bytes, e_readagain_bytes, ec)
assert_equal([e_res, e_enc1, e_enc2,
e_error_bytes&.b,
e_readagain_bytes&.b],
ec.primitive_errinfo)
end
def test_s_asciicompat_encoding
assert_equal(Encoding::STATELESS_ISO_2022_JP, Encoding::Converter.asciicompat_encoding("ISO-2022-JP"))
assert_equal(Encoding::STATELESS_ISO_2022_JP, Encoding::Converter.asciicompat_encoding(Encoding::ISO_2022_JP))
assert_equal(Encoding::UTF_8, Encoding::Converter.asciicompat_encoding("UTF-16BE"))
assert_equal(Encoding::UTF_8, Encoding::Converter.asciicompat_encoding("UTF-16LE"))
assert_equal(Encoding::UTF_8, Encoding::Converter.asciicompat_encoding("UTF-32BE"))
assert_equal(Encoding::UTF_8, Encoding::Converter.asciicompat_encoding("UTF-32LE"))
assert_nil(Encoding::Converter.asciicompat_encoding("EUC-JP"))
assert_nil(Encoding::Converter.asciicompat_encoding("UTF-8"))
assert_nil(Encoding::Converter.asciicompat_encoding(Encoding::UTF_8))
assert_nil(Encoding::Converter.asciicompat_encoding("xml_attr_escape"))
assert_nil(Encoding::Converter.asciicompat_encoding("encoding-not-exist"))
end
def test_asciicompat_encoding_iso2022jp
acenc = Encoding::Converter.asciicompat_encoding("ISO-2022-JP")
str = "\e$B~~\e(B".force_encoding("iso-2022-jp")
str2 = str.encode(acenc)
str3 = str2.encode("ISO-2022-JP")
assert_equal(str, str3)
end
def test_s_new
assert_kind_of(Encoding::Converter, Encoding::Converter.new("UTF-8", "EUC-JP"))
assert_kind_of(Encoding::Converter, Encoding::Converter.new(Encoding::UTF_8, Encoding::EUC_JP))
end
def test_s_new_convpath
assert_equal([], Encoding::Converter.new([]).convpath)
assert_equal([[Encoding::UTF_8, Encoding::EUC_JP]],
Encoding::Converter.new([["UTF-8", "EUC-JP"]]).convpath)
assert_equal([[Encoding::UTF_8, Encoding::WINDOWS_31J]],
Encoding::Converter.new([["utf-8", "cp932"]]).convpath)
assert_equal([[Encoding::UTF_8, Encoding::EUC_JP]],
Encoding::Converter.new([[Encoding::UTF_8, Encoding::EUC_JP]]).convpath)
assert_equal([[Encoding::ISO_8859_1, Encoding::UTF_8],
[Encoding::UTF_8, Encoding::EUC_JP]],
Encoding::Converter.new([["iso-8859-1", "euc-jp"]]).convpath)
assert_equal([[Encoding::ISO_8859_1, Encoding::UTF_8],
[Encoding::UTF_8, Encoding::EUC_JP],
"universal_newline"],
Encoding::Converter.new([["iso-8859-1", "euc-jp"], "universal_newline"]).convpath)
assert_equal(["universal_newline",
[Encoding::ISO_8859_1, Encoding::UTF_8],
[Encoding::UTF_8, Encoding::EUC_JP],
"universal_newline"],
Encoding::Converter.new(["universal_newline", ["iso-8859-1", "euc-jp"], "universal_newline"]).convpath)
end
def test_s_new_fail
name1 = "encoding-which-is-not-exist-1"
name2 = "encoding-which-is-not-exist-2"
assert_raise(Encoding::ConverterNotFoundError) {
Encoding::Converter.new(name1, name2)
}
encoding_list = Encoding.list.map {|e| e.name }
assert_not_include(encoding_list, name1)
assert_not_include(encoding_list, name2)
end
def test_newline_converter_with_ascii_incompatible
assert_nothing_raised {
Encoding::Converter.new("UTF-8", "UTF-16BE", Encoding::Converter::UNIVERSAL_NEWLINE_DECORATOR)
}
assert_nothing_raised {
Encoding::Converter.new("UTF-16BE", "UTF-8", Encoding::Converter::CRLF_NEWLINE_DECORATOR)
}
assert_nothing_raised {
Encoding::Converter.new("UTF-16BE", "UTF-8", Encoding::Converter::CR_NEWLINE_DECORATOR)
}
assert_nothing_raised {
Encoding::Converter.new("UTF-16BE", "UTF-8", Encoding::Converter::UNIVERSAL_NEWLINE_DECORATOR)
}
assert_nothing_raised {
Encoding::Converter.new("UTF-8", "UTF-16BE", Encoding::Converter::CRLF_NEWLINE_DECORATOR)
}
assert_nothing_raised {
Encoding::Converter.new("UTF-8", "UTF-16BE", Encoding::Converter::CR_NEWLINE_DECORATOR)
}
end
def test_get_encoding
ec = Encoding::Converter.new("UTF-8", "EUC-JP")
assert_equal(Encoding::UTF_8, ec.source_encoding)
assert_equal(Encoding::EUC_JP, ec.destination_encoding)
end
def test_result_encoding
ec = Encoding::Converter.new("UTF-8", "EUC-JP")
dst = "".force_encoding("ASCII-8BIT")
assert_equal(Encoding::ASCII_8BIT, dst.encoding)
ec.primitive_convert("\u{3042}", dst, nil, 10)
assert_equal(Encoding::EUC_JP, dst.encoding)
end
def test_output_region
ec = Encoding::Converter.new("UTF-8", "EUC-JP")
ec.primitive_convert(src="a", dst="b", nil, 1, :partial_input=>true)
assert_equal("ba", dst)
ec.primitive_convert(src="a", dst="b", 0, 1, :partial_input=>true)
assert_equal("a", dst)
ec.primitive_convert(src="a", dst="b", 1, 1, :partial_input=>true)
assert_equal("ba", dst)
assert_raise(ArgumentError) {
ec.primitive_convert(src="a", dst="b", 2, 1, :partial_input=>true)
}
assert_raise(ArgumentError) {
ec.primitive_convert(src="a", dst="b", -1, 1, :partial_input=>true)
}
assert_raise(ArgumentError) {
ec.primitive_convert(src="a", dst="b", 1, -1, :partial_input=>true)
}
end
def test_nil_source_buffer
ec = Encoding::Converter.new("UTF-8", "EUC-JP")
ret = ec.primitive_convert(nil, "", nil, 10)
assert_equal(:finished, ret)
end
def test_nil_destination_bytesize
ec = Encoding::Converter.new("Shift_JIS", "UTF-8")
n = 10000
src = "\xa1".force_encoding("Shift_JIS") * n
ret = ec.primitive_convert(src, dst="", nil, nil)
assert_equal(:finished, ret)
assert_equal("\xEF\xBD\xA1".force_encoding("UTF-8") * n, dst)
end
def test_nil_destination_bytesize2
ec = Encoding::Converter.new("Shift_JIS", "UTF-8")
n = 10000
src = "\xa1".force_encoding("Shift_JIS") * n
ret = ec.primitive_convert(src, dst="")
assert_equal(:finished, ret)
assert_equal("\xEF\xBD\xA1".force_encoding("UTF-8") * n, dst)
end
def test_nil_destination_bytesize_with_nonnil_byteoffset
ec = Encoding::Converter.new("Shift_JIS", "UTF-8")
n = 2000
src = "\xa1".force_encoding("Shift_JIS") * n
dst = "abcd" * 2000
ret = ec.primitive_convert(src, dst, 3, nil)
assert_equal(:finished, ret)
assert_equal("abc" + "\xEF\xBD\xA1".force_encoding("UTF-8") * n, dst)
end
def test_partial_input
ec = Encoding::Converter.new("UTF-8", "EUC-JP")
ret = ec.primitive_convert(src="", dst="", nil, 10, :partial_input=>true)
assert_equal(:source_buffer_empty, ret)
ret = ec.primitive_convert(src="", dst="", nil, 10)
assert_equal(:finished, ret)
end
def test_accumulate_dst1
ec = Encoding::Converter.new("UTF-8", "EUC-JP")
a = ["", "abc\u{3042}def", ec, nil, 1]
check_ec("a", "c\u{3042}def", :destination_buffer_full, *a)
check_ec("ab", "\u{3042}def", :destination_buffer_full, *a)
check_ec("abc", "def", :destination_buffer_full, *a)
check_ec("abc\xA4", "def", :destination_buffer_full, *a)
check_ec("abc\xA4\xA2", "ef", :destination_buffer_full, *a)
check_ec("abc\xA4\xA2d", "f", :destination_buffer_full, *a)
check_ec("abc\xA4\xA2de", "", :destination_buffer_full, *a)
check_ec("abc\xA4\xA2def", "", :finished, *a)
end
def test_accumulate_dst2
ec = Encoding::Converter.new("UTF-8", "EUC-JP")
a = ["", "abc\u{3042}def", ec, nil, 2]
check_ec("ab", "\u{3042}def", :destination_buffer_full, *a)
check_ec("abc\xA4", "def", :destination_buffer_full, *a)
check_ec("abc\xA4\xA2d", "f", :destination_buffer_full, *a)
check_ec("abc\xA4\xA2def", "", :finished, *a)
end
def test_eucjp_to_utf8
assert_econv("", :finished, 100, ["UTF-8", "EUC-JP"], "", "")
assert_econv("a", :finished, 100, ["UTF-8", "EUC-JP"], "a", "")
end
def test_iso2022jp
assert_econv("", :finished, 100, ["Shift_JIS", "ISO-2022-JP"], "", "")
end
def test_iso2022jp_encode
ec = Encoding::Converter.new("EUC-JP", "ISO-2022-JP")
a = ["", src="", ec, nil, 50, :partial_input=>true]
src << "a"; check_ec("a", "", :source_buffer_empty, *a)
src << "\xA2"; check_ec("a", "", :source_buffer_empty, *a)
src << "\xA4"; check_ec("a\e$B\"$", "", :source_buffer_empty, *a)
src << "\xA1"; check_ec("a\e$B\"$", "", :source_buffer_empty, *a)
src << "\xA2"; check_ec("a\e$B\"$!\"", "", :source_buffer_empty, *a)
src << "b"; check_ec("a\e$B\"$!\"\e(Bb", "", :source_buffer_empty, *a)
src << "\xA2\xA6"; check_ec("a\e$B\"$!\"\e(Bb\e$B\"&", "", :source_buffer_empty, *a)
a[-1] = 0; check_ec("a\e$B\"$!\"\e(Bb\e$B\"&\e(B", "", :finished, *a)
end
def test_iso2022jp_decode
ec = Encoding::Converter.new("ISO-2022-JP", "EUC-JP")
a = ["", src="", ec, nil, 50, :partial_input=>true]
src << "a"; check_ec("a", "", :source_buffer_empty, *a)
src << "\e"; check_ec("a", "", :source_buffer_empty, *a)
src << "$"; check_ec("a", "", :source_buffer_empty, *a)
src << "B"; check_ec("a", "", :source_buffer_empty, *a)
src << "\x21"; check_ec("a", "", :source_buffer_empty, *a)
src << "\x22"; check_ec("a\xA1\xA2", "", :source_buffer_empty, *a)
src << "\n"; check_ec("a\xA1\xA2", "", :invalid_byte_sequence, *a)
src << "\x23"; check_ec("a\xA1\xA2", "", :source_buffer_empty, *a)
src << "\x24"; check_ec("a\xA1\xA2\xA3\xA4", "", :source_buffer_empty, *a)
src << "\e"; check_ec("a\xA1\xA2\xA3\xA4", "", :source_buffer_empty, *a)
src << "("; check_ec("a\xA1\xA2\xA3\xA4", "", :source_buffer_empty, *a)
src << "B"; check_ec("a\xA1\xA2\xA3\xA4", "", :source_buffer_empty, *a)
src << "c"; check_ec("a\xA1\xA2\xA3\xA4c", "", :source_buffer_empty, *a)
src << "\n"; check_ec("a\xA1\xA2\xA3\xA4c\n","", :source_buffer_empty, *a)
end
def test_invalid
assert_econv("", :invalid_byte_sequence, 100, ["UTF-8", "EUC-JP"], "\x80", "")
assert_econv("a", :invalid_byte_sequence, 100, ["UTF-8", "EUC-JP"], "a\x80", "")
assert_econv("a", :invalid_byte_sequence, 100, ["UTF-8", "EUC-JP"], "a\x80", "\x80")
assert_econv("abc", :invalid_byte_sequence, 100, ["UTF-8", "EUC-JP"], "abc\xFF", "def")
assert_econv("abc", :invalid_byte_sequence, 100, ["Shift_JIS", "EUC-JP"], "abc\xFF", "def")
assert_econv("abc", :invalid_byte_sequence, 100, ["ISO-2022-JP", "EUC-JP"], "abc\xFF", "def")
end
def test_invalid2
ec = Encoding::Converter.new("Shift_JIS", "EUC-JP")
a = ["", "abc\xFFdef", ec, nil, 1]
check_ec("a", "c\xFFdef", :destination_buffer_full, *a)
check_ec("ab", "\xFFdef", :destination_buffer_full, *a)
check_ec("abc", "def", :invalid_byte_sequence, *a)
check_ec("abcd", "f", :destination_buffer_full, *a)
check_ec("abcde", "", :destination_buffer_full, *a)
check_ec("abcdef", "", :finished, *a)
end
def test_invalid3
ec = Encoding::Converter.new("Shift_JIS", "EUC-JP")
a = ["", "abc\xFFdef", ec, nil, 10]
check_ec("abc", "def", :invalid_byte_sequence, *a)
check_ec("abcdef", "", :finished, *a)
end
def test_invalid4
ec = Encoding::Converter.new("Shift_JIS", "EUC-JP")
a = ["", "abc\xFFdef", ec, nil, 10, :after_output=>true]
check_ec("a", "bc\xFFdef", :after_output, *a)
check_ec("ab", "c\xFFdef", :after_output, *a)
check_ec("abc", "\xFFdef", :after_output, *a)
check_ec("abc", "def", :invalid_byte_sequence, *a)
check_ec("abcd", "ef", :after_output, *a)
check_ec("abcde", "f", :after_output, *a)
check_ec("abcdef", "", :after_output, *a)
check_ec("abcdef", "", :finished, *a)
end
def test_invalid_utf16le
ec = Encoding::Converter.new("UTF-16LE", "UTF-8")
a = ["", src="", ec, nil, 50, :partial_input=>true]
src << "A"; check_ec("", "", :source_buffer_empty, *a)
src << "\x00"; check_ec("A", "", :source_buffer_empty, *a)
src << "\x00"; check_ec("A", "", :source_buffer_empty, *a)
src << "\xd8"; check_ec("A", "", :source_buffer_empty, *a)
src << "\x01"; check_ec("A", "", :source_buffer_empty, *a)
src << "\x02"; check_ec("A", "", :invalid_byte_sequence, *a)
src << "\x03"; check_ec("A\u{0201}", "", :source_buffer_empty, *a)
src << "\x04"; check_ec("A\u{0201}\u{0403}", "", :source_buffer_empty, *a)
src << "\x00"; check_ec("A\u{0201}\u{0403}", "", :source_buffer_empty, *a)
src << "\xd8"; check_ec("A\u{0201}\u{0403}", "", :source_buffer_empty, *a)
src << "\x00"; check_ec("A\u{0201}\u{0403}", "", :source_buffer_empty, *a)
src << "\xd8"; check_ec("A\u{0201}\u{0403}", "", :invalid_byte_sequence, *a)
src << "\x00"; check_ec("A\u{0201}\u{0403}", "", :source_buffer_empty, *a)
src << "\xdc"; check_ec("A\u{0201}\u{0403}\u{10000}", "", :source_buffer_empty, *a)
end
def test_invalid_utf16be
ec = Encoding::Converter.new("UTF-16BE", "UTF-8")
a = ["", src="", ec, nil, 50, :partial_input=>true]
src << "\x00"; check_ec("", "", :source_buffer_empty, *a)
src << "A"; check_ec("A", "", :source_buffer_empty, *a)
src << "\xd8"; check_ec("A", "", :source_buffer_empty, *a)
src << "\x00"; check_ec("A", "", :source_buffer_empty, *a)
src << "\x02"; check_ec("A", "", :invalid_byte_sequence, *a)
src << "\x01"; check_ec("A\u{0201}", "", :source_buffer_empty, *a)
src << "\x04"; check_ec("A\u{0201}", "", :source_buffer_empty, *a)
src << "\x03"; check_ec("A\u{0201}\u{0403}", "", :source_buffer_empty, *a)
src << "\xd8"; check_ec("A\u{0201}\u{0403}", "", :source_buffer_empty, *a)
src << "\x00"; check_ec("A\u{0201}\u{0403}", "", :source_buffer_empty, *a)
src << "\xd8"; check_ec("A\u{0201}\u{0403}", "", :invalid_byte_sequence, *a)
src << "\x00"; check_ec("A\u{0201}\u{0403}", "", :source_buffer_empty, *a)
src << "\xdc"; check_ec("A\u{0201}\u{0403}", "", :source_buffer_empty, *a)
src << "\x00"; check_ec("A\u{0201}\u{0403}\u{10000}", "", :source_buffer_empty, *a)
end
def test_invalid_utf32be
ec = Encoding::Converter.new("UTF-32BE", "UTF-8")
a = ["", src="", ec, nil, 50, :partial_input=>true]
src << "\x00"; check_ec("", "", :source_buffer_empty, *a)
src << "\x00"; check_ec("", "", :source_buffer_empty, *a)
src << "\x00"; check_ec("", "", :source_buffer_empty, *a)
src << "A"; check_ec("A", "", :source_buffer_empty, *a)
src << "\x00"; check_ec("A", "", :source_buffer_empty, *a)
src << "\x00"; check_ec("A", "", :source_buffer_empty, *a)
src << "\xdc"; check_ec("A", "", :source_buffer_empty, *a)
src << "\x00"; check_ec("A", "", :invalid_byte_sequence, *a)
src << "\x00"; check_ec("A", "", :source_buffer_empty, *a)
src << "\x00"; check_ec("A", "", :source_buffer_empty, *a)
src << "\x00"; check_ec("A", "", :source_buffer_empty, *a)
src << "B"; check_ec("AB", "", :source_buffer_empty, *a)
src << "\x00"; check_ec("AB", "", :source_buffer_empty, *a)
src << "\x00"; check_ec("AB", "", :source_buffer_empty, *a)
src << "\x00"; check_ec("AB", "", :source_buffer_empty, *a)
src << "C"; check_ec("ABC", "", :source_buffer_empty, *a)
end
def test_invalid_utf32le
ec = Encoding::Converter.new("UTF-32LE", "UTF-8")
a = ["", src="", ec, nil, 50, :partial_input=>true]
src << "A"; check_ec("", "", :source_buffer_empty, *a)
src << "\x00"; check_ec("", "", :source_buffer_empty, *a)
src << "\x00"; check_ec("", "", :source_buffer_empty, *a)
src << "\x00"; check_ec("A", "", :source_buffer_empty, *a)
src << "\x00"; check_ec("A", "", :source_buffer_empty, *a)
src << "\xdc"; check_ec("A", "", :source_buffer_empty, *a)
src << "\x00"; check_ec("A", "", :source_buffer_empty, *a)
src << "\x00"; check_ec("A", "", :invalid_byte_sequence, *a)
src << "B"; check_ec("A", "", :source_buffer_empty, *a)
src << "\x00"; check_ec("A", "", :source_buffer_empty, *a)
src << "\x00"; check_ec("A", "", :source_buffer_empty, *a)
src << "\x00"; check_ec("AB", "", :source_buffer_empty, *a)
src << "C"; check_ec("AB", "", :source_buffer_empty, *a)
src << "\x00"; check_ec("AB", "", :source_buffer_empty, *a)
src << "\x00"; check_ec("AB", "", :source_buffer_empty, *a)
src << "\x00"; check_ec("ABC", "", :source_buffer_empty, *a)
end
def test_errors
ec = Encoding::Converter.new("UTF-16BE", "EUC-JP")
a = ["", "\xFF\xFE\x00A\xDC\x00\x00B", ec, nil, 10]
check_ec("", "\x00A\xDC\x00\x00B", :undefined_conversion, *a)
check_ec("A", "\x00B", :invalid_byte_sequence, *a) # \xDC\x00 is invalid as UTF-16BE
check_ec("AB", "", :finished, *a)
end
def test_errors2
ec = Encoding::Converter.new("UTF-16BE", "EUC-JP")
a = ["", "\xFF\xFE\x00A\xDC\x00\x00B", ec, nil, 10, :after_output=>true]
check_ec("", "\x00A\xDC\x00\x00B", :undefined_conversion, *a)
check_ec("A", "\xDC\x00\x00B", :after_output, *a)
check_ec("A", "\x00B", :invalid_byte_sequence, *a)
check_ec("AB", "", :after_output, *a)
check_ec("AB", "", :finished, *a)
end
def test_universal_newline
ec = Encoding::Converter.new("UTF-8", "EUC-JP", universal_newline: true)
a = ["", src="", ec, nil, 50, :partial_input=>true]
src << "abc\r\ndef"; check_ec("abc\ndef", "", :source_buffer_empty, *a)
src << "ghi\njkl"; check_ec("abc\ndefghi\njkl", "", :source_buffer_empty, *a)
src << "mno\rpqr"; check_ec("abc\ndefghi\njklmno\npqr", "", :source_buffer_empty, *a)
src << "stu\r"; check_ec("abc\ndefghi\njklmno\npqrstu", "", :source_buffer_empty, *a)
src << "\nvwx"; check_ec("abc\ndefghi\njklmno\npqrstu\nvwx", "", :source_buffer_empty, *a)
src << "\nyz"; check_ec("abc\ndefghi\njklmno\npqrstu\nvwx\nyz", "", :source_buffer_empty, *a)
end
* include/ruby/io.h (FMODE_TEXTMODE): defined. * include/ruby/encoding.h (rb_econv_t): new field: flags. (rb_econv_binmode): declared. * io.c (io_unread): text mode hack removed. (NEED_NEWLINE_DECODER): defined. (NEED_NEWLINE_ENCODER): defined. (NEED_READCONV): defined. (NEED_WRITECONV): defined. (TEXTMODE_NEWLINE_ENCODER): defined for windows. (make_writeconv): setup converter with TEXTMODE_NEWLINE_ENCODER for text mode. (io_fwrite): use NEED_WRITECONV. character code conversion is disabled if fptr->writeconv_stateless is nil. (make_readconv): setup converter with ECONV_UNIVERSAL_NEWLINE_DECODER for text mode. (read_all): use NEED_READCONV. (appendline): use NEED_READCONV. (rb_io_getline_1): use NEED_READCONV. (io_getc): use NEED_READCONV. (rb_io_ungetc): use NEED_READCONV. (rb_io_binmode): OS-level text mode test removed. call rb_econv_binmode. (rb_io_binmode_m): call rb_io_binmode_m with write_io as well. (rb_io_flags_mode): return mode string including "t". (rb_io_mode_flags): detect "t" for text mode. (rb_sysopen): always specify O_BINARY. * transcode.c (rb_econv_open_by_transcoder_entries): initialize flags. (rb_econv_open): if source and destination encoding is both empty string, open newline converter. last_tc will be NULL in this case. (rb_econv_encoding_to_insert_output): last_tc may be NULL now. (rb_econv_string): ditto. (output_replacement_character): ditto. (transcode_loop): ditto. (econv_init): ditto. (econv_inspect): ditto. (rb_econv_binmode): new function. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@18780 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2008-08-22 12:44:00 -04:00
def test_universal_newline2
ec = Encoding::Converter.new("", "", universal_newline: true)
a = ["", src="", ec, nil, 50, :partial_input=>true]
* include/ruby/io.h (FMODE_TEXTMODE): defined. * include/ruby/encoding.h (rb_econv_t): new field: flags. (rb_econv_binmode): declared. * io.c (io_unread): text mode hack removed. (NEED_NEWLINE_DECODER): defined. (NEED_NEWLINE_ENCODER): defined. (NEED_READCONV): defined. (NEED_WRITECONV): defined. (TEXTMODE_NEWLINE_ENCODER): defined for windows. (make_writeconv): setup converter with TEXTMODE_NEWLINE_ENCODER for text mode. (io_fwrite): use NEED_WRITECONV. character code conversion is disabled if fptr->writeconv_stateless is nil. (make_readconv): setup converter with ECONV_UNIVERSAL_NEWLINE_DECODER for text mode. (read_all): use NEED_READCONV. (appendline): use NEED_READCONV. (rb_io_getline_1): use NEED_READCONV. (io_getc): use NEED_READCONV. (rb_io_ungetc): use NEED_READCONV. (rb_io_binmode): OS-level text mode test removed. call rb_econv_binmode. (rb_io_binmode_m): call rb_io_binmode_m with write_io as well. (rb_io_flags_mode): return mode string including "t". (rb_io_mode_flags): detect "t" for text mode. (rb_sysopen): always specify O_BINARY. * transcode.c (rb_econv_open_by_transcoder_entries): initialize flags. (rb_econv_open): if source and destination encoding is both empty string, open newline converter. last_tc will be NULL in this case. (rb_econv_encoding_to_insert_output): last_tc may be NULL now. (rb_econv_string): ditto. (output_replacement_character): ditto. (transcode_loop): ditto. (econv_init): ditto. (econv_inspect): ditto. (rb_econv_binmode): new function. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@18780 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2008-08-22 12:44:00 -04:00
src << "abc\r\ndef"; check_ec("abc\ndef", "", :source_buffer_empty, *a)
src << "ghi\njkl"; check_ec("abc\ndefghi\njkl", "", :source_buffer_empty, *a)
src << "mno\rpqr"; check_ec("abc\ndefghi\njklmno\npqr", "", :source_buffer_empty, *a)
src << "stu\r"; check_ec("abc\ndefghi\njklmno\npqrstu", "", :source_buffer_empty, *a)
* include/ruby/io.h (FMODE_TEXTMODE): defined. * include/ruby/encoding.h (rb_econv_t): new field: flags. (rb_econv_binmode): declared. * io.c (io_unread): text mode hack removed. (NEED_NEWLINE_DECODER): defined. (NEED_NEWLINE_ENCODER): defined. (NEED_READCONV): defined. (NEED_WRITECONV): defined. (TEXTMODE_NEWLINE_ENCODER): defined for windows. (make_writeconv): setup converter with TEXTMODE_NEWLINE_ENCODER for text mode. (io_fwrite): use NEED_WRITECONV. character code conversion is disabled if fptr->writeconv_stateless is nil. (make_readconv): setup converter with ECONV_UNIVERSAL_NEWLINE_DECODER for text mode. (read_all): use NEED_READCONV. (appendline): use NEED_READCONV. (rb_io_getline_1): use NEED_READCONV. (io_getc): use NEED_READCONV. (rb_io_ungetc): use NEED_READCONV. (rb_io_binmode): OS-level text mode test removed. call rb_econv_binmode. (rb_io_binmode_m): call rb_io_binmode_m with write_io as well. (rb_io_flags_mode): return mode string including "t". (rb_io_mode_flags): detect "t" for text mode. (rb_sysopen): always specify O_BINARY. * transcode.c (rb_econv_open_by_transcoder_entries): initialize flags. (rb_econv_open): if source and destination encoding is both empty string, open newline converter. last_tc will be NULL in this case. (rb_econv_encoding_to_insert_output): last_tc may be NULL now. (rb_econv_string): ditto. (output_replacement_character): ditto. (transcode_loop): ditto. (econv_init): ditto. (econv_inspect): ditto. (rb_econv_binmode): new function. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@18780 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2008-08-22 12:44:00 -04:00
src << "\nvwx"; check_ec("abc\ndefghi\njklmno\npqrstu\nvwx", "", :source_buffer_empty, *a)
src << "\nyz"; check_ec("abc\ndefghi\njklmno\npqrstu\nvwx\nyz", "", :source_buffer_empty, *a)
end
def test_universal_newline3
ec = Encoding::Converter.new("", "", universal_newline: true)
a = ["", src="", ec, nil, 50, :partial_input=>true]
src << "abc\r\ndef"; check_ec("abc\ndef", "", :source_buffer_empty, *a)
src << "ghi\njkl"; check_ec("abc\ndefghi\njkl", "", :source_buffer_empty, *a)
src << "mno\rpqr"; check_ec("abc\ndefghi\njklmno\npqr", "", :source_buffer_empty, *a)
src << "stu\r"; check_ec("abc\ndefghi\njklmno\npqrstu", "", :source_buffer_empty, *a)
src << "\nvwx"; check_ec("abc\ndefghi\njklmno\npqrstu\nvwx", "", :source_buffer_empty, *a)
src << "\nyz"; check_ec("abc\ndefghi\njklmno\npqrstu\nvwx\nyz", "", :source_buffer_empty, *a)
src << "\r"; check_ec("abc\ndefghi\njklmno\npqrstu\nvwx\nyz", "", :source_buffer_empty, *a)
a[-1] = nil
src << ""; check_ec("abc\ndefghi\njklmno\npqrstu\nvwx\nyz\n", "", :finished, *a)
end
def test_crlf_newline
ec = Encoding::Converter.new("UTF-8", "EUC-JP", crlf_newline: true)
assert_econv("abc\r\ndef", :finished, 50, ec, "abc\ndef", "")
end
* include/ruby/io.h (FMODE_TEXTMODE): defined. * include/ruby/encoding.h (rb_econv_t): new field: flags. (rb_econv_binmode): declared. * io.c (io_unread): text mode hack removed. (NEED_NEWLINE_DECODER): defined. (NEED_NEWLINE_ENCODER): defined. (NEED_READCONV): defined. (NEED_WRITECONV): defined. (TEXTMODE_NEWLINE_ENCODER): defined for windows. (make_writeconv): setup converter with TEXTMODE_NEWLINE_ENCODER for text mode. (io_fwrite): use NEED_WRITECONV. character code conversion is disabled if fptr->writeconv_stateless is nil. (make_readconv): setup converter with ECONV_UNIVERSAL_NEWLINE_DECODER for text mode. (read_all): use NEED_READCONV. (appendline): use NEED_READCONV. (rb_io_getline_1): use NEED_READCONV. (io_getc): use NEED_READCONV. (rb_io_ungetc): use NEED_READCONV. (rb_io_binmode): OS-level text mode test removed. call rb_econv_binmode. (rb_io_binmode_m): call rb_io_binmode_m with write_io as well. (rb_io_flags_mode): return mode string including "t". (rb_io_mode_flags): detect "t" for text mode. (rb_sysopen): always specify O_BINARY. * transcode.c (rb_econv_open_by_transcoder_entries): initialize flags. (rb_econv_open): if source and destination encoding is both empty string, open newline converter. last_tc will be NULL in this case. (rb_econv_encoding_to_insert_output): last_tc may be NULL now. (rb_econv_string): ditto. (output_replacement_character): ditto. (transcode_loop): ditto. (econv_init): ditto. (econv_inspect): ditto. (rb_econv_binmode): new function. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@18780 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2008-08-22 12:44:00 -04:00
def test_crlf_newline2
ec = Encoding::Converter.new("", "", crlf_newline: true)
* include/ruby/io.h (FMODE_TEXTMODE): defined. * include/ruby/encoding.h (rb_econv_t): new field: flags. (rb_econv_binmode): declared. * io.c (io_unread): text mode hack removed. (NEED_NEWLINE_DECODER): defined. (NEED_NEWLINE_ENCODER): defined. (NEED_READCONV): defined. (NEED_WRITECONV): defined. (TEXTMODE_NEWLINE_ENCODER): defined for windows. (make_writeconv): setup converter with TEXTMODE_NEWLINE_ENCODER for text mode. (io_fwrite): use NEED_WRITECONV. character code conversion is disabled if fptr->writeconv_stateless is nil. (make_readconv): setup converter with ECONV_UNIVERSAL_NEWLINE_DECODER for text mode. (read_all): use NEED_READCONV. (appendline): use NEED_READCONV. (rb_io_getline_1): use NEED_READCONV. (io_getc): use NEED_READCONV. (rb_io_ungetc): use NEED_READCONV. (rb_io_binmode): OS-level text mode test removed. call rb_econv_binmode. (rb_io_binmode_m): call rb_io_binmode_m with write_io as well. (rb_io_flags_mode): return mode string including "t". (rb_io_mode_flags): detect "t" for text mode. (rb_sysopen): always specify O_BINARY. * transcode.c (rb_econv_open_by_transcoder_entries): initialize flags. (rb_econv_open): if source and destination encoding is both empty string, open newline converter. last_tc will be NULL in this case. (rb_econv_encoding_to_insert_output): last_tc may be NULL now. (rb_econv_string): ditto. (output_replacement_character): ditto. (transcode_loop): ditto. (econv_init): ditto. (econv_inspect): ditto. (rb_econv_binmode): new function. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@18780 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2008-08-22 12:44:00 -04:00
assert_econv("abc\r\ndef", :finished, 50, ec, "abc\ndef", "")
end
def test_cr_newline
ec = Encoding::Converter.new("UTF-8", "EUC-JP", cr_newline: true)
assert_econv("abc\rdef", :finished, 50, ec, "abc\ndef", "")
end
* include/ruby/io.h (FMODE_TEXTMODE): defined. * include/ruby/encoding.h (rb_econv_t): new field: flags. (rb_econv_binmode): declared. * io.c (io_unread): text mode hack removed. (NEED_NEWLINE_DECODER): defined. (NEED_NEWLINE_ENCODER): defined. (NEED_READCONV): defined. (NEED_WRITECONV): defined. (TEXTMODE_NEWLINE_ENCODER): defined for windows. (make_writeconv): setup converter with TEXTMODE_NEWLINE_ENCODER for text mode. (io_fwrite): use NEED_WRITECONV. character code conversion is disabled if fptr->writeconv_stateless is nil. (make_readconv): setup converter with ECONV_UNIVERSAL_NEWLINE_DECODER for text mode. (read_all): use NEED_READCONV. (appendline): use NEED_READCONV. (rb_io_getline_1): use NEED_READCONV. (io_getc): use NEED_READCONV. (rb_io_ungetc): use NEED_READCONV. (rb_io_binmode): OS-level text mode test removed. call rb_econv_binmode. (rb_io_binmode_m): call rb_io_binmode_m with write_io as well. (rb_io_flags_mode): return mode string including "t". (rb_io_mode_flags): detect "t" for text mode. (rb_sysopen): always specify O_BINARY. * transcode.c (rb_econv_open_by_transcoder_entries): initialize flags. (rb_econv_open): if source and destination encoding is both empty string, open newline converter. last_tc will be NULL in this case. (rb_econv_encoding_to_insert_output): last_tc may be NULL now. (rb_econv_string): ditto. (output_replacement_character): ditto. (transcode_loop): ditto. (econv_init): ditto. (econv_inspect): ditto. (rb_econv_binmode): new function. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@18780 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2008-08-22 12:44:00 -04:00
def test_cr_newline2
ec = Encoding::Converter.new("", "", cr_newline: true)
* include/ruby/io.h (FMODE_TEXTMODE): defined. * include/ruby/encoding.h (rb_econv_t): new field: flags. (rb_econv_binmode): declared. * io.c (io_unread): text mode hack removed. (NEED_NEWLINE_DECODER): defined. (NEED_NEWLINE_ENCODER): defined. (NEED_READCONV): defined. (NEED_WRITECONV): defined. (TEXTMODE_NEWLINE_ENCODER): defined for windows. (make_writeconv): setup converter with TEXTMODE_NEWLINE_ENCODER for text mode. (io_fwrite): use NEED_WRITECONV. character code conversion is disabled if fptr->writeconv_stateless is nil. (make_readconv): setup converter with ECONV_UNIVERSAL_NEWLINE_DECODER for text mode. (read_all): use NEED_READCONV. (appendline): use NEED_READCONV. (rb_io_getline_1): use NEED_READCONV. (io_getc): use NEED_READCONV. (rb_io_ungetc): use NEED_READCONV. (rb_io_binmode): OS-level text mode test removed. call rb_econv_binmode. (rb_io_binmode_m): call rb_io_binmode_m with write_io as well. (rb_io_flags_mode): return mode string including "t". (rb_io_mode_flags): detect "t" for text mode. (rb_sysopen): always specify O_BINARY. * transcode.c (rb_econv_open_by_transcoder_entries): initialize flags. (rb_econv_open): if source and destination encoding is both empty string, open newline converter. last_tc will be NULL in this case. (rb_econv_encoding_to_insert_output): last_tc may be NULL now. (rb_econv_string): ditto. (output_replacement_character): ditto. (transcode_loop): ditto. (econv_init): ditto. (econv_inspect): ditto. (rb_econv_binmode): new function. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@18780 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2008-08-22 12:44:00 -04:00
assert_econv("abc\rdef", :finished, 50, ec, "abc\ndef", "")
end
def test_no_universal_newline1
ec = Encoding::Converter.new("UTF-8", "EUC-JP", universal_newline: false)
assert_econv("abc\r\ndef", :finished, 50, ec, "abc\r\ndef", "")
end
def test_no_universal_newline2
ec = Encoding::Converter.new("", "", universal_newline: false)
assert_econv("abc\r\ndef", :finished, 50, ec, "abc\r\ndef", "")
end
def test_after_output
ec = Encoding::Converter.new("UTF-8", "EUC-JP")
a = ["", "abc\u{3042}def", ec, nil, 100, :after_output=>true]
check_ec("a", "bc\u{3042}def", :after_output, *a)
check_ec("ab", "c\u{3042}def", :after_output, *a)
check_ec("abc", "\u{3042}def", :after_output, *a)
check_ec("abc\xA4\xA2", "def", :after_output, *a)
check_ec("abc\xA4\xA2d", "ef", :after_output, *a)
check_ec("abc\xA4\xA2de", "f", :after_output, *a)
check_ec("abc\xA4\xA2def", "", :after_output, *a)
check_ec("abc\xA4\xA2def", "", :finished, *a)
end
def test_errinfo_invalid_euc_jp
ec = Encoding::Converter.new("EUC-JP", "Shift_JIS")
ec.primitive_convert("\xff", "", nil, 10)
assert_errinfo(:invalid_byte_sequence, "EUC-JP", "Shift_JIS", "\xFF", "", ec)
end
def test_errinfo_invalid_euc_jp2
ec = Encoding::Converter.new("EUC-JP", "ISO-8859-1")
ec.primitive_convert("\xff", "", nil, 10)
assert_errinfo(:invalid_byte_sequence, "EUC-JP", "UTF-8", "\xFF", "", ec)
end
def test_errinfo_undefined_hiragana
ec = Encoding::Converter.new("EUC-JP", "ISO-8859-1")
ec.primitive_convert("\xa4\xa2", "", nil, 10)
assert_errinfo(:undefined_conversion, "UTF-8", "ISO-8859-1", "\xE3\x81\x82", "", ec)
end
def test_errinfo_invalid_partial_character
ec = Encoding::Converter.new("EUC-JP", "ISO-8859-1")
ec.primitive_convert("\xa4", "", nil, 10)
assert_errinfo(:incomplete_input, "EUC-JP", "UTF-8", "\xA4", "", ec)
end
def test_errinfo_valid_partial_character
ec = Encoding::Converter.new("EUC-JP", "ISO-8859-1")
ec.primitive_convert("\xa4", "", nil, 10, :partial_input=>true)
assert_errinfo(:source_buffer_empty, nil, nil, nil, nil, ec)
end
def test_errinfo_invalid_utf16be
ec = Encoding::Converter.new("UTF-16BE", "UTF-8")
ec.primitive_convert(src="\xd8\x00\x00@", "", nil, 10)
assert_errinfo(:invalid_byte_sequence, "UTF-16BE", "UTF-8", "\xD8\x00", "\x00", ec)
assert_equal("@", src)
end
def test_errinfo_invalid_utf16le
ec = Encoding::Converter.new("UTF-16LE", "UTF-8")
ec.primitive_convert(src="\x00\xd8@\x00", "", nil, 10)
assert_errinfo(:invalid_byte_sequence, "UTF-16LE", "UTF-8", "\x00\xD8", "@\x00", ec)
assert_equal("", src)
end
def test_output_iso2022jp
ec = Encoding::Converter.new("EUC-JP", "ISO-2022-JP")
ec.primitive_convert(src="\xa1\xa1", dst="", nil, 10, :partial_input=>true)
assert_equal("\e$B!!".force_encoding("ISO-2022-JP"), dst)
assert_equal(nil, ec.insert_output("???"))
ec.primitive_convert("", dst, nil, 10, :partial_input=>true)
assert_equal("\e$B!!\e(B???".force_encoding("ISO-2022-JP"), dst)
ec.primitive_convert(src="\xa1\xa2", dst, nil, 10, :partial_input=>true)
assert_equal("\e$B!!\e(B???\e$B!\"".force_encoding("ISO-2022-JP"), dst)
assert_equal(nil, ec.insert_output("\xA1\xA1".force_encoding("EUC-JP")))
ec.primitive_convert("", dst, nil, 10, :partial_input=>true)
assert_equal("\e$B!!\e(B???\e$B!\"!!".force_encoding("ISO-2022-JP"), dst)
ec.primitive_convert(src="\xa1\xa3", dst, nil, 10, :partial_input=>true)
assert_equal("\e$B!!\e(B???\e$B!\"!!!\#".force_encoding("ISO-2022-JP"), dst)
assert_equal(nil, ec.insert_output("\u3042"))
ec.primitive_convert("", dst, nil, 10, :partial_input=>true)
assert_equal("\e$B!!\e(B???\e$B!\"!!!\#$\"".force_encoding("ISO-2022-JP"), dst)
assert_raise(Encoding::UndefinedConversionError) {
ec.insert_output("\uFFFD")
}
assert_equal("\e$B!!\e(B???\e$B!\"!!!\#$\"".force_encoding("ISO-2022-JP"), dst)
ec.primitive_convert("", dst, nil, 10)
assert_equal("\e$B!!\e(B???\e$B!\"!!!\#$\"\e(B".force_encoding("ISO-2022-JP"), dst)
end
def test_exc_invalid
err = assert_raise(Encoding::InvalidByteSequenceError) {
"abc\xa4def".encode("ISO-8859-1", "EUC-JP")
}
assert_equal("EUC-JP", err.source_encoding_name)
assert_equal("UTF-8", err.destination_encoding_name)
assert_equal(Encoding::EUC_JP, err.source_encoding)
assert_equal(Encoding::UTF_8, err.destination_encoding)
assert_equal("\xA4".force_encoding("ASCII-8BIT"), err.error_bytes)
assert_equal("d", err.readagain_bytes)
assert_equal(false, err.incomplete_input?)
end
def test_exc_incomplete
err = assert_raise(Encoding::InvalidByteSequenceError) {
"abc\xa4".encode("ISO-8859-1", "EUC-JP")
}
assert_equal("EUC-JP", err.source_encoding_name)
assert_equal("UTF-8", err.destination_encoding_name)
assert_equal(Encoding::EUC_JP, err.source_encoding)
assert_equal(Encoding::UTF_8, err.destination_encoding)
assert_equal("\xA4".force_encoding("ASCII-8BIT"), err.error_bytes)
assert_equal(nil, err.readagain_bytes)
assert_equal(true, err.incomplete_input?)
end
def test_exc_undef
err = assert_raise(Encoding::UndefinedConversionError) {
"abc\xa4\xa2def".encode("ISO-8859-1", "EUC-JP")
}
assert_equal("UTF-8", err.source_encoding_name)
assert_equal("ISO-8859-1", err.destination_encoding_name)
assert_equal(Encoding::UTF_8, err.source_encoding)
assert_equal(Encoding::ISO_8859_1, err.destination_encoding)
assert_equal("\u{3042}", err.error_char)
end
def test_putback
ec = Encoding::Converter.new("EUC-JP", "ISO-8859-1")
ret = ec.primitive_convert(src="abc\xa1def", dst="", nil, 10)
assert_equal(:invalid_byte_sequence, ret)
assert_equal(["abc", "ef"], [dst, src])
src = ec.putback + src
assert_equal(["abc", "def"], [dst, src])
ret = ec.primitive_convert(src, dst, nil, 10)
assert_equal(:finished, ret)
assert_equal(["abcdef", ""], [dst, src])
end
def test_putback2
ec = Encoding::Converter.new("utf-16le", "euc-jp")
ret = ec.primitive_convert("\x00\xd8\x21\x00", "", nil, nil)
assert_equal(:invalid_byte_sequence, ret)
assert_equal("\x00".force_encoding("utf-16le"), ec.putback(1))
assert_equal("\x21".force_encoding("utf-16le"), ec.putback(1))
assert_equal("", ec.putback(1))
end
def test_invalid_replace
ec = Encoding::Converter.new("UTF-8", "EUC-JP", invalid: :replace)
ret = ec.primitive_convert(src="abc\x80def", dst="", nil, 100)
assert_equal(:finished, ret)
assert_equal("", src)
assert_equal("abc?def", dst)
end
def test_invalid_ignore
ec = Encoding::Converter.new("UTF-8", "EUC-JP", :invalid => :replace, :replace => "")
ret = ec.primitive_convert(src="abc\x80def", dst="", nil, 100)
assert_equal(:finished, ret)
assert_equal("", src)
assert_equal("abcdef", dst)
end
def test_undef_replace
ec = Encoding::Converter.new("UTF-8", "EUC-JP", :undef => :replace)
ret = ec.primitive_convert(src="abc\u{fffd}def", dst="", nil, 100)
assert_equal(:finished, ret)
assert_equal("", src)
assert_equal("abc?def", dst)
end
def test_undef_ignore
ec = Encoding::Converter.new("UTF-8", "EUC-JP", :undef => :replace, :replace => "")
ret = ec.primitive_convert(src="abc\u{fffd}def", dst="", nil, 100)
assert_equal(:finished, ret)
assert_equal("", src)
assert_equal("abcdef", dst)
end
def test_noconv
ec = Encoding::Converter.new("", "")
assert_equal(nil, ec.source_encoding)
assert_equal(nil, ec.destination_encoding)
assert_equal([:source_buffer_empty, nil, nil, nil, nil], ec.primitive_errinfo)
a = ["", "abcdefg", ec, nil, 2]
check_ec("ab", "cdefg", :destination_buffer_full, *a)
check_ec("abcd", "efg", :destination_buffer_full, *a)
check_ec("abcdef", "g", :destination_buffer_full, *a)
check_ec("abcdefg", "", :finished, *a)
end
def test_noconv_partial
ec = Encoding::Converter.new("", "")
a = ["", "abcdefg", ec, nil, 2, :partial_input=>true]
check_ec("ab", "cdefg", :destination_buffer_full, *a)
check_ec("abcd", "efg", :destination_buffer_full, *a)
check_ec("abcdef", "g", :destination_buffer_full, *a)
check_ec("abcdefg", "", :source_buffer_empty, *a)
end
def test_noconv_after_output
ec = Encoding::Converter.new("", "")
a = ["", "abcdefg", ec, nil, 2, :after_output=>true]
check_ec("a", "bcdefg", :after_output, *a)
check_ec("ab", "cdefg", :after_output, *a)
check_ec("abc", "defg", :after_output, *a)
check_ec("abcd", "efg", :after_output, *a)
check_ec("abcde", "fg", :after_output, *a)
check_ec("abcdef", "g", :after_output, *a)
check_ec("abcdefg", "", :after_output, *a)
check_ec("abcdefg", "", :finished, *a)
end
def test_noconv_insert_output
ec = Encoding::Converter.new("", "")
ec.insert_output("xyz")
ret = ec.primitive_convert(src="abc", dst="", nil, 20)
assert_equal(:finished, ret)
assert_equal(["xyzabc", ""], [dst, src])
end
def test_convert
ec = Encoding::Converter.new("utf-8", "euc-jp")
assert_raise(Encoding::InvalidByteSequenceError) { ec.convert("a\x80") }
assert_raise(Encoding::UndefinedConversionError) { ec.convert("\ufffd") }
ret = ec.primitive_convert(nil, "", nil, nil)
assert_equal(:finished, ret)
assert_raise(ArgumentError) { ec.convert("a") }
end
def test_finish_iso2022jp
ec = Encoding::Converter.new("utf-8", "iso-2022-jp")
assert_equal("\e$B$\"".force_encoding("iso-2022-jp"), ec.convert("\u3042"))
assert_equal("\e(B".force_encoding("iso-2022-jp"), ec.finish)
end
def test_finish_incomplete_error
ec = Encoding::Converter.new("utf-8", "euc-jp")
ec.convert("\xEF")
assert_raise(Encoding::InvalidByteSequenceError) { ec.finish }
end
def test_last_error1
ec = Encoding::Converter.new("sjis", "euc-jp")
assert_equal(nil, ec.last_error)
assert_equal(:incomplete_input, ec.primitive_convert("fo\x81", "", nil, nil))
assert_kind_of(Encoding::InvalidByteSequenceError, ec.last_error)
end
def test_last_error2
ec = Encoding::Converter.new("sjis", "euc-jp")
assert_equal("fo", ec.convert("fo\x81"))
assert_raise(Encoding::InvalidByteSequenceError) { ec.finish }
assert_kind_of(Encoding::InvalidByteSequenceError, ec.last_error)
end
def test_us_ascii
ec = Encoding::Converter.new("UTF-8", "US-ASCII")
ec.primitive_convert("\u{3042}", "")
err = ec.last_error
assert_kind_of(Encoding::UndefinedConversionError, err)
assert_equal("\u{3042}", err.error_char)
end
def test_88591
ec = Encoding::Converter.new("UTF-8", "ISO-8859-1")
ec.primitive_convert("\u{3042}", "")
err = ec.last_error
assert_kind_of(Encoding::UndefinedConversionError, err)
assert_equal("\u{3042}", err.error_char)
end
def test_get_replacement
ec = Encoding::Converter.new("euc-jp", "iso-8859-1")
assert_equal("?", ec.replacement)
ec = Encoding::Converter.new("euc-jp", "utf-8")
assert_equal("\uFFFD", ec.replacement)
end
def test_set_replacement
ec = Encoding::Converter.new("utf-8", "us-ascii", :undef => :replace)
ec.replacement = "<undef>"
assert_equal("a <undef> b", ec.convert("a \u3042 b"))
end
def test_econv_new_hash
ec = Encoding::Converter.new("utf-8", "us-ascii", :undef => :replace)
assert_equal("a ? b", ec.convert("a \u3042 b"))
ec = Encoding::Converter.new("utf-8", "us-ascii", :undef => :replace, :replace => "X")
assert_equal("a X b", ec.convert("a \u3042 b"))
end
def test_hex_charref
ec = Encoding::Converter.new("UTF-8", "US-ASCII", Encoding::Converter::UNDEF_HEX_CHARREF)
assert_equal("&#x3042;", ec.convert("\u3042"))
ec = Encoding::Converter.new("UTF-8", "EUC-JP", Encoding::Converter::UNDEF_HEX_CHARREF)
assert_equal("\xa4\xcf\xa4\xa1\xa4\xa4&#x2665;\xa1\xa3".force_encoding("euc-jp"),
ec.convert("\u{306f 3041 3044 2665 3002}"))
ec = Encoding::Converter.new("UTF-8", "ISO-2022-JP", Encoding::Converter::UNDEF_HEX_CHARREF)
assert_equal("\e$B$O$!$$\e(B&#x2665;\e$B!#".force_encoding("ISO-2022-JP"),
ec.convert("\u{306f 3041 3044 2665 3002}"))
assert_equal("\e(B".force_encoding("ISO-2022-JP"),
ec.finish)
ec = Encoding::Converter.new("EUC-JP", "US-ASCII", Encoding::Converter::UNDEF_HEX_CHARREF)
assert_equal("&#x4EA4;&#x63DB;&#x6CD5;&#x5247;: n&#xD7;m=m&#xD7;n".force_encoding("ISO-8859-1"),
ec.convert("\xB8\xF2\xB4\xB9\xCB\xA1\xC2\xA7: n\xA1\xDFm=m\xA1\xDFn"))
ec = Encoding::Converter.new("EUC-JP", "ISO-8859-1", Encoding::Converter::UNDEF_HEX_CHARREF)
assert_equal("&#x4EA4;&#x63DB;&#x6CD5;&#x5247;: n\xD7m=m\xD7n".force_encoding("ISO-8859-1"),
ec.convert("\xB8\xF2\xB4\xB9\xCB\xA1\xC2\xA7: n\xA1\xDFm=m\xA1\xDFn"))
ec = Encoding::Converter.new("UTF-8", "US-ASCII", Encoding::Converter::UNDEF_HEX_CHARREF)
assert_equal("&", ec.convert("&"))
end
def test_xml_escape_text
ec = Encoding::Converter.new("", "amp_escape")
assert_equal('&amp;<>"', ec.convert("&<>\""))
assert_equal('', ec.finish)
ec = Encoding::Converter.new("", "xml_text_escape")
assert_equal('&amp;&lt;&gt;"', ec.convert("&<>\""))
assert_equal('', ec.finish)
end
def test_xml_escape_attr_content
ec = Encoding::Converter.new("", "xml_attr_content_escape")
assert_equal('', ec.finish)
ec = Encoding::Converter.new("", "xml_attr_content_escape")
assert_equal('', ec.convert(""))
assert_equal('', ec.finish)
ec = Encoding::Converter.new("", "xml_attr_content_escape")
assert_equal('&quot;', ec.convert('"'))
assert_equal('', ec.finish)
ec = Encoding::Converter.new("", "xml_attr_content_escape")
assert_equal('&amp;&lt;&gt;&quot;&apos;', ec.convert("&<>\"'"))
assert_equal('', ec.finish)
end
def test_xml_escape_attr_quote
ec = Encoding::Converter.new("", "xml_attr_quote")
assert_equal('""', ec.finish)
ec = Encoding::Converter.new("", "xml_attr_quote")
assert_equal('', ec.convert(""))
assert_equal('""', ec.finish)
ec = Encoding::Converter.new("", "xml_attr_quote")
assert_equal('""', ec.convert('"'))
assert_equal('"', ec.finish)
ec = Encoding::Converter.new("", "xml_attr_quote")
assert_equal('"&<>"', ec.convert("&<>\""))
assert_equal('"', ec.finish)
end
def test_xml_escape_with_charref
ec = Encoding::Converter.new("utf-8", "euc-jp", Encoding::Converter::XML_TEXT_DECORATOR|Encoding::Converter::UNDEF_HEX_CHARREF)
assert_equal('&lt;&#x2665;&gt;&amp;"&#x2661;"', ec.convert("<\u2665>&\"\u2661\""))
assert_equal('', ec.finish)
ec = Encoding::Converter.new("utf-8", "euc-jp",
Encoding::Converter::XML_ATTR_CONTENT_DECORATOR|
Encoding::Converter::XML_ATTR_QUOTE_DECORATOR|
Encoding::Converter::UNDEF_HEX_CHARREF)
assert_equal('"&lt;&#x2665;&gt;&amp;&quot;&#x2661;&quot;', ec.convert("<\u2665>&\"\u2661\""))
assert_equal('"', ec.finish)
ec = Encoding::Converter.new("utf-8", "iso-2022-jp", Encoding::Converter::XML_TEXT_DECORATOR)
assert_equal("&amp;\e$B$&\e(B&amp;".force_encoding("iso-2022-jp"), ec.convert("&\u3046&"))
assert_equal('', ec.finish)
end
def test_xml_hasharg
assert_equal("&amp;\e$B$&\e(B&#x2665;&amp;\"'".force_encoding("iso-2022-jp"),
"&\u3046\u2665&\"'".encode("iso-2022-jp", xml: :text))
assert_equal("\"&amp;\e$B$&\e(B&#x2661;&amp;&quot;&apos;\"".force_encoding("iso-2022-jp"),
"&\u3046\u2661&\"'".encode("iso-2022-jp", xml: :attr))
assert_equal("&amp;\u3046\u2661&amp;\"'".force_encoding("utf-8"),
"&\u3046\u2661&\"'".encode("utf-8", xml: :text))
end
def test_iso2022jp_invalid_replace
assert_equal("?x".force_encoding("iso-2022-jp"),
"\222\xA1x".encode("iso-2022-jp", "stateless-iso-2022-jp", :invalid => :replace))
end
def test_convpath
eucjp = Encoding::EUC_JP
utf8 = Encoding::UTF_8
utf16be = Encoding::UTF_16BE
utf16le = Encoding::UTF_16LE
iso88591 = Encoding::ISO_8859_1
iso2022jp = Encoding::ISO_2022_JP
siso2022jp = Encoding::STATELESS_ISO_2022_JP
assert_equal([], Encoding::Converter.new("", "").convpath)
assert_equal([[eucjp, utf8], [utf8, iso88591]],
Encoding::Converter.new(eucjp, iso88591).convpath)
assert_equal([[eucjp, siso2022jp], [siso2022jp, iso2022jp]],
Encoding::Converter.new(eucjp, iso2022jp).convpath)
assert_equal([[iso2022jp, siso2022jp],
[siso2022jp, eucjp],
[eucjp, utf8],
[utf8, iso88591]],
Encoding::Converter.new(iso2022jp, iso88591).convpath)
assert_equal(["universal_newline", [utf8, utf16be]],
Encoding::Converter.new(utf8, utf16be, universal_newline: true).convpath)
assert_equal([[utf16be, utf8], "universal_newline"],
Encoding::Converter.new(utf16be, utf8, universal_newline: true).convpath)
assert_equal([[utf16be, utf8], "universal_newline", [utf8, utf16le]],
Encoding::Converter.new(utf16be, utf16le, universal_newline: true).convpath)
end
def test_search_convpath
eucjp = Encoding::EUC_JP
utf8 = Encoding::UTF_8
utf32be = Encoding::UTF_32BE
iso88591 = Encoding::ISO_8859_1
assert_equal([[iso88591,utf8], [utf8,eucjp]],
Encoding::Converter.search_convpath("ISO-8859-1", "EUC-JP"))
assert_equal([[iso88591,utf8], [utf8,eucjp]],
Encoding::Converter.search_convpath(iso88591, eucjp))
assert_equal([[iso88591,utf8], [utf8,eucjp], "universal_newline"],
Encoding::Converter.search_convpath("ISO-8859-1", "EUC-JP", universal_newline: true))
assert_equal([[iso88591,utf8], "universal_newline", [utf8,utf32be]],
Encoding::Converter.search_convpath("ISO-8859-1", "UTF-32BE", universal_newline: true))
end
def test_invalid_replace2
assert_raise(ArgumentError) {
broken = "\x80".force_encoding("euc-jp")
"".encode("euc-jp", :undef => :replace, :replace => broken)
}
end
def test_newline_option
ec1 = Encoding::Converter.new("", "", universal_newline: true)
ec2 = Encoding::Converter.new("", "", newline: :universal)
assert_equal(ec1, ec2)
assert_raise_with_message(ArgumentError, /\u{3042}/) {
Encoding::Converter.new("", "", newline: "\u{3042}".to_sym)
}
newlines = %i[universal_newline crlf_newline cr_newline]
(2..newlines.size).each do |i|
newlines.combination(i) do |opts|
assert_raise(Encoding::ConverterNotFoundError, "#{opts} are mutually exclusive") do
Encoding::Converter.new("", "", **opts.inject({}) {|o,nl|o[nl]=true;o})
end
end
end
newlines.each do |nl|
opts = {newline: :universal, nl => true}
2021-04-25 11:10:39 -04:00
ec2 = assert_warning(/:newline option precedes/, opts.inspect) do
Encoding::Converter.new("", "", **opts)
end
assert_equal(ec1, ec2)
end
end
def test_default_external
Encoding.list.grep(->(enc) {/\AISO-8859-\d+\z/i =~ enc.name}) do |enc|
assert_separately(%W[--disable=gems -d - #{enc.name}], <<-EOS, ignore_stderr: true)
Encoding.default_external = ext = ARGV[0]
Encoding.default_internal = int ='utf-8'
assert_nothing_raised do
Encoding::Converter.new(ext, int)
end
EOS
end
end
end