2015-12-16 00:07:31 -05:00
|
|
|
# frozen_string_literal: false
|
2008-08-12 10:46:18 -04:00
|
|
|
require 'test/unit'
|
|
|
|
|
|
|
|
class TestEncodingConverter < Test::Unit::TestCase
|
2008-09-04 08:48:21 -04:00
|
|
|
# Runs a single +primitive_convert+ call on +ec+ and asserts on its outcome.
#
# edst:: expected contents of +dst+ after the call
# esrc:: expected contents of +src+ after the call
# eres:: expected return value of +primitive_convert+
# dst::  destination buffer handed to +primitive_convert+
# src::  source buffer handed to +primitive_convert+
# ec::   the Encoding::Converter under test
# off::  destination byte offset argument
# len::  destination byte size argument
# opts:: a Hash (forwarded as keyword arguments) or any other value
#        (forwarded positionally as the fifth argument, e.g. an Integer or nil)
def check_ec(edst, esrc, eres, dst, src, ec, off, len, opts=nil)
  # A Hash must be splatted as keywords: cfuncs that parse an option hash via
  # rb_scan_args issue keyword-argument-separation warnings when the hash is
  # passed positionally.
  res =
    if Hash === opts
      ec.primitive_convert(src, dst, off, len, **opts)
    else
      ec.primitive_convert(src, dst, off, len, opts)
    end

  # Compare binary copies of the strings so the check is byte-wise.
  assert_equal([edst.b, esrc.b, eres],
               [dst.b, src.b, res])
end
|
|
|
|
|
2008-09-04 08:48:21 -04:00
|
|
|
# Converts +consumed+ + +rest+ in one +primitive_convert+ call and asserts on
# the result.
#
# converted::     expected contents of the output buffer
# eres::          expected return value of +primitive_convert+
# obuf_bytesize:: destination byte size passed to +primitive_convert+
# ec::            an Encoding::Converter, or an Array of arguments from which
#                 one is built with Encoding::Converter.new
# consumed::      leading part of the input
# rest::          trailing part of the input; the source buffer is expected to
#                 equal this after the call
# opts::          a Hash (forwarded as keyword arguments) or any other value
#                 (forwarded positionally as the fifth argument)
def assert_econv(converted, eres, obuf_bytesize, ec, consumed, rest, opts=nil)
  ec = Encoding::Converter.new(*ec) if Array === ec
  input = consumed + rest
  output = +""

  # Mirror check_ec: a Hash of options has to be passed as keywords rather
  # than as a positional argument.
  ret =
    if Hash === opts
      ec.primitive_convert(input, output, 0, obuf_bytesize, **opts)
    else
      ec.primitive_convert(input, output, 0, obuf_bytesize, opts)
    end

  assert_equal([converted, eres, rest],
               [output, ret, input])
end
|
|
|
|
|
2008-08-30 15:21:18 -04:00
|
|
|
# Asserts that +ec.primitive_errinfo+ equals the five expected values.
#
# e_res::             expected result symbol
# e_enc1::            expected second element of the errinfo array
# e_enc2::            expected third element of the errinfo array
# e_error_bytes::     expected error bytes, or nil
# e_readagain_bytes:: expected read-again bytes, or nil
# ec::                the Encoding::Converter to inspect
#
# The two byte-string expectations are compared as binary copies; nil is
# passed through unchanged.
def assert_errinfo(e_res, e_enc1, e_enc2, e_error_bytes, e_readagain_bytes, ec)
  expected = [e_res, e_enc1, e_enc2,
              e_error_bytes&.b,
              e_readagain_bytes&.b]
  assert_equal(expected, ec.primitive_errinfo)
end
|
|
|
|
|
2008-09-08 10:33:17 -04:00
|
|
|
def test_s_asciicompat_encoding
|
|
|
|
assert_equal(Encoding::STATELESS_ISO_2022_JP, Encoding::Converter.asciicompat_encoding("ISO-2022-JP"))
|
|
|
|
assert_equal(Encoding::STATELESS_ISO_2022_JP, Encoding::Converter.asciicompat_encoding(Encoding::ISO_2022_JP))
|
|
|
|
assert_equal(Encoding::UTF_8, Encoding::Converter.asciicompat_encoding("UTF-16BE"))
|
|
|
|
assert_equal(Encoding::UTF_8, Encoding::Converter.asciicompat_encoding("UTF-16LE"))
|
|
|
|
assert_equal(Encoding::UTF_8, Encoding::Converter.asciicompat_encoding("UTF-32BE"))
|
|
|
|
assert_equal(Encoding::UTF_8, Encoding::Converter.asciicompat_encoding("UTF-32LE"))
|
|
|
|
assert_nil(Encoding::Converter.asciicompat_encoding("EUC-JP"))
|
|
|
|
assert_nil(Encoding::Converter.asciicompat_encoding("UTF-8"))
|
|
|
|
assert_nil(Encoding::Converter.asciicompat_encoding(Encoding::UTF_8))
|
2008-09-12 13:30:07 -04:00
|
|
|
assert_nil(Encoding::Converter.asciicompat_encoding("xml_attr_escape"))
|
2008-09-08 10:33:17 -04:00
|
|
|
assert_nil(Encoding::Converter.asciicompat_encoding("encoding-not-exist"))
|
2008-09-05 13:23:49 -04:00
|
|
|
end
|
|
|
|
|
2008-09-08 10:33:17 -04:00
|
|
|
# Round-trips an ISO-2022-JP string through the encoding reported by
# asciicompat_encoding and back, expecting the original string.
def test_asciicompat_encoding_iso2022jp
  acenc = Encoding::Converter.asciicompat_encoding("ISO-2022-JP")
  str = "\e$B~~\e(B".force_encoding("iso-2022-jp")
  str2 = str.encode(acenc)
  str3 = str2.encode("ISO-2022-JP")
  assert_equal(str, str3)
end
|
|
|
|
|
2008-09-12 13:13:04 -04:00
|
|
|
def test_s_new
|
2008-08-14 08:35:19 -04:00
|
|
|
assert_kind_of(Encoding::Converter, Encoding::Converter.new("UTF-8", "EUC-JP"))
|
|
|
|
assert_kind_of(Encoding::Converter, Encoding::Converter.new(Encoding::UTF_8, Encoding::EUC_JP))
|
|
|
|
end
|
|
|
|
|
2008-09-12 13:13:04 -04:00
|
|
|
def test_s_new_convpath
|
|
|
|
assert_equal([], Encoding::Converter.new([]).convpath)
|
|
|
|
assert_equal([[Encoding::UTF_8, Encoding::EUC_JP]],
|
|
|
|
Encoding::Converter.new([["UTF-8", "EUC-JP"]]).convpath)
|
|
|
|
assert_equal([[Encoding::UTF_8, Encoding::WINDOWS_31J]],
|
|
|
|
Encoding::Converter.new([["utf-8", "cp932"]]).convpath)
|
|
|
|
assert_equal([[Encoding::UTF_8, Encoding::EUC_JP]],
|
|
|
|
Encoding::Converter.new([[Encoding::UTF_8, Encoding::EUC_JP]]).convpath)
|
|
|
|
assert_equal([[Encoding::ISO_8859_1, Encoding::UTF_8],
|
|
|
|
[Encoding::UTF_8, Encoding::EUC_JP]],
|
|
|
|
Encoding::Converter.new([["iso-8859-1", "euc-jp"]]).convpath)
|
|
|
|
assert_equal([[Encoding::ISO_8859_1, Encoding::UTF_8],
|
|
|
|
[Encoding::UTF_8, Encoding::EUC_JP],
|
|
|
|
"universal_newline"],
|
|
|
|
Encoding::Converter.new([["iso-8859-1", "euc-jp"], "universal_newline"]).convpath)
|
|
|
|
assert_equal(["universal_newline",
|
|
|
|
[Encoding::ISO_8859_1, Encoding::UTF_8],
|
|
|
|
[Encoding::UTF_8, Encoding::EUC_JP],
|
|
|
|
"universal_newline"],
|
|
|
|
Encoding::Converter.new(["universal_newline", ["iso-8859-1", "euc-jp"], "universal_newline"]).convpath)
|
|
|
|
end
|
|
|
|
|
|
|
|
def test_s_new_fail
|
2008-08-14 20:05:06 -04:00
|
|
|
name1 = "encoding-which-is-not-exist-1"
|
|
|
|
name2 = "encoding-which-is-not-exist-2"
|
|
|
|
|
2008-09-25 08:47:30 -04:00
|
|
|
assert_raise(Encoding::ConverterNotFoundError) {
|
2008-08-14 20:05:06 -04:00
|
|
|
Encoding::Converter.new(name1, name2)
|
|
|
|
}
|
|
|
|
|
|
|
|
encoding_list = Encoding.list.map {|e| e.name }
|
2013-12-13 04:18:05 -05:00
|
|
|
assert_not_include(encoding_list, name1)
|
|
|
|
assert_not_include(encoding_list, name2)
|
2008-08-14 20:05:06 -04:00
|
|
|
end
|
|
|
|
|
2008-09-05 18:27:46 -04:00
|
|
|
# Constructing a converter between UTF-8 and UTF-16BE (either direction) with
# any of the three newline decorator flags must not raise.
def test_newline_converter_with_ascii_incompatible
  [
    ["UTF-8", "UTF-16BE", Encoding::Converter::UNIVERSAL_NEWLINE_DECORATOR],
    ["UTF-16BE", "UTF-8", Encoding::Converter::CRLF_NEWLINE_DECORATOR],
    ["UTF-16BE", "UTF-8", Encoding::Converter::CR_NEWLINE_DECORATOR],

    ["UTF-16BE", "UTF-8", Encoding::Converter::UNIVERSAL_NEWLINE_DECORATOR],
    ["UTF-8", "UTF-16BE", Encoding::Converter::CRLF_NEWLINE_DECORATOR],
    ["UTF-8", "UTF-16BE", Encoding::Converter::CR_NEWLINE_DECORATOR],
  ].each do |src_enc, dst_enc, decorator|
    assert_nothing_raised {
      Encoding::Converter.new(src_enc, dst_enc, decorator)
    }
  end
end
|
|
|
|
|
2008-08-14 10:28:10 -04:00
|
|
|
# #source_encoding and #destination_encoding report the encodings the
# converter was created with.
def test_get_encoding
  ec = Encoding::Converter.new("UTF-8", "EUC-JP")
  assert_equal(Encoding::UTF_8, ec.source_encoding)
  assert_equal(Encoding::EUC_JP, ec.destination_encoding)
end
|
|
|
|
|
2008-08-14 12:06:33 -04:00
|
|
|
# primitive_convert changes the destination buffer's encoding from
# ASCII-8BIT to the converter's destination encoding.
def test_result_encoding
  ec = Encoding::Converter.new("UTF-8", "EUC-JP")
  dst = "".b
  assert_equal(Encoding::ASCII_8BIT, dst.encoding)
  ec.primitive_convert("\u{3042}", dst, nil, 10)
  assert_equal(Encoding::EUC_JP, dst.encoding)
end
|
|
|
|
|
2008-08-14 06:38:41 -04:00
|
|
|
# Exercises the destination byte offset / byte size arguments of
# primitive_convert: a nil offset and an offset of 1 on "b" both yield "ba",
# an offset of 0 yields "a", and an offset past the end of the buffer, a
# negative offset, or a negative size raise ArgumentError.
def test_output_region
  ec = Encoding::Converter.new("UTF-8", "EUC-JP")

  dst = "b"
  ec.primitive_convert("a", dst, nil, 1, partial_input: true)
  assert_equal("ba", dst)

  dst = "b"
  ec.primitive_convert("a", dst, 0, 1, partial_input: true)
  assert_equal("a", dst)

  dst = "b"
  ec.primitive_convert("a", dst, 1, 1, partial_input: true)
  assert_equal("ba", dst)

  assert_raise(ArgumentError) {
    ec.primitive_convert("a", "b", 2, 1, partial_input: true)
  }
  assert_raise(ArgumentError) {
    ec.primitive_convert("a", "b", -1, 1, partial_input: true)
  }
  assert_raise(ArgumentError) {
    ec.primitive_convert("a", "b", 1, -1, partial_input: true)
  }
end
|
|
|
|
|
2008-08-28 13:39:02 -04:00
|
|
|
# A nil source buffer is accepted by primitive_convert and yields :finished.
def test_nil_source_buffer
  ec = Encoding::Converter.new("UTF-8", "EUC-JP")
  ret = ec.primitive_convert(nil, "", nil, 10)
  assert_equal(:finished, ret)
end
|
|
|
|
|
2008-08-28 13:39:02 -04:00
|
|
|
# With both destination offset and size given as nil, the whole 10000-character
# input is converted in one call.
def test_nil_destination_bytesize
  ec = Encoding::Converter.new("Shift_JIS", "UTF-8")
  n = 10000
  src = "\xa1".force_encoding("Shift_JIS") * n
  dst = ""
  ret = ec.primitive_convert(src, dst, nil, nil)
  assert_equal(:finished, ret)
  assert_equal("\xEF\xBD\xA1".force_encoding("UTF-8") * n, dst)
end
|
|
|
|
|
2008-08-31 01:18:29 -04:00
|
|
|
# With the destination offset and size arguments omitted entirely, the whole
# 10000-character input is converted in one call.
def test_nil_destination_bytesize2
  ec = Encoding::Converter.new("Shift_JIS", "UTF-8")
  n = 10000
  src = "\xa1".force_encoding("Shift_JIS") * n
  dst = ""
  ret = ec.primitive_convert(src, dst)
  assert_equal(:finished, ret)
  assert_equal("\xEF\xBD\xA1".force_encoding("UTF-8") * n, dst)
end
|
|
|
|
|
|
|
|
# With a destination offset of 3 and a nil size, output is written starting at
# byte 3 of the pre-filled buffer: the first three bytes ("abc") are kept and
# the rest of the buffer is replaced by the converted text.
def test_nil_destination_bytesize_with_nonnil_byteoffset
  ec = Encoding::Converter.new("Shift_JIS", "UTF-8")
  n = 2000
  src = "\xa1".force_encoding("Shift_JIS") * n
  dst = "abcd" * 2000
  ret = ec.primitive_convert(src, dst, 3, nil)
  assert_equal(:finished, ret)
  assert_equal("abc" + "\xEF\xBD\xA1".force_encoding("UTF-8") * n, dst)
end
|
|
|
|
|
2008-08-14 06:38:41 -04:00
|
|
|
# An empty source yields :source_buffer_empty when partial_input is set and
# :finished when it is not.
def test_partial_input
  ec = Encoding::Converter.new("UTF-8", "EUC-JP")
  ret = ec.primitive_convert("", "", nil, 10, partial_input: true)
  assert_equal(:source_buffer_empty, ret)
  ret = ec.primitive_convert("", "", nil, 10)
  assert_equal(:finished, ret)
end
|
|
|
|
|
2008-08-13 20:40:25 -04:00
|
|
|
# Converts "abc\u{3042}def" with a destination size of 1 per call, checking
# the accumulated output and remaining input after every step. The same dst
# and src strings are reused across calls via the shared argument array.
def test_accumulate_dst1
  ec = Encoding::Converter.new("UTF-8", "EUC-JP")
  a = ["", "abc\u{3042}def", ec, nil, 1]
  check_ec("a", "c\u{3042}def", :destination_buffer_full, *a)
  check_ec("ab", "\u{3042}def", :destination_buffer_full, *a)
  check_ec("abc", "def", :destination_buffer_full, *a)
  check_ec("abc\xA4", "def", :destination_buffer_full, *a)
  check_ec("abc\xA4\xA2", "ef", :destination_buffer_full, *a)
  check_ec("abc\xA4\xA2d", "f", :destination_buffer_full, *a)
  check_ec("abc\xA4\xA2de", "", :destination_buffer_full, *a)
  check_ec("abc\xA4\xA2def", "", :finished, *a)
end
|
|
|
|
|
2008-08-13 20:40:25 -04:00
|
|
|
# Same scenario as converting "abc\u{3042}def" step by step, but with a
# destination size of 2 per call.
def test_accumulate_dst2
  ec = Encoding::Converter.new("UTF-8", "EUC-JP")
  a = ["", "abc\u{3042}def", ec, nil, 2]
  check_ec("ab", "\u{3042}def", :destination_buffer_full, *a)
  check_ec("abc\xA4", "def", :destination_buffer_full, *a)
  check_ec("abc\xA4\xA2d", "f", :destination_buffer_full, *a)
  check_ec("abc\xA4\xA2def", "", :finished, *a)
end
|
|
|
|
|
2008-08-12 10:46:18 -04:00
|
|
|
# Converts an empty string and "a" and expects :finished for both.
# NOTE(review): despite the method name, the converter is built from
# ["UTF-8", "EUC-JP"] — confirm which direction was intended.
def test_eucjp_to_utf8
  assert_econv("", :finished, 100, ["UTF-8", "EUC-JP"], "", "")
  assert_econv("a", :finished, 100, ["UTF-8", "EUC-JP"], "a", "")
end
|
|
|
|
|
|
|
|
# Converting an empty string with a ["Shift_JIS", "ISO-2022-JP"] converter
# produces empty output and :finished.
def test_iso2022jp
  assert_econv("", :finished, 100, ["Shift_JIS", "ISO-2022-JP"], "", "")
end
|
|
|
|
|
2008-08-14 22:32:47 -04:00
|
|
|
# Feeds EUC-JP input piece by piece (partial_input) into an
# EUC-JP -> ISO-2022-JP converter and checks the accumulated output,
# including escape sequences, after each piece. The final call replaces the
# options with the integer 0 and expects :finished.
def test_iso2022jp_encode
  ec = Encoding::Converter.new("EUC-JP", "ISO-2022-JP")
  src = ""
  a = ["", src, ec, nil, 50, {:partial_input=>true}]
  src << "a"; check_ec("a", "", :source_buffer_empty, *a)
  src << "\xA2"; check_ec("a", "", :source_buffer_empty, *a)
  src << "\xA4"; check_ec("a\e$B\"$", "", :source_buffer_empty, *a)
  src << "\xA1"; check_ec("a\e$B\"$", "", :source_buffer_empty, *a)
  src << "\xA2"; check_ec("a\e$B\"$!\"", "", :source_buffer_empty, *a)
  src << "b"; check_ec("a\e$B\"$!\"\e(Bb", "", :source_buffer_empty, *a)
  src << "\xA2\xA6"; check_ec("a\e$B\"$!\"\e(Bb\e$B\"&", "", :source_buffer_empty, *a)
  # Swap the option hash for integer flags 0 (no partial_input) to finish.
  a[-1] = 0; check_ec("a\e$B\"$!\"\e(Bb\e$B\"&\e(B", "", :finished, *a)
end
|
|
|
|
|
2008-08-14 22:32:47 -04:00
|
|
|
# Feeds ISO-2022-JP input byte by byte (partial_input) into an
# ISO-2022-JP -> EUC-JP converter and checks the accumulated output and the
# result symbol after each byte, including one :invalid_byte_sequence step.
def test_iso2022jp_decode
  ec = Encoding::Converter.new("ISO-2022-JP", "EUC-JP")
  src = ""
  a = ["", src, ec, nil, 50, {:partial_input=>true}]
  src << "a"; check_ec("a", "", :source_buffer_empty, *a)
  src << "\e"; check_ec("a", "", :source_buffer_empty, *a)
  src << "$"; check_ec("a", "", :source_buffer_empty, *a)
  src << "B"; check_ec("a", "", :source_buffer_empty, *a)
  src << "\x21"; check_ec("a", "", :source_buffer_empty, *a)
  src << "\x22"; check_ec("a\xA1\xA2", "", :source_buffer_empty, *a)
  src << "\n"; check_ec("a\xA1\xA2", "", :invalid_byte_sequence, *a)
  src << "\x23"; check_ec("a\xA1\xA2", "", :source_buffer_empty, *a)
  src << "\x24"; check_ec("a\xA1\xA2\xA3\xA4", "", :source_buffer_empty, *a)
  src << "\e"; check_ec("a\xA1\xA2\xA3\xA4", "", :source_buffer_empty, *a)
  src << "("; check_ec("a\xA1\xA2\xA3\xA4", "", :source_buffer_empty, *a)
  src << "B"; check_ec("a\xA1\xA2\xA3\xA4", "", :source_buffer_empty, *a)
  src << "c"; check_ec("a\xA1\xA2\xA3\xA4c", "", :source_buffer_empty, *a)
  src << "\n"; check_ec("a\xA1\xA2\xA3\xA4c\n","", :source_buffer_empty, *a)
end
|
|
|
|
|
2008-08-12 10:46:18 -04:00
|
|
|
# A single conversion call stops with :invalid_byte_sequence at an invalid
# byte, leaving the output converted so far and the unread input as given.
def test_invalid
  assert_econv("", :invalid_byte_sequence, 100, ["UTF-8", "EUC-JP"], "\x80", "")
  assert_econv("a", :invalid_byte_sequence, 100, ["UTF-8", "EUC-JP"], "a\x80", "")
  assert_econv("a", :invalid_byte_sequence, 100, ["UTF-8", "EUC-JP"], "a\x80", "\x80")
  assert_econv("abc", :invalid_byte_sequence, 100, ["UTF-8", "EUC-JP"], "abc\xFF", "def")
  assert_econv("abc", :invalid_byte_sequence, 100, ["Shift_JIS", "EUC-JP"], "abc\xFF", "def")
  assert_econv("abc", :invalid_byte_sequence, 100, ["ISO-2022-JP", "EUC-JP"], "abc\xFF", "def")
end
|
2008-08-13 13:24:42 -04:00
|
|
|
|
2008-08-13 20:40:25 -04:00
|
|
|
# Converts "abc\xFFdef" from Shift_JIS with a destination size of 1 per call;
# the invalid byte is reported once and conversion then continues to the end.
def test_invalid2
  ec = Encoding::Converter.new("Shift_JIS", "EUC-JP")
  a = ["", "abc\xFFdef", ec, nil, 1]
  check_ec("a", "c\xFFdef", :destination_buffer_full, *a)
  check_ec("ab", "\xFFdef", :destination_buffer_full, *a)
  check_ec("abc", "def", :invalid_byte_sequence, *a)
  check_ec("abcd", "f", :destination_buffer_full, *a)
  check_ec("abcde", "", :destination_buffer_full, *a)
  check_ec("abcdef", "", :finished, *a)
end
|
|
|
|
|
2008-08-14 02:35:33 -04:00
|
|
|
# Converts "abc\xFFdef" from Shift_JIS with a destination size of 10: the
# first call stops at the invalid byte, the second finishes.
def test_invalid3
  ec = Encoding::Converter.new("Shift_JIS", "EUC-JP")
  a = ["", "abc\xFFdef", ec, nil, 10]
  check_ec("abc", "def", :invalid_byte_sequence, *a)
  check_ec("abcdef", "", :finished, *a)
end
|
|
|
|
|
|
|
|
# Converts "abc\xFFdef" from Shift_JIS with the after_output option: each
# call returns :after_output once output has been produced, with one
# :invalid_byte_sequence step at the invalid byte.
def test_invalid4
  ec = Encoding::Converter.new("Shift_JIS", "EUC-JP")
  a = ["", "abc\xFFdef", ec, nil, 10, {:after_output=>true}]
  check_ec("a", "bc\xFFdef", :after_output, *a)
  check_ec("ab", "c\xFFdef", :after_output, *a)
  check_ec("abc", "\xFFdef", :after_output, *a)
  check_ec("abc", "def", :invalid_byte_sequence, *a)
  check_ec("abcd", "ef", :after_output, *a)
  check_ec("abcde", "f", :after_output, *a)
  check_ec("abcdef", "", :after_output, *a)
  check_ec("abcdef", "", :finished, *a)
end
|
|
|
|
|
2008-08-14 20:18:42 -04:00
|
|
|
# Feeds UTF-16LE input one byte at a time (partial_input) into a
# UTF-16LE -> UTF-8 converter and checks output and result after each byte,
# including two :invalid_byte_sequence steps.
def test_invalid_utf16le
  ec = Encoding::Converter.new("UTF-16LE", "UTF-8")
  src = ""
  a = ["", src, ec, nil, 50, {:partial_input=>true}]
  src << "A"; check_ec("", "", :source_buffer_empty, *a)
  src << "\x00"; check_ec("A", "", :source_buffer_empty, *a)
  src << "\x00"; check_ec("A", "", :source_buffer_empty, *a)
  src << "\xd8"; check_ec("A", "", :source_buffer_empty, *a)
  src << "\x01"; check_ec("A", "", :source_buffer_empty, *a)
  src << "\x02"; check_ec("A", "", :invalid_byte_sequence, *a)
  src << "\x03"; check_ec("A\u{0201}", "", :source_buffer_empty, *a)
  src << "\x04"; check_ec("A\u{0201}\u{0403}", "", :source_buffer_empty, *a)
  src << "\x00"; check_ec("A\u{0201}\u{0403}", "", :source_buffer_empty, *a)
  src << "\xd8"; check_ec("A\u{0201}\u{0403}", "", :source_buffer_empty, *a)
  src << "\x00"; check_ec("A\u{0201}\u{0403}", "", :source_buffer_empty, *a)
  src << "\xd8"; check_ec("A\u{0201}\u{0403}", "", :invalid_byte_sequence, *a)
  src << "\x00"; check_ec("A\u{0201}\u{0403}", "", :source_buffer_empty, *a)
  src << "\xdc"; check_ec("A\u{0201}\u{0403}\u{10000}", "", :source_buffer_empty, *a)
end
|
|
|
|
|
|
|
|
# Feeds UTF-16BE input one byte at a time (partial_input) into a
# UTF-16BE -> UTF-8 converter and checks output and result after each byte,
# including two :invalid_byte_sequence steps.
def test_invalid_utf16be
  ec = Encoding::Converter.new("UTF-16BE", "UTF-8")
  src = ""
  a = ["", src, ec, nil, 50, {:partial_input=>true}]
  src << "\x00"; check_ec("", "", :source_buffer_empty, *a)
  src << "A"; check_ec("A", "", :source_buffer_empty, *a)
  src << "\xd8"; check_ec("A", "", :source_buffer_empty, *a)
  src << "\x00"; check_ec("A", "", :source_buffer_empty, *a)
  src << "\x02"; check_ec("A", "", :invalid_byte_sequence, *a)
  src << "\x01"; check_ec("A\u{0201}", "", :source_buffer_empty, *a)
  src << "\x04"; check_ec("A\u{0201}", "", :source_buffer_empty, *a)
  src << "\x03"; check_ec("A\u{0201}\u{0403}", "", :source_buffer_empty, *a)
  src << "\xd8"; check_ec("A\u{0201}\u{0403}", "", :source_buffer_empty, *a)
  src << "\x00"; check_ec("A\u{0201}\u{0403}", "", :source_buffer_empty, *a)
  src << "\xd8"; check_ec("A\u{0201}\u{0403}", "", :invalid_byte_sequence, *a)
  src << "\x00"; check_ec("A\u{0201}\u{0403}", "", :source_buffer_empty, *a)
  src << "\xdc"; check_ec("A\u{0201}\u{0403}", "", :source_buffer_empty, *a)
  src << "\x00"; check_ec("A\u{0201}\u{0403}\u{10000}", "", :source_buffer_empty, *a)
end
|
|
|
|
|
2008-08-14 20:27:03 -04:00
|
|
|
# Feeds UTF-32BE input one byte at a time (partial_input) into a
# UTF-32BE -> UTF-8 converter. Each group of four bytes below is one code
# unit; the second group is reported as :invalid_byte_sequence.
def test_invalid_utf32be
  ec = Encoding::Converter.new("UTF-32BE", "UTF-8")
  src = ""
  a = ["", src, ec, nil, 50, {:partial_input=>true}]
  src << "\x00"; check_ec("", "", :source_buffer_empty, *a)
  src << "\x00"; check_ec("", "", :source_buffer_empty, *a)
  src << "\x00"; check_ec("", "", :source_buffer_empty, *a)
  src << "A"; check_ec("A", "", :source_buffer_empty, *a)

  src << "\x00"; check_ec("A", "", :source_buffer_empty, *a)
  src << "\x00"; check_ec("A", "", :source_buffer_empty, *a)
  src << "\xdc"; check_ec("A", "", :source_buffer_empty, *a)
  src << "\x00"; check_ec("A", "", :invalid_byte_sequence, *a)

  src << "\x00"; check_ec("A", "", :source_buffer_empty, *a)
  src << "\x00"; check_ec("A", "", :source_buffer_empty, *a)
  src << "\x00"; check_ec("A", "", :source_buffer_empty, *a)
  src << "B"; check_ec("AB", "", :source_buffer_empty, *a)

  src << "\x00"; check_ec("AB", "", :source_buffer_empty, *a)
  src << "\x00"; check_ec("AB", "", :source_buffer_empty, *a)
  src << "\x00"; check_ec("AB", "", :source_buffer_empty, *a)
  src << "C"; check_ec("ABC", "", :source_buffer_empty, *a)
end
|
|
|
|
|
|
|
|
# Feeds UTF-32LE input one byte at a time (partial_input) into a
# UTF-32LE -> UTF-8 converter. Each group of four bytes below is one code
# unit; the second group is reported as :invalid_byte_sequence.
def test_invalid_utf32le
  ec = Encoding::Converter.new("UTF-32LE", "UTF-8")
  src = ""
  a = ["", src, ec, nil, 50, {:partial_input=>true}]
  src << "A"; check_ec("", "", :source_buffer_empty, *a)
  src << "\x00"; check_ec("", "", :source_buffer_empty, *a)
  src << "\x00"; check_ec("", "", :source_buffer_empty, *a)
  src << "\x00"; check_ec("A", "", :source_buffer_empty, *a)

  src << "\x00"; check_ec("A", "", :source_buffer_empty, *a)
  src << "\xdc"; check_ec("A", "", :source_buffer_empty, *a)
  src << "\x00"; check_ec("A", "", :source_buffer_empty, *a)
  src << "\x00"; check_ec("A", "", :invalid_byte_sequence, *a)

  src << "B"; check_ec("A", "", :source_buffer_empty, *a)
  src << "\x00"; check_ec("A", "", :source_buffer_empty, *a)
  src << "\x00"; check_ec("A", "", :source_buffer_empty, *a)
  src << "\x00"; check_ec("AB", "", :source_buffer_empty, *a)

  src << "C"; check_ec("AB", "", :source_buffer_empty, *a)
  src << "\x00"; check_ec("AB", "", :source_buffer_empty, *a)
  src << "\x00"; check_ec("AB", "", :source_buffer_empty, *a)
  src << "\x00"; check_ec("ABC", "", :source_buffer_empty, *a)
end
|
|
|
|
|
2008-08-12 18:43:17 -04:00
|
|
|
# A UTF-16BE -> EUC-JP conversion of input containing both an unconvertible
# character and an invalid sequence reports :undefined_conversion, then
# :invalid_byte_sequence, then :finished on successive calls.
def test_errors
  ec = Encoding::Converter.new("UTF-16BE", "EUC-JP")
  a = ["", "\xFF\xFE\x00A\xDC\x00\x00B", ec, nil, 10]
  check_ec("", "\x00A\xDC\x00\x00B", :undefined_conversion, *a)
  check_ec("A", "\x00B", :invalid_byte_sequence, *a) # \xDC\x00 is invalid as UTF-16BE
  check_ec("AB", "", :finished, *a)
end
|
2008-08-13 01:30:42 -04:00
|
|
|
|
2008-08-14 02:12:27 -04:00
|
|
|
# Same input as the plain error scenario, but with the after_output option:
# :after_output results are interleaved with the two error results before
# the final :finished.
def test_errors2
  ec = Encoding::Converter.new("UTF-16BE", "EUC-JP")
  a = ["", "\xFF\xFE\x00A\xDC\x00\x00B", ec, nil, 10, {:after_output=>true}]
  check_ec("", "\x00A\xDC\x00\x00B", :undefined_conversion, *a)
  check_ec("A", "\xDC\x00\x00B", :after_output, *a)
  check_ec("A", "\x00B", :invalid_byte_sequence, *a)
  check_ec("AB", "", :after_output, *a)
  check_ec("AB", "", :finished, *a)
end
|
|
|
|
|
2008-08-13 01:30:42 -04:00
|
|
|
# With universal_newline on a UTF-8 -> EUC-JP converter, "\r\n" and "\r" in
# incrementally supplied input come out as "\n". A trailing "\r" is held back
# until the next piece of input shows whether a "\n" follows.
def test_universal_newline
  ec = Encoding::Converter.new("UTF-8", "EUC-JP", universal_newline: true)
  src = ""
  a = ["", src, ec, nil, 50, {:partial_input=>true}]
  src << "abc\r\ndef"; check_ec("abc\ndef", "", :source_buffer_empty, *a)
  src << "ghi\njkl"; check_ec("abc\ndefghi\njkl", "", :source_buffer_empty, *a)
  src << "mno\rpqr"; check_ec("abc\ndefghi\njklmno\npqr", "", :source_buffer_empty, *a)
  src << "stu\r"; check_ec("abc\ndefghi\njklmno\npqrstu", "", :source_buffer_empty, *a)
  src << "\nvwx"; check_ec("abc\ndefghi\njklmno\npqrstu\nvwx", "", :source_buffer_empty, *a)
  src << "\nyz"; check_ec("abc\ndefghi\njklmno\npqrstu\nvwx\nyz", "", :source_buffer_empty, *a)
end
|
2008-08-13 01:48:57 -04:00
|
|
|
|
2008-08-22 12:44:00 -04:00
|
|
|
# Same universal_newline scenario, but with a converter created from empty
# source and destination encoding names (newline decoration only).
def test_universal_newline2
  ec = Encoding::Converter.new("", "", universal_newline: true)
  src = ""
  a = ["", src, ec, nil, 50, {:partial_input=>true}]
  src << "abc\r\ndef"; check_ec("abc\ndef", "", :source_buffer_empty, *a)
  src << "ghi\njkl"; check_ec("abc\ndefghi\njkl", "", :source_buffer_empty, *a)
  src << "mno\rpqr"; check_ec("abc\ndefghi\njklmno\npqr", "", :source_buffer_empty, *a)
  src << "stu\r"; check_ec("abc\ndefghi\njklmno\npqrstu", "", :source_buffer_empty, *a)
  src << "\nvwx"; check_ec("abc\ndefghi\njklmno\npqrstu\nvwx", "", :source_buffer_empty, *a)
  src << "\nyz"; check_ec("abc\ndefghi\njklmno\npqrstu\nvwx\nyz", "", :source_buffer_empty, *a)
end
|
|
|
|
|
2009-11-22 14:15:55 -05:00
|
|
|
# Extends the empty-encoding universal_newline scenario with a final lone
# "\r": it is held back while partial_input is set, and emitted as "\n" once
# the options are replaced by nil and the conversion finishes.
def test_universal_newline3
  ec = Encoding::Converter.new("", "", universal_newline: true)
  src = ""
  a = ["", src, ec, nil, 50, {:partial_input=>true}]
  src << "abc\r\ndef"; check_ec("abc\ndef", "", :source_buffer_empty, *a)
  src << "ghi\njkl"; check_ec("abc\ndefghi\njkl", "", :source_buffer_empty, *a)
  src << "mno\rpqr"; check_ec("abc\ndefghi\njklmno\npqr", "", :source_buffer_empty, *a)
  src << "stu\r"; check_ec("abc\ndefghi\njklmno\npqrstu", "", :source_buffer_empty, *a)
  src << "\nvwx"; check_ec("abc\ndefghi\njklmno\npqrstu\nvwx", "", :source_buffer_empty, *a)
  src << "\nyz"; check_ec("abc\ndefghi\njklmno\npqrstu\nvwx\nyz", "", :source_buffer_empty, *a)
  src << "\r"; check_ec("abc\ndefghi\njklmno\npqrstu\nvwx\nyz", "", :source_buffer_empty, *a)
  # Drop the partial_input option so the pending "\r" is flushed.
  a[-1] = nil
  src << ""; check_ec("abc\ndefghi\njklmno\npqrstu\nvwx\nyz\n", "", :finished, *a)
end
|
|
|
|
|
2008-08-13 01:48:57 -04:00
|
|
|
# With crlf_newline on a UTF-8 -> EUC-JP converter, "\n" is output as "\r\n".
def test_crlf_newline
  ec = Encoding::Converter.new("UTF-8", "EUC-JP", crlf_newline: true)
  assert_econv("abc\r\ndef", :finished, 50, ec, "abc\ndef", "")
end
|
|
|
|
|
2008-08-22 12:44:00 -04:00
|
|
|
# With crlf_newline and empty encoding names, "\n" is output as "\r\n".
def test_crlf_newline2
  ec = Encoding::Converter.new("", "", crlf_newline: true)
  assert_econv("abc\r\ndef", :finished, 50, ec, "abc\ndef", "")
end
|
|
|
|
|
2008-08-13 01:48:57 -04:00
|
|
|
# With cr_newline on a UTF-8 -> EUC-JP converter, "\n" is output as "\r".
def test_cr_newline
  ec = Encoding::Converter.new("UTF-8", "EUC-JP", cr_newline: true)
  assert_econv("abc\rdef", :finished, 50, ec, "abc\ndef", "")
end
|
2008-08-14 02:12:27 -04:00
|
|
|
|
2008-08-22 12:44:00 -04:00
|
|
|
# With cr_newline and empty encoding names, "\n" is output as "\r".
def test_cr_newline2
  ec = Encoding::Converter.new("", "", cr_newline: true)
  assert_econv("abc\rdef", :finished, 50, ec, "abc\ndef", "")
end
|
|
|
|
|
2011-04-26 11:55:21 -04:00
|
|
|
# With universal_newline: false on a UTF-8 -> EUC-JP converter, "\r\n" passes
# through unchanged.
def test_no_universal_newline1
  ec = Encoding::Converter.new("UTF-8", "EUC-JP", universal_newline: false)
  assert_econv("abc\r\ndef", :finished, 50, ec, "abc\r\ndef", "")
end
|
|
|
|
|
|
|
|
# With universal_newline: false and empty encoding names, "\r\n" passes
# through unchanged.
def test_no_universal_newline2
  ec = Encoding::Converter.new("", "", universal_newline: false)
  assert_econv("abc\r\ndef", :finished, 50, ec, "abc\r\ndef", "")
end
|
|
|
|
|
2008-09-09 12:27:02 -04:00
|
|
|
# With the after_output option, primitive_convert returns :after_output
# after each piece of output, so "abc\u{3042}def" is converted one character
# per call before the final :finished.
def test_after_output
  ec = Encoding::Converter.new("UTF-8", "EUC-JP")
  a = ["", "abc\u{3042}def", ec, nil, 100, {:after_output=>true}]
  check_ec("a", "bc\u{3042}def", :after_output, *a)
  check_ec("ab", "c\u{3042}def", :after_output, *a)
  check_ec("abc", "\u{3042}def", :after_output, *a)
  check_ec("abc\xA4\xA2", "def", :after_output, *a)
  check_ec("abc\xA4\xA2d", "ef", :after_output, *a)
  check_ec("abc\xA4\xA2de", "f", :after_output, *a)
  check_ec("abc\xA4\xA2def", "", :after_output, *a)
  check_ec("abc\xA4\xA2def", "", :finished, *a)
end
|
2008-08-15 05:12:56 -04:00
|
|
|
|
|
|
|
# After converting the invalid EUC-JP byte "\xff" to Shift_JIS,
# primitive_errinfo reports :invalid_byte_sequence with that byte.
def test_errinfo_invalid_euc_jp
  ec = Encoding::Converter.new("EUC-JP", "Shift_JIS")
  ec.primitive_convert("\xff", "", nil, 10)
  assert_errinfo(:invalid_byte_sequence, "EUC-JP", "Shift_JIS", "\xFF", "", ec)
end
|
|
|
|
|
|
|
|
# For an EUC-JP -> ISO-8859-1 converter, an invalid input byte is reported
# against the "EUC-JP" -> "UTF-8" step.
def test_errinfo_invalid_euc_jp2
  ec = Encoding::Converter.new("EUC-JP", "ISO-8859-1")
  ec.primitive_convert("\xff", "", nil, 10)
  assert_errinfo(:invalid_byte_sequence, "EUC-JP", "UTF-8", "\xFF", "", ec)
end
|
|
|
|
|
|
|
|
# Converts the EUC-JP bytes "\xa4\xa2" towards ISO-8859-1 and expects an
# :undefined_conversion reported for the UTF-8 -> ISO-8859-1 step, with the
# offending bytes given in UTF-8 form ("\xE3\x81\x82").
def test_errinfo_undefined_hiragana
  ec = Encoding::Converter.new("EUC-JP", "ISO-8859-1")
  ec.primitive_convert("\xa4\xa2", "", nil, 10)
  assert_errinfo(:undefined_conversion, "UTF-8", "ISO-8859-1", "\xE3\x81\x82", "", ec)
end
|
|
|
|
|
|
|
|
# Supplies only the single byte "\xa4" without :partial_input, so the
# converter is expected to record :incomplete_input for the EUC-JP -> UTF-8
# step.
def test_errinfo_invalid_partial_character
  ec = Encoding::Converter.new("EUC-JP", "ISO-8859-1")
  ec.primitive_convert("\xa4", "", nil, 10)
  assert_errinfo(:incomplete_input, "EUC-JP", "UTF-8", "\xA4", "", ec)
end
|
|
|
|
|
|
|
|
# Counterpart of test_errinfo_invalid_partial_character: with
# :partial_input=>true the same lone byte is not an error, and the expected
# errinfo is :source_buffer_empty with no encoding or byte details.
def test_errinfo_valid_partial_character
  ec = Encoding::Converter.new("EUC-JP", "ISO-8859-1")
  ec.primitive_convert("\xa4", "", nil, 10, :partial_input=>true)
  assert_errinfo(:source_buffer_empty, nil, nil, nil, nil, ec)
end
|
|
|
|
|
|
|
|
# Feeds "\xd8\x00" followed by "\x00@" to a UTF-16BE -> UTF-8 converter.
# Expected: :invalid_byte_sequence with error bytes "\xD8\x00" and read-again
# byte "\x00"; the source buffer is expected to still hold "@" afterwards.
def test_errinfo_invalid_utf16be
  ec = Encoding::Converter.new("UTF-16BE", "UTF-8")
  src = "\xd8\x00\x00@"
  ec.primitive_convert(src, "", nil, 10)
  assert_errinfo(:invalid_byte_sequence, "UTF-16BE", "UTF-8", "\xD8\x00", "\x00", ec)
  assert_equal("@", src)
end
|
|
|
|
|
|
|
|
# Little-endian counterpart of test_errinfo_invalid_utf16be. Expected:
# :invalid_byte_sequence with error bytes "\x00\xD8" and read-again bytes
# "@\x00"; the source buffer is expected to be empty afterwards.
def test_errinfo_invalid_utf16le
  ec = Encoding::Converter.new("UTF-16LE", "UTF-8")
  src = "\x00\xd8@\x00"
  ec.primitive_convert(src, "", nil, 10)
  assert_errinfo(:invalid_byte_sequence, "UTF-16LE", "UTF-8", "\x00\xD8", "@\x00", ec)
  assert_equal("", src)
end
|
2008-08-15 10:17:11 -04:00
|
|
|
|
|
|
|
# Interleaves primitive_convert and insert_output on an EUC-JP -> ISO-2022-JP
# converter and checks the accumulated destination string after every step,
# including the escape sequences expected around inserted text.
def test_output_iso2022jp
  ec = Encoding::Converter.new("EUC-JP", "ISO-2022-JP")
  dst = ""
  ec.primitive_convert("\xa1\xa1", dst, nil, 10, :partial_input=>true)
  assert_equal("\e$B!!".force_encoding("ISO-2022-JP"), dst)

  # Inserted ASCII text shows up only after the next primitive_convert call.
  assert_equal(nil, ec.insert_output("???"))
  ec.primitive_convert("", dst, nil, 10, :partial_input=>true)
  assert_equal("\e$B!!\e(B???".force_encoding("ISO-2022-JP"), dst)
  ec.primitive_convert("\xa1\xa2", dst, nil, 10, :partial_input=>true)
  assert_equal("\e$B!!\e(B???\e$B!\"".force_encoding("ISO-2022-JP"), dst)

  # Inserting a string tagged as EUC-JP.
  assert_equal(nil, ec.insert_output("\xA1\xA1".force_encoding("EUC-JP")))
  ec.primitive_convert("", dst, nil, 10, :partial_input=>true)
  assert_equal("\e$B!!\e(B???\e$B!\"!!".force_encoding("ISO-2022-JP"), dst)

  ec.primitive_convert("\xa1\xa3", dst, nil, 10, :partial_input=>true)
  assert_equal("\e$B!!\e(B???\e$B!\"!!!\#".force_encoding("ISO-2022-JP"), dst)

  # Inserting a UTF-8 string.
  assert_equal(nil, ec.insert_output("\u3042"))
  ec.primitive_convert("", dst, nil, 10, :partial_input=>true)
  assert_equal("\e$B!!\e(B???\e$B!\"!!!\#$\"".force_encoding("ISO-2022-JP"), dst)

  # U+FFFD is expected to be rejected, leaving the destination untouched.
  assert_raise(Encoding::UndefinedConversionError) {
    ec.insert_output("\uFFFD")
  }

  assert_equal("\e$B!!\e(B???\e$B!\"!!!\#$\"".force_encoding("ISO-2022-JP"), dst)

  # Without :partial_input the final call appends the closing "\e(B".
  ec.primitive_convert("", dst, nil, 10)
  assert_equal("\e$B!!\e(B???\e$B!\"!!!\#$\"\e(B".force_encoding("ISO-2022-JP"), dst)
end
|
2008-08-16 23:05:40 -04:00
|
|
|
|
|
|
|
# String#encode from EUC-JP to ISO-8859-1 on input with a stray 0xA4 byte must
# raise InvalidByteSequenceError; the exception's accessors are then checked.
# The reported destination is UTF-8, not the requested ISO-8859-1.
def test_exc_invalid
  err = assert_raise(Encoding::InvalidByteSequenceError) {
    "abc\xa4def".encode("ISO-8859-1", "EUC-JP")
  }
  assert_equal("EUC-JP", err.source_encoding_name)
  assert_equal("UTF-8", err.destination_encoding_name)
  assert_equal(Encoding::EUC_JP, err.source_encoding)
  assert_equal(Encoding::UTF_8, err.destination_encoding)
  assert_equal("\xA4".force_encoding("ASCII-8BIT"), err.error_bytes)
  assert_equal("d", err.readagain_bytes)
  assert_equal(false, err.incomplete_input?)
end
|
|
|
|
|
|
|
|
# Like test_exc_invalid, but the input ends right after the 0xA4 byte, so the
# error is expected to be flagged as incomplete input with no read-again bytes.
def test_exc_incomplete
  err = assert_raise(Encoding::InvalidByteSequenceError) {
    "abc\xa4".encode("ISO-8859-1", "EUC-JP")
  }
  assert_equal("EUC-JP", err.source_encoding_name)
  assert_equal("UTF-8", err.destination_encoding_name)
  assert_equal(Encoding::EUC_JP, err.source_encoding)
  assert_equal(Encoding::UTF_8, err.destination_encoding)
  assert_equal("\xA4".force_encoding("ASCII-8BIT"), err.error_bytes)
  assert_equal(nil, err.readagain_bytes)
  assert_equal(true, err.incomplete_input?)
end
|
|
|
|
|
|
|
|
# Encoding EUC-JP text containing "\xa4\xa2" into ISO-8859-1 must raise
# UndefinedConversionError. The exception is expected to describe the
# UTF-8 -> ISO-8859-1 step and to expose the character as U+3042.
def test_exc_undef
  err = assert_raise(Encoding::UndefinedConversionError) {
    "abc\xa4\xa2def".encode("ISO-8859-1", "EUC-JP")
  }
  assert_equal("UTF-8", err.source_encoding_name)
  assert_equal("ISO-8859-1", err.destination_encoding_name)
  assert_equal(Encoding::UTF_8, err.source_encoding)
  assert_equal(Encoding::ISO_8859_1, err.destination_encoding)
  assert_equal("\u{3042}", err.error_char)
end
|
|
|
|
|
2008-08-17 00:40:59 -04:00
|
|
|
# After an invalid byte sequence, Converter#putback is expected to return the
# byte that was read ahead ("d") so that conversion can resume from it.
def test_putback
  ec = Encoding::Converter.new("EUC-JP", "ISO-8859-1")
  src = "abc\xa1def"
  dst = ""
  ret = ec.primitive_convert(src, dst, nil, 10)
  assert_equal(:invalid_byte_sequence, ret)
  assert_equal(["abc", "ef"], [dst, src])

  # Prepend the read-ahead bytes to what is left of the source.
  src = ec.putback + src
  assert_equal(["abc", "def"], [dst, src])

  ret = ec.primitive_convert(src, dst, nil, 10)
  assert_equal(:finished, ret)
  assert_equal(["abcdef", ""], [dst, src])
end
|
|
|
|
|
2008-08-30 15:36:38 -04:00
|
|
|
# Calls putback(1) repeatedly after an invalid UTF-16LE sequence. The
# assertions expect one byte per call ("\x00", then "\x21"), each tagged
# UTF-16LE, and finally an empty string once nothing is left.
def test_putback2
  ec = Encoding::Converter.new("utf-16le", "euc-jp")
  ret = ec.primitive_convert("\x00\xd8\x21\x00", "", nil, nil)
  assert_equal(:invalid_byte_sequence, ret)
  assert_equal("\x00".force_encoding("utf-16le"), ec.putback(1))
  assert_equal("\x21".force_encoding("utf-16le"), ec.putback(1))
  assert_equal("", ec.putback(1))
end
|
|
|
|
|
2008-08-23 02:02:58 -04:00
|
|
|
# With invalid: :replace, the invalid byte 0x80 is expected to be replaced by
# "?" and the whole source consumed.
def test_invalid_replace
  ec = Encoding::Converter.new("UTF-8", "EUC-JP", invalid: :replace)
  src = "abc\x80def"
  dst = ""
  ret = ec.primitive_convert(src, dst, nil, 100)
  assert_equal(:finished, ret)
  assert_equal("", src)
  assert_equal("abc?def", dst)
end
|
|
|
|
|
|
|
|
# "Ignoring" an invalid byte is expressed as replacing it with the empty
# string: the 0x80 byte is expected to vanish from the output.
def test_invalid_ignore
  ec = Encoding::Converter.new("UTF-8", "EUC-JP", invalid: :replace, replace: "")
  src = "abc\x80def"
  dst = ""
  ret = ec.primitive_convert(src, dst, nil, 100)
  assert_equal(:finished, ret)
  assert_equal("", src)
  assert_equal("abcdef", dst)
end
|
|
|
|
|
|
|
|
# With undef: :replace, U+FFFD in the UTF-8 input is expected to come out as
# "?" in the EUC-JP output.
def test_undef_replace
  ec = Encoding::Converter.new("UTF-8", "EUC-JP", undef: :replace)
  src = "abc\u{fffd}def"
  dst = ""
  ret = ec.primitive_convert(src, dst, nil, 100)
  assert_equal(:finished, ret)
  assert_equal("", src)
  assert_equal("abc?def", dst)
end
|
|
|
|
|
|
|
|
# An empty replacement string combined with undef: :replace is expected to
# drop the unconvertible U+FFFD from the output.
def test_undef_ignore
  ec = Encoding::Converter.new("UTF-8", "EUC-JP", undef: :replace, replace: "")
  src = "abc\u{fffd}def"
  dst = ""
  ret = ec.primitive_convert(src, dst, nil, 100)
  assert_equal(:finished, ret)
  assert_equal("", src)
  assert_equal("abcdef", dst)
end
|
|
|
|
|
2008-08-25 09:04:16 -04:00
|
|
|
# A converter built from two empty encoding names has no source or destination
# encoding. With a 2-byte output limit the check_ec steps expect two more
# bytes per call until the source is exhausted.
def test_noconv
  ec = Encoding::Converter.new("", "")
  assert_equal(nil, ec.source_encoding)
  assert_equal(nil, ec.destination_encoding)
  assert_equal([:source_buffer_empty, nil, nil, nil, nil], ec.primitive_errinfo)
  a = ["", "abcdefg", ec, nil, 2]
  check_ec("ab",      "cdefg", :destination_buffer_full, *a)
  check_ec("abcd",    "efg",   :destination_buffer_full, *a)
  check_ec("abcdef",  "g",     :destination_buffer_full, *a)
  check_ec("abcdefg", "",      :finished,                *a)
end
|
|
|
|
|
|
|
|
# Same stepping as test_noconv, but with :partial_input=>true the last step is
# expected to report :source_buffer_empty instead of :finished.
def test_noconv_partial
  ec = Encoding::Converter.new("", "")
  a = ["", "abcdefg", ec, nil, 2, :partial_input=>true]
  check_ec("ab",      "cdefg", :destination_buffer_full, *a)
  check_ec("abcd",    "efg",   :destination_buffer_full, *a)
  check_ec("abcdef",  "g",     :destination_buffer_full, *a)
  check_ec("abcdefg", "",      :source_buffer_empty,     *a)
end
|
|
|
|
|
2008-09-09 12:27:02 -04:00
|
|
|
# With :after_output=>true on a no-op converter, each check_ec step expects
# exactly one more byte in the destination, then a final :finished.
def test_noconv_after_output
  ec = Encoding::Converter.new("", "")
  a = ["", "abcdefg", ec, nil, 2, :after_output=>true]
  check_ec("a",       "bcdefg", :after_output, *a)
  check_ec("ab",      "cdefg",  :after_output, *a)
  check_ec("abc",     "defg",   :after_output, *a)
  check_ec("abcd",    "efg",    :after_output, *a)
  check_ec("abcde",   "fg",     :after_output, *a)
  check_ec("abcdef",  "g",      :after_output, *a)
  check_ec("abcdefg", "",       :after_output, *a)
  check_ec("abcdefg", "",       :finished,     *a)
end
|
|
|
|
|
|
|
|
# Text passed to insert_output before any conversion is expected to appear
# ahead of the converted source in the destination.
def test_noconv_insert_output
  ec = Encoding::Converter.new("", "")
  ec.insert_output("xyz")
  src = "abc"
  dst = ""
  ret = ec.primitive_convert(src, dst, nil, 20)
  assert_equal(:finished, ret)
  assert_equal(["xyzabc", ""], [dst, src])
end
|
2008-08-28 13:46:18 -04:00
|
|
|
|
|
|
|
# Converter#convert must raise on invalid and on unconvertible input, and must
# raise ArgumentError once the converter has reported :finished.
def test_convert
  ec = Encoding::Converter.new("utf-8", "euc-jp")
  assert_raise(Encoding::InvalidByteSequenceError) { ec.convert("a\x80") }
  assert_raise(Encoding::UndefinedConversionError) { ec.convert("\ufffd") }
  # A nil source without :partial_input drives the converter to :finished.
  ret = ec.primitive_convert(nil, "", nil, nil)
  assert_equal(:finished, ret)
  assert_raise(ArgumentError) { ec.convert("a") }
end
|
2008-08-28 14:00:02 -04:00
|
|
|
|
|
|
|
# For UTF-8 -> ISO-2022-JP, convert is expected to emit the escape sequence
# plus the character, and finish to emit the closing "\e(B".
def test_finish_iso2022jp
  ec = Encoding::Converter.new("utf-8", "iso-2022-jp")
  assert_equal("\e$B$\"".force_encoding("iso-2022-jp"), ec.convert("\u3042"))
  assert_equal("\e(B".force_encoding("iso-2022-jp"), ec.finish)
end
|
|
|
|
|
|
|
|
# A lone 0xEF byte is accepted by convert (more input could follow), but
# finish must then raise InvalidByteSequenceError.
def test_finish_incomplete_error
  ec = Encoding::Converter.new("utf-8", "euc-jp")
  ec.convert("\xEF")
  assert_raise(Encoding::InvalidByteSequenceError) { ec.finish }
end
|
2008-08-31 01:27:52 -04:00
|
|
|
|
|
|
|
# last_error is nil on a fresh converter and becomes an
# InvalidByteSequenceError after primitive_convert reports :incomplete_input.
def test_last_error1
  ec = Encoding::Converter.new("sjis", "euc-jp")
  assert_equal(nil, ec.last_error)
  assert_equal(:incomplete_input, ec.primitive_convert("fo\x81", "", nil, nil))
  assert_kind_of(Encoding::InvalidByteSequenceError, ec.last_error)
end
|
|
|
|
|
|
|
|
# Same scenario as test_last_error1 through the convert/finish API: convert
# returns the convertible prefix, finish raises, and last_error then holds an
# InvalidByteSequenceError.
def test_last_error2
  ec = Encoding::Converter.new("sjis", "euc-jp")
  assert_equal("fo", ec.convert("fo\x81"))
  assert_raise(Encoding::InvalidByteSequenceError) { ec.finish }
  assert_kind_of(Encoding::InvalidByteSequenceError, ec.last_error)
end
|
|
|
|
|
2008-08-31 03:37:10 -04:00
|
|
|
# Converting U+3042 to US-ASCII leaves an UndefinedConversionError in
# last_error whose error_char is the offending character.
def test_us_ascii
  ec = Encoding::Converter.new("UTF-8", "US-ASCII")
  ec.primitive_convert("\u{3042}", "")
  err = ec.last_error
  assert_kind_of(Encoding::UndefinedConversionError, err)
  assert_equal("\u{3042}", err.error_char)
end
|
|
|
|
|
|
|
|
# Same check as test_us_ascii with ISO-8859-1 as the destination encoding.
def test_88591
  ec = Encoding::Converter.new("UTF-8", "ISO-8859-1")
  ec.primitive_convert("\u{3042}", "")
  err = ec.last_error
  assert_kind_of(Encoding::UndefinedConversionError, err)
  assert_equal("\u{3042}", err.error_char)
end
|
2008-09-03 12:34:11 -04:00
|
|
|
|
|
|
|
# The default replacement string is expected to be "?" when converting to
# ISO-8859-1 and U+FFFD when converting to UTF-8.
def test_get_replacement
  ec = Encoding::Converter.new("euc-jp", "iso-8859-1")
  assert_equal("?", ec.replacement)

  ec = Encoding::Converter.new("euc-jp", "utf-8")
  assert_equal("\uFFFD", ec.replacement)
end
|
|
|
|
|
|
|
|
# A replacement string assigned through Converter#replacement= is used for
# characters that cannot be converted.
def test_set_replacement
  ec = Encoding::Converter.new("utf-8", "us-ascii", :undef => :replace)
  ec.replacement = "<undef>"
  assert_equal("a <undef> b", ec.convert("a \u3042 b"))
end
|
2008-09-03 14:18:10 -04:00
|
|
|
|
|
|
|
# Converter.new accepts its options as a hash: :undef => :replace alone yields
# "?", and adding :replace selects a custom replacement string.
def test_econv_new_hash
  ec = Encoding::Converter.new("utf-8", "us-ascii", :undef => :replace)
  assert_equal("a ? b", ec.convert("a \u3042 b"))
  ec = Encoding::Converter.new("utf-8", "us-ascii", :undef => :replace, :replace => "X")
  assert_equal("a X b", ec.convert("a \u3042 b"))
end
|
2008-09-05 16:24:18 -04:00
|
|
|
|
|
|
|
# With UNDEF_HEX_CHARREF, characters that have no mapping in the destination
# encoding are expected to be emitted as hexadecimal character references
# ("&#xHHHH;"). The expected strings must therefore contain the references as
# literal ASCII text, never the referenced characters themselves.
def test_hex_charref
  ec = Encoding::Converter.new("UTF-8", "US-ASCII", Encoding::Converter::UNDEF_HEX_CHARREF)
  assert_equal("&#x3042;", ec.convert("\u3042"))

  ec = Encoding::Converter.new("UTF-8", "EUC-JP", Encoding::Converter::UNDEF_HEX_CHARREF)
  assert_equal("\xa4\xcf\xa4\xa1\xa4\xa4&#x2665;\xa1\xa3".force_encoding("euc-jp"),
               ec.convert("\u{306f 3041 3044 2665 3002}"))

  ec = Encoding::Converter.new("UTF-8", "ISO-2022-JP", Encoding::Converter::UNDEF_HEX_CHARREF)
  assert_equal("\e$B$O$!$$\e(B&#x2665;\e$B!#".force_encoding("ISO-2022-JP"),
               ec.convert("\u{306f 3041 3044 2665 3002}"))
  assert_equal("\e(B".force_encoding("ISO-2022-JP"),
               ec.finish)

  ec = Encoding::Converter.new("EUC-JP", "US-ASCII", Encoding::Converter::UNDEF_HEX_CHARREF)
  assert_equal("&#x4EA4;&#x63DB;&#x6CD5;&#x5247;: n&#xD7;m=m&#xD7;n".force_encoding("ISO-8859-1"),
               ec.convert("\xB8\xF2\xB4\xB9\xCB\xA1\xC2\xA7: n\xA1\xDFm=m\xA1\xDFn"))

  # ISO-8859-1 can represent U+00D7 directly, so only the kanji become references.
  ec = Encoding::Converter.new("EUC-JP", "ISO-8859-1", Encoding::Converter::UNDEF_HEX_CHARREF)
  assert_equal("&#x4EA4;&#x63DB;&#x6CD5;&#x5247;: n\xD7m=m\xD7n".force_encoding("ISO-8859-1"),
               ec.convert("\xB8\xF2\xB4\xB9\xCB\xA1\xC2\xA7: n\xA1\xDFm=m\xA1\xDFn"))

  # UNDEF_HEX_CHARREF alone performs no XML escaping: "&" stays as is.
  ec = Encoding::Converter.new("UTF-8", "US-ASCII", Encoding::Converter::UNDEF_HEX_CHARREF)
  assert_equal("&", ec.convert("&"))
end
|
|
|
|
|
2008-09-06 10:05:10 -04:00
|
|
|
# Checks the two text-escaping decorators: "amp_escape" is expected to rewrite
# only "&", while "xml_text_escape" also rewrites "<" and ">". Neither touches
# the double quote, and neither emits anything on finish.
def test_xml_escape_text
  ec = Encoding::Converter.new("", "amp_escape")
  assert_equal('&amp;<>"', ec.convert("&<>\""))
  assert_equal('', ec.finish)

  ec = Encoding::Converter.new("", "xml_text_escape")
  assert_equal('&amp;&lt;&gt;"', ec.convert("&<>\""))
  assert_equal('', ec.finish)
end
|
|
|
|
|
2008-09-06 23:13:29 -04:00
|
|
|
# "xml_attr_content_escape" is expected to escape &, <, > and both quote
# characters as named entities, and to emit nothing on finish (the surrounding
# quotes are the job of a separate decorator).
def test_xml_escape_attr_content
  ec = Encoding::Converter.new("", "xml_attr_content_escape")
  assert_equal('', ec.finish)

  ec = Encoding::Converter.new("", "xml_attr_content_escape")
  assert_equal('', ec.convert(""))
  assert_equal('', ec.finish)

  ec = Encoding::Converter.new("", "xml_attr_content_escape")
  assert_equal('&quot;', ec.convert('"'))
  assert_equal('', ec.finish)

  ec = Encoding::Converter.new("", "xml_attr_content_escape")
  assert_equal('&amp;&lt;&gt;&quot;&apos;', ec.convert("&<>\"'"))
  assert_equal('', ec.finish)
end
|
|
|
|
|
|
|
|
# "xml_attr_quote" only wraps the output in double quotes and escapes nothing.
# The opening quote is expected with the first output (or on finish when
# nothing was converted), the closing quote on finish.
def test_xml_escape_attr_quote
  ec = Encoding::Converter.new("", "xml_attr_quote")
  assert_equal('""', ec.finish)

  # Converting an empty string produces no output yet, so finish emits both quotes.
  ec = Encoding::Converter.new("", "xml_attr_quote")
  assert_equal('', ec.convert(""))
  assert_equal('""', ec.finish)

  ec = Encoding::Converter.new("", "xml_attr_quote")
  assert_equal('""', ec.convert('"'))
  assert_equal('"', ec.finish)

  ec = Encoding::Converter.new("", "xml_attr_quote")
  assert_equal('"&<>"', ec.convert("&<>\""))
  assert_equal('"', ec.finish)
end
|
2008-09-06 01:47:30 -04:00
|
|
|
|
2008-09-06 10:05:10 -04:00
|
|
|
# Combines the XML decorators with UNDEF_HEX_CHARREF: markup characters are
# expected as named entities and unconvertible characters as hexadecimal
# character references, all as literal ASCII text in the expected strings.
def test_xml_escape_with_charref
  ec = Encoding::Converter.new("utf-8", "euc-jp",
                               Encoding::Converter::XML_TEXT_DECORATOR |
                               Encoding::Converter::UNDEF_HEX_CHARREF)
  assert_equal('&lt;&#x2665;&gt;&amp;"&#x2661;"', ec.convert("<\u2665>&\"\u2661\""))
  assert_equal('', ec.finish)

  # Attribute mode: opening quote up front, closing quote on finish.
  ec = Encoding::Converter.new("utf-8", "euc-jp",
                               Encoding::Converter::XML_ATTR_CONTENT_DECORATOR |
                               Encoding::Converter::XML_ATTR_QUOTE_DECORATOR |
                               Encoding::Converter::UNDEF_HEX_CHARREF)
  assert_equal('"&lt;&#x2665;&gt;&amp;&quot;&#x2661;&quot;', ec.convert("<\u2665>&\"\u2661\""))
  assert_equal('"', ec.finish)

  ec = Encoding::Converter.new("utf-8", "iso-2022-jp", Encoding::Converter::XML_TEXT_DECORATOR)
  assert_equal("&amp;\e$B$&\e(B&amp;".force_encoding("iso-2022-jp"), ec.convert("&\u3046&"))
  assert_equal('', ec.finish)
end
|
2008-09-06 02:07:34 -04:00
|
|
|
|
2008-09-06 10:05:10 -04:00
|
|
|
# Exercises the xml: option of String#encode. xml: :text is expected to escape
# "&" (quotes untouched) and to turn unconvertible characters into hexadecimal
# references; xml: :attr additionally escapes both quote characters and wraps
# the result in double quotes.
def test_xml_hasharg
  assert_equal("&amp;\e$B$&\e(B&#x2665;&amp;\"'".force_encoding("iso-2022-jp"),
               "&\u3046\u2665&\"'".encode("iso-2022-jp", xml: :text))
  assert_equal("\"&amp;\e$B$&\e(B&#x2661;&amp;&quot;&apos;\"".force_encoding("iso-2022-jp"),
               "&\u3046\u2661&\"'".encode("iso-2022-jp", xml: :attr))

  # Same source and destination encoding: the escaping must still happen.
  assert_equal("&amp;\u3046\u2661&amp;\"'".force_encoding("utf-8"),
               "&\u3046\u2661&\"'".encode("utf-8", xml: :text))
end
|
2008-09-08 10:08:44 -04:00
|
|
|
|
|
|
|
# An invalid byte sequence in stateless-ISO-2022-JP input, encoded to
# ISO-2022-JP with :invalid => :replace, is expected to yield a single "?"
# followed by the remaining "x".
def test_iso2022jp_invalid_replace
  assert_equal("?x".force_encoding("iso-2022-jp"),
               "\222\xA1x".encode("iso-2022-jp", "stateless-iso-2022-jp", :invalid => :replace))
end
|
2008-09-10 13:15:08 -04:00
|
|
|
|
|
|
|
# Converter#convpath returns the chain of [source, destination] steps (plus
# decorator names as strings) a converter goes through. Checks the empty
# converter, multi-step paths, and where "universal_newline" is placed.
def test_convpath
  eucjp = Encoding::EUC_JP
  utf8 = Encoding::UTF_8
  utf16be = Encoding::UTF_16BE
  utf16le = Encoding::UTF_16LE
  iso88591 = Encoding::ISO_8859_1
  iso2022jp = Encoding::ISO_2022_JP
  siso2022jp = Encoding::STATELESS_ISO_2022_JP

  assert_equal([], Encoding::Converter.new("", "").convpath)
  assert_equal([[eucjp, utf8], [utf8, iso88591]],
               Encoding::Converter.new(eucjp, iso88591).convpath)
  assert_equal([[eucjp, siso2022jp], [siso2022jp, iso2022jp]],
               Encoding::Converter.new(eucjp, iso2022jp).convpath)
  assert_equal([[iso2022jp, siso2022jp],
                [siso2022jp, eucjp],
                [eucjp, utf8],
                [utf8, iso88591]],
               Encoding::Converter.new(iso2022jp, iso88591).convpath)
  # The decorator sits on the UTF-8 side of each path.
  assert_equal(["universal_newline", [utf8, utf16be]],
               Encoding::Converter.new(utf8, utf16be, universal_newline: true).convpath)
  assert_equal([[utf16be, utf8], "universal_newline"],
               Encoding::Converter.new(utf16be, utf8, universal_newline: true).convpath)
  assert_equal([[utf16be, utf8], "universal_newline", [utf8, utf16le]],
               Encoding::Converter.new(utf16be, utf16le, universal_newline: true).convpath)
end
|
2008-09-12 09:53:42 -04:00
|
|
|
|
|
|
|
# Converter.search_convpath computes a conversion path without building a
# converter. It accepts encoding names or Encoding objects, and places the
# "universal_newline" decorator in the path when requested.
def test_search_convpath
  eucjp = Encoding::EUC_JP
  utf8 = Encoding::UTF_8
  utf32be = Encoding::UTF_32BE
  iso88591 = Encoding::ISO_8859_1
  assert_equal([[iso88591, utf8], [utf8, eucjp]],
               Encoding::Converter.search_convpath("ISO-8859-1", "EUC-JP"))
  assert_equal([[iso88591, utf8], [utf8, eucjp]],
               Encoding::Converter.search_convpath(iso88591, eucjp))
  assert_equal([[iso88591, utf8], [utf8, eucjp], "universal_newline"],
               Encoding::Converter.search_convpath("ISO-8859-1", "EUC-JP", universal_newline: true))
  assert_equal([[iso88591, utf8], "universal_newline", [utf8, utf32be]],
               Encoding::Converter.search_convpath("ISO-8859-1", "UTF-32BE", universal_newline: true))
end
|
2008-09-12 14:45:44 -04:00
|
|
|
|
2010-02-13 22:09:53 -05:00
|
|
|
# Passing a :replace string that is itself an invalid byte sequence in the
# destination encoding must raise ArgumentError.
def test_invalid_replace2
  assert_raise(ArgumentError) {
    broken = "\x80".force_encoding("euc-jp")
    "".encode("euc-jp", :undef => :replace, :replace => broken)
  }
end
|
2011-09-05 23:07:16 -04:00
|
|
|
|
|
|
|
# Checks the newline-related options of Converter.new:
# * newline: :universal is equivalent to universal_newline: true;
# * an unknown newline: value raises ArgumentError mentioning that value;
# * any two or more of the boolean *_newline options are mutually exclusive;
# * newline: combined with a boolean option warns and newline: wins.
# assert_raise_with_message and assert_warning are helpers defined outside
# this excerpt.
def test_newline_option
  ec1 = Encoding::Converter.new("", "", universal_newline: true)
  ec2 = Encoding::Converter.new("", "", newline: :universal)
  assert_equal(ec1, ec2)
  assert_raise_with_message(ArgumentError, /\u{3042}/) {
    Encoding::Converter.new("", "", newline: "\u{3042}".to_sym)
  }
  newlines = %i[universal_newline crlf_newline cr_newline]
  (2..newlines.size).each do |i|
    newlines.combination(i) do |opts|
      assert_raise(Encoding::ConverterNotFoundError, "#{opts} are mutually exclusive") do
        Encoding::Converter.new("", "", **opts.to_h {|nl| [nl, true]})
      end
    end
  end
  newlines.each do |nl|
    opts = {newline: :universal, nl => true}
    # NOTE(review): "preceds" is the spelling this regexp was written against;
    # confirm it still matches the interpreter's warning text.
    ec2 = assert_warning(/:newline option preceds/, opts.inspect) do
      Encoding::Converter.new("", "", **opts)
    end
    assert_equal(ec1, ec2)
  end
end
|
2011-12-09 01:01:41 -05:00
|
|
|
|
|
|
|
# For every encoding whose name matches ISO-8859-<n>, runs a separate
# interpreter (via assert_separately, defined outside this excerpt) that sets
# that encoding as default_external and UTF-8 as default_internal, then checks
# that building a converter between the two raises nothing.
def test_default_external
  Encoding.list.grep(->(enc) {/\AISO-8859-\d+\z/i =~ enc.name}) do |enc|
    assert_separately(%W[--disable=gems -d - #{enc.name}], <<-EOS, ignore_stderr: true)
      Encoding.default_external = ext = ARGV[0]
      Encoding.default_internal = int ='utf-8'
      assert_nothing_raised do
        Encoding::Converter.new(ext, int)
      end
    EOS
  end
end
|
2008-08-12 10:46:18 -04:00
|
|
|
end
|