From fde26b7d7abc6c17debe3e293ca9346a6110aa62 Mon Sep 17 00:00:00 2001 From: akr Date: Wed, 13 Aug 2008 17:24:42 +0000 Subject: [PATCH] * transcode.c (econv_primitive_convert): add output_byteoffset argument. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@18591 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- ChangeLog | 5 ++ test/ruby/test_econv.rb | 111 +++++++++++++++++++++++++--------------- transcode.c | 45 ++++++++++++---- 3 files changed, 112 insertions(+), 49 deletions(-) diff --git a/ChangeLog b/ChangeLog index c91fe516ed..ad2b07dbcc 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,8 @@ +Thu Aug 14 02:23:31 2008 Tanaka Akira + + * transcode.c (econv_primitive_convert): add output_byteoffset + argument. + Thu Aug 14 00:43:53 2008 Nobuyoshi Nakada * configure.in (rb_cv_gcc_function_alias): checks alias attribute. diff --git a/test/ruby/test_econv.rb b/test/ruby/test_econv.rb index e9669aec18..ed00814fdc 100644 --- a/test/ruby/test_econv.rb +++ b/test/ruby/test_econv.rb @@ -1,79 +1,110 @@ require 'test/unit' class TestEncodingConverter < Test::Unit::TestCase - def assert_econv(ret_expected, dst_expected, src_expected, to, from, src, opt={}) + def test_output_area + ec = Encoding::Converter.new("UTF-8", "EUC-JP") + ec.primitive_convert(src="a", dst="b", nil, 1, Encoding::Converter::PARTIAL_INPUT) + assert_equal("ba", dst) + ec.primitive_convert(src="a", dst="b", 0, 1, Encoding::Converter::PARTIAL_INPUT) + assert_equal("a", dst) + ec.primitive_convert(src="a", dst="b", 1, 1, Encoding::Converter::PARTIAL_INPUT) + assert_equal("ba", dst) + assert_raise(ArgumentError) { + ec.primitive_convert(src="a", dst="b", 2, 1, Encoding::Converter::PARTIAL_INPUT) + } + assert_raise(ArgumentError) { + ec.primitive_convert(src="a", dst="b", -1, 1, Encoding::Converter::PARTIAL_INPUT) + } + assert_raise(ArgumentError) { + ec.primitive_convert(src="a", dst="b", 1, -1, Encoding::Converter::PARTIAL_INPUT) + } + end + + def test_accumulate_dst + ec = Encoding::Converter.new("UTF-8", "EUC-JP") + src = "abcdef" + dst = "" + ec.primitive_convert(src, dst, nil, 1) + assert_equal(["a", "cdef"], [dst, src]) + ec.primitive_convert(src, dst, nil, 1) + assert_equal(["ab", "def"], [dst, src]) + ec.primitive_convert(src, dst, nil, 1) + assert_equal(["abc", "ef"], [dst, src]) + ec.primitive_convert(src, dst, nil, 1) + assert_equal(["abcd", "f"], [dst, src]) + ec.primitive_convert(src, dst, nil, 1) + assert_equal(["abcde", ""], [dst, src]) + ec.primitive_convert(src, dst, nil, 1) + assert_equal(["abcdef", ""], [dst, src]) + end + + def assert_econv_loop(ret_expected, dst_expected, src_expected, to, from, src, opt={}) + opt[:obuf_off] ||= 0 opt[:obuf_len] ||= 100 src = src.dup ec = Encoding::Converter.new(from, to) dst = '' while true - ret = ec.primitive_convert(src, dst2="", opt[:obuf_len]) - dst << dst2 + ret = ec.primitive_convert(src, dst, nil, opt[:obuf_len]) #p [ret, dst, src] break if ret != :obuf_full end assert_equal([ret_expected, dst_expected, src_expected], [ret, dst, src]) end + def assert_econv(converted, expected, obuf_bytesize, ec, consumed, rest, flags=0) + ec = Encoding::Converter.new(*ec) if Array === ec + i = consumed + rest + o = "" + ret = ec.primitive_convert(i, o, 0, obuf_bytesize, flags) + assert_equal([converted, expected, rest], + [o, ret, i]) + end + def test_eucjp_to_utf8 - assert_econv(:finished, "", "", "EUC-JP", "UTF-8", "") - assert_econv(:finished, "a", "", "EUC-JP", "UTF-8", "a") + assert_econv("", :finished, 100, ["UTF-8", "EUC-JP"], "", "") + assert_econv("a", :finished, 100, ["UTF-8", "EUC-JP"], "a", "") end def test_iso2022jp - assert_econv(:finished, "", "", "ISO-2022-JP", "Shift_JIS", "") + assert_econv("", :finished, 100, ["Shift_JIS", "ISO-2022-JP"], "", "") end def test_invalid - assert_econv(:invalid_input, "", "", "EUC-JP", "UTF-8", "\x80") - assert_econv(:invalid_input, "a", "", "EUC-JP", "UTF-8", "a\x80") - assert_econv(:invalid_input, "a", "\x80", "EUC-JP", "UTF-8", "a\x80\x80") - assert_econv(:invalid_input, "abc", "def", "EUC-JP", "UTF-8", "abc\xFFdef") - assert_econv(:invalid_input, "abc", "def", "EUC-JP", "Shift_JIS", "abc\xFFdef") - assert_econv(:invalid_input, "abc", "def", "EUC-JP", "Shift_JIS", "abc\xFFdef", :obuf_len=>1) - assert_econv(:invalid_input, "abc", "def", "Shift_JIS", "ISO-2022-JP", "abc\xFFdef") + assert_econv("", :invalid_input, 100, ["UTF-8", "EUC-JP"], "\x80", "") + assert_econv("a", :invalid_input, 100, ["UTF-8", "EUC-JP"], "a\x80", "") + assert_econv("a", :invalid_input, 100, ["UTF-8", "EUC-JP"], "a\x80", "\x80") + assert_econv("abc", :invalid_input, 100, ["UTF-8", "EUC-JP"], "abc\xFF", "def") + assert_econv("abc", :invalid_input, 100, ["Shift_JIS", "EUC-JP"], "abc\xFF", "def") + assert_econv("abc", :invalid_input, 100, ["ISO-2022-JP", "EUC-JP"], "abc\xFF", "def") + + assert_econv_loop(:invalid_input, "abc", "def", "EUC-JP", "Shift_JIS", "abc\xFFdef", :obuf_len=>1) end def test_errors ec = Encoding::Converter.new("UTF-16BE", "EUC-JP") - src = "\xFF\xFE\x00A\xDC\x00" - ret = ec.primitive_convert(src, dst="", 10) - assert_equal("", src) - assert_equal("", dst) - assert_equal(:undefined_conversion, ret) # \xFF\xFE is not representable in EUC-JP - ret = ec.primitive_convert(src, dst="", 10) - assert_equal("", src) - assert_equal("A", dst) - assert_equal(:invalid_input, ret) # \xDC\x00 is invalid as UTF-16BE - ret = ec.primitive_convert(src, dst="", 10) - assert_equal("", src) - assert_equal("", dst) - assert_equal(:finished, ret) + assert_econv("", :undefined_conversion, 10, ec, "\xFF\xFE\x00A\xDC\x00", "\x00B") + assert_econv("A", :invalid_input, 10, ec, "", "\x00B") # \xDC\x00 is invalid as UTF-16BE + assert_econv("B", :finished, 10, ec, "\x00B", "") end def test_universal_newline ec = Encoding::Converter.new("UTF-8", "EUC-JP", Encoding::Converter::UNIVERSAL_NEWLINE) - ret = ec.primitive_convert(src="abc\r\ndef", dst="", 50, Encoding::Converter::PARTIAL_INPUT) - assert_equal([:ibuf_empty, "", "abc\ndef"], [ret, src, dst]) - ret = ec.primitive_convert(src="ghi\njkl", dst="", 50, Encoding::Converter::PARTIAL_INPUT) - assert_equal([:ibuf_empty, "", "ghi\njkl"], [ret, src, dst]) - ret = ec.primitive_convert(src="mno\rpqr", dst="", 50, Encoding::Converter::PARTIAL_INPUT) - assert_equal([:ibuf_empty, "", "mno\npqr"], [ret, src, dst]) - ret = ec.primitive_convert(src="stu\r", dst="", 50, Encoding::Converter::PARTIAL_INPUT) - assert_equal([:ibuf_empty, "", "stu\n"], [ret, src, dst]) - ret = ec.primitive_convert(src="\nvwx", dst="", 50, Encoding::Converter::PARTIAL_INPUT) - assert_equal([:ibuf_empty, "", "vwx"], [ret, src, dst]) + assert_econv("abc\ndef", :ibuf_empty, 50, ec, "abc\r\ndef", "", Encoding::Converter::PARTIAL_INPUT) + assert_econv("ghi\njkl", :ibuf_empty, 50, ec, "ghi\njkl", "", Encoding::Converter::PARTIAL_INPUT) + assert_econv("mno\npqr", :ibuf_empty, 50, ec, "mno\rpqr", "", Encoding::Converter::PARTIAL_INPUT) + assert_econv("stu\n", :ibuf_empty, 50, ec, "stu\r", "", Encoding::Converter::PARTIAL_INPUT) + assert_econv("vwx", :ibuf_empty, 50, ec, "\nvwx", "", Encoding::Converter::PARTIAL_INPUT) + assert_econv("\nyz", :ibuf_empty, 50, ec, "\nyz", "", Encoding::Converter::PARTIAL_INPUT) end def test_crlf_newline ec = Encoding::Converter.new("UTF-8", "EUC-JP", Encoding::Converter::CRLF_NEWLINE) - ret = ec.primitive_convert(src="abc\ndef", dst="", 50) - assert_equal([:finished, "", "abc\r\ndef"], [ret, src, dst]) + assert_econv("abc\r\ndef", :finished, 50, ec, "abc\ndef", "") end def test_cr_newline ec = Encoding::Converter.new("UTF-8", "EUC-JP", Encoding::Converter::CR_NEWLINE) - ret = ec.primitive_convert(src="abc\ndef", dst="", 50) - assert_equal([:finished, "", "abc\rdef"], [ret, src, dst]) + assert_econv("abc\rdef", :finished, 50, ec, "abc\ndef", "") end end diff --git a/transcode.c b/transcode.c index 76f4e68991..1bdcf9babb 100644 --- a/transcode.c +++ b/transcode.c @@ -1484,33 +1484,60 @@ check_econv(VALUE self) static VALUE econv_primitive_convert(int argc, VALUE *argv, VALUE self) { - VALUE input, output, output_size_v, flags_v; + VALUE input, output, output_byteoffset_v, output_bytesize_v, flags_v; rb_trans_t *ts = check_econv(self); rb_trans_result_t res; const unsigned char *ip, *is; unsigned char *op, *os; - long output_size; + long output_byteoffset, output_bytesize; + unsigned long output_byteend; int flags; - rb_scan_args(argc, argv, "31", &input, &output, &output_size_v, &flags_v); + rb_scan_args(argc, argv, "41", &input, &output, &output_byteoffset_v, &output_bytesize_v, &flags_v); + + if (output_byteoffset_v == Qnil) + output_byteoffset = 0; + else + output_byteoffset = NUM2LONG(output_byteoffset_v); + + output_bytesize = NUM2LONG(output_bytesize_v); - output_size = NUM2LONG(output_size_v); if (flags_v == Qnil) flags = 0; else flags = NUM2INT(flags_v); + StringValue(output); StringValue(input); rb_str_modify(output); - if (rb_str_capacity(output) < output_size) - rb_str_resize(output, output_size); + if (output_byteoffset_v == Qnil) + output_byteoffset = RSTRING_LEN(output); + + if (output_byteoffset < 0) + rb_raise(rb_eArgError, "negative output_byteoffset"); + + if (RSTRING_LEN(output) < output_byteoffset) + rb_raise(rb_eArgError, "output_byteoffset too big"); + + if (output_bytesize < 0) + rb_raise(rb_eArgError, "negative output_bytesize"); + + output_byteend = (unsigned long)output_byteoffset + + (unsigned long)output_bytesize; + + if (output_byteend < (unsigned long)output_byteoffset || + LONG_MAX < output_byteend) + rb_raise(rb_eArgError, "output_byteoffset+output_bytesize too big"); + + if (rb_str_capacity(output) < output_byteend) + rb_str_resize(output, output_byteend); ip = (const unsigned char *)RSTRING_PTR(input); is = ip + RSTRING_LEN(input); - op = (unsigned char *)RSTRING_PTR(output); - os = op + output_size; + op = (unsigned char *)RSTRING_PTR(output) + output_byteoffset; + os = op + output_bytesize; res = rb_trans_conv(ts, &ip, is, &op, os, flags); rb_str_set_len(output, op-(unsigned char *)RSTRING_PTR(output)); @@ -1522,7 +1549,7 @@ econv_primitive_convert(int argc, VALUE *argv, VALUE self) case transcode_obuf_full: return ID2SYM(rb_intern("obuf_full")); case transcode_ibuf_empty: return ID2SYM(rb_intern("ibuf_empty")); case transcode_finished: return ID2SYM(rb_intern("finished")); - default: return INT2NUM(res); + default: return INT2NUM(res); /* should not be reached */ } }