mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
* transcode.c (econv_primitive_convert): add output_byteoffset
  argument.

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@18591 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
parent
e6eb8f0399
commit
fde26b7d7a
3 changed files with 112 additions and 49 deletions
|
@@ -1,3 +1,8 @@
|
|||
Thu Aug 14 02:23:31 2008 Tanaka Akira <akr@fsij.org>
|
||||
|
||||
* transcode.c (econv_primitive_convert): add output_byteoffset
|
||||
argument.
|
||||
|
||||
Thu Aug 14 00:43:53 2008 Nobuyoshi Nakada <nobu@ruby-lang.org>
|
||||
|
||||
* configure.in (rb_cv_gcc_function_alias): checks alias attribute.
|
||||
|
|
|
@@ -1,79 +1,110 @@
|
|||
require 'test/unit'
|
||||
|
||||
class TestEncodingConverter < Test::Unit::TestCase
|
||||
def assert_econv(ret_expected, dst_expected, src_expected, to, from, src, opt={})
|
||||
def test_output_area
|
||||
ec = Encoding::Converter.new("UTF-8", "EUC-JP")
|
||||
ec.primitive_convert(src="a", dst="b", nil, 1, Encoding::Converter::PARTIAL_INPUT)
|
||||
assert_equal("ba", dst)
|
||||
ec.primitive_convert(src="a", dst="b", 0, 1, Encoding::Converter::PARTIAL_INPUT)
|
||||
assert_equal("a", dst)
|
||||
ec.primitive_convert(src="a", dst="b", 1, 1, Encoding::Converter::PARTIAL_INPUT)
|
||||
assert_equal("ba", dst)
|
||||
assert_raise(ArgumentError) {
|
||||
ec.primitive_convert(src="a", dst="b", 2, 1, Encoding::Converter::PARTIAL_INPUT)
|
||||
}
|
||||
assert_raise(ArgumentError) {
|
||||
ec.primitive_convert(src="a", dst="b", -1, 1, Encoding::Converter::PARTIAL_INPUT)
|
||||
}
|
||||
assert_raise(ArgumentError) {
|
||||
ec.primitive_convert(src="a", dst="b", 1, -1, Encoding::Converter::PARTIAL_INPUT)
|
||||
}
|
||||
end
|
||||
|
||||
def test_accumulate_dst
|
||||
ec = Encoding::Converter.new("UTF-8", "EUC-JP")
|
||||
src = "abcdef"
|
||||
dst = ""
|
||||
ec.primitive_convert(src, dst, nil, 1)
|
||||
assert_equal(["a", "cdef"], [dst, src])
|
||||
ec.primitive_convert(src, dst, nil, 1)
|
||||
assert_equal(["ab", "def"], [dst, src])
|
||||
ec.primitive_convert(src, dst, nil, 1)
|
||||
assert_equal(["abc", "ef"], [dst, src])
|
||||
ec.primitive_convert(src, dst, nil, 1)
|
||||
assert_equal(["abcd", "f"], [dst, src])
|
||||
ec.primitive_convert(src, dst, nil, 1)
|
||||
assert_equal(["abcde", ""], [dst, src])
|
||||
ec.primitive_convert(src, dst, nil, 1)
|
||||
assert_equal(["abcdef", ""], [dst, src])
|
||||
end
|
||||
|
||||
def assert_econv_loop(ret_expected, dst_expected, src_expected, to, from, src, opt={})
|
||||
opt[:obuf_off] ||= 0
|
||||
opt[:obuf_len] ||= 100
|
||||
src = src.dup
|
||||
ec = Encoding::Converter.new(from, to)
|
||||
dst = ''
|
||||
while true
|
||||
ret = ec.primitive_convert(src, dst2="", opt[:obuf_len])
|
||||
dst << dst2
|
||||
ret = ec.primitive_convert(src, dst, nil, opt[:obuf_len])
|
||||
#p [ret, dst, src]
|
||||
break if ret != :obuf_full
|
||||
end
|
||||
assert_equal([ret_expected, dst_expected, src_expected], [ret, dst, src])
|
||||
end
|
||||
|
||||
def assert_econv(converted, expected, obuf_bytesize, ec, consumed, rest, flags=0)
|
||||
ec = Encoding::Converter.new(*ec) if Array === ec
|
||||
i = consumed + rest
|
||||
o = ""
|
||||
ret = ec.primitive_convert(i, o, 0, obuf_bytesize, flags)
|
||||
assert_equal([converted, expected, rest],
|
||||
[o, ret, i])
|
||||
end
|
||||
|
||||
def test_eucjp_to_utf8
|
||||
assert_econv(:finished, "", "", "EUC-JP", "UTF-8", "")
|
||||
assert_econv(:finished, "a", "", "EUC-JP", "UTF-8", "a")
|
||||
assert_econv("", :finished, 100, ["UTF-8", "EUC-JP"], "", "")
|
||||
assert_econv("a", :finished, 100, ["UTF-8", "EUC-JP"], "a", "")
|
||||
end
|
||||
|
||||
def test_iso2022jp
|
||||
assert_econv(:finished, "", "", "ISO-2022-JP", "Shift_JIS", "")
|
||||
assert_econv("", :finished, 100, ["Shift_JIS", "ISO-2022-JP"], "", "")
|
||||
end
|
||||
|
||||
def test_invalid
|
||||
assert_econv(:invalid_input, "", "", "EUC-JP", "UTF-8", "\x80")
|
||||
assert_econv(:invalid_input, "a", "", "EUC-JP", "UTF-8", "a\x80")
|
||||
assert_econv(:invalid_input, "a", "\x80", "EUC-JP", "UTF-8", "a\x80\x80")
|
||||
assert_econv(:invalid_input, "abc", "def", "EUC-JP", "UTF-8", "abc\xFFdef")
|
||||
assert_econv(:invalid_input, "abc", "def", "EUC-JP", "Shift_JIS", "abc\xFFdef")
|
||||
assert_econv(:invalid_input, "abc", "def", "EUC-JP", "Shift_JIS", "abc\xFFdef", :obuf_len=>1)
|
||||
assert_econv(:invalid_input, "abc", "def", "Shift_JIS", "ISO-2022-JP", "abc\xFFdef")
|
||||
assert_econv("", :invalid_input, 100, ["UTF-8", "EUC-JP"], "\x80", "")
|
||||
assert_econv("a", :invalid_input, 100, ["UTF-8", "EUC-JP"], "a\x80", "")
|
||||
assert_econv("a", :invalid_input, 100, ["UTF-8", "EUC-JP"], "a\x80", "\x80")
|
||||
assert_econv("abc", :invalid_input, 100, ["UTF-8", "EUC-JP"], "abc\xFF", "def")
|
||||
assert_econv("abc", :invalid_input, 100, ["Shift_JIS", "EUC-JP"], "abc\xFF", "def")
|
||||
assert_econv("abc", :invalid_input, 100, ["ISO-2022-JP", "EUC-JP"], "abc\xFF", "def")
|
||||
|
||||
assert_econv_loop(:invalid_input, "abc", "def", "EUC-JP", "Shift_JIS", "abc\xFFdef", :obuf_len=>1)
|
||||
end
|
||||
|
||||
def test_errors
|
||||
ec = Encoding::Converter.new("UTF-16BE", "EUC-JP")
|
||||
src = "\xFF\xFE\x00A\xDC\x00"
|
||||
ret = ec.primitive_convert(src, dst="", 10)
|
||||
assert_equal("", src)
|
||||
assert_equal("", dst)
|
||||
assert_equal(:undefined_conversion, ret) # \xFF\xFE is not representable in EUC-JP
|
||||
ret = ec.primitive_convert(src, dst="", 10)
|
||||
assert_equal("", src)
|
||||
assert_equal("A", dst)
|
||||
assert_equal(:invalid_input, ret) # \xDC\x00 is invalid as UTF-16BE
|
||||
ret = ec.primitive_convert(src, dst="", 10)
|
||||
assert_equal("", src)
|
||||
assert_equal("", dst)
|
||||
assert_equal(:finished, ret)
|
||||
assert_econv("", :undefined_conversion, 10, ec, "\xFF\xFE\x00A\xDC\x00", "\x00B")
|
||||
assert_econv("A", :invalid_input, 10, ec, "", "\x00B") # \xDC\x00 is invalid as UTF-16BE
|
||||
assert_econv("B", :finished, 10, ec, "\x00B", "")
|
||||
end
|
||||
|
||||
def test_universal_newline
|
||||
ec = Encoding::Converter.new("UTF-8", "EUC-JP", Encoding::Converter::UNIVERSAL_NEWLINE)
|
||||
ret = ec.primitive_convert(src="abc\r\ndef", dst="", 50, Encoding::Converter::PARTIAL_INPUT)
|
||||
assert_equal([:ibuf_empty, "", "abc\ndef"], [ret, src, dst])
|
||||
ret = ec.primitive_convert(src="ghi\njkl", dst="", 50, Encoding::Converter::PARTIAL_INPUT)
|
||||
assert_equal([:ibuf_empty, "", "ghi\njkl"], [ret, src, dst])
|
||||
ret = ec.primitive_convert(src="mno\rpqr", dst="", 50, Encoding::Converter::PARTIAL_INPUT)
|
||||
assert_equal([:ibuf_empty, "", "mno\npqr"], [ret, src, dst])
|
||||
ret = ec.primitive_convert(src="stu\r", dst="", 50, Encoding::Converter::PARTIAL_INPUT)
|
||||
assert_equal([:ibuf_empty, "", "stu\n"], [ret, src, dst])
|
||||
ret = ec.primitive_convert(src="\nvwx", dst="", 50, Encoding::Converter::PARTIAL_INPUT)
|
||||
assert_equal([:ibuf_empty, "", "vwx"], [ret, src, dst])
|
||||
assert_econv("abc\ndef", :ibuf_empty, 50, ec, "abc\r\ndef", "", Encoding::Converter::PARTIAL_INPUT)
|
||||
assert_econv("ghi\njkl", :ibuf_empty, 50, ec, "ghi\njkl", "", Encoding::Converter::PARTIAL_INPUT)
|
||||
assert_econv("mno\npqr", :ibuf_empty, 50, ec, "mno\rpqr", "", Encoding::Converter::PARTIAL_INPUT)
|
||||
assert_econv("stu\n", :ibuf_empty, 50, ec, "stu\r", "", Encoding::Converter::PARTIAL_INPUT)
|
||||
assert_econv("vwx", :ibuf_empty, 50, ec, "\nvwx", "", Encoding::Converter::PARTIAL_INPUT)
|
||||
assert_econv("\nyz", :ibuf_empty, 50, ec, "\nyz", "", Encoding::Converter::PARTIAL_INPUT)
|
||||
end
|
||||
|
||||
def test_crlf_newline
|
||||
ec = Encoding::Converter.new("UTF-8", "EUC-JP", Encoding::Converter::CRLF_NEWLINE)
|
||||
ret = ec.primitive_convert(src="abc\ndef", dst="", 50)
|
||||
assert_equal([:finished, "", "abc\r\ndef"], [ret, src, dst])
|
||||
assert_econv("abc\r\ndef", :finished, 50, ec, "abc\ndef", "")
|
||||
end
|
||||
|
||||
def test_cr_newline
|
||||
ec = Encoding::Converter.new("UTF-8", "EUC-JP", Encoding::Converter::CR_NEWLINE)
|
||||
ret = ec.primitive_convert(src="abc\ndef", dst="", 50)
|
||||
assert_equal([:finished, "", "abc\rdef"], [ret, src, dst])
|
||||
assert_econv("abc\rdef", :finished, 50, ec, "abc\ndef", "")
|
||||
end
|
||||
end
|
||||
|
|
45
transcode.c
45
transcode.c
|
@@ -1484,33 +1484,60 @@ check_econv(VALUE self)
|
|||
static VALUE
|
||||
econv_primitive_convert(int argc, VALUE *argv, VALUE self)
|
||||
{
|
||||
VALUE input, output, output_size_v, flags_v;
|
||||
VALUE input, output, output_byteoffset_v, output_bytesize_v, flags_v;
|
||||
rb_trans_t *ts = check_econv(self);
|
||||
rb_trans_result_t res;
|
||||
const unsigned char *ip, *is;
|
||||
unsigned char *op, *os;
|
||||
long output_size;
|
||||
long output_byteoffset, output_bytesize;
|
||||
unsigned long output_byteend;
|
||||
int flags;
|
||||
|
||||
rb_scan_args(argc, argv, "31", &input, &output, &output_size_v, &flags_v);
|
||||
rb_scan_args(argc, argv, "41", &input, &output, &output_byteoffset_v, &output_bytesize_v, &flags_v);
|
||||
|
||||
if (output_byteoffset_v == Qnil)
|
||||
output_byteoffset = 0;
|
||||
else
|
||||
output_byteoffset = NUM2LONG(output_byteoffset_v);
|
||||
|
||||
output_bytesize = NUM2LONG(output_bytesize_v);
|
||||
|
||||
output_size = NUM2LONG(output_size_v);
|
||||
if (flags_v == Qnil)
|
||||
flags = 0;
|
||||
else
|
||||
flags = NUM2INT(flags_v);
|
||||
|
||||
StringValue(output);
|
||||
StringValue(input);
|
||||
rb_str_modify(output);
|
||||
|
||||
if (rb_str_capacity(output) < output_size)
|
||||
rb_str_resize(output, output_size);
|
||||
if (output_byteoffset_v == Qnil)
|
||||
output_byteoffset = RSTRING_LEN(output);
|
||||
|
||||
if (output_byteoffset < 0)
|
||||
rb_raise(rb_eArgError, "negative output_byteoffset");
|
||||
|
||||
if (RSTRING_LEN(output) < output_byteoffset)
|
||||
rb_raise(rb_eArgError, "output_byteoffset too big");
|
||||
|
||||
if (output_bytesize < 0)
|
||||
rb_raise(rb_eArgError, "negative output_bytesize");
|
||||
|
||||
output_byteend = (unsigned long)output_byteoffset +
|
||||
(unsigned long)output_bytesize;
|
||||
|
||||
if (output_byteend < (unsigned long)output_byteoffset ||
|
||||
LONG_MAX < output_byteend)
|
||||
rb_raise(rb_eArgError, "output_byteoffset+output_bytesize too big");
|
||||
|
||||
if (rb_str_capacity(output) < output_byteend)
|
||||
rb_str_resize(output, output_byteend);
|
||||
|
||||
ip = (const unsigned char *)RSTRING_PTR(input);
|
||||
is = ip + RSTRING_LEN(input);
|
||||
|
||||
op = (unsigned char *)RSTRING_PTR(output);
|
||||
os = op + output_size;
|
||||
op = (unsigned char *)RSTRING_PTR(output) + output_byteoffset;
|
||||
os = op + output_bytesize;
|
||||
|
||||
res = rb_trans_conv(ts, &ip, is, &op, os, flags);
|
||||
rb_str_set_len(output, op-(unsigned char *)RSTRING_PTR(output));
|
||||
|
@@ -1522,7 +1549,7 @@ econv_primitive_convert(int argc, VALUE *argv, VALUE self)
|
|||
case transcode_obuf_full: return ID2SYM(rb_intern("obuf_full"));
|
||||
case transcode_ibuf_empty: return ID2SYM(rb_intern("ibuf_empty"));
|
||||
case transcode_finished: return ID2SYM(rb_intern("finished"));
|
||||
default: return INT2NUM(res);
|
||||
default: return INT2NUM(res); /* should not be reached */
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in a new issue