mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
* transcode.c (rb_econv_conv): new function. it doesn't consume too much
input, even for multilevel conversion. (transcode_loop): use rb_econv_conv. (econv_primitive_convert): ditto. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@18610 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
parent
012c5ca966
commit
a99e6c15b8
3 changed files with 59 additions and 9 deletions
|
@ -1,3 +1,10 @@
|
|||
Thu Aug 14 15:34:10 2008 Tanaka Akira <akr@fsij.org>
|
||||
|
||||
* transcode.c (rb_econv_conv): new function. it doesn't consume too
|
||||
much input, even for multilevel conversion.
|
||||
(transcode_loop): use rb_econv_conv.
|
||||
(econv_primitive_convert): ditto.
|
||||
|
||||
Thu Aug 14 15:27:42 2008 Nobuyoshi Nakada <nobu@ruby-lang.org>
|
||||
|
||||
* include/ruby/defines.h (RUBY_ALIAS_FUNCTION): fallback definition.
|
||||
|
|
|
@ -95,18 +95,38 @@ class TestEncodingConverter < Test::Unit::TestCase
|
|||
def test_invalid2
|
||||
ec = Encoding::Converter.new("Shift_JIS", "EUC-JP")
|
||||
a = ["", "abc\xFFdef", ec, nil, 1]
|
||||
check_ec("a", "def", :obuf_full, *a)
|
||||
check_ec("ab", "def", :obuf_full, *a)
|
||||
check_ec("a", "c\xFFdef", :obuf_full, *a)
|
||||
check_ec("ab", "\xFFdef", :obuf_full, *a)
|
||||
check_ec("abc", "def", :invalid_input, *a)
|
||||
check_ec("abcd", "", :obuf_full, *a)
|
||||
check_ec("abcd", "f", :obuf_full, *a)
|
||||
check_ec("abcde", "", :obuf_full, *a)
|
||||
check_ec("abcdef", "", :finished, *a)
|
||||
end
|
||||
|
||||
# Shift_JIS -> EUC-JP conversion of "abc\xFFdef" with the trailing
# numeric argument set to 10.
# NOTE(review): check_ec is defined elsewhere in this test file; the
# leading arguments look like (expected output, expected remaining input,
# expected result symbol) -- confirm against its definition.
def test_invalid3
  converter = Encoding::Converter.new("Shift_JIS", "EUC-JP")
  shared_args = ["", "abc\xFFdef", converter, nil, 10]

  # First step: :invalid_input is expected, paired with "abc" and "def".
  check_ec("abc", "def", :invalid_input, *shared_args)
  # Second step: :finished is expected, paired with "abcdef" and "".
  check_ec("abcdef", "", :finished, *shared_args)
end
|
||||
|
||||
# Same Shift_JIS -> EUC-JP input as the neighbouring invalid-input tests,
# but with Encoding::Converter::OUTPUT_FOLLOWED_BY_INPUT passed as the
# final argument.
# NOTE(review): check_ec is defined elsewhere in this test file; the
# leading arguments look like (expected output, expected remaining input,
# expected result symbol) -- confirm against its definition.
def test_invalid4
  converter = Encoding::Converter.new("Shift_JIS", "EUC-JP")
  shared_args = ["", "abc\xFFdef", converter, nil, 10,
                 Encoding::Converter::OUTPUT_FOLLOWED_BY_INPUT]

  # One row per check_ec call, in the order the calls must be made.
  steps = [
    ["a",      "bc\xFFdef", :output_followed_by_input],
    ["ab",     "c\xFFdef",  :output_followed_by_input],
    ["abc",    "\xFFdef",   :output_followed_by_input],
    ["abc",    "def",       :invalid_input],
    ["abcd",   "ef",        :output_followed_by_input],
    ["abcde",  "f",         :output_followed_by_input],
    ["abcdef", "",          :output_followed_by_input],
    ["abcdef", "",          :finished],
  ]

  steps.each do |first, second, result|
    check_ec(first, second, result, *shared_args)
  end
end
|
||||
|
||||
def test_errors
|
||||
ec = Encoding::Converter.new("UTF-16BE", "EUC-JP")
|
||||
a = ["", "\xFF\xFE\x00A\xDC\x00\x00B", ec, nil, 10]
|
||||
check_ec("", "\x00B", :undefined_conversion, *a)
|
||||
check_ec("", "\x00A\xDC\x00\x00B", :undefined_conversion, *a)
|
||||
check_ec("A", "\x00B", :invalid_input, *a) # \xDC\x00 is invalid as UTF-16BE
|
||||
check_ec("AB", "", :finished, *a)
|
||||
end
|
||||
|
|
33
transcode.c
33
transcode.c
|
@ -955,6 +955,25 @@ found_needreport:
|
|||
return transcode_ibuf_empty;
|
||||
}
|
||||
|
||||
static rb_trans_result_t
|
||||
rb_econv_conv(rb_trans_t *ts,
|
||||
const unsigned char **input_ptr, const unsigned char *input_stop,
|
||||
unsigned char **output_ptr, unsigned char *output_stop,
|
||||
int flags)
|
||||
{
|
||||
rb_trans_result_t res;
|
||||
|
||||
if ((flags & OUTPUT_FOLLOWED_BY_INPUT) ||
|
||||
ts->num_trans == 1)
|
||||
return rb_trans_conv(ts, input_ptr, input_stop, output_ptr, output_stop, flags);
|
||||
|
||||
flags |= OUTPUT_FOLLOWED_BY_INPUT;
|
||||
do {
|
||||
res = rb_trans_conv(ts, input_ptr, input_stop, output_ptr, output_stop, flags);
|
||||
} while (res == transcode_output_followed_by_input);
|
||||
return res;
|
||||
}
|
||||
|
||||
static void
|
||||
rb_trans_close(rb_trans_t *ts)
|
||||
{
|
||||
|
@ -1064,7 +1083,7 @@ transcode_loop(const unsigned char **in_pos, unsigned char **out_pos,
|
|||
max_output = last_tc->transcoder->max_output;
|
||||
|
||||
resume:
|
||||
ret = rb_trans_conv(ts, in_pos, in_stop, out_pos, out_stop, opt);
|
||||
ret = rb_econv_conv(ts, in_pos, in_stop, out_pos, out_stop, opt);
|
||||
if (ret == transcode_invalid_input) {
|
||||
/* deal with invalid byte sequence */
|
||||
/* todo: add more alternative behaviors */
|
||||
|
@ -1134,14 +1153,14 @@ transcode_loop(const unsigned char **in_pos, unsigned char **out_pos,
|
|||
if (ret == transcode_ibuf_empty) {
|
||||
if (ptr < in_stop) {
|
||||
input_byte = *ptr;
|
||||
ret = rb_trans_conv(ts, &p, p+1, out_pos, out_stop, PARTIAL_INPUT);
|
||||
ret = rb_econv_conv(ts, &p, p+1, out_pos, out_stop, PARTIAL_INPUT);
|
||||
}
|
||||
else {
|
||||
ret = rb_trans_conv(ts, NULL, NULL, out_pos, out_stop, 0);
|
||||
ret = rb_econv_conv(ts, NULL, NULL, out_pos, out_stop, 0);
|
||||
}
|
||||
}
|
||||
else {
|
||||
ret = rb_trans_conv(ts, NULL, NULL, out_pos, out_stop, PARTIAL_INPUT);
|
||||
ret = rb_econv_conv(ts, NULL, NULL, out_pos, out_stop, PARTIAL_INPUT);
|
||||
}
|
||||
if (&input_byte != p)
|
||||
ptr += p - &input_byte;
|
||||
|
@ -1495,10 +1514,12 @@ check_econv(VALUE self)
|
|||
*
|
||||
* possible flags:
|
||||
* Encoding::Converter::PARTIAL_INPUT # input buffer may be part of larger input
|
||||
* Encoding::Converter::OUTPUT_FOLLOWED_BY_INPUT # stop conversion after output before input
|
||||
*
|
||||
* possible results:
|
||||
* :invalid_input
|
||||
* :undefined_conversion
|
||||
* :output_followed_by_input
|
||||
* :obuf_full
|
||||
* :ibuf_empty
|
||||
* :finished
|
||||
|
@ -1527,6 +1548,8 @@ check_econv(VALUE self)
|
|||
* primitive_convert stops conversion when one of the following conditions is met.
|
||||
* - invalid byte sequence found in input buffer (:invalid_input)
|
||||
* - character not representable in output encoding (:undefined_conversion)
|
||||
* - after some output is generated, before any input is consumed (:output_followed_by_input)
|
||||
* this occurs only when OUTPUT_FOLLOWED_BY_INPUT is specified.
|
||||
* - output buffer is full (:obuf_full)
|
||||
* - input buffer is empty (:ibuf_empty)
|
||||
* this occurs only when PARTIAL_INPUT is specified.
|
||||
|
@ -1606,7 +1629,7 @@ econv_primitive_convert(int argc, VALUE *argv, VALUE self)
|
|||
op = (unsigned char *)RSTRING_PTR(output) + output_byteoffset;
|
||||
os = op + output_bytesize;
|
||||
|
||||
res = rb_trans_conv(ts, &ip, is, &op, os, flags);
|
||||
res = rb_econv_conv(ts, &ip, is, &op, os, flags);
|
||||
rb_str_set_len(output, op-(unsigned char *)RSTRING_PTR(output));
|
||||
rb_str_drop_bytes(input, ip - (unsigned char *)RSTRING_PTR(input));
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue