mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
* transcode_data.h (rb_trans_result_t): new enumeration constant:
transcode_output_followed_by_input.
* transcode.c (OUTPUT_FOLLOWED_BY_INPUT): new flag.
(transcode_restartable0): suspend when output followed by input if OUTPUT_FOLLOWED_BY_INPUT is specified.
(trans_sweep): check OUTPUT_FOLLOWED_BY_INPUT.
(rb_trans_conv): support OUTPUT_FOLLOWED_BY_INPUT.
(econv_primitive_convert): return :output_followed_by_input for transcode_output_followed_by_input.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@18608 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
parent
2c20dd1473
commit
bcae240f19
4 changed files with 118 additions and 8 deletions
13
ChangeLog
13
ChangeLog
|
@ -1,3 +1,16 @@
|
|||
Thu Aug 14 15:08:17 2008 Tanaka Akira <akr@fsij.org>
|
||||
|
||||
* transcode_data.h (rb_trans_result_t): new enumeration constant:
|
||||
transcode_output_followed_by_input.
|
||||
|
||||
* transcode.c (OUTPUT_FOLLOWED_BY_INPUT): new flag.
|
||||
(transcode_restartable0): suspend when output followed by input if
|
||||
OUTPUT_FOLLOWED_BY_INPUT is specified.
|
||||
(trans_sweep): check OUTPUT_FOLLOWED_BY_INPUT.
|
||||
(rb_trans_conv): support OUTPUT_FOLLOWED_BY_INPUT.
|
||||
(econv_primitive_convert): return :output_followed_by_input for
|
||||
transcode_output_followed_by_input.
|
||||
|
||||
Thu Aug 14 14:57:46 2008 Nobuyoshi Nakada <nobu@ruby-lang.org>
|
||||
|
||||
* gc.c (getrusage_time): should return the value.
|
||||
|
|
|
@ -70,6 +70,19 @@ class TestEncodingConverter < Test::Unit::TestCase
|
|||
assert_econv("", :finished, 100, ["Shift_JIS", "ISO-2022-JP"], "", "")
|
||||
end
|
||||
|
||||
def test_iso2022jp_outstream
|
||||
ec = Encoding::Converter.new("EUC-JP", "ISO-2022-JP")
|
||||
a = ["", src="", ec, nil, 50, Encoding::Converter::PARTIAL_INPUT]
|
||||
src << "a"; check_ec("a", "", :ibuf_empty, *a)
|
||||
src << "\xA2"; check_ec("a", "", :ibuf_empty, *a)
|
||||
src << "\xA4"; check_ec("a\e$B\"$", "", :ibuf_empty, *a)
|
||||
src << "\xA1"; check_ec("a\e$B\"$", "", :ibuf_empty, *a)
|
||||
src << "\xA2"; check_ec("a\e$B\"$!\"", "", :ibuf_empty, *a)
|
||||
src << "b"; check_ec("a\e$B\"$!\"\e(Bb", "", :ibuf_empty, *a)
|
||||
src << "\xA2\xA6"; check_ec("a\e$B\"$!\"\e(Bb\e$B\"&", "", :ibuf_empty, *a)
|
||||
a[-1] = 0; check_ec("a\e$B\"$!\"\e(Bb\e$B\"&\e(B", "", :finished, *a)
|
||||
end
|
||||
|
||||
def test_invalid
|
||||
assert_econv("", :invalid_input, 100, ["UTF-8", "EUC-JP"], "\x80", "")
|
||||
assert_econv("a", :invalid_input, 100, ["UTF-8", "EUC-JP"], "a\x80", "")
|
||||
|
@ -98,6 +111,16 @@ class TestEncodingConverter < Test::Unit::TestCase
|
|||
check_ec("AB", "", :finished, *a)
|
||||
end
|
||||
|
||||
def test_errors2
|
||||
ec = Encoding::Converter.new("UTF-16BE", "EUC-JP")
|
||||
a = ["", "\xFF\xFE\x00A\xDC\x00\x00B", ec, nil, 10, Encoding::Converter::OUTPUT_FOLLOWED_BY_INPUT]
|
||||
check_ec("", "\x00A\xDC\x00\x00B", :undefined_conversion, *a)
|
||||
check_ec("A", "\xDC\x00\x00B", :output_followed_by_input, *a)
|
||||
check_ec("A", "\x00B", :invalid_input, *a)
|
||||
check_ec("AB", "", :output_followed_by_input, *a)
|
||||
check_ec("AB", "", :finished, *a)
|
||||
end
|
||||
|
||||
def test_universal_newline
|
||||
ec = Encoding::Converter.new("UTF-8", "EUC-JP", Encoding::Converter::UNIVERSAL_NEWLINE)
|
||||
a = ["", src="", ec, nil, 50, Encoding::Converter::PARTIAL_INPUT]
|
||||
|
@ -118,4 +141,17 @@ class TestEncodingConverter < Test::Unit::TestCase
|
|||
ec = Encoding::Converter.new("UTF-8", "EUC-JP", Encoding::Converter::CR_NEWLINE)
|
||||
assert_econv("abc\rdef", :finished, 50, ec, "abc\ndef", "")
|
||||
end
|
||||
|
||||
def test_output_followed_by_input
|
||||
ec = Encoding::Converter.new("UTF-8", "EUC-JP")
|
||||
a = ["", "abc\u{3042}def", ec, nil, 100, Encoding::Converter::OUTPUT_FOLLOWED_BY_INPUT]
|
||||
check_ec("a", "bc\u{3042}def", :output_followed_by_input, *a)
|
||||
check_ec("ab", "c\u{3042}def", :output_followed_by_input, *a)
|
||||
check_ec("abc", "\u{3042}def", :output_followed_by_input, *a)
|
||||
check_ec("abc\xA4\xA2", "def", :output_followed_by_input, *a)
|
||||
check_ec("abc\xA4\xA2d", "ef", :output_followed_by_input, *a)
|
||||
check_ec("abc\xA4\xA2de", "f", :output_followed_by_input, *a)
|
||||
check_ec("abc\xA4\xA2def", "", :output_followed_by_input, *a)
|
||||
check_ec("abc\xA4\xA2def", "", :finished, *a)
|
||||
end
|
||||
end
|
||||
|
|
76
transcode.c
76
transcode.c
|
@ -29,6 +29,7 @@ static VALUE sym_invalid, sym_undef, sym_ignore, sym_replace;
|
|||
#define UNIVERSAL_NEWLINE 0x200
|
||||
#define CRLF_NEWLINE 0x400
|
||||
#define CR_NEWLINE 0x800
|
||||
#define OUTPUT_FOLLOWED_BY_INPUT 0x1000
|
||||
|
||||
/*
|
||||
* Dispatch data and logic
|
||||
|
@ -403,6 +404,11 @@ transcode_restartable0(const unsigned char **in_pos, unsigned char **out_pos,
|
|||
while (out_stop - out_p < 1) { SUSPEND(transcode_obuf_full, num); } \
|
||||
} while (0)
|
||||
|
||||
#define SUSPEND_OUTPUT_FOLLOWED_BY_INPUT(num) \
|
||||
if ((opt & OUTPUT_FOLLOWED_BY_INPUT) && *out_pos != out_p) { \
|
||||
SUSPEND(transcode_output_followed_by_input, num); \
|
||||
}
|
||||
|
||||
#define next_table (tc->next_table)
|
||||
#define next_info (tc->next_info)
|
||||
#define next_byte (tc->next_byte)
|
||||
|
@ -434,9 +440,13 @@ transcode_restartable0(const unsigned char **in_pos, unsigned char **out_pos,
|
|||
case 21: goto resume_label21;
|
||||
case 22: goto resume_label22;
|
||||
case 23: goto resume_label23;
|
||||
case 24: goto resume_label24;
|
||||
case 25: goto resume_label25;
|
||||
case 26: goto resume_label26;
|
||||
}
|
||||
|
||||
while (1) {
|
||||
SUSPEND_OUTPUT_FOLLOWED_BY_INPUT(24);
|
||||
if (in_stop <= in_p) {
|
||||
if (!(opt & PARTIAL_INPUT))
|
||||
break;
|
||||
|
@ -462,6 +472,7 @@ transcode_restartable0(const unsigned char **in_pos, unsigned char **out_pos,
|
|||
continue;
|
||||
case 0x00: case 0x04: case 0x08: case 0x0C:
|
||||
case 0x10: case 0x14: case 0x18: case 0x1C:
|
||||
SUSPEND_OUTPUT_FOLLOWED_BY_INPUT(25);
|
||||
while (in_p >= in_stop) {
|
||||
if (!(opt & PARTIAL_INPUT))
|
||||
goto invalid;
|
||||
|
@ -536,6 +547,8 @@ transcode_restartable0(const unsigned char **in_pos, unsigned char **out_pos,
|
|||
}
|
||||
case INVALID:
|
||||
if (tc->recognized_len + (in_p - inchar_start) <= unitlen) {
|
||||
if (tc->recognized_len + (in_p - inchar_start) < unitlen)
|
||||
SUSPEND_OUTPUT_FOLLOWED_BY_INPUT(26);
|
||||
while ((opt & PARTIAL_INPUT) && tc->recognized_len + (in_stop - inchar_start) < unitlen) {
|
||||
in_p = in_stop;
|
||||
SUSPEND(transcode_ibuf_empty, 8);
|
||||
|
@ -828,6 +841,12 @@ trans_sweep(rb_trans_t *ts,
|
|||
f = flags;
|
||||
if (ts->num_finished != i)
|
||||
f |= PARTIAL_INPUT;
|
||||
if (i == 0 && (flags & OUTPUT_FOLLOWED_BY_INPUT)) {
|
||||
start = 1;
|
||||
flags &= ~OUTPUT_FOLLOWED_BY_INPUT;
|
||||
}
|
||||
if (i != 0)
|
||||
f &= ~OUTPUT_FOLLOWED_BY_INPUT;
|
||||
iold = *ipp;
|
||||
oold = *opp;
|
||||
te->last_result = res = rb_transcoding_convert(te->tc, ipp, is, opp, os, f);
|
||||
|
@ -837,6 +856,7 @@ trans_sweep(rb_trans_t *ts,
|
|||
switch (res) {
|
||||
case transcode_invalid_input:
|
||||
case transcode_undefined_conversion:
|
||||
case transcode_output_followed_by_input:
|
||||
return i;
|
||||
|
||||
case transcode_obuf_full:
|
||||
|
@ -859,7 +879,8 @@ rb_trans_conv(rb_trans_t *ts,
|
|||
int flags)
|
||||
{
|
||||
int i;
|
||||
int start, err_index;
|
||||
int needreport_index;
|
||||
int sweep_start;
|
||||
|
||||
unsigned char empty_buf;
|
||||
unsigned char *empty_ptr = &empty_buf;
|
||||
|
@ -874,23 +895,60 @@ rb_trans_conv(rb_trans_t *ts,
|
|||
output_stop = empty_ptr;
|
||||
}
|
||||
|
||||
err_index = -1;
|
||||
if (ts->elems[0].last_result == transcode_output_followed_by_input)
|
||||
ts->elems[0].last_result = transcode_ibuf_empty;
|
||||
|
||||
needreport_index = -1;
|
||||
for (i = ts->num_trans-1; 0 <= i; i--) {
|
||||
if (ts->elems[i].last_result != transcode_ibuf_empty) {
|
||||
err_index = i;
|
||||
switch (ts->elems[i].last_result) {
|
||||
case transcode_invalid_input:
|
||||
case transcode_undefined_conversion:
|
||||
case transcode_output_followed_by_input:
|
||||
case transcode_finished:
|
||||
sweep_start = i+1;
|
||||
needreport_index = i;
|
||||
goto found_needreport;
|
||||
|
||||
case transcode_obuf_full:
|
||||
case transcode_ibuf_empty:
|
||||
break;
|
||||
|
||||
default:
|
||||
rb_bug("unexpected transcode last result");
|
||||
}
|
||||
}
|
||||
|
||||
/* /^[io]+$/ is confirmed. but actually /^i*o*$/. */
|
||||
|
||||
if (ts->elems[ts->num_trans-1].last_result == transcode_obuf_full &&
|
||||
(flags & OUTPUT_FOLLOWED_BY_INPUT)) {
|
||||
rb_trans_result_t res;
|
||||
|
||||
res = rb_trans_conv(ts, NULL, NULL, output_ptr, output_stop,
|
||||
(flags & ~OUTPUT_FOLLOWED_BY_INPUT)|PARTIAL_INPUT);
|
||||
|
||||
if (res == transcode_ibuf_empty)
|
||||
return transcode_output_followed_by_input;
|
||||
return res;
|
||||
}
|
||||
|
||||
sweep_start = 0;
|
||||
|
||||
found_needreport:
|
||||
|
||||
do {
|
||||
start = err_index + 1;
|
||||
err_index = trans_sweep(ts, input_ptr, input_stop, output_ptr, output_stop, flags, start);
|
||||
} while (err_index != -1 && err_index != ts->num_trans-1);
|
||||
needreport_index = trans_sweep(ts, input_ptr, input_stop, output_ptr, output_stop, flags, sweep_start);
|
||||
sweep_start = needreport_index + 1;
|
||||
} while (needreport_index != -1 && needreport_index != ts->num_trans-1);
|
||||
|
||||
for (i = ts->num_trans-1; 0 <= i; i--) {
|
||||
if (ts->elems[i].last_result != transcode_ibuf_empty) {
|
||||
rb_trans_result_t res = ts->elems[i].last_result;
|
||||
ts->elems[i].last_result = transcode_ibuf_empty;
|
||||
if (res == transcode_invalid_input ||
|
||||
res == transcode_undefined_conversion ||
|
||||
res == transcode_output_followed_by_input) {
|
||||
ts->elems[i].last_result = transcode_ibuf_empty;
|
||||
}
|
||||
return res;
|
||||
}
|
||||
}
|
||||
|
@ -1558,6 +1616,7 @@ econv_primitive_convert(int argc, VALUE *argv, VALUE self)
|
|||
case transcode_obuf_full: return ID2SYM(rb_intern("obuf_full"));
|
||||
case transcode_ibuf_empty: return ID2SYM(rb_intern("ibuf_empty"));
|
||||
case transcode_finished: return ID2SYM(rb_intern("finished"));
|
||||
case transcode_output_followed_by_input: return ID2SYM(rb_intern("output_followed_by_input"));
|
||||
default: return INT2NUM(res); /* should not be reached */
|
||||
}
|
||||
}
|
||||
|
@ -1601,6 +1660,7 @@ Init_transcode(void)
|
|||
rb_define_method(rb_cEncodingConverter, "primitive_convert", econv_primitive_convert, -1);
|
||||
rb_define_method(rb_cEncodingConverter, "max_output", econv_max_output, 0);
|
||||
rb_define_const(rb_cEncodingConverter, "PARTIAL_INPUT", INT2FIX(PARTIAL_INPUT));
|
||||
rb_define_const(rb_cEncodingConverter, "OUTPUT_FOLLOWED_BY_INPUT", INT2FIX(OUTPUT_FOLLOWED_BY_INPUT));
|
||||
rb_define_const(rb_cEncodingConverter, "UNIVERSAL_NEWLINE", INT2FIX(UNIVERSAL_NEWLINE));
|
||||
rb_define_const(rb_cEncodingConverter, "CRLF_NEWLINE", INT2FIX(CRLF_NEWLINE));
|
||||
rb_define_const(rb_cEncodingConverter, "CR_NEWLINE", INT2FIX(CR_NEWLINE));
|
||||
|
|
|
@ -117,6 +117,7 @@ typedef enum {
|
|||
transcode_obuf_full,
|
||||
transcode_ibuf_empty,
|
||||
transcode_finished,
|
||||
transcode_output_followed_by_input,
|
||||
} rb_trans_result_t;
|
||||
|
||||
typedef struct {
|
||||
|
|
Loading…
Add table
Reference in a new issue