1
0
Fork 0
mirror of https://github.com/ruby/ruby.git synced 2022-11-09 12:17:21 -05:00

* include/ruby/encoding.h (rb_econv_result_t): enumeration constant:

econv_incomplete_input. 

* io.c (finish_writeconv): check econv_incomplete_input.

* transcode.c (transcode_restartable0): return econv_incomplete_input
  for unexpected end of source buffer.
  (trans_sweep): check econv_incomplete_input.
  (rb_trans_conv): ditto.
  (rb_econv_convert0): ditto.
  (rb_econv_convert): ditto.
  (transcode_loop): ditto.
  (make_econv_exception): change message for econv_incomplete_input.
  (econv_result_to_symbol): return :incomplete_input for
  econv_incomplete_input.
  (ecerr_incomplete_input): new method.



git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@18875 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
akr 2008-08-26 16:09:29 +00:00
parent 99a26ac164
commit db6ec3105e
5 changed files with 82 additions and 18 deletions

View file

@ -1,3 +1,22 @@
Wed Aug 27 01:03:23 2008 Tanaka Akira <akr@fsij.org>
* include/ruby/encoding.h (rb_econv_result_t): enumeration constant:
econv_incomplete_input.
* io.c (finish_writeconv): check econv_incomplete_input.
* transcode.c (transcode_restartable0): return econv_incomplete_input
for unexpected end of source buffer.
(trans_sweep): check econv_incomplete_input.
(rb_trans_conv): ditto.
(rb_econv_convert0): ditto.
(rb_econv_convert): ditto.
(transcode_loop): ditto.
(make_econv_exception): change message for econv_incomplete_input.
(econv_result_to_symbol): return :incomplete_input for
econv_incomplete_input.
(ecerr_incomplete_input): new method.
Wed Aug 27 00:05:55 2008 Tanaka Akira <akr@fsij.org> Wed Aug 27 00:05:55 2008 Tanaka Akira <akr@fsij.org>
* include/ruby/io.h (rb_io_t): rename crbuf to cbuf. * include/ruby/io.h (rb_io_t): rename crbuf to cbuf.

View file

@ -203,6 +203,7 @@ typedef enum {
econv_source_buffer_empty, econv_source_buffer_empty,
econv_finished, econv_finished,
econv_output_followed_by_input, econv_output_followed_by_input,
econv_incomplete_input,
} rb_econv_result_t; } rb_econv_result_t;
typedef struct { typedef struct {

12
io.c
View file

@ -2967,7 +2967,7 @@ finish_writeconv(rb_io_t *fptr, int noraise)
de = buf + sizeof(buf); de = buf + sizeof(buf);
res = rb_econv_convert(fptr->writeconv, NULL, NULL, &dp, de, 0); res = rb_econv_convert(fptr->writeconv, NULL, NULL, &dp, de, 0);
while (dp-ds) { while (dp-ds) {
retry: retry:
r = rb_write_internal(fptr->fd, ds, dp-ds); r = rb_write_internal(fptr->fd, ds, dp-ds);
if (r == dp-ds) if (r == dp-ds)
break; break;
@ -2987,6 +2987,7 @@ retry:
rb_econv_check_error(fptr->writeconv); rb_econv_check_error(fptr->writeconv);
} }
if (res == econv_invalid_byte_sequence || if (res == econv_invalid_byte_sequence ||
res == econv_incomplete_input ||
res == econv_undefined_conversion) { res == econv_undefined_conversion) {
break; break;
} }
@ -3009,6 +3010,7 @@ retry:
rb_econv_check_error(fptr->writeconv); rb_econv_check_error(fptr->writeconv);
} }
if (res == econv_invalid_byte_sequence || if (res == econv_invalid_byte_sequence ||
res == econv_incomplete_input ||
res == econv_undefined_conversion) { res == econv_undefined_conversion) {
break; break;
} }
@ -3603,7 +3605,7 @@ rb_io_mode_flags(const char *mode)
} }
} }
finished: finished:
if ((flags & FMODE_BINMODE) && (flags & FMODE_TEXTMODE)) if ((flags & FMODE_BINMODE) && (flags & FMODE_TEXTMODE))
goto error; goto error;
@ -7051,7 +7053,7 @@ copy_stream_sendfile(struct copy_stream_struct *stp)
} }
} }
retry_sendfile: retry_sendfile:
if (use_pread) { if (use_pread) {
ss = simple_sendfile(stp->dst_fd, stp->src_fd, &src_offset, copy_length); ss = simple_sendfile(stp->dst_fd, stp->src_fd, &src_offset, copy_length);
} }
@ -7095,7 +7097,7 @@ static ssize_t
copy_stream_read(struct copy_stream_struct *stp, char *buf, int len, off_t offset) copy_stream_read(struct copy_stream_struct *stp, char *buf, int len, off_t offset)
{ {
ssize_t ss; ssize_t ss;
retry_read: retry_read:
if (offset == (off_t)-1) if (offset == (off_t)-1)
ss = read(stp->src_fd, buf, len); ss = read(stp->src_fd, buf, len);
else { else {
@ -7231,7 +7233,7 @@ copy_stream_func(void *arg)
copy_stream_read_write(stp); copy_stream_read_write(stp);
#ifdef USE_SENDFILE #ifdef USE_SENDFILE
finish: finish:
#endif #endif
return Qnil; return Qnil;
} }

View file

@ -364,7 +364,7 @@ class TestEncodingConverter < Test::Unit::TestCase
def test_errinfo_invalid_partial_character def test_errinfo_invalid_partial_character
ec = Encoding::Converter.new("EUC-JP", "ISO-8859-1") ec = Encoding::Converter.new("EUC-JP", "ISO-8859-1")
ec.primitive_convert(src="\xa4", dst="", nil, 10) ec.primitive_convert(src="\xa4", dst="", nil, 10)
assert_errinfo(:invalid_byte_sequence, "EUC-JP", "UTF-8", "\xA4", "", nil, ec) assert_errinfo(:incomplete_input, "EUC-JP", "UTF-8", "\xA4", "", nil, ec)
end end
def test_errinfo_valid_partial_character def test_errinfo_valid_partial_character
@ -426,6 +426,18 @@ class TestEncodingConverter < Test::Unit::TestCase
assert_equal("UTF-8", err.destination_encoding) assert_equal("UTF-8", err.destination_encoding)
assert_equal("\xA4".force_encoding("ASCII-8BIT"), err.error_bytes) assert_equal("\xA4".force_encoding("ASCII-8BIT"), err.error_bytes)
assert_equal("d", err.readagain_bytes) assert_equal("d", err.readagain_bytes)
assert_equal(false, err.incomplete_input?)
end
def test_exc_incomplete
err = assert_raise(Encoding::InvalidByteSequence) {
"abc\xa4".encode("ISO-8859-1", "EUC-JP")
}
assert_equal("EUC-JP", err.source_encoding)
assert_equal("UTF-8", err.destination_encoding)
assert_equal("\xA4".force_encoding("ASCII-8BIT"), err.error_bytes)
assert_equal(nil, err.readagain_bytes)
assert_equal(true, err.incomplete_input?)
end end
def test_exc_undef def test_exc_undef

View file

@ -466,6 +466,7 @@ transcode_restartable0(const unsigned char **in_pos, unsigned char **out_pos,
case 24: goto resume_label24; case 24: goto resume_label24;
case 25: goto resume_label25; case 25: goto resume_label25;
case 26: goto resume_label26; case 26: goto resume_label26;
case 27: goto resume_label27;
} }
while (1) { while (1) {
@ -500,7 +501,7 @@ transcode_restartable0(const unsigned char **in_pos, unsigned char **out_pos,
SUSPEND_OUTPUT_FOLLOWED_BY_INPUT(25); SUSPEND_OUTPUT_FOLLOWED_BY_INPUT(25);
while (in_p >= in_stop) { while (in_p >= in_stop) {
if (!(opt & ECONV_PARTIAL_INPUT)) if (!(opt & ECONV_PARTIAL_INPUT))
goto invalid; goto incomplete;
SUSPEND(econv_source_buffer_empty, 5); SUSPEND(econv_source_buffer_empty, 5);
} }
next_byte = (unsigned char)*in_p++; next_byte = (unsigned char)*in_p++;
@ -602,6 +603,10 @@ transcode_restartable0(const unsigned char **in_pos, unsigned char **out_pos,
SUSPEND(econv_invalid_byte_sequence, 1); SUSPEND(econv_invalid_byte_sequence, 1);
continue; continue;
incomplete:
SUSPEND(econv_incomplete_input, 27);
continue;
undef: undef:
SUSPEND(econv_undefined_conversion, 2); SUSPEND(econv_undefined_conversion, 2);
continue; continue;
@ -949,6 +954,7 @@ trans_sweep(rb_econv_t *ec,
switch (res) { switch (res) {
case econv_invalid_byte_sequence: case econv_invalid_byte_sequence:
case econv_incomplete_input:
case econv_undefined_conversion: case econv_undefined_conversion:
case econv_output_followed_by_input: case econv_output_followed_by_input:
return i; return i;
@ -997,6 +1003,7 @@ rb_trans_conv(rb_econv_t *ec,
for (i = ec->num_trans-1; 0 <= i; i--) { for (i = ec->num_trans-1; 0 <= i; i--) {
switch (ec->elems[i].last_result) { switch (ec->elems[i].last_result) {
case econv_invalid_byte_sequence: case econv_invalid_byte_sequence:
case econv_incomplete_input:
case econv_undefined_conversion: case econv_undefined_conversion:
case econv_output_followed_by_input: case econv_output_followed_by_input:
case econv_finished: case econv_finished:
@ -1030,7 +1037,7 @@ rb_trans_conv(rb_econv_t *ec,
sweep_start = 0; sweep_start = 0;
found_needreport: found_needreport:
do { do {
needreport_index = trans_sweep(ec, input_ptr, input_stop, output_ptr, output_stop, flags, sweep_start); needreport_index = trans_sweep(ec, input_ptr, input_stop, output_ptr, output_stop, flags, sweep_start);
@ -1041,6 +1048,7 @@ found_needreport:
if (ec->elems[i].last_result != econv_source_buffer_empty) { if (ec->elems[i].last_result != econv_source_buffer_empty) {
rb_econv_result_t res = ec->elems[i].last_result; rb_econv_result_t res = ec->elems[i].last_result;
if (res == econv_invalid_byte_sequence || if (res == econv_invalid_byte_sequence ||
res == econv_incomplete_input ||
res == econv_undefined_conversion || res == econv_undefined_conversion ||
res == econv_output_followed_by_input) { res == econv_output_followed_by_input) {
ec->elems[i].last_result = econv_source_buffer_empty; ec->elems[i].last_result = econv_source_buffer_empty;
@ -1160,10 +1168,11 @@ rb_econv_convert0(rb_econv_t *ec,
} while (res == econv_output_followed_by_input); } while (res == econv_output_followed_by_input);
} }
gotresult: gotresult:
ec->last_error.result = res; ec->last_error.result = res;
ec->last_error.partial_input = flags & ECONV_PARTIAL_INPUT; ec->last_error.partial_input = flags & ECONV_PARTIAL_INPUT;
if (res == econv_invalid_byte_sequence || if (res == econv_invalid_byte_sequence ||
res == econv_incomplete_input ||
res == econv_undefined_conversion) { res == econv_undefined_conversion) {
rb_transcoding *error_tc = ec->elems[result_position].tc; rb_transcoding *error_tc = ec->elems[result_position].tc;
ec->last_error.error_tc = error_tc; ec->last_error.error_tc = error_tc;
@ -1200,10 +1209,11 @@ rb_econv_convert(rb_econv_t *ec,
output_stop = empty_ptr; output_stop = empty_ptr;
} }
resume: resume:
ret = rb_econv_convert0(ec, input_ptr, input_stop, output_ptr, output_stop, flags); ret = rb_econv_convert0(ec, input_ptr, input_stop, output_ptr, output_stop, flags);
if (ret == econv_invalid_byte_sequence) { if (ret == econv_invalid_byte_sequence ||
ret == econv_incomplete_input) {
/* deal with invalid byte sequence */ /* deal with invalid byte sequence */
/* todo: add more alternative behaviors */ /* todo: add more alternative behaviors */
if (ec->opts.flags&ECONV_INVALID_IGNORE) { if (ec->opts.flags&ECONV_INVALID_IGNORE) {
@ -1398,7 +1408,7 @@ rb_econv_insert_output(rb_econv_t *ec,
xfree((void*)insert_str); xfree((void*)insert_str);
return 0; return 0;
fail: fail:
if (insert_str != str) if (insert_str != str)
xfree((void*)insert_str); xfree((void*)insert_str);
return -1; return -1;
@ -1620,7 +1630,8 @@ static VALUE
make_econv_exception(rb_econv_t *ec) make_econv_exception(rb_econv_t *ec)
{ {
VALUE mesg, exc; VALUE mesg, exc;
if (ec->last_error.result == econv_invalid_byte_sequence) { if (ec->last_error.result == econv_invalid_byte_sequence ||
ec->last_error.result == econv_incomplete_input) {
const char *err = (const char *)ec->last_error.error_bytes_start; const char *err = (const char *)ec->last_error.error_bytes_start;
size_t error_len = ec->last_error.error_bytes_len; size_t error_len = ec->last_error.error_bytes_len;
VALUE bytes = rb_str_new(err, error_len); VALUE bytes = rb_str_new(err, error_len);
@ -1628,7 +1639,12 @@ make_econv_exception(rb_econv_t *ec)
size_t readagain_len = ec->last_error.readagain_len; size_t readagain_len = ec->last_error.readagain_len;
VALUE bytes2 = Qnil; VALUE bytes2 = Qnil;
VALUE dumped2; VALUE dumped2;
if (readagain_len) { if (ec->last_error.result == econv_incomplete_input) {
mesg = rb_sprintf("incomplete input: %s on %s",
StringValueCStr(dumped),
ec->last_error.source_encoding);
}
else if (readagain_len) {
bytes2 = rb_str_new(err+error_len, readagain_len); bytes2 = rb_str_new(err+error_len, readagain_len);
dumped2 = rb_str_dump(bytes2); dumped2 = rb_str_dump(bytes2);
mesg = rb_sprintf("invalid byte sequence: %s followed by %s on %s", mesg = rb_sprintf("invalid byte sequence: %s followed by %s on %s",
@ -1647,6 +1663,7 @@ make_econv_exception(rb_econv_t *ec)
rb_ivar_set(exc, rb_intern("destination_encoding"), rb_str_new2(ec->last_error.destination_encoding)); rb_ivar_set(exc, rb_intern("destination_encoding"), rb_str_new2(ec->last_error.destination_encoding));
rb_ivar_set(exc, rb_intern("error_bytes"), bytes); rb_ivar_set(exc, rb_intern("error_bytes"), bytes);
rb_ivar_set(exc, rb_intern("readagain_bytes"), bytes2); rb_ivar_set(exc, rb_intern("readagain_bytes"), bytes2);
rb_ivar_set(exc, rb_intern("incomplete_input"), ec->last_error.result == econv_incomplete_input ? Qtrue : Qfalse);
return exc; return exc;
} }
if (ec->last_error.result == econv_undefined_conversion) { if (ec->last_error.result == econv_undefined_conversion) {
@ -1742,10 +1759,11 @@ transcode_loop(const unsigned char **in_pos, unsigned char **out_pos,
last_tc = ec->last_tc; last_tc = ec->last_tc;
max_output = last_tc ? last_tc->transcoder->max_output : 1; max_output = last_tc ? last_tc->transcoder->max_output : 1;
resume: resume:
ret = rb_econv_convert(ec, in_pos, in_stop, out_pos, out_stop, 0); ret = rb_econv_convert(ec, in_pos, in_stop, out_pos, out_stop, 0);
if (ret == econv_invalid_byte_sequence) { if (ret == econv_invalid_byte_sequence ||
ret == econv_incomplete_input) {
exc = make_econv_exception(ec); exc = make_econv_exception(ec);
rb_econv_close(ec); rb_econv_close(ec);
rb_exc_raise(exc); rb_exc_raise(exc);
@ -1812,6 +1830,7 @@ transcode_loop(const unsigned char **in_pos, unsigned char **out_pos,
ptr += p - &input_byte; ptr += p - &input_byte;
switch (ret) { switch (ret) {
case econv_invalid_byte_sequence: case econv_invalid_byte_sequence:
case econv_incomplete_input:
exc = make_econv_exception(ec); exc = make_econv_exception(ec);
rb_econv_close(ec); rb_econv_close(ec);
rb_exc_raise(exc); rb_exc_raise(exc);
@ -2291,6 +2310,7 @@ econv_result_to_symbol(rb_econv_result_t res)
{ {
switch (res) { switch (res) {
case econv_invalid_byte_sequence: return ID2SYM(rb_intern("invalid_byte_sequence")); case econv_invalid_byte_sequence: return ID2SYM(rb_intern("invalid_byte_sequence"));
case econv_incomplete_input: return ID2SYM(rb_intern("incomplete_input"));
case econv_undefined_conversion: return ID2SYM(rb_intern("undefined_conversion")); case econv_undefined_conversion: return ID2SYM(rb_intern("undefined_conversion"));
case econv_destination_buffer_full: return ID2SYM(rb_intern("destination_buffer_full")); case econv_destination_buffer_full: return ID2SYM(rb_intern("destination_buffer_full"));
case econv_source_buffer_empty: return ID2SYM(rb_intern("source_buffer_empty")); case econv_source_buffer_empty: return ID2SYM(rb_intern("source_buffer_empty"));
@ -2311,6 +2331,7 @@ econv_result_to_symbol(rb_econv_result_t res)
* *
* possible results: * possible results:
* :invalid_byte_sequence * :invalid_byte_sequence
* :incomplete_input
* :undefined_conversion * :undefined_conversion
* :output_followed_by_input * :output_followed_by_input
* :destination_buffer_full * :destination_buffer_full
@ -2342,6 +2363,8 @@ econv_result_to_symbol(rb_econv_result_t res)
* *
* primitive_convert stops conversion when one of the following conditions is met. * primitive_convert stops conversion when one of the following conditions is met.
* - invalid byte sequence found in source buffer (:invalid_byte_sequence) * - invalid byte sequence found in source buffer (:invalid_byte_sequence)
* - unexpected end of source buffer (:incomplete_input)
* this occurs only when PARTIAL_INPUT is not specified.
* - character not representable in output encoding (:undefined_conversion) * - character not representable in output encoding (:undefined_conversion)
* - after some output is generated, before input is done (:output_followed_by_input) * - after some output is generated, before input is done (:output_followed_by_input)
* this occurs only when OUTPUT_FOLLOWED_BY_INPUT is specified. * this occurs only when OUTPUT_FOLLOWED_BY_INPUT is specified.
@ -2451,7 +2474,7 @@ econv_primitive_convert(int argc, VALUE *argv, VALUE self)
* for primitive_convert. * for primitive_convert.
* *
* Other elements are only meaningful when result is * Other elements are only meaningful when result is
* :invalid_byte_sequence or :undefined_conversion. * :invalid_byte_sequence, :incomplete_input or :undefined_conversion.
* *
* enc1 and enc2 indicate a conversion step as a pair of strings. * enc1 and enc2 indicate a conversion step as a pair of strings.
* For example, EUC-JP to ISO-8859-1 is * For example, EUC-JP to ISO-8859-1 is
@ -2482,7 +2505,7 @@ econv_primitive_convert(int argc, VALUE *argv, VALUE self)
* ec = Encoding::Converter.new("EUC-JP", "ISO-8859-1") * ec = Encoding::Converter.new("EUC-JP", "ISO-8859-1")
* ec.primitive_convert(src="\xa4", dst="", nil, 10) * ec.primitive_convert(src="\xa4", dst="", nil, 10)
* p ec.primitive_errinfo * p ec.primitive_errinfo
* #=> [:invalid_byte_sequence, "EUC-JP", "UTF-8", "\xA4", "", nil] * #=> [:incomplete_input, "EUC-JP", "UTF-8", "\xA4", "", nil]
* *
* # Encoding::Converter::PARTIAL_INPUT prevents invalid errors by * # Encoding::Converter::PARTIAL_INPUT prevents invalid errors by
* # partial characters. * # partial characters.
@ -2625,6 +2648,12 @@ ecerr_readagain_bytes(VALUE self)
return rb_attr_get(self, rb_intern("readagain_bytes")); return rb_attr_get(self, rb_intern("readagain_bytes"));
} }
/*
 * Reader for the "incomplete_input" attribute of an exception object.
 *
 * Looks the attribute up on +self+ with rb_attr_get and returns whatever
 * value is stored there, unchanged.
 */
static VALUE
ecerr_incomplete_input(VALUE self)
{
    VALUE incomplete_flag = rb_attr_get(self, rb_intern("incomplete_input"));

    return incomplete_flag;
}
extern void Init_newline(void); extern void Init_newline(void);
void void
@ -2674,6 +2703,7 @@ Init_transcode(void)
rb_define_method(rb_eInvalidByteSequence, "destination_encoding", ecerr_destination_encoding, 0); rb_define_method(rb_eInvalidByteSequence, "destination_encoding", ecerr_destination_encoding, 0);
rb_define_method(rb_eInvalidByteSequence, "error_bytes", ecerr_error_bytes, 0); rb_define_method(rb_eInvalidByteSequence, "error_bytes", ecerr_error_bytes, 0);
rb_define_method(rb_eInvalidByteSequence, "readagain_bytes", ecerr_readagain_bytes, 0); rb_define_method(rb_eInvalidByteSequence, "readagain_bytes", ecerr_readagain_bytes, 0);
rb_define_method(rb_eInvalidByteSequence, "incomplete_input?", ecerr_incomplete_input, 0);
Init_newline(); Init_newline();
} }