From cfa7550b6680fac48fcdc9d5c80dadeb71186dae Mon Sep 17 00:00:00 2001
From: nobu
Date: Sat, 15 Aug 2015 01:15:22 +0000
Subject: [PATCH] io.c: read more data

* io.c (rb_io_each_codepoint): read more data when read partially.
  [ruby-core:70379] [Bug #11444]

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@51583 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
---
 ChangeLog                 |  5 +++++
 io.c                      | 17 +++++++++++++++++
 test/ruby/test_io_m17n.rb | 20 ++++++++++++++++++++
 3 files changed, 42 insertions(+)

diff --git a/ChangeLog b/ChangeLog
index 1096377d77..cd7a16509f 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,8 @@
+Sat Aug 15 10:15:20 2015  Nobuyoshi Nakada
+
+	* io.c (rb_io_each_codepoint): read more data when read partially.
+	  [ruby-core:70379] [Bug #11444]
+
 Sat Aug 15 04:33:39 2015  Eric Wong
 
 	* hash.c (any_hash): skip rb_objid_hash for static syms
diff --git a/io.c b/io.c
index 104f521378..fc973bc5c8 100644
--- a/io.c
+++ b/io.c
@@ -3763,8 +3763,25 @@ rb_io_each_codepoint(VALUE io)
 		rb_yield(UINT2NUM(c));
 	    }
 	    else if (MBCLEN_INVALID_P(r)) {
+	      invalid:
 		rb_raise(rb_eArgError, "invalid byte sequence in %s", rb_enc_name(enc));
 	    }
+	    else if (MBCLEN_NEEDMORE_P(r)) {
+		char cbuf[8], *p = cbuf;
+		int more = MBCLEN_NEEDMORE_LEN(r);
+		if (more > numberof(cbuf)) goto invalid;
+		more += n = fptr->rbuf.len;
+		if (more > numberof(cbuf)) goto invalid;
+		while ((n = (int)read_buffered_data(p, more, fptr)) > 0 &&
+		       (p += n, (more -= n) > 0)) {
+		    if (io_fillbuf(fptr) < 0) goto invalid;
+		    if ((n = fptr->rbuf.len) > more) n = more;
+		}
+		r = rb_enc_precise_mbclen(cbuf, p, enc);
+		if (!MBCLEN_CHARFOUND_P(r)) goto invalid;
+		c = rb_enc_codepoint(cbuf, p, enc);
+		rb_yield(UINT2NUM(c));
+	    }
 	    else {
 		continue;
 	    }
diff --git a/test/ruby/test_io_m17n.rb b/test/ruby/test_io_m17n.rb
index 4382824844..55e23a2768 100644
--- a/test/ruby/test_io_m17n.rb
+++ b/test/ruby/test_io_m17n.rb
@@ -2562,4 +2562,24 @@ EOT
     a.close rescue nil
     b.close rescue nil
   end
+
+  def test_each_codepoint_need_more
+    code = <<-'end;'
+      c = nil
+      begin
+        STDIN.set_encoding(Encoding::UTF_8).each_codepoint{|i| c = i}
+      rescue ArgumentError => e
+        STDERR.puts e.message
+      else
+        printf "%x", c
+      end
+    end;
+    args = ['-e', code]
+    bug11444 = '[ruby-core:70379] [Bug #11444]'
+    assert_in_out_err(args, "\u{1f376}".b[0,3], [],
+                      ["invalid byte sequence in UTF-8"],
+                      bug11444, timeout: 1)
+    assert_in_out_err(args, "x"*8190+"\u{1f376}", ["1f376"], [],
+                      bug11444, timeout: 1)
+  end
 end