From c749064f9f250d79db6ada0fe7f1f0784c183baa Mon Sep 17 00:00:00 2001 From: matz Date: Sun, 23 Dec 2007 19:55:18 +0000 Subject: [PATCH] * io.c (appendline): should do multibyte aware RS search. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@14554 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- ChangeLog | 2 ++ io.c | 22 ++++++++++++++++------ test/ruby/test_io_m17n.rb | 10 +++++----- 3 files changed, 23 insertions(+), 11 deletions(-) diff --git a/ChangeLog b/ChangeLog index 4339904cdc..2c0ca81caa 100644 --- a/ChangeLog +++ b/ChangeLog @@ -25,6 +25,8 @@ Mon Dec 24 02:59:32 2007 Yukihiro Matsumoto * io.c (io_enc_str): should preserve default_external encoding. + * io.c (appendline): should do multibyte aware RS search. + Mon Dec 24 02:06:35 2007 Yukihiro Matsumoto * io.c (rb_f_open): documentation update. diff --git a/io.c b/io.c index 54bff0ecb9..faf163f1b8 100644 --- a/io.c +++ b/io.c @@ -1647,22 +1647,32 @@ io_read(int argc, VALUE *argv, VALUE io) } static int -appendline(rb_io_t *fptr, int delim, VALUE *strp, long *lp) +appendline(rb_io_t *fptr, int delim, VALUE *strp, long *lp, int mb) { VALUE str = *strp; int c = EOF; long limit = *lp; + rb_encoding *enc = io_read_encoding(fptr); do { long pending = READ_DATA_PENDING_COUNT(fptr); if (pending > 0) { - const char *p = READ_DATA_PENDING_PTR(fptr); - const char *e; + const char *s = READ_DATA_PENDING_PTR(fptr); + const char *p, *e; long last = 0, len = (c != EOF); if (limit > 0 && pending > limit) pending = limit; + p = s; + again: e = memchr(p, delim, pending); - if (e) pending = e - p + 1; + if (e) { + if (mb && + ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc,(UChar*)s,(UChar*)e) != (UChar*)e) { + p = e + 1; + goto again; + } + pending = e - s + 1; + } len += pending; if (!NIL_P(str)) { last = RSTRING_LEN(str); @@ -1742,7 +1752,7 @@ rb_io_getline_fast(rb_io_t *fptr, unsigned char delim, long limit) int c, nolimit = 0; for (;;) { - c = appendline(fptr, delim, &str, &limit); + c = appendline(fptr, delim, &str, &limit, 0); if (c == EOF || c == delim) break; if (limit == 0) { nolimit = 1; @@ -1842,7 +1852,7 @@ rb_io_getline_1(VALUE rs, long limit, VALUE io) } newline = rsptr[rslen - 1]; - while ((c = appendline(fptr, newline, &str, &limit)) != EOF) { + while ((c = appendline(fptr, newline, &str, &limit, 1)) != EOF) { if (c == newline) { if (RSTRING_LEN(str) < rslen) continue; if (!rspara) rscheck(rsptr, rslen, rs); diff --git a/test/ruby/test_io_m17n.rb b/test/ruby/test_io_m17n.rb index 88e862dff6..4b8273b40b 100644 --- a/test/ruby/test_io_m17n.rb +++ b/test/ruby/test_io_m17n.rb @@ -60,8 +60,8 @@ EOT s = open("tmp", "r:euc-jp:utf-8") {|f| f.gets("\xA2\xA2".force_encoding("euc-jp").encode("utf-8")) } - assert_equal(Encoding.find("euc-jp"), s.encoding) - assert_str_equal("before \xA1\xA2\xA2\xA3 after".force_encoding("iso-8859-1"), s, '[ruby-core:14319]') + assert_equal(Encoding.find("utf-8"), s.encoding) + assert_str_equal("before \xA1\xA2\xA2\xA3 after".force_encoding("euc-jp").encode("utf-8"), s, '[ruby-core:14319]') } end @@ -196,8 +196,6 @@ EOT w.close s = r.read assert_equal(Encoding.default_external, s.encoding) - puts encdump(s) - puts encdump(utf8) assert_str_equal(utf8, s) } @@ -226,7 +224,9 @@ EOT } } - ENCS.reject {|e| e == Encoding::ASCII_8BIT }.each {|enc| + ENCS.each {|enc| + next if enc == Encoding::ASCII_8BIT + next if enc == Encoding::UTF_8 with_pipe("#{enc}:UTF-8") {|r, w| w << "\xc2\xa1" w.close