From c749064f9f250d79db6ada0fe7f1f0784c183baa Mon Sep 17 00:00:00 2001
From: matz <matz@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>
Date: Sun, 23 Dec 2007 19:55:18 +0000
Subject: [PATCH] * io.c (appendline): should do multibyte-aware RS search.

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@14554 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
---
 ChangeLog                 |  2 ++
 io.c                      | 22 ++++++++++++++++------
 test/ruby/test_io_m17n.rb | 10 +++++-----
 3 files changed, 23 insertions(+), 11 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 4339904cdc..2c0ca81caa 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -25,6 +25,8 @@ Mon Dec 24 02:59:32 2007  Yukihiro Matsumoto  <matz@ruby-lang.org>
 
 	* io.c (io_enc_str): should preserve default_external encoding.
 
+	* io.c (appendline): should do multibyte-aware RS search.
+
 Mon Dec 24 02:06:35 2007  Yukihiro Matsumoto  <matz@ruby-lang.org>
 
 	* io.c (rb_f_open): documentation update.
diff --git a/io.c b/io.c
index 54bff0ecb9..faf163f1b8 100644
--- a/io.c
+++ b/io.c
@@ -1647,22 +1647,32 @@ io_read(int argc, VALUE *argv, VALUE io)
 }
 
 static int
-appendline(rb_io_t *fptr, int delim, VALUE *strp, long *lp)
+appendline(rb_io_t *fptr, int delim, VALUE *strp, long *lp, int mb)
 {
     VALUE str = *strp;
     int c = EOF;
     long limit = *lp;
+    rb_encoding *enc = io_read_encoding(fptr);
 
     do {
 	long pending = READ_DATA_PENDING_COUNT(fptr);
 	if (pending > 0) {
-	    const char *p = READ_DATA_PENDING_PTR(fptr);
-	    const char *e;
+	    const char *s = READ_DATA_PENDING_PTR(fptr);
+	    const char *p, *e;
 	    long last = 0, len = (c != EOF);
 
 	    if (limit > 0 && pending > limit) pending = limit;
+	    p = s;
+	  again:
 	    e = memchr(p, delim, pending);
-	    if (e) pending = e - p + 1;
+	    if (e) {
+		if (mb &&
+		    ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc,(UChar*)s,(UChar*)e) != (UChar*)e) {
+		    p = e + 1;
+		    goto again;
+		}
+		pending = e - s + 1;
+	    }
 	    len += pending;
 	    if (!NIL_P(str)) {
 		last = RSTRING_LEN(str);
@@ -1742,7 +1752,7 @@ rb_io_getline_fast(rb_io_t *fptr, unsigned char delim, long limit)
     int c, nolimit = 0;
 
     for (;;) {
-	c = appendline(fptr, delim, &str, &limit);
+	c = appendline(fptr, delim, &str, &limit, 0);
 	if (c == EOF || c == delim) break;
 	if (limit == 0) {
 	    nolimit = 1;
@@ -1842,7 +1852,7 @@ rb_io_getline_1(VALUE rs, long limit, VALUE io)
 	}
 	newline = rsptr[rslen - 1];
 
-	while ((c = appendline(fptr, newline, &str, &limit)) != EOF) {
+	while ((c = appendline(fptr, newline, &str, &limit, 1)) != EOF) {
 	    if (c == newline) {
 		if (RSTRING_LEN(str) < rslen) continue;
 		if (!rspara) rscheck(rsptr, rslen, rs);
diff --git a/test/ruby/test_io_m17n.rb b/test/ruby/test_io_m17n.rb
index 88e862dff6..4b8273b40b 100644
--- a/test/ruby/test_io_m17n.rb
+++ b/test/ruby/test_io_m17n.rb
@@ -60,8 +60,8 @@ EOT
       s = open("tmp", "r:euc-jp:utf-8") {|f|
         f.gets("\xA2\xA2".force_encoding("euc-jp").encode("utf-8"))
       }
-      assert_equal(Encoding.find("euc-jp"), s.encoding)
-      assert_str_equal("before \xA1\xA2\xA2\xA3 after".force_encoding("iso-8859-1"), s, '[ruby-core:14319]')
+      assert_equal(Encoding.find("utf-8"), s.encoding)
+      assert_str_equal("before \xA1\xA2\xA2\xA3 after".force_encoding("euc-jp").encode("utf-8"), s, '[ruby-core:14319]')
     }
   end
 
@@ -196,8 +196,6 @@ EOT
       w.close
       s = r.read
       assert_equal(Encoding.default_external, s.encoding)
-      puts encdump(s)
-      puts encdump(utf8)
       assert_str_equal(utf8, s)
     }
 
@@ -226,7 +224,9 @@ EOT
       }
     }
 
-    ENCS.reject {|e| e == Encoding::ASCII_8BIT }.each {|enc|
+    ENCS.each {|enc|
+      next if enc == Encoding::ASCII_8BIT
+      next if enc == Encoding::UTF_8
       with_pipe("#{enc}:UTF-8") {|r, w|
         w << "\xc2\xa1"
         w.close