From a04a812ed0aa7424ea1f40756402d1f199a43308 Mon Sep 17 00:00:00 2001 From: matz Date: Mon, 24 Dec 2007 16:36:14 +0000 Subject: [PATCH] * include/ruby/encoding.h (rb_enc_left_char_head): new utility macro. * include/ruby/encoding.h (rb_enc_right_char_head): ditto. * io.c (appendline): does multibyte RS search in the function. * io.c (prepare_getline_args): RS may be nil. * io.c (rb_io_getc): should process character based on external encoding, when transcoding required. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@14619 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- ChangeLog | 13 +++++++ include/ruby/encoding.h | 3 ++ io.c | 79 ++++++++++++++++++++++++++------------- string.c | 5 +-- test/ruby/test_io_m17n.rb | 9 +++-- version.h | 6 +-- 6 files changed, 80 insertions(+), 35 deletions(-) diff --git a/ChangeLog b/ChangeLog index fb43b86b38..9f3ebcff92 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,16 @@ +Tue Dec 25 01:19:18 2007 Yukihiro Matsumoto + + * include/ruby/encoding.h (rb_enc_left_char_head): new utility macro. + + * include/ruby/encoding.h (rb_enc_right_char_head): ditto. + + * io.c (appendline): does multibyte RS search in the function. + + * io.c (prepare_getline_args): RS may be nil. + + * io.c (rb_io_getc): should process character based on external + encoding, when transcoding required. + Tue Dec 25 01:07:57 2007 Tanaka Akira * lib/irb/output-method.rb: translate a comment to English to diff --git a/include/ruby/encoding.h b/include/ruby/encoding.h index c70f0972d6..49df3b22a8 100644 --- a/include/ruby/encoding.h +++ b/include/ruby/encoding.h @@ -94,6 +94,9 @@ int rb_enc_codelen(int code, rb_encoding *enc); /* ptr, ptr, encoding -> prev_char */ #define rb_enc_prev_char(s,p,enc) (char *)onigenc_get_prev_char_head(enc,(UChar*)s,(UChar*)p) +/* ptr, ptr, encoding -> next_char */ +#define rb_enc_left_char_head(s,p,enc) (char *)onigenc_get_left_adjust_char_head(enc,(UChar*)s,(UChar*)p) +#define rb_enc_right_char_head(s,p,enc) (char *)onigenc_get_right_adjust_char_head(enc,(UChar*)s,(UChar*)p) #define rb_enc_isctype(c,t,enc) ONIGENC_IS_CODE_CTYPE(enc,c,t) #define rb_enc_isascii(c,enc) ONIGENC_IS_CODE_ASCII(c) diff --git a/io.c b/io.c index faf163f1b8..56ddbcc040 100644 --- a/io.c +++ b/io.c @@ -363,6 +363,15 @@ io_read_encoding(rb_io_t *fptr) : rb_default_external_encoding(); } +static rb_encoding* +io_input_encoding(rb_io_t *fptr) +{ + if (fptr->enc2) { + return fptr->enc2; + } + return io_read_encoding(fptr); +} + void rb_io_check_writable(rb_io_t *fptr) { @@ -1646,31 +1655,51 @@ io_read(int argc, VALUE *argv, VALUE io) return str; } +static void +rscheck(const char *rsptr, long rslen, VALUE rs) +{ + if (!rs) return; + if (RSTRING_PTR(rs) != rsptr && RSTRING_LEN(rs) != rslen) + rb_raise(rb_eRuntimeError, "rs modified"); +} + static int -appendline(rb_io_t *fptr, int delim, VALUE *strp, long *lp, int mb) +appendline(rb_io_t *fptr, int delim, const char *rsptr, int rslen, VALUE rs, VALUE *strp, long *lp) { VALUE str = *strp; int c = EOF; long limit = *lp; - rb_encoding *enc = io_read_encoding(fptr); + rb_encoding *enc = io_input_encoding(fptr); do { long pending = READ_DATA_PENDING_COUNT(fptr); if (pending > 0) { const char *s = READ_DATA_PENDING_PTR(fptr); - const char *p, *e; + const char *p, *e, *pp; long last = 0, len = (c != EOF); if (limit > 0 && pending > limit) pending = limit; - p = s; + pp = p = s; again: e = memchr(p, delim, pending); if (e) { - if (mb && - ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc,(UChar*)s,(UChar*)e) != (UChar*)e) { + const char *p0 = e - rslen + 1; + if (p0 < s) { p = e + 1; goto again; } + pp = rb_enc_left_char_head(pp, p0, enc); + if (pp != p0) { + p = e + 1; + goto again; + } + if (rsptr) { + rscheck(rsptr, rslen, rs); + if (memcmp(p0, rsptr, rslen) != 0) { + p = e + 1; + goto again; + } + } pending = e - s + 1; } len += pending; @@ -1752,7 +1781,7 @@ rb_io_getline_fast(rb_io_t *fptr, unsigned char delim, long limit) int c, nolimit = 0; for (;;) { - c = appendline(fptr, delim, &str, &limit, 0); + c = appendline(fptr, delim, 0, 0, 0, &str, &limit); if (c == EOF || c == delim) break; if (limit == 0) { nolimit = 1; @@ -1770,14 +1799,6 @@ rb_io_getline_fast(rb_io_t *fptr, unsigned char delim, long limit) return str; } -static int -rscheck(const char *rsptr, long rslen, VALUE rs) -{ - if (RSTRING_PTR(rs) != rsptr && RSTRING_LEN(rs) != rslen) - rb_raise(rb_eRuntimeError, "rs modified"); - return 0; -} - static void prepare_getline_args(int argc, VALUE *argv, VALUE *rsp, long *limit, VALUE io) { @@ -1803,10 +1824,20 @@ prepare_getline_args(int argc, VALUE *argv, VALUE *rsp, long *limit, VALUE io) } } GetOpenFile(io, fptr); - if (fptr->enc2) { - rs = rb_funcall(rs, id_encode, 2, - rb_enc_from_encoding(fptr->enc2), - rb_enc_from_encoding(fptr->enc)); + if (!NIL_P(rs)) { + rb_encoding *enc_rs = rb_enc_get(rs); + rb_encoding *enc_io = io_read_encoding(fptr); + + if (enc_io != enc_rs && + (rb_enc_str_coderange(rs) != ENC_CODERANGE_7BIT || + !rb_enc_asciicompat(enc_io))) { + rb_raise(rb_eArgError, "IO and RS encodings differ"); + } + if (fptr->enc2) { + rs = rb_funcall(rs, id_encode, 2, + rb_enc_from_encoding(fptr->enc2), + rb_enc_from_encoding(fptr->enc)); + } } *rsp = rs; *limit = NIL_P(lim) ? -1L : NUM2LONG(lim); @@ -1843,6 +1874,7 @@ rb_io_getline_1(VALUE rs, long limit, VALUE io) rslen = 2; rspara = 1; swallow(fptr, '\n'); + rs = 0; } else if (rslen == 1) { return rb_io_getline_fast(fptr, (unsigned char)RSTRING_PTR(rs)[0], limit); @@ -1852,12 +1884,9 @@ rb_io_getline_1(VALUE rs, long limit, VALUE io) } newline = rsptr[rslen - 1]; - while ((c = appendline(fptr, newline, &str, &limit, 1)) != EOF) { + while ((c = appendline(fptr, newline, rsptr, rslen, rs, &str, &limit)) != EOF) { if (c == newline) { - if (RSTRING_LEN(str) < rslen) continue; - if (!rspara) rscheck(rsptr, rslen, rs); - if (memcmp(RSTRING_PTR(str) + RSTRING_LEN(str) - rslen, - rsptr, rslen) == 0) break; + break; } if (limit == 0) { nolimit = 1; @@ -2201,7 +2230,7 @@ rb_io_getc(VALUE io) GetOpenFile(io, fptr); rb_io_check_readable(fptr); - enc = io_read_encoding(fptr); + enc = io_input_encoding(fptr); READ_CHECK(fptr); if (io_fillbuf(fptr) < 0) { return Qnil; diff --git a/string.c b/string.c index fd2c672f1c..6ca4ba3c63 100644 --- a/string.c +++ b/string.c @@ -1491,8 +1491,7 @@ rb_str_index(VALUE str, VALUE sub, long offset) char *t; pos = rb_memsearch(sptr, slen, s, len); if (pos < 0) return pos; - t = (char *)onigenc_get_right_adjust_char_head(enc, (const UChar *)s, - (const UChar *)s + pos); + t = rb_enc_right_char_head(s, s+pos, enc); if (t == s) break; if ((len -= t - s) <= 0) return -1; offset += t - s; @@ -4564,7 +4563,7 @@ rb_str_chomp_bang(int argc, VALUE *argv, VALUE str) if (p[len-1] == newline && (rslen <= 1 || memcmp(RSTRING_PTR(rs), pp, rslen) == 0)) { - if (ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, (UChar *)p, (UChar *)pp) != (const UChar*)pp) + if (rb_enc_left_char_head(p, pp, enc) != pp) return Qnil; rb_str_modify(str); STR_SET_LEN(str, RSTRING_LEN(str) - rslen); diff --git a/test/ruby/test_io_m17n.rb b/test/ruby/test_io_m17n.rb index 6bc9025f1a..e90d1dd916 100644 --- a/test/ruby/test_io_m17n.rb +++ b/test/ruby/test_io_m17n.rb @@ -87,7 +87,7 @@ EOT def test_open_w with_tmpdir { open("tmp", "w") {|f| - assert_equal(nil, f.external_encoding) + assert_equal(Encoding.default_external, f.external_encoding) assert_equal(nil, f.internal_encoding) } } @@ -96,7 +96,7 @@ EOT def test_open_wb with_tmpdir { open("tmp", "wb") {|f| - assert_equal(nil, f.external_encoding) + assert_equal(Encoding::ASCII_8BIT, f.external_encoding) assert_equal(nil, f.internal_encoding) } } @@ -135,12 +135,12 @@ EOT end def test_stdout - assert_equal(nil, STDOUT.external_encoding) + assert_equal(Encoding.default_external, STDOUT.external_encoding) assert_equal(nil, STDOUT.internal_encoding) end def test_stderr - assert_equal(nil, STDERR.external_encoding) + assert_equal(Encoding.default_external, STDERR.external_encoding) assert_equal(nil, STDERR.internal_encoding) end @@ -181,6 +181,7 @@ EOT with_pipe("euc-jp:utf-8") {|r, w| w.write "before \xa2\xa2 after" rs = "\xA2\xA2".encode("utf-8", "euc-jp") + w.close timeout(1) { assert_equal("before \xa2\xa2".encode("utf-8", "euc-jp"), r.gets(rs)) diff --git a/version.h b/version.h index b5c5308c3a..46fada7b70 100644 --- a/version.h +++ b/version.h @@ -1,7 +1,7 @@ #define RUBY_VERSION "1.9.0" -#define RUBY_RELEASE_DATE "2007-12-24" +#define RUBY_RELEASE_DATE "2007-12-25" #define RUBY_VERSION_CODE 190 -#define RUBY_RELEASE_CODE 20071224 +#define RUBY_RELEASE_CODE 20071225 #define RUBY_PATCHLEVEL 0 #define RUBY_VERSION_MAJOR 1 @@ -9,7 +9,7 @@ #define RUBY_VERSION_TEENY 0 #define RUBY_RELEASE_YEAR 2007 #define RUBY_RELEASE_MONTH 12 -#define RUBY_RELEASE_DAY 24 +#define RUBY_RELEASE_DAY 25 #ifdef RUBY_EXTERN RUBY_EXTERN const char ruby_version[];