diff --git a/ChangeLog b/ChangeLog index 59be6b1ca3..7d648fc3ae 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,12 @@ +Fri Dec 28 23:53:18 2007 Tanaka Akira + + * ext/strscan/strscan.c (str_new): new function to allocate a string + with encoding propagation. + (extract_range): use str_new. + (extract_beg_len): ditto. + (strscan_peek): ditto. + (strscan_rest): ditto. + Fri Dec 28 20:18:42 2007 WATANABE Hirofumi * golf_prelude.rb (Object.say): derived from Perl 5.10. diff --git a/ext/strscan/strscan.c b/ext/strscan/strscan.c index fc4a7cf4e1..8c94d1c60d 100644 --- a/ext/strscan/strscan.c +++ b/ext/strscan/strscan.c @@ -126,13 +126,21 @@ infect(VALUE str, struct strscanner *p) return str; } +static VALUE +str_new(struct strscanner *p, const char *ptr, long len) +{ + VALUE str = rb_str_new(ptr, len); + rb_enc_copy(str, p->str); + return str; +} + static VALUE extract_range(struct strscanner *p, long beg_i, long end_i) { if (beg_i > S_LEN(p)) return Qnil; if (end_i > S_LEN(p)) end_i = S_LEN(p); - return infect(rb_str_new(S_PBEG(p) + beg_i, end_i - beg_i), p); + return infect(str_new(p, S_PBEG(p) + beg_i, end_i - beg_i), p); } static VALUE @@ -141,7 +149,7 @@ extract_beg_len(struct strscanner *p, long beg_i, long len) if (beg_i > S_LEN(p)) return Qnil; if (beg_i + len > S_LEN(p)) len = S_LEN(p) - beg_i; - return infect(rb_str_new(S_PBEG(p) + beg_i, len), p); + return infect(str_new(p, S_PBEG(p) + beg_i, len), p); } /* ======================================================================= @@ -737,7 +745,7 @@ strscan_peek(VALUE self, VALUE vlen) len = NUM2LONG(vlen); if (EOS_P(p)) - return infect(rb_str_new("", 0), p); + return infect(str_new(p, "", 0), p); if (p->curr + len > S_LEN(p)) len = S_LEN(p) - p->curr; @@ -999,7 +1007,7 @@ strscan_rest(VALUE self) GET_SCANNER(self, p); if (EOS_P(p)) { - return infect(rb_str_new("", 0), p); + return infect(str_new(p, "", 0), p); } return extract_range(p, p->curr, S_LEN(p)); } diff --git a/include/ruby/encoding.h 
b/include/ruby/encoding.h index 50516b3c0a..cbe4f88743 100644 --- a/include/ruby/encoding.h +++ b/include/ruby/encoding.h @@ -51,7 +51,7 @@ rb_encoding* rb_enc_compatible(VALUE,VALUE); rb_encoding* rb_enc_check(VALUE,VALUE); void rb_enc_associate_index(VALUE, int); void rb_enc_associate(VALUE, rb_encoding*); -void rb_enc_copy(VALUE, VALUE); +void rb_enc_copy(VALUE dst, VALUE src); VALUE rb_enc_str_new(const char*, long len, rb_encoding*); long rb_enc_strlen(const char*, const char*, rb_encoding*); diff --git a/test/strscan/test_stringscanner.rb b/test/strscan/test_stringscanner.rb index c918368435..687aaa575c 100644 --- a/test/strscan/test_stringscanner.rb +++ b/test/strscan/test_stringscanner.rb @@ -289,7 +289,7 @@ class TestStringScanner < Test::Unit::TestCase assert_nil s.getch s = StringScanner.new("\244\242".force_encoding("euc-jp")) - assert_equal "\244\242", s.getch + assert_equal "\244\242".force_encoding("euc-jp"), s.getch assert_nil s.getch s = StringScanner.new('test') @@ -317,8 +317,8 @@ class TestStringScanner < Test::Unit::TestCase assert_nil s.get_byte s = StringScanner.new("\244\242".force_encoding("euc-jp")) - assert_equal "\244", s.get_byte - assert_equal "\242", s.get_byte + assert_equal "\244".force_encoding("euc-jp"), s.get_byte + assert_equal "\242".force_encoding("euc-jp"), s.get_byte assert_nil s.get_byte s = StringScanner.new('test') @@ -414,7 +414,7 @@ class TestStringScanner < Test::Unit::TestCase s = StringScanner.new("\244\242".force_encoding("euc-jp")) s.getch - assert_equal "\244\242", s[0] + assert_equal "\244\242".force_encoding("euc-jp"), s[0] str = 'test' str.taint @@ -536,4 +536,9 @@ class TestStringScanner < Test::Unit::TestCase s.terminate assert_nil s.matched_size end + + def test_encoding + ss = StringScanner.new("\xA1\xA2".force_encoding("euc-jp")) + assert_equal(Encoding::EUC_JP, ss.scan(/./e).encoding) + end end