diff --git a/ChangeLog b/ChangeLog index e88ebeae73..dd700e5af7 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,16 @@ +Thu May 8 01:10:03 2008 Yukihiro Matsumoto + + * string.c (tr_trans): should squeeze properly. [ruby-dev:34587] + + * string.c (tr_trans): had a bug in treating multi-byte character + replacement. + + * string.c (rb_str_delete_bang): need not do anything for empty + strings. + + * test/ruby/test_m17n_comb.rb (TestM17NComb::test_str_delete): add + test for empty receiver. + Wed May 7 20:19:18 2008 NAKAMURA Usaku * ruby.c (process_options, ruby_set_argv): set encoding of rb_argv diff --git a/string.c b/string.c index 6eb213fa7f..703a88e281 100644 --- a/string.c +++ b/string.c @@ -4219,25 +4219,21 @@ static VALUE rb_str_delete_bang(int,VALUE*,VALUE); static VALUE tr_trans(VALUE str, VALUE src, VALUE repl, int sflag) { - SIGNED_VALUE trans[256]; + int trans[256]; rb_encoding *enc, *e1, *e2; struct tr trsrc, trrepl; int cflag = 0; - int c, last = 0, modify = 0, i; + int c, c0, last = 0, modify = 0, i, l; char *s, *send; VALUE hash = 0; StringValue(src); StringValue(repl); if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str)) return Qnil; - trsrc.p = RSTRING_PTR(src); trsrc.pend = trsrc.p + RSTRING_LEN(src); - if (RSTRING_LEN(src) >= 2 && RSTRING_PTR(src)[0] == '^') { - cflag++; - trsrc.p++; - } if (RSTRING_LEN(repl) == 0) { return rb_str_delete_bang(1, &src, str); } + e1 = rb_enc_check(str, src); e2 = rb_enc_check(str, repl); if (e1 == e2) { @@ -4246,6 +4242,11 @@ tr_trans(VALUE str, VALUE src, VALUE repl, int sflag) else { enc = rb_enc_check(src, repl); } + trsrc.p = RSTRING_PTR(src); trsrc.pend = trsrc.p + RSTRING_LEN(src); + if (RSTRING_LEN(str) > 1 && rb_enc_ascget(trsrc.p, trsrc.pend, &l, enc) == '^') { + cflag = 1; + trsrc.p += l; + } trrepl.p = RSTRING_PTR(repl); trrepl.pend = trrepl.p + RSTRING_LEN(repl); trsrc.gen = trrepl.gen = 0; @@ -4284,7 +4285,7 @@ tr_trans(VALUE str, VALUE src, VALUE repl, int sflag) r = trnext(&trrepl, enc); if (r == -1) 
r = trrepl.now; if (c < 256) { - trans[c] = INT2NUM(r); + trans[c] = r; } else { if (!hash) hash = rb_hash_new(); @@ -4299,35 +4300,37 @@ tr_trans(VALUE str, VALUE src, VALUE repl, int sflag) int clen, tlen, max = RSTRING_LEN(str); int offset, save = -1; char *buf = ALLOC_N(char, max), *t = buf; - VALUE v; - if (cflag) tlen = rb_enc_codelen(last, enc); while (s < send) { - c = rb_enc_codepoint(s, send, enc); + c0 = c = rb_enc_codepoint(s, send, enc); tlen = clen = rb_enc_codelen(c, enc); s += clen; if (c < 256) { - v = trans[c] >= 0 ? trans[c] : Qnil; + c = trans[c]; + } + else if (hash) { + VALUE tmp = rb_hash_lookup(hash, INT2NUM(c)); + if (NIL_P(tmp)) { + if (cflag) c = last; + else c = -1; + } + else if (cflag) c = -1; + else c = NUM2INT(tmp); } else { - v = hash ? rb_hash_aref(hash, INT2NUM(c)) : Qnil; + c = -1; } - if (!NIL_P(v)) { - if (!cflag) { - c = NUM2INT(v); - if (save == c) continue; - save = c; - tlen = rb_enc_codelen(c, enc); - modify = 1; - } - else { - save = c = last; - modify = 1; - } + if (c >= 0) { + if (save == c) continue; + save = c; + tlen = rb_enc_codelen(c, enc); + modify = 1; } else { save = -1; + modify = 1; + c = c0; } while (t - buf + tlen >= max) { offset = t - buf; @@ -4349,7 +4352,7 @@ tr_trans(VALUE str, VALUE src, VALUE repl, int sflag) c = (unsigned char)*s; if (trans[c] >= 0) { if (!cflag) { - c = FIX2INT(trans[c]); + c = trans[c]; *s = c; modify = 1; } @@ -4367,27 +4370,32 @@ tr_trans(VALUE str, VALUE src, VALUE repl, int sflag) char *buf = ALLOC_N(char, max), *t = buf; VALUE v; - if (cflag) tlen = rb_enc_codelen(last, enc); while (s < send) { - c = rb_enc_codepoint(s, send, enc); + c0 = c = rb_enc_codepoint(s, send, enc); tlen = clen = rb_enc_codelen(c, enc); if (c < 256) { - v = trans[c] >= 0 ? 
trans[c] : Qnil; + c = trans[c]; + } + else if (hash) { + VALUE tmp = rb_hash_lookup(hash, INT2NUM(c)); + if (NIL_P(tmp)) { + if (cflag) c = last; + else c = -1; + } + else if (cflag) c = -1; + else c = NUM2INT(tmp); } else { - v = hash ? rb_hash_aref(hash, INT2NUM(c)) : Qnil; + c = -1; } - if (!NIL_P(v)) { - if (!cflag) { - c = NUM2INT(v); - tlen = rb_enc_codelen(c, enc); - modify = 1; - } - else { - c = last; - modify = 1; - } + if (c >= 0) { + tlen = rb_enc_codelen(c, enc); + modify = 1; + } + else { + modify = 1; + c = c0; } while (t - buf + tlen >= max) { offset = t - buf; @@ -4548,8 +4556,10 @@ rb_str_delete_bang(int argc, VALUE *argv, VALUE str) VALUE del = 0, nodel = 0; int modify = 0; int i; - int cr = ENC_CODERANGE(str); + int cr; + if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str)) return Qnil; + cr = ENC_CODERANGE(str); if (argc < 1) { rb_raise(rb_eArgError, "wrong number of arguments"); } diff --git a/test/ruby/test_m17n_comb.rb b/test/ruby/test_m17n_comb.rb index 26efa85ea2..44e7eeff20 100644 --- a/test/ruby/test_m17n_comb.rb +++ b/test/ruby/test_m17n_comb.rb @@ -804,6 +804,10 @@ class TestM17NComb < Test::Unit::TestCase def test_str_delete combination(STRINGS, STRINGS) {|s1, s2| + if s1.empty? + assert_equal(s1, s1.delete(s2)) + next + end if !s1.valid_encoding? || !s2.valid_encoding? assert_raise(ArgumentError) { s1.delete(s2) } next diff --git a/version.h b/version.h index 73d572e9dd..5a250d845f 100644 --- a/version.h +++ b/version.h @@ -1,7 +1,7 @@ #define RUBY_VERSION "1.9.0" -#define RUBY_RELEASE_DATE "2008-05-07" +#define RUBY_RELEASE_DATE "2008-05-08" #define RUBY_VERSION_CODE 190 -#define RUBY_RELEASE_CODE 20080507 +#define RUBY_RELEASE_CODE 20080508 #define RUBY_PATCHLEVEL 0 #define RUBY_VERSION_MAJOR 1 @@ -9,7 +9,7 @@ #define RUBY_VERSION_TEENY 0 #define RUBY_RELEASE_YEAR 2008 #define RUBY_RELEASE_MONTH 5 -#define RUBY_RELEASE_DAY 7 +#define RUBY_RELEASE_DAY 8 #ifdef RUBY_EXTERN RUBY_EXTERN const char ruby_version[];