From 4fad63da01789ad3b562833c97d289d2abfeff19 Mon Sep 17 00:00:00 2001 From: nobu Date: Fri, 27 May 2016 08:09:46 +0000 Subject: [PATCH] transcode.c: scrub in the given encoding * transcode.c (str_transcode0): scrub in the given encoding when the source encoding is given, not in the encoding of the receiver. [ruby-core:75732] [Bug #12431] git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@55181 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- ChangeLog | 6 ++++++ internal.h | 1 + string.c | 8 ++++++-- test/ruby/test_transcode.rb | 3 +++ transcode.c | 2 +- 5 files changed, 17 insertions(+), 3 deletions(-) diff --git a/ChangeLog b/ChangeLog index 7de6ec6891..6fe7d64c60 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +Fri May 27 17:09:44 2016 Nobuyoshi Nakada + + * transcode.c (str_transcode0): scrub in the given encoding when + the source encoding is given, not in the encoding of the + receiver. [ruby-core:75732] [Bug #12431] + Fri May 27 15:07:32 2016 Nobuyoshi Nakada * include/ruby/ruby.h (rb_scan_args): remove nul padding which diff --git a/internal.h b/internal.h index 0f16fcc94b..38c7986734 100644 --- a/internal.h +++ b/internal.h @@ -1356,6 +1356,7 @@ VALUE rb_str_locktmp_ensure(VALUE str, VALUE (*func)(VALUE), VALUE arg); VALUE rb_external_str_with_enc(VALUE str, rb_encoding *eenc); VALUE rb_str_cat_conv_enc_opts(VALUE newstr, long ofs, const char *ptr, long len, rb_encoding *from, int ecflags, VALUE ecopts); +VALUE rb_enc_str_scrub(rb_encoding *enc, VALUE str, VALUE repl); #endif #define STR_NOEMBED FL_USER1 #define STR_SHARED FL_USER2 /* = ELTS_SHARED */ diff --git a/string.c b/string.c index 049b088a0d..80b668d52d 100644 --- a/string.c +++ b/string.c @@ -8926,9 +8926,14 @@ str_compat_and_valid(VALUE str, rb_encoding *enc) */ VALUE rb_str_scrub(VALUE str, VALUE repl) +{ + return rb_enc_str_scrub(STR_ENC_GET(str), str, repl); +} + +VALUE +rb_enc_str_scrub(rb_encoding *enc, VALUE str, VALUE repl) { int cr = ENC_CODERANGE(str); - rb_encoding *enc; int encidx; VALUE buf = Qnil; const char *rep; @@ -8938,7 +8943,6 @@ rb_str_scrub(VALUE str, VALUE repl) if (ENC_CODERANGE_CLEAN_P(cr)) return Qnil; - enc = STR_ENC_GET(str); if (!NIL_P(repl)) { repl = str_compat_and_valid(repl, enc); tainted = OBJ_TAINTED_RAW(repl); diff --git a/test/ruby/test_transcode.rb b/test/ruby/test_transcode.rb index 33e7467102..2f97d098fd 100644 --- a/test/ruby/test_transcode.rb +++ b/test/ruby/test_transcode.rb @@ -1213,6 +1213,9 @@ class TestTranscode < Test::Unit::TestCase def test_invalid_replace_string assert_equal("aA", "a\x80A".encode("us-ascii", "euc-jp", :invalid=>:replace, :replace=>"")) assert_equal("aA", "a\x80A".encode("us-ascii", "euc-jis-2004", :invalid=>:replace, :replace=>"")) + s = "abcd\u{c1}" + r = s.b.encode("UTF-8", "UTF-8", invalid: :replace, replace: "\u{fffd}") + assert_equal(s, r) end def test_undef_replace diff --git a/transcode.c b/transcode.c index 0514adaf7e..8aa9cf3352 100644 --- a/transcode.c +++ b/transcode.c @@ -2700,7 +2700,7 @@ str_transcode0(int argc, VALUE *argv, VALUE *self, int ecflags, VALUE ecopts) if (!NIL_P(ecopts)) { rep = rb_hash_aref(ecopts, sym_replace); } - dest = rb_str_scrub(str, rep); + dest = rb_enc_str_scrub(senc, str, rep); if (NIL_P(dest)) dest = str; *self = dest; return dencidx;