1
0
Fork 0
mirror of https://github.com/ruby/ruby.git synced 2022-11-09 12:17:21 -05:00

transcode.c: scrub in the given encoding

* transcode.c (str_transcode0): scrub in the given encoding when
  the source encoding is given, not in the encoding of the
  receiver.  [ruby-core:75732] [Bug #12431]

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@55181 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
nobu 2016-05-27 08:09:46 +00:00
parent be1ceb2723
commit 4fad63da01
5 changed files with 17 additions and 3 deletions

View file

@@ -1,3 +1,9 @@
Fri May 27 17:09:44 2016 Nobuyoshi Nakada <nobu@ruby-lang.org>
* transcode.c (str_transcode0): scrub in the given encoding when
the source encoding is given, not in the encoding of the
receiver. [ruby-core:75732] [Bug #12431]
Fri May 27 15:07:32 2016 Nobuyoshi Nakada <nobu@ruby-lang.org> Fri May 27 15:07:32 2016 Nobuyoshi Nakada <nobu@ruby-lang.org>
* include/ruby/ruby.h (rb_scan_args): remove nul padding which * include/ruby/ruby.h (rb_scan_args): remove nul padding which

View file

@@ -1356,6 +1356,7 @@ VALUE rb_str_locktmp_ensure(VALUE str, VALUE (*func)(VALUE), VALUE arg);
VALUE rb_external_str_with_enc(VALUE str, rb_encoding *eenc); VALUE rb_external_str_with_enc(VALUE str, rb_encoding *eenc);
VALUE rb_str_cat_conv_enc_opts(VALUE newstr, long ofs, const char *ptr, long len, VALUE rb_str_cat_conv_enc_opts(VALUE newstr, long ofs, const char *ptr, long len,
rb_encoding *from, int ecflags, VALUE ecopts); rb_encoding *from, int ecflags, VALUE ecopts);
VALUE rb_enc_str_scrub(rb_encoding *enc, VALUE str, VALUE repl);
#endif #endif
#define STR_NOEMBED FL_USER1 #define STR_NOEMBED FL_USER1
#define STR_SHARED FL_USER2 /* = ELTS_SHARED */ #define STR_SHARED FL_USER2 /* = ELTS_SHARED */

View file

@@ -8926,9 +8926,14 @@ str_compat_and_valid(VALUE str, rb_encoding *enc)
*/ */
VALUE VALUE
rb_str_scrub(VALUE str, VALUE repl) rb_str_scrub(VALUE str, VALUE repl)
{
return rb_enc_str_scrub(STR_ENC_GET(str), str, repl);
}
VALUE
rb_enc_str_scrub(rb_encoding *enc, VALUE str, VALUE repl)
{ {
int cr = ENC_CODERANGE(str); int cr = ENC_CODERANGE(str);
rb_encoding *enc;
int encidx; int encidx;
VALUE buf = Qnil; VALUE buf = Qnil;
const char *rep; const char *rep;
@@ -8938,7 +8943,6 @@ rb_str_scrub(VALUE str, VALUE repl)
if (ENC_CODERANGE_CLEAN_P(cr)) if (ENC_CODERANGE_CLEAN_P(cr))
return Qnil; return Qnil;
enc = STR_ENC_GET(str);
if (!NIL_P(repl)) { if (!NIL_P(repl)) {
repl = str_compat_and_valid(repl, enc); repl = str_compat_and_valid(repl, enc);
tainted = OBJ_TAINTED_RAW(repl); tainted = OBJ_TAINTED_RAW(repl);

View file

@@ -1213,6 +1213,9 @@ class TestTranscode < Test::Unit::TestCase
def test_invalid_replace_string def test_invalid_replace_string
assert_equal("a<x>A", "a\x80A".encode("us-ascii", "euc-jp", :invalid=>:replace, :replace=>"<x>")) assert_equal("a<x>A", "a\x80A".encode("us-ascii", "euc-jp", :invalid=>:replace, :replace=>"<x>"))
assert_equal("a<x>A", "a\x80A".encode("us-ascii", "euc-jis-2004", :invalid=>:replace, :replace=>"<x>")) assert_equal("a<x>A", "a\x80A".encode("us-ascii", "euc-jis-2004", :invalid=>:replace, :replace=>"<x>"))
s = "abcd\u{c1}"
r = s.b.encode("UTF-8", "UTF-8", invalid: :replace, replace: "\u{fffd}")
assert_equal(s, r)
end end
def test_undef_replace def test_undef_replace

View file

@@ -2700,7 +2700,7 @@ str_transcode0(int argc, VALUE *argv, VALUE *self, int ecflags, VALUE ecopts)
if (!NIL_P(ecopts)) { if (!NIL_P(ecopts)) {
rep = rb_hash_aref(ecopts, sym_replace); rep = rb_hash_aref(ecopts, sym_replace);
} }
dest = rb_str_scrub(str, rep); dest = rb_enc_str_scrub(senc, str, rep);
if (NIL_P(dest)) dest = str; if (NIL_P(dest)) dest = str;
*self = dest; *self = dest;
return dencidx; return dencidx;