mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
string.c: infection
* string.c (rb_str_scrub): the result should be infected by the original string.

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@53169 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
parent
eec8adb168
commit
61c19c9d43
3 changed files with 34 additions and 12 deletions
|
@@ -1,3 +1,8 @@
|
||||||
|
Thu Dec 17 14:16:29 2015 Nobuyoshi Nakada <nobu@ruby-lang.org>
|
||||||
|
|
||||||
|
* string.c (rb_str_scrub): the result should be infected by the
|
||||||
|
original string.
|
||||||
|
|
||||||
Thu Dec 17 13:35:27 2015 Nobuyoshi Nakada <nobu@ruby-lang.org>
|
Thu Dec 17 13:35:27 2015 Nobuyoshi Nakada <nobu@ruby-lang.org>
|
||||||
|
|
||||||
* transcode.c (rb_econv_substr_append, econv_primitive_convert):
|
* transcode.c (rb_econv_substr_append, econv_primitive_convert):
|
||||||
|
|
23
string.c
23
string.c
|
@@ -8667,6 +8667,10 @@ rb_str_scrub(VALUE str, VALUE repl)
|
||||||
int cr = ENC_CODERANGE(str);
|
int cr = ENC_CODERANGE(str);
|
||||||
rb_encoding *enc;
|
rb_encoding *enc;
|
||||||
int encidx;
|
int encidx;
|
||||||
|
VALUE buf = Qnil;
|
||||||
|
const char *rep;
|
||||||
|
long replen;
|
||||||
|
int tainted = 0;
|
||||||
|
|
||||||
if (ENC_CODERANGE_CLEAN_P(cr))
|
if (ENC_CODERANGE_CLEAN_P(cr))
|
||||||
return Qnil;
|
return Qnil;
|
||||||
|
@@ -8674,6 +8678,7 @@ rb_str_scrub(VALUE str, VALUE repl)
|
||||||
enc = STR_ENC_GET(str);
|
enc = STR_ENC_GET(str);
|
||||||
if (!NIL_P(repl)) {
|
if (!NIL_P(repl)) {
|
||||||
repl = str_compat_and_valid(repl, enc);
|
repl = str_compat_and_valid(repl, enc);
|
||||||
|
tainted = OBJ_TAINTED_RAW(repl);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (rb_enc_dummy_p(enc)) {
|
if (rb_enc_dummy_p(enc)) {
|
||||||
|
@@ -8690,10 +8695,7 @@ rb_str_scrub(VALUE str, VALUE repl)
|
||||||
const char *p = RSTRING_PTR(str);
|
const char *p = RSTRING_PTR(str);
|
||||||
const char *e = RSTRING_END(str);
|
const char *e = RSTRING_END(str);
|
||||||
const char *p1 = p;
|
const char *p1 = p;
|
||||||
const char *rep;
|
|
||||||
long replen;
|
|
||||||
int rep7bit_p;
|
int rep7bit_p;
|
||||||
VALUE buf = Qnil;
|
|
||||||
if (rb_block_given_p()) {
|
if (rb_block_given_p()) {
|
||||||
rep = NULL;
|
rep = NULL;
|
||||||
replen = 0;
|
replen = 0;
|
||||||
|
@@ -8759,6 +8761,7 @@ rb_str_scrub(VALUE str, VALUE repl)
|
||||||
else {
|
else {
|
||||||
repl = rb_yield(rb_enc_str_new(p, clen, enc));
|
repl = rb_yield(rb_enc_str_new(p, clen, enc));
|
||||||
repl = str_compat_and_valid(repl, enc);
|
repl = str_compat_and_valid(repl, enc);
|
||||||
|
tainted |= OBJ_TAINTED_RAW(repl);
|
||||||
rb_str_buf_cat(buf, RSTRING_PTR(repl), RSTRING_LEN(repl));
|
rb_str_buf_cat(buf, RSTRING_PTR(repl), RSTRING_LEN(repl));
|
||||||
if (ENC_CODERANGE(repl) == ENC_CODERANGE_VALID)
|
if (ENC_CODERANGE(repl) == ENC_CODERANGE_VALID)
|
||||||
cr = ENC_CODERANGE_VALID;
|
cr = ENC_CODERANGE_VALID;
|
||||||
|
@@ -8793,22 +8796,18 @@ rb_str_scrub(VALUE str, VALUE repl)
|
||||||
else {
|
else {
|
||||||
repl = rb_yield(rb_enc_str_new(p, e-p, enc));
|
repl = rb_yield(rb_enc_str_new(p, e-p, enc));
|
||||||
repl = str_compat_and_valid(repl, enc);
|
repl = str_compat_and_valid(repl, enc);
|
||||||
|
tainted |= OBJ_TAINTED_RAW(repl);
|
||||||
rb_str_buf_cat(buf, RSTRING_PTR(repl), RSTRING_LEN(repl));
|
rb_str_buf_cat(buf, RSTRING_PTR(repl), RSTRING_LEN(repl));
|
||||||
if (ENC_CODERANGE(repl) == ENC_CODERANGE_VALID)
|
if (ENC_CODERANGE(repl) == ENC_CODERANGE_VALID)
|
||||||
cr = ENC_CODERANGE_VALID;
|
cr = ENC_CODERANGE_VALID;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
ENCODING_CODERANGE_SET(buf, rb_enc_to_index(enc), cr);
|
|
||||||
return buf;
|
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
/* ASCII incompatible */
|
/* ASCII incompatible */
|
||||||
const char *p = RSTRING_PTR(str);
|
const char *p = RSTRING_PTR(str);
|
||||||
const char *e = RSTRING_END(str);
|
const char *e = RSTRING_END(str);
|
||||||
const char *p1 = p;
|
const char *p1 = p;
|
||||||
VALUE buf = Qnil;
|
|
||||||
const char *rep;
|
|
||||||
long replen;
|
|
||||||
long mbminlen = rb_enc_mbminlen(enc);
|
long mbminlen = rb_enc_mbminlen(enc);
|
||||||
if (!NIL_P(repl)) {
|
if (!NIL_P(repl)) {
|
||||||
rep = RSTRING_PTR(repl);
|
rep = RSTRING_PTR(repl);
|
||||||
|
@@ -8863,6 +8862,7 @@ rb_str_scrub(VALUE str, VALUE repl)
|
||||||
else {
|
else {
|
||||||
repl = rb_yield(rb_enc_str_new(p, e-p, enc));
|
repl = rb_yield(rb_enc_str_new(p, e-p, enc));
|
||||||
repl = str_compat_and_valid(repl, enc);
|
repl = str_compat_and_valid(repl, enc);
|
||||||
|
tainted |= OBJ_TAINTED_RAW(repl);
|
||||||
rb_str_buf_cat(buf, RSTRING_PTR(repl), RSTRING_LEN(repl));
|
rb_str_buf_cat(buf, RSTRING_PTR(repl), RSTRING_LEN(repl));
|
||||||
}
|
}
|
||||||
p += clen;
|
p += clen;
|
||||||
|
@@ -8889,12 +8889,15 @@ rb_str_scrub(VALUE str, VALUE repl)
|
||||||
else {
|
else {
|
||||||
repl = rb_yield(rb_enc_str_new(p, e-p, enc));
|
repl = rb_yield(rb_enc_str_new(p, e-p, enc));
|
||||||
repl = str_compat_and_valid(repl, enc);
|
repl = str_compat_and_valid(repl, enc);
|
||||||
|
tainted |= OBJ_TAINTED_RAW(repl);
|
||||||
rb_str_buf_cat(buf, RSTRING_PTR(repl), RSTRING_LEN(repl));
|
rb_str_buf_cat(buf, RSTRING_PTR(repl), RSTRING_LEN(repl));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
ENCODING_CODERANGE_SET(buf, rb_enc_to_index(enc), ENC_CODERANGE_VALID);
|
cr = ENC_CODERANGE_VALID;
|
||||||
return buf;
|
|
||||||
}
|
}
|
||||||
|
FL_SET_RAW(buf, tainted|OBJ_TAINTED_RAW(str));
|
||||||
|
ENCODING_CODERANGE_SET(buf, rb_enc_to_index(enc), cr);
|
||||||
|
return buf;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
|
@@ -1560,27 +1560,32 @@ class TestM17N < Test::Unit::TestCase
|
||||||
assert_equal(a("\xE3\x81\x82"), s.b)
|
assert_equal(a("\xE3\x81\x82"), s.b)
|
||||||
assert_equal(Encoding::ASCII_8BIT, s.b.encoding)
|
assert_equal(Encoding::ASCII_8BIT, s.b.encoding)
|
||||||
s.taint
|
s.taint
|
||||||
assert_equal(true, s.b.tainted?)
|
assert_predicate(s.b, :tainted?)
|
||||||
s = "abc".b
|
s = "abc".b
|
||||||
assert_equal(true, s.b.ascii_only?)
|
assert_predicate(s.b, :ascii_only?)
|
||||||
end
|
end
|
||||||
|
|
||||||
def test_scrub_valid_string
|
def test_scrub_valid_string
|
||||||
str = "foo"
|
str = "foo"
|
||||||
assert_equal(str, str.scrub)
|
assert_equal(str, str.scrub)
|
||||||
assert_not_same(str, str.scrub)
|
assert_not_same(str, str.scrub)
|
||||||
|
assert_predicate(str.dup.taint.scrub, :tainted?)
|
||||||
str = "\u3042\u3044"
|
str = "\u3042\u3044"
|
||||||
assert_equal(str, str.scrub)
|
assert_equal(str, str.scrub)
|
||||||
assert_not_same(str, str.scrub)
|
assert_not_same(str, str.scrub)
|
||||||
|
assert_predicate(str.dup.taint.scrub, :tainted?)
|
||||||
str.force_encoding(Encoding::ISO_2022_JP) # dummy encoding
|
str.force_encoding(Encoding::ISO_2022_JP) # dummy encoding
|
||||||
assert_equal(str, str.scrub)
|
assert_equal(str, str.scrub)
|
||||||
assert_not_same(str, str.scrub)
|
assert_not_same(str, str.scrub)
|
||||||
assert_nothing_raised(ArgumentError) {str.scrub(nil)}
|
assert_nothing_raised(ArgumentError) {str.scrub(nil)}
|
||||||
|
assert_predicate(str.dup.taint.scrub, :tainted?)
|
||||||
end
|
end
|
||||||
|
|
||||||
def test_scrub_replace_default
|
def test_scrub_replace_default
|
||||||
assert_equal("\uFFFD\uFFFD\uFFFD", u("\x80\x80\x80").scrub)
|
assert_equal("\uFFFD\uFFFD\uFFFD", u("\x80\x80\x80").scrub)
|
||||||
assert_equal("\uFFFDA", u("\xF4\x80\x80A").scrub)
|
assert_equal("\uFFFDA", u("\xF4\x80\x80A").scrub)
|
||||||
|
assert_predicate(u("\x80\x80\x80").taint.scrub, :tainted?)
|
||||||
|
assert_predicate(u("\xF4\x80\x80A").taint.scrub, :tainted?)
|
||||||
|
|
||||||
# examples in Unicode 6.1.0 D93b
|
# examples in Unicode 6.1.0 D93b
|
||||||
assert_equal("\x41\uFFFD\uFFFD\x41\uFFFD\x41",
|
assert_equal("\x41\uFFFD\uFFFD\x41\uFFFD\x41",
|
||||||
|
@@ -1595,7 +1600,14 @@ class TestM17N < Test::Unit::TestCase
|
||||||
|
|
||||||
def test_scrub_replace_argument
|
def test_scrub_replace_argument
|
||||||
assert_equal("foo", u("foo").scrub("\u3013"))
|
assert_equal("foo", u("foo").scrub("\u3013"))
|
||||||
|
assert_predicate(u("foo").taint.scrub("\u3013"), :tainted?)
|
||||||
|
assert_not_predicate(u("foo").scrub("\u3013".taint), :tainted?)
|
||||||
|
assert_equal("\u3042\u3044", u("\xE3\x81\x82\xE3\x81\x84").scrub("\u3013"))
|
||||||
|
assert_predicate(u("\xE3\x81\x82\xE3\x81\x84").taint.scrub("\u3013"), :tainted?)
|
||||||
|
assert_not_predicate(u("\xE3\x81\x82\xE3\x81\x84").scrub("\u3013".taint), :tainted?)
|
||||||
assert_equal("\u3042\u3013", u("\xE3\x81\x82\xE3\x81").scrub("\u3013"))
|
assert_equal("\u3042\u3013", u("\xE3\x81\x82\xE3\x81").scrub("\u3013"))
|
||||||
|
assert_predicate(u("\xE3\x81\x82\xE3\x81").taint.scrub("\u3013"), :tainted?)
|
||||||
|
assert_predicate(u("\xE3\x81\x82\xE3\x81").scrub("\u3013".taint), :tainted?)
|
||||||
assert_raise(Encoding::CompatibilityError){ u("\xE3\x81\x82\xE3\x81").scrub(e("\xA4\xA2")) }
|
assert_raise(Encoding::CompatibilityError){ u("\xE3\x81\x82\xE3\x81").scrub(e("\xA4\xA2")) }
|
||||||
assert_raise(TypeError){ u("\xE3\x81\x82\xE3\x81").scrub(1) }
|
assert_raise(TypeError){ u("\xE3\x81\x82\xE3\x81").scrub(1) }
|
||||||
assert_raise(ArgumentError){ u("\xE3\x81\x82\xE3\x81\x82\xE3\x81").scrub(u("\x81")) }
|
assert_raise(ArgumentError){ u("\xE3\x81\x82\xE3\x81\x82\xE3\x81").scrub(u("\x81")) }
|
||||||
|
@@ -1604,6 +1616,8 @@ class TestM17N < Test::Unit::TestCase
|
||||||
|
|
||||||
def test_scrub_replace_block
|
def test_scrub_replace_block
|
||||||
assert_equal("\u3042<e381>", u("\xE3\x81\x82\xE3\x81").scrub{|x|'<'+x.unpack('H*')[0]+'>'})
|
assert_equal("\u3042<e381>", u("\xE3\x81\x82\xE3\x81").scrub{|x|'<'+x.unpack('H*')[0]+'>'})
|
||||||
|
assert_predicate(u("\xE3\x81\x82\xE3\x81").taint.scrub{|x|'<'+x.unpack('H*')[0]+'>'}, :tainted?)
|
||||||
|
assert_predicate(u("\xE3\x81\x82\xE3\x81").scrub{|x|('<'+x.unpack('H*')[0]+'>').taint}, :tainted?)
|
||||||
assert_raise(Encoding::CompatibilityError){ u("\xE3\x81\x82\xE3\x81").scrub{e("\xA4\xA2")} }
|
assert_raise(Encoding::CompatibilityError){ u("\xE3\x81\x82\xE3\x81").scrub{e("\xA4\xA2")} }
|
||||||
assert_raise(TypeError){ u("\xE3\x81\x82\xE3\x81").scrub{1} }
|
assert_raise(TypeError){ u("\xE3\x81\x82\xE3\x81").scrub{1} }
|
||||||
assert_raise(ArgumentError){ u("\xE3\x81\x82\xE3\x81\x82\xE3\x81").scrub{u("\x81")} }
|
assert_raise(ArgumentError){ u("\xE3\x81\x82\xE3\x81\x82\xE3\x81").scrub{u("\x81")} }
|
||||||
|
|
Loading…
Reference in a new issue