mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
string.c: fix false coderange
* string.c (rb_enc_str_scrub): enc can differ from the actual encoding of the string; in that case the cached coderange is useless and must not be reused. [ruby-core:82674] [Bug #13874]
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@59763 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
parent
faa26f5570
commit
bd10ce165c
2 changed files with 36 additions and 9 deletions
18
string.c
18
string.c
|
@@ -9553,6 +9553,8 @@ str_compat_and_valid(VALUE str, rb_encoding *enc)
|
||||||
return str;
|
return str;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static VALUE enc_str_scrub(rb_encoding *enc, VALUE str, VALUE repl, int cr);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @param str the string to be scrubbed
|
* @param str the string to be scrubbed
|
||||||
* @param repl the replacement character
|
* @param repl the replacement character
|
||||||
|
@@ -9561,13 +9563,25 @@ str_compat_and_valid(VALUE str, rb_encoding *enc)
|
||||||
VALUE
|
VALUE
|
||||||
rb_str_scrub(VALUE str, VALUE repl)
|
rb_str_scrub(VALUE str, VALUE repl)
|
||||||
{
|
{
|
||||||
return rb_enc_str_scrub(STR_ENC_GET(str), str, repl);
|
rb_encoding *enc = STR_ENC_GET(str);
|
||||||
|
return enc_str_scrub(enc, str, repl, ENC_CODERANGE(str));
|
||||||
}
|
}
|
||||||
|
|
||||||
VALUE
|
VALUE
|
||||||
rb_enc_str_scrub(rb_encoding *enc, VALUE str, VALUE repl)
|
rb_enc_str_scrub(rb_encoding *enc, VALUE str, VALUE repl)
|
||||||
{
|
{
|
||||||
int cr = ENC_CODERANGE(str);
|
int cr = ENC_CODERANGE_UNKNOWN;
|
||||||
|
if (enc == STR_ENC_GET(str)) {
|
||||||
|
/* cached coderange makes sense only when enc equals the
|
||||||
|
* actual encoding of str */
|
||||||
|
cr = ENC_CODERANGE(str);
|
||||||
|
}
|
||||||
|
return enc_str_scrub(enc, str, repl, cr);
|
||||||
|
}
|
||||||
|
|
||||||
|
static VALUE
|
||||||
|
enc_str_scrub(rb_encoding *enc, VALUE str, VALUE repl, int cr)
|
||||||
|
{
|
||||||
int encidx;
|
int encidx;
|
||||||
VALUE buf = Qnil;
|
VALUE buf = Qnil;
|
||||||
const char *rep;
|
const char *rep;
|
||||||
|
|
|
@@ -2180,17 +2180,19 @@ class TestTranscode < Test::Unit::TestCase
|
||||||
|
|
||||||
def test_valid_dummy_encoding
|
def test_valid_dummy_encoding
|
||||||
bug9314 = '[ruby-core:59354] [Bug #9314]'
|
bug9314 = '[ruby-core:59354] [Bug #9314]'
|
||||||
assert_separately(%W[- -- #{bug9314}], <<-'end;')
|
assert_separately(%W[- -- #{bug9314}], "#{<<~"begin;"}\n#{<<~'end;'}")
|
||||||
bug = ARGV.shift
|
begin;
|
||||||
result = assert_nothing_raised(TypeError, bug) {break "test".encode(Encoding::UTF_16)}
|
bug = ARGV.shift
|
||||||
assert_equal("\xFE\xFF\x00t\x00e\x00s\x00t", result.b, bug)
|
result = assert_nothing_raised(TypeError, bug) {break "test".encode(Encoding::UTF_16)}
|
||||||
result = assert_nothing_raised(TypeError, bug) {break "test".encode(Encoding::UTF_32)}
|
assert_equal("\xFE\xFF\x00t\x00e\x00s\x00t", result.b, bug)
|
||||||
assert_equal("\x00\x00\xFE\xFF\x00\x00\x00t\x00\x00\x00e\x00\x00\x00s\x00\x00\x00t", result.b, bug)
|
result = assert_nothing_raised(TypeError, bug) {break "test".encode(Encoding::UTF_32)}
|
||||||
|
assert_equal("\x00\x00\xFE\xFF\x00\x00\x00t\x00\x00\x00e\x00\x00\x00s\x00\x00\x00t", result.b, bug)
|
||||||
end;
|
end;
|
||||||
end
|
end
|
||||||
|
|
||||||
def test_loading_race
|
def test_loading_race
|
||||||
assert_separately([], <<-'end;') #do
|
assert_separately([], "#{<<~"begin;"}\n#{<<~'end;'}")
|
||||||
|
begin;
|
||||||
bug11277 = '[ruby-dev:49106] [Bug #11277]'
|
bug11277 = '[ruby-dev:49106] [Bug #11277]'
|
||||||
num = 2
|
num = 2
|
||||||
th = (0...num).map do |i|
|
th = (0...num).map do |i|
|
||||||
|
@@ -2207,6 +2209,17 @@ class TestTranscode < Test::Unit::TestCase
|
||||||
end;
|
end;
|
||||||
end
|
end
|
||||||
|
|
||||||
|
def test_scrub_encode_with_coderange
|
||||||
|
bug = '[ruby-core:82674] [Bug #13874]'
|
||||||
|
s = "\xe5".b
|
||||||
|
u = Encoding::UTF_8
|
||||||
|
assert_equal("?", s.encode(u, u, invalid: :replace, replace: "?"),
|
||||||
|
"should replace invalid byte")
|
||||||
|
assert_predicate(s, :valid_encoding?, "any char is valid in binary")
|
||||||
|
assert_equal("?", s.encode(u, u, invalid: :replace, replace: "?"),
|
||||||
|
"#{bug} coderange should not have side effects")
|
||||||
|
end
|
||||||
|
|
||||||
def test_universal_newline
|
def test_universal_newline
|
||||||
bug11324 = '[ruby-core:69841] [Bug #11324]'
|
bug11324 = '[ruby-core:69841] [Bug #11324]'
|
||||||
usascii = Encoding::US_ASCII
|
usascii = Encoding::US_ASCII
|
||||||
|
|
Loading…
Add table
Reference in a new issue