From 0f283054e7f568f09fbfc952d57ea6daf4abbd88 Mon Sep 17 00:00:00 2001 From: Jeremy Evans Date: Wed, 19 Jun 2019 10:44:54 -0700 Subject: [PATCH] Check that String#scrub block does not modify receiver Similar to the check used for String#gsub. Can fix possible segfault. Fixes [Bug #15941] --- string.c | 19 ++++++++++++------- test/ruby/test_m17n.rb | 13 +++++++++++++ 2 files changed, 25 insertions(+), 7 deletions(-) diff --git a/string.c b/string.c index 2bf9586e98..8d7d2ad7b9 100644 --- a/string.c +++ b/string.c @@ -10278,9 +10278,10 @@ enc_str_scrub(rb_encoding *enc, VALUE str, VALUE repl, int cr) { int encidx; VALUE buf = Qnil; - const char *rep; + const char *rep, *p, *e, *p1, *sp; long replen = -1; int tainted = 0; + long slen; if (rb_block_given_p()) { if (!NIL_P(repl)) @@ -10306,10 +10307,13 @@ enc_str_scrub(rb_encoding *enc, VALUE str, VALUE repl, int cr) rep = replace; replen = (int)sizeof(replace); \ } while (0) + slen = RSTRING_LEN(str); + p = RSTRING_PTR(str); + e = RSTRING_END(str); + p1 = p; + sp = p; + if (rb_enc_asciicompat(enc)) { - const char *p = RSTRING_PTR(str); - const char *e = RSTRING_END(str); - const char *p1 = p; int rep7bit_p; if (!replen) { rep = NULL; @@ -10374,6 +10378,7 @@ enc_str_scrub(rb_encoding *enc, VALUE str, VALUE repl, int cr) } else { repl = rb_yield(rb_enc_str_new(p, clen, enc)); + str_mod_check(str, sp, slen); repl = str_compat_and_valid(repl, enc); tainted |= OBJ_TAINTED_RAW(repl); rb_str_buf_cat(buf, RSTRING_PTR(repl), RSTRING_LEN(repl)); @@ -10409,6 +10414,7 @@ enc_str_scrub(rb_encoding *enc, VALUE str, VALUE repl, int cr) } else { repl = rb_yield(rb_enc_str_new(p, e-p, enc)); + str_mod_check(str, sp, slen); repl = str_compat_and_valid(repl, enc); tainted |= OBJ_TAINTED_RAW(repl); rb_str_buf_cat(buf, RSTRING_PTR(repl), RSTRING_LEN(repl)); @@ -10419,9 +10425,6 @@ enc_str_scrub(rb_encoding *enc, VALUE str, VALUE repl, int cr) } else { /* ASCII incompatible */ - const char *p = RSTRING_PTR(str); - const char *e = RSTRING_END(str); - const char *p1 = p; long mbminlen = rb_enc_mbminlen(enc); if (!replen) { rep = NULL; @@ -10478,6 +10481,7 @@ enc_str_scrub(rb_encoding *enc, VALUE str, VALUE repl, int cr) } else { repl = rb_yield(rb_enc_str_new(p, clen, enc)); + str_mod_check(str, sp, slen); repl = str_compat_and_valid(repl, enc); tainted |= OBJ_TAINTED_RAW(repl); rb_str_buf_cat(buf, RSTRING_PTR(repl), RSTRING_LEN(repl)); @@ -10505,6 +10509,7 @@ enc_str_scrub(rb_encoding *enc, VALUE str, VALUE repl, int cr) } else { repl = rb_yield(rb_enc_str_new(p, e-p, enc)); + str_mod_check(str, sp, slen); repl = str_compat_and_valid(repl, enc); tainted |= OBJ_TAINTED_RAW(repl); rb_str_buf_cat(buf, RSTRING_PTR(repl), RSTRING_LEN(repl)); diff --git a/test/ruby/test_m17n.rb b/test/ruby/test_m17n.rb index 953f6417b0..44f3cc97a9 100644 --- a/test/ruby/test_m17n.rb +++ b/test/ruby/test_m17n.rb @@ -1604,6 +1604,19 @@ class TestM17N < Test::Unit::TestCase assert_predicate(str.dup.taint.scrub, :tainted?) end + def test_scrub_modification_inside_block + str = ("abc\u3042".b << "\xE3\x80".b).force_encoding('UTF-8') + assert_raise(RuntimeError) {str.scrub{|_| str << "1234567890"; "?" }} + + str = "\x00\xD8\x42\x30".force_encoding(Encoding::UTF_16LE) + assert_raise(RuntimeError) do + str.scrub do |_| + str << "1\x002\x00".force_encoding('UTF-16LE') + "?\x00".force_encoding('UTF-16LE') + end + end + end + def test_scrub_replace_default assert_equal("\uFFFD\uFFFD\uFFFD", u("\x80\x80\x80").scrub) assert_equal("\uFFFDA", u("\xF4\x80\x80A").scrub)