mirror of
				https://github.com/ruby/ruby.git
				synced 2022-11-09 12:17:21 -05:00 
			
		
		
		
	Check that String#scrub block does not modify receiver
This is similar to the check used for String#gsub, and it can fix a possible segfault. Fixes [Bug #15941]
This commit is contained in:
		
							parent
							
								
									7582287eb2
								
							
						
					
					
						commit
						0f283054e7
					
				
					 2 changed files with 25 additions and 7 deletions
				
			
		
							
								
								
									
										19
									
								
								string.c
									
										
									
									
									
								
							
							
						
						
									
										19
									
								
								string.c
									
										
									
									
									
								
							| 
						 | 
				
			
			@ -10278,9 +10278,10 @@ enc_str_scrub(rb_encoding *enc, VALUE str, VALUE repl, int cr)
 | 
			
		|||
{
 | 
			
		||||
    int encidx;
 | 
			
		||||
    VALUE buf = Qnil;
 | 
			
		||||
    const char *rep;
 | 
			
		||||
    const char *rep, *p, *e, *p1, *sp;
 | 
			
		||||
    long replen = -1;
 | 
			
		||||
    int tainted = 0;
 | 
			
		||||
    long slen;
 | 
			
		||||
 | 
			
		||||
    if (rb_block_given_p()) {
 | 
			
		||||
	if (!NIL_P(repl))
 | 
			
		||||
| 
						 | 
				
			
			@ -10306,10 +10307,13 @@ enc_str_scrub(rb_encoding *enc, VALUE str, VALUE repl, int cr)
 | 
			
		|||
	rep = replace; replen = (int)sizeof(replace); \
 | 
			
		||||
    } while (0)
 | 
			
		||||
 | 
			
		||||
    slen = RSTRING_LEN(str);
 | 
			
		||||
    p = RSTRING_PTR(str);
 | 
			
		||||
    e = RSTRING_END(str);
 | 
			
		||||
    p1 = p;
 | 
			
		||||
    sp = p;
 | 
			
		||||
 | 
			
		||||
    if (rb_enc_asciicompat(enc)) {
 | 
			
		||||
	const char *p = RSTRING_PTR(str);
 | 
			
		||||
	const char *e = RSTRING_END(str);
 | 
			
		||||
	const char *p1 = p;
 | 
			
		||||
	int rep7bit_p;
 | 
			
		||||
	if (!replen) {
 | 
			
		||||
	    rep = NULL;
 | 
			
		||||
| 
						 | 
				
			
			@ -10374,6 +10378,7 @@ enc_str_scrub(rb_encoding *enc, VALUE str, VALUE repl, int cr)
 | 
			
		|||
		}
 | 
			
		||||
		else {
 | 
			
		||||
		    repl = rb_yield(rb_enc_str_new(p, clen, enc));
 | 
			
		||||
                    str_mod_check(str, sp, slen);
 | 
			
		||||
		    repl = str_compat_and_valid(repl, enc);
 | 
			
		||||
		    tainted |= OBJ_TAINTED_RAW(repl);
 | 
			
		||||
		    rb_str_buf_cat(buf, RSTRING_PTR(repl), RSTRING_LEN(repl));
 | 
			
		||||
| 
						 | 
				
			
			@ -10409,6 +10414,7 @@ enc_str_scrub(rb_encoding *enc, VALUE str, VALUE repl, int cr)
 | 
			
		|||
	    }
 | 
			
		||||
	    else {
 | 
			
		||||
		repl = rb_yield(rb_enc_str_new(p, e-p, enc));
 | 
			
		||||
                str_mod_check(str, sp, slen);
 | 
			
		||||
		repl = str_compat_and_valid(repl, enc);
 | 
			
		||||
		tainted |= OBJ_TAINTED_RAW(repl);
 | 
			
		||||
		rb_str_buf_cat(buf, RSTRING_PTR(repl), RSTRING_LEN(repl));
 | 
			
		||||
| 
						 | 
				
			
			@ -10419,9 +10425,6 @@ enc_str_scrub(rb_encoding *enc, VALUE str, VALUE repl, int cr)
 | 
			
		|||
    }
 | 
			
		||||
    else {
 | 
			
		||||
	/* ASCII incompatible */
 | 
			
		||||
	const char *p = RSTRING_PTR(str);
 | 
			
		||||
	const char *e = RSTRING_END(str);
 | 
			
		||||
	const char *p1 = p;
 | 
			
		||||
	long mbminlen = rb_enc_mbminlen(enc);
 | 
			
		||||
	if (!replen) {
 | 
			
		||||
	    rep = NULL;
 | 
			
		||||
| 
						 | 
				
			
			@ -10478,6 +10481,7 @@ enc_str_scrub(rb_encoding *enc, VALUE str, VALUE repl, int cr)
 | 
			
		|||
		}
 | 
			
		||||
		else {
 | 
			
		||||
		    repl = rb_yield(rb_enc_str_new(p, clen, enc));
 | 
			
		||||
                    str_mod_check(str, sp, slen);
 | 
			
		||||
		    repl = str_compat_and_valid(repl, enc);
 | 
			
		||||
		    tainted |= OBJ_TAINTED_RAW(repl);
 | 
			
		||||
		    rb_str_buf_cat(buf, RSTRING_PTR(repl), RSTRING_LEN(repl));
 | 
			
		||||
| 
						 | 
				
			
			@ -10505,6 +10509,7 @@ enc_str_scrub(rb_encoding *enc, VALUE str, VALUE repl, int cr)
 | 
			
		|||
	    }
 | 
			
		||||
	    else {
 | 
			
		||||
		repl = rb_yield(rb_enc_str_new(p, e-p, enc));
 | 
			
		||||
                str_mod_check(str, sp, slen);
 | 
			
		||||
		repl = str_compat_and_valid(repl, enc);
 | 
			
		||||
		tainted |= OBJ_TAINTED_RAW(repl);
 | 
			
		||||
		rb_str_buf_cat(buf, RSTRING_PTR(repl), RSTRING_LEN(repl));
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1604,6 +1604,19 @@ class TestM17N < Test::Unit::TestCase
 | 
			
		|||
    assert_predicate(str.dup.taint.scrub, :tainted?)
 | 
			
		||||
  end
 | 
			
		||||
 | 
			
		||||
  def test_scrub_modification_inside_block
 | 
			
		||||
    str = ("abc\u3042".b << "\xE3\x80".b).force_encoding('UTF-8')
 | 
			
		||||
    assert_raise(RuntimeError) {str.scrub{|_| str << "1234567890"; "?" }}
 | 
			
		||||
 | 
			
		||||
    str = "\x00\xD8\x42\x30".force_encoding(Encoding::UTF_16LE)
 | 
			
		||||
    assert_raise(RuntimeError) do
 | 
			
		||||
      str.scrub do |_|
 | 
			
		||||
        str << "1\x002\x00".force_encoding('UTF-16LE')
 | 
			
		||||
        "?\x00".force_encoding('UTF-16LE')
 | 
			
		||||
      end
 | 
			
		||||
    end
 | 
			
		||||
  end
 | 
			
		||||
 | 
			
		||||
  def test_scrub_replace_default
 | 
			
		||||
    assert_equal("\uFFFD\uFFFD\uFFFD", u("\x80\x80\x80").scrub)
 | 
			
		||||
    assert_equal("\uFFFDA", u("\xF4\x80\x80A").scrub)
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue