mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
Avoid race condition in Regexp#match
In certain conditions, Regexp#match could return a MatchData with missing captures. This seems to require at the least, multiple threads calling a method that calls the same block/proc/lambda which calls Regexp#match. The race condition happens because the MatchData is passed from indirectly via the backref, and other threads can modify the backref. Fix the issue by: 1. Not reusing the existing MatchData from the backref, and always allocating a new MatchData. 2. Passing the MatchData directly to the caller using a VALUE*, instead of indirectly through the backref. It's likely that variants of this issue exist for other Regexp methods. Anywhere that MatchData is passed implicitly through the backref is probably vulnerable to this issue. Fixes [Bug #17507]
This commit is contained in:
parent
d087214658
commit
abc0304cb2
Notes:
git
2021-10-02 13:50:38 +09:00
2 changed files with 40 additions and 27 deletions
46
re.c
46
re.c
|
@ -1615,8 +1615,8 @@ rb_reg_adjust_startpos(VALUE re, VALUE str, long pos, int reverse)
|
||||||
}
|
}
|
||||||
|
|
||||||
/* returns byte offset */
|
/* returns byte offset */
|
||||||
long
|
static long
|
||||||
rb_reg_search0(VALUE re, VALUE str, long pos, int reverse, int set_backref_str)
|
rb_reg_search_set_match(VALUE re, VALUE str, long pos, int reverse, int set_backref_str, VALUE *set_match)
|
||||||
{
|
{
|
||||||
long result;
|
long result;
|
||||||
VALUE match;
|
VALUE match;
|
||||||
|
@ -1638,18 +1638,7 @@ rb_reg_search0(VALUE re, VALUE str, long pos, int reverse, int set_backref_str)
|
||||||
tmpreg = reg != RREGEXP_PTR(re);
|
tmpreg = reg != RREGEXP_PTR(re);
|
||||||
if (!tmpreg) RREGEXP(re)->usecnt++;
|
if (!tmpreg) RREGEXP(re)->usecnt++;
|
||||||
|
|
||||||
match = rb_backref_get();
|
MEMZERO(regs, struct re_registers, 1);
|
||||||
if (!NIL_P(match)) {
|
|
||||||
if (FL_TEST(match, MATCH_BUSY)) {
|
|
||||||
match = Qnil;
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
regs = RMATCH_REGS(match);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (NIL_P(match)) {
|
|
||||||
MEMZERO(regs, struct re_registers, 1);
|
|
||||||
}
|
|
||||||
if (!reverse) {
|
if (!reverse) {
|
||||||
range += len;
|
range += len;
|
||||||
}
|
}
|
||||||
|
@ -1682,13 +1671,10 @@ rb_reg_search0(VALUE re, VALUE str, long pos, int reverse, int set_backref_str)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (NIL_P(match)) {
|
match = match_alloc(rb_cMatch);
|
||||||
int err;
|
int copy_err = rb_reg_region_copy(RMATCH_REGS(match), regs);
|
||||||
match = match_alloc(rb_cMatch);
|
onig_region_free(regs, 0);
|
||||||
err = rb_reg_region_copy(RMATCH_REGS(match), regs);
|
if (copy_err) rb_memerror();
|
||||||
onig_region_free(regs, 0);
|
|
||||||
if (err) rb_memerror();
|
|
||||||
}
|
|
||||||
|
|
||||||
if (set_backref_str) {
|
if (set_backref_str) {
|
||||||
RMATCH(match)->str = rb_str_new4(str);
|
RMATCH(match)->str = rb_str_new4(str);
|
||||||
|
@ -1696,10 +1682,17 @@ rb_reg_search0(VALUE re, VALUE str, long pos, int reverse, int set_backref_str)
|
||||||
|
|
||||||
RMATCH(match)->regexp = re;
|
RMATCH(match)->regexp = re;
|
||||||
rb_backref_set(match);
|
rb_backref_set(match);
|
||||||
|
if (set_match) *set_match = match;
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
long
|
||||||
|
rb_reg_search0(VALUE re, VALUE str, long pos, int reverse, int set_backref_str)
|
||||||
|
{
|
||||||
|
return rb_reg_search_set_match(re, str, pos, reverse, set_backref_str, NULL);
|
||||||
|
}
|
||||||
|
|
||||||
long
|
long
|
||||||
rb_reg_search(VALUE re, VALUE str, long pos, int reverse)
|
rb_reg_search(VALUE re, VALUE str, long pos, int reverse)
|
||||||
{
|
{
|
||||||
|
@ -3193,7 +3186,7 @@ reg_operand(VALUE s, int check)
|
||||||
}
|
}
|
||||||
|
|
||||||
static long
|
static long
|
||||||
reg_match_pos(VALUE re, VALUE *strp, long pos)
|
reg_match_pos(VALUE re, VALUE *strp, long pos, VALUE* set_match)
|
||||||
{
|
{
|
||||||
VALUE str = *strp;
|
VALUE str = *strp;
|
||||||
|
|
||||||
|
@ -3212,7 +3205,7 @@ reg_match_pos(VALUE re, VALUE *strp, long pos)
|
||||||
}
|
}
|
||||||
pos = rb_str_offset(str, pos);
|
pos = rb_str_offset(str, pos);
|
||||||
}
|
}
|
||||||
return rb_reg_search(re, str, pos, 0);
|
return rb_reg_search_set_match(re, str, pos, 0, 1, set_match);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -3266,7 +3259,7 @@ reg_match_pos(VALUE re, VALUE *strp, long pos)
|
||||||
VALUE
|
VALUE
|
||||||
rb_reg_match(VALUE re, VALUE str)
|
rb_reg_match(VALUE re, VALUE str)
|
||||||
{
|
{
|
||||||
long pos = reg_match_pos(re, &str, 0);
|
long pos = reg_match_pos(re, &str, 0, NULL);
|
||||||
if (pos < 0) return Qnil;
|
if (pos < 0) return Qnil;
|
||||||
pos = rb_str_sublen(str, pos);
|
pos = rb_str_sublen(str, pos);
|
||||||
return LONG2FIX(pos);
|
return LONG2FIX(pos);
|
||||||
|
@ -3377,7 +3370,7 @@ rb_reg_match2(VALUE re)
|
||||||
static VALUE
|
static VALUE
|
||||||
rb_reg_match_m(int argc, VALUE *argv, VALUE re)
|
rb_reg_match_m(int argc, VALUE *argv, VALUE re)
|
||||||
{
|
{
|
||||||
VALUE result, str, initpos;
|
VALUE result = Qnil, str, initpos;
|
||||||
long pos;
|
long pos;
|
||||||
|
|
||||||
if (rb_scan_args(argc, argv, "11", &str, &initpos) == 2) {
|
if (rb_scan_args(argc, argv, "11", &str, &initpos) == 2) {
|
||||||
|
@ -3387,12 +3380,11 @@ rb_reg_match_m(int argc, VALUE *argv, VALUE re)
|
||||||
pos = 0;
|
pos = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
pos = reg_match_pos(re, &str, pos);
|
pos = reg_match_pos(re, &str, pos, &result);
|
||||||
if (pos < 0) {
|
if (pos < 0) {
|
||||||
rb_backref_set(Qnil);
|
rb_backref_set(Qnil);
|
||||||
return Qnil;
|
return Qnil;
|
||||||
}
|
}
|
||||||
result = rb_backref_get();
|
|
||||||
rb_match_busy(result);
|
rb_match_busy(result);
|
||||||
if (!NIL_P(result) && rb_block_given_p()) {
|
if (!NIL_P(result) && rb_block_given_p()) {
|
||||||
return rb_yield(result);
|
return rb_yield(result);
|
||||||
|
|
|
@ -265,6 +265,27 @@ class TestRegexp < Test::Unit::TestCase
|
||||||
assert_equal(re, re.match("foo").regexp)
|
assert_equal(re, re.match("foo").regexp)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
def test_match_lambda_multithread
|
||||||
|
bug17507 = "[ruby-core:101901]"
|
||||||
|
str = "a-x-foo-bar-baz-z-b"
|
||||||
|
|
||||||
|
worker = lambda do
|
||||||
|
m = /foo-([A-Za-z0-9_\.]+)-baz/.match(str)
|
||||||
|
assert_equal("bar", m[1], bug17507)
|
||||||
|
|
||||||
|
# These two lines are needed to trigger the bug
|
||||||
|
File.exist? "/tmp"
|
||||||
|
str.gsub(/foo-bar-baz/, "foo-abc-baz")
|
||||||
|
end
|
||||||
|
|
||||||
|
def self. threaded_test(worker)
|
||||||
|
6.times.map {Thread.new {10_000.times {worker.call}}}.each(&:join)
|
||||||
|
end
|
||||||
|
|
||||||
|
# The bug only occurs in a method calling a block/proc/lambda
|
||||||
|
threaded_test(worker)
|
||||||
|
end
|
||||||
|
|
||||||
def test_source
|
def test_source
|
||||||
bug5484 = '[ruby-core:40364]'
|
bug5484 = '[ruby-core:40364]'
|
||||||
assert_equal('', //.source)
|
assert_equal('', //.source)
|
||||||
|
|
Loading…
Reference in a new issue