mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
Fix handling of control/meta escapes in literal regexps
Ruby uses a recursive algorithm for handling control/meta escapes in strings (read_escape). However, the equivalent code for regexps (tokadd_escape) did not use a recursive algorithm. Due to this, handling of control/meta escapes in regexps did not have the same behavior as in strings, leading to behavior such as the following returning nil: ```ruby /\c\xFF/ =~ "\c\xFF" ``` Switch the code for handling \c, \C and \M in literal regexps to use the same code as for strings (read_escape), to keep behavior consistent between the two. Fixes [Bug #14367]
This commit is contained in:
parent
9484f9ebdf
commit
11ae581a4a
Notes:
git
2021-05-13 10:56:08 +09:00
3 changed files with 36 additions and 34 deletions
50
parse.y
50
parse.y
|
|
@ -6902,10 +6902,8 @@ static int
|
|||
tokadd_escape(struct parser_params *p, rb_encoding **encp)
|
||||
{
|
||||
int c;
|
||||
int flags = 0;
|
||||
size_t numlen;
|
||||
|
||||
first:
|
||||
switch (c = nextc(p)) {
|
||||
case '\n':
|
||||
return 0; /* just ignore */
|
||||
|
|
@ -6928,37 +6926,6 @@ tokadd_escape(struct parser_params *p, rb_encoding **encp)
|
|||
}
|
||||
return 0;
|
||||
|
||||
case 'M':
|
||||
if (flags & ESCAPE_META) goto eof;
|
||||
if ((c = nextc(p)) != '-') {
|
||||
pushback(p, c);
|
||||
goto eof;
|
||||
}
|
||||
tokcopy(p, 3);
|
||||
flags |= ESCAPE_META;
|
||||
goto escaped;
|
||||
|
||||
case 'C':
|
||||
if (flags & ESCAPE_CONTROL) goto eof;
|
||||
if ((c = nextc(p)) != '-') {
|
||||
pushback(p, c);
|
||||
goto eof;
|
||||
}
|
||||
tokcopy(p, 3);
|
||||
goto escaped;
|
||||
|
||||
case 'c':
|
||||
if (flags & ESCAPE_CONTROL) goto eof;
|
||||
tokcopy(p, 2);
|
||||
flags |= ESCAPE_CONTROL;
|
||||
escaped:
|
||||
if ((c = nextc(p)) == '\\') {
|
||||
goto first;
|
||||
}
|
||||
else if (c == -1) goto eof;
|
||||
tokadd(p, c);
|
||||
return 0;
|
||||
|
||||
eof:
|
||||
case -1:
|
||||
yyerror0("Invalid escape character syntax");
|
||||
|
|
@ -7151,6 +7118,23 @@ tokadd_string(struct parser_params *p,
|
|||
goto non_ascii;
|
||||
}
|
||||
if (func & STR_FUNC_REGEXP) {
|
||||
switch (c) {
|
||||
case 'c':
|
||||
case 'C':
|
||||
case 'M': {
|
||||
pushback(p, c);
|
||||
c = read_escape(p, 0, enc);
|
||||
|
||||
int i;
|
||||
char escbuf[5];
|
||||
snprintf(escbuf, sizeof(escbuf), "\\x%02X", c);
|
||||
for(i = 0; i < 4; i++) {
|
||||
tokadd(p, escbuf[i]);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
if (c == term && !simple_re_meta(c)) {
|
||||
tokadd(p, c);
|
||||
continue;
|
||||
|
|
|
|||
|
|
@ -36,7 +36,7 @@ describe "Regexps with interpolation" do
|
|||
|
||||
it "gives precedence to escape sequences over substitution" do
|
||||
str = "J"
|
||||
/\c#{str}/.to_s.should == '(?-mix:\c#' + '{str})'
|
||||
/\c#{str}/.to_s.should include('{str}')
|
||||
end
|
||||
|
||||
it "throws RegexpError for malformed interpolation" do
|
||||
|
|
|
|||
|
|
@ -496,6 +496,24 @@ class TestRegexp < Test::Unit::TestCase
|
|||
assert_raise(RegexpError) { Regexp.new("((?<v>))\\g<0>") }
|
||||
end
|
||||
|
||||
def test_match_control_meta_escape
|
||||
assert_equal(0, /\c\xFF/ =~ "\c\xFF")
|
||||
assert_equal(0, /\c\M-\xFF/ =~ "\c\M-\xFF")
|
||||
assert_equal(0, /\C-\xFF/ =~ "\C-\xFF")
|
||||
assert_equal(0, /\C-\M-\xFF/ =~ "\C-\M-\xFF")
|
||||
assert_equal(0, /\M-\xFF/ =~ "\M-\xFF")
|
||||
assert_equal(0, /\M-\C-\xFF/ =~ "\M-\C-\xFF")
|
||||
assert_equal(0, /\M-\c\xFF/ =~ "\M-\c\xFF")
|
||||
|
||||
assert_nil(/\c\xFE/ =~ "\c\xFF")
|
||||
assert_nil(/\c\M-\xFE/ =~ "\c\M-\xFF")
|
||||
assert_nil(/\C-\xFE/ =~ "\C-\xFF")
|
||||
assert_nil(/\C-\M-\xFE/ =~ "\C-\M-\xFF")
|
||||
assert_nil(/\M-\xFE/ =~ "\M-\xFF")
|
||||
assert_nil(/\M-\C-\xFE/ =~ "\M-\C-\xFF")
|
||||
assert_nil(/\M-\c\xFE/ =~ "\M-\c\xFF")
|
||||
end
|
||||
|
||||
def test_unescape
|
||||
assert_raise(ArgumentError) { s = '\\'; /#{ s }/ }
|
||||
assert_equal(/\xFF/n, /#{ s="\\xFF" }/n)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue