1
0
Fork 0
mirror of https://github.com/ruby/ruby.git synced 2022-11-09 12:17:21 -05:00

Fix handling of control/meta escapes in literal regexps

Ruby uses a recursive algorithm for handling control/meta escapes
in strings (read_escape).  However, the equivalent code for regexps
(tokadd_escape) did not use a recursive algorithm.  Due to this,
handling of control/meta escapes in regexps did not have the same
behavior as in strings, leading to behavior such as the following
returning nil:

```ruby
/\c\xFF/ =~ "\c\xFF"
```

Switch the code for handling \c, \C and \M in literal regexps to
use the same code as for strings (read_escape), to keep behavior
consistent between the two.

Fixes [Bug #14367]
This commit is contained in:
Jeremy Evans 2021-05-12 12:37:55 -07:00
parent 9484f9ebdf
commit 11ae581a4a
Notes: git 2021-05-13 10:56:08 +09:00
3 changed files with 36 additions and 34 deletions

50
parse.y
View file

@ -6902,10 +6902,8 @@ static int
tokadd_escape(struct parser_params *p, rb_encoding **encp)
{
int c;
int flags = 0;
size_t numlen;
first:
switch (c = nextc(p)) {
case '\n':
return 0; /* just ignore */
@ -6928,37 +6926,6 @@ tokadd_escape(struct parser_params *p, rb_encoding **encp)
}
return 0;
case 'M':
if (flags & ESCAPE_META) goto eof;
if ((c = nextc(p)) != '-') {
pushback(p, c);
goto eof;
}
tokcopy(p, 3);
flags |= ESCAPE_META;
goto escaped;
case 'C':
if (flags & ESCAPE_CONTROL) goto eof;
if ((c = nextc(p)) != '-') {
pushback(p, c);
goto eof;
}
tokcopy(p, 3);
goto escaped;
case 'c':
if (flags & ESCAPE_CONTROL) goto eof;
tokcopy(p, 2);
flags |= ESCAPE_CONTROL;
escaped:
if ((c = nextc(p)) == '\\') {
goto first;
}
else if (c == -1) goto eof;
tokadd(p, c);
return 0;
eof:
case -1:
yyerror0("Invalid escape character syntax");
@ -7151,6 +7118,23 @@ tokadd_string(struct parser_params *p,
goto non_ascii;
}
if (func & STR_FUNC_REGEXP) {
switch (c) {
case 'c':
case 'C':
case 'M': {
pushback(p, c);
c = read_escape(p, 0, enc);
int i;
char escbuf[5];
snprintf(escbuf, sizeof(escbuf), "\\x%02X", c);
for(i = 0; i < 4; i++) {
tokadd(p, escbuf[i]);
}
continue;
}
}
if (c == term && !simple_re_meta(c)) {
tokadd(p, c);
continue;

View file

@ -36,7 +36,7 @@ describe "Regexps with interpolation" do
it "gives precedence to escape sequences over substitution" do
str = "J"
/\c#{str}/.to_s.should == '(?-mix:\c#' + '{str})'
/\c#{str}/.to_s.should include('{str}')
end
it "throws RegexpError for malformed interpolation" do

View file

@ -496,6 +496,24 @@ class TestRegexp < Test::Unit::TestCase
assert_raise(RegexpError) { Regexp.new("((?<v>))\\g<0>") }
end
# Checks that control/meta escapes (\c, \C-, \M- and their combinations)
# written in a regexp literal match the same escapes written in a
# double-quoted string literal.
def test_match_control_meta_escape
# Same escape sequence on both sides: the regexp must match at offset 0.
assert_equal(0, /\c\xFF/ =~ "\c\xFF")
assert_equal(0, /\c\M-\xFF/ =~ "\c\M-\xFF")
assert_equal(0, /\C-\xFF/ =~ "\C-\xFF")
assert_equal(0, /\C-\M-\xFF/ =~ "\C-\M-\xFF")
assert_equal(0, /\M-\xFF/ =~ "\M-\xFF")
assert_equal(0, /\M-\C-\xFF/ =~ "\M-\C-\xFF")
assert_equal(0, /\M-\c\xFF/ =~ "\M-\c\xFF")
# Pattern uses \xFE where the string uses \xFF: no match is expected,
# guarding against the escape handling ignoring the trailing byte.
assert_nil(/\c\xFE/ =~ "\c\xFF")
assert_nil(/\c\M-\xFE/ =~ "\c\M-\xFF")
assert_nil(/\C-\xFE/ =~ "\C-\xFF")
assert_nil(/\C-\M-\xFE/ =~ "\C-\M-\xFF")
assert_nil(/\M-\xFE/ =~ "\M-\xFF")
assert_nil(/\M-\C-\xFE/ =~ "\M-\C-\xFF")
assert_nil(/\M-\c\xFE/ =~ "\M-\c\xFF")
end
def test_unescape
assert_raise(ArgumentError) { s = '\\'; /#{ s }/ }
assert_equal(/\xFF/n, /#{ s="\\xFF" }/n)