From 2976ec58a48e3ff83ff3d0979041bb5694592153 Mon Sep 17 00:00:00 2001 From: nobu Date: Wed, 21 Apr 2010 04:44:58 +0000 Subject: [PATCH] * parse.y (regexp): check each fragments. [ruby-core:27374] git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@27431 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- ChangeLog | 4 +++ parse.y | 78 +++++++++++++++++++++++++++++++++++----- test/ruby/test_regexp.rb | 7 +++- 3 files changed, 80 insertions(+), 9 deletions(-) diff --git a/ChangeLog b/ChangeLog index d2a6acf423..5f83a8917f 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,7 @@ +Wed Apr 21 13:44:54 2010 Nobuyoshi Nakada + + * parse.y (regexp): check each fragments. [ruby-core:27374] + Wed Apr 21 13:36:01 2010 Nobuyoshi Nakada * parse.y (parser_yylex): reverted r27388 due to backward diff --git a/parse.y b/parse.y index 23f4362857..de1aee7f2b 100644 --- a/parse.y +++ b/parse.y @@ -362,6 +362,7 @@ static NODE *arg_concat_gen(struct parser_params*,NODE*,NODE*); #define arg_concat(h,t) arg_concat_gen(parser,h,t) static NODE *literal_concat_gen(struct parser_params*,NODE*,NODE*); #define literal_concat(h,t) literal_concat_gen(parser,h,t) +static int literal_concat0(struct parser_params *, VALUE, VALUE); static NODE *new_evstr_gen(struct parser_params*,NODE*); #define new_evstr(n) new_evstr_gen(parser,n) static NODE *evstr2dstr_gen(struct parser_params*,NODE*); @@ -415,7 +416,7 @@ static VALUE reg_compile_gen(struct parser_params*, VALUE, int); #define reg_compile(str,options) reg_compile_gen(parser, str, options) static void reg_fragment_setenc_gen(struct parser_params*, VALUE, int); #define reg_fragment_setenc(str,options) reg_fragment_setenc_gen(parser, str, options) -static void reg_fragment_check_gen(struct parser_params*, VALUE, int); +static int reg_fragment_check_gen(struct parser_params*, VALUE, int); #define reg_fragment_check(str,options) reg_fragment_check_gen(parser, str, options) static NODE *reg_named_capture_assign_gen(struct parser_params* parser, VALUE regexp, NODE *match); #define reg_named_capture_assign(regexp,match) reg_named_capture_assign_gen(parser,regexp,match) @@ -684,7 +685,7 @@ static void token_info_pop(struct parser_params*, const char *token); %token tREGEXP_END %type singleton strings string string1 xstring regexp -%type string_contents xstring_contents string_content +%type string_contents xstring_contents regexp_contents string_content %type words qwords word_list qword_list word %type literal numeric dsym cpath %type top_compstmt top_stmts top_stmt @@ -3852,12 +3853,12 @@ xstring : tXSTRING_BEG xstring_contents tSTRING_END } ; -regexp : tREGEXP_BEG xstring_contents tREGEXP_END +regexp : tREGEXP_BEG regexp_contents tREGEXP_END { /*%%%*/ int options = $3; NODE *node = $2; - NODE *list; + NODE *list, *prev; if (!node) { node = NEW_LIT(reg_compile(STR_NEW0(), options)); } @@ -3880,11 +3881,33 @@ regexp : tREGEXP_BEG xstring_contents tREGEXP_END } node->nd_cflag = options & RE_OPTION_MASK; if (!NIL_P(node->nd_lit)) reg_fragment_check(node->nd_lit, options); - for (list = node->nd_next; list; list = list->nd_next) { - if (nd_type(list->nd_head) == NODE_STR) { - reg_fragment_check(list->nd_head->nd_lit, options); + for (list = (prev = node)->nd_next; list; list = list->nd_next) { + if (nd_type(list->nd_head) == NODE_STR) { + VALUE tail = list->nd_head->nd_lit; + if (reg_fragment_check(tail, options) && prev && !NIL_P(prev->nd_lit)) { + if (!literal_concat0(parser, prev->nd_lit, tail)) { + node = 0; + break; + } + rb_str_resize(tail, 0); + prev->nd_next = list->nd_next; + rb_gc_force_recycle((VALUE)list->nd_head); + rb_gc_force_recycle((VALUE)list); + list = prev; + } + else { + prev = list; + } } + else { + prev = 0; + } } + if (!node->nd_next) { + VALUE src = node->nd_lit; + nd_set_type(node, NODE_LIT); + node->nd_lit = reg_compile(src, options); + } break; } $$ = node; @@ -4012,6 +4035,43 @@ xstring_contents: /* none */ } ; +regexp_contents: /* none */ + { + /*%%%*/ + $$ = 0; + /*% + $$ = dispatch0(regexp_new); + %*/ + } + | regexp_contents string_content + { + /*%%%*/ + NODE *head = $1, *tail = $2; + if (!head) { + $$ = tail; + } + else if (!tail) { + $$ = head; + } + else { + switch (nd_type(head)) { + case NODE_STR: + nd_set_type(head, NODE_DSTR); + break; + case NODE_DSTR: + break; + default: + head = list_append(NEW_DSTR(Qnil), head); + break; + } + $$ = list_append(head, tail); + } + /*% + $$ = dispatch2(regexp_add, $1, $2); + %*/ + } + ; + string_content : tSTRING_CONTENT | tSTRING_DVAR { @@ -9064,7 +9124,7 @@ reg_fragment_setenc_gen(struct parser_params* parser, VALUE str, int options) c, rb_enc_name(rb_enc_get(str))); } -static void +static int reg_fragment_check_gen(struct parser_params* parser, VALUE str, int options) { VALUE err; @@ -9074,7 +9134,9 @@ reg_fragment_check_gen(struct parser_params* parser, VALUE str, int options) err = rb_obj_as_string(err); compile_error(PARSER_ARG "%s", RSTRING_PTR(err)); RB_GC_GUARD(err); + return 0; } + return 1; } typedef struct { diff --git a/test/ruby/test_regexp.rb b/test/ruby/test_regexp.rb index 1ef1380078..35f23685eb 100644 --- a/test/ruby/test_regexp.rb +++ b/test/ruby/test_regexp.rb @@ -477,7 +477,7 @@ class TestRegexp < Test::Unit::TestCase end.value assert(m.tainted?) assert_nothing_raised('[ruby-core:26137]') { - m = proc {$SAFE = 4; %r"#{}"o}.call + m = proc {$SAFE = 4; %r"#{ }"o}.call } assert(m.tainted?) end @@ -807,4 +807,9 @@ class TestRegexp < Test::Unit::TestCase assert_equal("1", $1) assert_equal(" " * 4999999, $2) end + + def test_invalid_fragment + bug2547 = '[ruby-core:27374]' + assert_raise(SyntaxError, bug2547) {eval('/#{"\\\\"}y/')} + end end