From 342d4c16d963408905fd08118d1908fe197f2364 Mon Sep 17 00:00:00 2001 From: yui-knk Date: Sun, 25 Sep 2022 18:09:34 +0900 Subject: [PATCH] Generate "end" tokens if the parser hits end of input while "end" tokens are still needed for the code to be syntactically correct. [Feature #19013] --- parse.y | 111 ++++++++++++- test/ruby/test_ast.rb | 378 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 483 insertions(+), 6 deletions(-) diff --git a/parse.y b/parse.y index c05ce48068..a1b1e2ee46 100644 --- a/parse.y +++ b/parse.y @@ -355,6 +355,8 @@ struct parser_params { VALUE error_buffer; VALUE debug_lines; const struct rb_iseq_struct *parent_iseq; + /* store specific keyword locations to generate dummy end token */ + VALUE end_expect_token_localtions; #else /* Ripper only */ @@ -408,6 +410,36 @@ pop_pktbl(struct parser_params *p, st_table *tbl) p->pktbl = tbl; } +#ifndef RIPPER +static void flush_debug_buffer(struct parser_params *p, VALUE out, VALUE str); + +static void +debug_end_expect_token_localtions(struct parser_params *p, const char *name) +{ + if(p->debug) { + VALUE mesg = rb_sprintf("%s: ", name); + rb_str_catf(mesg, " %"PRIsVALUE"\n", p->end_expect_token_localtions); + flush_debug_buffer(p, p->debug_output, mesg); + } +} + +static void +push_end_expect_token_localtions(struct parser_params *p, const rb_code_position_t *pos) +{ + if(NIL_P(p->end_expect_token_localtions)) return; + rb_ary_push(p->end_expect_token_localtions, rb_ary_new_from_args(2, INT2NUM(pos->lineno), INT2NUM(pos->column))); + debug_end_expect_token_localtions(p, "push_end_expect_token_localtions"); +} + +static void +pop_end_expect_token_localtions(struct parser_params *p) +{ + if(NIL_P(p->end_expect_token_localtions)) return; + rb_ary_pop(p->end_expect_token_localtions); + debug_end_expect_token_localtions(p, "pop_end_expect_token_localtions"); +} +#endif + RBIMPL_ATTR_NONNULL((1, 2, 3)) static int parser_yyerror(struct parser_params*, const YYLTYPE *yylloc, const char*); RBIMPL_ATTR_NONNULL((1, 2)) @@ 
-1214,6 +1246,7 @@ static int looking_at_eol_p(struct parser_params *p); %token tBACK_REF "back reference" %token tSTRING_CONTENT "literal content" %token tREGEXP_END +%token tDUMNY_END "dummy end" %type singleton strings string string1 xstring regexp %type string_contents xstring_contents regexp_contents string_content @@ -3307,28 +3340,38 @@ primary : literal } | defn_head f_arglist + { + /*%%%*/ + push_end_expect_token_localtions(p, &@1.beg_pos); + /*% %*/ + } bodystmt k_end { restore_defun(p, $1->nd_defn); /*%%%*/ - $$ = set_defun_body(p, $1, $2, $3, &@$); + $$ = set_defun_body(p, $1, $2, $4, &@$); /*% %*/ - /*% ripper: def!(get_value($1), $2, $3) %*/ + /*% ripper: def!(get_value($1), $2, $4) %*/ local_pop(p); } | defs_head f_arglist + { + /*%%%*/ + push_end_expect_token_localtions(p, &@1.beg_pos); + /*% %*/ + } bodystmt k_end { restore_defun(p, $1->nd_defn); /*%%%*/ - $$ = set_defun_body(p, $1, $2, $3, &@$); + $$ = set_defun_body(p, $1, $2, $4, &@$); /*% $1 = get_value($1); %*/ - /*% ripper: defs!(AREF($1, 0), AREF($1, 1), AREF($1, 2), $2, $3) %*/ + /*% ripper: defs!(AREF($1, 0), AREF($1, 1), AREF($1, 2), $2, $4) %*/ local_pop(p); } | keyword_break @@ -3371,6 +3414,9 @@ primary_value : primary k_begin : keyword_begin { token_info_push(p, "begin", &@$); + /*%%%*/ + push_end_expect_token_localtions(p, &@1.beg_pos); + /*% %*/ } ; @@ -3388,36 +3434,54 @@ k_if : keyword_if p->token_info->nonspc = 0; } } + /*%%%*/ + push_end_expect_token_localtions(p, &@1.beg_pos); + /*% %*/ } ; k_unless : keyword_unless { token_info_push(p, "unless", &@$); + /*%%%*/ + push_end_expect_token_localtions(p, &@1.beg_pos); + /*% %*/ } ; k_while : keyword_while { token_info_push(p, "while", &@$); + /*%%%*/ + push_end_expect_token_localtions(p, &@1.beg_pos); + /*% %*/ } ; k_until : keyword_until { token_info_push(p, "until", &@$); + /*%%%*/ + push_end_expect_token_localtions(p, &@1.beg_pos); + /*% %*/ } ; k_case : keyword_case { token_info_push(p, "case", &@$); + /*%%%*/ + 
push_end_expect_token_localtions(p, &@1.beg_pos); + /*% %*/ } ; k_for : keyword_for { token_info_push(p, "for", &@$); + /*%%%*/ + push_end_expect_token_localtions(p, &@1.beg_pos); + /*% %*/ } ; @@ -3425,6 +3489,9 @@ k_class : keyword_class { token_info_push(p, "class", &@$); $$ = p->ctxt; + /*%%%*/ + push_end_expect_token_localtions(p, &@1.beg_pos); + /*% %*/ } ; @@ -3432,6 +3499,9 @@ k_module : keyword_module { token_info_push(p, "module", &@$); $$ = p->ctxt; + /*%%%*/ + push_end_expect_token_localtions(p, &@1.beg_pos); + /*% %*/ } ; @@ -3445,12 +3515,19 @@ k_def : keyword_def k_do : keyword_do { token_info_push(p, "do", &@$); + /*%%%*/ + push_end_expect_token_localtions(p, &@1.beg_pos); + /*% %*/ + } ; k_do_block : keyword_do_block { token_info_push(p, "do", &@$); + /*%%%*/ + push_end_expect_token_localtions(p, &@1.beg_pos); + /*% %*/ } ; @@ -3497,6 +3574,13 @@ k_elsif : keyword_elsif k_end : keyword_end { token_info_pop(p, "end", &@$); + /*%%%*/ + pop_end_expect_token_localtions(p); + /*% %*/ + } + | tDUMNY_END + { + compile_error(p, "syntax error, unexpected end-of-input"); } ; @@ -3862,9 +3946,15 @@ lambda_body : tLAMBEG compstmt '}' token_info_pop(p, "}", &@3); $$ = $2; } - | keyword_do_LAMBDA bodystmt k_end + | keyword_do_LAMBDA { - $$ = $2; + /*%%%*/ + push_end_expect_token_localtions(p, &@1.beg_pos); + /*% %*/ + } + bodystmt k_end + { + $$ = $3; } ; @@ -9319,6 +9409,12 @@ parser_yylex(struct parser_params *p) case '\032': /* ^Z */ case -1: /* end of script. 
*/ p->eofp = 1; +#ifndef RIPPER + if (!NIL_P(p->end_expect_token_localtions) && RARRAY_LEN(p->end_expect_token_localtions) > 0) { + pop_end_expect_token_localtions(p); + return tDUMNY_END; + } +#endif return 0; /* white spaces */ @@ -13184,6 +13280,7 @@ parser_initialize(struct parser_params *p) p->parsing_thread = Qnil; #else p->error_buffer = Qfalse; + p->end_expect_token_localtions = Qnil; #endif p->debug_buffer = Qnil; p->debug_output = rb_ractor_stdout(); @@ -13212,6 +13309,7 @@ parser_mark(void *ptr) rb_gc_mark(p->debug_lines); rb_gc_mark(p->compile_option); rb_gc_mark(p->error_buffer); + rb_gc_mark(p->end_expect_token_localtions); #else rb_gc_mark(p->delayed.token); rb_gc_mark(p->value); @@ -13324,6 +13422,7 @@ rb_parser_error_tolerant(VALUE vparser) TypedData_Get_Struct(vparser, struct parser_params, &parser_data_type, p); p->error_tolerant = 1; + p->end_expect_token_localtions = rb_ary_new(); } #endif diff --git a/test/ruby/test_ast.rb b/test/ruby/test_ast.rb index c2c5356f83..2f05bf97a1 100644 --- a/test/ruby/test_ast.rb +++ b/test/ruby/test_ast.rb @@ -1,6 +1,7 @@ # frozen_string_literal: false require 'test/unit' require 'tempfile' +require 'pp' class RubyVM module AbstractSyntaxTree @@ -578,4 +579,381 @@ dummy assert_equal(:SCOPE, node.type) end + + def test_error_tolerant_end_is_short_for_method_define + node = RubyVM::AbstractSyntaxTree.parse(<<~STR, error_tolerant: true) + def m + m2 + STR + + str = "" + PP.pp(node, str) + assert_equal(<<~EXP, str) + (SCOPE@1:0-2:4 + tbl: [] + args: nil + body: + (DEFN@1:0-2:4 + mid: :m + body: + (SCOPE@1:0-2:4 + tbl: [] + args: + (ARGS@1:5-1:5 + pre_num: 0 + pre_init: nil + opt: nil + first_post: nil + post_num: 0 + post_init: nil + rest: nil + kw: nil + kwrest: nil + block: nil) + body: (VCALL@2:2-2:4 :m2)))) + EXP + end + + def test_error_tolerant_end_is_short_for_singleton_method_define + node = RubyVM::AbstractSyntaxTree.parse(<<~STR, error_tolerant: true) + def obj.m + m2 + STR + + str = "" + PP.pp(node, str) + 
assert_equal(<<~EXP, str) + (SCOPE@1:0-2:4 + tbl: [] + args: nil + body: + (DEFS@1:0-2:4 (VCALL@1:4-1:7 :obj) :m + (SCOPE@1:0-2:4 + tbl: [] + args: + (ARGS@1:9-1:9 + pre_num: 0 + pre_init: nil + opt: nil + first_post: nil + post_num: 0 + post_init: nil + rest: nil + kw: nil + kwrest: nil + block: nil) + body: (VCALL@2:2-2:4 :m2)))) + EXP + end + + def test_error_tolerant_end_is_short_for_begin + node = RubyVM::AbstractSyntaxTree.parse(<<~STR, error_tolerant: true) + begin + a = 1 + STR + + str = "" + PP.pp(node, str) + assert_equal(<<~EXP, str) + (SCOPE@1:0-2:7 tbl: [:a] args: nil body: (LASGN@2:2-2:7 :a (LIT@2:6-2:7 1))) + EXP + end + + def test_error_tolerant_end_is_short_for_if + node = RubyVM::AbstractSyntaxTree.parse(<<~STR, error_tolerant: true) + if cond + a = 1 + STR + + str = "" + PP.pp(node, str) + assert_equal(<<~EXP, str) + (SCOPE@1:0-2:7 + tbl: [:a] + args: nil + body: + (IF@1:0-2:7 (VCALL@1:3-1:7 :cond) (LASGN@2:2-2:7 :a (LIT@2:6-2:7 1)) nil)) + EXP + + node = RubyVM::AbstractSyntaxTree.parse(<<~STR, error_tolerant: true) + if cond + a = 1 + else + STR + + str = "" + PP.pp(node, str) + assert_equal(<<~EXP, str) + (SCOPE@1:0-3:5 + tbl: [:a] + args: nil + body: + (IF@1:0-3:5 (VCALL@1:3-1:7 :cond) (LASGN@2:2-2:7 :a (LIT@2:6-2:7 1)) + (BEGIN@3:4-3:4 nil))) + EXP + end + + def test_error_tolerant_end_is_short_for_unless + node = RubyVM::AbstractSyntaxTree.parse(<<~STR, error_tolerant: true) + unless cond + a = 1 + STR + + str = "" + PP.pp(node, str) + assert_equal(<<~EXP, str) + (SCOPE@1:0-2:7 + tbl: [:a] + args: nil + body: + (UNLESS@1:0-2:7 (VCALL@1:7-1:11 :cond) (LASGN@2:2-2:7 :a (LIT@2:6-2:7 1)) + nil)) + EXP + + node = RubyVM::AbstractSyntaxTree.parse(<<~STR, error_tolerant: true) + unless cond + a = 1 + else + STR + + str = "" + PP.pp(node, str) + assert_equal(<<~EXP, str) + (SCOPE@1:0-3:5 + tbl: [:a] + args: nil + body: + (UNLESS@1:0-3:5 (VCALL@1:7-1:11 :cond) (LASGN@2:2-2:7 :a (LIT@2:6-2:7 1)) + (BEGIN@3:4-3:4 nil))) + EXP + end + + def 
test_error_tolerant_end_is_short_for_while + node = RubyVM::AbstractSyntaxTree.parse(<<~STR, error_tolerant: true) + while true + m + STR + + str = "" + PP.pp(node, str) + assert_equal(<<~EXP, str) + (SCOPE@1:0-2:3 + tbl: [] + args: nil + body: (WHILE@1:0-2:3 (TRUE@1:6-1:10) (VCALL@2:2-2:3 :m) true)) + EXP + end + + def test_error_tolerant_end_is_short_for_until + node = RubyVM::AbstractSyntaxTree.parse(<<~STR, error_tolerant: true) + until true + m + STR + + str = "" + PP.pp(node, str) + assert_equal(<<~EXP, str) + (SCOPE@1:0-2:3 + tbl: [] + args: nil + body: (UNTIL@1:0-2:3 (TRUE@1:6-1:10) (VCALL@2:2-2:3 :m) true)) + EXP + end + + def test_error_tolerant_end_is_short_for_case + node = RubyVM::AbstractSyntaxTree.parse(<<~STR, error_tolerant: true) + case a + when 1 + STR + + str = "" + PP.pp(node, str) + assert_equal(<<~EXP, str) + (SCOPE@1:0-2:6 + tbl: [] + args: nil + body: + (CASE@1:0-2:6 (VCALL@1:5-1:6 :a) + (WHEN@2:0-2:6 (LIST@2:5-2:6 (LIT@2:5-2:6 1) nil) (BEGIN@2:6-2:6 nil) + nil))) + EXP + + + node = RubyVM::AbstractSyntaxTree.parse(<<~STR, error_tolerant: true) + case + when a == 1 + STR + + str = "" + PP.pp(node, str) + assert_equal(<<~EXP, str) + (SCOPE@1:0-2:11 + tbl: [] + args: nil + body: + (CASE2@1:0-2:11 nil + (WHEN@2:0-2:11 + (LIST@2:5-2:11 + (OPCALL@2:5-2:11 (VCALL@2:5-2:6 :a) :== + (LIST@2:10-2:11 (LIT@2:10-2:11 1) nil)) nil) + (BEGIN@2:11-2:11 nil) nil))) + EXP + + + node = RubyVM::AbstractSyntaxTree.parse(<<~STR, error_tolerant: true) + case a + in {a: String} + STR + + str = "" + PP.pp(node, str) + assert_equal(<<~EXP, str) + (SCOPE@1:0-2:14 + tbl: [] + args: nil + body: + (CASE3@1:0-2:14 (VCALL@1:5-1:6 :a) + (IN@2:0-2:14 + (HSHPTN@2:4-2:13 + const: nil + kw: + (HASH@2:4-2:13 + (LIST@2:4-2:13 (LIT@2:4-2:6 :a) (CONST@2:7-2:13 :String) nil)) + kwrest: nil) (BEGIN@2:14-2:14 nil) nil))) + EXP + end + + def test_error_tolerant_end_is_short_for_for + node = RubyVM::AbstractSyntaxTree.parse(<<~STR, error_tolerant: true) + for i in ary + m + STR + + 
str = "" + PP.pp(node, str) + assert_equal(<<~EXP, str) + (SCOPE@1:0-2:3 + tbl: [:i] + args: nil + body: + (FOR@1:0-2:3 (VCALL@1:9-1:12 :ary) + (SCOPE@1:0-2:3 + tbl: [nil] + args: + (ARGS@1:4-1:5 + pre_num: 1 + pre_init: (LASGN@1:4-1:5 :i (DVAR@1:4-1:5 nil)) + opt: nil + first_post: nil + post_num: 0 + post_init: nil + rest: nil + kw: nil + kwrest: nil + block: nil) + body: (VCALL@2:2-2:3 :m)))) + EXP + end + + def test_error_tolerant_end_is_short_for_class + node = RubyVM::AbstractSyntaxTree.parse(<<~STR, error_tolerant: true) + class C + STR + + str = "" + PP.pp(node, str) + assert_equal(<<~EXP, str) + (SCOPE@1:0-1:7 + tbl: [] + args: nil + body: + (CLASS@1:0-1:7 (COLON2@1:6-1:7 nil :C) nil + (SCOPE@1:0-1:7 tbl: [] args: nil body: (BEGIN@1:7-1:7 nil)))) + EXP + end + + def test_error_tolerant_end_is_short_for_module + node = RubyVM::AbstractSyntaxTree.parse(<<~STR, error_tolerant: true) + module M + STR + + str = "" + PP.pp(node, str) + assert_equal(<<~EXP, str) + (SCOPE@1:0-1:8 + tbl: [] + args: nil + body: + (MODULE@1:0-1:8 (COLON2@1:7-1:8 nil :M) + (SCOPE@1:0-1:8 tbl: [] args: nil body: (BEGIN@1:8-1:8 nil)))) + EXP + end + + def test_error_tolerant_end_is_short_for_do + node = RubyVM::AbstractSyntaxTree.parse(<<~STR, error_tolerant: true) + m do + a + STR + + str = "" + PP.pp(node, str) + assert_equal(<<~EXP, str) + (SCOPE@1:0-2:3 + tbl: [] + args: nil + body: + (ITER@1:0-2:3 (FCALL@1:0-1:1 :m nil) + (SCOPE@1:2-2:3 tbl: [] args: nil body: (VCALL@2:2-2:3 :a)))) + EXP + end + + def test_error_tolerant_end_is_short_for_do_block + node = RubyVM::AbstractSyntaxTree.parse(<<~STR, error_tolerant: true) + m 1 do + a + STR + + str = "" + PP.pp(node, str) + assert_equal(<<~EXP, str) + (SCOPE@1:0-2:3 + tbl: [] + args: nil + body: + (ITER@1:0-2:3 (FCALL@1:0-1:3 :m (LIST@1:2-1:3 (LIT@1:2-1:3 1) nil)) + (SCOPE@1:4-2:3 tbl: [] args: nil body: (VCALL@2:2-2:3 :a)))) + EXP + end + + def test_error_tolerant_end_is_short_for_do_LAMBDA + node = 
RubyVM::AbstractSyntaxTree.parse(<<~STR, error_tolerant: true) + -> do + a + STR + + str = "" + PP.pp(node, str) + assert_equal(<<~EXP, str) + (SCOPE@1:0-2:3 + tbl: [] + args: nil + body: + (LAMBDA@1:0-2:3 + (SCOPE@1:2-2:3 + tbl: [] + args: + (ARGS@1:2-1:2 + pre_num: 0 + pre_init: nil + opt: nil + first_post: nil + post_num: 0 + post_init: nil + rest: nil + kw: nil + kwrest: nil + block: nil) + body: (VCALL@2:2-2:3 :a)))) + EXP + end end