diff --git a/ChangeLog b/ChangeLog index 6ee79fd8c0..30b4d166d7 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +Fri Sep 2 09:58:08 2011 Nobuyoshi Nakada + + * parse.y (parser_tokadd_string, parser_yylex): ignore a backslash + which prefixes a non-ascii character, which has no escape + syntax. [ruby-core:39222] [Ruby 1.9 - Bug #5262] + Fri Sep 2 04:05:25 2011 Aaron Patterson * ext/psych/lib/psych/visitors/yaml_tree.rb: emit strings tagged as diff --git a/parse.y b/parse.y index eb5f7fa5e6..df8dc75307 100644 --- a/parse.y +++ b/parse.y @@ -5411,6 +5411,7 @@ parser_str_new(const char *p, long n, rb_encoding *enc, int func, rb_encoding *e } #define lex_goto_eol(parser) ((parser)->parser_lex_p = (parser)->parser_lex_pend) +#define lex_eol_p() (lex_p >= lex_pend) #define peek(c) peek_n((c), 0) #define peek_n(c,n) (lex_p+(n) < lex_pend && (c) == (unsigned char)lex_p[n]) @@ -5920,6 +5921,8 @@ parser_tokadd_string(struct parser_params *parser, continue; default: + if (c == -1) return -1; + if (!ISASCII(c)) goto non_ascii; if (func & STR_FUNC_REGEXP) { pushback(c); if ((c = tokadd_escape(&enc)) < 0) @@ -5945,6 +5948,7 @@ parser_tokadd_string(struct parser_params *parser, } } else if (!parser_isascii()) { + non_ascii: has_nonascii = 1; if (enc != *encp) { mixed_error(enc, *encp); @@ -7003,6 +7007,10 @@ parser_yylex(struct parser_params *parser) tokadd(c); } } + else if (!lex_eol_p() && !(c = *lex_p, ISASCII(c))) { + nextc(); + if (tokadd_mbchar(c) == -1) return 0; + } else { c = read_escape(0, &enc); tokadd(c); diff --git a/test/ruby/test_literal.rb b/test/ruby/test_literal.rb index 315fe529ea..23c1c9b800 100644 --- a/test/ruby/test_literal.rb +++ b/test/ruby/test_literal.rb @@ -53,15 +53,33 @@ class TestRubyLiteral < Test::Unit::TestCase assert_equal "3", "\x33" assert_equal "\n", "\n" bug2500 = '[ruby-core:27228]' + bug5262 = '[ruby-core:39222]' %w[c C- M-].each do |pre| ["u", %w[u{ }]].each do |open, close| - str = "\"\\#{pre}\\#{open}5555#{close}\"" - 
assert_raise(SyntaxError, "#{bug2500} eval(#{str})") {eval(str)} + ["?", ['"', '"']].each do |qopen, qclose| + str = "#{qopen}\\#{pre}\\#{open}5555#{close}#{qclose}" + assert_raise(SyntaxError, "#{bug2500} eval(#{str})") {eval(str)} + + str = "#{qopen}\\#{pre}\\#{open}\u201c#{close}#{qclose}" + assert_raise(SyntaxError, "#{bug5262} eval(#{str})") {eval(str)} + + str = "#{qopen}\\#{pre}\\#{open}\u201c#{close}#{qclose}".encode("euc-jp") + assert_raise(SyntaxError, "#{bug5262} eval(#{str})") {eval(str)} + + str = "#{qopen}\\#{pre}\\#{open}\u201c#{close}#{qclose}".encode("iso-8859-13") + assert_raise(SyntaxError, "#{bug5262} eval(#{str})") {eval(str)} + + str = "#{qopen}\\#{pre}\\#{open}\xe2\x7f#{close}#{qclose}".force_encoding("utf-8") + assert_raise(SyntaxError, "#{bug5262} eval(#{str})") {eval(str)} + end end end assert_equal "\x13", "\c\x33" assert_equal "\x13", "\C-\x33" assert_equal "\xB3", "\M-\x33" + assert_equal "\u201c", eval(%["\\\u{201c}"]), bug5262 + assert_equal "\u201c".encode("euc-jp"), eval(%["\\\u{201c}"].encode("euc-jp")), bug5262 + assert_equal "\u201c".encode("iso-8859-13"), eval(%["\\\u{201c}"].encode("iso-8859-13")), bug5262 end def test_dstring