1
0
Fork 0
mirror of https://github.com/ruby/ruby.git synced 2022-11-09 12:17:21 -05:00

Multiple codepoints are not allowed in a single character literal

A character literal containing multiple codepoints has been unintentionally accepted since Ruby 2.5.
This commit is contained in:
Nobuyoshi Nakada 2019-07-05 22:18:08 +09:00
parent 0a2f598d23
commit d746a41e85
No known key found for this signature in database
GPG key ID: 4BC7D6DF58D8DF60
2 changed files with 21 additions and 5 deletions

25
parse.y
View file

@ -6246,24 +6246,28 @@ tokadd_codepoint(struct parser_params *p, rb_encoding **encp,
/* return value is for ?\u3042 */ /* return value is for ?\u3042 */
static void static void
tokadd_utf8(struct parser_params *p, rb_encoding **encp, tokadd_utf8(struct parser_params *p, rb_encoding **encp,
int string_literal, int symbol_literal, int regexp_literal) int term, int symbol_literal, int regexp_literal)
{ {
/* /*
* If string_literal is true, then we allow multiple codepoints * If `term` is not -1, then we allow multiple codepoints in \u{}
* in \u{}, and add the codepoints to the current token. * upto `term` byte, otherwise we're parsing a character literal.
* Otherwise we're parsing a character literal and return a single * And then add the codepoints to the current token.
* codepoint without adding it
*/ */
static const char multiple_codepoints[] = "Multiple codepoints at single character literal";
const int open_brace = '{', close_brace = '}'; const int open_brace = '{', close_brace = '}';
if (regexp_literal) { tokadd(p, '\\'); tokadd(p, 'u'); } if (regexp_literal) { tokadd(p, '\\'); tokadd(p, 'u'); }
if (peek(p, open_brace)) { /* handle \u{...} form */ if (peek(p, open_brace)) { /* handle \u{...} form */
const char *second = NULL;
int c, last = nextc(p); int c, last = nextc(p);
if (p->lex.pcur >= p->lex.pend) goto unterminated; if (p->lex.pcur >= p->lex.pend) goto unterminated;
while (ISSPACE(c = *p->lex.pcur) && ++p->lex.pcur < p->lex.pend); while (ISSPACE(c = *p->lex.pcur) && ++p->lex.pcur < p->lex.pend);
while (c != close_brace) { while (c != close_brace) {
if (c == term) goto unterminated;
if (second == multiple_codepoints)
second = p->lex.pcur;
if (regexp_literal) tokadd(p, last); if (regexp_literal) tokadd(p, last);
if (!tokadd_codepoint(p, encp, regexp_literal, TRUE)) { if (!tokadd_codepoint(p, encp, regexp_literal, TRUE)) {
break; break;
@ -6272,6 +6276,8 @@ tokadd_utf8(struct parser_params *p, rb_encoding **encp,
if (++p->lex.pcur >= p->lex.pend) goto unterminated; if (++p->lex.pcur >= p->lex.pend) goto unterminated;
last = c; last = c;
} }
if (term == -1 && !second)
second = multiple_codepoints;
} }
if (c != close_brace) { if (c != close_brace) {
@ -6280,6 +6286,15 @@ tokadd_utf8(struct parser_params *p, rb_encoding **encp,
yyerror0("unterminated Unicode escape"); yyerror0("unterminated Unicode escape");
return; return;
} }
if (second && second != multiple_codepoints) {
const char *pcur = p->lex.pcur;
p->lex.pcur = second;
dispatch_scan_event(p, tSTRING_CONTENT);
token_flush(p);
p->lex.pcur = pcur;
yyerror0(multiple_codepoints);
token_flush(p);
}
if (regexp_literal) tokadd(p, close_brace); if (regexp_literal) tokadd(p, close_brace);
nextc(p); nextc(p);

View file

@ -577,6 +577,7 @@ class TestParse < Test::Unit::TestCase
assert_equal("\u{1234}", eval("?\u{1234}")) assert_equal("\u{1234}", eval("?\u{1234}"))
assert_equal("\u{1234}", eval('?\u{1234}')) assert_equal("\u{1234}", eval('?\u{1234}'))
assert_equal("\u{1234}", eval('?\u1234')) assert_equal("\u{1234}", eval('?\u1234'))
assert_syntax_error('?\u{41 42}', 'Multiple codepoints at single character literal')
e = assert_syntax_error('"#{?\u123}"', 'invalid Unicode escape') e = assert_syntax_error('"#{?\u123}"', 'invalid Unicode escape')
assert_not_match(/end-of-input/, e.message) assert_not_match(/end-of-input/, e.message)