mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
Multiple codepoints are not allowed at single character literal
Such literals have been unintentionally accepted since Ruby 2.5.
This commit is contained in:
parent
0a2f598d23
commit
d746a41e85
2 changed files with 21 additions and 5 deletions
25
parse.y
25
parse.y
|
@ -6246,24 +6246,28 @@ tokadd_codepoint(struct parser_params *p, rb_encoding **encp,
|
|||
/* return value is for ?\u3042 */
|
||||
static void
|
||||
tokadd_utf8(struct parser_params *p, rb_encoding **encp,
|
||||
int string_literal, int symbol_literal, int regexp_literal)
|
||||
int term, int symbol_literal, int regexp_literal)
|
||||
{
|
||||
/*
|
||||
* If string_literal is true, then we allow multiple codepoints
|
||||
* in \u{}, and add the codepoints to the current token.
|
||||
* Otherwise we're parsing a character literal and return a single
|
||||
* codepoint without adding it
|
||||
* If `term` is not -1, then we allow multiple codepoints in \u{}
|
||||
* up to the `term` byte; otherwise we're parsing a character literal.
|
||||
* And then add the codepoints to the current token.
|
||||
*/
|
||||
static const char multiple_codepoints[] = "Multiple codepoints at single character literal";
|
||||
|
||||
const int open_brace = '{', close_brace = '}';
|
||||
|
||||
if (regexp_literal) { tokadd(p, '\\'); tokadd(p, 'u'); }
|
||||
|
||||
if (peek(p, open_brace)) { /* handle \u{...} form */
|
||||
const char *second = NULL;
|
||||
int c, last = nextc(p);
|
||||
if (p->lex.pcur >= p->lex.pend) goto unterminated;
|
||||
while (ISSPACE(c = *p->lex.pcur) && ++p->lex.pcur < p->lex.pend);
|
||||
while (c != close_brace) {
|
||||
if (c == term) goto unterminated;
|
||||
if (second == multiple_codepoints)
|
||||
second = p->lex.pcur;
|
||||
if (regexp_literal) tokadd(p, last);
|
||||
if (!tokadd_codepoint(p, encp, regexp_literal, TRUE)) {
|
||||
break;
|
||||
|
@ -6272,6 +6276,8 @@ tokadd_utf8(struct parser_params *p, rb_encoding **encp,
|
|||
if (++p->lex.pcur >= p->lex.pend) goto unterminated;
|
||||
last = c;
|
||||
}
|
||||
if (term == -1 && !second)
|
||||
second = multiple_codepoints;
|
||||
}
|
||||
|
||||
if (c != close_brace) {
|
||||
|
@ -6280,6 +6286,15 @@ tokadd_utf8(struct parser_params *p, rb_encoding **encp,
|
|||
yyerror0("unterminated Unicode escape");
|
||||
return;
|
||||
}
|
||||
if (second && second != multiple_codepoints) {
|
||||
const char *pcur = p->lex.pcur;
|
||||
p->lex.pcur = second;
|
||||
dispatch_scan_event(p, tSTRING_CONTENT);
|
||||
token_flush(p);
|
||||
p->lex.pcur = pcur;
|
||||
yyerror0(multiple_codepoints);
|
||||
token_flush(p);
|
||||
}
|
||||
|
||||
if (regexp_literal) tokadd(p, close_brace);
|
||||
nextc(p);
|
||||
|
|
|
@ -577,6 +577,7 @@ class TestParse < Test::Unit::TestCase
|
|||
assert_equal("\u{1234}", eval("?\u{1234}"))
|
||||
assert_equal("\u{1234}", eval('?\u{1234}'))
|
||||
assert_equal("\u{1234}", eval('?\u1234'))
|
||||
assert_syntax_error('?\u{41 42}', 'Multiple codepoints at single character literal')
|
||||
e = assert_syntax_error('"#{?\u123}"', 'invalid Unicode escape')
|
||||
assert_not_match(/end-of-input/, e.message)
|
||||
|
||||
|
|
Loading…
Reference in a new issue