mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
Multiple codepoints are not allowed in a single character literal
Character literals containing multiple codepoints have been unintentionally accepted since Ruby 2.5.
This commit is contained in:
parent
0a2f598d23
commit
d746a41e85
2 changed files with 21 additions and 5 deletions
25
parse.y
25
parse.y
|
@ -6246,24 +6246,28 @@ tokadd_codepoint(struct parser_params *p, rb_encoding **encp,
|
||||||
/* return value is for ?\u3042 */
|
/* return value is for ?\u3042 */
|
||||||
static void
|
static void
|
||||||
tokadd_utf8(struct parser_params *p, rb_encoding **encp,
|
tokadd_utf8(struct parser_params *p, rb_encoding **encp,
|
||||||
int string_literal, int symbol_literal, int regexp_literal)
|
int term, int symbol_literal, int regexp_literal)
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
* If string_literal is true, then we allow multiple codepoints
|
* If `term` is not -1, then we allow multiple codepoints in \u{}
|
||||||
* in \u{}, and add the codepoints to the current token.
|
* up to the `term` byte; otherwise we're parsing a character literal.
|
||||||
* Otherwise we're parsing a character literal and return a single
|
* And then add the codepoints to the current token.
|
||||||
* codepoint without adding it
|
|
||||||
*/
|
*/
|
||||||
|
static const char multiple_codepoints[] = "Multiple codepoints at single character literal";
|
||||||
|
|
||||||
const int open_brace = '{', close_brace = '}';
|
const int open_brace = '{', close_brace = '}';
|
||||||
|
|
||||||
if (regexp_literal) { tokadd(p, '\\'); tokadd(p, 'u'); }
|
if (regexp_literal) { tokadd(p, '\\'); tokadd(p, 'u'); }
|
||||||
|
|
||||||
if (peek(p, open_brace)) { /* handle \u{...} form */
|
if (peek(p, open_brace)) { /* handle \u{...} form */
|
||||||
|
const char *second = NULL;
|
||||||
int c, last = nextc(p);
|
int c, last = nextc(p);
|
||||||
if (p->lex.pcur >= p->lex.pend) goto unterminated;
|
if (p->lex.pcur >= p->lex.pend) goto unterminated;
|
||||||
while (ISSPACE(c = *p->lex.pcur) && ++p->lex.pcur < p->lex.pend);
|
while (ISSPACE(c = *p->lex.pcur) && ++p->lex.pcur < p->lex.pend);
|
||||||
while (c != close_brace) {
|
while (c != close_brace) {
|
||||||
|
if (c == term) goto unterminated;
|
||||||
|
if (second == multiple_codepoints)
|
||||||
|
second = p->lex.pcur;
|
||||||
if (regexp_literal) tokadd(p, last);
|
if (regexp_literal) tokadd(p, last);
|
||||||
if (!tokadd_codepoint(p, encp, regexp_literal, TRUE)) {
|
if (!tokadd_codepoint(p, encp, regexp_literal, TRUE)) {
|
||||||
break;
|
break;
|
||||||
|
@ -6272,6 +6276,8 @@ tokadd_utf8(struct parser_params *p, rb_encoding **encp,
|
||||||
if (++p->lex.pcur >= p->lex.pend) goto unterminated;
|
if (++p->lex.pcur >= p->lex.pend) goto unterminated;
|
||||||
last = c;
|
last = c;
|
||||||
}
|
}
|
||||||
|
if (term == -1 && !second)
|
||||||
|
second = multiple_codepoints;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (c != close_brace) {
|
if (c != close_brace) {
|
||||||
|
@ -6280,6 +6286,15 @@ tokadd_utf8(struct parser_params *p, rb_encoding **encp,
|
||||||
yyerror0("unterminated Unicode escape");
|
yyerror0("unterminated Unicode escape");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
if (second && second != multiple_codepoints) {
|
||||||
|
const char *pcur = p->lex.pcur;
|
||||||
|
p->lex.pcur = second;
|
||||||
|
dispatch_scan_event(p, tSTRING_CONTENT);
|
||||||
|
token_flush(p);
|
||||||
|
p->lex.pcur = pcur;
|
||||||
|
yyerror0(multiple_codepoints);
|
||||||
|
token_flush(p);
|
||||||
|
}
|
||||||
|
|
||||||
if (regexp_literal) tokadd(p, close_brace);
|
if (regexp_literal) tokadd(p, close_brace);
|
||||||
nextc(p);
|
nextc(p);
|
||||||
|
|
|
@ -577,6 +577,7 @@ class TestParse < Test::Unit::TestCase
|
||||||
assert_equal("\u{1234}", eval("?\u{1234}"))
|
assert_equal("\u{1234}", eval("?\u{1234}"))
|
||||||
assert_equal("\u{1234}", eval('?\u{1234}'))
|
assert_equal("\u{1234}", eval('?\u{1234}'))
|
||||||
assert_equal("\u{1234}", eval('?\u1234'))
|
assert_equal("\u{1234}", eval('?\u1234'))
|
||||||
|
assert_syntax_error('?\u{41 42}', 'Multiple codepoints at single character literal')
|
||||||
e = assert_syntax_error('"#{?\u123}"', 'invalid Unicode escape')
|
e = assert_syntax_error('"#{?\u123}"', 'invalid Unicode escape')
|
||||||
assert_not_match(/end-of-input/, e.message)
|
assert_not_match(/end-of-input/, e.message)
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue