mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
merge revision(s) 6375c68f8851e1e0fee8a95afba91c4555097127,c05eaa93258ddc01e685b6cc3a0da82998a2af48: [Backport #15839]
parse.y: function parser_mixed_error & parser_mixed_escape

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@66919 b2dd03c8-39d4-4d8f-98ff-823fe69b080e

Fix mixed encoding in heredoc

Heredocs are parsed line-by-line, so we need to keep track of the temporary encoding of the string. Previously, a heredoc would only detect mixed encoding errors if they were on the same line; this change makes sure they are also caught when they occur on different lines.

Fixes [Bug #15839]

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/branches/ruby_2_6@67724 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
parent
a451d5d303
commit
adbb1c699d
3 changed files with 76 additions and 38 deletions
83
parse.y
83
parse.y
|
@ -4446,7 +4446,7 @@ none : /* none */
|
|||
# define yylval (*p->lval)
|
||||
|
||||
static int regx_options(struct parser_params*);
|
||||
static int tokadd_string(struct parser_params*,int,int,int,long*,rb_encoding**);
|
||||
static int tokadd_string(struct parser_params*,int,int,int,long*,rb_encoding**,rb_encoding**);
|
||||
static void tokaddmbc(struct parser_params *p, int c, rb_encoding *enc);
|
||||
static enum yytokentype parse_string(struct parser_params*,rb_strterm_literal_t*);
|
||||
static enum yytokentype here_document(struct parser_params*,rb_strterm_heredoc_t*);
|
||||
|
@ -5659,32 +5659,38 @@ parser_update_heredoc_indent(struct parser_params *p, int c)
|
|||
return FALSE;
|
||||
}
|
||||
|
||||
/*
 * Report a mixed-encoding error through yyerror0().
 *
 * The message is built from the template "%s mixed within %s source",
 * with the name of enc1 substituted first and the name of enc2 second
 * (e.g. "UTF-8 mixed within Windows-31J source").
 *
 * p    - parser state; not referenced by name in this body, presumably
 *        picked up implicitly by the yyerror0 macro (TODO confirm).
 * enc1 - encoding whose name is printed first in the message.
 * enc2 - encoding whose name is printed as the "source" encoding.
 */
static void
|
||||
parser_mixed_error(struct parser_params *p, rb_encoding *enc1, rb_encoding *enc2)
|
||||
{
|
||||
static const char mixed_msg[] = "%s mixed within %s source";
|
||||
const char *n1 = rb_enc_name(enc1), *n2 = rb_enc_name(enc2);
|
||||
/* sizeof(mixed_msg) already counts the terminating NUL; the two "%s"
 * specifiers (4 bytes in total) are replaced by the encoding names. */
const size_t len = sizeof(mixed_msg) - 4 + strlen(n1) + strlen(n2);
|
||||
/* NOTE(review): ALLOCA_N is presumably a stack allocation, so errbuf
 * would only be valid until this function returns - confirm. */
char *errbuf = ALLOCA_N(char, len);
|
||||
snprintf(errbuf, len, mixed_msg, n1, n2);
|
||||
yyerror0(errbuf);
|
||||
}
|
||||
|
||||
/*
 * Report a mixed-encoding error with the lexer cursor temporarily moved
 * to `beg`.
 *
 * p->lex.pcur is saved, set to `beg` for the duration of the
 * parser_mixed_error() call, and then restored, so the cursor is
 * unchanged on return.
 *
 * p    - parser state whose lex.pcur is temporarily overridden.
 * beg  - position to use as the cursor while the error is reported;
 *        presumably the start of the offending escape sequence
 *        (TODO confirm against callers).
 * enc1 - forwarded to parser_mixed_error() as its first encoding.
 * enc2 - forwarded to parser_mixed_error() as its second encoding.
 */
static void
|
||||
parser_mixed_escape(struct parser_params *p, const char *beg, rb_encoding *enc1, rb_encoding *enc2)
|
||||
{
|
||||
/* remember the current cursor so it can be put back afterwards */
const char *pos = p->lex.pcur;
|
||||
p->lex.pcur = beg;
|
||||
parser_mixed_error(p, enc1, enc2);
|
||||
p->lex.pcur = pos;
|
||||
}
|
||||
|
||||
static int
|
||||
tokadd_string(struct parser_params *p,
|
||||
int func, int term, int paren, long *nest,
|
||||
rb_encoding **encp)
|
||||
rb_encoding **encp, rb_encoding **enc)
|
||||
{
|
||||
int c;
|
||||
rb_encoding *enc = 0;
|
||||
char *errbuf = 0;
|
||||
static const char mixed_msg[] = "%s mixed within %s source";
|
||||
bool erred = false;
|
||||
|
||||
#define mixed_error(enc1, enc2) if (!errbuf) { \
|
||||
size_t len = sizeof(mixed_msg) - 4; \
|
||||
len += strlen(rb_enc_name(enc1)); \
|
||||
len += strlen(rb_enc_name(enc2)); \
|
||||
errbuf = ALLOCA_N(char, len); \
|
||||
snprintf(errbuf, len, mixed_msg, \
|
||||
rb_enc_name(enc1), \
|
||||
rb_enc_name(enc2)); \
|
||||
yyerror0(errbuf); \
|
||||
}
|
||||
#define mixed_escape(beg, enc1, enc2) do { \
|
||||
const char *pos = p->lex.pcur; \
|
||||
p->lex.pcur = (beg); \
|
||||
mixed_error((enc1), (enc2)); \
|
||||
p->lex.pcur = pos; \
|
||||
} while (0)
|
||||
#define mixed_error(enc1, enc2) \
|
||||
(void)(erred || (parser_mixed_error(p, enc1, enc2), erred = true))
|
||||
#define mixed_escape(beg, enc1, enc2) \
|
||||
(void)(erred || (parser_mixed_escape(p, beg, enc1, enc2), erred = true))
|
||||
|
||||
while ((c = nextc(p)) != -1) {
|
||||
if (p->heredoc_indent > 0) {
|
||||
|
@ -5734,7 +5740,7 @@ tokadd_string(struct parser_params *p,
|
|||
tokadd(p, '\\');
|
||||
break;
|
||||
}
|
||||
if (!parser_tokadd_utf8(p, &enc, term,
|
||||
if (!parser_tokadd_utf8(p, enc, term,
|
||||
func & STR_FUNC_SYMBOL,
|
||||
func & STR_FUNC_REGEXP)) {
|
||||
return -1;
|
||||
|
@ -5753,17 +5759,17 @@ tokadd_string(struct parser_params *p,
|
|||
continue;
|
||||
}
|
||||
pushback(p, c);
|
||||
if ((c = tokadd_escape(p, &enc)) < 0)
|
||||
if ((c = tokadd_escape(p, enc)) < 0)
|
||||
return -1;
|
||||
if (enc && enc != *encp) {
|
||||
mixed_escape(p->lex.ptok+2, enc, *encp);
|
||||
if (*enc && *enc != *encp) {
|
||||
mixed_escape(p->lex.ptok+2, *enc, *encp);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
else if (func & STR_FUNC_EXPAND) {
|
||||
pushback(p, c);
|
||||
if (func & STR_FUNC_ESCAPE) tokadd(p, '\\');
|
||||
c = read_escape(p, 0, &enc);
|
||||
c = read_escape(p, 0, enc);
|
||||
}
|
||||
else if ((func & STR_FUNC_QWORDS) && ISSPACE(c)) {
|
||||
/* ignore backslashed spaces in %w */
|
||||
|
@ -5777,11 +5783,11 @@ tokadd_string(struct parser_params *p,
|
|||
}
|
||||
else if (!parser_isascii(p)) {
|
||||
non_ascii:
|
||||
if (!enc) {
|
||||
enc = *encp;
|
||||
if (!*enc) {
|
||||
*enc = *encp;
|
||||
}
|
||||
else if (enc != *encp) {
|
||||
mixed_error(enc, *encp);
|
||||
else if (*enc != *encp) {
|
||||
mixed_error(*enc, *encp);
|
||||
continue;
|
||||
}
|
||||
if (tokadd_mbchar(p, c) == -1) return -1;
|
||||
|
@ -5792,18 +5798,18 @@ tokadd_string(struct parser_params *p,
|
|||
break;
|
||||
}
|
||||
if (c & 0x80) {
|
||||
if (!enc) {
|
||||
enc = *encp;
|
||||
if (!*enc) {
|
||||
*enc = *encp;
|
||||
}
|
||||
else if (enc != *encp) {
|
||||
mixed_error(enc, *encp);
|
||||
else if (*enc != *encp) {
|
||||
mixed_error(*enc, *encp);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
tokadd(p, c);
|
||||
}
|
||||
terminate:
|
||||
if (enc) *encp = enc;
|
||||
if (*enc) *encp = *enc;
|
||||
return c;
|
||||
}
|
||||
|
||||
|
@ -5936,6 +5942,7 @@ parse_string(struct parser_params *p, rb_strterm_literal_t *quote)
|
|||
int paren = (int)quote->u2.paren;
|
||||
int c, space = 0;
|
||||
rb_encoding *enc = p->enc;
|
||||
rb_encoding *base_enc = 0;
|
||||
VALUE lit;
|
||||
|
||||
if (func & STR_FUNC_TERM) {
|
||||
|
@ -5976,7 +5983,7 @@ parse_string(struct parser_params *p, rb_strterm_literal_t *quote)
|
|||
}
|
||||
pushback(p, c);
|
||||
if (tokadd_string(p, func, term, paren, "e->u0.nest,
|
||||
&enc) == -1) {
|
||||
&enc, &base_enc) == -1) {
|
||||
if (p->eofp) {
|
||||
#ifndef RIPPER
|
||||
# define unterminated_literal(mesg) yyerror0(mesg)
|
||||
|
@ -6341,6 +6348,7 @@ here_document(struct parser_params *p, rb_strterm_heredoc_t *here)
|
|||
long len;
|
||||
VALUE str = 0;
|
||||
rb_encoding *enc = p->enc;
|
||||
rb_encoding *base_enc = 0;
|
||||
int bol;
|
||||
|
||||
eos = RSTRING_PTR(here->term);
|
||||
|
@ -6452,7 +6460,8 @@ here_document(struct parser_params *p, rb_strterm_heredoc_t *here)
|
|||
}
|
||||
do {
|
||||
pushback(p, c);
|
||||
if ((c = tokadd_string(p, func, '\n', 0, NULL, &enc)) == -1) {
|
||||
enc = p->enc;
|
||||
if ((c = tokadd_string(p, func, '\n', 0, NULL, &enc, &base_enc)) == -1) {
|
||||
if (p->eofp) goto error;
|
||||
goto restore;
|
||||
}
|
||||
|
|
|
@ -763,6 +763,35 @@ eom
|
|||
assert_equal("\n0\n1", eval("<<~0 '1'\n \n0\#{}\n0"))
|
||||
end
|
||||
|
||||
# Checks that a heredoc in a source declared "#encoding: cp932" raises a
# syntax error matching 'UTF-8 mixed within Windows-31J source' when it
# combines \xe9\x9d with \u1234. Four layouts are covered: both on the
# same line and on separate lines, each with either one coming first.
def test_heredoc_mixed_encoding
|
||||
assert_syntax_error(<<-'HEREDOC', 'UTF-8 mixed within Windows-31J source')
|
||||
#encoding: cp932
|
||||
<<-TEXT
|
||||
\xe9\x9d\u1234
|
||||
TEXT
|
||||
HEREDOC
|
||||
assert_syntax_error(<<-'HEREDOC', 'UTF-8 mixed within Windows-31J source')
|
||||
#encoding: cp932
|
||||
<<-TEXT
|
||||
\xe9\x9d
|
||||
\u1234
|
||||
TEXT
|
||||
HEREDOC
|
||||
assert_syntax_error(<<-'HEREDOC', 'UTF-8 mixed within Windows-31J source')
|
||||
#encoding: cp932
|
||||
<<-TEXT
|
||||
\u1234\xe9\x9d
|
||||
TEXT
|
||||
HEREDOC
|
||||
assert_syntax_error(<<-'HEREDOC', 'UTF-8 mixed within Windows-31J source')
|
||||
#encoding: cp932
|
||||
<<-TEXT
|
||||
\u1234
|
||||
\xe9\x9d
|
||||
TEXT
|
||||
HEREDOC
|
||||
end
|
||||
|
||||
def test_lineno_operation_brace_block
|
||||
expected = __LINE__ + 1
|
||||
actual = caller_lineno\
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
#define RUBY_VERSION "2.6.3"
|
||||
#define RUBY_RELEASE_DATE RUBY_RELEASE_YEAR_STR"-"RUBY_RELEASE_MONTH_STR"-"RUBY_RELEASE_DAY_STR
|
||||
#define RUBY_PATCHLEVEL 74
|
||||
#define RUBY_PATCHLEVEL 75
|
||||
|
||||
#define RUBY_RELEASE_YEAR 2019
|
||||
#define RUBY_RELEASE_MONTH 8
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue