mirror of
https://github.com/jashkenas/coffeescript.git
synced 2022-11-09 12:23:24 -05:00
Fix #3795: Never generate invalid strings and regexes
- Invalid `\x` and `\u` escapes now throw errors.
- U+2028 and U+2029 (which JavaScript treats as newline characters) are now escaped to `\u2028` and `\u2029`, respectively.
- Octal escapes are now forbidden not only in strings, but in regexes as well.
- `\0` escapes are now escaped if needed (so that they do not form an octal literal by mistake). Note that `\01` is an octal escape in a regex, while `\1` is a backreference. (Added a test for backreferences while at it.)
- Fixed a bug where newlines in strings weren't removed if preceded by an escaped character.
This commit is contained in:
parent
5a220d4e13
commit
72ceec5680
6 changed files with 277 additions and 90 deletions
|
@ -1,6 +1,6 @@
|
|||
// Generated by CoffeeScript 1.9.0
|
||||
(function() {
|
||||
var BOM, BOOL, CALLABLE, CODE, COFFEE_ALIASES, COFFEE_ALIAS_MAP, COFFEE_KEYWORDS, COMMENT, COMPARE, COMPOUND_ASSIGN, HERECOMMENT_ILLEGAL, HEREDOC_DOUBLE, HEREDOC_INDENT, HEREDOC_SINGLE, HEREGEX, HEREGEX_OMIT, IDENTIFIER, INDENTABLE_CLOSERS, INDEXABLE, INVERSES, JSTOKEN, JS_FORBIDDEN, JS_KEYWORDS, LEADING_BLANK_LINE, LINE_BREAK, LINE_CONTINUER, LOGIC, Lexer, MATH, MULTILINER, MULTI_DENT, NOT_REGEX, NUMBER, OCTAL_ESCAPE, OPERATOR, POSSIBLY_DIVISION, REGEX, REGEX_FLAGS, REGEX_ILLEGAL, RELATION, RESERVED, Rewriter, SHIFT, STRICT_PROSCRIBED, STRING_DOUBLE, STRING_OMIT, STRING_SINGLE, STRING_START, TRAILING_BLANK_LINE, TRAILING_SPACES, UNARY, UNARY_MATH, VALID_FLAGS, WHITESPACE, compact, count, invertLiterate, key, last, locationDataToString, ref, ref1, repeat, starts, throwSyntaxError,
|
||||
var BOM, BOOL, CALLABLE, CODE, COFFEE_ALIASES, COFFEE_ALIAS_MAP, COFFEE_KEYWORDS, COMMENT, COMPARE, COMPOUND_ASSIGN, HERECOMMENT_ILLEGAL, HEREDOC_DOUBLE, HEREDOC_INDENT, HEREDOC_SINGLE, HEREGEX, HEREGEX_OMIT, IDENTIFIER, INDENTABLE_CLOSERS, INDEXABLE, INVALID_ESCAPE, INVERSES, JSTOKEN, JS_FORBIDDEN, JS_KEYWORDS, LEADING_BLANK_LINE, LINE_BREAK, LINE_CONTINUER, LOGIC, Lexer, MATH, MULTI_DENT, NOT_REGEX, NUMBER, OPERATOR, POSSIBLY_DIVISION, REGEX, REGEX_FLAGS, REGEX_ILLEGAL, RELATION, RESERVED, Rewriter, SHIFT, SIMPLE_STRING_OMIT, STRICT_PROSCRIBED, STRING_DOUBLE, STRING_OMIT, STRING_SINGLE, STRING_START, TRAILING_BLANK_LINE, TRAILING_SPACES, UNARY, UNARY_MATH, VALID_FLAGS, WHITESPACE, compact, count, invertLiterate, key, last, locationDataToString, ref, ref1, repeat, starts, throwSyntaxError,
|
||||
indexOf = [].indexOf || function(item) { for (var i = 0, l = this.length; i < l; i++) { if (i in this && this[i] === item) return i; } return -1; };
|
||||
|
||||
ref = require('./rewriter'), Rewriter = ref.Rewriter, INVERSES = ref.INVERSES;
|
||||
|
@ -177,7 +177,7 @@
|
|||
};
|
||||
|
||||
Lexer.prototype.stringToken = function() {
|
||||
var $, attempt, doc, end, heredoc, i, indent, indentRegex, match, quote, ref2, ref3, regex, token, tokens;
|
||||
var $, attempt, delimiter, doc, end, heredoc, i, indent, indentRegex, match, quote, ref2, ref3, regex, token, tokens;
|
||||
quote = (STRING_START.exec(this.chunk) || [])[0];
|
||||
if (!quote) {
|
||||
return 0;
|
||||
|
@ -197,6 +197,7 @@
|
|||
heredoc = quote.length === 3;
|
||||
ref2 = this.matchWithInterpolations(regex, quote), tokens = ref2.tokens, end = ref2.index;
|
||||
$ = tokens.length - 1;
|
||||
delimiter = quote[0];
|
||||
if (heredoc) {
|
||||
indent = null;
|
||||
doc = ((function() {
|
||||
|
@ -219,7 +220,9 @@
|
|||
if (indent) {
|
||||
indentRegex = RegExp("^" + indent, "gm");
|
||||
}
|
||||
this.mergeInterpolationTokens(tokens, quote[0], (function(_this) {
|
||||
this.mergeInterpolationTokens(tokens, {
|
||||
delimiter: delimiter
|
||||
}, (function(_this) {
|
||||
return function(value, i) {
|
||||
value = _this.formatString(value);
|
||||
if (i === 0) {
|
||||
|
@ -229,15 +232,16 @@
|
|||
value = value.replace(TRAILING_BLANK_LINE, '');
|
||||
}
|
||||
value = value.replace(indentRegex, '');
|
||||
value = value.replace(MULTILINER, '\\n');
|
||||
return value;
|
||||
};
|
||||
})(this));
|
||||
} else {
|
||||
this.mergeInterpolationTokens(tokens, quote, (function(_this) {
|
||||
this.mergeInterpolationTokens(tokens, {
|
||||
delimiter: delimiter
|
||||
}, (function(_this) {
|
||||
return function(value, i) {
|
||||
value = _this.formatString(value);
|
||||
value = value.replace(STRING_OMIT, function(match, offset) {
|
||||
value = value.replace(SIMPLE_STRING_OMIT, function(match, offset) {
|
||||
if ((i === 0 && offset === 0) || (i === $ && offset + match.length === value.length)) {
|
||||
return '';
|
||||
} else {
|
||||
|
@ -279,7 +283,7 @@
|
|||
};
|
||||
|
||||
Lexer.prototype.regexToken = function() {
|
||||
var closed, end, errorToken, flags, index, match, prev, re, ref2, ref3, regex, rparen, tokens;
|
||||
var body, closed, end, errorToken, flags, index, match, prev, ref2, ref3, regex, rparen, tokens;
|
||||
switch (false) {
|
||||
case !(match = REGEX_ILLEGAL.exec(this.chunk)):
|
||||
this.error("regular expressions cannot begin with " + match[2], match.index + match[1].length);
|
||||
|
@ -288,7 +292,10 @@
|
|||
tokens = match.tokens, index = match.index;
|
||||
break;
|
||||
case !(match = REGEX.exec(this.chunk)):
|
||||
regex = match[0], closed = match[1];
|
||||
regex = match[0], body = match[1], closed = match[2];
|
||||
this.validateEscapes(regex, {
|
||||
isRegex: true
|
||||
});
|
||||
index = regex.length;
|
||||
prev = last(this.tokens);
|
||||
if (prev) {
|
||||
|
@ -314,21 +321,21 @@
|
|||
case !!VALID_FLAGS.test(flags):
|
||||
this.error("invalid regular expression flags " + flags, index);
|
||||
break;
|
||||
case !regex:
|
||||
this.token('REGEX', "" + regex + flags);
|
||||
break;
|
||||
case tokens.length !== 1:
|
||||
re = this.formatHeregex(tokens[0][1]).replace(/\//g, '\\/');
|
||||
this.token('REGEX', "/" + (re || '(?:)') + "/" + flags, 0, end, errorToken);
|
||||
case !(regex || tokens.length === 1):
|
||||
if (body == null) {
|
||||
body = this.formatHeregex(tokens[0][1]);
|
||||
}
|
||||
this.token('REGEX', "" + (this.makeDelimitedLiteral(body, {
|
||||
delimiter: '/'
|
||||
})) + flags, 0, end, errorToken);
|
||||
break;
|
||||
default:
|
||||
this.token('IDENTIFIER', 'RegExp', 0, 0);
|
||||
this.token('CALL_START', '(', 0, 0, errorToken);
|
||||
this.mergeInterpolationTokens(tokens, '"', (function(_this) {
|
||||
return function(value) {
|
||||
return _this.formatHeregex(value).replace(/\\/g, '\\\\');
|
||||
};
|
||||
})(this));
|
||||
this.mergeInterpolationTokens(tokens, {
|
||||
delimiter: '"',
|
||||
double: true
|
||||
}, this.formatHeregex);
|
||||
if (flags) {
|
||||
this.token(',', ',', index, 0);
|
||||
this.token('STRING', '"' + flags + '"', index, flags.length);
|
||||
|
@ -569,6 +576,10 @@
|
|||
str = this.chunk.slice(offsetInChunk);
|
||||
while (true) {
|
||||
strPart = regex.exec(str)[0];
|
||||
this.validateEscapes(strPart, {
|
||||
isRegex: delimiter.charAt(0) === '/',
|
||||
offsetInChunk: offsetInChunk
|
||||
});
|
||||
tokens.push(this.makeToken('NEOSTRING', strPart, offsetInChunk));
|
||||
str = str.slice(strPart.length);
|
||||
offsetInChunk += strPart.length;
|
||||
|
@ -605,7 +616,7 @@
|
|||
};
|
||||
};
|
||||
|
||||
Lexer.prototype.mergeInterpolationTokens = function(tokens, quote, fn) {
|
||||
Lexer.prototype.mergeInterpolationTokens = function(tokens, options, fn) {
|
||||
var converted, errorToken, firstEmptyStringIndex, firstIndex, firstToken, i, interpolated, j, lastToken, len, locationToken, plusToken, ref2, ref3, rparen, tag, token, tokensToPush, value;
|
||||
if (interpolated = tokens.length > 1) {
|
||||
firstToken = tokens[0];
|
||||
|
@ -644,7 +655,7 @@
|
|||
this.tokens.splice(firstEmptyStringIndex, 2);
|
||||
}
|
||||
token[0] = 'STRING';
|
||||
token[1] = this.makeString(converted, quote);
|
||||
token[1] = this.makeDelimitedLiteral(converted, options);
|
||||
locationToken = token;
|
||||
tokensToPush = [token];
|
||||
}
|
||||
|
@ -747,36 +758,68 @@
|
|||
};
|
||||
|
||||
Lexer.prototype.formatString = function(str) {
|
||||
return str.replace(/\\[^\S\n]*(\n|\\)\s*/g, function(escaped, character) {
|
||||
if (character === '\n') {
|
||||
return '';
|
||||
} else {
|
||||
return escaped;
|
||||
}
|
||||
});
|
||||
return str.replace(STRING_OMIT, '$1');
|
||||
};
|
||||
|
||||
Lexer.prototype.formatHeregex = function(str) {
|
||||
return str.replace(HEREGEX_OMIT, '$1$2').replace(MULTILINER, '\\n');
|
||||
return str.replace(HEREGEX_OMIT, '$1$2');
|
||||
};
|
||||
|
||||
Lexer.prototype.makeString = function(body, quote) {
|
||||
var match;
|
||||
if (!body) {
|
||||
return quote + quote;
|
||||
Lexer.prototype.validateEscapes = function(str, options) {
|
||||
var before, hex, match, message, octal, ref2, unicode;
|
||||
if (options == null) {
|
||||
options = {};
|
||||
}
|
||||
body = body.replace(RegExp("\\\\(" + quote + "|\\\\)", "g"), function(match, contents) {
|
||||
if (contents === quote) {
|
||||
return contents;
|
||||
} else {
|
||||
return match;
|
||||
match = INVALID_ESCAPE.exec(str);
|
||||
if (!match) {
|
||||
return;
|
||||
}
|
||||
match[0], before = match[1], octal = match[2], hex = match[3], unicode = match[4];
|
||||
if (options.isRegex && octal && octal.charAt(0) !== '0') {
|
||||
return;
|
||||
}
|
||||
message = octal ? "octal escape sequences are not allowed \\" + octal : "invalid escape sequence \\" + (hex || unicode);
|
||||
return this.error(message, ((ref2 = options.offsetInChunk) != null ? ref2 : 0) + match.index + before.length);
|
||||
};
|
||||
|
||||
Lexer.prototype.makeDelimitedLiteral = function(body, options) {
|
||||
var regex;
|
||||
if (options == null) {
|
||||
options = {};
|
||||
}
|
||||
if (body === '' && options.delimiter === '/') {
|
||||
body = '(?:)';
|
||||
}
|
||||
regex = RegExp("(\\\\\\\\)|(\\\\0(?=[1-7]))|\\\\?(" + options.delimiter + ")|\\\\?(?:(\\n)|(\\r)|(\\u2028)|(\\u2029))|(\\\\.)", "g");
|
||||
body = body.replace(regex, function(match, backslash, nul, delimiter, lf, cr, ls, ps, other) {
|
||||
switch (false) {
|
||||
case !backslash:
|
||||
if (options.double) {
|
||||
return backslash + backslash;
|
||||
} else {
|
||||
return backslash;
|
||||
}
|
||||
case !nul:
|
||||
return '\\x00';
|
||||
case !delimiter:
|
||||
return "\\" + delimiter;
|
||||
case !lf:
|
||||
return '\\n';
|
||||
case !cr:
|
||||
return '\\r';
|
||||
case !ls:
|
||||
return '\\u2028';
|
||||
case !ps:
|
||||
return '\\u2029';
|
||||
case !other:
|
||||
if (options.double) {
|
||||
return "\\" + other;
|
||||
} else {
|
||||
return other;
|
||||
}
|
||||
}
|
||||
});
|
||||
body = body.replace(RegExp("" + quote, "g"), '\\$&');
|
||||
if (match = OCTAL_ESCAPE.exec(body)) {
|
||||
this.error("octal escape sequences are not allowed " + match[2], match.index + match[1].length + 1);
|
||||
}
|
||||
return quote + body + quote;
|
||||
return "" + options.delimiter + body + options.delimiter;
|
||||
};
|
||||
|
||||
Lexer.prototype.error = function(message, offset) {
|
||||
|
@ -860,11 +903,13 @@
|
|||
|
||||
HEREDOC_DOUBLE = /^(?:[^\\"#]|\\[\s\S]|"(?!"")|\#(?!\{))*/;
|
||||
|
||||
STRING_OMIT = /\s*\n\s*/g;
|
||||
STRING_OMIT = /((?:\\\\)+)|\\[^\S\n]*\n\s*/g;
|
||||
|
||||
SIMPLE_STRING_OMIT = /\s*\n\s*/g;
|
||||
|
||||
HEREDOC_INDENT = /\n+([^\n\S]*)(?=\S)/g;
|
||||
|
||||
REGEX = /^\/(?!\/)(?:[^[\/\n\\]|\\.|\[(?:\\.|[^\]\n\\])*])*(\/)?/;
|
||||
REGEX = /^\/(?!\/)((?:[^[\/\n\\]|\\[^\n]|\[(?:\\[^\n]|[^\]\n\\])*])*)(\/)?/;
|
||||
|
||||
REGEX_FLAGS = /^\w*/;
|
||||
|
||||
|
@ -872,19 +917,17 @@
|
|||
|
||||
HEREGEX = /^(?:[^\\\/#]|\\[\s\S]|\/(?!\/\/)|\#(?!\{))*/;
|
||||
|
||||
HEREGEX_OMIT = /((?:\\\\)+)|\\(\s|\/)|\s+(?:#.*)?/g;
|
||||
HEREGEX_OMIT = /((?:\\\\)+)|\\(\s)|\s+(?:#.*)?/g;
|
||||
|
||||
REGEX_ILLEGAL = /^(\/|\/{3}\s*)(\*)/;
|
||||
|
||||
POSSIBLY_DIVISION = /^\/=?\s/;
|
||||
|
||||
MULTILINER = /\n/g;
|
||||
|
||||
HERECOMMENT_ILLEGAL = /\*\//;
|
||||
|
||||
LINE_CONTINUER = /^\s*(?:,|\??\.(?![.\d])|::)/;
|
||||
|
||||
OCTAL_ESCAPE = /^((?:\\.|[^\\])*)(\\(?:0[0-7]|[1-7]))/;
|
||||
INVALID_ESCAPE = /((?:^|[^\\])(?:\\\\)*)\\(?:(0[0-7]|[1-7])|(x(?![\da-fA-F]{2}).{0,2})|(u(?![\da-fA-F]{4}).{0,4}))/;
|
||||
|
||||
LEADING_BLANK_LINE = /^[^\n\S]*\n/;
|
||||
|
||||
|
|
109
src/lexer.coffee
109
src/lexer.coffee
|
@ -202,6 +202,7 @@ exports.Lexer = class Lexer
|
|||
{tokens, index: end} = @matchWithInterpolations regex, quote
|
||||
$ = tokens.length - 1
|
||||
|
||||
delimiter = quote[0]
|
||||
if heredoc
|
||||
# Find the smallest indentation. It will be removed from all lines later.
|
||||
indent = null
|
||||
|
@ -210,17 +211,16 @@ exports.Lexer = class Lexer
|
|||
attempt = match[1]
|
||||
indent = attempt if indent is null or 0 < attempt.length < indent.length
|
||||
indentRegex = /// ^#{indent} ///gm if indent
|
||||
@mergeInterpolationTokens tokens, quote[0], (value, i) =>
|
||||
@mergeInterpolationTokens tokens, {delimiter}, (value, i) =>
|
||||
value = @formatString value
|
||||
value = value.replace LEADING_BLANK_LINE, '' if i is 0
|
||||
value = value.replace TRAILING_BLANK_LINE, '' if i is $
|
||||
value = value.replace indentRegex, ''
|
||||
value = value.replace MULTILINER, '\\n'
|
||||
value
|
||||
else
|
||||
@mergeInterpolationTokens tokens, quote, (value, i) =>
|
||||
@mergeInterpolationTokens tokens, {delimiter}, (value, i) =>
|
||||
value = @formatString value
|
||||
value = value.replace STRING_OMIT, (match, offset) ->
|
||||
value = value.replace SIMPLE_STRING_OMIT, (match, offset) ->
|
||||
if (i is 0 and offset is 0) or
|
||||
(i is $ and offset + match.length is value.length)
|
||||
''
|
||||
|
@ -258,7 +258,8 @@ exports.Lexer = class Lexer
|
|||
when match = @matchWithInterpolations HEREGEX, '///'
|
||||
{tokens, index} = match
|
||||
when match = REGEX.exec @chunk
|
||||
[regex, closed] = match
|
||||
[regex, body, closed] = match
|
||||
@validateEscapes regex, isRegex: yes
|
||||
index = regex.length
|
||||
prev = last @tokens
|
||||
if prev
|
||||
|
@ -276,16 +277,13 @@ exports.Lexer = class Lexer
|
|||
switch
|
||||
when not VALID_FLAGS.test flags
|
||||
@error "invalid regular expression flags #{flags}", index
|
||||
when regex
|
||||
@token 'REGEX', "#{regex}#{flags}"
|
||||
when tokens.length is 1
|
||||
re = @formatHeregex(tokens[0][1]).replace(/\//g, '\\/')
|
||||
@token 'REGEX', "/#{ re or '(?:)' }/#{flags}", 0, end, errorToken
|
||||
when regex or tokens.length is 1
|
||||
body ?= @formatHeregex tokens[0][1]
|
||||
@token 'REGEX', "#{@makeDelimitedLiteral body, delimiter: '/'}#{flags}", 0, end, errorToken
|
||||
else
|
||||
@token 'IDENTIFIER', 'RegExp', 0, 0
|
||||
@token 'CALL_START', '(', 0, 0, errorToken
|
||||
@mergeInterpolationTokens tokens, '"', (value) =>
|
||||
@formatHeregex(value).replace(/\\/g, '\\\\')
|
||||
@mergeInterpolationTokens tokens, {delimiter: '"', double: yes}, @formatHeregex
|
||||
if flags
|
||||
@token ',', ',', index, 0
|
||||
@token 'STRING', '"' + flags + '"', index, flags.length
|
||||
|
@ -484,6 +482,8 @@ exports.Lexer = class Lexer
|
|||
loop
|
||||
[strPart] = regex.exec str
|
||||
|
||||
@validateEscapes strPart, {isRegex: delimiter.charAt(0) is '/', offsetInChunk}
|
||||
|
||||
# Push a fake 'NEOSTRING' token, which will get turned into a real string later.
|
||||
tokens.push @makeToken 'NEOSTRING', strPart, offsetInChunk
|
||||
|
||||
|
@ -527,8 +527,8 @@ exports.Lexer = class Lexer
|
|||
# Merge the array `tokens` of the fake token types 'TOKENS' and 'NEOSTRING'
|
||||
# (as returned by `matchWithInterpolations`) into the token stream. The values
|
||||
# of 'NEOSTRING's are converted using `fn` and turned into strings using
|
||||
# `quote` first.
|
||||
mergeInterpolationTokens: (tokens, quote, fn) ->
|
||||
# `options` first.
|
||||
mergeInterpolationTokens: (tokens, options, fn) ->
|
||||
if interpolated = tokens.length > 1
|
||||
[firstToken] = tokens
|
||||
errorToken = ['', 'interpolation',
|
||||
|
@ -566,7 +566,7 @@ exports.Lexer = class Lexer
|
|||
if i is 2 and firstEmptyStringIndex?
|
||||
@tokens.splice firstEmptyStringIndex, 2 # Remove empty string and the plus.
|
||||
token[0] = 'STRING'
|
||||
token[1] = @makeString converted, quote
|
||||
token[1] = @makeDelimitedLiteral converted, options
|
||||
locationToken = token
|
||||
tokensToPush = [token]
|
||||
if @tokens.length > firstIndex
|
||||
|
@ -674,23 +674,45 @@ exports.Lexer = class Lexer
|
|||
'**', 'SHIFT', 'RELATION', 'COMPARE', 'LOGIC', 'THROW', 'EXTENDS']
|
||||
|
||||
formatString: (str) ->
|
||||
# Ignore escaped backslashes and remove escaped newlines.
|
||||
str.replace /\\[^\S\n]*(\n|\\)\s*/g, (escaped, character) ->
|
||||
if character is '\n' then '' else escaped
|
||||
str.replace STRING_OMIT, '$1'
|
||||
|
||||
formatHeregex: (str) ->
|
||||
str.replace(HEREGEX_OMIT, '$1$2').replace(MULTILINER, '\\n')
|
||||
str.replace HEREGEX_OMIT, '$1$2'
|
||||
|
||||
# Constructs a string token by escaping quotes.
|
||||
makeString: (body, quote) ->
|
||||
return quote + quote unless body
|
||||
# Ignore escaped backslashes and unescape quotes.
|
||||
body = body.replace /// \\( #{quote} | \\ ) ///g, (match, contents) ->
|
||||
if contents is quote then contents else match
|
||||
body = body.replace /// #{quote} ///g, '\\$&'
|
||||
if match = OCTAL_ESCAPE.exec body
|
||||
@error "octal escape sequences are not allowed #{match[2]}", match.index + match[1].length + 1
|
||||
quote + body + quote
|
||||
# Validates escapes in strings and regexes.
#
# Scans `str` for sequences matched by `INVALID_ESCAPE` (octal escapes and
# malformed `\x` / `\u` escapes) and reports the first offending one through
# `@error`.
#
# str     - raw source text of the string or regex (part) to check.
# options - optional settings:
#           isRegex       - when truthy, `\1` to `\7` are allowed (they are
#                           backreferences in a regex); octal escapes that
#                           start with `0`, such as `\01`, are still rejected.
#           offsetInChunk - added to the reported error offset; treated as 0
#                           when absent.
#
# Returns nothing when no offending escape is found.
validateEscapes: (str, options = {}) ->
  # Index in `str` where the not-yet-checked tail begins.
  scanned = 0
  # `INVALID_ESCAPE` is not global, so `exec` only ever yields its first match.
  # Returning as soon as that match is an allowed backreference would leave
  # everything after it unchecked (e.g. the `\x7g` in `/(a)\1\x7g/`), so keep
  # scanning the remainder instead. A skipped match ends right after the
  # backreference digit, so the tail always starts at an unescaped position,
  # which is what the `^` alternative of the regex relies on.
  while match = INVALID_ESCAPE.exec str.slice scanned
    [matched, before, octal, hex, unicode] = match
    unless options.isRegex and octal and octal.charAt(0) isnt '0'
      message =
        if octal
          "octal escape sequences are not allowed \\#{octal}"
        else
          "invalid escape sequence \\#{hex or unicode}"
      return @error message, (options.offsetInChunk ? 0) + scanned + match.index + before.length
    scanned += match.index + matched.length
  # Explicit return so the loop is not compiled into a collected result array.
  return
|
||||
|
||||
# Constructs a string or regex by escaping certain characters.
#
# body    - the contents of the literal, without its surrounding delimiters.
# options - `delimiter`: the character placed on both sides of the result.
#           With `/` as delimiter an empty body is replaced by `(?:)`.
#           `double`: when truthy, escaped backslashes are doubled and other
#           escapes get one extra leading backslash.
#
# Delimiters inside the body (already escaped or not) come out escaped exactly
# once. Line feeds, carriage returns, U+2028 and U+2029 (escaped or not) are
# written as `\n`, `\r`, `\u2028` and `\u2029`. A `\0` that is directly
# followed by a digit 1-7 is written as `\x00`.
#
# Returns the escaped body wrapped in the delimiter.
|
||||
makeDelimitedLiteral: (body, options = {}) ->
|
||||
body = '(?:)' if body is '' and options.delimiter is '/'
|
||||
regex = ///
|
||||
(\\\\) # escaped backslash
|
||||
| (\\0(?=[1-7])) # nul character mistaken as octal escape
|
||||
| \\?(#{options.delimiter}) # (possibly escaped) delimiter
|
||||
| \\?(?: (\n)|(\r)|(\u2028)|(\u2029) ) # (possibly escaped) newlines
|
||||
| (\\.) # other escapes
|
||||
///g
|
||||
body = body.replace regex, (match, backslash, nul, delimiter, lf, cr, ls, ps, other) -> switch
|
||||
# Ignore escaped backslashes.
|
||||
when backslash then (if options.double then backslash + backslash else backslash)
|
||||
when nul then '\\x00'
|
||||
when delimiter then "\\#{delimiter}"
|
||||
when lf then '\\n'
|
||||
when cr then '\\r'
|
||||
when ls then '\\u2028'
|
||||
when ps then '\\u2029'
|
||||
when other then (if options.double then "\\#{other}" else other)
|
||||
"#{options.delimiter}#{body}#{options.delimiter}"
|
||||
|
||||
# Throws a compiler error on the current position.
|
||||
error: (message, offset = 0) ->
|
||||
|
@ -791,18 +813,22 @@ STRING_DOUBLE = /// ^(?: [^\\"#] | \\[\s\S] | \#(?!\{) )* ///
|
|||
HEREDOC_SINGLE = /// ^(?: [^\\'] | \\[\s\S] | '(?!'') )* ///
|
||||
HEREDOC_DOUBLE = /// ^(?: [^\\"#] | \\[\s\S] | "(?!"") | \#(?!\{) )* ///
|
||||
|
||||
STRING_OMIT = /\s*\n\s*/g
|
||||
HEREDOC_INDENT = /\n+([^\n\S]*)(?=\S)/g
|
||||
STRING_OMIT = ///
|
||||
((?:\\\\)+) # consume (and preserve) an even number of backslashes
|
||||
| \\[^\S\n]*\n\s* # remove escaped newlines
|
||||
///g
|
||||
SIMPLE_STRING_OMIT = /\s*\n\s*/g
|
||||
HEREDOC_INDENT = /\n+([^\n\S]*)(?=\S)/g
|
||||
|
||||
# Regex-matching-regexes.
|
||||
REGEX = /// ^
|
||||
/ (?!/) (
|
||||
/ (?!/) ((
|
||||
?: [^ [ / \n \\ ] # every other thing
|
||||
| \\. # anything (but newlines) escaped
|
||||
| \\[^\n] # anything but newlines escaped
|
||||
| \[ # character class
|
||||
(?: \\. | [^ \] \n \\ ] )*
|
||||
(?: \\[^\n] | [^ \] \n \\ ] )*
|
||||
]
|
||||
)* (/)?
|
||||
)*) (/)?
|
||||
///
|
||||
|
||||
REGEX_FLAGS = /^\w*/
|
||||
|
@ -812,7 +838,7 @@ HEREGEX = /// ^(?: [^\\/#] | \\[\s\S] | /(?!//) | \#(?!\{) )* ///
|
|||
|
||||
HEREGEX_OMIT = ///
|
||||
((?:\\\\)+) # consume (and preserve) an even number of backslashes
|
||||
| \\(\s|/) # preserve escaped whitespace and "de-escape" slashes
|
||||
| \\(\s) # preserve escaped whitespace
|
||||
| \s+(?:#.*)? # remove whitespace and comments
|
||||
///g
|
||||
|
||||
|
@ -821,13 +847,18 @@ REGEX_ILLEGAL = /// ^ ( / | /{3}\s*) (\*) ///
|
|||
POSSIBLY_DIVISION = /// ^ /=?\s ///
|
||||
|
||||
# Other regexes.
|
||||
MULTILINER = /\n/g
|
||||
|
||||
HERECOMMENT_ILLEGAL = /\*\//
|
||||
|
||||
LINE_CONTINUER = /// ^ \s* (?: , | \??\.(?![.\d]) | :: ) ///
|
||||
|
||||
OCTAL_ESCAPE = /// ^ ((?: \\. | [^\\] )*) (\\ (?: 0[0-7] | [1-7] )) ///
|
||||
INVALID_ESCAPE = ///
|
||||
( (?:^|[^\\]) (?:\\\\)* ) # make sure the escape isn’t escaped
|
||||
\\ (
|
||||
?: (0[0-7]|[1-7]) # octal escape
|
||||
| (x(?![\da-fA-F]{2}).{0,2}) # hex escape
|
||||
| (u(?![\da-fA-F]{4}).{0,4}) # unicode escape
|
||||
)
|
||||
///
|
||||
|
||||
LEADING_BLANK_LINE = /^[^\n\S]*\n/
|
||||
TRAILING_BLANK_LINE = /\n[^\n\S]*$/
|
||||
|
|
|
@ -380,6 +380,70 @@ test "octal escapes", ->
|
|||
"a\\0\\tb\\\\\\07c"
|
||||
\ \ \ \ ^
|
||||
'''
|
||||
assertErrorFormat '''
|
||||
"a
|
||||
#{b} \\1"
|
||||
''', '''
|
||||
[stdin]:2:8: error: octal escape sequences are not allowed \\1
|
||||
#{b} \\1"
|
||||
^
|
||||
'''
|
||||
assertErrorFormat '''
|
||||
/a\\0\\tb\\\\\\07c/
|
||||
''', '''
|
||||
[stdin]:1:10: error: octal escape sequences are not allowed \\07
|
||||
/a\\0\\tb\\\\\\07c/
|
||||
\ \ \ \ ^
|
||||
'''
|
||||
assertErrorFormat '''
|
||||
///a
|
||||
#{b} \\01///
|
||||
''', '''
|
||||
[stdin]:2:8: error: octal escape sequences are not allowed \\01
|
||||
#{b} \\01///
|
||||
^
|
||||
'''
|
||||
|
||||
test "#3795: invalid escapes", ->
|
||||
assertErrorFormat '''
|
||||
"a\\0\\tb\\\\\\x7g"
|
||||
''', '''
|
||||
[stdin]:1:10: error: invalid escape sequence \\x7g
|
||||
"a\\0\\tb\\\\\\x7g"
|
||||
\ \ \ \ ^
|
||||
'''
|
||||
assertErrorFormat '''
|
||||
"a
|
||||
#{b} \\uA02
|
||||
c"
|
||||
''', '''
|
||||
[stdin]:2:8: error: invalid escape sequence \\uA02
|
||||
#{b} \\uA02
|
||||
^
|
||||
'''
|
||||
assertErrorFormat '''
|
||||
/a\\u002space/
|
||||
''', '''
|
||||
[stdin]:1:3: error: invalid escape sequence \\u002s
|
||||
/a\\u002space/
|
||||
^
|
||||
'''
|
||||
assertErrorFormat '''
|
||||
///a \\u002 0 space///
|
||||
''', '''
|
||||
[stdin]:1:6: error: invalid escape sequence \\u002
|
||||
///a \\u002 0 space///
|
||||
^
|
||||
'''
|
||||
assertErrorFormat '''
|
||||
///a
|
||||
#{b} \\x0
|
||||
c///
|
||||
''', '''
|
||||
[stdin]:2:8: error: invalid escape sequence \\x0
|
||||
#{b} \\x0
|
||||
^
|
||||
'''
|
||||
|
||||
test "illegal herecomment", ->
|
||||
assertErrorFormat '''
|
||||
|
|
|
@ -146,11 +146,11 @@ ok /^a[\s\S]+b$/.test "a#{ (x) -> x ** 2 }b"
|
|||
# TODO: improve heregex interpolation tests
|
||||
|
||||
test "heregex interpolation", ->
|
||||
eq /\\#{}\\\"/ + '', ///
|
||||
eq /\\#{}\\"/ + '', ///
|
||||
#{
|
||||
"#{ '\\' }" # normal comment
|
||||
}
|
||||
# regex comment
|
||||
\#{}
|
||||
\\ \"
|
||||
\\ "
|
||||
/// + ''
|
||||
|
|
|
@ -265,3 +265,24 @@ test "regexes are not callable", ->
|
|||
///a#{b}///
|
||||
k: v
|
||||
'''
|
||||
|
||||
test "backreferences", ->
|
||||
ok /(a)(b)\2\1/.test 'abba'
|
||||
|
||||
test "#3795: Escape otherwise invalid characters", ->
|
||||
ok (/
/).test '\u2028'
|
||||
ok (/
/).test '\u2029'
|
||||
ok ///\
///.test '\u2028'
|
||||
ok ///\
///.test '\u2029'
|
||||
ok ///a
b///.test 'ab' # The space is U+2028.
|
||||
ok ///a
b///.test 'ab' # The space is U+2029.
|
||||
ok ///\0
|
||||
1///.test '\x001'
|
||||
|
||||
a = 'a'
|
||||
ok ///#{a}
b///.test 'ab' # The space is U+2028.
|
||||
ok ///#{a}
b///.test 'ab' # The space is U+2029.
|
||||
ok ///#{a}\
///.test 'a\u2028'
|
||||
ok ///#{a}\
///.test 'a\u2029'
|
||||
ok ///#{a}\0
|
||||
1///.test 'a\x001'
|
||||
|
|
|
@ -359,3 +359,31 @@ test "strings are not callable", ->
|
|||
"a#{b}"
|
||||
k: v
|
||||
'''
|
||||
|
||||
test "#3795: Escape otherwise invalid characters", ->
|
||||
eq '
', '\u2028'
|
||||
eq '
', '\u2029'
|
||||
eq '\0\
|
||||
1', '\x001'
|
||||
eq "
", '\u2028'
|
||||
eq "
", '\u2029'
|
||||
eq "\0\
|
||||
1", '\x001'
|
||||
eq '''
''', '\u2028'
|
||||
eq '''
''', '\u2029'
|
||||
eq '''\0\
|
||||
1''', '\x001'
|
||||
eq """
""", '\u2028'
|
||||
eq """
""", '\u2029'
|
||||
eq """\0\
|
||||
1""", '\x001'
|
||||
|
||||
a = 'a'
|
||||
eq "#{a}
", 'a\u2028'
|
||||
eq "#{a}
", 'a\u2029'
|
||||
eq "#{a}\0\
|
||||
1", 'a\0' + '1'
|
||||
eq """#{a}
""", 'a\u2028'
|
||||
eq """#{a}
""", 'a\u2029'
|
||||
eq """#{a}\0\
|
||||
1""", 'a\0' + '1'
|
||||
|
|
Loading…
Reference in a new issue