Merge pull request #3782 from lydell/regex

Fix #3410, #3182: Allow regex to start with space or =
This commit is contained in:
Michael Ficarra 2015-01-10 07:52:02 -08:00
commit bec8f27e8a
4 changed files with 255 additions and 34 deletions

View File

@ -1,6 +1,6 @@
// Generated by CoffeeScript 1.8.0
(function() {
var BOM, BOOL, CALLABLE, CODE, COFFEE_ALIASES, COFFEE_ALIAS_MAP, COFFEE_KEYWORDS, COMMENT, COMPARE, COMPOUND_ASSIGN, HERECOMMENT_ILLEGAL, HEREDOC_DOUBLE, HEREDOC_INDENT, HEREDOC_SINGLE, HEREGEX, HEREGEX_OMIT, IDENTIFIER, INDENTABLE_CLOSERS, INDEXABLE, INVERSES, JSTOKEN, JS_FORBIDDEN, JS_KEYWORDS, LEADING_BLANK_LINE, LINE_BREAK, LINE_CONTINUER, LOGIC, Lexer, MATH, MULTILINER, MULTI_DENT, NOT_REGEX, NOT_SPACED_REGEX, NUMBER, OCTAL_ESCAPE, OPERATOR, REGEX, REGEX_FLAGS, REGEX_ILLEGAL, RELATION, RESERVED, Rewriter, SHIFT, STRICT_PROSCRIBED, STRING_DOUBLE, STRING_OMIT, STRING_SINGLE, STRING_START, TRAILING_BLANK_LINE, TRAILING_SPACES, UNARY, UNARY_MATH, VALID_FLAGS, WHITESPACE, compact, count, invertLiterate, key, last, locationDataToString, repeat, starts, throwSyntaxError, _ref, _ref1,
var BOM, BOOL, CALLABLE, CODE, COFFEE_ALIASES, COFFEE_ALIAS_MAP, COFFEE_KEYWORDS, COMMENT, COMPARE, COMPOUND_ASSIGN, HERECOMMENT_ILLEGAL, HEREDOC_DOUBLE, HEREDOC_INDENT, HEREDOC_SINGLE, HEREGEX, HEREGEX_OMIT, IDENTIFIER, INDENTABLE_CLOSERS, INDEXABLE, INVERSES, JSTOKEN, JS_FORBIDDEN, JS_KEYWORDS, LEADING_BLANK_LINE, LINE_BREAK, LINE_CONTINUER, LOGIC, Lexer, MATH, MULTILINER, MULTI_DENT, NOT_REGEX, NUMBER, OCTAL_ESCAPE, OPERATOR, POSSIBLY_DIVISION, REGEX, REGEX_FLAGS, REGEX_ILLEGAL, RELATION, RESERVED, Rewriter, SHIFT, STRICT_PROSCRIBED, STRING_DOUBLE, STRING_OMIT, STRING_SINGLE, STRING_START, TRAILING_BLANK_LINE, TRAILING_SPACES, UNARY, UNARY_MATH, VALID_FLAGS, WHITESPACE, compact, count, invertLiterate, key, last, locationDataToString, repeat, starts, throwSyntaxError, _ref, _ref1,
__indexOf = [].indexOf || function(item) { for (var i = 0, l = this.length; i < l; i++) { if (i in this && this[i] === item) return i; } return -1; };
_ref = require('./rewriter'), Rewriter = _ref.Rewriter, INVERSES = _ref.INVERSES;
@ -287,7 +287,7 @@
};
Lexer.prototype.regexToken = function() {
var end, flags, index, match, prev, re, regex, tokens, _ref2, _ref3;
var closed, end, flags, index, match, prev, re, regex, tokens, _ref2, _ref3, _ref4;
switch (false) {
case !(match = REGEX_ILLEGAL.exec(this.chunk)):
this.error("regular expressions cannot begin with " + match[2], match.index + match[1].length);
@ -296,11 +296,20 @@
_ref2 = this.matchWithInterpolations(this.chunk.slice(3), HEREGEX, '///', 3), tokens = _ref2.tokens, index = _ref2.index;
break;
case !(match = REGEX.exec(this.chunk)):
regex = match[0];
regex = match[0], closed = match[1];
index = regex.length;
prev = last(this.tokens);
if (prev && (_ref3 = prev[0], __indexOf.call((prev.spaced ? NOT_REGEX : NOT_SPACED_REGEX), _ref3) >= 0)) {
return 0;
if (prev) {
if (prev.spaced && (_ref3 = prev[0], __indexOf.call(CALLABLE, _ref3) >= 0)) {
if (!closed || POSSIBLY_DIVISION.test(regex)) {
return 0;
}
} else if (_ref4 = prev[0], __indexOf.call(NOT_REGEX, _ref4) >= 0) {
return 0;
}
}
if (!closed) {
this.error('missing / (unclosed regex)');
}
break;
default:
@ -845,7 +854,7 @@
HEREDOC_INDENT = /\n+([^\n\S]*)(?=\S)/g;
REGEX = /^\/(?![\s=])(?:[^[\/\n\\]|\\.|\[(?:\\.|[^\]\n\\])*])+\//;
REGEX = /^\/(?!\/)(?:[^[\/\n\\]|\\.|\[(?:\\.|[^\]\n\\])*])*(\/)?/;
REGEX_FLAGS = /^\w*/;
@ -857,6 +866,8 @@
REGEX_ILLEGAL = /^(\/|\/{3}\s*)(\*)/;
POSSIBLY_DIVISION = /^\/=?\s/;
MULTILINER = /\n/g;
HERECOMMENT_ILLEGAL = /\*\//;
@ -889,13 +900,11 @@
BOOL = ['TRUE', 'FALSE'];
NOT_REGEX = ['NUMBER', 'REGEX', 'BOOL', 'NULL', 'UNDEFINED', '++', '--'];
CALLABLE = ['IDENTIFIER', ')', ']', '?', '@', 'THIS', 'SUPER'];
NOT_SPACED_REGEX = NOT_REGEX.concat(')', '}', 'THIS', 'IDENTIFIER', 'STRING', ']');
INDEXABLE = CALLABLE.concat(['NUMBER', 'STRING', 'REGEX', 'BOOL', 'NULL', 'UNDEFINED', '}', '::']);
CALLABLE = ['IDENTIFIER', 'STRING', 'REGEX', ')', ']', '}', '?', '::', '@', 'THIS', 'SUPER'];
INDEXABLE = CALLABLE.concat('NUMBER', 'BOOL', 'NULL', 'UNDEFINED');
NOT_REGEX = INDEXABLE.concat(['++', '--']);
LINE_BREAK = ['INDENT', 'OUTDENT', 'TERMINATOR'];

View File

@ -258,10 +258,15 @@ exports.Lexer = class Lexer
when @chunk[...3] is '///'
{tokens, index} = @matchWithInterpolations @chunk[3..], HEREGEX, '///', 3
when match = REGEX.exec @chunk
[regex] = match
[regex, closed] = match
index = regex.length
prev = last @tokens
return 0 if prev and (prev[0] in (if prev.spaced then NOT_REGEX else NOT_SPACED_REGEX))
if prev
if prev.spaced and prev[0] in CALLABLE
return 0 if not closed or POSSIBLY_DIVISION.test regex
else if prev[0] in NOT_REGEX
return 0
@error 'missing / (unclosed regex)' unless closed
else
return 0
@ -776,13 +781,13 @@ HEREDOC_INDENT = /\n+([^\n\S]*)(?=\S)/g
# Regex-matching-regexes.
REGEX = /// ^
/ (?! [\s=] ) ( # disallow leading whitespace or equals sign
/ (?!/) (
?: [^ [ / \n \\ ] # every other thing
| \\. # anything (but newlines) escaped
| \[ # character class
(?: \\. | [^ \] \n \\ ] )*
]
)+ /
)* (/)?
///
REGEX_FLAGS = /^\w*/
@ -798,6 +803,8 @@ HEREGEX_OMIT = ///
REGEX_ILLEGAL = /// ^ ( / | /{3}\s*) (\*) ///
POSSIBLY_DIVISION = /// ^ /=?\s ///
# Other regexes.
MULTILINER = /\n/g
@ -841,23 +848,17 @@ RELATION = ['IN', 'OF', 'INSTANCEOF']
# Boolean tokens.
BOOL = ['TRUE', 'FALSE']
# Tokens which a regular expression will never immediately follow, but which
# a division operator might.
#
# See: http://www.mozilla.org/js/language/js20-2002-04/rationale/syntax.html#regular-expressions
#
# Our list is shorter, due to sans-parentheses method calls.
NOT_REGEX = ['NUMBER', 'REGEX', 'BOOL', 'NULL', 'UNDEFINED', '++', '--']
# If the previous token is not spaced, there are more preceding tokens that
# force a division parse:
NOT_SPACED_REGEX = NOT_REGEX.concat ')', '}', 'THIS', 'IDENTIFIER', 'STRING', ']'
# Tokens which could legitimately be invoked or indexed. An opening
# parenthesis or bracket following these tokens will be recorded as the start
# of a function invocation or indexing operation.
CALLABLE = ['IDENTIFIER', 'STRING', 'REGEX', ')', ']', '}', '?', '::', '@', 'THIS', 'SUPER']
INDEXABLE = CALLABLE.concat 'NUMBER', 'BOOL', 'NULL', 'UNDEFINED'
CALLABLE = ['IDENTIFIER', ')', ']', '?', '@', 'THIS', 'SUPER']
INDEXABLE = CALLABLE.concat ['NUMBER', 'STRING', 'REGEX', 'BOOL', 'NULL', 'UNDEFINED', '}', '::']
# Tokens which a regular expression will never immediately follow (except spaced
# CALLABLEs in some cases), but which a division operator can.
#
# See: http://www-archive.mozilla.org/js/language/js20-2002-04/rationale/syntax.html#regular-expressions
NOT_REGEX = INDEXABLE.concat ['++', '--']
# Tokens that, when immediately preceding a `WHEN`, indicate that the `WHEN`
# occurs at the start of a line. We disambiguate these from trailing whens to

View File

@ -405,3 +405,29 @@ test "missing `)`, `}`, `]`", ->
foo#{ bar "#{1}"
^
'''
# Checks that a regex literal with no closing `/` produces the compile error
# "missing / (unclosed regex)" together with the expected source location.
test "unclosed regexes", ->
# A lone slash with nothing after it.
assertErrorFormat '''
/
''', '''
[stdin]:1:1: error: missing / (unclosed regex)
/
^
'''
# The last slash is escaped, so it is part of the regex body and nothing
# closes the literal.
assertErrorFormat '''
# Note the double escaping; this would be `/a\/` real code.
/a\\/
''', '''
[stdin]:2:1: error: missing / (unclosed regex)
/a\\/
^
'''
# The unclosed regex is nested inside string interpolations within a heregex.
# The second slash of `/[/]` sits inside a character class, so it does not
# terminate the regex.
assertErrorFormat '''
/// ^
a #{""" ""#{if /[/].test "|" then 1 else 0}"" """}
///
''', '''
[stdin]:2:18: error: missing / (unclosed regex)
a #{""" ""#{if /[/].test "|" then 1 else 0}"" """}
^
'''

View File

@ -13,16 +13,34 @@ test "basic regular expression literals", ->
ok 'a'.match /a/g
test "division is not confused for a regular expression", ->
# Any spacing around the slash is allowed when it cannot be a regex.
eq 2, 4 / 2 / 1
eq 2, 4/2/1
eq 2, 4/ 2 / 1
eq 2, 4 /2 / 1
eq 2, 4 / 2/ 1
eq 2, 4 / 2 /1
eq 2, 4 /2/ 1
a = 4
a = (regex) -> regex.test 'a b c'
a.valueOf = -> 4
b = 2
g = 1
eq 2, a / b/g
a = 10
b = a /= 4 / 2
eq a, 5
eq 2, a / b/g
eq 2, a/ b/g
eq 2, a / b/ g
eq 2, a / b/g # Tabs.
eq 2, a / b/g # Non-breaking spaces.
eq true, a /b/g
# Use parentheses to disambiguate.
eq true, a(/ b/g)
eq true, a(/ b/)
eq true, a (/ b/)
# Escape to disambiguate.
eq true, a /\ b/g
eq false, a /\ b/g
eq true, a /\ b/
obj = method: -> 2
two = 2
@ -32,6 +50,173 @@ test "division is not confused for a regular expression", ->
eq 2, (4)/2/i
eq 1, i/i/i
a = ''
a += ' ' until / /.test a
eq a, ' '
a = if /=/.test '=' then yes else no
eq a, yes
a = if !/=/.test '=' then yes else no
eq a, no
#3182:
match = 'foo=bar'.match /=/
eq match[0], '='
#3410:
ok ' '.match(/ /)[0] is ' '
# After a token that could be called (identifier, `)`, `]`, `?`, `@`, `this`,
# `super`), a slash is ambiguous. Each group below asserts the same pattern:
# a slash that is spaced on both sides, unspaced, or followed by a space is
# division (numeric result), while a slash preceded by a space and directly
# followed by the pattern starts a regex passed as an argument (result `true`).
# The spacing around every `/` is the input under test - do not reformat.
test "division vs regex after a callable token", ->
b = 2
g = 1
# `r` receives a regex and tests it against 'b'; it is the callee in the
# "regex as argument" cases.
r = (r) -> r.test 'b'
# After an identifier.
a = 4
eq 2, a / b/g
eq 2, a/b/g
eq 2, a/ b/g
eq true, r /b/g
# After a closing parenthesis.
eq 2, (1 + 3) / b/g
eq 2, (1 + 3)/b/g
eq 2, (1 + 3)/ b/g
eq true, (r) /b/g
# After a closing bracket.
eq 2, [4][0] / b/g
eq 2, [4][0]/b/g
eq 2, [4][0]/ b/g
eq true, [r][0] /b/g
# After the existential operator; the division cases expect 0.5.
eq 0.5, 4? / b/g
eq 0.5, 4?/b/g
eq 0.5, 4?/ b/g
eq true, r? /b/g
# After `@`, with the function called on 4 (division) or on `r` (regex argument).
(->
eq 2, @ / b/g
eq 2, @/b/g
eq 2, @/ b/g
).call 4
(->
eq true, @ /b/g
).call r
# After `this`, same setup as for `@`.
(->
eq 2, this / b/g
eq 2, this/b/g
eq 2, this/ b/g
).call 4
(->
eq true, this /b/g
).call r
# After `super`: A::p returns 4 when called without an argument and otherwise
# forwards the regex to `r`.
class A
p: (regex) -> if regex then r regex else 4
class B extends A
p: ->
eq 2, super / b/g
eq 2, super/b/g
eq 2, super/ b/g
eq true, super /b/g
new B().p()
# After the tokens exercised here (number, string, regex, boolean, null,
# undefined, `}`, `::`, `++`, `--`), a slash is asserted to be division for
# every spacing variant, including the "space before, none after" form.
# The spacing around every `/` is the input under test - do not reformat.
test "always division and never regex after some tokens", ->
b = 2
g = 1
# After a number.
eq 2, 4 / b/g
eq 2, 4/b/g
eq 2, 4/ b/g
eq 2, 4 /b/g
# After a string.
eq 2, "4" / b/g
eq 2, "4"/b/g
eq 2, "4"/ b/g
eq 2, "4" /b/g
# After a regex literal, with and without a flag; the quotient is expected
# to be NaN.
ok isNaN /a/ / b/g
ok isNaN /a/i / b/g
ok isNaN /a//b/g
ok isNaN /a/i/b/g
ok isNaN /a// b/g
ok isNaN /a/i/ b/g
ok isNaN /a/ /b/g
ok isNaN /a/i /b/g
# After boolean literals.
eq 0.5, true / b/g
eq 0.5, true/b/g
eq 0.5, true/ b/g
eq 0.5, true /b/g
eq 0, false / b/g
eq 0, false/b/g
eq 0, false/ b/g
eq 0, false /b/g
# After `null`.
eq 0, null / b/g
eq 0, null/b/g
eq 0, null/ b/g
eq 0, null /b/g
# After `undefined`; the quotient is expected to be NaN.
ok isNaN undefined / b/g
ok isNaN undefined/b/g
ok isNaN undefined/ b/g
ok isNaN undefined /b/g
# After a closing brace (object literal); the quotient is expected to be NaN.
ok isNaN {a: 4} / b/g
ok isNaN {a: 4}/b/g
ok isNaN {a: 4}/ b/g
ok isNaN {a: 4} /b/g
# After the prototype accessor `::`.
o = prototype: 4
eq 2, o:: / b/g
eq 2, o::/b/g
eq 2, o::/ b/g
eq 2, o:: /b/g
# After postfix `++` / `--`. Each assertion also changes `i`, so the expected
# quotients step by 0.5 from one line to the next.
i = 4
eq 2.0, i++ / b/g
eq 2.5, i++/b/g
eq 3.0, i++/ b/g
eq 3.5, i++ /b/g
eq 4.0, i-- / b/g
eq 3.5, i--/b/g
eq 3.0, i--/ b/g
eq 2.5, i-- /b/g
# Covers the `/=` ambiguity after an identifier: it can be the compound
# division operator or the start of a regex beginning with `=`.
# The whitespace around `/=` and `/` is the input under test - do not reformat.
test "compound division vs regex", ->
c = 4
i = 2
# With whitespace after `/=` (or no whitespace before it), `a` ends up as 5
# in every case below, i.e. 10 divided by (c / i).
a = 10
b = a /= c / i
eq a, 5
a = 10
b = a /= c /i
eq a, 5
a = 10
b = a /= c /i # Tabs.
eq a, 5
a = 10
b = a /= c /i # Non-breaking spaces.
eq a, 5
a = 10
b = a/= c /i
eq a, 5
a = 10
b = a/=c/i
eq a, 5
# Space before `/=` and none after: `a` is called with the regex /=c /i.
a = (regex) -> regex.test '=C '
b = a /=c /i
eq b, true
a = (regex) -> regex.test '= C '
# Use parentheses to disambiguate.
b = a(/= c /i)
eq b, true
b = a(/= c /)
eq b, false
b = a (/= c /)
eq b, false
# Escape to disambiguate.
b = a /\= c /i
eq b, true
b = a /\= c /
eq b, false
test "#764: regular expressions should be indexable", ->
eq /0/['source'], ///#{0}///['source']