diff --git a/lib/coffee-script/lexer.js b/lib/coffee-script/lexer.js index 9e4eb109..e1f7fcc7 100644 --- a/lib/coffee-script/lexer.js +++ b/lib/coffee-script/lexer.js @@ -1,6 +1,6 @@ // Generated by CoffeeScript 1.8.0 (function() { - var BOM, BOOL, CALLABLE, CODE, COFFEE_ALIASES, COFFEE_ALIAS_MAP, COFFEE_KEYWORDS, COMMENT, COMPARE, COMPOUND_ASSIGN, HERECOMMENT_ILLEGAL, HEREDOC_DOUBLE, HEREDOC_INDENT, HEREDOC_SINGLE, HEREGEX, HEREGEX_OMIT, IDENTIFIER, INDENTABLE_CLOSERS, INDEXABLE, INVERSES, JSTOKEN, JS_FORBIDDEN, JS_KEYWORDS, LEADING_BLANK_LINE, LINE_BREAK, LINE_CONTINUER, LOGIC, Lexer, MATH, MULTILINER, MULTI_DENT, NOT_REGEX, NOT_SPACED_REGEX, NUMBER, OCTAL_ESCAPE, OPERATOR, REGEX, REGEX_FLAGS, REGEX_ILLEGAL, RELATION, RESERVED, Rewriter, SHIFT, STRICT_PROSCRIBED, STRING_DOUBLE, STRING_OMIT, STRING_SINGLE, STRING_START, TRAILING_BLANK_LINE, TRAILING_SPACES, UNARY, UNARY_MATH, VALID_FLAGS, WHITESPACE, compact, count, invertLiterate, key, last, locationDataToString, repeat, starts, throwSyntaxError, _ref, _ref1, + var BOM, BOOL, CALLABLE, CODE, COFFEE_ALIASES, COFFEE_ALIAS_MAP, COFFEE_KEYWORDS, COMMENT, COMPARE, COMPOUND_ASSIGN, HERECOMMENT_ILLEGAL, HEREDOC_DOUBLE, HEREDOC_INDENT, HEREDOC_SINGLE, HEREGEX, HEREGEX_OMIT, IDENTIFIER, INDENTABLE_CLOSERS, INDEXABLE, INVERSES, JSTOKEN, JS_FORBIDDEN, JS_KEYWORDS, LEADING_BLANK_LINE, LINE_BREAK, LINE_CONTINUER, LOGIC, Lexer, MATH, MULTILINER, MULTI_DENT, NOT_REGEX, NUMBER, OCTAL_ESCAPE, OPERATOR, POSSIBLY_DIVISION, REGEX, REGEX_FLAGS, REGEX_ILLEGAL, RELATION, RESERVED, Rewriter, SHIFT, STRICT_PROSCRIBED, STRING_DOUBLE, STRING_OMIT, STRING_SINGLE, STRING_START, TRAILING_BLANK_LINE, TRAILING_SPACES, UNARY, UNARY_MATH, VALID_FLAGS, WHITESPACE, compact, count, invertLiterate, key, last, locationDataToString, repeat, starts, throwSyntaxError, _ref, _ref1, __indexOf = [].indexOf || function(item) { for (var i = 0, l = this.length; i < l; i++) { if (i in this && this[i] === item) return i; } return -1; }; _ref = require('./rewriter'), Rewriter = _ref.Rewriter, INVERSES = _ref.INVERSES; @@ -287,7 +287,7 @@ }; Lexer.prototype.regexToken = function() { - var end, flags, index, match, prev, re, regex, tokens, _ref2, _ref3; + var closed, end, flags, index, match, prev, re, regex, tokens, _ref2, _ref3, _ref4; switch (false) { case !(match = REGEX_ILLEGAL.exec(this.chunk)): this.error("regular expressions cannot begin with " + match[2], match.index + match[1].length); @@ -296,11 +296,20 @@ _ref2 = this.matchWithInterpolations(this.chunk.slice(3), HEREGEX, '///', 3), tokens = _ref2.tokens, index = _ref2.index; break; case !(match = REGEX.exec(this.chunk)): - regex = match[0]; + regex = match[0], closed = match[1]; index = regex.length; prev = last(this.tokens); - if (prev && (_ref3 = prev[0], __indexOf.call((prev.spaced ? NOT_REGEX : NOT_SPACED_REGEX), _ref3) >= 0)) { - return 0; + if (prev) { + if (prev.spaced && (_ref3 = prev[0], __indexOf.call(CALLABLE, _ref3) >= 0)) { + if (!closed || POSSIBLY_DIVISION.test(regex)) { + return 0; + } + } else if (_ref4 = prev[0], __indexOf.call(NOT_REGEX, _ref4) >= 0) { + return 0; + } + } + if (!closed) { + this.error('missing / (unclosed regex)'); } break; default: @@ -845,7 +854,7 @@ HEREDOC_INDENT = /\n+([^\n\S]*)(?=\S)/g; - REGEX = /^\/(?![\s=])(?:[^[\/\n\\]|\\.|\[(?:\\.|[^\]\n\\])*])+\//; + REGEX = /^\/(?!\/)(?:[^[\/\n\\]|\\.|\[(?:\\.|[^\]\n\\])*])*(\/)?/; REGEX_FLAGS = /^\w*/; @@ -857,6 +866,8 @@ REGEX_ILLEGAL = /^(\/|\/{3}\s*)(\*)/; + POSSIBLY_DIVISION = /^\/=?\s/; + MULTILINER = /\n/g; HERECOMMENT_ILLEGAL = /\*\//; @@ -889,13 +900,11 @@ BOOL = ['TRUE', 'FALSE']; - NOT_REGEX = ['NUMBER', 'REGEX', 'BOOL', 'NULL', 'UNDEFINED', '++', '--']; + CALLABLE = ['IDENTIFIER', ')', ']', '?', '@', 'THIS', 'SUPER']; - NOT_SPACED_REGEX = NOT_REGEX.concat(')', '}', 'THIS', 'IDENTIFIER', 'STRING', ']'); + INDEXABLE = CALLABLE.concat(['NUMBER', 'STRING', 'REGEX', 'BOOL', 'NULL', 'UNDEFINED', '}', '::']); - CALLABLE = ['IDENTIFIER', 'STRING', 'REGEX', ')', ']', '}', '?', '::', '@', 'THIS', 'SUPER']; - - INDEXABLE = CALLABLE.concat('NUMBER', 'BOOL', 'NULL', 'UNDEFINED'); + NOT_REGEX = INDEXABLE.concat(['++', '--']); LINE_BREAK = ['INDENT', 'OUTDENT', 'TERMINATOR']; diff --git a/src/lexer.coffee b/src/lexer.coffee index 11cea8e9..e712e67a 100644 --- a/src/lexer.coffee +++ b/src/lexer.coffee @@ -258,10 +258,15 @@ exports.Lexer = class Lexer when @chunk[...3] is '///' {tokens, index} = @matchWithInterpolations @chunk[3..], HEREGEX, '///', 3 when match = REGEX.exec @chunk - [regex] = match + [regex, closed] = match index = regex.length prev = last @tokens - return 0 if prev and (prev[0] in (if prev.spaced then NOT_REGEX else NOT_SPACED_REGEX)) + if prev + if prev.spaced and prev[0] in CALLABLE + return 0 if not closed or POSSIBLY_DIVISION.test regex + else if prev[0] in NOT_REGEX + return 0 + @error 'missing / (unclosed regex)' unless closed else return 0 @@ -776,13 +781,13 @@ HEREDOC_INDENT = /\n+([^\n\S]*)(?=\S)/g # Regex-matching-regexes. REGEX = /// ^ - / (?! [\s=] ) ( # disallow leading whitespace or equals sign + / (?!/) ( ?: [^ [ / \n \\ ] # every other thing | \\. # anything (but newlines) escaped | \[ # character class (?: \\. | [^ \] \n \\ ] )* ] - )+ / + )* (/)? /// REGEX_FLAGS = /^\w*/ @@ -798,6 +803,8 @@ HEREGEX_OMIT = /// REGEX_ILLEGAL = /// ^ ( / | /{3}\s*) (\*) /// +POSSIBLY_DIVISION = /// ^ /=?\s /// + # Other regexes. MULTILINER = /\n/g @@ -841,23 +848,17 @@ RELATION = ['IN', 'OF', 'INSTANCEOF'] # Boolean tokens. BOOL = ['TRUE', 'FALSE'] -# Tokens which a regular expression will never immediately follow, but which -# a division operator might. -# -# See: http://www.mozilla.org/js/language/js20-2002-04/rationale/syntax.html#regular-expressions -# -# Our list is shorter, due to sans-parentheses method calls. -NOT_REGEX = ['NUMBER', 'REGEX', 'BOOL', 'NULL', 'UNDEFINED', '++', '--'] - -# If the previous token is not spaced, there are more preceding tokens that -# force a division parse: -NOT_SPACED_REGEX = NOT_REGEX.concat ')', '}', 'THIS', 'IDENTIFIER', 'STRING', ']' - # Tokens which could legitimately be invoked or indexed. An opening # parentheses or bracket following these tokens will be recorded as the start # of a function invocation or indexing operation. -CALLABLE = ['IDENTIFIER', 'STRING', 'REGEX', ')', ']', '}', '?', '::', '@', 'THIS', 'SUPER'] -INDEXABLE = CALLABLE.concat 'NUMBER', 'BOOL', 'NULL', 'UNDEFINED' +CALLABLE = ['IDENTIFIER', ')', ']', '?', '@', 'THIS', 'SUPER'] +INDEXABLE = CALLABLE.concat ['NUMBER', 'STRING', 'REGEX', 'BOOL', 'NULL', 'UNDEFINED', '}', '::'] + +# Tokens which a regular expression will never immediately follow (except spaced +# CALLABLEs in some cases), but which a division operator can. +# +# See: http://www-archive.mozilla.org/js/language/js20-2002-04/rationale/syntax.html#regular-expressions +NOT_REGEX = INDEXABLE.concat ['++', '--'] # Tokens that, when immediately preceding a `WHEN`, indicate that the `WHEN` # occurs at the start of a line. We disambiguate these from trailing whens to diff --git a/test/error_messages.coffee b/test/error_messages.coffee index 9f6c764a..2aa9e7f1 100644 --- a/test/error_messages.coffee +++ b/test/error_messages.coffee @@ -405,3 +405,29 @@ test "missing `)`, `}`, `]`", -> foo#{ bar "#{1}" ^ ''' + +test "unclosed regexes", -> + assertErrorFormat ''' + / + ''', ''' + [stdin]:1:1: error: missing / (unclosed regex) + / + ^ + ''' + assertErrorFormat ''' + # Note the double escaping; this would be `/a\/` real code. + /a\\/ + ''', ''' + [stdin]:2:1: error: missing / (unclosed regex) + /a\\/ + ^ + ''' + assertErrorFormat ''' + /// ^ + a #{""" ""#{if /[/].test "|" then 1 else 0}"" """} + /// + ''', ''' + [stdin]:2:18: error: missing / (unclosed regex) + a #{""" ""#{if /[/].test "|" then 1 else 0}"" """} + ^ + ''' diff --git a/test/regexps.coffee b/test/regexps.coffee index 0aa4cbec..c8f7f020 100644 --- a/test/regexps.coffee +++ b/test/regexps.coffee @@ -13,16 +13,34 @@ test "basic regular expression literals", -> ok 'a'.match /a/g test "division is not confused for a regular expression", -> + # Any spacing around the slash is allowed when it cannot be a regex. eq 2, 4 / 2 / 1 + eq 2, 4/2/1 + eq 2, 4/ 2 / 1 + eq 2, 4 /2 / 1 + eq 2, 4 / 2/ 1 + eq 2, 4 / 2 /1 + eq 2, 4 /2/ 1 - a = 4 + a = (regex) -> regex.test 'a b c' + a.valueOf = -> 4 b = 2 g = 1 - eq 2, a / b/g - a = 10 - b = a /= 4 / 2 - eq a, 5 + eq 2, a / b/g + eq 2, a/ b/g + eq 2, a / b/ g + eq 2, a / b/g # Tabs. + eq 2, a / b/g # Non-breaking spaces. + eq true, a /b/g + # Use parentheses to disambiguate. + eq true, a(/ b/g) + eq true, a(/ b/) + eq true, a (/ b/) + # Escape to disambiguate. + eq true, a /\ b/g + eq false, a /\ b/g + eq true, a /\ b/ obj = method: -> 2 two = 2 @@ -32,6 +50,173 @@ test "division is not confused for a regular expression", -> eq 2, (4)/2/i eq 1, i/i/i + a = '' + a += ' ' until / /.test a + eq a, ' ' + + a = if /=/.test '=' then yes else no + eq a, yes + + a = if !/=/.test '=' then yes else no + eq a, no + + #3182: + match = 'foo=bar'.match /=/ + eq match[0], '=' + + #3410: + ok ' '.match(/ /)[0] is ' ' + + +test "division vs regex after a callable token", -> + b = 2 + g = 1 + r = (r) -> r.test 'b' + + a = 4 + eq 2, a / b/g + eq 2, a/b/g + eq 2, a/ b/g + eq true, r /b/g + eq 2, (1 + 3) / b/g + eq 2, (1 + 3)/b/g + eq 2, (1 + 3)/ b/g + eq true, (r) /b/g + eq 2, [4][0] / b/g + eq 2, [4][0]/b/g + eq 2, [4][0]/ b/g + eq true, [r][0] /b/g + eq 0.5, 4? / b/g + eq 0.5, 4?/b/g + eq 0.5, 4?/ b/g + eq true, r? /b/g + (-> + eq 2, @ / b/g + eq 2, @/b/g + eq 2, @/ b/g + ).call 4 + (-> + eq true, @ /b/g + ).call r + (-> + eq 2, this / b/g + eq 2, this/b/g + eq 2, this/ b/g + ).call 4 + (-> + eq true, this /b/g + ).call r + class A + p: (regex) -> if regex then r regex else 4 + class B extends A + p: -> + eq 2, super / b/g + eq 2, super/b/g + eq 2, super/ b/g + eq true, super /b/g + new B().p() + +test "always division and never regex after some tokens", -> + b = 2 + g = 1 + + eq 2, 4 / b/g + eq 2, 4/b/g + eq 2, 4/ b/g + eq 2, 4 /b/g + eq 2, "4" / b/g + eq 2, "4"/b/g + eq 2, "4"/ b/g + eq 2, "4" /b/g + ok isNaN /a/ / b/g + ok isNaN /a/i / b/g + ok isNaN /a//b/g + ok isNaN /a/i/b/g + ok isNaN /a// b/g + ok isNaN /a/i/ b/g + ok isNaN /a/ /b/g + ok isNaN /a/i /b/g + eq 0.5, true / b/g + eq 0.5, true/b/g + eq 0.5, true/ b/g + eq 0.5, true /b/g + eq 0, false / b/g + eq 0, false/b/g + eq 0, false/ b/g + eq 0, false /b/g + eq 0, null / b/g + eq 0, null/b/g + eq 0, null/ b/g + eq 0, null /b/g + ok isNaN undefined / b/g + ok isNaN undefined/b/g + ok isNaN undefined/ b/g + ok isNaN undefined /b/g + ok isNaN {a: 4} / b/g + ok isNaN {a: 4}/b/g + ok isNaN {a: 4}/ b/g + ok isNaN {a: 4} /b/g + o = prototype: 4 + eq 2, o:: / b/g + eq 2, o::/b/g + eq 2, o::/ b/g + eq 2, o:: /b/g + i = 4 + eq 2.0, i++ / b/g + eq 2.5, i++/b/g + eq 3.0, i++/ b/g + eq 3.5, i++ /b/g + eq 4.0, i-- / b/g + eq 3.5, i--/b/g + eq 3.0, i--/ b/g + eq 2.5, i-- /b/g + +test "compound division vs regex", -> + c = 4 + i = 2 + + a = 10 + b = a /= c / i + eq a, 5 + + a = 10 + b = a /= c /i + eq a, 5 + + a = 10 + b = a /= c /i # Tabs. + eq a, 5 + + a = 10 + b = a /= c /i # Non-breaking spaces. + eq a, 5 + + a = 10 + b = a/= c /i + eq a, 5 + + a = 10 + b = a/=c/i + eq a, 5 + + a = (regex) -> regex.test '=C ' + b = a /=c /i + eq b, true + + a = (regex) -> regex.test '= C ' + # Use parentheses to disambiguate. + b = a(/= c /i) + eq b, true + b = a(/= c /) + eq b, false + b = a (/= c /) + eq b, false + # Escape to disambiguate. + b = a /\= c /i + eq b, true + b = a /\= c / + eq b, false + test "#764: regular expressions should be indexable", -> eq /0/['source'], ///#{0}///['source']