From 8fd6258a4622593cbc77a23230c00a6af85647f5 Mon Sep 17 00:00:00 2001
From: Simon Lydell <simon.lydell@gmail.com>
Date: Sat, 10 Jan 2015 01:48:00 +0100
Subject: [PATCH] Fix #3410, #3182: Allow regex to start with space or =
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

A regex may not follow a specific set of tokens. These were already known before
in the `NOT_REGEX` and `NOT_SPACED_REGEX` arrays. (However, I've refactored them
to be more correct and to add a few missing tokens.) In all other cases (except
after a spaced callable) a slash is the start of a regex, and may now start with
a space or an equals sign. It’s really that simple!

A slash after a spaced callable is the only ambiguous case. We cannot know if
that's division or function application with a regex as the argument. The
spacing determines which is which:

Space on both sides:
- `a / b/i`  -> `a / b / i`
- `a /= b/i` -> `a /= b / i`

No spaces:
- `a/b/i`    -> `a / b / i`
- `a/=b/i`   -> `a /= b / i`

Space on the right side:
- `a/ b/i`   -> `a / b / i`
- `a/= b/i`  -> `a /= b / i`

Space on the left side:
- `a /b/i`   -> `a(/b/i)`
- `a /=b/i`  -> `a(/=b/i)`

The last case used to compile to `a /= b / i`, but that has been changed to be
consistent with the `/` operator. The last case really looks like a regex, so it
should be parsed as one.

Moreover, for consistency, you may now also space the `/` and `/=` operators
with whitespace characters other than a plain space (such as tabs and
non-breaking spaces).

Lastly, unclosed regexes are now reported as such, instead of generating some
other confusing error message.

It should perhaps also be noted that apart from escaping (such as `a /\ b/`) you
may now also use parentheses to disambiguate division and regex: `a (/ b/)`. See
https://github.com/jashkenas/coffeescript/issues/3182#issuecomment-26688427.
---
 lib/coffee-script/lexer.js |  31 +++---
 src/lexer.coffee           |  37 +++----
 test/error_messages.coffee |  26 +++++
 test/regexps.coffee        | 195 ++++++++++++++++++++++++++++++++++++-
 4 files changed, 255 insertions(+), 34 deletions(-)

diff --git a/lib/coffee-script/lexer.js b/lib/coffee-script/lexer.js
index 9e4eb109..e1f7fcc7 100644
--- a/lib/coffee-script/lexer.js
+++ b/lib/coffee-script/lexer.js
@@ -1,6 +1,6 @@
 // Generated by CoffeeScript 1.8.0
 (function() {
-  var BOM, BOOL, CALLABLE, CODE, COFFEE_ALIASES, COFFEE_ALIAS_MAP, COFFEE_KEYWORDS, COMMENT, COMPARE, COMPOUND_ASSIGN, HERECOMMENT_ILLEGAL, HEREDOC_DOUBLE, HEREDOC_INDENT, HEREDOC_SINGLE, HEREGEX, HEREGEX_OMIT, IDENTIFIER, INDENTABLE_CLOSERS, INDEXABLE, INVERSES, JSTOKEN, JS_FORBIDDEN, JS_KEYWORDS, LEADING_BLANK_LINE, LINE_BREAK, LINE_CONTINUER, LOGIC, Lexer, MATH, MULTILINER, MULTI_DENT, NOT_REGEX, NOT_SPACED_REGEX, NUMBER, OCTAL_ESCAPE, OPERATOR, REGEX, REGEX_FLAGS, REGEX_ILLEGAL, RELATION, RESERVED, Rewriter, SHIFT, STRICT_PROSCRIBED, STRING_DOUBLE, STRING_OMIT, STRING_SINGLE, STRING_START, TRAILING_BLANK_LINE, TRAILING_SPACES, UNARY, UNARY_MATH, VALID_FLAGS, WHITESPACE, compact, count, invertLiterate, key, last, locationDataToString, repeat, starts, throwSyntaxError, _ref, _ref1,
+  var BOM, BOOL, CALLABLE, CODE, COFFEE_ALIASES, COFFEE_ALIAS_MAP, COFFEE_KEYWORDS, COMMENT, COMPARE, COMPOUND_ASSIGN, HERECOMMENT_ILLEGAL, HEREDOC_DOUBLE, HEREDOC_INDENT, HEREDOC_SINGLE, HEREGEX, HEREGEX_OMIT, IDENTIFIER, INDENTABLE_CLOSERS, INDEXABLE, INVERSES, JSTOKEN, JS_FORBIDDEN, JS_KEYWORDS, LEADING_BLANK_LINE, LINE_BREAK, LINE_CONTINUER, LOGIC, Lexer, MATH, MULTILINER, MULTI_DENT, NOT_REGEX, NUMBER, OCTAL_ESCAPE, OPERATOR, POSSIBLY_DIVISION, REGEX, REGEX_FLAGS, REGEX_ILLEGAL, RELATION, RESERVED, Rewriter, SHIFT, STRICT_PROSCRIBED, STRING_DOUBLE, STRING_OMIT, STRING_SINGLE, STRING_START, TRAILING_BLANK_LINE, TRAILING_SPACES, UNARY, UNARY_MATH, VALID_FLAGS, WHITESPACE, compact, count, invertLiterate, key, last, locationDataToString, repeat, starts, throwSyntaxError, _ref, _ref1,
     __indexOf = [].indexOf || function(item) { for (var i = 0, l = this.length; i < l; i++) { if (i in this && this[i] === item) return i; } return -1; };
 
   _ref = require('./rewriter'), Rewriter = _ref.Rewriter, INVERSES = _ref.INVERSES;
@@ -287,7 +287,7 @@
     };
 
     Lexer.prototype.regexToken = function() {
-      var end, flags, index, match, prev, re, regex, tokens, _ref2, _ref3;
+      var closed, end, flags, index, match, prev, re, regex, tokens, _ref2, _ref3, _ref4;
       switch (false) {
         case !(match = REGEX_ILLEGAL.exec(this.chunk)):
           this.error("regular expressions cannot begin with " + match[2], match.index + match[1].length);
@@ -296,11 +296,20 @@
           _ref2 = this.matchWithInterpolations(this.chunk.slice(3), HEREGEX, '///', 3), tokens = _ref2.tokens, index = _ref2.index;
           break;
         case !(match = REGEX.exec(this.chunk)):
-          regex = match[0];
+          regex = match[0], closed = match[1];
           index = regex.length;
           prev = last(this.tokens);
-          if (prev && (_ref3 = prev[0], __indexOf.call((prev.spaced ? NOT_REGEX : NOT_SPACED_REGEX), _ref3) >= 0)) {
-            return 0;
+          if (prev) {
+            if (prev.spaced && (_ref3 = prev[0], __indexOf.call(CALLABLE, _ref3) >= 0)) {
+              if (!closed || POSSIBLY_DIVISION.test(regex)) {
+                return 0;
+              }
+            } else if (_ref4 = prev[0], __indexOf.call(NOT_REGEX, _ref4) >= 0) {
+              return 0;
+            }
+          }
+          if (!closed) {
+            this.error('missing / (unclosed regex)');
           }
           break;
         default:
@@ -845,7 +854,7 @@
 
   HEREDOC_INDENT = /\n+([^\n\S]*)(?=\S)/g;
 
-  REGEX = /^\/(?![\s=])(?:[^[\/\n\\]|\\.|\[(?:\\.|[^\]\n\\])*])+\//;
+  REGEX = /^\/(?!\/)(?:[^[\/\n\\]|\\.|\[(?:\\.|[^\]\n\\])*])*(\/)?/;
 
   REGEX_FLAGS = /^\w*/;
 
@@ -857,6 +866,8 @@
 
   REGEX_ILLEGAL = /^(\/|\/{3}\s*)(\*)/;
 
+  POSSIBLY_DIVISION = /^\/=?\s/;
+
   MULTILINER = /\n/g;
 
   HERECOMMENT_ILLEGAL = /\*\//;
@@ -889,13 +900,11 @@
 
   BOOL = ['TRUE', 'FALSE'];
 
-  NOT_REGEX = ['NUMBER', 'REGEX', 'BOOL', 'NULL', 'UNDEFINED', '++', '--'];
+  CALLABLE = ['IDENTIFIER', ')', ']', '?', '@', 'THIS', 'SUPER'];
 
-  NOT_SPACED_REGEX = NOT_REGEX.concat(')', '}', 'THIS', 'IDENTIFIER', 'STRING', ']');
+  INDEXABLE = CALLABLE.concat(['NUMBER', 'STRING', 'REGEX', 'BOOL', 'NULL', 'UNDEFINED', '}', '::']);
 
-  CALLABLE = ['IDENTIFIER', 'STRING', 'REGEX', ')', ']', '}', '?', '::', '@', 'THIS', 'SUPER'];
-
-  INDEXABLE = CALLABLE.concat('NUMBER', 'BOOL', 'NULL', 'UNDEFINED');
+  NOT_REGEX = INDEXABLE.concat(['++', '--']);
 
   LINE_BREAK = ['INDENT', 'OUTDENT', 'TERMINATOR'];
 
diff --git a/src/lexer.coffee b/src/lexer.coffee
index 11cea8e9..e712e67a 100644
--- a/src/lexer.coffee
+++ b/src/lexer.coffee
@@ -258,10 +258,15 @@ exports.Lexer = class Lexer
       when @chunk[...3] is '///'
         {tokens, index} = @matchWithInterpolations @chunk[3..], HEREGEX, '///', 3
       when match = REGEX.exec @chunk
-        [regex] = match
+        [regex, closed] = match
         index = regex.length
         prev = last @tokens
-        return 0 if prev and (prev[0] in (if prev.spaced then NOT_REGEX else NOT_SPACED_REGEX))
+        if prev
+          if prev.spaced and prev[0] in CALLABLE
+            return 0 if not closed or POSSIBLY_DIVISION.test regex
+          else if prev[0] in NOT_REGEX
+            return 0
+        @error 'missing / (unclosed regex)' unless closed
       else
         return 0
 
@@ -776,13 +781,13 @@ HEREDOC_INDENT = /\n+([^\n\S]*)(?=\S)/g
 
 # Regex-matching-regexes.
 REGEX = /// ^
-  / (?! [\s=] ) (    # disallow leading whitespace or equals sign
+  / (?!/) (
   ?: [^ [ / \n \\ ]  # every other thing
    | \\.             # anything (but newlines) escaped
    | \[              # character class
        (?: \\. | [^ \] \n \\ ] )*
      ]
-  )+ /
+  )* (/)?
 ///
 
 REGEX_FLAGS  = /^\w*/
@@ -798,6 +803,8 @@ HEREGEX_OMIT = ///
 
 REGEX_ILLEGAL = /// ^ ( / | /{3}\s*) (\*) ///
 
+POSSIBLY_DIVISION   = /// ^ /=?\s ///
+
 # Other regexes.
 MULTILINER          = /\n/g
 
@@ -841,23 +848,17 @@ RELATION = ['IN', 'OF', 'INSTANCEOF']
 # Boolean tokens.
 BOOL = ['TRUE', 'FALSE']
 
-# Tokens which a regular expression will never immediately follow, but which
-# a division operator might.
-#
-# See: http://www.mozilla.org/js/language/js20-2002-04/rationale/syntax.html#regular-expressions
-#
-# Our list is shorter, due to sans-parentheses method calls.
-NOT_REGEX = ['NUMBER', 'REGEX', 'BOOL', 'NULL', 'UNDEFINED', '++', '--']
-
-# If the previous token is not spaced, there are more preceding tokens that
-# force a division parse:
-NOT_SPACED_REGEX = NOT_REGEX.concat ')', '}', 'THIS', 'IDENTIFIER', 'STRING', ']'
-
 # Tokens which could legitimately be invoked or indexed. An opening
 # parentheses or bracket following these tokens will be recorded as the start
 # of a function invocation or indexing operation.
-CALLABLE  = ['IDENTIFIER', 'STRING', 'REGEX', ')', ']', '}', '?', '::', '@', 'THIS', 'SUPER']
-INDEXABLE = CALLABLE.concat 'NUMBER', 'BOOL', 'NULL', 'UNDEFINED'
+CALLABLE  = ['IDENTIFIER', ')', ']', '?', '@', 'THIS', 'SUPER']
+INDEXABLE = CALLABLE.concat ['NUMBER', 'STRING', 'REGEX', 'BOOL', 'NULL', 'UNDEFINED', '}', '::']
+
+# Tokens which a regular expression will never immediately follow (except spaced
+# CALLABLEs in some cases), but which a division operator can.
+#
+# See: http://www-archive.mozilla.org/js/language/js20-2002-04/rationale/syntax.html#regular-expressions
+NOT_REGEX = INDEXABLE.concat ['++', '--']
 
 # Tokens that, when immediately preceding a `WHEN`, indicate that the `WHEN`
 # occurs at the start of a line. We disambiguate these from trailing whens to
diff --git a/test/error_messages.coffee b/test/error_messages.coffee
index 9f6c764a..2aa9e7f1 100644
--- a/test/error_messages.coffee
+++ b/test/error_messages.coffee
@@ -405,3 +405,29 @@ test "missing `)`, `}`, `]`", ->
       foo#{ bar "#{1}"
           ^
   '''
+
+test "unclosed regexes", ->
+  assertErrorFormat '''
+    /
+  ''', '''
+    [stdin]:1:1: error: missing / (unclosed regex)
+    /
+    ^
+  '''
+  assertErrorFormat '''
+    # Note the double escaping; this would be `/a\/` real code.
+    /a\\/
+  ''', '''
+    [stdin]:2:1: error: missing / (unclosed regex)
+    /a\\/
+    ^
+  '''
+  assertErrorFormat '''
+    /// ^
+      a #{""" ""#{if /[/].test "|" then 1 else 0}"" """}
+    ///
+  ''', '''
+    [stdin]:2:18: error: missing / (unclosed regex)
+      a #{""" ""#{if /[/].test "|" then 1 else 0}"" """}
+                     ^
+  '''
diff --git a/test/regexps.coffee b/test/regexps.coffee
index 0aa4cbec..c8f7f020 100644
--- a/test/regexps.coffee
+++ b/test/regexps.coffee
@@ -13,16 +13,34 @@ test "basic regular expression literals", ->
   ok 'a'.match /a/g
 
 test "division is not confused for a regular expression", ->
+  # Any spacing around the slash is allowed when it cannot be a regex.
   eq 2, 4 / 2 / 1
+  eq 2, 4/2/1
+  eq 2, 4/ 2 / 1
+  eq 2, 4 /2 / 1
+  eq 2, 4 / 2/ 1
+  eq 2, 4 / 2 /1
+  eq 2, 4 /2/ 1
 
-  a = 4
+  a = (regex) -> regex.test 'a b c'
+  a.valueOf = -> 4
   b = 2
   g = 1
-  eq 2, a / b/g
 
-  a = 10
-  b = a /= 4 / 2
-  eq a, 5
+  eq 2, a / b/g
+  eq 2, a/ b/g
+  eq 2, a / b/ g
+  eq 2, a	/	b/g # Tabs.
+  eq 2, a / b/g # Non-breaking spaces.
+  eq true, a /b/g
+  # Use parentheses to disambiguate.
+  eq true, a(/ b/g)
+  eq true, a(/ b/)
+  eq true, a (/ b/)
+  # Escape to disambiguate.
+  eq true, a /\ b/g
+  eq false, a	/\	b/g
+  eq true, a /\ b/
 
   obj = method: -> 2
   two = 2
@@ -32,6 +50,173 @@ test "division is not confused for a regular expression", ->
   eq 2, (4)/2/i
   eq 1, i/i/i
 
+  a = ''
+  a += ' ' until /   /.test a
+  eq a, '   '
+
+  a = if /=/.test '=' then yes else no
+  eq a, yes
+
+  a = if !/=/.test '=' then yes else no
+  eq a, no
+
+  #3182:
+  match = 'foo=bar'.match /=/
+  eq match[0], '='
+
+  #3410:
+  ok ' '.match(/ /)[0] is ' '
+
+
+test "division vs regex after a callable token", ->
+  b = 2
+  g = 1
+  r = (r) -> r.test 'b'
+
+  a = 4
+  eq 2, a / b/g
+  eq 2, a/b/g
+  eq 2, a/ b/g
+  eq true, r /b/g
+  eq 2, (1 + 3) / b/g
+  eq 2, (1 + 3)/b/g
+  eq 2, (1 + 3)/ b/g
+  eq true, (r) /b/g
+  eq 2, [4][0] / b/g
+  eq 2, [4][0]/b/g
+  eq 2, [4][0]/ b/g
+  eq true, [r][0] /b/g
+  eq 0.5, 4? / b/g
+  eq 0.5, 4?/b/g
+  eq 0.5, 4?/ b/g
+  eq true, r? /b/g
+  (->
+    eq 2, @ / b/g
+    eq 2, @/b/g
+    eq 2, @/ b/g
+  ).call 4
+  (->
+    eq true, @ /b/g
+  ).call r
+  (->
+    eq 2, this / b/g
+    eq 2, this/b/g
+    eq 2, this/ b/g
+  ).call 4
+  (->
+    eq true, this /b/g
+  ).call r
+  class A
+    p: (regex) -> if regex then r regex else 4
+  class B extends A
+    p: ->
+      eq 2, super / b/g
+      eq 2, super/b/g
+      eq 2, super/ b/g
+      eq true, super /b/g
+  new B().p()
+
+test "always division and never regex after some tokens", ->
+  b = 2
+  g = 1
+
+  eq 2, 4 / b/g
+  eq 2, 4/b/g
+  eq 2, 4/ b/g
+  eq 2, 4 /b/g
+  eq 2, "4" / b/g
+  eq 2, "4"/b/g
+  eq 2, "4"/ b/g
+  eq 2, "4" /b/g
+  ok isNaN /a/ / b/g
+  ok isNaN /a/i / b/g
+  ok isNaN /a//b/g
+  ok isNaN /a/i/b/g
+  ok isNaN /a// b/g
+  ok isNaN /a/i/ b/g
+  ok isNaN /a/ /b/g
+  ok isNaN /a/i /b/g
+  eq 0.5, true / b/g
+  eq 0.5, true/b/g
+  eq 0.5, true/ b/g
+  eq 0.5, true /b/g
+  eq 0, false / b/g
+  eq 0, false/b/g
+  eq 0, false/ b/g
+  eq 0, false /b/g
+  eq 0, null / b/g
+  eq 0, null/b/g
+  eq 0, null/ b/g
+  eq 0, null /b/g
+  ok isNaN undefined / b/g
+  ok isNaN undefined/b/g
+  ok isNaN undefined/ b/g
+  ok isNaN undefined /b/g
+  ok isNaN {a: 4} / b/g
+  ok isNaN {a: 4}/b/g
+  ok isNaN {a: 4}/ b/g
+  ok isNaN {a: 4} /b/g
+  o = prototype: 4
+  eq 2, o:: / b/g
+  eq 2, o::/b/g
+  eq 2, o::/ b/g
+  eq 2, o:: /b/g
+  i = 4
+  eq 2.0, i++ / b/g
+  eq 2.5, i++/b/g
+  eq 3.0, i++/ b/g
+  eq 3.5, i++ /b/g
+  eq 4.0, i-- / b/g
+  eq 3.5, i--/b/g
+  eq 3.0, i--/ b/g
+  eq 2.5, i-- /b/g
+
+test "compound division vs regex", ->
+  c = 4
+  i = 2
+
+  a = 10
+  b = a /= c / i
+  eq a, 5
+
+  a = 10
+  b = a /= c /i
+  eq a, 5
+
+  a = 10
+  b = a	/=	c /i # Tabs.
+  eq a, 5
+
+  a = 10
+  b = a /= c /i # Non-breaking spaces.
+  eq a, 5
+
+  a = 10
+  b = a/= c /i
+  eq a, 5
+
+  a = 10
+  b = a/=c/i
+  eq a, 5
+
+  a = (regex) -> regex.test '=C '
+  b = a /=c /i
+  eq b, true
+
+  a = (regex) -> regex.test '= C '
+  # Use parentheses to disambiguate.
+  b = a(/= c /i)
+  eq b, true
+  b = a(/= c /)
+  eq b, false
+  b = a (/= c /)
+  eq b, false
+  # Escape to disambiguate.
+  b = a /\= c /i
+  eq b, true
+  b = a /\= c /
+  eq b, false
+
 test "#764: regular expressions should be indexable", ->
   eq /0/['source'], ///#{0}///['source']