# jashkenas--coffeescript/test/regexps.coffee

# Regular Expression Literals
# ---------------------------

# TODO: add method invocation tests: /regex/.toString()

# * Regexen
# * Heregexen

# Smoke test: a plain regex literal and one carrying the `g` flag are both
# accepted as the argument of `match`, with explicit parentheses as well as
# with an implicit (parenthesis-free) call.
test "basic regular expression literals", ->
  ok 'a'.match(/a/)
  ok 'a'.match /a/
  ok 'a'.match(/a/g)
  ok 'a'.match /a/g

test "division is not confused for a regular expression", ->
  # Any spacing around the slash is allowed when it cannot be a regex.
  eq 2, 4 / 2 / 1
  eq 2, 4/2/1
  eq 2, 4/ 2 / 1
  eq 2, 4 /2 / 1
  eq 2, 4 / 2/ 1
  eq 2, 4 / 2 /1
  eq 2, 4 /2/ 1

  a = (regex) -> regex.test 'a b c'
  a.valueOf = -> 4
  b = 2
  g = 1

  eq 2, a / b/g
  eq 2, a/ b/g
  eq 2, a / b/ g
  eq 2, a	/	b/g # Tabs.
  eq 2, a / b/g # Non-breaking spaces.
  eq true, a /b/g
  # Use parentheses to disambiguate.
  eq true, a(/ b/g)
  eq true, a(/ b/)
  eq true, a (/ b/)
  # Escape to disambiguate.
  eq true, a /\ b/g
  eq false, a	/\	b/g
  eq true, a /\ b/

  obj = method: -> 2
  two = 2
  eq 2, (obj.method()/two + obj.method()/two)

  i = 1
  eq 2, (4)/2/i
  eq 1, i/i/i

  a = ''
  a += ' ' until /   /.test a
  eq a, '   '

  a = if /=/.test '=' then yes else no
  eq a, yes

  a = if !/=/.test '=' then yes else no
  eq a, no

  # #3182: a regex literal may start with an equals sign.
  match = 'foo=bar'.match /=/
  eq match[0], '='

  # #3410: a regex literal may start with a space.
  ok ' '.match(/ /)[0] is ' '


# After a token that could be called (an identifier, `)`, `]`, `?`, `@`,
# `this`, `super`) a slash is ambiguous. Each group below asserts the same
# rule: with no space before the slash, or with space on both sides, the
# expression is division (`x / b / g`); with a space only before the slash,
# `/b/g` is a regex passed as an argument to the preceding callable.
test "division vs regex after a callable token", ->
  b = 2
  g = 1
  # `r` reports whether the regex it is given matches the string 'b'.
  r = (r) -> r.test 'b'

  # Plain identifier.
  a = 4
  eq 2, a / b/g
  eq 2, a/b/g
  eq 2, a/ b/g
  eq true, r /b/g
  # Closing parenthesis.
  eq 2, (1 + 3) / b/g
  eq 2, (1 + 3)/b/g
  eq 2, (1 + 3)/ b/g
  eq true, (r) /b/g
  # Closing index bracket.
  eq 2, [4][0] / b/g
  eq 2, [4][0]/b/g
  eq 2, [4][0]/ b/g
  eq true, [r][0] /b/g
  # Existential operator: the expected 0.5 is `true / b / g`, i.e. the result
  # of `4?` is what gets divided.
  eq 0.5, 4? / b/g
  eq 0.5, 4?/b/g
  eq 0.5, 4?/ b/g
  eq true, r? /b/g
  # `@` bound to 4 (division) and then to `r` (call) via `.call`.
  (->
    eq 2, @ / b/g
    eq 2, @/b/g
    eq 2, @/ b/g
  ).call 4
  (->
    eq true, @ /b/g
  ).call r
  # Same as above, spelled `this`.
  (->
    eq 2, this / b/g
    eq 2, this/b/g
    eq 2, this/ b/g
  ).call 4
  (->
    eq true, this /b/g
  ).call r
  # `A::p` returns 4 when called without an argument and `r regex` when given
  # one, so `super` inside `B::p` can serve as a dividend or as a callee.
  class A
    p: (regex) -> if regex then r regex else 4
  class B extends A
    p: ->
      eq 2, super / b/g
      eq 2, super/b/g
      eq 2, super/ b/g
      eq true, super /b/g
  new B().p()

# After tokens that can never be called, a slash is always division no matter
# how it is spaced. Each group below runs the same four spacings (both sides,
# none, right only, left only) after one kind of token and expects the same
# arithmetic result `x / b / g` with b = 2 and g = 1.
test "always division and never regex after some tokens", ->
  b = 2
  g = 1

  # Number literal.
  eq 2, 4 / b/g
  eq 2, 4/b/g
  eq 2, 4/ b/g
  eq 2, 4 /b/g
  # String literal (coerced to a number by the division).
  eq 2, "4" / b/g
  eq 2, "4"/b/g
  eq 2, "4"/ b/g
  eq 2, "4" /b/g
  # Interpolated string: "4#{0}" is "40", hence 20.
  eq 20, "4#{0}" / b/g
  eq 20, "4#{0}"/b/g
  eq 20, "4#{0}"/ b/g
  eq 20, "4#{0}" /b/g
  # Regex literal, with and without a flag; dividing a regex yields NaN.
  ok isNaN /a/ / b/g
  ok isNaN /a/i / b/g
  ok isNaN /a//b/g
  ok isNaN /a/i/b/g
  ok isNaN /a// b/g
  ok isNaN /a/i/ b/g
  ok isNaN /a/ /b/g
  ok isNaN /a/i /b/g
  # Boolean, null and undefined literals.
  eq 0.5, true / b/g
  eq 0.5, true/b/g
  eq 0.5, true/ b/g
  eq 0.5, true /b/g
  eq 0, false / b/g
  eq 0, false/b/g
  eq 0, false/ b/g
  eq 0, false /b/g
  eq 0, null / b/g
  eq 0, null/b/g
  eq 0, null/ b/g
  eq 0, null /b/g
  ok isNaN undefined / b/g
  ok isNaN undefined/b/g
  ok isNaN undefined/ b/g
  ok isNaN undefined /b/g
  # Closing brace of an object literal.
  ok isNaN {a: 4} / b/g
  ok isNaN {a: 4}/b/g
  ok isNaN {a: 4}/ b/g
  ok isNaN {a: 4} /b/g
  # Prototype shorthand `::`; `o::` reads `o.prototype`, which is 4 here.
  o = prototype: 4
  eq 2, o:: / b/g
  eq 2, o::/b/g
  eq 2, o::/ b/g
  eq 2, o:: /b/g
  # Postfix `++` / `--`. `i` is mutated by every line, so the expected value
  # moves by 0.5 per line (4, 5, 6, 7 and then 8, 7, 6, 5 divided by 2).
  i = 4
  eq 2.0, i++ / b/g
  eq 2.5, i++/b/g
  eq 3.0, i++/ b/g
  eq 3.5, i++ /b/g
  eq 4.0, i-- / b/g
  eq 3.5, i--/b/g
  eq 3.0, i--/ b/g
  eq 2.5, i-- /b/g

test "compound division vs regex", ->
  c = 4
  i = 2

  a = 10
  b = a /= c / i
  eq a, 5

  a = 10
  b = a /= c /i
  eq a, 5

  a = 10
  b = a	/=	c /i # Tabs.
  eq a, 5

  a = 10
  b = a /= c /i # Non-breaking spaces.
  eq a, 5

  a = 10
  b = a/= c /i
  eq a, 5

  a = 10
  b = a/=c/i
  eq a, 5

  a = (regex) -> regex.test '=C '
  b = a /=c /i
  eq b, true

  a = (regex) -> regex.test '= C '
  # Use parentheses to disambiguate.
  b = a(/= c /i)
  eq b, true
  b = a(/= c /)
  eq b, false
  b = a (/= c /)
  eq b, false
  # Escape to disambiguate.
  b = a /\= c /i
  eq b, true
  b = a /\= c /
  eq b, false

# Bracket indexing works directly on a regex literal and on an interpolated
# heregex; both are expected to report the same `source`.
test "#764: regular expressions should be indexable", ->
  eq /0/['source'], ///#{0}///['source']

# The unescaped `/` inside the character class `[/]` must not be taken as the
# end of the regex literal; the pattern is expected to match 'a//b'.
test "#584: slashes are allowed unescaped in character classes", ->
  ok /^a\/[/]b$/.test 'a//b'

# The compiled snippet is `/a\` followed by a real newline and `b/`, i.e. a
# regex literal whose newline is preceded by a backslash. Compilation is
# expected to throw rather than accept the escaped newline.
test "does not allow to escape newlines", ->
  throws -> CoffeeScript.compile '/a\\\nb/'


# Heregexe(n|s)

# Compares the string form of a single-line regex with that of a multi-line
# heregex. The heregex's whitespace and its trailing `# or not` comment are
# expected to be dropped, its bare slashes to come out as escaped slashes, and
# the `gim` flags to be kept.
test "a heregex will ignore whitespace and comments", ->
  eq /^I'm\x20+[a]\s+Heregex?\/\/\//gim + '', ///
    ^ I'm \x20+ [a] \s+
    Heregex? / // # or not
  ///gim + ''

# A heregex containing only whitespace and a heregex with nothing at all
# between its delimiters are both expected to stringify as `/(?:)/`.
test "an empty heregex will compile to an empty, non-capturing group", ->
  eq /(?:)/ + '', ///  /// + ''
  eq /(?:)/ + '', ////// + ''

# Between the opening and closing `///` the body reads `//a/\/`, so the
# heregex content itself begins with slashes right after the delimiter. It is
# expected to match the '//a//' inside the subject string.
test "heregex starting with slashes", ->
  ok /////a/\////.test ' //a// '

# A heregex may appear inside the interpolation of another heregex without its
# `///` being mistaken for the end of the outer one.
test '#2388: `///` in heregex interpolations', ->
  ok ///a#{///b///}c///.test ' /a/b/c/ '
  ws = ' \t'
  # `scan` returns the text matched by the given regex at the start of
  # '\t  foo'. The inner heregex is itself interpolated with `ws`, and the
  # text it matches is in turn interpolated into the outer heregex.
  scan = (regex) -> regex.exec('\t  foo')[0]
  eq '/\t  /', /// #{scan /// [#{ws}]* ///} /// + ''

# Every snippet below puts a regex literal (plain or interpolated heregex) in
# callee position: an explicit `()` call, an implicit call with an argument,
# and an implicit call with an indented object. Each is expected to make the
# compiler throw.
test "regexes are not callable", ->
  throws -> CoffeeScript.compile '/a/()'
  throws -> CoffeeScript.compile '///a#{b}///()'
  throws -> CoffeeScript.compile '/a/ 1'
  throws -> CoffeeScript.compile '///a#{b}/// 1'
  throws -> CoffeeScript.compile '''
    /a/
       k: v
  '''
  throws -> CoffeeScript.compile '''
    ///a#{b}///
       k: v
  '''

# `\2` and `\1` refer back to the groups `(b)` and `(a)`, so the pattern is
# expected to match 'abba'.
test "backreferences", ->
  ok /(a)(b)\2\1/.test 'abba'

test "#3795: Escape otherwise invalid characters", ->
  ok (/ /).test '\u2028'
  ok (/ /).test '\u2029'
  ok ///\ ///.test '\u2028'
  ok ///\ ///.test '\u2029'
  ok ///a b///.test 'ab' # The space is U+2028.
  ok ///a b///.test 'ab' # The space is U+2029.
  ok ///\0
      1///.test '\x001'

  a = 'a'
  ok ///#{a} b///.test 'ab' # The space is U+2028.
  ok ///#{a} b///.test 'ab' # The space is U+2029.
  ok ///#{a}\ ///.test 'a\u2028'
  ok ///#{a}\ ///.test 'a\u2029'
  ok ///#{a}\0
      1///.test 'a\x001'
-												test reorganization waypoint

											
										
										
											2010-12-29 05:48:54 +00:00
+								# Regular Expression Literals
 								# ---------------------------
-												finished reorganizing test suite

											
										
										
											2011-01-03 09:17:00 +00:00
+								# TODO: add method invocation tests: /regex/.toString()
-												test reorganization waypoint #2

											
										
										
											2010-12-29 19:06:57 +00:00
+								# * Regexen
 								# * Heregexen
-												test reorganization waypoint

											
										
										
											2010-12-29 05:48:54 +00:00
+								test "basic regular expression literals", ->
 								  ok 'a'.match(/a/)
 								  ok 'a'.match /a/
 								  ok 'a'.match(/a/g)
 								  ok 'a'.match /a/g
 								test "division is not confused for a regular expression", ->
-												Fix #3410, #3182: Allow regex to start with space or =

A regex may not follow a specific set of tokens. These were already known before
in the `NOT_REGEX` and `NOT_SPACED_REGEX` arrays. (However, I've refactored them
to be more correct and to add a few missing tokens). In all other cases (except
after a spaced callable) a slash is the start of a regex, and may now start with
a space or an equals sign. It’s really that simple!

A slash after a spaced callable is the only ambiguous case. We cannot know if
that's division or function application with a regex as the argument. The
spacing determines which is which:

Space on both sides:
- `a / b/i`  -> `a / b / i`
- `a /= b/i` -> `a /= b / i`

No spaces:
- `a/b/i`    -> `a / b / i`
- `a/=b/i`   -> `a /= b / i`

Space on the right side:
- `a/ b/i`   -> `a / b / i`
- `a/= b/i`  -> `a /= b / i`

Space on the left side:
- `a /b/i`   -> `a(/b/i)`
- `a /=b/i`  -> `a(/=b/i)`

The last case used to compile to `a /= b / i`, but that has been changed to be
consistent with the `/` operator. The last case really looks like a regex, so it
should be parsed as one.

Moreover, you may now also space the `/` and `/=` operators with other
whitespace characters than a space (such as tabs and non-breaking spaces) for
consistency.

Lastly, unclosed regexes are now reported as such, instead of generating some
other confusing error message.

It should perhaps also be noted that apart from escaping (such as `a /\ b/`) you
may now also use parentheses to disambiguate division and regex: `a (/ b/)`. See
https://github.com/jashkenas/coffeescript/issues/3182#issuecomment-26688427.

											
										
										
											2015-01-10 00:48:00 +00:00
+								  # Any spacing around the slash is allowed when it cannot be a regex.
-												test reorganization waypoint

											
										
										
											2010-12-29 05:48:54 +00:00
+								  eq 2, 4 / 2 / 1
-												Fix #3410, #3182: Allow regex to start with space or =

A regex may not follow a specific set of tokens. These were already known before
in the `NOT_REGEX` and `NOT_SPACED_REGEX` arrays. (However, I've refactored them
to be more correct and to add a few missing tokens). In all other cases (except
after a spaced callable) a slash is the start of a regex, and may now start with
a space or an equals sign. It’s really that simple!

A slash after a spaced callable is the only ambiguous case. We cannot know if
that's division or function application with a regex as the argument. The
spacing determines which is which:

Space on both sides:
- `a / b/i`  -> `a / b / i`
- `a /= b/i` -> `a /= b / i`

No spaces:
- `a/b/i`    -> `a / b / i`
- `a/=b/i`   -> `a /= b / i`

Space on the right side:
- `a/ b/i`   -> `a / b / i`
- `a/= b/i`  -> `a /= b / i`

Space on the left side:
- `a /b/i`   -> `a(/b/i)`
- `a /=b/i`  -> `a(/=b/i)`

The last case used to compile to `a /= b / i`, but that has been changed to be
consistent with the `/` operator. The last case really looks like a regex, so it
should be parsed as one.

Moreover, you may now also space the `/` and `/=` operators with other
whitespace characters than a space (such as tabs and non-breaking spaces) for
consistency.

Lastly, unclosed regexes are now reported as such, instead of generating some
other confusing error message.

It should perhaps also be noted that apart from escaping (such as `a /\ b/`) you
may now also use parentheses to disambiguate division and regex: `a (/ b/)`. See
https://github.com/jashkenas/coffeescript/issues/3182#issuecomment-26688427.

											
										
										
											2015-01-10 00:48:00 +00:00
+								  eq 2, 4/2/1
 								  eq 2, 4/ 2 / 1
 								  eq 2, 4 /2 / 1
 								  eq 2, 4 / 2/ 1
 								  eq 2, 4 / 2 /1
 								  eq 2, 4 /2/ 1
 								  a = (regex) -> regex.test 'a b c'
 								  a.valueOf = -> 4
-												test reorganization waypoint

											
										
										
											2010-12-29 05:48:54 +00:00
+								  b = 2
 								  g = 1
-												Fix #3410, #3182: Allow regex to start with space or =

A regex may not follow a specific set of tokens. These were already known before
in the `NOT_REGEX` and `NOT_SPACED_REGEX` arrays. (However, I've refactored them
to be more correct and to add a few missing tokens). In all other cases (except
after a spaced callable) a slash is the start of a regex, and may now start with
a space or an equals sign. It’s really that simple!

A slash after a spaced callable is the only ambiguous case. We cannot know if
that's division or function application with a regex as the argument. The
spacing determines which is which:

Space on both sides:
- `a / b/i`  -> `a / b / i`
- `a /= b/i` -> `a /= b / i`

No spaces:
- `a/b/i`    -> `a / b / i`
- `a/=b/i`   -> `a /= b / i`

Space on the right side:
- `a/ b/i`   -> `a / b / i`
- `a/= b/i`  -> `a /= b / i`

Space on the left side:
- `a /b/i`   -> `a(/b/i)`
- `a /=b/i`  -> `a(/=b/i)`

The last case used to compile to `a /= b / i`, but that has been changed to be
consistent with the `/` operator. The last case really looks like a regex, so it
should be parsed as one.

Moreover, you may now also space the `/` and `/=` operators with other
whitespace characters than a space (such as tabs and non-breaking spaces) for
consistency.

Lastly, unclosed regexes are now reported as such, instead of generating some
other confusing error message.

It should perhaps also be noted that apart from escaping (such as `a /\ b/`) you
may now also use parentheses to disambiguate division and regex: `a (/ b/)`. See
https://github.com/jashkenas/coffeescript/issues/3182#issuecomment-26688427.

											
										
										
											2015-01-10 00:48:00 +00:00
+								  eq 2, a / b/g
 								  eq 2, a/ b/g
 								  eq 2, a / b/ g
 								  eq 2, a	/	b/g # Tabs.
 								  eq 2, a / b/g # Non-breaking spaces.
 								  eq true, a /b/g
 								  # Use parentheses to disambiguate.
 								  eq true, a(/ b/g)
 								  eq true, a(/ b/)
 								  eq true, a (/ b/)
 								  # Escape to disambiguate.
 								  eq true, a /\ b/g
 								  eq false, a	/\	b/g
 								  eq true, a /\ b/
-												Fixes #1280, regex and compound division mixup.

											
										
										
											2011-04-23 17:33:35 +00:00
-												test reorganization waypoint

											
										
										
											2010-12-29 05:48:54 +00:00
+								  obj = method: -> 2
 								  two = 2
 								  eq 2, (obj.method()/two + obj.method()/two)
 								  i = 1
 								  eq 2, (4)/2/i
 								  eq 1, i/i/i
-												Fix #3410, #3182: Allow regex to start with space or =

A regex may not follow a specific set of tokens. These were already known before
in the `NOT_REGEX` and `NOT_SPACED_REGEX` arrays. (However, I've refactored them
to be more correct and to add a few missing tokens). In all other cases (except
after a spaced callable) a slash is the start of a regex, and may now start with
a space or an equals sign. It’s really that simple!

A slash after a spaced callable is the only ambiguous case. We cannot know if
that's division or function application with a regex as the argument. The
spacing determines which is which:

Space on both sides:
- `a / b/i`  -> `a / b / i`
- `a /= b/i` -> `a /= b / i`

No spaces:
- `a/b/i`    -> `a / b / i`
- `a/=b/i`   -> `a /= b / i`

Space on the right side:
- `a/ b/i`   -> `a / b / i`
- `a/= b/i`  -> `a /= b / i`

Space on the left side:
- `a /b/i`   -> `a(/b/i)`
- `a /=b/i`  -> `a(/=b/i)`

The last case used to compile to `a /= b / i`, but that has been changed to be
consistent with the `/` operator. The last case really looks like a regex, so it
should be parsed as one.

Moreover, you may now also space the `/` and `/=` operators with other
whitespace characters than a space (such as tabs and non-breaking spaces) for
consistency.

Lastly, unclosed regexes are now reported as such, instead of generating some
other confusing error message.

It should perhaps also be noted that apart from escaping (such as `a /\ b/`) you
may now also use parentheses to disambiguate division and regex: `a (/ b/)`. See
https://github.com/jashkenas/coffeescript/issues/3182#issuecomment-26688427.

											
										
										
											2015-01-10 00:48:00 +00:00
+								  a = ''
 								  a += ' ' until /   /.test a
 								  eq a, '   '
 								  a = if /=/.test '=' then yes else no
 								  eq a, yes
 								  a = if !/=/.test '=' then yes else no
 								  eq a, no
 								  #3182:
 								  match = 'foo=bar'.match /=/
 								  eq match[0], '='
 								  #3410:
 								  ok ' '.match(/ /)[0] is ' '
 								test "division vs regex after a callable token", ->
 								  b = 2
 								  g = 1
 								  r = (r) -> r.test 'b'
 								  a = 4
 								  eq 2, a / b/g
 								  eq 2, a/b/g
 								  eq 2, a/ b/g
 								  eq true, r /b/g
 								  eq 2, (1 + 3) / b/g
 								  eq 2, (1 + 3)/b/g
 								  eq 2, (1 + 3)/ b/g
 								  eq true, (r) /b/g
 								  eq 2, [4][0] / b/g
 								  eq 2, [4][0]/b/g
 								  eq 2, [4][0]/ b/g
 								  eq true, [r][0] /b/g
 								  eq 0.5, 4? / b/g
 								  eq 0.5, 4?/b/g
 								  eq 0.5, 4?/ b/g
 								  eq true, r? /b/g
 								  (->
 								    eq 2, @ / b/g
 								    eq 2, @/b/g
 								    eq 2, @/ b/g
 								  ).call 4
 								  (->
 								    eq true, @ /b/g
 								  ).call r
 								  (->
 								    eq 2, this / b/g
 								    eq 2, this/b/g
 								    eq 2, this/ b/g
 								  ).call 4
 								  (->
 								    eq true, this /b/g
 								  ).call r
 								  class A
 								    p: (regex) -> if regex then r regex else 4
 								  class B extends A
 								    p: ->
 								      eq 2, super / b/g
 								      eq 2, super/b/g
 								      eq 2, super/ b/g
 								      eq true, super /b/g
 								  new B().p()
 								test "always division and never regex after some tokens", ->
 								  b = 2
 								  g = 1
 								  eq 2, 4 / b/g
 								  eq 2, 4/b/g
 								  eq 2, 4/ b/g
 								  eq 2, 4 /b/g
 								  eq 2, "4" / b/g
 								  eq 2, "4"/b/g
 								  eq 2, "4"/ b/g
 								  eq 2, "4" /b/g
-												Fix #3194: Make strings always uncallable

No matter if they have interpolations or not.

											
										
										
											2015-01-14 20:27:24 +00:00
+								  eq 20, "4#{0}" / b/g
 								  eq 20, "4#{0}"/b/g
 								  eq 20, "4#{0}"/ b/g
 								  eq 20, "4#{0}" /b/g
-												Fix #3410, #3182: Allow regex to start with space or =

A regex may not follow a specific set of tokens. These were already known before
in the `NOT_REGEX` and `NOT_SPACED_REGEX` arrays. (However, I've refactored them
to be more correct and to add a few missing tokens). In all other cases (except
after a spaced callable) a slash is the start of a regex, and may now start with
a space or an equals sign. It’s really that simple!

A slash after a spaced callable is the only ambiguous case. We cannot know if
that's division or function application with a regex as the argument. The
spacing determines which is which:

Space on both sides:
- `a / b/i`  -> `a / b / i`
- `a /= b/i` -> `a /= b / i`

No spaces:
- `a/b/i`    -> `a / b / i`
- `a/=b/i`   -> `a /= b / i`

Space on the right side:
- `a/ b/i`   -> `a / b / i`
- `a/= b/i`  -> `a /= b / i`

Space on the left side:
- `a /b/i`   -> `a(/b/i)`
- `a /=b/i`  -> `a(/=b/i)`

The last case used to compile to `a /= b / i`, but that has been changed to be
consistent with the `/` operator. The last case really looks like a regex, so it
should be parsed as one.

Moreover, you may now also space the `/` and `/=` operators with other
whitespace characters than a space (such as tabs and non-breaking spaces) for
consistency.

Lastly, unclosed regexes are now reported as such, instead of generating some
other confusing error message.

It should perhaps also be noted that apart from escaping (such as `a /\ b/`) you
may now also use parentheses to disambiguate division and regex: `a (/ b/)`. See
https://github.com/jashkenas/coffeescript/issues/3182#issuecomment-26688427.

											
										
										
											2015-01-10 00:48:00 +00:00
+								  ok isNaN /a/ / b/g
 								  ok isNaN /a/i / b/g
 								  ok isNaN /a//b/g
 								  ok isNaN /a/i/b/g
 								  ok isNaN /a// b/g
 								  ok isNaN /a/i/ b/g
 								  ok isNaN /a/ /b/g
 								  ok isNaN /a/i /b/g
 								  eq 0.5, true / b/g
 								  eq 0.5, true/b/g
 								  eq 0.5, true/ b/g
 								  eq 0.5, true /b/g
 								  eq 0, false / b/g
 								  eq 0, false/b/g
 								  eq 0, false/ b/g
 								  eq 0, false /b/g
 								  eq 0, null / b/g
 								  eq 0, null/b/g
 								  eq 0, null/ b/g
 								  eq 0, null /b/g
 								  ok isNaN undefined / b/g
 								  ok isNaN undefined/b/g
 								  ok isNaN undefined/ b/g
 								  ok isNaN undefined /b/g
 								  ok isNaN {a: 4} / b/g
 								  ok isNaN {a: 4}/b/g
 								  ok isNaN {a: 4}/ b/g
 								  ok isNaN {a: 4} /b/g
 								  o = prototype: 4
 								  eq 2, o:: / b/g
 								  eq 2, o::/b/g
 								  eq 2, o::/ b/g
 								  eq 2, o:: /b/g
 								  i = 4
 								  eq 2.0, i++ / b/g
 								  eq 2.5, i++/b/g
 								  eq 3.0, i++/ b/g
 								  eq 3.5, i++ /b/g
 								  eq 4.0, i-- / b/g
 								  eq 3.5, i--/b/g
 								  eq 3.0, i--/ b/g
 								  eq 2.5, i-- /b/g
 								test "compound division vs regex", ->
 								  c = 4
 								  i = 2
 								  a = 10
 								  b = a /= c / i
 								  eq a, 5
 								  a = 10
 								  b = a /= c /i
 								  eq a, 5
 								  a = 10
 								  b = a	/=	c /i # Tabs.
 								  eq a, 5
 								  a = 10
 								  b = a /= c /i # Non-breaking spaces.
 								  eq a, 5
 								  a = 10
 								  b = a/= c /i
 								  eq a, 5
 								  a = 10
 								  b = a/=c/i
 								  eq a, 5
 								  a = (regex) -> regex.test '=C '
 								  b = a /=c /i
 								  eq b, true
 								  a = (regex) -> regex.test '= C '
 								  # Use parentheses to disambiguate.
 								  b = a(/= c /i)
 								  eq b, true
 								  b = a(/= c /)
 								  eq b, false
 								  b = a (/= c /)
 								  eq b, false
 								  # Escape to disambiguate.
 								  b = a /\= c /i
 								  eq b, true
 								  b = a /\= c /
 								  eq b, false
-												test reorganization waypoint

											
										
										
											2010-12-29 05:48:54 +00:00
+								test "#764: regular expressions should be indexable", ->
 								  eq /0/['source'], ///#{0}///['source']
 								test "#584: slashes are allowed unescaped in character classes", ->
 								  ok /^a\/[/]b$/.test 'a//b'
-												Refactor interpolation (and string and regex) handling in lexer

- Fix #3394: Unclosed single-quoted strings (both regular ones and heredocs)
  used to pass through the lexer, causing a parsing error later, while
  double-quoted strings caused an error already in the lexing phase. Now both
  single and double-quoted unclosed strings error out in the lexer (which is the
  more logical option) with consistent error messages. This also fixes the last
  comment by @satyr in #3301.

- Similar to the above, unclosed heregexes also used to pass through the lexer
  and not error until in the parsing phase, which resulted in confusing error
  messages. This has been fixed, too.

- Fix #3348, by adding passing tests.

- Fix #3529: If a string starts with an interpolation, an empty string is no
  longer emitted before the interpolation (unless it is needed to coerce the
  interpolation into a string).

- Block comments cannot contain `*/`. Now the error message also shows exactly
  where the offending `*/` is. This improvement might seem unrelated, but I had to
  touch that code anyway to refactor string and regex related code, and the
  change was very trivial. Moreover, it's consistent with the next two points.

- Regexes cannot start with `*`. Now the error message also shows exactly where
  the offending `*` is. (It might actually not be exactly at the start in
  heregexes.) It is a very minor improvement, but it was trivial to add.

- Octal escapes in strings are forbidden in CoffeeScript (just like in
  JavaScript strict mode). However, this used to be the case only for regular
  strings. Now they are also forbidden in heredocs. Moreover, the errors now
  point at the offending octal escape.

- Invalid regex flags are no longer allowed. This includes repeated modifiers
  and unknown ones. Moreover, invalid modifiers do not stop a heregex from
  being matched, which results in better error messages.

- Fix #3621: `///a#{1}///` compiles to `RegExp("a" + 1)`. So does
  `RegExp("a#{1}")`. Still, those two code snippets used to generate different
  tokens, which is a bit weird, but more importantly causes problems for
  coffeelint (see clutchski/coffeelint#340). This required lots of tests in
  test/location.coffee to be updated. Note that some updates to those tests are
  unrelated to this point; some have been updated to be more consistent (I
  discovered this because the refactored code happened to be seemingly more
  correct).

- Regular regex literals used to erroneously allow newlines to be escaped,
  causing invalid JavaScript output. This has been fixed.

- Heregexes may now be completely empty (`//////`), instead of erroring out with
  a confusing message.

- Fix #2388: Heredocs and heregexes used to be lexed simply, which meant that
  you couldn't nest a heredoc within a heredoc (double-quoted, that is) or a
  heregex inside a heregex.

- Fix #2321: If you used division inside interpolation and then a slash later in
  the string containing that interpolation, the division slash and the latter
  slash were erroneously matched as a regex. This has been fixed.

- Indentation inside interpolations in heredocs no longer affect how much
  indentation is removed from each line of the heredoc (which is more
  intuitive).

- Whitespace is now correctly trimmed from the start and end of strings in a few
  edge cases.

- Last but not least, the lexing of interpolated strings now seems to be more
  efficient. For a regular double-quoted string, we used to use a custom
  function to find the end of it (taking interpolations and interpolations
  within interpolations etc. into account). Then we used to re-find the
  interpolations and recursively lex their contents. In effect, the same string
  was processed twice, or even more in the case of deeper nesting of
  interpolations. Now the same string is processed just once.

- Code duplication between regular strings, heredocs, regular regexes and
  heregexes has been reduced.

- The above two points should result in more easily read code, too.

											
										
										
											2015-01-03 22:40:43 +00:00
+								test "does not allow to escape newlines", ->
 								  throws -> CoffeeScript.compile '/a\\\nb/'
-												fixes #1724: regular expressions beginning with `*`

also normalised capitalisation in a few error messages

											
										
										
											2011-09-22 08:09:58 +00:00
-												test reorganization waypoint

											
										
										
											2010-12-29 05:48:54 +00:00
-												removing over-hash-comment

											
										
										
											2011-03-12 02:41:12 +00:00
+								# Heregexe(n|s)
-												test reorganization waypoint

											
										
										
											2010-12-29 05:48:54 +00:00
 								test "a heregex will ignore whitespace and comments", ->
 								  eq /^I'm\x20+[a]\s+Heregex?\/\/\//gim + '', ///
 								    ^ I'm \x20+ [a] \s+
 								    Heregex? / // # or not
 								  ///gim + ''
 								test "an empty heregex will compile to an empty, non-capturing group", ->
 								  eq /(?:)/ + '', ///  /// + ''
-												Refactor interpolation (and string and regex) handling in lexer

- Fix #3394: Unclosed single-quoted strings (both regular ones and heredocs)
  used to pass through the lexer, causing a parsing error later, while
  double-quoted strings caused an error already in the lexing phase. Now both
  single and double-quoted unclosed strings error out in the lexer (which is the
  more logical option) with consistent error messages. This also fixes the last
  comment by @satyr in #3301.

- Similar to the above, unclosed heregexes also used to pass through the lexer
  and not error until in the parsing phase, which resulted in confusing error
  messages. This has been fixed, too.

- Fix #3348, by adding passing tests.

- Fix #3529: If a string starts with an interpolation, an empty string is no
  longer emitted before the interpolation (unless it is needed to coerce the
  interpolation into a string).

- Block comments cannot contain `*/`. Now the error message also shows exactly
  where the offending `*/` is. This improvement might seem unrelated, but I had to
  touch that code anyway to refactor string and regex related code, and the
  change was very trivial. Moreover, it's consistent with the next two points.

- Regexes cannot start with `*`. Now the error message also shows exactly where
  the offending `*` is. (It might actually not be exactly at the start in
  heregexes.) It is a very minor improvement, but it was trivial to add.

- Octal escapes in strings are forbidden in CoffeeScript (just like in
  JavaScript strict mode). However, this used to be the case only for regular
  strings. Now they are also forbidden in heredocs. Moreover, the errors now
  point at the offending octal escape.

- Invalid regex flags are no longer allowed. This includes repeated modifiers
  and unknown ones. Moreover, invalid modifiers do not stop a heregex from
  being matched, which results in better error messages.

- Fix #3621: `///a#{1}///` compiles to `RegExp("a" + 1)`. So does
  `RegExp("a#{1}")`. Still, those two code snippets used to generate different
  tokens, which is a bit weird, but more importantly causes problems for
  coffeelint (see clutchski/coffeelint#340). This required lots of tests in
  test/location.coffee to be updated. Note that some updates to those tests are
  unrelated to this point; some have been updated to be more consistent (I
  discovered this because the refactored code happened to be seemingly more
  correct).

- Regular regex literals used to erroneously allow newlines to be escaped,
  causing invalid JavaScript output. This has been fixed.

- Heregexes may now be completely empty (`//////`), instead of erroring out with
  a confusing message.

- Fix #2388: Heredocs and heregexes used to be lexed simply, which meant that
  you couldn't nest a heredoc within a heredoc (double-quoted, that is) or a
  heregex inside a heregex.

- Fix #2321: If you used division inside interpolation and then a slash later in
  the string containing that interpolation, the division slash and the latter
  slash were erroneously matched as a regex. This has been fixed.

- Indentation inside interpolations in heredocs no longer affect how much
  indentation is removed from each line of the heredoc (which is more
  intuitive).

- Whitespace is now correctly trimmed from the start and end of strings in a few
  edge cases.

- Last but not least, the lexing of interpolated strings now seems to be more
  efficient. For a regular double-quoted string, we used to use a custom
  function to find the end of it (taking interpolations and interpolations
  within interpolations etc. into account). Then we used to re-find the
  interpolations and recursively lex their contents. In effect, the same string
  was processed twice, or even more in the case of deeper nesting of
  interpolations. Now the same string is processed just once.

- Code duplication between regular strings, heredocs, regular regexes and
  heregexes has been reduced.

- The above two points should result in more easily read code, too.

											
										
										
											2015-01-03 22:40:43 +00:00
+								  eq /(?:)/ + '', ////// + ''
 								test "heregex starting with slashes", ->
 								  ok /////a/\////.test ' //a// '
 								test '#2388: `///` in heregex interpolations', ->
 								  ok ///a#{///b///}c///.test ' /a/b/c/ '
 								  ws = ' \t'
 								  scan = (regex) -> regex.exec('\t  foo')[0]
 								  eq '/\t  /', /// #{scan /// [#{ws}]* ///} /// + ''
-												fixes #1724 for heregexen

TODO: DRY up that regex handling code so we don't have a duplicate test/error

											
										
										
											2011-09-22 08:39:13 +00:00
-												Make regexes always uncallable

No matter if they have interpolations or not.

											
										
										
											2015-01-15 18:44:14 +00:00
+								test "regexes are not callable", ->
 								  throws -> CoffeeScript.compile '/a/()'
 								  throws -> CoffeeScript.compile '///a#{b}///()'
 								  throws -> CoffeeScript.compile '/a/ 1'
 								  throws -> CoffeeScript.compile '///a#{b}/// 1'
 								  throws -> CoffeeScript.compile '''
 								    /a/
 								       k: v
 								  '''
 								  throws -> CoffeeScript.compile '''
 								    ///a#{b}///
 								       k: v
 								  '''
-												Fix #3795: Never generate invalid strings and regexes

- Invalid `\x` and `\u` escapes now throw errors.
- U+2028 and U+2029 (which JavaScript treats as newline characters) are now
  escaped to `\u2028` and `\u2029`, respectively.
- Octal escapes are now forbidden not only in strings, but in regexes as well.
- `\0` escapes are now escaped if needed (so that they do not form an octal
  literal by mistake). Note that `\01` is an octal escape in a regex, while `\1`
  is a backreference. (Added a test for backreferences while at it.)
- Fixed a bug where newlines in strings weren't removed if preceded by an
  escaped character.

											
										
										
											2015-02-05 16:23:03 +00:00
 								test "backreferences", ->
 								  ok /(a)(b)\2\1/.test 'abba'
 								test "#3795: Escape otherwise invalid characters", ->
 								  ok (/ /).test '\u2028'
 								  ok (/ /).test '\u2029'
 								  ok ///\ ///.test '\u2028'
 								  ok ///\ ///.test '\u2029'
 								  ok ///a b///.test 'ab' # The space is U+2028.
 								  ok ///a b///.test 'ab' # The space is U+2029.
 								  ok ///\0
 ///.test '\x001'
 								  a = 'a'
 								  ok ///#{a} b///.test 'ab' # The space is U+2028.
 								  ok ///#{a} b///.test 'ab' # The space is U+2029.
 								  ok ///#{a}\ ///.test 'a\u2028'
 								  ok ///#{a}\ ///.test 'a\u2029'
 								  ok ///#{a}\0
 ///.test 'a\x001'