mirror of
https://github.com/jashkenas/coffeescript.git
synced 2022-11-09 12:23:24 -05:00
It is possible to match only valid JavaScript identifiers with a really long regex (like coco and CoffeeScriptRedux do), but CoffeeScript uses a much simpler one, which allows a bit too much.

Quoting @jashkenas in jashkenas/coffeescript#1718 (#issuecomment-2152464):

> But it still seems very much across the "worth it" line. You'll get the
> SyntaxError as soon as it hits JS, and performance aside -- even the increase
> in filesize for our browser coffee-script.js lib seems too much, considering
> this is something no one ever does, apart from experimentation.

In short, CoffeeScript treats any non-ASCII character as part of an identifier. However, Unicode spaces should be excluded, since having blank characters as part of a _word_ is very confusing. This commit does so, while still keeping the regex really simple.
This commit is contained in:
parent
b70f6571bd
commit
9ec427ba80
3 changed files with 29 additions and 2 deletions
|
@ -815,7 +815,7 @@
|
|||
|
||||
BOM = 65279;
|
||||
|
||||
IDENTIFIER = /^([$A-Za-z_\x7f-\uffff][$\w\x7f-\uffff]*)([^\n\S]*:(?!:))?/;
|
||||
IDENTIFIER = /^(?!\d)((?:(?!\s)[$\w\x7f-\uffff])+)([^\n\S]*:(?!:))?/;
|
||||
|
||||
NUMBER = /^0b[01]+|^0o[0-7]+|^0x[\da-f]+|^\d*\.?\d+(?:e[+-]?\d+)?/i;
|
||||
|
||||
|
|
|
@ -731,7 +731,8 @@ BOM = 65279
|
|||
|
||||
# Token matching regexes.
|
||||
IDENTIFIER = /// ^
|
||||
( [$A-Za-z_\x7f-\uffff][$\w\x7f-\uffff]* )
|
||||
(?!\d)
|
||||
( (?: (?!\s)[$\w\x7f-\uffff] )+ )
|
||||
( [^\n\S]* : (?!:) )? # Is this a property name?
|
||||
///
|
||||
|
||||
|
|
|
@ -52,6 +52,32 @@ test "Issue #986: Unicode identifiers", ->
|
|||
λ = 5
|
||||
eq λ, 5
|
||||
|
||||
test "#2516: Unicode spaces should not be part of identifiers", ->
|
||||
a = (x) -> x * 2
|
||||
b = 3
|
||||
eq 6, a b # U+00A0 NO-BREAK SPACE
|
||||
eq 6, a b # U+1680 OGHAM SPACE MARK
|
||||
eq 6, a b # U+2000 EN QUAD
|
||||
eq 6, a b # U+2001 EM QUAD
|
||||
eq 6, a b # U+2002 EN SPACE
|
||||
eq 6, a b # U+2003 EM SPACE
|
||||
eq 6, a b # U+2004 THREE-PER-EM SPACE
|
||||
eq 6, a b # U+2005 FOUR-PER-EM SPACE
|
||||
eq 6, a b # U+2006 SIX-PER-EM SPACE
|
||||
eq 6, a b # U+2007 FIGURE SPACE
|
||||
eq 6, a b # U+2008 PUNCTUATION SPACE
|
||||
eq 6, a b # U+2009 THIN SPACE
|
||||
eq 6, a b # U+200A HAIR SPACE
|
||||
eq 6, a b # U+202F NARROW NO-BREAK SPACE
|
||||
eq 6, a b # U+205F MEDIUM MATHEMATICAL SPACE
|
||||
eq 6, a b # U+3000 IDEOGRAPHIC SPACE
|
||||
|
||||
# #3560: Non-breaking space (U+00A0) (before `'c'`)
|
||||
eq 5, {c: 5}[ 'c' ]
|
||||
|
||||
# A line where every space is non-breaking
|
||||
eq 1 + 1, 2
|
||||
|
||||
test "don't accidentally stringify keywords", ->
|
||||
ok (-> this == 'this')() is false
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue