diff --git a/lib/coffee-script/lexer.js b/lib/coffee-script/lexer.js index 149f1935..9e4eb109 100644 --- a/lib/coffee-script/lexer.js +++ b/lib/coffee-script/lexer.js @@ -815,7 +815,7 @@ BOM = 65279; - IDENTIFIER = /^([$A-Za-z_\x7f-\uffff][$\w\x7f-\uffff]*)([^\n\S]*:(?!:))?/; + IDENTIFIER = /^(?!\d)((?:(?!\s)[$\w\x7f-\uffff])+)([^\n\S]*:(?!:))?/; NUMBER = /^0b[01]+|^0o[0-7]+|^0x[\da-f]+|^\d*\.?\d+(?:e[+-]?\d+)?/i; diff --git a/src/lexer.coffee b/src/lexer.coffee index 0ed1d71d..11cea8e9 100644 --- a/src/lexer.coffee +++ b/src/lexer.coffee @@ -731,7 +731,8 @@ BOM = 65279 # Token matching regexes. IDENTIFIER = /// ^ - ( [$A-Za-z_\x7f-\uffff][$\w\x7f-\uffff]* ) + (?!\d) + ( (?: (?!\s)[$\w\x7f-\uffff] )+ ) ( [^\n\S]* : (?!:) )? # Is this a property name? /// diff --git a/test/compilation.coffee b/test/compilation.coffee index b43e0b67..b1b5247a 100644 --- a/test/compilation.coffee +++ b/test/compilation.coffee @@ -52,6 +52,32 @@ test "Issue #986: Unicode identifiers", -> λ = 5 eq λ, 5 +test "#2516: Unicode spaces should not be part of identifiers", -> + a = (x) -> x * 2 + b = 3 + eq 6, a b # U+00A0 NO-BREAK SPACE + eq 6, a b # U+1680 OGHAM SPACE MARK + eq 6, a b # U+2000 EN QUAD + eq 6, a b # U+2001 EM QUAD + eq 6, a b # U+2002 EN SPACE + eq 6, a b # U+2003 EM SPACE + eq 6, a b # U+2004 THREE-PER-EM SPACE + eq 6, a b # U+2005 FOUR-PER-EM SPACE + eq 6, a b # U+2006 SIX-PER-EM SPACE + eq 6, a b # U+2007 FIGURE SPACE + eq 6, a b # U+2008 PUNCTUATION SPACE + eq 6, a b # U+2009 THIN SPACE + eq 6, a b # U+200A HAIR SPACE + eq 6, a b # U+202F NARROW NO-BREAK SPACE + eq 6, a b # U+205F MEDIUM MATHEMATICAL SPACE + eq 6, a b # U+3000 IDEOGRAPHIC SPACE + + # #3560: Non-breaking space (U+00A0) (before `'c'`) + eq 5, {c: 5}[ 'c' ] + + # A line where every space in non-breaking +  eq 1 + 1, 2   + test "don't accidentally stringify keywords", -> ok (-> this == 'this')() is false