Merge pull request #3774 from lydell/unicode-spaces

Fix #2516, #3560: Unicode space handling
This commit is contained in:
Jeremy Ashkenas 2015-01-06 16:10:59 -05:00
commit e769423d52
3 changed files with 29 additions and 2 deletions

View File

@@ -815,7 +815,7 @@
// 65279 is 0xFEFF, the code point of the Unicode byte order mark.
BOM = 65279;
// NOTE(review): the two IDENTIFIER assignments below look like the before/after
// lines of this diff hunk rather than two live statements — confirm.
// Identifier: a start character (`$`, ASCII letter, `_`, or a char from U+007F
// up) followed by any number of `$`, word, or U+007F-and-up chars; optionally
// followed by non-newline whitespace and a `:` that is not part of `::`.
IDENTIFIER = /^([$A-Za-z_\x7f-\uffff][$\w\x7f-\uffff]*)([^\n\S]*:(?!:))?/;
// Identifier: must not begin with a digit; one or more `$`, word, or
// U+007F-and-up chars, excluding anything `\s` matches; same optional `:` suffix.
IDENTIFIER = /^(?!\d)((?:(?!\s)[$\w\x7f-\uffff])+)([^\n\S]*:(?!:))?/;
// Number literal: `0b` binary, `0o` octal, `0x` hex, or decimal with optional
// fraction and exponent; matched case-insensitively.
NUMBER = /^0b[01]+|^0o[0-7]+|^0x[\da-f]+|^\d*\.?\d+(?:e[+-]?\d+)?/i;

View File

@@ -731,7 +731,8 @@ BOM = 65279
# Token matching regexes.
# Matches an identifier at the start of the input, optionally followed by a
# `:` (but not `::`).
# NOTE(review): the first capture-group line below and the two lines after it
# look like the before/after versions of the same group in this diff — confirm.
IDENTIFIER = /// ^
( [$A-Za-z_\x7f-\uffff][$\w\x7f-\uffff]* ) # Start char, then any number of `$`, word, or U+007F-and-up chars.
(?!\d) # Must not begin with a digit.
( (?: (?!\s)[$\w\x7f-\uffff] )+ ) # One or more `$`, word, or U+007F-and-up chars, excluding anything the whitespace class matches.
( [^\n\S]* : (?!:) )? # Is this a property name?
///

View File

@@ -52,6 +52,32 @@ test "Issue #986: Unicode identifiers", ->
λ = 5
eq λ, 5
test "#2516: Unicode spaces should not be part of identifiers", ->
a = (x) -> x * 2
b = 3
eq 6, a b # U+00A0 NO-BREAK SPACE
eq 6, ab # U+1680 OGHAM SPACE MARK
eq 6, a b # U+2000 EN QUAD
eq 6, ab # U+2001 EM QUAD
eq 6, ab # U+2002 EN SPACE
eq 6, ab # U+2003 EM SPACE
eq 6, ab # U+2004 THREE-PER-EM SPACE
eq 6, ab # U+2005 FOUR-PER-EM SPACE
eq 6, ab # U+2006 SIX-PER-EM SPACE
eq 6, ab # U+2007 FIGURE SPACE
eq 6, ab # U+2008 PUNCTUATION SPACE
eq 6, ab # U+2009 THIN SPACE
eq 6, ab # U+200A HAIR SPACE
eq 6, ab # U+202F NARROW NO-BREAK SPACE
eq 6, ab # U+205F MEDIUM MATHEMATICAL SPACE
eq 6, a b # U+3000 IDEOGRAPHIC SPACE
# #3560: Non-breaking space (U+00A0) (before `'c'`)
eq 5, {c: 5}[ 'c' ]
# A line where every space is non-breaking
  eq 1 + 1, 2  
test "don't accidentally stringify keywords", ->
ok (-> this == 'this')() is false