1346 lines
49 KiB
CoffeeScript
1346 lines
49 KiB
CoffeeScript
# The CoffeeScript Lexer. Uses a series of token-matching regexes to attempt
|
||
# matches against the beginning of the source code. When a match is found,
|
||
# a token is produced, we consume the match, and start again. Tokens are in the
|
||
# form:
|
||
#
|
||
# [tag, value, locationData]
|
||
#
|
||
# where locationData is {first_line, first_column, last_line, last_column, last_line_exclusive, last_column_exclusive}, which is a
|
||
# format that can be fed directly into [Jison](https://github.com/zaach/jison). These
|
||
# are read by jison in the `parser.lexer` function defined in coffeescript.coffee.
|
||
|
||
{Rewriter, INVERSES} = require './rewriter'
|
||
|
||
# Import the helpers we need.
|
||
{count, starts, compact, repeat, invertLiterate, merge,
|
||
attachCommentsToNode, locationDataToString, throwSyntaxError
|
||
replaceUnicodeCodePointEscapes} = require './helpers'
|
||
|
||
# The Lexer Class
|
||
# ---------------
|
||
|
||
# The Lexer class reads a stream of CoffeeScript and divvies it up into tagged
|
||
# tokens. Some potential ambiguity in the grammar has been avoided by
|
||
# pushing some extra smarts into the Lexer.
|
||
exports.Lexer = class Lexer
|
||
|
||
# **tokenize** is the Lexer's main method. Scan by attempting to match tokens
|
||
# one at a time, using a regular expression anchored at the start of the
|
||
# remaining code, or a custom recursive token-matching method
|
||
# (for interpolations). When the next token has been recorded, we move forward
|
||
# within the code past the token, and begin again.
|
||
#
|
||
# Each tokenizing method is responsible for returning the number of characters
|
||
# it has consumed.
|
||
#
|
||
# Before returning the token stream, run it through the [Rewriter](rewriter.html).
|
||
tokenize: (code, opts = {}) ->
  @literate   = opts.literate       # Are we lexing literate CoffeeScript?
  @indent     = 0                   # The current indentation level.
  @baseIndent = 0                   # The overall minimum indentation level.
  @indebt     = 0                   # The over-indentation at the current level.
  @outdebt    = 0                   # The under-outdentation at the current level.
  @indents    = []                  # The stack of all current indentation levels.
  @indentLiteral = ''               # The indentation.
  @ends       = []                  # The stack for pairing up tokens.
  @tokens     = []                  # Stream of parsed tokens in the form `['TYPE', value, location data]`.
  @seenFor    = no                  # Used to recognize `FORIN`, `FOROF` and `FORFROM` tokens.
  @seenImport = no                  # Used to recognize `IMPORT FROM? AS?` tokens.
  @seenExport = no                  # Used to recognize `EXPORT FROM? AS?` tokens.
  @importSpecifierList = no         # Used to identify when in an `IMPORT {...} FROM? ...`.
  @exportSpecifierList = no         # Used to identify when in an `EXPORT {...} FROM? ...`.
  @csxDepth = 0                     # Used to optimize CSX checks, how deep in CSX we are.
  @csxObjAttribute = {}             # Used to detect if CSX attributes is wrapped in {} (<div {props...} />).

  @chunkLine =
    opts.line or 0                  # The start line for the current @chunk.
  @chunkColumn =
    opts.column or 0                # The start column of the current @chunk.
  @chunkOffset =
    opts.offset or 0                # The start offset for the current @chunk.
  code = @clean code                # The stripped, cleaned original source code.

  # At every position, run through this list of attempted matches,
  # short-circuiting if any of them succeed. Their order determines precedence:
  # `@literalToken` is the fallback catch-all.
  i = 0
  while @chunk = code[i..]
    consumed = \
      @identifierToken() or
      @commentToken()    or
      @whitespaceToken() or
      @lineToken()       or
      @stringToken()     or
      @numberToken()     or
      @csxToken()        or
      @regexToken()      or
      @jsToken()         or
      @literalToken()

    # Update position: each tokenizer returned how many characters it ate.
    [@chunkLine, @chunkColumn, @chunkOffset] = @getLineAndColumnFromChunk consumed

    i += consumed

    # Recursive tokenizing (interpolations) stops as soon as the pairing
    # stack balances out; the caller resumes from `index`.
    return {@tokens, index: i} if opts.untilBalanced and @ends.length is 0

  @closeIndentation()
  # Anything left on the `@ends` pairing stack means an opener was never closed.
  @error "missing #{end.tag}", (end.origin ? end)[2] if end = @ends.pop()
  return @tokens if opts.rewrite is off
  (new Rewriter).rewrite @tokens
|
||
|
||
# Preprocess the raw source before lexing: drop a leading byte-order mark,
# strip carriage returns and trailing spaces, prefix a synthetic newline when
# the source begins with whitespace (compensating `@chunkLine` so locations
# stay correct), and strip external Markdown when lexing literate
# CoffeeScript.
clean: (code) ->
  code = code.slice 1 if code.charCodeAt(0) is BOM
  code = code
    .replace(/\r/g, '')
    .replace(TRAILING_SPACES, '')
  if WHITESPACE.test code
    @chunkLine--
    code = "\n#{code}"
  if @literate then invertLiterate code else code
|
||
|
||
# Tokenizers
|
||
# ----------
|
||
|
||
# Matches identifying literals: variables, keywords, method names, etc.
# Check to ensure that JavaScript reserved words aren’t being used as
# identifiers. Because CoffeeScript reserves a handful of keywords that are
# allowed in JavaScript, we’re careful not to tag them as keywords when
# referenced as property names here, so you can still do `jQuery.is()` even
# though `is` means `===` otherwise.
identifierToken: ->
  inCSXTag = @atCSXTag()
  regex = if inCSXTag then CSX_ATTRIBUTE else IDENTIFIER
  return 0 unless match = regex.exec @chunk
  [input, id, colon] = match

  # Preserve length of id for location data
  idLength = id.length
  poppedToken = undefined
  # Context-sensitive keywords: these only act as keywords after a specific
  # preceding token or lexer state; each early-returns after emitting.
  if id is 'own' and @tag() is 'FOR'
    @token 'OWN', id
    return id.length
  if id is 'from' and @tag() is 'YIELD'
    @token 'FROM', id
    return id.length
  if id is 'as' and @seenImport
    if @value() is '*'
      @tokens[@tokens.length - 1][0] = 'IMPORT_ALL'
    else if @value(yes) in COFFEE_KEYWORDS
      # A CoffeeScript keyword used as an import name gets demoted back to a
      # plain identifier so `import {and as andFn}` works.
      prev = @prev()
      [prev[0], prev[1]] = ['IDENTIFIER', @value(yes)]
    if @tag() in ['DEFAULT', 'IMPORT_ALL', 'IDENTIFIER']
      @token 'AS', id
      return id.length
  if id is 'as' and @seenExport
    if @tag() in ['IDENTIFIER', 'DEFAULT']
      @token 'AS', id
      return id.length
    if @value(yes) in COFFEE_KEYWORDS
      prev = @prev()
      [prev[0], prev[1]] = ['IDENTIFIER', @value(yes)]
      @token 'AS', id
      return id.length
  if id is 'default' and @seenExport and @tag() in ['EXPORT', 'AS']
    @token 'DEFAULT', id
    return id.length
  # `do super` (without explicit parens) expands to `super()`.
  if id is 'do' and regExSuper = /^(\s*super)(?!\(\))/.exec @chunk[3...]
    @token 'SUPER', 'super'
    @token 'CALL_START', '('
    @token 'CALL_END', ')'
    [input, sup] = regExSuper
    return sup.length + 3

  prev = @prev()

  # An identifier directly after an accessor (or unspaced `@`), or one
  # carrying a trailing colon, is a property name rather than a variable.
  tag =
    if colon or prev? and
       (prev[0] in ['.', '?.', '::', '?::'] or
       not prev.spaced and prev[0] is '@')
      'PROPERTY'
    else
      'IDENTIFIER'

  tokenData = {}
  if tag is 'IDENTIFIER' and (id in JS_KEYWORDS or id in COFFEE_KEYWORDS) and
     not (@exportSpecifierList and id in COFFEE_KEYWORDS)
    tag = id.toUpperCase()
    if tag is 'WHEN' and @tag() in LINE_BREAK
      tag = 'LEADING_WHEN'
    else if tag is 'FOR'
      @seenFor = yes
    else if tag is 'UNLESS'
      tag = 'IF'
    else if tag is 'IMPORT'
      @seenImport = yes
    else if tag is 'EXPORT'
      @seenExport = yes
    else if tag in UNARY
      tag = 'UNARY'
    else if tag in RELATION
      if tag isnt 'INSTANCEOF' and @seenFor
        # `in`/`of` inside a `for` head become FORIN/FOROF.
        tag = 'FOR' + tag
        @seenFor = no
      else
        tag = 'RELATION'
        if @value() is '!'
          # `not in` / `not of`: fold the preceding `!` into this token,
          # remembering the original spelling for error messages.
          poppedToken = @tokens.pop()
          tokenData.invert = poppedToken.data?.original ? poppedToken[1]
  else if tag is 'IDENTIFIER' and @seenFor and id is 'from' and
     isForFrom(prev)
    tag = 'FORFROM'
    @seenFor = no
  # Throw an error on attempts to use `get` or `set` as keywords, or
  # what CoffeeScript would normally interpret as calls to functions named
  # `get` or `set`, i.e. `get({foo: function () {}})`.
  else if tag is 'PROPERTY' and prev
    if prev.spaced and prev[0] in CALLABLE and /^[gs]et$/.test(prev[1]) and
       @tokens.length > 1 and @tokens[@tokens.length - 2][0] not in ['.', '?.', '@']
      @error "'#{prev[1]}' cannot be used as a keyword, or as a function call
      without parentheses", prev[2]
    else if prev[0] is '.' and @tokens.length > 1 and (prevprev = @tokens[@tokens.length - 2])[0] is 'UNARY' and prevprev[1] is 'new'
      # `new.target` — retag the `new` unary.
      prevprev[0] = 'NEW_TARGET'
    else if @tokens.length > 2
      prevprev = @tokens[@tokens.length - 2]
      if prev[0] in ['@', 'THIS'] and prevprev and prevprev.spaced and
         /^[gs]et$/.test(prevprev[1]) and
         @tokens[@tokens.length - 3][0] not in ['.', '?.', '@']
        @error "'#{prevprev[1]}' cannot be used as a keyword, or as a
        function call without parentheses", prevprev[2]

  if tag is 'IDENTIFIER' and id in RESERVED
    @error "reserved word '#{id}'", length: id.length

  unless tag is 'PROPERTY' or @exportSpecifierList or @importSpecifierList
    # Translate CoffeeScript aliases (`and`, `or`, `isnt`, `yes`, …) into
    # their JavaScript operator equivalents, keeping the original spelling.
    if id in COFFEE_ALIASES
      alias = id
      id = COFFEE_ALIAS_MAP[id]
      tokenData.original = alias
    tag = switch id
      when '!'                 then 'UNARY'
      when '==', '!='          then 'COMPARE'
      when 'true', 'false'     then 'BOOL'
      when 'break', 'continue', \
           'debugger'          then 'STATEMENT'
      when '&&', '||'          then id
      else  tag

  tagToken = @token tag, id, length: idLength, data: tokenData
  tagToken.origin = [tag, alias, tagToken[2]] if alias
  if poppedToken
    # Stretch this token's location back over the popped `!` of `not in`.
    [tagToken[2].first_line, tagToken[2].first_column, tagToken[2].range[0]] =
      [poppedToken[2].first_line, poppedToken[2].first_column, poppedToken[2].range[0]]
  if colon
    # CSX attributes use `=` where object literals use `:`.
    colonOffset = input.lastIndexOf if inCSXTag then '=' else ':'
    colonToken = @token ':', ':', offset: colonOffset, length: colon.length
    colonToken.csxColon = yes if inCSXTag # used by rewriter
  if inCSXTag and tag is 'IDENTIFIER' and prev[0] isnt ':'
    # Separate consecutive CSX attributes with a zero-width comma.
    @token ',', ',', length: 0, origin: tagToken

  input.length
|
||
|
||
# Matches numbers, including decimals, hex, and exponential notation.
# Be careful not to interfere with ranges in progress.
numberToken: ->
  return 0 unless match = NUMBER.exec @chunk

  number = match[0]
  lexedLength = number.length

  # Reject legal-looking-but-disallowed spellings with targeted messages.
  switch
    when /^0[BOX]/.test number
      @error "radix prefix in '#{number}' must be lowercase", offset: 1
    when /^(?!0x).*E/.test number
      @error "exponential notation in '#{number}' must be indicated with a lowercase 'e'",
        offset: number.indexOf('E')
    when /^0\d*[89]/.test number
      @error "decimal literal '#{number}' must not be prefixed with '0'", length: lexedLength
    when /^0\d+/.test number
      @error "octal literal '#{number}' must be prefixed with '0o'", length: lexedLength

  parsedValue = Number number
  tokenData = {parsedValue}

  # A literal that overflows `Number` (e.g. `2e308`) parses to Infinity and
  # gets its own tag; its original spelling is preserved in the token data.
  tag = if Number.isFinite(parsedValue) then 'NUMBER' else 'INFINITY'
  if tag is 'INFINITY'
    tokenData.original = number
  @token tag, number,
    length: lexedLength
    data: tokenData
  lexedLength
|
||
|
||
# Matches strings, including multiline strings, as well as heredocs, with or without
# interpolation.
stringToken: ->
  [quote] = STRING_START.exec(@chunk) || []
  return 0 unless quote

  # If the preceding token is `from` and this is an import or export statement,
  # properly tag the `from`.
  prev = @prev()
  if prev and @value() is 'from' and (@seenImport or @seenExport)
    prev[0] = 'FROM'

  regex = switch quote
    when "'"   then STRING_SINGLE
    when '"'   then STRING_DOUBLE
    when "'''" then HEREDOC_SINGLE
    when '"""' then HEREDOC_DOUBLE

  {tokens, index: end} = @matchWithInterpolations regex, quote

  heredoc = quote.length is 3
  if heredoc
    # Find the smallest indentation. It will be removed from all lines later.
    indent = null
    doc = (token[1] for token, i in tokens when token[0] is 'NEOSTRING').join '#{}'
    while match = HEREDOC_INDENT.exec doc
      attempt = match[1]
      indent = attempt if indent is null or 0 < attempt.length < indent.length

  # (`indent` is undefined for single-quote-pair strings; the merge helper
  # only consults it for heredocs.)
  delimiter = quote.charAt(0)
  @mergeInterpolationTokens tokens, {quote, indent, endOffset: end}, (value) =>
    @validateUnicodeCodePointEscapes value, delimiter: quote

  if @atCSXTag()
    # NOTE(review): `origin: @prev` passes the method itself rather than its
    # result `@prev()`; compare `origin: tagToken` in identifierToken —
    # confirm whether the call was intended here.
    @token ',', ',', length: 0, origin: @prev

  end
|
||
|
||
# Matches and consumes comments. The comments are taken out of the token
# stream and saved for later, to be reinserted into the output after
# everything has been parsed and the JavaScript code generated.
commentToken: (chunk = @chunk) ->
  return 0 unless match = chunk.match COMMENT
  [comment, here] = match
  contents = null
  # Does this comment follow code on the same line?
  newLine = /^\s*\n+\s*#/.test comment
  if here
    matchIllegal = HERECOMMENT_ILLEGAL.exec comment
    if matchIllegal
      @error "block comments cannot contain #{matchIllegal[0]}",
        offset: matchIllegal.index, length: matchIllegal[0].length

    # Parse indentation or outdentation as if this block comment didn’t exist.
    chunk = chunk.replace "####{here}###", ''
    # Remove leading newlines, like `Rewriter::removeLeadingNewlines`, to
    # avoid the creation of unwanted `TERMINATOR` tokens.
    chunk = chunk.replace /^\n+/, ''
    @lineToken chunk

    # Pull out the ###-style comment’s content, and format it.
    content = here
    if '\n' in content
      content = content.replace /// \n #{repeat ' ', @indent} ///g, '\n'
    contents = [content]
  else
    # The `COMMENT` regex captures successive line comments as one token.
    # Remove any leading newlines before the first comment, but preserve
    # blank lines between line comments.
    content = comment.replace /^(\n*)/, ''
    # NOTE(review): the character class `[ |\t]` also matches a literal `|`
    # before the `#` — confirm whether `|` membership is intentional.
    content = content.replace /^([ |\t]*)#/gm, ''
    contents = content.split '\n'

  commentAttachments = for content, i in contents
    content: content
    here: here?
    newLine: newLine or i isnt 0 # Line comments after the first one start new lines, by definition.

  prev = @prev()
  unless prev
    # If there’s no previous token, create a placeholder token to attach
    # this comment to; and follow with a newline.
    commentAttachments[0].newLine = yes
    @lineToken @chunk[comment.length..] # Set the indent.
    placeholderToken = @makeToken 'JS', '', generated: yes
    placeholderToken.comments = commentAttachments
    @tokens.push placeholderToken
    @newlineToken 0
  else
    attachCommentsToNode commentAttachments, prev

  comment.length
|
||
|
||
# Matches raw JavaScript embedded directly into the source via backticks,
# either a single-backtick snippet (`JSTOKEN`) or a triple-backtick "here"
# block (`HERE_JSTOKEN`); the token records which form was used.
jsToken: ->
  return 0 unless @chunk.charAt(0) is '`'
  matchedHere = HERE_JSTOKEN.exec @chunk
  match = matchedHere or JSTOKEN.exec @chunk
  return 0 unless match
  # match[0] is the full backticked span; match[1] is the script inside it.
  [{length}, script] = match
  @token 'JS', script, {length, data: {here: !!matchedHere}}
  length
|
||
|
||
# Matches regular expression literals, as well as multiline extended ones.
# Lexing regular expressions is difficult to distinguish from division, so we
# borrow some basic heuristics from JavaScript and Ruby.
regexToken: ->
  switch
    when match = REGEX_ILLEGAL.exec @chunk
      @error "regular expressions cannot begin with #{match[2]}",
        offset: match.index + match[1].length
    when match = @matchWithInterpolations HEREGEX, '///'
      {tokens, index} = match
      # Heregexes may contain `#`-comments; strip and re-lex them so they
      # attach to surrounding tokens like ordinary comments.
      comments = @chunk[0...index].match /\s+(#(?!{).*)/g
      @commentToken comment for comment in comments if comments
    when match = REGEX.exec @chunk
      [regex, body, closed] = match
      @validateEscapes body, isRegex: yes, offsetInChunk: 1
      index = regex.length
      prev = @prev()
      if prev
        # Disambiguate regex from division based on the previous token.
        if prev.spaced and prev[0] in CALLABLE
          return 0 if not closed or POSSIBLY_DIVISION.test regex
        else if prev[0] in NOT_REGEX
          return 0
      @error 'missing / (unclosed regex)' unless closed
    else
      return 0

  [flags] = REGEX_FLAGS.exec @chunk[index..]
  end = index + flags.length
  origin = @makeToken 'REGEX', null, length: end
  switch
    when not VALID_FLAGS.test flags
      @error "invalid regular expression flags #{flags}", offset: index, length: flags.length
    when regex or tokens.length is 1
      # Plain (non-interpolated) regex: emit a single REGEX token.
      delimiter = if body then '/' else '///'
      body ?= tokens[0][1]
      @validateUnicodeCodePointEscapes body, {delimiter}
      @token 'REGEX', "/#{body}/#{flags}", {length: end, origin, data: {delimiter}}
    else
      # Interpolated heregex: desugar to `RegExp("...", "flags")`.
      @token 'REGEX_START', '(', {length: 0, origin}
      @token 'IDENTIFIER', 'RegExp', length: 0
      @token 'CALL_START', '(', length: 0
      # NOTE(review): `delimiter` is only assigned in the branch above, so it
      # appears to be undefined here — confirm whether `delimiter: '///'` was
      # intended in this callback.
      @mergeInterpolationTokens tokens, {double: yes, heregex: {flags}, endOffset: end - flags.length, quote: '///'}, (str) =>
        @validateUnicodeCodePointEscapes str, {delimiter}
      if flags
        @token ',', ',', offset: index - 1, length: 0
        @token 'STRING', '"' + flags + '"', offset: index - 1, length: flags.length
      @token ')', ')', offset: end, length: 0
      @token 'REGEX_END', ')', offset: end, length: 0

  end
|
||
|
||
# Matches newlines, indents, and outdents, and determines which is which.
# If we can detect that the current line is continued onto the next line,
# then the newline is suppressed:
#
#     elements
#       .each( ... )
#       .map( ... )
#
# Keeps track of the level of indentation, because a single outdent token
# can close multiple indents, so we need to know how far in we happen to be.
lineToken: (chunk = @chunk) ->
  return 0 unless match = MULTI_DENT.exec chunk
  indent = match[0]

  prev = @prev()
  backslash = prev?[0] is '\\'
  # A newline ends these statement-level states unless a trailing `\`
  # (or an open specifier list) continues the statement.
  @seenFor = no unless backslash and @seenFor
  @seenImport = no unless (backslash and @seenImport) or @importSpecifierList
  @seenExport = no unless (backslash and @seenExport) or @exportSpecifierList

  # Indentation size is measured from the last newline in the match.
  size = indent.length - 1 - indent.lastIndexOf '\n'
  noNewlines = @unfinished()

  newIndentLiteral = if size > 0 then indent[-size..] else ''
  # The indentation characters must all be the same (all spaces or all tabs).
  unless /^(.?)\1*$/.exec newIndentLiteral
    @error 'mixed indentation', offset: indent.length
    return indent.length

  # The new indentation must extend (or truncate) the previous literal, so
  # tabs and spaces can't be mixed across lines either.
  minLiteralLength = Math.min newIndentLiteral.length, @indentLiteral.length
  if newIndentLiteral[...minLiteralLength] isnt @indentLiteral[...minLiteralLength]
    @error 'indentation mismatch', offset: indent.length
    return indent.length

  # Same effective level: just a newline (possibly suppressed).
  if size - @indebt is @indent
    if noNewlines then @suppressNewlines() else @newlineToken 0
    return indent.length

  if size > @indent
    if noNewlines
      # An over-indented continuation line: record the debt, no INDENT token.
      @indebt = size - @indent unless backslash
      @suppressNewlines()
      return indent.length
    unless @tokens.length
      # Leading indentation before any token sets the baseline instead.
      @baseIndent = @indent = size
      @indentLiteral = newIndentLiteral
      return indent.length
    diff = size - @indent + @outdebt
    @token 'INDENT', diff, offset: indent.length - size, length: size
    @indents.push diff
    @ends.push {tag: 'OUTDENT'}
    @outdebt = @indebt = 0
    @indent = size
    @indentLiteral = newIndentLiteral
  else if size < @baseIndent
    @error 'missing indentation', offset: indent.length
  else
    @indebt = 0
    @outdentToken @indent - size, noNewlines, indent.length
  indent.length
|
||
|
||
# Record an outdent token or multiple tokens, if we happen to be moving back
# inwards past several recorded indents. Sets new @indent value.
outdentToken: (moveOut, noNewlines, outdentLength) ->
  decreasedIndent = @indent - moveOut
  while moveOut > 0
    lastIndent = @indents[@indents.length - 1]
    if not lastIndent
      # No recorded indents remain: cancel any remaining debt and stop.
      @outdebt = moveOut = 0
    else if @outdebt and moveOut <= @outdebt
      # The whole move is absorbed by outstanding outdent debt.
      @outdebt -= moveOut
      moveOut = 0
    else
      dent = @indents.pop() + @outdebt
      if outdentLength and @chunk[outdentLength] in INDENTABLE_CLOSERS
        # An immediately-following closer (`)`, `]`, `,`) lets the next line
        # re-open at this shallower level.
        decreasedIndent -= dent - moveOut
        moveOut = dent
      @outdebt = 0
      # pair might call outdentToken, so preserve decreasedIndent
      @pair 'OUTDENT'
      @token 'OUTDENT', moveOut, length: outdentLength
      moveOut -= dent
  @outdebt -= moveOut if dent
  @suppressSemicolons()

  @token 'TERMINATOR', '\n', offset: outdentLength, length: 0 unless @tag() is 'TERMINATOR' or noNewlines
  @indent = decreasedIndent
  @indentLiteral = @indentLiteral[...decreasedIndent]
  this
|
||
|
||
# Matches and consumes non-meaningful whitespace. The previous token is
# flagged as `spaced` (whitespace matched) or `newLine` (chunk starts with
# `\n`), because several lexing rules care about that distinction. Returns
# the number of characters consumed (zero for a bare newline, which
# `lineToken` handles).
whitespaceToken: ->
  matched = WHITESPACE.exec @chunk
  startsWithNewline = @chunk.charAt(0) is '\n'
  return 0 unless matched or startsWithNewline
  if previous = @prev()
    if matched then previous.spaced = true else previous.newLine = true
  if matched then matched[0].length else 0
|
||
|
||
# Generate a newline token. Consecutive newlines get merged together (no new
# TERMINATOR is emitted if one is already the last token), and redundant
# trailing semicolons are suppressed first.
newlineToken: (offset) ->
  @suppressSemicolons()
  unless @tag() is 'TERMINATOR'
    @token 'TERMINATOR', '\n', {offset, length: 0}
  this
|
||
|
||
# Use a `\` at a line-ending to suppress the newline. Once the suppression
# has taken effect, the backslash token itself is removed; any comments that
# were attached to it migrate to the token before it, when one exists.
suppressNewlines: ->
  last = @prev()
  if last[1] is '\\'
    if last.comments and @tokens.length > 1
      # With fewer than two tokens there is nothing before the `\` for the
      # comments to attach to, so they are discarded along with it.
      attachCommentsToNode last.comments, @tokens[@tokens.length - 2]
    @tokens.pop()
  this
|
||
|
||
# CSX is like JSX but for CoffeeScript.
csxToken: ->
  firstChar = @chunk[0]
  # Check the previous token to detect if attribute is spread.
  prevChar = if @tokens.length > 0 then @tokens[@tokens.length - 1][0] else ''
  if firstChar is '<'
    match = CSX_IDENTIFIER.exec(@chunk[1...]) or CSX_FRAGMENT_IDENTIFIER.exec(@chunk[1...])
    return 0 unless match and (
      @csxDepth > 0 or
      # Not the right hand side of an unspaced comparison (i.e. `a<b`).
      not (prev = @prev()) or
      prev.spaced or
      prev[0] not in COMPARABLE_LEFT_SIDE
    )
    [input, id] = match
    fullId = id
    # Dotted tag names (`<A.B.C>`) split into a head identifier plus
    # PROPERTY accesses.
    if '.' in id
      [id, properties...] = id.split '.'
    else
      properties = []
    tagToken = @token 'CSX_TAG', id,
      length: id.length + 1
      data:
        openingBracketToken: @makeToken '<', '<'
        tagNameToken: @makeToken 'IDENTIFIER', id, offset: 1
    offset = id.length + 1
    for property in properties
      @token '.', '.', {offset}
      offset += 1
      @token 'PROPERTY', property, {offset}
      offset += property.length
    # A CSX element lexes as a call whose children form an array argument.
    @token 'CALL_START', '(', generated: yes
    @token '[', '[', generated: yes
    @ends.push {tag: '/>', origin: tagToken, name: id, properties}
    @csxDepth++
    return fullId.length + 1
  else if csxTag = @atCSXTag()
    if @chunk[...2] is '/>' # Self-closing tag.
      @pair '/>'
      @token ']', ']',
        length: 2
        generated: yes
      @token 'CALL_END', ')',
        length: 2
        generated: yes
        data:
          selfClosingSlashToken: @makeToken '/', '/'
          closingBracketToken: @makeToken '>', '>', offset: 1
      @csxDepth--
      return 2
    else if firstChar is '{'
      # Attribute value or spread: `{...}` after `:` is a plain expression
      # (wrapped in parens); otherwise it is an object-style attribute.
      if prevChar is ':'
        token = @token '(', '('
        @csxObjAttribute[@csxDepth] = no
      else
        token = @token '{', '{'
        @csxObjAttribute[@csxDepth] = yes
      @ends.push {tag: '}', origin: token}
      return 1
    else if firstChar is '>' # end of opening tag
      # Ignore terminators inside a tag.
      {origin: openingTagToken} = @pair '/>' # As if the current tag was self-closing.
      @token ']', ']',
        generated: yes
        data:
          closingBracketToken: @makeToken '>', '>'
      @token ',', 'JSX_COMMA', generated: yes
      {tokens, index: end} =
        @matchWithInterpolations INSIDE_CSX, '>', '</', CSX_INTERPOLATION
      @mergeInterpolationTokens tokens, {endOffset: end}, (value) =>
        @validateUnicodeCodePointEscapes value, delimiter: '>'
      # The closing tag name must exactly match the (possibly dotted) opener.
      match = CSX_IDENTIFIER.exec(@chunk[end...]) or CSX_FRAGMENT_IDENTIFIER.exec(@chunk[end...])
      if not match or match[1] isnt "#{csxTag.name}#{(".#{property}" for property in csxTag.properties).join ''}"
        @error "expected corresponding CSX closing tag for #{csxTag.name}",
          csxTag.origin.data.tagNameToken[2]
      [, fullTagName] = match
      afterTag = end + fullTagName.length
      if @chunk[afterTag] isnt '>'
        @error "missing closing > after tag name", offset: afterTag, length: 1
      # -2/+2 for the opening `</` and +1 for the closing `>`.
      endToken = @token 'CALL_END', ')',
        offset: end - 2
        length: fullTagName.length + 3
        generated: yes
        data:
          closingTagOpeningBracketToken: @makeToken '<', '<', offset: end - 2
          closingTagSlashToken: @makeToken '/', '/', offset: end - 1
          # TODO: individual tokens for complex tag name? eg < / A . B >
          closingTagNameToken: @makeToken 'IDENTIFIER', fullTagName, offset: end
          closingTagClosingBracketToken: @makeToken '>', '>', offset: end + fullTagName.length
      # make the closing tag location data more easily accessible to the grammar
      addTokenData openingTagToken, endToken.data
      @csxDepth--
      return afterTag + 1
    else
      return 0
  else if @atCSXTag 1
    if firstChar is '}'
      @pair firstChar
      # Close whichever wrapper the matching `{` opened above.
      if @csxObjAttribute[@csxDepth]
        @token '}', '}'
        @csxObjAttribute[@csxDepth] = no
      else
        @token ')', ')'
      @token ',', ','
      return 1
    else
      return 0
  else
    return 0
|
||
|
||
# Returns the innermost open CSX tag entry from `@ends` (or `no` when we are
# not inside CSX). `depth` skips that many additional stack entries, letting
# callers look past an open attribute brace; OUTDENT entries are always
# skipped.
atCSXTag: (depth = 0) ->
  return no if @csxDepth is 0
  index = @ends.length - 1
  # `depth--` is evaluated (and decremented) on every iteration the OUTDENT
  # test fails, consuming exactly `depth` extra entries.
  index-- while @ends[index]?.tag is 'OUTDENT' or depth-- > 0
  top = @ends[index]
  if top?.tag is '/>' then top else no
|
||
|
||
# We treat all other single characters as a token. E.g.: `( ) , . !`
# Multi-character operators are also literal tokens, so that Jison can assign
# the proper order of operations. There are some symbols that we tag specially
# here. `;` and newlines are both treated as a `TERMINATOR`, we distinguish
# parentheses that indicate a method call from regular parentheses, and so on.
literalToken: ->
  if match = OPERATOR.exec @chunk
    [value] = match
    @tagParameters() if CODE.test value
  else
    value = @chunk.charAt 0
  tag = value
  prev = @prev()

  if prev and value in ['=', COMPOUND_ASSIGN...]
    skipToken = false
    # `||=` / `&&=` arrive as a `||`/`&&` token followed by an unspaced `=`;
    # fold them into a single COMPOUND_ASSIGN on the previous token.
    if value is '=' and prev[1] in ['||', '&&'] and not prev.spaced
      prev[0] = 'COMPOUND_ASSIGN'
      prev[1] += '='
      prev.data.original += '=' if prev.data?.original
      prev = @tokens[@tokens.length - 2]
      skipToken = true
    if prev and prev[0] isnt 'PROPERTY'
      origin = prev.origin ? prev
      message = isUnassignable prev[1], origin[1]
      @error message, origin[2] if message
    return value.length if skipToken

  if value is '(' and prev?[0] is 'IMPORT'
    prev[0] = 'DYNAMIC_IMPORT'

  # Track whether we are inside `import {...}` / `export {...}` braces.
  if value is '{' and @seenImport
    @importSpecifierList = yes
  else if @importSpecifierList and value is '}'
    @importSpecifierList = no
  else if value is '{' and prev?[0] is 'EXPORT'
    @exportSpecifierList = yes
  else if @exportSpecifierList and value is '}'
    @exportSpecifierList = no

  if value is ';'
    @error 'unexpected ;' if prev?[0] in ['=', UNFINISHED...]
    @seenFor = @seenImport = @seenExport = no
    tag = 'TERMINATOR'
  else if value is '*' and prev?[0] is 'EXPORT'
    tag = 'EXPORT_ALL'
  else if value in MATH            then tag = 'MATH'
  else if value in COMPARE         then tag = 'COMPARE'
  else if value in COMPOUND_ASSIGN then tag = 'COMPOUND_ASSIGN'
  else if value in UNARY           then tag = 'UNARY'
  else if value in UNARY_MATH      then tag = 'UNARY_MATH'
  else if value in SHIFT           then tag = 'SHIFT'
  else if value is '?' and prev?.spaced then tag = 'BIN?'
  else if prev
    # Unspaced `(` after a callable is a function call; unspaced `[` after an
    # indexable is an index access.
    if value is '(' and not prev.spaced and prev[0] in CALLABLE
      prev[0] = 'FUNC_EXIST' if prev[0] is '?'
      tag = 'CALL_START'
    else if value is '[' and ((prev[0] in INDEXABLE and not prev.spaced) or
        (prev[0] is '::')) # `.prototype` can’t be a method you can call.
      tag = 'INDEX_START'
      switch prev[0]
        when '?' then prev[0] = 'INDEX_SOAK'
  token = @makeToken tag, value
  switch value
    when '(', '{', '[' then @ends.push {tag: INVERSES[value], origin: token}
    when ')', '}', ']' then @pair value
  # NOTE(review): the token pushed here is a second `makeToken` result, so the
  # `origin` stored on `@ends` above is a distinct (identically-located)
  # token object — confirm whether pushing `token` itself was intended.
  @tokens.push @makeToken tag, value
  value.length
|
||
|
||
# Token Manipulators
|
||
# ------------------
|
||
|
||
# A source of ambiguity in our grammar used to be parameter lists in function
# definitions versus argument lists in function calls. Walk backwards, tagging
# parameters specially in order to make things easier for the parser.
tagParameters: ->
  return @tagDoIife() if @tag() isnt ')'
  stack = []
  {tokens} = this
  i = tokens.length
  # Provisionally retag the closing paren; it is restored below if the
  # matching opener turns out to be a call.
  paramEndToken = tokens[--i]
  paramEndToken[0] = 'PARAM_END'
  # Scan backwards for the matching opener, tracking nested paren pairs.
  while tok = tokens[--i]
    switch tok[0]
      when ')'
        stack.push tok
      when '(', 'CALL_START'
        if stack.length then stack.pop()
        else if tok[0] is '('
          tok[0] = 'PARAM_START'
          return @tagDoIife i - 1
        else
          # The matching opener was a CALL_START: this was an argument list
          # after all, so restore the closing tag.
          paramEndToken[0] = 'CALL_END'
          return this
  this
|
||
|
||
# Tag `do` followed by a function differently than `do` followed by e.g. an
# identifier, to allow for different grammar precedence. `tokenIndex`
# defaults to the last token in the stream.
tagDoIife: (tokenIndex) ->
  index = tokenIndex ? @tokens.length - 1
  candidate = @tokens[index]
  candidate[0] = 'DO_IIFE' if candidate?[0] is 'DO'
  this
|
||
|
||
# Close up all remaining open blocks at the end of the file.
# Delegates to `outdentToken` with the full current indentation, so every
# level still on `@indents` gets a matching OUTDENT.
closeIndentation: ->
  @outdentToken @indent
|
||
|
||
  # Match the contents of a delimited token and expand variables and expressions
  # inside it using Ruby-like notation for substitution of arbitrary
  # expressions.
  #
  #     "Hello #{name.capitalize()}."
  #
  # If it encounters an interpolation, this method will recursively create a new
  # Lexer and tokenize until the `{` of `#{` is balanced with a `}`.
  #
  # - `regex` matches the contents of a token (but not `delimiter`, and not
  #   `#{` if interpolations are desired).
  # - `delimiter` is the delimiter of the token. Examples are `'`, `"`, `'''`,
  #   `"""` and `///`.
  # - `closingDelimiter` is different from `delimiter` only in CSX
  # - `interpolators` matches the start of an interpolation, for CSX it's both
  #   `{` and `<` (i.e. nested CSX tag)
  #
  # Returns `null` when the chunk doesn't start with `delimiter`; otherwise an
  # object `{tokens, index}` where `tokens` is a list of fake `'NEOSTRING'`/
  # `'TOKENS'` tokens and `index` is how many characters were consumed.
  #
  # This method allows us to have strings within interpolations within strings,
  # ad infinitum.
  matchWithInterpolations: (regex, delimiter, closingDelimiter = delimiter, interpolators = /^#\{/) ->
    tokens = []
    offsetInChunk = delimiter.length
    return null unless @chunk[...offsetInChunk] is delimiter
    str = @chunk[offsetInChunk..]
    loop
      # `regex` can match the empty string, so `exec` always succeeds here.
      [strPart] = regex.exec str

      @validateEscapes strPart, {isRegex: delimiter.charAt(0) is '/', offsetInChunk}

      # Push a fake `'NEOSTRING'` token, which will get turned into a real string later.
      tokens.push @makeToken 'NEOSTRING', strPart, offset: offsetInChunk

      str = str[strPart.length..]
      offsetInChunk += strPart.length

      break unless match = interpolators.exec str
      [interpolator] = match

      # To remove the `#` in `#{`.
      interpolationOffset = interpolator.length - 1
      [line, column, offset] = @getLineAndColumnFromChunk offsetInChunk + interpolationOffset
      rest = str[interpolationOffset..]
      # Recursively tokenize the interpolation until its braces balance.
      {tokens: nested, index} =
        new Lexer().tokenize rest, {line, column, offset, untilBalanced: on}
      # Account for the `#` in `#{`.
      index += interpolationOffset

      braceInterpolator = str[index - 1] is '}'
      if braceInterpolator
        # Turn the leading and trailing `{` and `}` into parentheses. Unnecessary
        # parentheses will be removed later.
        [open, ..., close] = nested
        open[0] = 'INTERPOLATION_START'
        open[1] = '('
        open[2].first_column -= interpolationOffset
        open[2].range = [
          open[2].range[0] - interpolationOffset
          open[2].range[1]
        ]
        close[0] = 'INTERPOLATION_END'
        close[1] = ')'
        close.origin = ['', 'end of interpolation', close[2]]

      # Remove leading `'TERMINATOR'` (if any).
      nested.splice 1, 1 if nested[1]?[0] is 'TERMINATOR'
      # Remove trailing `'INDENT'/'OUTDENT'` pair (if any).
      nested.splice -3, 2 if nested[nested.length - 3]?[0] is 'INDENT' and nested[nested.length - 2][0] is 'OUTDENT'

      unless braceInterpolator
        # We are not using `{` and `}`, so wrap the interpolated tokens instead.
        open = @makeToken 'INTERPOLATION_START', '(', offset: offsetInChunk, length: 0
        close = @makeToken 'INTERPOLATION_END', ')', offset: offsetInChunk + index, length: 0
        nested = [open, nested..., close]

      # Push a fake `'TOKENS'` token, which will get turned into real tokens later.
      tokens.push ['TOKENS', nested]

      str = str[index..]
      offsetInChunk += index

    unless str[...closingDelimiter.length] is closingDelimiter
      @error "missing #{closingDelimiter}", length: delimiter.length

    {tokens, index: offsetInChunk + closingDelimiter.length}
|
||
|
||
  # Merge the array `tokens` of the fake token types `'TOKENS'` and `'NEOSTRING'`
  # (as returned by `matchWithInterpolations`) into the token stream. The value
  # of `'NEOSTRING'`s are converted using `fn` and turned into strings using
  # `options` first.
  mergeInterpolationTokens: (tokens, options, fn) ->
    {quote, indent, double, heregex, endOffset} = options

    # More than one fake token means the string contains interpolations, so it
    # is wrapped in `STRING_START`/`STRING_END` (parenthesis-like) tokens.
    if tokens.length > 1
      lparen = @token 'STRING_START', '(', length: quote?.length ? 0, data: {quote}

    firstIndex = @tokens.length
    $ = tokens.length - 1  # Index of the last fake token.
    for token, i in tokens
      [tag, value] = token
      switch tag
        when 'TOKENS'
          # There are comments (and nothing else) in this interpolation.
          if value.length is 2 and (value[0].comments or value[1].comments)
            placeholderToken = @makeToken 'JS', ''
            placeholderToken.generated = yes
            # Use the same location data as the first parenthesis.
            placeholderToken[2] = value[0][2]
            for val in value when val.comments
              placeholderToken.comments ?= []
              placeholderToken.comments.push val.comments...
            value.splice 1, 0, placeholderToken
          # Push all the tokens in the fake `'TOKENS'` token. These already have
          # sane location data.
          locationToken = value[0]
          tokensToPush = value
        when 'NEOSTRING'
          # Convert `'NEOSTRING'` into `'STRING'`.
          converted = fn.call this, token[1], i
          addTokenData token, initialChunk: yes if i is 0
          addTokenData token, finalChunk: yes if i is $
          addTokenData token, {indent, quote, double}
          addTokenData token, {heregex} if heregex
          token[0] = 'STRING'
          token[1] = '"' + converted + '"'
          if tokens.length is 1 and quote?
            # A single chunk with no interpolations: widen the location data so
            # it also covers the surrounding quotes.
            token[2].first_column -= quote.length
            if token[1].substr(-2, 1) is '\n'
              token[2].last_line += 1
              token[2].last_column = quote.length - 1
            else
              token[2].last_column += quote.length
              # An empty string (`token[1]` is just the two added quotes).
              token[2].last_column -= 1 if token[1].length is 2
            token[2].last_column_exclusive += quote.length
            token[2].range = [
              token[2].range[0] - quote.length
              token[2].range[1] + quote.length
            ]
          locationToken = token
          tokensToPush = [token]
      @tokens.push tokensToPush...

    if lparen
      # Give the wrapping parentheses location data spanning the whole string.
      [..., lastToken] = tokens
      lparen.origin = ['STRING', null,
        first_line: lparen[2].first_line
        first_column: lparen[2].first_column
        last_line: lastToken[2].last_line
        last_column: lastToken[2].last_column
        last_line_exclusive: lastToken[2].last_line_exclusive
        last_column_exclusive: lastToken[2].last_column_exclusive
        range: [
          lparen[2].range[0]
          lastToken[2].range[1]
        ]
      ]
      lparen[2] = lparen.origin[2]
      rparen = @token 'STRING_END', ')', offset: endOffset - (quote ? '').length, length: quote?.length ? 0
|
||
|
||
  # Pairs up a closing token, ensuring that all listed pairs of tokens are
  # correctly balanced throughout the course of the token stream.
  pair: (tag) ->
    [..., prev] = @ends
    unless tag is wanted = prev?.tag
      # Mismatch is fatal unless an implicit `OUTDENT` can reconcile it.
      @error "unmatched #{tag}" unless 'OUTDENT' is wanted
      # Auto-close `INDENT` to support syntax like this:
      #
      #     el.click((event) ->
      #       el.hide())
      #
      [..., lastIndent] = @indents
      @outdentToken lastIndent, true
      # Retry after the implicit outdent has popped the `OUTDENT` expectation.
      return @pair tag
    @ends.pop()
|
||
|
||
# Helpers
|
||
# -------
|
||
|
||
# Returns the line and column number from an offset into the current chunk.
|
||
#
|
||
# `offset` is a number of characters into `@chunk`.
|
||
getLineAndColumnFromChunk: (offset) ->
|
||
if offset is 0
|
||
return [@chunkLine, @chunkColumn, @chunkOffset]
|
||
|
||
if offset >= @chunk.length
|
||
string = @chunk
|
||
else
|
||
string = @chunk[..offset-1]
|
||
|
||
lineCount = count string, '\n'
|
||
|
||
column = @chunkColumn
|
||
if lineCount > 0
|
||
[..., lastLine] = string.split '\n'
|
||
column = lastLine.length
|
||
else
|
||
column += string.length
|
||
|
||
[@chunkLine + lineCount, column, @chunkOffset + offset]
|
||
|
||
  # Builds the `locationData` object (first/last line, column, and character
  # range) for a token starting at `offsetInChunk` within the current chunk and
  # spanning `length` characters.
  makeLocationData: ({ offsetInChunk, length }) ->
    locationData = range: []
    [locationData.first_line, locationData.first_column, locationData.range[0]] =
      @getLineAndColumnFromChunk offsetInChunk

    # Use length - 1 for the final offset - we’re supplying the last_line and the last_column,
    # so if last_column == first_column, then we’re looking at a character of length 1.
    lastCharacter = if length > 0 then (length - 1) else 0
    [locationData.last_line, locationData.last_column, endOffset] =
      @getLineAndColumnFromChunk offsetInChunk + lastCharacter
    # The `_exclusive` fields point one past the last character (same position
    # as the start for a zero-length token).
    [locationData.last_line_exclusive, locationData.last_column_exclusive] =
      @getLineAndColumnFromChunk offsetInChunk + lastCharacter + (if length > 0 then 1 else 0)
    locationData.range[1] = if length > 0 then endOffset + 1 else endOffset

    locationData
|
||
|
||
# Same as `token`, except this just returns the token without adding it
|
||
# to the results.
|
||
makeToken: (tag, value, {offset: offsetInChunk = 0, length = value.length, origin, generated} = {}) ->
|
||
token = [tag, value, @makeLocationData {offsetInChunk, length}]
|
||
token.origin = origin if origin
|
||
token.generated = yes if generated
|
||
token
|
||
|
||
# Add a token to the results.
|
||
# `offset` is the offset into the current `@chunk` where the token starts.
|
||
# `length` is the length of the token in the `@chunk`, after the offset. If
|
||
# not specified, the length of `value` will be used.
|
||
#
|
||
# Returns the new token.
|
||
token: (tag, value, {offset, length, origin, data, generated} = {}) ->
|
||
token = @makeToken tag, value, {offset, length, origin, generated}
|
||
addTokenData token, data if data
|
||
@tokens.push token
|
||
token
|
||
|
||
# Peek at the last tag in the token stream.
|
||
tag: ->
|
||
[..., token] = @tokens
|
||
token?[0]
|
||
|
||
# Peek at the last value in the token stream.
|
||
value: (useOrigin = no) ->
|
||
[..., token] = @tokens
|
||
if useOrigin and token?.origin?
|
||
token.origin[1]
|
||
else
|
||
token?[1]
|
||
|
||
# Get the previous token in the token stream.
|
||
prev: ->
|
||
@tokens[@tokens.length - 1]
|
||
|
||
# Are we in the midst of an unfinished expression?
|
||
unfinished: ->
|
||
LINE_CONTINUER.test(@chunk) or
|
||
@tag() in UNFINISHED
|
||
|
||
  # Replaces `\u{...}` code point escapes in `str`, passing `@error` along in
  # the options so invalid escapes are reported against this lexer's chunk.
  validateUnicodeCodePointEscapes: (str, options) ->
    replaceUnicodeCodePointEscapes str, merge options, {@error}
|
||
|
||
# Validates escapes in strings and regexes.
|
||
validateEscapes: (str, options = {}) ->
|
||
invalidEscapeRegex =
|
||
if options.isRegex
|
||
REGEX_INVALID_ESCAPE
|
||
else
|
||
STRING_INVALID_ESCAPE
|
||
match = invalidEscapeRegex.exec str
|
||
return unless match
|
||
[[], before, octal, hex, unicodeCodePoint, unicode] = match
|
||
message =
|
||
if octal
|
||
"octal escape sequences are not allowed"
|
||
else
|
||
"invalid escape sequence"
|
||
invalidEscape = "\\#{octal or hex or unicodeCodePoint or unicode}"
|
||
@error "#{message} #{invalidEscape}",
|
||
offset: (options.offsetInChunk ? 0) + match.index + before.length
|
||
length: invalidEscape.length
|
||
|
||
suppressSemicolons: ->
|
||
while @value() is ';'
|
||
@tokens.pop()
|
||
@error 'unexpected ;' if @prev()?[0] in ['=', UNFINISHED...]
|
||
|
||
# Throws an error at either a given offset from the current chunk or at the
|
||
# location of a token (`token[2]`).
|
||
error: (message, options = {}) =>
|
||
location =
|
||
if 'first_line' of options
|
||
options
|
||
else
|
||
[first_line, first_column] = @getLineAndColumnFromChunk options.offset ? 0
|
||
{first_line, first_column, last_column: first_column + (options.length ? 1) - 1}
|
||
throwSyntaxError message, location
|
||
|
||
# Helper functions
|
||
# ----------------
|
||
|
||
# Returns an error-message string when `name` may not be used as an assignment
# target, or `false` when the assignment is fine. `displayName` is what the
# message shows (defaults to `name` itself).
isUnassignable = (name, displayName = name) ->
  if name in JS_KEYWORDS or name in COFFEE_KEYWORDS
    "keyword '#{displayName}' can't be assigned"
  else if name in STRICT_PROSCRIBED
    "'#{displayName}' can't be assigned"
  else if name in RESERVED
    "reserved word '#{displayName}' can't be assigned"
  else
    false

exports.isUnassignable = isUnassignable
|
||
|
||
# `from` isn’t a CoffeeScript keyword, but it behaves like one in `import` and
# `export` statements (handled above) and in the declaration line of a `for`
# loop. Try to detect when `from` is a variable identifier and when it is this
# “sometimes” keyword. `prev` is the token immediately before `from`.
#
# NOTE: the previous implementation contained `prev[1][0] = 'IDENTIFIER'` in
# the identifier branch — an indexed assignment into the string `'from'`,
# which is a silent no-op in JavaScript — and both paths of that inner
# conditional returned `yes`. The dead code has been removed; behavior is
# unchanged.
isForFrom = (prev) ->
  # `for i from iterable`, `for i from from`, `for from from iterable`:
  # an identifier right before `from` means `from` is the keyword.
  if prev[0] is 'IDENTIFIER'
    yes
  # `for from…`
  else if prev[0] is 'FOR'
    no
  # `for {from}…`, `for [from]…`, `for {a, from}…`, `for {a: from}…`
  else if prev[1] in ['{', '[', ',', ':']
    no
  else
    yes
|
||
|
||
# Merge `data` into a token's `data` object, creating it when absent.
# Returns the token's `data` object.
addTokenData = (token, data) ->
  token.data ?= {}
  Object.assign token.data, data
|
||
|
||
# Constants
|
||
# ---------
|
||
|
||
# Keywords that CoffeeScript shares in common with JavaScript.
JS_KEYWORDS = [
  'true', 'false', 'null', 'this'
  'new', 'delete', 'typeof', 'in', 'instanceof'
  'return', 'throw', 'break', 'continue', 'debugger', 'yield', 'await'
  'if', 'else', 'switch', 'for', 'while', 'do', 'try', 'catch', 'finally'
  'class', 'extends', 'super'
  'import', 'export', 'default'
]

# CoffeeScript-only keywords.
COFFEE_KEYWORDS = [
  'undefined', 'Infinity', 'NaN'
  'then', 'unless', 'until', 'loop', 'of', 'by', 'when'
]

# CoffeeScript aliases that compile directly to JavaScript operators/literals.
COFFEE_ALIAS_MAP =
  and  : '&&'
  or   : '||'
  is   : '=='
  isnt : '!='
  not  : '!'
  yes  : 'true'
  no   : 'false'
  on   : 'true'
  off  : 'false'

COFFEE_ALIASES  = (key for key of COFFEE_ALIAS_MAP)
# The aliases count as keywords too.
COFFEE_KEYWORDS = COFFEE_KEYWORDS.concat COFFEE_ALIASES

# The list of keywords that are reserved by JavaScript, but not used, or are
# used by CoffeeScript internally. We throw an error when these are encountered,
# to avoid having a JavaScript error at runtime.
RESERVED = [
  'case', 'function', 'var', 'void', 'with', 'const', 'let', 'enum'
  'native', 'implements', 'interface', 'package', 'private'
  'protected', 'public', 'static'
]

# Identifiers that strict-mode JavaScript forbids assigning to.
STRICT_PROSCRIBED = ['arguments', 'eval']

# The superset of both JavaScript keywords and reserved words, none of which may
# be used as identifiers or properties.
exports.JS_FORBIDDEN = JS_KEYWORDS.concat(RESERVED).concat(STRICT_PROSCRIBED)

# The character code of the nasty Microsoft madness otherwise known as the BOM.
BOM = 65279
|
||
|
||
# Token matching regexes.
IDENTIFIER = /// ^
  (?!\d)
  ( (?: (?!\s)[$\w\x7f-\uffff] )+ )
  ( [^\n\S]* : (?!:) )?  # Is this a property name?
///

CSX_IDENTIFIER = /// ^
  (?![\d<]) # Must not start with `<`.
  ( (?: (?!\s)[\.\-$\w\x7f-\uffff] )+ ) # Like `IDENTIFIER`, but includes `-`s and `.`s.
///

# Fragment: <></>
CSX_FRAGMENT_IDENTIFIER = /// ^
  ()> # Ends immediately with `>`.
///

CSX_ATTRIBUTE = /// ^
  (?!\d)
  ( (?: (?!\s)[\-$\w\x7f-\uffff] )+ ) # Like `IDENTIFIER`, but includes `-`s.
  ( [^\S]* = (?!=) )? # Is this an attribute with a value?
///

NUMBER = ///
  ^ 0b[01]+    | # binary
  ^ 0o[0-7]+   | # octal
  ^ 0x[\da-f]+ | # hex
  ^ \d*\.?\d+ (?:e[+-]?\d+)? # decimal
///i

OPERATOR = /// ^ (
  ?: [-=]>             # function
   | [-+*/%<>&|^!?=]=  # compound assign / compare
   | >>>=?             # zero-fill right shift
   | ([-+:])\1         # doubles
   | ([&|<>*/%])\2=?   # logic / shift / power / floor division / modulo
   | \?(\.|::)         # soak access
   | \.{2,3}           # range or splat
) ///

# Whitespace that is not a newline.
WHITESPACE = /^[^\n\S]+/

# Either a `### ... ###` herecomment or a run of `#` line comments.
COMMENT = /^\s*###([^#][\s\S]*?)(?:###[^\n\S]*|###$)|^(?:\s*#(?!##[^#]).*)+/

# Function arrows: `->` and `=>`.
CODE = /^[-=]>/

# One or more newlines with trailing indentation.
MULTI_DENT = /^(?:\n[^\n\S]*)+/

JSTOKEN      = ///^ `(?!``) ((?: [^`\\] | \\[\s\S] )*) ` ///
HERE_JSTOKEN = ///^ ``` ((?: [^`\\] | \\[\s\S] | `(?!``) )*) ``` ///

# String-matching-regexes.
STRING_START = /^(?:'''|"""|'|")/

STRING_SINGLE  = /// ^(?: [^\\'] | \\[\s\S] )* ///
STRING_DOUBLE  = /// ^(?: [^\\"#] | \\[\s\S] | \#(?!\{) )* ///
HEREDOC_SINGLE = /// ^(?: [^\\'] | \\[\s\S] | '(?!'') )* ///
HEREDOC_DOUBLE = /// ^(?: [^\\"#] | \\[\s\S] | "(?!"") | \#(?!\{) )* ///

INSIDE_CSX = /// ^(?:
  [^
    \{ # Start of CoffeeScript interpolation.
    <  # Maybe CSX tag (`<` not allowed even if bare).
  ]
)* /// # Similar to `HEREDOC_DOUBLE` but there is no escaping.

CSX_INTERPOLATION = /// ^(?:
    \{     # CoffeeScript interpolation.
  | <(?!/) # CSX opening tag.
)///

# Captures the indentation of each heredoc line (used to strip common indent).
HEREDOC_INDENT = /\n+([^\n\S]*)(?=\S)/g

# Regex-matching-regexes.
REGEX = /// ^
  / (?!/) ((
  ?: [^ [ / \n \\ ]  # Every other thing.
   | \\[^\n]         # Anything but newlines escaped.
   | \[              # Character class.
       (?: \\[^\n] | [^ \] \n \\ ] )*
     \]
  )*) (/)?
///

REGEX_FLAGS = /^\w*/
VALID_FLAGS = /^(?!.*(.).*\1)[gimsuy]*$/

HEREGEX = /// ^
  (?:
      # Match any character, except those that need special handling below.
      [^\\/#\s]
      # Match `\` followed by any character.
    | \\[\s\S]
      # Match any `/` except `///`.
    | /(?!//)
      # Match `#` which is not part of interpolation, e.g. `#{}`.
    | \#(?!\{)
      # Comments consume everything until the end of the line, including `///`.
    | \s+(?:#(?!\{).*)?
  )*
///

# A regex literal cannot begin with `*` (that would be an empty quantifier).
REGEX_ILLEGAL = /// ^ ( / | /{3}\s*) (\*) ///

POSSIBLY_DIVISION = /// ^ /=?\s ///

# Other regexes.
HERECOMMENT_ILLEGAL = /\*\//

LINE_CONTINUER = /// ^ \s* (?: , | \??\.(?![.\d]) | \??:: ) ///

STRING_INVALID_ESCAPE = ///
  ( (?:^|[^\\]) (?:\\\\)* ) # Make sure the escape isn’t escaped.
  \\ (
     ?: (0[0-7]|[1-7])                   # octal escape
      | (x(?![\da-fA-F]{2}).{0,2})       # hex escape
      | (u\{(?![\da-fA-F]{1,}\})[^}]*\}?) # unicode code point escape
      | (u(?!\{|[\da-fA-F]{4}).{0,4})    # unicode escape
  )
///
REGEX_INVALID_ESCAPE = ///
  ( (?:^|[^\\]) (?:\\\\)* ) # Make sure the escape isn’t escaped.
  \\ (
     ?: (0[0-7])                         # octal escape
      | (x(?![\da-fA-F]{2}).{0,2})       # hex escape
      | (u\{(?![\da-fA-F]{1,}\})[^}]*\}?) # unicode code point escape
      | (u(?!\{|[\da-fA-F]{4}).{0,4})    # unicode escape
  )
///

TRAILING_SPACES = /\s+$/
|
||
|
||
# Compound assignment tokens.
COMPOUND_ASSIGN = [
  '-=', '+=', '/=', '*=', '%=', '||=', '&&=', '?=', '<<=', '>>=', '>>>='
  '&=', '^=', '|=', '**=', '//=', '%%='
]

# Unary tokens.
UNARY = ['NEW', 'TYPEOF', 'DELETE']

# Unary math operators.
UNARY_MATH = ['!', '~']

# Bit-shifting tokens.
SHIFT = ['<<', '>>', '>>>']

# Comparison tokens.
COMPARE = ['==', '!=', '<', '>', '<=', '>=']

# Mathematical tokens.
MATH = ['*', '/', '%', '//', '%%']

# Relational tokens that are negatable with `not` prefix.
RELATION = ['IN', 'OF', 'INSTANCEOF']

# Boolean tokens.
BOOL = ['TRUE', 'FALSE']

# Tokens which could legitimately be invoked or indexed. An opening
# parentheses or bracket following these tokens will be recorded as the start
# of a function invocation or indexing operation.
CALLABLE  = ['IDENTIFIER', 'PROPERTY', ')', ']', '?', '@', 'THIS', 'SUPER', 'DYNAMIC_IMPORT']
INDEXABLE = CALLABLE.concat [
  'NUMBER', 'INFINITY', 'NAN', 'STRING', 'STRING_END', 'REGEX', 'REGEX_END'
  'BOOL', 'NULL', 'UNDEFINED', '}', '::'
]

# Tokens which can be the left-hand side of a less-than comparison, i.e. `a<b`.
COMPARABLE_LEFT_SIDE = ['IDENTIFIER', ')', ']', 'NUMBER']

# Tokens which a regular expression will never immediately follow (except spaced
# CALLABLEs in some cases), but which a division operator can.
#
# See: http://www-archive.mozilla.org/js/language/js20-2002-04/rationale/syntax.html#regular-expressions
NOT_REGEX = INDEXABLE.concat ['++', '--']

# Tokens that, when immediately preceding a `WHEN`, indicate that the `WHEN`
# occurs at the start of a line. We disambiguate these from trailing whens to
# avoid an ambiguity in the grammar.
LINE_BREAK = ['INDENT', 'OUTDENT', 'TERMINATOR']

# Additional indent in front of these is ignored.
INDENTABLE_CLOSERS = [')', '}', ']']

# Tokens that, when appearing at the end of a line, suppress a following TERMINATOR/INDENT token
UNFINISHED = ['\\', '.', '?.', '?::', 'UNARY', 'DO', 'DO_IIFE', 'MATH', 'UNARY_MATH', '+', '-',
  '**', 'SHIFT', 'RELATION', 'COMPARE', '&', '^', '|', '&&', '||',
  'BIN?', 'EXTENDS']
|