diff --git a/documentation/docs/lexer.html b/documentation/docs/lexer.html
index 3c592251..10f48a7d 100644
--- a/documentation/docs/lexer.html
+++ b/documentation/docs/lexer.html
@@ -36,7 +36,7 @@ to avoid having a JavaScript error at runtime.
be used as identifiers or properties.
JS_FORBIDDEN: JS_KEYWORDS.concat RESERVED
Token matching regexes.
IDENTIFIER : /^([a-zA-Z$_](\w|\$)*)/
NUMBER : /^(\b((0(x|X)[0-9a-fA-F]+)|([0-9]+(\.[0-9]+)?(e[+\-]?[0-9]+)?)))\b/i
HEREDOC : /^("{6}|'{6}|"{3}\n?([\s\S]*?)\n?([ \t]*)"{3}|'{3}\n?([\s\S]*?)\n?([ \t]*)'{3})/
-INTERPOLATION : /(^|[\s\S]*?(?:[\\]|\\\\)?)\$([a-zA-Z_@]\w*|{[\s\S]*?(?:[^\\]|\\\\)})/
+INTERPOLATION : /^\$([a-zA-Z_@]\w*)/
OPERATOR : /^([+\*&|\/\-%=<>:!?]+)/
WHITESPACE : /^([ \t]+)/
COMMENT : /^(((\n?[ \t]*)?#[^\n]*)+)/
@@ -64,31 +64,45 @@ treated as identifiers.
BEFORE_WHEN: ['INDENT', 'OUTDENT', 'TERMINATOR']
The Lexer class reads a stream of CoffeeScript and divvies it up into tagged
tokens. A minor bit of the ambiguity in the grammar has been avoided by
-pushing some extra smarts into the Lexer.
exports.Lexer: class Lexer
Scan by attempting to match tokens one at a time. Slow and steady.
tokenize: (code, options) ->
+pushing some extra smarts into the Lexer.
exports.Lexer: class Lexer
tokenize is the Lexer's main method. Scan by attempting to match tokens +one at a time, using a regular expression anchored at the start of the +remaining code, or a custom recursive token-matching method +(for interpolations). When the next token has been recorded, we move forward +within the code past the token, and begin again.
+
+Each tokenizing method is responsible for incrementing @i by the number of
+characters it has consumed. @i can be thought of as our finger on the page
+of source.
tokenize: (code, options) ->
o : options or {}
@code : code # The remainder of the source code.
@i : 0 # Current character position we're parsing.
@line : o.line or 0 # The current line.
- @indent : 0 # The current indent level.
- @indents : [] # The stack of all indent levels we are currently within.
- @tokens : [] # Collection of all parsed tokens in the form ['TOKEN_TYPE', value, line]
+ @indent : 0 # The current indentation level.
+ @indents : [] # The stack of all current indentation levels.
+ @tokens : [] # Stream of parsed tokens in the form ['TYPE', value, line]
while @i < @code.length
@chunk: @code.slice(@i)
@extract_next_token()
@close_indentation()
- return @tokens if o.rewrite is no
+ return @tokens if o.rewrite is off
(new Rewriter()).rewrite @tokens
At every position, run through this list of attempted matches, -short-circuiting if any of them succeed.
extract_next_token: ->
+short-circuiting if any of them succeed. Their order determines precedence:
+@literal_token is the fallback catch-all.
extract_next_token: ->
return if @identifier_token()
return if @number_token()
return if @heredoc_token()
- return if @string_token()
- return if @js_token()
return if @regex_token()
return if @comment_token()
return if @line_token()
return if @whitespace_token()
- return @literal_token()
Matches identifying literals: variables, keywords, method names, etc.
identifier_token: ->
+ return if @js_token()
+ return if @string_token()
+ return @literal_token()
Matches identifying literals: variables, keywords, method names, etc.
+Check to ensure that JavaScript reserved words aren't being used as
+identifiers. Because CoffeeScript reserves a handful of keywords that are
+allowed in JavaScript, we're careful not to tag them as keywords when
+referenced as property names here, so you can still do jQuery.is() even
+though is means === otherwise.
identifier_token: ->
return false unless id: @match IDENTIFIER, 1
@name_access_type()
tag: 'IDENTIFIER'
@@ -102,60 +116,55 @@ short-circuiting if any of them succeed.
Matches strings, including multi-line strings.
string_token: ->
+ true
Matches strings, including multi-line strings. Ensures that quotation marks +are balanced within the string's contents, and within nested interpolations.
string_token: ->
+ return false unless starts(@chunk, '"') or starts(@chunk, "'")
string: @balanced_token ['"', '"'], ['${', '}']
- string: @balanced_token ["'", "'"] if string is false
+ string: @balanced_token ["'", "'"] unless string
return false unless string
@interpolate_string string.replace STRING_NEWLINES, " \\\n"
@line += count string, "\n"
@i += string.length
- true
Matches heredocs, adjusting indentation to the correct level.
heredoc_token: ->
+ true
Matches heredocs, adjusting indentation to the correct level, as heredocs +preserve whitespace, but ignore indentation to the left.
heredoc_token: ->
return false unless match = @chunk.match(HEREDOC)
doc: @sanitize_heredoc match[2] or match[4]
@token 'STRING', "\"$doc\""
@line += count match[1], "\n"
@i += match[1].length
- true
Matches interpolated JavaScript.
js_token: ->
+ true
Matches JavaScript interpolated directly into the source via backticks.
js_token: ->
+ return false unless starts @chunk, '`'
return false unless script: @balanced_token ['`', '`']
@token 'JS', script.replace(JS_CLEANER, '')
@i += script.length
- true
Matches regular expression literals.
regex_token: ->
+ true
Matches regular expression literals. Regular expressions are hard to
+distinguish from division when lexing, so we borrow some basic heuristics from
+JavaScript and Ruby.
regex_token: ->
return false unless regex: @match REGEX, 1
return false if include NOT_REGEX, @tag()
@token 'REGEX', regex
@i += regex.length
- true
Matches a balanced group such as a single or double-quoted string. Pass in -a series of delimiters, all of which must be balanced correctly within the -token's contents.
balanced_token: (delimited...) ->
- levels: []
- i: 0
- while i < @chunk.length
- for pair in delimited
- [open, close]: pair
- if levels.length and starts @chunk, '\\', i
- i += 1
- break
- else if levels.length and starts(@chunk, close, i) and levels[levels.length - 1] is pair
- levels.pop()
- i += close.length - 1
- i += 1 unless levels.length
- break
- else if starts @chunk, open, i
- levels.push(pair)
- i += open.length - 1
- break
- break unless levels.length
- i += 1
- throw new Error "SyntaxError: Unterminated ${levels.pop()[0]} starting on line ${@line + 1}" if levels.length
- return false if i is 0
- return @chunk.substring(0, i)
Matches and conumes comments.
comment_token: ->
+ true
Matches a token in which the passed delimiter pairs must be correctly +balanced (i.e. strings, JS literals).
balanced_token: (delimited...) ->
+ @balanced_string @chunk, delimited...
Matches and consumes comments. We pass through comments into JavaScript, +so they're treated as real tokens, like any other part of the language.
comment_token: ->
return false unless comment: @match COMMENT, 1
@line += (comment.match(MULTILINER) or []).length
lines: comment.replace(COMMENT_CLEANER, '').split(MULTILINER)
@token 'COMMENT', compact lines
@token 'TERMINATOR', "\n"
@i += comment.length
- true
Matches newlines, indents, and outdents, and determines which is which.
line_token: ->
+ true
Matches newlines, indents, and outdents, and determines which is which. +If we can detect that the current line is continued onto the next line, +then the newline is suppressed:
+ +elements
+ .each( ... )
+ .map( ... )
+
+
+Keeps track of the level of indentation, because a single outdent token +can close multiple indents, so we need to know how far in we happen to be.
line_token: ->
return false unless indent: @match MULTI_DENT, 1
@line += indent.match(MULTILINER).length
@i += indent.length
@@ -165,18 +174,18 @@ token's contents.
Record an outdent token or tokens, if we happen to be moving back inwards -past multiple recorded indents.
outdent_token: (move_out, no_newlines) ->
+ true
Record an outdent token or multiple tokens, if we happen to be moving back +inwards past several recorded indents.
outdent_token: (move_out, no_newlines) ->
while move_out > 0 and @indents.length
last_indent: @indents.pop()
@token 'OUTDENT', last_indent
@@ -188,14 +197,16 @@ as being "spaced", because there are some cases where it makes a difference.
prev: @prev()
prev.spaced: true if prev
@i += space.length
- true
Generate a newline token. Multiple newlines get merged together.
newline_token: (newlines) ->
+ true
Generate a newline token. Consecutive newlines get merged together.
newline_token: (newlines) ->
@token 'TERMINATOR', "\n" unless @tag() is 'TERMINATOR'
true
Use a \ at a line-ending to suppress the newline.
-The slash is removed here once its job is done.
suppress_newlines: (newlines) ->
+The slash is removed here once its job is done.
suppress_newlines: ->
@tokens.pop() if @value() is "\\"
true
We treat all other single characters as a token. Eg.: ( ) , . !
Multi-character operators are also literal tokens, so that Jison can assign
-the proper order of operations.
literal_token: ->
+the proper order of operations. There are some symbols that we tag specially
+here. ; and newlines are both treated as a TERMINATOR; we distinguish
+parentheses that indicate a method call from regular parentheses, and so on.
literal_token: ->
match: @chunk.match(OPERATOR)
value: match and match[1]
@tag_parameters() if value and value.match(CODE)
@@ -227,15 +238,14 @@ if it's a special kind of accessor.
Sanitize a heredoc by escaping double quotes and erasing all external -indentation on the left-hand side.
sanitize_heredoc: (doc) ->
+ @tag 1, 'PROPERTY_ACCESS'
Sanitize a heredoc by escaping internal double quotes and erasing all +external indentation on the left-hand side.
sanitize_heredoc: (doc) ->
indent: (doc.match(HEREDOC_INDENT) or ['']).sort()[0]
doc.replace(new RegExp("^" +indent, 'gm'), '')
.replace(MULTILINER, "\\n")
- .replace(/"/g, '\\"')
A source of ambiguity in our grammar was parameter lists in function -definitions (as opposed to argument lists in function calls). Tag -parameter identifiers in order to avoid this. Also, parameter lists can -make use of splats.
tag_parameters: ->
+ .replace(/"/g, '\\"')
A source of ambiguity in our grammar used to be parameter lists in function +definitions versus argument lists in function calls. Walk backwards, tagging +parameters specially in order to make things easier for the parser.
tag_parameters: ->
return if @tag() isnt ')'
i: 0
while true
@@ -247,69 +257,92 @@ make use of splats.
Close up all remaining open blocks at the end of the file.
close_indentation: ->
- @outdent_token(@indent)
Error for when you try to use a forbidden word in JavaScript as + @outdent_token(@indent)
The error for when you try to use a forbidden word in JavaScript as an identifier.
identifier_error: (word) ->
- throw new Error "SyntaxError: Reserved word \"$word\" on line ${@line + 1}"
Error for when you try to assign to a reserved word in JavaScript, + throw new Error "SyntaxError: Reserved word \"$word\" on line ${@line + 1}"
The error for when you try to assign to a reserved word in JavaScript, like "function" or "default".
assignment_error: ->
- throw new Error "SyntaxError: Reserved word \"${@value()}\" on line ${@line + 1} can't be assigned"
Expand variables and expressions inside double-quoted strings using -ECMA Harmony's interpolation syntax.
+ throw new Error "SyntaxError: Reserved word \"${@value()}\" on line ${@line + 1} can't be assigned"Matches a balanced group such as a single or double-quoted string. Pass in +a series of delimiters, all of which must be nested correctly within the +contents of the string. This method allows us to have strings within +interpolations within strings etc...
balanced_string: (str, delimited...) ->
+ levels: []
+ i: 0
+ while i < str.length
+ for pair in delimited
+ [open, close]: pair
+ if levels.length and starts str, '\\', i
+ i += 1
+ break
+ else if levels.length and starts(str, close, i) and levels[levels.length - 1] is pair
+ levels.pop()
+ i += close.length - 1
+ i += 1 unless levels.length
+ break
+ else if starts str, open, i
+ levels.push(pair)
+ i += open.length - 1
+ break
+ break unless levels.length
+ i += 1
+ throw new Error "SyntaxError: Unterminated ${levels.pop()[0]} starting on line ${@line + 1}" if levels.length
+ return false if i is 0
+ return str.substring(0, i)
Expand variables and expressions inside double-quoted strings using +ECMA Harmony's interpolation syntax +for substitution of bare variables as well as arbitrary expressions.
"Hello $name."
"Hello ${name.capitalize()}."
-
interpolate_string: (str) ->
+
+
+If it encounters an interpolation, this method will recursively create a +new Lexer, tokenize the interpolated contents, and merge them into the +token stream.
interpolate_string: (str) ->
if str.length < 3 or not starts str, '"'
@token 'STRING', str
else
- lexer: new Lexer()
- tokens: []
- quote: str.substring(0, 1)
- str: str.substring(1, str.length - 1)
- while str.length
- match: str.match INTERPOLATION
- if match
- [group, before, interp]: match
- if starts before, '\\', before.length - 1
- prev: before.substring(0, before.length - 1)
- tokens.push ['STRING', "$quote$prev$$interp$quote"] if before.length
- else
- tokens.push ['STRING', "$quote$before$quote"] if before.length
- if starts interp, '{'
- inner: interp.substring(1, interp.length - 1)
- nested: lexer.tokenize "($inner)", {rewrite: no, line: @line}
- nested.pop()
- tokens.push ['TOKENS', nested]
- else
- interp: "this.${ interp.substring(1) }" if starts interp, '@'
- tokens.push ['IDENTIFIER', interp]
- str: str.substring(group.length)
- else
- tokens.push ['STRING', "$quote$str$quote"]
- str: ''
- if tokens.length > 1
- for i in [tokens.length - 1..1]
- [prev, tok]: [tokens[i - 1], tokens[i]]
- if tok[0] is 'STRING' and prev[0] is 'STRING'
- [prev, tok]: [prev[1].substring(1, prev[1].length - 1), tok[1].substring(1, tok[1].length - 1)]
- tokens.splice i - 1, 2, ['STRING', "$quote$prev$tok$quote"]
+ lexer: new Lexer()
+ tokens: []
+ quote: str.substring(0, 1)
+ [i, pi]: [1, 1]
+ while i < str.length - 1
+ if starts str, '\\', i
+ i += 1
+ else if match: str.substring(i).match INTERPOLATION
+ [group, interp]: match
+ interp: "this.${ interp.substring(1) }" if starts interp, '@'
+ tokens.push ['STRING', "$quote${ str.substring(pi, i) }$quote"] if pi < i
+ tokens.push ['IDENTIFIER', interp]
+ i += group.length - 1
+ pi: i + 1
+ else if (expr: @balanced_string str.substring(i), ['${', '}']) and expr.length > 3
+ inner: expr.substring(2, expr.length - 1)
+ nested: lexer.tokenize "($inner)", {rewrite: no, line: @line}
+ nested.pop()
+ tokens.push ['STRING', "$quote${ str.substring(pi, i) }$quote"] if pi < i
+ tokens.push ['TOKENS', nested]
+ i += expr.length - 1
+ pi: i + 1
+ i += 1
+ tokens.push ['STRING', "$quote${ str.substring(pi, i) }$quote"] if pi < i and pi < str.length - 1
for each, i in tokens
if each[0] is 'TOKENS'
- @token nested[0], nested[1] for nested in each[1]
+ @tokens: @tokens.concat each[1]
else
@token each[0], each[1]
- @token '+', '+' if i < tokens.length - 1
Add a token to the results, taking note of the line number.
token: (tag, value) ->
- @tokens.push([tag, value, @line])
Peek at a tag in the current token stream.
tag: (index, tag) ->
+ @token '+', '+' if i < tokens.length - 1
Add a token to the results, taking note of the line number.
token: (tag, value) ->
+ @tokens.push([tag, value, @line])
Peek at a tag in the current token stream.
tag: (index, tag) ->
return unless tok: @prev(index)
return tok[0]: tag if tag?
- tok[0]
Peek at a value in the current token stream.
value: (index, val) ->
+ tok[0]
Peek at a value in the current token stream.
value: (index, val) ->
return unless tok: @prev(index)
return tok[1]: val if val?
- tok[1]
Peek at a previous token, entire.
prev: (index) ->
- @tokens[@tokens.length - (index or 1)]
Attempt to match a string against the current chunk, returning the indexed + tok[1]
Peek at a previous token, entire.
prev: (index) ->
+ @tokens[@tokens.length - (index or 1)]
Attempt to match a string against the current chunk, returning the indexed
match if successful, and false otherwise.
match: (regex, index) ->
return false unless m: @chunk.match(regex)
- if m then m[index] else false
Does a list include a value?
include: (list, value) ->
- list.indexOf(value) >= 0
Peek at the beginning of a given string to see if it matches a sequence.
starts: (string, literal, start) ->
- string.substring(start, (start or 0) + literal.length) is literal
Trim out all falsy values from an array.
compact: (array) -> item for item in array when item
Count the number of occurences of a character in a string.
count: (string, letter) ->
+ if m then m[index] else false
Does a list include a value?
include: (list, value) ->
+ list.indexOf(value) >= 0
Peek at the beginning of a given string to see if it matches a sequence.
starts: (string, literal, start) ->
+ string.substring(start, (start or 0) + literal.length) is literal
Trim out all falsy values from an array.
compact: (array) -> item for item in array when item
Count the number of occurrences of a character in a string.
count: (string, letter) ->
num: 0
pos: string.indexOf(letter)
while pos isnt -1
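
Before moving on to the source diffs, here is a minimal standalone JavaScript sketch of the scanning pattern the tokenize comments above describe: try an anchored regex against the remaining code, record the token, and advance the position by however many characters were consumed. The matcher table and names are illustrative, not the library's own.

// A minimal sketch (not the library's code) of the anchored-match loop.
function scan(code) {
  var i = 0, tokens = [];
  var matchers = [                                  // hypothetical stand-ins for
    ['IDENTIFIER', /^[a-zA-Z$_][\w$]*/],            // identifier_token, number_token,
    ['NUMBER',     /^[0-9]+(\.[0-9]+)?/],           // whitespace_token, ...
    ['WHITESPACE', /^[ \t]+/]
  ];
  while (i < code.length) {
    var chunk = code.slice(i), matched = false;
    for (var m = 0; m < matchers.length; m++) {
      var hit = chunk.match(matchers[m][1]);
      if (!hit) continue;
      if (matchers[m][0] !== 'WHITESPACE') tokens.push([matchers[m][0], hit[0]]);
      i += hit[0].length;                            // the matcher "consumes" its text
      matched = true;
      break;
    }
    if (!matched) {                                  // fallback: single-character literal
      tokens.push([code.charAt(i), code.charAt(i)]);
      i += 1;
    }
  }
  return tokens;
}
// scan('count: 10') => [['IDENTIFIER','count'], [':',':'], ['NUMBER','10']]
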
diff --git a/lib/grammar.js b/lib/grammar.js
index 0cad597d..a600ec2c 100644
--- a/lib/grammar.js
+++ b/lib/grammar.js
@@ -4,16 +4,14 @@
// The CoffeeScript parser is generated by [Jison](http://github.com/zaach/jison)
// from this grammar file. Jison is a bottom-up parser generator, similar in
// style to [Bison](http://www.gnu.org/software/bison), implemented in JavaScript.
- // It can recognize
- // [LALR(1), LR(0), SLR(1), and LR(1)](http://en.wikipedia.org/wiki/LR_grammar)
+ // It can recognize [LALR(1), LR(0), SLR(1), and LR(1)](http://en.wikipedia.org/wiki/LR_grammar)
// type grammars. To create the Jison parser, we list the pattern to match
// on the left-hand side, and the action to take (usually the creation of syntax
// tree nodes) on the right. As the parser runs, it
// shifts tokens from our token stream, from left to right, and
// [attempts to match](http://en.wikipedia.org/wiki/Bottom-up_parsing)
// the token sequence against the rules below. When a match can be made, it
- // reduces into the
- // [nonterminal](http://en.wikipedia.org/wiki/Terminal_and_nonterminal_symbols)
+ // reduces into the [nonterminal](http://en.wikipedia.org/wiki/Terminal_and_nonterminal_symbols)
// (the enclosing name at the top), and we proceed from there.
// If you run the `cake build:parser` command, Jison constructs a parse table
// from our rules and saves it into `lib/parser.js`.
diff --git a/lib/lexer.js b/lib/lexer.js
index 31ae0f6e..9acd6fc0 100644
--- a/lib/lexer.js
+++ b/lib/lexer.js
@@ -74,7 +74,14 @@
// pushing some extra smarts into the Lexer.
exports.Lexer = (function() {
Lexer = function Lexer() { };
- // Scan by attempting to match tokens one at a time. Slow and steady.
+ // **tokenize** is the Lexer's main method. Scan by attempting to match tokens
+ // one at a time, using a regular expression anchored at the start of the
+ // remaining code, or a custom recursive token-matching method
+ // (for interpolations). When the next token has been recorded, we move forward
+ // within the code past the token, and begin again.
+ // Each tokenizing method is responsible for incrementing `@i` by the number of
+ // characters it has consumed. `@i` can be thought of as our finger on the page
+ // of source.
Lexer.prototype.tokenize = function tokenize(code, options) {
var o;
o = options || {};
@@ -85,11 +92,11 @@
this.line = o.line || 0;
// The current line.
this.indent = 0;
- // The current indent level.
+ // The current indentation level.
this.indents = [];
- // The stack of all indent levels we are currently within.
+ // The stack of all current indentation levels.
this.tokens = [];
- // Collection of all parsed tokens in the form ['TOKEN_TYPE', value, line]
+ // Stream of parsed tokens in the form ['TYPE', value, line]
while (this.i < this.code.length) {
this.chunk = this.code.slice(this.i);
this.extract_next_token();
@@ -101,7 +108,8 @@
return (new Rewriter()).rewrite(this.tokens);
};
// At every position, run through this list of attempted matches,
- // short-circuiting if any of them succeed.
+ // short-circuiting if any of them succeed. Their order determines precedence:
+ // `@literal_token` is the fallback catch-all.
Lexer.prototype.extract_next_token = function extract_next_token() {
if (this.identifier_token()) {
return null;
@@ -112,12 +120,6 @@
if (this.heredoc_token()) {
return null;
}
- if (this.string_token()) {
- return null;
- }
- if (this.js_token()) {
- return null;
- }
if (this.regex_token()) {
return null;
}
@@ -130,11 +132,22 @@
if (this.whitespace_token()) {
return null;
}
+ if (this.js_token()) {
+ return null;
+ }
+ if (this.string_token()) {
+ return null;
+ }
return this.literal_token();
};
// Tokenizers
// ----------
// Matches identifying literals: variables, keywords, method names, etc.
+ // Check to ensure that JavaScript reserved words aren't being used as
+ // identifiers. Because CoffeeScript reserves a handful of keywords that are
+ // allowed in JavaScript, we're careful not to tag them as keywords when
+ // referenced as property names here, so you can still do `jQuery.is()` even
+ // though `is` means `===` otherwise.
Lexer.prototype.identifier_token = function identifier_token() {
var id, tag;
if (!((id = this.match(IDENTIFIER, 1)))) {
@@ -165,11 +178,15 @@
this.i += number.length;
return true;
};
- // Matches strings, including multi-line strings.
+ // Matches strings, including multi-line strings. Ensures that quotation marks
+ // are balanced within the string's contents, and within nested interpolations.
Lexer.prototype.string_token = function string_token() {
var string;
+ if (!(starts(this.chunk, '"') || starts(this.chunk, "'"))) {
+ return false;
+ }
string = this.balanced_token(['"', '"'], ['${', '}']);
- if (string === false) {
+ if (!(string)) {
string = this.balanced_token(["'", "'"]);
}
if (!(string)) {
@@ -180,7 +197,8 @@
this.i += string.length;
return true;
};
- // Matches heredocs, adjusting indentation to the correct level.
+ // Matches heredocs, adjusting indentation to the correct level, as heredocs
+ // preserve whitespace, but ignore indentation to the left.
Lexer.prototype.heredoc_token = function heredoc_token() {
var doc, match;
if (!((match = this.chunk.match(HEREDOC)))) {
@@ -192,9 +210,12 @@
this.i += match[1].length;
return true;
};
- // Matches interpolated JavaScript.
+ // Matches JavaScript interpolated directly into the source via backticks.
Lexer.prototype.js_token = function js_token() {
var script;
+ if (!(starts(this.chunk, '`'))) {
+ return false;
+ }
if (!((script = this.balanced_token(['`', '`'])))) {
return false;
}
@@ -202,7 +223,9 @@
this.i += script.length;
return true;
};
- // Matches regular expression literals.
+ // Matches regular expression literals. Regular expressions are hard to
+ // distinguish from division when lexing, so we borrow some basic heuristics
+ // from JavaScript and Ruby.
Lexer.prototype.regex_token = function regex_token() {
var regex;
if (!((regex = this.match(REGEX, 1)))) {
@@ -215,57 +238,15 @@
this.i += regex.length;
return true;
};
- // Matches a balanced group such as a single or double-quoted string. Pass in
- // a series of delimiters, all of which must be balanced correctly within the
- // string.
- Lexer.prototype.balanced_string = function balanced_string(str) {
- var _a, _b, _c, _d, close, delimited, i, levels, open, pair;
- delimited = Array.prototype.slice.call(arguments, 1);
- levels = [];
- i = 0;
- while (i < str.length) {
- _a = delimited;
- for (_b = 0, _c = _a.length; _b < _c; _b++) {
- pair = _a[_b];
- _d = pair;
- open = _d[0];
- close = _d[1];
- if (levels.length && starts(str, '\\', i)) {
- i += 1;
- break;
- } else if (levels.length && starts(str, close, i) && levels[levels.length - 1] === pair) {
- levels.pop();
- i += close.length - 1;
- if (!(levels.length)) {
- i += 1;
- }
- break;
- } else if (starts(str, open, i)) {
- levels.push(pair);
- i += open.length - 1;
- break;
- }
- }
- if (!(levels.length)) {
- break;
- }
- i += 1;
- }
- if (levels.length) {
- throw new Error("SyntaxError: Unterminated " + (levels.pop()[0]) + " starting on line " + (this.line + 1));
- }
- if (i === 0) {
- return false;
- }
- return str.substring(0, i);
- };
- // Matches a balanced string within the token's contents.
+ // Matches a token in which the passed delimiter pairs must be correctly
+ // balanced (i.e. strings, JS literals).
Lexer.prototype.balanced_token = function balanced_token() {
var delimited;
delimited = Array.prototype.slice.call(arguments, 0);
return this.balanced_string.apply(this, [this.chunk].concat(delimited));
};
- // Matches and conumes comments.
+ // Matches and consumes comments. We pass through comments into JavaScript,
+ // so they're treated as real tokens, like any other part of the language.
Lexer.prototype.comment_token = function comment_token() {
var comment, lines;
if (!((comment = this.match(COMMENT, 1)))) {
@@ -279,6 +260,13 @@
return true;
};
// Matches newlines, indents, and outdents, and determines which is which.
+ // If we can detect that the current line is continued onto the next line,
+ // then the newline is suppressed:
+ // elements
+ // .each( ... )
+ // .map( ... )
+ // Keeps track of the level of indentation, because a single outdent token
+ // can close multiple indents, so we need to know how far in we happen to be.
Lexer.prototype.line_token = function line_token() {
var diff, indent, next_character, no_newlines, prev, size;
if (!((indent = this.match(MULTI_DENT, 1)))) {
@@ -292,12 +280,12 @@
no_newlines = next_character === '.' || (this.value() && this.value().match(NO_NEWLINE) && prev && (prev[0] !== '.') && !this.value().match(CODE));
if (size === this.indent) {
if (no_newlines) {
- return this.suppress_newlines(indent);
+ return this.suppress_newlines();
}
return this.newline_token(indent);
} else if (size > this.indent) {
if (no_newlines) {
- return this.suppress_newlines(indent);
+ return this.suppress_newlines();
}
diff = size - this.indent;
this.token('INDENT', diff);
@@ -308,8 +296,8 @@
this.indent = size;
return true;
};
- // Record an outdent token or tokens, if we happen to be moving back inwards
- // past multiple recorded indents.
+ // Record an outdent token or multiple tokens, if we happen to be moving back
+ // inwards past several recorded indents.
Lexer.prototype.outdent_token = function outdent_token(move_out, no_newlines) {
var last_indent;
while (move_out > 0 && this.indents.length) {
@@ -336,7 +324,7 @@
this.i += space.length;
return true;
};
- // Generate a newline token. Multiple newlines get merged together.
+ // Generate a newline token. Consecutive newlines get merged together.
Lexer.prototype.newline_token = function newline_token(newlines) {
if (!(this.tag() === 'TERMINATOR')) {
this.token('TERMINATOR', "\n");
@@ -345,7 +333,7 @@
};
// Use a `\` at a line-ending to suppress the newline.
// The slash is removed here once its job is done.
- Lexer.prototype.suppress_newlines = function suppress_newlines(newlines) {
+ Lexer.prototype.suppress_newlines = function suppress_newlines() {
if (this.value() === "\\") {
this.tokens.pop();
}
@@ -353,7 +341,9 @@
};
// We treat all other single characters as a token. Eg.: `( ) , . !`
// Multi-character operators are also literal tokens, so that Jison can assign
- // the proper order of operations.
+ // the proper order of operations. There are some symbols that we tag specially
+ // here. `;` and newlines are both treated as a `TERMINATOR`; we distinguish
+ // parentheses that indicate a method call from regular parentheses, and so on.
Lexer.prototype.literal_token = function literal_token() {
var match, not_spaced, tag, value;
match = this.chunk.match(OPERATOR);
@@ -407,17 +397,16 @@
}
}
};
- // Sanitize a heredoc by escaping double quotes and erasing all external
- // indentation on the left-hand side.
+ // Sanitize a heredoc by escaping internal double quotes and erasing all
+ // external indentation on the left-hand side.
Lexer.prototype.sanitize_heredoc = function sanitize_heredoc(doc) {
var indent;
indent = (doc.match(HEREDOC_INDENT) || ['']).sort()[0];
return doc.replace(new RegExp("^" + indent, 'gm'), '').replace(MULTILINER, "\\n").replace(/"/g, '\\"');
};
- // A source of ambiguity in our grammar was parameter lists in function
- // definitions (as opposed to argument lists in function calls). Tag
- // parameter identifiers in order to avoid this. Also, parameter lists can
- // make use of splats.
+ // A source of ambiguity in our grammar used to be parameter lists in function
+ // definitions versus argument lists in function calls. Walk backwards, tagging
+ // parameters specially in order to make things easier for the parser.
Lexer.prototype.tag_parameters = function tag_parameters() {
var _a, i, tok;
if (this.tag() !== ')') {
@@ -444,104 +433,126 @@
Lexer.prototype.close_indentation = function close_indentation() {
return this.outdent_token(this.indent);
};
- // Error for when you try to use a forbidden word in JavaScript as
+ // The error for when you try to use a forbidden word in JavaScript as
// an identifier.
Lexer.prototype.identifier_error = function identifier_error(word) {
throw new Error("SyntaxError: Reserved word \"" + word + "\" on line " + (this.line + 1));
};
- // Error for when you try to assign to a reserved word in JavaScript,
+ // The error for when you try to assign to a reserved word in JavaScript,
// like "function" or "default".
Lexer.prototype.assignment_error = function assignment_error() {
throw new Error("SyntaxError: Reserved word \"" + (this.value()) + "\" on line " + (this.line + 1) + " can't be assigned");
};
+ // Matches a balanced group such as a single or double-quoted string. Pass in
+ // a series of delimiters, all of which must be nested correctly within the
+ // contents of the string. This method allows us to have strings within
+ // interpolations within strings etc...
+ Lexer.prototype.balanced_string = function balanced_string(str) {
+ var _a, _b, _c, _d, close, delimited, i, levels, open, pair;
+ delimited = Array.prototype.slice.call(arguments, 1);
+ levels = [];
+ i = 0;
+ while (i < str.length) {
+ _a = delimited;
+ for (_b = 0, _c = _a.length; _b < _c; _b++) {
+ pair = _a[_b];
+ _d = pair;
+ open = _d[0];
+ close = _d[1];
+ if (levels.length && starts(str, '\\', i)) {
+ i += 1;
+ break;
+ } else if (levels.length && starts(str, close, i) && levels[levels.length - 1] === pair) {
+ levels.pop();
+ i += close.length - 1;
+ if (!(levels.length)) {
+ i += 1;
+ }
+ break;
+ } else if (starts(str, open, i)) {
+ levels.push(pair);
+ i += open.length - 1;
+ break;
+ }
+ }
+ if (!(levels.length)) {
+ break;
+ }
+ i += 1;
+ }
+ if (levels.length) {
+ throw new Error("SyntaxError: Unterminated " + (levels.pop()[0]) + " starting on line " + (this.line + 1));
+ }
+ if (i === 0) {
+ return false;
+ }
+ return str.substring(0, i);
+ };
// Expand variables and expressions inside double-quoted strings using
- // [ECMA Harmony's interpolation syntax](http://wiki.ecmascript.org/doku.php?id=strawman:string_interpolation).
+ // [ECMA Harmony's interpolation syntax](http://wiki.ecmascript.org/doku.php?id=strawman:string_interpolation)
+ // for substitution of bare variables as well as arbitrary expressions.
// "Hello $name."
// "Hello ${name.capitalize()}."
+ // If it encounters an interpolation, this method will recursively create a
+ // new Lexer, tokenize the interpolated contents, and merge them into the
+ // token stream.
Lexer.prototype.interpolate_string = function interpolate_string(str) {
- var _a, _b, _c, _d, _e, _f, _g, _h, _i, _j, _k, _l, _m, each, expression, group, i, inner, interp, last_i, lexer, match, nested, prev, quote, tok, tokens;
+ var _a, _b, _c, _d, _e, each, expr, group, i, inner, interp, lexer, match, nested, pi, quote, tokens;
if (str.length < 3 || !starts(str, '"')) {
return this.token('STRING', str);
} else {
lexer = new Lexer();
tokens = [];
quote = str.substring(0, 1);
- i = 1;
- last_i = i;
+ _a = [1, 1];
+ i = _a[0];
+ pi = _a[1];
while (i < str.length - 1) {
if (starts(str, '\\', i)) {
i += 1;
- } else {
- match = str.substring(i).match(INTERPOLATION);
- if (match) {
- _a = match;
- group = _a[0];
- interp = _a[1];
- if (starts(interp, '@')) {
- interp = "this." + (interp.substring(1));
- }
- if (last_i < i) {
- tokens.push(['STRING', quote + (str.substring(last_i, i)) + quote]);
- }
- tokens.push(['IDENTIFIER', interp]);
- i += group.length - 1;
- last_i = i + 1;
- } else {
- expression = this.balanced_string(str.substring(i), ['${', '}']);
- if (expression && expression.length > 3) {
- inner = expression.substring(2, expression.length - 1);
- nested = lexer.tokenize("(" + inner + ")", {
- rewrite: false,
- line: this.line
- });
- nested.pop();
- if (last_i < i) {
- tokens.push(['STRING', quote + (str.substring(last_i, i)) + quote]);
- }
- tokens.push(['TOKENS', nested]);
- i += expression.length - 1;
- last_i = i + 1;
- }
+ } else if ((match = str.substring(i).match(INTERPOLATION))) {
+ _b = match;
+ group = _b[0];
+ interp = _b[1];
+ if (starts(interp, '@')) {
+ interp = "this." + (interp.substring(1));
}
+ if (pi < i) {
+ tokens.push(['STRING', quote + (str.substring(pi, i)) + quote]);
+ }
+ tokens.push(['IDENTIFIER', interp]);
+ i += group.length - 1;
+ pi = i + 1;
+ } else if (((expr = this.balanced_string(str.substring(i), ['${', '}']))) && expr.length > 3) {
+ inner = expr.substring(2, expr.length - 1);
+ nested = lexer.tokenize("(" + inner + ")", {
+ rewrite: false,
+ line: this.line
+ });
+ nested.pop();
+ if (pi < i) {
+ tokens.push(['STRING', quote + (str.substring(pi, i)) + quote]);
+ }
+ tokens.push(['TOKENS', nested]);
+ i += expr.length - 1;
+ pi = i + 1;
}
i += 1;
}
- if (last_i < i && last_i < str.length - 1) {
- tokens.push(['STRING', quote + (str.substring(last_i, i)) + quote]);
+ if (pi < i && pi < str.length - 1) {
+ tokens.push(['STRING', quote + (str.substring(pi, i)) + quote]);
}
- if (tokens.length > 1) {
- _d = tokens.length - 1; _e = 1;
- for (_c = 0, i = _d; (_d <= _e ? i <= _e : i >= _e); (_d <= _e ? i += 1 : i -= 1), _c++) {
- _f = [tokens[i - 1], tokens[i]];
- prev = _f[0];
- tok = _f[1];
- if (tok[0] === 'STRING' && prev[0] === 'STRING') {
- _g = [prev[1].substring(1, prev[1].length - 1), tok[1].substring(1, tok[1].length - 1)];
- prev = _g[0];
- tok = _g[1];
- tokens.splice(i - 1, 2, ['STRING', quote + prev + tok + quote]);
- }
- }
- }
- _h = []; _i = tokens;
- for (i = 0, _j = _i.length; i < _j; i++) {
- each = _i[i];
- _h.push((function() {
- if (each[0] === 'TOKENS') {
- _k = each[1];
- for (_l = 0, _m = _k.length; _l < _m; _l++) {
- nested = _k[_l];
- this.token(nested[0], nested[1]);
- }
- } else {
- this.token(each[0], each[1]);
- }
+ _c = []; _d = tokens;
+ for (i = 0, _e = _d.length; i < _e; i++) {
+ each = _d[i];
+ _c.push((function() {
+ each[0] === 'TOKENS' ? (this.tokens = this.tokens.concat(each[1])) : this.token(each[0], each[1]);
if (i < tokens.length - 1) {
return this.token('+', '+');
}
}).call(this));
}
- return _h;
+ return _c;
}
};
// Helpers
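
The interpolation rewrite leans on balanced_string (relocated below the error helpers in this diff) to find the extent of each ${ } expression. As a reading aid, here is a standalone JavaScript restatement of that stack-based delimiter scan; the function name and the sample input are illustrative, not taken from the library.

// Illustrative sketch: every opening delimiter pushes its pair onto a stack,
// the matching closer pops it, and scanning stops once the outermost pair
// closes. Returns the balanced prefix of `str`, or false if none starts here.
function balancedPrefix(str, pairs) {
  var levels = [], i = 0;
  while (i < str.length) {
    if (levels.length && str.charAt(i) === '\\') {
      i += 1;                                            // skip escaped characters
    } else {
      for (var p = 0; p < pairs.length; p++) {
        var open = pairs[p][0], close = pairs[p][1];
        if (levels.length && str.substr(i, close.length) === close &&
            levels[levels.length - 1] === pairs[p]) {
          levels.pop();                                  // this closer matches the top level
          i += close.length - 1;
          if (!levels.length) i += 1;                    // include the final closing delimiter
          break;
        } else if (str.substr(i, open.length) === open) {
          levels.push(pairs[p]);                         // a new nesting level begins
          i += open.length - 1;
          break;
        }
      }
      if (!levels.length) break;                         // outermost delimiter has closed
    }
    i += 1;
  }
  if (levels.length) throw new Error('Unterminated ' + levels.pop()[0]);
  return i === 0 ? false : str.substring(0, i);
}
// balancedPrefix('"a ${ "nested" } b" + rest', [['"', '"'], ['${', '}']])
// => '"a ${ "nested" } b"'
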
diff --git a/src/lexer.coffee b/src/lexer.coffee
index dec23165..718847a8 100644
--- a/src/lexer.coffee
+++ b/src/lexer.coffee
@@ -110,40 +110,54 @@ BEFORE_WHEN: ['INDENT', 'OUTDENT', 'TERMINATOR']
# pushing some extra smarts into the Lexer.
exports.Lexer: class Lexer
- # Scan by attempting to match tokens one at a time. Slow and steady.
+ # **tokenize** is the Lexer's main method. Scan by attempting to match tokens
+ # one at a time, using a regular expression anchored at the start of the
+ # remaining code, or a custom recursive token-matching method
+ # (for interpolations). When the next token has been recorded, we move forward
+ # within the code past the token, and begin again.
+ #
+ # Each tokenizing method is responsible for incrementing `@i` by the number of
+ # characters it has consumed. `@i` can be thought of as our finger on the page
+ # of source.
tokenize: (code, options) ->
o : options or {}
@code : code # The remainder of the source code.
@i : 0 # Current character position we're parsing.
@line : o.line or 0 # The current line.
- @indent : 0 # The current indent level.
- @indents : [] # The stack of all indent levels we are currently within.
- @tokens : [] # Collection of all parsed tokens in the form ['TOKEN_TYPE', value, line]
+ @indent : 0 # The current indentation level.
+ @indents : [] # The stack of all current indentation levels.
+ @tokens : [] # Stream of parsed tokens in the form ['TYPE', value, line]
while @i < @code.length
@chunk: @code.slice(@i)
@extract_next_token()
@close_indentation()
- return @tokens if o.rewrite is no
+ return @tokens if o.rewrite is off
(new Rewriter()).rewrite @tokens
# At every position, run through this list of attempted matches,
- # short-circuiting if any of them succeed.
+ # short-circuiting if any of them succeed. Their order determines precedence:
+ # `@literal_token` is the fallback catch-all.
extract_next_token: ->
return if @identifier_token()
return if @number_token()
return if @heredoc_token()
- return if @string_token()
- return if @js_token()
return if @regex_token()
return if @comment_token()
return if @line_token()
return if @whitespace_token()
+ return if @js_token()
+ return if @string_token()
return @literal_token()
# Tokenizers
# ----------
# Matches identifying literals: variables, keywords, method names, etc.
+ # Check to ensure that JavaScript reserved words aren't being used as
+ # identifiers. Because CoffeeScript reserves a handful of keywords that are
+ # allowed in JavaScript, we're careful not to tag them as keywords when
+ # referenced as property names here, so you can still do `jQuery.is()` even
+ # though `is` means `===` otherwise.
identifier_token: ->
return false unless id: @match IDENTIFIER, 1
@name_access_type()
@@ -163,17 +177,20 @@ exports.Lexer: class Lexer
@i += number.length
true
- # Matches strings, including multi-line strings.
+ # Matches strings, including multi-line strings. Ensures that quotation marks
+ # are balanced within the string's contents, and within nested interpolations.
string_token: ->
+ return false unless starts(@chunk, '"') or starts(@chunk, "'")
string: @balanced_token ['"', '"'], ['${', '}']
- string: @balanced_token ["'", "'"] if string is false
+ string: @balanced_token ["'", "'"] unless string
return false unless string
@interpolate_string string.replace STRING_NEWLINES, " \\\n"
@line += count string, "\n"
@i += string.length
true
- # Matches heredocs, adjusting indentation to the correct level.
+ # Matches heredocs, adjusting indentation to the correct level, as heredocs
+ # preserve whitespace, but ignore indentation to the left.
heredoc_token: ->
return false unless match = @chunk.match(HEREDOC)
doc: @sanitize_heredoc match[2] or match[4]
@@ -182,14 +199,17 @@ exports.Lexer: class Lexer
@i += match[1].length
true
- # Matches interpolated JavaScript.
+ # Matches JavaScript interpolated directly into the source via backticks.
js_token: ->
+ return false unless starts @chunk, '`'
return false unless script: @balanced_token ['`', '`']
@token 'JS', script.replace(JS_CLEANER, '')
@i += script.length
true
- # Matches regular expression literals.
+ # Matches regular expression literals. Regular expressions are hard to
+ # distinguish from division when lexing, so we borrow some basic heuristics
+ # from JavaScript and Ruby.
regex_token: ->
return false unless regex: @match REGEX, 1
return false if include NOT_REGEX, @tag()
@@ -197,38 +217,13 @@ exports.Lexer: class Lexer
@i += regex.length
true
- # Matches a balanced group such as a single or double-quoted string. Pass in
- # a series of delimiters, all of which must be balanced correctly within the
- # string.
- balanced_string: (str, delimited...) ->
- levels: []
- i: 0
- while i < str.length
- for pair in delimited
- [open, close]: pair
- if levels.length and starts str, '\\', i
- i += 1
- break
- else if levels.length and starts(str, close, i) and levels[levels.length - 1] is pair
- levels.pop()
- i += close.length - 1
- i += 1 unless levels.length
- break
- else if starts str, open, i
- levels.push(pair)
- i += open.length - 1
- break
- break unless levels.length
- i += 1
- throw new Error "SyntaxError: Unterminated ${levels.pop()[0]} starting on line ${@line + 1}" if levels.length
- return false if i is 0
- return str.substring(0, i)
-
- # Matches a balanced string within the token's contents.
+ # Matches a token in which the passed delimiter pairs must be correctly
+ # balanced (i.e. strings, JS literals).
balanced_token: (delimited...) ->
@balanced_string @chunk, delimited...
- # Matches and conumes comments.
+ # Matches and consumes comments. We pass through comments into JavaScript,
+ # so they're treated as real tokens, like any other part of the language.
comment_token: ->
return false unless comment: @match COMMENT, 1
@line += (comment.match(MULTILINER) or []).length
@@ -239,6 +234,15 @@ exports.Lexer: class Lexer
true
# Matches newlines, indents, and outdents, and determines which is which.
+ # If we can detect that the current line is continued onto the next line,
+ # then the newline is suppressed:
+ #
+ # elements
+ # .each( ... )
+ # .map( ... )
+ #
+ # Keeps track of the level of indentation, because a single outdent token
+ # can close multiple indents, so we need to know how far in we happen to be.
line_token: ->
return false unless indent: @match MULTI_DENT, 1
@line += indent.match(MULTILINER).length
@@ -249,10 +253,10 @@ exports.Lexer: class Lexer
no_newlines: next_character is '.' or (@value() and @value().match(NO_NEWLINE) and
prev and (prev[0] isnt '.') and not @value().match(CODE))
if size is @indent
- return @suppress_newlines(indent) if no_newlines
+ return @suppress_newlines() if no_newlines
return @newline_token(indent)
else if size > @indent
- return @suppress_newlines(indent) if no_newlines
+ return @suppress_newlines() if no_newlines
diff: size - @indent
@token 'INDENT', diff
@indents.push diff
@@ -261,8 +265,8 @@ exports.Lexer: class Lexer
@indent: size
true
- # Record an outdent token or tokens, if we happen to be moving back inwards
- # past multiple recorded indents.
+ # Record an outdent token or multiple tokens, if we happen to be moving back
+ # inwards past several recorded indents.
outdent_token: (move_out, no_newlines) ->
while move_out > 0 and @indents.length
last_indent: @indents.pop()
@@ -280,20 +284,22 @@ exports.Lexer: class Lexer
@i += space.length
true
- # Generate a newline token. Multiple newlines get merged together.
+ # Generate a newline token. Consecutive newlines get merged together.
newline_token: (newlines) ->
@token 'TERMINATOR', "\n" unless @tag() is 'TERMINATOR'
true
# Use a `\` at a line-ending to suppress the newline.
# The slash is removed here once its job is done.
- suppress_newlines: (newlines) ->
+ suppress_newlines: ->
@tokens.pop() if @value() is "\\"
true
# We treat all other single characters as a token. Eg.: `( ) , . !`
# Multi-character operators are also literal tokens, so that Jison can assign
- # the proper order of operations.
+ # the proper order of operations. There are some symbols that we tag specially
+ # here. `;` and newlines are both treated as a `TERMINATOR`; we distinguish
+ # parentheses that indicate a method call from regular parentheses, and so on.
literal_token: ->
match: @chunk.match(OPERATOR)
value: match and match[1]
@@ -334,18 +340,17 @@ exports.Lexer: class Lexer
else
@tag 1, 'PROPERTY_ACCESS'
- # Sanitize a heredoc by escaping double quotes and erasing all external
- # indentation on the left-hand side.
+ # Sanitize a heredoc by escaping internal double quotes and erasing all
+ # external indentation on the left-hand side.
sanitize_heredoc: (doc) ->
indent: (doc.match(HEREDOC_INDENT) or ['']).sort()[0]
doc.replace(new RegExp("^" +indent, 'gm'), '')
.replace(MULTILINER, "\\n")
.replace(/"/g, '\\"')
- # A source of ambiguity in our grammar was parameter lists in function
- # definitions (as opposed to argument lists in function calls). Tag
- # parameter identifiers in order to avoid this. Also, parameter lists can
- # make use of splats.
+ # A source of ambiguity in our grammar used to be parameter lists in function
+ # definitions versus argument lists in function calls. Walk backwards, tagging
+ # parameters specially in order to make things easier for the parser.
tag_parameters: ->
return if @tag() isnt ')'
i: 0
@@ -363,64 +368,85 @@ exports.Lexer: class Lexer
close_indentation: ->
@outdent_token(@indent)
- # Error for when you try to use a forbidden word in JavaScript as
+ # The error for when you try to use a forbidden word in JavaScript as
# an identifier.
identifier_error: (word) ->
throw new Error "SyntaxError: Reserved word \"$word\" on line ${@line + 1}"
- # Error for when you try to assign to a reserved word in JavaScript,
+ # The error for when you try to assign to a reserved word in JavaScript,
# like "function" or "default".
assignment_error: ->
throw new Error "SyntaxError: Reserved word \"${@value()}\" on line ${@line + 1} can't be assigned"
+ # Matches a balanced group such as a single or double-quoted string. Pass in
+ # a series of delimiters, all of which must be nested correctly within the
+ # contents of the string. This method allows us to have strings within
+ # interpolations within strings etc...
+ balanced_string: (str, delimited...) ->
+ levels: []
+ i: 0
+ while i < str.length
+ for pair in delimited
+ [open, close]: pair
+ if levels.length and starts str, '\\', i
+ i += 1
+ break
+ else if levels.length and starts(str, close, i) and levels[levels.length - 1] is pair
+ levels.pop()
+ i += close.length - 1
+ i += 1 unless levels.length
+ break
+ else if starts str, open, i
+ levels.push(pair)
+ i += open.length - 1
+ break
+ break unless levels.length
+ i += 1
+ throw new Error "SyntaxError: Unterminated ${levels.pop()[0]} starting on line ${@line + 1}" if levels.length
+ return false if i is 0
+ return str.substring(0, i)
+
# Expand variables and expressions inside double-quoted strings using
- # [ECMA Harmony's interpolation syntax](http://wiki.ecmascript.org/doku.php?id=strawman:string_interpolation).
+ # [ECMA Harmony's interpolation syntax](http://wiki.ecmascript.org/doku.php?id=strawman:string_interpolation)
+ # for substitution of bare variables as well as arbitrary expressions.
#
# "Hello $name."
# "Hello ${name.capitalize()}."
#
+ # If it encounters an interpolation, this method will recursively create a
+ # new Lexer, tokenize the interpolated contents, and merge them into the
+ # token stream.
interpolate_string: (str) ->
if str.length < 3 or not starts str, '"'
@token 'STRING', str
else
- lexer: new Lexer()
- tokens: []
- quote: str.substring(0, 1)
- i: 1
- last_i: i
+ lexer: new Lexer()
+ tokens: []
+ quote: str.substring(0, 1)
+ [i, pi]: [1, 1]
while i < str.length - 1
if starts str, '\\', i
i += 1
- else
- match: str.substring(i).match INTERPOLATION
- if match
- [group, interp]: match
- interp: "this.${ interp.substring(1) }" if starts interp, '@'
- tokens.push ['STRING', "$quote${ str.substring(last_i, i) }$quote"] if last_i < i
- tokens.push ['IDENTIFIER', interp]
- i += group.length - 1
- last_i: i + 1
- else
- expression: @balanced_string str.substring(i), ['${', '}']
- if expression and expression.length > 3
- inner: expression.substring(2, expression.length - 1)
- nested: lexer.tokenize "($inner)", {rewrite: no, line: @line}
- nested.pop()
- tokens.push ['STRING', "$quote${ str.substring(last_i, i) }$quote"] if last_i < i
- tokens.push ['TOKENS', nested]
- i += expression.length - 1
- last_i: i + 1
+ else if match: str.substring(i).match INTERPOLATION
+ [group, interp]: match
+ interp: "this.${ interp.substring(1) }" if starts interp, '@'
+ tokens.push ['STRING', "$quote${ str.substring(pi, i) }$quote"] if pi < i
+ tokens.push ['IDENTIFIER', interp]
+ i += group.length - 1
+ pi: i + 1
+ else if (expr: @balanced_string str.substring(i), ['${', '}']) and expr.length > 3
+ inner: expr.substring(2, expr.length - 1)
+ nested: lexer.tokenize "($inner)", {rewrite: no, line: @line}
+ nested.pop()
+ tokens.push ['STRING', "$quote${ str.substring(pi, i) }$quote"] if pi < i
+ tokens.push ['TOKENS', nested]
+ i += expr.length - 1
+ pi: i + 1
i += 1
- tokens.push ['STRING', "$quote${ str.substring(last_i, i) }$quote"] if last_i < i and last_i < str.length - 1
- if tokens.length > 1
- for i in [tokens.length - 1..1]
- [prev, tok]: [tokens[i - 1], tokens[i]]
- if tok[0] is 'STRING' and prev[0] is 'STRING'
- [prev, tok]: [prev[1].substring(1, prev[1].length - 1), tok[1].substring(1, tok[1].length - 1)]
- tokens.splice i - 1, 2, ['STRING', "$quote$prev$tok$quote"]
+ tokens.push ['STRING', "$quote${ str.substring(pi, i) }$quote"] if pi < i and pi < str.length - 1
for each, i in tokens
if each[0] is 'TOKENS'
- @token nested[0], nested[1] for nested in each[1]
+ @tokens: @tokens.concat each[1]
else
@token each[0], each[1]
@token '+', '+' if i < tokens.length - 1
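
To see the effect of the new interpolate_string end to end, here is a rough usage sketch in plain JavaScript against the compiled lexer; the require path and the token stream shown in the comments are illustrative of what the code above produces, not captured output.

// Tokenize a string containing a bare-variable interpolation with the rewrite
// pass disabled, and print the raw token stream. Under the new interpolate_string,
// the pieces come out as STRING / IDENTIFIER tokens joined by '+' tokens, roughly:
//   ['IDENTIFIER', 'print'], ['STRING', '"Hello "'], ['+', '+'],
//   ['IDENTIFIER', 'name'], ['+', '+'], ['STRING', '"."'], ...
var Lexer = require('./lib/lexer').Lexer;        // path is illustrative
var tokens = new Lexer().tokenize('print "Hello $name."', {rewrite: false});
for (var i = 0; i < tokens.length; i++) {
  console.log(tokens[i][0], tokens[i][1]);       // each token is [tag, value, line]
}
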