From 4906cf1aff9dfe1e273ed2922ae5b9f43c01a17e Mon Sep 17 00:00:00 2001
From: Jeremy Ashkenas
Date: Sun, 7 Mar 2010 12:47:03 -0500
Subject: [PATCH] cleaned and commented the lexer (again)

interpolate_string() continues to shrink
---
 documentation/docs/lexer.html | 231 ++++++++++------
 lib/grammar.js                |   6 +-
 lib/lexer.js                  | 295 ++++++++++++++++++----------
 src/lexer.coffee              | 206 +++++++++++++-----------
 4 files changed, 403 insertions(+), 335 deletions(-)

diff --git a/documentation/docs/lexer.html b/documentation/docs/lexer.html
index 3c592251..10f48a7d 100644
--- a/documentation/docs/lexer.html
+++ b/documentation/docs/lexer.html
@@ -36,7 +36,7 @@ to avoid having a JavaScript error at runtime.

The superset of both JavaScript keywords and reserved words, none of which may
be used as identifiers or properties.

JS_FORBIDDEN: JS_KEYWORDS.concat RESERVED
#

Token matching regexes.

IDENTIFIER    : /^([a-zA-Z$_](\w|\$)*)/
 NUMBER        : /^(\b((0(x|X)[0-9a-fA-F]+)|([0-9]+(\.[0-9]+)?(e[+\-]?[0-9]+)?)))\b/i
 HEREDOC       : /^("{6}|'{6}|"{3}\n?([\s\S]*?)\n?([ \t]*)"{3}|'{3}\n?([\s\S]*?)\n?([ \t]*)'{3})/
-INTERPOLATION : /(^|[\s\S]*?(?:[\\]|\\\\)?)\$([a-zA-Z_@]\w*|{[\s\S]*?(?:[^\\]|\\\\)})/
+INTERPOLATION : /^\$([a-zA-Z_@]\w*)/
 OPERATOR      : /^([+\*&|\/\-%=<>:!?]+)/
 WHITESPACE    : /^([ \t]+)/
 COMMENT       : /^(((\n?[ \t]*)?#[^\n]*)+)/
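
A quick, illustrative check of the tightened pattern (the snippet below is
plain JavaScript, not part of the lexer): the new anchored INTERPOLATION
regex only claims bare identifiers, leaving braced ${ } expressions to the
balanced-string scanner.

    var INTERPOLATION = /^\$([a-zA-Z_@]\w*)/;
    console.log(INTERPOLATION.exec('$name.'));   // [ '$name', 'name', ... ]
    console.log(INTERPOLATION.exec('${x + y}')); // null -- handled by balanced_string
    console.log(INTERPOLATION.exec('$10'));      // null -- digits can't open an identifier
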
@@ -64,31 +64,45 @@ treated as identifiers.

Tokens that, when they immediately precede a when, indicate that the when
occurs at the start of a line. We disambiguate these from trailing whens to
avoid an ambiguity in the grammar.

BEFORE_WHEN: ['INDENT', 'OUTDENT', 'TERMINATOR']
#

The Lexer Class

#

The Lexer class reads a stream of CoffeeScript and divvies it up into tagged
tokens. A minor bit of the ambiguity in the grammar has been avoided by
pushing some extra smarts into the Lexer.

exports.Lexer: class Lexer
#

-Scan by attempting to match tokens one at a time. Slow and steady.
+tokenize is the Lexer's main method. Scan by attempting to match tokens
+one at a time, using a regular expression anchored at the start of the
+remaining code, or a custom recursive token-matching method
+(for interpolations). When the next token has been recorded, we move forward
+within the code past the token, and begin again.
+
+Each tokenizing method is responsible for incrementing @i by the number of
+characters it has consumed. @i can be thought of as our finger on the page
+of source.

  tokenize: (code, options) ->
     o        : options or {}
     @code    : code         # The remainder of the source code.
     @i       : 0            # Current character position we're parsing.
     @line    : o.line or 0  # The current line.
-    @indent  : 0            # The current indent level.
-    @indents : []           # The stack of all indent levels we are currently within.
-    @tokens  : []           # Collection of all parsed tokens in the form ['TOKEN_TYPE', value, line]
+    @indent  : 0            # The current indentation level.
+    @indents : []           # The stack of all current indentation levels.
+    @tokens  : []           # Stream of parsed tokens in the form ['TYPE', value, line]
     while @i < @code.length
       @chunk: @code.slice(@i)
       @extract_next_token()
     @close_indentation()
-    return @tokens if o.rewrite is no
+    return @tokens if o.rewrite is off
     (new Rewriter()).rewrite @tokens
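
To make the driving loop concrete, here is the same shape in miniature as
plain JavaScript (tokenizeDemo and the matcher signature are illustrative
stand-ins, not the lexer's API):

    // Each matcher inspects the remaining chunk and either consumes
    // characters (returning how many) or declines (returning 0).
    function tokenizeDemo(code, matchers) {
      var tokens = [], i = 0;
      while (i < code.length) {
        var chunk = code.slice(i), consumed = 0;
        for (var m = 0; m < matchers.length && !consumed; m++) {
          consumed = matchers[m](chunk, tokens);
        }
        if (!consumed) throw new Error('no matcher for: ' + chunk.charAt(0));
        i += consumed; // the "finger on the page" moves forward
      }
      return tokens;
    }
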
#

At every position, run through this list of attempted matches,
-short-circuiting if any of them succeed.
+short-circuiting if any of them succeed. Their order determines precedence:
+@literal_token is the fallback catch-all.

  extract_next_token: ->
     return if @identifier_token()
     return if @number_token()
     return if @heredoc_token()
-    return if @string_token()
-    return if @js_token()
     return if @regex_token()
     return if @comment_token()
     return if @line_token()
     return if @whitespace_token()
+    return if @js_token()
+    return if @string_token()
     return    @literal_token()
#

Tokenizers

#

-Matches identifying literals: variables, keywords, method names, etc.
+Matches identifying literals: variables, keywords, method names, etc.
+Check to ensure that JavaScript reserved words aren't being used as
+identifiers. Because CoffeeScript reserves a handful of keywords that are
+allowed in JavaScript, we're careful not to tag them as keywords when
+referenced as property names here, so you can still do jQuery.is() even
+though is means === otherwise.

  identifier_token: ->
     return false unless id: @match IDENTIFIER, 1
     @name_access_type()
     tag: 'IDENTIFIER'
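
The effect of that property-name escape hatch, sketched in plain JavaScript
(tagForIdentifier and its abbreviated keyword list are hypothetical, for
illustration only):

    function tagForIdentifier(id, prevTag) {
      var KEYWORDS = ['is', 'isnt', 'not', 'and', 'or']; // abbreviated
      var isProperty = prevTag === '.' || prevTag === 'PROPERTY_ACCESS';
      return (!isProperty && KEYWORDS.indexOf(id) >= 0) ? id.toUpperCase() : 'IDENTIFIER';
    }
    console.log(tagForIdentifier('is', 'IDENTIFIER')); // 'IS' -- the keyword, meaning ===
    console.log(tagForIdentifier('is', '.'));          // 'IDENTIFIER' -- jQuery.is() still works
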
@@ -102,60 +116,55 @@ short-circuiting if any of them succeed.

     return false unless number: @match NUMBER, 1
     @token 'NUMBER', number
     @i += number.length
     true
#

-Matches strings, including multi-line strings.
+Matches strings, including multi-line strings. Ensures that quotation marks
+are balanced within the string's contents, and within nested interpolations.

  string_token: ->
+    return false unless starts(@chunk, '"') or starts(@chunk, "'")
     string: @balanced_token ['"', '"'], ['${', '}']
-    string: @balanced_token ["'", "'"] if string is false
+    string: @balanced_token ["'", "'"] unless string
     return false unless string
     @interpolate_string string.replace STRING_NEWLINES, " \\\n"
     @line += count string, "\n"
     @i += string.length
     true
#

-Matches heredocs, adjusting indentation to the correct level.
+Matches heredocs, adjusting indentation to the correct level, as heredocs
+preserve whitespace, but ignore indentation to the left.

  heredoc_token: ->
     return false unless match = @chunk.match(HEREDOC)
     doc: @sanitize_heredoc match[2] or match[4]
     @token 'STRING', "\"$doc\""
     @line += count match[1], "\n"
     @i += match[1].length
     true
#

-Matches interpolated JavaScript.
+Matches JavaScript interpolated directly into the source via backticks.

  js_token: ->
+    return false unless starts @chunk, '`'
     return false unless script: @balanced_token ['`', '`']
     @token 'JS', script.replace(JS_CLEANER, '')
     @i += script.length
     true
#

-Matches regular expression literals.
+Matches regular expression literals. Regular expressions are difficult to
+distinguish from division when lexing, so we borrow some basic heuristics
+from JavaScript and Ruby.

  regex_token: ->
     return false unless regex: @match REGEX, 1
     return false if include NOT_REGEX, @tag()
     @token 'REGEX', regex
     @i += regex.length
     true
#

-Matches a balanced group such as a single or double-quoted string. Pass in
-a series of delimiters, all of which must be balanced correctly within the
-token's contents.
+Matches a token in which the passed delimiter pairs must be correctly
+balanced (i.e. strings, JS literals).

  balanced_token: (delimited...) ->
-    levels: []
-    i: 0
-    while i < @chunk.length
-      for pair in delimited
-        [open, close]: pair
-        if levels.length and starts @chunk, '\\', i
-          i += 1
-          break
-        else if levels.length and starts(@chunk, close, i) and levels[levels.length - 1] is pair
-          levels.pop()
-          i += close.length - 1
-          i += 1 unless levels.length
-          break
-        else if starts @chunk, open, i
-          levels.push(pair)
-          i += open.length - 1
-          break
-      break unless levels.length
-      i += 1
-    throw new Error "SyntaxError: Unterminated ${levels.pop()[0]} starting on line ${@line + 1}" if levels.length
-    return false if i is 0
-    return @chunk.substring(0, i)
+    @balanced_string @chunk, delimited...
#

-Matches and conumes comments.
+Matches and consumes comments. We pass through comments into JavaScript,
+so they're treated as real tokens, like any other part of the language.

  comment_token: ->
     return false unless comment: @match COMMENT, 1
     @line += (comment.match(MULTILINER) or []).length
     lines: comment.replace(COMMENT_CLEANER, '').split(MULTILINER)
     @token 'COMMENT', compact lines
     @token 'TERMINATOR', "\n"
     @i += comment.length
     true
#

-Matches newlines, indents, and outdents, and determines which is which.
+Matches newlines, indents, and outdents, and determines which is which.
+If we can detect that the current line is continued onto the next line,
+then the newline is suppressed:
+
+    elements
+      .each( ... )
+      .map( ... )
+
+Keeps track of the level of indentation, because a single outdent token
+can close multiple indents, so we need to know how far in we happen to be.

  line_token: ->
     return false unless indent: @match MULTI_DENT, 1
     @line += indent.match(MULTILINER).length
     @i    += indent.length
@@ -165,18 +174,18 @@ token's contents.

     no_newlines: next_character is '.' or (@value() and @value().match(NO_NEWLINE) and
       prev and (prev[0] isnt '.') and not @value().match(CODE))
     if size is @indent
-      return @suppress_newlines(indent) if no_newlines
+      return @suppress_newlines() if no_newlines
       return @newline_token(indent)
     else if size > @indent
-      return @suppress_newlines(indent) if no_newlines
+      return @suppress_newlines() if no_newlines
       diff: size - @indent
       @token 'INDENT', diff
       @indents.push diff
     else
       @outdent_token @indent - size, no_newlines
     @indent: size
     true
#

-Record an outdent token or tokens, if we happen to be moving back inwards
-past multiple recorded indents.
+Record an outdent token or multiple tokens, if we happen to be moving back
+inwards past several recorded indents.

  outdent_token: (move_out, no_newlines) ->
     while move_out > 0 and @indents.length
       last_indent: @indents.pop()
       @token 'OUTDENT', last_indent
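
The unwinding behaviour, isolated as a runnable JavaScript sketch
(outdentTokens is an illustrative stand-in for the method above):

    function outdentTokens(indents, moveOut) {
      var tokens = [];
      while (moveOut > 0 && indents.length) {
        var last = indents.pop();       // width of the innermost open block
        tokens.push(['OUTDENT', last]);
        moveOut -= last;                // one dedent can repay several levels
      }
      return tokens;
    }
    console.log(outdentTokens([2, 2], 4)); // [ [ 'OUTDENT', 2 ], [ 'OUTDENT', 2 ] ]
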
@@ -188,14 +197,16 @@ as being "spaced", because there are some cases where it makes a difference.

     prev: @prev()
     prev.spaced: true if prev
     @i += space.length
     true
#

-Generate a newline token. Multiple newlines get merged together.
+Generate a newline token. Consecutive newlines get merged together.

  newline_token: (newlines) ->
     @token 'TERMINATOR', "\n" unless @tag() is 'TERMINATOR'
     true
#

Use a \ at a line-ending to suppress the newline.
The slash is removed here once its job is done.

-  suppress_newlines: (newlines) ->
+  suppress_newlines: ->
     @tokens.pop() if @value() is "\\"
     true
#

We treat all other single characters as a token. E.g.: ( ) , . !
Multi-character operators are also literal tokens, so that Jison can assign
-the proper order of operations.
+the proper order of operations. There are some symbols that we tag specially
+here. ; and newlines are both treated as a TERMINATOR; we distinguish
+parentheses that indicate a method call from regular parentheses, and so on.

  literal_token: ->
     match: @chunk.match(OPERATOR)
     value: match and match[1]
     @tag_parameters() if value and value.match(CODE)
@@ -227,15 +238,14 @@ if it's a special kind of accessor.

        @tag(1, 'SOAK_ACCESS')
        @tokens.splice(-2, 1)
      else
        @tag 1, 'PROPERTY_ACCESS'
#

-Sanitize a heredoc by escaping double quotes and erasing all external
-indentation on the left-hand side.
+Sanitize a heredoc by escaping internal double quotes and erasing all
+external indentation on the left-hand side.

  sanitize_heredoc: (doc) ->
     indent: (doc.match(HEREDOC_INDENT) or ['']).sort()[0]
     doc.replace(new RegExp("^" +indent, 'gm'), '')
        .replace(MULTILINER, "\\n")
        .replace(/"/g, '\\"')
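
What the indentation-erasing pass does, in isolation (plain JavaScript; the
inline regex stands in for HEREDOC_INDENT):

    var doc = '    Hello\n      World';
    var indent = (doc.match(/^[ \t]+/mg) || ['']).sort()[0]; // shortest margin sorts first
    console.log(doc.replace(new RegExp('^' + indent, 'gm'), ''));
    // "Hello\n  World" -- the common left margin is erased; deeper indentation survives
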
#

-A source of ambiguity in our grammar was parameter lists in function
-definitions (as opposed to argument lists in function calls). Tag
-parameter identifiers in order to avoid this. Also, parameter lists can
-make use of splats.
+A source of ambiguity in our grammar used to be parameter lists in function
+definitions versus argument lists in function calls. Walk backwards, tagging
+parameters specially in order to make things easier for the parser.

  tag_parameters: ->
     return if @tag() isnt ')'
     i: 0
     while true
@@ -247,69 +257,92 @@ make use of splats.

         when ')' then tok[0]: 'PARAM_END'
         when '(' then return tok[0]: 'PARAM_START'
     true
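
The same backwards walk as a standalone JavaScript sketch (tagParameters is
illustrative; the real method mutates @tokens in place via @prev):

    function tagParameters(tokens) {
      if (tokens[tokens.length - 1][0] !== ')') return;
      for (var i = tokens.length - 1; i >= 0; i--) {
        var tok = tokens[i];
        if (tok[0] === 'IDENTIFIER') tok[0] = 'PARAM';
        else if (tok[0] === ')') tok[0] = 'PARAM_END';
        else if (tok[0] === '(') { tok[0] = 'PARAM_START'; return; }
      }
    }

    var toks = [['(', '('], ['IDENTIFIER', 'x'], [',', ','], ['IDENTIFIER', 'y'], [')', ')']];
    tagParameters(toks); // tags become PARAM_START, PARAM, ',', PARAM, PARAM_END
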
#

Close up all remaining open blocks at the end of the file.

  close_indentation: ->
    @outdent_token(@indent)

#

-Error for when you try to use a forbidden word in JavaScript as
+The error for when you try to use a forbidden word in JavaScript as
an identifier.

  identifier_error: (word) ->
    throw new Error "SyntaxError: Reserved word \"$word\" on line ${@line + 1}"

#

-Error for when you try to assign to a reserved word in JavaScript,
+The error for when you try to assign to a reserved word in JavaScript,
like "function" or "default".

  assignment_error: ->
    throw new Error "SyntaxError: Reserved word \"${@value()}\" on line ${@line + 1} can't be assigned"

#

+Matches a balanced group such as a single or double-quoted string. Pass in
+a series of delimiters, all of which must be nested correctly within the
+contents of the string. This method allows us to have strings within
+interpolations within strings etc...

  balanced_string: (str, delimited...) ->
+    levels: []
+    i: 0
+    while i < str.length
+      for pair in delimited
+        [open, close]: pair
+        if levels.length and starts str, '\\', i
+          i += 1
+          break
+        else if levels.length and starts(str, close, i) and levels[levels.length - 1] is pair
+          levels.pop()
+          i += close.length - 1
+          i += 1 unless levels.length
+          break
+        else if starts str, open, i
+          levels.push(pair)
+          i += open.length - 1
+          break
+      break unless levels.length
+      i += 1
+    throw new Error "SyntaxError: Unterminated ${levels.pop()[0]} starting on line ${@line + 1}" if levels.length
+    return false if i is 0
+    return str.substring(0, i)
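
Because this method now does the heavy lifting for strings, backticked
JavaScript, and interpolations alike, here is a standalone JavaScript port
for experimenting with it (balancedString and startsWith are illustrative
names, and the error handling is trimmed to the essentials):

    function startsWith(str, literal, start) {
      return str.substring(start || 0, (start || 0) + literal.length) === literal;
    }

    function balancedString(str, delimited) {
      var levels = [], i = 0;
      while (i < str.length) {
        for (var j = 0; j < delimited.length; j++) {
          var open = delimited[j][0], close = delimited[j][1];
          if (levels.length && startsWith(str, '\\', i)) {
            i += 1;
            break;
          } else if (levels.length && startsWith(str, close, i) &&
                     levels[levels.length - 1] === delimited[j]) {
            levels.pop();
            i += close.length - 1;
            if (!levels.length) i += 1;
            break;
          } else if (startsWith(str, open, i)) {
            levels.push(delimited[j]);
            i += open.length - 1;
            break;
          }
        }
        if (!levels.length) break;
        i += 1;
      }
      if (levels.length) throw new Error('Unterminated ' + levels.pop()[0]);
      return i === 0 ? false : str.substring(0, i);
    }

    balancedString('"a ${ "b" } c" + tail', [['"', '"'], ['${', '}']]);
    // => '"a ${ "b" } c"' -- the inner quotes open and close their own level
    // while ${ } is still on the stack, so they never terminate the outer string
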
#

-Expand variables and expressions inside double-quoted strings using
-ECMA Harmony's interpolation syntax.
+Expand variables and expressions inside double-quoted strings using
+ECMA Harmony's interpolation syntax
+for substitution of bare variables as well as arbitrary expressions.

     "Hello $name."
     "Hello ${name.capitalize()}."

+If it encounters an interpolation, this method will recursively create a
+new Lexer, tokenize the interpolated contents, and merge them into the
+token stream.

  interpolate_string: (str) ->
     if str.length < 3 or not starts str, '"'
       @token 'STRING', str
     else
-      lexer:  new Lexer()
-      tokens: []
-      quote:  str.substring(0, 1)
-      str:    str.substring(1, str.length - 1)
-      while str.length
-        match: str.match INTERPOLATION
-        if match
-          [group, before, interp]: match
-          if starts before, '\\', before.length - 1
-            prev: before.substring(0, before.length - 1)
-            tokens.push ['STRING', "$quote$prev$$interp$quote"] if before.length
-          else
-            tokens.push ['STRING', "$quote$before$quote"] if before.length
-            if starts interp, '{'
-              inner: interp.substring(1, interp.length - 1)
-              nested: lexer.tokenize "($inner)", {rewrite: no, line: @line}
-              nested.pop()
-              tokens.push ['TOKENS', nested]
-            else
-              interp: "this.${ interp.substring(1) }" if starts interp, '@'
-              tokens.push ['IDENTIFIER', interp]
-          str: str.substring(group.length)
-        else
-          tokens.push ['STRING', "$quote$str$quote"]
-          str: ''
-      if tokens.length > 1
-        for i in [tokens.length - 1..1]
-          [prev, tok]: [tokens[i - 1], tokens[i]]
-          if tok[0] is 'STRING' and prev[0] is 'STRING'
-            [prev, tok]: [prev[1].substring(1, prev[1].length - 1), tok[1].substring(1, tok[1].length - 1)]
-            tokens.splice i - 1, 2, ['STRING', "$quote$prev$tok$quote"]
+      lexer:    new Lexer()
+      tokens:   []
+      quote:    str.substring(0, 1)
+      [i, pi]:  [1, 1]
+      while i < str.length - 1
+        if starts str, '\\', i
+          i += 1
+        else if match: str.substring(i).match INTERPOLATION
+          [group, interp]: match
+          interp: "this.${ interp.substring(1) }" if starts interp, '@'
+          tokens.push ['STRING', "$quote${ str.substring(pi, i) }$quote"] if pi < i
+          tokens.push ['IDENTIFIER', interp]
+          i += group.length - 1
+          pi: i + 1
+        else if (expr: @balanced_string str.substring(i), ['${', '}']) and expr.length > 3
+          inner: expr.substring(2, expr.length - 1)
+          nested: lexer.tokenize "($inner)", {rewrite: no, line: @line}
+          nested.pop()
+          tokens.push ['STRING', "$quote${ str.substring(pi, i) }$quote"] if pi < i
+          tokens.push ['TOKENS', nested]
+          i += expr.length - 1
+          pi: i + 1
+        i += 1
+      tokens.push ['STRING', "$quote${ str.substring(pi, i) }$quote"] if pi < i and pi < str.length - 1
       for each, i in tokens
         if each[0] is 'TOKENS'
-          @token nested[0], nested[1] for nested in each[1]
+          @tokens: @tokens.concat each[1]
         else
           @token each[0], each[1]
-        @token '+', '+' if i < tokens.length - 1
+        @token '+', '+' if i < tokens.length - 1
#

Helpers

#

Add a token to the results, taking note of the line number.

  token: (tag, value) ->
     @tokens.push([tag, value, @line])

#

Peek at a tag in the current token stream.

  tag: (index, tag) ->
     return unless tok: @prev(index)
     return tok[0]: tag if tag?
     tok[0]
#

Peek at a value in the current token stream.

  value: (index, val) ->
     return unless tok: @prev(index)
     return tok[1]: val if val?
     tok[1]
#

Peek at a previous token, entire.

  prev: (index) ->
     @tokens[@tokens.length - (index or 1)]
#

Attempt to match a string against the current chunk, returning the indexed
match if successful, and false otherwise.

  match: (regex, index) ->
     return false unless m: @chunk.match(regex)
     if m then m[index] else false
#

Utility Functions

#

Does a list include a value?

include: (list, value) ->
   list.indexOf(value) >= 0
#

Peek at the beginning of a given string to see if it matches a sequence.

starts: (string, literal, start) ->
   string.substring(start, (start or 0) + literal.length) is literal
#

Trim out all falsy values from an array.

compact: (array) -> item for item in array when item
#

Count the number of occurrences of a character in a string.

count: (string, letter) ->
   num: 0
   pos: string.indexOf(letter)
   while pos isnt -1
diff --git a/lib/grammar.js b/lib/grammar.js
index 0cad597d..a600ec2c 100644
--- a/lib/grammar.js
+++ b/lib/grammar.js
@@ -4,16 +4,14 @@
   // The CoffeeScript parser is generated by [Jison](http://github.com/zaach/jison)
   // from this grammar file. Jison is a bottom-up parser generator, similar in
   // style to [Bison](http://www.gnu.org/software/bison), implemented in JavaScript.
-  // It can recognize
-  // [LALR(1), LR(0), SLR(1), and LR(1)](http://en.wikipedia.org/wiki/LR_grammar)
+  // It can recognize [LALR(1), LR(0), SLR(1), and LR(1)](http://en.wikipedia.org/wiki/LR_grammar)
   // type grammars. To create the Jison parser, we list the pattern to match
   // on the left-hand side, and the action to take (usually the creation of syntax
   // tree nodes) on the right. As the parser runs, it
   // shifts tokens from our token stream, from left to right, and
   // [attempts to match](http://en.wikipedia.org/wiki/Bottom-up_parsing)
   // the token sequence against the rules below. When a match can be made, it
-  // reduces into the
-  // [nonterminal](http://en.wikipedia.org/wiki/Terminal_and_nonterminal_symbols)
+  // reduces into the [nonterminal](http://en.wikipedia.org/wiki/Terminal_and_nonterminal_symbols)
   // (the enclosing name at the top), and we proceed from there.
   // If you run the `cake build:parser` command, Jison constructs a parse table
   // from our rules and saves it into `lib/parser.js`.
diff --git a/lib/lexer.js b/lib/lexer.js
index 31ae0f6e..9acd6fc0 100644
--- a/lib/lexer.js
+++ b/lib/lexer.js
@@ -74,7 +74,14 @@
   // pushing some extra smarts into the Lexer.
   exports.Lexer = (function() {
     Lexer = function Lexer() {    };
-    // Scan by attempting to match tokens one at a time. Slow and steady.
+    // **tokenize** is the Lexer's main method. Scan by attempting to match tokens
+    // one at a time, using a regular expression anchored at the start of the
+    // remaining code, or a custom recursive token-matching method
+    // (for interpolations). When the next token has been recorded, we move forward
+    // within the code past the token, and begin again.
+    // Each tokenizing method is responsible for incrementing `@i` by the number of
+    // characters it has consumed. `@i` can be thought of as our finger on the page
+    // of source.
     Lexer.prototype.tokenize = function tokenize(code, options) {
       var o;
       o = options || {};
@@ -85,11 +92,11 @@
       this.line = o.line || 0;
       // The current line.
       this.indent = 0;
-      // The current indent level.
+      // The current indentation level.
       this.indents = [];
-      // The stack of all indent levels we are currently within.
+      // The stack of all current indentation levels.
       this.tokens = [];
-      // Collection of all parsed tokens in the form ['TOKEN_TYPE', value, line]
+      // Stream of parsed tokens in the form ['TYPE', value, line]
       while (this.i < this.code.length) {
         this.chunk = this.code.slice(this.i);
         this.extract_next_token();
@@ -101,7 +108,8 @@
       return (new Rewriter()).rewrite(this.tokens);
     };
     // At every position, run through this list of attempted matches,
-    // short-circuiting if any of them succeed.
+    // short-circuiting if any of them succeed. Their order determines precedence:
+    // `@literal_token` is the fallback catch-all.
     Lexer.prototype.extract_next_token = function extract_next_token() {
       if (this.identifier_token()) {
         return null;
@@ -112,12 +120,6 @@
       if (this.heredoc_token()) {
         return null;
       }
-      if (this.string_token()) {
-        return null;
-      }
-      if (this.js_token()) {
-        return null;
-      }
       if (this.regex_token()) {
         return null;
       }
@@ -130,11 +132,22 @@
       if (this.whitespace_token()) {
         return null;
       }
+      if (this.js_token()) {
+        return null;
+      }
+      if (this.string_token()) {
+        return null;
+      }
       return this.literal_token();
     };
     // Tokenizers
     // ----------
     // Matches identifying literals: variables, keywords, method names, etc.
+    // Check to ensure that JavaScript reserved words aren't being used as
+    // identifiers. Because CoffeeScript reserves a handful of keywords that are
+    // allowed in JavaScript, we're careful not to tag them as keywords when
+    // referenced as property names here, so you can still do `jQuery.is()` even
+    // though `is` means `===` otherwise.
     Lexer.prototype.identifier_token = function identifier_token() {
       var id, tag;
       if (!((id = this.match(IDENTIFIER, 1)))) {
@@ -165,11 +178,15 @@
       this.i += number.length;
       return true;
     };
-    // Matches strings, including multi-line strings.
+    // Matches strings, including multi-line strings. Ensures that quotation marks
+    // are balanced within the string's contents, and within nested interpolations.
     Lexer.prototype.string_token = function string_token() {
       var string;
+      if (!(starts(this.chunk, '"') || starts(this.chunk, "'"))) {
+        return false;
+      }
       string = this.balanced_token(['"', '"'], ['${', '}']);
-      if (string === false) {
+      if (!(string)) {
         string = this.balanced_token(["'", "'"]);
       }
       if (!(string)) {
@@ -180,7 +197,8 @@
       this.i += string.length;
       return true;
     };
-    // Matches heredocs, adjusting indentation to the correct level.
+    // Matches heredocs, adjusting indentation to the correct level, as heredocs
+    // preserve whitespace, but ignore indentation to the left.
     Lexer.prototype.heredoc_token = function heredoc_token() {
       var doc, match;
       if (!((match = this.chunk.match(HEREDOC)))) {
@@ -192,9 +210,12 @@
       this.i += match[1].length;
       return true;
     };
-    // Matches interpolated JavaScript.
+    // Matches JavaScript interpolated directly into the source via backticks.
     Lexer.prototype.js_token = function js_token() {
       var script;
+      if (!(starts(this.chunk, '`'))) {
+        return false;
+      }
       if (!((script = this.balanced_token(['`', '`'])))) {
         return false;
       }
@@ -202,7 +223,9 @@
       this.i += script.length;
       return true;
     };
-    // Matches regular expression literals.
+    // Matches regular expression literals. Regular expressions are difficult to
+    // distinguish from division when lexing, so we borrow some basic heuristics
+    // from JavaScript and Ruby.
     Lexer.prototype.regex_token = function regex_token() {
       var regex;
       if (!((regex = this.match(REGEX, 1)))) {
@@ -215,57 +238,15 @@
       this.i += regex.length;
       return true;
     };
-    // Matches a balanced group such as a single or double-quoted string. Pass in
-    // a series of delimiters, all of which must be balanced correctly within the
-    // string.
-    Lexer.prototype.balanced_string = function balanced_string(str) {
-      var _a, _b, _c, _d, close, delimited, i, levels, open, pair;
-      delimited = Array.prototype.slice.call(arguments, 1);
-      levels = [];
-      i = 0;
-      while (i < str.length) {
-        _a = delimited;
-        for (_b = 0, _c = _a.length; _b < _c; _b++) {
-          pair = _a[_b];
-          _d = pair;
-          open = _d[0];
-          close = _d[1];
-          if (levels.length && starts(str, '\\', i)) {
-            i += 1;
-            break;
-          } else if (levels.length && starts(str, close, i) && levels[levels.length - 1] === pair) {
-            levels.pop();
-            i += close.length - 1;
-            if (!(levels.length)) {
-              i += 1;
-            }
-            break;
-          } else if (starts(str, open, i)) {
-            levels.push(pair);
-            i += open.length - 1;
-            break;
-          }
-        }
-        if (!(levels.length)) {
-          break;
-        }
-        i += 1;
-      }
-      if (levels.length) {
-        throw new Error("SyntaxError: Unterminated " + (levels.pop()[0]) + " starting on line " + (this.line + 1));
-      }
-      if (i === 0) {
-        return false;
-      }
-      return str.substring(0, i);
-    };
-    // Matches a balanced string within the token's contents.
+    // Matches a token in which the passed delimiter pairs must be correctly
+    // balanced (i.e. strings, JS literals).
     Lexer.prototype.balanced_token = function balanced_token() {
       var delimited;
       delimited = Array.prototype.slice.call(arguments, 0);
       return this.balanced_string.apply(this, [this.chunk].concat(delimited));
     };
-    // Matches and conumes comments.
+    // Matches and consumes comments. We pass through comments into JavaScript,
+    // so they're treated as real tokens, like any other part of the language.
     Lexer.prototype.comment_token = function comment_token() {
       var comment, lines;
       if (!((comment = this.match(COMMENT, 1)))) {
@@ -279,6 +260,13 @@
       return true;
     };
     // Matches newlines, indents, and outdents, and determines which is which.
+    // If we can detect that the current line is continued onto the next line,
+    // then the newline is suppressed:
+    //     elements
+    //       .each( ... )
+    //       .map( ... )
+    // Keeps track of the level of indentation, because a single outdent token
+    // can close multiple indents, so we need to know how far in we happen to be.
     Lexer.prototype.line_token = function line_token() {
       var diff, indent, next_character, no_newlines, prev, size;
       if (!((indent = this.match(MULTI_DENT, 1)))) {
@@ -292,12 +280,12 @@
       no_newlines = next_character === '.' || (this.value() && this.value().match(NO_NEWLINE) && prev && (prev[0] !== '.') && !this.value().match(CODE));
       if (size === this.indent) {
         if (no_newlines) {
-          return this.suppress_newlines(indent);
+          return this.suppress_newlines();
         }
         return this.newline_token(indent);
       } else if (size > this.indent) {
         if (no_newlines) {
-          return this.suppress_newlines(indent);
+          return this.suppress_newlines();
         }
         diff = size - this.indent;
         this.token('INDENT', diff);
@@ -308,8 +296,8 @@
       this.indent = size;
       return true;
     };
-    // Record an outdent token or tokens, if we happen to be moving back inwards
-    // past multiple recorded indents.
+    // Record an outdent token or multiple tokens, if we happen to be moving back
+    // inwards past several recorded indents.
     Lexer.prototype.outdent_token = function outdent_token(move_out, no_newlines) {
       var last_indent;
       while (move_out > 0 && this.indents.length) {
@@ -336,7 +324,7 @@
       this.i += space.length;
       return true;
     };
-    // Generate a newline token. Multiple newlines get merged together.
+    // Generate a newline token. Consecutive newlines get merged together.
     Lexer.prototype.newline_token = function newline_token(newlines) {
       if (!(this.tag() === 'TERMINATOR')) {
         this.token('TERMINATOR', "\n");
@@ -345,7 +333,7 @@
     };
     // Use a `\` at a line-ending to suppress the newline.
     // The slash is removed here once its job is done.
-    Lexer.prototype.suppress_newlines = function suppress_newlines(newlines) {
+    Lexer.prototype.suppress_newlines = function suppress_newlines() {
       if (this.value() === "\\") {
         this.tokens.pop();
       }
@@ -353,7 +341,9 @@
     };
     // We treat all other single characters as a token. E.g.: `( ) , . !`
     // Multi-character operators are also literal tokens, so that Jison can assign
-    // the proper order of operations.
+    // the proper order of operations. There are some symbols that we tag specially
+    // here. `;` and newlines are both treated as a `TERMINATOR`; we distinguish
+    // parentheses that indicate a method call from regular parentheses, and so on.
     Lexer.prototype.literal_token = function literal_token() {
       var match, not_spaced, tag, value;
       match = this.chunk.match(OPERATOR);
@@ -407,17 +397,16 @@
         }
       }
     };
-    // Sanitize a heredoc by escaping double quotes and erasing all external
-    // indentation on the left-hand side.
+    // Sanitize a heredoc by escaping internal double quotes and erasing all
+    // external indentation on the left-hand side.
     Lexer.prototype.sanitize_heredoc = function sanitize_heredoc(doc) {
       var indent;
       indent = (doc.match(HEREDOC_INDENT) || ['']).sort()[0];
       return doc.replace(new RegExp("^" + indent, 'gm'), '').replace(MULTILINER, "\\n").replace(/"/g, '\\"');
     };
-    // A source of ambiguity in our grammar was parameter lists in function
-    // definitions (as opposed to argument lists in function calls). Tag
-    // parameter identifiers in order to avoid this. Also, parameter lists can
-    // make use of splats.
+    // A source of ambiguity in our grammar used to be parameter lists in function
+    // definitions versus argument lists in function calls. Walk backwards, tagging
+    // parameters specially in order to make things easier for the parser.
     Lexer.prototype.tag_parameters = function tag_parameters() {
       var _a, i, tok;
       if (this.tag() !== ')') {
@@ -444,104 +433,126 @@
     Lexer.prototype.close_indentation = function close_indentation() {
       return this.outdent_token(this.indent);
     };
-    // Error for when you try to use a forbidden word in JavaScript as
+    // The error for when you try to use a forbidden word in JavaScript as
     // an identifier.
     Lexer.prototype.identifier_error = function identifier_error(word) {
       throw new Error("SyntaxError: Reserved word \"" + word + "\" on line " + (this.line + 1));
     };
-    // Error for when you try to assign to a reserved word in JavaScript,
+    // The error for when you try to assign to a reserved word in JavaScript,
     // like "function" or "default".
     Lexer.prototype.assignment_error = function assignment_error() {
       throw new Error("SyntaxError: Reserved word \"" + (this.value()) + "\" on line " + (this.line + 1) + " can't be assigned");
     };
+    // Matches a balanced group such as a single or double-quoted string. Pass in
+    // a series of delimiters, all of which must be nested correctly within the
+    // contents of the string. This method allows us to have strings within
+    // interpolations within strings etc...
+    Lexer.prototype.balanced_string = function balanced_string(str) {
+      var _a, _b, _c, _d, close, delimited, i, levels, open, pair;
+      delimited = Array.prototype.slice.call(arguments, 1);
+      levels = [];
+      i = 0;
+      while (i < str.length) {
+        _a = delimited;
+        for (_b = 0, _c = _a.length; _b < _c; _b++) {
+          pair = _a[_b];
+          _d = pair;
+          open = _d[0];
+          close = _d[1];
+          if (levels.length && starts(str, '\\', i)) {
+            i += 1;
+            break;
+          } else if (levels.length && starts(str, close, i) && levels[levels.length - 1] === pair) {
+            levels.pop();
+            i += close.length - 1;
+            if (!(levels.length)) {
+              i += 1;
+            }
+            break;
+          } else if (starts(str, open, i)) {
+            levels.push(pair);
+            i += open.length - 1;
+            break;
+          }
+        }
+        if (!(levels.length)) {
+          break;
+        }
+        i += 1;
+      }
+      if (levels.length) {
+        throw new Error("SyntaxError: Unterminated " + (levels.pop()[0]) + " starting on line " + (this.line + 1));
+      }
+      if (i === 0) {
+        return false;
+      }
+      return str.substring(0, i);
+    };
     // Expand variables and expressions inside double-quoted strings using
-    // [ECMA Harmony's interpolation syntax](http://wiki.ecmascript.org/doku.php?id=strawman:string_interpolation).
+    // [ECMA Harmony's interpolation syntax](http://wiki.ecmascript.org/doku.php?id=strawman:string_interpolation)
+    // for substitution of bare variables as well as arbitrary expressions.
     //     "Hello $name."
     //     "Hello ${name.capitalize()}."
+    // If it encounters an interpolation, this method will recursively create a
+    // new Lexer, tokenize the interpolated contents, and merge them into the
+    // token stream.
     Lexer.prototype.interpolate_string = function interpolate_string(str) {
-      var _a, _b, _c, _d, _e, _f, _g, _h, _i, _j, _k, _l, _m, each, expression, group, i, inner, interp, last_i, lexer, match, nested, prev, quote, tok, tokens;
+      var _a, _b, _c, _d, _e, each, expr, group, i, inner, interp, lexer, match, nested, pi, quote, tokens;
       if (str.length < 3 || !starts(str, '"')) {
         return this.token('STRING', str);
       } else {
         lexer = new Lexer();
         tokens = [];
         quote = str.substring(0, 1);
-        i = 1;
-        last_i = i;
+        _a = [1, 1];
+        i = _a[0];
+        pi = _a[1];
         while (i < str.length - 1) {
           if (starts(str, '\\', i)) {
             i += 1;
-          } else {
-            match = str.substring(i).match(INTERPOLATION);
-            if (match) {
-              _a = match;
-              group = _a[0];
-              interp = _a[1];
-              if (starts(interp, '@')) {
-                interp = "this." + (interp.substring(1));
-              }
-              if (last_i < i) {
-                tokens.push(['STRING', quote + (str.substring(last_i, i)) + quote]);
-              }
-              tokens.push(['IDENTIFIER', interp]);
-              i += group.length - 1;
-              last_i = i + 1;
-            } else {
-              expression = this.balanced_string(str.substring(i), ['${', '}']);
-              if (expression && expression.length > 3) {
-                inner = expression.substring(2, expression.length - 1);
-                nested = lexer.tokenize("(" + inner + ")", {
-                  rewrite: false,
-                  line: this.line
-                });
-                nested.pop();
-                if (last_i < i) {
-                  tokens.push(['STRING', quote + (str.substring(last_i, i)) + quote]);
-                }
-                tokens.push(['TOKENS', nested]);
-                i += expression.length - 1;
-                last_i = i + 1;
-              }
+          } else if ((match = str.substring(i).match(INTERPOLATION))) {
+            _b = match;
+            group = _b[0];
+            interp = _b[1];
+            if (starts(interp, '@')) {
+              interp = "this." + (interp.substring(1));
             }
+            if (pi < i) {
+              tokens.push(['STRING', quote + (str.substring(pi, i)) + quote]);
+            }
+            tokens.push(['IDENTIFIER', interp]);
+            i += group.length - 1;
+            pi = i + 1;
+          } else if (((expr = this.balanced_string(str.substring(i), ['${', '}']))) && expr.length > 3) {
+            inner = expr.substring(2, expr.length - 1);
+            nested = lexer.tokenize("(" + inner + ")", {
+              rewrite: false,
+              line: this.line
+            });
+            nested.pop();
+            if (pi < i) {
+              tokens.push(['STRING', quote + (str.substring(pi, i)) + quote]);
+            }
+            tokens.push(['TOKENS', nested]);
+            i += expr.length - 1;
+            pi = i + 1;
           }
           i += 1;
         }
-        if (last_i < i && last_i < str.length - 1) {
-          tokens.push(['STRING', quote + (str.substring(last_i, i)) + quote]);
+        if (pi < i && pi < str.length - 1) {
+          tokens.push(['STRING', quote + (str.substring(pi, i)) + quote]);
         }
-        if (tokens.length > 1) {
-          _d = tokens.length - 1; _e = 1;
-          for (_c = 0, i = _d; (_d <= _e ? i <= _e : i >= _e); (_d <= _e ? i += 1 : i -= 1), _c++) {
-            _f = [tokens[i - 1], tokens[i]];
-            prev = _f[0];
-            tok = _f[1];
-            if (tok[0] === 'STRING' && prev[0] === 'STRING') {
-              _g = [prev[1].substring(1, prev[1].length - 1), tok[1].substring(1, tok[1].length - 1)];
-              prev = _g[0];
-              tok = _g[1];
-              tokens.splice(i - 1, 2, ['STRING', quote + prev + tok + quote]);
-            }
-          }
-        }
-        _h = []; _i = tokens;
-        for (i = 0, _j = _i.length; i < _j; i++) {
-          each = _i[i];
-          _h.push((function() {
-            if (each[0] === 'TOKENS') {
-              _k = each[1];
-              for (_l = 0, _m = _k.length; _l < _m; _l++) {
-                nested = _k[_l];
-                this.token(nested[0], nested[1]);
-              }
-            } else {
-              this.token(each[0], each[1]);
-            }
+        _c = []; _d = tokens;
+        for (i = 0, _e = _d.length; i < _e; i++) {
+          each = _d[i];
+          _c.push((function() {
+            each[0] === 'TOKENS' ? (this.tokens = this.tokens.concat(each[1])) : this.token(each[0], each[1]);
             if (i < tokens.length - 1) {
               return this.token('+', '+');
             }
           }).call(this));
         }
-        return _h;
+        return _c;
       }
     };
     // Helpers
diff --git a/src/lexer.coffee b/src/lexer.coffee
index dec23165..718847a8 100644
--- a/src/lexer.coffee
+++ b/src/lexer.coffee
@@ -110,40 +110,54 @@ BEFORE_WHEN: ['INDENT', 'OUTDENT', 'TERMINATOR']
 # pushing some extra smarts into the Lexer.
 exports.Lexer: class Lexer
 
-  # Scan by attempting to match tokens one at a time. Slow and steady.
+  # **tokenize** is the Lexer's main method. Scan by attempting to match tokens
+  # one at a time, using a regular expression anchored at the start of the
+  # remaining code, or a custom recursive token-matching method
+  # (for interpolations). When the next token has been recorded, we move forward
+  # within the code past the token, and begin again.
+  #
+  # Each tokenizing method is responsible for incrementing `@i` by the number of
+  # characters it has consumed. `@i` can be thought of as our finger on the page
+  # of source.
   tokenize: (code, options) ->
     o        : options or {}
     @code    : code         # The remainder of the source code.
     @i       : 0            # Current character position we're parsing.
     @line    : o.line or 0  # The current line.
-    @indent  : 0            # The current indent level.
-    @indents : []           # The stack of all indent levels we are currently within.
-    @tokens  : []           # Collection of all parsed tokens in the form ['TOKEN_TYPE', value, line]
+    @indent  : 0            # The current indentation level.
+    @indents : []           # The stack of all current indentation levels.
+    @tokens  : []           # Stream of parsed tokens in the form ['TYPE', value, line]
     while @i < @code.length
       @chunk: @code.slice(@i)
       @extract_next_token()
     @close_indentation()
-    return @tokens if o.rewrite is no
+    return @tokens if o.rewrite is off
     (new Rewriter()).rewrite @tokens
 
   # At every position, run through this list of attempted matches,
-  # short-circuiting if any of them succeed.
+  # short-circuiting if any of them succeed. Their order determines precedence:
+  # `@literal_token` is the fallback catch-all.
   extract_next_token: ->
     return if @identifier_token()
     return if @number_token()
     return if @heredoc_token()
-    return if @string_token()
-    return if @js_token()
     return if @regex_token()
     return if @comment_token()
     return if @line_token()
     return if @whitespace_token()
+    return if @js_token()
+    return if @string_token()
     return    @literal_token()
 
   # Tokenizers
   # ----------
 
   # Matches identifying literals: variables, keywords, method names, etc.
+  # Check to ensure that JavaScript reserved words aren't being used as
+  # identifiers. Because CoffeeScript reserves a handful of keywords that are
+  # allowed in JavaScript, we're careful not to tag them as keywords when
+  # referenced as property names here, so you can still do `jQuery.is()` even
+  # though `is` means `===` otherwise.
   identifier_token: ->
     return false unless id: @match IDENTIFIER, 1
     @name_access_type()
@@ -163,17 +177,20 @@ exports.Lexer: class Lexer
     @i += number.length
     true
 
-  # Matches strings, including multi-line strings.
+  # Matches strings, including multi-line strings. Ensures that quotation marks
+  # are balanced within the string's contents, and within nested interpolations.
   string_token: ->
+    return false unless starts(@chunk, '"') or starts(@chunk, "'")
     string: @balanced_token ['"', '"'], ['${', '}']
-    string: @balanced_token ["'", "'"] if string is false
+    string: @balanced_token ["'", "'"] unless string
     return false unless string
     @interpolate_string string.replace STRING_NEWLINES, " \\\n"
     @line += count string, "\n"
     @i += string.length
     true
 
-  # Matches heredocs, adjusting indentation to the correct level.
+  # Matches heredocs, adjusting indentation to the correct level, as heredocs
+  # preserve whitespace, but ignore indentation to the left.
   heredoc_token: ->
     return false unless match = @chunk.match(HEREDOC)
     doc: @sanitize_heredoc match[2] or match[4]
@@ -182,14 +199,17 @@ exports.Lexer: class Lexer
     @i += match[1].length
     true
 
-  # Matches interpolated JavaScript.
+  # Matches JavaScript interpolated directly into the source via backticks.
   js_token: ->
+    return false unless starts @chunk, '`'
     return false unless script: @balanced_token ['`', '`']
     @token 'JS', script.replace(JS_CLEANER, '')
     @i += script.length
     true
 
-  # Matches regular expression literals.
+  # Matches regular expression literals. Regular expressions are difficult to
+  # distinguish from division when lexing, so we borrow some basic heuristics
+  # from JavaScript and Ruby.
   regex_token: ->
     return false unless regex: @match REGEX, 1
     return false if include NOT_REGEX, @tag()
@@ -197,38 +217,13 @@ exports.Lexer: class Lexer
     @i += regex.length
     true
 
-  # Matches a balanced group such as a single or double-quoted string. Pass in
-  # a series of delimiters, all of which must be balanced correctly within the
-  # string.
-  balanced_string: (str, delimited...) ->
-    levels: []
-    i: 0
-    while i < str.length
-      for pair in delimited
-        [open, close]: pair
-        if levels.length and starts str, '\\', i
-          i += 1
-          break
-        else if levels.length and starts(str, close, i) and levels[levels.length - 1] is pair
-          levels.pop()
-          i += close.length - 1
-          i += 1 unless levels.length
-          break
-        else if starts str, open, i
-          levels.push(pair)
-          i += open.length - 1
-          break
-      break unless levels.length
-      i += 1
-    throw new Error "SyntaxError: Unterminated ${levels.pop()[0]} starting on line ${@line + 1}" if levels.length
-    return false if i is 0
-    return str.substring(0, i)
-
-  # Matches a balanced string within the token's contents.
+  # Matches a token in which the passed delimiter pairs must be correctly
+  # balanced (i.e. strings, JS literals).
   balanced_token: (delimited...) ->
     @balanced_string @chunk, delimited...
 
-  # Matches and conumes comments.
+  # Matches and consumes comments. We pass through comments into JavaScript,
+  # so they're treated as real tokens, like any other part of the language.
   comment_token: ->
     return false unless comment: @match COMMENT, 1
     @line += (comment.match(MULTILINER) or []).length
@@ -239,6 +234,15 @@ exports.Lexer: class Lexer
     true
 
   # Matches newlines, indents, and outdents, and determines which is which.
+  # If we can detect that the current line is continued onto the next line,
+  # then the newline is suppressed:
+  #
+  #     elements
+  #       .each( ... )
+  #       .map( ... )
+  #
+  # Keeps track of the level of indentation, because a single outdent token
+  # can close multiple indents, so we need to know how far in we happen to be.
   line_token: ->
     return false unless indent: @match MULTI_DENT, 1
     @line += indent.match(MULTILINER).length
@@ -249,10 +253,10 @@ exports.Lexer: class Lexer
     no_newlines: next_character is '.' or (@value() and @value().match(NO_NEWLINE) and
       prev and (prev[0] isnt '.') and not @value().match(CODE))
     if size is @indent
-      return @suppress_newlines(indent) if no_newlines
+      return @suppress_newlines() if no_newlines
       return @newline_token(indent)
     else if size > @indent
-      return @suppress_newlines(indent) if no_newlines
+      return @suppress_newlines() if no_newlines
       diff: size - @indent
       @token 'INDENT', diff
       @indents.push diff
@@ -261,8 +265,8 @@ exports.Lexer: class Lexer
     @indent: size
     true
 
-  # Record an outdent token or tokens, if we happen to be moving back inwards
-  # past multiple recorded indents.
+  # Record an outdent token or multiple tokens, if we happen to be moving back
+  # inwards past several recorded indents.
   outdent_token: (move_out, no_newlines) ->
     while move_out > 0 and @indents.length
       last_indent: @indents.pop()
@@ -280,20 +284,22 @@ exports.Lexer: class Lexer
     @i += space.length
     true
 
-  # Generate a newline token. Multiple newlines get merged together.
+  # Generate a newline token. Consecutive newlines get merged together.
   newline_token: (newlines) ->
     @token 'TERMINATOR', "\n" unless @tag() is 'TERMINATOR'
     true
 
   # Use a `\` at a line-ending to suppress the newline.
   # The slash is removed here once its job is done.
-  suppress_newlines: (newlines) ->
+  suppress_newlines: ->
     @tokens.pop() if @value() is "\\"
     true
 
   # We treat all other single characters as a token. E.g.: `( ) , . !`
   # Multi-character operators are also literal tokens, so that Jison can assign
-  # the proper order of operations.
+  # the proper order of operations. There are some symbols that we tag specially
+  # here. `;` and newlines are both treated as a `TERMINATOR`, we distinguish
+  # parentheses that indicate a method call from regular parentheses, and so on.
   literal_token: ->
     match: @chunk.match(OPERATOR)
     value: match and match[1]
@@ -334,18 +340,17 @@ exports.Lexer: class Lexer
       else
         @tag 1, 'PROPERTY_ACCESS'
 
-  # Sanitize a heredoc by escaping double quotes and erasing all external
-  # indentation on the left-hand side.
+  # Sanitize a heredoc by escaping internal double quotes and erasing all
+  # external indentation on the left-hand side.
   sanitize_heredoc: (doc) ->
     indent: (doc.match(HEREDOC_INDENT) or ['']).sort()[0]
     doc.replace(new RegExp("^" +indent, 'gm'), '')
        .replace(MULTILINER, "\\n")
        .replace(/"/g, '\\"')
 
-  # A source of ambiguity in our grammar was parameter lists in function
-  # definitions (as opposed to argument lists in function calls). Tag
-  # parameter identifiers in order to avoid this. Also, parameter lists can
-  # make use of splats.
+  # A source of ambiguity in our grammar used to be parameter lists in function
+  # definitions versus argument lists in function calls. Walk backwards, tagging
+  # parameters specially in order to make things easier for the parser.
   tag_parameters: ->
     return if @tag() isnt ')'
     i: 0
@@ -363,64 +368,85 @@ exports.Lexer: class Lexer
   close_indentation: ->
     @outdent_token(@indent)
 
-  # Error for when you try to use a forbidden word in JavaScript as
+  # The error for when you try to use a forbidden word in JavaScript as
   # an identifier.
   identifier_error: (word) ->
     throw new Error "SyntaxError: Reserved word \"$word\" on line ${@line + 1}"
 
-  # Error for when you try to assign to a reserved word in JavaScript,
+  # The error for when you try to assign to a reserved word in JavaScript,
   # like "function" or "default".
   assignment_error: ->
     throw new Error "SyntaxError: Reserved word \"${@value()}\" on line ${@line + 1} can't be assigned"
 
+  # Matches a balanced group such as a single or double-quoted string. Pass in
+  # a series of delimiters, all of which must be nested correctly within the
+  # contents of the string. This method allows us to have strings within
+  # interpolations within strings etc...
+  balanced_string: (str, delimited...) ->
+    levels: []
+    i: 0
+    while i < str.length
+      for pair in delimited
+        [open, close]: pair
+        if levels.length and starts str, '\\', i
+          i += 1
+          break
+        else if levels.length and starts(str, close, i) and levels[levels.length - 1] is pair
+          levels.pop()
+          i += close.length - 1
+          i += 1 unless levels.length
+          break
+        else if starts str, open, i
+          levels.push(pair)
+          i += open.length - 1
+          break
+      break unless levels.length
+      i += 1
+    throw new Error "SyntaxError: Unterminated ${levels.pop()[0]} starting on line ${@line + 1}" if levels.length
+    return false if i is 0
+    return str.substring(0, i)
+
   # Expand variables and expressions inside double-quoted strings using
-  # [ECMA Harmony's interpolation syntax](http://wiki.ecmascript.org/doku.php?id=strawman:string_interpolation).
+  # [ECMA Harmony's interpolation syntax](http://wiki.ecmascript.org/doku.php?id=strawman:string_interpolation)
+  # for substitution of bare variables as well as arbitrary expressions.
   #
   #     "Hello $name."
   #     "Hello ${name.capitalize()}."
   #
+  # If it encounters an interpolation, this method will recursively create a
+  # new Lexer, tokenize the interpolated contents, and merge them into the
+  # token stream.
   interpolate_string: (str) ->
     if str.length < 3 or not starts str, '"'
       @token 'STRING', str
     else
-      lexer:  new Lexer()
-      tokens: []
-      quote:  str.substring(0, 1)
-      i:      1
-      last_i: i
+      lexer:    new Lexer()
+      tokens:   []
+      quote:    str.substring(0, 1)
+      [i, pi]:  [1, 1]
       while i < str.length - 1
         if starts str, '\\', i
           i += 1
-        else
-          match: str.substring(i).match INTERPOLATION
-          if match
-            [group, interp]: match
-            interp: "this.${ interp.substring(1) }" if starts interp, '@'
-            tokens.push ['STRING', "$quote${ str.substring(last_i, i) }$quote"] if last_i < i
-            tokens.push ['IDENTIFIER', interp]
-            i += group.length - 1
-            last_i: i + 1
-          else
-            expression: @balanced_string str.substring(i), ['${', '}']
-            if expression and expression.length > 3
-              inner: expression.substring(2, expression.length - 1)
-              nested: lexer.tokenize "($inner)", {rewrite: no, line: @line}
-              nested.pop()
-              tokens.push ['STRING', "$quote${ str.substring(last_i, i) }$quote"] if last_i < i
-              tokens.push ['TOKENS', nested]
-              i += expression.length - 1
-              last_i: i + 1
+        else if match: str.substring(i).match INTERPOLATION
+          [group, interp]: match
+          interp: "this.${ interp.substring(1) }" if starts interp, '@'
+          tokens.push ['STRING', "$quote${ str.substring(pi, i) }$quote"] if pi < i
+          tokens.push ['IDENTIFIER', interp]
+          i += group.length - 1
+          pi: i + 1
+        else if (expr: @balanced_string str.substring(i), ['${', '}']) and expr.length > 3
+          inner: expr.substring(2, expr.length - 1)
+          nested: lexer.tokenize "($inner)", {rewrite: no, line: @line}
+          nested.pop()
+          tokens.push ['STRING', "$quote${ str.substring(pi, i) }$quote"] if pi < i
+          tokens.push ['TOKENS', nested]
+          i += expr.length - 1
+          pi: i + 1
         i += 1
-      tokens.push ['STRING', "$quote${ str.substring(last_i, i) }$quote"] if last_i < i and last_i < str.length - 1
-      if tokens.length > 1
-        for i in [tokens.length - 1..1]
-          [prev, tok]: [tokens[i - 1], tokens[i]]
-          if tok[0] is 'STRING' and prev[0] is 'STRING'
-            [prev, tok]: [prev[1].substring(1, prev[1].length - 1), tok[1].substring(1, tok[1].length - 1)]
-            tokens.splice i - 1, 2, ['STRING', "$quote$prev$tok$quote"]
+      tokens.push ['STRING', "$quote${ str.substring(pi, i) }$quote"] if pi < i and pi < str.length - 1
       for each, i in tokens
         if each[0] is 'TOKENS'
-          @token nested[0], nested[1] for nested in each[1]
+          @tokens: @tokens.concat each[1]
         else
           @token each[0], each[1]
         @token '+', '+' if i < tokens.length - 1