some edits to the interpolation path

Jeremy Ashkenas 2010-03-05 20:42:36 -05:00
parent 75be5eed62
commit d250e9e9cc
2 changed files with 80 additions and 60 deletions
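
The gist of the change: Lexer#tokenize now takes an options object instead of a bare `rewrite` boolean, and the string-interpolation regexes are hoisted into named constants. A minimal sketch of the lexer's surface after this commit; the require path and sample sources are illustrative assumptions, not taken from the diff:

    // Sketch only: the require path is an assumption about the repo layout.
    var Lexer = require('./lib/lexer').Lexer;
    var lexer = new Lexer();

    // Default behavior: the raw token stream is passed through the Rewriter.
    var rewritten = lexer.tokenize('greeting: "Hello $name."');

    // New options form: skip the rewriter pass. interpolate_string uses
    // this internally when tokenizing the body of a "${...}" expression.
    var raw = lexer.tokenize('("Hello ${name}")', {rewrite: false});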

lib/lexer.js

@@ -1,5 +1,5 @@
(function(){
-var ACCESSORS, ASSIGNMENT, BEFORE_WHEN, CALLABLE, CODE, COFFEE_KEYWORDS, COMMENT, COMMENT_CLEANER, HEREDOC, HEREDOC_INDENT, IDENTIFIER, JS, JS_CLEANER, JS_FORBIDDEN, JS_KEYWORDS, KEYWORDS, LAST_DENT, LAST_DENTS, Lexer, MULTILINER, MULTI_DENT, NOT_REGEX, NO_NEWLINE, NUMBER, OPERATOR, REGEX, RESERVED, Rewriter, STRING, STRING_NEWLINES, WHITESPACE, compact, count, include;
+var ACCESSORS, ASSIGNMENT, BEFORE_WHEN, CALLABLE, CODE, COFFEE_KEYWORDS, COMMENT, COMMENT_CLEANER, HEREDOC, HEREDOC_INDENT, IDENTIFIER, INTERPOLATED_EXPRESSION, INTERPOLATED_IDENTIFIER, JS, JS_CLEANER, JS_FORBIDDEN, JS_KEYWORDS, KEYWORDS, LAST_DENT, LAST_DENTS, Lexer, MULTILINER, MULTI_DENT, NOT_REGEX, NO_NEWLINE, NUMBER, OPERATOR, REGEX, RESERVED, Rewriter, STRING, STRING_NEWLINES, WHITESPACE, compact, count, include;
// The CoffeeScript Lexer. Uses a series of token-matching regexes to attempt
// matches against the beginning of the source code. When a match is found,
// a token is produced, we consume the match, and start again. Tokens are in the
@@ -45,6 +45,9 @@
LAST_DENTS = /\n([ \t]*)/g;
LAST_DENT = /\n([ \t]*)/;
ASSIGNMENT = /^(:|=)$/;
+// Interpolation matching regexes.
+INTERPOLATED_EXPRESSION = /(^|[\s\S]*?(?:[\\]|\\\\)?)(\${[\s\S]*?(?:[^\\]|\\\\)})/;
+INTERPOLATED_IDENTIFIER = /(^|[\s\S]*?(?:[\\]|\\\\)?)(\$([a-zA-Z_]\w*))/;
// Token cleaning regexes.
JS_CLEANER = /(^`|`$)/g;
MULTILINER = /\n/g;
@@ -76,7 +79,8 @@
exports.Lexer = (function() {
Lexer = function Lexer() { };
// Scan by attempting to match tokens one at a time. Slow and steady.
-Lexer.prototype.tokenize = function tokenize(code, rewrite) {
+Lexer.prototype.tokenize = function tokenize(code, options) {
+options = options || {};
this.code = code;
// The remainder of the source code.
this.i = 0;
@@ -94,10 +98,10 @@
this.extract_next_token();
}
this.close_indentation();
-if (((typeof rewrite !== "undefined" && rewrite !== null) ? rewrite : true)) {
-return (new Rewriter()).rewrite(this.tokens);
+if (options.rewrite === false) {
+return this.tokens;
}
-return this.tokens;
+return (new Rewriter()).rewrite(this.tokens);
};
// At every position, run through this list of attempted matches,
// short-circuiting if any of them succeed.
@@ -166,12 +170,11 @@
};
// Matches strings, including multi-line strings.
Lexer.prototype.string_token = function string_token() {
-var escaped, string;
+var string;
if (!((string = this.match(STRING, 1)))) {
return false;
}
-escaped = string.replace(STRING_NEWLINES, " \\\n");
-this.interpolate_string(escaped);
+this.interpolate_string(string.replace(STRING_NEWLINES, " \\\n"));
this.line += count(string, "\n");
this.i += string.length;
return true;
@@ -400,18 +403,21 @@
Lexer.prototype.assignment_error = function assignment_error() {
throw new Error('SyntaxError: Reserved word "' + this.value() + '" on line ' + this.line + ' can\'t be assigned');
};
-// Replace variables and expressions inside double-quoted strings.
-Lexer.prototype.interpolate_string = function interpolate_string(escaped) {
-var _a, _b, _c, _d, _e, _f, _g, _h, _i, _j, _k, _l, before, each, expression, expression_match, group, i, identifier, identifier_match, lexer, nested, quote, rewrite, tokens;
-if (escaped.length < 3 || escaped.indexOf('"') !== 0) {
-return this.token('STRING', escaped);
+// Expand variables and expressions inside double-quoted strings using
+// [ECMA Harmony's interpolation syntax](http://wiki.ecmascript.org/doku.php?id=strawman:string_interpolation).
+// "Hello $name."
+// "Hello ${name.capitalize()}."
+Lexer.prototype.interpolate_string = function interpolate_string(str) {
+var _a, _b, _c, _d, _e, _f, _g, _h, _i, _j, _k, _l, _m, before, contents, each, expression, expression_match, group, i, identifier, identifier_match, lexer, nested, prev, quote, tok, tokens;
+if (str.length < 3 || str.substring(0, 1) !== '"') {
+return this.token('STRING', str);
} else {
-lexer = null;
+lexer = new Lexer();
tokens = [];
-quote = escaped.substring(0, 1);
-escaped = escaped.substring(1, escaped.length - 1);
-while (escaped.length) {
-expression_match = escaped.match(/(^|[\s\S]*?(?:[\\]|\\\\)?)(\${[\s\S]*?(?:[^\\]|\\\\)})/);
+quote = str.substring(0, 1);
+str = str.substring(1, str.length - 1);
+while (str.length) {
+expression_match = str.match(INTERPOLATED_EXPRESSION);
if (expression_match) {
_a = expression_match;
group = _a[0];
@@ -425,16 +431,15 @@
if (before.length) {
tokens.push(['STRING', quote + before + quote]);
}
-if (!(typeof lexer !== "undefined" && lexer !== null)) {
-lexer = new Lexer();
-}
-nested = lexer.tokenize('(' + expression.substring(2, expression.length - 1) + ')', (rewrite = false));
+nested = lexer.tokenize('(' + expression.substring(2, expression.length - 1) + ')', {
+rewrite: false
+});
nested.pop();
tokens.push(['TOKENS', nested]);
}
-escaped = escaped.substring(group.length);
+str = str.substring(group.length);
} else {
-identifier_match = escaped.match(/(^|[\s\S]*?(?:[\\]|\\\\)?)(\$([a-zA-Z_]\w*))/);
+identifier_match = str.match(INTERPOLATED_IDENTIFIER);
if (identifier_match) {
_b = identifier_match;
group = _b[0];
@@ -450,27 +455,33 @@
}
tokens.push(['IDENTIFIER', identifier.substring(1)]);
}
-escaped = escaped.substring(group.length);
+str = str.substring(group.length);
} else {
-tokens.push(['STRING', quote + escaped + quote]);
-escaped = '';
+tokens.push(['STRING', quote + str + quote]);
+str = '';
}
}
}
if (tokens.length > 1) {
_e = tokens.length - 1; _f = 1;
for (_d = 0, i = _e; (_e <= _f ? i <= _f : i >= _f); (_e <= _f ? i += 1 : i -= 1), _d++) {
-tokens[i][0] === 'STRING' && tokens[i - 1][0] === 'STRING' ? tokens.splice(i - 1, 2, ['STRING', quote + tokens[i - 1][1].substring(1, tokens[i - 1][1].length - 1) + tokens[i][1].substring(1, tokens[i][1].length - 1) + quote]) : null;
+_g = [tokens[i - 1], tokens[i]];
+prev = _g[0];
+tok = _g[1];
+if (tok[0] === 'STRING' && prev[0] === 'STRING') {
+contents = quote + prev[1].substring(1, prev[1].length - 1) + tok[1].substring(1, tok[1].length - 1) + quote;
+tokens.splice(i - 1, 2, ['STRING', contents]);
+}
}
}
-_g = []; _h = tokens;
-for (i = 0, _i = _h.length; i < _i; i++) {
-each = _h[i];
-_g.push((function() {
+_h = []; _i = tokens;
+for (i = 0, _j = _i.length; i < _j; i++) {
+each = _i[i];
+_h.push((function() {
if (each[0] === 'TOKENS') {
-_j = each[1];
-for (_k = 0, _l = _j.length; _k < _l; _k++) {
-nested = _j[_k];
+_k = each[1];
+for (_l = 0, _m = _k.length; _l < _m; _l++) {
+nested = _k[_l];
this.token(nested[0], nested[1]);
}
} else {
@@ -481,7 +492,7 @@
}
}).call(this));
}
-return _g;
+return _h;
}
};
// Helpers
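
Both new regexes capture the literal text before the interpolation (group 1) and the interpolation itself (group 2); a trailing backslash in group 1 marks the `$` as escaped. A quick illustration of the capture groups, using the regexes exactly as defined above (the sample strings are invented):

    var INTERPOLATED_EXPRESSION = /(^|[\s\S]*?(?:[\\]|\\\\)?)(\${[\s\S]*?(?:[^\\]|\\\\)})/;
    var INTERPOLATED_IDENTIFIER = /(^|[\s\S]*?(?:[\\]|\\\\)?)(\$([a-zA-Z_]\w*))/;

    var m = 'Hello ${name.capitalize()}.'.match(INTERPOLATED_EXPRESSION);
    // m[1] === 'Hello '                -- emitted as a STRING fragment
    // m[2] === '${name.capitalize()}'  -- handed off to a nested Lexer

    m = 'Hello $name.'.match(INTERPOLATED_IDENTIFIER);
    // m[1] === 'Hello '
    // m[2] === '$name'
    // m[3] === 'name'                  -- emitted as an IDENTIFIER token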

src/lexer.coffee

@@ -71,6 +71,10 @@ LAST_DENTS : /\n([ \t]*)/g
LAST_DENT : /\n([ \t]*)/
ASSIGNMENT : /^(:|=)$/
+# Interpolation matching regexes.
+INTERPOLATED_EXPRESSION: /(^|[\s\S]*?(?:[\\]|\\\\)?)(\${[\s\S]*?(?:[^\\]|\\\\)})/
+INTERPOLATED_IDENTIFIER: /(^|[\s\S]*?(?:[\\]|\\\\)?)(\$([a-zA-Z_]\w*))/
# Token cleaning regexes.
JS_CLEANER : /(^`|`$)/g
MULTILINER : /\n/g
@@ -112,7 +116,8 @@ BEFORE_WHEN: ['INDENT', 'OUTDENT', 'TERMINATOR']
exports.Lexer: class Lexer
# Scan by attempting to match tokens one at a time. Slow and steady.
-tokenize: (code, rewrite) ->
+tokenize: (code, options) ->
+options ||= {}
@code : code # The remainder of the source code.
@i : 0 # Current character position we're parsing.
@line : 0 # The current line.
@@ -123,8 +128,8 @@ exports.Lexer: class Lexer
@chunk: @code.slice(@i)
@extract_next_token()
@close_indentation()
-return (new Rewriter()).rewrite @tokens if (rewrite ? true)
-return @tokens
+return @tokens if options.rewrite is no
+(new Rewriter()).rewrite @tokens
# At every position, run through this list of attempted matches,
# short-circuiting if any of them succeed.
@@ -166,8 +171,7 @@ exports.Lexer: class Lexer
# Matches strings, including multi-line strings.
string_token: ->
return false unless string: @match STRING, 1
-escaped: string.replace STRING_NEWLINES, " \\\n"
-@interpolate_string escaped
+@interpolate_string string.replace STRING_NEWLINES, " \\\n"
@line += count string, "\n"
@i += string.length
true
@@ -341,30 +345,34 @@ exports.Lexer: class Lexer
assignment_error: ->
throw new Error 'SyntaxError: Reserved word "' + @value() + '" on line ' + @line + ' can\'t be assigned'
-# Replace variables and expressions inside double-quoted strings.
-interpolate_string: (escaped) ->
-if escaped.length < 3 or escaped.indexOf('"') isnt 0
-@token 'STRING', escaped
+# Expand variables and expressions inside double-quoted strings using
+# [ECMA Harmony's interpolation syntax](http://wiki.ecmascript.org/doku.php?id=strawman:string_interpolation).
+#
+# "Hello $name."
+# "Hello ${name.capitalize()}."
+#
+interpolate_string: (str) ->
+if str.length < 3 or str.substring(0, 1) isnt '"'
+@token 'STRING', str
else
-lexer: null
+lexer: new Lexer()
tokens: []
-quote: escaped.substring(0, 1)
-escaped: escaped.substring(1, escaped.length - 1)
-while escaped.length
-expression_match: escaped.match /(^|[\s\S]*?(?:[\\]|\\\\)?)(\${[\s\S]*?(?:[^\\]|\\\\)})/
+quote: str.substring(0, 1)
+str: str.substring(1, str.length - 1)
+while str.length
+expression_match: str.match INTERPOLATED_EXPRESSION
if expression_match
[group, before, expression]: expression_match
if before.substring(before.length - 1) is '\\'
tokens.push ['STRING', quote + before.substring(0, before.length - 1) + expression + quote] if before.length
else
tokens.push ['STRING', quote + before + quote] if before.length
-lexer: new Lexer() if not lexer?
-nested: lexer.tokenize '(' + expression.substring(2, expression.length - 1) + ')', rewrite: no
+nested: lexer.tokenize '(' + expression.substring(2, expression.length - 1) + ')', {rewrite: no}
nested.pop()
tokens.push ['TOKENS', nested]
-escaped: escaped.substring(group.length)
+str: str.substring(group.length)
else
-identifier_match: escaped.match /(^|[\s\S]*?(?:[\\]|\\\\)?)(\$([a-zA-Z_]\w*))/
+identifier_match: str.match INTERPOLATED_IDENTIFIER
if identifier_match
[group, before, identifier]: identifier_match
if before.substring(before.length - 1) is '\\'
@@ -372,15 +380,16 @@ exports.Lexer: class Lexer
else
tokens.push ['STRING', quote + before + quote] if before.length
tokens.push ['IDENTIFIER', identifier.substring(1)]
-escaped: escaped.substring(group.length)
+str: str.substring(group.length)
else
-tokens.push ['STRING', quote + escaped + quote]
-escaped: ''
+tokens.push ['STRING', quote + str + quote]
+str: ''
if tokens.length > 1
for i in [tokens.length - 1..1]
-if tokens[i][0] is 'STRING' and tokens[i - 1][0] is 'STRING'
-tokens.splice i - 1, 2, ['STRING', quote + tokens[i - 1][1].substring(1, tokens[i - 1][1].length - 1) +
-tokens[i][1].substring(1, tokens[i][1].length - 1) + quote]
+[prev, tok]: [tokens[i - 1], tokens[i]]
+if tok[0] is 'STRING' and prev[0] is 'STRING'
+contents: quote + prev[1].substring(1, prev[1].length - 1) + tok[1].substring(1, tok[1].length - 1) + quote
+tokens.splice i - 1, 2, ['STRING', contents]
for each, i in tokens
if each[0] is 'TOKENS'
@token nested[0], nested[1] for nested in each[1]
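
After scanning, interpolate_string merges adjacent STRING fragments from the end of the token list backwards, so an escaped `$` doesn't break the literal into several concatenated pieces. The splice loop above, extracted into a standalone sketch (the function name is invented):

    // Merge neighbouring STRING tokens; each token value includes its quotes.
    function mergeStrings(tokens, quote) {
      // Walk backwards so splicing never disturbs unvisited indices.
      for (var i = tokens.length - 1; i >= 1; i -= 1) {
        var prev = tokens[i - 1], tok = tokens[i];
        if (tok[0] === 'STRING' && prev[0] === 'STRING') {
          // Strip each literal's quotes and re-wrap the joined contents.
          var contents = quote + prev[1].substring(1, prev[1].length - 1) +
                                 tok[1].substring(1, tok[1].length - 1) + quote;
          tokens.splice(i - 1, 2, ['STRING', contents]);
        }
      }
      return tokens;
    }

    // mergeStrings([['STRING', '"Hello "'], ['STRING', '"world"']], '"')
    // => [['STRING', '"Hello world"']]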