Allowing expressions to be used inside strings; syntax is ${...}

Stan Angeloff 2010-03-06 02:15:27 +02:00 committed by Jeremy Ashkenas
parent fe7d5dfd19
commit e2f86678a4
3 changed files with 109 additions and 31 deletions
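
What the change enables: inside a double-quoted string, `${...}` now wraps an arbitrary expression that is lexed and spliced into the token stream, so the string compiles down to plain concatenation. As a rough sketch of the semantics (plain JavaScript mirroring the new tests at the bottom of this commit, not the compiler's actual output):

    // CoffeeScript (from the new tests):
    //   "values: ${list.join(', ')}, length: ${list.length}."
    // conceptually compiles to concatenation of the pieces:
    var list = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9];
    console.log('values: ' + list.join(', ') + ', length: ' + list.length + '.');
    // => "values: 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, length: 10."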


@@ -76,7 +76,7 @@
 exports.Lexer = (function() {
   Lexer = function Lexer() { };
   // Scan by attempting to match tokens one at a time. Slow and steady.
-  Lexer.prototype.tokenize = function tokenize(code) {
+  Lexer.prototype.tokenize = function tokenize(code, rewrite) {
     this.code = code;
     // The remainder of the source code.
     this.i = 0;
@@ -94,7 +94,10 @@
       this.extract_next_token();
     }
     this.close_indentation();
-    return (new Rewriter()).rewrite(this.tokens);
+    if (((typeof rewrite !== "undefined" && rewrite !== null) ? rewrite : true)) {
+      return (new Rewriter()).rewrite(this.tokens);
+    }
+    return this.tokens;
   };
   // At every position, run through this list of attempted matches,
   // short-circuiting if any of them succeed.
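
The `rewrite` flag added above defaults to true, so existing callers see no change; `interpolate_string` (below) passes `false` to get raw tokens for a nested expression without running the Rewriter over them. A usage sketch with hypothetical inputs:

    var lexer = new Lexer();
    // Default path, unchanged: the token stream is rewritten first.
    var rewritten = lexer.tokenize('print "hi"');
    // New path: raw tokens, used when lexing a nested "${...}" body.
    var raw = lexer.tokenize('(a + b)', false);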
@@ -397,55 +400,88 @@
   Lexer.prototype.assignment_error = function assignment_error() {
     throw new Error('SyntaxError: Reserved word "' + this.value() + '" on line ' + this.line + ' can\'t be assigned');
   };
-  // Replace variables and block calls inside double-quoted strings.
+  // Replace variables and expressions inside double-quoted strings.
   Lexer.prototype.interpolate_string = function interpolate_string(escaped) {
-    var _a, _b, _c, _d, _e, _f, _g, _h, before, each, group, i, identifier, identifier_match, quote, tokens;
+    var _a, _b, _c, _d, _e, _f, _g, _h, _i, _j, _k, _l, before, each, expression, expression_match, group, i, identifier, identifier_match, lexer, nested, quote, rewrite, tokens;
     if (escaped.length < 3 || escaped.indexOf('"') !== 0) {
       return this.token('STRING', escaped);
     } else {
+      lexer = null;
       tokens = [];
       quote = escaped.substring(0, 1);
       escaped = escaped.substring(1, escaped.length - 1);
       while (escaped.length) {
-        identifier_match = escaped.match(/(^|[\s\S]*?(?:[\\]|\\\\)?)(\$([a-zA-Z_]\w*))/);
-        if (identifier_match) {
-          _a = identifier_match;
+        expression_match = escaped.match(/(^|[\s\S]*?(?:[\\]|\\\\)?)(\${[\s\S]*?(?:[^\\]|\\\\)})/);
+        if (expression_match) {
+          _a = expression_match;
           group = _a[0];
           before = _a[1];
-          identifier = _a[2];
+          expression = _a[2];
           if (before.substring(before.length - 1) === '\\') {
             if (before.length) {
-              tokens.push(['STRING', quote + before.substring(0, before.length - 1) + identifier + quote]);
+              tokens.push(['STRING', quote + before.substring(0, before.length - 1) + expression + quote]);
             }
           } else {
             if (before.length) {
              tokens.push(['STRING', quote + before + quote]);
             }
-            tokens.push(['IDENTIFIER', identifier.substring(1)]);
+            if (!(typeof lexer !== "undefined" && lexer !== null)) {
+              lexer = new Lexer();
+            }
+            nested = lexer.tokenize('(' + expression.substring(2, expression.length - 1) + ')', (rewrite = false));
+            nested.pop();
+            tokens.push(['TOKENS', nested]);
           }
           escaped = escaped.substring(group.length);
         } else {
-          tokens.push(['STRING', quote + escaped + quote]);
-          escaped = '';
+          identifier_match = escaped.match(/(^|[\s\S]*?(?:[\\]|\\\\)?)(\$([a-zA-Z_]\w*))/);
+          if (identifier_match) {
+            _b = identifier_match;
+            group = _b[0];
+            before = _b[1];
+            identifier = _b[2];
+            if (before.substring(before.length - 1) === '\\') {
+              if (before.length) {
+                tokens.push(['STRING', quote + before.substring(0, before.length - 1) + identifier + quote]);
+              }
+            } else {
+              if (before.length) {
+                tokens.push(['STRING', quote + before + quote]);
+              }
+              tokens.push(['IDENTIFIER', identifier.substring(1)]);
+            }
+            escaped = escaped.substring(group.length);
+          } else {
+            tokens.push(['STRING', quote + escaped + quote]);
+            escaped = '';
+          }
         }
       }
       if (tokens.length > 1) {
-        _d = tokens.length - 1; _e = 1;
-        for (_c = 0, i = _d; (_d <= _e ? i <= _e : i >= _e); (_d <= _e ? i += 1 : i -= 1), _c++) {
+        _e = tokens.length - 1; _f = 1;
+        for (_d = 0, i = _e; (_e <= _f ? i <= _f : i >= _f); (_e <= _f ? i += 1 : i -= 1), _d++) {
           tokens[i][0] === 'STRING' && tokens[i - 1][0] === 'STRING' ? tokens.splice(i - 1, 2, ['STRING', quote + tokens[i - 1][1].substring(1, tokens[i - 1][1].length - 1) + tokens[i][1].substring(1, tokens[i][1].length - 1) + quote]) : null;
         }
       }
-      _f = []; _g = tokens;
-      for (i = 0, _h = _g.length; i < _h; i++) {
-        each = _g[i];
-        _f.push((function() {
-          this.token(each[0], each[1]);
+      _g = []; _h = tokens;
+      for (i = 0, _i = _h.length; i < _i; i++) {
+        each = _h[i];
+        _g.push((function() {
+          if (each[0] === 'TOKENS') {
+            _j = each[1];
+            for (_k = 0, _l = _j.length; _k < _l; _k++) {
+              nested = _j[_k];
+              this.token(nested[0], nested[1]);
+            }
+          } else {
+            this.token(each[0], each[1]);
+          }
           if (i < tokens.length - 1) {
            return this.token('+', '+');
          }
         }).call(this));
       }
-      return _f;
+      return _g;
     }
   };
   // Helpers
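
That closes the compiled-JavaScript side of the change; the CoffeeScript source follows. The heavy lifting is done by the new `expression_match` regex: capture group 1 is the literal text before the interpolation (possibly ending in an escaping backslash), group 2 is the `${...}` body handed to the nested lexer. A standalone sketch of how it splits input, using hypothetical strings:

    var expression = /(^|[\s\S]*?(?:[\\]|\\\\)?)(\${[\s\S]*?(?:[^\\]|\\\\)})/;

    var m = 'values: ${list.join(", ")}, done'.match(expression);
    console.log(m[1]); // 'values: '           -- literal prefix
    console.log(m[2]); // '${list.join(", ")}' -- goes to the nested lexer

    // A prefix ending in a backslash marks the interpolation as escaped,
    // so interpolate_string emits the '${...}' text literally instead:
    var m2 = 'Escaping \\${in} middle'.match(expression);
    console.log(m2[1]); // 'Escaping \'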


@@ -112,7 +112,7 @@ BEFORE_WHEN: ['INDENT', 'OUTDENT', 'TERMINATOR']
 exports.Lexer: class Lexer
   # Scan by attempting to match tokens one at a time. Slow and steady.
-  tokenize: (code) ->
+  tokenize: (code, rewrite) ->
     @code : code # The remainder of the source code.
     @i : 0 # Current character position we're parsing.
     @line : 0 # The current line.
@@ -123,7 +123,8 @@ exports.Lexer: class Lexer
       @chunk: @code.slice(@i)
       @extract_next_token()
     @close_indentation()
-    (new Rewriter()).rewrite @tokens
+    return (new Rewriter()).rewrite @tokens if (rewrite ? true)
+    return @tokens
   # At every position, run through this list of attempted matches,
   # short-circuiting if any of them succeed.
@@ -340,34 +341,51 @@ exports.Lexer: class Lexer
   assignment_error: ->
     throw new Error 'SyntaxError: Reserved word "' + @value() + '" on line ' + @line + ' can\'t be assigned'
-  # Replace variables and block calls inside double-quoted strings.
+  # Replace variables and expressions inside double-quoted strings.
   interpolate_string: (escaped) ->
     if escaped.length < 3 or escaped.indexOf('"') isnt 0
       @token 'STRING', escaped
     else
+      lexer: null
       tokens: []
       quote: escaped.substring(0, 1)
       escaped: escaped.substring(1, escaped.length - 1)
       while escaped.length
-        identifier_match: escaped.match /(^|[\s\S]*?(?:[\\]|\\\\)?)(\$([a-zA-Z_]\w*))/
-        if identifier_match
-          [group, before, identifier]: identifier_match
+        expression_match: escaped.match /(^|[\s\S]*?(?:[\\]|\\\\)?)(\${[\s\S]*?(?:[^\\]|\\\\)})/
+        if expression_match
+          [group, before, expression]: expression_match
           if before.substring(before.length - 1) is '\\'
-            tokens.push ['STRING', quote + before.substring(0, before.length - 1) + identifier + quote] if before.length
+            tokens.push ['STRING', quote + before.substring(0, before.length - 1) + expression + quote] if before.length
           else
             tokens.push ['STRING', quote + before + quote] if before.length
-            tokens.push ['IDENTIFIER', identifier.substring(1)]
+            lexer: new Lexer() if not lexer?
+            nested: lexer.tokenize '(' + expression.substring(2, expression.length - 1) + ')', rewrite: no
+            nested.pop()
+            tokens.push ['TOKENS', nested]
           escaped: escaped.substring(group.length)
         else
-          tokens.push ['STRING', quote + escaped + quote]
-          escaped: ''
+          identifier_match: escaped.match /(^|[\s\S]*?(?:[\\]|\\\\)?)(\$([a-zA-Z_]\w*))/
+          if identifier_match
+            [group, before, identifier]: identifier_match
+            if before.substring(before.length - 1) is '\\'
+              tokens.push ['STRING', quote + before.substring(0, before.length - 1) + identifier + quote] if before.length
+            else
+              tokens.push ['STRING', quote + before + quote] if before.length
+              tokens.push ['IDENTIFIER', identifier.substring(1)]
+            escaped: escaped.substring(group.length)
+          else
+            tokens.push ['STRING', quote + escaped + quote]
+            escaped: ''
       if tokens.length > 1
         for i in [tokens.length - 1..1]
           if tokens[i][0] is 'STRING' and tokens[i - 1][0] is 'STRING'
             tokens.splice i - 1, 2, ['STRING', quote + tokens[i - 1][1].substring(1, tokens[i - 1][1].length - 1) +
               tokens[i][1].substring(1, tokens[i][1].length - 1) + quote]
       for each, i in tokens
-        @token each[0], each[1]
+        if each[0] is 'TOKENS'
+          @token nested[0], nested[1] for nested in each[1]
+        else
+          @token each[0], each[1]
         @token '+', '+' if i < tokens.length - 1
   # Helpers
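
Once the fragments are collected, adjacent `STRING` fragments are first merged (the backwards loop above) so escaped interpolations don't leave needless `"a" + "b"` joins; then everything is emitted separated by `+` tokens, with each `TOKENS` entry spliced in flat. A minimal sketch of the merge pass, in plain JavaScript over hypothetical token arrays:

    var quote = '"';
    var tokens = [['STRING', '"Escaping "'], ['STRING', '"${in} middle"'], ['IDENTIFIER', 'x']];
    // Walk backwards so splicing never disturbs indices not yet visited.
    for (var i = tokens.length - 1; i >= 1; i--) {
      if (tokens[i][0] === 'STRING' && tokens[i - 1][0] === 'STRING') {
        tokens.splice(i - 1, 2, ['STRING',
          quote + tokens[i - 1][1].substring(1, tokens[i - 1][1].length - 1)
                + tokens[i][1].substring(1, tokens[i][1].length - 1) + quote]);
      }
    }
    console.log(tokens);
    // => [ ['STRING', '"Escaping ${in} middle"'], ['IDENTIFIER', 'x'] ]

The updated tests follow.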


@@ -1,18 +1,42 @@
 hello: 'Hello'
 world: 'World'
 ok '$hello $world!' is '$hello $world!'
+ok '${hello} ${world}!' is '${hello} ${world}!'
 ok "$hello $world!" is 'Hello World!'
+ok "${hello} ${world}!" is 'Hello World!'
 ok "[$hello$world]" is '[HelloWorld]'
+ok "[${hello}${world}]" is '[HelloWorld]'
 ok "$hello$$world" is 'Hello$World'
+# ok "${hello}$${world}" is 'Hello$World'
 [s, t, r, i, n, g]: ['s', 't', 'r', 'i', 'n', 'g']
 ok "$s$t$r$i$n$g" is 'string'
+ok "${s}${t}${r}${i}${n}${g}" is 'string'
 ok "\\$s\\$t\\$r\\$i\\$n\\$g" is '$s$t$r$i$n$g'
+ok "\\${s}\\${t}\\${r}\\${i}\\${n}\\${g}" is '${s}${t}${r}${i}${n}${g}'
 ok "\\$string" is '$string'
+ok "\\${string}" is '${string}'
 ok "\\$Escaping first" is '$Escaping first'
+ok "\\${Escaping} first" is '${Escaping} first'
 ok "Escaping \\$in middle" is 'Escaping $in middle'
+ok "Escaping \\${in} middle" is 'Escaping ${in} middle'
 ok "Escaping \\$last" is 'Escaping $last'
+ok "Escaping \\${last}" is 'Escaping ${last}'
 ok "$$" is '$$'
+ok "${}" is '${}'
 ok "\\\\$$" is '\\\\$$'
+ok "\\\\${}" is '\\\\${}'
 ok "I won $20 last night." is 'I won $20 last night.'
+ok "I won $${20} last night." is 'I won $20 last night.'
 ok "I won $#20 last night." is 'I won $#20 last night.'
+ok "I won $${'#20'} last night." is 'I won $#20 last night.'
+ok "${hello + world}" is 'HelloWorld'
+ok "${hello + ' ' + world + '!'}" is 'Hello World!'
+list: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
+ok "values: ${list.join(', ')}, length: ${list.length}." is 'values: 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, length: 10.'
+ok "values: ${list.join ' '}" is 'values: 0 1 2 3 4 5 6 7 8 9'