Rewriting lexer.coffee to accept nested string interpolations.

This commit is contained in:
Stan Angeloff 2010-03-07 14:56:27 +02:00 committed by Jeremy Ashkenas
parent 1602e0e823
commit f74fae58e3
3 changed files with 99 additions and 74 deletions

View File

@ -34,7 +34,7 @@
IDENTIFIER = /^([a-zA-Z$_](\w|\$)*)/;
NUMBER = /^(\b((0(x|X)[0-9a-fA-F]+)|([0-9]+(\.[0-9]+)?(e[+\-]?[0-9]+)?)))\b/i;
HEREDOC = /^("{6}|'{6}|"{3}\n?([\s\S]*?)\n?([ \t]*)"{3}|'{3}\n?([\s\S]*?)\n?([ \t]*)'{3})/;
INTERPOLATION = /(^|[\s\S]*?(?:[\\]|\\\\)?)\$([a-zA-Z_@]\w*|{[\s\S]*?(?:[^\\]|\\\\)})/;
INTERPOLATION = /^\$([a-zA-Z_@]\w*)/;
OPERATOR = /^([+\*&|\/\-%=<>:!?]+)/;
WHITESPACE = /^([ \t]+)/;
COMMENT = /^(((\n?[ \t]*)?#[^\n]*)+)/;
@ -217,30 +217,30 @@
};
// Matches a balanced group such as a single or double-quoted string. Pass in
// a series of delimiters, all of which must be balanced correctly within the
// token's contents.
Lexer.prototype.balanced_token = function balanced_token() {
// string.
Lexer.prototype.balanced_string = function balanced_string(str) {
var _a, _b, _c, _d, close, delimited, i, levels, open, pair;
delimited = Array.prototype.slice.call(arguments, 0);
delimited = Array.prototype.slice.call(arguments, 1);
levels = [];
i = 0;
while (i < this.chunk.length) {
while (i < str.length) {
_a = delimited;
for (_b = 0, _c = _a.length; _b < _c; _b++) {
pair = _a[_b];
_d = pair;
open = _d[0];
close = _d[1];
if (levels.length && starts(this.chunk, '\\', i)) {
if (levels.length && starts(str, '\\', i)) {
i += 1;
break;
} else if (levels.length && starts(this.chunk, close, i) && levels[levels.length - 1] === pair) {
} else if (levels.length && starts(str, close, i) && levels[levels.length - 1] === pair) {
levels.pop();
i += close.length - 1;
if (!(levels.length)) {
i += 1;
}
break;
} else if (starts(this.chunk, open, i)) {
} else if (starts(str, open, i)) {
levels.push(pair);
i += open.length - 1;
break;
@ -257,7 +257,13 @@
if (i === 0) {
return false;
}
return this.chunk.substring(0, i);
return str.substring(0, i);
};
// Matches a balanced string within the token's contents.
Lexer.prototype.balanced_token = function balanced_token() {
var delimited;
delimited = Array.prototype.slice.call(arguments, 0);
return this.balanced_string.apply(this, [this.chunk].concat(delimited));
};
// Matches and consumes comments.
Lexer.prototype.comment_token = function comment_token() {
@ -453,50 +459,55 @@
// "Hello $name."
// "Hello ${name.capitalize()}."
Lexer.prototype.interpolate_string = function interpolate_string(str) {
var _a, _b, _c, _d, _e, _f, _g, _h, _i, _j, _k, _l, _m, before, each, group, i, inner, interp, lexer, match, nested, prev, quote, tok, tokens;
var _a, _b, _c, _d, _e, _f, _g, _h, _i, _j, _k, _l, _m, each, expression, group, i, inner, interp, last_i, lexer, match, nested, prev, quote, tok, tokens;
if (str.length < 3 || !starts(str, '"')) {
return this.token('STRING', str);
} else {
lexer = new Lexer();
tokens = [];
quote = str.substring(0, 1);
str = str.substring(1, str.length - 1);
while (str.length) {
match = str.match(INTERPOLATION);
if (match) {
_a = match;
group = _a[0];
before = _a[1];
interp = _a[2];
if (starts(before, '\\', before.length - 1)) {
prev = before.substring(0, before.length - 1);
if (before.length) {
tokens.push(['STRING', quote + prev + "$" + interp + quote]);
i = 1;
last_i = i;
while (i < str.length - 1) {
if (starts(str, '\\', i)) {
i += 1;
} else {
match = str.substring(i).match(INTERPOLATION);
if (match) {
_a = match;
group = _a[0];
interp = _a[1];
if (starts(interp, '@')) {
interp = "this." + (interp.substring(1));
}
if (last_i < i) {
tokens.push(['STRING', quote + (str.substring(last_i, i)) + quote]);
}
tokens.push(['IDENTIFIER', interp]);
i += group.length - 1;
last_i = i + 1;
} else {
if (before.length) {
tokens.push(['STRING', quote + before + quote]);
}
if (starts(interp, '{')) {
inner = interp.substring(1, interp.length - 1);
expression = this.balanced_string(str.substring(i), ['${', '}']);
if (expression && expression.length > 3) {
inner = expression.substring(2, expression.length - 1);
nested = lexer.tokenize("(" + inner + ")", {
rewrite: false,
line: this.line
});
nested.pop();
tokens.push(['TOKENS', nested]);
} else {
if (starts(interp, '@')) {
interp = "this." + (interp.substring(1));
if (last_i < i) {
tokens.push(['STRING', quote + (str.substring(last_i, i)) + quote]);
}
tokens.push(['IDENTIFIER', interp]);
tokens.push(['TOKENS', nested]);
i += expression.length - 1;
last_i = i + 1;
}
}
str = str.substring(group.length);
} else {
tokens.push(['STRING', quote + str + quote]);
str = '';
}
i += 1;
}
if (last_i < i && last_i < str.length - 1) {
tokens.push(['STRING', quote + (str.substring(last_i, i)) + quote]);
}
if (tokens.length > 1) {
_d = tokens.length - 1; _e = 1;

View File

@ -59,7 +59,7 @@ JS_FORBIDDEN: JS_KEYWORDS.concat RESERVED
IDENTIFIER : /^([a-zA-Z$_](\w|\$)*)/
NUMBER : /^(\b((0(x|X)[0-9a-fA-F]+)|([0-9]+(\.[0-9]+)?(e[+\-]?[0-9]+)?)))\b/i
HEREDOC : /^("{6}|'{6}|"{3}\n?([\s\S]*?)\n?([ \t]*)"{3}|'{3}\n?([\s\S]*?)\n?([ \t]*)'{3})/
INTERPOLATION : /(^|[\s\S]*?(?:[\\]|\\\\)?)\$([a-zA-Z_@]\w*|{[\s\S]*?(?:[^\\]|\\\\)})/
INTERPOLATION : /^\$([a-zA-Z_@]\w*)/
OPERATOR : /^([+\*&|\/\-%=<>:!?]+)/
WHITESPACE : /^([ \t]+)/
COMMENT : /^(((\n?[ \t]*)?#[^\n]*)+)/
@ -199,22 +199,22 @@ exports.Lexer: class Lexer
# Matches a balanced group such as a single or double-quoted string. Pass in
# a series of delimiters, all of which must be balanced correctly within the
# token's contents.
balanced_token: (delimited...) ->
# string.
balanced_string: (str, delimited...) ->
levels: []
i: 0
while i < @chunk.length
while i < str.length
for pair in delimited
[open, close]: pair
if levels.length and starts @chunk, '\\', i
if levels.length and starts str, '\\', i
i += 1
break
else if levels.length and starts(@chunk, close, i) and levels[levels.length - 1] is pair
else if levels.length and starts(str, close, i) and levels[levels.length - 1] is pair
levels.pop()
i += close.length - 1
i += 1 unless levels.length
break
else if starts @chunk, open, i
else if starts str, open, i
levels.push(pair)
i += open.length - 1
break
@ -222,7 +222,11 @@ exports.Lexer: class Lexer
i += 1
throw new Error "SyntaxError: Unterminated ${levels.pop()[0]} starting on line ${@line + 1}" if levels.length
return false if i is 0
return @chunk.substring(0, i)
return str.substring(0, i)
# Matches a balanced string within the token's contents.
balanced_token: (delimited...) ->
@balanced_string @chunk, delimited...
# Matches and consumes comments.
comment_token: ->
@ -382,28 +386,32 @@ exports.Lexer: class Lexer
lexer: new Lexer()
tokens: []
quote: str.substring(0, 1)
str: str.substring(1, str.length - 1)
while str.length
match: str.match INTERPOLATION
if match
[group, before, interp]: match
if starts before, '\\', before.length - 1
prev: before.substring(0, before.length - 1)
tokens.push ['STRING', "$quote$prev$$interp$quote"] if before.length
i: 1
last_i: i
while i < str.length - 1
if starts str, '\\', i
i += 1
else
match: str.substring(i).match INTERPOLATION
if match
[group, interp]: match
interp: "this.${ interp.substring(1) }" if starts interp, '@'
tokens.push ['STRING', "$quote${ str.substring(last_i, i) }$quote"] if last_i < i
tokens.push ['IDENTIFIER', interp]
i += group.length - 1
last_i: i + 1
else
tokens.push ['STRING', "$quote$before$quote"] if before.length
if starts interp, '{'
inner: interp.substring(1, interp.length - 1)
expression: @balanced_string str.substring(i), ['${', '}']
if expression and expression.length > 3
inner: expression.substring(2, expression.length - 1)
nested: lexer.tokenize "($inner)", {rewrite: no, line: @line}
nested.pop()
tokens.push ['STRING', "$quote${ str.substring(last_i, i) }$quote"] if last_i < i
tokens.push ['TOKENS', nested]
else
interp: "this.${ interp.substring(1) }" if starts interp, '@'
tokens.push ['IDENTIFIER', interp]
str: str.substring(group.length)
else
tokens.push ['STRING', "$quote$str$quote"]
str: ''
i += expression.length - 1
last_i: i + 1
i += 1
tokens.push ['STRING', "$quote${ str.substring(last_i, i) }$quote"] if last_i < i and last_i < str.length - 1
if tokens.length > 1
for i in [tokens.length - 1..1]
[prev, tok]: [tokens[i - 1], tokens[i]]

View File

@ -14,22 +14,23 @@ ok "$hello ${ 1 + 2 } $world" is "Hello 3 World"
[s, t, r, i, n, g]: ['s', 't', 'r', 'i', 'n', 'g']
ok "$s$t$r$i$n$g" is 'string'
ok "${s}${t}${r}${i}${n}${g}" is 'string'
ok "\\$s\\$t\\$r\\$i\\$n\\$g" is '$s$t$r$i$n$g'
ok "\\${s}\\${t}\\${r}\\${i}\\${n}\\${g}" is '${s}${t}${r}${i}${n}${g}'
ok "\\$string" is '$string'
ok "\\${string}" is '${string}'
ok "\$s\$t\$r\$i\$n\$g" is '$s$t$r$i$n$g'
ok "\\$s\\$t\\$r\\$i\\$n\\$g" is '\\s\\t\\r\\i\\n\\g'
ok "\${s}\${t}\${r}\${i}\${n}\${g}" is '${s}${t}${r}${i}${n}${g}'
ok "\$string" is '$string'
ok "\${string}" is '${string}'
ok "\\$Escaping first" is '$Escaping first'
ok "\\${Escaping} first" is '${Escaping} first'
ok "Escaping \\$in middle" is 'Escaping $in middle'
ok "Escaping \\${in} middle" is 'Escaping ${in} middle'
ok "Escaping \\$last" is 'Escaping $last'
ok "Escaping \\${last}" is 'Escaping ${last}'
ok "\$Escaping first" is '$Escaping first'
ok "\${Escaping} first" is '${Escaping} first'
ok "Escaping \$in middle" is 'Escaping $in middle'
ok "Escaping \${in} middle" is 'Escaping ${in} middle'
ok "Escaping \$last" is 'Escaping $last'
ok "Escaping \${last}" is 'Escaping ${last}'
ok "$$" is '$$'
ok "${}" is '${}'
ok "\\\\$$" is '\\\\$$'
ok "\\\\${}" is '\\\\${}'
ok "\\\\\$$" is '\\\\\$$'
ok "\\\${}" is '\\${}'
ok "I won $20 last night." is 'I won $20 last night.'
ok "I won $${20} last night." is 'I won $20 last night.'
@ -53,3 +54,8 @@ ok "I can has ${"cheeze"}" is 'I can has cheeze'
ok 'I can has ${"cheeze"}' is 'I can has ${"cheeze"}'
ok "Where is ${obj["name"] + '?'}" is 'Where is Joe?'
ok "Where is ${"the new ${obj["name"]}"}?" is 'Where is the new Joe?'
ok "Hello ${world ? "$hello"}" is 'Hello World'
ok "Hello ${"${"${obj["name"]}" + '!'}"}" is 'Hello Joe!'