rewriter is halfway done, and working

2010-01-30 17:24:48 -05:00 · 2010-01-30 17:24:48 -05:00 · 557cdbba71
parent 84feab3492
commit 557cdbba71
5 changed files with 259 additions and 57 deletions
--- a/lib/coffee_script/lexer.js
+++ b/lib/coffee_script/lexer.js
@ -266,7 +266,7 @@
    }
    value = value || this.chunk.substr(0, 1);
    tag = value.match(ASSIGNMENT) ? 'ASSIGN' : value;
-    if (this.value() && this.value().spaced && CALLABLE.indexOf(this.tag() >= 0)) {
+    if (this.value() && !this.value().spaced && CALLABLE.indexOf(this.tag() >= 0)) {
      if (value === '(') {
        tag = 'CALL_START';
      }
--- a/lib/coffee_script/rewriter.js
+++ b/lib/coffee_script/rewriter.js
@ -49,12 +49,12 @@
  re.prototype.rewrite = function rewrite(tokens) {
    this.tokens = tokens;
    this.adjust_comments();
-    // this.remove_leading_newlines()
-    // this.remove_mid_expression_newlines()
-    // this.move_commas_outside_outdents()
-    // this.close_open_calls_and_indexes()
+    this.remove_leading_newlines();
+    this.remove_mid_expression_newlines();
+    this.move_commas_outside_outdents();
+    this.close_open_calls_and_indexes();
    // this.add_implicit_parentheses()
-    // this.add_implicit_indentation()
+    this.add_implicit_indentation();
    // this.ensure_balance(BALANCED_PAIRS)
    // this.rewrite_closing_parens()
    return this.tokens;
@ -78,27 +78,149 @@
  // Massage newlines and indentations so that comments don't have to be
  // correctly indented, or appear on their own line.
  re.prototype.adjust_comments = function adjust_comments() {
-    return this.scan_tokens(function(prev, token, post, i) {
-      var after, before;
-      if (!(token[0] === 'COMMENT')) {
-        return 1;
-      }
-      before = this.tokens[i - 2];
-      after = this.tokens[i + 2];
-      if (before && after && ((before[0] === 'INDENT' && after[0] === 'OUTDENT') || (before[0] === 'OUTDENT' && after[0] === 'INDENT')) && before[1] === after[1]) {
-        this.tokens.splice(i + 2, 1);
-        this.tokens.splice(i - 2, 1);
+    return this.scan_tokens((function(__this) {
+      var __func = function(prev, token, post, i) { // runs with `this` set to __this by the wrapper returned below
+        var after, before;
+        if (!(token[0] === 'COMMENT')) {
+          return 1; // only COMMENT tokens are adjusted
+        }
+        before = this.tokens[i - 2]; // token two positions before the comment
+        after = this.tokens[i + 2]; // token two positions after the comment
+        if (before && after && ((before[0] === 'INDENT' && after[0] === 'OUTDENT') || (before[0] === 'OUTDENT' && after[0] === 'INDENT')) && before[1] === after[1]) {
+          this.tokens.splice(i + 2, 1); // remove the later token first so index i - 2 is still correct
+          this.tokens.splice(i - 2, 1); // then remove its matching INDENT/OUTDENT partner
+          return 0;
+        } else if (prev[0] === "\n" && after[0] === 'INDENT') { // NOTE(review): prev and after are dereferenced unguarded -- presumably the lexer guarantees both exist here; confirm
+          this.tokens.splice(i + 2, 1); // take the INDENT out from after the comment...
+          this.tokens[i - 1] = after; // ...and put it where the newline before the comment was
+          return 1;
+        } else if (prev[0] !== "\n" && prev[0] !== 'INDENT' && prev[0] !== 'OUTDENT') {
+          this.tokens.splice(i, 0, ["\n", "\n"]); // insert a newline token in front of the comment
+          return 2;
+        } else {
+          return 1;
+        }
+      };
+      return (function() {
+        return __func.apply(__this, arguments); // forward the scan_tokens arguments with the rewriter as `this`
+      });
+    })(this));
+  };
+  // Leading newlines would introduce an ambiguity in the grammar, so we
+  // dispatch them here. Only a single leading "\n" token is dropped; an empty
+  // token stream is left untouched instead of throwing on `tokens[0][0]`.
+  // Returns the removed token, or undefined when nothing was removed.
+  re.prototype.remove_leading_newlines = function remove_leading_newlines() {
+    var first = this.tokens[0];
+    if (first && first[0] === "\n") {
+      return this.tokens.shift();
+    }
+  };
+  // Some blocks occur in the middle of expressions -- when a "\n" token is
+  // directly followed by an EXPRESSION_CLOSE token, remove that newline.
+  re.prototype.remove_mid_expression_newlines = function remove_mid_expression_newlines() {
+    return this.scan_tokens((function(__this) {
+      var __func = function(prev, token, post, i) {
+        if (!(post && EXPRESSION_CLOSE.indexOf(post[0]) >= 0 && token[0] === "\n")) {
+          return 1; // not a newline in front of an expression closer -- presumably the return value is the step scan_tokens takes; confirm
+        }
+        this.tokens.splice(i, 1); // drop the newline token at index i
        return 0;
-      } else if (prev[0] === "\n" && after[0] === 'INDENT') {
-        this.tokens.splice(i + 2, 1);
-        this.tokens[i - 1] = after;
+      };
+      return (function() {
+        return __func.apply(__this, arguments); // forward the scan_tokens arguments with the rewriter as `this`
+      });
+    })(this));
+  };
+  // Make sure that we don't accidentally break trailing commas, which need
+  // to go on the outside of expression closers: a ',' directly followed by an
+  // OUTDENT is rewritten so the OUTDENT comes first.
+  re.prototype.move_commas_outside_outdents = function move_commas_outside_outdents() {
+    return this.scan_tokens((function(__this) {
+      var __func = function(prev, token, post, i) {
+        if (token[0] === 'OUTDENT' && prev && prev[0] === ',') {
+          // Swap the pair in place. The earlier splice(i, 1, token) replaced
+          // the OUTDENT with itself, so the comma never moved.
+          this.tokens.splice(i - 1, 2, token, prev);
+        }
        return 1;
-      } else if (prev[0] !== "\n" && prev[0] !== 'INDENT' && prev[0] !== 'OUTDENT') {
-        this.tokens.splice(i, 0, ["\n", "\n"]);
-        return 2;
-      } else {
+      };
+      return (function() {
+        return __func.apply(__this, arguments);
+      });
+    })(this));
+  };
+  // We've tagged the opening parenthesis of a method call, and the opening
+  // bracket of an indexing operation. Match them with their close.
+  // `parens` and `brackets` are stacks: every CALL_START / INDEX_START pushes
+  // a counter of the plain '(' / '[' opened since it, so a closer seen while
+  // the top counter is zero is the one that ends the call or index.
+  re.prototype.close_open_calls_and_indexes = function close_open_calls_and_indexes() {
+    var brackets, parens;
+    parens = [0];
+    brackets = [0];
+    return this.scan_tokens((function(__this) {
+      var __func = function(prev, token, post, i) {
+        if (token[0] === 'CALL_START') {
+          parens.push(0);
+        } else if (token[0] === 'INDEX_START') {
+          brackets.push(0);
+        } else if (token[0] === '(') {
+          // JavaScript arrays have no negative indexing: `parens[-1]` would
+          // write a stray "-1" property instead of bumping the top counter.
+          parens[parens.length - 1] += 1;
+        } else if (token[0] === '[') {
+          brackets[brackets.length - 1] += 1;
+        } else if (token[0] === ')') {
+          if (parens[parens.length - 1] === 0) {
+            // pop has to be called; a bare `parens.pop` never shrinks the stack.
+            parens.pop();
+            token[0] = 'CALL_END';
+          } else {
+            parens[parens.length - 1] -= 1;
+          }
+        } else if (token[0] === ']') {
+          if (brackets[brackets.length - 1] === 0) {
+            brackets.pop();
+            token[0] = 'INDEX_END';
+          } else {
+            brackets[brackets.length - 1] -= 1;
+          }
+        }
        return 1;
-      }
-    });
+      };
+      return (function() {
+        return __func.apply(__this, arguments);
+      });
+    })(this));
+  };
+  // Because our grammar is LALR(1), it can't handle some single-line
+  // expressions that lack ending delimiters. Use the lexer to add the implicit
+  // blocks, so it doesn't need to.
+  // ')' can close a single-line block, but we need to make sure it's balanced.
+  // An INDENT is inserted right after the SINGLE_LINERS token, and the matching
+  // OUTDENT goes in front of the first closer found (or at the end of the
+  // stream). A THEN token is removed once its block has been wrapped.
+  re.prototype.add_implicit_indentation = function add_implicit_indentation() {
+    return this.scan_tokens((function(__this) {
+      var __func = function(prev, token, post, i) {
+        var idx, insertion, parens, post_tag, starter, tok;
+        // The starter may be the last token, so don't dereference a missing `post`.
+        post_tag = post ? post[0] : null;
+        if (!(SINGLE_LINERS.indexOf(token[0]) >= 0 && post_tag !== 'INDENT' && !(token[0] === 'ELSE' && post_tag === 'IF'))) {
+          return 1;
+        }
+        starter = token[0];
+        this.tokens.splice(i + 1, 0, ['INDENT', 2]);
+        idx = i + 1;
+        parens = 0;
+        while (true) {
+          idx += 1;
+          tok = this.tokens[idx];
+          // `tok` is undefined at the end of the stream; guard the ELSE check too,
+          // otherwise an ELSE starter with no closer throws on `tok[0]`.
+          if ((!tok || SINGLE_CLOSERS.indexOf(tok[0]) >= 0 || (tok[0] === ')' && parens === 0)) && !(starter === 'ELSE' && tok && tok[0] === 'ELSE')) {
+            // Keep a trailing comma outside of the implicit block.
+            insertion = this.tokens[idx - 1][0] === "," ? idx - 1 : idx;
+            this.tokens.splice(insertion, 0, ['OUTDENT', 2]);
+            break;
+          }
+          if (tok[0] === '(') {
+            parens += 1;
+          }
+          if (tok[0] === ')') {
+            parens -= 1;
+          }
+        }
+        if (!(token[0] === 'THEN')) {
+          return 1;
+        }
+        this.tokens.splice(i, 1);
+        return 0;
+      };
+      return (function() {
+        return __func.apply(__this, arguments);
+      });
+    })(this));
  };
 })();
--- a/lib/coffee_script/rewriter.rb
+++ b/lib/coffee_script/rewriter.rb
@ -151,6 +151,30 @@ module CoffeeScript
      end
    end

+    # Methods may be optionally called without parentheses, for simple cases.
+    # Insert the implicit parentheses here, so that the parser doesn't have to
+    # deal with them.
+    def add_implicit_parentheses
+      stack = [0]
+      scan_tokens do |prev, token, post, i|
+        stack.push(0) if token[0] == :INDENT
+        if token[0] == :OUTDENT
+          last = stack.pop
+          stack[-1] += last
+        end
+        if stack.last > 0 && (IMPLICIT_END.include?(token[0]) || post.nil?)
+          idx = token[0] == :OUTDENT ? i + 1 : i
+          stack.last.times { @tokens.insert(idx, [:CALL_END, Value.new(')', token[1].line)]) }
+          size, stack[-1] = stack[-1] + 1, 0
+          next size
+        end
+        next 1 unless IMPLICIT_FUNC.include?(prev[0]) && IMPLICIT_CALL.include?(token[0])
+        @tokens.insert(i, [:CALL_START, Value.new('(', token[1].line)])
+        stack[-1] += 1
+        next 2
+      end
+    end
+
    # Because our grammar is LALR(1), it can't handle some single-line
    # expressions that lack ending delimiters. Use the lexer to add the implicit
    # blocks, so it doesn't need to.
@ -183,30 +207,6 @@ module CoffeeScript
      end
    end

-    # Methods may be optionally called without parentheses, for simple cases.
-    # Insert the implicit parentheses here, so that the parser doesn't have to
-    # deal with them.
-    def add_implicit_parentheses
-      stack = [0]
-      scan_tokens do |prev, token, post, i|
-        stack.push(0) if token[0] == :INDENT
-        if token[0] == :OUTDENT
-          last = stack.pop
-          stack[-1] += last
-        end
-        if stack.last > 0 && (IMPLICIT_END.include?(token[0]) || post.nil?)
-          idx = token[0] == :OUTDENT ? i + 1 : i
-          stack.last.times { @tokens.insert(idx, [:CALL_END, Value.new(')', token[1].line)]) }
-          size, stack[-1] = stack[-1] + 1, 0
-          next size
-        end
-        next 1 unless IMPLICIT_FUNC.include?(prev[0]) && IMPLICIT_CALL.include?(token[0])
-        @tokens.insert(i, [:CALL_START, Value.new('(', token[1].line)])
-        stack[-1] += 1
-        next 2
-      end
-    end
-
    # Ensure that all listed pairs of tokens are correctly balanced throughout
    # the course of the token stream.
    def ensure_balance(*pairs)
--- a/src/lexer.coffee
+++ b/src/lexer.coffee
@ -216,7 +216,7 @@ lex::literal_token: ->
  this.tag_parameters() if value and value.match(CODE)
  value ||= this.chunk.substr(0, 1)
  tag: if value.match(ASSIGNMENT) then 'ASSIGN' else value
-  if this.value() and this.value().spaced and CALLABLE.indexOf(this.tag() >= 0)
+  if this.value() and !this.value().spaced and CALLABLE.indexOf(this.tag() >= 0)
    tag: 'CALL_START'  if value is '('
    tag: 'INDEX_START' if value is '['
  this.token tag, value
--- a/src/rewriter.coffee
+++ b/src/rewriter.coffee
@ -41,12 +41,12 @@ SINGLE_CLOSERS: ["\n", 'CATCH', 'FINALLY', 'ELSE', 'OUTDENT', 'LEADING_WHEN', 'P
 re::rewrite: (tokens) ->
  this.tokens: tokens
  this.adjust_comments()
-  # this.remove_leading_newlines()
-  # this.remove_mid_expression_newlines()
-  # this.move_commas_outside_outdents()
-  # this.close_open_calls_and_indexes()
+  this.remove_leading_newlines()
+  this.remove_mid_expression_newlines()
+  this.move_commas_outside_outdents()
+  this.close_open_calls_and_indexes()
  # this.add_implicit_parentheses()
-  # this.add_implicit_indentation()
+  this.add_implicit_indentation()
  # this.ensure_balance(BALANCED_PAIRS)
  # this.rewrite_closing_parens()
  this.tokens
@ -56,7 +56,7 @@ re::rewrite: (tokens) ->
 # forwards (or backwards) in the stream, to make sure we don't miss anything
 # as the stream changes length under our feet.
 re::scan_tokens: (yield) ->
-  i = 0
+  i: 0
  while true
    break unless this.tokens[i]
    move: yield(this.tokens[i - 1], this.tokens[i], this.tokens[i + 1], i)
@ -66,7 +66,7 @@ re::scan_tokens: (yield) ->
 # Massage newlines and indentations so that comments don't have to be
 # correctly indented, or appear on their own line.
 re::adjust_comments: ->
-  this.scan_tokens (prev, token, post, i) ->
+  this.scan_tokens (prev, token, post, i) =>
    return 1 unless token[0] is 'COMMENT'
    before: this.tokens[i - 2]
    after:  this.tokens[i + 2]
@ -87,6 +87,86 @@ re::adjust_comments: ->
    else
      return 1

+# Leading newlines would introduce an ambiguity in the grammar, so we
+# dispatch them here. Only a single leading "\n" token is dropped, and an
+# empty token stream is left untouched rather than failing on tokens[0][0].
+re::remove_leading_newlines: ->
+  this.tokens.shift() if this.tokens[0] and this.tokens[0][0] is "\n"
+
+# A "\n" token that sits directly in front of an EXPRESSION_CLOSE token ends a
+# block in the middle of an expression -- splice that newline out of the stream.
+re::remove_mid_expression_newlines: ->
+  this.scan_tokens (prev, token, post, i) =>
+    is_newline: token[0] is "\n"
+    before_closer: post and EXPRESSION_CLOSE.indexOf(post[0]) >= 0
+    return 1 unless is_newline and before_closer
+    this.tokens.splice(i, 1)
+    return 0
+
+# Make sure that we don't accidentally break trailing commas, which need
+# to go on the outside of expression closers: a ',' directly followed by an
+# OUTDENT is rewritten so the OUTDENT comes first.
+re::move_commas_outside_outdents: ->
+  this.scan_tokens (prev, token, post, i) =>
+    # Swap the pair in place. The earlier splice(i, 1, token) replaced the
+    # OUTDENT with itself, so the comma never moved.
+    this.tokens.splice(i - 1, 2, token, prev) if token[0] is 'OUTDENT' and prev and prev[0] is ','
+    return 1
+
+# We've tagged the opening parenthesis of a method call, and the opening
+# bracket of an indexing operation. Match them with their close.
+# `parens` and `brackets` are stacks: every CALL_START / INDEX_START pushes a
+# counter of the plain '(' / '[' opened since it, so a closer seen while the
+# top counter is zero is the one that ends the call or index.
+re::close_open_calls_and_indexes: ->
+  parens:   [0]
+  brackets: [0]
+  this.scan_tokens (prev, token, post, i) =>
+    switch token[0]
+      when 'CALL_START'  then parens.push(0)
+      when 'INDEX_START' then brackets.push(0)
+      # The compiled JavaScript has no negative indexing, so the top counter
+      # has to be addressed as length - 1 rather than [-1].
+      when '('           then parens[parens.length - 1] += 1
+      when '['           then brackets[brackets.length - 1] += 1
+      when ')'
+        if parens[parens.length - 1] is 0
+          # pop needs the call parentheses, a bare `parens.pop` does nothing.
+          parens.pop()
+          token[0]: 'CALL_END'
+        else
+          parens[parens.length - 1] -= 1
+      when ']'
+        if brackets[brackets.length - 1] is 0
+          brackets.pop()
+          token[0]: 'INDEX_END'
+        else
+          brackets[brackets.length - 1] -= 1
+    return 1
+
+# Because our grammar is LALR(1), it can't handle some single-line
+# expressions that lack ending delimiters. Use the lexer to add the implicit
+# blocks, so it doesn't need to.
+# ')' can close a single-line block, but we need to make sure it's balanced.
+# An INDENT is inserted right after the SINGLE_LINERS token, and the matching
+# OUTDENT goes in front of the first closer found (or at the end of the
+# stream). A THEN token is removed once its block has been wrapped.
+re::add_implicit_indentation: ->
+  this.scan_tokens (prev, token, post, i) =>
+    # The starter may be the last token, so don't dereference a missing post.
+    post_tag: if post then post[0] else null
+    return 1 unless SINGLE_LINERS.indexOf(token[0]) >= 0 and post_tag isnt 'INDENT' and
+      not (token[0] is 'ELSE' and post_tag is 'IF')
+    starter: token[0]
+    this.tokens.splice(i + 1, 0, ['INDENT', 2])
+    idx: i + 1
+    parens: 0
+    while true
+      idx += 1
+      tok: this.tokens[idx]
+      # tok is undefined at the end of the stream; the ELSE check is guarded
+      # as well so an ELSE starter with no closer doesn't fail on tok[0].
+      if (not tok or SINGLE_CLOSERS.indexOf(tok[0]) >= 0 or
+          (tok[0] is ')' and parens is 0)) and
+          not (starter is 'ELSE' and tok and tok[0] is 'ELSE')
+        # Keep a trailing comma outside of the implicit block.
+        insertion: if this.tokens[idx - 1][0] is "," then idx - 1 else idx
+        this.tokens.splice(insertion, 0, ['OUTDENT', 2])
+        break
+      parens += 1 if tok[0] is '('
+      parens -= 1 if tok[0] is ')'
+    return 1 unless token[0] is 'THEN'
+    this.tokens.splice(i, 1)
+    return 0
+
+
+
+
+
+
+
+
+