Improvement to comment handling that should ensure that comments have no effect on indentation

Jeremy Ashkenas 2010-03-02 19:23:21 -05:00
parent 70cb195e6f
commit 5fd0972b5d
6 changed files with 39 additions and 32 deletions

View File

@@ -1,12 +1,10 @@
(function(){
var ACCESSORS, ASSIGNMENT, BEFORE_WHEN, CALLABLE, CODE, COFFEE_KEYWORDS, COMMENT, COMMENT_CLEANER, HEREDOC, HEREDOC_INDENT, IDENTIFIER, JS, JS_CLEANER, JS_FORBIDDEN, JS_KEYWORDS, KEYWORDS, LAST_DENT, LAST_DENTS, Lexer, MULTILINER, MULTI_DENT, NOT_REGEX, NO_NEWLINE, NUMBER, OPERATOR, REGEX, RESERVED, Rewriter, STRING, STRING_NEWLINES, WHITESPACE, count, include;
var ACCESSORS, ASSIGNMENT, BEFORE_WHEN, CALLABLE, CODE, COFFEE_KEYWORDS, COMMENT, COMMENT_CLEANER, HEREDOC, HEREDOC_INDENT, IDENTIFIER, JS, JS_CLEANER, JS_FORBIDDEN, JS_KEYWORDS, KEYWORDS, LAST_DENT, LAST_DENTS, Lexer, MULTILINER, MULTI_DENT, NOT_REGEX, NO_NEWLINE, NUMBER, OPERATOR, REGEX, RESERVED, Rewriter, STRING, STRING_NEWLINES, WHITESPACE, compact, count, include;
// The CoffeeScript Lexer. Uses a series of token-matching regexes to attempt
// matches against the beginning of the source code. When a match is found,
// a token is produced, we consume the match, and start again. Tokens are in the
// form:
//
// [tag, value, line_number]
//
// Which is a format that can be fed directly into [Jison](http://github.com/zaach/jison).
// Set up the Lexer for both Node.js and the browser, depending on where we are.
if ((typeof process !== "undefined" && process !== null)) {
@@ -56,9 +54,7 @@
HEREDOC_INDENT = /^[ \t]+/mg;
// Tokens which a regular expression will never immediately follow, but which
// a division operator might.
//
// See: http://www.mozilla.org/js/language/js20-2002-04/rationale/syntax.html#regular-expressions
//
// Our list is shorter, due to sans-parentheses method calls.
NOT_REGEX = ['NUMBER', 'REGEX', '++', '--', 'FALSE', 'NULL', 'TRUE'];
// Tokens which could legitimately be invoked or indexed. A opening
@@ -121,10 +117,10 @@
if (this.regex_token()) {
return null;
}
if (this.line_token()) {
if (this.comment_token()) {
return null;
}
if (this.comment_token()) {
if (this.line_token()) {
return null;
}
if (this.whitespace_token()) {
@@ -214,12 +210,13 @@
};
// Matches and consumes comments.
Lexer.prototype.comment_token = function comment_token() {
var comment;
var comment, lines;
if (!((comment = this.match(COMMENT, 1)))) {
return false;
}
this.line += (comment.match(MULTILINER) || []).length;
this.token('COMMENT', comment.replace(COMMENT_CLEANER, '').split(MULTILINER));
lines = comment.replace(COMMENT_CLEANER, '').split(MULTILINER);
this.token('COMMENT', compact(lines));
this.token('TERMINATOR', "\n");
this.i += comment.length;
return true;
@@ -449,6 +446,18 @@
include = function include(list, value) {
return list.indexOf(value) >= 0;
};
// Trim out all falsy values from an array.
compact = function compact(array) {
var _a, _b, _c, _d, item;
_a = []; _b = array;
for (_c = 0, _d = _b.length; _c < _d; _c++) {
item = _b[_c];
if (item) {
_a.push(item);
}
}
return _a;
};
// Count the number of occurrences of a character in a string.
count = function count(string, letter) {
var num, pos;
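A quick standalone sketch of the new compact helper in action (not part of the commit; the regexes below are simplified stand-ins for the lexer's COMMENT_CLEANER and MULTILINER constants). Splitting a cleaned comment on newlines can leave empty strings behind, which compact drops before the COMMENT token is emitted:

var compact = function(array) {
  var result = [];
  for (var i = 0; i < array.length; i++) {
    if (array[i]) result.push(array[i]);
  }
  return result;
};
// Simplified stand-ins for the lexer's comment-cleaning regexes.
var comment = "# first\n# second\n";
var lines = comment.replace(/^\s*#\s*/mg, '').split(/\n/);
// lines is ['first', 'second', ''] -- the trailing empty string would
// otherwise end up as a blank line inside the COMMENT token's value.
console.log(compact(lines)); // => ['first', 'second']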

View File

@@ -85,19 +85,14 @@
Rewriter.prototype.adjust_comments = function adjust_comments() {
return this.scan_tokens((function(__this) {
var __func = function(prev, token, post, i) {
var after, before;
var after;
if (!(token[0] === 'COMMENT')) {
return 1;
}
before = this.tokens[i - 2];
after = this.tokens[i + 2];
if (before && after && ((before[0] === 'INDENT' && after[0] === 'OUTDENT') || (before[0] === 'OUTDENT' && after[0] === 'INDENT')) && before[1] === after[1]) {
if (after && after[0] === 'INDENT') {
this.tokens.splice(i + 2, 1);
this.tokens.splice(i - 2, 1);
return 0;
} else if (prev && prev[0] === 'TERMINATOR' && after && after[0] === 'INDENT') {
this.tokens.splice(i + 2, 1);
this.tokens[i - 1] = after;
this.tokens.splice(i, 0, after);
return 1;
} else if (prev && prev[0] !== 'TERMINATOR' && prev[0] !== 'INDENT' && prev[0] !== 'OUTDENT') {
this.tokens.splice(i, 0, ['TERMINATOR', "\n", prev[2]]);
@@ -325,14 +320,12 @@
// el.hide())
// In order to accomplish this, move outdents that follow closing parens
// inwards, safely. The steps to accomplish this are:
//
// 1. Check that all paired tokens are balanced and in order.
// 2. Rewrite the stream with a stack: if you see an '(' or INDENT, add it
// to the stack. If you see an ')' or OUTDENT, pop the stack and replace
// it with the inverse of what we've just popped.
// 3. Keep track of "debt" for tokens that we fake, to make sure we end
// up balanced in the end.
//
Rewriter.prototype.rewrite_closing_parens = function rewrite_closing_parens() {
var _l, debt, key, stack, val;
stack = [];
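To make the new adjust_comments behavior above concrete, here is a standalone sketch (illustrative token values, not the Rewriter itself) of the splice that moves a trailing INDENT in front of a COMMENT, so the comment no longer sits between a block opener and its indented body:

var tokens = [
  ['IDENTIFIER', 'obj'], [':', ':'],
  ['COMMENT', [' a note']], ['TERMINATOR', '\n'],
  ['INDENT', 2]
];
var i = 2;                        // index of the COMMENT token
var after = tokens[i + 2];
if (after && after[0] === 'INDENT') {
  tokens.splice(i + 2, 1);        // remove the INDENT that trailed the comment
  tokens.splice(i, 0, after);     // and re-insert it just before the comment
}
// tokens is now IDENTIFIER, ':', INDENT, COMMENT, TERMINATOR -- indentation
// is unaffected by the comment.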

View File

@@ -7,7 +7,6 @@
// Scope objects form a tree corresponding to the shape of the function
// definitions present in the script. They provide lexical scope, to determine
// whether a variable has been seen before or if it needs to be declared.
//
// Initialize a scope with its parent, for lookups up the chain,
// as well as the Expressions body where it should declare its variables,
// and the function that it wraps.

View File

@@ -134,8 +134,8 @@ exports.Lexer: class Lexer
return if @string_token()
return if @js_token()
return if @regex_token()
return if @line_token()
return if @comment_token()
return if @line_token()
return if @whitespace_token()
return @literal_token()
@@ -199,7 +199,8 @@ exports.Lexer: class Lexer
comment_token: ->
return false unless comment: @match COMMENT, 1
@line += (comment.match(MULTILINER) or []).length
@token 'COMMENT', comment.replace(COMMENT_CLEANER, '').split(MULTILINER)
lines: comment.replace(COMMENT_CLEANER, '').split(MULTILINER)
@token 'COMMENT', compact lines
@token 'TERMINATOR', "\n"
@i += comment.length
true
@@ -375,6 +376,9 @@ exports.Lexer: class Lexer
include: (list, value) ->
list.indexOf(value) >= 0
# Trim out all falsy values from an array.
compact: (array) -> item for item in array when item
# Count the number of occurrences of a character in a string.
count: (string, letter) ->
num: 0

View File

@@ -72,18 +72,10 @@ exports.Rewriter: class Rewriter
adjust_comments: ->
@scan_tokens (prev, token, post, i) =>
return 1 unless token[0] is 'COMMENT'
before: @tokens[i - 2]
after: @tokens[i + 2]
if before and after and
((before[0] is 'INDENT' and after[0] is 'OUTDENT') or
(before[0] is 'OUTDENT' and after[0] is 'INDENT')) and
before[1] is after[1]
if after and after[0] is 'INDENT'
@tokens.splice(i + 2, 1)
@tokens.splice(i - 2, 1)
return 0
else if prev and prev[0] is 'TERMINATOR' and after and after[0] is 'INDENT'
@tokens.splice(i + 2, 1)
@tokens[i - 1]: after
@tokens.splice(i, 0, after)
return 1
else if prev and prev[0] isnt 'TERMINATOR' and prev[0] isnt 'INDENT' and prev[0] isnt 'OUTDENT'
@tokens.splice(i, 0, ['TERMINATOR', "\n", prev[2]])

View File

@@ -23,3 +23,13 @@ ok func()
func
func
# Line3
obj: {
# comment
# comment
# comment
one: 1
# comment
two: 2
# comment
}