better indentation handling for far-left heredocs and herecomments

2022-11-09 12:23:24 -05:00 · 2010-05-12 21:47:31 -04:00 · 2010-05-12 21:47:31 -04:00 · a5db69e1af
commit a5db69e1af
parent 8aceef20e1
5 changed files with 45 additions and 40 deletions
--- a/lib/lexer.js
+++ b/lib/lexer.js
@@ -206,7 +206,7 @@
        comment = this.sanitize_heredoc(match[3], {
          herecomment: true
        });
-        this.token('HERECOMMENT', compact(comment.split(MULTILINER)));
+        this.token('HERECOMMENT', comment.split(MULTILINER));
      } else {
        lines = compact(match[1].replace(COMMENT_CLEANER, '').split(MULTILINER));
        i = this.tokens.length - 1;
@@ -417,8 +417,12 @@
    // Sanitize a heredoc or herecomment by escaping internal double quotes and
    // erasing all external indentation on the left-hand side.
    Lexer.prototype.sanitize_heredoc = function sanitize_heredoc(doc, options) {
-      var indent;
+      var indent, match;
-      indent = (doc.match(HEREDOC_INDENT) || ['']).sort()[0];
+      while (match = HEREDOC_INDENT.exec(doc)) {
        if (!indent || match[1].length < indent.length) {
          indent = match[1];
        }
      }
      doc = doc.replace(new RegExp("^" + indent, 'gm'), '');
      if (options.herecomment) {
        return doc;
@@ -666,7 +670,7 @@
  STRING_NEWLINES = /\n[ \t]*/g;
  COMMENT_CLEANER = /(^[ \t]*#|\n[ \t]*$)/mg;
  NO_NEWLINE = /^([+\*&|\/\-%=<>:!.\\][<>=&|]*|and|or|is|isnt|not|delete|typeof|instanceof)$/;
-  HEREDOC_INDENT = /^[ \t]+/mg;
+  HEREDOC_INDENT = /\n+([ \t]*)/g;
  // Tokens which a regular expression will never immediately follow, but which
  // a division operator might.
  // See: http://www.mozilla.org/js/language/js20-2002-04/rationale/syntax.html#regular-expressions
--- a/lib/nodes.js
+++ b/lib/nodes.js
@@ -1,16 +1,16 @@
 (function(){
  var AccessorNode, ArrayNode, AssignNode, BaseNode, CallNode, ClassNode, ClosureNode, CodeNode, CommentNode, CurryNode, ExistenceNode, Expressions, ExtendsNode, ForNode, IDENTIFIER, IS_STRING, IfNode, IndexNode, LiteralNode, ObjectNode, OpNode, ParentheticalNode, PushNode, RangeNode, ReturnNode, Scope, SliceNode, SplatNode, TAB, TRAILING_WHITESPACE, ThrowNode, TryNode, UTILITIES, ValueNode, WhileNode, _a, children, compact, del, flatten, helpers, literal, merge, statement, utility;
-  var __slice = Array.prototype.slice, __extends = function(child, parent) {
+  var __slice = Array.prototype.slice, __extends =               function(child, parent) {
-    var ctor = function(){ };
+  var ctor = function(){ };
-    ctor.prototype = parent.prototype;
+  ctor.prototype = parent.prototype;
-    child.__superClass__ = parent.prototype;
+  child.__superClass__ = parent.prototype;
-    child.prototype = new ctor();
+  child.prototype = new ctor();
-    child.prototype.constructor = child;
+  child.prototype.constructor = child;
-  }, __bind = function(func, obj, args) {
+}, __bind =             function(func, obj, args) {
-    return function() {
+  return function() {
-      return func.apply(obj || {}, args ? args.concat(__slice.call(arguments, 0)) : arguments);
+    return func.apply(obj || {}, args ? args.concat(__slice.call(arguments, 0)) : arguments);
    };
  };
 };
  // `nodes.coffee` contains all of the node classes for the syntax tree. Most
  // nodes are created as the result of actions in the [grammar](grammar.html),
  // but some are created by other nodes as a method of code generation. To convert
@@ -1292,16 +1292,13 @@
        }
      }
      pre = ("" + set + (this.tab) + "while (" + cond + ")");
      if (!this.body) {
        return ("" + pre + " null;" + post);
      }
      if (this.guard) {
        this.body = Expressions.wrap([new IfNode(this.guard, this.body)]);
      }
-      this.returns ? (post = new ReturnNode(literal(rvar)).compile(merge(o, {
+      this.returns ? (post = '\n' + new ReturnNode(literal(rvar)).compile(merge(o, {
        indent: this.idt()
      }))) : (post = '');
-      return "" + pre + " {\n" + (this.body.compile(o)) + "\n" + this.tab + "}\n" + post;
+      return "" + pre + " {\n" + (this.body.compile(o)) + "\n" + this.tab + "}" + post;
    };
    return WhileNode;
  })();
@@ -1576,9 +1573,12 @@
    };
    ForNode.prototype.compile_return_value = function compile_return_value(val, o) {
      if (this.returns) {
-        return new ReturnNode(literal(val)).compile(o);
+        return '\n' + new ReturnNode(literal(val)).compile(o);
      }
-      return val || '';
+      if (val) {
        return '\n' + val;
      }
      return '';
    };
    // Welcome to the hairiest method in all of CoffeeScript. Handles the inner
    // loop, filtering, stepping, and result saving for array, object, and range
@@ -1647,7 +1647,7 @@
        top: true
      }));
      vars = range ? name : ("" + name + ", " + ivar);
-      close = this.object ? '}}\n' : '}\n';
+      close = this.object ? '}}' : '}';
      return "" + set_result + (source_part) + "for (" + for_part + ") {\n" + var_part + body + "\n" + this.tab + close + return_result;
    };
    return ForNode;
@@ -1855,10 +1855,10 @@
    // Correctly set up a prototype chain for inheritance, including a reference
    // to the superclass for `super()` calls. See:
    // [goog.inherits](http://closure-library.googlecode.com/svn/docs/closure_goog_base.js.source.html#line1206).
-    __extends: "function(child, parent) {\n    var ctor = function(){ };\n    ctor.prototype = parent.prototype;\n    child.__superClass__ = parent.prototype;\n    child.prototype = new ctor();\n    child.prototype.constructor = child;\n  }",
+    __extends: "              function(child, parent) {\n  var ctor = function(){ };\n  ctor.prototype = parent.prototype;\n  child.__superClass__ = parent.prototype;\n  child.prototype = new ctor();\n  child.prototype.constructor = child;\n}",
    // Bind a function to a calling context, optionally including curried arguments.
    // See [Underscore's implementation](http://jashkenas.github.com/coffee-script/documentation/docs/underscore.html#section-47).
-    __bind: "function(func, obj, args) {\n    return function() {\n      return func.apply(obj || {}, args ? args.concat(__slice.call(arguments, 0)) : arguments);\n    };\n  }",
+    __bind: "            function(func, obj, args) {\n  return function() {\n    return func.apply(obj || {}, args ? args.concat(__slice.call(arguments, 0)) : arguments);\n  };\n}",
    // Shortcuts to speed up the lookup time for native functions.
    __hasProp: 'Object.prototype.hasOwnProperty',
    __slice: 'Array.prototype.slice'
@@ -1869,7 +1869,7 @@
  TAB = '  ';
  // Trim out all trailing whitespace, so that the generated code plays nice
  // with Git.
-  TRAILING_WHITESPACE = /\s+$/gm;
+  TRAILING_WHITESPACE = /[ \t]+$/gm;
  // Keep this identifier regex in sync with the Lexer.
  IDENTIFIER = /^[a-zA-Z\$_](\w|\$)*$/;
  // Is a literal value a string?
--- a/lib/rewriter.js
+++ b/lib/rewriter.js
@@ -1,10 +1,10 @@
 (function(){
  var BALANCED_PAIRS, EXPRESSION_CLOSE, EXPRESSION_END, EXPRESSION_START, IMPLICIT_BLOCK, IMPLICIT_CALL, IMPLICIT_END, IMPLICIT_FUNC, INVERSES, Rewriter, SINGLE_CLOSERS, SINGLE_LINERS, _a, _b, _c, _d, _e, _f, _g, _h, _i, _j, _k, _l, _m, helpers, include, pair;
-  var __slice = Array.prototype.slice, __bind = function(func, obj, args) {
+  var __slice = Array.prototype.slice, __bind =             function(func, obj, args) {
-    return function() {
+  return function() {
-      return func.apply(obj || {}, args ? args.concat(__slice.call(arguments, 0)) : arguments);
+    return func.apply(obj || {}, args ? args.concat(__slice.call(arguments, 0)) : arguments);
-    };
+  };
-  }, __hasProp = Object.prototype.hasOwnProperty;
+}, __hasProp = Object.prototype.hasOwnProperty;
  // The CoffeeScript language has a good deal of optional syntax, implicit syntax,
  // and shorthand syntax. This can greatly complicate a grammar and bloat
  // the resulting parse table. Instead of making the parser handle it all, we take
--- a/src/lexer.coffee
+++ b/src/lexer.coffee
@@ -138,7 +138,7 @@ exports.Lexer: class Lexer
    return false unless match: @chunk.match(COMMENT)
    if match[3]
      comment: @sanitize_heredoc match[3], {herecomment: true}
-      @token 'HERECOMMENT', compact comment.split MULTILINER
+      @token 'HERECOMMENT', comment.split MULTILINER
    else
      lines: compact match[1].replace(COMMENT_CLEANER, '').split MULTILINER
      i: @tokens.length - 1
@@ -293,7 +293,8 @@ exports.Lexer: class Lexer
  # Sanitize a heredoc or herecomment by escaping internal double quotes and
  # erasing all external indentation on the left-hand side.
  sanitize_heredoc: (doc, options) ->
-    indent: (doc.match(HEREDOC_INDENT) or ['']).sort()[0]
+    while match: HEREDOC_INDENT.exec doc
      indent: match[1] if not indent or match[1].length < indent.length
    doc: doc.replace(new RegExp("^" +indent, 'gm'), '')
    return doc if options.herecomment
    doc.replace(MULTILINER, "\\n")
@@ -501,7 +502,7 @@ MULTILINER      : /\n/g
 STRING_NEWLINES : /\n[ \t]*/g
 COMMENT_CLEANER : /(^[ \t]*#|\n[ \t]*$)/mg
 NO_NEWLINE      : /^([+\*&|\/\-%=<>:!.\\][<>=&|]*|and|or|is|isnt|not|delete|typeof|instanceof)$/
-HEREDOC_INDENT  : /^[ \t]+/mg
+HEREDOC_INDENT  : /\n+([ \t]*)/g
 # Tokens which a regular expression will never immediately follow, but which
 # a division operator might.
--- a/src/nodes.coffee
+++ b/src/nodes.coffee
@@ -950,13 +950,12 @@ exports.WhileNode: class WhileNode extends BaseNode
      set:      "$@tab$rvar = [];\n"
      @body:    PushNode.wrap(rvar, @body) if @body
    pre:        "$set${@tab}while ($cond)"
    return "$pre null;$post" if not @body
    @body:      Expressions.wrap([new IfNode(@guard, @body)]) if @guard
    if @returns
-      post: new ReturnNode(literal(rvar)).compile(merge(o, {indent: @idt()}))
+      post: '\n' + new ReturnNode(literal(rvar)).compile(merge(o, {indent: @idt()}))
    else
      post: ''
-    "$pre {\n${ @body.compile(o) }\n$@tab}\n$post"
+    "$pre {\n${ @body.compile(o) }\n$@tab}$post"
 statement WhileNode
 children WhileNode, 'condition', 'guard', 'body'
@@ -1173,8 +1172,9 @@ exports.ForNode: class ForNode extends BaseNode
    this
  compile_return_value: (val, o) ->
-    return new ReturnNode(literal(val)).compile(o) if @returns
+    return '\n' + new ReturnNode(literal(val)).compile(o) if @returns
-    val or ''
+    return '\n' + val if val
    ''
  # Welcome to the hairiest method in all of CoffeeScript. Handles the inner
  # loop, filtering, stepping, and result saving for array, object, and range
@@ -1219,7 +1219,7 @@ exports.ForNode: class ForNode extends BaseNode
      for_part: "$ivar in $svar) { if (${utility('hasProp')}.call($svar, $ivar)"
    body:           body.compile(merge(o, {indent: body_dent, top: true}))
    vars:           if range then name else "$name, $ivar"
-    close:          if @object then '}}\n' else '}\n'
+    close:          if @object then '}}' else '}'
    "$set_result${source_part}for ($for_part) {\n$var_part$body\n$@tab$close$return_result"
 statement ForNode
@@ -1416,7 +1416,7 @@ TAB: '  '
 # Trim out all trailing whitespace, so that the generated code plays nice
 # with Git.
-TRAILING_WHITESPACE: /\s+$/gm
+TRAILING_WHITESPACE: /[ \t]+$/gm
 # Keep this identifier regex in sync with the Lexer.
 IDENTIFIER: /^[a-zA-Z\$_](\w|\$)*$/