better indentation handling for far-left heredocs and herecomments

This commit is contained in:
Jeremy Ashkenas 2010-05-12 21:47:31 -04:00
parent 8aceef20e1
commit a5db69e1af
5 changed files with 45 additions and 40 deletions

View File

@ -206,7 +206,7 @@
comment = this.sanitize_heredoc(match[3], { comment = this.sanitize_heredoc(match[3], {
herecomment: true herecomment: true
}); });
this.token('HERECOMMENT', compact(comment.split(MULTILINER))); this.token('HERECOMMENT', comment.split(MULTILINER));
} else { } else {
lines = compact(match[1].replace(COMMENT_CLEANER, '').split(MULTILINER)); lines = compact(match[1].replace(COMMENT_CLEANER, '').split(MULTILINER));
i = this.tokens.length - 1; i = this.tokens.length - 1;
@ -417,8 +417,12 @@
// Sanitize a heredoc or herecomment by escaping internal double quotes and // Sanitize a heredoc or herecomment by escaping internal double quotes and
// erasing all external indentation on the left-hand side. // erasing all external indentation on the left-hand side.
Lexer.prototype.sanitize_heredoc = function sanitize_heredoc(doc, options) { Lexer.prototype.sanitize_heredoc = function sanitize_heredoc(doc, options) {
var indent; var indent, match;
indent = (doc.match(HEREDOC_INDENT) || ['']).sort()[0]; while (match = HEREDOC_INDENT.exec(doc)) {
if (!indent || match[1].length < indent.length) {
indent = match[1];
}
}
doc = doc.replace(new RegExp("^" + indent, 'gm'), ''); doc = doc.replace(new RegExp("^" + indent, 'gm'), '');
if (options.herecomment) { if (options.herecomment) {
return doc; return doc;
@ -666,7 +670,7 @@
STRING_NEWLINES = /\n[ \t]*/g; STRING_NEWLINES = /\n[ \t]*/g;
COMMENT_CLEANER = /(^[ \t]*#|\n[ \t]*$)/mg; COMMENT_CLEANER = /(^[ \t]*#|\n[ \t]*$)/mg;
NO_NEWLINE = /^([+\*&|\/\-%=<>:!.\\][<>=&|]*|and|or|is|isnt|not|delete|typeof|instanceof)$/; NO_NEWLINE = /^([+\*&|\/\-%=<>:!.\\][<>=&|]*|and|or|is|isnt|not|delete|typeof|instanceof)$/;
HEREDOC_INDENT = /^[ \t]+/mg; HEREDOC_INDENT = /\n+([ \t]*)/g;
// Tokens which a regular expression will never immediately follow, but which // Tokens which a regular expression will never immediately follow, but which
// a division operator might. // a division operator might.
// See: http://www.mozilla.org/js/language/js20-2002-04/rationale/syntax.html#regular-expressions // See: http://www.mozilla.org/js/language/js20-2002-04/rationale/syntax.html#regular-expressions

View File

@ -1,16 +1,16 @@
(function(){ (function(){
var AccessorNode, ArrayNode, AssignNode, BaseNode, CallNode, ClassNode, ClosureNode, CodeNode, CommentNode, CurryNode, ExistenceNode, Expressions, ExtendsNode, ForNode, IDENTIFIER, IS_STRING, IfNode, IndexNode, LiteralNode, ObjectNode, OpNode, ParentheticalNode, PushNode, RangeNode, ReturnNode, Scope, SliceNode, SplatNode, TAB, TRAILING_WHITESPACE, ThrowNode, TryNode, UTILITIES, ValueNode, WhileNode, _a, children, compact, del, flatten, helpers, literal, merge, statement, utility; var AccessorNode, ArrayNode, AssignNode, BaseNode, CallNode, ClassNode, ClosureNode, CodeNode, CommentNode, CurryNode, ExistenceNode, Expressions, ExtendsNode, ForNode, IDENTIFIER, IS_STRING, IfNode, IndexNode, LiteralNode, ObjectNode, OpNode, ParentheticalNode, PushNode, RangeNode, ReturnNode, Scope, SliceNode, SplatNode, TAB, TRAILING_WHITESPACE, ThrowNode, TryNode, UTILITIES, ValueNode, WhileNode, _a, children, compact, del, flatten, helpers, literal, merge, statement, utility;
var __slice = Array.prototype.slice, __extends = function(child, parent) { var __slice = Array.prototype.slice, __extends = function(child, parent) {
var ctor = function(){ }; var ctor = function(){ };
ctor.prototype = parent.prototype; ctor.prototype = parent.prototype;
child.__superClass__ = parent.prototype; child.__superClass__ = parent.prototype;
child.prototype = new ctor(); child.prototype = new ctor();
child.prototype.constructor = child; child.prototype.constructor = child;
}, __bind = function(func, obj, args) { }, __bind = function(func, obj, args) {
return function() { return function() {
return func.apply(obj || {}, args ? args.concat(__slice.call(arguments, 0)) : arguments); return func.apply(obj || {}, args ? args.concat(__slice.call(arguments, 0)) : arguments);
};
}; };
};
// `nodes.coffee` contains all of the node classes for the syntax tree. Most // `nodes.coffee` contains all of the node classes for the syntax tree. Most
// nodes are created as the result of actions in the [grammar](grammar.html), // nodes are created as the result of actions in the [grammar](grammar.html),
// but some are created by other nodes as a method of code generation. To convert // but some are created by other nodes as a method of code generation. To convert
@ -1292,16 +1292,13 @@
} }
} }
pre = ("" + set + (this.tab) + "while (" + cond + ")"); pre = ("" + set + (this.tab) + "while (" + cond + ")");
if (!this.body) {
return ("" + pre + " null;" + post);
}
if (this.guard) { if (this.guard) {
this.body = Expressions.wrap([new IfNode(this.guard, this.body)]); this.body = Expressions.wrap([new IfNode(this.guard, this.body)]);
} }
this.returns ? (post = new ReturnNode(literal(rvar)).compile(merge(o, { this.returns ? (post = '\n' + new ReturnNode(literal(rvar)).compile(merge(o, {
indent: this.idt() indent: this.idt()
}))) : (post = ''); }))) : (post = '');
return "" + pre + " {\n" + (this.body.compile(o)) + "\n" + this.tab + "}\n" + post; return "" + pre + " {\n" + (this.body.compile(o)) + "\n" + this.tab + "}" + post;
}; };
return WhileNode; return WhileNode;
})(); })();
@ -1576,9 +1573,12 @@
}; };
ForNode.prototype.compile_return_value = function compile_return_value(val, o) { ForNode.prototype.compile_return_value = function compile_return_value(val, o) {
if (this.returns) { if (this.returns) {
return new ReturnNode(literal(val)).compile(o); return '\n' + new ReturnNode(literal(val)).compile(o);
} }
return val || ''; if (val) {
return '\n' + val;
}
return '';
}; };
// Welcome to the hairiest method in all of CoffeeScript. Handles the inner // Welcome to the hairiest method in all of CoffeeScript. Handles the inner
// loop, filtering, stepping, and result saving for array, object, and range // loop, filtering, stepping, and result saving for array, object, and range
@ -1647,7 +1647,7 @@
top: true top: true
})); }));
vars = range ? name : ("" + name + ", " + ivar); vars = range ? name : ("" + name + ", " + ivar);
close = this.object ? '}}\n' : '}\n'; close = this.object ? '}}' : '}';
return "" + set_result + (source_part) + "for (" + for_part + ") {\n" + var_part + body + "\n" + this.tab + close + return_result; return "" + set_result + (source_part) + "for (" + for_part + ") {\n" + var_part + body + "\n" + this.tab + close + return_result;
}; };
return ForNode; return ForNode;
@ -1855,10 +1855,10 @@
// Correctly set up a prototype chain for inheritance, including a reference // Correctly set up a prototype chain for inheritance, including a reference
// to the superclass for `super()` calls. See: // to the superclass for `super()` calls. See:
// [goog.inherits](http://closure-library.googlecode.com/svn/docs/closure_goog_base.js.source.html#line1206). // [goog.inherits](http://closure-library.googlecode.com/svn/docs/closure_goog_base.js.source.html#line1206).
__extends: "function(child, parent) {\n var ctor = function(){ };\n ctor.prototype = parent.prototype;\n child.__superClass__ = parent.prototype;\n child.prototype = new ctor();\n child.prototype.constructor = child;\n }", __extends: " function(child, parent) {\n var ctor = function(){ };\n ctor.prototype = parent.prototype;\n child.__superClass__ = parent.prototype;\n child.prototype = new ctor();\n child.prototype.constructor = child;\n}",
// Bind a function to a calling context, optionally including curried arguments. // Bind a function to a calling context, optionally including curried arguments.
// See [Underscore's implementation](http://jashkenas.github.com/coffee-script/documentation/docs/underscore.html#section-47). // See [Underscore's implementation](http://jashkenas.github.com/coffee-script/documentation/docs/underscore.html#section-47).
__bind: "function(func, obj, args) {\n return function() {\n return func.apply(obj || {}, args ? args.concat(__slice.call(arguments, 0)) : arguments);\n };\n }", __bind: " function(func, obj, args) {\n return function() {\n return func.apply(obj || {}, args ? args.concat(__slice.call(arguments, 0)) : arguments);\n };\n}",
// Shortcuts to speed up the lookup time for native functions. // Shortcuts to speed up the lookup time for native functions.
__hasProp: 'Object.prototype.hasOwnProperty', __hasProp: 'Object.prototype.hasOwnProperty',
__slice: 'Array.prototype.slice' __slice: 'Array.prototype.slice'
@ -1869,7 +1869,7 @@
TAB = ' '; TAB = ' ';
// Trim out all trailing whitespace, so that the generated code plays nice // Trim out all trailing whitespace, so that the generated code plays nice
// with Git. // with Git.
TRAILING_WHITESPACE = /\s+$/gm; TRAILING_WHITESPACE = /[ \t]+$/gm;
// Keep this identifier regex in sync with the Lexer. // Keep this identifier regex in sync with the Lexer.
IDENTIFIER = /^[a-zA-Z\$_](\w|\$)*$/; IDENTIFIER = /^[a-zA-Z\$_](\w|\$)*$/;
// Is a literal value a string? // Is a literal value a string?

View File

@ -1,10 +1,10 @@
(function(){ (function(){
var BALANCED_PAIRS, EXPRESSION_CLOSE, EXPRESSION_END, EXPRESSION_START, IMPLICIT_BLOCK, IMPLICIT_CALL, IMPLICIT_END, IMPLICIT_FUNC, INVERSES, Rewriter, SINGLE_CLOSERS, SINGLE_LINERS, _a, _b, _c, _d, _e, _f, _g, _h, _i, _j, _k, _l, _m, helpers, include, pair; var BALANCED_PAIRS, EXPRESSION_CLOSE, EXPRESSION_END, EXPRESSION_START, IMPLICIT_BLOCK, IMPLICIT_CALL, IMPLICIT_END, IMPLICIT_FUNC, INVERSES, Rewriter, SINGLE_CLOSERS, SINGLE_LINERS, _a, _b, _c, _d, _e, _f, _g, _h, _i, _j, _k, _l, _m, helpers, include, pair;
var __slice = Array.prototype.slice, __bind = function(func, obj, args) { var __slice = Array.prototype.slice, __bind = function(func, obj, args) {
return function() { return function() {
return func.apply(obj || {}, args ? args.concat(__slice.call(arguments, 0)) : arguments); return func.apply(obj || {}, args ? args.concat(__slice.call(arguments, 0)) : arguments);
}; };
}, __hasProp = Object.prototype.hasOwnProperty; }, __hasProp = Object.prototype.hasOwnProperty;
// The CoffeeScript language has a good deal of optional syntax, implicit syntax, // The CoffeeScript language has a good deal of optional syntax, implicit syntax,
// and shorthand syntax. This can greatly complicate a grammar and bloat // and shorthand syntax. This can greatly complicate a grammar and bloat
// the resulting parse table. Instead of making the parser handle it all, we take // the resulting parse table. Instead of making the parser handle it all, we take

View File

@ -138,7 +138,7 @@ exports.Lexer: class Lexer
return false unless match: @chunk.match(COMMENT) return false unless match: @chunk.match(COMMENT)
if match[3] if match[3]
comment: @sanitize_heredoc match[3], {herecomment: true} comment: @sanitize_heredoc match[3], {herecomment: true}
@token 'HERECOMMENT', compact comment.split MULTILINER @token 'HERECOMMENT', comment.split MULTILINER
else else
lines: compact match[1].replace(COMMENT_CLEANER, '').split MULTILINER lines: compact match[1].replace(COMMENT_CLEANER, '').split MULTILINER
i: @tokens.length - 1 i: @tokens.length - 1
@ -293,7 +293,8 @@ exports.Lexer: class Lexer
# Sanitize a heredoc or herecomment by escaping internal double quotes and # Sanitize a heredoc or herecomment by escaping internal double quotes and
# erasing all external indentation on the left-hand side. # erasing all external indentation on the left-hand side.
sanitize_heredoc: (doc, options) -> sanitize_heredoc: (doc, options) ->
indent: (doc.match(HEREDOC_INDENT) or ['']).sort()[0] while match: HEREDOC_INDENT.exec doc
indent: match[1] if not indent or match[1].length < indent.length
doc: doc.replace(new RegExp("^" +indent, 'gm'), '') doc: doc.replace(new RegExp("^" +indent, 'gm'), '')
return doc if options.herecomment return doc if options.herecomment
doc.replace(MULTILINER, "\\n") doc.replace(MULTILINER, "\\n")
@ -501,7 +502,7 @@ MULTILINER : /\n/g
STRING_NEWLINES : /\n[ \t]*/g STRING_NEWLINES : /\n[ \t]*/g
COMMENT_CLEANER : /(^[ \t]*#|\n[ \t]*$)/mg COMMENT_CLEANER : /(^[ \t]*#|\n[ \t]*$)/mg
NO_NEWLINE : /^([+\*&|\/\-%=<>:!.\\][<>=&|]*|and|or|is|isnt|not|delete|typeof|instanceof)$/ NO_NEWLINE : /^([+\*&|\/\-%=<>:!.\\][<>=&|]*|and|or|is|isnt|not|delete|typeof|instanceof)$/
HEREDOC_INDENT : /^[ \t]+/mg HEREDOC_INDENT : /\n+([ \t]*)/g
# Tokens which a regular expression will never immediately follow, but which # Tokens which a regular expression will never immediately follow, but which
# a division operator might. # a division operator might.

View File

@ -950,13 +950,12 @@ exports.WhileNode: class WhileNode extends BaseNode
set: "$@tab$rvar = [];\n" set: "$@tab$rvar = [];\n"
@body: PushNode.wrap(rvar, @body) if @body @body: PushNode.wrap(rvar, @body) if @body
pre: "$set${@tab}while ($cond)" pre: "$set${@tab}while ($cond)"
return "$pre null;$post" if not @body
@body: Expressions.wrap([new IfNode(@guard, @body)]) if @guard @body: Expressions.wrap([new IfNode(@guard, @body)]) if @guard
if @returns if @returns
post: new ReturnNode(literal(rvar)).compile(merge(o, {indent: @idt()})) post: '\n' + new ReturnNode(literal(rvar)).compile(merge(o, {indent: @idt()}))
else else
post: '' post: ''
"$pre {\n${ @body.compile(o) }\n$@tab}\n$post" "$pre {\n${ @body.compile(o) }\n$@tab}$post"
statement WhileNode statement WhileNode
children WhileNode, 'condition', 'guard', 'body' children WhileNode, 'condition', 'guard', 'body'
@ -1173,8 +1172,9 @@ exports.ForNode: class ForNode extends BaseNode
this this
compile_return_value: (val, o) -> compile_return_value: (val, o) ->
return new ReturnNode(literal(val)).compile(o) if @returns return '\n' + new ReturnNode(literal(val)).compile(o) if @returns
val or '' return '\n' + val if val
''
# Welcome to the hairiest method in all of CoffeeScript. Handles the inner # Welcome to the hairiest method in all of CoffeeScript. Handles the inner
# loop, filtering, stepping, and result saving for array, object, and range # loop, filtering, stepping, and result saving for array, object, and range
@ -1219,7 +1219,7 @@ exports.ForNode: class ForNode extends BaseNode
for_part: "$ivar in $svar) { if (${utility('hasProp')}.call($svar, $ivar)" for_part: "$ivar in $svar) { if (${utility('hasProp')}.call($svar, $ivar)"
body: body.compile(merge(o, {indent: body_dent, top: true})) body: body.compile(merge(o, {indent: body_dent, top: true}))
vars: if range then name else "$name, $ivar" vars: if range then name else "$name, $ivar"
close: if @object then '}}\n' else '}\n' close: if @object then '}}' else '}'
"$set_result${source_part}for ($for_part) {\n$var_part$body\n$@tab$close$return_result" "$set_result${source_part}for ($for_part) {\n$var_part$body\n$@tab$close$return_result"
statement ForNode statement ForNode
@ -1416,7 +1416,7 @@ TAB: ' '
# Trim out all trailing whitespace, so that the generated code plays nice # Trim out all trailing whitespace, so that the generated code plays nice
# with Git. # with Git.
TRAILING_WHITESPACE: /\s+$/gm TRAILING_WHITESPACE: /[ \t]+$/gm
# Keep this identifier regex in sync with the Lexer. # Keep this identifier regex in sync with the Lexer.
IDENTIFIER: /^[a-zA-Z\$_](\w|\$)*$/ IDENTIFIER: /^[a-zA-Z\$_](\w|\$)*$/