Improvement to comment handling that should ensure that comments have no effect on indentation

Jeremy Ashkenas 2010-03-02 19:23:21 -05:00
parent 70cb195e6f
commit 5fd0972b5d
6 changed files with 39 additions and 32 deletions

View File

@@ -1,12 +1,10 @@
(function(){
var ACCESSORS, ASSIGNMENT, BEFORE_WHEN, CALLABLE, CODE, COFFEE_KEYWORDS, COMMENT, COMMENT_CLEANER, HEREDOC, HEREDOC_INDENT, IDENTIFIER, JS, JS_CLEANER, JS_FORBIDDEN, JS_KEYWORDS, KEYWORDS, LAST_DENT, LAST_DENTS, Lexer, MULTILINER, MULTI_DENT, NOT_REGEX, NO_NEWLINE, NUMBER, OPERATOR, REGEX, RESERVED, Rewriter, STRING, STRING_NEWLINES, WHITESPACE, count, include;
var ACCESSORS, ASSIGNMENT, BEFORE_WHEN, CALLABLE, CODE, COFFEE_KEYWORDS, COMMENT, COMMENT_CLEANER, HEREDOC, HEREDOC_INDENT, IDENTIFIER, JS, JS_CLEANER, JS_FORBIDDEN, JS_KEYWORDS, KEYWORDS, LAST_DENT, LAST_DENTS, Lexer, MULTILINER, MULTI_DENT, NOT_REGEX, NO_NEWLINE, NUMBER, OPERATOR, REGEX, RESERVED, Rewriter, STRING, STRING_NEWLINES, WHITESPACE, compact, count, include;
// The CoffeeScript Lexer. Uses a series of token-matching regexes to attempt
// matches against the beginning of the source code. When a match is found,
// a token is produced, we consume the match, and start again. Tokens are in the
// form:
//
// [tag, value, line_number]
//
// Which is a format that can be fed directly into [Jison](http://github.com/zaach/jison).
// Set up the Lexer for both Node.js and the browser, depending on where we are.
if ((typeof process !== "undefined" && process !== null)) {
@@ -56,9 +54,7 @@
HEREDOC_INDENT = /^[ \t]+/mg;
// Tokens which a regular expression will never immediately follow, but which
// a division operator might.
//
// See: http://www.mozilla.org/js/language/js20-2002-04/rationale/syntax.html#regular-expressions
//
// Our list is shorter, due to sans-parentheses method calls.
NOT_REGEX = ['NUMBER', 'REGEX', '++', '--', 'FALSE', 'NULL', 'TRUE'];
// Tokens which could legitimately be invoked or indexed. A opening
@@ -121,10 +117,10 @@
if (this.regex_token()) {
return null;
}
if (this.line_token()) {
if (this.comment_token()) {
return null;
}
if (this.comment_token()) {
if (this.line_token()) {
return null;
}
if (this.whitespace_token()) {
@@ -214,12 +210,13 @@
};
// Matches and consumes comments.
Lexer.prototype.comment_token = function comment_token() {
var comment;
var comment, lines;
if (!((comment = this.match(COMMENT, 1)))) {
return false;
}
this.line += (comment.match(MULTILINER) || []).length;
this.token('COMMENT', comment.replace(COMMENT_CLEANER, '').split(MULTILINER));
lines = comment.replace(COMMENT_CLEANER, '').split(MULTILINER);
this.token('COMMENT', compact(lines));
this.token('TERMINATOR', "\n");
this.i += comment.length;
return true;
@@ -449,6 +446,18 @@
include = function include(list, value) {
return list.indexOf(value) >= 0;
};
// Trim out all falsy values from an array.
compact = function compact(array) {
var _a, _b, _c, _d, item;
_a = []; _b = array;
for (_c = 0, _d = _b.length; _c < _d; _c++) {
item = _b[_c];
if (item) {
_a.push(item);
}
}
return _a;
};
// Count the number of occurrences of a character in a string.
count = function count(string, letter) {
var num, pos;
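A quick standalone sketch of the new compact helper in action (not part of the commit; the regexes below are simplified stand-ins for the lexer's COMMENT_CLEANER and MULTILINER constants). Splitting a cleaned comment on newlines can leave empty strings behind, which compact drops before the COMMENT token is emitted:

var compact = function(array) {
  var result = [];
  for (var i = 0; i < array.length; i++) {
    if (array[i]) result.push(array[i]);
  }
  return result;
};
// Simplified stand-ins for the lexer's comment-cleaning regexes.
var comment = "# first\n# second\n";
var lines = comment.replace(/^\s*#\s*/mg, '').split(/\n/);
// lines is ['first', 'second', ''] -- the trailing empty string would
// otherwise end up as a blank line inside the COMMENT token's value.
console.log(compact(lines)); // => ['first', 'second']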

View File

@@ -85,19 +85,14 @@
Rewriter.prototype.adjust_comments = function adjust_comments() {
return this.scan_tokens((function(__this) {
var __func = function(prev, token, post, i) {
var after, before;
var after;
if (!(token[0] === 'COMMENT')) {
return 1;
}
before = this.tokens[i - 2];
after = this.tokens[i + 2];
if (before && after && ((before[0] === 'INDENT' && after[0] === 'OUTDENT') || (before[0] === 'OUTDENT' && after[0] === 'INDENT')) && before[1] === after[1]) {
if (after && after[0] === 'INDENT') {
this.tokens.splice(i + 2, 1);
this.tokens.splice(i - 2, 1);
return 0;
} else if (prev && prev[0] === 'TERMINATOR' && after && after[0] === 'INDENT') {
this.tokens.splice(i + 2, 1);
this.tokens[i - 1] = after;
this.tokens.splice(i, 0, after);
return 1;
} else if (prev && prev[0] !== 'TERMINATOR' && prev[0] !== 'INDENT' && prev[0] !== 'OUTDENT') {
this.tokens.splice(i, 0, ['TERMINATOR', "\n", prev[2]]);
@@ -325,14 +320,12 @@
// el.hide())
// In order to accomplish this, move outdents that follow closing parens
// inwards, safely. The steps to accomplish this are:
//
// 1. Check that all paired tokens are balanced and in order.
// 2. Rewrite the stream with a stack: if you see an '(' or INDENT, add it
// to the stack. If you see an ')' or OUTDENT, pop the stack and replace
// it with the inverse of what we've just popped.
// 3. Keep track of "debt" for tokens that we fake, to make sure we end
// up balanced in the end.
//
Rewriter.prototype.rewrite_closing_parens = function rewrite_closing_parens() {
var _l, debt, key, stack, val;
stack = [];
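To make the new adjust_comments behavior above concrete, here is a standalone sketch (illustrative token values, not the Rewriter itself) of the splice that moves a trailing INDENT in front of a COMMENT, so the comment no longer sits between a block opener and its indented body:

var tokens = [
  ['IDENTIFIER', 'obj'], [':', ':'],
  ['COMMENT', [' a note']], ['TERMINATOR', '\n'],
  ['INDENT', 2]
];
var i = 2;                        // index of the COMMENT token
var after = tokens[i + 2];
if (after && after[0] === 'INDENT') {
  tokens.splice(i + 2, 1);        // remove the INDENT that trailed the comment
  tokens.splice(i, 0, after);     // and re-insert it just before the comment
}
// tokens is now IDENTIFIER, ':', INDENT, COMMENT, TERMINATOR -- indentation
// is unaffected by the comment.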

View File

@@ -7,7 +7,6 @@
// Scope objects form a tree corresponding to the shape of the function
// definitions present in the script. They provide lexical scope, to determine
// whether a variable has been seen before or if it needs to be declared.
//
// Initialize a scope with its parent, for lookups up the chain,
// as well as the Expressions body where it should declare its variables,
// and the function that it wraps.

View File

@@ -134,8 +134,8 @@ exports.Lexer: class Lexer
return if @string_token()
return if @js_token()
return if @regex_token()
return if @line_token()
return if @comment_token()
return if @line_token()
return if @whitespace_token()
return @literal_token()
@@ -199,7 +199,8 @@ exports.Lexer: class Lexer
comment_token: ->
return false unless comment: @match COMMENT, 1
@line += (comment.match(MULTILINER) or []).length
@token 'COMMENT', comment.replace(COMMENT_CLEANER, '').split(MULTILINER)
lines: comment.replace(COMMENT_CLEANER, '').split(MULTILINER)
@token 'COMMENT', compact lines
@token 'TERMINATOR', "\n"
@i += comment.length
true
@@ -375,6 +376,9 @@ exports.Lexer: class Lexer
include: (list, value) ->
list.indexOf(value) >= 0
# Trim out all falsy values from an array.
compact: (array) -> item for item in array when item
# Count the number of occurrences of a character in a string.
count: (string, letter) ->
num: 0

View File

@@ -72,18 +72,10 @@ exports.Rewriter: class Rewriter
adjust_comments: ->
@scan_tokens (prev, token, post, i) =>
return 1 unless token[0] is 'COMMENT'
before: @tokens[i - 2]
after: @tokens[i + 2]
if before and after and
((before[0] is 'INDENT' and after[0] is 'OUTDENT') or
(before[0] is 'OUTDENT' and after[0] is 'INDENT')) and
before[1] is after[1]
if after and after[0] is 'INDENT'
@tokens.splice(i + 2, 1)
@tokens.splice(i - 2, 1)
return 0
else if prev and prev[0] is 'TERMINATOR' and after and after[0] is 'INDENT'
@tokens.splice(i + 2, 1)
@tokens[i - 1]: after
@tokens.splice(i, 0, after)
return 1
else if prev and prev[0] isnt 'TERMINATOR' and prev[0] isnt 'INDENT' and prev[0] isnt 'OUTDENT'
@tokens.splice(i, 0, ['TERMINATOR', "\n", prev[2]])

View File

@@ -23,3 +23,13 @@ ok func()
func
func
# Line3
obj: {
# comment
# comment
# comment
one: 1
# comment
two: 2
# comment
}