resolving issues with comments in the middle of unfinished single-line expressions.

This commit is contained in:
Jeremy Ashkenas 2010-03-20 00:58:25 -04:00
parent 29eff23490
commit a2778bf06d
3 changed files with 41 additions and 16 deletions

View File

@ -1,5 +1,5 @@
(function(){
var ACCESSORS, ASSIGNMENT, BEFORE_WHEN, CALLABLE, CODE, COFFEE_KEYWORDS, COMMENT, COMMENT_CLEANER, HEREDOC, HEREDOC_INDENT, IDENTIFIER, INTERPOLATION, JS_CLEANER, JS_FORBIDDEN, JS_KEYWORDS, KEYWORDS, LAST_DENT, LAST_DENTS, Lexer, MULTILINER, MULTI_DENT, NOT_REGEX, NO_NEWLINE, NUMBER, OPERATOR, REGEX_ESCAPE, REGEX_FLAGS, REGEX_INTERPOLATION, REGEX_START, RESERVED, Rewriter, STRING_NEWLINES, WHITESPACE, balanced_string, compact, count, helpers, include, starts;
var ACCESSORS, ASSIGNMENT, CALLABLE, CODE, COFFEE_KEYWORDS, COMMENT, COMMENT_CLEANER, HEREDOC, HEREDOC_INDENT, IDENTIFIER, INTERPOLATION, JS_CLEANER, JS_FORBIDDEN, JS_KEYWORDS, KEYWORDS, LAST_DENT, LAST_DENTS, LINE_BREAK, Lexer, MULTILINER, MULTI_DENT, NOT_REGEX, NO_NEWLINE, NUMBER, OPERATOR, REGEX_ESCAPE, REGEX_FLAGS, REGEX_INTERPOLATION, REGEX_START, RESERVED, Rewriter, STRING_NEWLINES, WHITESPACE, balanced_string, compact, count, helpers, include, starts;
// The CoffeeScript Lexer. Uses a series of token-matching regexes to attempt
// matches against the beginning of the source code. When a match is found,
// a token is produced, we consume the match, and start again. Tokens are in the
@ -134,7 +134,7 @@
if (include(RESERVED, id)) {
this.identifier_error(id);
}
if (tag === 'WHEN' && include(BEFORE_WHEN, this.tag())) {
if (tag === 'WHEN' && include(LINE_BREAK, this.tag())) {
tag = 'LEADING_WHEN';
}
this.token(tag, id);
@ -233,14 +233,19 @@
// Matches and consumes comments. We pass through comments into JavaScript,
// so they're treated as real tokens, like any other part of the language.
Lexer.prototype.comment_token = function comment_token() {
var comment, lines;
var comment, i, lines;
if (!((comment = this.match(COMMENT, 1)))) {
return false;
}
this.line += (comment.match(MULTILINER) || []).length;
lines = comment.replace(COMMENT_CLEANER, '').split(MULTILINER);
this.token('COMMENT', compact(lines));
this.token('TERMINATOR', "\n");
lines = compact(comment.replace(COMMENT_CLEANER, '').split(MULTILINER));
i = this.tokens.length - 1;
if (this.unfinished()) {
while (this.tokens[i] && !include(LINE_BREAK, this.tokens[i][0])) {
i -= 1;
}
}
this.tokens.splice(i + 1, 0, ['COMMENT', lines, this.line], ['TERMINATOR', '\n', this.line]);
this.i += comment.length;
return true;
};
@ -262,7 +267,7 @@
prev = this.prev(2);
size = indent.match(LAST_DENTS).reverse()[0].match(LAST_DENT)[1].length;
next_character = this.chunk.match(MULTI_DENT)[4];
no_newlines = next_character === '.' || (this.value() && this.value().match && this.value().match(NO_NEWLINE) && prev && (prev[0] !== '.') && !this.value().match(CODE));
no_newlines = next_character === '.' || this.unfinished();
if (size === this.indent) {
if (no_newlines) {
return this.suppress_newlines();
@ -551,6 +556,12 @@
}
return m ? m[index] : false;
};
// Are we in the midst of an unfinished expression?
//
// Evaluates a short-circuit `&&` chain and returns its result as-is, so the
// return value is truthy/falsy rather than a strict boolean: the first falsy
// operand is returned, otherwise the final `!...match(CODE)` boolean.
// The chain is truthy only when all of the following hold:
//   - `this.value()` is truthy and has a `match` method,
//   - that value matches NO_NEWLINE,
//   - `this.prev(2)` returned something truthy whose element 0 is not '.',
//   - that value does not match CODE.
// NOTE(review): `this.value()` presumably yields the most recent token's
// value and `this.prev(2)` the token two back -- neither helper is visible
// here; confirm against their definitions.
Lexer.prototype.unfinished = function unfinished() {
var prev;
prev = this.prev(2);
// The `.match` existence check keeps the chain from throwing on values that
// are not strings (e.g. COMMENT tokens are created with an array of lines).
return this.value() && this.value().match && this.value().match(NO_NEWLINE) && prev && (prev[0] !== '.') && !this.value().match(CODE);
};
return Lexer;
}).call(this);
// There are no extensions to the core lexer by default.
@ -612,5 +623,5 @@
// Tokens that mark a line break. When one immediately precedes a `WHEN`, the
// `WHEN` occurs at the start of a line; we disambiguate these from trailing
// whens to avoid an ambiguity in the grammar.
BEFORE_WHEN = ['INDENT', 'OUTDENT', 'TERMINATOR'];
LINE_BREAK = ['INDENT', 'OUTDENT', 'TERMINATOR'];
})();

View File

@ -98,7 +98,7 @@ exports.Lexer: class Lexer
tag: id.toUpperCase() if include(KEYWORDS, id) and
not (include(ACCESSORS, @tag(0)) and not @prev().spaced)
@identifier_error id if include RESERVED, id
tag: 'LEADING_WHEN' if tag is 'WHEN' and include BEFORE_WHEN, @tag()
tag: 'LEADING_WHEN' if tag is 'WHEN' and include LINE_BREAK, @tag()
@token(tag, id)
@i += id.length
true
@ -171,9 +171,11 @@ exports.Lexer: class Lexer
comment_token: ->
return false unless comment: @match COMMENT, 1
@line += (comment.match(MULTILINER) or []).length
lines: comment.replace(COMMENT_CLEANER, '').split(MULTILINER)
@token 'COMMENT', compact lines
@token 'TERMINATOR', "\n"
lines: compact comment.replace(COMMENT_CLEANER, '').split(MULTILINER)
i: @tokens.length - 1
if @unfinished()
i -= 1 while @tokens[i] and not include LINE_BREAK, @tokens[i][0]
@tokens.splice(i + 1, 0, ['COMMENT', lines, @line], ['TERMINATOR', '\n', @line])
@i += comment.length
true
@ -194,9 +196,7 @@ exports.Lexer: class Lexer
prev: @prev(2)
size: indent.match(LAST_DENTS).reverse()[0].match(LAST_DENT)[1].length
next_character: @chunk.match(MULTI_DENT)[4]
no_newlines: next_character is '.' or
(@value() and @value().match and @value().match(NO_NEWLINE) and
prev and (prev[0] isnt '.') and not @value().match(CODE))
no_newlines: next_character is '.' or @unfinished()
if size is @indent
return @suppress_newlines() if no_newlines
return @newline_token(indent)
@ -406,6 +406,12 @@ exports.Lexer: class Lexer
return false unless m: @chunk.match(regex)
if m then m[index] else false
# Are we in the midst of an unfinished expression?
# Returns the result of an `and` chain as-is (truthy/falsy, not necessarily a
# boolean). It is truthy only when `@value()` is truthy, has a `match` method,
# matches NO_NEWLINE and does not match CODE, and `@prev(2)` returned something
# truthy whose element 0 isnt '.'.
# NOTE(review): `@value()` and `@prev(2)` are defined outside this view --
# presumably the latest token's value and the token two back; confirm.
unfinished: ->
prev: @prev(2)
@value() and @value().match and @value().match(NO_NEWLINE) and
prev and (prev[0] isnt '.') and not @value().match(CODE)
# There are no extensions to the core lexer by default.
Lexer.extensions: []
@ -500,4 +506,4 @@ ACCESSORS: ['PROPERTY_ACCESS', 'PROTOTYPE_ACCESS', 'SOAK_ACCESS', '@']
# Tokens that mark a line break. When one immediately precedes a `WHEN`, the
# `WHEN` occurs at the start of a line; we disambiguate these from trailing
# whens to avoid an ambiguity in the grammar.
BEFORE_WHEN: ['INDENT', 'OUTDENT', 'TERMINATOR']
LINE_BREAK: ['INDENT', 'OUTDENT', 'TERMINATOR']

View File

@ -45,3 +45,11 @@ else # comment
45
ok result is 45
# A trailing comment in the middle of an unfinished multi-line expression
# must not break the expression: the three strings still concatenate.
test:
'test ' +
'test ' + # comment
'test'
ok test is 'test test test'