diff --git a/lib/lexer.js b/lib/lexer.js index 072c937f..413eb3c8 100644 --- a/lib/lexer.js +++ b/lib/lexer.js @@ -1,5 +1,5 @@ (function(){ - var ACCESSORS, ASSIGNMENT, BEFORE_WHEN, CALLABLE, CODE, COFFEE_KEYWORDS, COMMENT, COMMENT_CLEANER, HEREDOC, HEREDOC_INDENT, IDENTIFIER, INTERPOLATION, JS_CLEANER, JS_FORBIDDEN, JS_KEYWORDS, KEYWORDS, LAST_DENT, LAST_DENTS, Lexer, MULTILINER, MULTI_DENT, NOT_REGEX, NO_NEWLINE, NUMBER, OPERATOR, REGEX_ESCAPE, REGEX_FLAGS, REGEX_INTERPOLATION, REGEX_START, RESERVED, Rewriter, STRING_NEWLINES, WHITESPACE, balanced_string, compact, count, helpers, include, starts; + var ACCESSORS, ASSIGNMENT, CALLABLE, CODE, COFFEE_KEYWORDS, COMMENT, COMMENT_CLEANER, HEREDOC, HEREDOC_INDENT, IDENTIFIER, INTERPOLATION, JS_CLEANER, JS_FORBIDDEN, JS_KEYWORDS, KEYWORDS, LAST_DENT, LAST_DENTS, LINE_BREAK, Lexer, MULTILINER, MULTI_DENT, NOT_REGEX, NO_NEWLINE, NUMBER, OPERATOR, REGEX_ESCAPE, REGEX_FLAGS, REGEX_INTERPOLATION, REGEX_START, RESERVED, Rewriter, STRING_NEWLINES, WHITESPACE, balanced_string, compact, count, helpers, include, starts; // The CoffeeScript Lexer. Uses a series of token-matching regexes to attempt // matches against the beginning of the source code. When a match is found, // a token is produced, we consume the match, and start again. Tokens are in the @@ -134,7 +134,7 @@ if (include(RESERVED, id)) { this.identifier_error(id); } - if (tag === 'WHEN' && include(BEFORE_WHEN, this.tag())) { + if (tag === 'WHEN' && include(LINE_BREAK, this.tag())) { tag = 'LEADING_WHEN'; } this.token(tag, id); @@ -233,14 +233,19 @@ // Matches and conumes comments. We pass through comments into JavaScript, // so they're treated as real tokens, like any other part of the language. Lexer.prototype.comment_token = function comment_token() { - var comment, lines; + var comment, i, lines; if (!((comment = this.match(COMMENT, 1)))) { return false; } this.line += (comment.match(MULTILINER) || []).length; - lines = comment.replace(COMMENT_CLEANER, '').split(MULTILINER); - this.token('COMMENT', compact(lines)); - this.token('TERMINATOR', "\n"); + lines = compact(comment.replace(COMMENT_CLEANER, '').split(MULTILINER)); + i = this.tokens.length - 1; + if (this.unfinished()) { + while (this.tokens[i] && !include(LINE_BREAK, this.tokens[i][0])) { + i -= 1; + } + } + this.tokens.splice(i + 1, 0, ['COMMENT', lines, this.line], ['TERMINATOR', '\n', this.line]); this.i += comment.length; return true; }; @@ -262,7 +267,7 @@ prev = this.prev(2); size = indent.match(LAST_DENTS).reverse()[0].match(LAST_DENT)[1].length; next_character = this.chunk.match(MULTI_DENT)[4]; - no_newlines = next_character === '.' || (this.value() && this.value().match && this.value().match(NO_NEWLINE) && prev && (prev[0] !== '.') && !this.value().match(CODE)); + no_newlines = next_character === '.' || this.unfinished(); if (size === this.indent) { if (no_newlines) { return this.suppress_newlines(); @@ -551,6 +556,12 @@ } return m ? m[index] : false; }; + // Are we in the midst of an unfinished expression? + Lexer.prototype.unfinished = function unfinished() { + var prev; + prev = this.prev(2); + return this.value() && this.value().match && this.value().match(NO_NEWLINE) && prev && (prev[0] !== '.') && !this.value().match(CODE); + }; return Lexer; }).call(this); // There are no exensions to the core lexer by default. @@ -612,5 +623,5 @@ // Tokens that, when immediately preceding a `WHEN`, indicate that the `WHEN` // occurs at the start of a line. We disambiguate these from trailing whens to // avoid an ambiguity in the grammar. - BEFORE_WHEN = ['INDENT', 'OUTDENT', 'TERMINATOR']; + LINE_BREAK = ['INDENT', 'OUTDENT', 'TERMINATOR']; })(); diff --git a/src/lexer.coffee b/src/lexer.coffee index b7711d87..6ec68a04 100644 --- a/src/lexer.coffee +++ b/src/lexer.coffee @@ -98,7 +98,7 @@ exports.Lexer: class Lexer tag: id.toUpperCase() if include(KEYWORDS, id) and not (include(ACCESSORS, @tag(0)) and not @prev().spaced) @identifier_error id if include RESERVED, id - tag: 'LEADING_WHEN' if tag is 'WHEN' and include BEFORE_WHEN, @tag() + tag: 'LEADING_WHEN' if tag is 'WHEN' and include LINE_BREAK, @tag() @token(tag, id) @i += id.length true @@ -171,9 +171,11 @@ exports.Lexer: class Lexer comment_token: -> return false unless comment: @match COMMENT, 1 @line += (comment.match(MULTILINER) or []).length - lines: comment.replace(COMMENT_CLEANER, '').split(MULTILINER) - @token 'COMMENT', compact lines - @token 'TERMINATOR', "\n" + lines: compact comment.replace(COMMENT_CLEANER, '').split(MULTILINER) + i: @tokens.length - 1 + if @unfinished() + i -= 1 while @tokens[i] and not include LINE_BREAK, @tokens[i][0] + @tokens.splice(i + 1, 0, ['COMMENT', lines, @line], ['TERMINATOR', '\n', @line]) @i += comment.length true @@ -194,9 +196,7 @@ exports.Lexer: class Lexer prev: @prev(2) size: indent.match(LAST_DENTS).reverse()[0].match(LAST_DENT)[1].length next_character: @chunk.match(MULTI_DENT)[4] - no_newlines: next_character is '.' or - (@value() and @value().match and @value().match(NO_NEWLINE) and - prev and (prev[0] isnt '.') and not @value().match(CODE)) + no_newlines: next_character is '.' or @unfinished() if size is @indent return @suppress_newlines() if no_newlines return @newline_token(indent) @@ -406,6 +406,12 @@ exports.Lexer: class Lexer return false unless m: @chunk.match(regex) if m then m[index] else false + # Are we in the midst of an unfinished expression? + unfinished: -> + prev: @prev(2) + @value() and @value().match and @value().match(NO_NEWLINE) and + prev and (prev[0] isnt '.') and not @value().match(CODE) + # There are no exensions to the core lexer by default. Lexer.extensions: [] @@ -500,4 +506,4 @@ ACCESSORS: ['PROPERTY_ACCESS', 'PROTOTYPE_ACCESS', 'SOAK_ACCESS', '@'] # Tokens that, when immediately preceding a `WHEN`, indicate that the `WHEN` # occurs at the start of a line. We disambiguate these from trailing whens to # avoid an ambiguity in the grammar. -BEFORE_WHEN: ['INDENT', 'OUTDENT', 'TERMINATOR'] +LINE_BREAK: ['INDENT', 'OUTDENT', 'TERMINATOR'] diff --git a/test/test_funky_comments.coffee b/test/test_comments.coffee similarity index 84% rename from test/test_funky_comments.coffee rename to test/test_comments.coffee index f46d6929..5c43aaea 100644 --- a/test/test_funky_comments.coffee +++ b/test/test_comments.coffee @@ -45,3 +45,11 @@ else # comment 45 ok result is 45 + + +test: + 'test ' + + 'test ' + # comment + 'test' + +ok test is 'test test test'