resolving issues with comments in the middle of unfinished single-line expressions.

2010-03-20 00:58:25 -04:00 · 2010-03-20 00:58:25 -04:00 · a2778bf06d
parent 29eff23490
commit a2778bf06d
3 changed files with 41 additions and 16 deletions
--- a/lib/lexer.js
+++ b/lib/lexer.js
@ -1,5 +1,5 @@
 (function(){
-  var ACCESSORS, ASSIGNMENT, BEFORE_WHEN, CALLABLE, CODE, COFFEE_KEYWORDS, COMMENT, COMMENT_CLEANER, HEREDOC, HEREDOC_INDENT, IDENTIFIER, INTERPOLATION, JS_CLEANER, JS_FORBIDDEN, JS_KEYWORDS, KEYWORDS, LAST_DENT, LAST_DENTS, Lexer, MULTILINER, MULTI_DENT, NOT_REGEX, NO_NEWLINE, NUMBER, OPERATOR, REGEX_ESCAPE, REGEX_FLAGS, REGEX_INTERPOLATION, REGEX_START, RESERVED, Rewriter, STRING_NEWLINES, WHITESPACE, balanced_string, compact, count, helpers, include, starts;
+  var ACCESSORS, ASSIGNMENT, CALLABLE, CODE, COFFEE_KEYWORDS, COMMENT, COMMENT_CLEANER, HEREDOC, HEREDOC_INDENT, IDENTIFIER, INTERPOLATION, JS_CLEANER, JS_FORBIDDEN, JS_KEYWORDS, KEYWORDS, LAST_DENT, LAST_DENTS, LINE_BREAK, Lexer, MULTILINER, MULTI_DENT, NOT_REGEX, NO_NEWLINE, NUMBER, OPERATOR, REGEX_ESCAPE, REGEX_FLAGS, REGEX_INTERPOLATION, REGEX_START, RESERVED, Rewriter, STRING_NEWLINES, WHITESPACE, balanced_string, compact, count, helpers, include, starts;
  // The CoffeeScript Lexer. Uses a series of token-matching regexes to attempt
  // matches against the beginning of the source code. When a match is found,
  // a token is produced, we consume the match, and start again. Tokens are in the
@ -134,7 +134,7 @@
      if (include(RESERVED, id)) {
        this.identifier_error(id);
      }
-      if (tag === 'WHEN' && include(BEFORE_WHEN, this.tag())) {
+      if (tag === 'WHEN' && include(LINE_BREAK, this.tag())) {
        tag = 'LEADING_WHEN';
      }
      this.token(tag, id);
@ -233,14 +233,19 @@
    // Matches and consumes comments. We pass through comments into JavaScript,
    // so they're treated as real tokens, like any other part of the language.
    Lexer.prototype.comment_token = function comment_token() {
-      var comment, lines;
+      var comment, i, lines;
      if (!((comment = this.match(COMMENT, 1)))) {
        return false;
      }
      this.line += (comment.match(MULTILINER) || []).length;
-      lines = comment.replace(COMMENT_CLEANER, '').split(MULTILINER);
-      this.token('COMMENT', compact(lines));
-      this.token('TERMINATOR', "\n");
+      lines = compact(comment.replace(COMMENT_CLEANER, '').split(MULTILINER));
+      i = this.tokens.length - 1;
+      if (this.unfinished()) {
+        while (this.tokens[i] && !include(LINE_BREAK, this.tokens[i][0])) {
+          i -= 1;
+        }
+      }
+      this.tokens.splice(i + 1, 0, ['COMMENT', lines, this.line], ['TERMINATOR', '\n', this.line]);
      this.i += comment.length;
      return true;
    };
@ -262,7 +267,7 @@
      prev = this.prev(2);
      size = indent.match(LAST_DENTS).reverse()[0].match(LAST_DENT)[1].length;
      next_character = this.chunk.match(MULTI_DENT)[4];
-      no_newlines = next_character === '.' || (this.value() && this.value().match && this.value().match(NO_NEWLINE) && prev && (prev[0] !== '.') && !this.value().match(CODE));
+      no_newlines = next_character === '.' || this.unfinished();
      if (size === this.indent) {
        if (no_newlines) {
          return this.suppress_newlines();
@ -551,6 +556,12 @@
      }
      return m ? m[index] : false;
    };
+    // Are we in the midst of an unfinished expression?
+    Lexer.prototype.unfinished = function unfinished() {
+      var prev;
+      prev = this.prev(2);
+      return this.value() && this.value().match && this.value().match(NO_NEWLINE) && prev && (prev[0] !== '.') && !this.value().match(CODE);
+    };
    return Lexer;
  }).call(this);
  // There are no extensions to the core lexer by default.
@ -612,5 +623,5 @@
  // Tokens that, when immediately preceding a `WHEN`, indicate that the `WHEN`
  // occurs at the start of a line. We disambiguate these from trailing whens to
  // avoid an ambiguity in the grammar.
-  BEFORE_WHEN = ['INDENT', 'OUTDENT', 'TERMINATOR'];
+  LINE_BREAK = ['INDENT', 'OUTDENT', 'TERMINATOR'];
 })();
--- a/src/lexer.coffee
+++ b/src/lexer.coffee
@ -98,7 +98,7 @@ exports.Lexer: class Lexer
    tag: id.toUpperCase() if include(KEYWORDS, id) and
      not (include(ACCESSORS, @tag(0)) and not @prev().spaced)
    @identifier_error id  if include RESERVED, id
-    tag: 'LEADING_WHEN'   if tag is 'WHEN' and include BEFORE_WHEN, @tag()
+    tag: 'LEADING_WHEN'   if tag is 'WHEN' and include LINE_BREAK, @tag()
    @token(tag, id)
    @i += id.length
    true
@ -171,9 +171,11 @@ exports.Lexer: class Lexer
  comment_token: ->
    return false unless comment: @match COMMENT, 1
    @line += (comment.match(MULTILINER) or []).length
-    lines: comment.replace(COMMENT_CLEANER, '').split(MULTILINER)
-    @token 'COMMENT', compact lines
-    @token 'TERMINATOR', "\n"
+    lines: compact comment.replace(COMMENT_CLEANER, '').split(MULTILINER)
+    i: @tokens.length - 1
+    if @unfinished()
+      i -= 1 while @tokens[i] and not include LINE_BREAK, @tokens[i][0]
+    @tokens.splice(i + 1, 0, ['COMMENT', lines, @line], ['TERMINATOR', '\n', @line])
    @i += comment.length
    true

@ -194,9 +196,7 @@ exports.Lexer: class Lexer
    prev: @prev(2)
    size: indent.match(LAST_DENTS).reverse()[0].match(LAST_DENT)[1].length
    next_character: @chunk.match(MULTI_DENT)[4]
-    no_newlines: next_character is '.' or
-      (@value() and @value().match and @value().match(NO_NEWLINE) and
-      prev and (prev[0] isnt '.') and not @value().match(CODE))
+    no_newlines: next_character is '.' or @unfinished()
    if size is @indent
      return @suppress_newlines() if no_newlines
      return @newline_token(indent)
@ -406,6 +406,12 @@ exports.Lexer: class Lexer
    return false unless m: @chunk.match(regex)
    if m then m[index] else false

+  # Are we in the midst of an unfinished expression?
+  unfinished: ->
+    prev: @prev(2)
+    @value() and @value().match and @value().match(NO_NEWLINE) and
+      prev and (prev[0] isnt '.') and not @value().match(CODE)
+
 # There are no extensions to the core lexer by default.
 Lexer.extensions: []

@ -500,4 +506,4 @@ ACCESSORS: ['PROPERTY_ACCESS', 'PROTOTYPE_ACCESS', 'SOAK_ACCESS', '@']
 # Tokens that, when immediately preceding a `WHEN`, indicate that the `WHEN`
 # occurs at the start of a line. We disambiguate these from trailing whens to
 # avoid an ambiguity in the grammar.
-BEFORE_WHEN: ['INDENT', 'OUTDENT', 'TERMINATOR']
+LINE_BREAK: ['INDENT', 'OUTDENT', 'TERMINATOR']
--- a/test/test_funky_comments.coffee
+++ b/test/test_funky_comments.coffee
@ -45,3 +45,11 @@ else # comment
  45

 ok result is 45
+
+
+test:
+  'test ' +
+  'test ' + # comment
+  'test'
+
+ok test is 'test test test'