Improve lexer error messages

- Erroneous tokens are now fully underlined with `^` characters. - The error messages are now a bit more consistent.
2015-02-06 10:52:02 +01:00 · 2015-02-06 10:52:02 +01:00 · 213225418a
parent 3b3e52097a
commit 213225418a
3 changed files with 157 additions and 74 deletions
--- a/lib/coffee-script/lexer.js
+++ b/lib/coffee-script/lexer.js
@ -40,7 +40,7 @@
      }
      this.closeIndentation();
      if (end = this.ends.pop()) {
-        throwSyntaxError("missing " + end.tag, end.origin[2]);
+        this.error("missing " + end.tag, end.origin[2]);
      }
      if (opts.rewrite === false) {
        return this.tokens;
@ -110,7 +110,9 @@
          id = new String(id);
          id.reserved = true;
        } else if (indexOf.call(RESERVED, id) >= 0) {
-          this.error("reserved word \"" + id + "\"");
+          this.error("reserved word '" + id + "'", {
+            length: id.length
+          });
        }
      }
      if (!forcedIdentifier) {
@ -156,16 +158,24 @@
        return 0;
      }
      number = match[0];
-      if (/^0[BOX]/.test(number)) {
-        this.error("radix prefix '" + number + "' must be lowercase");
-      } else if (/E/.test(number) && !/^0x/.test(number)) {
-        this.error("exponential notation '" + number + "' must be indicated with a lowercase 'e'");
-      } else if (/^0\d*[89]/.test(number)) {
-        this.error("decimal literal '" + number + "' must not be prefixed with '0'");
-      } else if (/^0\d+/.test(number)) {
-        this.error("octal literal '" + number + "' must be prefixed with '0o'");
-      }
      lexedLength = number.length;
+      if (/^0[BOX]/.test(number)) {
+        this.error("radix prefix in '" + number + "' must be lowercase", {
+          offset: 1
+        });
+      } else if (/E/.test(number) && !/^0x/.test(number)) {
+        this.error("exponential notation in '" + number + "' must be indicated with a lowercase 'e'", {
+          offset: number.indexOf('E')
+        });
+      } else if (/^0\d*[89]/.test(number)) {
+        this.error("decimal literal '" + number + "' must not be prefixed with '0'", {
+          length: lexedLength
+        });
+      } else if (/^0\d+/.test(number)) {
+        this.error("octal literal '" + number + "' must be prefixed with '0o'", {
+          length: lexedLength
+        });
+      }
      if (octalLiteral = /^0o([0-7]+)/.exec(number)) {
        number = '0x' + parseInt(octalLiteral[1], 8).toString(16);
      }
@ -263,7 +273,10 @@
      comment = match[0], here = match[1];
      if (here) {
        if (match = HERECOMMENT_ILLEGAL.exec(comment)) {
-          this.error("block comments cannot contain " + match[0], match.index);
+          this.error("block comments cannot contain " + match[0], {
+            offset: match.index,
+            length: match[0].length
+          });
        }
        if (here.indexOf('\n') >= 0) {
          here = here.replace(RegExp("\\n" + (repeat(' ', this.indent)), "g"), '\n');
@ -286,7 +299,9 @@
      var body, closed, end, errorToken, flags, index, match, prev, ref2, ref3, regex, rparen, tokens;
      switch (false) {
        case !(match = REGEX_ILLEGAL.exec(this.chunk)):
-          this.error("regular expressions cannot begin with " + match[2], match.index + match[1].length);
+          this.error("regular expressions cannot begin with " + match[2], {
+            offset: match.index + match[1].length
+          });
          break;
        case !(match = this.matchWithInterpolations(HEREGEX, '///')):
          tokens = match.tokens, index = match.index;
@ -319,7 +334,10 @@
      errorToken = this.makeToken('REGEX', this.chunk.slice(0, end), 0, end);
      switch (false) {
        case !!VALID_FLAGS.test(flags):
-          this.error("invalid regular expression flags " + flags, index);
+          this.error("invalid regular expression flags " + flags, {
+            offset: index,
+            length: flags.length
+          });
          break;
        case !(regex || tokens.length === 1):
          if (body == null) {
@ -382,7 +400,9 @@
        this.outdebt = this.indebt = 0;
        this.indent = size;
      } else if (size < this.baseIndent) {
-        this.error('missing indentation', indent.length);
+        this.error('missing indentation', {
+          offset: indent.length
+        });
      } else {
        this.indebt = 0;
        this.outdentToken(this.indent - size, noNewlines, indent.length);
@ -475,7 +495,7 @@
      prev = last(this.tokens);
      if (value === '=' && prev) {
        if (!prev[1].reserved && (ref2 = prev[1], indexOf.call(JS_FORBIDDEN, ref2) >= 0)) {
-          this.error("reserved word \"" + (this.value()) + "\" can't be assigned");
+          this.error("reserved word '" + prev[1] + "' can't be assigned", prev[2]);
        }
        if ((ref3 = prev[1]) === '||' || ref3 === '&&') {
          prev[0] = 'COMPOUND_ASSIGN';
@ -605,7 +625,9 @@
        offsetInChunk += index;
      }
      if (str.slice(0, delimiter.length) !== delimiter) {
-        this.error("missing " + delimiter);
+        this.error("missing " + delimiter, {
+          length: delimiter.length
+        });
      }
      firstToken = tokens[0], lastToken = tokens[tokens.length - 1];
      firstToken[2].first_column -= delimiter.length;
@ -766,7 +788,7 @@
    };

    Lexer.prototype.validateEscapes = function(str, options) {
-      var before, hex, match, message, octal, ref2, unicode;
+      var before, hex, invalidEscape, match, message, octal, ref2, unicode;
      if (options == null) {
        options = {};
      }
@ -778,8 +800,12 @@
      if (options.isRegex && octal && octal.charAt(0) !== '0') {
        return;
      }
-      message = octal ? "octal escape sequences are not allowed \\" + octal : "invalid escape sequence \\" + (hex || unicode);
-      return this.error(message, ((ref2 = options.offsetInChunk) != null ? ref2 : 0) + match.index + before.length);
+      message = octal ? "octal escape sequences are not allowed" : "invalid escape sequence";
+      invalidEscape = "\\" + (octal || hex || unicode);
+      return this.error(message + " " + invalidEscape, {
+        offset: ((ref2 = options.offsetInChunk) != null ? ref2 : 0) + match.index + before.length,
+        length: invalidEscape.length
+      });
    };

    Lexer.prototype.makeDelimitedLiteral = function(body, options) {
@ -822,16 +848,17 @@
      return "" + options.delimiter + body + options.delimiter;
    };

-    Lexer.prototype.error = function(message, offset) {
-      var first_column, first_line, ref2;
-      if (offset == null) {
-        offset = 0;
+    Lexer.prototype.error = function(message, options) {
+      var first_column, first_line, location, ref2, ref3, ref4;
+      if (options == null) {
+        options = {};
      }
-      ref2 = this.getLineAndColumnFromChunk(offset), first_line = ref2[0], first_column = ref2[1];
-      return throwSyntaxError(message, {
+      location = 'first_line' in options ? options : ((ref3 = this.getLineAndColumnFromChunk((ref2 = options.offset) != null ? ref2 : 0), first_line = ref3[0], first_column = ref3[1], ref3), {
        first_line: first_line,
-        first_column: first_column
+        first_column: first_column,
+        last_column: first_column + ((ref4 = options.length) != null ? ref4 : 1) - 1
      });
+      return throwSyntaxError(message, location);
    };

    return Lexer;
--- a/src/lexer.coffee
+++ b/src/lexer.coffee
@ -73,7 +73,7 @@ exports.Lexer = class Lexer
      return {@tokens, index: i} if opts.untilBalanced and @ends.length is 0

    @closeIndentation()
-    throwSyntaxError "missing #{end.tag}", end.origin[2] if end = @ends.pop()
+    @error "missing #{end.tag}", end.origin[2] if end = @ends.pop()
    return @tokens if opts.rewrite is off
    (new Rewriter).rewrite @tokens

@ -143,7 +143,7 @@ exports.Lexer = class Lexer
        id  = new String id
        id.reserved = yes
      else if id in RESERVED
-        @error "reserved word \"#{id}\""
+        @error "reserved word '#{id}'", length: id.length

    unless forcedIdentifier
      id  = COFFEE_ALIAS_MAP[id] if id in COFFEE_ALIASES
@ -171,15 +171,16 @@ exports.Lexer = class Lexer
  numberToken: ->
    return 0 unless match = NUMBER.exec @chunk
    number = match[0]
-    if /^0[BOX]/.test number
-      @error "radix prefix '#{number}' must be lowercase"
-    else if /E/.test(number) and not /^0x/.test number
-      @error "exponential notation '#{number}' must be indicated with a lowercase 'e'"
-    else if /^0\d*[89]/.test number
-      @error "decimal literal '#{number}' must not be prefixed with '0'"
-    else if /^0\d+/.test number
-      @error "octal literal '#{number}' must be prefixed with '0o'"
    lexedLength = number.length
+    if /^0[BOX]/.test number
+      @error "radix prefix in '#{number}' must be lowercase", offset: 1
+    else if /E/.test(number) and not /^0x/.test number
+      @error "exponential notation in '#{number}' must be indicated with a lowercase 'e'",
+        offset: number.indexOf('E')
+    else if /^0\d*[89]/.test number
+      @error "decimal literal '#{number}' must not be prefixed with '0'", length: lexedLength
+    else if /^0\d+/.test number
+      @error "octal literal '#{number}' must be prefixed with '0o'", length: lexedLength
    if octalLiteral = /^0o([0-7]+)/.exec number
      number = '0x' + parseInt(octalLiteral[1], 8).toString 16
    if binaryLiteral = /^0b([01]+)/.exec number
@ -236,7 +237,8 @@ exports.Lexer = class Lexer
    [comment, here] = match
    if here
      if match = HERECOMMENT_ILLEGAL.exec comment
-        @error "block comments cannot contain #{match[0]}", match.index
+        @error "block comments cannot contain #{match[0]}",
+          offset: match.index, length: match[0].length
      if here.indexOf('\n') >= 0
        here = here.replace /// \n #{repeat ' ', @indent} ///g, '\n'
      @token 'HERECOMMENT', here, 0, comment.length
@ -254,7 +256,8 @@ exports.Lexer = class Lexer
  regexToken: ->
    switch
      when match = REGEX_ILLEGAL.exec @chunk
-        @error "regular expressions cannot begin with #{match[2]}", match.index + match[1].length
+        @error "regular expressions cannot begin with #{match[2]}",
+          offset: match.index + match[1].length
      when match = @matchWithInterpolations HEREGEX, '///'
        {tokens, index} = match
      when match = REGEX.exec @chunk
@ -276,7 +279,7 @@ exports.Lexer = class Lexer
    errorToken = @makeToken 'REGEX', @chunk[...end], 0, end
    switch
      when not VALID_FLAGS.test flags
-        @error "invalid regular expression flags #{flags}", index
+        @error "invalid regular expression flags #{flags}", offset: index, length: flags.length
      when regex or tokens.length is 1
        body ?= @formatHeregex tokens[0][1]
        @token 'REGEX', "#{@makeDelimitedLiteral body, delimiter: '/'}#{flags}", 0, end, errorToken
@ -327,7 +330,7 @@ exports.Lexer = class Lexer
      @outdebt = @indebt = 0
      @indent = size
    else if size < @baseIndent
-      @error 'missing indentation', indent.length
+      @error 'missing indentation', offset: indent.length
    else
      @indebt = 0
      @outdentToken @indent - size, noNewlines, indent.length
@ -400,7 +403,7 @@ exports.Lexer = class Lexer
    prev = last @tokens
    if value is '=' and prev
      if not prev[1].reserved and prev[1] in JS_FORBIDDEN
-        @error "reserved word \"#{@value()}\" can't be assigned"
+        @error "reserved word '#{prev[1]}' can't be assigned", prev[2]
      if prev[1] in ['||', '&&']
        prev[0] = 'COMPOUND_ASSIGN'
        prev[1] += '='
@ -516,7 +519,7 @@ exports.Lexer = class Lexer
      offsetInChunk += index

    unless str[...delimiter.length] is delimiter
-      @error "missing #{delimiter}"
+      @error "missing #{delimiter}", length: delimiter.length

    [firstToken, ..., lastToken] = tokens
    firstToken[2].first_column -= delimiter.length
@ -687,10 +690,13 @@ exports.Lexer = class Lexer
    return if options.isRegex and octal and octal.charAt(0) isnt '0'
    message =
      if octal
-        "octal escape sequences are not allowed \\#{octal}"
+        "octal escape sequences are not allowed"
      else
-        "invalid escape sequence \\#{hex or unicode}"
-    @error message, (options.offsetInChunk ? 0) + match.index + before.length
+        "invalid escape sequence"
+    invalidEscape = "\\#{octal or hex or unicode}"
+    @error "#{message} #{invalidEscape}",
+      offset: (options.offsetInChunk ? 0) + match.index + before.length
+      length: invalidEscape.length

  # Constructs a string or regex by escaping certain characters.
  makeDelimitedLiteral: (body, options = {}) ->
@ -714,12 +720,16 @@ exports.Lexer = class Lexer
      when other     then (if options.double then "\\#{other}" else other)
    "#{options.delimiter}#{body}#{options.delimiter}"

-  # Throws a compiler error on the current position.
-  error: (message, offset = 0) ->
-    # TODO: Are there some cases we could improve the error line number by
-    # passing the offset in the chunk where the error happened?
-    [first_line, first_column] = @getLineAndColumnFromChunk offset
-    throwSyntaxError message, {first_line, first_column}
+  # Throws an error at either a given offset from the current chunk or at the
+  # location of a token (`token[2]`).
+  error: (message, options = {}) ->
+    location =
+      if 'first_line' of options
+        options
+      else
+        [first_line, first_column] = @getLineAndColumnFromChunk options.offset ? 0
+        {first_line, first_column, last_column: first_column + (options.length ? 1) - 1}
+    throwSyntaxError message, location

 # Constants
 # ---------
--- a/test/error_messages.coffee
+++ b/test/error_messages.coffee
@ -247,14 +247,14 @@ test "unclosed strings", ->
  """, """
    [stdin]:1:1: error: missing '''
    '''
-    ^
+    ^^^
  """
  assertErrorFormat '''
    """
  ''', '''
    [stdin]:1:1: error: missing """
    """
-    ^
+    ^^^
  '''
  assertErrorFormat '''
    "#{"
@ -275,21 +275,21 @@ test "unclosed strings", ->
  ''', '''
    [stdin]:1:4: error: missing """
    "#{"""
-       ^
+       ^^^
  '''
  assertErrorFormat '''
    """#{"""
  ''', '''
    [stdin]:1:6: error: missing """
    """#{"""
-         ^
+         ^^^
  '''
  assertErrorFormat '''
    ///#{"""
  ''', '''
    [stdin]:1:6: error: missing """
    ///#{"""
-         ^
+         ^^^
  '''
  assertErrorFormat '''
    "a
@ -310,7 +310,7 @@ test "unclosed strings", ->
  ''', '''
    [stdin]:2:1: error: missing """
    """a\\"""
-    ^
+    ^^^
  '''

 test "unclosed heregexes", ->
@ -319,7 +319,7 @@ test "unclosed heregexes", ->
  ''', '''
    [stdin]:1:1: error: missing ///
    ///
-    ^
+    ^^^
  '''
  # https://github.com/jashkenas/coffeescript/issues/3301#issuecomment-31735168
  assertErrorFormat '''
@ -328,7 +328,7 @@ test "unclosed heregexes", ->
  ''', '''
    [stdin]:2:1: error: missing ///
    ///a\\///
-    ^
+    ^^^
  '''

 test "unexpected token after string", ->
@ -378,7 +378,7 @@ test "octal escapes", ->
  ''', '''
    [stdin]:1:10: error: octal escape sequences are not allowed \\07
    "a\\0\\tb\\\\\\07c"
-      \  \   \ \ ^
+      \  \   \ \ ^\^^
  '''
  assertErrorFormat '''
    "a
@ -386,14 +386,14 @@ test "octal escapes", ->
  ''', '''
    [stdin]:2:8: error: octal escape sequences are not allowed \\1
      #{b} \\1"
-           ^
+           ^\^
  '''
  assertErrorFormat '''
    /a\\0\\tb\\\\\\07c/
  ''', '''
    [stdin]:1:10: error: octal escape sequences are not allowed \\07
    /a\\0\\tb\\\\\\07c/
-      \  \   \ \ ^
+      \  \   \ \ ^\^^
  '''
  assertErrorFormat '''
    ///a
@ -401,7 +401,7 @@ test "octal escapes", ->
  ''', '''
    [stdin]:2:8: error: octal escape sequences are not allowed \\01
      #{b} \\01///
-           ^
+           ^\^^
  '''

 test "#3795: invalid escapes", ->
@ -410,7 +410,7 @@ test "#3795: invalid escapes", ->
  ''', '''
    [stdin]:1:10: error: invalid escape sequence \\x7g
    "a\\0\\tb\\\\\\x7g"
-      \  \   \ \ ^
+      \  \   \ \ ^\^^^
  '''
  assertErrorFormat '''
    "a
@ -419,21 +419,21 @@ test "#3795: invalid escapes", ->
  ''', '''
    [stdin]:2:8: error: invalid escape sequence \\uA02
      #{b} \\uA02
-           ^
+           ^\^^^^
  '''
  assertErrorFormat '''
    /a\\u002space/
  ''', '''
    [stdin]:1:3: error: invalid escape sequence \\u002s
    /a\\u002space/
-      ^
+      ^\^^^^^
  '''
  assertErrorFormat '''
    ///a \\u002 0 space///
  ''', '''
    [stdin]:1:6: error: invalid escape sequence \\u002 
    ///a \\u002 0 space///
-         ^
+         ^\^^^^^
  '''
  assertErrorFormat '''
    ///a
@ -442,7 +442,7 @@ test "#3795: invalid escapes", ->
  ''', '''
    [stdin]:2:8: error: invalid escape sequence \\x0
      #{b} \\x0
-           ^
+           ^\^^
  '''

 test "illegal herecomment", ->
@ -453,7 +453,7 @@ test "illegal herecomment", ->
  ''', '''
    [stdin]:2:12: error: block comments cannot contain */
      Regex: /a*/g
-               ^
+               ^^
  '''

 test "#1724: regular expressions beginning with *", ->
@ -480,7 +480,7 @@ test "invalid regex flags", ->
  ''', '''
    [stdin]:1:4: error: invalid regular expression flags ii
    /a/ii
-       ^
+       ^^
  '''
  assertErrorFormat '''
    /a/G
@ -494,21 +494,21 @@ test "invalid regex flags", ->
  ''', '''
    [stdin]:1:4: error: invalid regular expression flags gimi
    /a/gimi
-       ^
+       ^^^^
  '''
  assertErrorFormat '''
    /a/g_
  ''', '''
    [stdin]:1:4: error: invalid regular expression flags g_
    /a/g_
-       ^
+       ^^
  '''
  assertErrorFormat '''
    ///a///ii
  ''', '''
    [stdin]:1:8: error: invalid regular expression flags ii
    ///a///ii
-           ^
+           ^^
  '''
  doesNotThrow -> CoffeeScript.compile '/a/ymgi'

@ -598,3 +598,49 @@ test "duplicate function arguments", ->
    (@foo, bar, @foo) ->
                ^^^^
  '''
+
+test "reserved words", ->
+  assertErrorFormat '''
+    case
+  ''', '''
+    [stdin]:1:1: error: reserved word 'case'
+    case
+    ^^^^
+  '''
+  assertErrorFormat '''
+    for = 1
+  ''', '''
+    [stdin]:1:1: error: reserved word 'for' can't be assigned
+    for = 1
+    ^^^
+  '''
+
+test "invalid numbers", ->
+  assertErrorFormat '''
+    0X0
+  ''', '''
+    [stdin]:1:2: error: radix prefix in '0X0' must be lowercase
+    0X0
+     ^
+  '''
+  assertErrorFormat '''
+    10E0
+  ''', '''
+    [stdin]:1:3: error: exponential notation in '10E0' must be indicated with a lowercase 'e'
+    10E0
+      ^
+  '''
+  assertErrorFormat '''
+    018
+  ''', '''
+    [stdin]:1:1: error: decimal literal '018' must not be prefixed with '0'
+    018
+    ^^^
+  '''
+  assertErrorFormat '''
+    010
+  ''', '''
+    [stdin]:1:1: error: octal literal '010' must be prefixed with '0o'
+    010
+    ^^^
+  '''