Improve lexer error messages

- Erroneous tokens are now fully underlined with ^ characters.
- The error messages are now a bit more consistent.
This commit is contained in:
Simon Lydell 2015-02-06 10:52:02 +01:00
parent 3b3e52097a
commit 213225418a
3 changed files with 157 additions and 74 deletions

View File

@ -40,7 +40,7 @@
}
this.closeIndentation();
if (end = this.ends.pop()) {
throwSyntaxError("missing " + end.tag, end.origin[2]);
this.error("missing " + end.tag, end.origin[2]);
}
if (opts.rewrite === false) {
return this.tokens;
@ -110,7 +110,9 @@
id = new String(id);
id.reserved = true;
} else if (indexOf.call(RESERVED, id) >= 0) {
this.error("reserved word \"" + id + "\"");
this.error("reserved word '" + id + "'", {
length: id.length
});
}
}
if (!forcedIdentifier) {
@ -156,16 +158,24 @@
return 0;
}
number = match[0];
if (/^0[BOX]/.test(number)) {
this.error("radix prefix '" + number + "' must be lowercase");
} else if (/E/.test(number) && !/^0x/.test(number)) {
this.error("exponential notation '" + number + "' must be indicated with a lowercase 'e'");
} else if (/^0\d*[89]/.test(number)) {
this.error("decimal literal '" + number + "' must not be prefixed with '0'");
} else if (/^0\d+/.test(number)) {
this.error("octal literal '" + number + "' must be prefixed with '0o'");
}
lexedLength = number.length;
if (/^0[BOX]/.test(number)) {
this.error("radix prefix in '" + number + "' must be lowercase", {
offset: 1
});
} else if (/E/.test(number) && !/^0x/.test(number)) {
this.error("exponential notation in '" + number + "' must be indicated with a lowercase 'e'", {
offset: number.indexOf('E')
});
} else if (/^0\d*[89]/.test(number)) {
this.error("decimal literal '" + number + "' must not be prefixed with '0'", {
length: lexedLength
});
} else if (/^0\d+/.test(number)) {
this.error("octal literal '" + number + "' must be prefixed with '0o'", {
length: lexedLength
});
}
if (octalLiteral = /^0o([0-7]+)/.exec(number)) {
number = '0x' + parseInt(octalLiteral[1], 8).toString(16);
}
@ -263,7 +273,10 @@
comment = match[0], here = match[1];
if (here) {
if (match = HERECOMMENT_ILLEGAL.exec(comment)) {
this.error("block comments cannot contain " + match[0], match.index);
this.error("block comments cannot contain " + match[0], {
offset: match.index,
length: match[0].length
});
}
if (here.indexOf('\n') >= 0) {
here = here.replace(RegExp("\\n" + (repeat(' ', this.indent)), "g"), '\n');
@ -286,7 +299,9 @@
var body, closed, end, errorToken, flags, index, match, prev, ref2, ref3, regex, rparen, tokens;
switch (false) {
case !(match = REGEX_ILLEGAL.exec(this.chunk)):
this.error("regular expressions cannot begin with " + match[2], match.index + match[1].length);
this.error("regular expressions cannot begin with " + match[2], {
offset: match.index + match[1].length
});
break;
case !(match = this.matchWithInterpolations(HEREGEX, '///')):
tokens = match.tokens, index = match.index;
@ -319,7 +334,10 @@
errorToken = this.makeToken('REGEX', this.chunk.slice(0, end), 0, end);
switch (false) {
case !!VALID_FLAGS.test(flags):
this.error("invalid regular expression flags " + flags, index);
this.error("invalid regular expression flags " + flags, {
offset: index,
length: flags.length
});
break;
case !(regex || tokens.length === 1):
if (body == null) {
@ -382,7 +400,9 @@
this.outdebt = this.indebt = 0;
this.indent = size;
} else if (size < this.baseIndent) {
this.error('missing indentation', indent.length);
this.error('missing indentation', {
offset: indent.length
});
} else {
this.indebt = 0;
this.outdentToken(this.indent - size, noNewlines, indent.length);
@ -475,7 +495,7 @@
prev = last(this.tokens);
if (value === '=' && prev) {
if (!prev[1].reserved && (ref2 = prev[1], indexOf.call(JS_FORBIDDEN, ref2) >= 0)) {
this.error("reserved word \"" + (this.value()) + "\" can't be assigned");
this.error("reserved word '" + prev[1] + "' can't be assigned", prev[2]);
}
if ((ref3 = prev[1]) === '||' || ref3 === '&&') {
prev[0] = 'COMPOUND_ASSIGN';
@ -605,7 +625,9 @@
offsetInChunk += index;
}
if (str.slice(0, delimiter.length) !== delimiter) {
this.error("missing " + delimiter);
this.error("missing " + delimiter, {
length: delimiter.length
});
}
firstToken = tokens[0], lastToken = tokens[tokens.length - 1];
firstToken[2].first_column -= delimiter.length;
@ -766,7 +788,7 @@
};
Lexer.prototype.validateEscapes = function(str, options) {
var before, hex, match, message, octal, ref2, unicode;
var before, hex, invalidEscape, match, message, octal, ref2, unicode;
if (options == null) {
options = {};
}
@ -778,8 +800,12 @@
if (options.isRegex && octal && octal.charAt(0) !== '0') {
return;
}
message = octal ? "octal escape sequences are not allowed \\" + octal : "invalid escape sequence \\" + (hex || unicode);
return this.error(message, ((ref2 = options.offsetInChunk) != null ? ref2 : 0) + match.index + before.length);
message = octal ? "octal escape sequences are not allowed" : "invalid escape sequence";
invalidEscape = "\\" + (octal || hex || unicode);
return this.error(message + " " + invalidEscape, {
offset: ((ref2 = options.offsetInChunk) != null ? ref2 : 0) + match.index + before.length,
length: invalidEscape.length
});
};
Lexer.prototype.makeDelimitedLiteral = function(body, options) {
@ -822,16 +848,17 @@
return "" + options.delimiter + body + options.delimiter;
};
Lexer.prototype.error = function(message, offset) {
var first_column, first_line, ref2;
if (offset == null) {
offset = 0;
Lexer.prototype.error = function(message, options) {
var first_column, first_line, location, ref2, ref3, ref4;
if (options == null) {
options = {};
}
ref2 = this.getLineAndColumnFromChunk(offset), first_line = ref2[0], first_column = ref2[1];
return throwSyntaxError(message, {
location = 'first_line' in options ? options : ((ref3 = this.getLineAndColumnFromChunk((ref2 = options.offset) != null ? ref2 : 0), first_line = ref3[0], first_column = ref3[1], ref3), {
first_line: first_line,
first_column: first_column
first_column: first_column,
last_column: first_column + ((ref4 = options.length) != null ? ref4 : 1) - 1
});
return throwSyntaxError(message, location);
};
return Lexer;

View File

@ -73,7 +73,7 @@ exports.Lexer = class Lexer
return {@tokens, index: i} if opts.untilBalanced and @ends.length is 0
@closeIndentation()
throwSyntaxError "missing #{end.tag}", end.origin[2] if end = @ends.pop()
@error "missing #{end.tag}", end.origin[2] if end = @ends.pop()
return @tokens if opts.rewrite is off
(new Rewriter).rewrite @tokens
@ -143,7 +143,7 @@ exports.Lexer = class Lexer
id = new String id
id.reserved = yes
else if id in RESERVED
@error "reserved word \"#{id}\""
@error "reserved word '#{id}'", length: id.length
unless forcedIdentifier
id = COFFEE_ALIAS_MAP[id] if id in COFFEE_ALIASES
@ -171,15 +171,16 @@ exports.Lexer = class Lexer
numberToken: ->
return 0 unless match = NUMBER.exec @chunk
number = match[0]
if /^0[BOX]/.test number
@error "radix prefix '#{number}' must be lowercase"
else if /E/.test(number) and not /^0x/.test number
@error "exponential notation '#{number}' must be indicated with a lowercase 'e'"
else if /^0\d*[89]/.test number
@error "decimal literal '#{number}' must not be prefixed with '0'"
else if /^0\d+/.test number
@error "octal literal '#{number}' must be prefixed with '0o'"
lexedLength = number.length
if /^0[BOX]/.test number
@error "radix prefix in '#{number}' must be lowercase", offset: 1
else if /E/.test(number) and not /^0x/.test number
@error "exponential notation in '#{number}' must be indicated with a lowercase 'e'",
offset: number.indexOf('E')
else if /^0\d*[89]/.test number
@error "decimal literal '#{number}' must not be prefixed with '0'", length: lexedLength
else if /^0\d+/.test number
@error "octal literal '#{number}' must be prefixed with '0o'", length: lexedLength
if octalLiteral = /^0o([0-7]+)/.exec number
number = '0x' + parseInt(octalLiteral[1], 8).toString 16
if binaryLiteral = /^0b([01]+)/.exec number
@ -236,7 +237,8 @@ exports.Lexer = class Lexer
[comment, here] = match
if here
if match = HERECOMMENT_ILLEGAL.exec comment
@error "block comments cannot contain #{match[0]}", match.index
@error "block comments cannot contain #{match[0]}",
offset: match.index, length: match[0].length
if here.indexOf('\n') >= 0
here = here.replace /// \n #{repeat ' ', @indent} ///g, '\n'
@token 'HERECOMMENT', here, 0, comment.length
@ -254,7 +256,8 @@ exports.Lexer = class Lexer
regexToken: ->
switch
when match = REGEX_ILLEGAL.exec @chunk
@error "regular expressions cannot begin with #{match[2]}", match.index + match[1].length
@error "regular expressions cannot begin with #{match[2]}",
offset: match.index + match[1].length
when match = @matchWithInterpolations HEREGEX, '///'
{tokens, index} = match
when match = REGEX.exec @chunk
@ -276,7 +279,7 @@ exports.Lexer = class Lexer
errorToken = @makeToken 'REGEX', @chunk[...end], 0, end
switch
when not VALID_FLAGS.test flags
@error "invalid regular expression flags #{flags}", index
@error "invalid regular expression flags #{flags}", offset: index, length: flags.length
when regex or tokens.length is 1
body ?= @formatHeregex tokens[0][1]
@token 'REGEX', "#{@makeDelimitedLiteral body, delimiter: '/'}#{flags}", 0, end, errorToken
@ -327,7 +330,7 @@ exports.Lexer = class Lexer
@outdebt = @indebt = 0
@indent = size
else if size < @baseIndent
@error 'missing indentation', indent.length
@error 'missing indentation', offset: indent.length
else
@indebt = 0
@outdentToken @indent - size, noNewlines, indent.length
@ -400,7 +403,7 @@ exports.Lexer = class Lexer
prev = last @tokens
if value is '=' and prev
if not prev[1].reserved and prev[1] in JS_FORBIDDEN
@error "reserved word \"#{@value()}\" can't be assigned"
@error "reserved word '#{prev[1]}' can't be assigned", prev[2]
if prev[1] in ['||', '&&']
prev[0] = 'COMPOUND_ASSIGN'
prev[1] += '='
@ -516,7 +519,7 @@ exports.Lexer = class Lexer
offsetInChunk += index
unless str[...delimiter.length] is delimiter
@error "missing #{delimiter}"
@error "missing #{delimiter}", length: delimiter.length
[firstToken, ..., lastToken] = tokens
firstToken[2].first_column -= delimiter.length
@ -687,10 +690,13 @@ exports.Lexer = class Lexer
return if options.isRegex and octal and octal.charAt(0) isnt '0'
message =
if octal
"octal escape sequences are not allowed \\#{octal}"
"octal escape sequences are not allowed"
else
"invalid escape sequence \\#{hex or unicode}"
@error message, (options.offsetInChunk ? 0) + match.index + before.length
"invalid escape sequence"
invalidEscape = "\\#{octal or hex or unicode}"
@error "#{message} #{invalidEscape}",
offset: (options.offsetInChunk ? 0) + match.index + before.length
length: invalidEscape.length
# Constructs a string or regex by escaping certain characters.
makeDelimitedLiteral: (body, options = {}) ->
@ -714,12 +720,16 @@ exports.Lexer = class Lexer
when other then (if options.double then "\\#{other}" else other)
"#{options.delimiter}#{body}#{options.delimiter}"
# Throws a compiler error on the current position.
error: (message, offset = 0) ->
# TODO: Are there some cases we could improve the error line number by
# passing the offset in the chunk where the error happened?
[first_line, first_column] = @getLineAndColumnFromChunk offset
throwSyntaxError message, {first_line, first_column}
# Throws an error at either a given offset from the current chunk or at the
# location of a token (`token[2]`).
error: (message, options = {}) ->
location =
if 'first_line' of options
options
else
[first_line, first_column] = @getLineAndColumnFromChunk options.offset ? 0
{first_line, first_column, last_column: first_column + (options.length ? 1) - 1}
throwSyntaxError message, location
# Constants
# ---------

View File

@ -247,14 +247,14 @@ test "unclosed strings", ->
""", """
[stdin]:1:1: error: missing '''
'''
^
^^^
"""
assertErrorFormat '''
"""
''', '''
[stdin]:1:1: error: missing """
"""
^
^^^
'''
assertErrorFormat '''
"#{"
@ -275,21 +275,21 @@ test "unclosed strings", ->
''', '''
[stdin]:1:4: error: missing """
"#{"""
^
^^^
'''
assertErrorFormat '''
"""#{"""
''', '''
[stdin]:1:6: error: missing """
"""#{"""
^
^^^
'''
assertErrorFormat '''
///#{"""
''', '''
[stdin]:1:6: error: missing """
///#{"""
^
^^^
'''
assertErrorFormat '''
"a
@ -310,7 +310,7 @@ test "unclosed strings", ->
''', '''
[stdin]:2:1: error: missing """
"""a\\"""
^
^^^
'''
test "unclosed heregexes", ->
@ -319,7 +319,7 @@ test "unclosed heregexes", ->
''', '''
[stdin]:1:1: error: missing ///
///
^
^^^
'''
# https://github.com/jashkenas/coffeescript/issues/3301#issuecomment-31735168
assertErrorFormat '''
@ -328,7 +328,7 @@ test "unclosed heregexes", ->
''', '''
[stdin]:2:1: error: missing ///
///a\\///
^
^^^
'''
test "unexpected token after string", ->
@ -378,7 +378,7 @@ test "octal escapes", ->
''', '''
[stdin]:1:10: error: octal escape sequences are not allowed \\07
"a\\0\\tb\\\\\\07c"
\ \ \ \ ^
\ \ \ \ ^\^^
'''
assertErrorFormat '''
"a
@ -386,14 +386,14 @@ test "octal escapes", ->
''', '''
[stdin]:2:8: error: octal escape sequences are not allowed \\1
#{b} \\1"
^
^\^
'''
assertErrorFormat '''
/a\\0\\tb\\\\\\07c/
''', '''
[stdin]:1:10: error: octal escape sequences are not allowed \\07
/a\\0\\tb\\\\\\07c/
\ \ \ \ ^
\ \ \ \ ^\^^
'''
assertErrorFormat '''
///a
@ -401,7 +401,7 @@ test "octal escapes", ->
''', '''
[stdin]:2:8: error: octal escape sequences are not allowed \\01
#{b} \\01///
^
^\^^
'''
test "#3795: invalid escapes", ->
@ -410,7 +410,7 @@ test "#3795: invalid escapes", ->
''', '''
[stdin]:1:10: error: invalid escape sequence \\x7g
"a\\0\\tb\\\\\\x7g"
\ \ \ \ ^
\ \ \ \ ^\^^^
'''
assertErrorFormat '''
"a
@ -419,21 +419,21 @@ test "#3795: invalid escapes", ->
''', '''
[stdin]:2:8: error: invalid escape sequence \\uA02
#{b} \\uA02
^
^\^^^^
'''
assertErrorFormat '''
/a\\u002space/
''', '''
[stdin]:1:3: error: invalid escape sequence \\u002s
/a\\u002space/
^
^\^^^^^
'''
assertErrorFormat '''
///a \\u002 0 space///
''', '''
[stdin]:1:6: error: invalid escape sequence \\u002
///a \\u002 0 space///
^
^\^^^^^
'''
assertErrorFormat '''
///a
@ -442,7 +442,7 @@ test "#3795: invalid escapes", ->
''', '''
[stdin]:2:8: error: invalid escape sequence \\x0
#{b} \\x0
^
^\^^
'''
test "illegal herecomment", ->
@ -453,7 +453,7 @@ test "illegal herecomment", ->
''', '''
[stdin]:2:12: error: block comments cannot contain */
Regex: /a*/g
^
^^
'''
test "#1724: regular expressions beginning with *", ->
@ -480,7 +480,7 @@ test "invalid regex flags", ->
''', '''
[stdin]:1:4: error: invalid regular expression flags ii
/a/ii
^
^^
'''
assertErrorFormat '''
/a/G
@ -494,21 +494,21 @@ test "invalid regex flags", ->
''', '''
[stdin]:1:4: error: invalid regular expression flags gimi
/a/gimi
^
^^^^
'''
assertErrorFormat '''
/a/g_
''', '''
[stdin]:1:4: error: invalid regular expression flags g_
/a/g_
^
^^
'''
assertErrorFormat '''
///a///ii
''', '''
[stdin]:1:8: error: invalid regular expression flags ii
///a///ii
^
^^
'''
doesNotThrow -> CoffeeScript.compile '/a/ymgi'
@ -598,3 +598,49 @@ test "duplicate function arguments", ->
(@foo, bar, @foo) ->
^^^^
'''
test "reserved words", ->
assertErrorFormat '''
case
''', '''
[stdin]:1:1: error: reserved word 'case'
case
^^^^
'''
assertErrorFormat '''
for = 1
''', '''
[stdin]:1:1: error: reserved word 'for' can't be assigned
for = 1
^^^
'''
test "invalid numbers", ->
assertErrorFormat '''
0X0
''', '''
[stdin]:1:2: error: radix prefix in '0X0' must be lowercase
0X0
^
'''
assertErrorFormat '''
10E0
''', '''
[stdin]:1:3: error: exponential notation in '10E0' must be indicated with a lowercase 'e'
10E0
^
'''
assertErrorFormat '''
018
''', '''
[stdin]:1:1: error: decimal literal '018' must not be prefixed with '0'
018
^^^
'''
assertErrorFormat '''
010
''', '''
[stdin]:1:1: error: octal literal '010' must be prefixed with '0o'
010
^^^
'''