Mirror of https://github.com/jashkenas/coffeescript.git, synced 2022-11-09 12:23:24 -05:00
Lexer now adds location data, including first/last line/column to all generated tokens.
commit bb94e02fad
parent 25126e2f99
4 changed files with 340 additions and 109 deletions
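For orientation, here is a minimal sketch of the token shape this commit introduces (illustrative values, not part of the diff): tokens remain arrays of [tag, value, first_line], and each token now also carries a locationData property whose line and column fields are zero-based.

    # Hypothetical token for an identifier "foo" starting at offset 4 of a chunk:
    token = ['IDENTIFIER', 'foo', 0]   # [tag, value, first_line], as before
    token.locationData =
      first_line: 0
      first_column: 4
      last_line: 0
      last_column: 7                   # computed from offset + value.length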
lib/helpers.js

@@ -107,4 +107,18 @@
     };
   };
+
+  exports.locationDataToString = function(obj) {
+    var locationData;
+    if ("locationData" in obj) {
+      locationData = obj.locationData;
+    } else if ("first_line" in obj) {
+      locationData = obj;
+    }
+    if (locationData) {
+      return ("" + (locationData.first_line + 1) + ":" + (locationData.first_column + 1) + "-") + ("" + (locationData.last_line + 1) + ":" + (locationData.last_column + 1));
+    } else {
+      return "No location data";
+    }
+  };
 
 }).call(this);

lib/lexer.js
@@ -1,18 +1,18 @@
 // Generated by CoffeeScript 1.4.0
 (function() {
-  var BOOL, CALLABLE, CODE, COFFEE_ALIASES, COFFEE_ALIAS_MAP, COFFEE_KEYWORDS, COMMENT, COMPARE, COMPOUND_ASSIGN, HEREDOC, HEREDOC_ILLEGAL, HEREDOC_INDENT, HEREGEX, HEREGEX_OMIT, IDENTIFIER, INDEXABLE, INVERSES, JSTOKEN, JS_FORBIDDEN, JS_KEYWORDS, LINE_BREAK, LINE_CONTINUER, LOGIC, Lexer, MATH, MULTILINER, MULTI_DENT, NOT_REGEX, NOT_SPACED_REGEX, NUMBER, OPERATOR, REGEX, RELATION, RESERVED, Rewriter, SHIFT, SIMPLESTR, STRICT_PROSCRIBED, TRAILING_SPACES, UNARY, WHITESPACE, compact, count, key, last, starts, _ref, _ref1,
+  var BOOL, CALLABLE, CODE, COFFEE_ALIASES, COFFEE_ALIAS_MAP, COFFEE_KEYWORDS, COMMENT, COMPARE, COMPOUND_ASSIGN, HEREDOC, HEREDOC_ILLEGAL, HEREDOC_INDENT, HEREGEX, HEREGEX_OMIT, IDENTIFIER, INDEXABLE, INVERSES, JSTOKEN, JS_FORBIDDEN, JS_KEYWORDS, LINE_BREAK, LINE_CONTINUER, LOGIC, Lexer, MATH, MULTILINER, MULTI_DENT, NOT_REGEX, NOT_SPACED_REGEX, NUMBER, OPERATOR, REGEX, RELATION, RESERVED, Rewriter, SHIFT, SIMPLESTR, STRICT_PROSCRIBED, TRAILING_SPACES, UNARY, WHITESPACE, compact, count, key, last, locationDataToString, starts, _ref, _ref1,
     __indexOf = [].indexOf || function(item) { for (var i = 0, l = this.length; i < l; i++) { if (i in this && this[i] === item) return i; } return -1; };
 
   _ref = require('./rewriter'), Rewriter = _ref.Rewriter, INVERSES = _ref.INVERSES;
 
-  _ref1 = require('./helpers'), count = _ref1.count, starts = _ref1.starts, compact = _ref1.compact, last = _ref1.last;
+  _ref1 = require('./helpers'), count = _ref1.count, starts = _ref1.starts, compact = _ref1.compact, last = _ref1.last, locationDataToString = _ref1.locationDataToString;
 
   exports.Lexer = Lexer = (function() {
 
     function Lexer() {}
 
     Lexer.prototype.tokenize = function(code, opts) {
-      var i, tag;
+      var consumed, i, tag, _ref2;
       if (opts == null) {
         opts = {};
       }
@@ -22,6 +22,8 @@
       code = code.replace(/\r/g, '').replace(TRAILING_SPACES, '');
       this.code = code;
       this.line = opts.line || 0;
+      this.chunkLine = opts.line || 0;
+      this.chunkColumn = opts.column || 0;
       this.indent = 0;
       this.indebt = 0;
       this.outdebt = 0;
@@ -30,7 +32,9 @@
       this.tokens = [];
       i = 0;
       while (this.chunk = code.slice(i)) {
-        i += this.identifierToken() || this.commentToken() || this.whitespaceToken() || this.lineToken() || this.heredocToken() || this.stringToken() || this.numberToken() || this.regexToken() || this.jsToken() || this.literalToken();
+        consumed = this.identifierToken() || this.commentToken() || this.whitespaceToken() || this.lineToken() || this.heredocToken() || this.stringToken() || this.numberToken() || this.regexToken() || this.jsToken() || this.literalToken();
+        _ref2 = this.getLineAndColumnFromChunk(consumed), this.chunkLine = _ref2[0], this.chunkColumn = _ref2[1];
+        i += consumed;
       }
       this.closeIndentation();
       if (tag = this.ends.pop()) {
@@ -43,11 +47,13 @@
     };
 
     Lexer.prototype.identifierToken = function() {
-      var colon, forcedIdentifier, id, input, match, prev, tag, _ref2, _ref3;
+      var colon, colonOffset, forcedIdentifier, id, idLength, input, match, poppedToken, prev, tag, tagToken, _ref2, _ref3, _ref4;
       if (!(match = IDENTIFIER.exec(this.chunk))) {
         return 0;
       }
       input = match[0], id = match[1], colon = match[2];
+      idLength = id.length;
+      poppedToken = void 0;
       if (id === 'own' && this.tag() === 'FOR') {
         this.token('OWN', id);
         return id.length;
@@ -71,7 +77,7 @@
       } else {
         tag = 'RELATION';
         if (this.value() === '!') {
-          this.tokens.pop();
+          poppedToken = this.tokens.pop();
           id = '!' + id;
         }
       }
@@ -111,9 +117,13 @@
         }
       })();
       }
-      this.token(tag, id);
+      tagToken = this.token(tag, id, 0, idLength);
+      if (poppedToken) {
+        _ref4 = [poppedToken.locationData.first_line, poppedToken.locationData.first_column], tagToken.locationData.first_line = _ref4[0], tagToken.locationData.first_column = _ref4[1];
+      }
       if (colon) {
-        this.token(':', ':');
+        colonOffset = input.lastIndexOf(':');
+        this.token(':', ':', colonOffset, colon.length);
       }
       return input.length;
     };
@@ -140,7 +150,7 @@
       if (binaryLiteral = /^0b([01]+)/.exec(number)) {
         number = '0x' + (parseInt(binaryLiteral[1], 2)).toString(16);
       }
-      this.token('NUMBER', number);
+      this.token('NUMBER', number, 0, lexedLength);
       return lexedLength;
     };
 
@@ -151,16 +161,19 @@
           if (!(match = SIMPLESTR.exec(this.chunk))) {
             return 0;
           }
-          this.token('STRING', (string = match[0]).replace(MULTILINER, '\\\n'));
+          string = match[0];
+          this.token('STRING', string.replace(MULTILINER, '\\\n'), 0, string.length);
           break;
         case '"':
           if (!(string = this.balancedString(this.chunk, '"'))) {
            return 0;
           }
           if (0 < string.indexOf('#{', 1)) {
-            this.interpolateString(string.slice(1, -1));
+            this.interpolateString(string.slice(1, -1), {
+              offsetInChunk: 1
+            });
           } else {
-            this.token('STRING', this.escapeLines(string));
+            this.token('STRING', this.escapeLines(string, 0, string.length));
           }
           break;
         default:
@@ -186,10 +199,11 @@
       });
       if (quote === '"' && 0 <= doc.indexOf('#{')) {
         this.interpolateString(doc, {
-          heredoc: true
+          heredoc: true,
+          offsetInChunk: 3
         });
       } else {
-        this.token('STRING', this.makeString(doc, quote, true));
+        this.token('STRING', this.makeString(doc, quote, true), 0, heredoc.length);
       }
       this.line += count(heredoc, '\n');
       return heredoc.length;
@@ -205,7 +219,7 @@
         this.token('HERECOMMENT', this.sanitizeHeredoc(here, {
           herecomment: true,
           indent: Array(this.indent + 1).join(' ')
-        }));
+        }), 0, comment.length);
       }
       this.line += count(comment, '\n');
       return comment.length;
@@ -216,7 +230,7 @@
       if (!(this.chunk.charAt(0) === '`' && (match = JSTOKEN.exec(this.chunk)))) {
         return 0;
       }
-      this.token('JS', (script = match[0]).slice(1, -1));
+      this.token('JS', (script = match[0]).slice(1, -1), 0, script.length);
       this.line += count(script, '\n');
       return script.length;
     };
@@ -245,49 +259,61 @@
       if (regex === '//') {
         regex = '/(?:)/';
       }
-      this.token('REGEX', "" + regex + flags);
+      this.token('REGEX', "" + regex + flags, 0, match.length);
       return match.length;
     };
 
     Lexer.prototype.heregexToken = function(match) {
-      var body, flags, heregex, re, tag, tokens, value, _i, _len, _ref2, _ref3, _ref4, _ref5;
+      var body, flags, flagsOffset, heregex, plusToken, prev, re, tag, token, tokens, value, _i, _len, _ref2, _ref3, _ref4;
       heregex = match[0], body = match[1], flags = match[2];
       if (0 > body.indexOf('#{')) {
         re = body.replace(HEREGEX_OMIT, '').replace(/\//g, '\\/');
         if (re.match(/^\*/)) {
           this.error('regular expressions cannot begin with `*`');
         }
-        this.token('REGEX', "/" + (re || '(?:)') + "/" + flags);
+        this.token('REGEX', "/" + (re || '(?:)') + "/" + flags, 0, heregex.length);
         return heregex.length;
       }
-      this.token('IDENTIFIER', 'RegExp');
-      this.tokens.push(['CALL_START', '(']);
+      this.token('IDENTIFIER', 'RegExp', 0, 0);
+      this.token('CALL_START', '(', 0, 0);
       tokens = [];
       _ref2 = this.interpolateString(body, {
-        regex: true
+        regex: true,
+        offsetInChunk: 3
       });
       for (_i = 0, _len = _ref2.length; _i < _len; _i++) {
-        _ref3 = _ref2[_i], tag = _ref3[0], value = _ref3[1];
+        token = _ref2[_i];
+        tag = token[0], value = token[1];
         if (tag === 'TOKENS') {
           tokens.push.apply(tokens, value);
-        } else {
+        } else if (tag === 'NEOSTRING') {
          if (!(value = value.replace(HEREGEX_OMIT, ''))) {
             continue;
           }
           value = value.replace(/\\/g, '\\\\');
-          tokens.push(['STRING', this.makeString(value, '"', true)]);
+          token[0] = 'STRING';
+          token[1] = this.makeString(value, '"', true);
+          tokens.push(token);
+        } else {
+          this.error("Unexpected " + tag);
         }
-        tokens.push(['+', '+']);
+        prev = last(this.tokens);
+        plusToken = ['+', '+'];
+        plusToken.locationData = prev.locationData;
+        tokens.push(plusToken);
       }
       tokens.pop();
-      if (((_ref4 = tokens[0]) != null ? _ref4[0] : void 0) !== 'STRING') {
-        this.tokens.push(['STRING', '""'], ['+', '+']);
+      if (((_ref3 = tokens[0]) != null ? _ref3[0] : void 0) !== 'STRING') {
+        this.token('STRING', '""', 0, 0);
+        this.token('+', '+', 0, 0);
       }
-      (_ref5 = this.tokens).push.apply(_ref5, tokens);
+      (_ref4 = this.tokens).push.apply(_ref4, tokens);
       if (flags) {
-        this.tokens.push([',', ','], ['STRING', '"' + flags + '"']);
+        flagsOffset = heregex.lastIndexOf(flags);
+        this.token(',', ',', flagsOffset, 0);
+        this.token('STRING', '"' + flags + '"', flagsOffset, flags.length);
       }
-      this.token(')', ')');
+      this.token(')', ')', heregex.length - 1, 0);
       return heregex.length;
     };
 
@@ -304,7 +330,7 @@
       if (noNewlines) {
         this.suppressNewlines();
       } else {
-        this.newlineToken();
+        this.newlineToken(0);
       }
       this.line += count(indent, '\n');
       return indent.length;
@@ -317,19 +343,19 @@
         return indent.length;
       }
       diff = size - this.indent + this.outdebt;
-      this.token('INDENT', diff);
+      this.token('INDENT', diff, 0, indent.length);
       this.indents.push(diff);
       this.ends.push('OUTDENT');
       this.outdebt = this.indebt = 0;
       } else {
         this.indebt = 0;
-        this.outdentToken(this.indent - size, noNewlines);
+        this.outdentToken(this.indent - size, noNewlines, indent.length);
       }
       this.indent = size;
       return indent.length;
     };
 
-    Lexer.prototype.outdentToken = function(moveOut, noNewlines) {
+    Lexer.prototype.outdentToken = function(moveOut, noNewlines, outdentLength) {
       var dent, len;
       while (moveOut > 0) {
         len = this.indents.length - 1;
@@ -346,7 +372,7 @@
         moveOut -= dent;
         this.outdebt = 0;
         this.pair('OUTDENT');
-        this.token('OUTDENT', dent);
+        this.token('OUTDENT', dent, 0, outdentLength);
        }
      }
      if (dent) {
@@ -356,7 +382,7 @@
         this.tokens.pop();
       }
       if (!(this.tag() === 'TERMINATOR' || noNewlines)) {
-        this.token('TERMINATOR', '\n');
+        this.token('TERMINATOR', '\n', outdentLength, 0);
       }
       return this;
     };
@@ -377,12 +403,12 @@
       }
     };
 
-    Lexer.prototype.newlineToken = function() {
+    Lexer.prototype.newlineToken = function(offset) {
       while (this.value() === ';') {
         this.tokens.pop();
       }
       if (this.tag() !== 'TERMINATOR') {
-        this.token('TERMINATOR', '\n');
+        this.token('TERMINATOR', '\n', offset, 0);
       }
       return this;
     };
@@ -556,11 +582,18 @@
     };
 
     Lexer.prototype.interpolateString = function(str, options) {
-      var expr, heredoc, i, inner, interpolated, len, letter, nested, pi, regex, tag, tokens, value, _i, _len, _ref2, _ref3, _ref4;
+      var column, expr, heredoc, i, inner, interpolated, len, letter, lexedLength, line, nested, offsetInChunk, originalOffsetInChunk, pi, popped, regex, tag, token, tokens, value, _i, _len, _ref2, _ref3, _ref4;
       if (options == null) {
         options = {};
       }
-      heredoc = options.heredoc, regex = options.regex;
+      heredoc = options.heredoc, regex = options.regex, offsetInChunk = options.offsetInChunk;
+      originalOffsetInChunk = offsetInChunk;
+      lexedLength = str.length;
+      offsetInChunk = offsetInChunk || 0;
+      if (heredoc && str.length > 0 && str[0] === '\n') {
+        str = str.slice(1);
+        offsetInChunk++;
+      }
       tokens = [];
       pi = 0;
       i = -1;
@@ -573,22 +606,24 @@
           continue;
         }
         if (pi < i) {
-          tokens.push(['NEOSTRING', str.slice(pi, i)]);
+          tokens.push(this.makeToken('NEOSTRING', str.slice(pi, i), offsetInChunk + pi));
         }
         inner = expr.slice(1, -1);
         if (inner.length) {
+          _ref2 = this.getLineAndColumnFromChunk(offsetInChunk + i + 1), line = _ref2[0], column = _ref2[1];
           nested = new Lexer().tokenize(inner, {
-            line: this.line,
+            line: line,
+            column: column,
             rewrite: false
           });
-          nested.pop();
-          if (((_ref2 = nested[0]) != null ? _ref2[0] : void 0) === 'TERMINATOR') {
-            nested.shift();
+          popped = nested.pop();
+          if (((_ref3 = nested[0]) != null ? _ref3[0] : void 0) === 'TERMINATOR') {
+            popped = nested.shift();
           }
           if (len = nested.length) {
             if (len > 1) {
-              nested.unshift(['(', '(', this.line]);
-              nested.push([')', ')', this.line]);
+              nested.unshift(this.makeToken('(', '(', offsetInChunk + i + 1, 0));
+              nested.push(this.makeToken(')', ')', offsetInChunk + i + 1 + inner.length, 0));
             }
             tokens.push(['TOKENS', nested]);
           }
@@ -597,33 +632,38 @@
         pi = i + 1;
       }
       if ((i > pi && pi < str.length)) {
-        tokens.push(['NEOSTRING', str.slice(pi)]);
+        tokens.push(this.makeToken('NEOSTRING', str.slice(pi), offsetInChunk + pi));
       }
       if (regex) {
         return tokens;
       }
       if (!tokens.length) {
-        return this.token('STRING', '""');
+        return this.token('STRING', '""', originalOffsetInChunk, lexedLength);
       }
       if (tokens[0][0] !== 'NEOSTRING') {
-        tokens.unshift(['', '']);
+        tokens.unshift(this.makeToken('NEOSTRING', '', originalOffsetInChunk));
       }
       if (interpolated = tokens.length > 1) {
-        this.token('(', '(');
+        this.token('(', '(', originalOffsetInChunk, 0);
       }
       for (i = _i = 0, _len = tokens.length; _i < _len; i = ++_i) {
-        _ref3 = tokens[i], tag = _ref3[0], value = _ref3[1];
+        token = tokens[i];
+        tag = token[0], value = token[1];
         if (i) {
           this.token('+', '+');
         }
         if (tag === 'TOKENS') {
           (_ref4 = this.tokens).push.apply(_ref4, value);
+        } else if (tag === 'NEOSTRING') {
+          token[0] = 'STRING';
+          token[1] = this.makeString(value, '"', heredoc);
+          this.tokens.push(token);
         } else {
-          this.token('STRING', this.makeString(value, '"', heredoc));
+          this.error("Unexpected " + tag);
         }
       }
       if (interpolated) {
-        this.token(')', ')');
+        this.token(')', ')', originalOffsetInChunk + lexedLength, 0);
       }
       return tokens;
     };
@@ -641,8 +681,46 @@
       return this.ends.pop();
     };
 
-    Lexer.prototype.token = function(tag, value) {
-      return this.tokens.push([tag, value, this.line]);
+    Lexer.prototype.getLineAndColumnFromChunk = function(offset) {
+      var column, lineCount, lines, string;
+      if (offset === 0) {
+        return [this.chunkLine, this.chunkColumn];
+      }
+      if (offset >= this.chunk.length) {
+        string = this.chunk;
+      } else {
+        string = this.chunk.slice(0, +(offset - 1) + 1 || 9e9);
+      }
+      lineCount = count(string, '\n');
+      column = this.chunkColumn;
+      if (lineCount > 0) {
+        lines = string.split('\n');
+        column = (last(lines)).length;
+      } else {
+        column += string.length;
+      }
+      return [this.chunkLine + lineCount, column];
+    };
+
+    Lexer.prototype.makeToken = function(tag, value, offsetInChunk, length) {
+      var locationData, token, _ref2, _ref3;
+      offsetInChunk = offsetInChunk || 0;
+      if (length === void 0) {
+        length = value.length;
+      }
+      locationData = {};
+      _ref2 = this.getLineAndColumnFromChunk(offsetInChunk), locationData.first_line = _ref2[0], locationData.first_column = _ref2[1];
+      _ref3 = this.getLineAndColumnFromChunk(offsetInChunk + length), locationData.last_line = _ref3[0], locationData.last_column = _ref3[1];
+      token = [tag, value, locationData.first_line];
+      token.locationData = locationData;
+      return token;
+    };
+
+    Lexer.prototype.token = function(tag, value, offsetInChunk, length) {
+      var token;
+      token = this.makeToken(tag, value, offsetInChunk, length);
+      this.tokens.push(token);
+      return token;
     };
 
     Lexer.prototype.tag = function(index, tag) {

src/helpers.coffee
@@ -79,3 +79,17 @@ exports.addLocationDataFn = (first, last) ->
     obj.updateLocationDataIfMissing buildLocationData(first, last)
 
   return obj
+
+# Convert jison location data to a string.
+# `obj` can be a token, or a locationData.
+exports.locationDataToString = (obj) ->
+  if "locationData" of obj then locationData = obj.locationData
+  else if "first_line" of obj then locationData = obj
+
+  if locationData
+    "#{locationData.first_line + 1}:#{locationData.first_column + 1}-" +
+      "#{locationData.last_line + 1}:#{locationData.last_column + 1}"
+  else
+    "No location data"
+
+
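A usage sketch for the new helper (illustrative, not part of the diff); the zero-based fields render one-based:

    {locationDataToString} = require './helpers'
    locationDataToString first_line: 0, first_column: 4, last_line: 0, last_column: 7
    # => "1:5-1:8"
    locationDataToString {}
    # => "No location data"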

229 src/lexer.coffee
@@ -10,7 +10,7 @@
 {Rewriter, INVERSES} = require './rewriter'
 
 # Import the helpers we need.
-{count, starts, compact, last} = require './helpers'
+{count, starts, compact, last, locationDataToString} = require './helpers'
 
 # The Lexer Class
 # ---------------
@@ -35,8 +35,12 @@ exports.Lexer = class Lexer
     code = "\n#{code}" if WHITESPACE.test code
     code = code.replace(/\r/g, '').replace TRAILING_SPACES, ''
 
-    @code = code              # The remainder of the source code.
-    @line = opts.line or 0    # The current line.
+    @code = code              # The source code.
+    @line = opts.line or 0    # TODO: Remove
+    @chunkLine =
+      opts.line or 0          # The start line for the current chunk.
+    @chunkColumn =
+      opts.column or 0        # The start column of the current chunk.
     @indent = 0               # The current indentation level.
     @indebt = 0               # The over-indentation at the current level.
     @outdebt = 0              # The under-outdentation at the current level.
@@ -49,7 +53,8 @@ exports.Lexer = class Lexer
     # `@literalToken` is the fallback catch-all.
     i = 0
     while @chunk = code[i..]
-      i += @identifierToken() or
+      consumed = \
+           @identifierToken() or
            @commentToken() or
            @whitespaceToken() or
            @lineToken() or
@@ -60,9 +65,15 @@ exports.Lexer = class Lexer
            @jsToken() or
            @literalToken()
+
+      # Update position
+      [@chunkLine, @chunkColumn] = @getLineAndColumnFromChunk consumed
+
+      i += consumed
 
     @closeIndentation()
     @error "missing #{tag}" if tag = @ends.pop()
     return @tokens if opts.rewrite is off
+    # TODO: deal with Rewriter
     (new Rewriter).rewrite @tokens
 
   # Tokenizers
@@ -78,6 +89,9 @@ exports.Lexer = class Lexer
     return 0 unless match = IDENTIFIER.exec @chunk
     [input, id, colon] = match
+
+    idLength = id.length
+    poppedToken = undefined
 
     if id is 'own' and @tag() is 'FOR'
       @token 'OWN', id
       return id.length
@@ -103,7 +117,7 @@ exports.Lexer = class Lexer
       else
         tag = 'RELATION'
         if @value() is '!'
-          @tokens.pop()
+          poppedToken = @tokens.pop()
          id = '!' + id
 
     if id in JS_FORBIDDEN
@@ -124,8 +138,14 @@ exports.Lexer = class Lexer
         when 'break', 'continue' then 'STATEMENT'
         else tag
 
-    @token tag, id
-    @token ':', ':' if colon
+    tagToken = @token tag, id, 0, idLength
+    if poppedToken
+      [tagToken.locationData.first_line, tagToken.locationData.first_column] =
+        [poppedToken.locationData.first_line, poppedToken.locationData.first_column]
+    if colon
+      colonOffset = input.lastIndexOf ':'
+      @token ':', ':', colonOffset, colon.length
 
     input.length
 
   # Matches numbers, including decimals, hex, and exponential notation.
@@ -146,7 +166,7 @@ exports.Lexer = class Lexer
       number = '0x' + (parseInt octalLiteral[1], 8).toString 16
     if binaryLiteral = /^0b([01]+)/.exec number
       number = '0x' + (parseInt binaryLiteral[1], 2).toString 16
-    @token 'NUMBER', number
+    @token 'NUMBER', number, 0, lexedLength
     lexedLength
 
   # Matches strings, including multi-line strings. Ensures that quotation marks
@@ -155,13 +175,14 @@ exports.Lexer = class Lexer
     switch @chunk.charAt 0
       when "'"
         return 0 unless match = SIMPLESTR.exec @chunk
-        @token 'STRING', (string = match[0]).replace MULTILINER, '\\\n'
+        string = match[0]
+        @token 'STRING', string.replace(MULTILINER, '\\\n'), 0, string.length
       when '"'
         return 0 unless string = @balancedString @chunk, '"'
         if 0 < string.indexOf '#{', 1
-          @interpolateString string[1...-1]
+          @interpolateString string[1...-1], offsetInChunk: 1
         else
-          @token 'STRING', @escapeLines string
+          @token 'STRING', @escapeLines string, 0, string.length
       else
         return 0
     if octalEsc = /^(?:\\.|[^\\])*\\(?:0[0-7]|[1-7])/.test string
@@ -177,9 +198,9 @@ exports.Lexer = class Lexer
     quote = heredoc.charAt 0
     doc = @sanitizeHeredoc match[2], quote: quote, indent: null
     if quote is '"' and 0 <= doc.indexOf '#{'
-      @interpolateString doc, heredoc: yes
+      @interpolateString doc, heredoc: yes, offsetInChunk: 3
     else
-      @token 'STRING', @makeString doc, quote, yes
+      @token 'STRING', @makeString(doc, quote, yes), 0, heredoc.length
     @line += count heredoc, '\n'
     heredoc.length
 
@@ -188,15 +209,17 @@ exports.Lexer = class Lexer
     return 0 unless match = @chunk.match COMMENT
     [comment, here] = match
     if here
-      @token 'HERECOMMENT', @sanitizeHeredoc here,
-        herecomment: true, indent: Array(@indent + 1).join(' ')
+      @token 'HERECOMMENT',
+        (@sanitizeHeredoc here,
+          herecomment: true, indent: Array(@indent + 1).join(' ')),
+        0, comment.length
     @line += count comment, '\n'
     comment.length
 
   # Matches JavaScript interpolated directly into the source via backticks.
   jsToken: ->
     return 0 unless @chunk.charAt(0) is '`' and match = JSTOKEN.exec @chunk
-    @token 'JS', (script = match[0])[1...-1]
+    @token 'JS', (script = match[0])[1...-1], 0, script.length
     @line += count script, '\n'
     script.length
 
@@ -216,7 +239,7 @@ exports.Lexer = class Lexer
     [match, regex, flags] = match
     if regex[..1] is '/*' then @error 'regular expressions cannot begin with `*`'
     if regex is '//' then regex = '/(?:)/'
-    @token 'REGEX', "#{regex}#{flags}"
+    @token 'REGEX', "#{regex}#{flags}", 0, match.length
     match.length
 
   # Matches multiline extended regular expressions.
@@ -225,24 +248,45 @@ exports.Lexer = class Lexer
     if 0 > body.indexOf '#{'
       re = body.replace(HEREGEX_OMIT, '').replace(/\//g, '\\/')
       if re.match /^\*/ then @error 'regular expressions cannot begin with `*`'
-      @token 'REGEX', "/#{ re or '(?:)' }/#{flags}"
+      @token 'REGEX', "/#{ re or '(?:)' }/#{flags}", 0, heregex.length
       return heregex.length
-    @token 'IDENTIFIER', 'RegExp'
-    @tokens.push ['CALL_START', '(']
+    @token 'IDENTIFIER', 'RegExp', 0, 0
+    @token 'CALL_START', '(', 0, 0
     tokens = []
-    for [tag, value] in @interpolateString(body, regex: yes)
+    for token in @interpolateString(body, regex: yes, offsetInChunk: 3)
+      [tag, value] = token
       if tag is 'TOKENS'
         tokens.push value...
-      else
+      else if tag is 'NEOSTRING'
         continue unless value = value.replace HEREGEX_OMIT, ''
+        # Convert NEOSTRING into STRING
         value = value.replace /\\/g, '\\\\'
-        tokens.push ['STRING', @makeString(value, '"', yes)]
-      tokens.push ['+', '+']
+        token[0] = 'STRING'
+        token[1] = @makeString(value, '"', yes)
+        tokens.push token
+      else
+        @error "Unexpected #{tag}"
+
+      prev = last @tokens
+      plusToken = ['+', '+']
+      plusToken.locationData = prev.locationData
+      tokens.push plusToken
+
+    # Remove the extra "+"
     tokens.pop()
-    @tokens.push ['STRING', '""'], ['+', '+'] unless tokens[0]?[0] is 'STRING'
+
+    unless tokens[0]?[0] is 'STRING'
+      @token 'STRING', '""', 0, 0
+      @token '+', '+', 0, 0
     @tokens.push tokens...
-    @tokens.push [',', ','], ['STRING', '"' + flags + '"'] if flags
-    @token ')', ')'
+
+    if flags
+      # Find the flags in the heregex
+      flagsOffset = heregex.lastIndexOf flags
+      @token ',', ',', flagsOffset, 0
+      @token 'STRING', '"' + flags + '"', flagsOffset, flags.length
+
+    @token ')', ')', heregex.length-1, 0
     heregex.length
 
   # Matches newlines, indents, and outdents, and determines which is which.
@@ -262,7 +306,7 @@ exports.Lexer = class Lexer
     size = indent.length - 1 - indent.lastIndexOf '\n'
     noNewlines = @unfinished()
     if size - @indebt is @indent
-      if noNewlines then @suppressNewlines() else @newlineToken()
+      if noNewlines then @suppressNewlines() else @newlineToken 0
       # Advance @line line after the newlineToken, so the TERMINATOR shows up
       # on the right line.
       @line += count indent, '\n'
@@ -275,19 +319,19 @@ exports.Lexer = class Lexer
       @suppressNewlines()
       return indent.length
     diff = size - @indent + @outdebt
-      @token 'INDENT', diff
+      @token 'INDENT', diff, 0, indent.length
       @indents.push diff
       @ends.push 'OUTDENT'
       @outdebt = @indebt = 0
     else
       @indebt = 0
-      @outdentToken @indent - size, noNewlines
+      @outdentToken @indent - size, noNewlines, indent.length
     @indent = size
     indent.length
 
   # Record an outdent token or multiple tokens, if we happen to be moving back
   # inwards past several recorded indents.
-  outdentToken: (moveOut, noNewlines) ->
+  outdentToken: (moveOut, noNewlines, outdentLength) ->
     while moveOut > 0
       len = @indents.length - 1
       if @indents[len] is undefined
@@ -303,10 +347,11 @@ exports.Lexer = class Lexer
       moveOut -= dent
       @outdebt = 0
       @pair 'OUTDENT'
-      @token 'OUTDENT', dent
+      @token 'OUTDENT', dent, 0, outdentLength
     @outdebt -= moveOut if dent
     @tokens.pop() while @value() is ';'
-    @token 'TERMINATOR', '\n' unless @tag() is 'TERMINATOR' or noNewlines
+
+    @token 'TERMINATOR', '\n', outdentLength, 0 unless @tag() is 'TERMINATOR' or noNewlines
     this
 
   # Matches and consumes non-meaningful whitespace. Tag the previous token
@@ -319,9 +364,9 @@ exports.Lexer = class Lexer
     if match then match[0].length else 0
 
   # Generate a newline token. Consecutive newlines get merged together.
-  newlineToken: ->
+  newlineToken: (offset) ->
     @tokens.pop() while @value() is ';'
-    @token 'TERMINATOR', '\n' unless @tag() is 'TERMINATOR'
+    @token 'TERMINATOR', '\n', offset, 0 unless @tag() is 'TERMINATOR'
     this
 
   # Use a `\` at a line-ending to suppress the newline.
@@ -458,7 +503,22 @@ exports.Lexer = class Lexer
   # new Lexer, tokenize the interpolated contents, and merge them into the
   # token stream.
   interpolateString: (str, options = {}) ->
-    {heredoc, regex} = options
+    {heredoc, regex, offsetInChunk} = options
+
+    # TODO: we pass in offsetInChunk, but we've already discarded the " or the
+    # """, or the /// that got us here. Those characters are not going to end
+    # up being part of any tokens.
+
+    originalOffsetInChunk = offsetInChunk
+    lexedLength = str.length
+
+    # Clip leading \n from heredoc
+    offsetInChunk = offsetInChunk || 0
+    if heredoc and str.length > 0 and str[0] == '\n'
+      str = str[1...]
+      offsetInChunk++
+
+    # Parse the string.
     tokens = []
     pi = 0
     i = -1
@@ -469,31 +529,51 @@ exports.Lexer = class Lexer
       unless letter is '#' and str.charAt(i+1) is '{' and
              (expr = @balancedString str[i + 1..], '}')
         continue
-      tokens.push ['NEOSTRING', str[pi...i]] if pi < i
+      # NEOSTRING is a fake token. This will be converted to a string below.
+      tokens.push @makeToken('NEOSTRING', str[pi...i], offsetInChunk + pi) if pi < i
       inner = expr[1...-1]
       if inner.length
-        nested = new Lexer().tokenize inner, line: @line, rewrite: off
-        nested.pop()
-        nested.shift() if nested[0]?[0] is 'TERMINATOR'
+        [line, column] = @getLineAndColumnFromChunk(offsetInChunk + i + 1)
+        nested = new Lexer().tokenize inner, line: line, column: column, rewrite: off
+        popped = nested.pop()
+        popped = nested.shift() if nested[0]?[0] is 'TERMINATOR'
         if len = nested.length
           if len > 1
-            nested.unshift ['(', '(', @line]
-            nested.push [')', ')', @line]
+            nested.unshift @makeToken '(', '(', offsetInChunk + i + 1, 0
+            nested.push @makeToken ')', ')', offsetInChunk + i + 1 + inner.length, 0
+          # Push a fake 'TOKENS' token, which will get turned into real tokens below.
          tokens.push ['TOKENS', nested]
       i += expr.length
       pi = i + 1
-    tokens.push ['NEOSTRING', str[pi..]] if i > pi < str.length
+    tokens.push @makeToken('NEOSTRING', str[pi..], offsetInChunk + pi) if i > pi < str.length
+
+    # If regex, then return now and let the regex code deal with all these fake tokens
     return tokens if regex
-    return @token 'STRING', '""' unless tokens.length
-    tokens.unshift ['', ''] unless tokens[0][0] is 'NEOSTRING'
-    @token '(', '(' if interpolated = tokens.length > 1
-    for [tag, value], i in tokens
+
+    # If we didn't find any tokens, then just return an empty string.
+    return @token 'STRING', '""', originalOffsetInChunk, lexedLength unless tokens.length
+
+    # If the first token is not a string, add a fake empty string to the beginning.
+    tokens.unshift @makeToken('NEOSTRING', '', originalOffsetInChunk) unless tokens[0][0] is 'NEOSTRING'
+
+    @token '(', '(', originalOffsetInChunk, 0 if interpolated = tokens.length > 1
+    # Push all the tokens
+    for token, i in tokens
+      [tag, value] = token
+      # TODO: this needs location data.
       @token '+', '+' if i
       if tag is 'TOKENS'
+        # Push all the tokens in the fake 'TOKENS' token. These already have
+        # sane location data.
         @tokens.push value...
+      else if tag is 'NEOSTRING'
+        # Convert NEOSTRING into STRING
+        token[0] = 'STRING'
+        token[1] = @makeString value, '"', heredoc
+        @tokens.push token
       else
-        @token 'STRING', @makeString value, '"', heredoc
-    @token ')', ')' if interpolated
+        @error "Unexpected #{tag}"
+    @token ')', ')', originalOffsetInChunk + lexedLength, 0 if interpolated
     tokens
 
   # Pairs up a closing token, ensuring that all listed pairs of tokens are
@@ -514,9 +594,54 @@ exports.Lexer = class Lexer
   # Helpers
   # -------
 
-  # Add a token to the results, taking note of the line number.
-  token: (tag, value) ->
-    @tokens.push [tag, value, @line]
+  # Returns the line and column number from an offset into the current chunk.
+  getLineAndColumnFromChunk: (offset) ->
+    if offset is 0
+      return [@chunkLine, @chunkColumn]
+
+    if offset >= @chunk.length
+      string = @chunk
+    else
+      string = @chunk[..offset-1]
+
+    lineCount = count string, '\n'
+
+    column = @chunkColumn
+    if lineCount > 0
+      lines = string.split '\n'
+      column = (last lines).length
+    else
+      column += string.length
+
+    return [@chunkLine + lineCount, column]
+
+  # Same as "token", exception this just returns the token without adding it
+  # to the results.
+  makeToken: (tag, value, offsetInChunk, length) ->
+    offsetInChunk = offsetInChunk || 0
+    if length is undefined then length = value.length
+
+    locationData = {}
+    [locationData.first_line, locationData.first_column] =
+      @getLineAndColumnFromChunk offsetInChunk
+    [locationData.last_line, locationData.last_column] =
+      @getLineAndColumnFromChunk offsetInChunk + length
+
+    token = [tag, value, locationData.first_line]
+    token.locationData = locationData
+
+    return token
+
+  # Add a token to the results.
+  # `offset` is the offset into the current @chunk where the token starts.
+  # `length` is the length of the token in the @chunk, after the offset. If
+  # not specified, the length of `value` will be used.
+  #
+  # Returns the new token.
+  token: (tag, value, offsetInChunk, length) ->
+    token = @makeToken tag, value, offsetInChunk, length
+    @tokens.push token
+    return token
 
   # Peek at a tag in the current token stream.
   tag: (index, tag) ->
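A worked example of the arithmetic in getLineAndColumnFromChunk (illustrative values, not part of the diff): suppose @chunkLine is 5, @chunkColumn is 10, and @chunk is "a b\ncd".

    @getLineAndColumnFromChunk 2   # consumed "a " has no newline: column advances, so [5, 12]
    @getLineAndColumnFromChunk 5   # consumed "a b\nc" has one newline: column restarts, so [6, 1]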