Watch 1 · Star 0 · Fork 0
Mirror of https://github.com/jashkenas/coffeescript.git (synced 2022-11-09 12:23:24 -05:00)

Lexer now adds location data, including first/last line/column to all generated tokens.

This commit is contained in:
Jason Walton 2012-11-16 19:09:56 -05:00
parent 25126e2f99
commit bb94e02fad
4 changed files with 340 additions and 109 deletions

View file

@ -107,4 +107,18 @@
};
};
exports.locationDataToString = function(obj) {
var locationData;
if ("locationData" in obj) {
locationData = obj.locationData;
} else if ("first_line" in obj) {
locationData = obj;
}
if (locationData) {
return ("" + (locationData.first_line + 1) + ":" + (locationData.first_column + 1) + "-") + ("" + (locationData.last_line + 1) + ":" + (locationData.last_column + 1));
} else {
return "No location data";
}
};
}).call(this);

View file

@ -1,18 +1,18 @@
// Generated by CoffeeScript 1.4.0
(function() {
var BOOL, CALLABLE, CODE, COFFEE_ALIASES, COFFEE_ALIAS_MAP, COFFEE_KEYWORDS, COMMENT, COMPARE, COMPOUND_ASSIGN, HEREDOC, HEREDOC_ILLEGAL, HEREDOC_INDENT, HEREGEX, HEREGEX_OMIT, IDENTIFIER, INDEXABLE, INVERSES, JSTOKEN, JS_FORBIDDEN, JS_KEYWORDS, LINE_BREAK, LINE_CONTINUER, LOGIC, Lexer, MATH, MULTILINER, MULTI_DENT, NOT_REGEX, NOT_SPACED_REGEX, NUMBER, OPERATOR, REGEX, RELATION, RESERVED, Rewriter, SHIFT, SIMPLESTR, STRICT_PROSCRIBED, TRAILING_SPACES, UNARY, WHITESPACE, compact, count, key, last, starts, _ref, _ref1,
var BOOL, CALLABLE, CODE, COFFEE_ALIASES, COFFEE_ALIAS_MAP, COFFEE_KEYWORDS, COMMENT, COMPARE, COMPOUND_ASSIGN, HEREDOC, HEREDOC_ILLEGAL, HEREDOC_INDENT, HEREGEX, HEREGEX_OMIT, IDENTIFIER, INDEXABLE, INVERSES, JSTOKEN, JS_FORBIDDEN, JS_KEYWORDS, LINE_BREAK, LINE_CONTINUER, LOGIC, Lexer, MATH, MULTILINER, MULTI_DENT, NOT_REGEX, NOT_SPACED_REGEX, NUMBER, OPERATOR, REGEX, RELATION, RESERVED, Rewriter, SHIFT, SIMPLESTR, STRICT_PROSCRIBED, TRAILING_SPACES, UNARY, WHITESPACE, compact, count, key, last, locationDataToString, starts, _ref, _ref1,
__indexOf = [].indexOf || function(item) { for (var i = 0, l = this.length; i < l; i++) { if (i in this && this[i] === item) return i; } return -1; };
_ref = require('./rewriter'), Rewriter = _ref.Rewriter, INVERSES = _ref.INVERSES;
_ref1 = require('./helpers'), count = _ref1.count, starts = _ref1.starts, compact = _ref1.compact, last = _ref1.last;
_ref1 = require('./helpers'), count = _ref1.count, starts = _ref1.starts, compact = _ref1.compact, last = _ref1.last, locationDataToString = _ref1.locationDataToString;
exports.Lexer = Lexer = (function() {
function Lexer() {}
Lexer.prototype.tokenize = function(code, opts) {
var i, tag;
var consumed, i, tag, _ref2;
if (opts == null) {
opts = {};
}
@ -22,6 +22,8 @@
code = code.replace(/\r/g, '').replace(TRAILING_SPACES, '');
this.code = code;
this.line = opts.line || 0;
this.chunkLine = opts.line || 0;
this.chunkColumn = opts.column || 0;
this.indent = 0;
this.indebt = 0;
this.outdebt = 0;
@ -30,7 +32,9 @@
this.tokens = [];
i = 0;
while (this.chunk = code.slice(i)) {
i += this.identifierToken() || this.commentToken() || this.whitespaceToken() || this.lineToken() || this.heredocToken() || this.stringToken() || this.numberToken() || this.regexToken() || this.jsToken() || this.literalToken();
consumed = this.identifierToken() || this.commentToken() || this.whitespaceToken() || this.lineToken() || this.heredocToken() || this.stringToken() || this.numberToken() || this.regexToken() || this.jsToken() || this.literalToken();
_ref2 = this.getLineAndColumnFromChunk(consumed), this.chunkLine = _ref2[0], this.chunkColumn = _ref2[1];
i += consumed;
}
this.closeIndentation();
if (tag = this.ends.pop()) {
@ -43,11 +47,13 @@
};
Lexer.prototype.identifierToken = function() {
var colon, forcedIdentifier, id, input, match, prev, tag, _ref2, _ref3;
var colon, colonOffset, forcedIdentifier, id, idLength, input, match, poppedToken, prev, tag, tagToken, _ref2, _ref3, _ref4;
if (!(match = IDENTIFIER.exec(this.chunk))) {
return 0;
}
input = match[0], id = match[1], colon = match[2];
idLength = id.length;
poppedToken = void 0;
if (id === 'own' && this.tag() === 'FOR') {
this.token('OWN', id);
return id.length;
@ -71,7 +77,7 @@
} else {
tag = 'RELATION';
if (this.value() === '!') {
this.tokens.pop();
poppedToken = this.tokens.pop();
id = '!' + id;
}
}
@ -111,9 +117,13 @@
}
})();
}
this.token(tag, id);
tagToken = this.token(tag, id, 0, idLength);
if (poppedToken) {
_ref4 = [poppedToken.locationData.first_line, poppedToken.locationData.first_column], tagToken.locationData.first_line = _ref4[0], tagToken.locationData.first_column = _ref4[1];
}
if (colon) {
this.token(':', ':');
colonOffset = input.lastIndexOf(':');
this.token(':', ':', colonOffset, colon.length);
}
return input.length;
};
@ -140,7 +150,7 @@
if (binaryLiteral = /^0b([01]+)/.exec(number)) {
number = '0x' + (parseInt(binaryLiteral[1], 2)).toString(16);
}
this.token('NUMBER', number);
this.token('NUMBER', number, 0, lexedLength);
return lexedLength;
};
@ -151,16 +161,19 @@
if (!(match = SIMPLESTR.exec(this.chunk))) {
return 0;
}
this.token('STRING', (string = match[0]).replace(MULTILINER, '\\\n'));
string = match[0];
this.token('STRING', string.replace(MULTILINER, '\\\n'), 0, string.length);
break;
case '"':
if (!(string = this.balancedString(this.chunk, '"'))) {
return 0;
}
if (0 < string.indexOf('#{', 1)) {
this.interpolateString(string.slice(1, -1));
this.interpolateString(string.slice(1, -1), {
offsetInChunk: 1
});
} else {
this.token('STRING', this.escapeLines(string));
this.token('STRING', this.escapeLines(string, 0, string.length));
}
break;
default:
@ -186,10 +199,11 @@
});
if (quote === '"' && 0 <= doc.indexOf('#{')) {
this.interpolateString(doc, {
heredoc: true
heredoc: true,
offsetInChunk: 3
});
} else {
this.token('STRING', this.makeString(doc, quote, true));
this.token('STRING', this.makeString(doc, quote, true), 0, heredoc.length);
}
this.line += count(heredoc, '\n');
return heredoc.length;
@ -205,7 +219,7 @@
this.token('HERECOMMENT', this.sanitizeHeredoc(here, {
herecomment: true,
indent: Array(this.indent + 1).join(' ')
}));
}), 0, comment.length);
}
this.line += count(comment, '\n');
return comment.length;
@ -216,7 +230,7 @@
if (!(this.chunk.charAt(0) === '`' && (match = JSTOKEN.exec(this.chunk)))) {
return 0;
}
this.token('JS', (script = match[0]).slice(1, -1));
this.token('JS', (script = match[0]).slice(1, -1), 0, script.length);
this.line += count(script, '\n');
return script.length;
};
@ -245,49 +259,61 @@
if (regex === '//') {
regex = '/(?:)/';
}
this.token('REGEX', "" + regex + flags);
this.token('REGEX', "" + regex + flags, 0, match.length);
return match.length;
};
Lexer.prototype.heregexToken = function(match) {
var body, flags, heregex, re, tag, tokens, value, _i, _len, _ref2, _ref3, _ref4, _ref5;
var body, flags, flagsOffset, heregex, plusToken, prev, re, tag, token, tokens, value, _i, _len, _ref2, _ref3, _ref4;
heregex = match[0], body = match[1], flags = match[2];
if (0 > body.indexOf('#{')) {
re = body.replace(HEREGEX_OMIT, '').replace(/\//g, '\\/');
if (re.match(/^\*/)) {
this.error('regular expressions cannot begin with `*`');
}
this.token('REGEX', "/" + (re || '(?:)') + "/" + flags);
this.token('REGEX', "/" + (re || '(?:)') + "/" + flags, 0, heregex.length);
return heregex.length;
}
this.token('IDENTIFIER', 'RegExp');
this.tokens.push(['CALL_START', '(']);
this.token('IDENTIFIER', 'RegExp', 0, 0);
this.token('CALL_START', '(', 0, 0);
tokens = [];
_ref2 = this.interpolateString(body, {
regex: true
regex: true,
offsetInChunk: 3
});
for (_i = 0, _len = _ref2.length; _i < _len; _i++) {
_ref3 = _ref2[_i], tag = _ref3[0], value = _ref3[1];
token = _ref2[_i];
tag = token[0], value = token[1];
if (tag === 'TOKENS') {
tokens.push.apply(tokens, value);
} else {
} else if (tag === 'NEOSTRING') {
if (!(value = value.replace(HEREGEX_OMIT, ''))) {
continue;
}
value = value.replace(/\\/g, '\\\\');
tokens.push(['STRING', this.makeString(value, '"', true)]);
token[0] = 'STRING';
token[1] = this.makeString(value, '"', true);
tokens.push(token);
} else {
this.error("Unexpected " + tag);
}
tokens.push(['+', '+']);
prev = last(this.tokens);
plusToken = ['+', '+'];
plusToken.locationData = prev.locationData;
tokens.push(plusToken);
}
tokens.pop();
if (((_ref4 = tokens[0]) != null ? _ref4[0] : void 0) !== 'STRING') {
this.tokens.push(['STRING', '""'], ['+', '+']);
if (((_ref3 = tokens[0]) != null ? _ref3[0] : void 0) !== 'STRING') {
this.token('STRING', '""', 0, 0);
this.token('+', '+', 0, 0);
}
(_ref5 = this.tokens).push.apply(_ref5, tokens);
(_ref4 = this.tokens).push.apply(_ref4, tokens);
if (flags) {
this.tokens.push([',', ','], ['STRING', '"' + flags + '"']);
flagsOffset = heregex.lastIndexOf(flags);
this.token(',', ',', flagsOffset, 0);
this.token('STRING', '"' + flags + '"', flagsOffset, flags.length);
}
this.token(')', ')');
this.token(')', ')', heregex.length - 1, 0);
return heregex.length;
};
@ -304,7 +330,7 @@
if (noNewlines) {
this.suppressNewlines();
} else {
this.newlineToken();
this.newlineToken(0);
}
this.line += count(indent, '\n');
return indent.length;
@ -317,19 +343,19 @@
return indent.length;
}
diff = size - this.indent + this.outdebt;
this.token('INDENT', diff);
this.token('INDENT', diff, 0, indent.length);
this.indents.push(diff);
this.ends.push('OUTDENT');
this.outdebt = this.indebt = 0;
} else {
this.indebt = 0;
this.outdentToken(this.indent - size, noNewlines);
this.outdentToken(this.indent - size, noNewlines, indent.length);
}
this.indent = size;
return indent.length;
};
Lexer.prototype.outdentToken = function(moveOut, noNewlines) {
Lexer.prototype.outdentToken = function(moveOut, noNewlines, outdentLength) {
var dent, len;
while (moveOut > 0) {
len = this.indents.length - 1;
@ -346,7 +372,7 @@
moveOut -= dent;
this.outdebt = 0;
this.pair('OUTDENT');
this.token('OUTDENT', dent);
this.token('OUTDENT', dent, 0, outdentLength);
}
}
if (dent) {
@ -356,7 +382,7 @@
this.tokens.pop();
}
if (!(this.tag() === 'TERMINATOR' || noNewlines)) {
this.token('TERMINATOR', '\n');
this.token('TERMINATOR', '\n', outdentLength, 0);
}
return this;
};
@ -377,12 +403,12 @@
}
};
Lexer.prototype.newlineToken = function() {
Lexer.prototype.newlineToken = function(offset) {
while (this.value() === ';') {
this.tokens.pop();
}
if (this.tag() !== 'TERMINATOR') {
this.token('TERMINATOR', '\n');
this.token('TERMINATOR', '\n', offset, 0);
}
return this;
};
@ -556,11 +582,18 @@
};
Lexer.prototype.interpolateString = function(str, options) {
var expr, heredoc, i, inner, interpolated, len, letter, nested, pi, regex, tag, tokens, value, _i, _len, _ref2, _ref3, _ref4;
var column, expr, heredoc, i, inner, interpolated, len, letter, lexedLength, line, nested, offsetInChunk, originalOffsetInChunk, pi, popped, regex, tag, token, tokens, value, _i, _len, _ref2, _ref3, _ref4;
if (options == null) {
options = {};
}
heredoc = options.heredoc, regex = options.regex;
heredoc = options.heredoc, regex = options.regex, offsetInChunk = options.offsetInChunk;
originalOffsetInChunk = offsetInChunk;
lexedLength = str.length;
offsetInChunk = offsetInChunk || 0;
if (heredoc && str.length > 0 && str[0] === '\n') {
str = str.slice(1);
offsetInChunk++;
}
tokens = [];
pi = 0;
i = -1;
@ -573,22 +606,24 @@
continue;
}
if (pi < i) {
tokens.push(['NEOSTRING', str.slice(pi, i)]);
tokens.push(this.makeToken('NEOSTRING', str.slice(pi, i), offsetInChunk + pi));
}
inner = expr.slice(1, -1);
if (inner.length) {
_ref2 = this.getLineAndColumnFromChunk(offsetInChunk + i + 1), line = _ref2[0], column = _ref2[1];
nested = new Lexer().tokenize(inner, {
line: this.line,
line: line,
column: column,
rewrite: false
});
nested.pop();
if (((_ref2 = nested[0]) != null ? _ref2[0] : void 0) === 'TERMINATOR') {
nested.shift();
popped = nested.pop();
if (((_ref3 = nested[0]) != null ? _ref3[0] : void 0) === 'TERMINATOR') {
popped = nested.shift();
}
if (len = nested.length) {
if (len > 1) {
nested.unshift(['(', '(', this.line]);
nested.push([')', ')', this.line]);
nested.unshift(this.makeToken('(', '(', offsetInChunk + i + 1, 0));
nested.push(this.makeToken(')', ')', offsetInChunk + i + 1 + inner.length, 0));
}
tokens.push(['TOKENS', nested]);
}
@ -597,33 +632,38 @@
pi = i + 1;
}
if ((i > pi && pi < str.length)) {
tokens.push(['NEOSTRING', str.slice(pi)]);
tokens.push(this.makeToken('NEOSTRING', str.slice(pi), offsetInChunk + pi));
}
if (regex) {
return tokens;
}
if (!tokens.length) {
return this.token('STRING', '""');
return this.token('STRING', '""', originalOffsetInChunk, lexedLength);
}
if (tokens[0][0] !== 'NEOSTRING') {
tokens.unshift(['', '']);
tokens.unshift(this.makeToken('NEOSTRING', '', originalOffsetInChunk));
}
if (interpolated = tokens.length > 1) {
this.token('(', '(');
this.token('(', '(', originalOffsetInChunk, 0);
}
for (i = _i = 0, _len = tokens.length; _i < _len; i = ++_i) {
_ref3 = tokens[i], tag = _ref3[0], value = _ref3[1];
token = tokens[i];
tag = token[0], value = token[1];
if (i) {
this.token('+', '+');
}
if (tag === 'TOKENS') {
(_ref4 = this.tokens).push.apply(_ref4, value);
} else if (tag === 'NEOSTRING') {
token[0] = 'STRING';
token[1] = this.makeString(value, '"', heredoc);
this.tokens.push(token);
} else {
this.token('STRING', this.makeString(value, '"', heredoc));
this.error("Unexpected " + tag);
}
}
if (interpolated) {
this.token(')', ')');
this.token(')', ')', originalOffsetInChunk + lexedLength, 0);
}
return tokens;
};
@ -641,8 +681,46 @@
return this.ends.pop();
};
Lexer.prototype.token = function(tag, value) {
return this.tokens.push([tag, value, this.line]);
// Translates `offset` characters into the current chunk into an absolute
// [line, column] pair, based on where the chunk itself begins
// (@chunkLine / @chunkColumn).
Lexer.prototype.getLineAndColumnFromChunk = function(offset) {
  var consumed, newlines, column;
  if (offset === 0) {
    return [this.chunkLine, this.chunkColumn];
  }
  // The part of the chunk preceding `offset`.
  consumed = offset >= this.chunk.length ? this.chunk : this.chunk.slice(0, offset);
  newlines = count(consumed, '\n');
  if (newlines > 0) {
    // Past a newline, the column restarts on the trailing partial line.
    column = last(consumed.split('\n')).length;
  } else {
    column = this.chunkColumn + consumed.length;
  }
  return [this.chunkLine + newlines, column];
};
// Builds a token array [tag, value, first_line] and attaches full
// locationData (first/last line and column) computed from the current
// chunk position. Unlike `token`, the result is NOT pushed onto the
// token stream. `length` defaults to the length of `value`.
Lexer.prototype.makeToken = function(tag, value, offsetInChunk, length) {
  var start, end, newToken;
  offsetInChunk || (offsetInChunk = 0);
  if (length === undefined) {
    length = value.length;
  }
  start = this.getLineAndColumnFromChunk(offsetInChunk);
  end = this.getLineAndColumnFromChunk(offsetInChunk + length);
  // Slot 2 still carries the first line for backward compatibility.
  newToken = [tag, value, start[0]];
  newToken.locationData = {
    first_line: start[0],
    first_column: start[1],
    last_line: end[0],
    last_column: end[1]
  };
  return newToken;
};
// Creates a token via makeToken, appends it to the token stream, and
// returns it so callers can adjust its location data afterwards.
Lexer.prototype.token = function(tag, value, offsetInChunk, length) {
  var newToken = this.makeToken(tag, value, offsetInChunk, length);
  this.tokens.push(newToken);
  return newToken;
};
Lexer.prototype.tag = function(index, tag) {

View file

@ -79,3 +79,17 @@ exports.addLocationDataFn = (first, last) ->
obj.updateLocationDataIfMissing buildLocationData(first, last)
return obj
# Render jison location data as "firstLine:firstCol-lastLine:lastCol"
# with 1-based positions. `obj` may be a token (with a `locationData`
# property) or a bare locationData object.
exports.locationDataToString = (obj) ->
  locationData =
    if "locationData" of obj
      obj.locationData
    else if "first_line" of obj
      obj
  return "No location data" unless locationData
  "#{locationData.first_line + 1}:#{locationData.first_column + 1}-" +
    "#{locationData.last_line + 1}:#{locationData.last_column + 1}"

View file

@ -10,7 +10,7 @@
{Rewriter, INVERSES} = require './rewriter'
# Import the helpers we need.
{count, starts, compact, last} = require './helpers'
{count, starts, compact, last, locationDataToString} = require './helpers'
# The Lexer Class
# ---------------
@ -35,8 +35,12 @@ exports.Lexer = class Lexer
code = "\n#{code}" if WHITESPACE.test code
code = code.replace(/\r/g, '').replace TRAILING_SPACES, ''
@code = code # The remainder of the source code.
@line = opts.line or 0 # The current line.
@code = code # The source code.
@line = opts.line or 0 # TODO: Remove
@chunkLine =
opts.line or 0 # The start line for the current chunk.
@chunkColumn =
opts.column or 0 # The start column of the current chunk.
@indent = 0 # The current indentation level.
@indebt = 0 # The over-indentation at the current level.
@outdebt = 0 # The under-outdentation at the current level.
@ -49,7 +53,8 @@ exports.Lexer = class Lexer
# `@literalToken` is the fallback catch-all.
i = 0
while @chunk = code[i..]
i += @identifierToken() or
consumed = \
@identifierToken() or
@commentToken() or
@whitespaceToken() or
@lineToken() or
@ -60,9 +65,15 @@ exports.Lexer = class Lexer
@jsToken() or
@literalToken()
# Update position
[@chunkLine, @chunkColumn] = @getLineAndColumnFromChunk consumed
i += consumed
@closeIndentation()
@error "missing #{tag}" if tag = @ends.pop()
return @tokens if opts.rewrite is off
# TODO: deal with Rewriter
(new Rewriter).rewrite @tokens
# Tokenizers
@ -78,6 +89,9 @@ exports.Lexer = class Lexer
return 0 unless match = IDENTIFIER.exec @chunk
[input, id, colon] = match
idLength = id.length
poppedToken = undefined
if id is 'own' and @tag() is 'FOR'
@token 'OWN', id
return id.length
@ -103,7 +117,7 @@ exports.Lexer = class Lexer
else
tag = 'RELATION'
if @value() is '!'
@tokens.pop()
poppedToken = @tokens.pop()
id = '!' + id
if id in JS_FORBIDDEN
@ -124,8 +138,14 @@ exports.Lexer = class Lexer
when 'break', 'continue' then 'STATEMENT'
else tag
@token tag, id
@token ':', ':' if colon
tagToken = @token tag, id, 0, idLength
if poppedToken
[tagToken.locationData.first_line, tagToken.locationData.first_column] =
[poppedToken.locationData.first_line, poppedToken.locationData.first_column]
if colon
colonOffset = input.lastIndexOf ':'
@token ':', ':', colonOffset, colon.length
input.length
# Matches numbers, including decimals, hex, and exponential notation.
@ -146,7 +166,7 @@ exports.Lexer = class Lexer
number = '0x' + (parseInt octalLiteral[1], 8).toString 16
if binaryLiteral = /^0b([01]+)/.exec number
number = '0x' + (parseInt binaryLiteral[1], 2).toString 16
@token 'NUMBER', number
@token 'NUMBER', number, 0, lexedLength
lexedLength
# Matches strings, including multi-line strings. Ensures that quotation marks
@ -155,13 +175,14 @@ exports.Lexer = class Lexer
switch @chunk.charAt 0
when "'"
return 0 unless match = SIMPLESTR.exec @chunk
@token 'STRING', (string = match[0]).replace MULTILINER, '\\\n'
string = match[0]
@token 'STRING', string.replace(MULTILINER, '\\\n'), 0, string.length
when '"'
return 0 unless string = @balancedString @chunk, '"'
if 0 < string.indexOf '#{', 1
@interpolateString string[1...-1]
@interpolateString string[1...-1], offsetInChunk: 1
else
@token 'STRING', @escapeLines string
@token 'STRING', @escapeLines string, 0, string.length
else
return 0
if octalEsc = /^(?:\\.|[^\\])*\\(?:0[0-7]|[1-7])/.test string
@ -177,9 +198,9 @@ exports.Lexer = class Lexer
quote = heredoc.charAt 0
doc = @sanitizeHeredoc match[2], quote: quote, indent: null
if quote is '"' and 0 <= doc.indexOf '#{'
@interpolateString doc, heredoc: yes
@interpolateString doc, heredoc: yes, offsetInChunk: 3
else
@token 'STRING', @makeString doc, quote, yes
@token 'STRING', @makeString(doc, quote, yes), 0, heredoc.length
@line += count heredoc, '\n'
heredoc.length
@ -188,15 +209,17 @@ exports.Lexer = class Lexer
return 0 unless match = @chunk.match COMMENT
[comment, here] = match
if here
@token 'HERECOMMENT', @sanitizeHeredoc here,
herecomment: true, indent: Array(@indent + 1).join(' ')
@token 'HERECOMMENT',
(@sanitizeHeredoc here,
herecomment: true, indent: Array(@indent + 1).join(' ')),
0, comment.length
@line += count comment, '\n'
comment.length
# Matches JavaScript interpolated directly into the source via backticks.
jsToken: ->
return 0 unless @chunk.charAt(0) is '`' and match = JSTOKEN.exec @chunk
@token 'JS', (script = match[0])[1...-1]
@token 'JS', (script = match[0])[1...-1], 0, script.length
@line += count script, '\n'
script.length
@ -216,7 +239,7 @@ exports.Lexer = class Lexer
[match, regex, flags] = match
if regex[..1] is '/*' then @error 'regular expressions cannot begin with `*`'
if regex is '//' then regex = '/(?:)/'
@token 'REGEX', "#{regex}#{flags}"
@token 'REGEX', "#{regex}#{flags}", 0, match.length
match.length
# Matches multiline extended regular expressions.
@ -225,24 +248,45 @@ exports.Lexer = class Lexer
if 0 > body.indexOf '#{'
re = body.replace(HEREGEX_OMIT, '').replace(/\//g, '\\/')
if re.match /^\*/ then @error 'regular expressions cannot begin with `*`'
@token 'REGEX', "/#{ re or '(?:)' }/#{flags}"
@token 'REGEX', "/#{ re or '(?:)' }/#{flags}", 0, heregex.length
return heregex.length
@token 'IDENTIFIER', 'RegExp'
@tokens.push ['CALL_START', '(']
@token 'IDENTIFIER', 'RegExp', 0, 0
@token 'CALL_START', '(', 0, 0
tokens = []
for [tag, value] in @interpolateString(body, regex: yes)
for token in @interpolateString(body, regex: yes, offsetInChunk: 3)
[tag, value] = token
if tag is 'TOKENS'
tokens.push value...
else
else if tag is 'NEOSTRING'
continue unless value = value.replace HEREGEX_OMIT, ''
# Convert NEOSTRING into STRING
value = value.replace /\\/g, '\\\\'
tokens.push ['STRING', @makeString(value, '"', yes)]
tokens.push ['+', '+']
token[0] = 'STRING'
token[1] = @makeString(value, '"', yes)
tokens.push token
else
@error "Unexpected #{tag}"
prev = last @tokens
plusToken = ['+', '+']
plusToken.locationData = prev.locationData
tokens.push plusToken
# Remove the extra "+"
tokens.pop()
@tokens.push ['STRING', '""'], ['+', '+'] unless tokens[0]?[0] is 'STRING'
unless tokens[0]?[0] is 'STRING'
@token 'STRING', '""', 0, 0
@token '+', '+', 0, 0
@tokens.push tokens...
@tokens.push [',', ','], ['STRING', '"' + flags + '"'] if flags
@token ')', ')'
if flags
# Find the flags in the heregex
flagsOffset = heregex.lastIndexOf flags
@token ',', ',', flagsOffset, 0
@token 'STRING', '"' + flags + '"', flagsOffset, flags.length
@token ')', ')', heregex.length-1, 0
heregex.length
# Matches newlines, indents, and outdents, and determines which is which.
@ -262,7 +306,7 @@ exports.Lexer = class Lexer
size = indent.length - 1 - indent.lastIndexOf '\n'
noNewlines = @unfinished()
if size - @indebt is @indent
if noNewlines then @suppressNewlines() else @newlineToken()
if noNewlines then @suppressNewlines() else @newlineToken 0
# Advance @line line after the newlineToken, so the TERMINATOR shows up
# on the right line.
@line += count indent, '\n'
@ -275,19 +319,19 @@ exports.Lexer = class Lexer
@suppressNewlines()
return indent.length
diff = size - @indent + @outdebt
@token 'INDENT', diff
@token 'INDENT', diff, 0, indent.length
@indents.push diff
@ends.push 'OUTDENT'
@outdebt = @indebt = 0
else
@indebt = 0
@outdentToken @indent - size, noNewlines
@outdentToken @indent - size, noNewlines, indent.length
@indent = size
indent.length
# Record an outdent token or multiple tokens, if we happen to be moving back
# inwards past several recorded indents.
outdentToken: (moveOut, noNewlines) ->
outdentToken: (moveOut, noNewlines, outdentLength) ->
while moveOut > 0
len = @indents.length - 1
if @indents[len] is undefined
@ -303,10 +347,11 @@ exports.Lexer = class Lexer
moveOut -= dent
@outdebt = 0
@pair 'OUTDENT'
@token 'OUTDENT', dent
@token 'OUTDENT', dent, 0, outdentLength
@outdebt -= moveOut if dent
@tokens.pop() while @value() is ';'
@token 'TERMINATOR', '\n' unless @tag() is 'TERMINATOR' or noNewlines
@token 'TERMINATOR', '\n', outdentLength, 0 unless @tag() is 'TERMINATOR' or noNewlines
this
# Matches and consumes non-meaningful whitespace. Tag the previous token
@ -319,9 +364,9 @@ exports.Lexer = class Lexer
if match then match[0].length else 0
# Generate a newline token. Consecutive newlines get merged together.
newlineToken: ->
newlineToken: (offset) ->
@tokens.pop() while @value() is ';'
@token 'TERMINATOR', '\n' unless @tag() is 'TERMINATOR'
@token 'TERMINATOR', '\n', offset, 0 unless @tag() is 'TERMINATOR'
this
# Use a `\` at a line-ending to suppress the newline.
@ -458,7 +503,22 @@ exports.Lexer = class Lexer
# new Lexer, tokenize the interpolated contents, and merge them into the
# token stream.
interpolateString: (str, options = {}) ->
{heredoc, regex} = options
{heredoc, regex, offsetInChunk} = options
# TODO: we pass in offsetInChunk, but we've already discarded the " or the
# """, or the /// that got us here. Those characters are not going to end
# up being part of any tokens.
originalOffsetInChunk = offsetInChunk
lexedLength = str.length
# Clip leading \n from heredoc
offsetInChunk = offsetInChunk || 0
if heredoc and str.length > 0 and str[0] == '\n'
str = str[1...]
offsetInChunk++
# Parse the string.
tokens = []
pi = 0
i = -1
@ -469,31 +529,51 @@ exports.Lexer = class Lexer
unless letter is '#' and str.charAt(i+1) is '{' and
(expr = @balancedString str[i + 1..], '}')
continue
tokens.push ['NEOSTRING', str[pi...i]] if pi < i
# NEOSTRING is a fake token. This will be converted to a string below.
tokens.push @makeToken('NEOSTRING', str[pi...i], offsetInChunk + pi) if pi < i
inner = expr[1...-1]
if inner.length
nested = new Lexer().tokenize inner, line: @line, rewrite: off
nested.pop()
nested.shift() if nested[0]?[0] is 'TERMINATOR'
[line, column] = @getLineAndColumnFromChunk(offsetInChunk + i + 1)
nested = new Lexer().tokenize inner, line: line, column: column, rewrite: off
popped = nested.pop()
popped = nested.shift() if nested[0]?[0] is 'TERMINATOR'
if len = nested.length
if len > 1
nested.unshift ['(', '(', @line]
nested.push [')', ')', @line]
nested.unshift @makeToken '(', '(', offsetInChunk + i + 1, 0
nested.push @makeToken ')', ')', offsetInChunk + i + 1 + inner.length, 0
# Push a fake 'TOKENS' token, which will get turned into real tokens below.
tokens.push ['TOKENS', nested]
i += expr.length
pi = i + 1
tokens.push ['NEOSTRING', str[pi..]] if i > pi < str.length
tokens.push @makeToken('NEOSTRING', str[pi..], offsetInChunk + pi) if i > pi < str.length
# If regex, then return now and let the regex code deal with all these fake tokens
return tokens if regex
return @token 'STRING', '""' unless tokens.length
tokens.unshift ['', ''] unless tokens[0][0] is 'NEOSTRING'
@token '(', '(' if interpolated = tokens.length > 1
for [tag, value], i in tokens
# If we didn't find any tokens, then just return an empty string.
return @token 'STRING', '""', originalOffsetInChunk, lexedLength unless tokens.length
# If the first token is not a string, add a fake empty string to the beginning.
tokens.unshift @makeToken('NEOSTRING', '', originalOffsetInChunk) unless tokens[0][0] is 'NEOSTRING'
@token '(', '(', originalOffsetInChunk, 0 if interpolated = tokens.length > 1
# Push all the tokens
for token, i in tokens
[tag, value] = token
# TODO: this needs location data.
@token '+', '+' if i
if tag is 'TOKENS'
# Push all the tokens in the fake 'TOKENS' token. These already have
# sane location data.
@tokens.push value...
else if tag is 'NEOSTRING'
# Convert NEOSTRING into STRING
token[0] = 'STRING'
token[1] = @makeString value, '"', heredoc
@tokens.push token
else
@token 'STRING', @makeString value, '"', heredoc
@token ')', ')' if interpolated
@error "Unexpected #{tag}"
@token ')', ')', originalOffsetInChunk + lexedLength, 0 if interpolated
tokens
# Pairs up a closing token, ensuring that all listed pairs of tokens are
@ -514,9 +594,54 @@ exports.Lexer = class Lexer
# Helpers
# -------
# Add a token to the results, taking note of the line number.
token: (tag, value) ->
@tokens.push [tag, value, @line]
# Translates `offset` characters into the current chunk into an absolute
# [line, column] pair, based on where the chunk starts (@chunkLine /
# @chunkColumn).
getLineAndColumnFromChunk: (offset) ->
  return [@chunkLine, @chunkColumn] if offset is 0
  # The part of the chunk that precedes `offset`.
  string = if offset >= @chunk.length then @chunk else @chunk[..offset - 1]
  lineCount = count string, '\n'
  if lineCount > 0
    # Past a newline, the column restarts on the trailing partial line.
    column = (last string.split '\n').length
  else
    column = @chunkColumn + string.length
  [@chunkLine + lineCount, column]
# Same as "token", exception this just returns the token without adding it
# to the results.
makeToken: (tag, value, offsetInChunk, length) ->
offsetInChunk = offsetInChunk || 0
if length is undefined then length = value.length
locationData = {}
[locationData.first_line, locationData.first_column] =
@getLineAndColumnFromChunk offsetInChunk
[locationData.last_line, locationData.last_column] =
@getLineAndColumnFromChunk offsetInChunk + length
token = [tag, value, locationData.first_line]
token.locationData = locationData
return token
# Add a token to the results.
# `offsetInChunk` is the offset into the current @chunk where the token
# starts; `length` is the token's length within the @chunk (defaults to
# the length of `value`).
#
# Returns the newly created token.
token: (tag, value, offsetInChunk, length) ->
  newToken = @makeToken tag, value, offsetInChunk, length
  @tokens.push newToken
  newToken
# Peek at a tag in the current token stream.
tag: (index, tag) ->