jashkenas--coffeescript/lib/coffee-script/lexer.js

905 lines
30 KiB
JavaScript
Raw Normal View History

2014-08-26 16:24:29 +00:00
// Generated by CoffeeScript 1.8.0
2010-07-25 07:15:12 +00:00
(function() {
Refactor interpolation (and string and regex) handling in lexer - Fix #3394: Unclosed single-quoted strings (both regular ones and heredocs) used to pass through the lexer, causing a parsing error later, while double-quoted strings caused an error already in the lexing phase. Now both single and double-quoted unclosed strings error out in the lexer (which is the more logical option) with consistent error messages. This also fixes the last comment by @satyr in #3301. - Similar to the above, unclosed heregexes also used to pass through the lexer and not error until in the parsing phase, which resulted in confusing error messages. This has been fixed, too. - Fix #3348, by adding passing tests. - Fix #3529: If a string starts with an interpolation, an empty string is no longer emitted before the interpolation (unless it is needed to coerce the interpolation into a string). - Block comments cannot contain `*/`. Now the error message also shows exactly where the offending `*/`. This improvement might seem unrelated, but I had to touch that code anyway to refactor string and regex related code, and the change was very trivial. Moreover, it's consistent with the next two points. - Regexes cannot start with `*`. Now the error message also shows exactly where the offending `*` is. (It might actually not be exatly at the start in heregexes.) It is a very minor improvement, but it was trivial to add. - Octal escapes in strings are forbidden in CoffeeScript (just like in JavaScript strict mode). However, this used to be the case only for regular strings. Now they are also forbidden in heredocs. Moreover, the errors now point at the offending octal escape. - Invalid regex flags are no longer allowed. This includes repeated modifiers and unknown ones. Moreover, invalid modifiers do not stop a heregex from being matched, which results in better error messages. - Fix #3621: `///a#{1}///` compiles to `RegExp("a" + 1)`. So does `RegExp("a#{1}")`. Still, those two code snippets used to generate different tokens, which is a bit weird, but more importantly causes problems for coffeelint (see clutchski/coffeelint#340). This required lots of tests in test/location.coffee to be updated. Note that some updates to those tests are unrelated to this point; some have been updated to be more consistent (I discovered this because the refactored code happened to be seemingly more correct). - Regular regex literals used to erraneously allow newlines to be escaped, causing invalid JavaScript output. This has been fixed. - Heregexes may now be completely empty (`//////`), instead of erroring out with a confusing message. - Fix #2388: Heredocs and heregexes used to be lexed simply, which meant that you couldn't nest a heredoc within a heredoc (double-quoted, that is) or a heregex inside a heregex. - Fix #2321: If you used division inside interpolation and then a slash later in the string containing that interpolation, the division slash and the latter slash was erraneously matched as a regex. This has been fixed. - Indentation inside interpolations in heredocs no longer affect how much indentation is removed from each line of the heredoc (which is more intuitive). - Whitespace is now correctly trimmed from the start and end of strings in a few edge cases. - Last but not least, the lexing of interpolated strings now seems to be more efficient. For a regular double-quoted string, we used to use a custom function to find the end of it (taking interpolations and interpolations within interpolations etc. into account). Then we used to re-find the interpolations and recursively lex their contents. In effect, the same string was processed twice, or even more in the case of deeper nesting of interpolations. Now the same string is processed just once. - Code duplication between regular strings, heredocs, regular regexes and heregexes has been reduced. - The above two points should result in more easily read code, too.
2015-01-03 22:40:43 +00:00
var BOM, BOOL, CALLABLE, CODE, COFFEE_ALIASES, COFFEE_ALIAS_MAP, COFFEE_KEYWORDS, COMMENT, COMPARE, COMPOUND_ASSIGN, HERECOMMENT_ILLEGAL, HEREDOC_DOUBLE, HEREDOC_INDENT, HEREDOC_SINGLE, HEREGEX, HEREGEX_OMIT, IDENTIFIER, INDENTABLE_CLOSERS, INDEXABLE, INVERSES, JSTOKEN, JS_FORBIDDEN, JS_KEYWORDS, LEADING_BLANK_LINE, LINE_BREAK, LINE_CONTINUER, LOGIC, Lexer, MATH, MULTILINER, MULTI_DENT, NOT_REGEX, NOT_SPACED_REGEX, NUMBER, OCTAL_ESCAPE, OPERATOR, REGEX, REGEX_FLAGS, REGEX_ILLEGAL, RELATION, RESERVED, Rewriter, SHIFT, STRICT_PROSCRIBED, STRING_DOUBLE, STRING_OMIT, STRING_SINGLE, STRING_START, TRAILING_BLANK_LINE, TRAILING_SPACES, UNARY, UNARY_MATH, VALID_FLAGS, WHITESPACE, compact, count, invertLiterate, key, last, locationDataToString, repeat, starts, throwSyntaxError, _ref, _ref1,
__indexOf = [].indexOf || function(item) { for (var i = 0, l = this.length; i < l; i++) { if (i in this && this[i] === item) return i; } return -1; };
2011-09-16 23:26:04 +00:00
_ref = require('./rewriter'), Rewriter = _ref.Rewriter, INVERSES = _ref.INVERSES;
2013-04-27 22:56:44 +00:00
_ref1 = require('./helpers'), count = _ref1.count, starts = _ref1.starts, compact = _ref1.compact, last = _ref1.last, repeat = _ref1.repeat, invertLiterate = _ref1.invertLiterate, locationDataToString = _ref1.locationDataToString, throwSyntaxError = _ref1.throwSyntaxError;
2013-02-25 17:41:34 +00:00
2010-12-23 18:50:52 +00:00
exports.Lexer = Lexer = (function() {
function Lexer() {}
2010-11-05 04:04:52 +00:00
Lexer.prototype.tokenize = function(code, opts) {
var consumed, end, i, _ref2;
2012-04-10 18:57:45 +00:00
if (opts == null) {
opts = {};
}
this.literate = opts.literate;
this.indent = 0;
this.baseIndent = 0;
this.indebt = 0;
this.outdebt = 0;
this.indents = [];
2011-09-16 23:26:04 +00:00
this.ends = [];
this.tokens = [];
this.chunkLine = opts.line || 0;
this.chunkColumn = opts.column || 0;
code = this.clean(code);
i = 0;
while (this.chunk = code.slice(i)) {
Refactor interpolation (and string and regex) handling in lexer - Fix #3394: Unclosed single-quoted strings (both regular ones and heredocs) used to pass through the lexer, causing a parsing error later, while double-quoted strings caused an error already in the lexing phase. Now both single and double-quoted unclosed strings error out in the lexer (which is the more logical option) with consistent error messages. This also fixes the last comment by @satyr in #3301. - Similar to the above, unclosed heregexes also used to pass through the lexer and not error until in the parsing phase, which resulted in confusing error messages. This has been fixed, too. - Fix #3348, by adding passing tests. - Fix #3529: If a string starts with an interpolation, an empty string is no longer emitted before the interpolation (unless it is needed to coerce the interpolation into a string). - Block comments cannot contain `*/`. Now the error message also shows exactly where the offending `*/`. This improvement might seem unrelated, but I had to touch that code anyway to refactor string and regex related code, and the change was very trivial. Moreover, it's consistent with the next two points. - Regexes cannot start with `*`. Now the error message also shows exactly where the offending `*` is. (It might actually not be exatly at the start in heregexes.) It is a very minor improvement, but it was trivial to add. - Octal escapes in strings are forbidden in CoffeeScript (just like in JavaScript strict mode). However, this used to be the case only for regular strings. Now they are also forbidden in heredocs. Moreover, the errors now point at the offending octal escape. - Invalid regex flags are no longer allowed. This includes repeated modifiers and unknown ones. Moreover, invalid modifiers do not stop a heregex from being matched, which results in better error messages. - Fix #3621: `///a#{1}///` compiles to `RegExp("a" + 1)`. So does `RegExp("a#{1}")`. Still, those two code snippets used to generate different tokens, which is a bit weird, but more importantly causes problems for coffeelint (see clutchski/coffeelint#340). This required lots of tests in test/location.coffee to be updated. Note that some updates to those tests are unrelated to this point; some have been updated to be more consistent (I discovered this because the refactored code happened to be seemingly more correct). - Regular regex literals used to erraneously allow newlines to be escaped, causing invalid JavaScript output. This has been fixed. - Heregexes may now be completely empty (`//////`), instead of erroring out with a confusing message. - Fix #2388: Heredocs and heregexes used to be lexed simply, which meant that you couldn't nest a heredoc within a heredoc (double-quoted, that is) or a heregex inside a heregex. - Fix #2321: If you used division inside interpolation and then a slash later in the string containing that interpolation, the division slash and the latter slash was erraneously matched as a regex. This has been fixed. - Indentation inside interpolations in heredocs no longer affect how much indentation is removed from each line of the heredoc (which is more intuitive). - Whitespace is now correctly trimmed from the start and end of strings in a few edge cases. - Last but not least, the lexing of interpolated strings now seems to be more efficient. For a regular double-quoted string, we used to use a custom function to find the end of it (taking interpolations and interpolations within interpolations etc. into account). Then we used to re-find the interpolations and recursively lex their contents. In effect, the same string was processed twice, or even more in the case of deeper nesting of interpolations. Now the same string is processed just once. - Code duplication between regular strings, heredocs, regular regexes and heregexes has been reduced. - The above two points should result in more easily read code, too.
2015-01-03 22:40:43 +00:00
consumed = this.identifierToken() || this.commentToken() || this.whitespaceToken() || this.lineToken() || this.stringToken() || this.numberToken() || this.regexToken() || this.jsToken() || this.literalToken();
_ref2 = this.getLineAndColumnFromChunk(consumed), this.chunkLine = _ref2[0], this.chunkColumn = _ref2[1];
i += consumed;
Refactor interpolation (and string and regex) handling in lexer - Fix #3394: Unclosed single-quoted strings (both regular ones and heredocs) used to pass through the lexer, causing a parsing error later, while double-quoted strings caused an error already in the lexing phase. Now both single and double-quoted unclosed strings error out in the lexer (which is the more logical option) with consistent error messages. This also fixes the last comment by @satyr in #3301. - Similar to the above, unclosed heregexes also used to pass through the lexer and not error until in the parsing phase, which resulted in confusing error messages. This has been fixed, too. - Fix #3348, by adding passing tests. - Fix #3529: If a string starts with an interpolation, an empty string is no longer emitted before the interpolation (unless it is needed to coerce the interpolation into a string). - Block comments cannot contain `*/`. Now the error message also shows exactly where the offending `*/`. This improvement might seem unrelated, but I had to touch that code anyway to refactor string and regex related code, and the change was very trivial. Moreover, it's consistent with the next two points. - Regexes cannot start with `*`. Now the error message also shows exactly where the offending `*` is. (It might actually not be exatly at the start in heregexes.) It is a very minor improvement, but it was trivial to add. - Octal escapes in strings are forbidden in CoffeeScript (just like in JavaScript strict mode). However, this used to be the case only for regular strings. Now they are also forbidden in heredocs. Moreover, the errors now point at the offending octal escape. - Invalid regex flags are no longer allowed. This includes repeated modifiers and unknown ones. Moreover, invalid modifiers do not stop a heregex from being matched, which results in better error messages. - Fix #3621: `///a#{1}///` compiles to `RegExp("a" + 1)`. So does `RegExp("a#{1}")`. Still, those two code snippets used to generate different tokens, which is a bit weird, but more importantly causes problems for coffeelint (see clutchski/coffeelint#340). This required lots of tests in test/location.coffee to be updated. Note that some updates to those tests are unrelated to this point; some have been updated to be more consistent (I discovered this because the refactored code happened to be seemingly more correct). - Regular regex literals used to erraneously allow newlines to be escaped, causing invalid JavaScript output. This has been fixed. - Heregexes may now be completely empty (`//////`), instead of erroring out with a confusing message. - Fix #2388: Heredocs and heregexes used to be lexed simply, which meant that you couldn't nest a heredoc within a heredoc (double-quoted, that is) or a heregex inside a heregex. - Fix #2321: If you used division inside interpolation and then a slash later in the string containing that interpolation, the division slash and the latter slash was erraneously matched as a regex. This has been fixed. - Indentation inside interpolations in heredocs no longer affect how much indentation is removed from each line of the heredoc (which is more intuitive). - Whitespace is now correctly trimmed from the start and end of strings in a few edge cases. - Last but not least, the lexing of interpolated strings now seems to be more efficient. For a regular double-quoted string, we used to use a custom function to find the end of it (taking interpolations and interpolations within interpolations etc. into account). Then we used to re-find the interpolations and recursively lex their contents. In effect, the same string was processed twice, or even more in the case of deeper nesting of interpolations. Now the same string is processed just once. - Code duplication between regular strings, heredocs, regular regexes and heregexes has been reduced. - The above two points should result in more easily read code, too.
2015-01-03 22:40:43 +00:00
if (opts.untilBalanced && this.ends.length === 0) {
return {
tokens: this.tokens,
index: i
};
}
}
this.closeIndentation();
if (end = this.ends.pop()) {
throwSyntaxError("missing " + end.tag, end.origin[2]);
2012-04-10 18:57:45 +00:00
}
if (opts.rewrite === false) {
return this.tokens;
}
return (new Rewriter).rewrite(this.tokens);
};
Lexer.prototype.clean = function(code) {
if (code.charCodeAt(0) === BOM) {
code = code.slice(1);
}
code = code.replace(/\r/g, '').replace(TRAILING_SPACES, '');
if (WHITESPACE.test(code)) {
code = "\n" + code;
this.chunkLine--;
}
if (this.literate) {
code = invertLiterate(code);
}
return code;
};
Lexer.prototype.identifierToken = function() {
var colon, colonOffset, forcedIdentifier, id, idLength, input, match, poppedToken, prev, tag, tagToken, _ref2, _ref3, _ref4;
2014-09-06 10:55:27 +00:00
if (!(match = IDENTIFIER.exec(this.chunk))) {
2012-04-10 18:57:45 +00:00
return 0;
}
input = match[0], id = match[1], colon = match[2];
idLength = id.length;
poppedToken = void 0;
2010-11-28 23:33:43 +00:00
if (id === 'own' && this.tag() === 'FOR') {
this.token('OWN', id);
return id.length;
}
2014-09-06 11:53:21 +00:00
if (id === 'from' && this.tag() === 'YIELD') {
this.token('FROM', id);
return id.length;
}
forcedIdentifier = colon || (prev = last(this.tokens)) && (((_ref2 = prev[0]) === '.' || _ref2 === '?.' || _ref2 === '::' || _ref2 === '?::') || !prev.spaced && prev[0] === '@');
tag = 'IDENTIFIER';
2011-06-19 08:49:41 +00:00
if (!forcedIdentifier && (__indexOf.call(JS_KEYWORDS, id) >= 0 || __indexOf.call(COFFEE_KEYWORDS, id) >= 0)) {
tag = id.toUpperCase();
2012-03-01 04:46:03 +00:00
if (tag === 'WHEN' && (_ref3 = this.tag(), __indexOf.call(LINE_BREAK, _ref3) >= 0)) {
2010-09-23 05:14:18 +00:00
tag = 'LEADING_WHEN';
} else if (tag === 'FOR') {
this.seenFor = true;
} else if (tag === 'UNLESS') {
tag = 'IF';
} else if (__indexOf.call(UNARY, tag) >= 0) {
tag = 'UNARY';
} else if (__indexOf.call(RELATION, tag) >= 0) {
if (tag !== 'INSTANCEOF' && this.seenFor) {
tag = 'FOR' + tag;
this.seenFor = false;
} else {
tag = 'RELATION';
if (this.value() === '!') {
poppedToken = this.tokens.pop();
id = '!' + id;
}
}
2010-09-23 05:14:18 +00:00
}
}
if (__indexOf.call(JS_FORBIDDEN, id) >= 0) {
if (forcedIdentifier) {
tag = 'IDENTIFIER';
id = new String(id);
id.reserved = true;
} else if (__indexOf.call(RESERVED, id) >= 0) {
2011-11-25 13:49:51 +00:00
this.error("reserved word \"" + id + "\"");
}
}
2010-10-07 11:05:22 +00:00
if (!forcedIdentifier) {
2012-04-10 18:57:45 +00:00
if (__indexOf.call(COFFEE_ALIASES, id) >= 0) {
id = COFFEE_ALIAS_MAP[id];
}
2010-12-23 18:50:52 +00:00
tag = (function() {
2010-11-05 04:04:52 +00:00
switch (id) {
case '!':
return 'UNARY';
case '==':
case '!=':
return 'COMPARE';
case '&&':
case '||':
return 'LOGIC';
case 'true':
case 'false':
return 'BOOL';
case 'break':
case 'continue':
return 'STATEMENT';
2010-11-05 04:04:52 +00:00
default:
return tag;
}
2010-12-23 18:50:52 +00:00
})();
2010-03-22 01:46:06 +00:00
}
tagToken = this.token(tag, id, 0, idLength);
if (poppedToken) {
_ref4 = [poppedToken[2].first_line, poppedToken[2].first_column], tagToken[2].first_line = _ref4[0], tagToken[2].first_column = _ref4[1];
}
2012-04-10 18:57:45 +00:00
if (colon) {
colonOffset = input.lastIndexOf(':');
this.token(':', ':', colonOffset, colon.length);
2012-04-10 18:57:45 +00:00
}
return input.length;
};
Lexer.prototype.numberToken = function() {
var binaryLiteral, lexedLength, match, number, octalLiteral;
2012-04-10 18:57:45 +00:00
if (!(match = NUMBER.exec(this.chunk))) {
return 0;
}
number = match[0];
if (/^0[BOX]/.test(number)) {
this.error("radix prefix '" + number + "' must be lowercase");
} else if (/E/.test(number) && !/^0x/.test(number)) {
this.error("exponential notation '" + number + "' must be indicated with a lowercase 'e'");
} else if (/^0\d*[89]/.test(number)) {
this.error("decimal literal '" + number + "' must not be prefixed with '0'");
} else if (/^0\d+/.test(number)) {
this.error("octal literal '" + number + "' must be prefixed with '0o'");
}
lexedLength = number.length;
if (octalLiteral = /^0o([0-7]+)/.exec(number)) {
2013-04-27 22:56:44 +00:00
number = '0x' + parseInt(octalLiteral[1], 8).toString(16);
}
if (binaryLiteral = /^0b([01]+)/.exec(number)) {
2013-04-27 22:56:44 +00:00
number = '0x' + parseInt(binaryLiteral[1], 2).toString(16);
2011-10-24 18:51:41 +00:00
}
this.token('NUMBER', number, 0, lexedLength);
2011-10-24 18:51:41 +00:00
return lexedLength;
};
Lexer.prototype.stringToken = function() {
Refactor interpolation (and string and regex) handling in lexer - Fix #3394: Unclosed single-quoted strings (both regular ones and heredocs) used to pass through the lexer, causing a parsing error later, while double-quoted strings caused an error already in the lexing phase. Now both single and double-quoted unclosed strings error out in the lexer (which is the more logical option) with consistent error messages. This also fixes the last comment by @satyr in #3301. - Similar to the above, unclosed heregexes also used to pass through the lexer and not error until in the parsing phase, which resulted in confusing error messages. This has been fixed, too. - Fix #3348, by adding passing tests. - Fix #3529: If a string starts with an interpolation, an empty string is no longer emitted before the interpolation (unless it is needed to coerce the interpolation into a string). - Block comments cannot contain `*/`. Now the error message also shows exactly where the offending `*/`. This improvement might seem unrelated, but I had to touch that code anyway to refactor string and regex related code, and the change was very trivial. Moreover, it's consistent with the next two points. - Regexes cannot start with `*`. Now the error message also shows exactly where the offending `*` is. (It might actually not be exatly at the start in heregexes.) It is a very minor improvement, but it was trivial to add. - Octal escapes in strings are forbidden in CoffeeScript (just like in JavaScript strict mode). However, this used to be the case only for regular strings. Now they are also forbidden in heredocs. Moreover, the errors now point at the offending octal escape. - Invalid regex flags are no longer allowed. This includes repeated modifiers and unknown ones. Moreover, invalid modifiers do not stop a heregex from being matched, which results in better error messages. - Fix #3621: `///a#{1}///` compiles to `RegExp("a" + 1)`. So does `RegExp("a#{1}")`. Still, those two code snippets used to generate different tokens, which is a bit weird, but more importantly causes problems for coffeelint (see clutchski/coffeelint#340). This required lots of tests in test/location.coffee to be updated. Note that some updates to those tests are unrelated to this point; some have been updated to be more consistent (I discovered this because the refactored code happened to be seemingly more correct). - Regular regex literals used to erraneously allow newlines to be escaped, causing invalid JavaScript output. This has been fixed. - Heregexes may now be completely empty (`//////`), instead of erroring out with a confusing message. - Fix #2388: Heredocs and heregexes used to be lexed simply, which meant that you couldn't nest a heredoc within a heredoc (double-quoted, that is) or a heregex inside a heregex. - Fix #2321: If you used division inside interpolation and then a slash later in the string containing that interpolation, the division slash and the latter slash was erraneously matched as a regex. This has been fixed. - Indentation inside interpolations in heredocs no longer affect how much indentation is removed from each line of the heredoc (which is more intuitive). - Whitespace is now correctly trimmed from the start and end of strings in a few edge cases. - Last but not least, the lexing of interpolated strings now seems to be more efficient. For a regular double-quoted string, we used to use a custom function to find the end of it (taking interpolations and interpolations within interpolations etc. into account). Then we used to re-find the interpolations and recursively lex their contents. In effect, the same string was processed twice, or even more in the case of deeper nesting of interpolations. Now the same string is processed just once. - Code duplication between regular strings, heredocs, regular regexes and heregexes has been reduced. - The above two points should result in more easily read code, too.
2015-01-03 22:40:43 +00:00
var $, attempt, doc, end, heredoc, i, indent, indentRegex, match, quote, regex, start, token, tokens, _ref2, _ref3;
quote = (STRING_START.exec(this.chunk) || [])[0];
if (!quote) {
return 0;
}
Refactor interpolation (and string and regex) handling in lexer - Fix #3394: Unclosed single-quoted strings (both regular ones and heredocs) used to pass through the lexer, causing a parsing error later, while double-quoted strings caused an error already in the lexing phase. Now both single and double-quoted unclosed strings error out in the lexer (which is the more logical option) with consistent error messages. This also fixes the last comment by @satyr in #3301. - Similar to the above, unclosed heregexes also used to pass through the lexer and not error until in the parsing phase, which resulted in confusing error messages. This has been fixed, too. - Fix #3348, by adding passing tests. - Fix #3529: If a string starts with an interpolation, an empty string is no longer emitted before the interpolation (unless it is needed to coerce the interpolation into a string). - Block comments cannot contain `*/`. Now the error message also shows exactly where the offending `*/`. This improvement might seem unrelated, but I had to touch that code anyway to refactor string and regex related code, and the change was very trivial. Moreover, it's consistent with the next two points. - Regexes cannot start with `*`. Now the error message also shows exactly where the offending `*` is. (It might actually not be exatly at the start in heregexes.) It is a very minor improvement, but it was trivial to add. - Octal escapes in strings are forbidden in CoffeeScript (just like in JavaScript strict mode). However, this used to be the case only for regular strings. Now they are also forbidden in heredocs. Moreover, the errors now point at the offending octal escape. - Invalid regex flags are no longer allowed. This includes repeated modifiers and unknown ones. Moreover, invalid modifiers do not stop a heregex from being matched, which results in better error messages. - Fix #3621: `///a#{1}///` compiles to `RegExp("a" + 1)`. So does `RegExp("a#{1}")`. Still, those two code snippets used to generate different tokens, which is a bit weird, but more importantly causes problems for coffeelint (see clutchski/coffeelint#340). This required lots of tests in test/location.coffee to be updated. Note that some updates to those tests are unrelated to this point; some have been updated to be more consistent (I discovered this because the refactored code happened to be seemingly more correct). - Regular regex literals used to erraneously allow newlines to be escaped, causing invalid JavaScript output. This has been fixed. - Heregexes may now be completely empty (`//////`), instead of erroring out with a confusing message. - Fix #2388: Heredocs and heregexes used to be lexed simply, which meant that you couldn't nest a heredoc within a heredoc (double-quoted, that is) or a heregex inside a heregex. - Fix #2321: If you used division inside interpolation and then a slash later in the string containing that interpolation, the division slash and the latter slash was erraneously matched as a regex. This has been fixed. - Indentation inside interpolations in heredocs no longer affect how much indentation is removed from each line of the heredoc (which is more intuitive). - Whitespace is now correctly trimmed from the start and end of strings in a few edge cases. - Last but not least, the lexing of interpolated strings now seems to be more efficient. For a regular double-quoted string, we used to use a custom function to find the end of it (taking interpolations and interpolations within interpolations etc. into account). Then we used to re-find the interpolations and recursively lex their contents. In effect, the same string was processed twice, or even more in the case of deeper nesting of interpolations. Now the same string is processed just once. - Code duplication between regular strings, heredocs, regular regexes and heregexes has been reduced. - The above two points should result in more easily read code, too.
2015-01-03 22:40:43 +00:00
regex = (function() {
switch (quote) {
case "'":
return STRING_SINGLE;
case '"':
return STRING_DOUBLE;
case "'''":
return HEREDOC_SINGLE;
case '"""':
return HEREDOC_DOUBLE;
}
Refactor interpolation (and string and regex) handling in lexer - Fix #3394: Unclosed single-quoted strings (both regular ones and heredocs) used to pass through the lexer, causing a parsing error later, while double-quoted strings caused an error already in the lexing phase. Now both single and double-quoted unclosed strings error out in the lexer (which is the more logical option) with consistent error messages. This also fixes the last comment by @satyr in #3301. - Similar to the above, unclosed heregexes also used to pass through the lexer and not error until in the parsing phase, which resulted in confusing error messages. This has been fixed, too. - Fix #3348, by adding passing tests. - Fix #3529: If a string starts with an interpolation, an empty string is no longer emitted before the interpolation (unless it is needed to coerce the interpolation into a string). - Block comments cannot contain `*/`. Now the error message also shows exactly where the offending `*/`. This improvement might seem unrelated, but I had to touch that code anyway to refactor string and regex related code, and the change was very trivial. Moreover, it's consistent with the next two points. - Regexes cannot start with `*`. Now the error message also shows exactly where the offending `*` is. (It might actually not be exatly at the start in heregexes.) It is a very minor improvement, but it was trivial to add. - Octal escapes in strings are forbidden in CoffeeScript (just like in JavaScript strict mode). However, this used to be the case only for regular strings. Now they are also forbidden in heredocs. Moreover, the errors now point at the offending octal escape. - Invalid regex flags are no longer allowed. This includes repeated modifiers and unknown ones. Moreover, invalid modifiers do not stop a heregex from being matched, which results in better error messages. - Fix #3621: `///a#{1}///` compiles to `RegExp("a" + 1)`. So does `RegExp("a#{1}")`. Still, those two code snippets used to generate different tokens, which is a bit weird, but more importantly causes problems for coffeelint (see clutchski/coffeelint#340). This required lots of tests in test/location.coffee to be updated. Note that some updates to those tests are unrelated to this point; some have been updated to be more consistent (I discovered this because the refactored code happened to be seemingly more correct). - Regular regex literals used to erraneously allow newlines to be escaped, causing invalid JavaScript output. This has been fixed. - Heregexes may now be completely empty (`//////`), instead of erroring out with a confusing message. - Fix #2388: Heredocs and heregexes used to be lexed simply, which meant that you couldn't nest a heredoc within a heredoc (double-quoted, that is) or a heregex inside a heregex. - Fix #2321: If you used division inside interpolation and then a slash later in the string containing that interpolation, the division slash and the latter slash was erraneously matched as a regex. This has been fixed. - Indentation inside interpolations in heredocs no longer affect how much indentation is removed from each line of the heredoc (which is more intuitive). - Whitespace is now correctly trimmed from the start and end of strings in a few edge cases. - Last but not least, the lexing of interpolated strings now seems to be more efficient. For a regular double-quoted string, we used to use a custom function to find the end of it (taking interpolations and interpolations within interpolations etc. into account). Then we used to re-find the interpolations and recursively lex their contents. In effect, the same string was processed twice, or even more in the case of deeper nesting of interpolations. Now the same string is processed just once. - Code duplication between regular strings, heredocs, regular regexes and heregexes has been reduced. - The above two points should result in more easily read code, too.
2015-01-03 22:40:43 +00:00
})();
heredoc = quote.length === 3;
start = quote.length;
_ref2 = this.matchWithInterpolations(this.chunk.slice(start), regex, quote, start), tokens = _ref2.tokens, end = _ref2.index;
$ = tokens.length - 1;
if (heredoc) {
indent = null;
doc = ((function() {
var _i, _len, _results;
_results = [];
for (i = _i = 0, _len = tokens.length; _i < _len; i = ++_i) {
token = tokens[i];
if (token[0] === 'NEOSTRING') {
_results.push(token[1]);
}
}
return _results;
})()).join('#{}');
while (match = HEREDOC_INDENT.exec(doc)) {
attempt = match[1];
if (indent === null || (0 < (_ref3 = attempt.length) && _ref3 < indent.length)) {
indent = attempt;
}
}
if (indent) {
indentRegex = RegExp("^" + indent, "gm");
}
this.mergeInterpolationTokens(tokens, {
quote: quote[0],
start: start,
end: end
}, (function(_this) {
return function(value, i) {
value = _this.formatString(value);
if (i === 0) {
value = value.replace(LEADING_BLANK_LINE, '');
}
if (i === $) {
value = value.replace(TRAILING_BLANK_LINE, '');
}
value = value.replace(indentRegex, '');
value = value.replace(MULTILINER, '\\n');
return value;
};
})(this));
} else {
Refactor interpolation (and string and regex) handling in lexer - Fix #3394: Unclosed single-quoted strings (both regular ones and heredocs) used to pass through the lexer, causing a parsing error later, while double-quoted strings caused an error already in the lexing phase. Now both single and double-quoted unclosed strings error out in the lexer (which is the more logical option) with consistent error messages. This also fixes the last comment by @satyr in #3301. - Similar to the above, unclosed heregexes also used to pass through the lexer and not error until in the parsing phase, which resulted in confusing error messages. This has been fixed, too. - Fix #3348, by adding passing tests. - Fix #3529: If a string starts with an interpolation, an empty string is no longer emitted before the interpolation (unless it is needed to coerce the interpolation into a string). - Block comments cannot contain `*/`. Now the error message also shows exactly where the offending `*/`. This improvement might seem unrelated, but I had to touch that code anyway to refactor string and regex related code, and the change was very trivial. Moreover, it's consistent with the next two points. - Regexes cannot start with `*`. Now the error message also shows exactly where the offending `*` is. (It might actually not be exatly at the start in heregexes.) It is a very minor improvement, but it was trivial to add. - Octal escapes in strings are forbidden in CoffeeScript (just like in JavaScript strict mode). However, this used to be the case only for regular strings. Now they are also forbidden in heredocs. Moreover, the errors now point at the offending octal escape. - Invalid regex flags are no longer allowed. This includes repeated modifiers and unknown ones. Moreover, invalid modifiers do not stop a heregex from being matched, which results in better error messages. - Fix #3621: `///a#{1}///` compiles to `RegExp("a" + 1)`. So does `RegExp("a#{1}")`. Still, those two code snippets used to generate different tokens, which is a bit weird, but more importantly causes problems for coffeelint (see clutchski/coffeelint#340). This required lots of tests in test/location.coffee to be updated. Note that some updates to those tests are unrelated to this point; some have been updated to be more consistent (I discovered this because the refactored code happened to be seemingly more correct). - Regular regex literals used to erraneously allow newlines to be escaped, causing invalid JavaScript output. This has been fixed. - Heregexes may now be completely empty (`//////`), instead of erroring out with a confusing message. - Fix #2388: Heredocs and heregexes used to be lexed simply, which meant that you couldn't nest a heredoc within a heredoc (double-quoted, that is) or a heregex inside a heregex. - Fix #2321: If you used division inside interpolation and then a slash later in the string containing that interpolation, the division slash and the latter slash was erraneously matched as a regex. This has been fixed. - Indentation inside interpolations in heredocs no longer affect how much indentation is removed from each line of the heredoc (which is more intuitive). - Whitespace is now correctly trimmed from the start and end of strings in a few edge cases. - Last but not least, the lexing of interpolated strings now seems to be more efficient. For a regular double-quoted string, we used to use a custom function to find the end of it (taking interpolations and interpolations within interpolations etc. into account). Then we used to re-find the interpolations and recursively lex their contents. In effect, the same string was processed twice, or even more in the case of deeper nesting of interpolations. Now the same string is processed just once. - Code duplication between regular strings, heredocs, regular regexes and heregexes has been reduced. - The above two points should result in more easily read code, too.
2015-01-03 22:40:43 +00:00
this.mergeInterpolationTokens(tokens, {
quote: quote,
start: start,
end: end
}, (function(_this) {
return function(value, i) {
value = _this.formatString(value);
value = value.replace(STRING_OMIT, function(match, offset) {
if ((i === 0 && offset === 0) || (i === $ && offset + match.length === value.length)) {
return '';
} else {
return ' ';
}
});
return value;
};
})(this));
}
Refactor interpolation (and string and regex) handling in lexer - Fix #3394: Unclosed single-quoted strings (both regular ones and heredocs) used to pass through the lexer, causing a parsing error later, while double-quoted strings caused an error already in the lexing phase. Now both single and double-quoted unclosed strings error out in the lexer (which is the more logical option) with consistent error messages. This also fixes the last comment by @satyr in #3301. - Similar to the above, unclosed heregexes also used to pass through the lexer and not error until in the parsing phase, which resulted in confusing error messages. This has been fixed, too. - Fix #3348, by adding passing tests. - Fix #3529: If a string starts with an interpolation, an empty string is no longer emitted before the interpolation (unless it is needed to coerce the interpolation into a string). - Block comments cannot contain `*/`. Now the error message also shows exactly where the offending `*/`. This improvement might seem unrelated, but I had to touch that code anyway to refactor string and regex related code, and the change was very trivial. Moreover, it's consistent with the next two points. - Regexes cannot start with `*`. Now the error message also shows exactly where the offending `*` is. (It might actually not be exatly at the start in heregexes.) It is a very minor improvement, but it was trivial to add. - Octal escapes in strings are forbidden in CoffeeScript (just like in JavaScript strict mode). However, this used to be the case only for regular strings. Now they are also forbidden in heredocs. Moreover, the errors now point at the offending octal escape. - Invalid regex flags are no longer allowed. This includes repeated modifiers and unknown ones. Moreover, invalid modifiers do not stop a heregex from being matched, which results in better error messages. - Fix #3621: `///a#{1}///` compiles to `RegExp("a" + 1)`. So does `RegExp("a#{1}")`. Still, those two code snippets used to generate different tokens, which is a bit weird, but more importantly causes problems for coffeelint (see clutchski/coffeelint#340). This required lots of tests in test/location.coffee to be updated. Note that some updates to those tests are unrelated to this point; some have been updated to be more consistent (I discovered this because the refactored code happened to be seemingly more correct). - Regular regex literals used to erraneously allow newlines to be escaped, causing invalid JavaScript output. This has been fixed. - Heregexes may now be completely empty (`//////`), instead of erroring out with a confusing message. - Fix #2388: Heredocs and heregexes used to be lexed simply, which meant that you couldn't nest a heredoc within a heredoc (double-quoted, that is) or a heregex inside a heregex. - Fix #2321: If you used division inside interpolation and then a slash later in the string containing that interpolation, the division slash and the latter slash was erraneously matched as a regex. This has been fixed. - Indentation inside interpolations in heredocs no longer affect how much indentation is removed from each line of the heredoc (which is more intuitive). - Whitespace is now correctly trimmed from the start and end of strings in a few edge cases. - Last but not least, the lexing of interpolated strings now seems to be more efficient. For a regular double-quoted string, we used to use a custom function to find the end of it (taking interpolations and interpolations within interpolations etc. into account). Then we used to re-find the interpolations and recursively lex their contents. In effect, the same string was processed twice, or even more in the case of deeper nesting of interpolations. Now the same string is processed just once. - Code duplication between regular strings, heredocs, regular regexes and heregexes has been reduced. - The above two points should result in more easily read code, too.
2015-01-03 22:40:43 +00:00
return end;
};
Lexer.prototype.commentToken = function() {
var comment, here, match;
2012-04-10 18:57:45 +00:00
if (!(match = this.chunk.match(COMMENT))) {
return 0;
}
comment = match[0], here = match[1];
2010-09-23 05:14:18 +00:00
if (here) {
Refactor interpolation (and string and regex) handling in lexer - Fix #3394: Unclosed single-quoted strings (both regular ones and heredocs) used to pass through the lexer, causing a parsing error later, while double-quoted strings caused an error already in the lexing phase. Now both single and double-quoted unclosed strings error out in the lexer (which is the more logical option) with consistent error messages. This also fixes the last comment by @satyr in #3301. - Similar to the above, unclosed heregexes also used to pass through the lexer and not error until in the parsing phase, which resulted in confusing error messages. This has been fixed, too. - Fix #3348, by adding passing tests. - Fix #3529: If a string starts with an interpolation, an empty string is no longer emitted before the interpolation (unless it is needed to coerce the interpolation into a string). - Block comments cannot contain `*/`. Now the error message also shows exactly where the offending `*/`. This improvement might seem unrelated, but I had to touch that code anyway to refactor string and regex related code, and the change was very trivial. Moreover, it's consistent with the next two points. - Regexes cannot start with `*`. Now the error message also shows exactly where the offending `*` is. (It might actually not be exatly at the start in heregexes.) It is a very minor improvement, but it was trivial to add. - Octal escapes in strings are forbidden in CoffeeScript (just like in JavaScript strict mode). However, this used to be the case only for regular strings. Now they are also forbidden in heredocs. Moreover, the errors now point at the offending octal escape. - Invalid regex flags are no longer allowed. This includes repeated modifiers and unknown ones. Moreover, invalid modifiers do not stop a heregex from being matched, which results in better error messages. - Fix #3621: `///a#{1}///` compiles to `RegExp("a" + 1)`. So does `RegExp("a#{1}")`. Still, those two code snippets used to generate different tokens, which is a bit weird, but more importantly causes problems for coffeelint (see clutchski/coffeelint#340). This required lots of tests in test/location.coffee to be updated. Note that some updates to those tests are unrelated to this point; some have been updated to be more consistent (I discovered this because the refactored code happened to be seemingly more correct). - Regular regex literals used to erraneously allow newlines to be escaped, causing invalid JavaScript output. This has been fixed. - Heregexes may now be completely empty (`//////`), instead of erroring out with a confusing message. - Fix #2388: Heredocs and heregexes used to be lexed simply, which meant that you couldn't nest a heredoc within a heredoc (double-quoted, that is) or a heregex inside a heregex. - Fix #2321: If you used division inside interpolation and then a slash later in the string containing that interpolation, the division slash and the latter slash was erraneously matched as a regex. This has been fixed. - Indentation inside interpolations in heredocs no longer affect how much indentation is removed from each line of the heredoc (which is more intuitive). - Whitespace is now correctly trimmed from the start and end of strings in a few edge cases. - Last but not least, the lexing of interpolated strings now seems to be more efficient. For a regular double-quoted string, we used to use a custom function to find the end of it (taking interpolations and interpolations within interpolations etc. into account). Then we used to re-find the interpolations and recursively lex their contents. In effect, the same string was processed twice, or even more in the case of deeper nesting of interpolations. Now the same string is processed just once. - Code duplication between regular strings, heredocs, regular regexes and heregexes has been reduced. - The above two points should result in more easily read code, too.
2015-01-03 22:40:43 +00:00
if (match = HERECOMMENT_ILLEGAL.exec(comment)) {
this.error("block comments cannot contain " + match[0], match.index);
}
if (here.indexOf('\n') >= 0) {
here = here.replace(RegExp("\\n" + (repeat(' ', this.indent)), "g"), '\n');
}
this.token('HERECOMMENT', here, 0, comment.length);
}
return comment.length;
};
Lexer.prototype.jsToken = function() {
var match, script;
if (!(this.chunk.charAt(0) === '`' && (match = JSTOKEN.exec(this.chunk)))) {
return 0;
}
this.token('JS', (script = match[0]).slice(1, -1), 0, script.length);
return script.length;
};
Lexer.prototype.regexToken = function() {
Refactor interpolation (and string and regex) handling in lexer - Fix #3394: Unclosed single-quoted strings (both regular ones and heredocs) used to pass through the lexer, causing a parsing error later, while double-quoted strings caused an error already in the lexing phase. Now both single and double-quoted unclosed strings error out in the lexer (which is the more logical option) with consistent error messages. This also fixes the last comment by @satyr in #3301. - Similar to the above, unclosed heregexes also used to pass through the lexer and not error until in the parsing phase, which resulted in confusing error messages. This has been fixed, too. - Fix #3348, by adding passing tests. - Fix #3529: If a string starts with an interpolation, an empty string is no longer emitted before the interpolation (unless it is needed to coerce the interpolation into a string). - Block comments cannot contain `*/`. Now the error message also shows exactly where the offending `*/`. This improvement might seem unrelated, but I had to touch that code anyway to refactor string and regex related code, and the change was very trivial. Moreover, it's consistent with the next two points. - Regexes cannot start with `*`. Now the error message also shows exactly where the offending `*` is. (It might actually not be exatly at the start in heregexes.) It is a very minor improvement, but it was trivial to add. - Octal escapes in strings are forbidden in CoffeeScript (just like in JavaScript strict mode). However, this used to be the case only for regular strings. Now they are also forbidden in heredocs. Moreover, the errors now point at the offending octal escape. - Invalid regex flags are no longer allowed. This includes repeated modifiers and unknown ones. Moreover, invalid modifiers do not stop a heregex from being matched, which results in better error messages. - Fix #3621: `///a#{1}///` compiles to `RegExp("a" + 1)`. So does `RegExp("a#{1}")`. Still, those two code snippets used to generate different tokens, which is a bit weird, but more importantly causes problems for coffeelint (see clutchski/coffeelint#340). This required lots of tests in test/location.coffee to be updated. Note that some updates to those tests are unrelated to this point; some have been updated to be more consistent (I discovered this because the refactored code happened to be seemingly more correct). - Regular regex literals used to erraneously allow newlines to be escaped, causing invalid JavaScript output. This has been fixed. - Heregexes may now be completely empty (`//////`), instead of erroring out with a confusing message. - Fix #2388: Heredocs and heregexes used to be lexed simply, which meant that you couldn't nest a heredoc within a heredoc (double-quoted, that is) or a heregex inside a heregex. - Fix #2321: If you used division inside interpolation and then a slash later in the string containing that interpolation, the division slash and the latter slash was erraneously matched as a regex. This has been fixed. - Indentation inside interpolations in heredocs no longer affect how much indentation is removed from each line of the heredoc (which is more intuitive). - Whitespace is now correctly trimmed from the start and end of strings in a few edge cases. - Last but not least, the lexing of interpolated strings now seems to be more efficient. For a regular double-quoted string, we used to use a custom function to find the end of it (taking interpolations and interpolations within interpolations etc. into account). Then we used to re-find the interpolations and recursively lex their contents. In effect, the same string was processed twice, or even more in the case of deeper nesting of interpolations. Now the same string is processed just once. - Code duplication between regular strings, heredocs, regular regexes and heregexes has been reduced. - The above two points should result in more easily read code, too.
2015-01-03 22:40:43 +00:00
var end, flags, index, match, prev, re, regex, tokens, _ref2, _ref3;
switch (false) {
case !(match = REGEX_ILLEGAL.exec(this.chunk)):
this.error("regular expressions cannot begin with " + match[2], match.index + match[1].length);
break;
case this.chunk.slice(0, 3) !== '///':
_ref2 = this.matchWithInterpolations(this.chunk.slice(3), HEREGEX, '///', 3), tokens = _ref2.tokens, index = _ref2.index;
break;
case !(match = REGEX.exec(this.chunk)):
regex = match[0];
index = regex.length;
prev = last(this.tokens);
if (prev && (_ref3 = prev[0], __indexOf.call((prev.spaced ? NOT_REGEX : NOT_SPACED_REGEX), _ref3) >= 0)) {
return 0;
2012-04-10 18:57:45 +00:00
}
Refactor interpolation (and string and regex) handling in lexer - Fix #3394: Unclosed single-quoted strings (both regular ones and heredocs) used to pass through the lexer, causing a parsing error later, while double-quoted strings caused an error already in the lexing phase. Now both single and double-quoted unclosed strings error out in the lexer (which is the more logical option) with consistent error messages. This also fixes the last comment by @satyr in #3301. - Similar to the above, unclosed heregexes also used to pass through the lexer and not error until in the parsing phase, which resulted in confusing error messages. This has been fixed, too. - Fix #3348, by adding passing tests. - Fix #3529: If a string starts with an interpolation, an empty string is no longer emitted before the interpolation (unless it is needed to coerce the interpolation into a string). - Block comments cannot contain `*/`. Now the error message also shows exactly where the offending `*/`. This improvement might seem unrelated, but I had to touch that code anyway to refactor string and regex related code, and the change was very trivial. Moreover, it's consistent with the next two points. - Regexes cannot start with `*`. Now the error message also shows exactly where the offending `*` is. (It might actually not be exatly at the start in heregexes.) It is a very minor improvement, but it was trivial to add. - Octal escapes in strings are forbidden in CoffeeScript (just like in JavaScript strict mode). However, this used to be the case only for regular strings. Now they are also forbidden in heredocs. Moreover, the errors now point at the offending octal escape. - Invalid regex flags are no longer allowed. This includes repeated modifiers and unknown ones. Moreover, invalid modifiers do not stop a heregex from being matched, which results in better error messages. - Fix #3621: `///a#{1}///` compiles to `RegExp("a" + 1)`. So does `RegExp("a#{1}")`. Still, those two code snippets used to generate different tokens, which is a bit weird, but more importantly causes problems for coffeelint (see clutchski/coffeelint#340). This required lots of tests in test/location.coffee to be updated. Note that some updates to those tests are unrelated to this point; some have been updated to be more consistent (I discovered this because the refactored code happened to be seemingly more correct). - Regular regex literals used to erraneously allow newlines to be escaped, causing invalid JavaScript output. This has been fixed. - Heregexes may now be completely empty (`//////`), instead of erroring out with a confusing message. - Fix #2388: Heredocs and heregexes used to be lexed simply, which meant that you couldn't nest a heredoc within a heredoc (double-quoted, that is) or a heregex inside a heregex. - Fix #2321: If you used division inside interpolation and then a slash later in the string containing that interpolation, the division slash and the latter slash was erraneously matched as a regex. This has been fixed. - Indentation inside interpolations in heredocs no longer affect how much indentation is removed from each line of the heredoc (which is more intuitive). - Whitespace is now correctly trimmed from the start and end of strings in a few edge cases. - Last but not least, the lexing of interpolated strings now seems to be more efficient. For a regular double-quoted string, we used to use a custom function to find the end of it (taking interpolations and interpolations within interpolations etc. into account). Then we used to re-find the interpolations and recursively lex their contents. In effect, the same string was processed twice, or even more in the case of deeper nesting of interpolations. Now the same string is processed just once. - Code duplication between regular strings, heredocs, regular regexes and heregexes has been reduced. - The above two points should result in more easily read code, too.
2015-01-03 22:40:43 +00:00
break;
default:
return 0;
}
flags = REGEX_FLAGS.exec(this.chunk.slice(index))[0];
end = index + flags.length;
switch (false) {
case !!VALID_FLAGS.test(flags):
this.error("invalid regular expression flags " + flags, index);
break;
case !regex:
this.token('REGEX', "" + regex + flags);
break;
case tokens.length !== 1:
re = this.formatHeregex(tokens[0][1]).replace(/\//g, '\\/');
this.token('REGEX', "/" + (re || '(?:)') + "/" + flags);
break;
default:
this.token('IDENTIFIER', 'RegExp', 0, 0);
this.token('CALL_START', '(', 0, 0);
this.mergeInterpolationTokens(tokens, {
quote: '"',
start: 3,
end: end
}, (function(_this) {
return function(value) {
return _this.formatHeregex(value).replace(/\\/g, '\\\\');
};
})(this));
if (flags) {
this.token(',', ',', index, 0);
this.token('STRING', '"' + flags + '"', index, flags.length);
}
this.token(')', ')', end, 0);
}
return end;
};
Lexer.prototype.lineToken = function() {
2012-06-07 00:39:17 +00:00
var diff, indent, match, noNewlines, size;
2012-04-10 18:57:45 +00:00
if (!(match = MULTI_DENT.exec(this.chunk))) {
return 0;
}
indent = match[0];
this.seenFor = false;
2010-09-23 05:14:18 +00:00
size = indent.length - 1 - indent.lastIndexOf('\n');
2010-10-25 18:56:02 +00:00
noNewlines = this.unfinished();
if (size - this.indebt === this.indent) {
if (noNewlines) {
this.suppressNewlines();
} else {
this.newlineToken(0);
}
return indent.length;
}
if (size > this.indent) {
if (noNewlines) {
this.indebt = size - this.indent;
this.suppressNewlines();
return indent.length;
}
if (!this.tokens.length) {
this.baseIndent = this.indent = size;
return indent.length;
}
diff = size - this.indent + this.outdebt;
this.token('INDENT', diff, indent.length - size, size);
this.indents.push(diff);
this.ends.push({
tag: 'OUTDENT'
});
this.outdebt = this.indebt = 0;
2014-01-22 18:30:13 +00:00
this.indent = size;
} else if (size < this.baseIndent) {
this.error('missing indentation', indent.length);
} else {
this.indebt = 0;
this.outdentToken(this.indent - size, noNewlines, indent.length);
}
return indent.length;
};
Lexer.prototype.outdentToken = function(moveOut, noNewlines, outdentLength) {
2014-01-22 18:30:13 +00:00
var decreasedIndent, dent, lastIndent, _ref2;
decreasedIndent = this.indent - moveOut;
while (moveOut > 0) {
2014-01-22 18:30:13 +00:00
lastIndent = this.indents[this.indents.length - 1];
if (!lastIndent) {
moveOut = 0;
2014-01-22 18:30:13 +00:00
} else if (lastIndent === this.outdebt) {
moveOut -= this.outdebt;
this.outdebt = 0;
2014-01-22 18:30:13 +00:00
} else if (lastIndent < this.outdebt) {
this.outdebt -= lastIndent;
moveOut -= lastIndent;
} else {
dent = this.indents.pop() + this.outdebt;
2014-01-22 18:30:13 +00:00
if (outdentLength && (_ref2 = this.chunk[outdentLength], __indexOf.call(INDENTABLE_CLOSERS, _ref2) >= 0)) {
decreasedIndent -= dent - moveOut;
moveOut = dent;
}
this.outdebt = 0;
2011-09-16 23:26:04 +00:00
this.pair('OUTDENT');
2014-01-22 18:30:13 +00:00
this.token('OUTDENT', moveOut, 0, outdentLength);
moveOut -= dent;
}
}
2012-04-10 18:57:45 +00:00
if (dent) {
this.outdebt -= moveOut;
}
while (this.value() === ';') {
this.tokens.pop();
}
if (!(this.tag() === 'TERMINATOR' || noNewlines)) {
this.token('TERMINATOR', '\n', outdentLength, 0);
}
this.indent = decreasedIndent;
return this;
};
Lexer.prototype.whitespaceToken = function() {
2010-10-21 01:37:58 +00:00
var match, nline, prev;
if (!((match = WHITESPACE.exec(this.chunk)) || (nline = this.chunk.charAt(0) === '\n'))) {
return 0;
}
2010-09-28 12:52:51 +00:00
prev = last(this.tokens);
2012-04-10 18:57:45 +00:00
if (prev) {
prev[match ? 'spaced' : 'newLine'] = true;
}
if (match) {
return match[0].length;
} else {
return 0;
}
};
Lexer.prototype.newlineToken = function(offset) {
while (this.value() === ';') {
this.tokens.pop();
}
2012-04-10 18:57:45 +00:00
if (this.tag() !== 'TERMINATOR') {
this.token('TERMINATOR', '\n', offset, 0);
2012-04-10 18:57:45 +00:00
}
return this;
};
Lexer.prototype.suppressNewlines = function() {
2012-04-10 18:57:45 +00:00
if (this.value() === '\\') {
this.tokens.pop();
}
return this;
};
Lexer.prototype.literalToken = function() {
var match, prev, tag, token, value, _ref2, _ref3, _ref4, _ref5;
2010-10-05 14:31:48 +00:00
if (match = OPERATOR.exec(this.chunk)) {
value = match[0];
2012-04-10 18:57:45 +00:00
if (CODE.test(value)) {
this.tagParameters();
}
2010-09-23 05:14:18 +00:00
} else {
value = this.chunk.charAt(0);
}
tag = value;
prev = last(this.tokens);
if (value === '=' && prev) {
2012-03-01 04:46:03 +00:00
if (!prev[1].reserved && (_ref2 = prev[1], __indexOf.call(JS_FORBIDDEN, _ref2) >= 0)) {
this.error("reserved word \"" + (this.value()) + "\" can't be assigned");
}
2012-03-01 04:46:03 +00:00
if ((_ref3 = prev[1]) === '||' || _ref3 === '&&') {
2010-10-05 14:31:48 +00:00
prev[0] = 'COMPOUND_ASSIGN';
prev[1] += '=';
return value.length;
2010-07-25 05:36:50 +00:00
}
2010-07-25 05:23:37 +00:00
}
if (value === ';') {
2011-09-21 04:21:46 +00:00
this.seenFor = false;
tag = 'TERMINATOR';
} else if (__indexOf.call(MATH, value) >= 0) {
tag = 'MATH';
} else if (__indexOf.call(COMPARE, value) >= 0) {
tag = 'COMPARE';
} else if (__indexOf.call(COMPOUND_ASSIGN, value) >= 0) {
tag = 'COMPOUND_ASSIGN';
} else if (__indexOf.call(UNARY, value) >= 0) {
tag = 'UNARY';
} else if (__indexOf.call(UNARY_MATH, value) >= 0) {
tag = 'UNARY_MATH';
} else if (__indexOf.call(SHIFT, value) >= 0) {
tag = 'SHIFT';
} else if (__indexOf.call(LOGIC, value) >= 0 || value === '?' && (prev != null ? prev.spaced : void 0)) {
tag = 'LOGIC';
} else if (prev && !prev.spaced) {
2012-03-01 04:46:03 +00:00
if (value === '(' && (_ref4 = prev[0], __indexOf.call(CALLABLE, _ref4) >= 0)) {
2012-04-10 18:57:45 +00:00
if (prev[0] === '?') {
prev[0] = 'FUNC_EXIST';
}
tag = 'CALL_START';
2012-03-01 04:46:03 +00:00
} else if (value === '[' && (_ref5 = prev[0], __indexOf.call(INDEXABLE, _ref5) >= 0)) {
tag = 'INDEX_START';
switch (prev[0]) {
2010-09-23 05:14:18 +00:00
case '?':
2010-10-05 14:31:48 +00:00
prev[0] = 'INDEX_SOAK';
}
}
}
token = this.makeToken(tag, value);
2011-09-16 23:26:04 +00:00
switch (value) {
case '(':
case '{':
case '[':
this.ends.push({
tag: INVERSES[value],
origin: token
});
2011-09-16 23:26:04 +00:00
break;
case ')':
case '}':
case ']':
this.pair(value);
}
this.tokens.push(token);
return value.length;
};
Lexer.prototype.tagParameters = function() {
2010-10-26 04:09:46 +00:00
var i, stack, tok, tokens;
2012-04-10 18:57:45 +00:00
if (this.tag() !== ')') {
return this;
}
2010-10-26 04:09:46 +00:00
stack = [];
tokens = this.tokens;
i = tokens.length;
tokens[--i][0] = 'PARAM_END';
while (tok = tokens[--i]) {
switch (tok[0]) {
2010-09-22 03:58:05 +00:00
case ')':
2010-10-26 04:09:46 +00:00
stack.push(tok);
2010-09-22 03:58:05 +00:00
break;
case '(':
case 'CALL_START':
2010-10-26 04:09:46 +00:00
if (stack.length) {
stack.pop();
} else if (tok[0] === '(') {
2010-10-26 04:09:46 +00:00
tok[0] = 'PARAM_START';
return this;
} else {
return this;
2010-10-26 04:09:46 +00:00
}
}
}
return this;
};
Lexer.prototype.closeIndentation = function() {
return this.outdentToken(this.indent);
};
Refactor interpolation (and string and regex) handling in lexer - Fix #3394: Unclosed single-quoted strings (both regular ones and heredocs) used to pass through the lexer, causing a parsing error later, while double-quoted strings caused an error already in the lexing phase. Now both single and double-quoted unclosed strings error out in the lexer (which is the more logical option) with consistent error messages. This also fixes the last comment by @satyr in #3301. - Similar to the above, unclosed heregexes also used to pass through the lexer and not error until in the parsing phase, which resulted in confusing error messages. This has been fixed, too. - Fix #3348, by adding passing tests. - Fix #3529: If a string starts with an interpolation, an empty string is no longer emitted before the interpolation (unless it is needed to coerce the interpolation into a string). - Block comments cannot contain `*/`. Now the error message also shows exactly where the offending `*/`. This improvement might seem unrelated, but I had to touch that code anyway to refactor string and regex related code, and the change was very trivial. Moreover, it's consistent with the next two points. - Regexes cannot start with `*`. Now the error message also shows exactly where the offending `*` is. (It might actually not be exatly at the start in heregexes.) It is a very minor improvement, but it was trivial to add. - Octal escapes in strings are forbidden in CoffeeScript (just like in JavaScript strict mode). However, this used to be the case only for regular strings. Now they are also forbidden in heredocs. Moreover, the errors now point at the offending octal escape. - Invalid regex flags are no longer allowed. This includes repeated modifiers and unknown ones. Moreover, invalid modifiers do not stop a heregex from being matched, which results in better error messages. - Fix #3621: `///a#{1}///` compiles to `RegExp("a" + 1)`. So does `RegExp("a#{1}")`. Still, those two code snippets used to generate different tokens, which is a bit weird, but more importantly causes problems for coffeelint (see clutchski/coffeelint#340). This required lots of tests in test/location.coffee to be updated. Note that some updates to those tests are unrelated to this point; some have been updated to be more consistent (I discovered this because the refactored code happened to be seemingly more correct). - Regular regex literals used to erraneously allow newlines to be escaped, causing invalid JavaScript output. This has been fixed. - Heregexes may now be completely empty (`//////`), instead of erroring out with a confusing message. - Fix #2388: Heredocs and heregexes used to be lexed simply, which meant that you couldn't nest a heredoc within a heredoc (double-quoted, that is) or a heregex inside a heregex. - Fix #2321: If you used division inside interpolation and then a slash later in the string containing that interpolation, the division slash and the latter slash was erraneously matched as a regex. This has been fixed. - Indentation inside interpolations in heredocs no longer affect how much indentation is removed from each line of the heredoc (which is more intuitive). - Whitespace is now correctly trimmed from the start and end of strings in a few edge cases. - Last but not least, the lexing of interpolated strings now seems to be more efficient. For a regular double-quoted string, we used to use a custom function to find the end of it (taking interpolations and interpolations within interpolations etc. into account). Then we used to re-find the interpolations and recursively lex their contents. In effect, the same string was processed twice, or even more in the case of deeper nesting of interpolations. Now the same string is processed just once. - Code duplication between regular strings, heredocs, regular regexes and heregexes has been reduced. - The above two points should result in more easily read code, too.
2015-01-03 22:40:43 +00:00
Lexer.prototype.matchWithInterpolations = function(str, regex, end, offsetInChunk) {
var column, index, line, nested, strPart, tokens, _ref2, _ref3, _ref4;
tokens = [];
Refactor interpolation (and string and regex) handling in lexer - Fix #3394: Unclosed single-quoted strings (both regular ones and heredocs) used to pass through the lexer, causing a parsing error later, while double-quoted strings caused an error already in the lexing phase. Now both single and double-quoted unclosed strings error out in the lexer (which is the more logical option) with consistent error messages. This also fixes the last comment by @satyr in #3301. - Similar to the above, unclosed heregexes also used to pass through the lexer and not error until in the parsing phase, which resulted in confusing error messages. This has been fixed, too. - Fix #3348, by adding passing tests. - Fix #3529: If a string starts with an interpolation, an empty string is no longer emitted before the interpolation (unless it is needed to coerce the interpolation into a string). - Block comments cannot contain `*/`. Now the error message also shows exactly where the offending `*/`. This improvement might seem unrelated, but I had to touch that code anyway to refactor string and regex related code, and the change was very trivial. Moreover, it's consistent with the next two points. - Regexes cannot start with `*`. Now the error message also shows exactly where the offending `*` is. (It might actually not be exatly at the start in heregexes.) It is a very minor improvement, but it was trivial to add. - Octal escapes in strings are forbidden in CoffeeScript (just like in JavaScript strict mode). However, this used to be the case only for regular strings. Now they are also forbidden in heredocs. Moreover, the errors now point at the offending octal escape. - Invalid regex flags are no longer allowed. This includes repeated modifiers and unknown ones. Moreover, invalid modifiers do not stop a heregex from being matched, which results in better error messages. - Fix #3621: `///a#{1}///` compiles to `RegExp("a" + 1)`. So does `RegExp("a#{1}")`. Still, those two code snippets used to generate different tokens, which is a bit weird, but more importantly causes problems for coffeelint (see clutchski/coffeelint#340). This required lots of tests in test/location.coffee to be updated. Note that some updates to those tests are unrelated to this point; some have been updated to be more consistent (I discovered this because the refactored code happened to be seemingly more correct). - Regular regex literals used to erraneously allow newlines to be escaped, causing invalid JavaScript output. This has been fixed. - Heregexes may now be completely empty (`//////`), instead of erroring out with a confusing message. - Fix #2388: Heredocs and heregexes used to be lexed simply, which meant that you couldn't nest a heredoc within a heredoc (double-quoted, that is) or a heregex inside a heregex. - Fix #2321: If you used division inside interpolation and then a slash later in the string containing that interpolation, the division slash and the latter slash was erraneously matched as a regex. This has been fixed. - Indentation inside interpolations in heredocs no longer affect how much indentation is removed from each line of the heredoc (which is more intuitive). - Whitespace is now correctly trimmed from the start and end of strings in a few edge cases. - Last but not least, the lexing of interpolated strings now seems to be more efficient. For a regular double-quoted string, we used to use a custom function to find the end of it (taking interpolations and interpolations within interpolations etc. into account). Then we used to re-find the interpolations and recursively lex their contents. In effect, the same string was processed twice, or even more in the case of deeper nesting of interpolations. Now the same string is processed just once. - Code duplication between regular strings, heredocs, regular regexes and heregexes has been reduced. - The above two points should result in more easily read code, too.
2015-01-03 22:40:43 +00:00
while (true) {
strPart = regex.exec(str)[0];
tokens.push(this.makeToken('NEOSTRING', strPart, offsetInChunk));
str = str.slice(strPart.length);
offsetInChunk += strPart.length;
if (str.slice(0, 2) !== '#{') {
break;
2012-04-10 18:57:45 +00:00
}
Refactor interpolation (and string and regex) handling in lexer - Fix #3394: Unclosed single-quoted strings (both regular ones and heredocs) used to pass through the lexer, causing a parsing error later, while double-quoted strings caused an error already in the lexing phase. Now both single and double-quoted unclosed strings error out in the lexer (which is the more logical option) with consistent error messages. This also fixes the last comment by @satyr in #3301. - Similar to the above, unclosed heregexes also used to pass through the lexer and not error until in the parsing phase, which resulted in confusing error messages. This has been fixed, too. - Fix #3348, by adding passing tests. - Fix #3529: If a string starts with an interpolation, an empty string is no longer emitted before the interpolation (unless it is needed to coerce the interpolation into a string). - Block comments cannot contain `*/`. Now the error message also shows exactly where the offending `*/`. This improvement might seem unrelated, but I had to touch that code anyway to refactor string and regex related code, and the change was very trivial. Moreover, it's consistent with the next two points. - Regexes cannot start with `*`. Now the error message also shows exactly where the offending `*` is. (It might actually not be exatly at the start in heregexes.) It is a very minor improvement, but it was trivial to add. - Octal escapes in strings are forbidden in CoffeeScript (just like in JavaScript strict mode). However, this used to be the case only for regular strings. Now they are also forbidden in heredocs. Moreover, the errors now point at the offending octal escape. - Invalid regex flags are no longer allowed. This includes repeated modifiers and unknown ones. Moreover, invalid modifiers do not stop a heregex from being matched, which results in better error messages. - Fix #3621: `///a#{1}///` compiles to `RegExp("a" + 1)`. So does `RegExp("a#{1}")`. Still, those two code snippets used to generate different tokens, which is a bit weird, but more importantly causes problems for coffeelint (see clutchski/coffeelint#340). This required lots of tests in test/location.coffee to be updated. Note that some updates to those tests are unrelated to this point; some have been updated to be more consistent (I discovered this because the refactored code happened to be seemingly more correct). - Regular regex literals used to erraneously allow newlines to be escaped, causing invalid JavaScript output. This has been fixed. - Heregexes may now be completely empty (`//////`), instead of erroring out with a confusing message. - Fix #2388: Heredocs and heregexes used to be lexed simply, which meant that you couldn't nest a heredoc within a heredoc (double-quoted, that is) or a heregex inside a heregex. - Fix #2321: If you used division inside interpolation and then a slash later in the string containing that interpolation, the division slash and the latter slash was erraneously matched as a regex. This has been fixed. - Indentation inside interpolations in heredocs no longer affect how much indentation is removed from each line of the heredoc (which is more intuitive). - Whitespace is now correctly trimmed from the start and end of strings in a few edge cases. - Last but not least, the lexing of interpolated strings now seems to be more efficient. For a regular double-quoted string, we used to use a custom function to find the end of it (taking interpolations and interpolations within interpolations etc. into account). Then we used to re-find the interpolations and recursively lex their contents. In effect, the same string was processed twice, or even more in the case of deeper nesting of interpolations. Now the same string is processed just once. - Code duplication between regular strings, heredocs, regular regexes and heregexes has been reduced. - The above two points should result in more easily read code, too.
2015-01-03 22:40:43 +00:00
_ref2 = this.getLineAndColumnFromChunk(offsetInChunk + 1), line = _ref2[0], column = _ref2[1];
_ref3 = new Lexer().tokenize(str.slice(1), {
line: line,
column: column,
untilBalanced: true
}), nested = _ref3.tokens, index = _ref3.index;
index += 1;
nested.shift();
nested.pop();
if (((_ref4 = nested[0]) != null ? _ref4[0] : void 0) === 'TERMINATOR') {
nested.shift();
}
Refactor interpolation (and string and regex) handling in lexer - Fix #3394: Unclosed single-quoted strings (both regular ones and heredocs) used to pass through the lexer, causing a parsing error later, while double-quoted strings caused an error already in the lexing phase. Now both single and double-quoted unclosed strings error out in the lexer (which is the more logical option) with consistent error messages. This also fixes the last comment by @satyr in #3301. - Similar to the above, unclosed heregexes also used to pass through the lexer and not error until in the parsing phase, which resulted in confusing error messages. This has been fixed, too. - Fix #3348, by adding passing tests. - Fix #3529: If a string starts with an interpolation, an empty string is no longer emitted before the interpolation (unless it is needed to coerce the interpolation into a string). - Block comments cannot contain `*/`. Now the error message also shows exactly where the offending `*/`. This improvement might seem unrelated, but I had to touch that code anyway to refactor string and regex related code, and the change was very trivial. Moreover, it's consistent with the next two points. - Regexes cannot start with `*`. Now the error message also shows exactly where the offending `*` is. (It might actually not be exatly at the start in heregexes.) It is a very minor improvement, but it was trivial to add. - Octal escapes in strings are forbidden in CoffeeScript (just like in JavaScript strict mode). However, this used to be the case only for regular strings. Now they are also forbidden in heredocs. Moreover, the errors now point at the offending octal escape. - Invalid regex flags are no longer allowed. This includes repeated modifiers and unknown ones. Moreover, invalid modifiers do not stop a heregex from being matched, which results in better error messages. - Fix #3621: `///a#{1}///` compiles to `RegExp("a" + 1)`. So does `RegExp("a#{1}")`. Still, those two code snippets used to generate different tokens, which is a bit weird, but more importantly causes problems for coffeelint (see clutchski/coffeelint#340). This required lots of tests in test/location.coffee to be updated. Note that some updates to those tests are unrelated to this point; some have been updated to be more consistent (I discovered this because the refactored code happened to be seemingly more correct). - Regular regex literals used to erraneously allow newlines to be escaped, causing invalid JavaScript output. This has been fixed. - Heregexes may now be completely empty (`//////`), instead of erroring out with a confusing message. - Fix #2388: Heredocs and heregexes used to be lexed simply, which meant that you couldn't nest a heredoc within a heredoc (double-quoted, that is) or a heregex inside a heregex. - Fix #2321: If you used division inside interpolation and then a slash later in the string containing that interpolation, the division slash and the latter slash was erraneously matched as a regex. This has been fixed. - Indentation inside interpolations in heredocs no longer affect how much indentation is removed from each line of the heredoc (which is more intuitive). - Whitespace is now correctly trimmed from the start and end of strings in a few edge cases. - Last but not least, the lexing of interpolated strings now seems to be more efficient. For a regular double-quoted string, we used to use a custom function to find the end of it (taking interpolations and interpolations within interpolations etc. into account). Then we used to re-find the interpolations and recursively lex their contents. In effect, the same string was processed twice, or even more in the case of deeper nesting of interpolations. Now the same string is processed just once. - Code duplication between regular strings, heredocs, regular regexes and heregexes has been reduced. - The above two points should result in more easily read code, too.
2015-01-03 22:40:43 +00:00
if (nested.length > 1) {
nested.unshift(this.makeToken('(', '(', offsetInChunk + 1, 0));
nested.push(this.makeToken(')', ')', offsetInChunk + 1 + index, 0));
}
Refactor interpolation (and string and regex) handling in lexer - Fix #3394: Unclosed single-quoted strings (both regular ones and heredocs) used to pass through the lexer, causing a parsing error later, while double-quoted strings caused an error already in the lexing phase. Now both single and double-quoted unclosed strings error out in the lexer (which is the more logical option) with consistent error messages. This also fixes the last comment by @satyr in #3301. - Similar to the above, unclosed heregexes also used to pass through the lexer and not error until in the parsing phase, which resulted in confusing error messages. This has been fixed, too. - Fix #3348, by adding passing tests. - Fix #3529: If a string starts with an interpolation, an empty string is no longer emitted before the interpolation (unless it is needed to coerce the interpolation into a string). - Block comments cannot contain `*/`. Now the error message also shows exactly where the offending `*/`. This improvement might seem unrelated, but I had to touch that code anyway to refactor string and regex related code, and the change was very trivial. Moreover, it's consistent with the next two points. - Regexes cannot start with `*`. Now the error message also shows exactly where the offending `*` is. (It might actually not be exatly at the start in heregexes.) It is a very minor improvement, but it was trivial to add. - Octal escapes in strings are forbidden in CoffeeScript (just like in JavaScript strict mode). However, this used to be the case only for regular strings. Now they are also forbidden in heredocs. Moreover, the errors now point at the offending octal escape. - Invalid regex flags are no longer allowed. This includes repeated modifiers and unknown ones. Moreover, invalid modifiers do not stop a heregex from being matched, which results in better error messages. - Fix #3621: `///a#{1}///` compiles to `RegExp("a" + 1)`. So does `RegExp("a#{1}")`. Still, those two code snippets used to generate different tokens, which is a bit weird, but more importantly causes problems for coffeelint (see clutchski/coffeelint#340). This required lots of tests in test/location.coffee to be updated. Note that some updates to those tests are unrelated to this point; some have been updated to be more consistent (I discovered this because the refactored code happened to be seemingly more correct). - Regular regex literals used to erraneously allow newlines to be escaped, causing invalid JavaScript output. This has been fixed. - Heregexes may now be completely empty (`//////`), instead of erroring out with a confusing message. - Fix #2388: Heredocs and heregexes used to be lexed simply, which meant that you couldn't nest a heredoc within a heredoc (double-quoted, that is) or a heregex inside a heregex. - Fix #2321: If you used division inside interpolation and then a slash later in the string containing that interpolation, the division slash and the latter slash was erraneously matched as a regex. This has been fixed. - Indentation inside interpolations in heredocs no longer affect how much indentation is removed from each line of the heredoc (which is more intuitive). - Whitespace is now correctly trimmed from the start and end of strings in a few edge cases. - Last but not least, the lexing of interpolated strings now seems to be more efficient. For a regular double-quoted string, we used to use a custom function to find the end of it (taking interpolations and interpolations within interpolations etc. into account). Then we used to re-find the interpolations and recursively lex their contents. In effect, the same string was processed twice, or even more in the case of deeper nesting of interpolations. Now the same string is processed just once. - Code duplication between regular strings, heredocs, regular regexes and heregexes has been reduced. - The above two points should result in more easily read code, too.
2015-01-03 22:40:43 +00:00
tokens.push(['TOKENS', nested]);
str = str.slice(index);
offsetInChunk += index;
}
Refactor interpolation (and string and regex) handling in lexer - Fix #3394: Unclosed single-quoted strings (both regular ones and heredocs) used to pass through the lexer, causing a parsing error later, while double-quoted strings caused an error already in the lexing phase. Now both single and double-quoted unclosed strings error out in the lexer (which is the more logical option) with consistent error messages. This also fixes the last comment by @satyr in #3301. - Similar to the above, unclosed heregexes also used to pass through the lexer and not error until in the parsing phase, which resulted in confusing error messages. This has been fixed, too. - Fix #3348, by adding passing tests. - Fix #3529: If a string starts with an interpolation, an empty string is no longer emitted before the interpolation (unless it is needed to coerce the interpolation into a string). - Block comments cannot contain `*/`. Now the error message also shows exactly where the offending `*/`. This improvement might seem unrelated, but I had to touch that code anyway to refactor string and regex related code, and the change was very trivial. Moreover, it's consistent with the next two points. - Regexes cannot start with `*`. Now the error message also shows exactly where the offending `*` is. (It might actually not be exatly at the start in heregexes.) It is a very minor improvement, but it was trivial to add. - Octal escapes in strings are forbidden in CoffeeScript (just like in JavaScript strict mode). However, this used to be the case only for regular strings. Now they are also forbidden in heredocs. Moreover, the errors now point at the offending octal escape. - Invalid regex flags are no longer allowed. This includes repeated modifiers and unknown ones. Moreover, invalid modifiers do not stop a heregex from being matched, which results in better error messages. - Fix #3621: `///a#{1}///` compiles to `RegExp("a" + 1)`. So does `RegExp("a#{1}")`. Still, those two code snippets used to generate different tokens, which is a bit weird, but more importantly causes problems for coffeelint (see clutchski/coffeelint#340). This required lots of tests in test/location.coffee to be updated. Note that some updates to those tests are unrelated to this point; some have been updated to be more consistent (I discovered this because the refactored code happened to be seemingly more correct). - Regular regex literals used to erraneously allow newlines to be escaped, causing invalid JavaScript output. This has been fixed. - Heregexes may now be completely empty (`//////`), instead of erroring out with a confusing message. - Fix #2388: Heredocs and heregexes used to be lexed simply, which meant that you couldn't nest a heredoc within a heredoc (double-quoted, that is) or a heregex inside a heregex. - Fix #2321: If you used division inside interpolation and then a slash later in the string containing that interpolation, the division slash and the latter slash was erraneously matched as a regex. This has been fixed. - Indentation inside interpolations in heredocs no longer affect how much indentation is removed from each line of the heredoc (which is more intuitive). - Whitespace is now correctly trimmed from the start and end of strings in a few edge cases. - Last but not least, the lexing of interpolated strings now seems to be more efficient. For a regular double-quoted string, we used to use a custom function to find the end of it (taking interpolations and interpolations within interpolations etc. into account). Then we used to re-find the interpolations and recursively lex their contents. In effect, the same string was processed twice, or even more in the case of deeper nesting of interpolations. Now the same string is processed just once. - Code duplication between regular strings, heredocs, regular regexes and heregexes has been reduced. - The above two points should result in more easily read code, too.
2015-01-03 22:40:43 +00:00
if (str.slice(0, end.length) !== end) {
this.error("missing " + end);
2012-04-10 18:57:45 +00:00
}
Refactor interpolation (and string and regex) handling in lexer - Fix #3394: Unclosed single-quoted strings (both regular ones and heredocs) used to pass through the lexer, causing a parsing error later, while double-quoted strings caused an error already in the lexing phase. Now both single and double-quoted unclosed strings error out in the lexer (which is the more logical option) with consistent error messages. This also fixes the last comment by @satyr in #3301. - Similar to the above, unclosed heregexes also used to pass through the lexer and not error until in the parsing phase, which resulted in confusing error messages. This has been fixed, too. - Fix #3348, by adding passing tests. - Fix #3529: If a string starts with an interpolation, an empty string is no longer emitted before the interpolation (unless it is needed to coerce the interpolation into a string). - Block comments cannot contain `*/`. Now the error message also shows exactly where the offending `*/`. This improvement might seem unrelated, but I had to touch that code anyway to refactor string and regex related code, and the change was very trivial. Moreover, it's consistent with the next two points. - Regexes cannot start with `*`. Now the error message also shows exactly where the offending `*` is. (It might actually not be exatly at the start in heregexes.) It is a very minor improvement, but it was trivial to add. - Octal escapes in strings are forbidden in CoffeeScript (just like in JavaScript strict mode). However, this used to be the case only for regular strings. Now they are also forbidden in heredocs. Moreover, the errors now point at the offending octal escape. - Invalid regex flags are no longer allowed. This includes repeated modifiers and unknown ones. Moreover, invalid modifiers do not stop a heregex from being matched, which results in better error messages. - Fix #3621: `///a#{1}///` compiles to `RegExp("a" + 1)`. So does `RegExp("a#{1}")`. Still, those two code snippets used to generate different tokens, which is a bit weird, but more importantly causes problems for coffeelint (see clutchski/coffeelint#340). This required lots of tests in test/location.coffee to be updated. Note that some updates to those tests are unrelated to this point; some have been updated to be more consistent (I discovered this because the refactored code happened to be seemingly more correct). - Regular regex literals used to erraneously allow newlines to be escaped, causing invalid JavaScript output. This has been fixed. - Heregexes may now be completely empty (`//////`), instead of erroring out with a confusing message. - Fix #2388: Heredocs and heregexes used to be lexed simply, which meant that you couldn't nest a heredoc within a heredoc (double-quoted, that is) or a heregex inside a heregex. - Fix #2321: If you used division inside interpolation and then a slash later in the string containing that interpolation, the division slash and the latter slash was erraneously matched as a regex. This has been fixed. - Indentation inside interpolations in heredocs no longer affect how much indentation is removed from each line of the heredoc (which is more intuitive). - Whitespace is now correctly trimmed from the start and end of strings in a few edge cases. - Last but not least, the lexing of interpolated strings now seems to be more efficient. For a regular double-quoted string, we used to use a custom function to find the end of it (taking interpolations and interpolations within interpolations etc. into account). Then we used to re-find the interpolations and recursively lex their contents. In effect, the same string was processed twice, or even more in the case of deeper nesting of interpolations. Now the same string is processed just once. - Code duplication between regular strings, heredocs, regular regexes and heregexes has been reduced. - The above two points should result in more easily read code, too.
2015-01-03 22:40:43 +00:00
return {
tokens: tokens,
index: offsetInChunk + end.length
};
};
Lexer.prototype.mergeInterpolationTokens = function(tokens, _arg, fn) {
var converted, end, errorToken, firstEmptyStringIndex, firstIndex, i, interpolated, locationToken, plusToken, quote, rparen, start, tag, token, tokensToPush, value, _i, _len, _ref2;
quote = _arg.quote, start = _arg.start, end = _arg.end;
2012-04-10 18:57:45 +00:00
if (interpolated = tokens.length > 1) {
Refactor interpolation (and string and regex) handling in lexer - Fix #3394: Unclosed single-quoted strings (both regular ones and heredocs) used to pass through the lexer, causing a parsing error later, while double-quoted strings caused an error already in the lexing phase. Now both single and double-quoted unclosed strings error out in the lexer (which is the more logical option) with consistent error messages. This also fixes the last comment by @satyr in #3301. - Similar to the above, unclosed heregexes also used to pass through the lexer and not error until in the parsing phase, which resulted in confusing error messages. This has been fixed, too. - Fix #3348, by adding passing tests. - Fix #3529: If a string starts with an interpolation, an empty string is no longer emitted before the interpolation (unless it is needed to coerce the interpolation into a string). - Block comments cannot contain `*/`. Now the error message also shows exactly where the offending `*/`. This improvement might seem unrelated, but I had to touch that code anyway to refactor string and regex related code, and the change was very trivial. Moreover, it's consistent with the next two points. - Regexes cannot start with `*`. Now the error message also shows exactly where the offending `*` is. (It might actually not be exatly at the start in heregexes.) It is a very minor improvement, but it was trivial to add. - Octal escapes in strings are forbidden in CoffeeScript (just like in JavaScript strict mode). However, this used to be the case only for regular strings. Now they are also forbidden in heredocs. Moreover, the errors now point at the offending octal escape. - Invalid regex flags are no longer allowed. This includes repeated modifiers and unknown ones. Moreover, invalid modifiers do not stop a heregex from being matched, which results in better error messages. - Fix #3621: `///a#{1}///` compiles to `RegExp("a" + 1)`. So does `RegExp("a#{1}")`. Still, those two code snippets used to generate different tokens, which is a bit weird, but more importantly causes problems for coffeelint (see clutchski/coffeelint#340). This required lots of tests in test/location.coffee to be updated. Note that some updates to those tests are unrelated to this point; some have been updated to be more consistent (I discovered this because the refactored code happened to be seemingly more correct). - Regular regex literals used to erraneously allow newlines to be escaped, causing invalid JavaScript output. This has been fixed. - Heregexes may now be completely empty (`//////`), instead of erroring out with a confusing message. - Fix #2388: Heredocs and heregexes used to be lexed simply, which meant that you couldn't nest a heredoc within a heredoc (double-quoted, that is) or a heregex inside a heregex. - Fix #2321: If you used division inside interpolation and then a slash later in the string containing that interpolation, the division slash and the latter slash was erraneously matched as a regex. This has been fixed. - Indentation inside interpolations in heredocs no longer affect how much indentation is removed from each line of the heredoc (which is more intuitive). - Whitespace is now correctly trimmed from the start and end of strings in a few edge cases. - Last but not least, the lexing of interpolated strings now seems to be more efficient. For a regular double-quoted string, we used to use a custom function to find the end of it (taking interpolations and interpolations within interpolations etc. into account). Then we used to re-find the interpolations and recursively lex their contents. In effect, the same string was processed twice, or even more in the case of deeper nesting of interpolations. Now the same string is processed just once. - Code duplication between regular strings, heredocs, regular regexes and heregexes has been reduced. - The above two points should result in more easily read code, too.
2015-01-03 22:40:43 +00:00
errorToken = this.makeToken('', 'interpolation', start + tokens[0][1].length, 2);
this.token('(', '(', 0, 0, errorToken);
2012-04-10 18:57:45 +00:00
}
Refactor interpolation (and string and regex) handling in lexer - Fix #3394: Unclosed single-quoted strings (both regular ones and heredocs) used to pass through the lexer, causing a parsing error later, while double-quoted strings caused an error already in the lexing phase. Now both single and double-quoted unclosed strings error out in the lexer (which is the more logical option) with consistent error messages. This also fixes the last comment by @satyr in #3301. - Similar to the above, unclosed heregexes also used to pass through the lexer and not error until in the parsing phase, which resulted in confusing error messages. This has been fixed, too. - Fix #3348, by adding passing tests. - Fix #3529: If a string starts with an interpolation, an empty string is no longer emitted before the interpolation (unless it is needed to coerce the interpolation into a string). - Block comments cannot contain `*/`. Now the error message also shows exactly where the offending `*/`. This improvement might seem unrelated, but I had to touch that code anyway to refactor string and regex related code, and the change was very trivial. Moreover, it's consistent with the next two points. - Regexes cannot start with `*`. Now the error message also shows exactly where the offending `*` is. (It might actually not be exatly at the start in heregexes.) It is a very minor improvement, but it was trivial to add. - Octal escapes in strings are forbidden in CoffeeScript (just like in JavaScript strict mode). However, this used to be the case only for regular strings. Now they are also forbidden in heredocs. Moreover, the errors now point at the offending octal escape. - Invalid regex flags are no longer allowed. This includes repeated modifiers and unknown ones. Moreover, invalid modifiers do not stop a heregex from being matched, which results in better error messages. - Fix #3621: `///a#{1}///` compiles to `RegExp("a" + 1)`. So does `RegExp("a#{1}")`. Still, those two code snippets used to generate different tokens, which is a bit weird, but more importantly causes problems for coffeelint (see clutchski/coffeelint#340). This required lots of tests in test/location.coffee to be updated. Note that some updates to those tests are unrelated to this point; some have been updated to be more consistent (I discovered this because the refactored code happened to be seemingly more correct). - Regular regex literals used to erraneously allow newlines to be escaped, causing invalid JavaScript output. This has been fixed. - Heregexes may now be completely empty (`//////`), instead of erroring out with a confusing message. - Fix #2388: Heredocs and heregexes used to be lexed simply, which meant that you couldn't nest a heredoc within a heredoc (double-quoted, that is) or a heregex inside a heregex. - Fix #2321: If you used division inside interpolation and then a slash later in the string containing that interpolation, the division slash and the latter slash was erraneously matched as a regex. This has been fixed. - Indentation inside interpolations in heredocs no longer affect how much indentation is removed from each line of the heredoc (which is more intuitive). - Whitespace is now correctly trimmed from the start and end of strings in a few edge cases. - Last but not least, the lexing of interpolated strings now seems to be more efficient. For a regular double-quoted string, we used to use a custom function to find the end of it (taking interpolations and interpolations within interpolations etc. into account). Then we used to re-find the interpolations and recursively lex their contents. In effect, the same string was processed twice, or even more in the case of deeper nesting of interpolations. Now the same string is processed just once. - Code duplication between regular strings, heredocs, regular regexes and heregexes has been reduced. - The above two points should result in more easily read code, too.
2015-01-03 22:40:43 +00:00
firstIndex = this.tokens.length;
for (i = _i = 0, _len = tokens.length; _i < _len; i = ++_i) {
token = tokens[i];
tag = token[0], value = token[1];
Refactor interpolation (and string and regex) handling in lexer - Fix #3394: Unclosed single-quoted strings (both regular ones and heredocs) used to pass through the lexer, causing a parsing error later, while double-quoted strings caused an error already in the lexing phase. Now both single and double-quoted unclosed strings error out in the lexer (which is the more logical option) with consistent error messages. This also fixes the last comment by @satyr in #3301. - Similar to the above, unclosed heregexes also used to pass through the lexer and not error until in the parsing phase, which resulted in confusing error messages. This has been fixed, too. - Fix #3348, by adding passing tests. - Fix #3529: If a string starts with an interpolation, an empty string is no longer emitted before the interpolation (unless it is needed to coerce the interpolation into a string). - Block comments cannot contain `*/`. Now the error message also shows exactly where the offending `*/`. This improvement might seem unrelated, but I had to touch that code anyway to refactor string and regex related code, and the change was very trivial. Moreover, it's consistent with the next two points. - Regexes cannot start with `*`. Now the error message also shows exactly where the offending `*` is. (It might actually not be exatly at the start in heregexes.) It is a very minor improvement, but it was trivial to add. - Octal escapes in strings are forbidden in CoffeeScript (just like in JavaScript strict mode). However, this used to be the case only for regular strings. Now they are also forbidden in heredocs. Moreover, the errors now point at the offending octal escape. - Invalid regex flags are no longer allowed. This includes repeated modifiers and unknown ones. Moreover, invalid modifiers do not stop a heregex from being matched, which results in better error messages. - Fix #3621: `///a#{1}///` compiles to `RegExp("a" + 1)`. So does `RegExp("a#{1}")`. Still, those two code snippets used to generate different tokens, which is a bit weird, but more importantly causes problems for coffeelint (see clutchski/coffeelint#340). This required lots of tests in test/location.coffee to be updated. Note that some updates to those tests are unrelated to this point; some have been updated to be more consistent (I discovered this because the refactored code happened to be seemingly more correct). - Regular regex literals used to erraneously allow newlines to be escaped, causing invalid JavaScript output. This has been fixed. - Heregexes may now be completely empty (`//////`), instead of erroring out with a confusing message. - Fix #2388: Heredocs and heregexes used to be lexed simply, which meant that you couldn't nest a heredoc within a heredoc (double-quoted, that is) or a heregex inside a heregex. - Fix #2321: If you used division inside interpolation and then a slash later in the string containing that interpolation, the division slash and the latter slash was erraneously matched as a regex. This has been fixed. - Indentation inside interpolations in heredocs no longer affect how much indentation is removed from each line of the heredoc (which is more intuitive). - Whitespace is now correctly trimmed from the start and end of strings in a few edge cases. - Last but not least, the lexing of interpolated strings now seems to be more efficient. For a regular double-quoted string, we used to use a custom function to find the end of it (taking interpolations and interpolations within interpolations etc. into account). Then we used to re-find the interpolations and recursively lex their contents. In effect, the same string was processed twice, or even more in the case of deeper nesting of interpolations. Now the same string is processed just once. - Code duplication between regular strings, heredocs, regular regexes and heregexes has been reduced. - The above two points should result in more easily read code, too.
2015-01-03 22:40:43 +00:00
switch (tag) {
case 'TOKENS':
if (value.length === 0) {
continue;
}
locationToken = value[0];
tokensToPush = value;
break;
case 'NEOSTRING':
converted = fn(token[1], i);
if (converted.length === 0) {
if (i === 0) {
firstEmptyStringIndex = this.tokens.length;
} else {
continue;
}
}
if (i === 2 && (firstEmptyStringIndex != null)) {
this.tokens.splice(firstEmptyStringIndex, 2);
}
token[0] = 'STRING';
token[1] = this.makeString(converted, quote);
locationToken = token;
tokensToPush = [token];
}
if (this.tokens.length > firstIndex) {
plusToken = this.token('+', '+');
plusToken[2] = {
first_line: locationToken[2].first_line,
first_column: locationToken[2].first_column,
last_line: locationToken[2].first_line,
last_column: locationToken[2].first_column
2012-11-19 22:37:46 +00:00
};
2012-04-10 18:57:45 +00:00
}
Refactor interpolation (and string and regex) handling in lexer - Fix #3394: Unclosed single-quoted strings (both regular ones and heredocs) used to pass through the lexer, causing a parsing error later, while double-quoted strings caused an error already in the lexing phase. Now both single and double-quoted unclosed strings error out in the lexer (which is the more logical option) with consistent error messages. This also fixes the last comment by @satyr in #3301. - Similar to the above, unclosed heregexes also used to pass through the lexer and not error until in the parsing phase, which resulted in confusing error messages. This has been fixed, too. - Fix #3348, by adding passing tests. - Fix #3529: If a string starts with an interpolation, an empty string is no longer emitted before the interpolation (unless it is needed to coerce the interpolation into a string). - Block comments cannot contain `*/`. Now the error message also shows exactly where the offending `*/`. This improvement might seem unrelated, but I had to touch that code anyway to refactor string and regex related code, and the change was very trivial. Moreover, it's consistent with the next two points. - Regexes cannot start with `*`. Now the error message also shows exactly where the offending `*` is. (It might actually not be exatly at the start in heregexes.) It is a very minor improvement, but it was trivial to add. - Octal escapes in strings are forbidden in CoffeeScript (just like in JavaScript strict mode). However, this used to be the case only for regular strings. Now they are also forbidden in heredocs. Moreover, the errors now point at the offending octal escape. - Invalid regex flags are no longer allowed. This includes repeated modifiers and unknown ones. Moreover, invalid modifiers do not stop a heregex from being matched, which results in better error messages. - Fix #3621: `///a#{1}///` compiles to `RegExp("a" + 1)`. So does `RegExp("a#{1}")`. Still, those two code snippets used to generate different tokens, which is a bit weird, but more importantly causes problems for coffeelint (see clutchski/coffeelint#340). This required lots of tests in test/location.coffee to be updated. Note that some updates to those tests are unrelated to this point; some have been updated to be more consistent (I discovered this because the refactored code happened to be seemingly more correct). - Regular regex literals used to erraneously allow newlines to be escaped, causing invalid JavaScript output. This has been fixed. - Heregexes may now be completely empty (`//////`), instead of erroring out with a confusing message. - Fix #2388: Heredocs and heregexes used to be lexed simply, which meant that you couldn't nest a heredoc within a heredoc (double-quoted, that is) or a heregex inside a heregex. - Fix #2321: If you used division inside interpolation and then a slash later in the string containing that interpolation, the division slash and the latter slash was erraneously matched as a regex. This has been fixed. - Indentation inside interpolations in heredocs no longer affect how much indentation is removed from each line of the heredoc (which is more intuitive). - Whitespace is now correctly trimmed from the start and end of strings in a few edge cases. - Last but not least, the lexing of interpolated strings now seems to be more efficient. For a regular double-quoted string, we used to use a custom function to find the end of it (taking interpolations and interpolations within interpolations etc. into account). Then we used to re-find the interpolations and recursively lex their contents. In effect, the same string was processed twice, or even more in the case of deeper nesting of interpolations. Now the same string is processed just once. - Code duplication between regular strings, heredocs, regular regexes and heregexes has been reduced. - The above two points should result in more easily read code, too.
2015-01-03 22:40:43 +00:00
(_ref2 = this.tokens).push.apply(_ref2, tokensToPush);
}
2012-04-10 18:57:45 +00:00
if (interpolated) {
Refactor interpolation (and string and regex) handling in lexer - Fix #3394: Unclosed single-quoted strings (both regular ones and heredocs) used to pass through the lexer, causing a parsing error later, while double-quoted strings caused an error already in the lexing phase. Now both single and double-quoted unclosed strings error out in the lexer (which is the more logical option) with consistent error messages. This also fixes the last comment by @satyr in #3301. - Similar to the above, unclosed heregexes also used to pass through the lexer and not error until in the parsing phase, which resulted in confusing error messages. This has been fixed, too. - Fix #3348, by adding passing tests. - Fix #3529: If a string starts with an interpolation, an empty string is no longer emitted before the interpolation (unless it is needed to coerce the interpolation into a string). - Block comments cannot contain `*/`. Now the error message also shows exactly where the offending `*/`. This improvement might seem unrelated, but I had to touch that code anyway to refactor string and regex related code, and the change was very trivial. Moreover, it's consistent with the next two points. - Regexes cannot start with `*`. Now the error message also shows exactly where the offending `*` is. (It might actually not be exatly at the start in heregexes.) It is a very minor improvement, but it was trivial to add. - Octal escapes in strings are forbidden in CoffeeScript (just like in JavaScript strict mode). However, this used to be the case only for regular strings. Now they are also forbidden in heredocs. Moreover, the errors now point at the offending octal escape. - Invalid regex flags are no longer allowed. This includes repeated modifiers and unknown ones. Moreover, invalid modifiers do not stop a heregex from being matched, which results in better error messages. - Fix #3621: `///a#{1}///` compiles to `RegExp("a" + 1)`. So does `RegExp("a#{1}")`. Still, those two code snippets used to generate different tokens, which is a bit weird, but more importantly causes problems for coffeelint (see clutchski/coffeelint#340). This required lots of tests in test/location.coffee to be updated. Note that some updates to those tests are unrelated to this point; some have been updated to be more consistent (I discovered this because the refactored code happened to be seemingly more correct). - Regular regex literals used to erraneously allow newlines to be escaped, causing invalid JavaScript output. This has been fixed. - Heregexes may now be completely empty (`//////`), instead of erroring out with a confusing message. - Fix #2388: Heredocs and heregexes used to be lexed simply, which meant that you couldn't nest a heredoc within a heredoc (double-quoted, that is) or a heregex inside a heregex. - Fix #2321: If you used division inside interpolation and then a slash later in the string containing that interpolation, the division slash and the latter slash was erraneously matched as a regex. This has been fixed. - Indentation inside interpolations in heredocs no longer affect how much indentation is removed from each line of the heredoc (which is more intuitive). - Whitespace is now correctly trimmed from the start and end of strings in a few edge cases. - Last but not least, the lexing of interpolated strings now seems to be more efficient. For a regular double-quoted string, we used to use a custom function to find the end of it (taking interpolations and interpolations within interpolations etc. into account). Then we used to re-find the interpolations and recursively lex their contents. In effect, the same string was processed twice, or even more in the case of deeper nesting of interpolations. Now the same string is processed just once. - Code duplication between regular strings, heredocs, regular regexes and heregexes has been reduced. - The above two points should result in more easily read code, too.
2015-01-03 22:40:43 +00:00
rparen = this.token(')', ')', end, 0);
return rparen.stringEnd = true;
2012-04-10 18:57:45 +00:00
}
};
2011-09-16 23:26:04 +00:00
Lexer.prototype.pair = function(tag) {
var wanted, _ref2;
if (tag !== (wanted = (_ref2 = last(this.ends)) != null ? _ref2.tag : void 0)) {
2012-04-10 18:57:45 +00:00
if ('OUTDENT' !== wanted) {
this.error("unmatched " + tag);
}
2014-01-22 18:30:13 +00:00
this.outdentToken(last(this.indents), true);
2011-09-16 23:26:04 +00:00
return this.pair(tag);
}
return this.ends.pop();
};
Lexer.prototype.getLineAndColumnFromChunk = function(offset) {
var column, lineCount, lines, string;
if (offset === 0) {
return [this.chunkLine, this.chunkColumn];
}
if (offset >= this.chunk.length) {
string = this.chunk;
} else {
string = this.chunk.slice(0, +(offset - 1) + 1 || 9e9);
}
lineCount = count(string, '\n');
column = this.chunkColumn;
if (lineCount > 0) {
lines = string.split('\n');
2013-04-27 22:56:44 +00:00
column = last(lines).length;
} else {
column += string.length;
}
return [this.chunkLine + lineCount, column];
};
Lexer.prototype.makeToken = function(tag, value, offsetInChunk, length) {
var lastCharacter, locationData, token, _ref2, _ref3;
if (offsetInChunk == null) {
offsetInChunk = 0;
}
if (length == null) {
length = value.length;
}
locationData = {};
_ref2 = this.getLineAndColumnFromChunk(offsetInChunk), locationData.first_line = _ref2[0], locationData.first_column = _ref2[1];
lastCharacter = Math.max(0, length - 1);
_ref3 = this.getLineAndColumnFromChunk(offsetInChunk + lastCharacter), locationData.last_line = _ref3[0], locationData.last_column = _ref3[1];
token = [tag, value, locationData];
return token;
};
Lexer.prototype.token = function(tag, value, offsetInChunk, length, origin) {
var token;
token = this.makeToken(tag, value, offsetInChunk, length);
if (origin) {
token.origin = origin;
}
this.tokens.push(token);
return token;
};
2010-10-05 14:31:48 +00:00
Lexer.prototype.tag = function(index, tag) {
var tok;
2010-11-05 04:04:52 +00:00
return (tok = last(this.tokens, index)) && (tag ? tok[0] = tag : tok[0]);
};
Lexer.prototype.value = function(index, val) {
var tok;
2010-11-05 04:04:52 +00:00
return (tok = last(this.tokens, index)) && (val ? tok[1] = val : tok[1]);
};
Lexer.prototype.unfinished = function() {
2012-03-01 04:46:03 +00:00
var _ref2;
2014-09-06 13:40:53 +00:00
return LINE_CONTINUER.test(this.chunk) || ((_ref2 = this.tag()) === '\\' || _ref2 === '.' || _ref2 === '?.' || _ref2 === '?::' || _ref2 === 'UNARY' || _ref2 === 'MATH' || _ref2 === 'UNARY_MATH' || _ref2 === '+' || _ref2 === '-' || _ref2 === 'YIELD' || _ref2 === '**' || _ref2 === 'SHIFT' || _ref2 === 'RELATION' || _ref2 === 'COMPARE' || _ref2 === 'LOGIC' || _ref2 === 'THROW' || _ref2 === 'EXTENDS');
};
Refactor interpolation (and string and regex) handling in lexer - Fix #3394: Unclosed single-quoted strings (both regular ones and heredocs) used to pass through the lexer, causing a parsing error later, while double-quoted strings caused an error already in the lexing phase. Now both single and double-quoted unclosed strings error out in the lexer (which is the more logical option) with consistent error messages. This also fixes the last comment by @satyr in #3301. - Similar to the above, unclosed heregexes also used to pass through the lexer and not error until in the parsing phase, which resulted in confusing error messages. This has been fixed, too. - Fix #3348, by adding passing tests. - Fix #3529: If a string starts with an interpolation, an empty string is no longer emitted before the interpolation (unless it is needed to coerce the interpolation into a string). - Block comments cannot contain `*/`. Now the error message also shows exactly where the offending `*/`. This improvement might seem unrelated, but I had to touch that code anyway to refactor string and regex related code, and the change was very trivial. Moreover, it's consistent with the next two points. - Regexes cannot start with `*`. Now the error message also shows exactly where the offending `*` is. (It might actually not be exatly at the start in heregexes.) It is a very minor improvement, but it was trivial to add. - Octal escapes in strings are forbidden in CoffeeScript (just like in JavaScript strict mode). However, this used to be the case only for regular strings. Now they are also forbidden in heredocs. Moreover, the errors now point at the offending octal escape. - Invalid regex flags are no longer allowed. This includes repeated modifiers and unknown ones. Moreover, invalid modifiers do not stop a heregex from being matched, which results in better error messages. - Fix #3621: `///a#{1}///` compiles to `RegExp("a" + 1)`. So does `RegExp("a#{1}")`. Still, those two code snippets used to generate different tokens, which is a bit weird, but more importantly causes problems for coffeelint (see clutchski/coffeelint#340). This required lots of tests in test/location.coffee to be updated. Note that some updates to those tests are unrelated to this point; some have been updated to be more consistent (I discovered this because the refactored code happened to be seemingly more correct). - Regular regex literals used to erraneously allow newlines to be escaped, causing invalid JavaScript output. This has been fixed. - Heregexes may now be completely empty (`//////`), instead of erroring out with a confusing message. - Fix #2388: Heredocs and heregexes used to be lexed simply, which meant that you couldn't nest a heredoc within a heredoc (double-quoted, that is) or a heregex inside a heregex. - Fix #2321: If you used division inside interpolation and then a slash later in the string containing that interpolation, the division slash and the latter slash was erraneously matched as a regex. This has been fixed. - Indentation inside interpolations in heredocs no longer affect how much indentation is removed from each line of the heredoc (which is more intuitive). - Whitespace is now correctly trimmed from the start and end of strings in a few edge cases. - Last but not least, the lexing of interpolated strings now seems to be more efficient. For a regular double-quoted string, we used to use a custom function to find the end of it (taking interpolations and interpolations within interpolations etc. into account). Then we used to re-find the interpolations and recursively lex their contents. In effect, the same string was processed twice, or even more in the case of deeper nesting of interpolations. Now the same string is processed just once. - Code duplication between regular strings, heredocs, regular regexes and heregexes has been reduced. - The above two points should result in more easily read code, too.
2015-01-03 22:40:43 +00:00
Lexer.prototype.formatString = function(str) {
return str.replace(/\\[^\S\n]*(\n|\\)\s*/g, function(escaped, character) {
if (character === '\n') {
return '';
} else {
return escaped;
}
});
};
Refactor interpolation (and string and regex) handling in lexer - Fix #3394: Unclosed single-quoted strings (both regular ones and heredocs) used to pass through the lexer, causing a parsing error later, while double-quoted strings caused an error already in the lexing phase. Now both single and double-quoted unclosed strings error out in the lexer (which is the more logical option) with consistent error messages. This also fixes the last comment by @satyr in #3301. - Similar to the above, unclosed heregexes also used to pass through the lexer and not error until in the parsing phase, which resulted in confusing error messages. This has been fixed, too. - Fix #3348, by adding passing tests. - Fix #3529: If a string starts with an interpolation, an empty string is no longer emitted before the interpolation (unless it is needed to coerce the interpolation into a string). - Block comments cannot contain `*/`. Now the error message also shows exactly where the offending `*/`. This improvement might seem unrelated, but I had to touch that code anyway to refactor string and regex related code, and the change was very trivial. Moreover, it's consistent with the next two points. - Regexes cannot start with `*`. Now the error message also shows exactly where the offending `*` is. (It might actually not be exatly at the start in heregexes.) It is a very minor improvement, but it was trivial to add. - Octal escapes in strings are forbidden in CoffeeScript (just like in JavaScript strict mode). However, this used to be the case only for regular strings. Now they are also forbidden in heredocs. Moreover, the errors now point at the offending octal escape. - Invalid regex flags are no longer allowed. This includes repeated modifiers and unknown ones. Moreover, invalid modifiers do not stop a heregex from being matched, which results in better error messages. - Fix #3621: `///a#{1}///` compiles to `RegExp("a" + 1)`. So does `RegExp("a#{1}")`. Still, those two code snippets used to generate different tokens, which is a bit weird, but more importantly causes problems for coffeelint (see clutchski/coffeelint#340). This required lots of tests in test/location.coffee to be updated. Note that some updates to those tests are unrelated to this point; some have been updated to be more consistent (I discovered this because the refactored code happened to be seemingly more correct). - Regular regex literals used to erraneously allow newlines to be escaped, causing invalid JavaScript output. This has been fixed. - Heregexes may now be completely empty (`//////`), instead of erroring out with a confusing message. - Fix #2388: Heredocs and heregexes used to be lexed simply, which meant that you couldn't nest a heredoc within a heredoc (double-quoted, that is) or a heregex inside a heregex. - Fix #2321: If you used division inside interpolation and then a slash later in the string containing that interpolation, the division slash and the latter slash was erraneously matched as a regex. This has been fixed. - Indentation inside interpolations in heredocs no longer affect how much indentation is removed from each line of the heredoc (which is more intuitive). - Whitespace is now correctly trimmed from the start and end of strings in a few edge cases. - Last but not least, the lexing of interpolated strings now seems to be more efficient. For a regular double-quoted string, we used to use a custom function to find the end of it (taking interpolations and interpolations within interpolations etc. into account). Then we used to re-find the interpolations and recursively lex their contents. In effect, the same string was processed twice, or even more in the case of deeper nesting of interpolations. Now the same string is processed just once. - Code duplication between regular strings, heredocs, regular regexes and heregexes has been reduced. - The above two points should result in more easily read code, too.
2015-01-03 22:40:43 +00:00
Lexer.prototype.formatHeregex = function(str) {
return str.replace(HEREGEX_OMIT, '$1$2').replace(MULTILINER, '\\n');
};
Lexer.prototype.makeString = function(body, quote) {
var match;
2012-04-10 18:57:45 +00:00
if (!body) {
return quote + quote;
}
body = body.replace(RegExp("\\\\(" + quote + "|\\\\)", "g"), function(match, contents) {
if (contents === quote) {
return contents;
} else {
return match;
}
});
body = body.replace(RegExp("" + quote, "g"), '\\$&');
Refactor interpolation (and string and regex) handling in lexer - Fix #3394: Unclosed single-quoted strings (both regular ones and heredocs) used to pass through the lexer, causing a parsing error later, while double-quoted strings caused an error already in the lexing phase. Now both single and double-quoted unclosed strings error out in the lexer (which is the more logical option) with consistent error messages. This also fixes the last comment by @satyr in #3301. - Similar to the above, unclosed heregexes also used to pass through the lexer and not error until in the parsing phase, which resulted in confusing error messages. This has been fixed, too. - Fix #3348, by adding passing tests. - Fix #3529: If a string starts with an interpolation, an empty string is no longer emitted before the interpolation (unless it is needed to coerce the interpolation into a string). - Block comments cannot contain `*/`. Now the error message also shows exactly where the offending `*/`. This improvement might seem unrelated, but I had to touch that code anyway to refactor string and regex related code, and the change was very trivial. Moreover, it's consistent with the next two points. - Regexes cannot start with `*`. Now the error message also shows exactly where the offending `*` is. (It might actually not be exatly at the start in heregexes.) It is a very minor improvement, but it was trivial to add. - Octal escapes in strings are forbidden in CoffeeScript (just like in JavaScript strict mode). However, this used to be the case only for regular strings. Now they are also forbidden in heredocs. Moreover, the errors now point at the offending octal escape. - Invalid regex flags are no longer allowed. This includes repeated modifiers and unknown ones. Moreover, invalid modifiers do not stop a heregex from being matched, which results in better error messages. - Fix #3621: `///a#{1}///` compiles to `RegExp("a" + 1)`. So does `RegExp("a#{1}")`. Still, those two code snippets used to generate different tokens, which is a bit weird, but more importantly causes problems for coffeelint (see clutchski/coffeelint#340). This required lots of tests in test/location.coffee to be updated. Note that some updates to those tests are unrelated to this point; some have been updated to be more consistent (I discovered this because the refactored code happened to be seemingly more correct). - Regular regex literals used to erraneously allow newlines to be escaped, causing invalid JavaScript output. This has been fixed. - Heregexes may now be completely empty (`//////`), instead of erroring out with a confusing message. - Fix #2388: Heredocs and heregexes used to be lexed simply, which meant that you couldn't nest a heredoc within a heredoc (double-quoted, that is) or a heregex inside a heregex. - Fix #2321: If you used division inside interpolation and then a slash later in the string containing that interpolation, the division slash and the latter slash was erraneously matched as a regex. This has been fixed. - Indentation inside interpolations in heredocs no longer affect how much indentation is removed from each line of the heredoc (which is more intuitive). - Whitespace is now correctly trimmed from the start and end of strings in a few edge cases. - Last but not least, the lexing of interpolated strings now seems to be more efficient. For a regular double-quoted string, we used to use a custom function to find the end of it (taking interpolations and interpolations within interpolations etc. into account). Then we used to re-find the interpolations and recursively lex their contents. In effect, the same string was processed twice, or even more in the case of deeper nesting of interpolations. Now the same string is processed just once. - Code duplication between regular strings, heredocs, regular regexes and heregexes has been reduced. - The above two points should result in more easily read code, too.
2015-01-03 22:40:43 +00:00
if (match = OCTAL_ESCAPE.exec(body)) {
this.error("octal escape sequences are not allowed " + match[2], match.index + match[1].length + 1);
}
return quote + body + quote;
};
Lexer.prototype.error = function(message, offset) {
var first_column, first_line, _ref2;
if (offset == null) {
offset = 0;
}
_ref2 = this.getLineAndColumnFromChunk(offset), first_line = _ref2[0], first_column = _ref2[1];
return throwSyntaxError(message, {
first_line: first_line,
first_column: first_column
});
2011-09-16 23:26:04 +00:00
};
return Lexer;
2011-12-14 15:39:20 +00:00
2010-12-23 18:50:52 +00:00
})();
2014-09-06 10:55:27 +00:00
JS_KEYWORDS = ['true', 'false', 'null', 'this', 'new', 'delete', 'typeof', 'in', 'instanceof', 'return', 'throw', 'break', 'continue', 'debugger', 'yield', 'if', 'else', 'switch', 'for', 'while', 'do', 'try', 'catch', 'finally', 'class', 'extends', 'super'];
COFFEE_KEYWORDS = ['undefined', 'then', 'unless', 'until', 'loop', 'of', 'by', 'when'];
2011-04-30 14:35:56 +00:00
COFFEE_ALIAS_MAP = {
and: '&&',
or: '||',
is: '==',
isnt: '!=',
not: '!',
2010-10-23 18:24:30 +00:00
yes: 'true',
no: 'false',
on: 'true',
off: 'false'
2011-04-30 14:35:56 +00:00
};
2011-04-30 14:35:56 +00:00
COFFEE_ALIASES = (function() {
var _results;
_results = [];
for (key in COFFEE_ALIAS_MAP) {
_results.push(key);
}
return _results;
})();
2011-04-30 14:35:56 +00:00
COFFEE_KEYWORDS = COFFEE_KEYWORDS.concat(COFFEE_ALIASES);
RESERVED = ['case', 'default', 'function', 'var', 'void', 'with', 'const', 'let', 'enum', 'export', 'import', 'native', '__hasProp', '__extends', '__slice', '__bind', '__indexOf', 'implements', 'interface', 'package', 'private', 'protected', 'public', 'static'];
2014-09-06 11:53:21 +00:00
STRICT_PROSCRIBED = ['arguments', 'eval', 'yield*'];
JS_FORBIDDEN = JS_KEYWORDS.concat(RESERVED).concat(STRICT_PROSCRIBED);
exports.RESERVED = RESERVED.concat(JS_KEYWORDS).concat(COFFEE_KEYWORDS).concat(STRICT_PROSCRIBED);
exports.STRICT_PROSCRIBED = STRICT_PROSCRIBED;
BOM = 65279;
2010-12-29 01:42:20 +00:00
IDENTIFIER = /^([$A-Za-z_\x7f-\uffff][$\w\x7f-\uffff]*)([^\n\S]*:(?!:))?/;
NUMBER = /^0b[01]+|^0o[0-7]+|^0x[\da-f]+|^\d*\.?\d+(?:e[+-]?\d+)?/i;
2014-09-06 10:55:27 +00:00
OPERATOR = /^(?:[-=]>|[-+*\/%<>&|^!?=]=|>>>=?|([-+:])\1|([&|<>*\/%])\2=?|\?(\.|::)|\.{2,3})/;
WHITESPACE = /^[^\n\S]+/;
COMMENT = /^###([^#][\s\S]*?)(?:###[^\n\S]*|###$)|^(?:\s*#(?!##[^#]).*)+/;
2014-09-06 10:55:27 +00:00
CODE = /^[-=]>/;
MULTI_DENT = /^(?:\n[^\n\S]*)+/;
2010-09-24 13:38:28 +00:00
JSTOKEN = /^`[^\\`]*(?:\\.[^\\`]*)*`/;
Refactor interpolation (and string and regex) handling in lexer - Fix #3394: Unclosed single-quoted strings (both regular ones and heredocs) used to pass through the lexer, causing a parsing error later, while double-quoted strings caused an error already in the lexing phase. Now both single and double-quoted unclosed strings error out in the lexer (which is the more logical option) with consistent error messages. This also fixes the last comment by @satyr in #3301. - Similar to the above, unclosed heregexes also used to pass through the lexer and not error until in the parsing phase, which resulted in confusing error messages. This has been fixed, too. - Fix #3348, by adding passing tests. - Fix #3529: If a string starts with an interpolation, an empty string is no longer emitted before the interpolation (unless it is needed to coerce the interpolation into a string). - Block comments cannot contain `*/`. Now the error message also shows exactly where the offending `*/`. This improvement might seem unrelated, but I had to touch that code anyway to refactor string and regex related code, and the change was very trivial. Moreover, it's consistent with the next two points. - Regexes cannot start with `*`. Now the error message also shows exactly where the offending `*` is. (It might actually not be exatly at the start in heregexes.) It is a very minor improvement, but it was trivial to add. - Octal escapes in strings are forbidden in CoffeeScript (just like in JavaScript strict mode). However, this used to be the case only for regular strings. Now they are also forbidden in heredocs. Moreover, the errors now point at the offending octal escape. - Invalid regex flags are no longer allowed. This includes repeated modifiers and unknown ones. Moreover, invalid modifiers do not stop a heregex from being matched, which results in better error messages. - Fix #3621: `///a#{1}///` compiles to `RegExp("a" + 1)`. So does `RegExp("a#{1}")`. Still, those two code snippets used to generate different tokens, which is a bit weird, but more importantly causes problems for coffeelint (see clutchski/coffeelint#340). This required lots of tests in test/location.coffee to be updated. Note that some updates to those tests are unrelated to this point; some have been updated to be more consistent (I discovered this because the refactored code happened to be seemingly more correct). - Regular regex literals used to erraneously allow newlines to be escaped, causing invalid JavaScript output. This has been fixed. - Heregexes may now be completely empty (`//////`), instead of erroring out with a confusing message. - Fix #2388: Heredocs and heregexes used to be lexed simply, which meant that you couldn't nest a heredoc within a heredoc (double-quoted, that is) or a heregex inside a heregex. - Fix #2321: If you used division inside interpolation and then a slash later in the string containing that interpolation, the division slash and the latter slash was erraneously matched as a regex. This has been fixed. - Indentation inside interpolations in heredocs no longer affect how much indentation is removed from each line of the heredoc (which is more intuitive). - Whitespace is now correctly trimmed from the start and end of strings in a few edge cases. - Last but not least, the lexing of interpolated strings now seems to be more efficient. For a regular double-quoted string, we used to use a custom function to find the end of it (taking interpolations and interpolations within interpolations etc. into account). Then we used to re-find the interpolations and recursively lex their contents. In effect, the same string was processed twice, or even more in the case of deeper nesting of interpolations. Now the same string is processed just once. - Code duplication between regular strings, heredocs, regular regexes and heregexes has been reduced. - The above two points should result in more easily read code, too.
2015-01-03 22:40:43 +00:00
STRING_START = /^(?:'''|"""|'|")/;
STRING_SINGLE = /^(?:[^\\']|\\[\s\S])*/;
STRING_DOUBLE = /^(?:[^\\"#]|\\[\s\S]|\#(?!\{))*/;
HEREDOC_SINGLE = /^(?:[^\\']|\\[\s\S]|'(?!''))*/;
HEREDOC_DOUBLE = /^(?:[^\\"#]|\\[\s\S]|"(?!"")|\#(?!\{))*/;
STRING_OMIT = /\s*\n\s*/g;
HEREDOC_INDENT = /\n+([^\n\S]*)(?=\S)/g;
REGEX = /^\/(?![\s=])(?:[^[\/\n\\]|\\.|\[(?:\\.|[^\]\n\\])*])+\//;
Refactor interpolation (and string and regex) handling in lexer - Fix #3394: Unclosed single-quoted strings (both regular ones and heredocs) used to pass through the lexer, causing a parsing error later, while double-quoted strings caused an error already in the lexing phase. Now both single and double-quoted unclosed strings error out in the lexer (which is the more logical option) with consistent error messages. This also fixes the last comment by @satyr in #3301. - Similar to the above, unclosed heregexes also used to pass through the lexer and not error until in the parsing phase, which resulted in confusing error messages. This has been fixed, too. - Fix #3348, by adding passing tests. - Fix #3529: If a string starts with an interpolation, an empty string is no longer emitted before the interpolation (unless it is needed to coerce the interpolation into a string). - Block comments cannot contain `*/`. Now the error message also shows exactly where the offending `*/`. This improvement might seem unrelated, but I had to touch that code anyway to refactor string and regex related code, and the change was very trivial. Moreover, it's consistent with the next two points. - Regexes cannot start with `*`. Now the error message also shows exactly where the offending `*` is. (It might actually not be exatly at the start in heregexes.) It is a very minor improvement, but it was trivial to add. - Octal escapes in strings are forbidden in CoffeeScript (just like in JavaScript strict mode). However, this used to be the case only for regular strings. Now they are also forbidden in heredocs. Moreover, the errors now point at the offending octal escape. - Invalid regex flags are no longer allowed. This includes repeated modifiers and unknown ones. Moreover, invalid modifiers do not stop a heregex from being matched, which results in better error messages. - Fix #3621: `///a#{1}///` compiles to `RegExp("a" + 1)`. So does `RegExp("a#{1}")`. Still, those two code snippets used to generate different tokens, which is a bit weird, but more importantly causes problems for coffeelint (see clutchski/coffeelint#340). This required lots of tests in test/location.coffee to be updated. Note that some updates to those tests are unrelated to this point; some have been updated to be more consistent (I discovered this because the refactored code happened to be seemingly more correct). - Regular regex literals used to erraneously allow newlines to be escaped, causing invalid JavaScript output. This has been fixed. - Heregexes may now be completely empty (`//////`), instead of erroring out with a confusing message. - Fix #2388: Heredocs and heregexes used to be lexed simply, which meant that you couldn't nest a heredoc within a heredoc (double-quoted, that is) or a heregex inside a heregex. - Fix #2321: If you used division inside interpolation and then a slash later in the string containing that interpolation, the division slash and the latter slash was erraneously matched as a regex. This has been fixed. - Indentation inside interpolations in heredocs no longer affect how much indentation is removed from each line of the heredoc (which is more intuitive). - Whitespace is now correctly trimmed from the start and end of strings in a few edge cases. - Last but not least, the lexing of interpolated strings now seems to be more efficient. For a regular double-quoted string, we used to use a custom function to find the end of it (taking interpolations and interpolations within interpolations etc. into account). Then we used to re-find the interpolations and recursively lex their contents. In effect, the same string was processed twice, or even more in the case of deeper nesting of interpolations. Now the same string is processed just once. - Code duplication between regular strings, heredocs, regular regexes and heregexes has been reduced. - The above two points should result in more easily read code, too.
2015-01-03 22:40:43 +00:00
REGEX_FLAGS = /^\w*/;
VALID_FLAGS = /^(?!.*(.).*\1)[imgy]*$/;
HEREGEX = /^(?:[^\\\/#]|\\[\s\S]|\/(?!\/\/)|\#(?!\{))*/;
2013-10-23 22:36:46 +00:00
HEREGEX_OMIT = /((?:\\\\)+)|\\(\s|\/)|\s+(?:#.*)?/g;
Refactor interpolation (and string and regex) handling in lexer - Fix #3394: Unclosed single-quoted strings (both regular ones and heredocs) used to pass through the lexer, causing a parsing error later, while double-quoted strings caused an error already in the lexing phase. Now both single and double-quoted unclosed strings error out in the lexer (which is the more logical option) with consistent error messages. This also fixes the last comment by @satyr in #3301. - Similar to the above, unclosed heregexes also used to pass through the lexer and not error until in the parsing phase, which resulted in confusing error messages. This has been fixed, too. - Fix #3348, by adding passing tests. - Fix #3529: If a string starts with an interpolation, an empty string is no longer emitted before the interpolation (unless it is needed to coerce the interpolation into a string). - Block comments cannot contain `*/`. Now the error message also shows exactly where the offending `*/`. This improvement might seem unrelated, but I had to touch that code anyway to refactor string and regex related code, and the change was very trivial. Moreover, it's consistent with the next two points. - Regexes cannot start with `*`. Now the error message also shows exactly where the offending `*` is. (It might actually not be exatly at the start in heregexes.) It is a very minor improvement, but it was trivial to add. - Octal escapes in strings are forbidden in CoffeeScript (just like in JavaScript strict mode). However, this used to be the case only for regular strings. Now they are also forbidden in heredocs. Moreover, the errors now point at the offending octal escape. - Invalid regex flags are no longer allowed. This includes repeated modifiers and unknown ones. Moreover, invalid modifiers do not stop a heregex from being matched, which results in better error messages. - Fix #3621: `///a#{1}///` compiles to `RegExp("a" + 1)`. So does `RegExp("a#{1}")`. Still, those two code snippets used to generate different tokens, which is a bit weird, but more importantly causes problems for coffeelint (see clutchski/coffeelint#340). This required lots of tests in test/location.coffee to be updated. Note that some updates to those tests are unrelated to this point; some have been updated to be more consistent (I discovered this because the refactored code happened to be seemingly more correct). - Regular regex literals used to erraneously allow newlines to be escaped, causing invalid JavaScript output. This has been fixed. - Heregexes may now be completely empty (`//////`), instead of erroring out with a confusing message. - Fix #2388: Heredocs and heregexes used to be lexed simply, which meant that you couldn't nest a heredoc within a heredoc (double-quoted, that is) or a heregex inside a heregex. - Fix #2321: If you used division inside interpolation and then a slash later in the string containing that interpolation, the division slash and the latter slash was erraneously matched as a regex. This has been fixed. - Indentation inside interpolations in heredocs no longer affect how much indentation is removed from each line of the heredoc (which is more intuitive). - Whitespace is now correctly trimmed from the start and end of strings in a few edge cases. - Last but not least, the lexing of interpolated strings now seems to be more efficient. For a regular double-quoted string, we used to use a custom function to find the end of it (taking interpolations and interpolations within interpolations etc. into account). Then we used to re-find the interpolations and recursively lex their contents. In effect, the same string was processed twice, or even more in the case of deeper nesting of interpolations. Now the same string is processed just once. - Code duplication between regular strings, heredocs, regular regexes and heregexes has been reduced. - The above two points should result in more easily read code, too.
2015-01-03 22:40:43 +00:00
REGEX_ILLEGAL = /^(\/|\/{3}\s*)(\*)/;
Refactor interpolation (and string and regex) handling in lexer - Fix #3394: Unclosed single-quoted strings (both regular ones and heredocs) used to pass through the lexer, causing a parsing error later, while double-quoted strings caused an error already in the lexing phase. Now both single and double-quoted unclosed strings error out in the lexer (which is the more logical option) with consistent error messages. This also fixes the last comment by @satyr in #3301. - Similar to the above, unclosed heregexes also used to pass through the lexer and not error until in the parsing phase, which resulted in confusing error messages. This has been fixed, too. - Fix #3348, by adding passing tests. - Fix #3529: If a string starts with an interpolation, an empty string is no longer emitted before the interpolation (unless it is needed to coerce the interpolation into a string). - Block comments cannot contain `*/`. Now the error message also shows exactly where the offending `*/`. This improvement might seem unrelated, but I had to touch that code anyway to refactor string and regex related code, and the change was very trivial. Moreover, it's consistent with the next two points. - Regexes cannot start with `*`. Now the error message also shows exactly where the offending `*` is. (It might actually not be exatly at the start in heregexes.) It is a very minor improvement, but it was trivial to add. - Octal escapes in strings are forbidden in CoffeeScript (just like in JavaScript strict mode). However, this used to be the case only for regular strings. Now they are also forbidden in heredocs. Moreover, the errors now point at the offending octal escape. - Invalid regex flags are no longer allowed. This includes repeated modifiers and unknown ones. Moreover, invalid modifiers do not stop a heregex from being matched, which results in better error messages. - Fix #3621: `///a#{1}///` compiles to `RegExp("a" + 1)`. So does `RegExp("a#{1}")`. Still, those two code snippets used to generate different tokens, which is a bit weird, but more importantly causes problems for coffeelint (see clutchski/coffeelint#340). This required lots of tests in test/location.coffee to be updated. Note that some updates to those tests are unrelated to this point; some have been updated to be more consistent (I discovered this because the refactored code happened to be seemingly more correct). - Regular regex literals used to erraneously allow newlines to be escaped, causing invalid JavaScript output. This has been fixed. - Heregexes may now be completely empty (`//////`), instead of erroring out with a confusing message. - Fix #2388: Heredocs and heregexes used to be lexed simply, which meant that you couldn't nest a heredoc within a heredoc (double-quoted, that is) or a heregex inside a heregex. - Fix #2321: If you used division inside interpolation and then a slash later in the string containing that interpolation, the division slash and the latter slash was erraneously matched as a regex. This has been fixed. - Indentation inside interpolations in heredocs no longer affect how much indentation is removed from each line of the heredoc (which is more intuitive). - Whitespace is now correctly trimmed from the start and end of strings in a few edge cases. - Last but not least, the lexing of interpolated strings now seems to be more efficient. For a regular double-quoted string, we used to use a custom function to find the end of it (taking interpolations and interpolations within interpolations etc. into account). Then we used to re-find the interpolations and recursively lex their contents. In effect, the same string was processed twice, or even more in the case of deeper nesting of interpolations. Now the same string is processed just once. - Code duplication between regular strings, heredocs, regular regexes and heregexes has been reduced. - The above two points should result in more easily read code, too.
2015-01-03 22:40:43 +00:00
MULTILINER = /\n/g;
Refactor interpolation (and string and regex) handling in lexer - Fix #3394: Unclosed single-quoted strings (both regular ones and heredocs) used to pass through the lexer, causing a parsing error later, while double-quoted strings caused an error already in the lexing phase. Now both single and double-quoted unclosed strings error out in the lexer (which is the more logical option) with consistent error messages. This also fixes the last comment by @satyr in #3301. - Similar to the above, unclosed heregexes also used to pass through the lexer and not error until in the parsing phase, which resulted in confusing error messages. This has been fixed, too. - Fix #3348, by adding passing tests. - Fix #3529: If a string starts with an interpolation, an empty string is no longer emitted before the interpolation (unless it is needed to coerce the interpolation into a string). - Block comments cannot contain `*/`. Now the error message also shows exactly where the offending `*/`. This improvement might seem unrelated, but I had to touch that code anyway to refactor string and regex related code, and the change was very trivial. Moreover, it's consistent with the next two points. - Regexes cannot start with `*`. Now the error message also shows exactly where the offending `*` is. (It might actually not be exatly at the start in heregexes.) It is a very minor improvement, but it was trivial to add. - Octal escapes in strings are forbidden in CoffeeScript (just like in JavaScript strict mode). However, this used to be the case only for regular strings. Now they are also forbidden in heredocs. Moreover, the errors now point at the offending octal escape. - Invalid regex flags are no longer allowed. This includes repeated modifiers and unknown ones. Moreover, invalid modifiers do not stop a heregex from being matched, which results in better error messages. - Fix #3621: `///a#{1}///` compiles to `RegExp("a" + 1)`. So does `RegExp("a#{1}")`. Still, those two code snippets used to generate different tokens, which is a bit weird, but more importantly causes problems for coffeelint (see clutchski/coffeelint#340). This required lots of tests in test/location.coffee to be updated. Note that some updates to those tests are unrelated to this point; some have been updated to be more consistent (I discovered this because the refactored code happened to be seemingly more correct). - Regular regex literals used to erraneously allow newlines to be escaped, causing invalid JavaScript output. This has been fixed. - Heregexes may now be completely empty (`//////`), instead of erroring out with a confusing message. - Fix #2388: Heredocs and heregexes used to be lexed simply, which meant that you couldn't nest a heredoc within a heredoc (double-quoted, that is) or a heregex inside a heregex. - Fix #2321: If you used division inside interpolation and then a slash later in the string containing that interpolation, the division slash and the latter slash was erraneously matched as a regex. This has been fixed. - Indentation inside interpolations in heredocs no longer affect how much indentation is removed from each line of the heredoc (which is more intuitive). - Whitespace is now correctly trimmed from the start and end of strings in a few edge cases. - Last but not least, the lexing of interpolated strings now seems to be more efficient. For a regular double-quoted string, we used to use a custom function to find the end of it (taking interpolations and interpolations within interpolations etc. into account). Then we used to re-find the interpolations and recursively lex their contents. In effect, the same string was processed twice, or even more in the case of deeper nesting of interpolations. Now the same string is processed just once. - Code duplication between regular strings, heredocs, regular regexes and heregexes has been reduced. - The above two points should result in more easily read code, too.
2015-01-03 22:40:43 +00:00
HERECOMMENT_ILLEGAL = /\*\//;
LINE_CONTINUER = /^\s*(?:,|\??\.(?![.\d])|::)/;
Refactor interpolation (and string and regex) handling in lexer - Fix #3394: Unclosed single-quoted strings (both regular ones and heredocs) used to pass through the lexer, causing a parsing error later, while double-quoted strings caused an error already in the lexing phase. Now both single and double-quoted unclosed strings error out in the lexer (which is the more logical option) with consistent error messages. This also fixes the last comment by @satyr in #3301. - Similar to the above, unclosed heregexes also used to pass through the lexer and not error until in the parsing phase, which resulted in confusing error messages. This has been fixed, too. - Fix #3348, by adding passing tests. - Fix #3529: If a string starts with an interpolation, an empty string is no longer emitted before the interpolation (unless it is needed to coerce the interpolation into a string). - Block comments cannot contain `*/`. Now the error message also shows exactly where the offending `*/`. This improvement might seem unrelated, but I had to touch that code anyway to refactor string and regex related code, and the change was very trivial. Moreover, it's consistent with the next two points. - Regexes cannot start with `*`. Now the error message also shows exactly where the offending `*` is. (It might actually not be exatly at the start in heregexes.) It is a very minor improvement, but it was trivial to add. - Octal escapes in strings are forbidden in CoffeeScript (just like in JavaScript strict mode). However, this used to be the case only for regular strings. Now they are also forbidden in heredocs. Moreover, the errors now point at the offending octal escape. - Invalid regex flags are no longer allowed. This includes repeated modifiers and unknown ones. Moreover, invalid modifiers do not stop a heregex from being matched, which results in better error messages. - Fix #3621: `///a#{1}///` compiles to `RegExp("a" + 1)`. So does `RegExp("a#{1}")`. Still, those two code snippets used to generate different tokens, which is a bit weird, but more importantly causes problems for coffeelint (see clutchski/coffeelint#340). This required lots of tests in test/location.coffee to be updated. Note that some updates to those tests are unrelated to this point; some have been updated to be more consistent (I discovered this because the refactored code happened to be seemingly more correct). - Regular regex literals used to erraneously allow newlines to be escaped, causing invalid JavaScript output. This has been fixed. - Heregexes may now be completely empty (`//////`), instead of erroring out with a confusing message. - Fix #2388: Heredocs and heregexes used to be lexed simply, which meant that you couldn't nest a heredoc within a heredoc (double-quoted, that is) or a heregex inside a heregex. - Fix #2321: If you used division inside interpolation and then a slash later in the string containing that interpolation, the division slash and the latter slash was erraneously matched as a regex. This has been fixed. - Indentation inside interpolations in heredocs no longer affect how much indentation is removed from each line of the heredoc (which is more intuitive). - Whitespace is now correctly trimmed from the start and end of strings in a few edge cases. - Last but not least, the lexing of interpolated strings now seems to be more efficient. For a regular double-quoted string, we used to use a custom function to find the end of it (taking interpolations and interpolations within interpolations etc. into account). Then we used to re-find the interpolations and recursively lex their contents. In effect, the same string was processed twice, or even more in the case of deeper nesting of interpolations. Now the same string is processed just once. - Code duplication between regular strings, heredocs, regular regexes and heregexes has been reduced. - The above two points should result in more easily read code, too.
2015-01-03 22:40:43 +00:00
OCTAL_ESCAPE = /^((?:\\.|[^\\])*)(\\(?:0[0-7]|[1-7]))/;
LEADING_BLANK_LINE = /^[^\n\S]*\n/;
TRAILING_BLANK_LINE = /\n[^\n\S]*$/;
2010-10-03 23:22:42 +00:00
TRAILING_SPACES = /\s+$/;
COMPOUND_ASSIGN = ['-=', '+=', '/=', '*=', '%=', '||=', '&&=', '?=', '<<=', '>>=', '>>>=', '&=', '^=', '|=', '**=', '//=', '%%='];
2014-09-06 11:53:21 +00:00
UNARY = ['NEW', 'TYPEOF', 'DELETE', 'DO'];
UNARY_MATH = ['!', '~'];
2010-10-23 18:24:30 +00:00
LOGIC = ['&&', '||', '&', '|', '^'];
SHIFT = ['<<', '>>', '>>>'];
2010-10-23 18:24:30 +00:00
COMPARE = ['==', '!=', '<', '>', '<=', '>='];
MATH = ['*', '/', '%', '//', '%%'];
RELATION = ['IN', 'OF', 'INSTANCEOF'];
BOOL = ['TRUE', 'FALSE'];
2013-04-21 10:27:30 +00:00
NOT_REGEX = ['NUMBER', 'REGEX', 'BOOL', 'NULL', 'UNDEFINED', '++', '--'];
2013-04-21 10:27:30 +00:00
NOT_SPACED_REGEX = NOT_REGEX.concat(')', '}', 'THIS', 'IDENTIFIER', 'STRING', ']');
CALLABLE = ['IDENTIFIER', 'STRING', 'REGEX', ')', ']', '}', '?', '::', '@', 'THIS', 'SUPER'];
INDEXABLE = CALLABLE.concat('NUMBER', 'BOOL', 'NULL', 'UNDEFINED');
LINE_BREAK = ['INDENT', 'OUTDENT', 'TERMINATOR'];
2011-12-14 15:39:20 +00:00
2014-01-22 18:30:13 +00:00
INDENTABLE_CLOSERS = [')', '}', ']'];
}).call(this);