jashkenas--coffeescript/lib/coffee-script/lexer.js

1070 lines
36 KiB
JavaScript
Raw Normal View History

// Generated by CoffeeScript 2.0.0-alpha
2010-07-25 07:15:12 +00:00
(function() {
var BOM, BOOL, CALLABLE, CODE, COFFEE_ALIASES, COFFEE_ALIAS_MAP, COFFEE_KEYWORDS, COMMENT, COMPARE, COMPOUND_ASSIGN, HERECOMMENT_ILLEGAL, HEREDOC_DOUBLE, HEREDOC_INDENT, HEREDOC_SINGLE, HEREGEX, HEREGEX_OMIT, IDENTIFIER, INDENTABLE_CLOSERS, INDEXABLE, INVALID_ESCAPE, INVERSES, JSTOKEN, JS_KEYWORDS, LEADING_BLANK_LINE, LINE_BREAK, LINE_CONTINUER, Lexer, MATH, MULTI_DENT, NOT_REGEX, NUMBER, OPERATOR, POSSIBLY_DIVISION, REGEX, REGEX_FLAGS, REGEX_ILLEGAL, RELATION, RESERVED, Rewriter, SHIFT, SIMPLE_STRING_OMIT, STRICT_PROSCRIBED, STRING_DOUBLE, STRING_OMIT, STRING_SINGLE, STRING_START, TRAILING_BLANK_LINE, TRAILING_SPACES, UNARY, UNARY_MATH, VALID_FLAGS, WHITESPACE, compact, count, invertLiterate, isUnassignable, key, locationDataToString, ref, ref1, repeat, starts, throwSyntaxError,
indexOf = [].indexOf || function(item) { for (var i = 0, l = this.length; i < l; i++) { if (i in this && this[i] === item) return i; } return -1; },
slice = [].slice;
ref = require('./rewriter'), Rewriter = ref.Rewriter, INVERSES = ref.INVERSES;
ref1 = require('./helpers'), count = ref1.count, starts = ref1.starts, compact = ref1.compact, repeat = ref1.repeat, invertLiterate = ref1.invertLiterate, locationDataToString = ref1.locationDataToString, throwSyntaxError = ref1.throwSyntaxError;
2013-02-25 17:41:34 +00:00
2010-12-23 18:50:52 +00:00
exports.Lexer = Lexer = (function() {
function Lexer() {}
[CS2] Output ES2015 arrow functions, default parameters, rest parameters (#4311) * Eliminate wrapper around “bound” (arrow) functions; output `=>` for such functions * Remove irrelevant (and breaking) tests * Minor cleanup * When a function parameter is a splat (i.e., it uses the ES2015 rest parameter syntax) output that parameter as ES2015 * Rearrange function parameters when one of the parameters is a splat and isn’t the last parameter (very WIP) * Handle params like `@param`, adding assignment expressions for them when they appear; ensure splat parameter is last * Add parameter names (not a text like `'\nValue IdentifierLiteral: a'`) to the scope, so that parameters can’t be deleted; move body-related lines together; more explanation of what’s going on * For parameters with a default value, correctly add the parameter name to the function scope * Handle expansions in function parameters: when an expansion is found, set the parameters to only be the original parameters left of the expansion, then an `...args` parameter; and in the function body define variables for the parameters to the right of the expansion, including setting default values * Handle splat parameters the same way we handle expansions: if a splat parameter is found, it becomes the last parameter in the function definition, and all following parameters get declared in the function body. Fix the splat/rest parameter values after the post-splat parameters have been extracted from it. Clean up `Code.compileNode` so that we loop through the parameters only once, and we create all expressions using calls like `new IdentifierLiteral` rather than `@makeCode`. * Fix parameter name when a parameter is a splat attached to `this` (e.g. `@param...`) * Rather than assigning post-splat parameters based on index, use slice; passes test “Functions with splats being called with too few arguments” * Dial back our w00t indentation * Better parsing of parameter names (WIP) * Refactor processing of splat/expansion parameters * Fix assignment of default parameters for parameters that come after a splat * Better check for whether a param is attached to `this` * More understandable variable names * For parameters after a splat or expansion, assign them similar to the 1.x destructuring method of using `arguments`, except only concern ourselves with the post-splat parameters instead of all parameters; and use the splat/expansion parameter name, since `arguments` in ES fat arrow functions refers to the parent function’s `arguments` rather than the fat arrow function’s arguments/parameters * Don’t add unnamed parameters (like `[]` as a parameter) to the function scope * Disallow multiple splat/expansion parameters in function definitions; disallow lone expansion parameters * Fix `this` params not getting assigned if the parameter is after a splat parameter * Allow names of function parameters attached to `this` to be reserved words * Always add a statement to the function body defining a variable with its default value, if it has one, if the variable `== null`; this covers the case when ES doesn’t apply the default value when `null` is passed in as a value, but CoffeeScript expects `null` and `undefined` to act interchangeably * Aftermath of having both `undefined` and `null` trigger the use of default values for parameters with default values * More careful parsing of destructured parameters * Fall back to processing destructured parameters in the function body, to account for `this` or default values within destructured objects * Clean up comments * Restore new bare function test, minus the arrow function part of it * Test that bound/arrow functions aren’t overwriting the `arguments` object, which should refer to the parent scope’s `arguments` (like `this`) * Follow ES2015 spec for parameter default values: `null` gets assigned as as `null`, not the default value * Mimic ES default parameters behavior for parameters after a splat or expansion parameter * Bound functions cannot be generators: remove no-longer-relevant test, add check to throw error if `yield` appears inside a bound (arrow) function * Error for bound generator functions should underline the `yield`
2016-10-26 05:26:13 +00:00
Lexer.prototype.tokenize = function(code, opts = {}) {
var consumed, end, i, ref2;
this.literate = opts.literate;
this.indent = 0;
this.baseIndent = 0;
this.indebt = 0;
this.outdebt = 0;
this.indents = [];
this.indentLiteral = '';
2011-09-16 23:26:04 +00:00
this.ends = [];
this.tokens = [];
this.seenFor = false;
Support import and export of ES2015 modules (#4300) This pull request adds support for ES2015 modules, by recognizing `import` and `export` statements. The following syntaxes are supported, based on the MDN [import](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Statements/import) and [export](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Statements/export) pages: ```js import "module-name" import defaultMember from "module-name" import * as name from "module-name" import { } from "module-name" import { member } from "module-name" import { member as alias } from "module-name" import { member1, member2 as alias2, … } from "module-name" import defaultMember, * as name from "module-name" import defaultMember, { … } from "module-name" export default expression export class name export { } export { name } export { name as exportedName } export { name as default } export { name1, name2 as exportedName2, name3 as default, … } export * from "module-name" export { … } from "module-name" ``` As a subsitute for ECMAScript’s `export var name = …` and `export function name {}`, CoffeeScript also supports: ```js export name = … ``` CoffeeScript also supports optional commas within `{ … }`. This PR converts the supported `import` and `export` statements into ES2015 `import` and `export` statements; it **does not resolve the modules**. So any CoffeeScript with `import` or `export` statements will be output as ES2015, and will need to be transpiled by another tool such as Babel before it can be used in a browser. We will need to add a warning to the documentation explaining this. This should be fully backwards-compatible, as `import` and `export` were previously reserved keywords. No flags are used. There are extensive tests included, though because no current JavaScript runtime supports `import` or `export`, the tests compare strings of what the compiled CoffeeScript output is against what the expected ES2015 should be. I also conducted two more elaborate tests: * I forked the [ember-piqu](https://github.com/pauc/piqu-ember) project, which was an Ember CLI app that used ember-cli-coffeescript and [ember-cli-coffees6](https://github.com/alexspeller/ember-cli-coffees6) (which adds “support” for `import`/`export` by wrapping such statements in backticks before passing the result to the CoffeeScript compiler). I removed `ember-cli-coffees6` and replaced the CoffeeScript compiler used in the build chain with this code, and the app built without errors. [Demo here.](https://github.com/GeoffreyBooth/coffeescript-modules-test-piqu) * I also forked the [CoffeeScript version of Meteor’s Todos example app](https://github.com/meteor/todos/tree/coffeescript), and replaced all of its `require` statements with the `import` and `export` statements from the original ES2015 version of the app on its `master` branch. I then updated the `coffeescript` Meteor package in the app to use this new code, and again the app builds without errors. [Demo here.](https://github.com/GeoffreyBooth/coffeescript-modules-test-meteor-todos) The discussion history for this work started [here](https://github.com/jashkenas/coffeescript/pull/4160) and continued [here](https://github.com/GeoffreyBooth/coffeescript/pull/2). @lydell provided guidance, and @JimPanic and @rattrayalex contributed essential code.
2016-09-14 18:46:05 +00:00
this.seenImport = false;
this.seenExport = false;
this.exportSpecifierList = false;
this.chunkLine = opts.line || 0;
this.chunkColumn = opts.column || 0;
code = this.clean(code);
i = 0;
while (this.chunk = code.slice(i)) {
Refactor interpolation (and string and regex) handling in lexer - Fix #3394: Unclosed single-quoted strings (both regular ones and heredocs) used to pass through the lexer, causing a parsing error later, while double-quoted strings caused an error already in the lexing phase. Now both single and double-quoted unclosed strings error out in the lexer (which is the more logical option) with consistent error messages. This also fixes the last comment by @satyr in #3301. - Similar to the above, unclosed heregexes also used to pass through the lexer and not error until in the parsing phase, which resulted in confusing error messages. This has been fixed, too. - Fix #3348, by adding passing tests. - Fix #3529: If a string starts with an interpolation, an empty string is no longer emitted before the interpolation (unless it is needed to coerce the interpolation into a string). - Block comments cannot contain `*/`. Now the error message also shows exactly where the offending `*/`. This improvement might seem unrelated, but I had to touch that code anyway to refactor string and regex related code, and the change was very trivial. Moreover, it's consistent with the next two points. - Regexes cannot start with `*`. Now the error message also shows exactly where the offending `*` is. (It might actually not be exatly at the start in heregexes.) It is a very minor improvement, but it was trivial to add. - Octal escapes in strings are forbidden in CoffeeScript (just like in JavaScript strict mode). However, this used to be the case only for regular strings. Now they are also forbidden in heredocs. Moreover, the errors now point at the offending octal escape. - Invalid regex flags are no longer allowed. This includes repeated modifiers and unknown ones. Moreover, invalid modifiers do not stop a heregex from being matched, which results in better error messages. - Fix #3621: `///a#{1}///` compiles to `RegExp("a" + 1)`. So does `RegExp("a#{1}")`. Still, those two code snippets used to generate different tokens, which is a bit weird, but more importantly causes problems for coffeelint (see clutchski/coffeelint#340). This required lots of tests in test/location.coffee to be updated. Note that some updates to those tests are unrelated to this point; some have been updated to be more consistent (I discovered this because the refactored code happened to be seemingly more correct). - Regular regex literals used to erraneously allow newlines to be escaped, causing invalid JavaScript output. This has been fixed. - Heregexes may now be completely empty (`//////`), instead of erroring out with a confusing message. - Fix #2388: Heredocs and heregexes used to be lexed simply, which meant that you couldn't nest a heredoc within a heredoc (double-quoted, that is) or a heregex inside a heregex. - Fix #2321: If you used division inside interpolation and then a slash later in the string containing that interpolation, the division slash and the latter slash was erraneously matched as a regex. This has been fixed. - Indentation inside interpolations in heredocs no longer affect how much indentation is removed from each line of the heredoc (which is more intuitive). - Whitespace is now correctly trimmed from the start and end of strings in a few edge cases. - Last but not least, the lexing of interpolated strings now seems to be more efficient. For a regular double-quoted string, we used to use a custom function to find the end of it (taking interpolations and interpolations within interpolations etc. into account). Then we used to re-find the interpolations and recursively lex their contents. In effect, the same string was processed twice, or even more in the case of deeper nesting of interpolations. Now the same string is processed just once. - Code duplication between regular strings, heredocs, regular regexes and heregexes has been reduced. - The above two points should result in more easily read code, too.
2015-01-03 22:40:43 +00:00
consumed = this.identifierToken() || this.commentToken() || this.whitespaceToken() || this.lineToken() || this.stringToken() || this.numberToken() || this.regexToken() || this.jsToken() || this.literalToken();
ref2 = this.getLineAndColumnFromChunk(consumed), this.chunkLine = ref2[0], this.chunkColumn = ref2[1];
i += consumed;
Refactor interpolation (and string and regex) handling in lexer - Fix #3394: Unclosed single-quoted strings (both regular ones and heredocs) used to pass through the lexer, causing a parsing error later, while double-quoted strings caused an error already in the lexing phase. Now both single and double-quoted unclosed strings error out in the lexer (which is the more logical option) with consistent error messages. This also fixes the last comment by @satyr in #3301. - Similar to the above, unclosed heregexes also used to pass through the lexer and not error until in the parsing phase, which resulted in confusing error messages. This has been fixed, too. - Fix #3348, by adding passing tests. - Fix #3529: If a string starts with an interpolation, an empty string is no longer emitted before the interpolation (unless it is needed to coerce the interpolation into a string). - Block comments cannot contain `*/`. Now the error message also shows exactly where the offending `*/`. This improvement might seem unrelated, but I had to touch that code anyway to refactor string and regex related code, and the change was very trivial. Moreover, it's consistent with the next two points. - Regexes cannot start with `*`. Now the error message also shows exactly where the offending `*` is. (It might actually not be exatly at the start in heregexes.) It is a very minor improvement, but it was trivial to add. - Octal escapes in strings are forbidden in CoffeeScript (just like in JavaScript strict mode). However, this used to be the case only for regular strings. Now they are also forbidden in heredocs. Moreover, the errors now point at the offending octal escape. - Invalid regex flags are no longer allowed. This includes repeated modifiers and unknown ones. Moreover, invalid modifiers do not stop a heregex from being matched, which results in better error messages. - Fix #3621: `///a#{1}///` compiles to `RegExp("a" + 1)`. So does `RegExp("a#{1}")`. Still, those two code snippets used to generate different tokens, which is a bit weird, but more importantly causes problems for coffeelint (see clutchski/coffeelint#340). This required lots of tests in test/location.coffee to be updated. Note that some updates to those tests are unrelated to this point; some have been updated to be more consistent (I discovered this because the refactored code happened to be seemingly more correct). - Regular regex literals used to erraneously allow newlines to be escaped, causing invalid JavaScript output. This has been fixed. - Heregexes may now be completely empty (`//////`), instead of erroring out with a confusing message. - Fix #2388: Heredocs and heregexes used to be lexed simply, which meant that you couldn't nest a heredoc within a heredoc (double-quoted, that is) or a heregex inside a heregex. - Fix #2321: If you used division inside interpolation and then a slash later in the string containing that interpolation, the division slash and the latter slash was erraneously matched as a regex. This has been fixed. - Indentation inside interpolations in heredocs no longer affect how much indentation is removed from each line of the heredoc (which is more intuitive). - Whitespace is now correctly trimmed from the start and end of strings in a few edge cases. - Last but not least, the lexing of interpolated strings now seems to be more efficient. For a regular double-quoted string, we used to use a custom function to find the end of it (taking interpolations and interpolations within interpolations etc. into account). Then we used to re-find the interpolations and recursively lex their contents. In effect, the same string was processed twice, or even more in the case of deeper nesting of interpolations. Now the same string is processed just once. - Code duplication between regular strings, heredocs, regular regexes and heregexes has been reduced. - The above two points should result in more easily read code, too.
2015-01-03 22:40:43 +00:00
if (opts.untilBalanced && this.ends.length === 0) {
return {
tokens: this.tokens,
index: i
};
}
}
this.closeIndentation();
if (end = this.ends.pop()) {
this.error("missing " + end.tag, end.origin[2]);
2012-04-10 18:57:45 +00:00
}
if (opts.rewrite === false) {
return this.tokens;
}
return (new Rewriter).rewrite(this.tokens);
};
Lexer.prototype.clean = function(code) {
if (code.charCodeAt(0) === BOM) {
code = code.slice(1);
}
code = code.replace(/\r/g, '').replace(TRAILING_SPACES, '');
if (WHITESPACE.test(code)) {
code = "\n" + code;
this.chunkLine--;
}
if (this.literate) {
code = invertLiterate(code);
}
return code;
};
Lexer.prototype.identifierToken = function() {
var alias, colon, colonOffset, id, idLength, input, match, poppedToken, prev, ref2, ref3, ref4, ref5, ref6, ref7, tag, tagToken;
2014-09-06 10:55:27 +00:00
if (!(match = IDENTIFIER.exec(this.chunk))) {
2012-04-10 18:57:45 +00:00
return 0;
}
input = match[0], id = match[1], colon = match[2];
idLength = id.length;
poppedToken = void 0;
2010-11-28 23:33:43 +00:00
if (id === 'own' && this.tag() === 'FOR') {
this.token('OWN', id);
return id.length;
}
2014-09-06 11:53:21 +00:00
if (id === 'from' && this.tag() === 'YIELD') {
this.token('FROM', id);
return id.length;
}
if (id === 'as' && this.seenImport) {
if (this.value() === '*') {
this.tokens[this.tokens.length - 1][0] = 'IMPORT_ALL';
} else if (ref2 = this.value(), indexOf.call(COFFEE_KEYWORDS, ref2) >= 0) {
this.tokens[this.tokens.length - 1][0] = 'IDENTIFIER';
}
if ((ref3 = this.tag()) === 'IMPORT_ALL' || ref3 === 'IDENTIFIER') {
this.token('AS', id);
return id.length;
}
}
if (id === 'as' && this.seenExport && this.tag() === 'IDENTIFIER') {
Support import and export of ES2015 modules (#4300) This pull request adds support for ES2015 modules, by recognizing `import` and `export` statements. The following syntaxes are supported, based on the MDN [import](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Statements/import) and [export](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Statements/export) pages: ```js import "module-name" import defaultMember from "module-name" import * as name from "module-name" import { } from "module-name" import { member } from "module-name" import { member as alias } from "module-name" import { member1, member2 as alias2, … } from "module-name" import defaultMember, * as name from "module-name" import defaultMember, { … } from "module-name" export default expression export class name export { } export { name } export { name as exportedName } export { name as default } export { name1, name2 as exportedName2, name3 as default, … } export * from "module-name" export { … } from "module-name" ``` As a subsitute for ECMAScript’s `export var name = …` and `export function name {}`, CoffeeScript also supports: ```js export name = … ``` CoffeeScript also supports optional commas within `{ … }`. This PR converts the supported `import` and `export` statements into ES2015 `import` and `export` statements; it **does not resolve the modules**. So any CoffeeScript with `import` or `export` statements will be output as ES2015, and will need to be transpiled by another tool such as Babel before it can be used in a browser. We will need to add a warning to the documentation explaining this. This should be fully backwards-compatible, as `import` and `export` were previously reserved keywords. No flags are used. There are extensive tests included, though because no current JavaScript runtime supports `import` or `export`, the tests compare strings of what the compiled CoffeeScript output is against what the expected ES2015 should be. I also conducted two more elaborate tests: * I forked the [ember-piqu](https://github.com/pauc/piqu-ember) project, which was an Ember CLI app that used ember-cli-coffeescript and [ember-cli-coffees6](https://github.com/alexspeller/ember-cli-coffees6) (which adds “support” for `import`/`export` by wrapping such statements in backticks before passing the result to the CoffeeScript compiler). I removed `ember-cli-coffees6` and replaced the CoffeeScript compiler used in the build chain with this code, and the app built without errors. [Demo here.](https://github.com/GeoffreyBooth/coffeescript-modules-test-piqu) * I also forked the [CoffeeScript version of Meteor’s Todos example app](https://github.com/meteor/todos/tree/coffeescript), and replaced all of its `require` statements with the `import` and `export` statements from the original ES2015 version of the app on its `master` branch. I then updated the `coffeescript` Meteor package in the app to use this new code, and again the app builds without errors. [Demo here.](https://github.com/GeoffreyBooth/coffeescript-modules-test-meteor-todos) The discussion history for this work started [here](https://github.com/jashkenas/coffeescript/pull/4160) and continued [here](https://github.com/GeoffreyBooth/coffeescript/pull/2). @lydell provided guidance, and @JimPanic and @rattrayalex contributed essential code.
2016-09-14 18:46:05 +00:00
this.token('AS', id);
return id.length;
}
if (id === 'default' && this.seenExport) {
this.token('DEFAULT', id);
return id.length;
}
ref4 = this.tokens, prev = ref4[ref4.length - 1];
tag = colon || (prev != null) && (((ref5 = prev[0]) === '.' || ref5 === '?.' || ref5 === '::' || ref5 === '?::') || !prev.spaced && prev[0] === '@') ? 'PROPERTY' : 'IDENTIFIER';
if (tag === 'IDENTIFIER' && (indexOf.call(JS_KEYWORDS, id) >= 0 || indexOf.call(COFFEE_KEYWORDS, id) >= 0) && !(this.exportSpecifierList && indexOf.call(COFFEE_KEYWORDS, id) >= 0)) {
tag = id.toUpperCase();
if (tag === 'WHEN' && (ref6 = this.tag(), indexOf.call(LINE_BREAK, ref6) >= 0)) {
2010-09-23 05:14:18 +00:00
tag = 'LEADING_WHEN';
} else if (tag === 'FOR') {
this.seenFor = true;
} else if (tag === 'UNLESS') {
tag = 'IF';
Support import and export of ES2015 modules (#4300) This pull request adds support for ES2015 modules, by recognizing `import` and `export` statements. The following syntaxes are supported, based on the MDN [import](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Statements/import) and [export](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Statements/export) pages: ```js import "module-name" import defaultMember from "module-name" import * as name from "module-name" import { } from "module-name" import { member } from "module-name" import { member as alias } from "module-name" import { member1, member2 as alias2, … } from "module-name" import defaultMember, * as name from "module-name" import defaultMember, { … } from "module-name" export default expression export class name export { } export { name } export { name as exportedName } export { name as default } export { name1, name2 as exportedName2, name3 as default, … } export * from "module-name" export { … } from "module-name" ``` As a subsitute for ECMAScript’s `export var name = …` and `export function name {}`, CoffeeScript also supports: ```js export name = … ``` CoffeeScript also supports optional commas within `{ … }`. This PR converts the supported `import` and `export` statements into ES2015 `import` and `export` statements; it **does not resolve the modules**. So any CoffeeScript with `import` or `export` statements will be output as ES2015, and will need to be transpiled by another tool such as Babel before it can be used in a browser. We will need to add a warning to the documentation explaining this. This should be fully backwards-compatible, as `import` and `export` were previously reserved keywords. No flags are used. There are extensive tests included, though because no current JavaScript runtime supports `import` or `export`, the tests compare strings of what the compiled CoffeeScript output is against what the expected ES2015 should be. I also conducted two more elaborate tests: * I forked the [ember-piqu](https://github.com/pauc/piqu-ember) project, which was an Ember CLI app that used ember-cli-coffeescript and [ember-cli-coffees6](https://github.com/alexspeller/ember-cli-coffees6) (which adds “support” for `import`/`export` by wrapping such statements in backticks before passing the result to the CoffeeScript compiler). I removed `ember-cli-coffees6` and replaced the CoffeeScript compiler used in the build chain with this code, and the app built without errors. [Demo here.](https://github.com/GeoffreyBooth/coffeescript-modules-test-piqu) * I also forked the [CoffeeScript version of Meteor’s Todos example app](https://github.com/meteor/todos/tree/coffeescript), and replaced all of its `require` statements with the `import` and `export` statements from the original ES2015 version of the app on its `master` branch. I then updated the `coffeescript` Meteor package in the app to use this new code, and again the app builds without errors. [Demo here.](https://github.com/GeoffreyBooth/coffeescript-modules-test-meteor-todos) The discussion history for this work started [here](https://github.com/jashkenas/coffeescript/pull/4160) and continued [here](https://github.com/GeoffreyBooth/coffeescript/pull/2). @lydell provided guidance, and @JimPanic and @rattrayalex contributed essential code.
2016-09-14 18:46:05 +00:00
} else if (tag === 'IMPORT') {
this.seenImport = true;
} else if (tag === 'EXPORT') {
this.seenExport = true;
} else if (indexOf.call(UNARY, tag) >= 0) {
tag = 'UNARY';
} else if (indexOf.call(RELATION, tag) >= 0) {
if (tag !== 'INSTANCEOF' && this.seenFor) {
tag = 'FOR' + tag;
this.seenFor = false;
} else {
tag = 'RELATION';
if (this.value() === '!') {
poppedToken = this.tokens.pop();
id = '!' + id;
}
}
2010-09-23 05:14:18 +00:00
}
}
if (tag === 'IDENTIFIER' && indexOf.call(RESERVED, id) >= 0) {
this.error("reserved word '" + id + "'", {
length: id.length
});
}
if (tag !== 'PROPERTY') {
if (indexOf.call(COFFEE_ALIASES, id) >= 0) {
alias = id;
2012-04-10 18:57:45 +00:00
id = COFFEE_ALIAS_MAP[id];
}
2010-12-23 18:50:52 +00:00
tag = (function() {
2010-11-05 04:04:52 +00:00
switch (id) {
case '!':
return 'UNARY';
case '==':
case '!=':
return 'COMPARE';
case 'true':
case 'false':
return 'BOOL';
case 'break':
case 'continue':
Refactor `Literal` into several subtypes Previously, the parser created `Literal` nodes for many things. This resulted in information loss. Instead of being able to check the node type, we had to use regexes to tell the different types of `Literal`s apart. That was a bit like parsing literals twice: Once in the lexer, and once (or more) in the compiler. It also caused problems, such as `` `this` `` and `this` being indistinguishable (fixes #2009). Instead returning `new Literal` in the grammar, subtypes of it are now returned instead, such as `NumberLiteral`, `StringLiteral` and `IdentifierLiteral`. `new Literal` by itself is only used to represent code chunks that fit no category. (While mentioning `NumberLiteral`, there's also `InfinityLiteral` now, which is a subtype of `NumberLiteral`.) `StringWithInterpolations` has been added as a subtype of `Parens`, and `RegexWithInterpolations` as a subtype of `Call`. This makes it easier for other programs to make use of CoffeeScript's "AST" (nodes). For example, it is now possible to distinguish between `"a #{b} c"` and `"a " + b + " c"`. Fixes #4192. `SuperCall` has been added as a subtype of `Call`. Note, though, that some information is still lost, especially in the lexer. For example, there is no way to distinguish a heredoc from a regular string, or a heregex without interpolations from a regular regex. Binary and octal number literals are indistinguishable from hexadecimal literals. After the new subtypes were added, they were taken advantage of, removing most regexes in nodes.coffee. `SIMPLENUM` (which matches non-hex integers) had to be kept, though, because such numbers need special handling in JavaScript (for example in `1..toString()`). An especially nice hack to get rid of was using `new String()` for the token value for reserved identifiers (to be able to set a property on them which could survive through the parser). Now it's a good old regular string. In range literals, slices, splices and for loop steps when number literals are involved, CoffeeScript can do some optimizations, such as precomputing the value of, say, `5 - 3` (outputting `2` instead of `5 - 3` literally). As a side bonus, this now also works with hexadecimal number literals, such as `0x02`. Finally, this also improves the output of `coffee --nodes`: # Before: $ bin/coffee -ne 'while true "#{a}" break' Block While Value Bool Block Value Parens Block Op + Value """" Value Parens Block Value "a" "break" # After: $ bin/coffee -ne 'while true "#{a}" break' Block While Value BooleanLiteral: true Block Value StringWithInterpolations Block Op + Value StringLiteral: "" Value Parens Block Value IdentifierLiteral: a StatementLiteral: break
2016-01-31 19:24:31 +00:00
case 'debugger':
return 'STATEMENT';
case '&&':
case '||':
return id;
2010-11-05 04:04:52 +00:00
default:
return tag;
}
2010-12-23 18:50:52 +00:00
})();
2010-03-22 01:46:06 +00:00
}
tagToken = this.token(tag, id, 0, idLength);
if (alias) {
tagToken.origin = [tag, alias, tagToken[2]];
}
if (poppedToken) {
ref7 = [poppedToken[2].first_line, poppedToken[2].first_column], tagToken[2].first_line = ref7[0], tagToken[2].first_column = ref7[1];
}
2012-04-10 18:57:45 +00:00
if (colon) {
colonOffset = input.lastIndexOf(':');
this.token(':', ':', colonOffset, colon.length);
2012-04-10 18:57:45 +00:00
}
return input.length;
};
Lexer.prototype.numberToken = function() {
var base, lexedLength, match, number, numberValue, tag;
2012-04-10 18:57:45 +00:00
if (!(match = NUMBER.exec(this.chunk))) {
return 0;
}
number = match[0];
lexedLength = number.length;
switch (false) {
case !/^0[BOX]/.test(number):
this.error("radix prefix in '" + number + "' must be lowercase", {
offset: 1
});
break;
case !/^(?!0x).*E/.test(number):
this.error("exponential notation in '" + number + "' must be indicated with a lowercase 'e'", {
offset: number.indexOf('E')
});
break;
case !/^0\d*[89]/.test(number):
this.error("decimal literal '" + number + "' must not be prefixed with '0'", {
length: lexedLength
});
break;
case !/^0\d+/.test(number):
this.error("octal literal '" + number + "' must be prefixed with '0o'", {
length: lexedLength
});
}
base = (function() {
switch (number.charAt(1)) {
case 'b':
return 2;
case 'o':
return 8;
case 'x':
return 16;
default:
return null;
}
})();
numberValue = base != null ? parseInt(number.slice(2), base) : parseFloat(number);
tag = numberValue === 2e308 ? 'INFINITY' : 'NUMBER';
Refactor `Literal` into several subtypes Previously, the parser created `Literal` nodes for many things. This resulted in information loss. Instead of being able to check the node type, we had to use regexes to tell the different types of `Literal`s apart. That was a bit like parsing literals twice: Once in the lexer, and once (or more) in the compiler. It also caused problems, such as `` `this` `` and `this` being indistinguishable (fixes #2009). Instead returning `new Literal` in the grammar, subtypes of it are now returned instead, such as `NumberLiteral`, `StringLiteral` and `IdentifierLiteral`. `new Literal` by itself is only used to represent code chunks that fit no category. (While mentioning `NumberLiteral`, there's also `InfinityLiteral` now, which is a subtype of `NumberLiteral`.) `StringWithInterpolations` has been added as a subtype of `Parens`, and `RegexWithInterpolations` as a subtype of `Call`. This makes it easier for other programs to make use of CoffeeScript's "AST" (nodes). For example, it is now possible to distinguish between `"a #{b} c"` and `"a " + b + " c"`. Fixes #4192. `SuperCall` has been added as a subtype of `Call`. Note, though, that some information is still lost, especially in the lexer. For example, there is no way to distinguish a heredoc from a regular string, or a heregex without interpolations from a regular regex. Binary and octal number literals are indistinguishable from hexadecimal literals. After the new subtypes were added, they were taken advantage of, removing most regexes in nodes.coffee. `SIMPLENUM` (which matches non-hex integers) had to be kept, though, because such numbers need special handling in JavaScript (for example in `1..toString()`). An especially nice hack to get rid of was using `new String()` for the token value for reserved identifiers (to be able to set a property on them which could survive through the parser). Now it's a good old regular string. In range literals, slices, splices and for loop steps when number literals are involved, CoffeeScript can do some optimizations, such as precomputing the value of, say, `5 - 3` (outputting `2` instead of `5 - 3` literally). As a side bonus, this now also works with hexadecimal number literals, such as `0x02`. Finally, this also improves the output of `coffee --nodes`: # Before: $ bin/coffee -ne 'while true "#{a}" break' Block While Value Bool Block Value Parens Block Op + Value """" Value Parens Block Value "a" "break" # After: $ bin/coffee -ne 'while true "#{a}" break' Block While Value BooleanLiteral: true Block Value StringWithInterpolations Block Op + Value StringLiteral: "" Value Parens Block Value IdentifierLiteral: a StatementLiteral: break
2016-01-31 19:24:31 +00:00
this.token(tag, number, 0, lexedLength);
2011-10-24 18:51:41 +00:00
return lexedLength;
};
Lexer.prototype.stringToken = function() {
var $, attempt, delimiter, doc, end, heredoc, i, indent, indentRegex, match, quote, ref2, ref3, regex, token, tokens;
Refactor interpolation (and string and regex) handling in lexer - Fix #3394: Unclosed single-quoted strings (both regular ones and heredocs) used to pass through the lexer, causing a parsing error later, while double-quoted strings caused an error already in the lexing phase. Now both single and double-quoted unclosed strings error out in the lexer (which is the more logical option) with consistent error messages. This also fixes the last comment by @satyr in #3301. - Similar to the above, unclosed heregexes also used to pass through the lexer and not error until in the parsing phase, which resulted in confusing error messages. This has been fixed, too. - Fix #3348, by adding passing tests. - Fix #3529: If a string starts with an interpolation, an empty string is no longer emitted before the interpolation (unless it is needed to coerce the interpolation into a string). - Block comments cannot contain `*/`. Now the error message also shows exactly where the offending `*/`. This improvement might seem unrelated, but I had to touch that code anyway to refactor string and regex related code, and the change was very trivial. Moreover, it's consistent with the next two points. - Regexes cannot start with `*`. Now the error message also shows exactly where the offending `*` is. (It might actually not be exatly at the start in heregexes.) It is a very minor improvement, but it was trivial to add. - Octal escapes in strings are forbidden in CoffeeScript (just like in JavaScript strict mode). However, this used to be the case only for regular strings. Now they are also forbidden in heredocs. Moreover, the errors now point at the offending octal escape. - Invalid regex flags are no longer allowed. This includes repeated modifiers and unknown ones. Moreover, invalid modifiers do not stop a heregex from being matched, which results in better error messages. - Fix #3621: `///a#{1}///` compiles to `RegExp("a" + 1)`. So does `RegExp("a#{1}")`. Still, those two code snippets used to generate different tokens, which is a bit weird, but more importantly causes problems for coffeelint (see clutchski/coffeelint#340). This required lots of tests in test/location.coffee to be updated. Note that some updates to those tests are unrelated to this point; some have been updated to be more consistent (I discovered this because the refactored code happened to be seemingly more correct). - Regular regex literals used to erraneously allow newlines to be escaped, causing invalid JavaScript output. This has been fixed. - Heregexes may now be completely empty (`//////`), instead of erroring out with a confusing message. - Fix #2388: Heredocs and heregexes used to be lexed simply, which meant that you couldn't nest a heredoc within a heredoc (double-quoted, that is) or a heregex inside a heregex. - Fix #2321: If you used division inside interpolation and then a slash later in the string containing that interpolation, the division slash and the latter slash was erraneously matched as a regex. This has been fixed. - Indentation inside interpolations in heredocs no longer affect how much indentation is removed from each line of the heredoc (which is more intuitive). - Whitespace is now correctly trimmed from the start and end of strings in a few edge cases. - Last but not least, the lexing of interpolated strings now seems to be more efficient. For a regular double-quoted string, we used to use a custom function to find the end of it (taking interpolations and interpolations within interpolations etc. into account). Then we used to re-find the interpolations and recursively lex their contents. In effect, the same string was processed twice, or even more in the case of deeper nesting of interpolations. Now the same string is processed just once. - Code duplication between regular strings, heredocs, regular regexes and heregexes has been reduced. - The above two points should result in more easily read code, too.
2015-01-03 22:40:43 +00:00
quote = (STRING_START.exec(this.chunk) || [])[0];
if (!quote) {
return 0;
}
Support import and export of ES2015 modules (#4300) This pull request adds support for ES2015 modules, by recognizing `import` and `export` statements. The following syntaxes are supported, based on the MDN [import](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Statements/import) and [export](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Statements/export) pages: ```js import "module-name" import defaultMember from "module-name" import * as name from "module-name" import { } from "module-name" import { member } from "module-name" import { member as alias } from "module-name" import { member1, member2 as alias2, … } from "module-name" import defaultMember, * as name from "module-name" import defaultMember, { … } from "module-name" export default expression export class name export { } export { name } export { name as exportedName } export { name as default } export { name1, name2 as exportedName2, name3 as default, … } export * from "module-name" export { … } from "module-name" ``` As a subsitute for ECMAScript’s `export var name = …` and `export function name {}`, CoffeeScript also supports: ```js export name = … ``` CoffeeScript also supports optional commas within `{ … }`. This PR converts the supported `import` and `export` statements into ES2015 `import` and `export` statements; it **does not resolve the modules**. So any CoffeeScript with `import` or `export` statements will be output as ES2015, and will need to be transpiled by another tool such as Babel before it can be used in a browser. We will need to add a warning to the documentation explaining this. This should be fully backwards-compatible, as `import` and `export` were previously reserved keywords. No flags are used. There are extensive tests included, though because no current JavaScript runtime supports `import` or `export`, the tests compare strings of what the compiled CoffeeScript output is against what the expected ES2015 should be. I also conducted two more elaborate tests: * I forked the [ember-piqu](https://github.com/pauc/piqu-ember) project, which was an Ember CLI app that used ember-cli-coffeescript and [ember-cli-coffees6](https://github.com/alexspeller/ember-cli-coffees6) (which adds “support” for `import`/`export` by wrapping such statements in backticks before passing the result to the CoffeeScript compiler). I removed `ember-cli-coffees6` and replaced the CoffeeScript compiler used in the build chain with this code, and the app built without errors. [Demo here.](https://github.com/GeoffreyBooth/coffeescript-modules-test-piqu) * I also forked the [CoffeeScript version of Meteor’s Todos example app](https://github.com/meteor/todos/tree/coffeescript), and replaced all of its `require` statements with the `import` and `export` statements from the original ES2015 version of the app on its `master` branch. I then updated the `coffeescript` Meteor package in the app to use this new code, and again the app builds without errors. [Demo here.](https://github.com/GeoffreyBooth/coffeescript-modules-test-meteor-todos) The discussion history for this work started [here](https://github.com/jashkenas/coffeescript/pull/4160) and continued [here](https://github.com/GeoffreyBooth/coffeescript/pull/2). @lydell provided guidance, and @JimPanic and @rattrayalex contributed essential code.
2016-09-14 18:46:05 +00:00
if (this.tokens.length && this.value() === 'from' && (this.seenImport || this.seenExport)) {
this.tokens[this.tokens.length - 1][0] = 'FROM';
}
Refactor interpolation (and string and regex) handling in lexer - Fix #3394: Unclosed single-quoted strings (both regular ones and heredocs) used to pass through the lexer, causing a parsing error later, while double-quoted strings caused an error already in the lexing phase. Now both single and double-quoted unclosed strings error out in the lexer (which is the more logical option) with consistent error messages. This also fixes the last comment by @satyr in #3301. - Similar to the above, unclosed heregexes also used to pass through the lexer and not error until in the parsing phase, which resulted in confusing error messages. This has been fixed, too. - Fix #3348, by adding passing tests. - Fix #3529: If a string starts with an interpolation, an empty string is no longer emitted before the interpolation (unless it is needed to coerce the interpolation into a string). - Block comments cannot contain `*/`. Now the error message also shows exactly where the offending `*/`. This improvement might seem unrelated, but I had to touch that code anyway to refactor string and regex related code, and the change was very trivial. Moreover, it's consistent with the next two points. - Regexes cannot start with `*`. Now the error message also shows exactly where the offending `*` is. (It might actually not be exatly at the start in heregexes.) It is a very minor improvement, but it was trivial to add. - Octal escapes in strings are forbidden in CoffeeScript (just like in JavaScript strict mode). However, this used to be the case only for regular strings. Now they are also forbidden in heredocs. Moreover, the errors now point at the offending octal escape. - Invalid regex flags are no longer allowed. This includes repeated modifiers and unknown ones. Moreover, invalid modifiers do not stop a heregex from being matched, which results in better error messages. - Fix #3621: `///a#{1}///` compiles to `RegExp("a" + 1)`. So does `RegExp("a#{1}")`. Still, those two code snippets used to generate different tokens, which is a bit weird, but more importantly causes problems for coffeelint (see clutchski/coffeelint#340). This required lots of tests in test/location.coffee to be updated. Note that some updates to those tests are unrelated to this point; some have been updated to be more consistent (I discovered this because the refactored code happened to be seemingly more correct). - Regular regex literals used to erraneously allow newlines to be escaped, causing invalid JavaScript output. This has been fixed. - Heregexes may now be completely empty (`//////`), instead of erroring out with a confusing message. - Fix #2388: Heredocs and heregexes used to be lexed simply, which meant that you couldn't nest a heredoc within a heredoc (double-quoted, that is) or a heregex inside a heregex. - Fix #2321: If you used division inside interpolation and then a slash later in the string containing that interpolation, the division slash and the latter slash was erraneously matched as a regex. This has been fixed. - Indentation inside interpolations in heredocs no longer affect how much indentation is removed from each line of the heredoc (which is more intuitive). - Whitespace is now correctly trimmed from the start and end of strings in a few edge cases. - Last but not least, the lexing of interpolated strings now seems to be more efficient. For a regular double-quoted string, we used to use a custom function to find the end of it (taking interpolations and interpolations within interpolations etc. into account). Then we used to re-find the interpolations and recursively lex their contents. In effect, the same string was processed twice, or even more in the case of deeper nesting of interpolations. Now the same string is processed just once. - Code duplication between regular strings, heredocs, regular regexes and heregexes has been reduced. - The above two points should result in more easily read code, too.
2015-01-03 22:40:43 +00:00
regex = (function() {
switch (quote) {
case "'":
return STRING_SINGLE;
case '"':
return STRING_DOUBLE;
case "'''":
return HEREDOC_SINGLE;
case '"""':
return HEREDOC_DOUBLE;
}
Refactor interpolation (and string and regex) handling in lexer - Fix #3394: Unclosed single-quoted strings (both regular ones and heredocs) used to pass through the lexer, causing a parsing error later, while double-quoted strings caused an error already in the lexing phase. Now both single and double-quoted unclosed strings error out in the lexer (which is the more logical option) with consistent error messages. This also fixes the last comment by @satyr in #3301. - Similar to the above, unclosed heregexes also used to pass through the lexer and not error until in the parsing phase, which resulted in confusing error messages. This has been fixed, too. - Fix #3348, by adding passing tests. - Fix #3529: If a string starts with an interpolation, an empty string is no longer emitted before the interpolation (unless it is needed to coerce the interpolation into a string). - Block comments cannot contain `*/`. Now the error message also shows exactly where the offending `*/`. This improvement might seem unrelated, but I had to touch that code anyway to refactor string and regex related code, and the change was very trivial. Moreover, it's consistent with the next two points. - Regexes cannot start with `*`. Now the error message also shows exactly where the offending `*` is. (It might actually not be exatly at the start in heregexes.) It is a very minor improvement, but it was trivial to add. - Octal escapes in strings are forbidden in CoffeeScript (just like in JavaScript strict mode). However, this used to be the case only for regular strings. Now they are also forbidden in heredocs. Moreover, the errors now point at the offending octal escape. - Invalid regex flags are no longer allowed. This includes repeated modifiers and unknown ones. Moreover, invalid modifiers do not stop a heregex from being matched, which results in better error messages. - Fix #3621: `///a#{1}///` compiles to `RegExp("a" + 1)`. So does `RegExp("a#{1}")`. Still, those two code snippets used to generate different tokens, which is a bit weird, but more importantly causes problems for coffeelint (see clutchski/coffeelint#340). This required lots of tests in test/location.coffee to be updated. Note that some updates to those tests are unrelated to this point; some have been updated to be more consistent (I discovered this because the refactored code happened to be seemingly more correct). - Regular regex literals used to erraneously allow newlines to be escaped, causing invalid JavaScript output. This has been fixed. - Heregexes may now be completely empty (`//////`), instead of erroring out with a confusing message. - Fix #2388: Heredocs and heregexes used to be lexed simply, which meant that you couldn't nest a heredoc within a heredoc (double-quoted, that is) or a heregex inside a heregex. - Fix #2321: If you used division inside interpolation and then a slash later in the string containing that interpolation, the division slash and the latter slash was erraneously matched as a regex. This has been fixed. - Indentation inside interpolations in heredocs no longer affect how much indentation is removed from each line of the heredoc (which is more intuitive). - Whitespace is now correctly trimmed from the start and end of strings in a few edge cases. - Last but not least, the lexing of interpolated strings now seems to be more efficient. For a regular double-quoted string, we used to use a custom function to find the end of it (taking interpolations and interpolations within interpolations etc. into account). Then we used to re-find the interpolations and recursively lex their contents. In effect, the same string was processed twice, or even more in the case of deeper nesting of interpolations. Now the same string is processed just once. - Code duplication between regular strings, heredocs, regular regexes and heregexes has been reduced. - The above two points should result in more easily read code, too.
2015-01-03 22:40:43 +00:00
})();
heredoc = quote.length === 3;
ref2 = this.matchWithInterpolations(regex, quote), tokens = ref2.tokens, end = ref2.index;
Refactor interpolation (and string and regex) handling in lexer - Fix #3394: Unclosed single-quoted strings (both regular ones and heredocs) used to pass through the lexer, causing a parsing error later, while double-quoted strings caused an error already in the lexing phase. Now both single and double-quoted unclosed strings error out in the lexer (which is the more logical option) with consistent error messages. This also fixes the last comment by @satyr in #3301. - Similar to the above, unclosed heregexes also used to pass through the lexer and not error until in the parsing phase, which resulted in confusing error messages. This has been fixed, too. - Fix #3348, by adding passing tests. - Fix #3529: If a string starts with an interpolation, an empty string is no longer emitted before the interpolation (unless it is needed to coerce the interpolation into a string). - Block comments cannot contain `*/`. Now the error message also shows exactly where the offending `*/`. This improvement might seem unrelated, but I had to touch that code anyway to refactor string and regex related code, and the change was very trivial. Moreover, it's consistent with the next two points. - Regexes cannot start with `*`. Now the error message also shows exactly where the offending `*` is. (It might actually not be exatly at the start in heregexes.) It is a very minor improvement, but it was trivial to add. - Octal escapes in strings are forbidden in CoffeeScript (just like in JavaScript strict mode). However, this used to be the case only for regular strings. Now they are also forbidden in heredocs. Moreover, the errors now point at the offending octal escape. - Invalid regex flags are no longer allowed. This includes repeated modifiers and unknown ones. Moreover, invalid modifiers do not stop a heregex from being matched, which results in better error messages. - Fix #3621: `///a#{1}///` compiles to `RegExp("a" + 1)`. So does `RegExp("a#{1}")`. Still, those two code snippets used to generate different tokens, which is a bit weird, but more importantly causes problems for coffeelint (see clutchski/coffeelint#340). This required lots of tests in test/location.coffee to be updated. Note that some updates to those tests are unrelated to this point; some have been updated to be more consistent (I discovered this because the refactored code happened to be seemingly more correct). - Regular regex literals used to erraneously allow newlines to be escaped, causing invalid JavaScript output. This has been fixed. - Heregexes may now be completely empty (`//////`), instead of erroring out with a confusing message. - Fix #2388: Heredocs and heregexes used to be lexed simply, which meant that you couldn't nest a heredoc within a heredoc (double-quoted, that is) or a heregex inside a heregex. - Fix #2321: If you used division inside interpolation and then a slash later in the string containing that interpolation, the division slash and the latter slash was erraneously matched as a regex. This has been fixed. - Indentation inside interpolations in heredocs no longer affect how much indentation is removed from each line of the heredoc (which is more intuitive). - Whitespace is now correctly trimmed from the start and end of strings in a few edge cases. - Last but not least, the lexing of interpolated strings now seems to be more efficient. For a regular double-quoted string, we used to use a custom function to find the end of it (taking interpolations and interpolations within interpolations etc. into account). Then we used to re-find the interpolations and recursively lex their contents. In effect, the same string was processed twice, or even more in the case of deeper nesting of interpolations. Now the same string is processed just once. - Code duplication between regular strings, heredocs, regular regexes and heregexes has been reduced. - The above two points should result in more easily read code, too.
2015-01-03 22:40:43 +00:00
$ = tokens.length - 1;
2015-02-22 16:08:15 +00:00
delimiter = quote.charAt(0);
Refactor interpolation (and string and regex) handling in lexer - Fix #3394: Unclosed single-quoted strings (both regular ones and heredocs) used to pass through the lexer, causing a parsing error later, while double-quoted strings caused an error already in the lexing phase. Now both single and double-quoted unclosed strings error out in the lexer (which is the more logical option) with consistent error messages. This also fixes the last comment by @satyr in #3301. - Similar to the above, unclosed heregexes also used to pass through the lexer and not error until in the parsing phase, which resulted in confusing error messages. This has been fixed, too. - Fix #3348, by adding passing tests. - Fix #3529: If a string starts with an interpolation, an empty string is no longer emitted before the interpolation (unless it is needed to coerce the interpolation into a string). - Block comments cannot contain `*/`. Now the error message also shows exactly where the offending `*/`. This improvement might seem unrelated, but I had to touch that code anyway to refactor string and regex related code, and the change was very trivial. Moreover, it's consistent with the next two points. - Regexes cannot start with `*`. Now the error message also shows exactly where the offending `*` is. (It might actually not be exatly at the start in heregexes.) It is a very minor improvement, but it was trivial to add. - Octal escapes in strings are forbidden in CoffeeScript (just like in JavaScript strict mode). However, this used to be the case only for regular strings. Now they are also forbidden in heredocs. Moreover, the errors now point at the offending octal escape. - Invalid regex flags are no longer allowed. This includes repeated modifiers and unknown ones. Moreover, invalid modifiers do not stop a heregex from being matched, which results in better error messages. - Fix #3621: `///a#{1}///` compiles to `RegExp("a" + 1)`. So does `RegExp("a#{1}")`. Still, those two code snippets used to generate different tokens, which is a bit weird, but more importantly causes problems for coffeelint (see clutchski/coffeelint#340). This required lots of tests in test/location.coffee to be updated. Note that some updates to those tests are unrelated to this point; some have been updated to be more consistent (I discovered this because the refactored code happened to be seemingly more correct). - Regular regex literals used to erraneously allow newlines to be escaped, causing invalid JavaScript output. This has been fixed. - Heregexes may now be completely empty (`//////`), instead of erroring out with a confusing message. - Fix #2388: Heredocs and heregexes used to be lexed simply, which meant that you couldn't nest a heredoc within a heredoc (double-quoted, that is) or a heregex inside a heregex. - Fix #2321: If you used division inside interpolation and then a slash later in the string containing that interpolation, the division slash and the latter slash was erraneously matched as a regex. This has been fixed. - Indentation inside interpolations in heredocs no longer affect how much indentation is removed from each line of the heredoc (which is more intuitive). - Whitespace is now correctly trimmed from the start and end of strings in a few edge cases. - Last but not least, the lexing of interpolated strings now seems to be more efficient. For a regular double-quoted string, we used to use a custom function to find the end of it (taking interpolations and interpolations within interpolations etc. into account). Then we used to re-find the interpolations and recursively lex their contents. In effect, the same string was processed twice, or even more in the case of deeper nesting of interpolations. Now the same string is processed just once. - Code duplication between regular strings, heredocs, regular regexes and heregexes has been reduced. - The above two points should result in more easily read code, too.
2015-01-03 22:40:43 +00:00
if (heredoc) {
indent = null;
doc = ((function() {
var j, len, results;
results = [];
for (i = j = 0, len = tokens.length; j < len; i = ++j) {
Refactor interpolation (and string and regex) handling in lexer - Fix #3394: Unclosed single-quoted strings (both regular ones and heredocs) used to pass through the lexer, causing a parsing error later, while double-quoted strings caused an error already in the lexing phase. Now both single and double-quoted unclosed strings error out in the lexer (which is the more logical option) with consistent error messages. This also fixes the last comment by @satyr in #3301. - Similar to the above, unclosed heregexes also used to pass through the lexer and not error until in the parsing phase, which resulted in confusing error messages. This has been fixed, too. - Fix #3348, by adding passing tests. - Fix #3529: If a string starts with an interpolation, an empty string is no longer emitted before the interpolation (unless it is needed to coerce the interpolation into a string). - Block comments cannot contain `*/`. Now the error message also shows exactly where the offending `*/`. This improvement might seem unrelated, but I had to touch that code anyway to refactor string and regex related code, and the change was very trivial. Moreover, it's consistent with the next two points. - Regexes cannot start with `*`. Now the error message also shows exactly where the offending `*` is. (It might actually not be exatly at the start in heregexes.) It is a very minor improvement, but it was trivial to add. - Octal escapes in strings are forbidden in CoffeeScript (just like in JavaScript strict mode). However, this used to be the case only for regular strings. Now they are also forbidden in heredocs. Moreover, the errors now point at the offending octal escape. - Invalid regex flags are no longer allowed. This includes repeated modifiers and unknown ones. Moreover, invalid modifiers do not stop a heregex from being matched, which results in better error messages. - Fix #3621: `///a#{1}///` compiles to `RegExp("a" + 1)`. So does `RegExp("a#{1}")`. Still, those two code snippets used to generate different tokens, which is a bit weird, but more importantly causes problems for coffeelint (see clutchski/coffeelint#340). This required lots of tests in test/location.coffee to be updated. Note that some updates to those tests are unrelated to this point; some have been updated to be more consistent (I discovered this because the refactored code happened to be seemingly more correct). - Regular regex literals used to erraneously allow newlines to be escaped, causing invalid JavaScript output. This has been fixed. - Heregexes may now be completely empty (`//////`), instead of erroring out with a confusing message. - Fix #2388: Heredocs and heregexes used to be lexed simply, which meant that you couldn't nest a heredoc within a heredoc (double-quoted, that is) or a heregex inside a heregex. - Fix #2321: If you used division inside interpolation and then a slash later in the string containing that interpolation, the division slash and the latter slash was erraneously matched as a regex. This has been fixed. - Indentation inside interpolations in heredocs no longer affect how much indentation is removed from each line of the heredoc (which is more intuitive). - Whitespace is now correctly trimmed from the start and end of strings in a few edge cases. - Last but not least, the lexing of interpolated strings now seems to be more efficient. For a regular double-quoted string, we used to use a custom function to find the end of it (taking interpolations and interpolations within interpolations etc. into account). Then we used to re-find the interpolations and recursively lex their contents. In effect, the same string was processed twice, or even more in the case of deeper nesting of interpolations. Now the same string is processed just once. - Code duplication between regular strings, heredocs, regular regexes and heregexes has been reduced. - The above two points should result in more easily read code, too.
2015-01-03 22:40:43 +00:00
token = tokens[i];
if (token[0] === 'NEOSTRING') {
results.push(token[1]);
Refactor interpolation (and string and regex) handling in lexer - Fix #3394: Unclosed single-quoted strings (both regular ones and heredocs) used to pass through the lexer, causing a parsing error later, while double-quoted strings caused an error already in the lexing phase. Now both single and double-quoted unclosed strings error out in the lexer (which is the more logical option) with consistent error messages. This also fixes the last comment by @satyr in #3301. - Similar to the above, unclosed heregexes also used to pass through the lexer and not error until in the parsing phase, which resulted in confusing error messages. This has been fixed, too. - Fix #3348, by adding passing tests. - Fix #3529: If a string starts with an interpolation, an empty string is no longer emitted before the interpolation (unless it is needed to coerce the interpolation into a string). - Block comments cannot contain `*/`. Now the error message also shows exactly where the offending `*/`. This improvement might seem unrelated, but I had to touch that code anyway to refactor string and regex related code, and the change was very trivial. Moreover, it's consistent with the next two points. - Regexes cannot start with `*`. Now the error message also shows exactly where the offending `*` is. (It might actually not be exatly at the start in heregexes.) It is a very minor improvement, but it was trivial to add. - Octal escapes in strings are forbidden in CoffeeScript (just like in JavaScript strict mode). However, this used to be the case only for regular strings. Now they are also forbidden in heredocs. Moreover, the errors now point at the offending octal escape. - Invalid regex flags are no longer allowed. This includes repeated modifiers and unknown ones. Moreover, invalid modifiers do not stop a heregex from being matched, which results in better error messages. - Fix #3621: `///a#{1}///` compiles to `RegExp("a" + 1)`. So does `RegExp("a#{1}")`. Still, those two code snippets used to generate different tokens, which is a bit weird, but more importantly causes problems for coffeelint (see clutchski/coffeelint#340). This required lots of tests in test/location.coffee to be updated. Note that some updates to those tests are unrelated to this point; some have been updated to be more consistent (I discovered this because the refactored code happened to be seemingly more correct). - Regular regex literals used to erraneously allow newlines to be escaped, causing invalid JavaScript output. This has been fixed. - Heregexes may now be completely empty (`//////`), instead of erroring out with a confusing message. - Fix #2388: Heredocs and heregexes used to be lexed simply, which meant that you couldn't nest a heredoc within a heredoc (double-quoted, that is) or a heregex inside a heregex. - Fix #2321: If you used division inside interpolation and then a slash later in the string containing that interpolation, the division slash and the latter slash was erraneously matched as a regex. This has been fixed. - Indentation inside interpolations in heredocs no longer affect how much indentation is removed from each line of the heredoc (which is more intuitive). - Whitespace is now correctly trimmed from the start and end of strings in a few edge cases. - Last but not least, the lexing of interpolated strings now seems to be more efficient. For a regular double-quoted string, we used to use a custom function to find the end of it (taking interpolations and interpolations within interpolations etc. into account). Then we used to re-find the interpolations and recursively lex their contents. In effect, the same string was processed twice, or even more in the case of deeper nesting of interpolations. Now the same string is processed just once. - Code duplication between regular strings, heredocs, regular regexes and heregexes has been reduced. - The above two points should result in more easily read code, too.
2015-01-03 22:40:43 +00:00
}
}
return results;
Refactor interpolation (and string and regex) handling in lexer - Fix #3394: Unclosed single-quoted strings (both regular ones and heredocs) used to pass through the lexer, causing a parsing error later, while double-quoted strings caused an error already in the lexing phase. Now both single and double-quoted unclosed strings error out in the lexer (which is the more logical option) with consistent error messages. This also fixes the last comment by @satyr in #3301. - Similar to the above, unclosed heregexes also used to pass through the lexer and not error until in the parsing phase, which resulted in confusing error messages. This has been fixed, too. - Fix #3348, by adding passing tests. - Fix #3529: If a string starts with an interpolation, an empty string is no longer emitted before the interpolation (unless it is needed to coerce the interpolation into a string). - Block comments cannot contain `*/`. Now the error message also shows exactly where the offending `*/`. This improvement might seem unrelated, but I had to touch that code anyway to refactor string and regex related code, and the change was very trivial. Moreover, it's consistent with the next two points. - Regexes cannot start with `*`. Now the error message also shows exactly where the offending `*` is. (It might actually not be exatly at the start in heregexes.) It is a very minor improvement, but it was trivial to add. - Octal escapes in strings are forbidden in CoffeeScript (just like in JavaScript strict mode). However, this used to be the case only for regular strings. Now they are also forbidden in heredocs. Moreover, the errors now point at the offending octal escape. - Invalid regex flags are no longer allowed. This includes repeated modifiers and unknown ones. Moreover, invalid modifiers do not stop a heregex from being matched, which results in better error messages. - Fix #3621: `///a#{1}///` compiles to `RegExp("a" + 1)`. So does `RegExp("a#{1}")`. Still, those two code snippets used to generate different tokens, which is a bit weird, but more importantly causes problems for coffeelint (see clutchski/coffeelint#340). This required lots of tests in test/location.coffee to be updated. Note that some updates to those tests are unrelated to this point; some have been updated to be more consistent (I discovered this because the refactored code happened to be seemingly more correct). - Regular regex literals used to erraneously allow newlines to be escaped, causing invalid JavaScript output. This has been fixed. - Heregexes may now be completely empty (`//////`), instead of erroring out with a confusing message. - Fix #2388: Heredocs and heregexes used to be lexed simply, which meant that you couldn't nest a heredoc within a heredoc (double-quoted, that is) or a heregex inside a heregex. - Fix #2321: If you used division inside interpolation and then a slash later in the string containing that interpolation, the division slash and the latter slash was erraneously matched as a regex. This has been fixed. - Indentation inside interpolations in heredocs no longer affect how much indentation is removed from each line of the heredoc (which is more intuitive). - Whitespace is now correctly trimmed from the start and end of strings in a few edge cases. - Last but not least, the lexing of interpolated strings now seems to be more efficient. For a regular double-quoted string, we used to use a custom function to find the end of it (taking interpolations and interpolations within interpolations etc. into account). Then we used to re-find the interpolations and recursively lex their contents. In effect, the same string was processed twice, or even more in the case of deeper nesting of interpolations. Now the same string is processed just once. - Code duplication between regular strings, heredocs, regular regexes and heregexes has been reduced. - The above two points should result in more easily read code, too.
2015-01-03 22:40:43 +00:00
})()).join('#{}');
while (match = HEREDOC_INDENT.exec(doc)) {
attempt = match[1];
if (indent === null || (0 < (ref3 = attempt.length) && ref3 < indent.length)) {
Refactor interpolation (and string and regex) handling in lexer - Fix #3394: Unclosed single-quoted strings (both regular ones and heredocs) used to pass through the lexer, causing a parsing error later, while double-quoted strings caused an error already in the lexing phase. Now both single and double-quoted unclosed strings error out in the lexer (which is the more logical option) with consistent error messages. This also fixes the last comment by @satyr in #3301. - Similar to the above, unclosed heregexes also used to pass through the lexer and not error until in the parsing phase, which resulted in confusing error messages. This has been fixed, too. - Fix #3348, by adding passing tests. - Fix #3529: If a string starts with an interpolation, an empty string is no longer emitted before the interpolation (unless it is needed to coerce the interpolation into a string). - Block comments cannot contain `*/`. Now the error message also shows exactly where the offending `*/`. This improvement might seem unrelated, but I had to touch that code anyway to refactor string and regex related code, and the change was very trivial. Moreover, it's consistent with the next two points. - Regexes cannot start with `*`. Now the error message also shows exactly where the offending `*` is. (It might actually not be exatly at the start in heregexes.) It is a very minor improvement, but it was trivial to add. - Octal escapes in strings are forbidden in CoffeeScript (just like in JavaScript strict mode). However, this used to be the case only for regular strings. Now they are also forbidden in heredocs. Moreover, the errors now point at the offending octal escape. - Invalid regex flags are no longer allowed. This includes repeated modifiers and unknown ones. Moreover, invalid modifiers do not stop a heregex from being matched, which results in better error messages. - Fix #3621: `///a#{1}///` compiles to `RegExp("a" + 1)`. So does `RegExp("a#{1}")`. Still, those two code snippets used to generate different tokens, which is a bit weird, but more importantly causes problems for coffeelint (see clutchski/coffeelint#340). This required lots of tests in test/location.coffee to be updated. Note that some updates to those tests are unrelated to this point; some have been updated to be more consistent (I discovered this because the refactored code happened to be seemingly more correct). - Regular regex literals used to erraneously allow newlines to be escaped, causing invalid JavaScript output. This has been fixed. - Heregexes may now be completely empty (`//////`), instead of erroring out with a confusing message. - Fix #2388: Heredocs and heregexes used to be lexed simply, which meant that you couldn't nest a heredoc within a heredoc (double-quoted, that is) or a heregex inside a heregex. - Fix #2321: If you used division inside interpolation and then a slash later in the string containing that interpolation, the division slash and the latter slash was erraneously matched as a regex. This has been fixed. - Indentation inside interpolations in heredocs no longer affect how much indentation is removed from each line of the heredoc (which is more intuitive). - Whitespace is now correctly trimmed from the start and end of strings in a few edge cases. - Last but not least, the lexing of interpolated strings now seems to be more efficient. For a regular double-quoted string, we used to use a custom function to find the end of it (taking interpolations and interpolations within interpolations etc. into account). Then we used to re-find the interpolations and recursively lex their contents. In effect, the same string was processed twice, or even more in the case of deeper nesting of interpolations. Now the same string is processed just once. - Code duplication between regular strings, heredocs, regular regexes and heregexes has been reduced. - The above two points should result in more easily read code, too.
2015-01-03 22:40:43 +00:00
indent = attempt;
}
}
if (indent) {
indentRegex = RegExp("\\n" + indent, "g");
Refactor interpolation (and string and regex) handling in lexer - Fix #3394: Unclosed single-quoted strings (both regular ones and heredocs) used to pass through the lexer, causing a parsing error later, while double-quoted strings caused an error already in the lexing phase. Now both single and double-quoted unclosed strings error out in the lexer (which is the more logical option) with consistent error messages. This also fixes the last comment by @satyr in #3301. - Similar to the above, unclosed heregexes also used to pass through the lexer and not error until in the parsing phase, which resulted in confusing error messages. This has been fixed, too. - Fix #3348, by adding passing tests. - Fix #3529: If a string starts with an interpolation, an empty string is no longer emitted before the interpolation (unless it is needed to coerce the interpolation into a string). - Block comments cannot contain `*/`. Now the error message also shows exactly where the offending `*/`. This improvement might seem unrelated, but I had to touch that code anyway to refactor string and regex related code, and the change was very trivial. Moreover, it's consistent with the next two points. - Regexes cannot start with `*`. Now the error message also shows exactly where the offending `*` is. (It might actually not be exatly at the start in heregexes.) It is a very minor improvement, but it was trivial to add. - Octal escapes in strings are forbidden in CoffeeScript (just like in JavaScript strict mode). However, this used to be the case only for regular strings. Now they are also forbidden in heredocs. Moreover, the errors now point at the offending octal escape. - Invalid regex flags are no longer allowed. This includes repeated modifiers and unknown ones. Moreover, invalid modifiers do not stop a heregex from being matched, which results in better error messages. - Fix #3621: `///a#{1}///` compiles to `RegExp("a" + 1)`. So does `RegExp("a#{1}")`. Still, those two code snippets used to generate different tokens, which is a bit weird, but more importantly causes problems for coffeelint (see clutchski/coffeelint#340). This required lots of tests in test/location.coffee to be updated. Note that some updates to those tests are unrelated to this point; some have been updated to be more consistent (I discovered this because the refactored code happened to be seemingly more correct). - Regular regex literals used to erraneously allow newlines to be escaped, causing invalid JavaScript output. This has been fixed. - Heregexes may now be completely empty (`//////`), instead of erroring out with a confusing message. - Fix #2388: Heredocs and heregexes used to be lexed simply, which meant that you couldn't nest a heredoc within a heredoc (double-quoted, that is) or a heregex inside a heregex. - Fix #2321: If you used division inside interpolation and then a slash later in the string containing that interpolation, the division slash and the latter slash was erraneously matched as a regex. This has been fixed. - Indentation inside interpolations in heredocs no longer affect how much indentation is removed from each line of the heredoc (which is more intuitive). - Whitespace is now correctly trimmed from the start and end of strings in a few edge cases. - Last but not least, the lexing of interpolated strings now seems to be more efficient. For a regular double-quoted string, we used to use a custom function to find the end of it (taking interpolations and interpolations within interpolations etc. into account). Then we used to re-find the interpolations and recursively lex their contents. In effect, the same string was processed twice, or even more in the case of deeper nesting of interpolations. Now the same string is processed just once. - Code duplication between regular strings, heredocs, regular regexes and heregexes has been reduced. - The above two points should result in more easily read code, too.
2015-01-03 22:40:43 +00:00
}
this.mergeInterpolationTokens(tokens, {
delimiter: delimiter
[CS2] Output ES2015 arrow functions, default parameters, rest parameters (#4311) * Eliminate wrapper around “bound” (arrow) functions; output `=>` for such functions * Remove irrelevant (and breaking) tests * Minor cleanup * When a function parameter is a splat (i.e., it uses the ES2015 rest parameter syntax) output that parameter as ES2015 * Rearrange function parameters when one of the parameters is a splat and isn’t the last parameter (very WIP) * Handle params like `@param`, adding assignment expressions for them when they appear; ensure splat parameter is last * Add parameter names (not a text like `'\nValue IdentifierLiteral: a'`) to the scope, so that parameters can’t be deleted; move body-related lines together; more explanation of what’s going on * For parameters with a default value, correctly add the parameter name to the function scope * Handle expansions in function parameters: when an expansion is found, set the parameters to only be the original parameters left of the expansion, then an `...args` parameter; and in the function body define variables for the parameters to the right of the expansion, including setting default values * Handle splat parameters the same way we handle expansions: if a splat parameter is found, it becomes the last parameter in the function definition, and all following parameters get declared in the function body. Fix the splat/rest parameter values after the post-splat parameters have been extracted from it. Clean up `Code.compileNode` so that we loop through the parameters only once, and we create all expressions using calls like `new IdentifierLiteral` rather than `@makeCode`. * Fix parameter name when a parameter is a splat attached to `this` (e.g. `@param...`) * Rather than assigning post-splat parameters based on index, use slice; passes test “Functions with splats being called with too few arguments” * Dial back our w00t indentation * Better parsing of parameter names (WIP) * Refactor processing of splat/expansion parameters * Fix assignment of default parameters for parameters that come after a splat * Better check for whether a param is attached to `this` * More understandable variable names * For parameters after a splat or expansion, assign them similar to the 1.x destructuring method of using `arguments`, except only concern ourselves with the post-splat parameters instead of all parameters; and use the splat/expansion parameter name, since `arguments` in ES fat arrow functions refers to the parent function’s `arguments` rather than the fat arrow function’s arguments/parameters * Don’t add unnamed parameters (like `[]` as a parameter) to the function scope * Disallow multiple splat/expansion parameters in function definitions; disallow lone expansion parameters * Fix `this` params not getting assigned if the parameter is after a splat parameter * Allow names of function parameters attached to `this` to be reserved words * Always add a statement to the function body defining a variable with its default value, if it has one, if the variable `== null`; this covers the case when ES doesn’t apply the default value when `null` is passed in as a value, but CoffeeScript expects `null` and `undefined` to act interchangeably * Aftermath of having both `undefined` and `null` trigger the use of default values for parameters with default values * More careful parsing of destructured parameters * Fall back to processing destructured parameters in the function body, to account for `this` or default values within destructured objects * Clean up comments * Restore new bare function test, minus the arrow function part of it * Test that bound/arrow functions aren’t overwriting the `arguments` object, which should refer to the parent scope’s `arguments` (like `this`) * Follow ES2015 spec for parameter default values: `null` gets assigned as as `null`, not the default value * Mimic ES default parameters behavior for parameters after a splat or expansion parameter * Bound functions cannot be generators: remove no-longer-relevant test, add check to throw error if `yield` appears inside a bound (arrow) function * Error for bound generator functions should underline the `yield`
2016-10-26 05:26:13 +00:00
}, (value, i) => {
value = this.formatString(value);
if (indentRegex) {
value = value.replace(indentRegex, '\n');
}
if (i === 0) {
value = value.replace(LEADING_BLANK_LINE, '');
}
if (i === $) {
value = value.replace(TRAILING_BLANK_LINE, '');
}
return value;
});
} else {
this.mergeInterpolationTokens(tokens, {
delimiter: delimiter
[CS2] Output ES2015 arrow functions, default parameters, rest parameters (#4311) * Eliminate wrapper around “bound” (arrow) functions; output `=>` for such functions * Remove irrelevant (and breaking) tests * Minor cleanup * When a function parameter is a splat (i.e., it uses the ES2015 rest parameter syntax) output that parameter as ES2015 * Rearrange function parameters when one of the parameters is a splat and isn’t the last parameter (very WIP) * Handle params like `@param`, adding assignment expressions for them when they appear; ensure splat parameter is last * Add parameter names (not a text like `'\nValue IdentifierLiteral: a'`) to the scope, so that parameters can’t be deleted; move body-related lines together; more explanation of what’s going on * For parameters with a default value, correctly add the parameter name to the function scope * Handle expansions in function parameters: when an expansion is found, set the parameters to only be the original parameters left of the expansion, then an `...args` parameter; and in the function body define variables for the parameters to the right of the expansion, including setting default values * Handle splat parameters the same way we handle expansions: if a splat parameter is found, it becomes the last parameter in the function definition, and all following parameters get declared in the function body. Fix the splat/rest parameter values after the post-splat parameters have been extracted from it. Clean up `Code.compileNode` so that we loop through the parameters only once, and we create all expressions using calls like `new IdentifierLiteral` rather than `@makeCode`. * Fix parameter name when a parameter is a splat attached to `this` (e.g. `@param...`) * Rather than assigning post-splat parameters based on index, use slice; passes test “Functions with splats being called with too few arguments” * Dial back our w00t indentation * Better parsing of parameter names (WIP) * Refactor processing of splat/expansion parameters * Fix assignment of default parameters for parameters that come after a splat * Better check for whether a param is attached to `this` * More understandable variable names * For parameters after a splat or expansion, assign them similar to the 1.x destructuring method of using `arguments`, except only concern ourselves with the post-splat parameters instead of all parameters; and use the splat/expansion parameter name, since `arguments` in ES fat arrow functions refers to the parent function’s `arguments` rather than the fat arrow function’s arguments/parameters * Don’t add unnamed parameters (like `[]` as a parameter) to the function scope * Disallow multiple splat/expansion parameters in function definitions; disallow lone expansion parameters * Fix `this` params not getting assigned if the parameter is after a splat parameter * Allow names of function parameters attached to `this` to be reserved words * Always add a statement to the function body defining a variable with its default value, if it has one, if the variable `== null`; this covers the case when ES doesn’t apply the default value when `null` is passed in as a value, but CoffeeScript expects `null` and `undefined` to act interchangeably * Aftermath of having both `undefined` and `null` trigger the use of default values for parameters with default values * More careful parsing of destructured parameters * Fall back to processing destructured parameters in the function body, to account for `this` or default values within destructured objects * Clean up comments * Restore new bare function test, minus the arrow function part of it * Test that bound/arrow functions aren’t overwriting the `arguments` object, which should refer to the parent scope’s `arguments` (like `this`) * Follow ES2015 spec for parameter default values: `null` gets assigned as as `null`, not the default value * Mimic ES default parameters behavior for parameters after a splat or expansion parameter * Bound functions cannot be generators: remove no-longer-relevant test, add check to throw error if `yield` appears inside a bound (arrow) function * Error for bound generator functions should underline the `yield`
2016-10-26 05:26:13 +00:00
}, (value, i) => {
value = this.formatString(value);
value = value.replace(SIMPLE_STRING_OMIT, function(match, offset) {
if ((i === 0 && offset === 0) || (i === $ && offset + match.length === value.length)) {
return '';
} else {
return ' ';
}
});
return value;
});
}
Refactor interpolation (and string and regex) handling in lexer - Fix #3394: Unclosed single-quoted strings (both regular ones and heredocs) used to pass through the lexer, causing a parsing error later, while double-quoted strings caused an error already in the lexing phase. Now both single and double-quoted unclosed strings error out in the lexer (which is the more logical option) with consistent error messages. This also fixes the last comment by @satyr in #3301. - Similar to the above, unclosed heregexes also used to pass through the lexer and not error until in the parsing phase, which resulted in confusing error messages. This has been fixed, too. - Fix #3348, by adding passing tests. - Fix #3529: If a string starts with an interpolation, an empty string is no longer emitted before the interpolation (unless it is needed to coerce the interpolation into a string). - Block comments cannot contain `*/`. Now the error message also shows exactly where the offending `*/`. This improvement might seem unrelated, but I had to touch that code anyway to refactor string and regex related code, and the change was very trivial. Moreover, it's consistent with the next two points. - Regexes cannot start with `*`. Now the error message also shows exactly where the offending `*` is. (It might actually not be exatly at the start in heregexes.) It is a very minor improvement, but it was trivial to add. - Octal escapes in strings are forbidden in CoffeeScript (just like in JavaScript strict mode). However, this used to be the case only for regular strings. Now they are also forbidden in heredocs. Moreover, the errors now point at the offending octal escape. - Invalid regex flags are no longer allowed. This includes repeated modifiers and unknown ones. Moreover, invalid modifiers do not stop a heregex from being matched, which results in better error messages. - Fix #3621: `///a#{1}///` compiles to `RegExp("a" + 1)`. So does `RegExp("a#{1}")`. Still, those two code snippets used to generate different tokens, which is a bit weird, but more importantly causes problems for coffeelint (see clutchski/coffeelint#340). This required lots of tests in test/location.coffee to be updated. Note that some updates to those tests are unrelated to this point; some have been updated to be more consistent (I discovered this because the refactored code happened to be seemingly more correct). - Regular regex literals used to erraneously allow newlines to be escaped, causing invalid JavaScript output. This has been fixed. - Heregexes may now be completely empty (`//////`), instead of erroring out with a confusing message. - Fix #2388: Heredocs and heregexes used to be lexed simply, which meant that you couldn't nest a heredoc within a heredoc (double-quoted, that is) or a heregex inside a heregex. - Fix #2321: If you used division inside interpolation and then a slash later in the string containing that interpolation, the division slash and the latter slash was erraneously matched as a regex. This has been fixed. - Indentation inside interpolations in heredocs no longer affect how much indentation is removed from each line of the heredoc (which is more intuitive). - Whitespace is now correctly trimmed from the start and end of strings in a few edge cases. - Last but not least, the lexing of interpolated strings now seems to be more efficient. For a regular double-quoted string, we used to use a custom function to find the end of it (taking interpolations and interpolations within interpolations etc. into account). Then we used to re-find the interpolations and recursively lex their contents. In effect, the same string was processed twice, or even more in the case of deeper nesting of interpolations. Now the same string is processed just once. - Code duplication between regular strings, heredocs, regular regexes and heregexes has been reduced. - The above two points should result in more easily read code, too.
2015-01-03 22:40:43 +00:00
return end;
};
Lexer.prototype.commentToken = function() {
var comment, here, match;
2012-04-10 18:57:45 +00:00
if (!(match = this.chunk.match(COMMENT))) {
return 0;
}
comment = match[0], here = match[1];
2010-09-23 05:14:18 +00:00
if (here) {
Refactor interpolation (and string and regex) handling in lexer - Fix #3394: Unclosed single-quoted strings (both regular ones and heredocs) used to pass through the lexer, causing a parsing error later, while double-quoted strings caused an error already in the lexing phase. Now both single and double-quoted unclosed strings error out in the lexer (which is the more logical option) with consistent error messages. This also fixes the last comment by @satyr in #3301. - Similar to the above, unclosed heregexes also used to pass through the lexer and not error until in the parsing phase, which resulted in confusing error messages. This has been fixed, too. - Fix #3348, by adding passing tests. - Fix #3529: If a string starts with an interpolation, an empty string is no longer emitted before the interpolation (unless it is needed to coerce the interpolation into a string). - Block comments cannot contain `*/`. Now the error message also shows exactly where the offending `*/`. This improvement might seem unrelated, but I had to touch that code anyway to refactor string and regex related code, and the change was very trivial. Moreover, it's consistent with the next two points. - Regexes cannot start with `*`. Now the error message also shows exactly where the offending `*` is. (It might actually not be exatly at the start in heregexes.) It is a very minor improvement, but it was trivial to add. - Octal escapes in strings are forbidden in CoffeeScript (just like in JavaScript strict mode). However, this used to be the case only for regular strings. Now they are also forbidden in heredocs. Moreover, the errors now point at the offending octal escape. - Invalid regex flags are no longer allowed. This includes repeated modifiers and unknown ones. Moreover, invalid modifiers do not stop a heregex from being matched, which results in better error messages. - Fix #3621: `///a#{1}///` compiles to `RegExp("a" + 1)`. So does `RegExp("a#{1}")`. Still, those two code snippets used to generate different tokens, which is a bit weird, but more importantly causes problems for coffeelint (see clutchski/coffeelint#340). This required lots of tests in test/location.coffee to be updated. Note that some updates to those tests are unrelated to this point; some have been updated to be more consistent (I discovered this because the refactored code happened to be seemingly more correct). - Regular regex literals used to erraneously allow newlines to be escaped, causing invalid JavaScript output. This has been fixed. - Heregexes may now be completely empty (`//////`), instead of erroring out with a confusing message. - Fix #2388: Heredocs and heregexes used to be lexed simply, which meant that you couldn't nest a heredoc within a heredoc (double-quoted, that is) or a heregex inside a heregex. - Fix #2321: If you used division inside interpolation and then a slash later in the string containing that interpolation, the division slash and the latter slash was erraneously matched as a regex. This has been fixed. - Indentation inside interpolations in heredocs no longer affect how much indentation is removed from each line of the heredoc (which is more intuitive). - Whitespace is now correctly trimmed from the start and end of strings in a few edge cases. - Last but not least, the lexing of interpolated strings now seems to be more efficient. For a regular double-quoted string, we used to use a custom function to find the end of it (taking interpolations and interpolations within interpolations etc. into account). Then we used to re-find the interpolations and recursively lex their contents. In effect, the same string was processed twice, or even more in the case of deeper nesting of interpolations. Now the same string is processed just once. - Code duplication between regular strings, heredocs, regular regexes and heregexes has been reduced. - The above two points should result in more easily read code, too.
2015-01-03 22:40:43 +00:00
if (match = HERECOMMENT_ILLEGAL.exec(comment)) {
this.error("block comments cannot contain " + match[0], {
offset: match.index,
length: match[0].length
});
Refactor interpolation (and string and regex) handling in lexer - Fix #3394: Unclosed single-quoted strings (both regular ones and heredocs) used to pass through the lexer, causing a parsing error later, while double-quoted strings caused an error already in the lexing phase. Now both single and double-quoted unclosed strings error out in the lexer (which is the more logical option) with consistent error messages. This also fixes the last comment by @satyr in #3301. - Similar to the above, unclosed heregexes also used to pass through the lexer and not error until in the parsing phase, which resulted in confusing error messages. This has been fixed, too. - Fix #3348, by adding passing tests. - Fix #3529: If a string starts with an interpolation, an empty string is no longer emitted before the interpolation (unless it is needed to coerce the interpolation into a string). - Block comments cannot contain `*/`. Now the error message also shows exactly where the offending `*/`. This improvement might seem unrelated, but I had to touch that code anyway to refactor string and regex related code, and the change was very trivial. Moreover, it's consistent with the next two points. - Regexes cannot start with `*`. Now the error message also shows exactly where the offending `*` is. (It might actually not be exatly at the start in heregexes.) It is a very minor improvement, but it was trivial to add. - Octal escapes in strings are forbidden in CoffeeScript (just like in JavaScript strict mode). However, this used to be the case only for regular strings. Now they are also forbidden in heredocs. Moreover, the errors now point at the offending octal escape. - Invalid regex flags are no longer allowed. This includes repeated modifiers and unknown ones. Moreover, invalid modifiers do not stop a heregex from being matched, which results in better error messages. - Fix #3621: `///a#{1}///` compiles to `RegExp("a" + 1)`. So does `RegExp("a#{1}")`. Still, those two code snippets used to generate different tokens, which is a bit weird, but more importantly causes problems for coffeelint (see clutchski/coffeelint#340). This required lots of tests in test/location.coffee to be updated. Note that some updates to those tests are unrelated to this point; some have been updated to be more consistent (I discovered this because the refactored code happened to be seemingly more correct). - Regular regex literals used to erraneously allow newlines to be escaped, causing invalid JavaScript output. This has been fixed. - Heregexes may now be completely empty (`//////`), instead of erroring out with a confusing message. - Fix #2388: Heredocs and heregexes used to be lexed simply, which meant that you couldn't nest a heredoc within a heredoc (double-quoted, that is) or a heregex inside a heregex. - Fix #2321: If you used division inside interpolation and then a slash later in the string containing that interpolation, the division slash and the latter slash was erraneously matched as a regex. This has been fixed. - Indentation inside interpolations in heredocs no longer affect how much indentation is removed from each line of the heredoc (which is more intuitive). - Whitespace is now correctly trimmed from the start and end of strings in a few edge cases. - Last but not least, the lexing of interpolated strings now seems to be more efficient. For a regular double-quoted string, we used to use a custom function to find the end of it (taking interpolations and interpolations within interpolations etc. into account). Then we used to re-find the interpolations and recursively lex their contents. In effect, the same string was processed twice, or even more in the case of deeper nesting of interpolations. Now the same string is processed just once. - Code duplication between regular strings, heredocs, regular regexes and heregexes has been reduced. - The above two points should result in more easily read code, too.
2015-01-03 22:40:43 +00:00
}
if (here.indexOf('\n') >= 0) {
here = here.replace(RegExp("\\n" + (repeat(' ', this.indent)), "g"), '\n');
}
this.token('HERECOMMENT', here, 0, comment.length);
}
return comment.length;
};
Lexer.prototype.jsToken = function() {
var match, script;
if (!(this.chunk.charAt(0) === '`' && (match = JSTOKEN.exec(this.chunk)))) {
return 0;
}
this.token('JS', (script = match[0]).slice(1, -1), 0, script.length);
return script.length;
};
Lexer.prototype.regexToken = function() {
2015-02-07 19:16:59 +00:00
var body, closed, end, flags, index, match, origin, prev, ref2, ref3, ref4, regex, tokens;
Refactor interpolation (and string and regex) handling in lexer - Fix #3394: Unclosed single-quoted strings (both regular ones and heredocs) used to pass through the lexer, causing a parsing error later, while double-quoted strings caused an error already in the lexing phase. Now both single and double-quoted unclosed strings error out in the lexer (which is the more logical option) with consistent error messages. This also fixes the last comment by @satyr in #3301. - Similar to the above, unclosed heregexes also used to pass through the lexer and not error until in the parsing phase, which resulted in confusing error messages. This has been fixed, too. - Fix #3348, by adding passing tests. - Fix #3529: If a string starts with an interpolation, an empty string is no longer emitted before the interpolation (unless it is needed to coerce the interpolation into a string). - Block comments cannot contain `*/`. Now the error message also shows exactly where the offending `*/`. This improvement might seem unrelated, but I had to touch that code anyway to refactor string and regex related code, and the change was very trivial. Moreover, it's consistent with the next two points. - Regexes cannot start with `*`. Now the error message also shows exactly where the offending `*` is. (It might actually not be exatly at the start in heregexes.) It is a very minor improvement, but it was trivial to add. - Octal escapes in strings are forbidden in CoffeeScript (just like in JavaScript strict mode). However, this used to be the case only for regular strings. Now they are also forbidden in heredocs. Moreover, the errors now point at the offending octal escape. - Invalid regex flags are no longer allowed. This includes repeated modifiers and unknown ones. Moreover, invalid modifiers do not stop a heregex from being matched, which results in better error messages. - Fix #3621: `///a#{1}///` compiles to `RegExp("a" + 1)`. So does `RegExp("a#{1}")`. Still, those two code snippets used to generate different tokens, which is a bit weird, but more importantly causes problems for coffeelint (see clutchski/coffeelint#340). This required lots of tests in test/location.coffee to be updated. Note that some updates to those tests are unrelated to this point; some have been updated to be more consistent (I discovered this because the refactored code happened to be seemingly more correct). - Regular regex literals used to erraneously allow newlines to be escaped, causing invalid JavaScript output. This has been fixed. - Heregexes may now be completely empty (`//////`), instead of erroring out with a confusing message. - Fix #2388: Heredocs and heregexes used to be lexed simply, which meant that you couldn't nest a heredoc within a heredoc (double-quoted, that is) or a heregex inside a heregex. - Fix #2321: If you used division inside interpolation and then a slash later in the string containing that interpolation, the division slash and the latter slash was erraneously matched as a regex. This has been fixed. - Indentation inside interpolations in heredocs no longer affect how much indentation is removed from each line of the heredoc (which is more intuitive). - Whitespace is now correctly trimmed from the start and end of strings in a few edge cases. - Last but not least, the lexing of interpolated strings now seems to be more efficient. For a regular double-quoted string, we used to use a custom function to find the end of it (taking interpolations and interpolations within interpolations etc. into account). Then we used to re-find the interpolations and recursively lex their contents. In effect, the same string was processed twice, or even more in the case of deeper nesting of interpolations. Now the same string is processed just once. - Code duplication between regular strings, heredocs, regular regexes and heregexes has been reduced. - The above two points should result in more easily read code, too.
2015-01-03 22:40:43 +00:00
switch (false) {
case !(match = REGEX_ILLEGAL.exec(this.chunk)):
this.error("regular expressions cannot begin with " + match[2], {
offset: match.index + match[1].length
});
Refactor interpolation (and string and regex) handling in lexer - Fix #3394: Unclosed single-quoted strings (both regular ones and heredocs) used to pass through the lexer, causing a parsing error later, while double-quoted strings caused an error already in the lexing phase. Now both single and double-quoted unclosed strings error out in the lexer (which is the more logical option) with consistent error messages. This also fixes the last comment by @satyr in #3301. - Similar to the above, unclosed heregexes also used to pass through the lexer and not error until in the parsing phase, which resulted in confusing error messages. This has been fixed, too. - Fix #3348, by adding passing tests. - Fix #3529: If a string starts with an interpolation, an empty string is no longer emitted before the interpolation (unless it is needed to coerce the interpolation into a string). - Block comments cannot contain `*/`. Now the error message also shows exactly where the offending `*/`. This improvement might seem unrelated, but I had to touch that code anyway to refactor string and regex related code, and the change was very trivial. Moreover, it's consistent with the next two points. - Regexes cannot start with `*`. Now the error message also shows exactly where the offending `*` is. (It might actually not be exatly at the start in heregexes.) It is a very minor improvement, but it was trivial to add. - Octal escapes in strings are forbidden in CoffeeScript (just like in JavaScript strict mode). However, this used to be the case only for regular strings. Now they are also forbidden in heredocs. Moreover, the errors now point at the offending octal escape. - Invalid regex flags are no longer allowed. This includes repeated modifiers and unknown ones. Moreover, invalid modifiers do not stop a heregex from being matched, which results in better error messages. - Fix #3621: `///a#{1}///` compiles to `RegExp("a" + 1)`. So does `RegExp("a#{1}")`. Still, those two code snippets used to generate different tokens, which is a bit weird, but more importantly causes problems for coffeelint (see clutchski/coffeelint#340). This required lots of tests in test/location.coffee to be updated. Note that some updates to those tests are unrelated to this point; some have been updated to be more consistent (I discovered this because the refactored code happened to be seemingly more correct). - Regular regex literals used to erraneously allow newlines to be escaped, causing invalid JavaScript output. This has been fixed. - Heregexes may now be completely empty (`//////`), instead of erroring out with a confusing message. - Fix #2388: Heredocs and heregexes used to be lexed simply, which meant that you couldn't nest a heredoc within a heredoc (double-quoted, that is) or a heregex inside a heregex. - Fix #2321: If you used division inside interpolation and then a slash later in the string containing that interpolation, the division slash and the latter slash was erraneously matched as a regex. This has been fixed. - Indentation inside interpolations in heredocs no longer affect how much indentation is removed from each line of the heredoc (which is more intuitive). - Whitespace is now correctly trimmed from the start and end of strings in a few edge cases. - Last but not least, the lexing of interpolated strings now seems to be more efficient. For a regular double-quoted string, we used to use a custom function to find the end of it (taking interpolations and interpolations within interpolations etc. into account). Then we used to re-find the interpolations and recursively lex their contents. In effect, the same string was processed twice, or even more in the case of deeper nesting of interpolations. Now the same string is processed just once. - Code duplication between regular strings, heredocs, regular regexes and heregexes has been reduced. - The above two points should result in more easily read code, too.
2015-01-03 22:40:43 +00:00
break;
case !(match = this.matchWithInterpolations(HEREGEX, '///')):
tokens = match.tokens, index = match.index;
Refactor interpolation (and string and regex) handling in lexer - Fix #3394: Unclosed single-quoted strings (both regular ones and heredocs) used to pass through the lexer, causing a parsing error later, while double-quoted strings caused an error already in the lexing phase. Now both single and double-quoted unclosed strings error out in the lexer (which is the more logical option) with consistent error messages. This also fixes the last comment by @satyr in #3301. - Similar to the above, unclosed heregexes also used to pass through the lexer and not error until in the parsing phase, which resulted in confusing error messages. This has been fixed, too. - Fix #3348, by adding passing tests. - Fix #3529: If a string starts with an interpolation, an empty string is no longer emitted before the interpolation (unless it is needed to coerce the interpolation into a string). - Block comments cannot contain `*/`. Now the error message also shows exactly where the offending `*/`. This improvement might seem unrelated, but I had to touch that code anyway to refactor string and regex related code, and the change was very trivial. Moreover, it's consistent with the next two points. - Regexes cannot start with `*`. Now the error message also shows exactly where the offending `*` is. (It might actually not be exatly at the start in heregexes.) It is a very minor improvement, but it was trivial to add. - Octal escapes in strings are forbidden in CoffeeScript (just like in JavaScript strict mode). However, this used to be the case only for regular strings. Now they are also forbidden in heredocs. Moreover, the errors now point at the offending octal escape. - Invalid regex flags are no longer allowed. This includes repeated modifiers and unknown ones. Moreover, invalid modifiers do not stop a heregex from being matched, which results in better error messages. - Fix #3621: `///a#{1}///` compiles to `RegExp("a" + 1)`. So does `RegExp("a#{1}")`. Still, those two code snippets used to generate different tokens, which is a bit weird, but more importantly causes problems for coffeelint (see clutchski/coffeelint#340). This required lots of tests in test/location.coffee to be updated. Note that some updates to those tests are unrelated to this point; some have been updated to be more consistent (I discovered this because the refactored code happened to be seemingly more correct). - Regular regex literals used to erraneously allow newlines to be escaped, causing invalid JavaScript output. This has been fixed. - Heregexes may now be completely empty (`//////`), instead of erroring out with a confusing message. - Fix #2388: Heredocs and heregexes used to be lexed simply, which meant that you couldn't nest a heredoc within a heredoc (double-quoted, that is) or a heregex inside a heregex. - Fix #2321: If you used division inside interpolation and then a slash later in the string containing that interpolation, the division slash and the latter slash was erraneously matched as a regex. This has been fixed. - Indentation inside interpolations in heredocs no longer affect how much indentation is removed from each line of the heredoc (which is more intuitive). - Whitespace is now correctly trimmed from the start and end of strings in a few edge cases. - Last but not least, the lexing of interpolated strings now seems to be more efficient. For a regular double-quoted string, we used to use a custom function to find the end of it (taking interpolations and interpolations within interpolations etc. into account). Then we used to re-find the interpolations and recursively lex their contents. In effect, the same string was processed twice, or even more in the case of deeper nesting of interpolations. Now the same string is processed just once. - Code duplication between regular strings, heredocs, regular regexes and heregexes has been reduced. - The above two points should result in more easily read code, too.
2015-01-03 22:40:43 +00:00
break;
case !(match = REGEX.exec(this.chunk)):
regex = match[0], body = match[1], closed = match[2];
this.validateEscapes(body, {
isRegex: true,
offsetInChunk: 1
});
Refactor interpolation (and string and regex) handling in lexer - Fix #3394: Unclosed single-quoted strings (both regular ones and heredocs) used to pass through the lexer, causing a parsing error later, while double-quoted strings caused an error already in the lexing phase. Now both single and double-quoted unclosed strings error out in the lexer (which is the more logical option) with consistent error messages. This also fixes the last comment by @satyr in #3301. - Similar to the above, unclosed heregexes also used to pass through the lexer and not error until in the parsing phase, which resulted in confusing error messages. This has been fixed, too. - Fix #3348, by adding passing tests. - Fix #3529: If a string starts with an interpolation, an empty string is no longer emitted before the interpolation (unless it is needed to coerce the interpolation into a string). - Block comments cannot contain `*/`. Now the error message also shows exactly where the offending `*/`. This improvement might seem unrelated, but I had to touch that code anyway to refactor string and regex related code, and the change was very trivial. Moreover, it's consistent with the next two points. - Regexes cannot start with `*`. Now the error message also shows exactly where the offending `*` is. (It might actually not be exatly at the start in heregexes.) It is a very minor improvement, but it was trivial to add. - Octal escapes in strings are forbidden in CoffeeScript (just like in JavaScript strict mode). However, this used to be the case only for regular strings. Now they are also forbidden in heredocs. Moreover, the errors now point at the offending octal escape. - Invalid regex flags are no longer allowed. This includes repeated modifiers and unknown ones. Moreover, invalid modifiers do not stop a heregex from being matched, which results in better error messages. - Fix #3621: `///a#{1}///` compiles to `RegExp("a" + 1)`. So does `RegExp("a#{1}")`. Still, those two code snippets used to generate different tokens, which is a bit weird, but more importantly causes problems for coffeelint (see clutchski/coffeelint#340). This required lots of tests in test/location.coffee to be updated. Note that some updates to those tests are unrelated to this point; some have been updated to be more consistent (I discovered this because the refactored code happened to be seemingly more correct). - Regular regex literals used to erraneously allow newlines to be escaped, causing invalid JavaScript output. This has been fixed. - Heregexes may now be completely empty (`//////`), instead of erroring out with a confusing message. - Fix #2388: Heredocs and heregexes used to be lexed simply, which meant that you couldn't nest a heredoc within a heredoc (double-quoted, that is) or a heregex inside a heregex. - Fix #2321: If you used division inside interpolation and then a slash later in the string containing that interpolation, the division slash and the latter slash was erraneously matched as a regex. This has been fixed. - Indentation inside interpolations in heredocs no longer affect how much indentation is removed from each line of the heredoc (which is more intuitive). - Whitespace is now correctly trimmed from the start and end of strings in a few edge cases. - Last but not least, the lexing of interpolated strings now seems to be more efficient. For a regular double-quoted string, we used to use a custom function to find the end of it (taking interpolations and interpolations within interpolations etc. into account). Then we used to re-find the interpolations and recursively lex their contents. In effect, the same string was processed twice, or even more in the case of deeper nesting of interpolations. Now the same string is processed just once. - Code duplication between regular strings, heredocs, regular regexes and heregexes has been reduced. - The above two points should result in more easily read code, too.
2015-01-03 22:40:43 +00:00
index = regex.length;
ref2 = this.tokens, prev = ref2[ref2.length - 1];
Fix #3410, #3182: Allow regex to start with space or = A regex may not follow a specific set of tokens. These were already known before in the `NOT_REGEX` and `NOT_SPACED_REGEX` arrays. (However, I've refactored them to be more correct and to add a few missing tokens). In all other cases (except after a spaced callable) a slash is the start of a regex, and may now start with a space or an equals sign. It’s really that simple! A slash after a spaced callable is the only ambigous case. We cannot know if that's division or function application with a regex as the argument. The spacing determines which is which: Space on both sides: - `a / b/i` -> `a / b / i` - `a /= b/i` -> `a /= b / i` No spaces: - `a/b/i` -> `a / b / i` - `a/=b/i` -> `a /= b / i` Space on the right side: - `a/ b/i` -> `a / b / i` - `a/= b/i` -> `a /= b / i` Space on the left side: - `a /b/i` -> `a(/b/i)` - `a /=b/i` -> `a(/=b/i)` The last case used to compile to `a /= b / i`, but that has been changed to be consistent with the `/` operator. The last case really looks like a regex, so it should be parsed as one. Moreover, you may now also space the `/` and `/=` operators with other whitespace characters than a space (such as tabs and non-breaking spaces) for consistency. Lastly, unclosed regexes are now reported as such, instead of generating some other confusing error message. It should perhaps also be noted that apart from escaping (such as `a /\ b/`) you may now also use parentheses to disambiguate division and regex: `a (/ b/)`. See https://github.com/jashkenas/coffeescript/issues/3182#issuecomment-26688427.
2015-01-10 00:48:00 +00:00
if (prev) {
2015-02-07 19:16:59 +00:00
if (prev.spaced && (ref3 = prev[0], indexOf.call(CALLABLE, ref3) >= 0)) {
Fix #3410, #3182: Allow regex to start with space or = A regex may not follow a specific set of tokens. These were already known before in the `NOT_REGEX` and `NOT_SPACED_REGEX` arrays. (However, I've refactored them to be more correct and to add a few missing tokens). In all other cases (except after a spaced callable) a slash is the start of a regex, and may now start with a space or an equals sign. It’s really that simple! A slash after a spaced callable is the only ambigous case. We cannot know if that's division or function application with a regex as the argument. The spacing determines which is which: Space on both sides: - `a / b/i` -> `a / b / i` - `a /= b/i` -> `a /= b / i` No spaces: - `a/b/i` -> `a / b / i` - `a/=b/i` -> `a /= b / i` Space on the right side: - `a/ b/i` -> `a / b / i` - `a/= b/i` -> `a /= b / i` Space on the left side: - `a /b/i` -> `a(/b/i)` - `a /=b/i` -> `a(/=b/i)` The last case used to compile to `a /= b / i`, but that has been changed to be consistent with the `/` operator. The last case really looks like a regex, so it should be parsed as one. Moreover, you may now also space the `/` and `/=` operators with other whitespace characters than a space (such as tabs and non-breaking spaces) for consistency. Lastly, unclosed regexes are now reported as such, instead of generating some other confusing error message. It should perhaps also be noted that apart from escaping (such as `a /\ b/`) you may now also use parentheses to disambiguate division and regex: `a (/ b/)`. See https://github.com/jashkenas/coffeescript/issues/3182#issuecomment-26688427.
2015-01-10 00:48:00 +00:00
if (!closed || POSSIBLY_DIVISION.test(regex)) {
return 0;
}
} else if (ref4 = prev[0], indexOf.call(NOT_REGEX, ref4) >= 0) {
Fix #3410, #3182: Allow regex to start with space or = A regex may not follow a specific set of tokens. These were already known before in the `NOT_REGEX` and `NOT_SPACED_REGEX` arrays. (However, I've refactored them to be more correct and to add a few missing tokens). In all other cases (except after a spaced callable) a slash is the start of a regex, and may now start with a space or an equals sign. It’s really that simple! A slash after a spaced callable is the only ambigous case. We cannot know if that's division or function application with a regex as the argument. The spacing determines which is which: Space on both sides: - `a / b/i` -> `a / b / i` - `a /= b/i` -> `a /= b / i` No spaces: - `a/b/i` -> `a / b / i` - `a/=b/i` -> `a /= b / i` Space on the right side: - `a/ b/i` -> `a / b / i` - `a/= b/i` -> `a /= b / i` Space on the left side: - `a /b/i` -> `a(/b/i)` - `a /=b/i` -> `a(/=b/i)` The last case used to compile to `a /= b / i`, but that has been changed to be consistent with the `/` operator. The last case really looks like a regex, so it should be parsed as one. Moreover, you may now also space the `/` and `/=` operators with other whitespace characters than a space (such as tabs and non-breaking spaces) for consistency. Lastly, unclosed regexes are now reported as such, instead of generating some other confusing error message. It should perhaps also be noted that apart from escaping (such as `a /\ b/`) you may now also use parentheses to disambiguate division and regex: `a (/ b/)`. See https://github.com/jashkenas/coffeescript/issues/3182#issuecomment-26688427.
2015-01-10 00:48:00 +00:00
return 0;
}
}
if (!closed) {
this.error('missing / (unclosed regex)');
2012-04-10 18:57:45 +00:00
}
Refactor interpolation (and string and regex) handling in lexer - Fix #3394: Unclosed single-quoted strings (both regular ones and heredocs) used to pass through the lexer, causing a parsing error later, while double-quoted strings caused an error already in the lexing phase. Now both single and double-quoted unclosed strings error out in the lexer (which is the more logical option) with consistent error messages. This also fixes the last comment by @satyr in #3301. - Similar to the above, unclosed heregexes also used to pass through the lexer and not error until in the parsing phase, which resulted in confusing error messages. This has been fixed, too. - Fix #3348, by adding passing tests. - Fix #3529: If a string starts with an interpolation, an empty string is no longer emitted before the interpolation (unless it is needed to coerce the interpolation into a string). - Block comments cannot contain `*/`. Now the error message also shows exactly where the offending `*/`. This improvement might seem unrelated, but I had to touch that code anyway to refactor string and regex related code, and the change was very trivial. Moreover, it's consistent with the next two points. - Regexes cannot start with `*`. Now the error message also shows exactly where the offending `*` is. (It might actually not be exatly at the start in heregexes.) It is a very minor improvement, but it was trivial to add. - Octal escapes in strings are forbidden in CoffeeScript (just like in JavaScript strict mode). However, this used to be the case only for regular strings. Now they are also forbidden in heredocs. Moreover, the errors now point at the offending octal escape. - Invalid regex flags are no longer allowed. This includes repeated modifiers and unknown ones. Moreover, invalid modifiers do not stop a heregex from being matched, which results in better error messages. - Fix #3621: `///a#{1}///` compiles to `RegExp("a" + 1)`. So does `RegExp("a#{1}")`. Still, those two code snippets used to generate different tokens, which is a bit weird, but more importantly causes problems for coffeelint (see clutchski/coffeelint#340). This required lots of tests in test/location.coffee to be updated. Note that some updates to those tests are unrelated to this point; some have been updated to be more consistent (I discovered this because the refactored code happened to be seemingly more correct). - Regular regex literals used to erraneously allow newlines to be escaped, causing invalid JavaScript output. This has been fixed. - Heregexes may now be completely empty (`//////`), instead of erroring out with a confusing message. - Fix #2388: Heredocs and heregexes used to be lexed simply, which meant that you couldn't nest a heredoc within a heredoc (double-quoted, that is) or a heregex inside a heregex. - Fix #2321: If you used division inside interpolation and then a slash later in the string containing that interpolation, the division slash and the latter slash was erraneously matched as a regex. This has been fixed. - Indentation inside interpolations in heredocs no longer affect how much indentation is removed from each line of the heredoc (which is more intuitive). - Whitespace is now correctly trimmed from the start and end of strings in a few edge cases. - Last but not least, the lexing of interpolated strings now seems to be more efficient. For a regular double-quoted string, we used to use a custom function to find the end of it (taking interpolations and interpolations within interpolations etc. into account). Then we used to re-find the interpolations and recursively lex their contents. In effect, the same string was processed twice, or even more in the case of deeper nesting of interpolations. Now the same string is processed just once. - Code duplication between regular strings, heredocs, regular regexes and heregexes has been reduced. - The above two points should result in more easily read code, too.
2015-01-03 22:40:43 +00:00
break;
default:
return 0;
}
flags = REGEX_FLAGS.exec(this.chunk.slice(index))[0];
end = index + flags.length;
2015-02-07 19:16:59 +00:00
origin = this.makeToken('REGEX', null, 0, end);
Refactor interpolation (and string and regex) handling in lexer - Fix #3394: Unclosed single-quoted strings (both regular ones and heredocs) used to pass through the lexer, causing a parsing error later, while double-quoted strings caused an error already in the lexing phase. Now both single and double-quoted unclosed strings error out in the lexer (which is the more logical option) with consistent error messages. This also fixes the last comment by @satyr in #3301. - Similar to the above, unclosed heregexes also used to pass through the lexer and not error until in the parsing phase, which resulted in confusing error messages. This has been fixed, too. - Fix #3348, by adding passing tests. - Fix #3529: If a string starts with an interpolation, an empty string is no longer emitted before the interpolation (unless it is needed to coerce the interpolation into a string). - Block comments cannot contain `*/`. Now the error message also shows exactly where the offending `*/`. This improvement might seem unrelated, but I had to touch that code anyway to refactor string and regex related code, and the change was very trivial. Moreover, it's consistent with the next two points. - Regexes cannot start with `*`. Now the error message also shows exactly where the offending `*` is. (It might actually not be exatly at the start in heregexes.) It is a very minor improvement, but it was trivial to add. - Octal escapes in strings are forbidden in CoffeeScript (just like in JavaScript strict mode). However, this used to be the case only for regular strings. Now they are also forbidden in heredocs. Moreover, the errors now point at the offending octal escape. - Invalid regex flags are no longer allowed. This includes repeated modifiers and unknown ones. Moreover, invalid modifiers do not stop a heregex from being matched, which results in better error messages. - Fix #3621: `///a#{1}///` compiles to `RegExp("a" + 1)`. So does `RegExp("a#{1}")`. Still, those two code snippets used to generate different tokens, which is a bit weird, but more importantly causes problems for coffeelint (see clutchski/coffeelint#340). This required lots of tests in test/location.coffee to be updated. Note that some updates to those tests are unrelated to this point; some have been updated to be more consistent (I discovered this because the refactored code happened to be seemingly more correct). - Regular regex literals used to erraneously allow newlines to be escaped, causing invalid JavaScript output. This has been fixed. - Heregexes may now be completely empty (`//////`), instead of erroring out with a confusing message. - Fix #2388: Heredocs and heregexes used to be lexed simply, which meant that you couldn't nest a heredoc within a heredoc (double-quoted, that is) or a heregex inside a heregex. - Fix #2321: If you used division inside interpolation and then a slash later in the string containing that interpolation, the division slash and the latter slash was erraneously matched as a regex. This has been fixed. - Indentation inside interpolations in heredocs no longer affect how much indentation is removed from each line of the heredoc (which is more intuitive). - Whitespace is now correctly trimmed from the start and end of strings in a few edge cases. - Last but not least, the lexing of interpolated strings now seems to be more efficient. For a regular double-quoted string, we used to use a custom function to find the end of it (taking interpolations and interpolations within interpolations etc. into account). Then we used to re-find the interpolations and recursively lex their contents. In effect, the same string was processed twice, or even more in the case of deeper nesting of interpolations. Now the same string is processed just once. - Code duplication between regular strings, heredocs, regular regexes and heregexes has been reduced. - The above two points should result in more easily read code, too.
2015-01-03 22:40:43 +00:00
switch (false) {
case !!VALID_FLAGS.test(flags):
this.error("invalid regular expression flags " + flags, {
offset: index,
length: flags.length
});
Refactor interpolation (and string and regex) handling in lexer - Fix #3394: Unclosed single-quoted strings (both regular ones and heredocs) used to pass through the lexer, causing a parsing error later, while double-quoted strings caused an error already in the lexing phase. Now both single and double-quoted unclosed strings error out in the lexer (which is the more logical option) with consistent error messages. This also fixes the last comment by @satyr in #3301. - Similar to the above, unclosed heregexes also used to pass through the lexer and not error until in the parsing phase, which resulted in confusing error messages. This has been fixed, too. - Fix #3348, by adding passing tests. - Fix #3529: If a string starts with an interpolation, an empty string is no longer emitted before the interpolation (unless it is needed to coerce the interpolation into a string). - Block comments cannot contain `*/`. Now the error message also shows exactly where the offending `*/`. This improvement might seem unrelated, but I had to touch that code anyway to refactor string and regex related code, and the change was very trivial. Moreover, it's consistent with the next two points. - Regexes cannot start with `*`. Now the error message also shows exactly where the offending `*` is. (It might actually not be exatly at the start in heregexes.) It is a very minor improvement, but it was trivial to add. - Octal escapes in strings are forbidden in CoffeeScript (just like in JavaScript strict mode). However, this used to be the case only for regular strings. Now they are also forbidden in heredocs. Moreover, the errors now point at the offending octal escape. - Invalid regex flags are no longer allowed. This includes repeated modifiers and unknown ones. Moreover, invalid modifiers do not stop a heregex from being matched, which results in better error messages. - Fix #3621: `///a#{1}///` compiles to `RegExp("a" + 1)`. So does `RegExp("a#{1}")`. Still, those two code snippets used to generate different tokens, which is a bit weird, but more importantly causes problems for coffeelint (see clutchski/coffeelint#340). This required lots of tests in test/location.coffee to be updated. Note that some updates to those tests are unrelated to this point; some have been updated to be more consistent (I discovered this because the refactored code happened to be seemingly more correct). - Regular regex literals used to erraneously allow newlines to be escaped, causing invalid JavaScript output. This has been fixed. - Heregexes may now be completely empty (`//////`), instead of erroring out with a confusing message. - Fix #2388: Heredocs and heregexes used to be lexed simply, which meant that you couldn't nest a heredoc within a heredoc (double-quoted, that is) or a heregex inside a heregex. - Fix #2321: If you used division inside interpolation and then a slash later in the string containing that interpolation, the division slash and the latter slash was erraneously matched as a regex. This has been fixed. - Indentation inside interpolations in heredocs no longer affect how much indentation is removed from each line of the heredoc (which is more intuitive). - Whitespace is now correctly trimmed from the start and end of strings in a few edge cases. - Last but not least, the lexing of interpolated strings now seems to be more efficient. For a regular double-quoted string, we used to use a custom function to find the end of it (taking interpolations and interpolations within interpolations etc. into account). Then we used to re-find the interpolations and recursively lex their contents. In effect, the same string was processed twice, or even more in the case of deeper nesting of interpolations. Now the same string is processed just once. - Code duplication between regular strings, heredocs, regular regexes and heregexes has been reduced. - The above two points should result in more easily read code, too.
2015-01-03 22:40:43 +00:00
break;
case !(regex || tokens.length === 1):
if (body == null) {
body = this.formatHeregex(tokens[0][1]);
}
this.token('REGEX', "" + (this.makeDelimitedLiteral(body, {
delimiter: '/'
2015-02-07 19:16:59 +00:00
})) + flags, 0, end, origin);
Refactor interpolation (and string and regex) handling in lexer - Fix #3394: Unclosed single-quoted strings (both regular ones and heredocs) used to pass through the lexer, causing a parsing error later, while double-quoted strings caused an error already in the lexing phase. Now both single and double-quoted unclosed strings error out in the lexer (which is the more logical option) with consistent error messages. This also fixes the last comment by @satyr in #3301. - Similar to the above, unclosed heregexes also used to pass through the lexer and not error until in the parsing phase, which resulted in confusing error messages. This has been fixed, too. - Fix #3348, by adding passing tests. - Fix #3529: If a string starts with an interpolation, an empty string is no longer emitted before the interpolation (unless it is needed to coerce the interpolation into a string). - Block comments cannot contain `*/`. Now the error message also shows exactly where the offending `*/`. This improvement might seem unrelated, but I had to touch that code anyway to refactor string and regex related code, and the change was very trivial. Moreover, it's consistent with the next two points. - Regexes cannot start with `*`. Now the error message also shows exactly where the offending `*` is. (It might actually not be exatly at the start in heregexes.) It is a very minor improvement, but it was trivial to add. - Octal escapes in strings are forbidden in CoffeeScript (just like in JavaScript strict mode). However, this used to be the case only for regular strings. Now they are also forbidden in heredocs. Moreover, the errors now point at the offending octal escape. - Invalid regex flags are no longer allowed. This includes repeated modifiers and unknown ones. Moreover, invalid modifiers do not stop a heregex from being matched, which results in better error messages. - Fix #3621: `///a#{1}///` compiles to `RegExp("a" + 1)`. So does `RegExp("a#{1}")`. Still, those two code snippets used to generate different tokens, which is a bit weird, but more importantly causes problems for coffeelint (see clutchski/coffeelint#340). This required lots of tests in test/location.coffee to be updated. Note that some updates to those tests are unrelated to this point; some have been updated to be more consistent (I discovered this because the refactored code happened to be seemingly more correct). - Regular regex literals used to erraneously allow newlines to be escaped, causing invalid JavaScript output. This has been fixed. - Heregexes may now be completely empty (`//////`), instead of erroring out with a confusing message. - Fix #2388: Heredocs and heregexes used to be lexed simply, which meant that you couldn't nest a heredoc within a heredoc (double-quoted, that is) or a heregex inside a heregex. - Fix #2321: If you used division inside interpolation and then a slash later in the string containing that interpolation, the division slash and the latter slash was erraneously matched as a regex. This has been fixed. - Indentation inside interpolations in heredocs no longer affect how much indentation is removed from each line of the heredoc (which is more intuitive). - Whitespace is now correctly trimmed from the start and end of strings in a few edge cases. - Last but not least, the lexing of interpolated strings now seems to be more efficient. For a regular double-quoted string, we used to use a custom function to find the end of it (taking interpolations and interpolations within interpolations etc. into account). Then we used to re-find the interpolations and recursively lex their contents. In effect, the same string was processed twice, or even more in the case of deeper nesting of interpolations. Now the same string is processed just once. - Code duplication between regular strings, heredocs, regular regexes and heregexes has been reduced. - The above two points should result in more easily read code, too.
2015-01-03 22:40:43 +00:00
break;
default:
2015-02-07 19:16:59 +00:00
this.token('REGEX_START', '(', 0, 0, origin);
Refactor interpolation (and string and regex) handling in lexer - Fix #3394: Unclosed single-quoted strings (both regular ones and heredocs) used to pass through the lexer, causing a parsing error later, while double-quoted strings caused an error already in the lexing phase. Now both single and double-quoted unclosed strings error out in the lexer (which is the more logical option) with consistent error messages. This also fixes the last comment by @satyr in #3301. - Similar to the above, unclosed heregexes also used to pass through the lexer and not error until in the parsing phase, which resulted in confusing error messages. This has been fixed, too. - Fix #3348, by adding passing tests. - Fix #3529: If a string starts with an interpolation, an empty string is no longer emitted before the interpolation (unless it is needed to coerce the interpolation into a string). - Block comments cannot contain `*/`. Now the error message also shows exactly where the offending `*/`. This improvement might seem unrelated, but I had to touch that code anyway to refactor string and regex related code, and the change was very trivial. Moreover, it's consistent with the next two points. - Regexes cannot start with `*`. Now the error message also shows exactly where the offending `*` is. (It might actually not be exatly at the start in heregexes.) It is a very minor improvement, but it was trivial to add. - Octal escapes in strings are forbidden in CoffeeScript (just like in JavaScript strict mode). However, this used to be the case only for regular strings. Now they are also forbidden in heredocs. Moreover, the errors now point at the offending octal escape. - Invalid regex flags are no longer allowed. This includes repeated modifiers and unknown ones. Moreover, invalid modifiers do not stop a heregex from being matched, which results in better error messages. - Fix #3621: `///a#{1}///` compiles to `RegExp("a" + 1)`. So does `RegExp("a#{1}")`. Still, those two code snippets used to generate different tokens, which is a bit weird, but more importantly causes problems for coffeelint (see clutchski/coffeelint#340). This required lots of tests in test/location.coffee to be updated. Note that some updates to those tests are unrelated to this point; some have been updated to be more consistent (I discovered this because the refactored code happened to be seemingly more correct). - Regular regex literals used to erraneously allow newlines to be escaped, causing invalid JavaScript output. This has been fixed. - Heregexes may now be completely empty (`//////`), instead of erroring out with a confusing message. - Fix #2388: Heredocs and heregexes used to be lexed simply, which meant that you couldn't nest a heredoc within a heredoc (double-quoted, that is) or a heregex inside a heregex. - Fix #2321: If you used division inside interpolation and then a slash later in the string containing that interpolation, the division slash and the latter slash was erraneously matched as a regex. This has been fixed. - Indentation inside interpolations in heredocs no longer affect how much indentation is removed from each line of the heredoc (which is more intuitive). - Whitespace is now correctly trimmed from the start and end of strings in a few edge cases. - Last but not least, the lexing of interpolated strings now seems to be more efficient. For a regular double-quoted string, we used to use a custom function to find the end of it (taking interpolations and interpolations within interpolations etc. into account). Then we used to re-find the interpolations and recursively lex their contents. In effect, the same string was processed twice, or even more in the case of deeper nesting of interpolations. Now the same string is processed just once. - Code duplication between regular strings, heredocs, regular regexes and heregexes has been reduced. - The above two points should result in more easily read code, too.
2015-01-03 22:40:43 +00:00
this.token('IDENTIFIER', 'RegExp', 0, 0);
2015-02-07 19:16:59 +00:00
this.token('CALL_START', '(', 0, 0);
this.mergeInterpolationTokens(tokens, {
delimiter: '"',
double: true
}, this.formatHeregex);
Refactor interpolation (and string and regex) handling in lexer - Fix #3394: Unclosed single-quoted strings (both regular ones and heredocs) used to pass through the lexer, causing a parsing error later, while double-quoted strings caused an error already in the lexing phase. Now both single and double-quoted unclosed strings error out in the lexer (which is the more logical option) with consistent error messages. This also fixes the last comment by @satyr in #3301. - Similar to the above, unclosed heregexes also used to pass through the lexer and not error until in the parsing phase, which resulted in confusing error messages. This has been fixed, too. - Fix #3348, by adding passing tests. - Fix #3529: If a string starts with an interpolation, an empty string is no longer emitted before the interpolation (unless it is needed to coerce the interpolation into a string). - Block comments cannot contain `*/`. Now the error message also shows exactly where the offending `*/`. This improvement might seem unrelated, but I had to touch that code anyway to refactor string and regex related code, and the change was very trivial. Moreover, it's consistent with the next two points. - Regexes cannot start with `*`. Now the error message also shows exactly where the offending `*` is. (It might actually not be exatly at the start in heregexes.) It is a very minor improvement, but it was trivial to add. - Octal escapes in strings are forbidden in CoffeeScript (just like in JavaScript strict mode). However, this used to be the case only for regular strings. Now they are also forbidden in heredocs. Moreover, the errors now point at the offending octal escape. - Invalid regex flags are no longer allowed. This includes repeated modifiers and unknown ones. Moreover, invalid modifiers do not stop a heregex from being matched, which results in better error messages. - Fix #3621: `///a#{1}///` compiles to `RegExp("a" + 1)`. So does `RegExp("a#{1}")`. Still, those two code snippets used to generate different tokens, which is a bit weird, but more importantly causes problems for coffeelint (see clutchski/coffeelint#340). This required lots of tests in test/location.coffee to be updated. Note that some updates to those tests are unrelated to this point; some have been updated to be more consistent (I discovered this because the refactored code happened to be seemingly more correct). - Regular regex literals used to erraneously allow newlines to be escaped, causing invalid JavaScript output. This has been fixed. - Heregexes may now be completely empty (`//////`), instead of erroring out with a confusing message. - Fix #2388: Heredocs and heregexes used to be lexed simply, which meant that you couldn't nest a heredoc within a heredoc (double-quoted, that is) or a heregex inside a heregex. - Fix #2321: If you used division inside interpolation and then a slash later in the string containing that interpolation, the division slash and the latter slash was erraneously matched as a regex. This has been fixed. - Indentation inside interpolations in heredocs no longer affect how much indentation is removed from each line of the heredoc (which is more intuitive). - Whitespace is now correctly trimmed from the start and end of strings in a few edge cases. - Last but not least, the lexing of interpolated strings now seems to be more efficient. For a regular double-quoted string, we used to use a custom function to find the end of it (taking interpolations and interpolations within interpolations etc. into account). Then we used to re-find the interpolations and recursively lex their contents. In effect, the same string was processed twice, or even more in the case of deeper nesting of interpolations. Now the same string is processed just once. - Code duplication between regular strings, heredocs, regular regexes and heregexes has been reduced. - The above two points should result in more easily read code, too.
2015-01-03 22:40:43 +00:00
if (flags) {
this.token(',', ',', index, 0);
this.token('STRING', '"' + flags + '"', index, flags.length);
}
2015-02-07 19:16:59 +00:00
this.token(')', ')', end, 0);
this.token('REGEX_END', ')', end, 0);
Refactor interpolation (and string and regex) handling in lexer - Fix #3394: Unclosed single-quoted strings (both regular ones and heredocs) used to pass through the lexer, causing a parsing error later, while double-quoted strings caused an error already in the lexing phase. Now both single and double-quoted unclosed strings error out in the lexer (which is the more logical option) with consistent error messages. This also fixes the last comment by @satyr in #3301. - Similar to the above, unclosed heregexes also used to pass through the lexer and not error until in the parsing phase, which resulted in confusing error messages. This has been fixed, too. - Fix #3348, by adding passing tests. - Fix #3529: If a string starts with an interpolation, an empty string is no longer emitted before the interpolation (unless it is needed to coerce the interpolation into a string). - Block comments cannot contain `*/`. Now the error message also shows exactly where the offending `*/`. This improvement might seem unrelated, but I had to touch that code anyway to refactor string and regex related code, and the change was very trivial. Moreover, it's consistent with the next two points. - Regexes cannot start with `*`. Now the error message also shows exactly where the offending `*` is. (It might actually not be exatly at the start in heregexes.) It is a very minor improvement, but it was trivial to add. - Octal escapes in strings are forbidden in CoffeeScript (just like in JavaScript strict mode). However, this used to be the case only for regular strings. Now they are also forbidden in heredocs. Moreover, the errors now point at the offending octal escape. - Invalid regex flags are no longer allowed. This includes repeated modifiers and unknown ones. Moreover, invalid modifiers do not stop a heregex from being matched, which results in better error messages. - Fix #3621: `///a#{1}///` compiles to `RegExp("a" + 1)`. So does `RegExp("a#{1}")`. Still, those two code snippets used to generate different tokens, which is a bit weird, but more importantly causes problems for coffeelint (see clutchski/coffeelint#340). This required lots of tests in test/location.coffee to be updated. Note that some updates to those tests are unrelated to this point; some have been updated to be more consistent (I discovered this because the refactored code happened to be seemingly more correct). - Regular regex literals used to erraneously allow newlines to be escaped, causing invalid JavaScript output. This has been fixed. - Heregexes may now be completely empty (`//////`), instead of erroring out with a confusing message. - Fix #2388: Heredocs and heregexes used to be lexed simply, which meant that you couldn't nest a heredoc within a heredoc (double-quoted, that is) or a heregex inside a heregex. - Fix #2321: If you used division inside interpolation and then a slash later in the string containing that interpolation, the division slash and the latter slash was erraneously matched as a regex. This has been fixed. - Indentation inside interpolations in heredocs no longer affect how much indentation is removed from each line of the heredoc (which is more intuitive). - Whitespace is now correctly trimmed from the start and end of strings in a few edge cases. - Last but not least, the lexing of interpolated strings now seems to be more efficient. For a regular double-quoted string, we used to use a custom function to find the end of it (taking interpolations and interpolations within interpolations etc. into account). Then we used to re-find the interpolations and recursively lex their contents. In effect, the same string was processed twice, or even more in the case of deeper nesting of interpolations. Now the same string is processed just once. - Code duplication between regular strings, heredocs, regular regexes and heregexes has been reduced. - The above two points should result in more easily read code, too.
2015-01-03 22:40:43 +00:00
}
return end;
};
Lexer.prototype.lineToken = function() {
var diff, indent, match, minLiteralLength, newIndentLiteral, noNewlines, size;
2012-04-10 18:57:45 +00:00
if (!(match = MULTI_DENT.exec(this.chunk))) {
return 0;
}
indent = match[0];
this.seenFor = false;
2010-09-23 05:14:18 +00:00
size = indent.length - 1 - indent.lastIndexOf('\n');
2010-10-25 18:56:02 +00:00
noNewlines = this.unfinished();
newIndentLiteral = size > 0 ? indent.slice(-size) : '';
if (!/^(.?)\1*$/.exec(newIndentLiteral)) {
this.error('mixed indentation', {
offset: indent.length
});
return indent.length;
}
minLiteralLength = Math.min(newIndentLiteral.length, this.indentLiteral.length);
if (newIndentLiteral.slice(0, minLiteralLength) !== this.indentLiteral.slice(0, minLiteralLength)) {
this.error('indentation mismatch', {
offset: indent.length
});
return indent.length;
}
if (size - this.indebt === this.indent) {
if (noNewlines) {
this.suppressNewlines();
} else {
this.newlineToken(0);
}
return indent.length;
}
if (size > this.indent) {
if (noNewlines) {
this.indebt = size - this.indent;
this.suppressNewlines();
return indent.length;
}
if (!this.tokens.length) {
this.baseIndent = this.indent = size;
this.indentLiteral = newIndentLiteral;
return indent.length;
}
diff = size - this.indent + this.outdebt;
this.token('INDENT', diff, indent.length - size, size);
this.indents.push(diff);
this.ends.push({
tag: 'OUTDENT'
});
this.outdebt = this.indebt = 0;
2014-01-22 18:30:13 +00:00
this.indent = size;
this.indentLiteral = newIndentLiteral;
} else if (size < this.baseIndent) {
this.error('missing indentation', {
offset: indent.length
});
} else {
this.indebt = 0;
this.outdentToken(this.indent - size, noNewlines, indent.length);
}
return indent.length;
};
Lexer.prototype.outdentToken = function(moveOut, noNewlines, outdentLength) {
var decreasedIndent, dent, lastIndent, ref2;
2014-01-22 18:30:13 +00:00
decreasedIndent = this.indent - moveOut;
while (moveOut > 0) {
2014-01-22 18:30:13 +00:00
lastIndent = this.indents[this.indents.length - 1];
if (!lastIndent) {
moveOut = 0;
2014-01-22 18:30:13 +00:00
} else if (lastIndent === this.outdebt) {
moveOut -= this.outdebt;
this.outdebt = 0;
2014-01-22 18:30:13 +00:00
} else if (lastIndent < this.outdebt) {
this.outdebt -= lastIndent;
moveOut -= lastIndent;
} else {
dent = this.indents.pop() + this.outdebt;
if (outdentLength && (ref2 = this.chunk[outdentLength], indexOf.call(INDENTABLE_CLOSERS, ref2) >= 0)) {
2014-01-22 18:30:13 +00:00
decreasedIndent -= dent - moveOut;
moveOut = dent;
}
this.outdebt = 0;
2011-09-16 23:26:04 +00:00
this.pair('OUTDENT');
2014-01-22 18:30:13 +00:00
this.token('OUTDENT', moveOut, 0, outdentLength);
moveOut -= dent;
}
}
2012-04-10 18:57:45 +00:00
if (dent) {
this.outdebt -= moveOut;
}
while (this.value() === ';') {
this.tokens.pop();
}
if (!(this.tag() === 'TERMINATOR' || noNewlines)) {
this.token('TERMINATOR', '\n', outdentLength, 0);
}
this.indent = decreasedIndent;
this.indentLiteral = this.indentLiteral.slice(0, decreasedIndent);
return this;
};
Lexer.prototype.whitespaceToken = function() {
var match, nline, prev, ref2;
if (!((match = WHITESPACE.exec(this.chunk)) || (nline = this.chunk.charAt(0) === '\n'))) {
return 0;
}
ref2 = this.tokens, prev = ref2[ref2.length - 1];
2012-04-10 18:57:45 +00:00
if (prev) {
prev[match ? 'spaced' : 'newLine'] = true;
}
if (match) {
return match[0].length;
} else {
return 0;
}
};
Lexer.prototype.newlineToken = function(offset) {
while (this.value() === ';') {
this.tokens.pop();
}
2012-04-10 18:57:45 +00:00
if (this.tag() !== 'TERMINATOR') {
this.token('TERMINATOR', '\n', offset, 0);
2012-04-10 18:57:45 +00:00
}
return this;
};
Lexer.prototype.suppressNewlines = function() {
2012-04-10 18:57:45 +00:00
if (this.value() === '\\') {
this.tokens.pop();
}
return this;
};
Lexer.prototype.literalToken = function() {
var match, message, origin, prev, ref2, ref3, ref4, ref5, ref6, skipToken, tag, token, value;
2010-10-05 14:31:48 +00:00
if (match = OPERATOR.exec(this.chunk)) {
value = match[0];
2012-04-10 18:57:45 +00:00
if (CODE.test(value)) {
this.tagParameters();
}
2010-09-23 05:14:18 +00:00
} else {
value = this.chunk.charAt(0);
}
tag = value;
ref2 = this.tokens, prev = ref2[ref2.length - 1];
if (prev && indexOf.call(['='].concat(slice.call(COMPOUND_ASSIGN)), value) >= 0) {
skipToken = false;
if (value === '=' && ((ref3 = prev[1]) === '||' || ref3 === '&&') && !prev.spaced) {
2010-10-05 14:31:48 +00:00
prev[0] = 'COMPOUND_ASSIGN';
prev[1] += '=';
prev = this.tokens[this.tokens.length - 2];
skipToken = true;
}
if (prev && prev[0] !== 'PROPERTY') {
origin = (ref4 = prev.origin) != null ? ref4 : prev;
message = isUnassignable(prev[1], origin[1]);
if (message) {
this.error(message, origin[2]);
}
}
if (skipToken) {
return value.length;
2010-07-25 05:36:50 +00:00
}
2010-07-25 05:23:37 +00:00
}
if (value === '{' && (prev != null ? prev[0] : void 0) === 'EXPORT') {
this.exportSpecifierList = true;
} else if (this.exportSpecifierList && value === '}') {
this.exportSpecifierList = false;
}
if (value === ';') {
Support import and export of ES2015 modules (#4300) This pull request adds support for ES2015 modules, by recognizing `import` and `export` statements. The following syntaxes are supported, based on the MDN [import](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Statements/import) and [export](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Statements/export) pages: ```js import "module-name" import defaultMember from "module-name" import * as name from "module-name" import { } from "module-name" import { member } from "module-name" import { member as alias } from "module-name" import { member1, member2 as alias2, … } from "module-name" import defaultMember, * as name from "module-name" import defaultMember, { … } from "module-name" export default expression export class name export { } export { name } export { name as exportedName } export { name as default } export { name1, name2 as exportedName2, name3 as default, … } export * from "module-name" export { … } from "module-name" ``` As a subsitute for ECMAScript’s `export var name = …` and `export function name {}`, CoffeeScript also supports: ```js export name = … ``` CoffeeScript also supports optional commas within `{ … }`. This PR converts the supported `import` and `export` statements into ES2015 `import` and `export` statements; it **does not resolve the modules**. So any CoffeeScript with `import` or `export` statements will be output as ES2015, and will need to be transpiled by another tool such as Babel before it can be used in a browser. We will need to add a warning to the documentation explaining this. This should be fully backwards-compatible, as `import` and `export` were previously reserved keywords. No flags are used. There are extensive tests included, though because no current JavaScript runtime supports `import` or `export`, the tests compare strings of what the compiled CoffeeScript output is against what the expected ES2015 should be. I also conducted two more elaborate tests: * I forked the [ember-piqu](https://github.com/pauc/piqu-ember) project, which was an Ember CLI app that used ember-cli-coffeescript and [ember-cli-coffees6](https://github.com/alexspeller/ember-cli-coffees6) (which adds “support” for `import`/`export` by wrapping such statements in backticks before passing the result to the CoffeeScript compiler). I removed `ember-cli-coffees6` and replaced the CoffeeScript compiler used in the build chain with this code, and the app built without errors. [Demo here.](https://github.com/GeoffreyBooth/coffeescript-modules-test-piqu) * I also forked the [CoffeeScript version of Meteor’s Todos example app](https://github.com/meteor/todos/tree/coffeescript), and replaced all of its `require` statements with the `import` and `export` statements from the original ES2015 version of the app on its `master` branch. I then updated the `coffeescript` Meteor package in the app to use this new code, and again the app builds without errors. [Demo here.](https://github.com/GeoffreyBooth/coffeescript-modules-test-meteor-todos) The discussion history for this work started [here](https://github.com/jashkenas/coffeescript/pull/4160) and continued [here](https://github.com/GeoffreyBooth/coffeescript/pull/2). @lydell provided guidance, and @JimPanic and @rattrayalex contributed essential code.
2016-09-14 18:46:05 +00:00
this.seenFor = this.seenImport = this.seenExport = false;
tag = 'TERMINATOR';
} else if (value === '*' && prev[0] === 'EXPORT') {
tag = 'EXPORT_ALL';
} else if (indexOf.call(MATH, value) >= 0) {
tag = 'MATH';
} else if (indexOf.call(COMPARE, value) >= 0) {
tag = 'COMPARE';
} else if (indexOf.call(COMPOUND_ASSIGN, value) >= 0) {
tag = 'COMPOUND_ASSIGN';
} else if (indexOf.call(UNARY, value) >= 0) {
tag = 'UNARY';
} else if (indexOf.call(UNARY_MATH, value) >= 0) {
tag = 'UNARY_MATH';
} else if (indexOf.call(SHIFT, value) >= 0) {
tag = 'SHIFT';
} else if (value === '?' && (prev != null ? prev.spaced : void 0)) {
tag = 'BIN?';
} else if (prev && !prev.spaced) {
2015-02-07 19:16:59 +00:00
if (value === '(' && (ref5 = prev[0], indexOf.call(CALLABLE, ref5) >= 0)) {
2012-04-10 18:57:45 +00:00
if (prev[0] === '?') {
prev[0] = 'FUNC_EXIST';
}
tag = 'CALL_START';
} else if (value === '[' && (ref6 = prev[0], indexOf.call(INDEXABLE, ref6) >= 0)) {
tag = 'INDEX_START';
switch (prev[0]) {
2010-09-23 05:14:18 +00:00
case '?':
2010-10-05 14:31:48 +00:00
prev[0] = 'INDEX_SOAK';
}
}
}
token = this.makeToken(tag, value);
2011-09-16 23:26:04 +00:00
switch (value) {
case '(':
case '{':
case '[':
this.ends.push({
tag: INVERSES[value],
origin: token
});
2011-09-16 23:26:04 +00:00
break;
case ')':
case '}':
case ']':
this.pair(value);
}
this.tokens.push(token);
return value.length;
};
Lexer.prototype.tagParameters = function() {
2010-10-26 04:09:46 +00:00
var i, stack, tok, tokens;
2012-04-10 18:57:45 +00:00
if (this.tag() !== ')') {
return this;
}
2010-10-26 04:09:46 +00:00
stack = [];
tokens = this.tokens;
i = tokens.length;
tokens[--i][0] = 'PARAM_END';
while (tok = tokens[--i]) {
switch (tok[0]) {
2010-09-22 03:58:05 +00:00
case ')':
2010-10-26 04:09:46 +00:00
stack.push(tok);
2010-09-22 03:58:05 +00:00
break;
case '(':
case 'CALL_START':
2010-10-26 04:09:46 +00:00
if (stack.length) {
stack.pop();
} else if (tok[0] === '(') {
2010-10-26 04:09:46 +00:00
tok[0] = 'PARAM_START';
return this;
} else {
return this;
2010-10-26 04:09:46 +00:00
}
}
}
return this;
};
Lexer.prototype.closeIndentation = function() {
return this.outdentToken(this.indent);
};
Lexer.prototype.matchWithInterpolations = function(regex, delimiter) {
var close, column, firstToken, index, lastToken, line, nested, offsetInChunk, open, ref2, ref3, ref4, str, strPart, tokens;
tokens = [];
offsetInChunk = delimiter.length;
if (this.chunk.slice(0, offsetInChunk) !== delimiter) {
return null;
}
str = this.chunk.slice(offsetInChunk);
Refactor interpolation (and string and regex) handling in lexer - Fix #3394: Unclosed single-quoted strings (both regular ones and heredocs) used to pass through the lexer, causing a parsing error later, while double-quoted strings caused an error already in the lexing phase. Now both single and double-quoted unclosed strings error out in the lexer (which is the more logical option) with consistent error messages. This also fixes the last comment by @satyr in #3301. - Similar to the above, unclosed heregexes also used to pass through the lexer and not error until in the parsing phase, which resulted in confusing error messages. This has been fixed, too. - Fix #3348, by adding passing tests. - Fix #3529: If a string starts with an interpolation, an empty string is no longer emitted before the interpolation (unless it is needed to coerce the interpolation into a string). - Block comments cannot contain `*/`. Now the error message also shows exactly where the offending `*/`. This improvement might seem unrelated, but I had to touch that code anyway to refactor string and regex related code, and the change was very trivial. Moreover, it's consistent with the next two points. - Regexes cannot start with `*`. Now the error message also shows exactly where the offending `*` is. (It might actually not be exatly at the start in heregexes.) It is a very minor improvement, but it was trivial to add. - Octal escapes in strings are forbidden in CoffeeScript (just like in JavaScript strict mode). However, this used to be the case only for regular strings. Now they are also forbidden in heredocs. Moreover, the errors now point at the offending octal escape. - Invalid regex flags are no longer allowed. This includes repeated modifiers and unknown ones. Moreover, invalid modifiers do not stop a heregex from being matched, which results in better error messages. - Fix #3621: `///a#{1}///` compiles to `RegExp("a" + 1)`. So does `RegExp("a#{1}")`. Still, those two code snippets used to generate different tokens, which is a bit weird, but more importantly causes problems for coffeelint (see clutchski/coffeelint#340). This required lots of tests in test/location.coffee to be updated. Note that some updates to those tests are unrelated to this point; some have been updated to be more consistent (I discovered this because the refactored code happened to be seemingly more correct). - Regular regex literals used to erraneously allow newlines to be escaped, causing invalid JavaScript output. This has been fixed. - Heregexes may now be completely empty (`//////`), instead of erroring out with a confusing message. - Fix #2388: Heredocs and heregexes used to be lexed simply, which meant that you couldn't nest a heredoc within a heredoc (double-quoted, that is) or a heregex inside a heregex. - Fix #2321: If you used division inside interpolation and then a slash later in the string containing that interpolation, the division slash and the latter slash was erraneously matched as a regex. This has been fixed. - Indentation inside interpolations in heredocs no longer affect how much indentation is removed from each line of the heredoc (which is more intuitive). - Whitespace is now correctly trimmed from the start and end of strings in a few edge cases. - Last but not least, the lexing of interpolated strings now seems to be more efficient. For a regular double-quoted string, we used to use a custom function to find the end of it (taking interpolations and interpolations within interpolations etc. into account). Then we used to re-find the interpolations and recursively lex their contents. In effect, the same string was processed twice, or even more in the case of deeper nesting of interpolations. Now the same string is processed just once. - Code duplication between regular strings, heredocs, regular regexes and heregexes has been reduced. - The above two points should result in more easily read code, too.
2015-01-03 22:40:43 +00:00
while (true) {
strPart = regex.exec(str)[0];
this.validateEscapes(strPart, {
isRegex: delimiter.charAt(0) === '/',
offsetInChunk: offsetInChunk
});
Refactor interpolation (and string and regex) handling in lexer - Fix #3394: Unclosed single-quoted strings (both regular ones and heredocs) used to pass through the lexer, causing a parsing error later, while double-quoted strings caused an error already in the lexing phase. Now both single and double-quoted unclosed strings error out in the lexer (which is the more logical option) with consistent error messages. This also fixes the last comment by @satyr in #3301. - Similar to the above, unclosed heregexes also used to pass through the lexer and not error until in the parsing phase, which resulted in confusing error messages. This has been fixed, too. - Fix #3348, by adding passing tests. - Fix #3529: If a string starts with an interpolation, an empty string is no longer emitted before the interpolation (unless it is needed to coerce the interpolation into a string). - Block comments cannot contain `*/`. Now the error message also shows exactly where the offending `*/`. This improvement might seem unrelated, but I had to touch that code anyway to refactor string and regex related code, and the change was very trivial. Moreover, it's consistent with the next two points. - Regexes cannot start with `*`. Now the error message also shows exactly where the offending `*` is. (It might actually not be exatly at the start in heregexes.) It is a very minor improvement, but it was trivial to add. - Octal escapes in strings are forbidden in CoffeeScript (just like in JavaScript strict mode). However, this used to be the case only for regular strings. Now they are also forbidden in heredocs. Moreover, the errors now point at the offending octal escape. - Invalid regex flags are no longer allowed. This includes repeated modifiers and unknown ones. Moreover, invalid modifiers do not stop a heregex from being matched, which results in better error messages. - Fix #3621: `///a#{1}///` compiles to `RegExp("a" + 1)`. So does `RegExp("a#{1}")`. Still, those two code snippets used to generate different tokens, which is a bit weird, but more importantly causes problems for coffeelint (see clutchski/coffeelint#340). This required lots of tests in test/location.coffee to be updated. Note that some updates to those tests are unrelated to this point; some have been updated to be more consistent (I discovered this because the refactored code happened to be seemingly more correct). - Regular regex literals used to erraneously allow newlines to be escaped, causing invalid JavaScript output. This has been fixed. - Heregexes may now be completely empty (`//////`), instead of erroring out with a confusing message. - Fix #2388: Heredocs and heregexes used to be lexed simply, which meant that you couldn't nest a heredoc within a heredoc (double-quoted, that is) or a heregex inside a heregex. - Fix #2321: If you used division inside interpolation and then a slash later in the string containing that interpolation, the division slash and the latter slash was erraneously matched as a regex. This has been fixed. - Indentation inside interpolations in heredocs no longer affect how much indentation is removed from each line of the heredoc (which is more intuitive). - Whitespace is now correctly trimmed from the start and end of strings in a few edge cases. - Last but not least, the lexing of interpolated strings now seems to be more efficient. For a regular double-quoted string, we used to use a custom function to find the end of it (taking interpolations and interpolations within interpolations etc. into account). Then we used to re-find the interpolations and recursively lex their contents. In effect, the same string was processed twice, or even more in the case of deeper nesting of interpolations. Now the same string is processed just once. - Code duplication between regular strings, heredocs, regular regexes and heregexes has been reduced. - The above two points should result in more easily read code, too.
2015-01-03 22:40:43 +00:00
tokens.push(this.makeToken('NEOSTRING', strPart, offsetInChunk));
str = str.slice(strPart.length);
offsetInChunk += strPart.length;
if (str.slice(0, 2) !== '#{') {
break;
2012-04-10 18:57:45 +00:00
}
ref2 = this.getLineAndColumnFromChunk(offsetInChunk + 1), line = ref2[0], column = ref2[1];
ref3 = new Lexer().tokenize(str.slice(1), {
Refactor interpolation (and string and regex) handling in lexer - Fix #3394: Unclosed single-quoted strings (both regular ones and heredocs) used to pass through the lexer, causing a parsing error later, while double-quoted strings caused an error already in the lexing phase. Now both single and double-quoted unclosed strings error out in the lexer (which is the more logical option) with consistent error messages. This also fixes the last comment by @satyr in #3301. - Similar to the above, unclosed heregexes also used to pass through the lexer and not error until in the parsing phase, which resulted in confusing error messages. This has been fixed, too. - Fix #3348, by adding passing tests. - Fix #3529: If a string starts with an interpolation, an empty string is no longer emitted before the interpolation (unless it is needed to coerce the interpolation into a string). - Block comments cannot contain `*/`. Now the error message also shows exactly where the offending `*/`. This improvement might seem unrelated, but I had to touch that code anyway to refactor string and regex related code, and the change was very trivial. Moreover, it's consistent with the next two points. - Regexes cannot start with `*`. Now the error message also shows exactly where the offending `*` is. (It might actually not be exatly at the start in heregexes.) It is a very minor improvement, but it was trivial to add. - Octal escapes in strings are forbidden in CoffeeScript (just like in JavaScript strict mode). However, this used to be the case only for regular strings. Now they are also forbidden in heredocs. Moreover, the errors now point at the offending octal escape. - Invalid regex flags are no longer allowed. This includes repeated modifiers and unknown ones. Moreover, invalid modifiers do not stop a heregex from being matched, which results in better error messages. - Fix #3621: `///a#{1}///` compiles to `RegExp("a" + 1)`. So does `RegExp("a#{1}")`. Still, those two code snippets used to generate different tokens, which is a bit weird, but more importantly causes problems for coffeelint (see clutchski/coffeelint#340). This required lots of tests in test/location.coffee to be updated. Note that some updates to those tests are unrelated to this point; some have been updated to be more consistent (I discovered this because the refactored code happened to be seemingly more correct). - Regular regex literals used to erraneously allow newlines to be escaped, causing invalid JavaScript output. This has been fixed. - Heregexes may now be completely empty (`//////`), instead of erroring out with a confusing message. - Fix #2388: Heredocs and heregexes used to be lexed simply, which meant that you couldn't nest a heredoc within a heredoc (double-quoted, that is) or a heregex inside a heregex. - Fix #2321: If you used division inside interpolation and then a slash later in the string containing that interpolation, the division slash and the latter slash was erraneously matched as a regex. This has been fixed. - Indentation inside interpolations in heredocs no longer affect how much indentation is removed from each line of the heredoc (which is more intuitive). - Whitespace is now correctly trimmed from the start and end of strings in a few edge cases. - Last but not least, the lexing of interpolated strings now seems to be more efficient. For a regular double-quoted string, we used to use a custom function to find the end of it (taking interpolations and interpolations within interpolations etc. into account). Then we used to re-find the interpolations and recursively lex their contents. In effect, the same string was processed twice, or even more in the case of deeper nesting of interpolations. Now the same string is processed just once. - Code duplication between regular strings, heredocs, regular regexes and heregexes has been reduced. - The above two points should result in more easily read code, too.
2015-01-03 22:40:43 +00:00
line: line,
column: column,
untilBalanced: true
}), nested = ref3.tokens, index = ref3.index;
Refactor interpolation (and string and regex) handling in lexer - Fix #3394: Unclosed single-quoted strings (both regular ones and heredocs) used to pass through the lexer, causing a parsing error later, while double-quoted strings caused an error already in the lexing phase. Now both single and double-quoted unclosed strings error out in the lexer (which is the more logical option) with consistent error messages. This also fixes the last comment by @satyr in #3301. - Similar to the above, unclosed heregexes also used to pass through the lexer and not error until in the parsing phase, which resulted in confusing error messages. This has been fixed, too. - Fix #3348, by adding passing tests. - Fix #3529: If a string starts with an interpolation, an empty string is no longer emitted before the interpolation (unless it is needed to coerce the interpolation into a string). - Block comments cannot contain `*/`. Now the error message also shows exactly where the offending `*/`. This improvement might seem unrelated, but I had to touch that code anyway to refactor string and regex related code, and the change was very trivial. Moreover, it's consistent with the next two points. - Regexes cannot start with `*`. Now the error message also shows exactly where the offending `*` is. (It might actually not be exatly at the start in heregexes.) It is a very minor improvement, but it was trivial to add. - Octal escapes in strings are forbidden in CoffeeScript (just like in JavaScript strict mode). However, this used to be the case only for regular strings. Now they are also forbidden in heredocs. Moreover, the errors now point at the offending octal escape. - Invalid regex flags are no longer allowed. This includes repeated modifiers and unknown ones. Moreover, invalid modifiers do not stop a heregex from being matched, which results in better error messages. - Fix #3621: `///a#{1}///` compiles to `RegExp("a" + 1)`. So does `RegExp("a#{1}")`. Still, those two code snippets used to generate different tokens, which is a bit weird, but more importantly causes problems for coffeelint (see clutchski/coffeelint#340). This required lots of tests in test/location.coffee to be updated. Note that some updates to those tests are unrelated to this point; some have been updated to be more consistent (I discovered this because the refactored code happened to be seemingly more correct). - Regular regex literals used to erraneously allow newlines to be escaped, causing invalid JavaScript output. This has been fixed. - Heregexes may now be completely empty (`//////`), instead of erroring out with a confusing message. - Fix #2388: Heredocs and heregexes used to be lexed simply, which meant that you couldn't nest a heredoc within a heredoc (double-quoted, that is) or a heregex inside a heregex. - Fix #2321: If you used division inside interpolation and then a slash later in the string containing that interpolation, the division slash and the latter slash was erraneously matched as a regex. This has been fixed. - Indentation inside interpolations in heredocs no longer affect how much indentation is removed from each line of the heredoc (which is more intuitive). - Whitespace is now correctly trimmed from the start and end of strings in a few edge cases. - Last but not least, the lexing of interpolated strings now seems to be more efficient. For a regular double-quoted string, we used to use a custom function to find the end of it (taking interpolations and interpolations within interpolations etc. into account). Then we used to re-find the interpolations and recursively lex their contents. In effect, the same string was processed twice, or even more in the case of deeper nesting of interpolations. Now the same string is processed just once. - Code duplication between regular strings, heredocs, regular regexes and heregexes has been reduced. - The above two points should result in more easily read code, too.
2015-01-03 22:40:43 +00:00
index += 1;
open = nested[0], close = nested[nested.length - 1];
open[0] = open[1] = '(';
close[0] = close[1] = ')';
close.origin = ['', 'end of interpolation', close[2]];
if (((ref4 = nested[1]) != null ? ref4[0] : void 0) === 'TERMINATOR') {
nested.splice(1, 1);
}
Refactor interpolation (and string and regex) handling in lexer - Fix #3394: Unclosed single-quoted strings (both regular ones and heredocs) used to pass through the lexer, causing a parsing error later, while double-quoted strings caused an error already in the lexing phase. Now both single and double-quoted unclosed strings error out in the lexer (which is the more logical option) with consistent error messages. This also fixes the last comment by @satyr in #3301. - Similar to the above, unclosed heregexes also used to pass through the lexer and not error until in the parsing phase, which resulted in confusing error messages. This has been fixed, too. - Fix #3348, by adding passing tests. - Fix #3529: If a string starts with an interpolation, an empty string is no longer emitted before the interpolation (unless it is needed to coerce the interpolation into a string). - Block comments cannot contain `*/`. Now the error message also shows exactly where the offending `*/`. This improvement might seem unrelated, but I had to touch that code anyway to refactor string and regex related code, and the change was very trivial. Moreover, it's consistent with the next two points. - Regexes cannot start with `*`. Now the error message also shows exactly where the offending `*` is. (It might actually not be exatly at the start in heregexes.) It is a very minor improvement, but it was trivial to add. - Octal escapes in strings are forbidden in CoffeeScript (just like in JavaScript strict mode). However, this used to be the case only for regular strings. Now they are also forbidden in heredocs. Moreover, the errors now point at the offending octal escape. - Invalid regex flags are no longer allowed. This includes repeated modifiers and unknown ones. Moreover, invalid modifiers do not stop a heregex from being matched, which results in better error messages. - Fix #3621: `///a#{1}///` compiles to `RegExp("a" + 1)`. So does `RegExp("a#{1}")`. Still, those two code snippets used to generate different tokens, which is a bit weird, but more importantly causes problems for coffeelint (see clutchski/coffeelint#340). This required lots of tests in test/location.coffee to be updated. Note that some updates to those tests are unrelated to this point; some have been updated to be more consistent (I discovered this because the refactored code happened to be seemingly more correct). - Regular regex literals used to erraneously allow newlines to be escaped, causing invalid JavaScript output. This has been fixed. - Heregexes may now be completely empty (`//////`), instead of erroring out with a confusing message. - Fix #2388: Heredocs and heregexes used to be lexed simply, which meant that you couldn't nest a heredoc within a heredoc (double-quoted, that is) or a heregex inside a heregex. - Fix #2321: If you used division inside interpolation and then a slash later in the string containing that interpolation, the division slash and the latter slash was erraneously matched as a regex. This has been fixed. - Indentation inside interpolations in heredocs no longer affect how much indentation is removed from each line of the heredoc (which is more intuitive). - Whitespace is now correctly trimmed from the start and end of strings in a few edge cases. - Last but not least, the lexing of interpolated strings now seems to be more efficient. For a regular double-quoted string, we used to use a custom function to find the end of it (taking interpolations and interpolations within interpolations etc. into account). Then we used to re-find the interpolations and recursively lex their contents. In effect, the same string was processed twice, or even more in the case of deeper nesting of interpolations. Now the same string is processed just once. - Code duplication between regular strings, heredocs, regular regexes and heregexes has been reduced. - The above two points should result in more easily read code, too.
2015-01-03 22:40:43 +00:00
tokens.push(['TOKENS', nested]);
str = str.slice(index);
offsetInChunk += index;
}
if (str.slice(0, delimiter.length) !== delimiter) {
this.error("missing " + delimiter, {
length: delimiter.length
});
2012-04-10 18:57:45 +00:00
}
firstToken = tokens[0], lastToken = tokens[tokens.length - 1];
firstToken[2].first_column -= delimiter.length;
if (lastToken[1].substr(-1) === '\n') {
lastToken[2].last_line += 1;
lastToken[2].last_column = delimiter.length - 1;
} else {
lastToken[2].last_column += delimiter.length;
}
2015-02-07 19:16:59 +00:00
if (lastToken[1].length === 0) {
lastToken[2].last_column -= 1;
}
Refactor interpolation (and string and regex) handling in lexer - Fix #3394: Unclosed single-quoted strings (both regular ones and heredocs) used to pass through the lexer, causing a parsing error later, while double-quoted strings caused an error already in the lexing phase. Now both single and double-quoted unclosed strings error out in the lexer (which is the more logical option) with consistent error messages. This also fixes the last comment by @satyr in #3301. - Similar to the above, unclosed heregexes also used to pass through the lexer and not error until in the parsing phase, which resulted in confusing error messages. This has been fixed, too. - Fix #3348, by adding passing tests. - Fix #3529: If a string starts with an interpolation, an empty string is no longer emitted before the interpolation (unless it is needed to coerce the interpolation into a string). - Block comments cannot contain `*/`. Now the error message also shows exactly where the offending `*/`. This improvement might seem unrelated, but I had to touch that code anyway to refactor string and regex related code, and the change was very trivial. Moreover, it's consistent with the next two points. - Regexes cannot start with `*`. Now the error message also shows exactly where the offending `*` is. (It might actually not be exatly at the start in heregexes.) It is a very minor improvement, but it was trivial to add. - Octal escapes in strings are forbidden in CoffeeScript (just like in JavaScript strict mode). However, this used to be the case only for regular strings. Now they are also forbidden in heredocs. Moreover, the errors now point at the offending octal escape. - Invalid regex flags are no longer allowed. This includes repeated modifiers and unknown ones. Moreover, invalid modifiers do not stop a heregex from being matched, which results in better error messages. - Fix #3621: `///a#{1}///` compiles to `RegExp("a" + 1)`. So does `RegExp("a#{1}")`. Still, those two code snippets used to generate different tokens, which is a bit weird, but more importantly causes problems for coffeelint (see clutchski/coffeelint#340). This required lots of tests in test/location.coffee to be updated. Note that some updates to those tests are unrelated to this point; some have been updated to be more consistent (I discovered this because the refactored code happened to be seemingly more correct). - Regular regex literals used to erraneously allow newlines to be escaped, causing invalid JavaScript output. This has been fixed. - Heregexes may now be completely empty (`//////`), instead of erroring out with a confusing message. - Fix #2388: Heredocs and heregexes used to be lexed simply, which meant that you couldn't nest a heredoc within a heredoc (double-quoted, that is) or a heregex inside a heregex. - Fix #2321: If you used division inside interpolation and then a slash later in the string containing that interpolation, the division slash and the latter slash was erraneously matched as a regex. This has been fixed. - Indentation inside interpolations in heredocs no longer affect how much indentation is removed from each line of the heredoc (which is more intuitive). - Whitespace is now correctly trimmed from the start and end of strings in a few edge cases. - Last but not least, the lexing of interpolated strings now seems to be more efficient. For a regular double-quoted string, we used to use a custom function to find the end of it (taking interpolations and interpolations within interpolations etc. into account). Then we used to re-find the interpolations and recursively lex their contents. In effect, the same string was processed twice, or even more in the case of deeper nesting of interpolations. Now the same string is processed just once. - Code duplication between regular strings, heredocs, regular regexes and heregexes has been reduced. - The above two points should result in more easily read code, too.
2015-01-03 22:40:43 +00:00
return {
tokens: tokens,
index: offsetInChunk + delimiter.length
Refactor interpolation (and string and regex) handling in lexer - Fix #3394: Unclosed single-quoted strings (both regular ones and heredocs) used to pass through the lexer, causing a parsing error later, while double-quoted strings caused an error already in the lexing phase. Now both single and double-quoted unclosed strings error out in the lexer (which is the more logical option) with consistent error messages. This also fixes the last comment by @satyr in #3301. - Similar to the above, unclosed heregexes also used to pass through the lexer and not error until in the parsing phase, which resulted in confusing error messages. This has been fixed, too. - Fix #3348, by adding passing tests. - Fix #3529: If a string starts with an interpolation, an empty string is no longer emitted before the interpolation (unless it is needed to coerce the interpolation into a string). - Block comments cannot contain `*/`. Now the error message also shows exactly where the offending `*/`. This improvement might seem unrelated, but I had to touch that code anyway to refactor string and regex related code, and the change was very trivial. Moreover, it's consistent with the next two points. - Regexes cannot start with `*`. Now the error message also shows exactly where the offending `*` is. (It might actually not be exatly at the start in heregexes.) It is a very minor improvement, but it was trivial to add. - Octal escapes in strings are forbidden in CoffeeScript (just like in JavaScript strict mode). However, this used to be the case only for regular strings. Now they are also forbidden in heredocs. Moreover, the errors now point at the offending octal escape. - Invalid regex flags are no longer allowed. This includes repeated modifiers and unknown ones. Moreover, invalid modifiers do not stop a heregex from being matched, which results in better error messages. - Fix #3621: `///a#{1}///` compiles to `RegExp("a" + 1)`. So does `RegExp("a#{1}")`. Still, those two code snippets used to generate different tokens, which is a bit weird, but more importantly causes problems for coffeelint (see clutchski/coffeelint#340). This required lots of tests in test/location.coffee to be updated. Note that some updates to those tests are unrelated to this point; some have been updated to be more consistent (I discovered this because the refactored code happened to be seemingly more correct). - Regular regex literals used to erraneously allow newlines to be escaped, causing invalid JavaScript output. This has been fixed. - Heregexes may now be completely empty (`//////`), instead of erroring out with a confusing message. - Fix #2388: Heredocs and heregexes used to be lexed simply, which meant that you couldn't nest a heredoc within a heredoc (double-quoted, that is) or a heregex inside a heregex. - Fix #2321: If you used division inside interpolation and then a slash later in the string containing that interpolation, the division slash and the latter slash was erraneously matched as a regex. This has been fixed. - Indentation inside interpolations in heredocs no longer affect how much indentation is removed from each line of the heredoc (which is more intuitive). - Whitespace is now correctly trimmed from the start and end of strings in a few edge cases. - Last but not least, the lexing of interpolated strings now seems to be more efficient. For a regular double-quoted string, we used to use a custom function to find the end of it (taking interpolations and interpolations within interpolations etc. into account). Then we used to re-find the interpolations and recursively lex their contents. In effect, the same string was processed twice, or even more in the case of deeper nesting of interpolations. Now the same string is processed just once. - Code duplication between regular strings, heredocs, regular regexes and heregexes has been reduced. - The above two points should result in more easily read code, too.
2015-01-03 22:40:43 +00:00
};
};
Lexer.prototype.mergeInterpolationTokens = function(tokens, options, fn) {
2015-02-07 19:16:59 +00:00
var converted, firstEmptyStringIndex, firstIndex, i, j, lastToken, len, locationToken, lparen, plusToken, ref2, rparen, tag, token, tokensToPush, value;
if (tokens.length > 1) {
lparen = this.token('STRING_START', '(', 0, 0);
2012-04-10 18:57:45 +00:00
}
Refactor interpolation (and string and regex) handling in lexer - Fix #3394: Unclosed single-quoted strings (both regular ones and heredocs) used to pass through the lexer, causing a parsing error later, while double-quoted strings caused an error already in the lexing phase. Now both single and double-quoted unclosed strings error out in the lexer (which is the more logical option) with consistent error messages. This also fixes the last comment by @satyr in #3301. - Similar to the above, unclosed heregexes also used to pass through the lexer and not error until in the parsing phase, which resulted in confusing error messages. This has been fixed, too. - Fix #3348, by adding passing tests. - Fix #3529: If a string starts with an interpolation, an empty string is no longer emitted before the interpolation (unless it is needed to coerce the interpolation into a string). - Block comments cannot contain `*/`. Now the error message also shows exactly where the offending `*/`. This improvement might seem unrelated, but I had to touch that code anyway to refactor string and regex related code, and the change was very trivial. Moreover, it's consistent with the next two points. - Regexes cannot start with `*`. Now the error message also shows exactly where the offending `*` is. (It might actually not be exatly at the start in heregexes.) It is a very minor improvement, but it was trivial to add. - Octal escapes in strings are forbidden in CoffeeScript (just like in JavaScript strict mode). However, this used to be the case only for regular strings. Now they are also forbidden in heredocs. Moreover, the errors now point at the offending octal escape. - Invalid regex flags are no longer allowed. This includes repeated modifiers and unknown ones. Moreover, invalid modifiers do not stop a heregex from being matched, which results in better error messages. - Fix #3621: `///a#{1}///` compiles to `RegExp("a" + 1)`. So does `RegExp("a#{1}")`. Still, those two code snippets used to generate different tokens, which is a bit weird, but more importantly causes problems for coffeelint (see clutchski/coffeelint#340). This required lots of tests in test/location.coffee to be updated. Note that some updates to those tests are unrelated to this point; some have been updated to be more consistent (I discovered this because the refactored code happened to be seemingly more correct). - Regular regex literals used to erraneously allow newlines to be escaped, causing invalid JavaScript output. This has been fixed. - Heregexes may now be completely empty (`//////`), instead of erroring out with a confusing message. - Fix #2388: Heredocs and heregexes used to be lexed simply, which meant that you couldn't nest a heredoc within a heredoc (double-quoted, that is) or a heregex inside a heregex. - Fix #2321: If you used division inside interpolation and then a slash later in the string containing that interpolation, the division slash and the latter slash was erraneously matched as a regex. This has been fixed. - Indentation inside interpolations in heredocs no longer affect how much indentation is removed from each line of the heredoc (which is more intuitive). - Whitespace is now correctly trimmed from the start and end of strings in a few edge cases. - Last but not least, the lexing of interpolated strings now seems to be more efficient. For a regular double-quoted string, we used to use a custom function to find the end of it (taking interpolations and interpolations within interpolations etc. into account). Then we used to re-find the interpolations and recursively lex their contents. In effect, the same string was processed twice, or even more in the case of deeper nesting of interpolations. Now the same string is processed just once. - Code duplication between regular strings, heredocs, regular regexes and heregexes has been reduced. - The above two points should result in more easily read code, too.
2015-01-03 22:40:43 +00:00
firstIndex = this.tokens.length;
for (i = j = 0, len = tokens.length; j < len; i = ++j) {
token = tokens[i];
tag = token[0], value = token[1];
Refactor interpolation (and string and regex) handling in lexer - Fix #3394: Unclosed single-quoted strings (both regular ones and heredocs) used to pass through the lexer, causing a parsing error later, while double-quoted strings caused an error already in the lexing phase. Now both single and double-quoted unclosed strings error out in the lexer (which is the more logical option) with consistent error messages. This also fixes the last comment by @satyr in #3301. - Similar to the above, unclosed heregexes also used to pass through the lexer and not error until in the parsing phase, which resulted in confusing error messages. This has been fixed, too. - Fix #3348, by adding passing tests. - Fix #3529: If a string starts with an interpolation, an empty string is no longer emitted before the interpolation (unless it is needed to coerce the interpolation into a string). - Block comments cannot contain `*/`. Now the error message also shows exactly where the offending `*/`. This improvement might seem unrelated, but I had to touch that code anyway to refactor string and regex related code, and the change was very trivial. Moreover, it's consistent with the next two points. - Regexes cannot start with `*`. Now the error message also shows exactly where the offending `*` is. (It might actually not be exatly at the start in heregexes.) It is a very minor improvement, but it was trivial to add. - Octal escapes in strings are forbidden in CoffeeScript (just like in JavaScript strict mode). However, this used to be the case only for regular strings. Now they are also forbidden in heredocs. Moreover, the errors now point at the offending octal escape. - Invalid regex flags are no longer allowed. This includes repeated modifiers and unknown ones. Moreover, invalid modifiers do not stop a heregex from being matched, which results in better error messages. - Fix #3621: `///a#{1}///` compiles to `RegExp("a" + 1)`. So does `RegExp("a#{1}")`. Still, those two code snippets used to generate different tokens, which is a bit weird, but more importantly causes problems for coffeelint (see clutchski/coffeelint#340). This required lots of tests in test/location.coffee to be updated. Note that some updates to those tests are unrelated to this point; some have been updated to be more consistent (I discovered this because the refactored code happened to be seemingly more correct). - Regular regex literals used to erraneously allow newlines to be escaped, causing invalid JavaScript output. This has been fixed. - Heregexes may now be completely empty (`//////`), instead of erroring out with a confusing message. - Fix #2388: Heredocs and heregexes used to be lexed simply, which meant that you couldn't nest a heredoc within a heredoc (double-quoted, that is) or a heregex inside a heregex. - Fix #2321: If you used division inside interpolation and then a slash later in the string containing that interpolation, the division slash and the latter slash was erraneously matched as a regex. This has been fixed. - Indentation inside interpolations in heredocs no longer affect how much indentation is removed from each line of the heredoc (which is more intuitive). - Whitespace is now correctly trimmed from the start and end of strings in a few edge cases. - Last but not least, the lexing of interpolated strings now seems to be more efficient. For a regular double-quoted string, we used to use a custom function to find the end of it (taking interpolations and interpolations within interpolations etc. into account). Then we used to re-find the interpolations and recursively lex their contents. In effect, the same string was processed twice, or even more in the case of deeper nesting of interpolations. Now the same string is processed just once. - Code duplication between regular strings, heredocs, regular regexes and heregexes has been reduced. - The above two points should result in more easily read code, too.
2015-01-03 22:40:43 +00:00
switch (tag) {
case 'TOKENS':
if (value.length === 2) {
Refactor interpolation (and string and regex) handling in lexer - Fix #3394: Unclosed single-quoted strings (both regular ones and heredocs) used to pass through the lexer, causing a parsing error later, while double-quoted strings caused an error already in the lexing phase. Now both single and double-quoted unclosed strings error out in the lexer (which is the more logical option) with consistent error messages. This also fixes the last comment by @satyr in #3301. - Similar to the above, unclosed heregexes also used to pass through the lexer and not error until in the parsing phase, which resulted in confusing error messages. This has been fixed, too. - Fix #3348, by adding passing tests. - Fix #3529: If a string starts with an interpolation, an empty string is no longer emitted before the interpolation (unless it is needed to coerce the interpolation into a string). - Block comments cannot contain `*/`. Now the error message also shows exactly where the offending `*/`. This improvement might seem unrelated, but I had to touch that code anyway to refactor string and regex related code, and the change was very trivial. Moreover, it's consistent with the next two points. - Regexes cannot start with `*`. Now the error message also shows exactly where the offending `*` is. (It might actually not be exatly at the start in heregexes.) It is a very minor improvement, but it was trivial to add. - Octal escapes in strings are forbidden in CoffeeScript (just like in JavaScript strict mode). However, this used to be the case only for regular strings. Now they are also forbidden in heredocs. Moreover, the errors now point at the offending octal escape. - Invalid regex flags are no longer allowed. This includes repeated modifiers and unknown ones. Moreover, invalid modifiers do not stop a heregex from being matched, which results in better error messages. - Fix #3621: `///a#{1}///` compiles to `RegExp("a" + 1)`. So does `RegExp("a#{1}")`. Still, those two code snippets used to generate different tokens, which is a bit weird, but more importantly causes problems for coffeelint (see clutchski/coffeelint#340). This required lots of tests in test/location.coffee to be updated. Note that some updates to those tests are unrelated to this point; some have been updated to be more consistent (I discovered this because the refactored code happened to be seemingly more correct). - Regular regex literals used to erraneously allow newlines to be escaped, causing invalid JavaScript output. This has been fixed. - Heregexes may now be completely empty (`//////`), instead of erroring out with a confusing message. - Fix #2388: Heredocs and heregexes used to be lexed simply, which meant that you couldn't nest a heredoc within a heredoc (double-quoted, that is) or a heregex inside a heregex. - Fix #2321: If you used division inside interpolation and then a slash later in the string containing that interpolation, the division slash and the latter slash was erraneously matched as a regex. This has been fixed. - Indentation inside interpolations in heredocs no longer affect how much indentation is removed from each line of the heredoc (which is more intuitive). - Whitespace is now correctly trimmed from the start and end of strings in a few edge cases. - Last but not least, the lexing of interpolated strings now seems to be more efficient. For a regular double-quoted string, we used to use a custom function to find the end of it (taking interpolations and interpolations within interpolations etc. into account). Then we used to re-find the interpolations and recursively lex their contents. In effect, the same string was processed twice, or even more in the case of deeper nesting of interpolations. Now the same string is processed just once. - Code duplication between regular strings, heredocs, regular regexes and heregexes has been reduced. - The above two points should result in more easily read code, too.
2015-01-03 22:40:43 +00:00
continue;
}
locationToken = value[0];
tokensToPush = value;
break;
case 'NEOSTRING':
converted = fn(token[1], i);
if (converted.length === 0) {
if (i === 0) {
firstEmptyStringIndex = this.tokens.length;
} else {
continue;
}
}
if (i === 2 && (firstEmptyStringIndex != null)) {
this.tokens.splice(firstEmptyStringIndex, 2);
}
token[0] = 'STRING';
token[1] = this.makeDelimitedLiteral(converted, options);
Refactor interpolation (and string and regex) handling in lexer - Fix #3394: Unclosed single-quoted strings (both regular ones and heredocs) used to pass through the lexer, causing a parsing error later, while double-quoted strings caused an error already in the lexing phase. Now both single and double-quoted unclosed strings error out in the lexer (which is the more logical option) with consistent error messages. This also fixes the last comment by @satyr in #3301. - Similar to the above, unclosed heregexes also used to pass through the lexer and not error until in the parsing phase, which resulted in confusing error messages. This has been fixed, too. - Fix #3348, by adding passing tests. - Fix #3529: If a string starts with an interpolation, an empty string is no longer emitted before the interpolation (unless it is needed to coerce the interpolation into a string). - Block comments cannot contain `*/`. Now the error message also shows exactly where the offending `*/`. This improvement might seem unrelated, but I had to touch that code anyway to refactor string and regex related code, and the change was very trivial. Moreover, it's consistent with the next two points. - Regexes cannot start with `*`. Now the error message also shows exactly where the offending `*` is. (It might actually not be exatly at the start in heregexes.) It is a very minor improvement, but it was trivial to add. - Octal escapes in strings are forbidden in CoffeeScript (just like in JavaScript strict mode). However, this used to be the case only for regular strings. Now they are also forbidden in heredocs. Moreover, the errors now point at the offending octal escape. - Invalid regex flags are no longer allowed. This includes repeated modifiers and unknown ones. Moreover, invalid modifiers do not stop a heregex from being matched, which results in better error messages. - Fix #3621: `///a#{1}///` compiles to `RegExp("a" + 1)`. So does `RegExp("a#{1}")`. Still, those two code snippets used to generate different tokens, which is a bit weird, but more importantly causes problems for coffeelint (see clutchski/coffeelint#340). This required lots of tests in test/location.coffee to be updated. Note that some updates to those tests are unrelated to this point; some have been updated to be more consistent (I discovered this because the refactored code happened to be seemingly more correct). - Regular regex literals used to erraneously allow newlines to be escaped, causing invalid JavaScript output. This has been fixed. - Heregexes may now be completely empty (`//////`), instead of erroring out with a confusing message. - Fix #2388: Heredocs and heregexes used to be lexed simply, which meant that you couldn't nest a heredoc within a heredoc (double-quoted, that is) or a heregex inside a heregex. - Fix #2321: If you used division inside interpolation and then a slash later in the string containing that interpolation, the division slash and the latter slash was erraneously matched as a regex. This has been fixed. - Indentation inside interpolations in heredocs no longer affect how much indentation is removed from each line of the heredoc (which is more intuitive). - Whitespace is now correctly trimmed from the start and end of strings in a few edge cases. - Last but not least, the lexing of interpolated strings now seems to be more efficient. For a regular double-quoted string, we used to use a custom function to find the end of it (taking interpolations and interpolations within interpolations etc. into account). Then we used to re-find the interpolations and recursively lex their contents. In effect, the same string was processed twice, or even more in the case of deeper nesting of interpolations. Now the same string is processed just once. - Code duplication between regular strings, heredocs, regular regexes and heregexes has been reduced. - The above two points should result in more easily read code, too.
2015-01-03 22:40:43 +00:00
locationToken = token;
tokensToPush = [token];
}
if (this.tokens.length > firstIndex) {
plusToken = this.token('+', '+');
plusToken[2] = {
first_line: locationToken[2].first_line,
first_column: locationToken[2].first_column,
last_line: locationToken[2].first_line,
last_column: locationToken[2].first_column
2012-11-19 22:37:46 +00:00
};
2012-04-10 18:57:45 +00:00
}
(ref2 = this.tokens).push.apply(ref2, tokensToPush);
}
2015-02-07 19:16:59 +00:00
if (lparen) {
lastToken = tokens[tokens.length - 1];
lparen.origin = [
'STRING', null, {
first_line: lparen[2].first_line,
first_column: lparen[2].first_column,
last_line: lastToken[2].last_line,
last_column: lastToken[2].last_column
}
];
rparen = this.token('STRING_END', ')');
return rparen[2] = {
first_line: lastToken[2].last_line,
2015-02-07 19:16:59 +00:00
first_column: lastToken[2].last_column,
last_line: lastToken[2].last_line,
2015-02-07 19:16:59 +00:00
last_column: lastToken[2].last_column
};
2012-04-10 18:57:45 +00:00
}
};
2011-09-16 23:26:04 +00:00
Lexer.prototype.pair = function(tag) {
var lastIndent, prev, ref2, ref3, wanted;
ref2 = this.ends, prev = ref2[ref2.length - 1];
if (tag !== (wanted = prev != null ? prev.tag : void 0)) {
2012-04-10 18:57:45 +00:00
if ('OUTDENT' !== wanted) {
this.error("unmatched " + tag);
}
ref3 = this.indents, lastIndent = ref3[ref3.length - 1];
this.outdentToken(lastIndent, true);
2011-09-16 23:26:04 +00:00
return this.pair(tag);
}
return this.ends.pop();
};
Lexer.prototype.getLineAndColumnFromChunk = function(offset) {
var column, lastLine, lineCount, ref2, string;
if (offset === 0) {
return [this.chunkLine, this.chunkColumn];
}
if (offset >= this.chunk.length) {
string = this.chunk;
} else {
string = this.chunk.slice(0, +(offset - 1) + 1 || 9e9);
}
lineCount = count(string, '\n');
column = this.chunkColumn;
if (lineCount > 0) {
ref2 = string.split('\n'), lastLine = ref2[ref2.length - 1];
column = lastLine.length;
} else {
column += string.length;
}
return [this.chunkLine + lineCount, column];
};
[CS2] Output ES2015 arrow functions, default parameters, rest parameters (#4311) * Eliminate wrapper around “bound” (arrow) functions; output `=>` for such functions * Remove irrelevant (and breaking) tests * Minor cleanup * When a function parameter is a splat (i.e., it uses the ES2015 rest parameter syntax) output that parameter as ES2015 * Rearrange function parameters when one of the parameters is a splat and isn’t the last parameter (very WIP) * Handle params like `@param`, adding assignment expressions for them when they appear; ensure splat parameter is last * Add parameter names (not a text like `'\nValue IdentifierLiteral: a'`) to the scope, so that parameters can’t be deleted; move body-related lines together; more explanation of what’s going on * For parameters with a default value, correctly add the parameter name to the function scope * Handle expansions in function parameters: when an expansion is found, set the parameters to only be the original parameters left of the expansion, then an `...args` parameter; and in the function body define variables for the parameters to the right of the expansion, including setting default values * Handle splat parameters the same way we handle expansions: if a splat parameter is found, it becomes the last parameter in the function definition, and all following parameters get declared in the function body. Fix the splat/rest parameter values after the post-splat parameters have been extracted from it. Clean up `Code.compileNode` so that we loop through the parameters only once, and we create all expressions using calls like `new IdentifierLiteral` rather than `@makeCode`. * Fix parameter name when a parameter is a splat attached to `this` (e.g. `@param...`) * Rather than assigning post-splat parameters based on index, use slice; passes test “Functions with splats being called with too few arguments” * Dial back our w00t indentation * Better parsing of parameter names (WIP) * Refactor processing of splat/expansion parameters * Fix assignment of default parameters for parameters that come after a splat * Better check for whether a param is attached to `this` * More understandable variable names * For parameters after a splat or expansion, assign them similar to the 1.x destructuring method of using `arguments`, except only concern ourselves with the post-splat parameters instead of all parameters; and use the splat/expansion parameter name, since `arguments` in ES fat arrow functions refers to the parent function’s `arguments` rather than the fat arrow function’s arguments/parameters * Don’t add unnamed parameters (like `[]` as a parameter) to the function scope * Disallow multiple splat/expansion parameters in function definitions; disallow lone expansion parameters * Fix `this` params not getting assigned if the parameter is after a splat parameter * Allow names of function parameters attached to `this` to be reserved words * Always add a statement to the function body defining a variable with its default value, if it has one, if the variable `== null`; this covers the case when ES doesn’t apply the default value when `null` is passed in as a value, but CoffeeScript expects `null` and `undefined` to act interchangeably * Aftermath of having both `undefined` and `null` trigger the use of default values for parameters with default values * More careful parsing of destructured parameters * Fall back to processing destructured parameters in the function body, to account for `this` or default values within destructured objects * Clean up comments * Restore new bare function test, minus the arrow function part of it * Test that bound/arrow functions aren’t overwriting the `arguments` object, which should refer to the parent scope’s `arguments` (like `this`) * Follow ES2015 spec for parameter default values: `null` gets assigned as as `null`, not the default value * Mimic ES default parameters behavior for parameters after a splat or expansion parameter * Bound functions cannot be generators: remove no-longer-relevant test, add check to throw error if `yield` appears inside a bound (arrow) function * Error for bound generator functions should underline the `yield`
2016-10-26 05:26:13 +00:00
Lexer.prototype.makeToken = function(tag, value, offsetInChunk = 0, length = value.length) {
var lastCharacter, locationData, ref2, ref3, token;
locationData = {};
ref2 = this.getLineAndColumnFromChunk(offsetInChunk), locationData.first_line = ref2[0], locationData.first_column = ref2[1];
lastCharacter = length > 0 ? length - 1 : 0;
ref3 = this.getLineAndColumnFromChunk(offsetInChunk + lastCharacter), locationData.last_line = ref3[0], locationData.last_column = ref3[1];
token = [tag, value, locationData];
return token;
};
Lexer.prototype.token = function(tag, value, offsetInChunk, length, origin) {
var token;
token = this.makeToken(tag, value, offsetInChunk, length);
if (origin) {
token.origin = origin;
}
this.tokens.push(token);
return token;
};
Lexer.prototype.tag = function() {
var ref2, token;
ref2 = this.tokens, token = ref2[ref2.length - 1];
return token != null ? token[0] : void 0;
};
Lexer.prototype.value = function() {
var ref2, token;
ref2 = this.tokens, token = ref2[ref2.length - 1];
return token != null ? token[1] : void 0;
};
Lexer.prototype.unfinished = function() {
var ref2;
return LINE_CONTINUER.test(this.chunk) || ((ref2 = this.tag()) === '\\' || ref2 === '.' || ref2 === '?.' || ref2 === '?::' || ref2 === 'UNARY' || ref2 === 'MATH' || ref2 === 'UNARY_MATH' || ref2 === '+' || ref2 === '-' || ref2 === '**' || ref2 === 'SHIFT' || ref2 === 'RELATION' || ref2 === 'COMPARE' || ref2 === '&' || ref2 === '^' || ref2 === '|' || ref2 === '&&' || ref2 === '||' || ref2 === 'BIN?' || ref2 === 'THROW' || ref2 === 'EXTENDS');
};
Refactor interpolation (and string and regex) handling in lexer - Fix #3394: Unclosed single-quoted strings (both regular ones and heredocs) used to pass through the lexer, causing a parsing error later, while double-quoted strings caused an error already in the lexing phase. Now both single and double-quoted unclosed strings error out in the lexer (which is the more logical option) with consistent error messages. This also fixes the last comment by @satyr in #3301. - Similar to the above, unclosed heregexes also used to pass through the lexer and not error until in the parsing phase, which resulted in confusing error messages. This has been fixed, too. - Fix #3348, by adding passing tests. - Fix #3529: If a string starts with an interpolation, an empty string is no longer emitted before the interpolation (unless it is needed to coerce the interpolation into a string). - Block comments cannot contain `*/`. Now the error message also shows exactly where the offending `*/`. This improvement might seem unrelated, but I had to touch that code anyway to refactor string and regex related code, and the change was very trivial. Moreover, it's consistent with the next two points. - Regexes cannot start with `*`. Now the error message also shows exactly where the offending `*` is. (It might actually not be exatly at the start in heregexes.) It is a very minor improvement, but it was trivial to add. - Octal escapes in strings are forbidden in CoffeeScript (just like in JavaScript strict mode). However, this used to be the case only for regular strings. Now they are also forbidden in heredocs. Moreover, the errors now point at the offending octal escape. - Invalid regex flags are no longer allowed. This includes repeated modifiers and unknown ones. Moreover, invalid modifiers do not stop a heregex from being matched, which results in better error messages. - Fix #3621: `///a#{1}///` compiles to `RegExp("a" + 1)`. So does `RegExp("a#{1}")`. Still, those two code snippets used to generate different tokens, which is a bit weird, but more importantly causes problems for coffeelint (see clutchski/coffeelint#340). This required lots of tests in test/location.coffee to be updated. Note that some updates to those tests are unrelated to this point; some have been updated to be more consistent (I discovered this because the refactored code happened to be seemingly more correct). - Regular regex literals used to erraneously allow newlines to be escaped, causing invalid JavaScript output. This has been fixed. - Heregexes may now be completely empty (`//////`), instead of erroring out with a confusing message. - Fix #2388: Heredocs and heregexes used to be lexed simply, which meant that you couldn't nest a heredoc within a heredoc (double-quoted, that is) or a heregex inside a heregex. - Fix #2321: If you used division inside interpolation and then a slash later in the string containing that interpolation, the division slash and the latter slash was erraneously matched as a regex. This has been fixed. - Indentation inside interpolations in heredocs no longer affect how much indentation is removed from each line of the heredoc (which is more intuitive). - Whitespace is now correctly trimmed from the start and end of strings in a few edge cases. - Last but not least, the lexing of interpolated strings now seems to be more efficient. For a regular double-quoted string, we used to use a custom function to find the end of it (taking interpolations and interpolations within interpolations etc. into account). Then we used to re-find the interpolations and recursively lex their contents. In effect, the same string was processed twice, or even more in the case of deeper nesting of interpolations. Now the same string is processed just once. - Code duplication between regular strings, heredocs, regular regexes and heregexes has been reduced. - The above two points should result in more easily read code, too.
2015-01-03 22:40:43 +00:00
Lexer.prototype.formatString = function(str) {
return str.replace(STRING_OMIT, '$1');
};
Refactor interpolation (and string and regex) handling in lexer - Fix #3394: Unclosed single-quoted strings (both regular ones and heredocs) used to pass through the lexer, causing a parsing error later, while double-quoted strings caused an error already in the lexing phase. Now both single and double-quoted unclosed strings error out in the lexer (which is the more logical option) with consistent error messages. This also fixes the last comment by @satyr in #3301. - Similar to the above, unclosed heregexes also used to pass through the lexer and not error until in the parsing phase, which resulted in confusing error messages. This has been fixed, too. - Fix #3348, by adding passing tests. - Fix #3529: If a string starts with an interpolation, an empty string is no longer emitted before the interpolation (unless it is needed to coerce the interpolation into a string). - Block comments cannot contain `*/`. Now the error message also shows exactly where the offending `*/`. This improvement might seem unrelated, but I had to touch that code anyway to refactor string and regex related code, and the change was very trivial. Moreover, it's consistent with the next two points. - Regexes cannot start with `*`. Now the error message also shows exactly where the offending `*` is. (It might actually not be exatly at the start in heregexes.) It is a very minor improvement, but it was trivial to add. - Octal escapes in strings are forbidden in CoffeeScript (just like in JavaScript strict mode). However, this used to be the case only for regular strings. Now they are also forbidden in heredocs. Moreover, the errors now point at the offending octal escape. - Invalid regex flags are no longer allowed. This includes repeated modifiers and unknown ones. Moreover, invalid modifiers do not stop a heregex from being matched, which results in better error messages. - Fix #3621: `///a#{1}///` compiles to `RegExp("a" + 1)`. So does `RegExp("a#{1}")`. Still, those two code snippets used to generate different tokens, which is a bit weird, but more importantly causes problems for coffeelint (see clutchski/coffeelint#340). This required lots of tests in test/location.coffee to be updated. Note that some updates to those tests are unrelated to this point; some have been updated to be more consistent (I discovered this because the refactored code happened to be seemingly more correct). - Regular regex literals used to erraneously allow newlines to be escaped, causing invalid JavaScript output. This has been fixed. - Heregexes may now be completely empty (`//////`), instead of erroring out with a confusing message. - Fix #2388: Heredocs and heregexes used to be lexed simply, which meant that you couldn't nest a heredoc within a heredoc (double-quoted, that is) or a heregex inside a heregex. - Fix #2321: If you used division inside interpolation and then a slash later in the string containing that interpolation, the division slash and the latter slash was erraneously matched as a regex. This has been fixed. - Indentation inside interpolations in heredocs no longer affect how much indentation is removed from each line of the heredoc (which is more intuitive). - Whitespace is now correctly trimmed from the start and end of strings in a few edge cases. - Last but not least, the lexing of interpolated strings now seems to be more efficient. For a regular double-quoted string, we used to use a custom function to find the end of it (taking interpolations and interpolations within interpolations etc. into account). Then we used to re-find the interpolations and recursively lex their contents. In effect, the same string was processed twice, or even more in the case of deeper nesting of interpolations. Now the same string is processed just once. - Code duplication between regular strings, heredocs, regular regexes and heregexes has been reduced. - The above two points should result in more easily read code, too.
2015-01-03 22:40:43 +00:00
Lexer.prototype.formatHeregex = function(str) {
return str.replace(HEREGEX_OMIT, '$1$2');
Refactor interpolation (and string and regex) handling in lexer - Fix #3394: Unclosed single-quoted strings (both regular ones and heredocs) used to pass through the lexer, causing a parsing error later, while double-quoted strings caused an error already in the lexing phase. Now both single and double-quoted unclosed strings error out in the lexer (which is the more logical option) with consistent error messages. This also fixes the last comment by @satyr in #3301. - Similar to the above, unclosed heregexes also used to pass through the lexer and not error until in the parsing phase, which resulted in confusing error messages. This has been fixed, too. - Fix #3348, by adding passing tests. - Fix #3529: If a string starts with an interpolation, an empty string is no longer emitted before the interpolation (unless it is needed to coerce the interpolation into a string). - Block comments cannot contain `*/`. Now the error message also shows exactly where the offending `*/`. This improvement might seem unrelated, but I had to touch that code anyway to refactor string and regex related code, and the change was very trivial. Moreover, it's consistent with the next two points. - Regexes cannot start with `*`. Now the error message also shows exactly where the offending `*` is. (It might actually not be exatly at the start in heregexes.) It is a very minor improvement, but it was trivial to add. - Octal escapes in strings are forbidden in CoffeeScript (just like in JavaScript strict mode). However, this used to be the case only for regular strings. Now they are also forbidden in heredocs. Moreover, the errors now point at the offending octal escape. - Invalid regex flags are no longer allowed. This includes repeated modifiers and unknown ones. Moreover, invalid modifiers do not stop a heregex from being matched, which results in better error messages. - Fix #3621: `///a#{1}///` compiles to `RegExp("a" + 1)`. So does `RegExp("a#{1}")`. Still, those two code snippets used to generate different tokens, which is a bit weird, but more importantly causes problems for coffeelint (see clutchski/coffeelint#340). This required lots of tests in test/location.coffee to be updated. Note that some updates to those tests are unrelated to this point; some have been updated to be more consistent (I discovered this because the refactored code happened to be seemingly more correct). - Regular regex literals used to erraneously allow newlines to be escaped, causing invalid JavaScript output. This has been fixed. - Heregexes may now be completely empty (`//////`), instead of erroring out with a confusing message. - Fix #2388: Heredocs and heregexes used to be lexed simply, which meant that you couldn't nest a heredoc within a heredoc (double-quoted, that is) or a heregex inside a heregex. - Fix #2321: If you used division inside interpolation and then a slash later in the string containing that interpolation, the division slash and the latter slash was erraneously matched as a regex. This has been fixed. - Indentation inside interpolations in heredocs no longer affect how much indentation is removed from each line of the heredoc (which is more intuitive). - Whitespace is now correctly trimmed from the start and end of strings in a few edge cases. - Last but not least, the lexing of interpolated strings now seems to be more efficient. For a regular double-quoted string, we used to use a custom function to find the end of it (taking interpolations and interpolations within interpolations etc. into account). Then we used to re-find the interpolations and recursively lex their contents. In effect, the same string was processed twice, or even more in the case of deeper nesting of interpolations. Now the same string is processed just once. - Code duplication between regular strings, heredocs, regular regexes and heregexes has been reduced. - The above two points should result in more easily read code, too.
2015-01-03 22:40:43 +00:00
};
[CS2] Output ES2015 arrow functions, default parameters, rest parameters (#4311) * Eliminate wrapper around “bound” (arrow) functions; output `=>` for such functions * Remove irrelevant (and breaking) tests * Minor cleanup * When a function parameter is a splat (i.e., it uses the ES2015 rest parameter syntax) output that parameter as ES2015 * Rearrange function parameters when one of the parameters is a splat and isn’t the last parameter (very WIP) * Handle params like `@param`, adding assignment expressions for them when they appear; ensure splat parameter is last * Add parameter names (not a text like `'\nValue IdentifierLiteral: a'`) to the scope, so that parameters can’t be deleted; move body-related lines together; more explanation of what’s going on * For parameters with a default value, correctly add the parameter name to the function scope * Handle expansions in function parameters: when an expansion is found, set the parameters to only be the original parameters left of the expansion, then an `...args` parameter; and in the function body define variables for the parameters to the right of the expansion, including setting default values * Handle splat parameters the same way we handle expansions: if a splat parameter is found, it becomes the last parameter in the function definition, and all following parameters get declared in the function body. Fix the splat/rest parameter values after the post-splat parameters have been extracted from it. Clean up `Code.compileNode` so that we loop through the parameters only once, and we create all expressions using calls like `new IdentifierLiteral` rather than `@makeCode`. * Fix parameter name when a parameter is a splat attached to `this` (e.g. `@param...`) * Rather than assigning post-splat parameters based on index, use slice; passes test “Functions with splats being called with too few arguments” * Dial back our w00t indentation * Better parsing of parameter names (WIP) * Refactor processing of splat/expansion parameters * Fix assignment of default parameters for parameters that come after a splat * Better check for whether a param is attached to `this` * More understandable variable names * For parameters after a splat or expansion, assign them similar to the 1.x destructuring method of using `arguments`, except only concern ourselves with the post-splat parameters instead of all parameters; and use the splat/expansion parameter name, since `arguments` in ES fat arrow functions refers to the parent function’s `arguments` rather than the fat arrow function’s arguments/parameters * Don’t add unnamed parameters (like `[]` as a parameter) to the function scope * Disallow multiple splat/expansion parameters in function definitions; disallow lone expansion parameters * Fix `this` params not getting assigned if the parameter is after a splat parameter * Allow names of function parameters attached to `this` to be reserved words * Always add a statement to the function body defining a variable with its default value, if it has one, if the variable `== null`; this covers the case when ES doesn’t apply the default value when `null` is passed in as a value, but CoffeeScript expects `null` and `undefined` to act interchangeably * Aftermath of having both `undefined` and `null` trigger the use of default values for parameters with default values * More careful parsing of destructured parameters * Fall back to processing destructured parameters in the function body, to account for `this` or default values within destructured objects * Clean up comments * Restore new bare function test, minus the arrow function part of it * Test that bound/arrow functions aren’t overwriting the `arguments` object, which should refer to the parent scope’s `arguments` (like `this`) * Follow ES2015 spec for parameter default values: `null` gets assigned as as `null`, not the default value * Mimic ES default parameters behavior for parameters after a splat or expansion parameter * Bound functions cannot be generators: remove no-longer-relevant test, add check to throw error if `yield` appears inside a bound (arrow) function * Error for bound generator functions should underline the `yield`
2016-10-26 05:26:13 +00:00
Lexer.prototype.validateEscapes = function(str, options = {}) {
var before, hex, invalidEscape, match, message, octal, ref2, unicode;
match = INVALID_ESCAPE.exec(str);
if (!match) {
return;
}
match[0], before = match[1], octal = match[2], hex = match[3], unicode = match[4];
if (options.isRegex && octal && octal.charAt(0) !== '0') {
return;
}
message = octal ? "octal escape sequences are not allowed" : "invalid escape sequence";
invalidEscape = "\\" + (octal || hex || unicode);
return this.error(message + " " + invalidEscape, {
offset: ((ref2 = options.offsetInChunk) != null ? ref2 : 0) + match.index + before.length,
length: invalidEscape.length
});
};
[CS2] Output ES2015 arrow functions, default parameters, rest parameters (#4311) * Eliminate wrapper around “bound” (arrow) functions; output `=>` for such functions * Remove irrelevant (and breaking) tests * Minor cleanup * When a function parameter is a splat (i.e., it uses the ES2015 rest parameter syntax) output that parameter as ES2015 * Rearrange function parameters when one of the parameters is a splat and isn’t the last parameter (very WIP) * Handle params like `@param`, adding assignment expressions for them when they appear; ensure splat parameter is last * Add parameter names (not a text like `'\nValue IdentifierLiteral: a'`) to the scope, so that parameters can’t be deleted; move body-related lines together; more explanation of what’s going on * For parameters with a default value, correctly add the parameter name to the function scope * Handle expansions in function parameters: when an expansion is found, set the parameters to only be the original parameters left of the expansion, then an `...args` parameter; and in the function body define variables for the parameters to the right of the expansion, including setting default values * Handle splat parameters the same way we handle expansions: if a splat parameter is found, it becomes the last parameter in the function definition, and all following parameters get declared in the function body. Fix the splat/rest parameter values after the post-splat parameters have been extracted from it. Clean up `Code.compileNode` so that we loop through the parameters only once, and we create all expressions using calls like `new IdentifierLiteral` rather than `@makeCode`. * Fix parameter name when a parameter is a splat attached to `this` (e.g. `@param...`) * Rather than assigning post-splat parameters based on index, use slice; passes test “Functions with splats being called with too few arguments” * Dial back our w00t indentation * Better parsing of parameter names (WIP) * Refactor processing of splat/expansion parameters * Fix assignment of default parameters for parameters that come after a splat * Better check for whether a param is attached to `this` * More understandable variable names * For parameters after a splat or expansion, assign them similar to the 1.x destructuring method of using `arguments`, except only concern ourselves with the post-splat parameters instead of all parameters; and use the splat/expansion parameter name, since `arguments` in ES fat arrow functions refers to the parent function’s `arguments` rather than the fat arrow function’s arguments/parameters * Don’t add unnamed parameters (like `[]` as a parameter) to the function scope * Disallow multiple splat/expansion parameters in function definitions; disallow lone expansion parameters * Fix `this` params not getting assigned if the parameter is after a splat parameter * Allow names of function parameters attached to `this` to be reserved words * Always add a statement to the function body defining a variable with its default value, if it has one, if the variable `== null`; this covers the case when ES doesn’t apply the default value when `null` is passed in as a value, but CoffeeScript expects `null` and `undefined` to act interchangeably * Aftermath of having both `undefined` and `null` trigger the use of default values for parameters with default values * More careful parsing of destructured parameters * Fall back to processing destructured parameters in the function body, to account for `this` or default values within destructured objects * Clean up comments * Restore new bare function test, minus the arrow function part of it * Test that bound/arrow functions aren’t overwriting the `arguments` object, which should refer to the parent scope’s `arguments` (like `this`) * Follow ES2015 spec for parameter default values: `null` gets assigned as as `null`, not the default value * Mimic ES default parameters behavior for parameters after a splat or expansion parameter * Bound functions cannot be generators: remove no-longer-relevant test, add check to throw error if `yield` appears inside a bound (arrow) function * Error for bound generator functions should underline the `yield`
2016-10-26 05:26:13 +00:00
Lexer.prototype.makeDelimitedLiteral = function(body, options = {}) {
var regex;
if (body === '' && options.delimiter === '/') {
body = '(?:)';
}
regex = RegExp("(\\\\\\\\)|(\\\\0(?=[1-7]))|\\\\?(" + options.delimiter + ")|\\\\?(?:(\\n)|(\\r)|(\\u2028)|(\\u2029))|(\\\\.)", "g");
body = body.replace(regex, function(match, backslash, nul, delimiter, lf, cr, ls, ps, other) {
switch (false) {
case !backslash:
if (options.double) {
return backslash + backslash;
} else {
return backslash;
}
case !nul:
return '\\x00';
case !delimiter:
return "\\" + delimiter;
case !lf:
return '\\n';
case !cr:
return '\\r';
case !ls:
return '\\u2028';
case !ps:
return '\\u2029';
case !other:
if (options.double) {
return "\\" + other;
} else {
return other;
}
}
});
return "" + options.delimiter + body + options.delimiter;
};
[CS2] Output ES2015 arrow functions, default parameters, rest parameters (#4311) * Eliminate wrapper around “bound” (arrow) functions; output `=>` for such functions * Remove irrelevant (and breaking) tests * Minor cleanup * When a function parameter is a splat (i.e., it uses the ES2015 rest parameter syntax) output that parameter as ES2015 * Rearrange function parameters when one of the parameters is a splat and isn’t the last parameter (very WIP) * Handle params like `@param`, adding assignment expressions for them when they appear; ensure splat parameter is last * Add parameter names (not a text like `'\nValue IdentifierLiteral: a'`) to the scope, so that parameters can’t be deleted; move body-related lines together; more explanation of what’s going on * For parameters with a default value, correctly add the parameter name to the function scope * Handle expansions in function parameters: when an expansion is found, set the parameters to only be the original parameters left of the expansion, then an `...args` parameter; and in the function body define variables for the parameters to the right of the expansion, including setting default values * Handle splat parameters the same way we handle expansions: if a splat parameter is found, it becomes the last parameter in the function definition, and all following parameters get declared in the function body. Fix the splat/rest parameter values after the post-splat parameters have been extracted from it. Clean up `Code.compileNode` so that we loop through the parameters only once, and we create all expressions using calls like `new IdentifierLiteral` rather than `@makeCode`. * Fix parameter name when a parameter is a splat attached to `this` (e.g. `@param...`) * Rather than assigning post-splat parameters based on index, use slice; passes test “Functions with splats being called with too few arguments” * Dial back our w00t indentation * Better parsing of parameter names (WIP) * Refactor processing of splat/expansion parameters * Fix assignment of default parameters for parameters that come after a splat * Better check for whether a param is attached to `this` * More understandable variable names * For parameters after a splat or expansion, assign them similar to the 1.x destructuring method of using `arguments`, except only concern ourselves with the post-splat parameters instead of all parameters; and use the splat/expansion parameter name, since `arguments` in ES fat arrow functions refers to the parent function’s `arguments` rather than the fat arrow function’s arguments/parameters * Don’t add unnamed parameters (like `[]` as a parameter) to the function scope * Disallow multiple splat/expansion parameters in function definitions; disallow lone expansion parameters * Fix `this` params not getting assigned if the parameter is after a splat parameter * Allow names of function parameters attached to `this` to be reserved words * Always add a statement to the function body defining a variable with its default value, if it has one, if the variable `== null`; this covers the case when ES doesn’t apply the default value when `null` is passed in as a value, but CoffeeScript expects `null` and `undefined` to act interchangeably * Aftermath of having both `undefined` and `null` trigger the use of default values for parameters with default values * More careful parsing of destructured parameters * Fall back to processing destructured parameters in the function body, to account for `this` or default values within destructured objects * Clean up comments * Restore new bare function test, minus the arrow function part of it * Test that bound/arrow functions aren’t overwriting the `arguments` object, which should refer to the parent scope’s `arguments` (like `this`) * Follow ES2015 spec for parameter default values: `null` gets assigned as as `null`, not the default value * Mimic ES default parameters behavior for parameters after a splat or expansion parameter * Bound functions cannot be generators: remove no-longer-relevant test, add check to throw error if `yield` appears inside a bound (arrow) function * Error for bound generator functions should underline the `yield`
2016-10-26 05:26:13 +00:00
Lexer.prototype.error = function(message, options = {}) {
var first_column, first_line, location, ref2, ref3, ref4;
location = 'first_line' in options ? options : ((ref3 = this.getLineAndColumnFromChunk((ref2 = options.offset) != null ? ref2 : 0), first_line = ref3[0], first_column = ref3[1], ref3), {
first_line: first_line,
first_column: first_column,
last_column: first_column + ((ref4 = options.length) != null ? ref4 : 1) - 1
});
return throwSyntaxError(message, location);
2011-09-16 23:26:04 +00:00
};
return Lexer;
2011-12-14 15:39:20 +00:00
2010-12-23 18:50:52 +00:00
})();
[CS2] Output ES2015 arrow functions, default parameters, rest parameters (#4311) * Eliminate wrapper around “bound” (arrow) functions; output `=>` for such functions * Remove irrelevant (and breaking) tests * Minor cleanup * When a function parameter is a splat (i.e., it uses the ES2015 rest parameter syntax) output that parameter as ES2015 * Rearrange function parameters when one of the parameters is a splat and isn’t the last parameter (very WIP) * Handle params like `@param`, adding assignment expressions for them when they appear; ensure splat parameter is last * Add parameter names (not a text like `'\nValue IdentifierLiteral: a'`) to the scope, so that parameters can’t be deleted; move body-related lines together; more explanation of what’s going on * For parameters with a default value, correctly add the parameter name to the function scope * Handle expansions in function parameters: when an expansion is found, set the parameters to only be the original parameters left of the expansion, then an `...args` parameter; and in the function body define variables for the parameters to the right of the expansion, including setting default values * Handle splat parameters the same way we handle expansions: if a splat parameter is found, it becomes the last parameter in the function definition, and all following parameters get declared in the function body. Fix the splat/rest parameter values after the post-splat parameters have been extracted from it. Clean up `Code.compileNode` so that we loop through the parameters only once, and we create all expressions using calls like `new IdentifierLiteral` rather than `@makeCode`. * Fix parameter name when a parameter is a splat attached to `this` (e.g. `@param...`) * Rather than assigning post-splat parameters based on index, use slice; passes test “Functions with splats being called with too few arguments” * Dial back our w00t indentation * Better parsing of parameter names (WIP) * Refactor processing of splat/expansion parameters * Fix assignment of default parameters for parameters that come after a splat * Better check for whether a param is attached to `this` * More understandable variable names * For parameters after a splat or expansion, assign them similar to the 1.x destructuring method of using `arguments`, except only concern ourselves with the post-splat parameters instead of all parameters; and use the splat/expansion parameter name, since `arguments` in ES fat arrow functions refers to the parent function’s `arguments` rather than the fat arrow function’s arguments/parameters * Don’t add unnamed parameters (like `[]` as a parameter) to the function scope * Disallow multiple splat/expansion parameters in function definitions; disallow lone expansion parameters * Fix `this` params not getting assigned if the parameter is after a splat parameter * Allow names of function parameters attached to `this` to be reserved words * Always add a statement to the function body defining a variable with its default value, if it has one, if the variable `== null`; this covers the case when ES doesn’t apply the default value when `null` is passed in as a value, but CoffeeScript expects `null` and `undefined` to act interchangeably * Aftermath of having both `undefined` and `null` trigger the use of default values for parameters with default values * More careful parsing of destructured parameters * Fall back to processing destructured parameters in the function body, to account for `this` or default values within destructured objects * Clean up comments * Restore new bare function test, minus the arrow function part of it * Test that bound/arrow functions aren’t overwriting the `arguments` object, which should refer to the parent scope’s `arguments` (like `this`) * Follow ES2015 spec for parameter default values: `null` gets assigned as as `null`, not the default value * Mimic ES default parameters behavior for parameters after a splat or expansion parameter * Bound functions cannot be generators: remove no-longer-relevant test, add check to throw error if `yield` appears inside a bound (arrow) function * Error for bound generator functions should underline the `yield`
2016-10-26 05:26:13 +00:00
isUnassignable = function(name, displayName = name) {
switch (false) {
case indexOf.call(slice.call(JS_KEYWORDS).concat(slice.call(COFFEE_KEYWORDS)), name) < 0:
return "keyword '" + displayName + "' can't be assigned";
case indexOf.call(STRICT_PROSCRIBED, name) < 0:
return "'" + displayName + "' can't be assigned";
case indexOf.call(RESERVED, name) < 0:
return "reserved word '" + displayName + "' can't be assigned";
default:
return false;
}
};
exports.isUnassignable = isUnassignable;
JS_KEYWORDS = ['true', 'false', 'null', 'this', 'new', 'delete', 'typeof', 'in', 'instanceof', 'return', 'throw', 'break', 'continue', 'debugger', 'yield', 'await', 'if', 'else', 'switch', 'for', 'while', 'do', 'try', 'catch', 'finally', 'class', 'extends', 'super', 'import', 'export', 'default'];
COFFEE_KEYWORDS = ['undefined', 'Infinity', 'NaN', 'then', 'unless', 'until', 'loop', 'of', 'by', 'when'];
2011-04-30 14:35:56 +00:00
COFFEE_ALIAS_MAP = {
and: '&&',
or: '||',
is: '==',
isnt: '!=',
not: '!',
2010-10-23 18:24:30 +00:00
yes: 'true',
no: 'false',
on: 'true',
off: 'false'
2011-04-30 14:35:56 +00:00
};
2011-04-30 14:35:56 +00:00
COFFEE_ALIASES = (function() {
var results;
results = [];
2011-04-30 14:35:56 +00:00
for (key in COFFEE_ALIAS_MAP) {
results.push(key);
2011-04-30 14:35:56 +00:00
}
return results;
2011-04-30 14:35:56 +00:00
})();
2011-04-30 14:35:56 +00:00
COFFEE_KEYWORDS = COFFEE_KEYWORDS.concat(COFFEE_ALIASES);
Support import and export of ES2015 modules (#4300) This pull request adds support for ES2015 modules, by recognizing `import` and `export` statements. The following syntaxes are supported, based on the MDN [import](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Statements/import) and [export](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Statements/export) pages: ```js import "module-name" import defaultMember from "module-name" import * as name from "module-name" import { } from "module-name" import { member } from "module-name" import { member as alias } from "module-name" import { member1, member2 as alias2, … } from "module-name" import defaultMember, * as name from "module-name" import defaultMember, { … } from "module-name" export default expression export class name export { } export { name } export { name as exportedName } export { name as default } export { name1, name2 as exportedName2, name3 as default, … } export * from "module-name" export { … } from "module-name" ``` As a subsitute for ECMAScript’s `export var name = …` and `export function name {}`, CoffeeScript also supports: ```js export name = … ``` CoffeeScript also supports optional commas within `{ … }`. This PR converts the supported `import` and `export` statements into ES2015 `import` and `export` statements; it **does not resolve the modules**. So any CoffeeScript with `import` or `export` statements will be output as ES2015, and will need to be transpiled by another tool such as Babel before it can be used in a browser. We will need to add a warning to the documentation explaining this. This should be fully backwards-compatible, as `import` and `export` were previously reserved keywords. No flags are used. There are extensive tests included, though because no current JavaScript runtime supports `import` or `export`, the tests compare strings of what the compiled CoffeeScript output is against what the expected ES2015 should be. I also conducted two more elaborate tests: * I forked the [ember-piqu](https://github.com/pauc/piqu-ember) project, which was an Ember CLI app that used ember-cli-coffeescript and [ember-cli-coffees6](https://github.com/alexspeller/ember-cli-coffees6) (which adds “support” for `import`/`export` by wrapping such statements in backticks before passing the result to the CoffeeScript compiler). I removed `ember-cli-coffees6` and replaced the CoffeeScript compiler used in the build chain with this code, and the app built without errors. [Demo here.](https://github.com/GeoffreyBooth/coffeescript-modules-test-piqu) * I also forked the [CoffeeScript version of Meteor’s Todos example app](https://github.com/meteor/todos/tree/coffeescript), and replaced all of its `require` statements with the `import` and `export` statements from the original ES2015 version of the app on its `master` branch. I then updated the `coffeescript` Meteor package in the app to use this new code, and again the app builds without errors. [Demo here.](https://github.com/GeoffreyBooth/coffeescript-modules-test-meteor-todos) The discussion history for this work started [here](https://github.com/jashkenas/coffeescript/pull/4160) and continued [here](https://github.com/GeoffreyBooth/coffeescript/pull/2). @lydell provided guidance, and @JimPanic and @rattrayalex contributed essential code.
2016-09-14 18:46:05 +00:00
RESERVED = ['case', 'function', 'var', 'void', 'with', 'const', 'let', 'enum', 'native', 'implements', 'interface', 'package', 'private', 'protected', 'public', 'static'];
STRICT_PROSCRIBED = ['arguments', 'eval'];
exports.JS_FORBIDDEN = JS_KEYWORDS.concat(RESERVED).concat(STRICT_PROSCRIBED);
Refactor `Literal` into several subtypes Previously, the parser created `Literal` nodes for many things. This resulted in information loss. Instead of being able to check the node type, we had to use regexes to tell the different types of `Literal`s apart. That was a bit like parsing literals twice: Once in the lexer, and once (or more) in the compiler. It also caused problems, such as `` `this` `` and `this` being indistinguishable (fixes #2009). Instead returning `new Literal` in the grammar, subtypes of it are now returned instead, such as `NumberLiteral`, `StringLiteral` and `IdentifierLiteral`. `new Literal` by itself is only used to represent code chunks that fit no category. (While mentioning `NumberLiteral`, there's also `InfinityLiteral` now, which is a subtype of `NumberLiteral`.) `StringWithInterpolations` has been added as a subtype of `Parens`, and `RegexWithInterpolations` as a subtype of `Call`. This makes it easier for other programs to make use of CoffeeScript's "AST" (nodes). For example, it is now possible to distinguish between `"a #{b} c"` and `"a " + b + " c"`. Fixes #4192. `SuperCall` has been added as a subtype of `Call`. Note, though, that some information is still lost, especially in the lexer. For example, there is no way to distinguish a heredoc from a regular string, or a heregex without interpolations from a regular regex. Binary and octal number literals are indistinguishable from hexadecimal literals. After the new subtypes were added, they were taken advantage of, removing most regexes in nodes.coffee. `SIMPLENUM` (which matches non-hex integers) had to be kept, though, because such numbers need special handling in JavaScript (for example in `1..toString()`). An especially nice hack to get rid of was using `new String()` for the token value for reserved identifiers (to be able to set a property on them which could survive through the parser). Now it's a good old regular string. In range literals, slices, splices and for loop steps when number literals are involved, CoffeeScript can do some optimizations, such as precomputing the value of, say, `5 - 3` (outputting `2` instead of `5 - 3` literally). As a side bonus, this now also works with hexadecimal number literals, such as `0x02`. Finally, this also improves the output of `coffee --nodes`: # Before: $ bin/coffee -ne 'while true "#{a}" break' Block While Value Bool Block Value Parens Block Op + Value """" Value Parens Block Value "a" "break" # After: $ bin/coffee -ne 'while true "#{a}" break' Block While Value BooleanLiteral: true Block Value StringWithInterpolations Block Op + Value StringLiteral: "" Value Parens Block Value IdentifierLiteral: a StatementLiteral: break
2016-01-31 19:24:31 +00:00
BOM = 65279;
IDENTIFIER = /^(?!\d)((?:(?!\s)[$\w\x7f-\uffff])+)([^\n\S]*:(?!:))?/;
NUMBER = /^0b[01]+|^0o[0-7]+|^0x[\da-f]+|^\d*\.?\d+(?:e[+-]?\d+)?/i;
2014-09-06 10:55:27 +00:00
OPERATOR = /^(?:[-=]>|[-+*\/%<>&|^!?=]=|>>>=?|([-+:])\1|([&|<>*\/%])\2=?|\?(\.|::)|\.{2,3})/;
WHITESPACE = /^[^\n\S]+/;
COMMENT = /^###([^#][\s\S]*?)(?:###[^\n\S]*|###$)|^(?:\s*#(?!##[^#]).*)+/;
2014-09-06 10:55:27 +00:00
CODE = /^[-=]>/;
MULTI_DENT = /^(?:\n[^\n\S]*)+/;
2010-09-24 13:38:28 +00:00
JSTOKEN = /^`[^\\`]*(?:\\.[^\\`]*)*`/;
Refactor interpolation (and string and regex) handling in lexer - Fix #3394: Unclosed single-quoted strings (both regular ones and heredocs) used to pass through the lexer, causing a parsing error later, while double-quoted strings caused an error already in the lexing phase. Now both single and double-quoted unclosed strings error out in the lexer (which is the more logical option) with consistent error messages. This also fixes the last comment by @satyr in #3301. - Similar to the above, unclosed heregexes also used to pass through the lexer and not error until in the parsing phase, which resulted in confusing error messages. This has been fixed, too. - Fix #3348, by adding passing tests. - Fix #3529: If a string starts with an interpolation, an empty string is no longer emitted before the interpolation (unless it is needed to coerce the interpolation into a string). - Block comments cannot contain `*/`. Now the error message also shows exactly where the offending `*/`. This improvement might seem unrelated, but I had to touch that code anyway to refactor string and regex related code, and the change was very trivial. Moreover, it's consistent with the next two points. - Regexes cannot start with `*`. Now the error message also shows exactly where the offending `*` is. (It might actually not be exatly at the start in heregexes.) It is a very minor improvement, but it was trivial to add. - Octal escapes in strings are forbidden in CoffeeScript (just like in JavaScript strict mode). However, this used to be the case only for regular strings. Now they are also forbidden in heredocs. Moreover, the errors now point at the offending octal escape. - Invalid regex flags are no longer allowed. This includes repeated modifiers and unknown ones. Moreover, invalid modifiers do not stop a heregex from being matched, which results in better error messages. - Fix #3621: `///a#{1}///` compiles to `RegExp("a" + 1)`. So does `RegExp("a#{1}")`. Still, those two code snippets used to generate different tokens, which is a bit weird, but more importantly causes problems for coffeelint (see clutchski/coffeelint#340). This required lots of tests in test/location.coffee to be updated. Note that some updates to those tests are unrelated to this point; some have been updated to be more consistent (I discovered this because the refactored code happened to be seemingly more correct). - Regular regex literals used to erraneously allow newlines to be escaped, causing invalid JavaScript output. This has been fixed. - Heregexes may now be completely empty (`//////`), instead of erroring out with a confusing message. - Fix #2388: Heredocs and heregexes used to be lexed simply, which meant that you couldn't nest a heredoc within a heredoc (double-quoted, that is) or a heregex inside a heregex. - Fix #2321: If you used division inside interpolation and then a slash later in the string containing that interpolation, the division slash and the latter slash was erraneously matched as a regex. This has been fixed. - Indentation inside interpolations in heredocs no longer affect how much indentation is removed from each line of the heredoc (which is more intuitive). - Whitespace is now correctly trimmed from the start and end of strings in a few edge cases. - Last but not least, the lexing of interpolated strings now seems to be more efficient. For a regular double-quoted string, we used to use a custom function to find the end of it (taking interpolations and interpolations within interpolations etc. into account). Then we used to re-find the interpolations and recursively lex their contents. In effect, the same string was processed twice, or even more in the case of deeper nesting of interpolations. Now the same string is processed just once. - Code duplication between regular strings, heredocs, regular regexes and heregexes has been reduced. - The above two points should result in more easily read code, too.
2015-01-03 22:40:43 +00:00
STRING_START = /^(?:'''|"""|'|")/;
STRING_SINGLE = /^(?:[^\\']|\\[\s\S])*/;
STRING_DOUBLE = /^(?:[^\\"#]|\\[\s\S]|\#(?!\{))*/;
HEREDOC_SINGLE = /^(?:[^\\']|\\[\s\S]|'(?!''))*/;
HEREDOC_DOUBLE = /^(?:[^\\"#]|\\[\s\S]|"(?!"")|\#(?!\{))*/;
STRING_OMIT = /((?:\\\\)+)|\\[^\S\n]*\n\s*/g;
SIMPLE_STRING_OMIT = /\s*\n\s*/g;
Refactor interpolation (and string and regex) handling in lexer - Fix #3394: Unclosed single-quoted strings (both regular ones and heredocs) used to pass through the lexer, causing a parsing error later, while double-quoted strings caused an error already in the lexing phase. Now both single and double-quoted unclosed strings error out in the lexer (which is the more logical option) with consistent error messages. This also fixes the last comment by @satyr in #3301. - Similar to the above, unclosed heregexes also used to pass through the lexer and not error until in the parsing phase, which resulted in confusing error messages. This has been fixed, too. - Fix #3348, by adding passing tests. - Fix #3529: If a string starts with an interpolation, an empty string is no longer emitted before the interpolation (unless it is needed to coerce the interpolation into a string). - Block comments cannot contain `*/`. Now the error message also shows exactly where the offending `*/`. This improvement might seem unrelated, but I had to touch that code anyway to refactor string and regex related code, and the change was very trivial. Moreover, it's consistent with the next two points. - Regexes cannot start with `*`. Now the error message also shows exactly where the offending `*` is. (It might actually not be exatly at the start in heregexes.) It is a very minor improvement, but it was trivial to add. - Octal escapes in strings are forbidden in CoffeeScript (just like in JavaScript strict mode). However, this used to be the case only for regular strings. Now they are also forbidden in heredocs. Moreover, the errors now point at the offending octal escape. - Invalid regex flags are no longer allowed. This includes repeated modifiers and unknown ones. Moreover, invalid modifiers do not stop a heregex from being matched, which results in better error messages. - Fix #3621: `///a#{1}///` compiles to `RegExp("a" + 1)`. So does `RegExp("a#{1}")`. Still, those two code snippets used to generate different tokens, which is a bit weird, but more importantly causes problems for coffeelint (see clutchski/coffeelint#340). This required lots of tests in test/location.coffee to be updated. Note that some updates to those tests are unrelated to this point; some have been updated to be more consistent (I discovered this because the refactored code happened to be seemingly more correct). - Regular regex literals used to erraneously allow newlines to be escaped, causing invalid JavaScript output. This has been fixed. - Heregexes may now be completely empty (`//////`), instead of erroring out with a confusing message. - Fix #2388: Heredocs and heregexes used to be lexed simply, which meant that you couldn't nest a heredoc within a heredoc (double-quoted, that is) or a heregex inside a heregex. - Fix #2321: If you used division inside interpolation and then a slash later in the string containing that interpolation, the division slash and the latter slash was erraneously matched as a regex. This has been fixed. - Indentation inside interpolations in heredocs no longer affect how much indentation is removed from each line of the heredoc (which is more intuitive). - Whitespace is now correctly trimmed from the start and end of strings in a few edge cases. - Last but not least, the lexing of interpolated strings now seems to be more efficient. For a regular double-quoted string, we used to use a custom function to find the end of it (taking interpolations and interpolations within interpolations etc. into account). Then we used to re-find the interpolations and recursively lex their contents. In effect, the same string was processed twice, or even more in the case of deeper nesting of interpolations. Now the same string is processed just once. - Code duplication between regular strings, heredocs, regular regexes and heregexes has been reduced. - The above two points should result in more easily read code, too.
2015-01-03 22:40:43 +00:00
HEREDOC_INDENT = /\n+([^\n\S]*)(?=\S)/g;
REGEX = /^\/(?!\/)((?:[^[\/\n\\]|\\[^\n]|\[(?:\\[^\n]|[^\]\n\\])*\])*)(\/)?/;
Refactor interpolation (and string and regex) handling in lexer - Fix #3394: Unclosed single-quoted strings (both regular ones and heredocs) used to pass through the lexer, causing a parsing error later, while double-quoted strings caused an error already in the lexing phase. Now both single and double-quoted unclosed strings error out in the lexer (which is the more logical option) with consistent error messages. This also fixes the last comment by @satyr in #3301. - Similar to the above, unclosed heregexes also used to pass through the lexer and not error until in the parsing phase, which resulted in confusing error messages. This has been fixed, too. - Fix #3348, by adding passing tests. - Fix #3529: If a string starts with an interpolation, an empty string is no longer emitted before the interpolation (unless it is needed to coerce the interpolation into a string). - Block comments cannot contain `*/`. Now the error message also shows exactly where the offending `*/`. This improvement might seem unrelated, but I had to touch that code anyway to refactor string and regex related code, and the change was very trivial. Moreover, it's consistent with the next two points. - Regexes cannot start with `*`. Now the error message also shows exactly where the offending `*` is. (It might actually not be exatly at the start in heregexes.) It is a very minor improvement, but it was trivial to add. - Octal escapes in strings are forbidden in CoffeeScript (just like in JavaScript strict mode). However, this used to be the case only for regular strings. Now they are also forbidden in heredocs. Moreover, the errors now point at the offending octal escape. - Invalid regex flags are no longer allowed. This includes repeated modifiers and unknown ones. Moreover, invalid modifiers do not stop a heregex from being matched, which results in better error messages. - Fix #3621: `///a#{1}///` compiles to `RegExp("a" + 1)`. So does `RegExp("a#{1}")`. Still, those two code snippets used to generate different tokens, which is a bit weird, but more importantly causes problems for coffeelint (see clutchski/coffeelint#340). This required lots of tests in test/location.coffee to be updated. Note that some updates to those tests are unrelated to this point; some have been updated to be more consistent (I discovered this because the refactored code happened to be seemingly more correct). - Regular regex literals used to erraneously allow newlines to be escaped, causing invalid JavaScript output. This has been fixed. - Heregexes may now be completely empty (`//////`), instead of erroring out with a confusing message. - Fix #2388: Heredocs and heregexes used to be lexed simply, which meant that you couldn't nest a heredoc within a heredoc (double-quoted, that is) or a heregex inside a heregex. - Fix #2321: If you used division inside interpolation and then a slash later in the string containing that interpolation, the division slash and the latter slash was erraneously matched as a regex. This has been fixed. - Indentation inside interpolations in heredocs no longer affect how much indentation is removed from each line of the heredoc (which is more intuitive). - Whitespace is now correctly trimmed from the start and end of strings in a few edge cases. - Last but not least, the lexing of interpolated strings now seems to be more efficient. For a regular double-quoted string, we used to use a custom function to find the end of it (taking interpolations and interpolations within interpolations etc. into account). Then we used to re-find the interpolations and recursively lex their contents. In effect, the same string was processed twice, or even more in the case of deeper nesting of interpolations. Now the same string is processed just once. - Code duplication between regular strings, heredocs, regular regexes and heregexes has been reduced. - The above two points should result in more easily read code, too.
2015-01-03 22:40:43 +00:00
REGEX_FLAGS = /^\w*/;
VALID_FLAGS = /^(?!.*(.).*\1)[imgy]*$/;
HEREGEX = /^(?:[^\\\/#]|\\[\s\S]|\/(?!\/\/)|\#(?!\{))*/;
HEREGEX_OMIT = /((?:\\\\)+)|\\(\s)|\s+(?:#.*)?/g;
Refactor interpolation (and string and regex) handling in lexer - Fix #3394: Unclosed single-quoted strings (both regular ones and heredocs) used to pass through the lexer, causing a parsing error later, while double-quoted strings caused an error already in the lexing phase. Now both single and double-quoted unclosed strings error out in the lexer (which is the more logical option) with consistent error messages. This also fixes the last comment by @satyr in #3301. - Similar to the above, unclosed heregexes also used to pass through the lexer and not error until in the parsing phase, which resulted in confusing error messages. This has been fixed, too. - Fix #3348, by adding passing tests. - Fix #3529: If a string starts with an interpolation, an empty string is no longer emitted before the interpolation (unless it is needed to coerce the interpolation into a string). - Block comments cannot contain `*/`. Now the error message also shows exactly where the offending `*/`. This improvement might seem unrelated, but I had to touch that code anyway to refactor string and regex related code, and the change was very trivial. Moreover, it's consistent with the next two points. - Regexes cannot start with `*`. Now the error message also shows exactly where the offending `*` is. (It might actually not be exatly at the start in heregexes.) It is a very minor improvement, but it was trivial to add. - Octal escapes in strings are forbidden in CoffeeScript (just like in JavaScript strict mode). However, this used to be the case only for regular strings. Now they are also forbidden in heredocs. Moreover, the errors now point at the offending octal escape. - Invalid regex flags are no longer allowed. This includes repeated modifiers and unknown ones. Moreover, invalid modifiers do not stop a heregex from being matched, which results in better error messages. - Fix #3621: `///a#{1}///` compiles to `RegExp("a" + 1)`. So does `RegExp("a#{1}")`. Still, those two code snippets used to generate different tokens, which is a bit weird, but more importantly causes problems for coffeelint (see clutchski/coffeelint#340). This required lots of tests in test/location.coffee to be updated. Note that some updates to those tests are unrelated to this point; some have been updated to be more consistent (I discovered this because the refactored code happened to be seemingly more correct). - Regular regex literals used to erraneously allow newlines to be escaped, causing invalid JavaScript output. This has been fixed. - Heregexes may now be completely empty (`//////`), instead of erroring out with a confusing message. - Fix #2388: Heredocs and heregexes used to be lexed simply, which meant that you couldn't nest a heredoc within a heredoc (double-quoted, that is) or a heregex inside a heregex. - Fix #2321: If you used division inside interpolation and then a slash later in the string containing that interpolation, the division slash and the latter slash was erraneously matched as a regex. This has been fixed. - Indentation inside interpolations in heredocs no longer affect how much indentation is removed from each line of the heredoc (which is more intuitive). - Whitespace is now correctly trimmed from the start and end of strings in a few edge cases. - Last but not least, the lexing of interpolated strings now seems to be more efficient. For a regular double-quoted string, we used to use a custom function to find the end of it (taking interpolations and interpolations within interpolations etc. into account). Then we used to re-find the interpolations and recursively lex their contents. In effect, the same string was processed twice, or even more in the case of deeper nesting of interpolations. Now the same string is processed just once. - Code duplication between regular strings, heredocs, regular regexes and heregexes has been reduced. - The above two points should result in more easily read code, too.
2015-01-03 22:40:43 +00:00
REGEX_ILLEGAL = /^(\/|\/{3}\s*)(\*)/;
Fix #3410, #3182: Allow regex to start with space or = A regex may not follow a specific set of tokens. These were already known before in the `NOT_REGEX` and `NOT_SPACED_REGEX` arrays. (However, I've refactored them to be more correct and to add a few missing tokens). In all other cases (except after a spaced callable) a slash is the start of a regex, and may now start with a space or an equals sign. It’s really that simple! A slash after a spaced callable is the only ambigous case. We cannot know if that's division or function application with a regex as the argument. The spacing determines which is which: Space on both sides: - `a / b/i` -> `a / b / i` - `a /= b/i` -> `a /= b / i` No spaces: - `a/b/i` -> `a / b / i` - `a/=b/i` -> `a /= b / i` Space on the right side: - `a/ b/i` -> `a / b / i` - `a/= b/i` -> `a /= b / i` Space on the left side: - `a /b/i` -> `a(/b/i)` - `a /=b/i` -> `a(/=b/i)` The last case used to compile to `a /= b / i`, but that has been changed to be consistent with the `/` operator. The last case really looks like a regex, so it should be parsed as one. Moreover, you may now also space the `/` and `/=` operators with other whitespace characters than a space (such as tabs and non-breaking spaces) for consistency. Lastly, unclosed regexes are now reported as such, instead of generating some other confusing error message. It should perhaps also be noted that apart from escaping (such as `a /\ b/`) you may now also use parentheses to disambiguate division and regex: `a (/ b/)`. See https://github.com/jashkenas/coffeescript/issues/3182#issuecomment-26688427.
2015-01-10 00:48:00 +00:00
POSSIBLY_DIVISION = /^\/=?\s/;
Refactor interpolation (and string and regex) handling in lexer - Fix #3394: Unclosed single-quoted strings (both regular ones and heredocs) used to pass through the lexer, causing a parsing error later, while double-quoted strings caused an error already in the lexing phase. Now both single and double-quoted unclosed strings error out in the lexer (which is the more logical option) with consistent error messages. This also fixes the last comment by @satyr in #3301. - Similar to the above, unclosed heregexes also used to pass through the lexer and not error until in the parsing phase, which resulted in confusing error messages. This has been fixed, too. - Fix #3348, by adding passing tests. - Fix #3529: If a string starts with an interpolation, an empty string is no longer emitted before the interpolation (unless it is needed to coerce the interpolation into a string). - Block comments cannot contain `*/`. Now the error message also shows exactly where the offending `*/`. This improvement might seem unrelated, but I had to touch that code anyway to refactor string and regex related code, and the change was very trivial. Moreover, it's consistent with the next two points. - Regexes cannot start with `*`. Now the error message also shows exactly where the offending `*` is. (It might actually not be exatly at the start in heregexes.) It is a very minor improvement, but it was trivial to add. - Octal escapes in strings are forbidden in CoffeeScript (just like in JavaScript strict mode). However, this used to be the case only for regular strings. Now they are also forbidden in heredocs. Moreover, the errors now point at the offending octal escape. - Invalid regex flags are no longer allowed. This includes repeated modifiers and unknown ones. Moreover, invalid modifiers do not stop a heregex from being matched, which results in better error messages. - Fix #3621: `///a#{1}///` compiles to `RegExp("a" + 1)`. So does `RegExp("a#{1}")`. Still, those two code snippets used to generate different tokens, which is a bit weird, but more importantly causes problems for coffeelint (see clutchski/coffeelint#340). This required lots of tests in test/location.coffee to be updated. Note that some updates to those tests are unrelated to this point; some have been updated to be more consistent (I discovered this because the refactored code happened to be seemingly more correct). - Regular regex literals used to erraneously allow newlines to be escaped, causing invalid JavaScript output. This has been fixed. - Heregexes may now be completely empty (`//////`), instead of erroring out with a confusing message. - Fix #2388: Heredocs and heregexes used to be lexed simply, which meant that you couldn't nest a heredoc within a heredoc (double-quoted, that is) or a heregex inside a heregex. - Fix #2321: If you used division inside interpolation and then a slash later in the string containing that interpolation, the division slash and the latter slash was erraneously matched as a regex. This has been fixed. - Indentation inside interpolations in heredocs no longer affect how much indentation is removed from each line of the heredoc (which is more intuitive). - Whitespace is now correctly trimmed from the start and end of strings in a few edge cases. - Last but not least, the lexing of interpolated strings now seems to be more efficient. For a regular double-quoted string, we used to use a custom function to find the end of it (taking interpolations and interpolations within interpolations etc. into account). Then we used to re-find the interpolations and recursively lex their contents. In effect, the same string was processed twice, or even more in the case of deeper nesting of interpolations. Now the same string is processed just once. - Code duplication between regular strings, heredocs, regular regexes and heregexes has been reduced. - The above two points should result in more easily read code, too.
2015-01-03 22:40:43 +00:00
HERECOMMENT_ILLEGAL = /\*\//;
LINE_CONTINUER = /^\s*(?:,|\??\.(?![.\d])|::)/;
INVALID_ESCAPE = /((?:^|[^\\])(?:\\\\)*)\\(?:(0[0-7]|[1-7])|(x(?![\da-fA-F]{2}).{0,2})|(u(?![\da-fA-F]{4}).{0,4}))/;
Refactor interpolation (and string and regex) handling in lexer - Fix #3394: Unclosed single-quoted strings (both regular ones and heredocs) used to pass through the lexer, causing a parsing error later, while double-quoted strings caused an error already in the lexing phase. Now both single and double-quoted unclosed strings error out in the lexer (which is the more logical option) with consistent error messages. This also fixes the last comment by @satyr in #3301. - Similar to the above, unclosed heregexes also used to pass through the lexer and not error until in the parsing phase, which resulted in confusing error messages. This has been fixed, too. - Fix #3348, by adding passing tests. - Fix #3529: If a string starts with an interpolation, an empty string is no longer emitted before the interpolation (unless it is needed to coerce the interpolation into a string). - Block comments cannot contain `*/`. Now the error message also shows exactly where the offending `*/`. This improvement might seem unrelated, but I had to touch that code anyway to refactor string and regex related code, and the change was very trivial. Moreover, it's consistent with the next two points. - Regexes cannot start with `*`. Now the error message also shows exactly where the offending `*` is. (It might actually not be exatly at the start in heregexes.) It is a very minor improvement, but it was trivial to add. - Octal escapes in strings are forbidden in CoffeeScript (just like in JavaScript strict mode). However, this used to be the case only for regular strings. Now they are also forbidden in heredocs. Moreover, the errors now point at the offending octal escape. - Invalid regex flags are no longer allowed. This includes repeated modifiers and unknown ones. Moreover, invalid modifiers do not stop a heregex from being matched, which results in better error messages. - Fix #3621: `///a#{1}///` compiles to `RegExp("a" + 1)`. So does `RegExp("a#{1}")`. Still, those two code snippets used to generate different tokens, which is a bit weird, but more importantly causes problems for coffeelint (see clutchski/coffeelint#340). This required lots of tests in test/location.coffee to be updated. Note that some updates to those tests are unrelated to this point; some have been updated to be more consistent (I discovered this because the refactored code happened to be seemingly more correct). - Regular regex literals used to erraneously allow newlines to be escaped, causing invalid JavaScript output. This has been fixed. - Heregexes may now be completely empty (`//////`), instead of erroring out with a confusing message. - Fix #2388: Heredocs and heregexes used to be lexed simply, which meant that you couldn't nest a heredoc within a heredoc (double-quoted, that is) or a heregex inside a heregex. - Fix #2321: If you used division inside interpolation and then a slash later in the string containing that interpolation, the division slash and the latter slash was erraneously matched as a regex. This has been fixed. - Indentation inside interpolations in heredocs no longer affect how much indentation is removed from each line of the heredoc (which is more intuitive). - Whitespace is now correctly trimmed from the start and end of strings in a few edge cases. - Last but not least, the lexing of interpolated strings now seems to be more efficient. For a regular double-quoted string, we used to use a custom function to find the end of it (taking interpolations and interpolations within interpolations etc. into account). Then we used to re-find the interpolations and recursively lex their contents. In effect, the same string was processed twice, or even more in the case of deeper nesting of interpolations. Now the same string is processed just once. - Code duplication between regular strings, heredocs, regular regexes and heregexes has been reduced. - The above two points should result in more easily read code, too.
2015-01-03 22:40:43 +00:00
LEADING_BLANK_LINE = /^[^\n\S]*\n/;
TRAILING_BLANK_LINE = /\n[^\n\S]*$/;
2010-10-03 23:22:42 +00:00
TRAILING_SPACES = /\s+$/;
COMPOUND_ASSIGN = ['-=', '+=', '/=', '*=', '%=', '||=', '&&=', '?=', '<<=', '>>=', '>>>=', '&=', '^=', '|=', '**=', '//=', '%%='];
2014-09-06 11:53:21 +00:00
UNARY = ['NEW', 'TYPEOF', 'DELETE', 'DO'];
UNARY_MATH = ['!', '~'];
SHIFT = ['<<', '>>', '>>>'];
2010-10-23 18:24:30 +00:00
COMPARE = ['==', '!=', '<', '>', '<=', '>='];
MATH = ['*', '/', '%', '//', '%%'];
RELATION = ['IN', 'OF', 'INSTANCEOF'];
BOOL = ['TRUE', 'FALSE'];
CALLABLE = ['IDENTIFIER', 'PROPERTY', ')', ']', '?', '@', 'THIS', 'SUPER'];
INDEXABLE = CALLABLE.concat(['NUMBER', 'INFINITY', 'NAN', 'STRING', 'STRING_END', 'REGEX', 'REGEX_END', 'BOOL', 'NULL', 'UNDEFINED', '}', '::']);
Fix #3410, #3182: Allow regex to start with space or = A regex may not follow a specific set of tokens. These were already known before in the `NOT_REGEX` and `NOT_SPACED_REGEX` arrays. (However, I've refactored them to be more correct and to add a few missing tokens). In all other cases (except after a spaced callable) a slash is the start of a regex, and may now start with a space or an equals sign. It’s really that simple! A slash after a spaced callable is the only ambigous case. We cannot know if that's division or function application with a regex as the argument. The spacing determines which is which: Space on both sides: - `a / b/i` -> `a / b / i` - `a /= b/i` -> `a /= b / i` No spaces: - `a/b/i` -> `a / b / i` - `a/=b/i` -> `a /= b / i` Space on the right side: - `a/ b/i` -> `a / b / i` - `a/= b/i` -> `a /= b / i` Space on the left side: - `a /b/i` -> `a(/b/i)` - `a /=b/i` -> `a(/=b/i)` The last case used to compile to `a /= b / i`, but that has been changed to be consistent with the `/` operator. The last case really looks like a regex, so it should be parsed as one. Moreover, you may now also space the `/` and `/=` operators with other whitespace characters than a space (such as tabs and non-breaking spaces) for consistency. Lastly, unclosed regexes are now reported as such, instead of generating some other confusing error message. It should perhaps also be noted that apart from escaping (such as `a /\ b/`) you may now also use parentheses to disambiguate division and regex: `a (/ b/)`. See https://github.com/jashkenas/coffeescript/issues/3182#issuecomment-26688427.
2015-01-10 00:48:00 +00:00
NOT_REGEX = INDEXABLE.concat(['++', '--']);
LINE_BREAK = ['INDENT', 'OUTDENT', 'TERMINATOR'];
2011-12-14 15:39:20 +00:00
2014-01-22 18:30:13 +00:00
INDENTABLE_CLOSERS = [')', '}', ']'];
}).call(this);