2016-10-01 18:58:53 +00:00
|
|
|
// Generated by CoffeeScript 1.11.1
|
2010-07-25 07:15:12 +00:00
|
|
|
(function() {
|
Define proper operator precedence for bitwise/logical operators
This is an upstream port for the patch https://github.com/decaffeinate/coffeescript/pull/8
See https://github.com/decaffeinate/decaffeinate/issues/291 for the bug that this fixed.
For the most part, CoffeeScript and JavaScript have the same precedence rules,
but in some cases, the intermediate AST format didn't represent the actual
evaluation order. For example, in the expression `a or b and c`, the `and` is
evaluated first, but the parser treated the two operators with equal precedence.
This was still correct end-to-end because CoffeeScript simply emitted the result
without parens, but any intermediate tools using the CoffeeScript parser could
become confused.
Here are the JS operator precedence rules:
https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Operators/Operator_Precedence
For the most part, CoffeeScript already follows these. `COMPARE` operators
already behave differently due to chained comparisons, so I think we don't need
to worry about following JS precedence for those. So I think the only case where
it was behaving differently in an important way was for the binary/bitwise
operators that are being changed here.
As part of this change, I also introduced a new token tag, `BIN?`, for the
binary form of the `?` operator.
2016-10-09 19:00:38 +00:00
|
|
|
var BOM, BOOL, CALLABLE, CODE, COFFEE_ALIASES, COFFEE_ALIAS_MAP, COFFEE_KEYWORDS, COMMENT, COMPARE, COMPOUND_ASSIGN, HERECOMMENT_ILLEGAL, HEREDOC_DOUBLE, HEREDOC_INDENT, HEREDOC_SINGLE, HEREGEX, HEREGEX_OMIT, IDENTIFIER, INDENTABLE_CLOSERS, INDEXABLE, INVALID_ESCAPE, INVERSES, JSTOKEN, JS_KEYWORDS, LEADING_BLANK_LINE, LINE_BREAK, LINE_CONTINUER, Lexer, MATH, MULTI_DENT, NOT_REGEX, NUMBER, OPERATOR, POSSIBLY_DIVISION, REGEX, REGEX_FLAGS, REGEX_ILLEGAL, RELATION, RESERVED, Rewriter, SHIFT, SIMPLE_STRING_OMIT, STRICT_PROSCRIBED, STRING_DOUBLE, STRING_OMIT, STRING_SINGLE, STRING_START, TRAILING_BLANK_LINE, TRAILING_SPACES, UNARY, UNARY_MATH, VALID_FLAGS, WHITESPACE, compact, count, invertLiterate, isUnassignable, key, locationDataToString, ref, ref1, repeat, starts, throwSyntaxError,
|
2016-03-05 19:59:39 +00:00
|
|
|
indexOf = [].indexOf || function(item) { for (var i = 0, l = this.length; i < l; i++) { if (i in this && this[i] === item) return i; } return -1; },
|
|
|
|
slice = [].slice;
|
2011-09-18 22:16:39 +00:00
|
|
|
|
2015-01-30 19:33:03 +00:00
|
|
|
ref = require('./rewriter'), Rewriter = ref.Rewriter, INVERSES = ref.INVERSES;
|
2011-09-18 22:16:39 +00:00
|
|
|
|
2015-02-07 20:50:41 +00:00
|
|
|
ref1 = require('./helpers'), count = ref1.count, starts = ref1.starts, compact = ref1.compact, repeat = ref1.repeat, invertLiterate = ref1.invertLiterate, locationDataToString = ref1.locationDataToString, throwSyntaxError = ref1.throwSyntaxError;
|
2013-02-25 17:41:34 +00:00
|
|
|
|
2010-12-23 18:50:52 +00:00
|
|
|
exports.Lexer = Lexer = (function() {
|
2010-11-12 02:48:08 +00:00
|
|
|
function Lexer() {}
|
2011-09-18 22:16:39 +00:00
|
|
|
|
2010-11-05 04:04:52 +00:00
|
|
|
Lexer.prototype.tokenize = function(code, opts) {
|
2015-01-30 19:33:03 +00:00
|
|
|
var consumed, end, i, ref2;
|
2012-04-10 18:57:45 +00:00
|
|
|
if (opts == null) {
|
|
|
|
opts = {};
|
|
|
|
}
|
2012-09-26 00:15:40 +00:00
|
|
|
this.literate = opts.literate;
|
2010-02-28 00:40:53 +00:00
|
|
|
this.indent = 0;
|
2013-06-13 22:21:47 +00:00
|
|
|
this.baseIndent = 0;
|
2010-09-09 02:46:13 +00:00
|
|
|
this.indebt = 0;
|
2010-06-02 03:32:46 +00:00
|
|
|
this.outdebt = 0;
|
2010-02-28 00:40:53 +00:00
|
|
|
this.indents = [];
|
2011-09-16 23:26:04 +00:00
|
|
|
this.ends = [];
|
2010-02-28 00:40:53 +00:00
|
|
|
this.tokens = [];
|
2015-07-08 01:55:43 +00:00
|
|
|
this.seenFor = false;
|
Support import and export of ES2015 modules (#4300)
This pull request adds support for ES2015 modules, by recognizing `import` and `export` statements. The following syntaxes are supported, based on the MDN [import](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Statements/import) and [export](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Statements/export) pages:
```js
import "module-name"
import defaultMember from "module-name"
import * as name from "module-name"
import { } from "module-name"
import { member } from "module-name"
import { member as alias } from "module-name"
import { member1, member2 as alias2, … } from "module-name"
import defaultMember, * as name from "module-name"
import defaultMember, { … } from "module-name"
export default expression
export class name
export { }
export { name }
export { name as exportedName }
export { name as default }
export { name1, name2 as exportedName2, name3 as default, … }
export * from "module-name"
export { … } from "module-name"
```
As a subsitute for ECMAScript’s `export var name = …` and `export function name {}`, CoffeeScript also supports:
```js
export name = …
```
CoffeeScript also supports optional commas within `{ … }`.
This PR converts the supported `import` and `export` statements into ES2015 `import` and `export` statements; it **does not resolve the modules**. So any CoffeeScript with `import` or `export` statements will be output as ES2015, and will need to be transpiled by another tool such as Babel before it can be used in a browser. We will need to add a warning to the documentation explaining this.
This should be fully backwards-compatible, as `import` and `export` were previously reserved keywords. No flags are used.
There are extensive tests included, though because no current JavaScript runtime supports `import` or `export`, the tests compare strings of what the compiled CoffeeScript output is against what the expected ES2015 should be. I also conducted two more elaborate tests:
* I forked the [ember-piqu](https://github.com/pauc/piqu-ember) project, which was an Ember CLI app that used ember-cli-coffeescript and [ember-cli-coffees6](https://github.com/alexspeller/ember-cli-coffees6) (which adds “support” for `import`/`export` by wrapping such statements in backticks before passing the result to the CoffeeScript compiler). I removed `ember-cli-coffees6` and replaced the CoffeeScript compiler used in the build chain with this code, and the app built without errors. [Demo here.](https://github.com/GeoffreyBooth/coffeescript-modules-test-piqu)
* I also forked the [CoffeeScript version of Meteor’s Todos example app](https://github.com/meteor/todos/tree/coffeescript), and replaced all of its `require` statements with the `import` and `export` statements from the original ES2015 version of the app on its `master` branch. I then updated the `coffeescript` Meteor package in the app to use this new code, and again the app builds without errors. [Demo here.](https://github.com/GeoffreyBooth/coffeescript-modules-test-meteor-todos)
The discussion history for this work started [here](https://github.com/jashkenas/coffeescript/pull/4160) and continued [here](https://github.com/GeoffreyBooth/coffeescript/pull/2). @lydell provided guidance, and @JimPanic and @rattrayalex contributed essential code.
2016-09-14 18:46:05 +00:00
|
|
|
this.seenImport = false;
|
|
|
|
this.seenExport = false;
|
2013-01-14 19:26:06 +00:00
|
|
|
this.chunkLine = opts.line || 0;
|
|
|
|
this.chunkColumn = opts.column || 0;
|
2013-03-01 18:38:55 +00:00
|
|
|
code = this.clean(code);
|
2010-10-22 14:47:40 +00:00
|
|
|
i = 0;
|
|
|
|
while (this.chunk = code.slice(i)) {
|
Refactor interpolation (and string and regex) handling in lexer
- Fix #3394: Unclosed single-quoted strings (both regular ones and heredocs)
used to pass through the lexer, causing a parsing error later, while
double-quoted strings caused an error already in the lexing phase. Now both
single and double-quoted unclosed strings error out in the lexer (which is the
more logical option) with consistent error messages. This also fixes the last
comment by @satyr in #3301.
- Similar to the above, unclosed heregexes also used to pass through the lexer
and not error until in the parsing phase, which resulted in confusing error
messages. This has been fixed, too.
- Fix #3348, by adding passing tests.
- Fix #3529: If a string starts with an interpolation, an empty string is no
longer emitted before the interpolation (unless it is needed to coerce the
interpolation into a string).
- Block comments cannot contain `*/`. Now the error message also shows exactly
where the offending `*/`. This improvement might seem unrelated, but I had to
touch that code anyway to refactor string and regex related code, and the
change was very trivial. Moreover, it's consistent with the next two points.
- Regexes cannot start with `*`. Now the error message also shows exactly where
the offending `*` is. (It might actually not be exatly at the start in
heregexes.) It is a very minor improvement, but it was trivial to add.
- Octal escapes in strings are forbidden in CoffeeScript (just like in
JavaScript strict mode). However, this used to be the case only for regular
strings. Now they are also forbidden in heredocs. Moreover, the errors now
point at the offending octal escape.
- Invalid regex flags are no longer allowed. This includes repeated modifiers
and unknown ones. Moreover, invalid modifiers do not stop a heregex from
being matched, which results in better error messages.
- Fix #3621: `///a#{1}///` compiles to `RegExp("a" + 1)`. So does
`RegExp("a#{1}")`. Still, those two code snippets used to generate different
tokens, which is a bit weird, but more importantly causes problems for
coffeelint (see clutchski/coffeelint#340). This required lots of tests in
test/location.coffee to be updated. Note that some updates to those tests are
unrelated to this point; some have been updated to be more consistent (I
discovered this because the refactored code happened to be seemingly more
correct).
- Regular regex literals used to erraneously allow newlines to be escaped,
causing invalid JavaScript output. This has been fixed.
- Heregexes may now be completely empty (`//////`), instead of erroring out with
a confusing message.
- Fix #2388: Heredocs and heregexes used to be lexed simply, which meant that
you couldn't nest a heredoc within a heredoc (double-quoted, that is) or a
heregex inside a heregex.
- Fix #2321: If you used division inside interpolation and then a slash later in
the string containing that interpolation, the division slash and the latter
slash was erraneously matched as a regex. This has been fixed.
- Indentation inside interpolations in heredocs no longer affect how much
indentation is removed from each line of the heredoc (which is more
intuitive).
- Whitespace is now correctly trimmed from the start and end of strings in a few
edge cases.
- Last but not least, the lexing of interpolated strings now seems to be more
efficient. For a regular double-quoted string, we used to use a custom
function to find the end of it (taking interpolations and interpolations
within interpolations etc. into account). Then we used to re-find the
interpolations and recursively lex their contents. In effect, the same string
was processed twice, or even more in the case of deeper nesting of
interpolations. Now the same string is processed just once.
- Code duplication between regular strings, heredocs, regular regexes and
heregexes has been reduced.
- The above two points should result in more easily read code, too.
2015-01-03 22:40:43 +00:00
|
|
|
consumed = this.identifierToken() || this.commentToken() || this.whitespaceToken() || this.lineToken() || this.stringToken() || this.numberToken() || this.regexToken() || this.jsToken() || this.literalToken();
|
2015-01-30 19:33:03 +00:00
|
|
|
ref2 = this.getLineAndColumnFromChunk(consumed), this.chunkLine = ref2[0], this.chunkColumn = ref2[1];
|
2012-11-17 00:09:56 +00:00
|
|
|
i += consumed;
|
Refactor interpolation (and string and regex) handling in lexer
- Fix #3394: Unclosed single-quoted strings (both regular ones and heredocs)
used to pass through the lexer, causing a parsing error later, while
double-quoted strings caused an error already in the lexing phase. Now both
single and double-quoted unclosed strings error out in the lexer (which is the
more logical option) with consistent error messages. This also fixes the last
comment by @satyr in #3301.
- Similar to the above, unclosed heregexes also used to pass through the lexer
and not error until in the parsing phase, which resulted in confusing error
messages. This has been fixed, too.
- Fix #3348, by adding passing tests.
- Fix #3529: If a string starts with an interpolation, an empty string is no
longer emitted before the interpolation (unless it is needed to coerce the
interpolation into a string).
- Block comments cannot contain `*/`. Now the error message also shows exactly
where the offending `*/`. This improvement might seem unrelated, but I had to
touch that code anyway to refactor string and regex related code, and the
change was very trivial. Moreover, it's consistent with the next two points.
- Regexes cannot start with `*`. Now the error message also shows exactly where
the offending `*` is. (It might actually not be exatly at the start in
heregexes.) It is a very minor improvement, but it was trivial to add.
- Octal escapes in strings are forbidden in CoffeeScript (just like in
JavaScript strict mode). However, this used to be the case only for regular
strings. Now they are also forbidden in heredocs. Moreover, the errors now
point at the offending octal escape.
- Invalid regex flags are no longer allowed. This includes repeated modifiers
and unknown ones. Moreover, invalid modifiers do not stop a heregex from
being matched, which results in better error messages.
- Fix #3621: `///a#{1}///` compiles to `RegExp("a" + 1)`. So does
`RegExp("a#{1}")`. Still, those two code snippets used to generate different
tokens, which is a bit weird, but more importantly causes problems for
coffeelint (see clutchski/coffeelint#340). This required lots of tests in
test/location.coffee to be updated. Note that some updates to those tests are
unrelated to this point; some have been updated to be more consistent (I
discovered this because the refactored code happened to be seemingly more
correct).
- Regular regex literals used to erraneously allow newlines to be escaped,
causing invalid JavaScript output. This has been fixed.
- Heregexes may now be completely empty (`//////`), instead of erroring out with
a confusing message.
- Fix #2388: Heredocs and heregexes used to be lexed simply, which meant that
you couldn't nest a heredoc within a heredoc (double-quoted, that is) or a
heregex inside a heregex.
- Fix #2321: If you used division inside interpolation and then a slash later in
the string containing that interpolation, the division slash and the latter
slash was erraneously matched as a regex. This has been fixed.
- Indentation inside interpolations in heredocs no longer affect how much
indentation is removed from each line of the heredoc (which is more
intuitive).
- Whitespace is now correctly trimmed from the start and end of strings in a few
edge cases.
- Last but not least, the lexing of interpolated strings now seems to be more
efficient. For a regular double-quoted string, we used to use a custom
function to find the end of it (taking interpolations and interpolations
within interpolations etc. into account). Then we used to re-find the
interpolations and recursively lex their contents. In effect, the same string
was processed twice, or even more in the case of deeper nesting of
interpolations. Now the same string is processed just once.
- Code duplication between regular strings, heredocs, regular regexes and
heregexes has been reduced.
- The above two points should result in more easily read code, too.
2015-01-03 22:40:43 +00:00
|
|
|
if (opts.untilBalanced && this.ends.length === 0) {
|
|
|
|
return {
|
|
|
|
tokens: this.tokens,
|
|
|
|
index: i
|
|
|
|
};
|
|
|
|
}
|
2010-02-28 00:40:53 +00:00
|
|
|
}
|
2010-06-12 23:05:13 +00:00
|
|
|
this.closeIndentation();
|
2015-01-03 23:28:23 +00:00
|
|
|
if (end = this.ends.pop()) {
|
2015-02-06 09:52:02 +00:00
|
|
|
this.error("missing " + end.tag, end.origin[2]);
|
2012-04-10 18:57:45 +00:00
|
|
|
}
|
|
|
|
if (opts.rewrite === false) {
|
|
|
|
return this.tokens;
|
|
|
|
}
|
2013-04-29 03:09:46 +00:00
|
|
|
return (new Rewriter).rewrite(this.tokens);
|
2010-02-28 00:40:53 +00:00
|
|
|
};
|
2011-09-18 22:16:39 +00:00
|
|
|
|
2012-09-26 00:15:40 +00:00
|
|
|
Lexer.prototype.clean = function(code) {
|
2013-01-07 04:56:58 +00:00
|
|
|
if (code.charCodeAt(0) === BOM) {
|
|
|
|
code = code.slice(1);
|
|
|
|
}
|
2013-03-01 18:38:55 +00:00
|
|
|
code = code.replace(/\r/g, '').replace(TRAILING_SPACES, '');
|
2012-09-26 00:15:40 +00:00
|
|
|
if (WHITESPACE.test(code)) {
|
|
|
|
code = "\n" + code;
|
2013-03-01 18:38:55 +00:00
|
|
|
this.chunkLine--;
|
2012-09-26 00:15:40 +00:00
|
|
|
}
|
|
|
|
if (this.literate) {
|
2013-03-05 02:45:57 +00:00
|
|
|
code = invertLiterate(code);
|
2012-09-26 00:15:40 +00:00
|
|
|
}
|
|
|
|
return code;
|
|
|
|
};
|
|
|
|
|
2010-06-12 23:05:13 +00:00
|
|
|
Lexer.prototype.identifierToken = function() {
|
2016-09-15 06:30:58 +00:00
|
|
|
var alias, colon, colonOffset, id, idLength, input, match, poppedToken, prev, ref2, ref3, ref4, ref5, tag, tagToken;
|
2014-09-06 10:55:27 +00:00
|
|
|
if (!(match = IDENTIFIER.exec(this.chunk))) {
|
2012-04-10 18:57:45 +00:00
|
|
|
return 0;
|
|
|
|
}
|
2010-10-11 00:17:31 +00:00
|
|
|
input = match[0], id = match[1], colon = match[2];
|
2012-11-17 00:09:56 +00:00
|
|
|
idLength = id.length;
|
|
|
|
poppedToken = void 0;
|
2010-11-28 23:33:43 +00:00
|
|
|
if (id === 'own' && this.tag() === 'FOR') {
|
|
|
|
this.token('OWN', id);
|
2010-10-22 14:47:40 +00:00
|
|
|
return id.length;
|
2010-07-16 01:18:35 +00:00
|
|
|
}
|
2014-09-06 11:53:21 +00:00
|
|
|
if (id === 'from' && this.tag() === 'YIELD') {
|
|
|
|
this.token('FROM', id);
|
|
|
|
return id.length;
|
|
|
|
}
|
2016-09-15 06:30:58 +00:00
|
|
|
if (id === 'as' && this.seenImport && (this.tag() === 'IDENTIFIER' || this.value() === '*')) {
|
|
|
|
if (this.value() === '*') {
|
|
|
|
this.tokens[this.tokens.length - 1][0] = 'IMPORT_ALL';
|
|
|
|
}
|
|
|
|
this.token('AS', id);
|
|
|
|
return id.length;
|
|
|
|
}
|
|
|
|
if (id === 'as' && this.seenExport && this.tag() === 'IDENTIFIER') {
|
Support import and export of ES2015 modules (#4300)
This pull request adds support for ES2015 modules, by recognizing `import` and `export` statements. The following syntaxes are supported, based on the MDN [import](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Statements/import) and [export](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Statements/export) pages:
```js
import "module-name"
import defaultMember from "module-name"
import * as name from "module-name"
import { } from "module-name"
import { member } from "module-name"
import { member as alias } from "module-name"
import { member1, member2 as alias2, … } from "module-name"
import defaultMember, * as name from "module-name"
import defaultMember, { … } from "module-name"
export default expression
export class name
export { }
export { name }
export { name as exportedName }
export { name as default }
export { name1, name2 as exportedName2, name3 as default, … }
export * from "module-name"
export { … } from "module-name"
```
As a subsitute for ECMAScript’s `export var name = …` and `export function name {}`, CoffeeScript also supports:
```js
export name = …
```
CoffeeScript also supports optional commas within `{ … }`.
This PR converts the supported `import` and `export` statements into ES2015 `import` and `export` statements; it **does not resolve the modules**. So any CoffeeScript with `import` or `export` statements will be output as ES2015, and will need to be transpiled by another tool such as Babel before it can be used in a browser. We will need to add a warning to the documentation explaining this.
This should be fully backwards-compatible, as `import` and `export` were previously reserved keywords. No flags are used.
There are extensive tests included, though because no current JavaScript runtime supports `import` or `export`, the tests compare strings of what the compiled CoffeeScript output is against what the expected ES2015 should be. I also conducted two more elaborate tests:
* I forked the [ember-piqu](https://github.com/pauc/piqu-ember) project, which was an Ember CLI app that used ember-cli-coffeescript and [ember-cli-coffees6](https://github.com/alexspeller/ember-cli-coffees6) (which adds “support” for `import`/`export` by wrapping such statements in backticks before passing the result to the CoffeeScript compiler). I removed `ember-cli-coffees6` and replaced the CoffeeScript compiler used in the build chain with this code, and the app built without errors. [Demo here.](https://github.com/GeoffreyBooth/coffeescript-modules-test-piqu)
* I also forked the [CoffeeScript version of Meteor’s Todos example app](https://github.com/meteor/todos/tree/coffeescript), and replaced all of its `require` statements with the `import` and `export` statements from the original ES2015 version of the app on its `master` branch. I then updated the `coffeescript` Meteor package in the app to use this new code, and again the app builds without errors. [Demo here.](https://github.com/GeoffreyBooth/coffeescript-modules-test-meteor-todos)
The discussion history for this work started [here](https://github.com/jashkenas/coffeescript/pull/4160) and continued [here](https://github.com/GeoffreyBooth/coffeescript/pull/2). @lydell provided guidance, and @JimPanic and @rattrayalex contributed essential code.
2016-09-14 18:46:05 +00:00
|
|
|
this.token('AS', id);
|
|
|
|
return id.length;
|
|
|
|
}
|
|
|
|
if (id === 'default' && this.seenExport) {
|
|
|
|
this.token('DEFAULT', id);
|
|
|
|
return id.length;
|
|
|
|
}
|
2016-09-15 06:30:58 +00:00
|
|
|
ref2 = this.tokens, prev = ref2[ref2.length - 1];
|
|
|
|
tag = colon || (prev != null) && (((ref3 = prev[0]) === '.' || ref3 === '?.' || ref3 === '::' || ref3 === '?::') || !prev.spaced && prev[0] === '@') ? 'PROPERTY' : 'IDENTIFIER';
|
2016-03-05 16:41:15 +00:00
|
|
|
if (tag === 'IDENTIFIER' && (indexOf.call(JS_KEYWORDS, id) >= 0 || indexOf.call(COFFEE_KEYWORDS, id) >= 0)) {
|
2010-02-28 00:40:53 +00:00
|
|
|
tag = id.toUpperCase();
|
2016-09-15 06:30:58 +00:00
|
|
|
if (tag === 'WHEN' && (ref4 = this.tag(), indexOf.call(LINE_BREAK, ref4) >= 0)) {
|
2010-09-23 05:14:18 +00:00
|
|
|
tag = 'LEADING_WHEN';
|
2010-10-05 19:46:17 +00:00
|
|
|
} else if (tag === 'FOR') {
|
|
|
|
this.seenFor = true;
|
2010-12-21 03:50:49 +00:00
|
|
|
} else if (tag === 'UNLESS') {
|
|
|
|
tag = 'IF';
|
Support import and export of ES2015 modules (#4300)
This pull request adds support for ES2015 modules, by recognizing `import` and `export` statements. The following syntaxes are supported, based on the MDN [import](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Statements/import) and [export](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Statements/export) pages:
```js
import "module-name"
import defaultMember from "module-name"
import * as name from "module-name"
import { } from "module-name"
import { member } from "module-name"
import { member as alias } from "module-name"
import { member1, member2 as alias2, … } from "module-name"
import defaultMember, * as name from "module-name"
import defaultMember, { … } from "module-name"
export default expression
export class name
export { }
export { name }
export { name as exportedName }
export { name as default }
export { name1, name2 as exportedName2, name3 as default, … }
export * from "module-name"
export { … } from "module-name"
```
As a subsitute for ECMAScript’s `export var name = …` and `export function name {}`, CoffeeScript also supports:
```js
export name = …
```
CoffeeScript also supports optional commas within `{ … }`.
This PR converts the supported `import` and `export` statements into ES2015 `import` and `export` statements; it **does not resolve the modules**. So any CoffeeScript with `import` or `export` statements will be output as ES2015, and will need to be transpiled by another tool such as Babel before it can be used in a browser. We will need to add a warning to the documentation explaining this.
This should be fully backwards-compatible, as `import` and `export` were previously reserved keywords. No flags are used.
There are extensive tests included, though because no current JavaScript runtime supports `import` or `export`, the tests compare strings of what the compiled CoffeeScript output is against what the expected ES2015 should be. I also conducted two more elaborate tests:
* I forked the [ember-piqu](https://github.com/pauc/piqu-ember) project, which was an Ember CLI app that used ember-cli-coffeescript and [ember-cli-coffees6](https://github.com/alexspeller/ember-cli-coffees6) (which adds “support” for `import`/`export` by wrapping such statements in backticks before passing the result to the CoffeeScript compiler). I removed `ember-cli-coffees6` and replaced the CoffeeScript compiler used in the build chain with this code, and the app built without errors. [Demo here.](https://github.com/GeoffreyBooth/coffeescript-modules-test-piqu)
* I also forked the [CoffeeScript version of Meteor’s Todos example app](https://github.com/meteor/todos/tree/coffeescript), and replaced all of its `require` statements with the `import` and `export` statements from the original ES2015 version of the app on its `master` branch. I then updated the `coffeescript` Meteor package in the app to use this new code, and again the app builds without errors. [Demo here.](https://github.com/GeoffreyBooth/coffeescript-modules-test-meteor-todos)
The discussion history for this work started [here](https://github.com/jashkenas/coffeescript/pull/4160) and continued [here](https://github.com/GeoffreyBooth/coffeescript/pull/2). @lydell provided guidance, and @JimPanic and @rattrayalex contributed essential code.
2016-09-14 18:46:05 +00:00
|
|
|
} else if (tag === 'IMPORT') {
|
|
|
|
this.seenImport = true;
|
|
|
|
} else if (tag === 'EXPORT') {
|
|
|
|
this.seenExport = true;
|
2015-01-30 19:33:03 +00:00
|
|
|
} else if (indexOf.call(UNARY, tag) >= 0) {
|
2010-09-25 07:00:07 +00:00
|
|
|
tag = 'UNARY';
|
2015-01-30 19:33:03 +00:00
|
|
|
} else if (indexOf.call(RELATION, tag) >= 0) {
|
2010-10-05 19:46:17 +00:00
|
|
|
if (tag !== 'INSTANCEOF' && this.seenFor) {
|
|
|
|
tag = 'FOR' + tag;
|
2010-12-24 16:59:30 +00:00
|
|
|
this.seenFor = false;
|
2010-10-05 19:46:17 +00:00
|
|
|
} else {
|
|
|
|
tag = 'RELATION';
|
|
|
|
if (this.value() === '!') {
|
2012-11-17 00:09:56 +00:00
|
|
|
poppedToken = this.tokens.pop();
|
2010-10-05 19:46:17 +00:00
|
|
|
id = '!' + id;
|
|
|
|
}
|
|
|
|
}
|
2010-09-23 05:14:18 +00:00
|
|
|
}
|
2010-08-12 02:24:43 +00:00
|
|
|
}
|
2016-03-05 16:41:15 +00:00
|
|
|
if (tag === 'IDENTIFIER' && indexOf.call(RESERVED, id) >= 0) {
|
|
|
|
this.error("reserved word '" + id + "'", {
|
|
|
|
length: id.length
|
|
|
|
});
|
2010-06-15 04:54:02 +00:00
|
|
|
}
|
2016-03-05 16:41:15 +00:00
|
|
|
if (tag !== 'PROPERTY') {
|
2015-01-30 19:33:03 +00:00
|
|
|
if (indexOf.call(COFFEE_ALIASES, id) >= 0) {
|
2015-05-01 12:33:11 +00:00
|
|
|
alias = id;
|
2012-04-10 18:57:45 +00:00
|
|
|
id = COFFEE_ALIAS_MAP[id];
|
|
|
|
}
|
2010-12-23 18:50:52 +00:00
|
|
|
tag = (function() {
|
2010-11-05 04:04:52 +00:00
|
|
|
switch (id) {
|
|
|
|
case '!':
|
|
|
|
return 'UNARY';
|
|
|
|
case '==':
|
|
|
|
case '!=':
|
|
|
|
return 'COMPARE';
|
|
|
|
case 'true':
|
|
|
|
case 'false':
|
|
|
|
return 'BOOL';
|
2010-11-14 20:15:13 +00:00
|
|
|
case 'break':
|
|
|
|
case 'continue':
|
Refactor `Literal` into several subtypes
Previously, the parser created `Literal` nodes for many things. This resulted in
information loss. Instead of being able to check the node type, we had to use
regexes to tell the different types of `Literal`s apart. That was a bit like
parsing literals twice: Once in the lexer, and once (or more) in the compiler.
It also caused problems, such as `` `this` `` and `this` being indistinguishable
(fixes #2009).
Instead returning `new Literal` in the grammar, subtypes of it are now returned
instead, such as `NumberLiteral`, `StringLiteral` and `IdentifierLiteral`. `new
Literal` by itself is only used to represent code chunks that fit no category.
(While mentioning `NumberLiteral`, there's also `InfinityLiteral` now, which is
a subtype of `NumberLiteral`.)
`StringWithInterpolations` has been added as a subtype of `Parens`, and
`RegexWithInterpolations` as a subtype of `Call`. This makes it easier for other
programs to make use of CoffeeScript's "AST" (nodes). For example, it is now
possible to distinguish between `"a #{b} c"` and `"a " + b + " c"`. Fixes #4192.
`SuperCall` has been added as a subtype of `Call`.
Note, though, that some information is still lost, especially in the lexer. For
example, there is no way to distinguish a heredoc from a regular string, or a
heregex without interpolations from a regular regex. Binary and octal number
literals are indistinguishable from hexadecimal literals.
After the new subtypes were added, they were taken advantage of, removing most
regexes in nodes.coffee. `SIMPLENUM` (which matches non-hex integers) had to be
kept, though, because such numbers need special handling in JavaScript (for
example in `1..toString()`).
An especially nice hack to get rid of was using `new String()` for the token
value for reserved identifiers (to be able to set a property on them which could
survive through the parser). Now it's a good old regular string.
In range literals, slices, splices and for loop steps when number literals
are involved, CoffeeScript can do some optimizations, such as precomputing the
value of, say, `5 - 3` (outputting `2` instead of `5 - 3` literally). As a side
bonus, this now also works with hexadecimal number literals, such as `0x02`.
Finally, this also improves the output of `coffee --nodes`:
# Before:
$ bin/coffee -ne 'while true
"#{a}"
break'
Block
While
Value
Bool
Block
Value
Parens
Block
Op +
Value """"
Value
Parens
Block
Value "a" "break"
# After:
$ bin/coffee -ne 'while true
"#{a}"
break'
Block
While
Value BooleanLiteral: true
Block
Value
StringWithInterpolations
Block
Op +
Value StringLiteral: ""
Value
Parens
Block
Value IdentifierLiteral: a
StatementLiteral: break
2016-01-31 19:24:31 +00:00
|
|
|
case 'debugger':
|
2010-11-14 20:15:13 +00:00
|
|
|
return 'STATEMENT';
|
Define proper operator precedence for bitwise/logical operators
This is an upstream port for the patch https://github.com/decaffeinate/coffeescript/pull/8
See https://github.com/decaffeinate/decaffeinate/issues/291 for the bug that this fixed.
For the most part, CoffeeScript and JavaScript have the same precedence rules,
but in some cases, the intermediate AST format didn't represent the actual
evaluation order. For example, in the expression `a or b and c`, the `and` is
evaluated first, but the parser treated the two operators with equal precedence.
This was still correct end-to-end because CoffeeScript simply emitted the result
without parens, but any intermediate tools using the CoffeeScript parser could
become confused.
Here are the JS operator precedence rules:
https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Operators/Operator_Precedence
For the most part, CoffeeScript already follows these. `COMPARE` operators
already behave differently due to chained comparisons, so I think we don't need
to worry about following JS precedence for those. So I think the only case where
it was behaving differently in an important way was for the binary/bitwise
operators that are being changed here.
As part of this change, I also introduced a new token tag, `BIN?`, for the
binary form of the `?` operator.
2016-10-09 19:00:38 +00:00
|
|
|
case '&&':
|
|
|
|
case '||':
|
|
|
|
return id;
|
2010-11-05 04:04:52 +00:00
|
|
|
default:
|
|
|
|
return tag;
|
|
|
|
}
|
2010-12-23 18:50:52 +00:00
|
|
|
})();
|
2010-03-22 01:46:06 +00:00
|
|
|
}
|
2012-11-17 00:09:56 +00:00
|
|
|
tagToken = this.token(tag, id, 0, idLength);
|
2015-05-01 12:33:11 +00:00
|
|
|
if (alias) {
|
|
|
|
tagToken.origin = [tag, alias, tagToken[2]];
|
|
|
|
}
|
2012-11-17 00:09:56 +00:00
|
|
|
if (poppedToken) {
|
2016-09-15 06:30:58 +00:00
|
|
|
ref5 = [poppedToken[2].first_line, poppedToken[2].first_column], tagToken[2].first_line = ref5[0], tagToken[2].first_column = ref5[1];
|
2012-11-17 00:09:56 +00:00
|
|
|
}
|
2012-04-10 18:57:45 +00:00
|
|
|
if (colon) {
|
2012-11-17 00:09:56 +00:00
|
|
|
colonOffset = input.lastIndexOf(':');
|
|
|
|
this.token(':', ':', colonOffset, colon.length);
|
2012-04-10 18:57:45 +00:00
|
|
|
}
|
2010-10-22 14:47:40 +00:00
|
|
|
return input.length;
|
2010-02-28 00:40:53 +00:00
|
|
|
};
|
2011-09-18 22:16:39 +00:00
|
|
|
|
2010-06-12 23:05:13 +00:00
|
|
|
Lexer.prototype.numberToken = function() {
|
2016-09-26 14:33:57 +00:00
|
|
|
var base, lexedLength, match, number, numberValue, ref2, tag;
|
2012-04-10 18:57:45 +00:00
|
|
|
if (!(match = NUMBER.exec(this.chunk))) {
|
|
|
|
return 0;
|
|
|
|
}
|
2010-09-25 22:06:14 +00:00
|
|
|
number = match[0];
|
2015-02-06 09:52:02 +00:00
|
|
|
lexedLength = number.length;
|
2016-09-26 14:33:57 +00:00
|
|
|
switch (false) {
|
|
|
|
case !/^0[BOX]/.test(number):
|
|
|
|
this.error("radix prefix in '" + number + "' must be lowercase", {
|
|
|
|
offset: 1
|
|
|
|
});
|
|
|
|
break;
|
|
|
|
case !/^(?!0x).*E/.test(number):
|
|
|
|
this.error("exponential notation in '" + number + "' must be indicated with a lowercase 'e'", {
|
|
|
|
offset: number.indexOf('E')
|
|
|
|
});
|
|
|
|
break;
|
|
|
|
case !/^0\d*[89]/.test(number):
|
|
|
|
this.error("decimal literal '" + number + "' must not be prefixed with '0'", {
|
|
|
|
length: lexedLength
|
|
|
|
});
|
|
|
|
break;
|
|
|
|
case !/^0\d+/.test(number):
|
|
|
|
this.error("octal literal '" + number + "' must be prefixed with '0o'", {
|
|
|
|
length: lexedLength
|
|
|
|
});
|
|
|
|
}
|
|
|
|
base = (function() {
|
|
|
|
switch (number.charAt(1)) {
|
|
|
|
case 'b':
|
|
|
|
return 2;
|
|
|
|
case 'o':
|
|
|
|
return 8;
|
|
|
|
case 'x':
|
|
|
|
return 16;
|
|
|
|
default:
|
|
|
|
return null;
|
|
|
|
}
|
|
|
|
})();
|
|
|
|
numberValue = base != null ? parseInt(number.slice(2), base) : parseFloat(number);
|
|
|
|
if ((ref2 = number.charAt(1)) === 'b' || ref2 === 'o') {
|
Refactor `Literal` into several subtypes
Previously, the parser created `Literal` nodes for many things. This resulted in
information loss. Instead of being able to check the node type, we had to use
regexes to tell the different types of `Literal`s apart. That was a bit like
parsing literals twice: Once in the lexer, and once (or more) in the compiler.
It also caused problems, such as `` `this` `` and `this` being indistinguishable
(fixes #2009).
Instead returning `new Literal` in the grammar, subtypes of it are now returned
instead, such as `NumberLiteral`, `StringLiteral` and `IdentifierLiteral`. `new
Literal` by itself is only used to represent code chunks that fit no category.
(While mentioning `NumberLiteral`, there's also `InfinityLiteral` now, which is
a subtype of `NumberLiteral`.)
`StringWithInterpolations` has been added as a subtype of `Parens`, and
`RegexWithInterpolations` as a subtype of `Call`. This makes it easier for other
programs to make use of CoffeeScript's "AST" (nodes). For example, it is now
possible to distinguish between `"a #{b} c"` and `"a " + b + " c"`. Fixes #4192.
`SuperCall` has been added as a subtype of `Call`.
Note, though, that some information is still lost, especially in the lexer. For
example, there is no way to distinguish a heredoc from a regular string, or a
heregex without interpolations from a regular regex. Binary and octal number
literals are indistinguishable from hexadecimal literals.
After the new subtypes were added, they were taken advantage of, removing most
regexes in nodes.coffee. `SIMPLENUM` (which matches non-hex integers) had to be
kept, though, because such numbers need special handling in JavaScript (for
example in `1..toString()`).
An especially nice hack to get rid of was using `new String()` for the token
value for reserved identifiers (to be able to set a property on them which could
survive through the parser). Now it's a good old regular string.
In range literals, slices, splices and for loop steps when number literals
are involved, CoffeeScript can do some optimizations, such as precomputing the
value of, say, `5 - 3` (outputting `2` instead of `5 - 3` literally). As a side
bonus, this now also works with hexadecimal number literals, such as `0x02`.
Finally, this also improves the output of `coffee --nodes`:
# Before:
$ bin/coffee -ne 'while true
"#{a}"
break'
Block
While
Value
Bool
Block
Value
Parens
Block
Op +
Value """"
Value
Parens
Block
Value "a" "break"
# After:
$ bin/coffee -ne 'while true
"#{a}"
break'
Block
While
Value BooleanLiteral: true
Block
Value
StringWithInterpolations
Block
Op +
Value StringLiteral: ""
Value
Parens
Block
Value IdentifierLiteral: a
StatementLiteral: break
2016-01-31 19:24:31 +00:00
|
|
|
number = "0x" + (numberValue.toString(16));
|
2011-10-24 18:51:41 +00:00
|
|
|
}
|
2016-03-05 20:32:20 +00:00
|
|
|
tag = numberValue === 2e308 ? 'INFINITY' : 'NUMBER';
|
Refactor `Literal` into several subtypes
Previously, the parser created `Literal` nodes for many things. This resulted in
information loss. Instead of being able to check the node type, we had to use
regexes to tell the different types of `Literal`s apart. That was a bit like
parsing literals twice: Once in the lexer, and once (or more) in the compiler.
It also caused problems, such as `` `this` `` and `this` being indistinguishable
(fixes #2009).
Instead returning `new Literal` in the grammar, subtypes of it are now returned
instead, such as `NumberLiteral`, `StringLiteral` and `IdentifierLiteral`. `new
Literal` by itself is only used to represent code chunks that fit no category.
(While mentioning `NumberLiteral`, there's also `InfinityLiteral` now, which is
a subtype of `NumberLiteral`.)
`StringWithInterpolations` has been added as a subtype of `Parens`, and
`RegexWithInterpolations` as a subtype of `Call`. This makes it easier for other
programs to make use of CoffeeScript's "AST" (nodes). For example, it is now
possible to distinguish between `"a #{b} c"` and `"a " + b + " c"`. Fixes #4192.
`SuperCall` has been added as a subtype of `Call`.
Note, though, that some information is still lost, especially in the lexer. For
example, there is no way to distinguish a heredoc from a regular string, or a
heregex without interpolations from a regular regex. Binary and octal number
literals are indistinguishable from hexadecimal literals.
After the new subtypes were added, they were taken advantage of, removing most
regexes in nodes.coffee. `SIMPLENUM` (which matches non-hex integers) had to be
kept, though, because such numbers need special handling in JavaScript (for
example in `1..toString()`).
An especially nice hack to get rid of was using `new String()` for the token
value for reserved identifiers (to be able to set a property on them which could
survive through the parser). Now it's a good old regular string.
In range literals, slices, splices and for loop steps when number literals
are involved, CoffeeScript can do some optimizations, such as precomputing the
value of, say, `5 - 3` (outputting `2` instead of `5 - 3` literally). As a side
bonus, this now also works with hexadecimal number literals, such as `0x02`.
Finally, this also improves the output of `coffee --nodes`:
# Before:
$ bin/coffee -ne 'while true
"#{a}"
break'
Block
While
Value
Bool
Block
Value
Parens
Block
Op +
Value """"
Value
Parens
Block
Value "a" "break"
# After:
$ bin/coffee -ne 'while true
"#{a}"
break'
Block
While
Value BooleanLiteral: true
Block
Value
StringWithInterpolations
Block
Op +
Value StringLiteral: ""
Value
Parens
Block
Value IdentifierLiteral: a
StatementLiteral: break
2016-01-31 19:24:31 +00:00
|
|
|
this.token(tag, number, 0, lexedLength);
|
2011-10-24 18:51:41 +00:00
|
|
|
return lexedLength;
|
2010-02-28 00:40:53 +00:00
|
|
|
};
|
2011-09-18 22:16:39 +00:00
|
|
|
|
2010-06-12 23:05:13 +00:00
|
|
|
Lexer.prototype.stringToken = function() {
|
2015-02-05 16:23:03 +00:00
|
|
|
var $, attempt, delimiter, doc, end, heredoc, i, indent, indentRegex, match, quote, ref2, ref3, regex, token, tokens;
|
Refactor interpolation (and string and regex) handling in lexer
- Fix #3394: Unclosed single-quoted strings (both regular ones and heredocs)
used to pass through the lexer, causing a parsing error later, while
double-quoted strings caused an error already in the lexing phase. Now both
single and double-quoted unclosed strings error out in the lexer (which is the
more logical option) with consistent error messages. This also fixes the last
comment by @satyr in #3301.
- Similar to the above, unclosed heregexes also used to pass through the lexer
and not error until in the parsing phase, which resulted in confusing error
messages. This has been fixed, too.
- Fix #3348, by adding passing tests.
- Fix #3529: If a string starts with an interpolation, an empty string is no
longer emitted before the interpolation (unless it is needed to coerce the
interpolation into a string).
- Block comments cannot contain `*/`. Now the error message also shows exactly
where the offending `*/`. This improvement might seem unrelated, but I had to
touch that code anyway to refactor string and regex related code, and the
change was very trivial. Moreover, it's consistent with the next two points.
- Regexes cannot start with `*`. Now the error message also shows exactly where
the offending `*` is. (It might actually not be exatly at the start in
heregexes.) It is a very minor improvement, but it was trivial to add.
- Octal escapes in strings are forbidden in CoffeeScript (just like in
JavaScript strict mode). However, this used to be the case only for regular
strings. Now they are also forbidden in heredocs. Moreover, the errors now
point at the offending octal escape.
- Invalid regex flags are no longer allowed. This includes repeated modifiers
and unknown ones. Moreover, invalid modifiers do not stop a heregex from
being matched, which results in better error messages.
- Fix #3621: `///a#{1}///` compiles to `RegExp("a" + 1)`. So does
`RegExp("a#{1}")`. Still, those two code snippets used to generate different
tokens, which is a bit weird, but more importantly causes problems for
coffeelint (see clutchski/coffeelint#340). This required lots of tests in
test/location.coffee to be updated. Note that some updates to those tests are
unrelated to this point; some have been updated to be more consistent (I
discovered this because the refactored code happened to be seemingly more
correct).
- Regular regex literals used to erraneously allow newlines to be escaped,
causing invalid JavaScript output. This has been fixed.
- Heregexes may now be completely empty (`//////`), instead of erroring out with
a confusing message.
- Fix #2388: Heredocs and heregexes used to be lexed simply, which meant that
you couldn't nest a heredoc within a heredoc (double-quoted, that is) or a
heregex inside a heregex.
- Fix #2321: If you used division inside interpolation and then a slash later in
the string containing that interpolation, the division slash and the latter
slash was erraneously matched as a regex. This has been fixed.
- Indentation inside interpolations in heredocs no longer affect how much
indentation is removed from each line of the heredoc (which is more
intuitive).
- Whitespace is now correctly trimmed from the start and end of strings in a few
edge cases.
- Last but not least, the lexing of interpolated strings now seems to be more
efficient. For a regular double-quoted string, we used to use a custom
function to find the end of it (taking interpolations and interpolations
within interpolations etc. into account). Then we used to re-find the
interpolations and recursively lex their contents. In effect, the same string
was processed twice, or even more in the case of deeper nesting of
interpolations. Now the same string is processed just once.
- Code duplication between regular strings, heredocs, regular regexes and
heregexes has been reduced.
- The above two points should result in more easily read code, too.
2015-01-03 22:40:43 +00:00
|
|
|
quote = (STRING_START.exec(this.chunk) || [])[0];
|
|
|
|
if (!quote) {
|
2013-11-27 20:29:45 +00:00
|
|
|
return 0;
|
|
|
|
}
|
Support import and export of ES2015 modules (#4300)
This pull request adds support for ES2015 modules, by recognizing `import` and `export` statements. The following syntaxes are supported, based on the MDN [import](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Statements/import) and [export](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Statements/export) pages:
```js
import "module-name"
import defaultMember from "module-name"
import * as name from "module-name"
import { } from "module-name"
import { member } from "module-name"
import { member as alias } from "module-name"
import { member1, member2 as alias2, … } from "module-name"
import defaultMember, * as name from "module-name"
import defaultMember, { … } from "module-name"
export default expression
export class name
export { }
export { name }
export { name as exportedName }
export { name as default }
export { name1, name2 as exportedName2, name3 as default, … }
export * from "module-name"
export { … } from "module-name"
```
As a subsitute for ECMAScript’s `export var name = …` and `export function name {}`, CoffeeScript also supports:
```js
export name = …
```
CoffeeScript also supports optional commas within `{ … }`.
This PR converts the supported `import` and `export` statements into ES2015 `import` and `export` statements; it **does not resolve the modules**. So any CoffeeScript with `import` or `export` statements will be output as ES2015, and will need to be transpiled by another tool such as Babel before it can be used in a browser. We will need to add a warning to the documentation explaining this.
This should be fully backwards-compatible, as `import` and `export` were previously reserved keywords. No flags are used.
There are extensive tests included, though because no current JavaScript runtime supports `import` or `export`, the tests compare strings of what the compiled CoffeeScript output is against what the expected ES2015 should be. I also conducted two more elaborate tests:
* I forked the [ember-piqu](https://github.com/pauc/piqu-ember) project, which was an Ember CLI app that used ember-cli-coffeescript and [ember-cli-coffees6](https://github.com/alexspeller/ember-cli-coffees6) (which adds “support” for `import`/`export` by wrapping such statements in backticks before passing the result to the CoffeeScript compiler). I removed `ember-cli-coffees6` and replaced the CoffeeScript compiler used in the build chain with this code, and the app built without errors. [Demo here.](https://github.com/GeoffreyBooth/coffeescript-modules-test-piqu)
* I also forked the [CoffeeScript version of Meteor’s Todos example app](https://github.com/meteor/todos/tree/coffeescript), and replaced all of its `require` statements with the `import` and `export` statements from the original ES2015 version of the app on its `master` branch. I then updated the `coffeescript` Meteor package in the app to use this new code, and again the app builds without errors. [Demo here.](https://github.com/GeoffreyBooth/coffeescript-modules-test-meteor-todos)
The discussion history for this work started [here](https://github.com/jashkenas/coffeescript/pull/4160) and continued [here](https://github.com/GeoffreyBooth/coffeescript/pull/2). @lydell provided guidance, and @JimPanic and @rattrayalex contributed essential code.
2016-09-14 18:46:05 +00:00
|
|
|
if (this.tokens.length && this.value() === 'from' && (this.seenImport || this.seenExport)) {
|
|
|
|
this.tokens[this.tokens.length - 1][0] = 'FROM';
|
|
|
|
}
|
Refactor interpolation (and string and regex) handling in lexer
- Fix #3394: Unclosed single-quoted strings (both regular ones and heredocs)
used to pass through the lexer, causing a parsing error later, while
double-quoted strings caused an error already in the lexing phase. Now both
single and double-quoted unclosed strings error out in the lexer (which is the
more logical option) with consistent error messages. This also fixes the last
comment by @satyr in #3301.
- Similar to the above, unclosed heregexes also used to pass through the lexer
and not error until in the parsing phase, which resulted in confusing error
messages. This has been fixed, too.
- Fix #3348, by adding passing tests.
- Fix #3529: If a string starts with an interpolation, an empty string is no
longer emitted before the interpolation (unless it is needed to coerce the
interpolation into a string).
- Block comments cannot contain `*/`. Now the error message also shows exactly
where the offending `*/`. This improvement might seem unrelated, but I had to
touch that code anyway to refactor string and regex related code, and the
change was very trivial. Moreover, it's consistent with the next two points.
- Regexes cannot start with `*`. Now the error message also shows exactly where
the offending `*` is. (It might actually not be exatly at the start in
heregexes.) It is a very minor improvement, but it was trivial to add.
- Octal escapes in strings are forbidden in CoffeeScript (just like in
JavaScript strict mode). However, this used to be the case only for regular
strings. Now they are also forbidden in heredocs. Moreover, the errors now
point at the offending octal escape.
- Invalid regex flags are no longer allowed. This includes repeated modifiers
and unknown ones. Moreover, invalid modifiers do not stop a heregex from
being matched, which results in better error messages.
- Fix #3621: `///a#{1}///` compiles to `RegExp("a" + 1)`. So does
`RegExp("a#{1}")`. Still, those two code snippets used to generate different
tokens, which is a bit weird, but more importantly causes problems for
coffeelint (see clutchski/coffeelint#340). This required lots of tests in
test/location.coffee to be updated. Note that some updates to those tests are
unrelated to this point; some have been updated to be more consistent (I
discovered this because the refactored code happened to be seemingly more
correct).
- Regular regex literals used to erraneously allow newlines to be escaped,
causing invalid JavaScript output. This has been fixed.
- Heregexes may now be completely empty (`//////`), instead of erroring out with
a confusing message.
- Fix #2388: Heredocs and heregexes used to be lexed simply, which meant that
you couldn't nest a heredoc within a heredoc (double-quoted, that is) or a
heregex inside a heregex.
- Fix #2321: If you used division inside interpolation and then a slash later in
the string containing that interpolation, the division slash and the latter
slash was erraneously matched as a regex. This has been fixed.
- Indentation inside interpolations in heredocs no longer affect how much
indentation is removed from each line of the heredoc (which is more
intuitive).
- Whitespace is now correctly trimmed from the start and end of strings in a few
edge cases.
- Last but not least, the lexing of interpolated strings now seems to be more
efficient. For a regular double-quoted string, we used to use a custom
function to find the end of it (taking interpolations and interpolations
within interpolations etc. into account). Then we used to re-find the
interpolations and recursively lex their contents. In effect, the same string
was processed twice, or even more in the case of deeper nesting of
interpolations. Now the same string is processed just once.
- Code duplication between regular strings, heredocs, regular regexes and
heregexes has been reduced.
- The above two points should result in more easily read code, too.
2015-01-03 22:40:43 +00:00
|
|
|
regex = (function() {
|
|
|
|
switch (quote) {
|
|
|
|
case "'":
|
|
|
|
return STRING_SINGLE;
|
|
|
|
case '"':
|
|
|
|
return STRING_DOUBLE;
|
|
|
|
case "'''":
|
|
|
|
return HEREDOC_SINGLE;
|
|
|
|
case '"""':
|
|
|
|
return HEREDOC_DOUBLE;
|
2014-07-02 15:33:57 +00:00
|
|
|
}
|
Refactor interpolation (and string and regex) handling in lexer
- Fix #3394: Unclosed single-quoted strings (both regular ones and heredocs)
used to pass through the lexer, causing a parsing error later, while
double-quoted strings caused an error already in the lexing phase. Now both
single and double-quoted unclosed strings error out in the lexer (which is the
more logical option) with consistent error messages. This also fixes the last
comment by @satyr in #3301.
- Similar to the above, unclosed heregexes also used to pass through the lexer
and not error until in the parsing phase, which resulted in confusing error
messages. This has been fixed, too.
- Fix #3348, by adding passing tests.
- Fix #3529: If a string starts with an interpolation, an empty string is no
longer emitted before the interpolation (unless it is needed to coerce the
interpolation into a string).
- Block comments cannot contain `*/`. Now the error message also shows exactly
where the offending `*/`. This improvement might seem unrelated, but I had to
touch that code anyway to refactor string and regex related code, and the
change was very trivial. Moreover, it's consistent with the next two points.
- Regexes cannot start with `*`. Now the error message also shows exactly where
the offending `*` is. (It might actually not be exatly at the start in
heregexes.) It is a very minor improvement, but it was trivial to add.
- Octal escapes in strings are forbidden in CoffeeScript (just like in
JavaScript strict mode). However, this used to be the case only for regular
strings. Now they are also forbidden in heredocs. Moreover, the errors now
point at the offending octal escape.
- Invalid regex flags are no longer allowed. This includes repeated modifiers
and unknown ones. Moreover, invalid modifiers do not stop a heregex from
being matched, which results in better error messages.
- Fix #3621: `///a#{1}///` compiles to `RegExp("a" + 1)`. So does
`RegExp("a#{1}")`. Still, those two code snippets used to generate different
tokens, which is a bit weird, but more importantly causes problems for
coffeelint (see clutchski/coffeelint#340). This required lots of tests in
test/location.coffee to be updated. Note that some updates to those tests are
unrelated to this point; some have been updated to be more consistent (I
discovered this because the refactored code happened to be seemingly more
correct).
- Regular regex literals used to erraneously allow newlines to be escaped,
causing invalid JavaScript output. This has been fixed.
- Heregexes may now be completely empty (`//////`), instead of erroring out with
a confusing message.
- Fix #2388: Heredocs and heregexes used to be lexed simply, which meant that
you couldn't nest a heredoc within a heredoc (double-quoted, that is) or a
heregex inside a heregex.
- Fix #2321: If you used division inside interpolation and then a slash later in
the string containing that interpolation, the division slash and the latter
slash was erraneously matched as a regex. This has been fixed.
- Indentation inside interpolations in heredocs no longer affect how much
indentation is removed from each line of the heredoc (which is more
intuitive).
- Whitespace is now correctly trimmed from the start and end of strings in a few
edge cases.
- Last but not least, the lexing of interpolated strings now seems to be more
efficient. For a regular double-quoted string, we used to use a custom
function to find the end of it (taking interpolations and interpolations
within interpolations etc. into account). Then we used to re-find the
interpolations and recursively lex their contents. In effect, the same string
was processed twice, or even more in the case of deeper nesting of
interpolations. Now the same string is processed just once.
- Code duplication between regular strings, heredocs, regular regexes and
heregexes has been reduced.
- The above two points should result in more easily read code, too.
2015-01-03 22:40:43 +00:00
|
|
|
})();
|
|
|
|
heredoc = quote.length === 3;
|
2015-02-03 17:55:38 +00:00
|
|
|
ref2 = this.matchWithInterpolations(regex, quote), tokens = ref2.tokens, end = ref2.index;
|
Refactor interpolation (and string and regex) handling in lexer
- Fix #3394: Unclosed single-quoted strings (both regular ones and heredocs)
used to pass through the lexer, causing a parsing error later, while
double-quoted strings caused an error already in the lexing phase. Now both
single and double-quoted unclosed strings error out in the lexer (which is the
more logical option) with consistent error messages. This also fixes the last
comment by @satyr in #3301.
- Similar to the above, unclosed heregexes also used to pass through the lexer
and not error until in the parsing phase, which resulted in confusing error
messages. This has been fixed, too.
- Fix #3348, by adding passing tests.
- Fix #3529: If a string starts with an interpolation, an empty string is no
longer emitted before the interpolation (unless it is needed to coerce the
interpolation into a string).
- Block comments cannot contain `*/`. Now the error message also shows exactly
where the offending `*/`. This improvement might seem unrelated, but I had to
touch that code anyway to refactor string and regex related code, and the
change was very trivial. Moreover, it's consistent with the next two points.
- Regexes cannot start with `*`. Now the error message also shows exactly where
the offending `*` is. (It might actually not be exatly at the start in
heregexes.) It is a very minor improvement, but it was trivial to add.
- Octal escapes in strings are forbidden in CoffeeScript (just like in
JavaScript strict mode). However, this used to be the case only for regular
strings. Now they are also forbidden in heredocs. Moreover, the errors now
point at the offending octal escape.
- Invalid regex flags are no longer allowed. This includes repeated modifiers
and unknown ones. Moreover, invalid modifiers do not stop a heregex from
being matched, which results in better error messages.
- Fix #3621: `///a#{1}///` compiles to `RegExp("a" + 1)`. So does
`RegExp("a#{1}")`. Still, those two code snippets used to generate different
tokens, which is a bit weird, but more importantly causes problems for
coffeelint (see clutchski/coffeelint#340). This required lots of tests in
test/location.coffee to be updated. Note that some updates to those tests are
unrelated to this point; some have been updated to be more consistent (I
discovered this because the refactored code happened to be seemingly more
correct).
- Regular regex literals used to erraneously allow newlines to be escaped,
causing invalid JavaScript output. This has been fixed.
- Heregexes may now be completely empty (`//////`), instead of erroring out with
a confusing message.
- Fix #2388: Heredocs and heregexes used to be lexed simply, which meant that
you couldn't nest a heredoc within a heredoc (double-quoted, that is) or a
heregex inside a heregex.
- Fix #2321: If you used division inside interpolation and then a slash later in
the string containing that interpolation, the division slash and the latter
slash was erraneously matched as a regex. This has been fixed.
- Indentation inside interpolations in heredocs no longer affect how much
indentation is removed from each line of the heredoc (which is more
intuitive).
- Whitespace is now correctly trimmed from the start and end of strings in a few
edge cases.
- Last but not least, the lexing of interpolated strings now seems to be more
efficient. For a regular double-quoted string, we used to use a custom
function to find the end of it (taking interpolations and interpolations
within interpolations etc. into account). Then we used to re-find the
interpolations and recursively lex their contents. In effect, the same string
was processed twice, or even more in the case of deeper nesting of
interpolations. Now the same string is processed just once.
- Code duplication between regular strings, heredocs, regular regexes and
heregexes has been reduced.
- The above two points should result in more easily read code, too.
2015-01-03 22:40:43 +00:00
|
|
|
$ = tokens.length - 1;
|
2015-02-22 16:08:15 +00:00
|
|
|
delimiter = quote.charAt(0);
|
Refactor interpolation (and string and regex) handling in lexer
- Fix #3394: Unclosed single-quoted strings (both regular ones and heredocs)
used to pass through the lexer, causing a parsing error later, while
double-quoted strings caused an error already in the lexing phase. Now both
single and double-quoted unclosed strings error out in the lexer (which is the
more logical option) with consistent error messages. This also fixes the last
comment by @satyr in #3301.
- Similar to the above, unclosed heregexes also used to pass through the lexer
and not error until in the parsing phase, which resulted in confusing error
messages. This has been fixed, too.
- Fix #3348, by adding passing tests.
- Fix #3529: If a string starts with an interpolation, an empty string is no
longer emitted before the interpolation (unless it is needed to coerce the
interpolation into a string).
- Block comments cannot contain `*/`. Now the error message also shows exactly
where the offending `*/`. This improvement might seem unrelated, but I had to
touch that code anyway to refactor string and regex related code, and the
change was very trivial. Moreover, it's consistent with the next two points.
- Regexes cannot start with `*`. Now the error message also shows exactly where
the offending `*` is. (It might actually not be exatly at the start in
heregexes.) It is a very minor improvement, but it was trivial to add.
- Octal escapes in strings are forbidden in CoffeeScript (just like in
JavaScript strict mode). However, this used to be the case only for regular
strings. Now they are also forbidden in heredocs. Moreover, the errors now
point at the offending octal escape.
- Invalid regex flags are no longer allowed. This includes repeated modifiers
and unknown ones. Moreover, invalid modifiers do not stop a heregex from
being matched, which results in better error messages.
- Fix #3621: `///a#{1}///` compiles to `RegExp("a" + 1)`. So does
`RegExp("a#{1}")`. Still, those two code snippets used to generate different
tokens, which is a bit weird, but more importantly causes problems for
coffeelint (see clutchski/coffeelint#340). This required lots of tests in
test/location.coffee to be updated. Note that some updates to those tests are
unrelated to this point; some have been updated to be more consistent (I
discovered this because the refactored code happened to be seemingly more
correct).
- Regular regex literals used to erraneously allow newlines to be escaped,
causing invalid JavaScript output. This has been fixed.
- Heregexes may now be completely empty (`//////`), instead of erroring out with
a confusing message.
- Fix #2388: Heredocs and heregexes used to be lexed simply, which meant that
you couldn't nest a heredoc within a heredoc (double-quoted, that is) or a
heregex inside a heregex.
- Fix #2321: If you used division inside interpolation and then a slash later in
the string containing that interpolation, the division slash and the latter
slash was erraneously matched as a regex. This has been fixed.
- Indentation inside interpolations in heredocs no longer affect how much
indentation is removed from each line of the heredoc (which is more
intuitive).
- Whitespace is now correctly trimmed from the start and end of strings in a few
edge cases.
- Last but not least, the lexing of interpolated strings now seems to be more
efficient. For a regular double-quoted string, we used to use a custom
function to find the end of it (taking interpolations and interpolations
within interpolations etc. into account). Then we used to re-find the
interpolations and recursively lex their contents. In effect, the same string
was processed twice, or even more in the case of deeper nesting of
interpolations. Now the same string is processed just once.
- Code duplication between regular strings, heredocs, regular regexes and
heregexes has been reduced.
- The above two points should result in more easily read code, too.
2015-01-03 22:40:43 +00:00
|
|
|
if (heredoc) {
|
|
|
|
indent = null;
|
|
|
|
doc = ((function() {
|
2015-01-30 19:33:03 +00:00
|
|
|
var j, len, results;
|
|
|
|
results = [];
|
|
|
|
for (i = j = 0, len = tokens.length; j < len; i = ++j) {
|
Refactor interpolation (and string and regex) handling in lexer
- Fix #3394: Unclosed single-quoted strings (both regular ones and heredocs)
used to pass through the lexer, causing a parsing error later, while
double-quoted strings caused an error already in the lexing phase. Now both
single and double-quoted unclosed strings error out in the lexer (which is the
more logical option) with consistent error messages. This also fixes the last
comment by @satyr in #3301.
- Similar to the above, unclosed heregexes also used to pass through the lexer
and not error until in the parsing phase, which resulted in confusing error
messages. This has been fixed, too.
- Fix #3348, by adding passing tests.
- Fix #3529: If a string starts with an interpolation, an empty string is no
longer emitted before the interpolation (unless it is needed to coerce the
interpolation into a string).
- Block comments cannot contain `*/`. Now the error message also shows exactly
where the offending `*/`. This improvement might seem unrelated, but I had to
touch that code anyway to refactor string and regex related code, and the
change was very trivial. Moreover, it's consistent with the next two points.
- Regexes cannot start with `*`. Now the error message also shows exactly where
the offending `*` is. (It might actually not be exatly at the start in
heregexes.) It is a very minor improvement, but it was trivial to add.
- Octal escapes in strings are forbidden in CoffeeScript (just like in
JavaScript strict mode). However, this used to be the case only for regular
strings. Now they are also forbidden in heredocs. Moreover, the errors now
point at the offending octal escape.
- Invalid regex flags are no longer allowed. This includes repeated modifiers
and unknown ones. Moreover, invalid modifiers do not stop a heregex from
being matched, which results in better error messages.
- Fix #3621: `///a#{1}///` compiles to `RegExp("a" + 1)`. So does
`RegExp("a#{1}")`. Still, those two code snippets used to generate different
tokens, which is a bit weird, but more importantly causes problems for
coffeelint (see clutchski/coffeelint#340). This required lots of tests in
test/location.coffee to be updated. Note that some updates to those tests are
unrelated to this point; some have been updated to be more consistent (I
discovered this because the refactored code happened to be seemingly more
correct).
- Regular regex literals used to erraneously allow newlines to be escaped,
causing invalid JavaScript output. This has been fixed.
- Heregexes may now be completely empty (`//////`), instead of erroring out with
a confusing message.
- Fix #2388: Heredocs and heregexes used to be lexed simply, which meant that
you couldn't nest a heredoc within a heredoc (double-quoted, that is) or a
heregex inside a heregex.
- Fix #2321: If you used division inside interpolation and then a slash later in
the string containing that interpolation, the division slash and the latter
slash was erraneously matched as a regex. This has been fixed.
- Indentation inside interpolations in heredocs no longer affect how much
indentation is removed from each line of the heredoc (which is more
intuitive).
- Whitespace is now correctly trimmed from the start and end of strings in a few
edge cases.
- Last but not least, the lexing of interpolated strings now seems to be more
efficient. For a regular double-quoted string, we used to use a custom
function to find the end of it (taking interpolations and interpolations
within interpolations etc. into account). Then we used to re-find the
interpolations and recursively lex their contents. In effect, the same string
was processed twice, or even more in the case of deeper nesting of
interpolations. Now the same string is processed just once.
- Code duplication between regular strings, heredocs, regular regexes and
heregexes has been reduced.
- The above two points should result in more easily read code, too.
2015-01-03 22:40:43 +00:00
|
|
|
token = tokens[i];
|
|
|
|
if (token[0] === 'NEOSTRING') {
|
2015-01-30 19:33:03 +00:00
|
|
|
results.push(token[1]);
|
Refactor interpolation (and string and regex) handling in lexer
- Fix #3394: Unclosed single-quoted strings (both regular ones and heredocs)
used to pass through the lexer, causing a parsing error later, while
double-quoted strings caused an error already in the lexing phase. Now both
single and double-quoted unclosed strings error out in the lexer (which is the
more logical option) with consistent error messages. This also fixes the last
comment by @satyr in #3301.
- Similar to the above, unclosed heregexes also used to pass through the lexer
and not error until in the parsing phase, which resulted in confusing error
messages. This has been fixed, too.
- Fix #3348, by adding passing tests.
- Fix #3529: If a string starts with an interpolation, an empty string is no
longer emitted before the interpolation (unless it is needed to coerce the
interpolation into a string).
- Block comments cannot contain `*/`. Now the error message also shows exactly
where the offending `*/`. This improvement might seem unrelated, but I had to
touch that code anyway to refactor string and regex related code, and the
change was very trivial. Moreover, it's consistent with the next two points.
- Regexes cannot start with `*`. Now the error message also shows exactly where
the offending `*` is. (It might actually not be exatly at the start in
heregexes.) It is a very minor improvement, but it was trivial to add.
- Octal escapes in strings are forbidden in CoffeeScript (just like in
JavaScript strict mode). However, this used to be the case only for regular
strings. Now they are also forbidden in heredocs. Moreover, the errors now
point at the offending octal escape.
- Invalid regex flags are no longer allowed. This includes repeated modifiers
and unknown ones. Moreover, invalid modifiers do not stop a heregex from
being matched, which results in better error messages.
- Fix #3621: `///a#{1}///` compiles to `RegExp("a" + 1)`. So does
`RegExp("a#{1}")`. Still, those two code snippets used to generate different
tokens, which is a bit weird, but more importantly causes problems for
coffeelint (see clutchski/coffeelint#340). This required lots of tests in
test/location.coffee to be updated. Note that some updates to those tests are
unrelated to this point; some have been updated to be more consistent (I
discovered this because the refactored code happened to be seemingly more
correct).
- Regular regex literals used to erraneously allow newlines to be escaped,
causing invalid JavaScript output. This has been fixed.
- Heregexes may now be completely empty (`//////`), instead of erroring out with
a confusing message.
- Fix #2388: Heredocs and heregexes used to be lexed simply, which meant that
you couldn't nest a heredoc within a heredoc (double-quoted, that is) or a
heregex inside a heregex.
- Fix #2321: If you used division inside interpolation and then a slash later in
the string containing that interpolation, the division slash and the latter
slash was erraneously matched as a regex. This has been fixed.
- Indentation inside interpolations in heredocs no longer affect how much
indentation is removed from each line of the heredoc (which is more
intuitive).
- Whitespace is now correctly trimmed from the start and end of strings in a few
edge cases.
- Last but not least, the lexing of interpolated strings now seems to be more
efficient. For a regular double-quoted string, we used to use a custom
function to find the end of it (taking interpolations and interpolations
within interpolations etc. into account). Then we used to re-find the
interpolations and recursively lex their contents. In effect, the same string
was processed twice, or even more in the case of deeper nesting of
interpolations. Now the same string is processed just once.
- Code duplication between regular strings, heredocs, regular regexes and
heregexes has been reduced.
- The above two points should result in more easily read code, too.
2015-01-03 22:40:43 +00:00
|
|
|
}
|
|
|
|
}
|
2015-01-30 19:33:03 +00:00
|
|
|
return results;
|
Refactor interpolation (and string and regex) handling in lexer
- Fix #3394: Unclosed single-quoted strings (both regular ones and heredocs)
used to pass through the lexer, causing a parsing error later, while
double-quoted strings caused an error already in the lexing phase. Now both
single and double-quoted unclosed strings error out in the lexer (which is the
more logical option) with consistent error messages. This also fixes the last
comment by @satyr in #3301.
- Similar to the above, unclosed heregexes also used to pass through the lexer
and not error until in the parsing phase, which resulted in confusing error
messages. This has been fixed, too.
- Fix #3348, by adding passing tests.
- Fix #3529: If a string starts with an interpolation, an empty string is no
longer emitted before the interpolation (unless it is needed to coerce the
interpolation into a string).
- Block comments cannot contain `*/`. Now the error message also shows exactly
where the offending `*/`. This improvement might seem unrelated, but I had to
touch that code anyway to refactor string and regex related code, and the
change was very trivial. Moreover, it's consistent with the next two points.
- Regexes cannot start with `*`. Now the error message also shows exactly where
the offending `*` is. (It might actually not be exatly at the start in
heregexes.) It is a very minor improvement, but it was trivial to add.
- Octal escapes in strings are forbidden in CoffeeScript (just like in
JavaScript strict mode). However, this used to be the case only for regular
strings. Now they are also forbidden in heredocs. Moreover, the errors now
point at the offending octal escape.
- Invalid regex flags are no longer allowed. This includes repeated modifiers
and unknown ones. Moreover, invalid modifiers do not stop a heregex from
being matched, which results in better error messages.
- Fix #3621: `///a#{1}///` compiles to `RegExp("a" + 1)`. So does
`RegExp("a#{1}")`. Still, those two code snippets used to generate different
tokens, which is a bit weird, but more importantly causes problems for
coffeelint (see clutchski/coffeelint#340). This required lots of tests in
test/location.coffee to be updated. Note that some updates to those tests are
unrelated to this point; some have been updated to be more consistent (I
discovered this because the refactored code happened to be seemingly more
correct).
- Regular regex literals used to erraneously allow newlines to be escaped,
causing invalid JavaScript output. This has been fixed.
- Heregexes may now be completely empty (`//////`), instead of erroring out with
a confusing message.
- Fix #2388: Heredocs and heregexes used to be lexed simply, which meant that
you couldn't nest a heredoc within a heredoc (double-quoted, that is) or a
heregex inside a heregex.
- Fix #2321: If you used division inside interpolation and then a slash later in
the string containing that interpolation, the division slash and the latter
slash was erraneously matched as a regex. This has been fixed.
- Indentation inside interpolations in heredocs no longer affect how much
indentation is removed from each line of the heredoc (which is more
intuitive).
- Whitespace is now correctly trimmed from the start and end of strings in a few
edge cases.
- Last but not least, the lexing of interpolated strings now seems to be more
efficient. For a regular double-quoted string, we used to use a custom
function to find the end of it (taking interpolations and interpolations
within interpolations etc. into account). Then we used to re-find the
interpolations and recursively lex their contents. In effect, the same string
was processed twice, or even more in the case of deeper nesting of
interpolations. Now the same string is processed just once.
- Code duplication between regular strings, heredocs, regular regexes and
heregexes has been reduced.
- The above two points should result in more easily read code, too.
2015-01-03 22:40:43 +00:00
|
|
|
})()).join('#{}');
|
|
|
|
while (match = HEREDOC_INDENT.exec(doc)) {
|
|
|
|
attempt = match[1];
|
2015-01-30 19:33:03 +00:00
|
|
|
if (indent === null || (0 < (ref3 = attempt.length) && ref3 < indent.length)) {
|
Refactor interpolation (and string and regex) handling in lexer
- Fix #3394: Unclosed single-quoted strings (both regular ones and heredocs)
used to pass through the lexer, causing a parsing error later, while
double-quoted strings caused an error already in the lexing phase. Now both
single and double-quoted unclosed strings error out in the lexer (which is the
more logical option) with consistent error messages. This also fixes the last
comment by @satyr in #3301.
- Similar to the above, unclosed heregexes also used to pass through the lexer
and not error until in the parsing phase, which resulted in confusing error
messages. This has been fixed, too.
- Fix #3348, by adding passing tests.
- Fix #3529: If a string starts with an interpolation, an empty string is no
longer emitted before the interpolation (unless it is needed to coerce the
interpolation into a string).
- Block comments cannot contain `*/`. Now the error message also shows exactly
where the offending `*/`. This improvement might seem unrelated, but I had to
touch that code anyway to refactor string and regex related code, and the
change was very trivial. Moreover, it's consistent with the next two points.
- Regexes cannot start with `*`. Now the error message also shows exactly where
the offending `*` is. (It might actually not be exatly at the start in
heregexes.) It is a very minor improvement, but it was trivial to add.
- Octal escapes in strings are forbidden in CoffeeScript (just like in
JavaScript strict mode). However, this used to be the case only for regular
strings. Now they are also forbidden in heredocs. Moreover, the errors now
point at the offending octal escape.
- Invalid regex flags are no longer allowed. This includes repeated modifiers
and unknown ones. Moreover, invalid modifiers do not stop a heregex from
being matched, which results in better error messages.
- Fix #3621: `///a#{1}///` compiles to `RegExp("a" + 1)`. So does
`RegExp("a#{1}")`. Still, those two code snippets used to generate different
tokens, which is a bit weird, but more importantly causes problems for
coffeelint (see clutchski/coffeelint#340). This required lots of tests in
test/location.coffee to be updated. Note that some updates to those tests are
unrelated to this point; some have been updated to be more consistent (I
discovered this because the refactored code happened to be seemingly more
correct).
- Regular regex literals used to erraneously allow newlines to be escaped,
causing invalid JavaScript output. This has been fixed.
- Heregexes may now be completely empty (`//////`), instead of erroring out with
a confusing message.
- Fix #2388: Heredocs and heregexes used to be lexed simply, which meant that
you couldn't nest a heredoc within a heredoc (double-quoted, that is) or a
heregex inside a heregex.
- Fix #2321: If you used division inside interpolation and then a slash later in
the string containing that interpolation, the division slash and the latter
slash was erraneously matched as a regex. This has been fixed.
- Indentation inside interpolations in heredocs no longer affect how much
indentation is removed from each line of the heredoc (which is more
intuitive).
- Whitespace is now correctly trimmed from the start and end of strings in a few
edge cases.
- Last but not least, the lexing of interpolated strings now seems to be more
efficient. For a regular double-quoted string, we used to use a custom
function to find the end of it (taking interpolations and interpolations
within interpolations etc. into account). Then we used to re-find the
interpolations and recursively lex their contents. In effect, the same string
was processed twice, or even more in the case of deeper nesting of
interpolations. Now the same string is processed just once.
- Code duplication between regular strings, heredocs, regular regexes and
heregexes has been reduced.
- The above two points should result in more easily read code, too.
2015-01-03 22:40:43 +00:00
|
|
|
indent = attempt;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (indent) {
|
2016-09-26 15:10:38 +00:00
|
|
|
indentRegex = RegExp("\\n" + indent, "g");
|
Refactor interpolation (and string and regex) handling in lexer
- Fix #3394: Unclosed single-quoted strings (both regular ones and heredocs)
used to pass through the lexer, causing a parsing error later, while
double-quoted strings caused an error already in the lexing phase. Now both
single and double-quoted unclosed strings error out in the lexer (which is the
more logical option) with consistent error messages. This also fixes the last
comment by @satyr in #3301.
- Similar to the above, unclosed heregexes also used to pass through the lexer
and not error until in the parsing phase, which resulted in confusing error
messages. This has been fixed, too.
- Fix #3348, by adding passing tests.
- Fix #3529: If a string starts with an interpolation, an empty string is no
longer emitted before the interpolation (unless it is needed to coerce the
interpolation into a string).
- Block comments cannot contain `*/`. Now the error message also shows exactly
where the offending `*/`. This improvement might seem unrelated, but I had to
touch that code anyway to refactor string and regex related code, and the
change was very trivial. Moreover, it's consistent with the next two points.
- Regexes cannot start with `*`. Now the error message also shows exactly where
the offending `*` is. (It might actually not be exatly at the start in
heregexes.) It is a very minor improvement, but it was trivial to add.
- Octal escapes in strings are forbidden in CoffeeScript (just like in
JavaScript strict mode). However, this used to be the case only for regular
strings. Now they are also forbidden in heredocs. Moreover, the errors now
point at the offending octal escape.
- Invalid regex flags are no longer allowed. This includes repeated modifiers
and unknown ones. Moreover, invalid modifiers do not stop a heregex from
being matched, which results in better error messages.
- Fix #3621: `///a#{1}///` compiles to `RegExp("a" + 1)`. So does
`RegExp("a#{1}")`. Still, those two code snippets used to generate different
tokens, which is a bit weird, but more importantly causes problems for
coffeelint (see clutchski/coffeelint#340). This required lots of tests in
test/location.coffee to be updated. Note that some updates to those tests are
unrelated to this point; some have been updated to be more consistent (I
discovered this because the refactored code happened to be seemingly more
correct).
- Regular regex literals used to erraneously allow newlines to be escaped,
causing invalid JavaScript output. This has been fixed.
- Heregexes may now be completely empty (`//////`), instead of erroring out with
a confusing message.
- Fix #2388: Heredocs and heregexes used to be lexed simply, which meant that
you couldn't nest a heredoc within a heredoc (double-quoted, that is) or a
heregex inside a heregex.
- Fix #2321: If you used division inside interpolation and then a slash later in
the string containing that interpolation, the division slash and the latter
slash was erraneously matched as a regex. This has been fixed.
- Indentation inside interpolations in heredocs no longer affect how much
indentation is removed from each line of the heredoc (which is more
intuitive).
- Whitespace is now correctly trimmed from the start and end of strings in a few
edge cases.
- Last but not least, the lexing of interpolated strings now seems to be more
efficient. For a regular double-quoted string, we used to use a custom
function to find the end of it (taking interpolations and interpolations
within interpolations etc. into account). Then we used to re-find the
interpolations and recursively lex their contents. In effect, the same string
was processed twice, or even more in the case of deeper nesting of
interpolations. Now the same string is processed just once.
- Code duplication between regular strings, heredocs, regular regexes and
heregexes has been reduced.
- The above two points should result in more easily read code, too.
2015-01-03 22:40:43 +00:00
|
|
|
}
|
2015-02-05 16:23:03 +00:00
|
|
|
this.mergeInterpolationTokens(tokens, {
|
|
|
|
delimiter: delimiter
|
|
|
|
}, (function(_this) {
|
Refactor interpolation (and string and regex) handling in lexer
- Fix #3394: Unclosed single-quoted strings (both regular ones and heredocs)
used to pass through the lexer, causing a parsing error later, while
double-quoted strings caused an error already in the lexing phase. Now both
single and double-quoted unclosed strings error out in the lexer (which is the
more logical option) with consistent error messages. This also fixes the last
comment by @satyr in #3301.
- Similar to the above, unclosed heregexes also used to pass through the lexer
and not error until in the parsing phase, which resulted in confusing error
messages. This has been fixed, too.
- Fix #3348, by adding passing tests.
- Fix #3529: If a string starts with an interpolation, an empty string is no
longer emitted before the interpolation (unless it is needed to coerce the
interpolation into a string).
- Block comments cannot contain `*/`. Now the error message also shows exactly
where the offending `*/`. This improvement might seem unrelated, but I had to
touch that code anyway to refactor string and regex related code, and the
change was very trivial. Moreover, it's consistent with the next two points.
- Regexes cannot start with `*`. Now the error message also shows exactly where
the offending `*` is. (It might actually not be exatly at the start in
heregexes.) It is a very minor improvement, but it was trivial to add.
- Octal escapes in strings are forbidden in CoffeeScript (just like in
JavaScript strict mode). However, this used to be the case only for regular
strings. Now they are also forbidden in heredocs. Moreover, the errors now
point at the offending octal escape.
- Invalid regex flags are no longer allowed. This includes repeated modifiers
and unknown ones. Moreover, invalid modifiers do not stop a heregex from
being matched, which results in better error messages.
- Fix #3621: `///a#{1}///` compiles to `RegExp("a" + 1)`. So does
`RegExp("a#{1}")`. Still, those two code snippets used to generate different
tokens, which is a bit weird, but more importantly causes problems for
coffeelint (see clutchski/coffeelint#340). This required lots of tests in
test/location.coffee to be updated. Note that some updates to those tests are
unrelated to this point; some have been updated to be more consistent (I
discovered this because the refactored code happened to be seemingly more
correct).
- Regular regex literals used to erraneously allow newlines to be escaped,
causing invalid JavaScript output. This has been fixed.
- Heregexes may now be completely empty (`//////`), instead of erroring out with
a confusing message.
- Fix #2388: Heredocs and heregexes used to be lexed simply, which meant that
you couldn't nest a heredoc within a heredoc (double-quoted, that is) or a
heregex inside a heregex.
- Fix #2321: If you used division inside interpolation and then a slash later in
the string containing that interpolation, the division slash and the latter
slash was erraneously matched as a regex. This has been fixed.
- Indentation inside interpolations in heredocs no longer affect how much
indentation is removed from each line of the heredoc (which is more
intuitive).
- Whitespace is now correctly trimmed from the start and end of strings in a few
edge cases.
- Last but not least, the lexing of interpolated strings now seems to be more
efficient. For a regular double-quoted string, we used to use a custom
function to find the end of it (taking interpolations and interpolations
within interpolations etc. into account). Then we used to re-find the
interpolations and recursively lex their contents. In effect, the same string
was processed twice, or even more in the case of deeper nesting of
interpolations. Now the same string is processed just once.
- Code duplication between regular strings, heredocs, regular regexes and
heregexes has been reduced.
- The above two points should result in more easily read code, too.
2015-01-03 22:40:43 +00:00
|
|
|
return function(value, i) {
|
|
|
|
value = _this.formatString(value);
|
2016-09-26 15:10:38 +00:00
|
|
|
if (indentRegex) {
|
|
|
|
value = value.replace(indentRegex, '\n');
|
|
|
|
}
|
Refactor interpolation (and string and regex) handling in lexer
- Fix #3394: Unclosed single-quoted strings (both regular ones and heredocs)
used to pass through the lexer, causing a parsing error later, while
double-quoted strings caused an error already in the lexing phase. Now both
single and double-quoted unclosed strings error out in the lexer (which is the
more logical option) with consistent error messages. This also fixes the last
comment by @satyr in #3301.
- Similar to the above, unclosed heregexes also used to pass through the lexer
and not error until in the parsing phase, which resulted in confusing error
messages. This has been fixed, too.
- Fix #3348, by adding passing tests.
- Fix #3529: If a string starts with an interpolation, an empty string is no
longer emitted before the interpolation (unless it is needed to coerce the
interpolation into a string).
- Block comments cannot contain `*/`. Now the error message also shows exactly
where the offending `*/`. This improvement might seem unrelated, but I had to
touch that code anyway to refactor string and regex related code, and the
change was very trivial. Moreover, it's consistent with the next two points.
- Regexes cannot start with `*`. Now the error message also shows exactly where
the offending `*` is. (It might actually not be exatly at the start in
heregexes.) It is a very minor improvement, but it was trivial to add.
- Octal escapes in strings are forbidden in CoffeeScript (just like in
JavaScript strict mode). However, this used to be the case only for regular
strings. Now they are also forbidden in heredocs. Moreover, the errors now
point at the offending octal escape.
- Invalid regex flags are no longer allowed. This includes repeated modifiers
and unknown ones. Moreover, invalid modifiers do not stop a heregex from
being matched, which results in better error messages.
- Fix #3621: `///a#{1}///` compiles to `RegExp("a" + 1)`. So does
`RegExp("a#{1}")`. Still, those two code snippets used to generate different
tokens, which is a bit weird, but more importantly causes problems for
coffeelint (see clutchski/coffeelint#340). This required lots of tests in
test/location.coffee to be updated. Note that some updates to those tests are
unrelated to this point; some have been updated to be more consistent (I
discovered this because the refactored code happened to be seemingly more
correct).
- Regular regex literals used to erraneously allow newlines to be escaped,
causing invalid JavaScript output. This has been fixed.
- Heregexes may now be completely empty (`//////`), instead of erroring out with
a confusing message.
- Fix #2388: Heredocs and heregexes used to be lexed simply, which meant that
you couldn't nest a heredoc within a heredoc (double-quoted, that is) or a
heregex inside a heregex.
- Fix #2321: If you used division inside interpolation and then a slash later in
the string containing that interpolation, the division slash and the latter
slash was erraneously matched as a regex. This has been fixed.
- Indentation inside interpolations in heredocs no longer affect how much
indentation is removed from each line of the heredoc (which is more
intuitive).
- Whitespace is now correctly trimmed from the start and end of strings in a few
edge cases.
- Last but not least, the lexing of interpolated strings now seems to be more
efficient. For a regular double-quoted string, we used to use a custom
function to find the end of it (taking interpolations and interpolations
within interpolations etc. into account). Then we used to re-find the
interpolations and recursively lex their contents. In effect, the same string
was processed twice, or even more in the case of deeper nesting of
interpolations. Now the same string is processed just once.
- Code duplication between regular strings, heredocs, regular regexes and
heregexes has been reduced.
- The above two points should result in more easily read code, too.
2015-01-03 22:40:43 +00:00
|
|
|
if (i === 0) {
|
|
|
|
value = value.replace(LEADING_BLANK_LINE, '');
|
|
|
|
}
|
|
|
|
if (i === $) {
|
|
|
|
value = value.replace(TRAILING_BLANK_LINE, '');
|
|
|
|
}
|
|
|
|
return value;
|
|
|
|
};
|
|
|
|
})(this));
|
2010-10-02 22:45:23 +00:00
|
|
|
} else {
|
2015-02-05 16:23:03 +00:00
|
|
|
this.mergeInterpolationTokens(tokens, {
|
|
|
|
delimiter: delimiter
|
|
|
|
}, (function(_this) {
|
Refactor interpolation (and string and regex) handling in lexer
- Fix #3394: Unclosed single-quoted strings (both regular ones and heredocs)
used to pass through the lexer, causing a parsing error later, while
double-quoted strings caused an error already in the lexing phase. Now both
single and double-quoted unclosed strings error out in the lexer (which is the
more logical option) with consistent error messages. This also fixes the last
comment by @satyr in #3301.
- Similar to the above, unclosed heregexes also used to pass through the lexer
and not error until in the parsing phase, which resulted in confusing error
messages. This has been fixed, too.
- Fix #3348, by adding passing tests.
- Fix #3529: If a string starts with an interpolation, an empty string is no
longer emitted before the interpolation (unless it is needed to coerce the
interpolation into a string).
- Block comments cannot contain `*/`. Now the error message also shows exactly
where the offending `*/`. This improvement might seem unrelated, but I had to
touch that code anyway to refactor string and regex related code, and the
change was very trivial. Moreover, it's consistent with the next two points.
- Regexes cannot start with `*`. Now the error message also shows exactly where
the offending `*` is. (It might actually not be exatly at the start in
heregexes.) It is a very minor improvement, but it was trivial to add.
- Octal escapes in strings are forbidden in CoffeeScript (just like in
JavaScript strict mode). However, this used to be the case only for regular
strings. Now they are also forbidden in heredocs. Moreover, the errors now
point at the offending octal escape.
- Invalid regex flags are no longer allowed. This includes repeated modifiers
and unknown ones. Moreover, invalid modifiers do not stop a heregex from
being matched, which results in better error messages.
- Fix #3621: `///a#{1}///` compiles to `RegExp("a" + 1)`. So does
`RegExp("a#{1}")`. Still, those two code snippets used to generate different
tokens, which is a bit weird, but more importantly causes problems for
coffeelint (see clutchski/coffeelint#340). This required lots of tests in
test/location.coffee to be updated. Note that some updates to those tests are
unrelated to this point; some have been updated to be more consistent (I
discovered this because the refactored code happened to be seemingly more
correct).
- Regular regex literals used to erraneously allow newlines to be escaped,
causing invalid JavaScript output. This has been fixed.
- Heregexes may now be completely empty (`//////`), instead of erroring out with
a confusing message.
- Fix #2388: Heredocs and heregexes used to be lexed simply, which meant that
you couldn't nest a heredoc within a heredoc (double-quoted, that is) or a
heregex inside a heregex.
- Fix #2321: If you used division inside interpolation and then a slash later in
the string containing that interpolation, the division slash and the latter
slash was erraneously matched as a regex. This has been fixed.
- Indentation inside interpolations in heredocs no longer affect how much
indentation is removed from each line of the heredoc (which is more
intuitive).
- Whitespace is now correctly trimmed from the start and end of strings in a few
edge cases.
- Last but not least, the lexing of interpolated strings now seems to be more
efficient. For a regular double-quoted string, we used to use a custom
function to find the end of it (taking interpolations and interpolations
within interpolations etc. into account). Then we used to re-find the
interpolations and recursively lex their contents. In effect, the same string
was processed twice, or even more in the case of deeper nesting of
interpolations. Now the same string is processed just once.
- Code duplication between regular strings, heredocs, regular regexes and
heregexes has been reduced.
- The above two points should result in more easily read code, too.
2015-01-03 22:40:43 +00:00
|
|
|
return function(value, i) {
|
|
|
|
value = _this.formatString(value);
|
2015-02-05 16:23:03 +00:00
|
|
|
value = value.replace(SIMPLE_STRING_OMIT, function(match, offset) {
|
Refactor interpolation (and string and regex) handling in lexer
- Fix #3394: Unclosed single-quoted strings (both regular ones and heredocs)
used to pass through the lexer, causing a parsing error later, while
double-quoted strings caused an error already in the lexing phase. Now both
single and double-quoted unclosed strings error out in the lexer (which is the
more logical option) with consistent error messages. This also fixes the last
comment by @satyr in #3301.
- Similar to the above, unclosed heregexes also used to pass through the lexer
and not error until in the parsing phase, which resulted in confusing error
messages. This has been fixed, too.
- Fix #3348, by adding passing tests.
- Fix #3529: If a string starts with an interpolation, an empty string is no
longer emitted before the interpolation (unless it is needed to coerce the
interpolation into a string).
- Block comments cannot contain `*/`. Now the error message also shows exactly
where the offending `*/`. This improvement might seem unrelated, but I had to
touch that code anyway to refactor string and regex related code, and the
change was very trivial. Moreover, it's consistent with the next two points.
- Regexes cannot start with `*`. Now the error message also shows exactly where
the offending `*` is. (It might actually not be exatly at the start in
heregexes.) It is a very minor improvement, but it was trivial to add.
- Octal escapes in strings are forbidden in CoffeeScript (just like in
JavaScript strict mode). However, this used to be the case only for regular
strings. Now they are also forbidden in heredocs. Moreover, the errors now
point at the offending octal escape.
- Invalid regex flags are no longer allowed. This includes repeated modifiers
and unknown ones. Moreover, invalid modifiers do not stop a heregex from
being matched, which results in better error messages.
- Fix #3621: `///a#{1}///` compiles to `RegExp("a" + 1)`. So does
`RegExp("a#{1}")`. Still, those two code snippets used to generate different
tokens, which is a bit weird, but more importantly causes problems for
coffeelint (see clutchski/coffeelint#340). This required lots of tests in
test/location.coffee to be updated. Note that some updates to those tests are
unrelated to this point; some have been updated to be more consistent (I
discovered this because the refactored code happened to be seemingly more
correct).
- Regular regex literals used to erraneously allow newlines to be escaped,
causing invalid JavaScript output. This has been fixed.
- Heregexes may now be completely empty (`//////`), instead of erroring out with
a confusing message.
- Fix #2388: Heredocs and heregexes used to be lexed simply, which meant that
you couldn't nest a heredoc within a heredoc (double-quoted, that is) or a
heregex inside a heregex.
- Fix #2321: If you used division inside interpolation and then a slash later in
the string containing that interpolation, the division slash and the latter
slash was erraneously matched as a regex. This has been fixed.
- Indentation inside interpolations in heredocs no longer affect how much
indentation is removed from each line of the heredoc (which is more
intuitive).
- Whitespace is now correctly trimmed from the start and end of strings in a few
edge cases.
- Last but not least, the lexing of interpolated strings now seems to be more
efficient. For a regular double-quoted string, we used to use a custom
function to find the end of it (taking interpolations and interpolations
within interpolations etc. into account). Then we used to re-find the
interpolations and recursively lex their contents. In effect, the same string
was processed twice, or even more in the case of deeper nesting of
interpolations. Now the same string is processed just once.
- Code duplication between regular strings, heredocs, regular regexes and
heregexes has been reduced.
- The above two points should result in more easily read code, too.
2015-01-03 22:40:43 +00:00
|
|
|
if ((i === 0 && offset === 0) || (i === $ && offset + match.length === value.length)) {
|
|
|
|
return '';
|
|
|
|
} else {
|
|
|
|
return ' ';
|
|
|
|
}
|
|
|
|
});
|
|
|
|
return value;
|
|
|
|
};
|
|
|
|
})(this));
|
2010-10-02 22:45:23 +00:00
|
|
|
}
|
Refactor interpolation (and string and regex) handling in lexer
- Fix #3394: Unclosed single-quoted strings (both regular ones and heredocs)
used to pass through the lexer, causing a parsing error later, while
double-quoted strings caused an error already in the lexing phase. Now both
single and double-quoted unclosed strings error out in the lexer (which is the
more logical option) with consistent error messages. This also fixes the last
comment by @satyr in #3301.
- Similar to the above, unclosed heregexes also used to pass through the lexer
and not error until in the parsing phase, which resulted in confusing error
messages. This has been fixed, too.
- Fix #3348, by adding passing tests.
- Fix #3529: If a string starts with an interpolation, an empty string is no
longer emitted before the interpolation (unless it is needed to coerce the
interpolation into a string).
- Block comments cannot contain `*/`. Now the error message also shows exactly
where the offending `*/`. This improvement might seem unrelated, but I had to
touch that code anyway to refactor string and regex related code, and the
change was very trivial. Moreover, it's consistent with the next two points.
- Regexes cannot start with `*`. Now the error message also shows exactly where
the offending `*` is. (It might actually not be exatly at the start in
heregexes.) It is a very minor improvement, but it was trivial to add.
- Octal escapes in strings are forbidden in CoffeeScript (just like in
JavaScript strict mode). However, this used to be the case only for regular
strings. Now they are also forbidden in heredocs. Moreover, the errors now
point at the offending octal escape.
- Invalid regex flags are no longer allowed. This includes repeated modifiers
and unknown ones. Moreover, invalid modifiers do not stop a heregex from
being matched, which results in better error messages.
- Fix #3621: `///a#{1}///` compiles to `RegExp("a" + 1)`. So does
`RegExp("a#{1}")`. Still, those two code snippets used to generate different
tokens, which is a bit weird, but more importantly causes problems for
coffeelint (see clutchski/coffeelint#340). This required lots of tests in
test/location.coffee to be updated. Note that some updates to those tests are
unrelated to this point; some have been updated to be more consistent (I
discovered this because the refactored code happened to be seemingly more
correct).
- Regular regex literals used to erraneously allow newlines to be escaped,
causing invalid JavaScript output. This has been fixed.
- Heregexes may now be completely empty (`//////`), instead of erroring out with
a confusing message.
- Fix #2388: Heredocs and heregexes used to be lexed simply, which meant that
you couldn't nest a heredoc within a heredoc (double-quoted, that is) or a
heregex inside a heregex.
- Fix #2321: If you used division inside interpolation and then a slash later in
the string containing that interpolation, the division slash and the latter
slash was erraneously matched as a regex. This has been fixed.
- Indentation inside interpolations in heredocs no longer affect how much
indentation is removed from each line of the heredoc (which is more
intuitive).
- Whitespace is now correctly trimmed from the start and end of strings in a few
edge cases.
- Last but not least, the lexing of interpolated strings now seems to be more
efficient. For a regular double-quoted string, we used to use a custom
function to find the end of it (taking interpolations and interpolations
within interpolations etc. into account). Then we used to re-find the
interpolations and recursively lex their contents. In effect, the same string
was processed twice, or even more in the case of deeper nesting of
interpolations. Now the same string is processed just once.
- Code duplication between regular strings, heredocs, regular regexes and
heregexes has been reduced.
- The above two points should result in more easily read code, too.
2015-01-03 22:40:43 +00:00
|
|
|
return end;
|
2010-02-28 00:40:53 +00:00
|
|
|
};
|
2011-09-18 22:16:39 +00:00
|
|
|
|
2010-06-12 23:05:13 +00:00
|
|
|
Lexer.prototype.commentToken = function() {
|
2010-10-11 00:17:31 +00:00
|
|
|
var comment, here, match;
|
2012-04-10 18:57:45 +00:00
|
|
|
if (!(match = this.chunk.match(COMMENT))) {
|
|
|
|
return 0;
|
|
|
|
}
|
2010-10-11 00:17:31 +00:00
|
|
|
comment = match[0], here = match[1];
|
2010-09-23 05:14:18 +00:00
|
|
|
if (here) {
|
Refactor interpolation (and string and regex) handling in lexer
- Fix #3394: Unclosed single-quoted strings (both regular ones and heredocs)
used to pass through the lexer, causing a parsing error later, while
double-quoted strings caused an error already in the lexing phase. Now both
single and double-quoted unclosed strings error out in the lexer (which is the
more logical option) with consistent error messages. This also fixes the last
comment by @satyr in #3301.
- Similar to the above, unclosed heregexes also used to pass through the lexer
and not error until in the parsing phase, which resulted in confusing error
messages. This has been fixed, too.
- Fix #3348, by adding passing tests.
- Fix #3529: If a string starts with an interpolation, an empty string is no
longer emitted before the interpolation (unless it is needed to coerce the
interpolation into a string).
- Block comments cannot contain `*/`. Now the error message also shows exactly
where the offending `*/`. This improvement might seem unrelated, but I had to
touch that code anyway to refactor string and regex related code, and the
change was very trivial. Moreover, it's consistent with the next two points.
- Regexes cannot start with `*`. Now the error message also shows exactly where
the offending `*` is. (It might actually not be exatly at the start in
heregexes.) It is a very minor improvement, but it was trivial to add.
- Octal escapes in strings are forbidden in CoffeeScript (just like in
JavaScript strict mode). However, this used to be the case only for regular
strings. Now they are also forbidden in heredocs. Moreover, the errors now
point at the offending octal escape.
- Invalid regex flags are no longer allowed. This includes repeated modifiers
and unknown ones. Moreover, invalid modifiers do not stop a heregex from
being matched, which results in better error messages.
- Fix #3621: `///a#{1}///` compiles to `RegExp("a" + 1)`. So does
`RegExp("a#{1}")`. Still, those two code snippets used to generate different
tokens, which is a bit weird, but more importantly causes problems for
coffeelint (see clutchski/coffeelint#340). This required lots of tests in
test/location.coffee to be updated. Note that some updates to those tests are
unrelated to this point; some have been updated to be more consistent (I
discovered this because the refactored code happened to be seemingly more
correct).
- Regular regex literals used to erraneously allow newlines to be escaped,
causing invalid JavaScript output. This has been fixed.
- Heregexes may now be completely empty (`//////`), instead of erroring out with
a confusing message.
- Fix #2388: Heredocs and heregexes used to be lexed simply, which meant that
you couldn't nest a heredoc within a heredoc (double-quoted, that is) or a
heregex inside a heregex.
- Fix #2321: If you used division inside interpolation and then a slash later in
the string containing that interpolation, the division slash and the latter
slash was erraneously matched as a regex. This has been fixed.
- Indentation inside interpolations in heredocs no longer affect how much
indentation is removed from each line of the heredoc (which is more
intuitive).
- Whitespace is now correctly trimmed from the start and end of strings in a few
edge cases.
- Last but not least, the lexing of interpolated strings now seems to be more
efficient. For a regular double-quoted string, we used to use a custom
function to find the end of it (taking interpolations and interpolations
within interpolations etc. into account). Then we used to re-find the
interpolations and recursively lex their contents. In effect, the same string
was processed twice, or even more in the case of deeper nesting of
interpolations. Now the same string is processed just once.
- Code duplication between regular strings, heredocs, regular regexes and
heregexes has been reduced.
- The above two points should result in more easily read code, too.
2015-01-03 22:40:43 +00:00
|
|
|
if (match = HERECOMMENT_ILLEGAL.exec(comment)) {
|
2015-02-06 09:52:02 +00:00
|
|
|
this.error("block comments cannot contain " + match[0], {
|
|
|
|
offset: match.index,
|
|
|
|
length: match[0].length
|
|
|
|
});
|
Refactor interpolation (and string and regex) handling in lexer
- Fix #3394: Unclosed single-quoted strings (both regular ones and heredocs)
used to pass through the lexer, causing a parsing error later, while
double-quoted strings caused an error already in the lexing phase. Now both
single and double-quoted unclosed strings error out in the lexer (which is the
more logical option) with consistent error messages. This also fixes the last
comment by @satyr in #3301.
- Similar to the above, unclosed heregexes also used to pass through the lexer
and not error until in the parsing phase, which resulted in confusing error
messages. This has been fixed, too.
- Fix #3348, by adding passing tests.
- Fix #3529: If a string starts with an interpolation, an empty string is no
longer emitted before the interpolation (unless it is needed to coerce the
interpolation into a string).
- Block comments cannot contain `*/`. Now the error message also shows exactly
where the offending `*/`. This improvement might seem unrelated, but I had to
touch that code anyway to refactor string and regex related code, and the
change was very trivial. Moreover, it's consistent with the next two points.
- Regexes cannot start with `*`. Now the error message also shows exactly where
the offending `*` is. (It might actually not be exatly at the start in
heregexes.) It is a very minor improvement, but it was trivial to add.
- Octal escapes in strings are forbidden in CoffeeScript (just like in
JavaScript strict mode). However, this used to be the case only for regular
strings. Now they are also forbidden in heredocs. Moreover, the errors now
point at the offending octal escape.
- Invalid regex flags are no longer allowed. This includes repeated modifiers
and unknown ones. Moreover, invalid modifiers do not stop a heregex from
being matched, which results in better error messages.
- Fix #3621: `///a#{1}///` compiles to `RegExp("a" + 1)`. So does
`RegExp("a#{1}")`. Still, those two code snippets used to generate different
tokens, which is a bit weird, but more importantly causes problems for
coffeelint (see clutchski/coffeelint#340). This required lots of tests in
test/location.coffee to be updated. Note that some updates to those tests are
unrelated to this point; some have been updated to be more consistent (I
discovered this because the refactored code happened to be seemingly more
correct).
- Regular regex literals used to erraneously allow newlines to be escaped,
causing invalid JavaScript output. This has been fixed.
- Heregexes may now be completely empty (`//////`), instead of erroring out with
a confusing message.
- Fix #2388: Heredocs and heregexes used to be lexed simply, which meant that
you couldn't nest a heredoc within a heredoc (double-quoted, that is) or a
heregex inside a heregex.
- Fix #2321: If you used division inside interpolation and then a slash later in
the string containing that interpolation, the division slash and the latter
slash was erraneously matched as a regex. This has been fixed.
- Indentation inside interpolations in heredocs no longer affect how much
indentation is removed from each line of the heredoc (which is more
intuitive).
- Whitespace is now correctly trimmed from the start and end of strings in a few
edge cases.
- Last but not least, the lexing of interpolated strings now seems to be more
efficient. For a regular double-quoted string, we used to use a custom
function to find the end of it (taking interpolations and interpolations
within interpolations etc. into account). Then we used to re-find the
interpolations and recursively lex their contents. In effect, the same string
was processed twice, or even more in the case of deeper nesting of
interpolations. Now the same string is processed just once.
- Code duplication between regular strings, heredocs, regular regexes and
heregexes has been reduced.
- The above two points should result in more easily read code, too.
2015-01-03 22:40:43 +00:00
|
|
|
}
|
|
|
|
if (here.indexOf('\n') >= 0) {
|
|
|
|
here = here.replace(RegExp("\\n" + (repeat(' ', this.indent)), "g"), '\n');
|
|
|
|
}
|
|
|
|
this.token('HERECOMMENT', here, 0, comment.length);
|
2010-07-02 01:26:33 +00:00
|
|
|
}
|
2010-10-22 14:47:40 +00:00
|
|
|
return comment.length;
|
2010-05-13 00:56:44 +00:00
|
|
|
};
|
2011-09-18 22:16:39 +00:00
|
|
|
|
2010-06-12 23:05:13 +00:00
|
|
|
Lexer.prototype.jsToken = function() {
|
2010-09-25 22:06:14 +00:00
|
|
|
var match, script;
|
2011-08-08 16:55:22 +00:00
|
|
|
if (!(this.chunk.charAt(0) === '`' && (match = JSTOKEN.exec(this.chunk)))) {
|
|
|
|
return 0;
|
|
|
|
}
|
2012-11-17 00:09:56 +00:00
|
|
|
this.token('JS', (script = match[0]).slice(1, -1), 0, script.length);
|
2010-10-22 14:47:40 +00:00
|
|
|
return script.length;
|
2010-02-28 00:40:53 +00:00
|
|
|
};
|
2011-09-18 22:16:39 +00:00
|
|
|
|
2010-06-12 23:05:13 +00:00
|
|
|
Lexer.prototype.regexToken = function() {
|
Fix #3597: Allow interpolations in object keys
The following is now allowed:
o =
a: 1
b: 2
"#{'c'}": 3
"#{'d'}": 4
e: 5
"#{'f'}": 6
g: 7
It compiles to:
o = (
obj = {
a: 1,
b: 2
},
obj["" + 'c'] = 3,
obj["" + 'd'] = 4,
obj.e = 5,
obj["" + 'f'] = 6,
obj.g = 7,
obj
);
- Closes #3039. Empty interpolations in object keys are now _supposed_ to be
allowed.
- Closes #1131. No need to improve error messages for attempted key
interpolation anymore.
- Implementing this required fixing the following bug: `("" + a): 1` used to
error out on the colon, saying "unexpected colon". But really, it is the
attempted object key that is unexpected. Now the error is on the opening
parenthesis instead.
- However, the above fix broke some error message tests for regexes. The easiest
way to fix this was to make a seemingly unrelated change: The error messages
for unexpected identifiers, numbers, strings and regexes now say for example
'unexpected string' instead of 'unexpected """some #{really long} string"""'.
In other words, the tag _name_ is used instead of the tag _value_.
This was way easier to implement, and is more helpful to the user. Using the
tag value is good for operators, reserved words and the like, but not for
tokens which can contain any text. For example, 'unexpected identifier' is
better than 'unexpected expected' (if a variable called 'expected' was used
erraneously).
- While writing tests for the above point I found a few minor bugs with string
locations which have been fixed.
2015-02-07 19:16:59 +00:00
|
|
|
var body, closed, end, flags, index, match, origin, prev, ref2, ref3, ref4, regex, tokens;
|
Refactor interpolation (and string and regex) handling in lexer
- Fix #3394: Unclosed single-quoted strings (both regular ones and heredocs)
used to pass through the lexer, causing a parsing error later, while
double-quoted strings caused an error already in the lexing phase. Now both
single and double-quoted unclosed strings error out in the lexer (which is the
more logical option) with consistent error messages. This also fixes the last
comment by @satyr in #3301.
- Similar to the above, unclosed heregexes also used to pass through the lexer
and not error until in the parsing phase, which resulted in confusing error
messages. This has been fixed, too.
- Fix #3348, by adding passing tests.
- Fix #3529: If a string starts with an interpolation, an empty string is no
longer emitted before the interpolation (unless it is needed to coerce the
interpolation into a string).
- Block comments cannot contain `*/`. Now the error message also shows exactly
where the offending `*/`. This improvement might seem unrelated, but I had to
touch that code anyway to refactor string and regex related code, and the
change was very trivial. Moreover, it's consistent with the next two points.
- Regexes cannot start with `*`. Now the error message also shows exactly where
the offending `*` is. (It might actually not be exatly at the start in
heregexes.) It is a very minor improvement, but it was trivial to add.
- Octal escapes in strings are forbidden in CoffeeScript (just like in
JavaScript strict mode). However, this used to be the case only for regular
strings. Now they are also forbidden in heredocs. Moreover, the errors now
point at the offending octal escape.
- Invalid regex flags are no longer allowed. This includes repeated modifiers
and unknown ones. Moreover, invalid modifiers do not stop a heregex from
being matched, which results in better error messages.
- Fix #3621: `///a#{1}///` compiles to `RegExp("a" + 1)`. So does
`RegExp("a#{1}")`. Still, those two code snippets used to generate different
tokens, which is a bit weird, but more importantly causes problems for
coffeelint (see clutchski/coffeelint#340). This required lots of tests in
test/location.coffee to be updated. Note that some updates to those tests are
unrelated to this point; some have been updated to be more consistent (I
discovered this because the refactored code happened to be seemingly more
correct).
- Regular regex literals used to erraneously allow newlines to be escaped,
causing invalid JavaScript output. This has been fixed.
- Heregexes may now be completely empty (`//////`), instead of erroring out with
a confusing message.
- Fix #2388: Heredocs and heregexes used to be lexed simply, which meant that
you couldn't nest a heredoc within a heredoc (double-quoted, that is) or a
heregex inside a heregex.
- Fix #2321: If you used division inside interpolation and then a slash later in
the string containing that interpolation, the division slash and the latter
slash was erraneously matched as a regex. This has been fixed.
- Indentation inside interpolations in heredocs no longer affect how much
indentation is removed from each line of the heredoc (which is more
intuitive).
- Whitespace is now correctly trimmed from the start and end of strings in a few
edge cases.
- Last but not least, the lexing of interpolated strings now seems to be more
efficient. For a regular double-quoted string, we used to use a custom
function to find the end of it (taking interpolations and interpolations
within interpolations etc. into account). Then we used to re-find the
interpolations and recursively lex their contents. In effect, the same string
was processed twice, or even more in the case of deeper nesting of
interpolations. Now the same string is processed just once.
- Code duplication between regular strings, heredocs, regular regexes and
heregexes has been reduced.
- The above two points should result in more easily read code, too.
2015-01-03 22:40:43 +00:00
|
|
|
switch (false) {
|
|
|
|
case !(match = REGEX_ILLEGAL.exec(this.chunk)):
|
2015-02-06 09:52:02 +00:00
|
|
|
this.error("regular expressions cannot begin with " + match[2], {
|
|
|
|
offset: match.index + match[1].length
|
|
|
|
});
|
Refactor interpolation (and string and regex) handling in lexer
- Fix #3394: Unclosed single-quoted strings (both regular ones and heredocs)
used to pass through the lexer, causing a parsing error later, while
double-quoted strings caused an error already in the lexing phase. Now both
single and double-quoted unclosed strings error out in the lexer (which is the
more logical option) with consistent error messages. This also fixes the last
comment by @satyr in #3301.
- Similar to the above, unclosed heregexes also used to pass through the lexer
and not error until in the parsing phase, which resulted in confusing error
messages. This has been fixed, too.
- Fix #3348, by adding passing tests.
- Fix #3529: If a string starts with an interpolation, an empty string is no
longer emitted before the interpolation (unless it is needed to coerce the
interpolation into a string).
- Block comments cannot contain `*/`. Now the error message also shows exactly
where the offending `*/`. This improvement might seem unrelated, but I had to
touch that code anyway to refactor string and regex related code, and the
change was very trivial. Moreover, it's consistent with the next two points.
- Regexes cannot start with `*`. Now the error message also shows exactly where
the offending `*` is. (It might actually not be exatly at the start in
heregexes.) It is a very minor improvement, but it was trivial to add.
- Octal escapes in strings are forbidden in CoffeeScript (just like in
JavaScript strict mode). However, this used to be the case only for regular
strings. Now they are also forbidden in heredocs. Moreover, the errors now
point at the offending octal escape.
- Invalid regex flags are no longer allowed. This includes repeated modifiers
and unknown ones. Moreover, invalid modifiers do not stop a heregex from
being matched, which results in better error messages.
- Fix #3621: `///a#{1}///` compiles to `RegExp("a" + 1)`. So does
`RegExp("a#{1}")`. Still, those two code snippets used to generate different
tokens, which is a bit weird, but more importantly causes problems for
coffeelint (see clutchski/coffeelint#340). This required lots of tests in
test/location.coffee to be updated. Note that some updates to those tests are
unrelated to this point; some have been updated to be more consistent (I
discovered this because the refactored code happened to be seemingly more
correct).
- Regular regex literals used to erraneously allow newlines to be escaped,
causing invalid JavaScript output. This has been fixed.
- Heregexes may now be completely empty (`//////`), instead of erroring out with
a confusing message.
- Fix #2388: Heredocs and heregexes used to be lexed simply, which meant that
you couldn't nest a heredoc within a heredoc (double-quoted, that is) or a
heregex inside a heregex.
- Fix #2321: If you used division inside interpolation and then a slash later in
the string containing that interpolation, the division slash and the latter
slash was erraneously matched as a regex. This has been fixed.
- Indentation inside interpolations in heredocs no longer affect how much
indentation is removed from each line of the heredoc (which is more
intuitive).
- Whitespace is now correctly trimmed from the start and end of strings in a few
edge cases.
- Last but not least, the lexing of interpolated strings now seems to be more
efficient. For a regular double-quoted string, we used to use a custom
function to find the end of it (taking interpolations and interpolations
within interpolations etc. into account). Then we used to re-find the
interpolations and recursively lex their contents. In effect, the same string
was processed twice, or even more in the case of deeper nesting of
interpolations. Now the same string is processed just once.
- Code duplication between regular strings, heredocs, regular regexes and
heregexes has been reduced.
- The above two points should result in more easily read code, too.
2015-01-03 22:40:43 +00:00
|
|
|
break;
|
2015-02-03 17:55:38 +00:00
|
|
|
case !(match = this.matchWithInterpolations(HEREGEX, '///')):
|
|
|
|
tokens = match.tokens, index = match.index;
|
Refactor interpolation (and string and regex) handling in lexer
- Fix #3394: Unclosed single-quoted strings (both regular ones and heredocs)
used to pass through the lexer, causing a parsing error later, while
double-quoted strings caused an error already in the lexing phase. Now both
single and double-quoted unclosed strings error out in the lexer (which is the
more logical option) with consistent error messages. This also fixes the last
comment by @satyr in #3301.
- Similar to the above, unclosed heregexes also used to pass through the lexer
and not error until in the parsing phase, which resulted in confusing error
messages. This has been fixed, too.
- Fix #3348, by adding passing tests.
- Fix #3529: If a string starts with an interpolation, an empty string is no
longer emitted before the interpolation (unless it is needed to coerce the
interpolation into a string).
- Block comments cannot contain `*/`. Now the error message also shows exactly
where the offending `*/`. This improvement might seem unrelated, but I had to
touch that code anyway to refactor string and regex related code, and the
change was very trivial. Moreover, it's consistent with the next two points.
- Regexes cannot start with `*`. Now the error message also shows exactly where
the offending `*` is. (It might actually not be exatly at the start in
heregexes.) It is a very minor improvement, but it was trivial to add.
- Octal escapes in strings are forbidden in CoffeeScript (just like in
JavaScript strict mode). However, this used to be the case only for regular
strings. Now they are also forbidden in heredocs. Moreover, the errors now
point at the offending octal escape.
- Invalid regex flags are no longer allowed. This includes repeated modifiers
and unknown ones. Moreover, invalid modifiers do not stop a heregex from
being matched, which results in better error messages.
- Fix #3621: `///a#{1}///` compiles to `RegExp("a" + 1)`. So does
`RegExp("a#{1}")`. Still, those two code snippets used to generate different
tokens, which is a bit weird, but more importantly causes problems for
coffeelint (see clutchski/coffeelint#340). This required lots of tests in
test/location.coffee to be updated. Note that some updates to those tests are
unrelated to this point; some have been updated to be more consistent (I
discovered this because the refactored code happened to be seemingly more
correct).
- Regular regex literals used to erraneously allow newlines to be escaped,
causing invalid JavaScript output. This has been fixed.
- Heregexes may now be completely empty (`//////`), instead of erroring out with
a confusing message.
- Fix #2388: Heredocs and heregexes used to be lexed simply, which meant that
you couldn't nest a heredoc within a heredoc (double-quoted, that is) or a
heregex inside a heregex.
- Fix #2321: If you used division inside interpolation and then a slash later in
the string containing that interpolation, the division slash and the latter
slash was erraneously matched as a regex. This has been fixed.
- Indentation inside interpolations in heredocs no longer affect how much
indentation is removed from each line of the heredoc (which is more
intuitive).
- Whitespace is now correctly trimmed from the start and end of strings in a few
edge cases.
- Last but not least, the lexing of interpolated strings now seems to be more
efficient. For a regular double-quoted string, we used to use a custom
function to find the end of it (taking interpolations and interpolations
within interpolations etc. into account). Then we used to re-find the
interpolations and recursively lex their contents. In effect, the same string
was processed twice, or even more in the case of deeper nesting of
interpolations. Now the same string is processed just once.
- Code duplication between regular strings, heredocs, regular regexes and
heregexes has been reduced.
- The above two points should result in more easily read code, too.
2015-01-03 22:40:43 +00:00
|
|
|
break;
|
|
|
|
case !(match = REGEX.exec(this.chunk)):
|
2015-02-05 16:23:03 +00:00
|
|
|
regex = match[0], body = match[1], closed = match[2];
|
2015-02-12 18:26:41 +00:00
|
|
|
this.validateEscapes(body, {
|
|
|
|
isRegex: true,
|
|
|
|
offsetInChunk: 1
|
2015-02-05 16:23:03 +00:00
|
|
|
});
|
Refactor interpolation (and string and regex) handling in lexer
- Fix #3394: Unclosed single-quoted strings (both regular ones and heredocs)
used to pass through the lexer, causing a parsing error later, while
double-quoted strings caused an error already in the lexing phase. Now both
single and double-quoted unclosed strings error out in the lexer (which is the
more logical option) with consistent error messages. This also fixes the last
comment by @satyr in #3301.
- Similar to the above, unclosed heregexes also used to pass through the lexer
and not error until in the parsing phase, which resulted in confusing error
messages. This has been fixed, too.
- Fix #3348, by adding passing tests.
- Fix #3529: If a string starts with an interpolation, an empty string is no
longer emitted before the interpolation (unless it is needed to coerce the
interpolation into a string).
- Block comments cannot contain `*/`. Now the error message also shows exactly
where the offending `*/`. This improvement might seem unrelated, but I had to
touch that code anyway to refactor string and regex related code, and the
change was very trivial. Moreover, it's consistent with the next two points.
- Regexes cannot start with `*`. Now the error message also shows exactly where
the offending `*` is. (It might actually not be exatly at the start in
heregexes.) It is a very minor improvement, but it was trivial to add.
- Octal escapes in strings are forbidden in CoffeeScript (just like in
JavaScript strict mode). However, this used to be the case only for regular
strings. Now they are also forbidden in heredocs. Moreover, the errors now
point at the offending octal escape.
- Invalid regex flags are no longer allowed. This includes repeated modifiers
and unknown ones. Moreover, invalid modifiers do not stop a heregex from
being matched, which results in better error messages.
- Fix #3621: `///a#{1}///` compiles to `RegExp("a" + 1)`. So does
`RegExp("a#{1}")`. Still, those two code snippets used to generate different
tokens, which is a bit weird, but more importantly causes problems for
coffeelint (see clutchski/coffeelint#340). This required lots of tests in
test/location.coffee to be updated. Note that some updates to those tests are
unrelated to this point; some have been updated to be more consistent (I
discovered this because the refactored code happened to be seemingly more
correct).
- Regular regex literals used to erraneously allow newlines to be escaped,
causing invalid JavaScript output. This has been fixed.
- Heregexes may now be completely empty (`//////`), instead of erroring out with
a confusing message.
- Fix #2388: Heredocs and heregexes used to be lexed simply, which meant that
you couldn't nest a heredoc within a heredoc (double-quoted, that is) or a
heregex inside a heregex.
- Fix #2321: If you used division inside interpolation and then a slash later in
the string containing that interpolation, the division slash and the latter
slash was erraneously matched as a regex. This has been fixed.
- Indentation inside interpolations in heredocs no longer affect how much
indentation is removed from each line of the heredoc (which is more
intuitive).
- Whitespace is now correctly trimmed from the start and end of strings in a few
edge cases.
- Last but not least, the lexing of interpolated strings now seems to be more
efficient. For a regular double-quoted string, we used to use a custom
function to find the end of it (taking interpolations and interpolations
within interpolations etc. into account). Then we used to re-find the
interpolations and recursively lex their contents. In effect, the same string
was processed twice, or even more in the case of deeper nesting of
interpolations. Now the same string is processed just once.
- Code duplication between regular strings, heredocs, regular regexes and
heregexes has been reduced.
- The above two points should result in more easily read code, too.
2015-01-03 22:40:43 +00:00
|
|
|
index = regex.length;
|
2015-02-07 20:50:41 +00:00
|
|
|
ref2 = this.tokens, prev = ref2[ref2.length - 1];
|
2015-01-10 00:48:00 +00:00
|
|
|
if (prev) {
|
Fix #3597: Allow interpolations in object keys
The following is now allowed:
o =
a: 1
b: 2
"#{'c'}": 3
"#{'d'}": 4
e: 5
"#{'f'}": 6
g: 7
It compiles to:
o = (
obj = {
a: 1,
b: 2
},
obj["" + 'c'] = 3,
obj["" + 'd'] = 4,
obj.e = 5,
obj["" + 'f'] = 6,
obj.g = 7,
obj
);
- Closes #3039. Empty interpolations in object keys are now _supposed_ to be
allowed.
- Closes #1131. No need to improve error messages for attempted key
interpolation anymore.
- Implementing this required fixing the following bug: `("" + a): 1` used to
error out on the colon, saying "unexpected colon". But really, it is the
attempted object key that is unexpected. Now the error is on the opening
parenthesis instead.
- However, the above fix broke some error message tests for regexes. The easiest
way to fix this was to make a seemingly unrelated change: The error messages
for unexpected identifiers, numbers, strings and regexes now say for example
'unexpected string' instead of 'unexpected """some #{really long} string"""'.
In other words, the tag _name_ is used instead of the tag _value_.
This was way easier to implement, and is more helpful to the user. Using the
tag value is good for operators, reserved words and the like, but not for
tokens which can contain any text. For example, 'unexpected identifier' is
better than 'unexpected expected' (if a variable called 'expected' was used
erraneously).
- While writing tests for the above point I found a few minor bugs with string
locations which have been fixed.
2015-02-07 19:16:59 +00:00
|
|
|
if (prev.spaced && (ref3 = prev[0], indexOf.call(CALLABLE, ref3) >= 0)) {
|
2015-01-10 00:48:00 +00:00
|
|
|
if (!closed || POSSIBLY_DIVISION.test(regex)) {
|
|
|
|
return 0;
|
|
|
|
}
|
2015-02-07 20:50:41 +00:00
|
|
|
} else if (ref4 = prev[0], indexOf.call(NOT_REGEX, ref4) >= 0) {
|
2015-01-10 00:48:00 +00:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (!closed) {
|
|
|
|
this.error('missing / (unclosed regex)');
|
2012-04-10 18:57:45 +00:00
|
|
|
}
|
Refactor interpolation (and string and regex) handling in lexer
- Fix #3394: Unclosed single-quoted strings (both regular ones and heredocs)
used to pass through the lexer, causing a parsing error later, while
double-quoted strings caused an error already in the lexing phase. Now both
single and double-quoted unclosed strings error out in the lexer (which is the
more logical option) with consistent error messages. This also fixes the last
comment by @satyr in #3301.
- Similar to the above, unclosed heregexes also used to pass through the lexer
and not error until in the parsing phase, which resulted in confusing error
messages. This has been fixed, too.
- Fix #3348, by adding passing tests.
- Fix #3529: If a string starts with an interpolation, an empty string is no
longer emitted before the interpolation (unless it is needed to coerce the
interpolation into a string).
- Block comments cannot contain `*/`. Now the error message also shows exactly
where the offending `*/`. This improvement might seem unrelated, but I had to
touch that code anyway to refactor string and regex related code, and the
change was very trivial. Moreover, it's consistent with the next two points.
- Regexes cannot start with `*`. Now the error message also shows exactly where
the offending `*` is. (It might actually not be exatly at the start in
heregexes.) It is a very minor improvement, but it was trivial to add.
- Octal escapes in strings are forbidden in CoffeeScript (just like in
JavaScript strict mode). However, this used to be the case only for regular
strings. Now they are also forbidden in heredocs. Moreover, the errors now
point at the offending octal escape.
- Invalid regex flags are no longer allowed. This includes repeated modifiers
and unknown ones. Moreover, invalid modifiers do not stop a heregex from
being matched, which results in better error messages.
- Fix #3621: `///a#{1}///` compiles to `RegExp("a" + 1)`. So does
`RegExp("a#{1}")`. Still, those two code snippets used to generate different
tokens, which is a bit weird, but more importantly causes problems for
coffeelint (see clutchski/coffeelint#340). This required lots of tests in
test/location.coffee to be updated. Note that some updates to those tests are
unrelated to this point; some have been updated to be more consistent (I
discovered this because the refactored code happened to be seemingly more
correct).
- Regular regex literals used to erraneously allow newlines to be escaped,
causing invalid JavaScript output. This has been fixed.
- Heregexes may now be completely empty (`//////`), instead of erroring out with
a confusing message.
- Fix #2388: Heredocs and heregexes used to be lexed simply, which meant that
you couldn't nest a heredoc within a heredoc (double-quoted, that is) or a
heregex inside a heregex.
- Fix #2321: If you used division inside interpolation and then a slash later in
the string containing that interpolation, the division slash and the latter
slash was erraneously matched as a regex. This has been fixed.
- Indentation inside interpolations in heredocs no longer affect how much
indentation is removed from each line of the heredoc (which is more
intuitive).
- Whitespace is now correctly trimmed from the start and end of strings in a few
edge cases.
- Last but not least, the lexing of interpolated strings now seems to be more
efficient. For a regular double-quoted string, we used to use a custom
function to find the end of it (taking interpolations and interpolations
within interpolations etc. into account). Then we used to re-find the
interpolations and recursively lex their contents. In effect, the same string
was processed twice, or even more in the case of deeper nesting of
interpolations. Now the same string is processed just once.
- Code duplication between regular strings, heredocs, regular regexes and
heregexes has been reduced.
- The above two points should result in more easily read code, too.
2015-01-03 22:40:43 +00:00
|
|
|
break;
|
|
|
|
default:
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
flags = REGEX_FLAGS.exec(this.chunk.slice(index))[0];
|
|
|
|
end = index + flags.length;
|
Fix #3597: Allow interpolations in object keys
The following is now allowed:
o =
a: 1
b: 2
"#{'c'}": 3
"#{'d'}": 4
e: 5
"#{'f'}": 6
g: 7
It compiles to:
o = (
obj = {
a: 1,
b: 2
},
obj["" + 'c'] = 3,
obj["" + 'd'] = 4,
obj.e = 5,
obj["" + 'f'] = 6,
obj.g = 7,
obj
);
- Closes #3039. Empty interpolations in object keys are now _supposed_ to be
allowed.
- Closes #1131. No need to improve error messages for attempted key
interpolation anymore.
- Implementing this required fixing the following bug: `("" + a): 1` used to
error out on the colon, saying "unexpected colon". But really, it is the
attempted object key that is unexpected. Now the error is on the opening
parenthesis instead.
- However, the above fix broke some error message tests for regexes. The easiest
way to fix this was to make a seemingly unrelated change: The error messages
for unexpected identifiers, numbers, strings and regexes now say for example
'unexpected string' instead of 'unexpected """some #{really long} string"""'.
In other words, the tag _name_ is used instead of the tag _value_.
This was way easier to implement, and is more helpful to the user. Using the
tag value is good for operators, reserved words and the like, but not for
tokens which can contain any text. For example, 'unexpected identifier' is
better than 'unexpected expected' (if a variable called 'expected' was used
erraneously).
- While writing tests for the above point I found a few minor bugs with string
locations which have been fixed.
2015-02-07 19:16:59 +00:00
|
|
|
origin = this.makeToken('REGEX', null, 0, end);
|
Refactor interpolation (and string and regex) handling in lexer
- Fix #3394: Unclosed single-quoted strings (both regular ones and heredocs)
used to pass through the lexer, causing a parsing error later, while
double-quoted strings caused an error already in the lexing phase. Now both
single and double-quoted unclosed strings error out in the lexer (which is the
more logical option) with consistent error messages. This also fixes the last
comment by @satyr in #3301.
- Similar to the above, unclosed heregexes also used to pass through the lexer
and not error until in the parsing phase, which resulted in confusing error
messages. This has been fixed, too.
- Fix #3348, by adding passing tests.
- Fix #3529: If a string starts with an interpolation, an empty string is no
longer emitted before the interpolation (unless it is needed to coerce the
interpolation into a string).
- Block comments cannot contain `*/`. Now the error message also shows exactly
where the offending `*/`. This improvement might seem unrelated, but I had to
touch that code anyway to refactor string and regex related code, and the
change was very trivial. Moreover, it's consistent with the next two points.
- Regexes cannot start with `*`. Now the error message also shows exactly where
the offending `*` is. (It might actually not be exatly at the start in
heregexes.) It is a very minor improvement, but it was trivial to add.
- Octal escapes in strings are forbidden in CoffeeScript (just like in
JavaScript strict mode). However, this used to be the case only for regular
strings. Now they are also forbidden in heredocs. Moreover, the errors now
point at the offending octal escape.
- Invalid regex flags are no longer allowed. This includes repeated modifiers
and unknown ones. Moreover, invalid modifiers do not stop a heregex from
being matched, which results in better error messages.
- Fix #3621: `///a#{1}///` compiles to `RegExp("a" + 1)`. So does
`RegExp("a#{1}")`. Still, those two code snippets used to generate different
tokens, which is a bit weird, but more importantly causes problems for
coffeelint (see clutchski/coffeelint#340). This required lots of tests in
test/location.coffee to be updated. Note that some updates to those tests are
unrelated to this point; some have been updated to be more consistent (I
discovered this because the refactored code happened to be seemingly more
correct).
- Regular regex literals used to erraneously allow newlines to be escaped,
causing invalid JavaScript output. This has been fixed.
- Heregexes may now be completely empty (`//////`), instead of erroring out with
a confusing message.
- Fix #2388: Heredocs and heregexes used to be lexed simply, which meant that
you couldn't nest a heredoc within a heredoc (double-quoted, that is) or a
heregex inside a heregex.
- Fix #2321: If you used division inside interpolation and then a slash later in
the string containing that interpolation, the division slash and the latter
slash was erraneously matched as a regex. This has been fixed.
- Indentation inside interpolations in heredocs no longer affect how much
indentation is removed from each line of the heredoc (which is more
intuitive).
- Whitespace is now correctly trimmed from the start and end of strings in a few
edge cases.
- Last but not least, the lexing of interpolated strings now seems to be more
efficient. For a regular double-quoted string, we used to use a custom
function to find the end of it (taking interpolations and interpolations
within interpolations etc. into account). Then we used to re-find the
interpolations and recursively lex their contents. In effect, the same string
was processed twice, or even more in the case of deeper nesting of
interpolations. Now the same string is processed just once.
- Code duplication between regular strings, heredocs, regular regexes and
heregexes has been reduced.
- The above two points should result in more easily read code, too.
2015-01-03 22:40:43 +00:00
|
|
|
switch (false) {
|
|
|
|
case !!VALID_FLAGS.test(flags):
|
2015-02-06 09:52:02 +00:00
|
|
|
this.error("invalid regular expression flags " + flags, {
|
|
|
|
offset: index,
|
|
|
|
length: flags.length
|
|
|
|
});
|
Refactor interpolation (and string and regex) handling in lexer
- Fix #3394: Unclosed single-quoted strings (both regular ones and heredocs)
used to pass through the lexer, causing a parsing error later, while
double-quoted strings caused an error already in the lexing phase. Now both
single and double-quoted unclosed strings error out in the lexer (which is the
more logical option) with consistent error messages. This also fixes the last
comment by @satyr in #3301.
- Similar to the above, unclosed heregexes also used to pass through the lexer
and not error until in the parsing phase, which resulted in confusing error
messages. This has been fixed, too.
- Fix #3348, by adding passing tests.
- Fix #3529: If a string starts with an interpolation, an empty string is no
longer emitted before the interpolation (unless it is needed to coerce the
interpolation into a string).
- Block comments cannot contain `*/`. Now the error message also shows exactly
where the offending `*/`. This improvement might seem unrelated, but I had to
touch that code anyway to refactor string and regex related code, and the
change was very trivial. Moreover, it's consistent with the next two points.
- Regexes cannot start with `*`. Now the error message also shows exactly where
the offending `*` is. (It might actually not be exatly at the start in
heregexes.) It is a very minor improvement, but it was trivial to add.
- Octal escapes in strings are forbidden in CoffeeScript (just like in
JavaScript strict mode). However, this used to be the case only for regular
strings. Now they are also forbidden in heredocs. Moreover, the errors now
point at the offending octal escape.
- Invalid regex flags are no longer allowed. This includes repeated modifiers
and unknown ones. Moreover, invalid modifiers do not stop a heregex from
being matched, which results in better error messages.
- Fix #3621: `///a#{1}///` compiles to `RegExp("a" + 1)`. So does
`RegExp("a#{1}")`. Still, those two code snippets used to generate different
tokens, which is a bit weird, but more importantly causes problems for
coffeelint (see clutchski/coffeelint#340). This required lots of tests in
test/location.coffee to be updated. Note that some updates to those tests are
unrelated to this point; some have been updated to be more consistent (I
discovered this because the refactored code happened to be seemingly more
correct).
- Regular regex literals used to erraneously allow newlines to be escaped,
causing invalid JavaScript output. This has been fixed.
- Heregexes may now be completely empty (`//////`), instead of erroring out with
a confusing message.
- Fix #2388: Heredocs and heregexes used to be lexed simply, which meant that
you couldn't nest a heredoc within a heredoc (double-quoted, that is) or a
heregex inside a heregex.
- Fix #2321: If you used division inside interpolation and then a slash later in
the string containing that interpolation, the division slash and the latter
slash was erraneously matched as a regex. This has been fixed.
- Indentation inside interpolations in heredocs no longer affect how much
indentation is removed from each line of the heredoc (which is more
intuitive).
- Whitespace is now correctly trimmed from the start and end of strings in a few
edge cases.
- Last but not least, the lexing of interpolated strings now seems to be more
efficient. For a regular double-quoted string, we used to use a custom
function to find the end of it (taking interpolations and interpolations
within interpolations etc. into account). Then we used to re-find the
interpolations and recursively lex their contents. In effect, the same string
was processed twice, or even more in the case of deeper nesting of
interpolations. Now the same string is processed just once.
- Code duplication between regular strings, heredocs, regular regexes and
heregexes has been reduced.
- The above two points should result in more easily read code, too.
2015-01-03 22:40:43 +00:00
|
|
|
break;
|
2015-02-05 16:23:03 +00:00
|
|
|
case !(regex || tokens.length === 1):
|
|
|
|
if (body == null) {
|
|
|
|
body = this.formatHeregex(tokens[0][1]);
|
|
|
|
}
|
|
|
|
this.token('REGEX', "" + (this.makeDelimitedLiteral(body, {
|
|
|
|
delimiter: '/'
|
Fix #3597: Allow interpolations in object keys
The following is now allowed:
o =
a: 1
b: 2
"#{'c'}": 3
"#{'d'}": 4
e: 5
"#{'f'}": 6
g: 7
It compiles to:
o = (
obj = {
a: 1,
b: 2
},
obj["" + 'c'] = 3,
obj["" + 'd'] = 4,
obj.e = 5,
obj["" + 'f'] = 6,
obj.g = 7,
obj
);
- Closes #3039. Empty interpolations in object keys are now _supposed_ to be
allowed.
- Closes #1131. No need to improve error messages for attempted key
interpolation anymore.
- Implementing this required fixing the following bug: `("" + a): 1` used to
error out on the colon, saying "unexpected colon". But really, it is the
attempted object key that is unexpected. Now the error is on the opening
parenthesis instead.
- However, the above fix broke some error message tests for regexes. The easiest
way to fix this was to make a seemingly unrelated change: The error messages
for unexpected identifiers, numbers, strings and regexes now say for example
'unexpected string' instead of 'unexpected """some #{really long} string"""'.
In other words, the tag _name_ is used instead of the tag _value_.
This was way easier to implement, and is more helpful to the user. Using the
tag value is good for operators, reserved words and the like, but not for
tokens which can contain any text. For example, 'unexpected identifier' is
better than 'unexpected expected' (if a variable called 'expected' was used
erraneously).
- While writing tests for the above point I found a few minor bugs with string
locations which have been fixed.
2015-02-07 19:16:59 +00:00
|
|
|
})) + flags, 0, end, origin);
|
Refactor interpolation (and string and regex) handling in lexer
- Fix #3394: Unclosed single-quoted strings (both regular ones and heredocs)
used to pass through the lexer, causing a parsing error later, while
double-quoted strings caused an error already in the lexing phase. Now both
single and double-quoted unclosed strings error out in the lexer (which is the
more logical option) with consistent error messages. This also fixes the last
comment by @satyr in #3301.
- Similar to the above, unclosed heregexes also used to pass through the lexer
and not error until in the parsing phase, which resulted in confusing error
messages. This has been fixed, too.
- Fix #3348, by adding passing tests.
- Fix #3529: If a string starts with an interpolation, an empty string is no
longer emitted before the interpolation (unless it is needed to coerce the
interpolation into a string).
- Block comments cannot contain `*/`. Now the error message also shows exactly
where the offending `*/`. This improvement might seem unrelated, but I had to
touch that code anyway to refactor string and regex related code, and the
change was very trivial. Moreover, it's consistent with the next two points.
- Regexes cannot start with `*`. Now the error message also shows exactly where
the offending `*` is. (It might actually not be exatly at the start in
heregexes.) It is a very minor improvement, but it was trivial to add.
- Octal escapes in strings are forbidden in CoffeeScript (just like in
JavaScript strict mode). However, this used to be the case only for regular
strings. Now they are also forbidden in heredocs. Moreover, the errors now
point at the offending octal escape.
- Invalid regex flags are no longer allowed. This includes repeated modifiers
and unknown ones. Moreover, invalid modifiers do not stop a heregex from
being matched, which results in better error messages.
- Fix #3621: `///a#{1}///` compiles to `RegExp("a" + 1)`. So does
`RegExp("a#{1}")`. Still, those two code snippets used to generate different
tokens, which is a bit weird, but more importantly causes problems for
coffeelint (see clutchski/coffeelint#340). This required lots of tests in
test/location.coffee to be updated. Note that some updates to those tests are
unrelated to this point; some have been updated to be more consistent (I
discovered this because the refactored code happened to be seemingly more
correct).
- Regular regex literals used to erraneously allow newlines to be escaped,
causing invalid JavaScript output. This has been fixed.
- Heregexes may now be completely empty (`//////`), instead of erroring out with
a confusing message.
- Fix #2388: Heredocs and heregexes used to be lexed simply, which meant that
you couldn't nest a heredoc within a heredoc (double-quoted, that is) or a
heregex inside a heregex.
- Fix #2321: If you used division inside interpolation and then a slash later in
the string containing that interpolation, the division slash and the latter
slash was erraneously matched as a regex. This has been fixed.
- Indentation inside interpolations in heredocs no longer affect how much
indentation is removed from each line of the heredoc (which is more
intuitive).
- Whitespace is now correctly trimmed from the start and end of strings in a few
edge cases.
- Last but not least, the lexing of interpolated strings now seems to be more
efficient. For a regular double-quoted string, we used to use a custom
function to find the end of it (taking interpolations and interpolations
within interpolations etc. into account). Then we used to re-find the
interpolations and recursively lex their contents. In effect, the same string
was processed twice, or even more in the case of deeper nesting of
interpolations. Now the same string is processed just once.
- Code duplication between regular strings, heredocs, regular regexes and
heregexes has been reduced.
- The above two points should result in more easily read code, too.
2015-01-03 22:40:43 +00:00
|
|
|
break;
|
|
|
|
default:
|
Fix #3597: Allow interpolations in object keys
The following is now allowed:
o =
a: 1
b: 2
"#{'c'}": 3
"#{'d'}": 4
e: 5
"#{'f'}": 6
g: 7
It compiles to:
o = (
obj = {
a: 1,
b: 2
},
obj["" + 'c'] = 3,
obj["" + 'd'] = 4,
obj.e = 5,
obj["" + 'f'] = 6,
obj.g = 7,
obj
);
- Closes #3039. Empty interpolations in object keys are now _supposed_ to be
allowed.
- Closes #1131. No need to improve error messages for attempted key
interpolation anymore.
- Implementing this required fixing the following bug: `("" + a): 1` used to
error out on the colon, saying "unexpected colon". But really, it is the
attempted object key that is unexpected. Now the error is on the opening
parenthesis instead.
- However, the above fix broke some error message tests for regexes. The easiest
way to fix this was to make a seemingly unrelated change: The error messages
for unexpected identifiers, numbers, strings and regexes now say for example
'unexpected string' instead of 'unexpected """some #{really long} string"""'.
In other words, the tag _name_ is used instead of the tag _value_.
This was way easier to implement, and is more helpful to the user. Using the
tag value is good for operators, reserved words and the like, but not for
tokens which can contain any text. For example, 'unexpected identifier' is
better than 'unexpected expected' (if a variable called 'expected' was used
erraneously).
- While writing tests for the above point I found a few minor bugs with string
locations which have been fixed.
2015-02-07 19:16:59 +00:00
|
|
|
this.token('REGEX_START', '(', 0, 0, origin);
|
Refactor interpolation (and string and regex) handling in lexer
- Fix #3394: Unclosed single-quoted strings (both regular ones and heredocs)
used to pass through the lexer, causing a parsing error later, while
double-quoted strings caused an error already in the lexing phase. Now both
single and double-quoted unclosed strings error out in the lexer (which is the
more logical option) with consistent error messages. This also fixes the last
comment by @satyr in #3301.
- Similar to the above, unclosed heregexes also used to pass through the lexer
and not error until in the parsing phase, which resulted in confusing error
messages. This has been fixed, too.
- Fix #3348, by adding passing tests.
- Fix #3529: If a string starts with an interpolation, an empty string is no
longer emitted before the interpolation (unless it is needed to coerce the
interpolation into a string).
- Block comments cannot contain `*/`. Now the error message also shows exactly
where the offending `*/`. This improvement might seem unrelated, but I had to
touch that code anyway to refactor string and regex related code, and the
change was very trivial. Moreover, it's consistent with the next two points.
- Regexes cannot start with `*`. Now the error message also shows exactly where
the offending `*` is. (It might actually not be exatly at the start in
heregexes.) It is a very minor improvement, but it was trivial to add.
- Octal escapes in strings are forbidden in CoffeeScript (just like in
JavaScript strict mode). However, this used to be the case only for regular
strings. Now they are also forbidden in heredocs. Moreover, the errors now
point at the offending octal escape.
- Invalid regex flags are no longer allowed. This includes repeated modifiers
and unknown ones. Moreover, invalid modifiers do not stop a heregex from
being matched, which results in better error messages.
- Fix #3621: `///a#{1}///` compiles to `RegExp("a" + 1)`. So does
`RegExp("a#{1}")`. Still, those two code snippets used to generate different
tokens, which is a bit weird, but more importantly causes problems for
coffeelint (see clutchski/coffeelint#340). This required lots of tests in
test/location.coffee to be updated. Note that some updates to those tests are
unrelated to this point; some have been updated to be more consistent (I
discovered this because the refactored code happened to be seemingly more
correct).
- Regular regex literals used to erraneously allow newlines to be escaped,
causing invalid JavaScript output. This has been fixed.
- Heregexes may now be completely empty (`//////`), instead of erroring out with
a confusing message.
- Fix #2388: Heredocs and heregexes used to be lexed simply, which meant that
you couldn't nest a heredoc within a heredoc (double-quoted, that is) or a
heregex inside a heregex.
- Fix #2321: If you used division inside interpolation and then a slash later in
the string containing that interpolation, the division slash and the latter
slash was erraneously matched as a regex. This has been fixed.
- Indentation inside interpolations in heredocs no longer affect how much
indentation is removed from each line of the heredoc (which is more
intuitive).
- Whitespace is now correctly trimmed from the start and end of strings in a few
edge cases.
- Last but not least, the lexing of interpolated strings now seems to be more
efficient. For a regular double-quoted string, we used to use a custom
function to find the end of it (taking interpolations and interpolations
within interpolations etc. into account). Then we used to re-find the
interpolations and recursively lex their contents. In effect, the same string
was processed twice, or even more in the case of deeper nesting of
interpolations. Now the same string is processed just once.
- Code duplication between regular strings, heredocs, regular regexes and
heregexes has been reduced.
- The above two points should result in more easily read code, too.
2015-01-03 22:40:43 +00:00
|
|
|
this.token('IDENTIFIER', 'RegExp', 0, 0);
|
Fix #3597: Allow interpolations in object keys
The following is now allowed:
o =
a: 1
b: 2
"#{'c'}": 3
"#{'d'}": 4
e: 5
"#{'f'}": 6
g: 7
It compiles to:
o = (
obj = {
a: 1,
b: 2
},
obj["" + 'c'] = 3,
obj["" + 'd'] = 4,
obj.e = 5,
obj["" + 'f'] = 6,
obj.g = 7,
obj
);
- Closes #3039. Empty interpolations in object keys are now _supposed_ to be
allowed.
- Closes #1131. No need to improve error messages for attempted key
interpolation anymore.
- Implementing this required fixing the following bug: `("" + a): 1` used to
error out on the colon, saying "unexpected colon". But really, it is the
attempted object key that is unexpected. Now the error is on the opening
parenthesis instead.
- However, the above fix broke some error message tests for regexes. The easiest
way to fix this was to make a seemingly unrelated change: The error messages
for unexpected identifiers, numbers, strings and regexes now say for example
'unexpected string' instead of 'unexpected """some #{really long} string"""'.
In other words, the tag _name_ is used instead of the tag _value_.
This was way easier to implement, and is more helpful to the user. Using the
tag value is good for operators, reserved words and the like, but not for
tokens which can contain any text. For example, 'unexpected identifier' is
better than 'unexpected expected' (if a variable called 'expected' was used
erraneously).
- While writing tests for the above point I found a few minor bugs with string
locations which have been fixed.
2015-02-07 19:16:59 +00:00
|
|
|
this.token('CALL_START', '(', 0, 0);
|
2015-02-05 16:23:03 +00:00
|
|
|
this.mergeInterpolationTokens(tokens, {
|
|
|
|
delimiter: '"',
|
|
|
|
double: true
|
|
|
|
}, this.formatHeregex);
|
Refactor interpolation (and string and regex) handling in lexer
- Fix #3394: Unclosed single-quoted strings (both regular ones and heredocs)
used to pass through the lexer, causing a parsing error later, while
double-quoted strings caused an error already in the lexing phase. Now both
single and double-quoted unclosed strings error out in the lexer (which is the
more logical option) with consistent error messages. This also fixes the last
comment by @satyr in #3301.
- Similar to the above, unclosed heregexes also used to pass through the lexer
and not error until in the parsing phase, which resulted in confusing error
messages. This has been fixed, too.
- Fix #3348, by adding passing tests.
- Fix #3529: If a string starts with an interpolation, an empty string is no
longer emitted before the interpolation (unless it is needed to coerce the
interpolation into a string).
- Block comments cannot contain `*/`. Now the error message also shows exactly
where the offending `*/`. This improvement might seem unrelated, but I had to
touch that code anyway to refactor string and regex related code, and the
change was very trivial. Moreover, it's consistent with the next two points.
- Regexes cannot start with `*`. Now the error message also shows exactly where
the offending `*` is. (It might actually not be exatly at the start in
heregexes.) It is a very minor improvement, but it was trivial to add.
- Octal escapes in strings are forbidden in CoffeeScript (just like in
JavaScript strict mode). However, this used to be the case only for regular
strings. Now they are also forbidden in heredocs. Moreover, the errors now
point at the offending octal escape.
- Invalid regex flags are no longer allowed. This includes repeated modifiers
and unknown ones. Moreover, invalid modifiers do not stop a heregex from
being matched, which results in better error messages.
- Fix #3621: `///a#{1}///` compiles to `RegExp("a" + 1)`. So does
`RegExp("a#{1}")`. Still, those two code snippets used to generate different
tokens, which is a bit weird, but more importantly causes problems for
coffeelint (see clutchski/coffeelint#340). This required lots of tests in
test/location.coffee to be updated. Note that some updates to those tests are
unrelated to this point; some have been updated to be more consistent (I
discovered this because the refactored code happened to be seemingly more
correct).
- Regular regex literals used to erraneously allow newlines to be escaped,
causing invalid JavaScript output. This has been fixed.
- Heregexes may now be completely empty (`//////`), instead of erroring out with
a confusing message.
- Fix #2388: Heredocs and heregexes used to be lexed simply, which meant that
you couldn't nest a heredoc within a heredoc (double-quoted, that is) or a
heregex inside a heregex.
- Fix #2321: If you used division inside interpolation and then a slash later in
the string containing that interpolation, the division slash and the latter
slash was erraneously matched as a regex. This has been fixed.
- Indentation inside interpolations in heredocs no longer affect how much
indentation is removed from each line of the heredoc (which is more
intuitive).
- Whitespace is now correctly trimmed from the start and end of strings in a few
edge cases.
- Last but not least, the lexing of interpolated strings now seems to be more
efficient. For a regular double-quoted string, we used to use a custom
function to find the end of it (taking interpolations and interpolations
within interpolations etc. into account). Then we used to re-find the
interpolations and recursively lex their contents. In effect, the same string
was processed twice, or even more in the case of deeper nesting of
interpolations. Now the same string is processed just once.
- Code duplication between regular strings, heredocs, regular regexes and
heregexes has been reduced.
- The above two points should result in more easily read code, too.
2015-01-03 22:40:43 +00:00
|
|
|
if (flags) {
|
|
|
|
this.token(',', ',', index, 0);
|
|
|
|
this.token('STRING', '"' + flags + '"', index, flags.length);
|
|
|
|
}
|
Fix #3597: Allow interpolations in object keys
The following is now allowed:
o =
a: 1
b: 2
"#{'c'}": 3
"#{'d'}": 4
e: 5
"#{'f'}": 6
g: 7
It compiles to:
o = (
obj = {
a: 1,
b: 2
},
obj["" + 'c'] = 3,
obj["" + 'd'] = 4,
obj.e = 5,
obj["" + 'f'] = 6,
obj.g = 7,
obj
);
- Closes #3039. Empty interpolations in object keys are now _supposed_ to be
allowed.
- Closes #1131. No need to improve error messages for attempted key
interpolation anymore.
- Implementing this required fixing the following bug: `("" + a): 1` used to
error out on the colon, saying "unexpected colon". But really, it is the
attempted object key that is unexpected. Now the error is on the opening
parenthesis instead.
- However, the above fix broke some error message tests for regexes. The easiest
way to fix this was to make a seemingly unrelated change: The error messages
for unexpected identifiers, numbers, strings and regexes now say for example
'unexpected string' instead of 'unexpected """some #{really long} string"""'.
In other words, the tag _name_ is used instead of the tag _value_.
This was way easier to implement, and is more helpful to the user. Using the
tag value is good for operators, reserved words and the like, but not for
tokens which can contain any text. For example, 'unexpected identifier' is
better than 'unexpected expected' (if a variable called 'expected' was used
erraneously).
- While writing tests for the above point I found a few minor bugs with string
locations which have been fixed.
2015-02-07 19:16:59 +00:00
|
|
|
this.token(')', ')', end, 0);
|
|
|
|
this.token('REGEX_END', ')', end, 0);
|
Refactor interpolation (and string and regex) handling in lexer
- Fix #3394: Unclosed single-quoted strings (both regular ones and heredocs)
used to pass through the lexer, causing a parsing error later, while
double-quoted strings caused an error already in the lexing phase. Now both
single and double-quoted unclosed strings error out in the lexer (which is the
more logical option) with consistent error messages. This also fixes the last
comment by @satyr in #3301.
- Similar to the above, unclosed heregexes also used to pass through the lexer
and not error until in the parsing phase, which resulted in confusing error
messages. This has been fixed, too.
- Fix #3348, by adding passing tests.
- Fix #3529: If a string starts with an interpolation, an empty string is no
longer emitted before the interpolation (unless it is needed to coerce the
interpolation into a string).
- Block comments cannot contain `*/`. Now the error message also shows exactly
where the offending `*/`. This improvement might seem unrelated, but I had to
touch that code anyway to refactor string and regex related code, and the
change was very trivial. Moreover, it's consistent with the next two points.
- Regexes cannot start with `*`. Now the error message also shows exactly where
the offending `*` is. (It might actually not be exatly at the start in
heregexes.) It is a very minor improvement, but it was trivial to add.
- Octal escapes in strings are forbidden in CoffeeScript (just like in
JavaScript strict mode). However, this used to be the case only for regular
strings. Now they are also forbidden in heredocs. Moreover, the errors now
point at the offending octal escape.
- Invalid regex flags are no longer allowed. This includes repeated modifiers
and unknown ones. Moreover, invalid modifiers do not stop a heregex from
being matched, which results in better error messages.
- Fix #3621: `///a#{1}///` compiles to `RegExp("a" + 1)`. So does
`RegExp("a#{1}")`. Still, those two code snippets used to generate different
tokens, which is a bit weird, but more importantly causes problems for
coffeelint (see clutchski/coffeelint#340). This required lots of tests in
test/location.coffee to be updated. Note that some updates to those tests are
unrelated to this point; some have been updated to be more consistent (I
discovered this because the refactored code happened to be seemingly more
correct).
- Regular regex literals used to erraneously allow newlines to be escaped,
causing invalid JavaScript output. This has been fixed.
- Heregexes may now be completely empty (`//////`), instead of erroring out with
a confusing message.
- Fix #2388: Heredocs and heregexes used to be lexed simply, which meant that
you couldn't nest a heredoc within a heredoc (double-quoted, that is) or a
heregex inside a heregex.
- Fix #2321: If you used division inside interpolation and then a slash later in
the string containing that interpolation, the division slash and the latter
slash was erraneously matched as a regex. This has been fixed.
- Indentation inside interpolations in heredocs no longer affect how much
indentation is removed from each line of the heredoc (which is more
intuitive).
- Whitespace is now correctly trimmed from the start and end of strings in a few
edge cases.
- Last but not least, the lexing of interpolated strings now seems to be more
efficient. For a regular double-quoted string, we used to use a custom
function to find the end of it (taking interpolations and interpolations
within interpolations etc. into account). Then we used to re-find the
interpolations and recursively lex their contents. In effect, the same string
was processed twice, or even more in the case of deeper nesting of
interpolations. Now the same string is processed just once.
- Code duplication between regular strings, heredocs, regular regexes and
heregexes has been reduced.
- The above two points should result in more easily read code, too.
2015-01-03 22:40:43 +00:00
|
|
|
}
|
|
|
|
return end;
|
2010-02-28 00:40:53 +00:00
|
|
|
};
|
2011-09-18 22:16:39 +00:00
|
|
|
|
2010-06-12 23:05:13 +00:00
|
|
|
Lexer.prototype.lineToken = function() {
|
2012-06-07 00:39:17 +00:00
|
|
|
var diff, indent, match, noNewlines, size;
|
2012-04-10 18:57:45 +00:00
|
|
|
if (!(match = MULTI_DENT.exec(this.chunk))) {
|
|
|
|
return 0;
|
|
|
|
}
|
2010-09-25 22:06:14 +00:00
|
|
|
indent = match[0];
|
2011-09-21 03:28:07 +00:00
|
|
|
this.seenFor = false;
|
2010-09-23 05:14:18 +00:00
|
|
|
size = indent.length - 1 - indent.lastIndexOf('\n');
|
2010-10-25 18:56:02 +00:00
|
|
|
noNewlines = this.unfinished();
|
2010-09-09 02:46:13 +00:00
|
|
|
if (size - this.indebt === this.indent) {
|
2010-06-12 23:05:13 +00:00
|
|
|
if (noNewlines) {
|
2010-10-22 14:47:40 +00:00
|
|
|
this.suppressNewlines();
|
|
|
|
} else {
|
2012-11-17 00:09:56 +00:00
|
|
|
this.newlineToken(0);
|
2010-02-28 17:49:37 +00:00
|
|
|
}
|
2010-10-22 14:47:40 +00:00
|
|
|
return indent.length;
|
|
|
|
}
|
|
|
|
if (size > this.indent) {
|
2010-06-12 23:05:13 +00:00
|
|
|
if (noNewlines) {
|
2010-09-09 02:46:13 +00:00
|
|
|
this.indebt = size - this.indent;
|
2010-10-22 14:47:40 +00:00
|
|
|
this.suppressNewlines();
|
|
|
|
return indent.length;
|
2010-02-28 17:49:37 +00:00
|
|
|
}
|
2013-06-13 22:21:47 +00:00
|
|
|
if (!this.tokens.length) {
|
|
|
|
this.baseIndent = this.indent = size;
|
|
|
|
return indent.length;
|
|
|
|
}
|
2010-08-21 16:13:43 +00:00
|
|
|
diff = size - this.indent + this.outdebt;
|
2013-03-04 23:03:08 +00:00
|
|
|
this.token('INDENT', diff, indent.length - size, size);
|
2010-02-28 00:40:53 +00:00
|
|
|
this.indents.push(diff);
|
2015-01-03 23:28:23 +00:00
|
|
|
this.ends.push({
|
|
|
|
tag: 'OUTDENT'
|
|
|
|
});
|
2010-10-20 17:29:06 +00:00
|
|
|
this.outdebt = this.indebt = 0;
|
2014-01-22 18:30:13 +00:00
|
|
|
this.indent = size;
|
2013-06-13 22:21:47 +00:00
|
|
|
} else if (size < this.baseIndent) {
|
2015-02-06 09:52:02 +00:00
|
|
|
this.error('missing indentation', {
|
|
|
|
offset: indent.length
|
|
|
|
});
|
2010-02-28 00:40:53 +00:00
|
|
|
} else {
|
2010-09-09 02:46:13 +00:00
|
|
|
this.indebt = 0;
|
2012-11-17 00:09:56 +00:00
|
|
|
this.outdentToken(this.indent - size, noNewlines, indent.length);
|
2010-02-28 00:40:53 +00:00
|
|
|
}
|
2010-10-22 14:47:40 +00:00
|
|
|
return indent.length;
|
2010-02-28 00:40:53 +00:00
|
|
|
};
|
2011-09-18 22:16:39 +00:00
|
|
|
|
2012-11-17 00:09:56 +00:00
|
|
|
Lexer.prototype.outdentToken = function(moveOut, noNewlines, outdentLength) {
|
2015-01-30 19:33:03 +00:00
|
|
|
var decreasedIndent, dent, lastIndent, ref2;
|
2014-01-22 18:30:13 +00:00
|
|
|
decreasedIndent = this.indent - moveOut;
|
2010-07-31 02:06:22 +00:00
|
|
|
while (moveOut > 0) {
|
2014-01-22 18:30:13 +00:00
|
|
|
lastIndent = this.indents[this.indents.length - 1];
|
|
|
|
if (!lastIndent) {
|
2010-07-31 02:06:22 +00:00
|
|
|
moveOut = 0;
|
2014-01-22 18:30:13 +00:00
|
|
|
} else if (lastIndent === this.outdebt) {
|
2010-07-31 02:06:22 +00:00
|
|
|
moveOut -= this.outdebt;
|
|
|
|
this.outdebt = 0;
|
2014-01-22 18:30:13 +00:00
|
|
|
} else if (lastIndent < this.outdebt) {
|
|
|
|
this.outdebt -= lastIndent;
|
|
|
|
moveOut -= lastIndent;
|
2010-07-31 02:06:22 +00:00
|
|
|
} else {
|
2013-02-25 03:12:57 +00:00
|
|
|
dent = this.indents.pop() + this.outdebt;
|
2015-01-30 19:33:03 +00:00
|
|
|
if (outdentLength && (ref2 = this.chunk[outdentLength], indexOf.call(INDENTABLE_CLOSERS, ref2) >= 0)) {
|
2014-01-22 18:30:13 +00:00
|
|
|
decreasedIndent -= dent - moveOut;
|
|
|
|
moveOut = dent;
|
|
|
|
}
|
2010-07-31 02:06:22 +00:00
|
|
|
this.outdebt = 0;
|
2011-09-16 23:26:04 +00:00
|
|
|
this.pair('OUTDENT');
|
2014-01-22 18:30:13 +00:00
|
|
|
this.token('OUTDENT', moveOut, 0, outdentLength);
|
|
|
|
moveOut -= dent;
|
2010-06-02 03:32:46 +00:00
|
|
|
}
|
|
|
|
}
|
2012-04-10 18:57:45 +00:00
|
|
|
if (dent) {
|
|
|
|
this.outdebt -= moveOut;
|
|
|
|
}
|
2011-09-04 16:18:22 +00:00
|
|
|
while (this.value() === ';') {
|
|
|
|
this.tokens.pop();
|
|
|
|
}
|
2011-08-08 16:55:22 +00:00
|
|
|
if (!(this.tag() === 'TERMINATOR' || noNewlines)) {
|
2012-11-17 00:09:56 +00:00
|
|
|
this.token('TERMINATOR', '\n', outdentLength, 0);
|
2011-08-08 16:55:22 +00:00
|
|
|
}
|
2014-01-23 20:52:26 +00:00
|
|
|
this.indent = decreasedIndent;
|
2010-10-22 14:47:40 +00:00
|
|
|
return this;
|
2010-02-28 00:40:53 +00:00
|
|
|
};
|
2011-09-18 22:16:39 +00:00
|
|
|
|
2010-06-12 23:05:13 +00:00
|
|
|
Lexer.prototype.whitespaceToken = function() {
|
2015-02-07 20:50:41 +00:00
|
|
|
var match, nline, prev, ref2;
|
2011-08-08 16:55:22 +00:00
|
|
|
if (!((match = WHITESPACE.exec(this.chunk)) || (nline = this.chunk.charAt(0) === '\n'))) {
|
|
|
|
return 0;
|
|
|
|
}
|
2015-02-07 20:50:41 +00:00
|
|
|
ref2 = this.tokens, prev = ref2[ref2.length - 1];
|
2012-04-10 18:57:45 +00:00
|
|
|
if (prev) {
|
|
|
|
prev[match ? 'spaced' : 'newLine'] = true;
|
|
|
|
}
|
2010-11-09 04:07:51 +00:00
|
|
|
if (match) {
|
|
|
|
return match[0].length;
|
|
|
|
} else {
|
|
|
|
return 0;
|
|
|
|
}
|
2010-02-28 00:40:53 +00:00
|
|
|
};
|
2011-09-18 22:16:39 +00:00
|
|
|
|
2012-11-17 00:09:56 +00:00
|
|
|
Lexer.prototype.newlineToken = function(offset) {
|
2011-09-04 16:18:22 +00:00
|
|
|
while (this.value() === ';') {
|
|
|
|
this.tokens.pop();
|
|
|
|
}
|
2012-04-10 18:57:45 +00:00
|
|
|
if (this.tag() !== 'TERMINATOR') {
|
2012-11-17 00:09:56 +00:00
|
|
|
this.token('TERMINATOR', '\n', offset, 0);
|
2012-04-10 18:57:45 +00:00
|
|
|
}
|
2010-10-22 14:47:40 +00:00
|
|
|
return this;
|
2010-02-28 00:40:53 +00:00
|
|
|
};
|
2011-09-18 22:16:39 +00:00
|
|
|
|
2010-06-12 23:05:13 +00:00
|
|
|
Lexer.prototype.suppressNewlines = function() {
|
2012-04-10 18:57:45 +00:00
|
|
|
if (this.value() === '\\') {
|
|
|
|
this.tokens.pop();
|
|
|
|
}
|
2010-10-22 14:47:40 +00:00
|
|
|
return this;
|
2010-02-28 00:40:53 +00:00
|
|
|
};
|
2011-09-18 22:16:39 +00:00
|
|
|
|
2010-06-12 23:05:13 +00:00
|
|
|
Lexer.prototype.literalToken = function() {
|
2016-03-05 19:59:39 +00:00
|
|
|
var match, message, origin, prev, ref2, ref3, ref4, ref5, ref6, skipToken, tag, token, value;
|
2010-10-05 14:31:48 +00:00
|
|
|
if (match = OPERATOR.exec(this.chunk)) {
|
|
|
|
value = match[0];
|
2012-04-10 18:57:45 +00:00
|
|
|
if (CODE.test(value)) {
|
|
|
|
this.tagParameters();
|
|
|
|
}
|
2010-09-23 05:14:18 +00:00
|
|
|
} else {
|
|
|
|
value = this.chunk.charAt(0);
|
2010-02-28 00:40:53 +00:00
|
|
|
}
|
|
|
|
tag = value;
|
2015-02-07 20:50:41 +00:00
|
|
|
ref2 = this.tokens, prev = ref2[ref2.length - 1];
|
2016-03-05 19:59:39 +00:00
|
|
|
if (prev && indexOf.call(['='].concat(slice.call(COMPOUND_ASSIGN)), value) >= 0) {
|
|
|
|
skipToken = false;
|
|
|
|
if (value === '=' && ((ref3 = prev[1]) === '||' || ref3 === '&&') && !prev.spaced) {
|
2010-10-05 14:31:48 +00:00
|
|
|
prev[0] = 'COMPOUND_ASSIGN';
|
2010-10-11 10:10:30 +00:00
|
|
|
prev[1] += '=';
|
2016-03-05 19:59:39 +00:00
|
|
|
prev = this.tokens[this.tokens.length - 2];
|
|
|
|
skipToken = true;
|
|
|
|
}
|
2016-03-05 16:41:15 +00:00
|
|
|
if (prev && prev[0] !== 'PROPERTY') {
|
2016-03-05 19:59:39 +00:00
|
|
|
origin = (ref4 = prev.origin) != null ? ref4 : prev;
|
|
|
|
message = isUnassignable(prev[1], origin[1]);
|
|
|
|
if (message) {
|
|
|
|
this.error(message, origin[2]);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (skipToken) {
|
2010-10-22 14:47:40 +00:00
|
|
|
return value.length;
|
2010-07-25 05:36:50 +00:00
|
|
|
}
|
2010-07-25 05:23:37 +00:00
|
|
|
}
|
2010-10-20 02:04:38 +00:00
|
|
|
if (value === ';') {
|
Support import and export of ES2015 modules (#4300)
This pull request adds support for ES2015 modules, by recognizing `import` and `export` statements. The following syntaxes are supported, based on the MDN [import](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Statements/import) and [export](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Statements/export) pages:
```js
import "module-name"
import defaultMember from "module-name"
import * as name from "module-name"
import { } from "module-name"
import { member } from "module-name"
import { member as alias } from "module-name"
import { member1, member2 as alias2, … } from "module-name"
import defaultMember, * as name from "module-name"
import defaultMember, { … } from "module-name"
export default expression
export class name
export { }
export { name }
export { name as exportedName }
export { name as default }
export { name1, name2 as exportedName2, name3 as default, … }
export * from "module-name"
export { … } from "module-name"
```
As a subsitute for ECMAScript’s `export var name = …` and `export function name {}`, CoffeeScript also supports:
```js
export name = …
```
CoffeeScript also supports optional commas within `{ … }`.
This PR converts the supported `import` and `export` statements into ES2015 `import` and `export` statements; it **does not resolve the modules**. So any CoffeeScript with `import` or `export` statements will be output as ES2015, and will need to be transpiled by another tool such as Babel before it can be used in a browser. We will need to add a warning to the documentation explaining this.
This should be fully backwards-compatible, as `import` and `export` were previously reserved keywords. No flags are used.
There are extensive tests included, though because no current JavaScript runtime supports `import` or `export`, the tests compare strings of what the compiled CoffeeScript output is against what the expected ES2015 should be. I also conducted two more elaborate tests:
* I forked the [ember-piqu](https://github.com/pauc/piqu-ember) project, which was an Ember CLI app that used ember-cli-coffeescript and [ember-cli-coffees6](https://github.com/alexspeller/ember-cli-coffees6) (which adds “support” for `import`/`export` by wrapping such statements in backticks before passing the result to the CoffeeScript compiler). I removed `ember-cli-coffees6` and replaced the CoffeeScript compiler used in the build chain with this code, and the app built without errors. [Demo here.](https://github.com/GeoffreyBooth/coffeescript-modules-test-piqu)
* I also forked the [CoffeeScript version of Meteor’s Todos example app](https://github.com/meteor/todos/tree/coffeescript), and replaced all of its `require` statements with the `import` and `export` statements from the original ES2015 version of the app on its `master` branch. I then updated the `coffeescript` Meteor package in the app to use this new code, and again the app builds without errors. [Demo here.](https://github.com/GeoffreyBooth/coffeescript-modules-test-meteor-todos)
The discussion history for this work started [here](https://github.com/jashkenas/coffeescript/pull/4160) and continued [here](https://github.com/GeoffreyBooth/coffeescript/pull/2). @lydell provided guidance, and @JimPanic and @rattrayalex contributed essential code.
2016-09-14 18:46:05 +00:00
|
|
|
this.seenFor = this.seenImport = this.seenExport = false;
|
2010-02-28 00:40:53 +00:00
|
|
|
tag = 'TERMINATOR';
|
2016-09-15 06:30:58 +00:00
|
|
|
} else if (value === '*' && prev[0] === 'EXPORT') {
|
|
|
|
tag = 'EXPORT_ALL';
|
2015-01-30 19:33:03 +00:00
|
|
|
} else if (indexOf.call(MATH, value) >= 0) {
|
2010-08-12 02:24:43 +00:00
|
|
|
tag = 'MATH';
|
2015-01-30 19:33:03 +00:00
|
|
|
} else if (indexOf.call(COMPARE, value) >= 0) {
|
2010-08-12 02:24:43 +00:00
|
|
|
tag = 'COMPARE';
|
2015-01-30 19:33:03 +00:00
|
|
|
} else if (indexOf.call(COMPOUND_ASSIGN, value) >= 0) {
|
2010-08-12 02:24:43 +00:00
|
|
|
tag = 'COMPOUND_ASSIGN';
|
2015-01-30 19:33:03 +00:00
|
|
|
} else if (indexOf.call(UNARY, value) >= 0) {
|
2010-08-12 02:24:43 +00:00
|
|
|
tag = 'UNARY';
|
2015-01-30 19:33:03 +00:00
|
|
|
} else if (indexOf.call(UNARY_MATH, value) >= 0) {
|
2013-03-25 03:05:04 +00:00
|
|
|
tag = 'UNARY_MATH';
|
2015-01-30 19:33:03 +00:00
|
|
|
} else if (indexOf.call(SHIFT, value) >= 0) {
|
2010-08-12 02:24:43 +00:00
|
|
|
tag = 'SHIFT';
|
Define proper operator precedence for bitwise/logical operators
This is an upstream port for the patch https://github.com/decaffeinate/coffeescript/pull/8
See https://github.com/decaffeinate/decaffeinate/issues/291 for the bug that this fixed.
For the most part, CoffeeScript and JavaScript have the same precedence rules,
but in some cases, the intermediate AST format didn't represent the actual
evaluation order. For example, in the expression `a or b and c`, the `and` is
evaluated first, but the parser treated the two operators with equal precedence.
This was still correct end-to-end because CoffeeScript simply emitted the result
without parens, but any intermediate tools using the CoffeeScript parser could
become confused.
Here are the JS operator precedence rules:
https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Operators/Operator_Precedence
For the most part, CoffeeScript already follows these. `COMPARE` operators
already behave differently due to chained comparisons, so I think we don't need
to worry about following JS precedence for those. So I think the only case where
it was behaving differently in an important way was for the binary/bitwise
operators that are being changed here.
As part of this change, I also introduced a new token tag, `BIN?`, for the
binary form of the `?` operator.
2016-10-09 19:00:38 +00:00
|
|
|
} else if (value === '?' && (prev != null ? prev.spaced : void 0)) {
|
|
|
|
tag = 'BIN?';
|
2010-10-12 10:30:10 +00:00
|
|
|
} else if (prev && !prev.spaced) {
|
Fix #3597: Allow interpolations in object keys
The following is now allowed:
o =
a: 1
b: 2
"#{'c'}": 3
"#{'d'}": 4
e: 5
"#{'f'}": 6
g: 7
It compiles to:
o = (
obj = {
a: 1,
b: 2
},
obj["" + 'c'] = 3,
obj["" + 'd'] = 4,
obj.e = 5,
obj["" + 'f'] = 6,
obj.g = 7,
obj
);
- Closes #3039. Empty interpolations in object keys are now _supposed_ to be
allowed.
- Closes #1131. No need to improve error messages for attempted key
interpolation anymore.
- Implementing this required fixing the following bug: `("" + a): 1` used to
error out on the colon, saying "unexpected colon". But really, it is the
attempted object key that is unexpected. Now the error is on the opening
parenthesis instead.
- However, the above fix broke some error message tests for regexes. The easiest
way to fix this was to make a seemingly unrelated change: The error messages
for unexpected identifiers, numbers, strings and regexes now say for example
'unexpected string' instead of 'unexpected """some #{really long} string"""'.
In other words, the tag _name_ is used instead of the tag _value_.
This was way easier to implement, and is more helpful to the user. Using the
tag value is good for operators, reserved words and the like, but not for
tokens which can contain any text. For example, 'unexpected identifier' is
better than 'unexpected expected' (if a variable called 'expected' was used
erraneously).
- While writing tests for the above point I found a few minor bugs with string
locations which have been fixed.
2015-02-07 19:16:59 +00:00
|
|
|
if (value === '(' && (ref5 = prev[0], indexOf.call(CALLABLE, ref5) >= 0)) {
|
2012-04-10 18:57:45 +00:00
|
|
|
if (prev[0] === '?') {
|
|
|
|
prev[0] = 'FUNC_EXIST';
|
|
|
|
}
|
2010-02-28 00:40:53 +00:00
|
|
|
tag = 'CALL_START';
|
2015-02-07 20:50:41 +00:00
|
|
|
} else if (value === '[' && (ref6 = prev[0], indexOf.call(INDEXABLE, ref6) >= 0)) {
|
2010-02-28 00:40:53 +00:00
|
|
|
tag = 'INDEX_START';
|
2010-10-08 02:22:33 +00:00
|
|
|
switch (prev[0]) {
|
2010-09-23 05:14:18 +00:00
|
|
|
case '?':
|
2010-10-05 14:31:48 +00:00
|
|
|
prev[0] = 'INDEX_SOAK';
|
2010-06-01 02:32:43 +00:00
|
|
|
}
|
2010-02-28 00:40:53 +00:00
|
|
|
}
|
|
|
|
}
|
2015-01-03 23:28:23 +00:00
|
|
|
token = this.makeToken(tag, value);
|
2011-09-16 23:26:04 +00:00
|
|
|
switch (value) {
|
|
|
|
case '(':
|
|
|
|
case '{':
|
|
|
|
case '[':
|
2015-01-03 23:28:23 +00:00
|
|
|
this.ends.push({
|
|
|
|
tag: INVERSES[value],
|
|
|
|
origin: token
|
|
|
|
});
|
2011-09-16 23:26:04 +00:00
|
|
|
break;
|
|
|
|
case ')':
|
|
|
|
case '}':
|
|
|
|
case ']':
|
|
|
|
this.pair(value);
|
|
|
|
}
|
2015-01-03 23:28:23 +00:00
|
|
|
this.tokens.push(token);
|
2010-10-22 14:47:40 +00:00
|
|
|
return value.length;
|
2010-02-28 00:40:53 +00:00
|
|
|
};
|
2011-09-18 22:16:39 +00:00
|
|
|
|
2010-06-12 23:05:13 +00:00
|
|
|
Lexer.prototype.tagParameters = function() {
|
2010-10-26 04:09:46 +00:00
|
|
|
var i, stack, tok, tokens;
|
2012-04-10 18:57:45 +00:00
|
|
|
if (this.tag() !== ')') {
|
|
|
|
return this;
|
|
|
|
}
|
2010-10-26 04:09:46 +00:00
|
|
|
stack = [];
|
|
|
|
tokens = this.tokens;
|
|
|
|
i = tokens.length;
|
|
|
|
tokens[--i][0] = 'PARAM_END';
|
|
|
|
while (tok = tokens[--i]) {
|
2010-09-16 03:46:01 +00:00
|
|
|
switch (tok[0]) {
|
2010-09-22 03:58:05 +00:00
|
|
|
case ')':
|
2010-10-26 04:09:46 +00:00
|
|
|
stack.push(tok);
|
2010-09-22 03:58:05 +00:00
|
|
|
break;
|
|
|
|
case '(':
|
2011-02-23 00:20:01 +00:00
|
|
|
case 'CALL_START':
|
2010-10-26 04:09:46 +00:00
|
|
|
if (stack.length) {
|
|
|
|
stack.pop();
|
2011-02-23 00:20:01 +00:00
|
|
|
} else if (tok[0] === '(') {
|
2010-10-26 04:09:46 +00:00
|
|
|
tok[0] = 'PARAM_START';
|
|
|
|
return this;
|
2011-06-07 07:58:36 +00:00
|
|
|
} else {
|
|
|
|
return this;
|
2010-10-26 04:09:46 +00:00
|
|
|
}
|
2010-03-01 01:44:33 +00:00
|
|
|
}
|
|
|
|
}
|
2010-10-22 14:47:40 +00:00
|
|
|
return this;
|
2010-03-01 01:44:33 +00:00
|
|
|
};
|
2011-09-18 22:16:39 +00:00
|
|
|
|
2010-06-12 23:05:13 +00:00
|
|
|
Lexer.prototype.closeIndentation = function() {
|
|
|
|
return this.outdentToken(this.indent);
|
2010-03-01 01:44:33 +00:00
|
|
|
};
|
2011-09-18 22:16:39 +00:00
|
|
|
|
2015-02-03 17:55:38 +00:00
|
|
|
Lexer.prototype.matchWithInterpolations = function(regex, delimiter) {
|
|
|
|
var close, column, firstToken, index, lastToken, line, nested, offsetInChunk, open, ref2, ref3, ref4, str, strPart, tokens;
|
2010-09-25 22:06:14 +00:00
|
|
|
tokens = [];
|
2015-02-03 17:55:38 +00:00
|
|
|
offsetInChunk = delimiter.length;
|
|
|
|
if (this.chunk.slice(0, offsetInChunk) !== delimiter) {
|
|
|
|
return null;
|
|
|
|
}
|
|
|
|
str = this.chunk.slice(offsetInChunk);
|
Refactor interpolation (and string and regex) handling in lexer
- Fix #3394: Unclosed single-quoted strings (both regular ones and heredocs)
used to pass through the lexer, causing a parsing error later, while
double-quoted strings caused an error already in the lexing phase. Now both
single and double-quoted unclosed strings error out in the lexer (which is the
more logical option) with consistent error messages. This also fixes the last
comment by @satyr in #3301.
- Similar to the above, unclosed heregexes also used to pass through the lexer
and not error until in the parsing phase, which resulted in confusing error
messages. This has been fixed, too.
- Fix #3348, by adding passing tests.
- Fix #3529: If a string starts with an interpolation, an empty string is no
longer emitted before the interpolation (unless it is needed to coerce the
interpolation into a string).
- Block comments cannot contain `*/`. Now the error message also shows exactly
where the offending `*/`. This improvement might seem unrelated, but I had to
touch that code anyway to refactor string and regex related code, and the
change was very trivial. Moreover, it's consistent with the next two points.
- Regexes cannot start with `*`. Now the error message also shows exactly where
the offending `*` is. (It might actually not be exatly at the start in
heregexes.) It is a very minor improvement, but it was trivial to add.
- Octal escapes in strings are forbidden in CoffeeScript (just like in
JavaScript strict mode). However, this used to be the case only for regular
strings. Now they are also forbidden in heredocs. Moreover, the errors now
point at the offending octal escape.
- Invalid regex flags are no longer allowed. This includes repeated modifiers
and unknown ones. Moreover, invalid modifiers do not stop a heregex from
being matched, which results in better error messages.
- Fix #3621: `///a#{1}///` compiles to `RegExp("a" + 1)`. So does
`RegExp("a#{1}")`. Still, those two code snippets used to generate different
tokens, which is a bit weird, but more importantly causes problems for
coffeelint (see clutchski/coffeelint#340). This required lots of tests in
test/location.coffee to be updated. Note that some updates to those tests are
unrelated to this point; some have been updated to be more consistent (I
discovered this because the refactored code happened to be seemingly more
correct).
- Regular regex literals used to erraneously allow newlines to be escaped,
causing invalid JavaScript output. This has been fixed.
- Heregexes may now be completely empty (`//////`), instead of erroring out with
a confusing message.
- Fix #2388: Heredocs and heregexes used to be lexed simply, which meant that
you couldn't nest a heredoc within a heredoc (double-quoted, that is) or a
heregex inside a heregex.
- Fix #2321: If you used division inside interpolation and then a slash later in
the string containing that interpolation, the division slash and the latter
slash was erraneously matched as a regex. This has been fixed.
- Indentation inside interpolations in heredocs no longer affect how much
indentation is removed from each line of the heredoc (which is more
intuitive).
- Whitespace is now correctly trimmed from the start and end of strings in a few
edge cases.
- Last but not least, the lexing of interpolated strings now seems to be more
efficient. For a regular double-quoted string, we used to use a custom
function to find the end of it (taking interpolations and interpolations
within interpolations etc. into account). Then we used to re-find the
interpolations and recursively lex their contents. In effect, the same string
was processed twice, or even more in the case of deeper nesting of
interpolations. Now the same string is processed just once.
- Code duplication between regular strings, heredocs, regular regexes and
heregexes has been reduced.
- The above two points should result in more easily read code, too.
2015-01-03 22:40:43 +00:00
|
|
|
while (true) {
|
|
|
|
strPart = regex.exec(str)[0];
|
2015-02-05 16:23:03 +00:00
|
|
|
this.validateEscapes(strPart, {
|
|
|
|
isRegex: delimiter.charAt(0) === '/',
|
|
|
|
offsetInChunk: offsetInChunk
|
|
|
|
});
|
Refactor interpolation (and string and regex) handling in lexer
- Fix #3394: Unclosed single-quoted strings (both regular ones and heredocs)
used to pass through the lexer, causing a parsing error later, while
double-quoted strings caused an error already in the lexing phase. Now both
single and double-quoted unclosed strings error out in the lexer (which is the
more logical option) with consistent error messages. This also fixes the last
comment by @satyr in #3301.
- Similar to the above, unclosed heregexes also used to pass through the lexer
and not error until in the parsing phase, which resulted in confusing error
messages. This has been fixed, too.
- Fix #3348, by adding passing tests.
- Fix #3529: If a string starts with an interpolation, an empty string is no
longer emitted before the interpolation (unless it is needed to coerce the
interpolation into a string).
- Block comments cannot contain `*/`. Now the error message also shows exactly
where the offending `*/`. This improvement might seem unrelated, but I had to
touch that code anyway to refactor string and regex related code, and the
change was very trivial. Moreover, it's consistent with the next two points.
- Regexes cannot start with `*`. Now the error message also shows exactly where
the offending `*` is. (It might actually not be exatly at the start in
heregexes.) It is a very minor improvement, but it was trivial to add.
- Octal escapes in strings are forbidden in CoffeeScript (just like in
JavaScript strict mode). However, this used to be the case only for regular
strings. Now they are also forbidden in heredocs. Moreover, the errors now
point at the offending octal escape.
- Invalid regex flags are no longer allowed. This includes repeated modifiers
and unknown ones. Moreover, invalid modifiers do not stop a heregex from
being matched, which results in better error messages.
- Fix #3621: `///a#{1}///` compiles to `RegExp("a" + 1)`. So does
`RegExp("a#{1}")`. Still, those two code snippets used to generate different
tokens, which is a bit weird, but more importantly causes problems for
coffeelint (see clutchski/coffeelint#340). This required lots of tests in
test/location.coffee to be updated. Note that some updates to those tests are
unrelated to this point; some have been updated to be more consistent (I
discovered this because the refactored code happened to be seemingly more
correct).
- Regular regex literals used to erraneously allow newlines to be escaped,
causing invalid JavaScript output. This has been fixed.
- Heregexes may now be completely empty (`//////`), instead of erroring out with
a confusing message.
- Fix #2388: Heredocs and heregexes used to be lexed simply, which meant that
you couldn't nest a heredoc within a heredoc (double-quoted, that is) or a
heregex inside a heregex.
- Fix #2321: If you used division inside interpolation and then a slash later in
the string containing that interpolation, the division slash and the latter
slash was erraneously matched as a regex. This has been fixed.
- Indentation inside interpolations in heredocs no longer affect how much
indentation is removed from each line of the heredoc (which is more
intuitive).
- Whitespace is now correctly trimmed from the start and end of strings in a few
edge cases.
- Last but not least, the lexing of interpolated strings now seems to be more
efficient. For a regular double-quoted string, we used to use a custom
function to find the end of it (taking interpolations and interpolations
within interpolations etc. into account). Then we used to re-find the
interpolations and recursively lex their contents. In effect, the same string
was processed twice, or even more in the case of deeper nesting of
interpolations. Now the same string is processed just once.
- Code duplication between regular strings, heredocs, regular regexes and
heregexes has been reduced.
- The above two points should result in more easily read code, too.
2015-01-03 22:40:43 +00:00
|
|
|
tokens.push(this.makeToken('NEOSTRING', strPart, offsetInChunk));
|
|
|
|
str = str.slice(strPart.length);
|
|
|
|
offsetInChunk += strPart.length;
|
|
|
|
if (str.slice(0, 2) !== '#{') {
|
|
|
|
break;
|
2012-04-10 18:57:45 +00:00
|
|
|
}
|
2015-01-30 19:33:03 +00:00
|
|
|
ref2 = this.getLineAndColumnFromChunk(offsetInChunk + 1), line = ref2[0], column = ref2[1];
|
|
|
|
ref3 = new Lexer().tokenize(str.slice(1), {
|
Refactor interpolation (and string and regex) handling in lexer
- Fix #3394: Unclosed single-quoted strings (both regular ones and heredocs)
used to pass through the lexer, causing a parsing error later, while
double-quoted strings caused an error already in the lexing phase. Now both
single and double-quoted unclosed strings error out in the lexer (which is the
more logical option) with consistent error messages. This also fixes the last
comment by @satyr in #3301.
- Similar to the above, unclosed heregexes also used to pass through the lexer
and not error until in the parsing phase, which resulted in confusing error
messages. This has been fixed, too.
- Fix #3348, by adding passing tests.
- Fix #3529: If a string starts with an interpolation, an empty string is no
longer emitted before the interpolation (unless it is needed to coerce the
interpolation into a string).
- Block comments cannot contain `*/`. Now the error message also shows exactly
where the offending `*/`. This improvement might seem unrelated, but I had to
touch that code anyway to refactor string and regex related code, and the
change was very trivial. Moreover, it's consistent with the next two points.
- Regexes cannot start with `*`. Now the error message also shows exactly where
the offending `*` is. (It might actually not be exatly at the start in
heregexes.) It is a very minor improvement, but it was trivial to add.
- Octal escapes in strings are forbidden in CoffeeScript (just like in
JavaScript strict mode). However, this used to be the case only for regular
strings. Now they are also forbidden in heredocs. Moreover, the errors now
point at the offending octal escape.
- Invalid regex flags are no longer allowed. This includes repeated modifiers
and unknown ones. Moreover, invalid modifiers do not stop a heregex from
being matched, which results in better error messages.
- Fix #3621: `///a#{1}///` compiles to `RegExp("a" + 1)`. So does
`RegExp("a#{1}")`. Still, those two code snippets used to generate different
tokens, which is a bit weird, but more importantly causes problems for
coffeelint (see clutchski/coffeelint#340). This required lots of tests in
test/location.coffee to be updated. Note that some updates to those tests are
unrelated to this point; some have been updated to be more consistent (I
discovered this because the refactored code happened to be seemingly more
correct).
- Regular regex literals used to erraneously allow newlines to be escaped,
causing invalid JavaScript output. This has been fixed.
- Heregexes may now be completely empty (`//////`), instead of erroring out with
a confusing message.
- Fix #2388: Heredocs and heregexes used to be lexed simply, which meant that
you couldn't nest a heredoc within a heredoc (double-quoted, that is) or a
heregex inside a heregex.
- Fix #2321: If you used division inside interpolation and then a slash later in
the string containing that interpolation, the division slash and the latter
slash was erraneously matched as a regex. This has been fixed.
- Indentation inside interpolations in heredocs no longer affect how much
indentation is removed from each line of the heredoc (which is more
intuitive).
- Whitespace is now correctly trimmed from the start and end of strings in a few
edge cases.
- Last but not least, the lexing of interpolated strings now seems to be more
efficient. For a regular double-quoted string, we used to use a custom
function to find the end of it (taking interpolations and interpolations
within interpolations etc. into account). Then we used to re-find the
interpolations and recursively lex their contents. In effect, the same string
was processed twice, or even more in the case of deeper nesting of
interpolations. Now the same string is processed just once.
- Code duplication between regular strings, heredocs, regular regexes and
heregexes has been reduced.
- The above two points should result in more easily read code, too.
2015-01-03 22:40:43 +00:00
|
|
|
line: line,
|
|
|
|
column: column,
|
|
|
|
untilBalanced: true
|
2015-01-30 19:33:03 +00:00
|
|
|
}), nested = ref3.tokens, index = ref3.index;
|
Refactor interpolation (and string and regex) handling in lexer
- Fix #3394: Unclosed single-quoted strings (both regular ones and heredocs)
used to pass through the lexer, causing a parsing error later, while
double-quoted strings caused an error already in the lexing phase. Now both
single and double-quoted unclosed strings error out in the lexer (which is the
more logical option) with consistent error messages. This also fixes the last
comment by @satyr in #3301.
- Similar to the above, unclosed heregexes also used to pass through the lexer
and not error until in the parsing phase, which resulted in confusing error
messages. This has been fixed, too.
- Fix #3348, by adding passing tests.
- Fix #3529: If a string starts with an interpolation, an empty string is no
longer emitted before the interpolation (unless it is needed to coerce the
interpolation into a string).
- Block comments cannot contain `*/`. Now the error message also shows exactly
where the offending `*/`. This improvement might seem unrelated, but I had to
touch that code anyway to refactor string and regex related code, and the
change was very trivial. Moreover, it's consistent with the next two points.
- Regexes cannot start with `*`. Now the error message also shows exactly where
the offending `*` is. (It might actually not be exatly at the start in
heregexes.) It is a very minor improvement, but it was trivial to add.
- Octal escapes in strings are forbidden in CoffeeScript (just like in
JavaScript strict mode). However, this used to be the case only for regular
strings. Now they are also forbidden in heredocs. Moreover, the errors now
point at the offending octal escape.
- Invalid regex flags are no longer allowed. This includes repeated modifiers
and unknown ones. Moreover, invalid modifiers do not stop a heregex from
being matched, which results in better error messages.
- Fix #3621: `///a#{1}///` compiles to `RegExp("a" + 1)`. So does
`RegExp("a#{1}")`. Still, those two code snippets used to generate different
tokens, which is a bit weird, but more importantly causes problems for
coffeelint (see clutchski/coffeelint#340). This required lots of tests in
test/location.coffee to be updated. Note that some updates to those tests are
unrelated to this point; some have been updated to be more consistent (I
discovered this because the refactored code happened to be seemingly more
correct).
- Regular regex literals used to erraneously allow newlines to be escaped,
causing invalid JavaScript output. This has been fixed.
- Heregexes may now be completely empty (`//////`), instead of erroring out with
a confusing message.
- Fix #2388: Heredocs and heregexes used to be lexed simply, which meant that
you couldn't nest a heredoc within a heredoc (double-quoted, that is) or a
heregex inside a heregex.
- Fix #2321: If you used division inside interpolation and then a slash later in
the string containing that interpolation, the division slash and the latter
slash was erraneously matched as a regex. This has been fixed.
- Indentation inside interpolations in heredocs no longer affect how much
indentation is removed from each line of the heredoc (which is more
intuitive).
- Whitespace is now correctly trimmed from the start and end of strings in a few
edge cases.
- Last but not least, the lexing of interpolated strings now seems to be more
efficient. For a regular double-quoted string, we used to use a custom
function to find the end of it (taking interpolations and interpolations
within interpolations etc. into account). Then we used to re-find the
interpolations and recursively lex their contents. In effect, the same string
was processed twice, or even more in the case of deeper nesting of
interpolations. Now the same string is processed just once.
- Code duplication between regular strings, heredocs, regular regexes and
heregexes has been reduced.
- The above two points should result in more easily read code, too.
2015-01-03 22:40:43 +00:00
|
|
|
index += 1;
|
2015-01-12 19:10:54 +00:00
|
|
|
open = nested[0], close = nested[nested.length - 1];
|
|
|
|
open[0] = open[1] = '(';
|
|
|
|
close[0] = close[1] = ')';
|
|
|
|
close.origin = ['', 'end of interpolation', close[2]];
|
2015-01-30 19:33:03 +00:00
|
|
|
if (((ref4 = nested[1]) != null ? ref4[0] : void 0) === 'TERMINATOR') {
|
2015-01-12 19:10:54 +00:00
|
|
|
nested.splice(1, 1);
|
2010-03-05 22:30:49 +00:00
|
|
|
}
|
Refactor interpolation (and string and regex) handling in lexer
- Fix #3394: Unclosed single-quoted strings (both regular ones and heredocs)
used to pass through the lexer, causing a parsing error later, while
double-quoted strings caused an error already in the lexing phase. Now both
single and double-quoted unclosed strings error out in the lexer (which is the
more logical option) with consistent error messages. This also fixes the last
comment by @satyr in #3301.
- Similar to the above, unclosed heregexes also used to pass through the lexer
and not error until in the parsing phase, which resulted in confusing error
messages. This has been fixed, too.
- Fix #3348, by adding passing tests.
- Fix #3529: If a string starts with an interpolation, an empty string is no
longer emitted before the interpolation (unless it is needed to coerce the
interpolation into a string).
- Block comments cannot contain `*/`. Now the error message also shows exactly
where the offending `*/`. This improvement might seem unrelated, but I had to
touch that code anyway to refactor string and regex related code, and the
change was very trivial. Moreover, it's consistent with the next two points.
- Regexes cannot start with `*`. Now the error message also shows exactly where
the offending `*` is. (It might actually not be exatly at the start in
heregexes.) It is a very minor improvement, but it was trivial to add.
- Octal escapes in strings are forbidden in CoffeeScript (just like in
JavaScript strict mode). However, this used to be the case only for regular
strings. Now they are also forbidden in heredocs. Moreover, the errors now
point at the offending octal escape.
- Invalid regex flags are no longer allowed. This includes repeated modifiers
and unknown ones. Moreover, invalid modifiers do not stop a heregex from
being matched, which results in better error messages.
- Fix #3621: `///a#{1}///` compiles to `RegExp("a" + 1)`. So does
`RegExp("a#{1}")`. Still, those two code snippets used to generate different
tokens, which is a bit weird, but more importantly causes problems for
coffeelint (see clutchski/coffeelint#340). This required lots of tests in
test/location.coffee to be updated. Note that some updates to those tests are
unrelated to this point; some have been updated to be more consistent (I
discovered this because the refactored code happened to be seemingly more
correct).
- Regular regex literals used to erraneously allow newlines to be escaped,
causing invalid JavaScript output. This has been fixed.
- Heregexes may now be completely empty (`//////`), instead of erroring out with
a confusing message.
- Fix #2388: Heredocs and heregexes used to be lexed simply, which meant that
you couldn't nest a heredoc within a heredoc (double-quoted, that is) or a
heregex inside a heregex.
- Fix #2321: If you used division inside interpolation and then a slash later in
the string containing that interpolation, the division slash and the latter
slash was erraneously matched as a regex. This has been fixed.
- Indentation inside interpolations in heredocs no longer affect how much
indentation is removed from each line of the heredoc (which is more
intuitive).
- Whitespace is now correctly trimmed from the start and end of strings in a few
edge cases.
- Last but not least, the lexing of interpolated strings now seems to be more
efficient. For a regular double-quoted string, we used to use a custom
function to find the end of it (taking interpolations and interpolations
within interpolations etc. into account). Then we used to re-find the
interpolations and recursively lex their contents. In effect, the same string
was processed twice, or even more in the case of deeper nesting of
interpolations. Now the same string is processed just once.
- Code duplication between regular strings, heredocs, regular regexes and
heregexes has been reduced.
- The above two points should result in more easily read code, too.
2015-01-03 22:40:43 +00:00
|
|
|
tokens.push(['TOKENS', nested]);
|
|
|
|
str = str.slice(index);
|
|
|
|
offsetInChunk += index;
|
2010-09-25 22:06:14 +00:00
|
|
|
}
|
2015-02-03 17:55:38 +00:00
|
|
|
if (str.slice(0, delimiter.length) !== delimiter) {
|
2015-02-06 09:52:02 +00:00
|
|
|
this.error("missing " + delimiter, {
|
|
|
|
length: delimiter.length
|
|
|
|
});
|
2012-04-10 18:57:45 +00:00
|
|
|
}
|
2015-02-03 17:55:38 +00:00
|
|
|
firstToken = tokens[0], lastToken = tokens[tokens.length - 1];
|
|
|
|
firstToken[2].first_column -= delimiter.length;
|
2016-10-23 07:41:46 +00:00
|
|
|
if (lastToken[1].substr(-1) === '\n') {
|
|
|
|
lastToken[2].last_line += 1;
|
|
|
|
lastToken[2].last_column = delimiter.length - 1;
|
|
|
|
} else {
|
|
|
|
lastToken[2].last_column += delimiter.length;
|
|
|
|
}
|
Fix #3597: Allow interpolations in object keys
The following is now allowed:
o =
a: 1
b: 2
"#{'c'}": 3
"#{'d'}": 4
e: 5
"#{'f'}": 6
g: 7
It compiles to:
o = (
obj = {
a: 1,
b: 2
},
obj["" + 'c'] = 3,
obj["" + 'd'] = 4,
obj.e = 5,
obj["" + 'f'] = 6,
obj.g = 7,
obj
);
- Closes #3039. Empty interpolations in object keys are now _supposed_ to be
allowed.
- Closes #1131. No need to improve error messages for attempted key
interpolation anymore.
- Implementing this required fixing the following bug: `("" + a): 1` used to
error out on the colon, saying "unexpected colon". But really, it is the
attempted object key that is unexpected. Now the error is on the opening
parenthesis instead.
- However, the above fix broke some error message tests for regexes. The easiest
way to fix this was to make a seemingly unrelated change: The error messages
for unexpected identifiers, numbers, strings and regexes now say for example
'unexpected string' instead of 'unexpected """some #{really long} string"""'.
In other words, the tag _name_ is used instead of the tag _value_.
This was way easier to implement, and is more helpful to the user. Using the
tag value is good for operators, reserved words and the like, but not for
tokens which can contain any text. For example, 'unexpected identifier' is
better than 'unexpected expected' (if a variable called 'expected' was used
erraneously).
- While writing tests for the above point I found a few minor bugs with string
locations which have been fixed.
2015-02-07 19:16:59 +00:00
|
|
|
if (lastToken[1].length === 0) {
|
|
|
|
lastToken[2].last_column -= 1;
|
|
|
|
}
|
Refactor interpolation (and string and regex) handling in lexer
- Fix #3394: Unclosed single-quoted strings (both regular ones and heredocs)
used to pass through the lexer, causing a parsing error later, while
double-quoted strings caused an error already in the lexing phase. Now both
single and double-quoted unclosed strings error out in the lexer (which is the
more logical option) with consistent error messages. This also fixes the last
comment by @satyr in #3301.
- Similar to the above, unclosed heregexes also used to pass through the lexer
and not error until in the parsing phase, which resulted in confusing error
messages. This has been fixed, too.
- Fix #3348, by adding passing tests.
- Fix #3529: If a string starts with an interpolation, an empty string is no
longer emitted before the interpolation (unless it is needed to coerce the
interpolation into a string).
- Block comments cannot contain `*/`. Now the error message also shows exactly
where the offending `*/`. This improvement might seem unrelated, but I had to
touch that code anyway to refactor string and regex related code, and the
change was very trivial. Moreover, it's consistent with the next two points.
- Regexes cannot start with `*`. Now the error message also shows exactly where
the offending `*` is. (It might actually not be exatly at the start in
heregexes.) It is a very minor improvement, but it was trivial to add.
- Octal escapes in strings are forbidden in CoffeeScript (just like in
JavaScript strict mode). However, this used to be the case only for regular
strings. Now they are also forbidden in heredocs. Moreover, the errors now
point at the offending octal escape.
- Invalid regex flags are no longer allowed. This includes repeated modifiers
and unknown ones. Moreover, invalid modifiers do not stop a heregex from
being matched, which results in better error messages.
- Fix #3621: `///a#{1}///` compiles to `RegExp("a" + 1)`. So does
`RegExp("a#{1}")`. Still, those two code snippets used to generate different
tokens, which is a bit weird, but more importantly causes problems for
coffeelint (see clutchski/coffeelint#340). This required lots of tests in
test/location.coffee to be updated. Note that some updates to those tests are
unrelated to this point; some have been updated to be more consistent (I
discovered this because the refactored code happened to be seemingly more
correct).
- Regular regex literals used to erraneously allow newlines to be escaped,
causing invalid JavaScript output. This has been fixed.
- Heregexes may now be completely empty (`//////`), instead of erroring out with
a confusing message.
- Fix #2388: Heredocs and heregexes used to be lexed simply, which meant that
you couldn't nest a heredoc within a heredoc (double-quoted, that is) or a
heregex inside a heregex.
- Fix #2321: If you used division inside interpolation and then a slash later in
the string containing that interpolation, the division slash and the latter
slash was erraneously matched as a regex. This has been fixed.
- Indentation inside interpolations in heredocs no longer affect how much
indentation is removed from each line of the heredoc (which is more
intuitive).
- Whitespace is now correctly trimmed from the start and end of strings in a few
edge cases.
- Last but not least, the lexing of interpolated strings now seems to be more
efficient. For a regular double-quoted string, we used to use a custom
function to find the end of it (taking interpolations and interpolations
within interpolations etc. into account). Then we used to re-find the
interpolations and recursively lex their contents. In effect, the same string
was processed twice, or even more in the case of deeper nesting of
interpolations. Now the same string is processed just once.
- Code duplication between regular strings, heredocs, regular regexes and
heregexes has been reduced.
- The above two points should result in more easily read code, too.
2015-01-03 22:40:43 +00:00
|
|
|
return {
|
|
|
|
tokens: tokens,
|
2015-02-03 17:55:38 +00:00
|
|
|
index: offsetInChunk + delimiter.length
|
Refactor interpolation (and string and regex) handling in lexer
- Fix #3394: Unclosed single-quoted strings (both regular ones and heredocs)
used to pass through the lexer, causing a parsing error later, while
double-quoted strings caused an error already in the lexing phase. Now both
single and double-quoted unclosed strings error out in the lexer (which is the
more logical option) with consistent error messages. This also fixes the last
comment by @satyr in #3301.
- Similar to the above, unclosed heregexes also used to pass through the lexer
and not error until in the parsing phase, which resulted in confusing error
messages. This has been fixed, too.
- Fix #3348, by adding passing tests.
- Fix #3529: If a string starts with an interpolation, an empty string is no
longer emitted before the interpolation (unless it is needed to coerce the
interpolation into a string).
- Block comments cannot contain `*/`. Now the error message also shows exactly
where the offending `*/`. This improvement might seem unrelated, but I had to
touch that code anyway to refactor string and regex related code, and the
change was very trivial. Moreover, it's consistent with the next two points.
- Regexes cannot start with `*`. Now the error message also shows exactly where
the offending `*` is. (It might actually not be exatly at the start in
heregexes.) It is a very minor improvement, but it was trivial to add.
- Octal escapes in strings are forbidden in CoffeeScript (just like in
JavaScript strict mode). However, this used to be the case only for regular
strings. Now they are also forbidden in heredocs. Moreover, the errors now
point at the offending octal escape.
- Invalid regex flags are no longer allowed. This includes repeated modifiers
and unknown ones. Moreover, invalid modifiers do not stop a heregex from
being matched, which results in better error messages.
- Fix #3621: `///a#{1}///` compiles to `RegExp("a" + 1)`. So does
`RegExp("a#{1}")`. Still, those two code snippets used to generate different
tokens, which is a bit weird, but more importantly causes problems for
coffeelint (see clutchski/coffeelint#340). This required lots of tests in
test/location.coffee to be updated. Note that some updates to those tests are
unrelated to this point; some have been updated to be more consistent (I
discovered this because the refactored code happened to be seemingly more
correct).
- Regular regex literals used to erraneously allow newlines to be escaped,
causing invalid JavaScript output. This has been fixed.
- Heregexes may now be completely empty (`//////`), instead of erroring out with
a confusing message.
- Fix #2388: Heredocs and heregexes used to be lexed simply, which meant that
you couldn't nest a heredoc within a heredoc (double-quoted, that is) or a
heregex inside a heregex.
- Fix #2321: If you used division inside interpolation and then a slash later in
the string containing that interpolation, the division slash and the latter
slash was erraneously matched as a regex. This has been fixed.
- Indentation inside interpolations in heredocs no longer affect how much
indentation is removed from each line of the heredoc (which is more
intuitive).
- Whitespace is now correctly trimmed from the start and end of strings in a few
edge cases.
- Last but not least, the lexing of interpolated strings now seems to be more
efficient. For a regular double-quoted string, we used to use a custom
function to find the end of it (taking interpolations and interpolations
within interpolations etc. into account). Then we used to re-find the
interpolations and recursively lex their contents. In effect, the same string
was processed twice, or even more in the case of deeper nesting of
interpolations. Now the same string is processed just once.
- Code duplication between regular strings, heredocs, regular regexes and
heregexes has been reduced.
- The above two points should result in more easily read code, too.
2015-01-03 22:40:43 +00:00
|
|
|
};
|
|
|
|
};
|
|
|
|
|
2015-02-05 16:23:03 +00:00
|
|
|
Lexer.prototype.mergeInterpolationTokens = function(tokens, options, fn) {
|
Fix #3597: Allow interpolations in object keys
The following is now allowed:
o =
a: 1
b: 2
"#{'c'}": 3
"#{'d'}": 4
e: 5
"#{'f'}": 6
g: 7
It compiles to:
o = (
obj = {
a: 1,
b: 2
},
obj["" + 'c'] = 3,
obj["" + 'd'] = 4,
obj.e = 5,
obj["" + 'f'] = 6,
obj.g = 7,
obj
);
- Closes #3039. Empty interpolations in object keys are now _supposed_ to be
allowed.
- Closes #1131. No need to improve error messages for attempted key
interpolation anymore.
- Implementing this required fixing the following bug: `("" + a): 1` used to
error out on the colon, saying "unexpected colon". But really, it is the
attempted object key that is unexpected. Now the error is on the opening
parenthesis instead.
- However, the above fix broke some error message tests for regexes. The easiest
way to fix this was to make a seemingly unrelated change: The error messages
for unexpected identifiers, numbers, strings and regexes now say for example
'unexpected string' instead of 'unexpected """some #{really long} string"""'.
In other words, the tag _name_ is used instead of the tag _value_.
This was way easier to implement, and is more helpful to the user. Using the
tag value is good for operators, reserved words and the like, but not for
tokens which can contain any text. For example, 'unexpected identifier' is
better than 'unexpected expected' (if a variable called 'expected' was used
erraneously).
- While writing tests for the above point I found a few minor bugs with string
locations which have been fixed.
2015-02-07 19:16:59 +00:00
|
|
|
var converted, firstEmptyStringIndex, firstIndex, i, j, lastToken, len, locationToken, lparen, plusToken, ref2, rparen, tag, token, tokensToPush, value;
|
|
|
|
if (tokens.length > 1) {
|
|
|
|
lparen = this.token('STRING_START', '(', 0, 0);
|
2012-04-10 18:57:45 +00:00
|
|
|
}
|
Refactor interpolation (and string and regex) handling in lexer
- Fix #3394: Unclosed single-quoted strings (both regular ones and heredocs)
used to pass through the lexer, causing a parsing error later, while
double-quoted strings caused an error already in the lexing phase. Now both
single and double-quoted unclosed strings error out in the lexer (which is the
more logical option) with consistent error messages. This also fixes the last
comment by @satyr in #3301.
- Similar to the above, unclosed heregexes also used to pass through the lexer
and not error until in the parsing phase, which resulted in confusing error
messages. This has been fixed, too.
- Fix #3348, by adding passing tests.
- Fix #3529: If a string starts with an interpolation, an empty string is no
longer emitted before the interpolation (unless it is needed to coerce the
interpolation into a string).
- Block comments cannot contain `*/`. Now the error message also shows exactly
where the offending `*/`. This improvement might seem unrelated, but I had to
touch that code anyway to refactor string and regex related code, and the
change was very trivial. Moreover, it's consistent with the next two points.
- Regexes cannot start with `*`. Now the error message also shows exactly where
the offending `*` is. (It might actually not be exatly at the start in
heregexes.) It is a very minor improvement, but it was trivial to add.
- Octal escapes in strings are forbidden in CoffeeScript (just like in
JavaScript strict mode). However, this used to be the case only for regular
strings. Now they are also forbidden in heredocs. Moreover, the errors now
point at the offending octal escape.
- Invalid regex flags are no longer allowed. This includes repeated modifiers
and unknown ones. Moreover, invalid modifiers do not stop a heregex from
being matched, which results in better error messages.
- Fix #3621: `///a#{1}///` compiles to `RegExp("a" + 1)`. So does
`RegExp("a#{1}")`. Still, those two code snippets used to generate different
tokens, which is a bit weird, but more importantly causes problems for
coffeelint (see clutchski/coffeelint#340). This required lots of tests in
test/location.coffee to be updated. Note that some updates to those tests are
unrelated to this point; some have been updated to be more consistent (I
discovered this because the refactored code happened to be seemingly more
correct).
- Regular regex literals used to erraneously allow newlines to be escaped,
causing invalid JavaScript output. This has been fixed.
- Heregexes may now be completely empty (`//////`), instead of erroring out with
a confusing message.
- Fix #2388: Heredocs and heregexes used to be lexed simply, which meant that
you couldn't nest a heredoc within a heredoc (double-quoted, that is) or a
heregex inside a heregex.
- Fix #2321: If you used division inside interpolation and then a slash later in
the string containing that interpolation, the division slash and the latter
slash was erraneously matched as a regex. This has been fixed.
- Indentation inside interpolations in heredocs no longer affect how much
indentation is removed from each line of the heredoc (which is more
intuitive).
- Whitespace is now correctly trimmed from the start and end of strings in a few
edge cases.
- Last but not least, the lexing of interpolated strings now seems to be more
efficient. For a regular double-quoted string, we used to use a custom
function to find the end of it (taking interpolations and interpolations
within interpolations etc. into account). Then we used to re-find the
interpolations and recursively lex their contents. In effect, the same string
was processed twice, or even more in the case of deeper nesting of
interpolations. Now the same string is processed just once.
- Code duplication between regular strings, heredocs, regular regexes and
heregexes has been reduced.
- The above two points should result in more easily read code, too.
2015-01-03 22:40:43 +00:00
|
|
|
firstIndex = this.tokens.length;
|
2015-01-30 19:33:03 +00:00
|
|
|
for (i = j = 0, len = tokens.length; j < len; i = ++j) {
|
2012-11-17 00:09:56 +00:00
|
|
|
token = tokens[i];
|
|
|
|
tag = token[0], value = token[1];
|
Refactor interpolation (and string and regex) handling in lexer
- Fix #3394: Unclosed single-quoted strings (both regular ones and heredocs)
used to pass through the lexer, causing a parsing error later, while
double-quoted strings caused an error already in the lexing phase. Now both
single and double-quoted unclosed strings error out in the lexer (which is the
more logical option) with consistent error messages. This also fixes the last
comment by @satyr in #3301.
- Similar to the above, unclosed heregexes also used to pass through the lexer
and not error until in the parsing phase, which resulted in confusing error
messages. This has been fixed, too.
- Fix #3348, by adding passing tests.
- Fix #3529: If a string starts with an interpolation, an empty string is no
longer emitted before the interpolation (unless it is needed to coerce the
interpolation into a string).
- Block comments cannot contain `*/`. Now the error message also shows exactly
where the offending `*/`. This improvement might seem unrelated, but I had to
touch that code anyway to refactor string and regex related code, and the
change was very trivial. Moreover, it's consistent with the next two points.
- Regexes cannot start with `*`. Now the error message also shows exactly where
the offending `*` is. (It might actually not be exatly at the start in
heregexes.) It is a very minor improvement, but it was trivial to add.
- Octal escapes in strings are forbidden in CoffeeScript (just like in
JavaScript strict mode). However, this used to be the case only for regular
strings. Now they are also forbidden in heredocs. Moreover, the errors now
point at the offending octal escape.
- Invalid regex flags are no longer allowed. This includes repeated modifiers
and unknown ones. Moreover, invalid modifiers do not stop a heregex from
being matched, which results in better error messages.
- Fix #3621: `///a#{1}///` compiles to `RegExp("a" + 1)`. So does
`RegExp("a#{1}")`. Still, those two code snippets used to generate different
tokens, which is a bit weird, but more importantly causes problems for
coffeelint (see clutchski/coffeelint#340). This required lots of tests in
test/location.coffee to be updated. Note that some updates to those tests are
unrelated to this point; some have been updated to be more consistent (I
discovered this because the refactored code happened to be seemingly more
correct).
- Regular regex literals used to erraneously allow newlines to be escaped,
causing invalid JavaScript output. This has been fixed.
- Heregexes may now be completely empty (`//////`), instead of erroring out with
a confusing message.
- Fix #2388: Heredocs and heregexes used to be lexed simply, which meant that
you couldn't nest a heredoc within a heredoc (double-quoted, that is) or a
heregex inside a heregex.
- Fix #2321: If you used division inside interpolation and then a slash later in
the string containing that interpolation, the division slash and the latter
slash was erraneously matched as a regex. This has been fixed.
- Indentation inside interpolations in heredocs no longer affect how much
indentation is removed from each line of the heredoc (which is more
intuitive).
- Whitespace is now correctly trimmed from the start and end of strings in a few
edge cases.
- Last but not least, the lexing of interpolated strings now seems to be more
efficient. For a regular double-quoted string, we used to use a custom
function to find the end of it (taking interpolations and interpolations
within interpolations etc. into account). Then we used to re-find the
interpolations and recursively lex their contents. In effect, the same string
was processed twice, or even more in the case of deeper nesting of
interpolations. Now the same string is processed just once.
- Code duplication between regular strings, heredocs, regular regexes and
heregexes has been reduced.
- The above two points should result in more easily read code, too.
2015-01-03 22:40:43 +00:00
|
|
|
switch (tag) {
|
|
|
|
case 'TOKENS':
|
2015-01-12 19:10:54 +00:00
|
|
|
if (value.length === 2) {
|
Refactor interpolation (and string and regex) handling in lexer
- Fix #3394: Unclosed single-quoted strings (both regular ones and heredocs)
used to pass through the lexer, causing a parsing error later, while
double-quoted strings caused an error already in the lexing phase. Now both
single and double-quoted unclosed strings error out in the lexer (which is the
more logical option) with consistent error messages. This also fixes the last
comment by @satyr in #3301.
- Similar to the above, unclosed heregexes also used to pass through the lexer
and not error until in the parsing phase, which resulted in confusing error
messages. This has been fixed, too.
- Fix #3348, by adding passing tests.
- Fix #3529: If a string starts with an interpolation, an empty string is no
longer emitted before the interpolation (unless it is needed to coerce the
interpolation into a string).
- Block comments cannot contain `*/`. Now the error message also shows exactly
where the offending `*/`. This improvement might seem unrelated, but I had to
touch that code anyway to refactor string and regex related code, and the
change was very trivial. Moreover, it's consistent with the next two points.
- Regexes cannot start with `*`. Now the error message also shows exactly where
the offending `*` is. (It might actually not be exatly at the start in
heregexes.) It is a very minor improvement, but it was trivial to add.
- Octal escapes in strings are forbidden in CoffeeScript (just like in
JavaScript strict mode). However, this used to be the case only for regular
strings. Now they are also forbidden in heredocs. Moreover, the errors now
point at the offending octal escape.
- Invalid regex flags are no longer allowed. This includes repeated modifiers
and unknown ones. Moreover, invalid modifiers do not stop a heregex from
being matched, which results in better error messages.
- Fix #3621: `///a#{1}///` compiles to `RegExp("a" + 1)`. So does
`RegExp("a#{1}")`. Still, those two code snippets used to generate different
tokens, which is a bit weird, but more importantly causes problems for
coffeelint (see clutchski/coffeelint#340). This required lots of tests in
test/location.coffee to be updated. Note that some updates to those tests are
unrelated to this point; some have been updated to be more consistent (I
discovered this because the refactored code happened to be seemingly more
correct).
- Regular regex literals used to erraneously allow newlines to be escaped,
causing invalid JavaScript output. This has been fixed.
- Heregexes may now be completely empty (`//////`), instead of erroring out with
a confusing message.
- Fix #2388: Heredocs and heregexes used to be lexed simply, which meant that
you couldn't nest a heredoc within a heredoc (double-quoted, that is) or a
heregex inside a heregex.
- Fix #2321: If you used division inside interpolation and then a slash later in
the string containing that interpolation, the division slash and the latter
slash was erraneously matched as a regex. This has been fixed.
- Indentation inside interpolations in heredocs no longer affect how much
indentation is removed from each line of the heredoc (which is more
intuitive).
- Whitespace is now correctly trimmed from the start and end of strings in a few
edge cases.
- Last but not least, the lexing of interpolated strings now seems to be more
efficient. For a regular double-quoted string, we used to use a custom
function to find the end of it (taking interpolations and interpolations
within interpolations etc. into account). Then we used to re-find the
interpolations and recursively lex their contents. In effect, the same string
was processed twice, or even more in the case of deeper nesting of
interpolations. Now the same string is processed just once.
- Code duplication between regular strings, heredocs, regular regexes and
heregexes has been reduced.
- The above two points should result in more easily read code, too.
2015-01-03 22:40:43 +00:00
|
|
|
continue;
|
|
|
|
}
|
|
|
|
locationToken = value[0];
|
|
|
|
tokensToPush = value;
|
|
|
|
break;
|
|
|
|
case 'NEOSTRING':
|
|
|
|
converted = fn(token[1], i);
|
|
|
|
if (converted.length === 0) {
|
|
|
|
if (i === 0) {
|
|
|
|
firstEmptyStringIndex = this.tokens.length;
|
|
|
|
} else {
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (i === 2 && (firstEmptyStringIndex != null)) {
|
|
|
|
this.tokens.splice(firstEmptyStringIndex, 2);
|
|
|
|
}
|
|
|
|
token[0] = 'STRING';
|
2015-02-05 16:23:03 +00:00
|
|
|
token[1] = this.makeDelimitedLiteral(converted, options);
|
Refactor interpolation (and string and regex) handling in lexer
- Fix #3394: Unclosed single-quoted strings (both regular ones and heredocs)
used to pass through the lexer, causing a parsing error later, while
double-quoted strings caused an error already in the lexing phase. Now both
single and double-quoted unclosed strings error out in the lexer (which is the
more logical option) with consistent error messages. This also fixes the last
comment by @satyr in #3301.
- Similar to the above, unclosed heregexes also used to pass through the lexer
and not error until in the parsing phase, which resulted in confusing error
messages. This has been fixed, too.
- Fix #3348, by adding passing tests.
- Fix #3529: If a string starts with an interpolation, an empty string is no
longer emitted before the interpolation (unless it is needed to coerce the
interpolation into a string).
- Block comments cannot contain `*/`. Now the error message also shows exactly
where the offending `*/`. This improvement might seem unrelated, but I had to
touch that code anyway to refactor string and regex related code, and the
change was very trivial. Moreover, it's consistent with the next two points.
- Regexes cannot start with `*`. Now the error message also shows exactly where
the offending `*` is. (It might actually not be exatly at the start in
heregexes.) It is a very minor improvement, but it was trivial to add.
- Octal escapes in strings are forbidden in CoffeeScript (just like in
JavaScript strict mode). However, this used to be the case only for regular
strings. Now they are also forbidden in heredocs. Moreover, the errors now
point at the offending octal escape.
- Invalid regex flags are no longer allowed. This includes repeated modifiers
and unknown ones. Moreover, invalid modifiers do not stop a heregex from
being matched, which results in better error messages.
- Fix #3621: `///a#{1}///` compiles to `RegExp("a" + 1)`. So does
`RegExp("a#{1}")`. Still, those two code snippets used to generate different
tokens, which is a bit weird, but more importantly causes problems for
coffeelint (see clutchski/coffeelint#340). This required lots of tests in
test/location.coffee to be updated. Note that some updates to those tests are
unrelated to this point; some have been updated to be more consistent (I
discovered this because the refactored code happened to be seemingly more
correct).
- Regular regex literals used to erraneously allow newlines to be escaped,
causing invalid JavaScript output. This has been fixed.
- Heregexes may now be completely empty (`//////`), instead of erroring out with
a confusing message.
- Fix #2388: Heredocs and heregexes used to be lexed simply, which meant that
you couldn't nest a heredoc within a heredoc (double-quoted, that is) or a
heregex inside a heregex.
- Fix #2321: If you used division inside interpolation and then a slash later in
the string containing that interpolation, the division slash and the latter
slash was erraneously matched as a regex. This has been fixed.
- Indentation inside interpolations in heredocs no longer affect how much
indentation is removed from each line of the heredoc (which is more
intuitive).
- Whitespace is now correctly trimmed from the start and end of strings in a few
edge cases.
- Last but not least, the lexing of interpolated strings now seems to be more
efficient. For a regular double-quoted string, we used to use a custom
function to find the end of it (taking interpolations and interpolations
within interpolations etc. into account). Then we used to re-find the
interpolations and recursively lex their contents. In effect, the same string
was processed twice, or even more in the case of deeper nesting of
interpolations. Now the same string is processed just once.
- Code duplication between regular strings, heredocs, regular regexes and
heregexes has been reduced.
- The above two points should result in more easily read code, too.
2015-01-03 22:40:43 +00:00
|
|
|
locationToken = token;
|
|
|
|
tokensToPush = [token];
|
|
|
|
}
|
|
|
|
if (this.tokens.length > firstIndex) {
|
|
|
|
plusToken = this.token('+', '+');
|
2013-01-14 20:20:35 +00:00
|
|
|
plusToken[2] = {
|
|
|
|
first_line: locationToken[2].first_line,
|
|
|
|
first_column: locationToken[2].first_column,
|
|
|
|
last_line: locationToken[2].first_line,
|
|
|
|
last_column: locationToken[2].first_column
|
2012-11-19 22:37:46 +00:00
|
|
|
};
|
2012-04-10 18:57:45 +00:00
|
|
|
}
|
2015-01-30 19:33:03 +00:00
|
|
|
(ref2 = this.tokens).push.apply(ref2, tokensToPush);
|
2010-03-05 22:30:49 +00:00
|
|
|
}
|
Fix #3597: Allow interpolations in object keys
The following is now allowed:
o =
a: 1
b: 2
"#{'c'}": 3
"#{'d'}": 4
e: 5
"#{'f'}": 6
g: 7
It compiles to:
o = (
obj = {
a: 1,
b: 2
},
obj["" + 'c'] = 3,
obj["" + 'd'] = 4,
obj.e = 5,
obj["" + 'f'] = 6,
obj.g = 7,
obj
);
- Closes #3039. Empty interpolations in object keys are now _supposed_ to be
allowed.
- Closes #1131. No need to improve error messages for attempted key
interpolation anymore.
- Implementing this required fixing the following bug: `("" + a): 1` used to
error out on the colon, saying "unexpected colon". But really, it is the
attempted object key that is unexpected. Now the error is on the opening
parenthesis instead.
- However, the above fix broke some error message tests for regexes. The easiest
way to fix this was to make a seemingly unrelated change: The error messages
for unexpected identifiers, numbers, strings and regexes now say for example
'unexpected string' instead of 'unexpected """some #{really long} string"""'.
In other words, the tag _name_ is used instead of the tag _value_.
This was way easier to implement, and is more helpful to the user. Using the
tag value is good for operators, reserved words and the like, but not for
tokens which can contain any text. For example, 'unexpected identifier' is
better than 'unexpected expected' (if a variable called 'expected' was used
erraneously).
- While writing tests for the above point I found a few minor bugs with string
locations which have been fixed.
2015-02-07 19:16:59 +00:00
|
|
|
if (lparen) {
|
|
|
|
lastToken = tokens[tokens.length - 1];
|
|
|
|
lparen.origin = [
|
|
|
|
'STRING', null, {
|
|
|
|
first_line: lparen[2].first_line,
|
|
|
|
first_column: lparen[2].first_column,
|
|
|
|
last_line: lastToken[2].last_line,
|
|
|
|
last_column: lastToken[2].last_column
|
|
|
|
}
|
|
|
|
];
|
|
|
|
rparen = this.token('STRING_END', ')');
|
|
|
|
return rparen[2] = {
|
2015-02-04 10:06:35 +00:00
|
|
|
first_line: lastToken[2].last_line,
|
Fix #3597: Allow interpolations in object keys
The following is now allowed:
o =
a: 1
b: 2
"#{'c'}": 3
"#{'d'}": 4
e: 5
"#{'f'}": 6
g: 7
It compiles to:
o = (
obj = {
a: 1,
b: 2
},
obj["" + 'c'] = 3,
obj["" + 'd'] = 4,
obj.e = 5,
obj["" + 'f'] = 6,
obj.g = 7,
obj
);
- Closes #3039. Empty interpolations in object keys are now _supposed_ to be
allowed.
- Closes #1131. No need to improve error messages for attempted key
interpolation anymore.
- Implementing this required fixing the following bug: `("" + a): 1` used to
error out on the colon, saying "unexpected colon". But really, it is the
attempted object key that is unexpected. Now the error is on the opening
parenthesis instead.
- However, the above fix broke some error message tests for regexes. The easiest
way to fix this was to make a seemingly unrelated change: The error messages
for unexpected identifiers, numbers, strings and regexes now say for example
'unexpected string' instead of 'unexpected """some #{really long} string"""'.
In other words, the tag _name_ is used instead of the tag _value_.
This was way easier to implement, and is more helpful to the user. Using the
tag value is good for operators, reserved words and the like, but not for
tokens which can contain any text. For example, 'unexpected identifier' is
better than 'unexpected expected' (if a variable called 'expected' was used
erraneously).
- While writing tests for the above point I found a few minor bugs with string
locations which have been fixed.
2015-02-07 19:16:59 +00:00
|
|
|
first_column: lastToken[2].last_column,
|
2015-02-04 10:06:35 +00:00
|
|
|
last_line: lastToken[2].last_line,
|
Fix #3597: Allow interpolations in object keys
The following is now allowed:
o =
a: 1
b: 2
"#{'c'}": 3
"#{'d'}": 4
e: 5
"#{'f'}": 6
g: 7
It compiles to:
o = (
obj = {
a: 1,
b: 2
},
obj["" + 'c'] = 3,
obj["" + 'd'] = 4,
obj.e = 5,
obj["" + 'f'] = 6,
obj.g = 7,
obj
);
- Closes #3039. Empty interpolations in object keys are now _supposed_ to be
allowed.
- Closes #1131. No need to improve error messages for attempted key
interpolation anymore.
- Implementing this required fixing the following bug: `("" + a): 1` used to
error out on the colon, saying "unexpected colon". But really, it is the
attempted object key that is unexpected. Now the error is on the opening
parenthesis instead.
- However, the above fix broke some error message tests for regexes. The easiest
way to fix this was to make a seemingly unrelated change: The error messages
for unexpected identifiers, numbers, strings and regexes now say for example
'unexpected string' instead of 'unexpected """some #{really long} string"""'.
In other words, the tag _name_ is used instead of the tag _value_.
This was way easier to implement, and is more helpful to the user. Using the
tag value is good for operators, reserved words and the like, but not for
tokens which can contain any text. For example, 'unexpected identifier' is
better than 'unexpected expected' (if a variable called 'expected' was used
erraneously).
- While writing tests for the above point I found a few minor bugs with string
locations which have been fixed.
2015-02-07 19:16:59 +00:00
|
|
|
last_column: lastToken[2].last_column
|
2015-02-04 10:06:35 +00:00
|
|
|
};
|
2012-04-10 18:57:45 +00:00
|
|
|
}
|
2010-03-05 22:30:49 +00:00
|
|
|
};
|
2011-09-18 22:16:39 +00:00
|
|
|
|
2011-09-16 23:26:04 +00:00
|
|
|
Lexer.prototype.pair = function(tag) {
|
2015-02-07 20:50:41 +00:00
|
|
|
var lastIndent, prev, ref2, ref3, wanted;
|
|
|
|
ref2 = this.ends, prev = ref2[ref2.length - 1];
|
|
|
|
if (tag !== (wanted = prev != null ? prev.tag : void 0)) {
|
2012-04-10 18:57:45 +00:00
|
|
|
if ('OUTDENT' !== wanted) {
|
|
|
|
this.error("unmatched " + tag);
|
|
|
|
}
|
2015-02-07 20:50:41 +00:00
|
|
|
ref3 = this.indents, lastIndent = ref3[ref3.length - 1];
|
|
|
|
this.outdentToken(lastIndent, true);
|
2011-09-16 23:26:04 +00:00
|
|
|
return this.pair(tag);
|
|
|
|
}
|
|
|
|
return this.ends.pop();
|
|
|
|
};
|
2011-09-18 22:16:39 +00:00
|
|
|
|
2012-11-17 00:09:56 +00:00
|
|
|
Lexer.prototype.getLineAndColumnFromChunk = function(offset) {
|
2015-02-07 20:50:41 +00:00
|
|
|
var column, lastLine, lineCount, ref2, string;
|
2012-11-17 00:09:56 +00:00
|
|
|
if (offset === 0) {
|
|
|
|
return [this.chunkLine, this.chunkColumn];
|
|
|
|
}
|
|
|
|
if (offset >= this.chunk.length) {
|
|
|
|
string = this.chunk;
|
|
|
|
} else {
|
|
|
|
string = this.chunk.slice(0, +(offset - 1) + 1 || 9e9);
|
|
|
|
}
|
|
|
|
lineCount = count(string, '\n');
|
|
|
|
column = this.chunkColumn;
|
|
|
|
if (lineCount > 0) {
|
2015-02-07 20:50:41 +00:00
|
|
|
ref2 = string.split('\n'), lastLine = ref2[ref2.length - 1];
|
|
|
|
column = lastLine.length;
|
2012-11-17 00:09:56 +00:00
|
|
|
} else {
|
|
|
|
column += string.length;
|
|
|
|
}
|
|
|
|
return [this.chunkLine + lineCount, column];
|
|
|
|
};
|
|
|
|
|
|
|
|
Lexer.prototype.makeToken = function(tag, value, offsetInChunk, length) {
|
2015-01-30 19:33:03 +00:00
|
|
|
var lastCharacter, locationData, ref2, ref3, token;
|
2013-02-25 13:44:56 +00:00
|
|
|
if (offsetInChunk == null) {
|
|
|
|
offsetInChunk = 0;
|
|
|
|
}
|
|
|
|
if (length == null) {
|
2012-11-17 00:09:56 +00:00
|
|
|
length = value.length;
|
|
|
|
}
|
|
|
|
locationData = {};
|
2015-01-30 19:33:03 +00:00
|
|
|
ref2 = this.getLineAndColumnFromChunk(offsetInChunk), locationData.first_line = ref2[0], locationData.first_column = ref2[1];
|
Fix incorrect location data in OUTDENT nodes
In f609036beef3aa1529c210ffad3ced818ee94cce, a line was changed from
`if length > 0 then (length - 1) else 0` to `Math.max 0, length - 1`. However,
in some cases, the `length` variable can be `undefined`. The previous code would
correctly compute `lastCharacter` as 0, but the new code would compute it as
`NaN`. This would cause trouble later on: the end location would just be the end
of the current chunk, which would be incorrect.
Here's a specific case where the parser was behaving incorrectly:
```
a {
b: ->
return c d,
if e
f
}
g
```
The OUTDENT tokens after the `f` had an undefined length, so the `NaN` made it
so the end location was at the end of the file. That meant that various nodes in
the AST, like the `return` node, would incorrectly have an end location at the
end of the file.
To fix, I just reverted the change to that particular line.
2016-07-28 06:16:30 +00:00
|
|
|
lastCharacter = length > 0 ? length - 1 : 0;
|
2015-01-30 19:33:03 +00:00
|
|
|
ref3 = this.getLineAndColumnFromChunk(offsetInChunk + lastCharacter), locationData.last_line = ref3[0], locationData.last_column = ref3[1];
|
2013-01-14 20:20:35 +00:00
|
|
|
token = [tag, value, locationData];
|
2012-11-17 00:09:56 +00:00
|
|
|
return token;
|
|
|
|
};
|
|
|
|
|
2014-01-22 02:44:50 +00:00
|
|
|
Lexer.prototype.token = function(tag, value, offsetInChunk, length, origin) {
|
2012-11-17 00:09:56 +00:00
|
|
|
var token;
|
|
|
|
token = this.makeToken(tag, value, offsetInChunk, length);
|
2014-01-22 02:44:50 +00:00
|
|
|
if (origin) {
|
|
|
|
token.origin = origin;
|
|
|
|
}
|
2012-11-17 00:09:56 +00:00
|
|
|
this.tokens.push(token);
|
|
|
|
return token;
|
2010-02-28 00:40:53 +00:00
|
|
|
};
|
2011-09-18 22:16:39 +00:00
|
|
|
|
2015-02-07 20:50:41 +00:00
|
|
|
Lexer.prototype.tag = function() {
|
|
|
|
var ref2, token;
|
|
|
|
ref2 = this.tokens, token = ref2[ref2.length - 1];
|
|
|
|
return token != null ? token[0] : void 0;
|
2010-02-28 00:40:53 +00:00
|
|
|
};
|
2011-09-18 22:16:39 +00:00
|
|
|
|
2015-02-07 20:50:41 +00:00
|
|
|
Lexer.prototype.value = function() {
|
|
|
|
var ref2, token;
|
|
|
|
ref2 = this.tokens, token = ref2[ref2.length - 1];
|
|
|
|
return token != null ? token[1] : void 0;
|
2010-02-28 00:40:53 +00:00
|
|
|
};
|
2011-09-18 22:16:39 +00:00
|
|
|
|
2010-05-15 03:40:04 +00:00
|
|
|
Lexer.prototype.unfinished = function() {
|
2015-01-30 19:33:03 +00:00
|
|
|
var ref2;
|
Define proper operator precedence for bitwise/logical operators
This is an upstream port for the patch https://github.com/decaffeinate/coffeescript/pull/8
See https://github.com/decaffeinate/decaffeinate/issues/291 for the bug that this fixed.
For the most part, CoffeeScript and JavaScript have the same precedence rules,
but in some cases, the intermediate AST format didn't represent the actual
evaluation order. For example, in the expression `a or b and c`, the `and` is
evaluated first, but the parser treated the two operators with equal precedence.
This was still correct end-to-end because CoffeeScript simply emitted the result
without parens, but any intermediate tools using the CoffeeScript parser could
become confused.
Here are the JS operator precedence rules:
https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Operators/Operator_Precedence
For the most part, CoffeeScript already follows these. `COMPARE` operators
already behave differently due to chained comparisons, so I think we don't need
to worry about following JS precedence for those. So I think the only case where
it was behaving differently in an important way was for the binary/bitwise
operators that are being changed here.
As part of this change, I also introduced a new token tag, `BIN?`, for the
binary form of the `?` operator.
2016-10-09 19:00:38 +00:00
|
|
|
return LINE_CONTINUER.test(this.chunk) || ((ref2 = this.tag()) === '\\' || ref2 === '.' || ref2 === '?.' || ref2 === '?::' || ref2 === 'UNARY' || ref2 === 'MATH' || ref2 === 'UNARY_MATH' || ref2 === '+' || ref2 === '-' || ref2 === '**' || ref2 === 'SHIFT' || ref2 === 'RELATION' || ref2 === 'COMPARE' || ref2 === '&' || ref2 === '^' || ref2 === '|' || ref2 === '&&' || ref2 === '||' || ref2 === 'BIN?' || ref2 === 'THROW' || ref2 === 'EXTENDS');
|
2010-09-25 22:06:14 +00:00
|
|
|
};
|
2011-09-18 22:16:39 +00:00
|
|
|
|
Refactor interpolation (and string and regex) handling in lexer
- Fix #3394: Unclosed single-quoted strings (both regular ones and heredocs)
used to pass through the lexer, causing a parsing error later, while
double-quoted strings caused an error already in the lexing phase. Now both
single and double-quoted unclosed strings error out in the lexer (which is the
more logical option) with consistent error messages. This also fixes the last
comment by @satyr in #3301.
- Similar to the above, unclosed heregexes also used to pass through the lexer
and not error until in the parsing phase, which resulted in confusing error
messages. This has been fixed, too.
- Fix #3348, by adding passing tests.
- Fix #3529: If a string starts with an interpolation, an empty string is no
longer emitted before the interpolation (unless it is needed to coerce the
interpolation into a string).
- Block comments cannot contain `*/`. Now the error message also shows exactly
where the offending `*/`. This improvement might seem unrelated, but I had to
touch that code anyway to refactor string and regex related code, and the
change was very trivial. Moreover, it's consistent with the next two points.
- Regexes cannot start with `*`. Now the error message also shows exactly where
the offending `*` is. (It might actually not be exatly at the start in
heregexes.) It is a very minor improvement, but it was trivial to add.
- Octal escapes in strings are forbidden in CoffeeScript (just like in
JavaScript strict mode). However, this used to be the case only for regular
strings. Now they are also forbidden in heredocs. Moreover, the errors now
point at the offending octal escape.
- Invalid regex flags are no longer allowed. This includes repeated modifiers
and unknown ones. Moreover, invalid modifiers do not stop a heregex from
being matched, which results in better error messages.
- Fix #3621: `///a#{1}///` compiles to `RegExp("a" + 1)`. So does
`RegExp("a#{1}")`. Still, those two code snippets used to generate different
tokens, which is a bit weird, but more importantly causes problems for
coffeelint (see clutchski/coffeelint#340). This required lots of tests in
test/location.coffee to be updated. Note that some updates to those tests are
unrelated to this point; some have been updated to be more consistent (I
discovered this because the refactored code happened to be seemingly more
correct).
- Regular regex literals used to erraneously allow newlines to be escaped,
causing invalid JavaScript output. This has been fixed.
- Heregexes may now be completely empty (`//////`), instead of erroring out with
a confusing message.
- Fix #2388: Heredocs and heregexes used to be lexed simply, which meant that
you couldn't nest a heredoc within a heredoc (double-quoted, that is) or a
heregex inside a heregex.
- Fix #2321: If you used division inside interpolation and then a slash later in
the string containing that interpolation, the division slash and the latter
slash was erraneously matched as a regex. This has been fixed.
- Indentation inside interpolations in heredocs no longer affect how much
indentation is removed from each line of the heredoc (which is more
intuitive).
- Whitespace is now correctly trimmed from the start and end of strings in a few
edge cases.
- Last but not least, the lexing of interpolated strings now seems to be more
efficient. For a regular double-quoted string, we used to use a custom
function to find the end of it (taking interpolations and interpolations
within interpolations etc. into account). Then we used to re-find the
interpolations and recursively lex their contents. In effect, the same string
was processed twice, or even more in the case of deeper nesting of
interpolations. Now the same string is processed just once.
- Code duplication between regular strings, heredocs, regular regexes and
heregexes has been reduced.
- The above two points should result in more easily read code, too.
2015-01-03 22:40:43 +00:00
|
|
|
Lexer.prototype.formatString = function(str) {
|
2015-02-05 16:23:03 +00:00
|
|
|
return str.replace(STRING_OMIT, '$1');
|
2010-03-20 04:58:25 +00:00
|
|
|
};
|
2011-09-18 22:16:39 +00:00
|
|
|
|
Refactor interpolation (and string and regex) handling in lexer
- Fix #3394: Unclosed single-quoted strings (both regular ones and heredocs)
used to pass through the lexer, causing a parsing error later, while
double-quoted strings caused an error already in the lexing phase. Now both
single and double-quoted unclosed strings error out in the lexer (which is the
more logical option) with consistent error messages. This also fixes the last
comment by @satyr in #3301.
- Similar to the above, unclosed heregexes also used to pass through the lexer
and not error until in the parsing phase, which resulted in confusing error
messages. This has been fixed, too.
- Fix #3348, by adding passing tests.
- Fix #3529: If a string starts with an interpolation, an empty string is no
longer emitted before the interpolation (unless it is needed to coerce the
interpolation into a string).
- Block comments cannot contain `*/`. Now the error message also shows exactly
where the offending `*/`. This improvement might seem unrelated, but I had to
touch that code anyway to refactor string and regex related code, and the
change was very trivial. Moreover, it's consistent with the next two points.
- Regexes cannot start with `*`. Now the error message also shows exactly where
the offending `*` is. (It might actually not be exatly at the start in
heregexes.) It is a very minor improvement, but it was trivial to add.
- Octal escapes in strings are forbidden in CoffeeScript (just like in
JavaScript strict mode). However, this used to be the case only for regular
strings. Now they are also forbidden in heredocs. Moreover, the errors now
point at the offending octal escape.
- Invalid regex flags are no longer allowed. This includes repeated modifiers
and unknown ones. Moreover, invalid modifiers do not stop a heregex from
being matched, which results in better error messages.
- Fix #3621: `///a#{1}///` compiles to `RegExp("a" + 1)`. So does
`RegExp("a#{1}")`. Still, those two code snippets used to generate different
tokens, which is a bit weird, but more importantly causes problems for
coffeelint (see clutchski/coffeelint#340). This required lots of tests in
test/location.coffee to be updated. Note that some updates to those tests are
unrelated to this point; some have been updated to be more consistent (I
discovered this because the refactored code happened to be seemingly more
correct).
- Regular regex literals used to erraneously allow newlines to be escaped,
causing invalid JavaScript output. This has been fixed.
- Heregexes may now be completely empty (`//////`), instead of erroring out with
a confusing message.
- Fix #2388: Heredocs and heregexes used to be lexed simply, which meant that
you couldn't nest a heredoc within a heredoc (double-quoted, that is) or a
heregex inside a heregex.
- Fix #2321: If you used division inside interpolation and then a slash later in
the string containing that interpolation, the division slash and the latter
slash was erraneously matched as a regex. This has been fixed.
- Indentation inside interpolations in heredocs no longer affect how much
indentation is removed from each line of the heredoc (which is more
intuitive).
- Whitespace is now correctly trimmed from the start and end of strings in a few
edge cases.
- Last but not least, the lexing of interpolated strings now seems to be more
efficient. For a regular double-quoted string, we used to use a custom
function to find the end of it (taking interpolations and interpolations
within interpolations etc. into account). Then we used to re-find the
interpolations and recursively lex their contents. In effect, the same string
was processed twice, or even more in the case of deeper nesting of
interpolations. Now the same string is processed just once.
- Code duplication between regular strings, heredocs, regular regexes and
heregexes has been reduced.
- The above two points should result in more easily read code, too.
2015-01-03 22:40:43 +00:00
|
|
|
Lexer.prototype.formatHeregex = function(str) {
|
2015-02-05 16:23:03 +00:00
|
|
|
return str.replace(HEREGEX_OMIT, '$1$2');
|
Refactor interpolation (and string and regex) handling in lexer
- Fix #3394: Unclosed single-quoted strings (both regular ones and heredocs)
used to pass through the lexer, causing a parsing error later, while
double-quoted strings caused an error already in the lexing phase. Now both
single and double-quoted unclosed strings error out in the lexer (which is the
more logical option) with consistent error messages. This also fixes the last
comment by @satyr in #3301.
- Similar to the above, unclosed heregexes also used to pass through the lexer
and not error until in the parsing phase, which resulted in confusing error
messages. This has been fixed, too.
- Fix #3348, by adding passing tests.
- Fix #3529: If a string starts with an interpolation, an empty string is no
longer emitted before the interpolation (unless it is needed to coerce the
interpolation into a string).
- Block comments cannot contain `*/`. Now the error message also shows exactly
where the offending `*/`. This improvement might seem unrelated, but I had to
touch that code anyway to refactor string and regex related code, and the
change was very trivial. Moreover, it's consistent with the next two points.
- Regexes cannot start with `*`. Now the error message also shows exactly where
the offending `*` is. (It might actually not be exatly at the start in
heregexes.) It is a very minor improvement, but it was trivial to add.
- Octal escapes in strings are forbidden in CoffeeScript (just like in
JavaScript strict mode). However, this used to be the case only for regular
strings. Now they are also forbidden in heredocs. Moreover, the errors now
point at the offending octal escape.
- Invalid regex flags are no longer allowed. This includes repeated modifiers
and unknown ones. Moreover, invalid modifiers do not stop a heregex from
being matched, which results in better error messages.
- Fix #3621: `///a#{1}///` compiles to `RegExp("a" + 1)`. So does
`RegExp("a#{1}")`. Still, those two code snippets used to generate different
tokens, which is a bit weird, but more importantly causes problems for
coffeelint (see clutchski/coffeelint#340). This required lots of tests in
test/location.coffee to be updated. Note that some updates to those tests are
unrelated to this point; some have been updated to be more consistent (I
discovered this because the refactored code happened to be seemingly more
correct).
- Regular regex literals used to erraneously allow newlines to be escaped,
causing invalid JavaScript output. This has been fixed.
- Heregexes may now be completely empty (`//////`), instead of erroring out with
a confusing message.
- Fix #2388: Heredocs and heregexes used to be lexed simply, which meant that
you couldn't nest a heredoc within a heredoc (double-quoted, that is) or a
heregex inside a heregex.
- Fix #2321: If you used division inside interpolation and then a slash later in
the string containing that interpolation, the division slash and the latter
slash was erraneously matched as a regex. This has been fixed.
- Indentation inside interpolations in heredocs no longer affect how much
indentation is removed from each line of the heredoc (which is more
intuitive).
- Whitespace is now correctly trimmed from the start and end of strings in a few
edge cases.
- Last but not least, the lexing of interpolated strings now seems to be more
efficient. For a regular double-quoted string, we used to use a custom
function to find the end of it (taking interpolations and interpolations
within interpolations etc. into account). Then we used to re-find the
interpolations and recursively lex their contents. In effect, the same string
was processed twice, or even more in the case of deeper nesting of
interpolations. Now the same string is processed just once.
- Code duplication between regular strings, heredocs, regular regexes and
heregexes has been reduced.
- The above two points should result in more easily read code, too.
2015-01-03 22:40:43 +00:00
|
|
|
};
|
|
|
|
|
2015-02-05 16:23:03 +00:00
|
|
|
Lexer.prototype.validateEscapes = function(str, options) {
|
2015-02-06 09:52:02 +00:00
|
|
|
var before, hex, invalidEscape, match, message, octal, ref2, unicode;
|
2015-02-05 16:23:03 +00:00
|
|
|
if (options == null) {
|
|
|
|
options = {};
|
2012-04-10 18:57:45 +00:00
|
|
|
}
|
2015-02-05 16:23:03 +00:00
|
|
|
match = INVALID_ESCAPE.exec(str);
|
|
|
|
if (!match) {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
match[0], before = match[1], octal = match[2], hex = match[3], unicode = match[4];
|
|
|
|
if (options.isRegex && octal && octal.charAt(0) !== '0') {
|
|
|
|
return;
|
|
|
|
}
|
2015-02-06 09:52:02 +00:00
|
|
|
message = octal ? "octal escape sequences are not allowed" : "invalid escape sequence";
|
|
|
|
invalidEscape = "\\" + (octal || hex || unicode);
|
|
|
|
return this.error(message + " " + invalidEscape, {
|
|
|
|
offset: ((ref2 = options.offsetInChunk) != null ? ref2 : 0) + match.index + before.length,
|
|
|
|
length: invalidEscape.length
|
|
|
|
});
|
2015-02-05 16:23:03 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
Lexer.prototype.makeDelimitedLiteral = function(body, options) {
|
|
|
|
var regex;
|
|
|
|
if (options == null) {
|
|
|
|
options = {};
|
|
|
|
}
|
|
|
|
if (body === '' && options.delimiter === '/') {
|
|
|
|
body = '(?:)';
|
|
|
|
}
|
|
|
|
regex = RegExp("(\\\\\\\\)|(\\\\0(?=[1-7]))|\\\\?(" + options.delimiter + ")|\\\\?(?:(\\n)|(\\r)|(\\u2028)|(\\u2029))|(\\\\.)", "g");
|
|
|
|
body = body.replace(regex, function(match, backslash, nul, delimiter, lf, cr, ls, ps, other) {
|
|
|
|
switch (false) {
|
|
|
|
case !backslash:
|
|
|
|
if (options.double) {
|
|
|
|
return backslash + backslash;
|
|
|
|
} else {
|
|
|
|
return backslash;
|
|
|
|
}
|
|
|
|
case !nul:
|
|
|
|
return '\\x00';
|
|
|
|
case !delimiter:
|
|
|
|
return "\\" + delimiter;
|
|
|
|
case !lf:
|
|
|
|
return '\\n';
|
|
|
|
case !cr:
|
|
|
|
return '\\r';
|
|
|
|
case !ls:
|
|
|
|
return '\\u2028';
|
|
|
|
case !ps:
|
|
|
|
return '\\u2029';
|
|
|
|
case !other:
|
|
|
|
if (options.double) {
|
|
|
|
return "\\" + other;
|
|
|
|
} else {
|
|
|
|
return other;
|
|
|
|
}
|
2010-11-09 04:07:51 +00:00
|
|
|
}
|
2010-10-04 18:30:48 +00:00
|
|
|
});
|
2015-02-05 16:23:03 +00:00
|
|
|
return "" + options.delimiter + body + options.delimiter;
|
2010-10-04 18:30:48 +00:00
|
|
|
};
|
2011-09-18 22:16:39 +00:00
|
|
|
|
2015-02-06 09:52:02 +00:00
|
|
|
Lexer.prototype.error = function(message, options) {
|
|
|
|
var first_column, first_line, location, ref2, ref3, ref4;
|
|
|
|
if (options == null) {
|
|
|
|
options = {};
|
2013-06-13 22:21:47 +00:00
|
|
|
}
|
2015-02-06 09:52:02 +00:00
|
|
|
location = 'first_line' in options ? options : ((ref3 = this.getLineAndColumnFromChunk((ref2 = options.offset) != null ? ref2 : 0), first_line = ref3[0], first_column = ref3[1], ref3), {
|
2013-06-13 22:21:47 +00:00
|
|
|
first_line: first_line,
|
2015-02-06 09:52:02 +00:00
|
|
|
first_column: first_column,
|
|
|
|
last_column: first_column + ((ref4 = options.length) != null ? ref4 : 1) - 1
|
2013-03-05 04:13:46 +00:00
|
|
|
});
|
2015-02-06 09:52:02 +00:00
|
|
|
return throwSyntaxError(message, location);
|
2011-09-16 23:26:04 +00:00
|
|
|
};
|
2011-09-18 22:16:39 +00:00
|
|
|
|
2010-02-28 00:40:53 +00:00
|
|
|
return Lexer;
|
2011-12-14 15:39:20 +00:00
|
|
|
|
2010-12-23 18:50:52 +00:00
|
|
|
})();
|
2011-09-18 22:16:39 +00:00
|
|
|
|
2016-03-05 19:59:39 +00:00
|
|
|
isUnassignable = function(name, displayName) {
|
|
|
|
if (displayName == null) {
|
|
|
|
displayName = name;
|
|
|
|
}
|
|
|
|
switch (false) {
|
|
|
|
case indexOf.call(slice.call(JS_KEYWORDS).concat(slice.call(COFFEE_KEYWORDS)), name) < 0:
|
|
|
|
return "keyword '" + displayName + "' can't be assigned";
|
|
|
|
case indexOf.call(STRICT_PROSCRIBED, name) < 0:
|
|
|
|
return "'" + displayName + "' can't be assigned";
|
|
|
|
case indexOf.call(RESERVED, name) < 0:
|
|
|
|
return "reserved word '" + displayName + "' can't be assigned";
|
|
|
|
default:
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
exports.isUnassignable = isUnassignable;
|
|
|
|
|
Support import and export of ES2015 modules (#4300)
This pull request adds support for ES2015 modules, by recognizing `import` and `export` statements. The following syntaxes are supported, based on the MDN [import](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Statements/import) and [export](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Statements/export) pages:
```js
import "module-name"
import defaultMember from "module-name"
import * as name from "module-name"
import { } from "module-name"
import { member } from "module-name"
import { member as alias } from "module-name"
import { member1, member2 as alias2, … } from "module-name"
import defaultMember, * as name from "module-name"
import defaultMember, { … } from "module-name"
export default expression
export class name
export { }
export { name }
export { name as exportedName }
export { name as default }
export { name1, name2 as exportedName2, name3 as default, … }
export * from "module-name"
export { … } from "module-name"
```
As a subsitute for ECMAScript’s `export var name = …` and `export function name {}`, CoffeeScript also supports:
```js
export name = …
```
CoffeeScript also supports optional commas within `{ … }`.
This PR converts the supported `import` and `export` statements into ES2015 `import` and `export` statements; it **does not resolve the modules**. So any CoffeeScript with `import` or `export` statements will be output as ES2015, and will need to be transpiled by another tool such as Babel before it can be used in a browser. We will need to add a warning to the documentation explaining this.
This should be fully backwards-compatible, as `import` and `export` were previously reserved keywords. No flags are used.
There are extensive tests included, though because no current JavaScript runtime supports `import` or `export`, the tests compare strings of what the compiled CoffeeScript output is against what the expected ES2015 should be. I also conducted two more elaborate tests:
* I forked the [ember-piqu](https://github.com/pauc/piqu-ember) project, which was an Ember CLI app that used ember-cli-coffeescript and [ember-cli-coffees6](https://github.com/alexspeller/ember-cli-coffees6) (which adds “support” for `import`/`export` by wrapping such statements in backticks before passing the result to the CoffeeScript compiler). I removed `ember-cli-coffees6` and replaced the CoffeeScript compiler used in the build chain with this code, and the app built without errors. [Demo here.](https://github.com/GeoffreyBooth/coffeescript-modules-test-piqu)
* I also forked the [CoffeeScript version of Meteor’s Todos example app](https://github.com/meteor/todos/tree/coffeescript), and replaced all of its `require` statements with the `import` and `export` statements from the original ES2015 version of the app on its `master` branch. I then updated the `coffeescript` Meteor package in the app to use this new code, and again the app builds without errors. [Demo here.](https://github.com/GeoffreyBooth/coffeescript-modules-test-meteor-todos)
The discussion history for this work started [here](https://github.com/jashkenas/coffeescript/pull/4160) and continued [here](https://github.com/GeoffreyBooth/coffeescript/pull/2). @lydell provided guidance, and @JimPanic and @rattrayalex contributed essential code.
2016-09-14 18:46:05 +00:00
|
|
|
JS_KEYWORDS = ['true', 'false', 'null', 'this', 'new', 'delete', 'typeof', 'in', 'instanceof', 'return', 'throw', 'break', 'continue', 'debugger', 'yield', 'if', 'else', 'switch', 'for', 'while', 'do', 'try', 'catch', 'finally', 'class', 'extends', 'super', 'import', 'export', 'default'];
|
2011-09-18 22:16:39 +00:00
|
|
|
|
2016-03-05 20:32:20 +00:00
|
|
|
COFFEE_KEYWORDS = ['undefined', 'Infinity', 'NaN', 'then', 'unless', 'until', 'loop', 'of', 'by', 'when'];
|
2011-09-18 22:16:39 +00:00
|
|
|
|
2011-04-30 14:35:56 +00:00
|
|
|
COFFEE_ALIAS_MAP = {
|
2010-10-05 19:46:17 +00:00
|
|
|
and: '&&',
|
|
|
|
or: '||',
|
|
|
|
is: '==',
|
|
|
|
isnt: '!=',
|
2010-10-07 15:56:01 +00:00
|
|
|
not: '!',
|
2010-10-23 18:24:30 +00:00
|
|
|
yes: 'true',
|
|
|
|
no: 'false',
|
|
|
|
on: 'true',
|
|
|
|
off: 'false'
|
2011-04-30 14:35:56 +00:00
|
|
|
};
|
2011-09-18 22:16:39 +00:00
|
|
|
|
2011-04-30 14:35:56 +00:00
|
|
|
COFFEE_ALIASES = (function() {
|
2015-01-30 19:33:03 +00:00
|
|
|
var results;
|
|
|
|
results = [];
|
2011-04-30 14:35:56 +00:00
|
|
|
for (key in COFFEE_ALIAS_MAP) {
|
2015-01-30 19:33:03 +00:00
|
|
|
results.push(key);
|
2011-04-30 14:35:56 +00:00
|
|
|
}
|
2015-01-30 19:33:03 +00:00
|
|
|
return results;
|
2011-04-30 14:35:56 +00:00
|
|
|
})();
|
2011-09-18 22:16:39 +00:00
|
|
|
|
2011-04-30 14:35:56 +00:00
|
|
|
COFFEE_KEYWORDS = COFFEE_KEYWORDS.concat(COFFEE_ALIASES);
|
2011-09-18 22:16:39 +00:00
|
|
|
|
Support import and export of ES2015 modules (#4300)
This pull request adds support for ES2015 modules, by recognizing `import` and `export` statements. The following syntaxes are supported, based on the MDN [import](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Statements/import) and [export](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Statements/export) pages:
```js
import "module-name"
import defaultMember from "module-name"
import * as name from "module-name"
import { } from "module-name"
import { member } from "module-name"
import { member as alias } from "module-name"
import { member1, member2 as alias2, … } from "module-name"
import defaultMember, * as name from "module-name"
import defaultMember, { … } from "module-name"
export default expression
export class name
export { }
export { name }
export { name as exportedName }
export { name as default }
export { name1, name2 as exportedName2, name3 as default, … }
export * from "module-name"
export { … } from "module-name"
```
As a subsitute for ECMAScript’s `export var name = …` and `export function name {}`, CoffeeScript also supports:
```js
export name = …
```
CoffeeScript also supports optional commas within `{ … }`.
This PR converts the supported `import` and `export` statements into ES2015 `import` and `export` statements; it **does not resolve the modules**. So any CoffeeScript with `import` or `export` statements will be output as ES2015, and will need to be transpiled by another tool such as Babel before it can be used in a browser. We will need to add a warning to the documentation explaining this.
This should be fully backwards-compatible, as `import` and `export` were previously reserved keywords. No flags are used.
There are extensive tests included, though because no current JavaScript runtime supports `import` or `export`, the tests compare strings of what the compiled CoffeeScript output is against what the expected ES2015 should be. I also conducted two more elaborate tests:
* I forked the [ember-piqu](https://github.com/pauc/piqu-ember) project, which was an Ember CLI app that used ember-cli-coffeescript and [ember-cli-coffees6](https://github.com/alexspeller/ember-cli-coffees6) (which adds “support” for `import`/`export` by wrapping such statements in backticks before passing the result to the CoffeeScript compiler). I removed `ember-cli-coffees6` and replaced the CoffeeScript compiler used in the build chain with this code, and the app built without errors. [Demo here.](https://github.com/GeoffreyBooth/coffeescript-modules-test-piqu)
* I also forked the [CoffeeScript version of Meteor’s Todos example app](https://github.com/meteor/todos/tree/coffeescript), and replaced all of its `require` statements with the `import` and `export` statements from the original ES2015 version of the app on its `master` branch. I then updated the `coffeescript` Meteor package in the app to use this new code, and again the app builds without errors. [Demo here.](https://github.com/GeoffreyBooth/coffeescript-modules-test-meteor-todos)
The discussion history for this work started [here](https://github.com/jashkenas/coffeescript/pull/4160) and continued [here](https://github.com/GeoffreyBooth/coffeescript/pull/2). @lydell provided guidance, and @JimPanic and @rattrayalex contributed essential code.
2016-09-14 18:46:05 +00:00
|
|
|
RESERVED = ['case', 'function', 'var', 'void', 'with', 'const', 'let', 'enum', 'native', 'implements', 'interface', 'package', 'private', 'protected', 'public', 'static'];
|
2012-01-11 23:04:14 +00:00
|
|
|
|
2016-03-05 19:59:39 +00:00
|
|
|
STRICT_PROSCRIBED = ['arguments', 'eval'];
|
2012-01-11 23:04:14 +00:00
|
|
|
|
2016-03-05 19:59:39 +00:00
|
|
|
exports.JS_FORBIDDEN = JS_KEYWORDS.concat(RESERVED).concat(STRICT_PROSCRIBED);
|
Refactor `Literal` into several subtypes
Previously, the parser created `Literal` nodes for many things. This resulted in
information loss. Instead of being able to check the node type, we had to use
regexes to tell the different types of `Literal`s apart. That was a bit like
parsing literals twice: Once in the lexer, and once (or more) in the compiler.
It also caused problems, such as `` `this` `` and `this` being indistinguishable
(fixes #2009).
Instead returning `new Literal` in the grammar, subtypes of it are now returned
instead, such as `NumberLiteral`, `StringLiteral` and `IdentifierLiteral`. `new
Literal` by itself is only used to represent code chunks that fit no category.
(While mentioning `NumberLiteral`, there's also `InfinityLiteral` now, which is
a subtype of `NumberLiteral`.)
`StringWithInterpolations` has been added as a subtype of `Parens`, and
`RegexWithInterpolations` as a subtype of `Call`. This makes it easier for other
programs to make use of CoffeeScript's "AST" (nodes). For example, it is now
possible to distinguish between `"a #{b} c"` and `"a " + b + " c"`. Fixes #4192.
`SuperCall` has been added as a subtype of `Call`.
Note, though, that some information is still lost, especially in the lexer. For
example, there is no way to distinguish a heredoc from a regular string, or a
heregex without interpolations from a regular regex. Binary and octal number
literals are indistinguishable from hexadecimal literals.
After the new subtypes were added, they were taken advantage of, removing most
regexes in nodes.coffee. `SIMPLENUM` (which matches non-hex integers) had to be
kept, though, because such numbers need special handling in JavaScript (for
example in `1..toString()`).
An especially nice hack to get rid of was using `new String()` for the token
value for reserved identifiers (to be able to set a property on them which could
survive through the parser). Now it's a good old regular string.
In range literals, slices, splices and for loop steps when number literals
are involved, CoffeeScript can do some optimizations, such as precomputing the
value of, say, `5 - 3` (outputting `2` instead of `5 - 3` literally). As a side
bonus, this now also works with hexadecimal number literals, such as `0x02`.
Finally, this also improves the output of `coffee --nodes`:
# Before:
$ bin/coffee -ne 'while true
"#{a}"
break'
Block
While
Value
Bool
Block
Value
Parens
Block
Op +
Value """"
Value
Parens
Block
Value "a" "break"
# After:
$ bin/coffee -ne 'while true
"#{a}"
break'
Block
While
Value BooleanLiteral: true
Block
Value
StringWithInterpolations
Block
Op +
Value StringLiteral: ""
Value
Parens
Block
Value IdentifierLiteral: a
StatementLiteral: break
2016-01-31 19:24:31 +00:00
|
|
|
|
2013-01-07 04:56:58 +00:00
|
|
|
BOM = 65279;
|
|
|
|
|
2015-01-06 20:32:14 +00:00
|
|
|
IDENTIFIER = /^(?!\d)((?:(?!\s)[$\w\x7f-\uffff])+)([^\n\S]*:(?!:))?/;
|
2011-09-18 22:16:39 +00:00
|
|
|
|
2012-01-20 22:23:50 +00:00
|
|
|
NUMBER = /^0b[01]+|^0o[0-7]+|^0x[\da-f]+|^\d*\.?\d+(?:e[+-]?\d+)?/i;
|
2011-09-18 22:16:39 +00:00
|
|
|
|
2014-09-06 10:55:27 +00:00
|
|
|
OPERATOR = /^(?:[-=]>|[-+*\/%<>&|^!?=]=|>>>=?|([-+:])\1|([&|<>*\/%])\2=?|\?(\.|::)|\.{2,3})/;
|
2011-09-18 22:16:39 +00:00
|
|
|
|
2010-11-09 13:25:48 +00:00
|
|
|
WHITESPACE = /^[^\n\S]+/;
|
2011-09-18 22:16:39 +00:00
|
|
|
|
2013-11-26 19:29:13 +00:00
|
|
|
COMMENT = /^###([^#][\s\S]*?)(?:###[^\n\S]*|###$)|^(?:\s*#(?!##[^#]).*)+/;
|
2011-09-18 22:16:39 +00:00
|
|
|
|
2014-09-06 10:55:27 +00:00
|
|
|
CODE = /^[-=]>/;
|
2011-09-18 22:16:39 +00:00
|
|
|
|
2010-11-09 13:25:48 +00:00
|
|
|
MULTI_DENT = /^(?:\n[^\n\S]*)+/;
|
2011-09-18 22:16:39 +00:00
|
|
|
|
2010-09-24 13:38:28 +00:00
|
|
|
JSTOKEN = /^`[^\\`]*(?:\\.[^\\`]*)*`/;
|
2011-09-18 22:16:39 +00:00
|
|
|
|
Refactor interpolation (and string and regex) handling in lexer
- Fix #3394: Unclosed single-quoted strings (both regular ones and heredocs)
used to pass through the lexer, causing a parsing error later, while
double-quoted strings caused an error already in the lexing phase. Now both
single and double-quoted unclosed strings error out in the lexer (which is the
more logical option) with consistent error messages. This also fixes the last
comment by @satyr in #3301.
- Similar to the above, unclosed heregexes also used to pass through the lexer
and not error until in the parsing phase, which resulted in confusing error
messages. This has been fixed, too.
- Fix #3348, by adding passing tests.
- Fix #3529: If a string starts with an interpolation, an empty string is no
longer emitted before the interpolation (unless it is needed to coerce the
interpolation into a string).
- Block comments cannot contain `*/`. Now the error message also shows exactly
where the offending `*/`. This improvement might seem unrelated, but I had to
touch that code anyway to refactor string and regex related code, and the
change was very trivial. Moreover, it's consistent with the next two points.
- Regexes cannot start with `*`. Now the error message also shows exactly where
the offending `*` is. (It might actually not be exatly at the start in
heregexes.) It is a very minor improvement, but it was trivial to add.
- Octal escapes in strings are forbidden in CoffeeScript (just like in
JavaScript strict mode). However, this used to be the case only for regular
strings. Now they are also forbidden in heredocs. Moreover, the errors now
point at the offending octal escape.
- Invalid regex flags are no longer allowed. This includes repeated modifiers
and unknown ones. Moreover, invalid modifiers do not stop a heregex from
being matched, which results in better error messages.
- Fix #3621: `///a#{1}///` compiles to `RegExp("a" + 1)`. So does
`RegExp("a#{1}")`. Still, those two code snippets used to generate different
tokens, which is a bit weird, but more importantly causes problems for
coffeelint (see clutchski/coffeelint#340). This required lots of tests in
test/location.coffee to be updated. Note that some updates to those tests are
unrelated to this point; some have been updated to be more consistent (I
discovered this because the refactored code happened to be seemingly more
correct).
- Regular regex literals used to erraneously allow newlines to be escaped,
causing invalid JavaScript output. This has been fixed.
- Heregexes may now be completely empty (`//////`), instead of erroring out with
a confusing message.
- Fix #2388: Heredocs and heregexes used to be lexed simply, which meant that
you couldn't nest a heredoc within a heredoc (double-quoted, that is) or a
heregex inside a heregex.
- Fix #2321: If you used division inside interpolation and then a slash later in
the string containing that interpolation, the division slash and the latter
slash was erraneously matched as a regex. This has been fixed.
- Indentation inside interpolations in heredocs no longer affect how much
indentation is removed from each line of the heredoc (which is more
intuitive).
- Whitespace is now correctly trimmed from the start and end of strings in a few
edge cases.
- Last but not least, the lexing of interpolated strings now seems to be more
efficient. For a regular double-quoted string, we used to use a custom
function to find the end of it (taking interpolations and interpolations
within interpolations etc. into account). Then we used to re-find the
interpolations and recursively lex their contents. In effect, the same string
was processed twice, or even more in the case of deeper nesting of
interpolations. Now the same string is processed just once.
- Code duplication between regular strings, heredocs, regular regexes and
heregexes has been reduced.
- The above two points should result in more easily read code, too.
2015-01-03 22:40:43 +00:00
|
|
|
STRING_START = /^(?:'''|"""|'|")/;
|
|
|
|
|
|
|
|
STRING_SINGLE = /^(?:[^\\']|\\[\s\S])*/;
|
|
|
|
|
|
|
|
STRING_DOUBLE = /^(?:[^\\"#]|\\[\s\S]|\#(?!\{))*/;
|
|
|
|
|
|
|
|
HEREDOC_SINGLE = /^(?:[^\\']|\\[\s\S]|'(?!''))*/;
|
|
|
|
|
|
|
|
HEREDOC_DOUBLE = /^(?:[^\\"#]|\\[\s\S]|"(?!"")|\#(?!\{))*/;
|
|
|
|
|
2015-02-05 16:23:03 +00:00
|
|
|
STRING_OMIT = /((?:\\\\)+)|\\[^\S\n]*\n\s*/g;
|
|
|
|
|
|
|
|
SIMPLE_STRING_OMIT = /\s*\n\s*/g;
|
Refactor interpolation (and string and regex) handling in lexer
- Fix #3394: Unclosed single-quoted strings (both regular ones and heredocs)
used to pass through the lexer, causing a parsing error later, while
double-quoted strings caused an error already in the lexing phase. Now both
single and double-quoted unclosed strings error out in the lexer (which is the
more logical option) with consistent error messages. This also fixes the last
comment by @satyr in #3301.
- Similar to the above, unclosed heregexes also used to pass through the lexer
and not error until in the parsing phase, which resulted in confusing error
messages. This has been fixed, too.
- Fix #3348, by adding passing tests.
- Fix #3529: If a string starts with an interpolation, an empty string is no
longer emitted before the interpolation (unless it is needed to coerce the
interpolation into a string).
- Block comments cannot contain `*/`. Now the error message also shows exactly
where the offending `*/`. This improvement might seem unrelated, but I had to
touch that code anyway to refactor string and regex related code, and the
change was very trivial. Moreover, it's consistent with the next two points.
- Regexes cannot start with `*`. Now the error message also shows exactly where
the offending `*` is. (It might actually not be exatly at the start in
heregexes.) It is a very minor improvement, but it was trivial to add.
- Octal escapes in strings are forbidden in CoffeeScript (just like in
JavaScript strict mode). However, this used to be the case only for regular
strings. Now they are also forbidden in heredocs. Moreover, the errors now
point at the offending octal escape.
- Invalid regex flags are no longer allowed. This includes repeated modifiers
and unknown ones. Moreover, invalid modifiers do not stop a heregex from
being matched, which results in better error messages.
- Fix #3621: `///a#{1}///` compiles to `RegExp("a" + 1)`. So does
`RegExp("a#{1}")`. Still, those two code snippets used to generate different
tokens, which is a bit weird, but more importantly causes problems for
coffeelint (see clutchski/coffeelint#340). This required lots of tests in
test/location.coffee to be updated. Note that some updates to those tests are
unrelated to this point; some have been updated to be more consistent (I
discovered this because the refactored code happened to be seemingly more
correct).
- Regular regex literals used to erraneously allow newlines to be escaped,
causing invalid JavaScript output. This has been fixed.
- Heregexes may now be completely empty (`//////`), instead of erroring out with
a confusing message.
- Fix #2388: Heredocs and heregexes used to be lexed simply, which meant that
you couldn't nest a heredoc within a heredoc (double-quoted, that is) or a
heregex inside a heregex.
- Fix #2321: If you used division inside interpolation and then a slash later in
the string containing that interpolation, the division slash and the latter
slash was erraneously matched as a regex. This has been fixed.
- Indentation inside interpolations in heredocs no longer affect how much
indentation is removed from each line of the heredoc (which is more
intuitive).
- Whitespace is now correctly trimmed from the start and end of strings in a few
edge cases.
- Last but not least, the lexing of interpolated strings now seems to be more
efficient. For a regular double-quoted string, we used to use a custom
function to find the end of it (taking interpolations and interpolations
within interpolations etc. into account). Then we used to re-find the
interpolations and recursively lex their contents. In effect, the same string
was processed twice, or even more in the case of deeper nesting of
interpolations. Now the same string is processed just once.
- Code duplication between regular strings, heredocs, regular regexes and
heregexes has been reduced.
- The above two points should result in more easily read code, too.
2015-01-03 22:40:43 +00:00
|
|
|
|
|
|
|
HEREDOC_INDENT = /\n+([^\n\S]*)(?=\S)/g;
|
|
|
|
|
2015-03-09 04:59:09 +00:00
|
|
|
REGEX = /^\/(?!\/)((?:[^[\/\n\\]|\\[^\n]|\[(?:\\[^\n]|[^\]\n\\])*\])*)(\/)?/;
|
2011-09-18 22:16:39 +00:00
|
|
|
|
Refactor interpolation (and string and regex) handling in lexer
- Fix #3394: Unclosed single-quoted strings (both regular ones and heredocs)
used to pass through the lexer, causing a parsing error later, while
double-quoted strings caused an error already in the lexing phase. Now both
single and double-quoted unclosed strings error out in the lexer (which is the
more logical option) with consistent error messages. This also fixes the last
comment by @satyr in #3301.
- Similar to the above, unclosed heregexes also used to pass through the lexer
and not error until in the parsing phase, which resulted in confusing error
messages. This has been fixed, too.
- Fix #3348, by adding passing tests.
- Fix #3529: If a string starts with an interpolation, an empty string is no
longer emitted before the interpolation (unless it is needed to coerce the
interpolation into a string).
- Block comments cannot contain `*/`. Now the error message also shows exactly
where the offending `*/`. This improvement might seem unrelated, but I had to
touch that code anyway to refactor string and regex related code, and the
change was very trivial. Moreover, it's consistent with the next two points.
- Regexes cannot start with `*`. Now the error message also shows exactly where
the offending `*` is. (It might actually not be exatly at the start in
heregexes.) It is a very minor improvement, but it was trivial to add.
- Octal escapes in strings are forbidden in CoffeeScript (just like in
JavaScript strict mode). However, this used to be the case only for regular
strings. Now they are also forbidden in heredocs. Moreover, the errors now
point at the offending octal escape.
- Invalid regex flags are no longer allowed. This includes repeated modifiers
and unknown ones. Moreover, invalid modifiers do not stop a heregex from
being matched, which results in better error messages.
- Fix #3621: `///a#{1}///` compiles to `RegExp("a" + 1)`. So does
`RegExp("a#{1}")`. Still, those two code snippets used to generate different
tokens, which is a bit weird, but more importantly causes problems for
coffeelint (see clutchski/coffeelint#340). This required lots of tests in
test/location.coffee to be updated. Note that some updates to those tests are
unrelated to this point; some have been updated to be more consistent (I
discovered this because the refactored code happened to be seemingly more
correct).
- Regular regex literals used to erraneously allow newlines to be escaped,
causing invalid JavaScript output. This has been fixed.
- Heregexes may now be completely empty (`//////`), instead of erroring out with
a confusing message.
- Fix #2388: Heredocs and heregexes used to be lexed simply, which meant that
you couldn't nest a heredoc within a heredoc (double-quoted, that is) or a
heregex inside a heregex.
- Fix #2321: If you used division inside interpolation and then a slash later in
the string containing that interpolation, the division slash and the latter
slash was erraneously matched as a regex. This has been fixed.
- Indentation inside interpolations in heredocs no longer affect how much
indentation is removed from each line of the heredoc (which is more
intuitive).
- Whitespace is now correctly trimmed from the start and end of strings in a few
edge cases.
- Last but not least, the lexing of interpolated strings now seems to be more
efficient. For a regular double-quoted string, we used to use a custom
function to find the end of it (taking interpolations and interpolations
within interpolations etc. into account). Then we used to re-find the
interpolations and recursively lex their contents. In effect, the same string
was processed twice, or even more in the case of deeper nesting of
interpolations. Now the same string is processed just once.
- Code duplication between regular strings, heredocs, regular regexes and
heregexes has been reduced.
- The above two points should result in more easily read code, too.
2015-01-03 22:40:43 +00:00
|
|
|
REGEX_FLAGS = /^\w*/;
|
|
|
|
|
|
|
|
VALID_FLAGS = /^(?!.*(.).*\1)[imgy]*$/;
|
|
|
|
|
|
|
|
HEREGEX = /^(?:[^\\\/#]|\\[\s\S]|\/(?!\/\/)|\#(?!\{))*/;
|
2011-09-18 22:16:39 +00:00
|
|
|
|
2015-02-05 16:23:03 +00:00
|
|
|
HEREGEX_OMIT = /((?:\\\\)+)|\\(\s)|\s+(?:#.*)?/g;
|
2011-09-18 22:16:39 +00:00
|
|
|
|
Refactor interpolation (and string and regex) handling in lexer
- Fix #3394: Unclosed single-quoted strings (both regular ones and heredocs)
used to pass through the lexer, causing a parsing error later, while
double-quoted strings caused an error already in the lexing phase. Now both
single and double-quoted unclosed strings error out in the lexer (which is the
more logical option) with consistent error messages. This also fixes the last
comment by @satyr in #3301.
- Similar to the above, unclosed heregexes also used to pass through the lexer
and not error until in the parsing phase, which resulted in confusing error
messages. This has been fixed, too.
- Fix #3348, by adding passing tests.
- Fix #3529: If a string starts with an interpolation, an empty string is no
longer emitted before the interpolation (unless it is needed to coerce the
interpolation into a string).
- Block comments cannot contain `*/`. Now the error message also shows exactly
where the offending `*/`. This improvement might seem unrelated, but I had to
touch that code anyway to refactor string and regex related code, and the
change was very trivial. Moreover, it's consistent with the next two points.
- Regexes cannot start with `*`. Now the error message also shows exactly where
the offending `*` is. (It might actually not be exatly at the start in
heregexes.) It is a very minor improvement, but it was trivial to add.
- Octal escapes in strings are forbidden in CoffeeScript (just like in
JavaScript strict mode). However, this used to be the case only for regular
strings. Now they are also forbidden in heredocs. Moreover, the errors now
point at the offending octal escape.
- Invalid regex flags are no longer allowed. This includes repeated modifiers
and unknown ones. Moreover, invalid modifiers do not stop a heregex from
being matched, which results in better error messages.
- Fix #3621: `///a#{1}///` compiles to `RegExp("a" + 1)`. So does
`RegExp("a#{1}")`. Still, those two code snippets used to generate different
tokens, which is a bit weird, but more importantly causes problems for
coffeelint (see clutchski/coffeelint#340). This required lots of tests in
test/location.coffee to be updated. Note that some updates to those tests are
unrelated to this point; some have been updated to be more consistent (I
discovered this because the refactored code happened to be seemingly more
correct).
- Regular regex literals used to erraneously allow newlines to be escaped,
causing invalid JavaScript output. This has been fixed.
- Heregexes may now be completely empty (`//////`), instead of erroring out with
a confusing message.
- Fix #2388: Heredocs and heregexes used to be lexed simply, which meant that
you couldn't nest a heredoc within a heredoc (double-quoted, that is) or a
heregex inside a heregex.
- Fix #2321: If you used division inside interpolation and then a slash later in
the string containing that interpolation, the division slash and the latter
slash was erraneously matched as a regex. This has been fixed.
- Indentation inside interpolations in heredocs no longer affect how much
indentation is removed from each line of the heredoc (which is more
intuitive).
- Whitespace is now correctly trimmed from the start and end of strings in a few
edge cases.
- Last but not least, the lexing of interpolated strings now seems to be more
efficient. For a regular double-quoted string, we used to use a custom
function to find the end of it (taking interpolations and interpolations
within interpolations etc. into account). Then we used to re-find the
interpolations and recursively lex their contents. In effect, the same string
was processed twice, or even more in the case of deeper nesting of
interpolations. Now the same string is processed just once.
- Code duplication between regular strings, heredocs, regular regexes and
heregexes has been reduced.
- The above two points should result in more easily read code, too.
2015-01-03 22:40:43 +00:00
|
|
|
REGEX_ILLEGAL = /^(\/|\/{3}\s*)(\*)/;
|
2011-09-18 22:16:39 +00:00
|
|
|
|
2015-01-10 00:48:00 +00:00
|
|
|
POSSIBLY_DIVISION = /^\/=?\s/;
|
|
|
|
|
Refactor interpolation (and string and regex) handling in lexer
- Fix #3394: Unclosed single-quoted strings (both regular ones and heredocs)
used to pass through the lexer, causing a parsing error later, while
double-quoted strings caused an error already in the lexing phase. Now both
single and double-quoted unclosed strings error out in the lexer (which is the
more logical option) with consistent error messages. This also fixes the last
comment by @satyr in #3301.
- Similar to the above, unclosed heregexes also used to pass through the lexer
and not error until in the parsing phase, which resulted in confusing error
messages. This has been fixed, too.
- Fix #3348, by adding passing tests.
- Fix #3529: If a string starts with an interpolation, an empty string is no
longer emitted before the interpolation (unless it is needed to coerce the
interpolation into a string).
- Block comments cannot contain `*/`. Now the error message also shows exactly
where the offending `*/`. This improvement might seem unrelated, but I had to
touch that code anyway to refactor string and regex related code, and the
change was very trivial. Moreover, it's consistent with the next two points.
- Regexes cannot start with `*`. Now the error message also shows exactly where
the offending `*` is. (It might actually not be exatly at the start in
heregexes.) It is a very minor improvement, but it was trivial to add.
- Octal escapes in strings are forbidden in CoffeeScript (just like in
JavaScript strict mode). However, this used to be the case only for regular
strings. Now they are also forbidden in heredocs. Moreover, the errors now
point at the offending octal escape.
- Invalid regex flags are no longer allowed. This includes repeated modifiers
and unknown ones. Moreover, invalid modifiers do not stop a heregex from
being matched, which results in better error messages.
- Fix #3621: `///a#{1}///` compiles to `RegExp("a" + 1)`. So does
`RegExp("a#{1}")`. Still, those two code snippets used to generate different
tokens, which is a bit weird, but more importantly causes problems for
coffeelint (see clutchski/coffeelint#340). This required lots of tests in
test/location.coffee to be updated. Note that some updates to those tests are
unrelated to this point; some have been updated to be more consistent (I
discovered this because the refactored code happened to be seemingly more
correct).
- Regular regex literals used to erraneously allow newlines to be escaped,
causing invalid JavaScript output. This has been fixed.
- Heregexes may now be completely empty (`//////`), instead of erroring out with
a confusing message.
- Fix #2388: Heredocs and heregexes used to be lexed simply, which meant that
you couldn't nest a heredoc within a heredoc (double-quoted, that is) or a
heregex inside a heregex.
- Fix #2321: If you used division inside interpolation and then a slash later in
the string containing that interpolation, the division slash and the latter
slash was erraneously matched as a regex. This has been fixed.
- Indentation inside interpolations in heredocs no longer affect how much
indentation is removed from each line of the heredoc (which is more
intuitive).
- Whitespace is now correctly trimmed from the start and end of strings in a few
edge cases.
- Last but not least, the lexing of interpolated strings now seems to be more
efficient. For a regular double-quoted string, we used to use a custom
function to find the end of it (taking interpolations and interpolations
within interpolations etc. into account). Then we used to re-find the
interpolations and recursively lex their contents. In effect, the same string
was processed twice, or even more in the case of deeper nesting of
interpolations. Now the same string is processed just once.
- Code duplication between regular strings, heredocs, regular regexes and
heregexes has been reduced.
- The above two points should result in more easily read code, too.
2015-01-03 22:40:43 +00:00
|
|
|
HERECOMMENT_ILLEGAL = /\*\//;
|
2011-09-18 22:16:39 +00:00
|
|
|
|
2011-02-27 07:11:35 +00:00
|
|
|
LINE_CONTINUER = /^\s*(?:,|\??\.(?![.\d])|::)/;
|
2011-09-18 22:16:39 +00:00
|
|
|
|
2015-02-05 16:23:03 +00:00
|
|
|
INVALID_ESCAPE = /((?:^|[^\\])(?:\\\\)*)\\(?:(0[0-7]|[1-7])|(x(?![\da-fA-F]{2}).{0,2})|(u(?![\da-fA-F]{4}).{0,4}))/;
|
Refactor interpolation (and string and regex) handling in lexer
- Fix #3394: Unclosed single-quoted strings (both regular ones and heredocs)
used to pass through the lexer, causing a parsing error later, while
double-quoted strings caused an error already in the lexing phase. Now both
single and double-quoted unclosed strings error out in the lexer (which is the
more logical option) with consistent error messages. This also fixes the last
comment by @satyr in #3301.
- Similar to the above, unclosed heregexes also used to pass through the lexer
and not error until in the parsing phase, which resulted in confusing error
messages. This has been fixed, too.
- Fix #3348, by adding passing tests.
- Fix #3529: If a string starts with an interpolation, an empty string is no
longer emitted before the interpolation (unless it is needed to coerce the
interpolation into a string).
- Block comments cannot contain `*/`. Now the error message also shows exactly
where the offending `*/`. This improvement might seem unrelated, but I had to
touch that code anyway to refactor string and regex related code, and the
change was very trivial. Moreover, it's consistent with the next two points.
- Regexes cannot start with `*`. Now the error message also shows exactly where
the offending `*` is. (It might actually not be exatly at the start in
heregexes.) It is a very minor improvement, but it was trivial to add.
- Octal escapes in strings are forbidden in CoffeeScript (just like in
JavaScript strict mode). However, this used to be the case only for regular
strings. Now they are also forbidden in heredocs. Moreover, the errors now
point at the offending octal escape.
- Invalid regex flags are no longer allowed. This includes repeated modifiers
and unknown ones. Moreover, invalid modifiers do not stop a heregex from
being matched, which results in better error messages.
- Fix #3621: `///a#{1}///` compiles to `RegExp("a" + 1)`. So does
`RegExp("a#{1}")`. Still, those two code snippets used to generate different
tokens, which is a bit weird, but more importantly causes problems for
coffeelint (see clutchski/coffeelint#340). This required lots of tests in
test/location.coffee to be updated. Note that some updates to those tests are
unrelated to this point; some have been updated to be more consistent (I
discovered this because the refactored code happened to be seemingly more
correct).
- Regular regex literals used to erraneously allow newlines to be escaped,
causing invalid JavaScript output. This has been fixed.
- Heregexes may now be completely empty (`//////`), instead of erroring out with
a confusing message.
- Fix #2388: Heredocs and heregexes used to be lexed simply, which meant that
you couldn't nest a heredoc within a heredoc (double-quoted, that is) or a
heregex inside a heregex.
- Fix #2321: If you used division inside interpolation and then a slash later in
the string containing that interpolation, the division slash and the latter
slash was erraneously matched as a regex. This has been fixed.
- Indentation inside interpolations in heredocs no longer affect how much
indentation is removed from each line of the heredoc (which is more
intuitive).
- Whitespace is now correctly trimmed from the start and end of strings in a few
edge cases.
- Last but not least, the lexing of interpolated strings now seems to be more
efficient. For a regular double-quoted string, we used to use a custom
function to find the end of it (taking interpolations and interpolations
within interpolations etc. into account). Then we used to re-find the
interpolations and recursively lex their contents. In effect, the same string
was processed twice, or even more in the case of deeper nesting of
interpolations. Now the same string is processed just once.
- Code duplication between regular strings, heredocs, regular regexes and
heregexes has been reduced.
- The above two points should result in more easily read code, too.
2015-01-03 22:40:43 +00:00
|
|
|
|
|
|
|
LEADING_BLANK_LINE = /^[^\n\S]*\n/;
|
|
|
|
|
|
|
|
TRAILING_BLANK_LINE = /\n[^\n\S]*$/;
|
|
|
|
|
2010-10-03 23:22:42 +00:00
|
|
|
TRAILING_SPACES = /\s+$/;
|
2011-09-18 22:16:39 +00:00
|
|
|
|
2013-03-25 06:19:05 +00:00
|
|
|
COMPOUND_ASSIGN = ['-=', '+=', '/=', '*=', '%=', '||=', '&&=', '?=', '<<=', '>>=', '>>>=', '&=', '^=', '|=', '**=', '//=', '%%='];
|
2011-09-18 22:16:39 +00:00
|
|
|
|
2014-09-06 11:53:21 +00:00
|
|
|
UNARY = ['NEW', 'TYPEOF', 'DELETE', 'DO'];
|
2011-09-18 22:16:39 +00:00
|
|
|
|
2013-03-25 03:05:04 +00:00
|
|
|
UNARY_MATH = ['!', '~'];
|
2011-09-18 22:16:39 +00:00
|
|
|
|
2010-08-12 02:24:43 +00:00
|
|
|
SHIFT = ['<<', '>>', '>>>'];
|
2011-09-18 22:16:39 +00:00
|
|
|
|
2010-10-23 18:24:30 +00:00
|
|
|
COMPARE = ['==', '!=', '<', '>', '<=', '>='];
|
2011-09-18 22:16:39 +00:00
|
|
|
|
2013-03-25 06:19:05 +00:00
|
|
|
MATH = ['*', '/', '%', '//', '%%'];
|
2011-09-18 22:16:39 +00:00
|
|
|
|
2010-10-05 15:43:44 +00:00
|
|
|
RELATION = ['IN', 'OF', 'INSTANCEOF'];
|
2011-09-18 22:16:39 +00:00
|
|
|
|
2012-05-08 20:11:23 +00:00
|
|
|
BOOL = ['TRUE', 'FALSE'];
|
2011-09-18 22:16:39 +00:00
|
|
|
|
2016-03-05 16:41:15 +00:00
|
|
|
CALLABLE = ['IDENTIFIER', 'PROPERTY', ')', ']', '?', '@', 'THIS', 'SUPER'];
|
2011-09-18 22:16:39 +00:00
|
|
|
|
2016-03-05 20:32:20 +00:00
|
|
|
INDEXABLE = CALLABLE.concat(['NUMBER', 'INFINITY', 'NAN', 'STRING', 'STRING_END', 'REGEX', 'REGEX_END', 'BOOL', 'NULL', 'UNDEFINED', '}', '::']);
|
2011-09-18 22:16:39 +00:00
|
|
|
|
2015-01-10 00:48:00 +00:00
|
|
|
NOT_REGEX = INDEXABLE.concat(['++', '--']);
|
2011-09-18 22:16:39 +00:00
|
|
|
|
2010-03-20 04:58:25 +00:00
|
|
|
LINE_BREAK = ['INDENT', 'OUTDENT', 'TERMINATOR'];
|
2011-12-14 15:39:20 +00:00
|
|
|
|
2014-01-22 18:30:13 +00:00
|
|
|
INDENTABLE_CLOSERS = [')', '}', ']'];
|
|
|
|
|
2010-09-21 07:53:58 +00:00
|
|
|
}).call(this);
|