mirror of
https://github.com/jashkenas/coffeescript.git
synced 2022-11-09 12:23:24 -05:00
Rewriting string tokenizer; allowing nested double-quoted strings inside expression interpolations.
This commit is contained in:
parent
e977967eb5
commit
83fd84745d
3 changed files with 95 additions and 5 deletions
63
lib/lexer.js
63
lib/lexer.js
|
@ -1,5 +1,5 @@
|
|||
(function(){
|
||||
var ACCESSORS, ASSIGNMENT, BEFORE_WHEN, CALLABLE, CODE, COFFEE_KEYWORDS, COMMENT, COMMENT_CLEANER, HEREDOC, HEREDOC_INDENT, IDENTIFIER, INTERPOLATION, JS, JS_CLEANER, JS_FORBIDDEN, JS_KEYWORDS, KEYWORDS, LAST_DENT, LAST_DENTS, Lexer, MULTILINER, MULTI_DENT, NOT_REGEX, NO_NEWLINE, NUMBER, OPERATOR, REGEX, RESERVED, Rewriter, STRING, STRING_NEWLINES, WHITESPACE, compact, count, include;
|
||||
var ACCESSORS, ASSIGNMENT, BEFORE_WHEN, CALLABLE, CODE, COFFEE_KEYWORDS, COMMENT, COMMENT_CLEANER, HEREDOC, HEREDOC_INDENT, IDENTIFIER, INTERPOLATION, JS, JS_CLEANER, JS_FORBIDDEN, JS_KEYWORDS, KEYWORDS, LAST_DENT, LAST_DENTS, Lexer, MULTILINER, MULTI_DENT, NOT_REGEX, NO_NEWLINE, NUMBER, OPERATOR, REGEX, RESERVED, Rewriter, STRING_NEWLINES, WHITESPACE, compact, count, include;
|
||||
// The CoffeeScript Lexer. Uses a series of token-matching regexes to attempt
|
||||
// matches against the beginning of the source code. When a match is found,
|
||||
// a token is produced, we consume the match, and start again. Tokens are in the
|
||||
|
@ -33,7 +33,6 @@
|
|||
// Token matching regexes.
|
||||
IDENTIFIER = /^([a-zA-Z$_](\w|\$)*)/;
|
||||
NUMBER = /^(\b((0(x|X)[0-9a-fA-F]+)|([0-9]+(\.[0-9]+)?(e[+\-]?[0-9]+)?)))\b/i;
|
||||
STRING = /^(""|''|"([\s\S]*?)([^\\]|\\\\)"|'([\s\S]*?)([^\\]|\\\\)')/;
|
||||
HEREDOC = /^("{6}|'{6}|"{3}\n?([\s\S]*?)\n?([ \t]*)"{3}|'{3}\n?([\s\S]*?)\n?([ \t]*)'{3})/;
|
||||
INTERPOLATION = /(^|[\s\S]*?(?:[\\]|\\\\)?)\$([a-zA-Z_@]\w*|{[\s\S]*?(?:[^\\]|\\\\)})/;
|
||||
JS = /^(``|`([\s\S]*?)([^\\]|\\\\)`)/;
|
||||
|
@ -169,7 +168,11 @@
|
|||
// Matches strings, including multi-line strings.
|
||||
Lexer.prototype.string_token = function string_token() {
|
||||
var string;
|
||||
if (!((string = this.match(STRING, 1)))) {
|
||||
string = this.balanced_group(['"'], ['${', '}']);
|
||||
if (string === false) {
|
||||
string = this.balanced_group(["'"]);
|
||||
}
|
||||
if (!(string)) {
|
||||
return false;
|
||||
}
|
||||
this.interpolate_string(string.replace(STRING_NEWLINES, " \\\n"));
|
||||
|
@ -212,6 +215,60 @@
|
|||
this.i += regex.length;
|
||||
return true;
|
||||
};
|
||||
// Matches a balanced group such as a single or double-quoted string.
|
||||
Lexer.prototype.balanced_group = function balanced_group() {
|
||||
var _a, _b, _c, _d, _e, _f, delimited, each, escaped, i, levels, next, type;
|
||||
delimited = Array.prototype.slice.call(arguments, 0);
|
||||
_a = delimited;
|
||||
for (_b = 0, _c = _a.length; _b < _c; _b++) {
|
||||
each = _a[_b];
|
||||
!(typeof (_d = each[1]) !== "undefined" && _d !== null) ? ((each[1] = each[0])) : null;
|
||||
}
|
||||
escaped = '\\';
|
||||
next = (function(__this) {
|
||||
var __func = function(length) {
|
||||
return this.chunk.substring(i, i + length);
|
||||
};
|
||||
return (function next() {
|
||||
return __func.apply(__this, arguments);
|
||||
});
|
||||
})(this);
|
||||
levels = [];
|
||||
i = 0;
|
||||
while (i < this.chunk.length) {
|
||||
if (next(1) === escaped) {
|
||||
i += 1;
|
||||
} else {
|
||||
_e = delimited;
|
||||
for (type = 0, _f = _e.length; type < _f; type++) {
|
||||
each = _e[type];
|
||||
if (levels.length && next(each[1].length) === each[1] && levels[levels.length - 1] === type) {
|
||||
levels.pop();
|
||||
i += each[1].length - 1;
|
||||
if (!(levels.length)) {
|
||||
i += 1;
|
||||
}
|
||||
break;
|
||||
} else if (next(each[0].length) === each[0]) {
|
||||
levels.push(type);
|
||||
i += each[0].length - 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (!(levels.length)) {
|
||||
break;
|
||||
}
|
||||
i += 1;
|
||||
}
|
||||
if (levels.length) {
|
||||
throw new Error("SyntaxError: Unterminated " + (delimited[levels.pop()][0]) + " starting on line " + this.line);
|
||||
}
|
||||
if (i === 0) {
|
||||
return false;
|
||||
}
|
||||
return this.chunk.substring(0, i);
|
||||
};
|
||||
// Matches and consumes comments.
|
||||
Lexer.prototype.comment_token = function comment_token() {
|
||||
var comment, lines;
|
||||
|
|
|
@ -58,7 +58,6 @@ JS_FORBIDDEN: JS_KEYWORDS.concat RESERVED
|
|||
# Token matching regexes.
|
||||
IDENTIFIER : /^([a-zA-Z$_](\w|\$)*)/
|
||||
NUMBER : /^(\b((0(x|X)[0-9a-fA-F]+)|([0-9]+(\.[0-9]+)?(e[+\-]?[0-9]+)?)))\b/i
|
||||
STRING : /^(""|''|"([\s\S]*?)([^\\]|\\\\)"|'([\s\S]*?)([^\\]|\\\\)')/
|
||||
HEREDOC : /^("{6}|'{6}|"{3}\n?([\s\S]*?)\n?([ \t]*)"{3}|'{3}\n?([\s\S]*?)\n?([ \t]*)'{3})/
|
||||
INTERPOLATION : /(^|[\s\S]*?(?:[\\]|\\\\)?)\$([a-zA-Z_@]\w*|{[\s\S]*?(?:[^\\]|\\\\)})/
|
||||
JS : /^(``|`([\s\S]*?)([^\\]|\\\\)`)/
|
||||
|
@ -167,7 +166,9 @@ exports.Lexer: class Lexer
|
|||
|
||||
# Matches strings, including multi-line strings.
|
||||
string_token: ->
|
||||
return false unless string: @match STRING, 1
|
||||
string: @balanced_group ['"'], ['${', '}']
|
||||
string: @balanced_group ["'"] if string is false
|
||||
return false unless string
|
||||
@interpolate_string string.replace STRING_NEWLINES, " \\\n"
|
||||
@line += count string, "\n"
|
||||
@i += string.length
|
||||
|
@ -197,6 +198,33 @@ exports.Lexer: class Lexer
|
|||
@i += regex.length
|
||||
true
|
||||
|
||||
# Matches a balanced group such as a single or double-quoted string.
|
||||
balanced_group: (delimited...) ->
|
||||
(each[1]: each[0]) for each in delimited when not each[1]?
|
||||
escaped: '\\'
|
||||
next: (length) => @chunk.substring i, i + length
|
||||
levels: []
|
||||
i: 0
|
||||
while i < @chunk.length
|
||||
if next(1) is escaped
|
||||
i += 1
|
||||
else
|
||||
for each, type in delimited
|
||||
if levels.length and next(each[1].length) is each[1] and levels[levels.length - 1] is type
|
||||
levels.pop()
|
||||
i += each[1].length - 1
|
||||
i += 1 unless levels.length
|
||||
break
|
||||
else if next(each[0].length) is each[0]
|
||||
levels.push(type)
|
||||
i += each[0].length - 1
|
||||
break
|
||||
break unless levels.length
|
||||
i += 1
|
||||
throw new Error "SyntaxError: Unterminated ${delimited[levels.pop()][0]} starting on line $@line" if levels.length
|
||||
return false if i is 0
|
||||
return @chunk.substring(0, i)
|
||||
|
||||
# Matches and consumes comments.
|
||||
comment_token: ->
|
||||
return false unless comment: @match COMMENT, 1
|
||||
|
|
|
@ -48,3 +48,8 @@ obj: {
|
|||
hi: -> "Hello $@name."
|
||||
}
|
||||
ok obj.hi() is "Hello Joe."
|
||||
|
||||
ok "I can has ${"cheeze"}" is 'I can has cheeze'
|
||||
ok 'I can has ${"cheeze"}' is 'I can has ${"cheeze"}'
|
||||
|
||||
ok "Where is ${obj["name"] + '?'}" is 'Where is Joe?'
|
||||
|
|
Loading…
Reference in a new issue