some edits to the interpolation path

Jeremy Ashkenas 2010-03-05 20:42:36 -05:00
parent 75be5eed62
commit d250e9e9cc
2 changed files with 80 additions and 60 deletions
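
The gist of the change: Lexer#tokenize now takes an options object instead of a bare `rewrite` boolean, and the string-interpolation regexes are hoisted into named constants. A minimal sketch of the lexer's surface after this commit; the require path and sample sources are illustrative assumptions, not taken from the diff:

    // Sketch only: the require path is an assumption about the repo layout.
    var Lexer = require('./lib/lexer').Lexer;
    var lexer = new Lexer();

    // Default behavior: the raw token stream is passed through the Rewriter.
    var rewritten = lexer.tokenize('greeting: "Hello $name."');

    // New options form: skip the rewriter pass. interpolate_string uses
    // this internally when tokenizing the body of a "${...}" expression.
    var raw = lexer.tokenize('("Hello ${name}")', {rewrite: false});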

lib/lexer.js

@@ -1,5 +1,5 @@
(function(){
-var ACCESSORS, ASSIGNMENT, BEFORE_WHEN, CALLABLE, CODE, COFFEE_KEYWORDS, COMMENT, COMMENT_CLEANER, HEREDOC, HEREDOC_INDENT, IDENTIFIER, JS, JS_CLEANER, JS_FORBIDDEN, JS_KEYWORDS, KEYWORDS, LAST_DENT, LAST_DENTS, Lexer, MULTILINER, MULTI_DENT, NOT_REGEX, NO_NEWLINE, NUMBER, OPERATOR, REGEX, RESERVED, Rewriter, STRING, STRING_NEWLINES, WHITESPACE, compact, count, include;
+var ACCESSORS, ASSIGNMENT, BEFORE_WHEN, CALLABLE, CODE, COFFEE_KEYWORDS, COMMENT, COMMENT_CLEANER, HEREDOC, HEREDOC_INDENT, IDENTIFIER, INTERPOLATED_EXPRESSION, INTERPOLATED_IDENTIFIER, JS, JS_CLEANER, JS_FORBIDDEN, JS_KEYWORDS, KEYWORDS, LAST_DENT, LAST_DENTS, Lexer, MULTILINER, MULTI_DENT, NOT_REGEX, NO_NEWLINE, NUMBER, OPERATOR, REGEX, RESERVED, Rewriter, STRING, STRING_NEWLINES, WHITESPACE, compact, count, include;
// The CoffeeScript Lexer. Uses a series of token-matching regexes to attempt
// matches against the beginning of the source code. When a match is found,
// a token is produced, we consume the match, and start again. Tokens are in the
@@ -45,6 +45,9 @@
LAST_DENTS = /\n([ \t]*)/g;
LAST_DENT = /\n([ \t]*)/;
ASSIGNMENT = /^(:|=)$/;
+// Interpolation matching regexes.
+INTERPOLATED_EXPRESSION = /(^|[\s\S]*?(?:[\\]|\\\\)?)(\${[\s\S]*?(?:[^\\]|\\\\)})/;
+INTERPOLATED_IDENTIFIER = /(^|[\s\S]*?(?:[\\]|\\\\)?)(\$([a-zA-Z_]\w*))/;
// Token cleaning regexes.
JS_CLEANER = /(^`|`$)/g;
MULTILINER = /\n/g;
@@ -76,7 +79,8 @@
exports.Lexer = (function() {
Lexer = function Lexer() { };
// Scan by attempting to match tokens one at a time. Slow and steady.
-Lexer.prototype.tokenize = function tokenize(code, rewrite) {
+Lexer.prototype.tokenize = function tokenize(code, options) {
+options = options || {};
this.code = code;
// The remainder of the source code.
this.i = 0;
@@ -94,10 +98,10 @@
this.extract_next_token();
}
this.close_indentation();
-if (((typeof rewrite !== "undefined" && rewrite !== null) ? rewrite : true)) {
-return (new Rewriter()).rewrite(this.tokens);
+if (options.rewrite === false) {
+return this.tokens;
}
-return this.tokens;
+return (new Rewriter()).rewrite(this.tokens);
};
// At every position, run through this list of attempted matches,
// short-circuiting if any of them succeed.
@@ -166,12 +170,11 @@
};
// Matches strings, including multi-line strings.
Lexer.prototype.string_token = function string_token() {
-var escaped, string;
+var string;
if (!((string = this.match(STRING, 1)))) {
return false;
}
-escaped = string.replace(STRING_NEWLINES, " \\\n");
-this.interpolate_string(escaped);
+this.interpolate_string(string.replace(STRING_NEWLINES, " \\\n"));
this.line += count(string, "\n");
this.i += string.length;
return true;
@@ -400,18 +403,21 @@
Lexer.prototype.assignment_error = function assignment_error() {
throw new Error('SyntaxError: Reserved word "' + this.value() + '" on line ' + this.line + ' can\'t be assigned');
};
-// Replace variables and expressions inside double-quoted strings.
-Lexer.prototype.interpolate_string = function interpolate_string(escaped) {
-var _a, _b, _c, _d, _e, _f, _g, _h, _i, _j, _k, _l, before, each, expression, expression_match, group, i, identifier, identifier_match, lexer, nested, quote, rewrite, tokens;
-if (escaped.length < 3 || escaped.indexOf('"') !== 0) {
-return this.token('STRING', escaped);
+// Expand variables and expressions inside double-quoted strings using
+// [ECMA Harmony's interpolation syntax](http://wiki.ecmascript.org/doku.php?id=strawman:string_interpolation).
+// "Hello $name."
+// "Hello ${name.capitalize()}."
+Lexer.prototype.interpolate_string = function interpolate_string(str) {
+var _a, _b, _c, _d, _e, _f, _g, _h, _i, _j, _k, _l, _m, before, contents, each, expression, expression_match, group, i, identifier, identifier_match, lexer, nested, prev, quote, tok, tokens;
+if (str.length < 3 || str.substring(0, 1) !== '"') {
+return this.token('STRING', str);
} else {
-lexer = null;
+lexer = new Lexer();
tokens = [];
-quote = escaped.substring(0, 1);
-escaped = escaped.substring(1, escaped.length - 1);
-while (escaped.length) {
-expression_match = escaped.match(/(^|[\s\S]*?(?:[\\]|\\\\)?)(\${[\s\S]*?(?:[^\\]|\\\\)})/);
+quote = str.substring(0, 1);
+str = str.substring(1, str.length - 1);
+while (str.length) {
+expression_match = str.match(INTERPOLATED_EXPRESSION);
if (expression_match) {
_a = expression_match;
group = _a[0];
@@ -425,16 +431,15 @@
if (before.length) {
tokens.push(['STRING', quote + before + quote]);
}
-if (!(typeof lexer !== "undefined" && lexer !== null)) {
-lexer = new Lexer();
-}
-nested = lexer.tokenize('(' + expression.substring(2, expression.length - 1) + ')', (rewrite = false));
+nested = lexer.tokenize('(' + expression.substring(2, expression.length - 1) + ')', {
+rewrite: false
+});
nested.pop();
tokens.push(['TOKENS', nested]);
}
-escaped = escaped.substring(group.length);
+str = str.substring(group.length);
} else {
-identifier_match = escaped.match(/(^|[\s\S]*?(?:[\\]|\\\\)?)(\$([a-zA-Z_]\w*))/);
+identifier_match = str.match(INTERPOLATED_IDENTIFIER);
if (identifier_match) {
_b = identifier_match;
group = _b[0];
@@ -450,27 +455,33 @@
}
tokens.push(['IDENTIFIER', identifier.substring(1)]);
}
-escaped = escaped.substring(group.length);
+str = str.substring(group.length);
} else {
-tokens.push(['STRING', quote + escaped + quote]);
-escaped = '';
+tokens.push(['STRING', quote + str + quote]);
+str = '';
}
}
}
if (tokens.length > 1) {
_e = tokens.length - 1; _f = 1;
for (_d = 0, i = _e; (_e <= _f ? i <= _f : i >= _f); (_e <= _f ? i += 1 : i -= 1), _d++) {
-tokens[i][0] === 'STRING' && tokens[i - 1][0] === 'STRING' ? tokens.splice(i - 1, 2, ['STRING', quote + tokens[i - 1][1].substring(1, tokens[i - 1][1].length - 1) + tokens[i][1].substring(1, tokens[i][1].length - 1) + quote]) : null;
+_g = [tokens[i - 1], tokens[i]];
+prev = _g[0];
+tok = _g[1];
+if (tok[0] === 'STRING' && prev[0] === 'STRING') {
+contents = quote + prev[1].substring(1, prev[1].length - 1) + tok[1].substring(1, tok[1].length - 1) + quote;
+tokens.splice(i - 1, 2, ['STRING', contents]);
+}
}
}
-_g = []; _h = tokens;
-for (i = 0, _i = _h.length; i < _i; i++) {
-each = _h[i];
-_g.push((function() {
+_h = []; _i = tokens;
+for (i = 0, _j = _i.length; i < _j; i++) {
+each = _i[i];
+_h.push((function() {
if (each[0] === 'TOKENS') {
-_j = each[1];
-for (_k = 0, _l = _j.length; _k < _l; _k++) {
-nested = _j[_k];
+_k = each[1];
+for (_l = 0, _m = _k.length; _l < _m; _l++) {
+nested = _k[_l];
this.token(nested[0], nested[1]);
}
} else {
@@ -481,7 +492,7 @@
}
}).call(this));
}
-return _g;
+return _h;
}
};
// Helpers
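
Both new regexes capture the literal text before the interpolation (group 1) and the interpolation itself (group 2); a trailing backslash in group 1 marks the `$` as escaped. A quick illustration of the capture groups, using the regexes exactly as defined above (the sample strings are invented):

    var INTERPOLATED_EXPRESSION = /(^|[\s\S]*?(?:[\\]|\\\\)?)(\${[\s\S]*?(?:[^\\]|\\\\)})/;
    var INTERPOLATED_IDENTIFIER = /(^|[\s\S]*?(?:[\\]|\\\\)?)(\$([a-zA-Z_]\w*))/;

    var m = 'Hello ${name.capitalize()}.'.match(INTERPOLATED_EXPRESSION);
    // m[1] === 'Hello '                -- emitted as a STRING fragment
    // m[2] === '${name.capitalize()}'  -- handed off to a nested Lexer

    m = 'Hello $name.'.match(INTERPOLATED_IDENTIFIER);
    // m[1] === 'Hello '
    // m[2] === '$name'
    // m[3] === 'name'                  -- emitted as an IDENTIFIER token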

src/lexer.coffee

@@ -71,6 +71,10 @@ LAST_DENTS : /\n([ \t]*)/g
LAST_DENT : /\n([ \t]*)/
ASSIGNMENT : /^(:|=)$/
+# Interpolation matching regexes.
+INTERPOLATED_EXPRESSION: /(^|[\s\S]*?(?:[\\]|\\\\)?)(\${[\s\S]*?(?:[^\\]|\\\\)})/
+INTERPOLATED_IDENTIFIER: /(^|[\s\S]*?(?:[\\]|\\\\)?)(\$([a-zA-Z_]\w*))/
# Token cleaning regexes.
JS_CLEANER : /(^`|`$)/g
MULTILINER : /\n/g
@@ -112,7 +116,8 @@ BEFORE_WHEN: ['INDENT', 'OUTDENT', 'TERMINATOR']
exports.Lexer: class Lexer
# Scan by attempting to match tokens one at a time. Slow and steady.
-tokenize: (code, rewrite) ->
+tokenize: (code, options) ->
+options ||= {}
@code : code # The remainder of the source code.
@i : 0 # Current character position we're parsing.
@line : 0 # The current line.
@@ -123,8 +128,8 @@ exports.Lexer: class Lexer
@chunk: @code.slice(@i)
@extract_next_token()
@close_indentation()
-return (new Rewriter()).rewrite @tokens if (rewrite ? true)
-return @tokens
+return @tokens if options.rewrite is no
+(new Rewriter()).rewrite @tokens
# At every position, run through this list of attempted matches,
# short-circuiting if any of them succeed.
@@ -166,8 +171,7 @@ exports.Lexer: class Lexer
# Matches strings, including multi-line strings.
string_token: ->
return false unless string: @match STRING, 1
-escaped: string.replace STRING_NEWLINES, " \\\n"
-@interpolate_string escaped
+@interpolate_string string.replace STRING_NEWLINES, " \\\n"
@line += count string, "\n"
@i += string.length
true
@@ -341,30 +345,34 @@ exports.Lexer: class Lexer
assignment_error: ->
throw new Error 'SyntaxError: Reserved word "' + @value() + '" on line ' + @line + ' can\'t be assigned'
-# Replace variables and expressions inside double-quoted strings.
-interpolate_string: (escaped) ->
-if escaped.length < 3 or escaped.indexOf('"') isnt 0
-@token 'STRING', escaped
+# Expand variables and expressions inside double-quoted strings using
+# [ECMA Harmony's interpolation syntax](http://wiki.ecmascript.org/doku.php?id=strawman:string_interpolation).
+#
+# "Hello $name."
+# "Hello ${name.capitalize()}."
+#
+interpolate_string: (str) ->
+if str.length < 3 or str.substring(0, 1) isnt '"'
+@token 'STRING', str
else
-lexer: null
+lexer: new Lexer()
tokens: []
-quote: escaped.substring(0, 1)
-escaped: escaped.substring(1, escaped.length - 1)
-while escaped.length
-expression_match: escaped.match /(^|[\s\S]*?(?:[\\]|\\\\)?)(\${[\s\S]*?(?:[^\\]|\\\\)})/
+quote: str.substring(0, 1)
+str: str.substring(1, str.length - 1)
+while str.length
+expression_match: str.match INTERPOLATED_EXPRESSION
if expression_match
[group, before, expression]: expression_match
if before.substring(before.length - 1) is '\\'
tokens.push ['STRING', quote + before.substring(0, before.length - 1) + expression + quote] if before.length
else
tokens.push ['STRING', quote + before + quote] if before.length
-lexer: new Lexer() if not lexer?
-nested: lexer.tokenize '(' + expression.substring(2, expression.length - 1) + ')', rewrite: no
+nested: lexer.tokenize '(' + expression.substring(2, expression.length - 1) + ')', {rewrite: no}
nested.pop()
tokens.push ['TOKENS', nested]
-escaped: escaped.substring(group.length)
+str: str.substring(group.length)
else
-identifier_match: escaped.match /(^|[\s\S]*?(?:[\\]|\\\\)?)(\$([a-zA-Z_]\w*))/
+identifier_match: str.match INTERPOLATED_IDENTIFIER
if identifier_match
[group, before, identifier]: identifier_match
if before.substring(before.length - 1) is '\\'
@@ -372,15 +380,16 @@ exports.Lexer: class Lexer
else
tokens.push ['STRING', quote + before + quote] if before.length
tokens.push ['IDENTIFIER', identifier.substring(1)]
-escaped: escaped.substring(group.length)
+str: str.substring(group.length)
else
-tokens.push ['STRING', quote + escaped + quote]
-escaped: ''
+tokens.push ['STRING', quote + str + quote]
+str: ''
if tokens.length > 1
for i in [tokens.length - 1..1]
-if tokens[i][0] is 'STRING' and tokens[i - 1][0] is 'STRING'
-tokens.splice i - 1, 2, ['STRING', quote + tokens[i - 1][1].substring(1, tokens[i - 1][1].length - 1) +
-tokens[i][1].substring(1, tokens[i][1].length - 1) + quote]
+[prev, tok]: [tokens[i - 1], tokens[i]]
+if tok[0] is 'STRING' and prev[0] is 'STRING'
+contents: quote + prev[1].substring(1, prev[1].length - 1) + tok[1].substring(1, tok[1].length - 1) + quote
+tokens.splice i - 1, 2, ['STRING', contents]
for each, i in tokens
if each[0] is 'TOKENS'
@token nested[0], nested[1] for nested in each[1]
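
After scanning, interpolate_string merges adjacent STRING fragments from the end of the token list backwards, so an escaped `$` doesn't break the literal into several concatenated pieces. The splice loop above, extracted into a standalone sketch (the function name is invented):

    // Merge neighbouring STRING tokens; each token value includes its quotes.
    function mergeStrings(tokens, quote) {
      // Walk backwards so splicing never disturbs unvisited indices.
      for (var i = tokens.length - 1; i >= 1; i -= 1) {
        var prev = tokens[i - 1], tok = tokens[i];
        if (tok[0] === 'STRING' && prev[0] === 'STRING') {
          // Strip each literal's quotes and re-wrap the joined contents.
          var contents = quote + prev[1].substring(1, prev[1].length - 1) +
                                 tok[1].substring(1, tok[1].length - 1) + quote;
          tokens.splice(i - 1, 2, ['STRING', contents]);
        }
      }
      return tokens;
    }

    // mergeStrings([['STRING', '"Hello "'], ['STRING', '"world"']], '"')
    // => [['STRING', '"Hello world"']]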