Rewriting lexer.coffee to accept nested string interpolations.

This commit is contained in:
Stan Angeloff 2010-03-07 14:56:27 +02:00 committed by Jeremy Ashkenas
parent 1602e0e823
commit f74fae58e3
3 changed files with 99 additions and 74 deletions

View File

@ -34,7 +34,7 @@
IDENTIFIER = /^([a-zA-Z$_](\w|\$)*)/;
NUMBER = /^(\b((0(x|X)[0-9a-fA-F]+)|([0-9]+(\.[0-9]+)?(e[+\-]?[0-9]+)?)))\b/i;
HEREDOC = /^("{6}|'{6}|"{3}\n?([\s\S]*?)\n?([ \t]*)"{3}|'{3}\n?([\s\S]*?)\n?([ \t]*)'{3})/;
INTERPOLATION = /(^|[\s\S]*?(?:[\\]|\\\\)?)\$([a-zA-Z_@]\w*|{[\s\S]*?(?:[^\\]|\\\\)})/;
INTERPOLATION = /^\$([a-zA-Z_@]\w*)/;
OPERATOR = /^([+\*&|\/\-%=<>:!?]+)/;
WHITESPACE = /^([ \t]+)/;
COMMENT = /^(((\n?[ \t]*)?#[^\n]*)+)/;
@ -217,30 +217,30 @@
};
// Matches a balanced group such as a single or double-quoted string. Pass in
// a series of delimiters, all of which must be balanced correctly within the
// token's contents.
Lexer.prototype.balanced_token = function balanced_token() {
// string.
Lexer.prototype.balanced_string = function balanced_string(str) {
var _a, _b, _c, _d, close, delimited, i, levels, open, pair;
delimited = Array.prototype.slice.call(arguments, 0);
delimited = Array.prototype.slice.call(arguments, 1);
levels = [];
i = 0;
while (i < this.chunk.length) {
while (i < str.length) {
_a = delimited;
for (_b = 0, _c = _a.length; _b < _c; _b++) {
pair = _a[_b];
_d = pair;
open = _d[0];
close = _d[1];
if (levels.length && starts(this.chunk, '\\', i)) {
if (levels.length && starts(str, '\\', i)) {
i += 1;
break;
} else if (levels.length && starts(this.chunk, close, i) && levels[levels.length - 1] === pair) {
} else if (levels.length && starts(str, close, i) && levels[levels.length - 1] === pair) {
levels.pop();
i += close.length - 1;
if (!(levels.length)) {
i += 1;
}
break;
} else if (starts(this.chunk, open, i)) {
} else if (starts(str, open, i)) {
levels.push(pair);
i += open.length - 1;
break;
@ -257,7 +257,13 @@
if (i === 0) {
return false;
}
return this.chunk.substring(0, i);
return str.substring(0, i);
};
// Matches a balanced string within the token's contents.
Lexer.prototype.balanced_token = function balanced_token() {
var delimited;
delimited = Array.prototype.slice.call(arguments, 0);
return this.balanced_string.apply(this, [this.chunk].concat(delimited));
};
// Matches and consumes comments.
Lexer.prototype.comment_token = function comment_token() {
@ -453,50 +459,55 @@
// "Hello $name."
// "Hello ${name.capitalize()}."
Lexer.prototype.interpolate_string = function interpolate_string(str) {
var _a, _b, _c, _d, _e, _f, _g, _h, _i, _j, _k, _l, _m, before, each, group, i, inner, interp, lexer, match, nested, prev, quote, tok, tokens;
var _a, _b, _c, _d, _e, _f, _g, _h, _i, _j, _k, _l, _m, each, expression, group, i, inner, interp, last_i, lexer, match, nested, prev, quote, tok, tokens;
if (str.length < 3 || !starts(str, '"')) {
return this.token('STRING', str);
} else {
lexer = new Lexer();
tokens = [];
quote = str.substring(0, 1);
str = str.substring(1, str.length - 1);
while (str.length) {
match = str.match(INTERPOLATION);
if (match) {
_a = match;
group = _a[0];
before = _a[1];
interp = _a[2];
if (starts(before, '\\', before.length - 1)) {
prev = before.substring(0, before.length - 1);
if (before.length) {
tokens.push(['STRING', quote + prev + "$" + interp + quote]);
i = 1;
last_i = i;
while (i < str.length - 1) {
if (starts(str, '\\', i)) {
i += 1;
} else {
match = str.substring(i).match(INTERPOLATION);
if (match) {
_a = match;
group = _a[0];
interp = _a[1];
if (starts(interp, '@')) {
interp = "this." + (interp.substring(1));
}
if (last_i < i) {
tokens.push(['STRING', quote + (str.substring(last_i, i)) + quote]);
}
tokens.push(['IDENTIFIER', interp]);
i += group.length - 1;
last_i = i + 1;
} else {
if (before.length) {
tokens.push(['STRING', quote + before + quote]);
}
if (starts(interp, '{')) {
inner = interp.substring(1, interp.length - 1);
expression = this.balanced_string(str.substring(i), ['${', '}']);
if (expression && expression.length > 3) {
inner = expression.substring(2, expression.length - 1);
nested = lexer.tokenize("(" + inner + ")", {
rewrite: false,
line: this.line
});
nested.pop();
tokens.push(['TOKENS', nested]);
} else {
if (starts(interp, '@')) {
interp = "this." + (interp.substring(1));
if (last_i < i) {
tokens.push(['STRING', quote + (str.substring(last_i, i)) + quote]);
}
tokens.push(['IDENTIFIER', interp]);
tokens.push(['TOKENS', nested]);
i += expression.length - 1;
last_i = i + 1;
}
}
str = str.substring(group.length);
} else {
tokens.push(['STRING', quote + str + quote]);
str = '';
}
i += 1;
}
if (last_i < i && last_i < str.length - 1) {
tokens.push(['STRING', quote + (str.substring(last_i, i)) + quote]);
}
if (tokens.length > 1) {
_d = tokens.length - 1; _e = 1;

View File

@ -59,7 +59,7 @@ JS_FORBIDDEN: JS_KEYWORDS.concat RESERVED
IDENTIFIER : /^([a-zA-Z$_](\w|\$)*)/
NUMBER : /^(\b((0(x|X)[0-9a-fA-F]+)|([0-9]+(\.[0-9]+)?(e[+\-]?[0-9]+)?)))\b/i
HEREDOC : /^("{6}|'{6}|"{3}\n?([\s\S]*?)\n?([ \t]*)"{3}|'{3}\n?([\s\S]*?)\n?([ \t]*)'{3})/
INTERPOLATION : /(^|[\s\S]*?(?:[\\]|\\\\)?)\$([a-zA-Z_@]\w*|{[\s\S]*?(?:[^\\]|\\\\)})/
INTERPOLATION : /^\$([a-zA-Z_@]\w*)/
OPERATOR : /^([+\*&|\/\-%=<>:!?]+)/
WHITESPACE : /^([ \t]+)/
COMMENT : /^(((\n?[ \t]*)?#[^\n]*)+)/
@ -199,22 +199,22 @@ exports.Lexer: class Lexer
# Matches a balanced group such as a single or double-quoted string. Pass in
# a series of delimiters, all of which must be balanced correctly within the
# token's contents.
balanced_token: (delimited...) ->
# string.
balanced_string: (str, delimited...) ->
levels: []
i: 0
while i < @chunk.length
while i < str.length
for pair in delimited
[open, close]: pair
if levels.length and starts @chunk, '\\', i
if levels.length and starts str, '\\', i
i += 1
break
else if levels.length and starts(@chunk, close, i) and levels[levels.length - 1] is pair
else if levels.length and starts(str, close, i) and levels[levels.length - 1] is pair
levels.pop()
i += close.length - 1
i += 1 unless levels.length
break
else if starts @chunk, open, i
else if starts str, open, i
levels.push(pair)
i += open.length - 1
break
@ -222,7 +222,11 @@ exports.Lexer: class Lexer
i += 1
throw new Error "SyntaxError: Unterminated ${levels.pop()[0]} starting on line ${@line + 1}" if levels.length
return false if i is 0
return @chunk.substring(0, i)
return str.substring(0, i)
# Matches a balanced string within the token's contents.
balanced_token: (delimited...) ->
@balanced_string @chunk, delimited...
# Matches and consumes comments.
comment_token: ->
@ -382,28 +386,32 @@ exports.Lexer: class Lexer
lexer: new Lexer()
tokens: []
quote: str.substring(0, 1)
str: str.substring(1, str.length - 1)
while str.length
match: str.match INTERPOLATION
if match
[group, before, interp]: match
if starts before, '\\', before.length - 1
prev: before.substring(0, before.length - 1)
tokens.push ['STRING', "$quote$prev$$interp$quote"] if before.length
i: 1
last_i: i
while i < str.length - 1
if starts str, '\\', i
i += 1
else
match: str.substring(i).match INTERPOLATION
if match
[group, interp]: match
interp: "this.${ interp.substring(1) }" if starts interp, '@'
tokens.push ['STRING', "$quote${ str.substring(last_i, i) }$quote"] if last_i < i
tokens.push ['IDENTIFIER', interp]
i += group.length - 1
last_i: i + 1
else
tokens.push ['STRING', "$quote$before$quote"] if before.length
if starts interp, '{'
inner: interp.substring(1, interp.length - 1)
expression: @balanced_string str.substring(i), ['${', '}']
if expression and expression.length > 3
inner: expression.substring(2, expression.length - 1)
nested: lexer.tokenize "($inner)", {rewrite: no, line: @line}
nested.pop()
tokens.push ['STRING', "$quote${ str.substring(last_i, i) }$quote"] if last_i < i
tokens.push ['TOKENS', nested]
else
interp: "this.${ interp.substring(1) }" if starts interp, '@'
tokens.push ['IDENTIFIER', interp]
str: str.substring(group.length)
else
tokens.push ['STRING', "$quote$str$quote"]
str: ''
i += expression.length - 1
last_i: i + 1
i += 1
tokens.push ['STRING', "$quote${ str.substring(last_i, i) }$quote"] if last_i < i and last_i < str.length - 1
if tokens.length > 1
for i in [tokens.length - 1..1]
[prev, tok]: [tokens[i - 1], tokens[i]]

View File

@ -14,22 +14,23 @@ ok "$hello ${ 1 + 2 } $world" is "Hello 3 World"
[s, t, r, i, n, g]: ['s', 't', 'r', 'i', 'n', 'g']
ok "$s$t$r$i$n$g" is 'string'
ok "${s}${t}${r}${i}${n}${g}" is 'string'
ok "\\$s\\$t\\$r\\$i\\$n\\$g" is '$s$t$r$i$n$g'
ok "\\${s}\\${t}\\${r}\\${i}\\${n}\\${g}" is '${s}${t}${r}${i}${n}${g}'
ok "\\$string" is '$string'
ok "\\${string}" is '${string}'
ok "\$s\$t\$r\$i\$n\$g" is '$s$t$r$i$n$g'
ok "\\$s\\$t\\$r\\$i\\$n\\$g" is '\\s\\t\\r\\i\\n\\g'
ok "\${s}\${t}\${r}\${i}\${n}\${g}" is '${s}${t}${r}${i}${n}${g}'
ok "\$string" is '$string'
ok "\${string}" is '${string}'
ok "\\$Escaping first" is '$Escaping first'
ok "\\${Escaping} first" is '${Escaping} first'
ok "Escaping \\$in middle" is 'Escaping $in middle'
ok "Escaping \\${in} middle" is 'Escaping ${in} middle'
ok "Escaping \\$last" is 'Escaping $last'
ok "Escaping \\${last}" is 'Escaping ${last}'
ok "\$Escaping first" is '$Escaping first'
ok "\${Escaping} first" is '${Escaping} first'
ok "Escaping \$in middle" is 'Escaping $in middle'
ok "Escaping \${in} middle" is 'Escaping ${in} middle'
ok "Escaping \$last" is 'Escaping $last'
ok "Escaping \${last}" is 'Escaping ${last}'
ok "$$" is '$$'
ok "${}" is '${}'
ok "\\\\$$" is '\\\\$$'
ok "\\\\${}" is '\\\\${}'
ok "\\\\\$$" is '\\\\\$$'
ok "\\\${}" is '\\${}'
ok "I won $20 last night." is 'I won $20 last night.'
ok "I won $${20} last night." is 'I won $20 last night.'
@ -53,3 +54,8 @@ ok "I can has ${"cheeze"}" is 'I can has cheeze'
ok 'I can has ${"cheeze"}' is 'I can has ${"cheeze"}'
ok "Where is ${obj["name"] + '?'}" is 'Where is Joe?'
ok "Where is ${"the new ${obj["name"]}"}?" is 'Where is the new Joe?'
ok "Hello ${world ? "$hello"}" is 'Hello World'
ok "Hello ${"${"${obj["name"]}" + '!'}"}" is 'Hello Joe!'