Rewriting lexer.coffee to accept nested string interpolations.
This commit is contained in:
parent
1602e0e823
commit
f74fae58e3
83
lib/lexer.js
83
lib/lexer.js
|
@ -34,7 +34,7 @@
|
|||
IDENTIFIER = /^([a-zA-Z$_](\w|\$)*)/;
|
||||
NUMBER = /^(\b((0(x|X)[0-9a-fA-F]+)|([0-9]+(\.[0-9]+)?(e[+\-]?[0-9]+)?)))\b/i;
|
||||
HEREDOC = /^("{6}|'{6}|"{3}\n?([\s\S]*?)\n?([ \t]*)"{3}|'{3}\n?([\s\S]*?)\n?([ \t]*)'{3})/;
|
||||
INTERPOLATION = /(^|[\s\S]*?(?:[\\]|\\\\)?)\$([a-zA-Z_@]\w*|{[\s\S]*?(?:[^\\]|\\\\)})/;
|
||||
INTERPOLATION = /^\$([a-zA-Z_@]\w*)/;
|
||||
OPERATOR = /^([+\*&|\/\-%=<>:!?]+)/;
|
||||
WHITESPACE = /^([ \t]+)/;
|
||||
COMMENT = /^(((\n?[ \t]*)?#[^\n]*)+)/;
|
||||
|
@ -217,30 +217,30 @@
|
|||
};
|
||||
// Matches a balanced group such as a single or double-quoted string. Pass in
|
||||
// a series of delimiters, all of which must be balanced correctly within the
|
||||
// token's contents.
|
||||
Lexer.prototype.balanced_token = function balanced_token() {
|
||||
// string.
|
||||
Lexer.prototype.balanced_string = function balanced_string(str) {
|
||||
var _a, _b, _c, _d, close, delimited, i, levels, open, pair;
|
||||
delimited = Array.prototype.slice.call(arguments, 0);
|
||||
delimited = Array.prototype.slice.call(arguments, 1);
|
||||
levels = [];
|
||||
i = 0;
|
||||
while (i < this.chunk.length) {
|
||||
while (i < str.length) {
|
||||
_a = delimited;
|
||||
for (_b = 0, _c = _a.length; _b < _c; _b++) {
|
||||
pair = _a[_b];
|
||||
_d = pair;
|
||||
open = _d[0];
|
||||
close = _d[1];
|
||||
if (levels.length && starts(this.chunk, '\\', i)) {
|
||||
if (levels.length && starts(str, '\\', i)) {
|
||||
i += 1;
|
||||
break;
|
||||
} else if (levels.length && starts(this.chunk, close, i) && levels[levels.length - 1] === pair) {
|
||||
} else if (levels.length && starts(str, close, i) && levels[levels.length - 1] === pair) {
|
||||
levels.pop();
|
||||
i += close.length - 1;
|
||||
if (!(levels.length)) {
|
||||
i += 1;
|
||||
}
|
||||
break;
|
||||
} else if (starts(this.chunk, open, i)) {
|
||||
} else if (starts(str, open, i)) {
|
||||
levels.push(pair);
|
||||
i += open.length - 1;
|
||||
break;
|
||||
|
@ -257,7 +257,13 @@
|
|||
if (i === 0) {
|
||||
return false;
|
||||
}
|
||||
return this.chunk.substring(0, i);
|
||||
return str.substring(0, i);
|
||||
};
|
||||
// Matches a balanced string within the token's contents.
|
||||
Lexer.prototype.balanced_token = function balanced_token() {
|
||||
var delimited;
|
||||
delimited = Array.prototype.slice.call(arguments, 0);
|
||||
return this.balanced_string.apply(this, [this.chunk].concat(delimited));
|
||||
};
|
||||
// Matches and consumes comments.
|
||||
Lexer.prototype.comment_token = function comment_token() {
|
||||
|
@ -453,50 +459,55 @@
|
|||
// "Hello $name."
|
||||
// "Hello ${name.capitalize()}."
|
||||
Lexer.prototype.interpolate_string = function interpolate_string(str) {
|
||||
var _a, _b, _c, _d, _e, _f, _g, _h, _i, _j, _k, _l, _m, before, each, group, i, inner, interp, lexer, match, nested, prev, quote, tok, tokens;
|
||||
var _a, _b, _c, _d, _e, _f, _g, _h, _i, _j, _k, _l, _m, each, expression, group, i, inner, interp, last_i, lexer, match, nested, prev, quote, tok, tokens;
|
||||
if (str.length < 3 || !starts(str, '"')) {
|
||||
return this.token('STRING', str);
|
||||
} else {
|
||||
lexer = new Lexer();
|
||||
tokens = [];
|
||||
quote = str.substring(0, 1);
|
||||
str = str.substring(1, str.length - 1);
|
||||
while (str.length) {
|
||||
match = str.match(INTERPOLATION);
|
||||
if (match) {
|
||||
_a = match;
|
||||
group = _a[0];
|
||||
before = _a[1];
|
||||
interp = _a[2];
|
||||
if (starts(before, '\\', before.length - 1)) {
|
||||
prev = before.substring(0, before.length - 1);
|
||||
if (before.length) {
|
||||
tokens.push(['STRING', quote + prev + "$" + interp + quote]);
|
||||
i = 1;
|
||||
last_i = i;
|
||||
while (i < str.length - 1) {
|
||||
if (starts(str, '\\', i)) {
|
||||
i += 1;
|
||||
} else {
|
||||
match = str.substring(i).match(INTERPOLATION);
|
||||
if (match) {
|
||||
_a = match;
|
||||
group = _a[0];
|
||||
interp = _a[1];
|
||||
if (starts(interp, '@')) {
|
||||
interp = "this." + (interp.substring(1));
|
||||
}
|
||||
if (last_i < i) {
|
||||
tokens.push(['STRING', quote + (str.substring(last_i, i)) + quote]);
|
||||
}
|
||||
tokens.push(['IDENTIFIER', interp]);
|
||||
i += group.length - 1;
|
||||
last_i = i + 1;
|
||||
} else {
|
||||
if (before.length) {
|
||||
tokens.push(['STRING', quote + before + quote]);
|
||||
}
|
||||
if (starts(interp, '{')) {
|
||||
inner = interp.substring(1, interp.length - 1);
|
||||
expression = this.balanced_string(str.substring(i), ['${', '}']);
|
||||
if (expression && expression.length > 3) {
|
||||
inner = expression.substring(2, expression.length - 1);
|
||||
nested = lexer.tokenize("(" + inner + ")", {
|
||||
rewrite: false,
|
||||
line: this.line
|
||||
});
|
||||
nested.pop();
|
||||
tokens.push(['TOKENS', nested]);
|
||||
} else {
|
||||
if (starts(interp, '@')) {
|
||||
interp = "this." + (interp.substring(1));
|
||||
if (last_i < i) {
|
||||
tokens.push(['STRING', quote + (str.substring(last_i, i)) + quote]);
|
||||
}
|
||||
tokens.push(['IDENTIFIER', interp]);
|
||||
tokens.push(['TOKENS', nested]);
|
||||
i += expression.length - 1;
|
||||
last_i = i + 1;
|
||||
}
|
||||
}
|
||||
str = str.substring(group.length);
|
||||
} else {
|
||||
tokens.push(['STRING', quote + str + quote]);
|
||||
str = '';
|
||||
}
|
||||
i += 1;
|
||||
}
|
||||
if (last_i < i && last_i < str.length - 1) {
|
||||
tokens.push(['STRING', quote + (str.substring(last_i, i)) + quote]);
|
||||
}
|
||||
if (tokens.length > 1) {
|
||||
_d = tokens.length - 1; _e = 1;
|
||||
|
|
|
@ -59,7 +59,7 @@ JS_FORBIDDEN: JS_KEYWORDS.concat RESERVED
|
|||
IDENTIFIER : /^([a-zA-Z$_](\w|\$)*)/
|
||||
NUMBER : /^(\b((0(x|X)[0-9a-fA-F]+)|([0-9]+(\.[0-9]+)?(e[+\-]?[0-9]+)?)))\b/i
|
||||
HEREDOC : /^("{6}|'{6}|"{3}\n?([\s\S]*?)\n?([ \t]*)"{3}|'{3}\n?([\s\S]*?)\n?([ \t]*)'{3})/
|
||||
INTERPOLATION : /(^|[\s\S]*?(?:[\\]|\\\\)?)\$([a-zA-Z_@]\w*|{[\s\S]*?(?:[^\\]|\\\\)})/
|
||||
INTERPOLATION : /^\$([a-zA-Z_@]\w*)/
|
||||
OPERATOR : /^([+\*&|\/\-%=<>:!?]+)/
|
||||
WHITESPACE : /^([ \t]+)/
|
||||
COMMENT : /^(((\n?[ \t]*)?#[^\n]*)+)/
|
||||
|
@ -199,22 +199,22 @@ exports.Lexer: class Lexer
|
|||
|
||||
# Matches a balanced group such as a single or double-quoted string. Pass in
|
||||
# a series of delimiters, all of which must be balanced correctly within the
|
||||
# token's contents.
|
||||
balanced_token: (delimited...) ->
|
||||
# string.
|
||||
balanced_string: (str, delimited...) ->
|
||||
levels: []
|
||||
i: 0
|
||||
while i < @chunk.length
|
||||
while i < str.length
|
||||
for pair in delimited
|
||||
[open, close]: pair
|
||||
if levels.length and starts @chunk, '\\', i
|
||||
if levels.length and starts str, '\\', i
|
||||
i += 1
|
||||
break
|
||||
else if levels.length and starts(@chunk, close, i) and levels[levels.length - 1] is pair
|
||||
else if levels.length and starts(str, close, i) and levels[levels.length - 1] is pair
|
||||
levels.pop()
|
||||
i += close.length - 1
|
||||
i += 1 unless levels.length
|
||||
break
|
||||
else if starts @chunk, open, i
|
||||
else if starts str, open, i
|
||||
levels.push(pair)
|
||||
i += open.length - 1
|
||||
break
|
||||
|
@ -222,7 +222,11 @@ exports.Lexer: class Lexer
|
|||
i += 1
|
||||
throw new Error "SyntaxError: Unterminated ${levels.pop()[0]} starting on line ${@line + 1}" if levels.length
|
||||
return false if i is 0
|
||||
return @chunk.substring(0, i)
|
||||
return str.substring(0, i)
|
||||
|
||||
# Matches a balanced string within the token's contents.
|
||||
balanced_token: (delimited...) ->
|
||||
@balanced_string @chunk, delimited...
|
||||
|
||||
# Matches and consumes comments.
|
||||
comment_token: ->
|
||||
|
@ -382,28 +386,32 @@ exports.Lexer: class Lexer
|
|||
lexer: new Lexer()
|
||||
tokens: []
|
||||
quote: str.substring(0, 1)
|
||||
str: str.substring(1, str.length - 1)
|
||||
while str.length
|
||||
match: str.match INTERPOLATION
|
||||
if match
|
||||
[group, before, interp]: match
|
||||
if starts before, '\\', before.length - 1
|
||||
prev: before.substring(0, before.length - 1)
|
||||
tokens.push ['STRING', "$quote$prev$$interp$quote"] if before.length
|
||||
i: 1
|
||||
last_i: i
|
||||
while i < str.length - 1
|
||||
if starts str, '\\', i
|
||||
i += 1
|
||||
else
|
||||
match: str.substring(i).match INTERPOLATION
|
||||
if match
|
||||
[group, interp]: match
|
||||
interp: "this.${ interp.substring(1) }" if starts interp, '@'
|
||||
tokens.push ['STRING', "$quote${ str.substring(last_i, i) }$quote"] if last_i < i
|
||||
tokens.push ['IDENTIFIER', interp]
|
||||
i += group.length - 1
|
||||
last_i: i + 1
|
||||
else
|
||||
tokens.push ['STRING', "$quote$before$quote"] if before.length
|
||||
if starts interp, '{'
|
||||
inner: interp.substring(1, interp.length - 1)
|
||||
expression: @balanced_string str.substring(i), ['${', '}']
|
||||
if expression and expression.length > 3
|
||||
inner: expression.substring(2, expression.length - 1)
|
||||
nested: lexer.tokenize "($inner)", {rewrite: no, line: @line}
|
||||
nested.pop()
|
||||
tokens.push ['STRING', "$quote${ str.substring(last_i, i) }$quote"] if last_i < i
|
||||
tokens.push ['TOKENS', nested]
|
||||
else
|
||||
interp: "this.${ interp.substring(1) }" if starts interp, '@'
|
||||
tokens.push ['IDENTIFIER', interp]
|
||||
str: str.substring(group.length)
|
||||
else
|
||||
tokens.push ['STRING', "$quote$str$quote"]
|
||||
str: ''
|
||||
i += expression.length - 1
|
||||
last_i: i + 1
|
||||
i += 1
|
||||
tokens.push ['STRING', "$quote${ str.substring(last_i, i) }$quote"] if last_i < i and last_i < str.length - 1
|
||||
if tokens.length > 1
|
||||
for i in [tokens.length - 1..1]
|
||||
[prev, tok]: [tokens[i - 1], tokens[i]]
|
||||
|
|
|
@ -14,22 +14,23 @@ ok "$hello ${ 1 + 2 } $world" is "Hello 3 World"
|
|||
[s, t, r, i, n, g]: ['s', 't', 'r', 'i', 'n', 'g']
|
||||
ok "$s$t$r$i$n$g" is 'string'
|
||||
ok "${s}${t}${r}${i}${n}${g}" is 'string'
|
||||
ok "\\$s\\$t\\$r\\$i\\$n\\$g" is '$s$t$r$i$n$g'
|
||||
ok "\\${s}\\${t}\\${r}\\${i}\\${n}\\${g}" is '${s}${t}${r}${i}${n}${g}'
|
||||
ok "\\$string" is '$string'
|
||||
ok "\\${string}" is '${string}'
|
||||
ok "\$s\$t\$r\$i\$n\$g" is '$s$t$r$i$n$g'
|
||||
ok "\\$s\\$t\\$r\\$i\\$n\\$g" is '\\s\\t\\r\\i\\n\\g'
|
||||
ok "\${s}\${t}\${r}\${i}\${n}\${g}" is '${s}${t}${r}${i}${n}${g}'
|
||||
ok "\$string" is '$string'
|
||||
ok "\${string}" is '${string}'
|
||||
|
||||
ok "\\$Escaping first" is '$Escaping first'
|
||||
ok "\\${Escaping} first" is '${Escaping} first'
|
||||
ok "Escaping \\$in middle" is 'Escaping $in middle'
|
||||
ok "Escaping \\${in} middle" is 'Escaping ${in} middle'
|
||||
ok "Escaping \\$last" is 'Escaping $last'
|
||||
ok "Escaping \\${last}" is 'Escaping ${last}'
|
||||
ok "\$Escaping first" is '$Escaping first'
|
||||
ok "\${Escaping} first" is '${Escaping} first'
|
||||
ok "Escaping \$in middle" is 'Escaping $in middle'
|
||||
ok "Escaping \${in} middle" is 'Escaping ${in} middle'
|
||||
ok "Escaping \$last" is 'Escaping $last'
|
||||
ok "Escaping \${last}" is 'Escaping ${last}'
|
||||
|
||||
ok "$$" is '$$'
|
||||
ok "${}" is '${}'
|
||||
ok "\\\\$$" is '\\\\$$'
|
||||
ok "\\\\${}" is '\\\\${}'
|
||||
ok "\\\\\$$" is '\\\\\$$'
|
||||
ok "\\\${}" is '\\${}'
|
||||
|
||||
ok "I won $20 last night." is 'I won $20 last night.'
|
||||
ok "I won $${20} last night." is 'I won $20 last night.'
|
||||
|
@ -53,3 +54,8 @@ ok "I can has ${"cheeze"}" is 'I can has cheeze'
|
|||
ok 'I can has ${"cheeze"}' is 'I can has ${"cheeze"}'
|
||||
|
||||
ok "Where is ${obj["name"] + '?'}" is 'Where is Joe?'
|
||||
|
||||
ok "Where is ${"the new ${obj["name"]}"}?" is 'Where is the new Joe?'
|
||||
ok "Hello ${world ? "$hello"}" is 'Hello World'
|
||||
|
||||
ok "Hello ${"${"${obj["name"]}" + '!'}"}" is 'Hello Joe!'
|
||||
|
|
Loading…
Reference in New Issue