Repository: https://github.com/jashkenas/coffeescript.git

fixed #1299: overhauled token pairings

commit d03d288a98
parent c5dbb1c933
Author: satyr
Date:   2011-09-17 08:26:04 +09:00

5 changed files with 105 additions and 166 deletions
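In short: instead of letting the Rewriter balance and patch up token nestings after the fact (the ensureBalance and rewriteClosingParens passes deleted below), the Lexer now carries an @ends stack, pushing the expected closing token at every `(`, `{`, `[`, and INDENT, and pairing it off at each closer or OUTDENT. Misnested delimiters now fail during lexing. A hedged sketch of the user-visible effect (the module name and compile call assume the standard coffee-script package API; the exact message is what the new test at the bottom asserts):

    CoffeeScript = require 'coffee-script'

    # Well-nested input compiles as before.
    CoffeeScript.compile '[{}]'

    # Misnested closers are now caught by the lexer's pairing stack.
    try
      CoffeeScript.compile '''
        [{
        ]}
        '''
    catch e
      console.log e.message   # 'unmatched ] on line 2'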

lib/lexer.js

@@ -1,17 +1,17 @@
 (function() {
-  var BOOL, CALLABLE, CODE, COFFEE_ALIASES, COFFEE_ALIAS_MAP, COFFEE_KEYWORDS, COMMENT, COMPARE, COMPOUND_ASSIGN, HEREDOC, HEREDOC_ILLEGAL, HEREDOC_INDENT, HEREGEX, HEREGEX_OMIT, IDENTIFIER, INDEXABLE, JSTOKEN, JS_FORBIDDEN, JS_KEYWORDS, LINE_BREAK, LINE_CONTINUER, LOGIC, Lexer, MATH, MULTILINER, MULTI_DENT, NOT_REGEX, NOT_SPACED_REGEX, NUMBER, OPERATOR, REGEX, RELATION, RESERVED, Rewriter, SHIFT, SIMPLESTR, TRAILING_SPACES, UNARY, WHITESPACE, compact, count, key, last, starts, _ref;
+  var BOOL, CALLABLE, CODE, COFFEE_ALIASES, COFFEE_ALIAS_MAP, COFFEE_KEYWORDS, COMMENT, COMPARE, COMPOUND_ASSIGN, HEREDOC, HEREDOC_ILLEGAL, HEREDOC_INDENT, HEREGEX, HEREGEX_OMIT, IDENTIFIER, INDEXABLE, INVERSES, JSTOKEN, JS_FORBIDDEN, JS_KEYWORDS, LINE_BREAK, LINE_CONTINUER, LOGIC, Lexer, MATH, MULTILINER, MULTI_DENT, NOT_REGEX, NOT_SPACED_REGEX, NUMBER, OPERATOR, REGEX, RELATION, RESERVED, Rewriter, SHIFT, SIMPLESTR, TRAILING_SPACES, UNARY, WHITESPACE, compact, count, key, last, starts, _ref, _ref2;
   var __hasProp = Object.prototype.hasOwnProperty, __indexOf = Array.prototype.indexOf || function(item) {
     for (var i = 0, l = this.length; i < l; i++) {
       if (__hasProp.call(this, i) && this[i] === item) return i;
     }
     return -1;
   };
-  Rewriter = require('./rewriter').Rewriter;
-  _ref = require('./helpers'), count = _ref.count, starts = _ref.starts, compact = _ref.compact, last = _ref.last;
+  _ref = require('./rewriter'), Rewriter = _ref.Rewriter, INVERSES = _ref.INVERSES;
+  _ref2 = require('./helpers'), count = _ref2.count, starts = _ref2.starts, compact = _ref2.compact, last = _ref2.last;
   exports.Lexer = Lexer = (function() {
     function Lexer() {}
     Lexer.prototype.tokenize = function(code, opts) {
-      var i;
+      var i, tag;
       if (opts == null) opts = {};
       if (WHITESPACE.test(code)) code = "\n" + code;
       code = code.replace(/\r/g, '').replace(TRAILING_SPACES, '');
@@ -21,28 +21,30 @@
       this.indebt = 0;
       this.outdebt = 0;
       this.indents = [];
+      this.ends = [];
       this.tokens = [];
       i = 0;
       while (this.chunk = code.slice(i)) {
         i += this.identifierToken() || this.commentToken() || this.whitespaceToken() || this.lineToken() || this.heredocToken() || this.stringToken() || this.numberToken() || this.regexToken() || this.jsToken() || this.literalToken();
       }
       this.closeIndentation();
+      if (tag = this.ends.pop()) this.carp("missing " + tag);
       if (opts.rewrite === false) return this.tokens;
       return (new Rewriter).rewrite(this.tokens);
     };
     Lexer.prototype.identifierToken = function() {
-      var colon, forcedIdentifier, id, input, match, prev, tag, _ref2, _ref3;
+      var colon, forcedIdentifier, id, input, match, prev, tag, _ref3, _ref4;
       if (!(match = IDENTIFIER.exec(this.chunk))) return 0;
       input = match[0], id = match[1], colon = match[2];
       if (id === 'own' && this.tag() === 'FOR') {
         this.token('OWN', id);
         return id.length;
       }
-      forcedIdentifier = colon || (prev = last(this.tokens)) && (((_ref2 = prev[0]) === '.' || _ref2 === '?.' || _ref2 === '::') || !prev.spaced && prev[0] === '@');
+      forcedIdentifier = colon || (prev = last(this.tokens)) && (((_ref3 = prev[0]) === '.' || _ref3 === '?.' || _ref3 === '::') || !prev.spaced && prev[0] === '@');
       tag = 'IDENTIFIER';
       if (!forcedIdentifier && (__indexOf.call(JS_KEYWORDS, id) >= 0 || __indexOf.call(COFFEE_KEYWORDS, id) >= 0)) {
         tag = id.toUpperCase();
-        if (tag === 'WHEN' && (_ref3 = this.tag(), __indexOf.call(LINE_BREAK, _ref3) >= 0)) {
+        if (tag === 'WHEN' && (_ref4 = this.tag(), __indexOf.call(LINE_BREAK, _ref4) >= 0)) {
           tag = 'LEADING_WHEN';
         } else if (tag === 'FOR') {
           this.seenFor = true;
@@ -172,7 +174,7 @@
       return script.length;
     };
     Lexer.prototype.regexToken = function() {
-      var length, match, prev, regex, _ref2;
+      var length, match, prev, regex, _ref3;
       if (this.chunk.charAt(0) !== '/') return 0;
       if (match = HEREGEX.exec(this.chunk)) {
         length = this.heregexToken(match);
@@ -180,7 +182,7 @@
         return length;
       }
       prev = last(this.tokens);
-      if (prev && (_ref2 = prev[0], __indexOf.call((prev.spaced ? NOT_REGEX : NOT_SPACED_REGEX), _ref2) >= 0)) {
+      if (prev && (_ref3 = prev[0], __indexOf.call((prev.spaced ? NOT_REGEX : NOT_SPACED_REGEX), _ref3) >= 0)) {
         return 0;
       }
       if (!(match = REGEX.exec(this.chunk))) return 0;
@@ -189,7 +191,7 @@
       return regex.length;
     };
     Lexer.prototype.heregexToken = function(match) {
-      var body, flags, heregex, re, tag, tokens, value, _i, _len, _ref2, _ref3, _ref4, _ref5;
+      var body, flags, heregex, re, tag, tokens, value, _i, _len, _ref3, _ref4, _ref5, _ref6;
       heregex = match[0], body = match[1], flags = match[2];
       if (0 > body.indexOf('#{')) {
         re = body.replace(HEREGEX_OMIT, '').replace(/\//g, '\\/');
@@ -199,11 +201,11 @@
       this.token('IDENTIFIER', 'RegExp');
       this.tokens.push(['CALL_START', '(']);
       tokens = [];
-      _ref2 = this.interpolateString(body, {
+      _ref3 = this.interpolateString(body, {
         regex: true
       });
-      for (_i = 0, _len = _ref2.length; _i < _len; _i++) {
-        _ref3 = _ref2[_i], tag = _ref3[0], value = _ref3[1];
+      for (_i = 0, _len = _ref3.length; _i < _len; _i++) {
+        _ref4 = _ref3[_i], tag = _ref4[0], value = _ref4[1];
         if (tag === 'TOKENS') {
           tokens.push.apply(tokens, value);
         } else {
@@ -214,10 +216,10 @@
         tokens.push(['+', '+']);
       }
       tokens.pop();
-      if (((_ref4 = tokens[0]) != null ? _ref4[0] : void 0) !== 'STRING') {
+      if (((_ref5 = tokens[0]) != null ? _ref5[0] : void 0) !== 'STRING') {
         this.tokens.push(['STRING', '""'], ['+', '+']);
       }
-      (_ref5 = this.tokens).push.apply(_ref5, tokens);
+      (_ref6 = this.tokens).push.apply(_ref6, tokens);
       if (flags) this.tokens.push([',', ','], ['STRING', '"' + flags + '"']);
       this.token(')', ')');
       return heregex.length;
@@ -247,6 +249,7 @@
         diff = size - this.indent + this.outdebt;
         this.token('INDENT', diff);
         this.indents.push(diff);
+        this.ends.push('OUTDENT');
         this.outdebt = this.indebt = 0;
       } else {
         this.indebt = 0;
@@ -255,7 +258,7 @@
       this.indent = size;
       return indent.length;
     };
-    Lexer.prototype.outdentToken = function(moveOut, noNewlines, close) {
+    Lexer.prototype.outdentToken = function(moveOut, noNewlines) {
       var dent, len;
       while (moveOut > 0) {
         len = this.indents.length - 1;
@@ -271,6 +274,7 @@
           dent = this.indents.pop() - this.outdebt;
           moveOut -= dent;
           this.outdebt = 0;
+          this.pair('OUTDENT');
           this.token('OUTDENT', dent);
         }
       }
@@ -308,7 +312,7 @@
       return this;
     };
     Lexer.prototype.literalToken = function() {
-      var match, prev, tag, value, _ref2, _ref3, _ref4, _ref5;
+      var match, prev, tag, value, _ref3, _ref4, _ref5, _ref6;
       if (match = OPERATOR.exec(this.chunk)) {
         value = match[0];
         if (CODE.test(value)) this.tagParameters();
@@ -318,10 +322,10 @@
       tag = value;
       prev = last(this.tokens);
       if (value === '=' && prev) {
-        if (!prev[1].reserved && (_ref2 = prev[1], __indexOf.call(JS_FORBIDDEN, _ref2) >= 0)) {
+        if (!prev[1].reserved && (_ref3 = prev[1], __indexOf.call(JS_FORBIDDEN, _ref3) >= 0)) {
          this.assignmentError();
        }
-        if ((_ref3 = prev[1]) === '||' || _ref3 === '&&') {
+        if ((_ref4 = prev[1]) === '||' || _ref4 === '&&') {
          prev[0] = 'COMPOUND_ASSIGN';
          prev[1] += '=';
          return value.length;
@@ -342,10 +346,10 @@
       } else if (__indexOf.call(LOGIC, value) >= 0 || value === '?' && (prev != null ? prev.spaced : void 0)) {
         tag = 'LOGIC';
       } else if (prev && !prev.spaced) {
-        if (value === '(' && (_ref4 = prev[0], __indexOf.call(CALLABLE, _ref4) >= 0)) {
+        if (value === '(' && (_ref5 = prev[0], __indexOf.call(CALLABLE, _ref5) >= 0)) {
           if (prev[0] === '?') prev[0] = 'FUNC_EXIST';
           tag = 'CALL_START';
-        } else if (value === '[' && (_ref5 = prev[0], __indexOf.call(INDEXABLE, _ref5) >= 0)) {
+        } else if (value === '[' && (_ref6 = prev[0], __indexOf.call(INDEXABLE, _ref6) >= 0)) {
           tag = 'INDEX_START';
           switch (prev[0]) {
             case '?':
@@ -353,21 +357,32 @@
          }
        }
      }
+      switch (value) {
+        case '(':
+        case '{':
+        case '[':
+          this.ends.push(INVERSES[value]);
+          break;
+        case ')':
+        case '}':
+        case ']':
+          this.pair(value);
+      }
       this.token(tag, value);
       return value.length;
     };
     Lexer.prototype.sanitizeHeredoc = function(doc, options) {
-      var attempt, herecomment, indent, match, _ref2;
+      var attempt, herecomment, indent, match, _ref3;
       indent = options.indent, herecomment = options.herecomment;
       if (herecomment) {
         if (HEREDOC_ILLEGAL.test(doc)) {
-          throw new Error("block comment cannot contain \"*/\", starting on line " + (this.line + 1));
+          this.carp("block comment cannot contain \"*/\", starting");
         }
         if (doc.indexOf('\n') <= 0) return doc;
       } else {
         while (match = HEREDOC_INDENT.exec(doc)) {
           attempt = match[1];
-          if (indent === null || (0 < (_ref2 = attempt.length) && _ref2 < indent.length)) {
+          if (indent === null || (0 < (_ref3 = attempt.length) && _ref3 < indent.length)) {
             indent = attempt;
           }
         }
@@ -412,9 +427,9 @@
       throw SyntaxError("Reserved word \"" + (this.value()) + "\" on line " + (this.line + 1) + " can't be assigned");
     };
     Lexer.prototype.balancedString = function(str, end) {
-      var i, letter, match, prev, stack, _ref2;
+      var i, letter, match, prev, stack, _ref3;
       stack = [end];
-      for (i = 1, _ref2 = str.length; 1 <= _ref2 ? i < _ref2 : i > _ref2; 1 <= _ref2 ? i++ : i--) {
+      for (i = 1, _ref3 = str.length; 1 <= _ref3 ? i < _ref3 : i > _ref3; 1 <= _ref3 ? i++ : i--) {
         switch (letter = str.charAt(i)) {
           case '\\':
             i++;
@@ -436,10 +451,10 @@
        }
        prev = letter;
      }
-      throw new Error("missing " + (stack.pop()) + ", starting on line " + (this.line + 1));
+      return this.carp("missing " + (stack.pop()) + ", starting");
     };
     Lexer.prototype.interpolateString = function(str, options) {
-      var expr, heredoc, i, inner, interpolated, len, letter, nested, pi, regex, tag, tokens, value, _len, _ref2, _ref3, _ref4;
+      var expr, heredoc, i, inner, interpolated, len, letter, nested, pi, regex, tag, tokens, value, _len, _ref3, _ref4, _ref5;
       if (options == null) options = {};
       heredoc = options.heredoc, regex = options.regex;
       tokens = [];
@@ -461,7 +476,7 @@
           rewrite: false
         });
         nested.pop();
-        if (((_ref2 = nested[0]) != null ? _ref2[0] : void 0) === 'TERMINATOR') {
+        if (((_ref3 = nested[0]) != null ? _ref3[0] : void 0) === 'TERMINATOR') {
          nested.shift();
        }
        if (len = nested.length) {
@@ -481,10 +496,10 @@
       if (tokens[0][0] !== 'NEOSTRING') tokens.unshift(['', '']);
       if (interpolated = tokens.length > 1) this.token('(', '(');
       for (i = 0, _len = tokens.length; i < _len; i++) {
-        _ref3 = tokens[i], tag = _ref3[0], value = _ref3[1];
+        _ref4 = tokens[i], tag = _ref4[0], value = _ref4[1];
         if (i) this.token('+', '+');
         if (tag === 'TOKENS') {
-          (_ref4 = this.tokens).push.apply(_ref4, value);
+          (_ref5 = this.tokens).push.apply(_ref5, value);
         } else {
           this.token('STRING', this.makeString(value, '"', heredoc));
         }
@@ -492,6 +507,16 @@
       if (interpolated) this.token(')', ')');
       return tokens;
     };
+    Lexer.prototype.pair = function(tag) {
+      var size, wanted;
+      if (tag !== (wanted = last(this.ends))) {
+        if ('OUTDENT' !== wanted) this.carp("unmatched " + tag);
+        this.indent -= size = last(this.indents);
+        this.outdentToken(size, true);
+        return this.pair(tag);
+      }
+      return this.ends.pop();
+    };
     Lexer.prototype.token = function(tag, value) {
       return this.tokens.push([tag, value, this.line]);
     };
@@ -504,8 +529,8 @@
       return (tok = last(this.tokens, index)) && (val ? tok[1] = val : tok[1]);
     };
     Lexer.prototype.unfinished = function() {
-      var _ref2;
-      return LINE_CONTINUER.test(this.chunk) || ((_ref2 = this.tag()) === '\\' || _ref2 === '.' || _ref2 === '?.' || _ref2 === 'UNARY' || _ref2 === 'MATH' || _ref2 === '+' || _ref2 === '-' || _ref2 === 'SHIFT' || _ref2 === 'RELATION' || _ref2 === 'COMPARE' || _ref2 === 'LOGIC' || _ref2 === 'COMPOUND_ASSIGN' || _ref2 === 'THROW' || _ref2 === 'EXTENDS');
+      var _ref3;
+      return LINE_CONTINUER.test(this.chunk) || ((_ref3 = this.tag()) === '\\' || _ref3 === '.' || _ref3 === '?.' || _ref3 === 'UNARY' || _ref3 === 'MATH' || _ref3 === '+' || _ref3 === '-' || _ref3 === 'SHIFT' || _ref3 === 'RELATION' || _ref3 === 'COMPARE' || _ref3 === 'LOGIC' || _ref3 === 'COMPOUND_ASSIGN' || _ref3 === 'THROW' || _ref3 === 'EXTENDS');
     };
     Lexer.prototype.escapeLines = function(str, heredoc) {
       return str.replace(MULTILINER, heredoc ? '\\n' : '');
@@ -522,6 +547,9 @@
       body = body.replace(RegExp("" + quote, "g"), '\\$&');
       return quote + this.escapeLines(body, heredoc) + quote;
     };
+    Lexer.prototype.carp = function(message) {
+      throw SyntaxError("" + message + " on line " + (this.line + 1));
+    };
     return Lexer;
   })();
   JS_KEYWORDS = ['true', 'false', 'null', 'this', 'new', 'delete', 'typeof', 'in', 'instanceof', 'return', 'throw', 'break', 'continue', 'debugger', 'if', 'else', 'switch', 'for', 'while', 'do', 'try', 'catch', 'finally', 'class', 'extends', 'super'];

lib/rewriter.js

@@ -18,8 +18,6 @@
       this.tagPostfixConditionals();
       this.addImplicitBraces();
       this.addImplicitParentheses();
-      this.ensureBalance(BALANCED_PAIRS);
-      this.rewriteClosingParens();
       return this.tokens;
     };
     Rewriter.prototype.scanTokens = function(block) {
@@ -146,7 +144,7 @@
       noCall = false;
       action = function(token, i) {
         var idx;
-        idx = token[0] === 'OUTDENT' ? i + 1 : i;
+        idx = token[0] === 'OUTDENT' ? i : i;
         return this.tokens.splice(idx, 0, ['CALL_END', ')', token[2]]);
       };
       return this.scanTokens(function(token, i, tokens) {
@@ -235,66 +233,6 @@
         return 1;
       });
     };
-    Rewriter.prototype.ensureBalance = function(pairs) {
-      var close, level, levels, open, openLine, tag, token, _i, _j, _len, _len2, _ref, _ref2;
-      levels = {};
-      openLine = {};
-      _ref = this.tokens;
-      for (_i = 0, _len = _ref.length; _i < _len; _i++) {
-        token = _ref[_i];
-        tag = token[0];
-        for (_j = 0, _len2 = pairs.length; _j < _len2; _j++) {
-          _ref2 = pairs[_j], open = _ref2[0], close = _ref2[1];
-          levels[open] |= 0;
-          if (tag === open) {
-            if (levels[open]++ === 0) openLine[open] = token[2];
-          } else if (tag === close && --levels[open] < 0) {
-            throw Error("too many " + token[1] + " on line " + (token[2] + 1));
-          }
-        }
-      }
-      for (open in levels) {
-        level = levels[open];
-        if (level > 0) {
-          throw Error("unclosed " + open + " on line " + (openLine[open] + 1));
-        }
-      }
-      return this;
-    };
-    Rewriter.prototype.rewriteClosingParens = function() {
-      var debt, key, stack;
-      stack = [];
-      debt = {};
-      for (key in INVERSES) {
-        debt[key] = 0;
-      }
-      return this.scanTokens(function(token, i, tokens) {
-        var inv, match, mtag, oppos, tag, val, _ref;
-        if (_ref = (tag = token[0]), __indexOf.call(EXPRESSION_START, _ref) >= 0) {
-          stack.push(token);
-          return 1;
-        }
-        if (__indexOf.call(EXPRESSION_END, tag) < 0) return 1;
-        if (debt[inv = INVERSES[tag]] > 0) {
-          debt[inv] -= 1;
-          tokens.splice(i, 1);
-          return 0;
-        }
-        match = stack.pop();
-        mtag = match[0];
-        oppos = INVERSES[mtag];
-        if (tag === oppos) return 1;
-        debt[mtag] += 1;
-        val = [oppos, mtag === 'INDENT' ? match[1] : oppos];
-        if (this.tag(i + 2) === mtag) {
-          tokens.splice(i + 3, 0, val);
-          stack.push(match);
-        } else {
-          tokens.splice(i, 0, val);
-        }
-        return 1;
-      });
-    };
     Rewriter.prototype.indentation = function(token) {
       return [['INDENT', 2, token[2]], ['OUTDENT', 2, token[2]]];
     };
@@ -305,7 +243,7 @@
     return Rewriter;
   })();
   BALANCED_PAIRS = [['(', ')'], ['[', ']'], ['{', '}'], ['INDENT', 'OUTDENT'], ['CALL_START', 'CALL_END'], ['PARAM_START', 'PARAM_END'], ['INDEX_START', 'INDEX_END']];
-  INVERSES = {};
+  exports.INVERSES = INVERSES = {};
   EXPRESSION_START = [];
   EXPRESSION_END = [];
   for (_i = 0, _len = BALANCED_PAIRS.length; _i < _len; _i++) {
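INVERSES, built from BALANCED_PAIRS in both directions, is now exported so the lexer can look up the closer expected for each opener. A small hedged sketch of how it reads (the require path, relative to the repo root, is an assumption):

    {INVERSES} = require './lib/rewriter'
    INVERSES['(']       # ')'
    INVERSES[')']       # '('
    INVERSES['INDENT']  # 'OUTDENT'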

src/lexer.coffee

@@ -7,7 +7,7 @@
 #
 # Which is a format that can be fed directly into [Jison](http://github.com/zaach/jison).
 
-{Rewriter} = require './rewriter'
+{Rewriter, INVERSES} = require './rewriter'
 
 # Import the helpers we need.
 {count, starts, compact, last} = require './helpers'
@@ -41,6 +41,7 @@ exports.Lexer = class Lexer
     @indebt   = 0              # The over-indentation at the current level.
     @outdebt  = 0              # The under-outdentation at the current level.
     @indents  = []             # The stack of all current indentation levels.
+    @ends     = []             # The stack for pairing up tokens.
     @tokens   = []             # Stream of parsed tokens in the form `['TYPE', value, line]`.
 
     # At every position, run through this list of attempted matches,
@@ -60,6 +61,7 @@ exports.Lexer = class Lexer
            @literalToken()
 
     @closeIndentation()
+    @carp "missing #{tag}" if tag = @ends.pop()
     return @tokens if opts.rewrite is off
     (new Rewriter).rewrite @tokens
@@ -253,6 +255,7 @@ exports.Lexer = class Lexer
       diff = size - @indent + @outdebt
       @token 'INDENT', diff
       @indents.push diff
+      @ends   .push 'OUTDENT'
       @outdebt = @indebt = 0
     else
       @indebt = 0
@@ -262,7 +265,7 @@ exports.Lexer = class Lexer
   # Record an outdent token or multiple tokens, if we happen to be moving back
   # inwards past several recorded indents.
-  outdentToken: (moveOut, noNewlines, close) ->
+  outdentToken: (moveOut, noNewlines) ->
     while moveOut > 0
       len = @indents.length - 1
       if @indents[len] is undefined
@@ -277,6 +280,7 @@ exports.Lexer = class Lexer
         dent = @indents.pop() - @outdebt
         moveOut -= dent
         @outdebt = 0
+        @pair 'OUTDENT'
         @token 'OUTDENT', dent
     @outdebt -= moveOut if dent
     @tokens.pop() while @value() is ';'
@@ -338,6 +342,9 @@ exports.Lexer = class Lexer
         tag = 'INDEX_START'
         switch prev[0]
           when '?' then prev[0] = 'INDEX_SOAK'
+    switch value
+      when '(', '{', '[' then @ends.push INVERSES[value]
+      when ')', '}', ']' then @pair value
     @token tag, value
     value.length
@@ -350,7 +357,7 @@ exports.Lexer = class Lexer
     {indent, herecomment} = options
     if herecomment
       if HEREDOC_ILLEGAL.test doc
-        throw new Error "block comment cannot contain \"*/\", starting on line #{@line + 1}"
+        @carp "block comment cannot contain \"*/\", starting"
       return doc if doc.indexOf('\n') <= 0
     else
       while match = HEREDOC_INDENT.exec doc
@@ -421,8 +428,7 @@ exports.Lexer = class Lexer
       else if end is '"' and prev is '#' and letter is '{'
         stack.push end = '}'
       prev = letter
-    throw new Error "missing #{ stack.pop() }, starting on line #{ @line + 1 }"
+    @carp "missing #{ stack.pop() }, starting"
 
   # Expand variables and expressions inside double-quoted strings using
   # Ruby-like notation for substitution of arbitrary expressions.
@@ -471,6 +477,21 @@ exports.Lexer = class Lexer
     @token ')', ')' if interpolated
     tokens
 
+  # Pairs up a closing token, ensuring that all listed pairs of tokens are
+  # correctly balanced throughout the course of the token stream.
+  pair: (tag) ->
+    unless tag is wanted = last @ends
+      @carp "unmatched #{tag}" unless 'OUTDENT' is wanted
+      # Auto-close INDENT to support syntax like this:
+      #
+      #     el.click((event) ->
+      #       el.hide())
+      #
+      @indent -= size = last @indents
+      @outdentToken size, true
+      return @pair tag
+    @ends.pop()
+
   # Helpers
   # -------
@@ -504,6 +525,9 @@ exports.Lexer = class Lexer
     body = body.replace /// #{quote} ///g, '\\$&'
     quote + @escapeLines(body, heredoc) + quote
 
+  # Throws a syntax error from current `@line`.
+  carp: (message) -> throw SyntaxError "#{message} on line #{ @line + 1}"
+
 # Constants
 # ---------
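The core of the overhaul is the @ends stack discipline above: each opener pushes its inverse, pair pops it at the matching closer, and an OUTDENT sitting on top of the stack is closed implicitly instead of raising. A minimal standalone sketch of that discipline for plain brackets (not the Lexer itself; all names here are illustrative):

    INVERSES = '(': ')', '[': ']', '{': '}'

    # Illustrative reimplementation of the push/pop pairing, brackets only.
    checkPairs = (tags) ->
      ends = []
      for tag in tags
        if tag of INVERSES
          ends.push INVERSES[tag]          # expect this closer later
        else
          wanted = ends.pop()
          throw SyntaxError "unmatched #{tag}" unless tag is wanted
      throw SyntaxError "missing #{ends.pop()}" if ends.length
      true

    checkPairs ['[', '{', '}', ']']   # true
    checkPairs ['[', '{', ']', '}']   # throws SyntaxError: unmatched ]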

src/rewriter.coffee

@@ -3,7 +3,7 @@
 # the resulting parse table. Instead of making the parser handle it all, we take
 # a series of passes over the token stream, using this **Rewriter** to convert
 # shorthand into the unambiguous long form, add implicit indentation and
-# parentheses, balance incorrect nestings, and generally clean things up.
+# parentheses, and generally clean things up.
 
 # The **Rewriter** class is used by the [Lexer](lexer.html), directly against
 # its internal array of tokens.
@@ -26,8 +26,6 @@ class exports.Rewriter
     @tagPostfixConditionals()
     @addImplicitBraces()
     @addImplicitParentheses()
-    @ensureBalance BALANCED_PAIRS
-    @rewriteClosingParens()
     @tokens
 
   # Rewrite the token stream, looking one token ahead and behind.
@@ -134,7 +132,7 @@ class exports.Rewriter
   addImplicitParentheses: ->
     noCall = no
     action = (token, i) ->
-      idx = if token[0] is 'OUTDENT' then i + 1 else i
+      idx = if token[0] is 'OUTDENT' then i else i
       @tokens.splice idx, 0, ['CALL_END', ')', token[2]]
     @scanTokens (token, i, tokens) ->
       tag = token[0]
@@ -211,65 +209,6 @@ class exports.Rewriter
       original[0] = 'POST_' + original[0] if token[0] isnt 'INDENT'
       1
 
-  # Ensure that all listed pairs of tokens are correctly balanced throughout
-  # the course of the token stream.
-  ensureBalance: (pairs) ->
-    levels   = {}
-    openLine = {}
-    for token in @tokens
-      [tag] = token
-      for [open, close] in pairs
-        levels[open] |= 0
-        if tag is open
-          openLine[open] = token[2] if levels[open]++ is 0
-        else if tag is close and --levels[open] < 0
-          throw Error "too many #{token[1]} on line #{token[2] + 1}"
-    for open, level of levels when level > 0
-      throw Error "unclosed #{ open } on line #{openLine[open] + 1}"
-    this
-
-  # We'd like to support syntax like this:
-  #
-  #     el.click((event) ->
-  #       el.hide())
-  #
-  # In order to accomplish this, move outdents that follow closing parens
-  # inwards, safely. The steps to accomplish this are:
-  #
-  # 1. Check that all paired tokens are balanced and in order.
-  # 2. Rewrite the stream with a stack: if you see an `EXPRESSION_START`, add it
-  #    to the stack. If you see an `EXPRESSION_END`, pop the stack and replace
-  #    it with the inverse of what we've just popped.
-  # 3. Keep track of "debt" for tokens that we manufacture, to make sure we end
-  #    up balanced in the end.
-  # 4. Be careful not to alter array or parentheses delimiters with overzealous
-  #    rewriting.
-  rewriteClosingParens: ->
-    stack = []
-    debt  = {}
-    debt[key] = 0 for key of INVERSES
-    @scanTokens (token, i, tokens) ->
-      if (tag = token[0]) in EXPRESSION_START
-        stack.push token
-        return 1
-      return 1 unless tag in EXPRESSION_END
-      if debt[inv = INVERSES[tag]] > 0
-        debt[inv] -= 1
-        tokens.splice i, 1
-        return 0
-      match = stack.pop()
-      mtag  = match[0]
-      oppos = INVERSES[mtag]
-      return 1 if tag is oppos
-      debt[mtag] += 1
-      val = [oppos, if mtag is 'INDENT' then match[1] else oppos]
-      if @tag(i + 2) is mtag
-        tokens.splice i + 3, 0, val
-        stack.push match
-      else
-        tokens.splice i, 0, val
-      1
-
   # Generate the indentation tokens, based on another token on the same line.
   indentation: (token) ->
     [['INDENT', 2, token[2]], ['OUTDENT', 2, token[2]]]
@@ -293,7 +232,7 @@
 # The inverse mappings of `BALANCED_PAIRS` we're trying to fix up, so we can
 # look things up from either end.
-INVERSES = {}
+exports.INVERSES = INVERSES = {}
 
 # The tokens that signal the start/end of a balanced pair.
 EXPRESSION_START = []
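With pairing moved into the lexer, both deleted passes are redundant: unbalanced input now dies during lexing with a line number, and pair auto-closes an open INDENT when it meets a closer, so the OUTDENT is emitted before the closing paren in the first place rather than being shuffled inwards afterwards. A hedged way to observe this (the coffee-script module's tokens function and rewrite option are assumed from its standard API; the resulting tag stream is indicative):

    CoffeeScript = require 'coffee-script'
    code = '''
      el.click((event) ->
        el.hide())
      '''
    # Raw lexer output, before any rewriting.
    tags = (token[0] for token in CoffeeScript.tokens code, rewrite: off)
    console.log tags.join ' '
    # The function body's OUTDENT now lands before the closing ')' of the
    # click call, emitted by pair 'OUTDENT', with no rewriteClosingParens
    # pass needed to move it there.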

test/compilation.coffee

@@ -134,3 +134,13 @@ test "#1195 Ignore trailing semicolons (before newlines or as the last char in a
   lastChar = '-> lastChar;'
   doesNotThrow -> CoffeeScript.compile lastChar, bare: true
+
+test "#1299: Disallow token misnesting", ->
+  try
+    CoffeeScript.compile '''
+      [{
+      ]}
+    '''
+    ok no
+  catch e
+    eq 'unmatched ] on line 2', e.message
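Taken together, the lexer now reports both failure directions itself, in the message format carp produces. Illustrative examples (assumed from the pair and tokenize checks above, not from the commit's tests):

    CoffeeScript.compile 'a]'   # SyntaxError: unmatched ] on line 1
    CoffeeScript.compile '(['   # SyntaxError: missing ] on line 1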