From e02ab76edf481938d314759a86cd2d732f58b856 Mon Sep 17 00:00:00 2001 From: Jeremy Ashkenas Date: Sat, 27 Feb 2010 19:46:45 -0500 Subject: [PATCH] converting the remainder of the CoffeeScript compiler (Rewriter, Scope, Optparse) to use classes --- lib/optparse.js | 113 ++++---- lib/rewriter.js | 663 ++++++++++++++++++++++---------------------- lib/scope.js | 189 ++++++------- src/optparse.coffee | 63 ++--- src/rewriter.coffee | 424 ++++++++++++++-------------- src/scope.coffee | 109 ++++---- 6 files changed, 786 insertions(+), 775 deletions(-) diff --git a/lib/optparse.js b/lib/optparse.js index 7656dd02..effedb36 100755 --- a/lib/optparse.js +++ b/lib/optparse.js @@ -1,67 +1,70 @@ (function(){ - var LONG_FLAG, MULTI_FLAG, OPTIONAL, SHORT_FLAG, build_rule, build_rules, normalize_arguments, op; + var LONG_FLAG, MULTI_FLAG, OPTIONAL, OptionParser, SHORT_FLAG, build_rule, build_rules, normalize_arguments; // Create an OptionParser with a list of valid options, in the form: // [short-flag (optional), long-flag, description] // And an optional banner for the usage help. - op = (exports.OptionParser = function OptionParser(rules, banner) { - this.banner = banner; - this.rules = build_rules(rules); - return this; - }); - // Parse the argument array, populating an options object with all of the - // specified options, and returning it. options.arguments will be an array - // containing the remaning non-option arguments. - op.prototype.parse = function parse(args) { - var _a, _b, _c, arg, is_option, matched_rule, options, rule; - arguments = Array.prototype.slice.call(arguments, 0); - options = { - arguments: [] + exports.OptionParser = (function() { + OptionParser = function OptionParser(rules, banner) { + this.banner = banner; + this.rules = build_rules(rules); + return this; }; - args = normalize_arguments(args); - while (arg = args.shift()) { - is_option = !!(arg.match(LONG_FLAG) || arg.match(SHORT_FLAG)); - matched_rule = false; + // Parse the argument array, populating an options object with all of the + // specified options, and returning it. options.arguments will be an array + // containing the remaning non-option arguments. + OptionParser.prototype.parse = function parse(args) { + var _a, _b, _c, arg, is_option, matched_rule, options, rule; + arguments = Array.prototype.slice.call(arguments, 0); + options = { + arguments: [] + }; + args = normalize_arguments(args); + while (arg = args.shift()) { + is_option = !!(arg.match(LONG_FLAG) || arg.match(SHORT_FLAG)); + matched_rule = false; + _a = this.rules; + for (_b = 0, _c = _a.length; _b < _c; _b++) { + rule = _a[_b]; + if (rule.letter === arg || rule.flag === arg) { + options[rule.name] = rule.has_argument ? args.shift() : true; + matched_rule = true; + break; + } + } + if (is_option && !matched_rule) { + throw new Error("unrecognized option: " + arg); + } + if (!(is_option)) { + options.arguments.push(arg); + } + } + return options; + }; + // Return the help text for this OptionParser, for --help and such. + OptionParser.prototype.help = function help() { + var _a, _b, _c, _d, _e, _f, _g, _h, i, let_part, lines, rule, spaces; + lines = ['Available options:']; + if (this.banner) { + lines.unshift(this.banner + '\n'); + } _a = this.rules; for (_b = 0, _c = _a.length; _b < _c; _b++) { rule = _a[_b]; - if (rule.letter === arg || rule.flag === arg) { - options[rule.name] = rule.has_argument ? args.shift() : true; - matched_rule = true; - break; - } + spaces = 15 - rule.flag.length; + spaces = spaces > 0 ? (function() { + _d = []; _g = 0; _h = spaces; + for (_f = 0, i = _g; (_g <= _h ? i <= _h : i >= _h); (_g <= _h ? i += 1 : i -= 1), _f++) { + _d.push(' '); + } + return _d; + }).call(this).join('') : ''; + let_part = rule.letter ? rule.letter + ', ' : ' '; + lines.push(' ' + let_part + rule.flag + spaces + rule.description); } - if (is_option && !matched_rule) { - throw new Error("unrecognized option: " + arg); - } - if (!(is_option)) { - options.arguments.push(arg); - } - } - return options; - }; - // Return the help text for this OptionParser, for --help and such. - op.prototype.help = function help() { - var _a, _b, _c, _d, _e, _f, _g, _h, i, let_part, lines, rule, spaces; - lines = ['Available options:']; - if (this.banner) { - lines.unshift(this.banner + '\n'); - } - _a = this.rules; - for (_b = 0, _c = _a.length; _b < _c; _b++) { - rule = _a[_b]; - spaces = 15 - rule.flag.length; - spaces = spaces > 0 ? (function() { - _d = []; _g = 0; _h = spaces; - for (_f = 0, i = _g; (_g <= _h ? i <= _h : i >= _h); (_g <= _h ? i += 1 : i -= 1), _f++) { - _d.push(' '); - } - return _d; - }).call(this).join('') : ''; - let_part = rule.letter ? rule.letter + ', ' : ' '; - lines.push(' ' + let_part + rule.flag + spaces + rule.description); - } - return lines.join('\n'); - }; + return lines.join('\n'); + }; + return OptionParser; + }).call(this); // Regex matchers for option flags. LONG_FLAG = /^(--\w[\w\-]+)/; SHORT_FLAG = /^(-\w)/; diff --git a/lib/rewriter.js b/lib/rewriter.js index 7fceea3f..a0e7e3bc 100644 --- a/lib/rewriter.js +++ b/lib/rewriter.js @@ -1,13 +1,9 @@ (function(){ - var BALANCED_PAIRS, EXPRESSION_CLOSE, EXPRESSION_START, EXPRESSION_TAIL, IMPLICIT_BLOCK, IMPLICIT_CALL, IMPLICIT_END, IMPLICIT_FUNC, INVERSES, SINGLE_CLOSERS, SINGLE_LINERS, _a, _b, _c, _d, _e, _f, _g, _h, _i, _j, _k, pair, re; + var BALANCED_PAIRS, EXPRESSION_CLOSE, EXPRESSION_START, EXPRESSION_TAIL, IMPLICIT_BLOCK, IMPLICIT_CALL, IMPLICIT_END, IMPLICIT_FUNC, INVERSES, Rewriter, SINGLE_CLOSERS, SINGLE_LINERS, _a, _b, _c, _d, _e, _f, _g, _h, _i, _j, _k, pair; var __hasProp = Object.prototype.hasOwnProperty; if (!((typeof process !== "undefined" && process !== null))) { this.exports = this; } - // In order to keep the grammar simple, the stream of tokens that the Lexer - // emits is rewritten by the Rewriter, smoothing out ambiguities, mis-nested - // indentation, and single-line flavors of expressions. - exports.Rewriter = (re = function re() { }); // Tokens that must be balanced. BALANCED_PAIRS = [['(', ')'], ['[', ']'], ['{', '}'], ['INDENT', 'OUTDENT'], ['PARAM_START', 'PARAM_END'], ['CALL_START', 'CALL_END'], ['INDEX_START', 'INDEX_END'], ['SOAKED_INDEX_START', 'SOAKED_INDEX_END']]; // Tokens that signal the start of a balanced pair. @@ -47,338 +43,345 @@ // The grammar can't disambiguate them, so we insert the implicit indentation. SINGLE_LINERS = ['ELSE', "->", "=>", 'TRY', 'FINALLY', 'THEN']; SINGLE_CLOSERS = ['TERMINATOR', 'CATCH', 'FINALLY', 'ELSE', 'OUTDENT', 'LEADING_WHEN']; - // Rewrite the token stream in multiple passes, one logical filter at - // a time. This could certainly be changed into a single pass through the - // stream, with a big ol' efficient switch, but it's much nicer like this. - re.prototype.rewrite = function rewrite(tokens) { - this.tokens = tokens; - this.adjust_comments(); - this.remove_leading_newlines(); - this.remove_mid_expression_newlines(); - this.move_commas_outside_outdents(); - this.close_open_calls_and_indexes(); - this.add_implicit_indentation(); - this.add_implicit_parentheses(); - this.ensure_balance(BALANCED_PAIRS); - this.rewrite_closing_parens(); - return this.tokens; - }; - // Rewrite the token stream, looking one token ahead and behind. - // Allow the return value of the block to tell us how many tokens to move - // forwards (or backwards) in the stream, to make sure we don't miss anything - // as the stream changes length under our feet. - re.prototype.scan_tokens = function scan_tokens(block) { - var i, move; - i = 0; - while (true) { - if (!(this.tokens[i])) { - break; + // In order to keep the grammar simple, the stream of tokens that the Lexer + // emits is rewritten by the Rewriter, smoothing out ambiguities, mis-nested + // indentation, and single-line flavors of expressions. + exports.Rewriter = (function() { + Rewriter = function Rewriter() { }; + // Rewrite the token stream in multiple passes, one logical filter at + // a time. This could certainly be changed into a single pass through the + // stream, with a big ol' efficient switch, but it's much nicer like this. + Rewriter.prototype.rewrite = function rewrite(tokens) { + this.tokens = tokens; + this.adjust_comments(); + this.remove_leading_newlines(); + this.remove_mid_expression_newlines(); + this.move_commas_outside_outdents(); + this.close_open_calls_and_indexes(); + this.add_implicit_indentation(); + this.add_implicit_parentheses(); + this.ensure_balance(BALANCED_PAIRS); + this.rewrite_closing_parens(); + return this.tokens; + }; + // Rewrite the token stream, looking one token ahead and behind. + // Allow the return value of the block to tell us how many tokens to move + // forwards (or backwards) in the stream, to make sure we don't miss anything + // as the stream changes length under our feet. + Rewriter.prototype.scan_tokens = function scan_tokens(block) { + var i, move; + i = 0; + while (true) { + if (!(this.tokens[i])) { + break; + } + move = block(this.tokens[i - 1], this.tokens[i], this.tokens[i + 1], i); + i += move; } - move = block(this.tokens[i - 1], this.tokens[i], this.tokens[i + 1], i); - i += move; - } - return true; - }; - // Massage newlines and indentations so that comments don't have to be - // correctly indented, or appear on their own line. - re.prototype.adjust_comments = function adjust_comments() { - return this.scan_tokens((function(__this) { - var __func = function(prev, token, post, i) { - var after, before; - if (!(token[0] === 'COMMENT')) { - return 1; - } - before = this.tokens[i - 2]; - after = this.tokens[i + 2]; - if (before && after && ((before[0] === 'INDENT' && after[0] === 'OUTDENT') || (before[0] === 'OUTDENT' && after[0] === 'INDENT')) && before[1] === after[1]) { - this.tokens.splice(i + 2, 1); - this.tokens.splice(i - 2, 1); - return 0; - } else if (prev && prev[0] === 'TERMINATOR' && after && after[0] === 'INDENT') { - this.tokens.splice(i + 2, 1); - this.tokens[i - 1] = after; - return 1; - } else if (prev && prev[0] !== 'TERMINATOR' && prev[0] !== 'INDENT' && prev[0] !== 'OUTDENT') { - this.tokens.splice(i, 0, ['TERMINATOR', "\n", prev[2]]); - return 2; - } else { - return 1; - } - }; - return (function() { - return __func.apply(__this, arguments); - }); - })(this)); - }; - // Leading newlines would introduce an ambiguity in the grammar, so we - // dispatch them here. - re.prototype.remove_leading_newlines = function remove_leading_newlines() { - if (this.tokens[0][0] === 'TERMINATOR') { - return this.tokens.shift(); - } - }; - // Some blocks occur in the middle of expressions -- when we're expecting - // this, remove their trailing newlines. - re.prototype.remove_mid_expression_newlines = function remove_mid_expression_newlines() { - return this.scan_tokens((function(__this) { - var __func = function(prev, token, post, i) { - if (!(post && EXPRESSION_CLOSE.indexOf(post[0]) >= 0 && token[0] === 'TERMINATOR')) { - return 1; - } - this.tokens.splice(i, 1); - return 0; - }; - return (function() { - return __func.apply(__this, arguments); - }); - })(this)); - }; - // Make sure that we don't accidentally break trailing commas, which need - // to go on the outside of expression closers. - re.prototype.move_commas_outside_outdents = function move_commas_outside_outdents() { - return this.scan_tokens((function(__this) { - var __func = function(prev, token, post, i) { - if (token[0] === 'OUTDENT' && prev[0] === ',') { - this.tokens.splice(i, 1, token); - } - return 1; - }; - return (function() { - return __func.apply(__this, arguments); - }); - })(this)); - }; - // We've tagged the opening parenthesis of a method call, and the opening - // bracket of an indexing operation. Match them with their close. - re.prototype.close_open_calls_and_indexes = function close_open_calls_and_indexes() { - var brackets, parens; - parens = [0]; - brackets = [0]; - return this.scan_tokens((function(__this) { - var __func = function(prev, token, post, i) { - var _l; - if ((_l = token[0]) === 'CALL_START') { - parens.push(0); - } else if (_l === 'INDEX_START') { - brackets.push(0); - } else if (_l === '(') { - parens[parens.length - 1] += 1; - } else if (_l === '[') { - brackets[brackets.length - 1] += 1; - } else if (_l === ')') { - if (parens[parens.length - 1] === 0) { - parens.pop(); - token[0] = 'CALL_END'; - } else { - parens[parens.length - 1] -= 1; - } - } else if (_l === ']') { - if (brackets[brackets.length - 1] === 0) { - brackets.pop(); - token[0] = 'INDEX_END'; - } else { - brackets[brackets.length - 1] -= 1; - } - } - return 1; - }; - return (function() { - return __func.apply(__this, arguments); - }); - })(this)); - }; - // Methods may be optionally called without parentheses, for simple cases. - // Insert the implicit parentheses here, so that the parser doesn't have to - // deal with them. - re.prototype.add_implicit_parentheses = function add_implicit_parentheses() { - var stack; - stack = [0]; - return this.scan_tokens((function(__this) { - var __func = function(prev, token, post, i) { - var _l, _m, _n, _o, idx, last, size, stack_pointer, tag, tmp; - tag = token[0]; - if (tag === 'INDENT') { - stack.push(0); - } - if (tag === 'OUTDENT') { - last = stack.pop(); - stack[stack.length - 1] += last; - } - if (IMPLICIT_END.indexOf(tag) >= 0 || !(typeof post !== "undefined" && post !== null)) { - if (tag === 'INDENT' && prev && IMPLICIT_BLOCK.indexOf(prev[0]) >= 0) { + return true; + }; + // Massage newlines and indentations so that comments don't have to be + // correctly indented, or appear on their own line. + Rewriter.prototype.adjust_comments = function adjust_comments() { + return this.scan_tokens((function(__this) { + var __func = function(prev, token, post, i) { + var after, before; + if (!(token[0] === 'COMMENT')) { return 1; } - if (stack[stack.length - 1] > 0 || tag === 'INDENT') { - idx = tag === 'OUTDENT' ? i + 1 : i; - stack_pointer = tag === 'INDENT' ? 2 : 1; - _n = 0; _o = stack[stack.length - stack_pointer]; - for (_m = 0, tmp = _n; (_n <= _o ? tmp < _o : tmp > _o); (_n <= _o ? tmp += 1 : tmp -= 1), _m++) { - this.tokens.splice(idx, 0, ['CALL_END', ')', token[2]]); - } - size = stack[stack.length - stack_pointer] + 1; - stack[stack.length - stack_pointer] = 0; - return size; - } - } - if (!(prev && IMPLICIT_FUNC.indexOf(prev[0]) >= 0 && IMPLICIT_CALL.indexOf(tag) >= 0)) { - return 1; - } - this.tokens.splice(i, 0, ['CALL_START', '(', token[2]]); - stack[stack.length - 1] += 1; - return 2; - }; - return (function() { - return __func.apply(__this, arguments); - }); - })(this)); - }; - // Because our grammar is LALR(1), it can't handle some single-line - // expressions that lack ending delimiters. Use the lexer to add the implicit - // blocks, so it doesn't need to. - // ')' can close a single-line block, but we need to make sure it's balanced. - re.prototype.add_implicit_indentation = function add_implicit_indentation() { - return this.scan_tokens((function(__this) { - var __func = function(prev, token, post, i) { - var idx, insertion, parens, pre, starter, tok; - if (!(SINGLE_LINERS.indexOf(token[0]) >= 0 && post[0] !== 'INDENT' && !(token[0] === 'ELSE' && post[0] === 'IF'))) { - return 1; - } - starter = token[0]; - this.tokens.splice(i + 1, 0, ['INDENT', 2, token[2]]); - idx = i + 1; - parens = 0; - while (true) { - idx += 1; - tok = this.tokens[idx]; - pre = this.tokens[idx - 1]; - if ((!tok || (SINGLE_CLOSERS.indexOf(tok[0]) >= 0 && tok[1] !== ';') || (pre[0] === ',' && tok[0] === 'PARAM_START') || (tok[0] === ')' && parens === 0)) && !(starter === 'ELSE' && tok[0] === 'ELSE')) { - insertion = pre[0] === "," ? idx - 1 : idx; - this.tokens.splice(insertion, 0, ['OUTDENT', 2, token[2]]); - break; - } - if (tok[0] === '(') { - parens += 1; - } - if (tok[0] === ')') { - parens -= 1; - } - } - if (!(token[0] === 'THEN')) { - return 1; - } - this.tokens.splice(i, 1); - return 0; - }; - return (function() { - return __func.apply(__this, arguments); - }); - })(this)); - }; - // Ensure that all listed pairs of tokens are correctly balanced throughout - // the course of the token stream. - re.prototype.ensure_balance = function ensure_balance(pairs) { - var _l, _m, key, levels, unclosed, value; - levels = {}; - this.scan_tokens((function(__this) { - var __func = function(prev, token, post, i) { - var _l, _m, _n, _o, close, open; - _l = pairs; - for (_m = 0, _n = _l.length; _m < _n; _m++) { - pair = _l[_m]; - _o = pair; - open = _o[0]; - close = _o[1]; - levels[open] = levels[open] || 0; - if (token[0] === open) { - levels[open] += 1; - } - if (token[0] === close) { - levels[open] -= 1; - } - if (levels[open] < 0) { - throw new Error("too many " + token[1]); - } - } - return 1; - }; - return (function() { - return __func.apply(__this, arguments); - }); - })(this)); - unclosed = (function() { - _l = []; _m = levels; - for (key in _m) { if (__hasProp.call(_m, key)) { - value = _m[key]; - if (value > 0) { - _l.push(key); - } - }} - return _l; - }).call(this); - if (unclosed.length) { - throw new Error("unclosed " + unclosed[0]); - } - }; - // We'd like to support syntax like this: - // el.click((event) -> - // el.hide()) - // In order to accomplish this, move outdents that follow closing parens - // inwards, safely. The steps to accomplish this are: - // - // 1. Check that all paired tokens are balanced and in order. - // 2. Rewrite the stream with a stack: if you see an '(' or INDENT, add it - // to the stack. If you see an ')' or OUTDENT, pop the stack and replace - // it with the inverse of what we've just popped. - // 3. Keep track of "debt" for tokens that we fake, to make sure we end - // up balanced in the end. - // - re.prototype.rewrite_closing_parens = function rewrite_closing_parens() { - var _l, debt, key, stack, val; - stack = []; - debt = {}; - _l = INVERSES; - for (key in _l) { if (__hasProp.call(_l, key)) { - val = _l[key]; - ((debt[key] = 0)); - }} - return this.scan_tokens((function(__this) { - var __func = function(prev, token, post, i) { - var inv, match, mtag, tag; - tag = token[0]; - inv = INVERSES[token[0]]; - // Push openers onto the stack. - if (EXPRESSION_START.indexOf(tag) >= 0) { - stack.push(token); - return 1; - // The end of an expression, check stack and debt for a pair. - } else if (EXPRESSION_TAIL.indexOf(tag) >= 0) { - // If the tag is already in our debt, swallow it. - if (debt[inv] > 0) { - debt[inv] -= 1; - this.tokens.splice(i, 1); + before = this.tokens[i - 2]; + after = this.tokens[i + 2]; + if (before && after && ((before[0] === 'INDENT' && after[0] === 'OUTDENT') || (before[0] === 'OUTDENT' && after[0] === 'INDENT')) && before[1] === after[1]) { + this.tokens.splice(i + 2, 1); + this.tokens.splice(i - 2, 1); return 0; + } else if (prev && prev[0] === 'TERMINATOR' && after && after[0] === 'INDENT') { + this.tokens.splice(i + 2, 1); + this.tokens[i - 1] = after; + return 1; + } else if (prev && prev[0] !== 'TERMINATOR' && prev[0] !== 'INDENT' && prev[0] !== 'OUTDENT') { + this.tokens.splice(i, 0, ['TERMINATOR', "\n", prev[2]]); + return 2; } else { - // Pop the stack of open delimiters. - match = stack.pop(); - mtag = match[0]; - // Continue onwards if it's the expected tag. - if (tag === INVERSES[mtag]) { - return 1; + return 1; + } + }; + return (function() { + return __func.apply(__this, arguments); + }); + })(this)); + }; + // Leading newlines would introduce an ambiguity in the grammar, so we + // dispatch them here. + Rewriter.prototype.remove_leading_newlines = function remove_leading_newlines() { + if (this.tokens[0][0] === 'TERMINATOR') { + return this.tokens.shift(); + } + }; + // Some blocks occur in the middle of expressions -- when we're expecting + // this, remove their trailing newlines. + Rewriter.prototype.remove_mid_expression_newlines = function remove_mid_expression_newlines() { + return this.scan_tokens((function(__this) { + var __func = function(prev, token, post, i) { + if (!(post && EXPRESSION_CLOSE.indexOf(post[0]) >= 0 && token[0] === 'TERMINATOR')) { + return 1; + } + this.tokens.splice(i, 1); + return 0; + }; + return (function() { + return __func.apply(__this, arguments); + }); + })(this)); + }; + // Make sure that we don't accidentally break trailing commas, which need + // to go on the outside of expression closers. + Rewriter.prototype.move_commas_outside_outdents = function move_commas_outside_outdents() { + return this.scan_tokens((function(__this) { + var __func = function(prev, token, post, i) { + if (token[0] === 'OUTDENT' && prev[0] === ',') { + this.tokens.splice(i, 1, token); + } + return 1; + }; + return (function() { + return __func.apply(__this, arguments); + }); + })(this)); + }; + // We've tagged the opening parenthesis of a method call, and the opening + // bracket of an indexing operation. Match them with their close. + Rewriter.prototype.close_open_calls_and_indexes = function close_open_calls_and_indexes() { + var brackets, parens; + parens = [0]; + brackets = [0]; + return this.scan_tokens((function(__this) { + var __func = function(prev, token, post, i) { + var _l; + if ((_l = token[0]) === 'CALL_START') { + parens.push(0); + } else if (_l === 'INDEX_START') { + brackets.push(0); + } else if (_l === '(') { + parens[parens.length - 1] += 1; + } else if (_l === '[') { + brackets[brackets.length - 1] += 1; + } else if (_l === ')') { + if (parens[parens.length - 1] === 0) { + parens.pop(); + token[0] = 'CALL_END'; } else { - // Unexpected close, insert correct close, adding to the debt. - debt[mtag] += 1; - val = mtag === 'INDENT' ? match[1] : INVERSES[mtag]; - this.tokens.splice(i, 0, [INVERSES[mtag], val]); - return 1; + parens[parens.length - 1] -= 1; + } + } else if (_l === ']') { + if (brackets[brackets.length - 1] === 0) { + brackets.pop(); + token[0] = 'INDEX_END'; + } else { + brackets[brackets.length - 1] -= 1; } } - } else { return 1; - } - }; - return (function() { - return __func.apply(__this, arguments); - }); - })(this)); - }; + }; + return (function() { + return __func.apply(__this, arguments); + }); + })(this)); + }; + // Methods may be optionally called without parentheses, for simple cases. + // Insert the implicit parentheses here, so that the parser doesn't have to + // deal with them. + Rewriter.prototype.add_implicit_parentheses = function add_implicit_parentheses() { + var stack; + stack = [0]; + return this.scan_tokens((function(__this) { + var __func = function(prev, token, post, i) { + var _l, _m, _n, _o, idx, last, size, stack_pointer, tag, tmp; + tag = token[0]; + if (tag === 'INDENT') { + stack.push(0); + } + if (tag === 'OUTDENT') { + last = stack.pop(); + stack[stack.length - 1] += last; + } + if (IMPLICIT_END.indexOf(tag) >= 0 || !(typeof post !== "undefined" && post !== null)) { + if (tag === 'INDENT' && prev && IMPLICIT_BLOCK.indexOf(prev[0]) >= 0) { + return 1; + } + if (stack[stack.length - 1] > 0 || tag === 'INDENT') { + idx = tag === 'OUTDENT' ? i + 1 : i; + stack_pointer = tag === 'INDENT' ? 2 : 1; + _n = 0; _o = stack[stack.length - stack_pointer]; + for (_m = 0, tmp = _n; (_n <= _o ? tmp < _o : tmp > _o); (_n <= _o ? tmp += 1 : tmp -= 1), _m++) { + this.tokens.splice(idx, 0, ['CALL_END', ')', token[2]]); + } + size = stack[stack.length - stack_pointer] + 1; + stack[stack.length - stack_pointer] = 0; + return size; + } + } + if (!(prev && IMPLICIT_FUNC.indexOf(prev[0]) >= 0 && IMPLICIT_CALL.indexOf(tag) >= 0)) { + return 1; + } + this.tokens.splice(i, 0, ['CALL_START', '(', token[2]]); + stack[stack.length - 1] += 1; + return 2; + }; + return (function() { + return __func.apply(__this, arguments); + }); + })(this)); + }; + // Because our grammar is LALR(1), it can't handle some single-line + // expressions that lack ending delimiters. Use the lexer to add the implicit + // blocks, so it doesn't need to. + // ')' can close a single-line block, but we need to make sure it's balanced. + Rewriter.prototype.add_implicit_indentation = function add_implicit_indentation() { + return this.scan_tokens((function(__this) { + var __func = function(prev, token, post, i) { + var idx, insertion, parens, pre, starter, tok; + if (!(SINGLE_LINERS.indexOf(token[0]) >= 0 && post[0] !== 'INDENT' && !(token[0] === 'ELSE' && post[0] === 'IF'))) { + return 1; + } + starter = token[0]; + this.tokens.splice(i + 1, 0, ['INDENT', 2, token[2]]); + idx = i + 1; + parens = 0; + while (true) { + idx += 1; + tok = this.tokens[idx]; + pre = this.tokens[idx - 1]; + if ((!tok || (SINGLE_CLOSERS.indexOf(tok[0]) >= 0 && tok[1] !== ';') || (pre[0] === ',' && tok[0] === 'PARAM_START') || (tok[0] === ')' && parens === 0)) && !(starter === 'ELSE' && tok[0] === 'ELSE')) { + insertion = pre[0] === "," ? idx - 1 : idx; + this.tokens.splice(insertion, 0, ['OUTDENT', 2, token[2]]); + break; + } + if (tok[0] === '(') { + parens += 1; + } + if (tok[0] === ')') { + parens -= 1; + } + } + if (!(token[0] === 'THEN')) { + return 1; + } + this.tokens.splice(i, 1); + return 0; + }; + return (function() { + return __func.apply(__this, arguments); + }); + })(this)); + }; + // Ensure that all listed pairs of tokens are correctly balanced throughout + // the course of the token stream. + Rewriter.prototype.ensure_balance = function ensure_balance(pairs) { + var _l, _m, key, levels, unclosed, value; + levels = {}; + this.scan_tokens((function(__this) { + var __func = function(prev, token, post, i) { + var _l, _m, _n, _o, close, open; + _l = pairs; + for (_m = 0, _n = _l.length; _m < _n; _m++) { + pair = _l[_m]; + _o = pair; + open = _o[0]; + close = _o[1]; + levels[open] = levels[open] || 0; + if (token[0] === open) { + levels[open] += 1; + } + if (token[0] === close) { + levels[open] -= 1; + } + if (levels[open] < 0) { + throw new Error("too many " + token[1]); + } + } + return 1; + }; + return (function() { + return __func.apply(__this, arguments); + }); + })(this)); + unclosed = (function() { + _l = []; _m = levels; + for (key in _m) { if (__hasProp.call(_m, key)) { + value = _m[key]; + if (value > 0) { + _l.push(key); + } + }} + return _l; + }).call(this); + if (unclosed.length) { + throw new Error("unclosed " + unclosed[0]); + } + }; + // We'd like to support syntax like this: + // el.click((event) -> + // el.hide()) + // In order to accomplish this, move outdents that follow closing parens + // inwards, safely. The steps to accomplish this are: + // + // 1. Check that all paired tokens are balanced and in order. + // 2. Rewrite the stream with a stack: if you see an '(' or INDENT, add it + // to the stack. If you see an ')' or OUTDENT, pop the stack and replace + // it with the inverse of what we've just popped. + // 3. Keep track of "debt" for tokens that we fake, to make sure we end + // up balanced in the end. + // + Rewriter.prototype.rewrite_closing_parens = function rewrite_closing_parens() { + var _l, debt, key, stack, val; + stack = []; + debt = {}; + _l = INVERSES; + for (key in _l) { if (__hasProp.call(_l, key)) { + val = _l[key]; + ((debt[key] = 0)); + }} + return this.scan_tokens((function(__this) { + var __func = function(prev, token, post, i) { + var inv, match, mtag, tag; + tag = token[0]; + inv = INVERSES[token[0]]; + // Push openers onto the stack. + if (EXPRESSION_START.indexOf(tag) >= 0) { + stack.push(token); + return 1; + // The end of an expression, check stack and debt for a pair. + } else if (EXPRESSION_TAIL.indexOf(tag) >= 0) { + // If the tag is already in our debt, swallow it. + if (debt[inv] > 0) { + debt[inv] -= 1; + this.tokens.splice(i, 1); + return 0; + } else { + // Pop the stack of open delimiters. + match = stack.pop(); + mtag = match[0]; + // Continue onwards if it's the expected tag. + if (tag === INVERSES[mtag]) { + return 1; + } else { + // Unexpected close, insert correct close, adding to the debt. + debt[mtag] += 1; + val = mtag === 'INDENT' ? match[1] : INVERSES[mtag]; + this.tokens.splice(i, 0, [INVERSES[mtag], val]); + return 1; + } + } + } else { + return 1; + } + }; + return (function() { + return __func.apply(__this, arguments); + }); + })(this)); + }; + return Rewriter; + }).call(this); })(); diff --git a/lib/scope.js b/lib/scope.js index 86591e51..f9914a70 100644 --- a/lib/scope.js +++ b/lib/scope.js @@ -11,104 +11,107 @@ // Initialize a scope with its parent, for lookups up the chain, // as well as the Expressions body where it should declare its variables, // and the function that it wraps. - Scope = (exports.Scope = function Scope(parent, expressions, method) { - var _a; - _a = [parent, expressions, method]; - this.parent = _a[0]; - this.expressions = _a[1]; - this.method = _a[2]; - this.variables = {}; - this.temp_var = this.parent ? this.parent.temp_var : '_a'; - return this; - }); - // Look up a variable in lexical scope, or declare it if not found. - Scope.prototype.find = function find(name) { - if (this.check(name)) { - return true; - } - this.variables[name] = 'var'; - return false; - }; - // Define a local variable as originating from a parameter in current scope - // -- no var required. - Scope.prototype.parameter = function parameter(name) { - return this.variables[name] = 'param'; - }; - // Just check to see if a variable has already been declared. - Scope.prototype.check = function check(name) { - if (this.variables[name]) { - return true; - } - return !!(this.parent && this.parent.check(name)); - }; - // You can reset a found variable on the immediate scope. - Scope.prototype.reset = function reset(name) { - return delete this.variables[name]; - }; - // Find an available, short, name for a compiler-generated variable. - Scope.prototype.free_variable = function free_variable() { - var ordinal; - while (this.check(this.temp_var)) { - ordinal = 1 + parseInt(this.temp_var.substr(1), 36); - this.temp_var = '_' + ordinal.toString(36).replace(/\d/g, 'a'); - } - this.variables[this.temp_var] = 'var'; - return this.temp_var; - }; - // Ensure that an assignment is made at the top of scope (or top-level - // scope, if requested). - Scope.prototype.assign = function assign(name, value, top_level) { - if (top_level && this.parent) { - return this.parent.assign(name, value, top_level); - } - return this.variables[name] = { - value: value, - assigned: true + exports.Scope = (function() { + Scope = function Scope(parent, expressions, method) { + var _a; + _a = [parent, expressions, method]; + this.parent = _a[0]; + this.expressions = _a[1]; + this.method = _a[2]; + this.variables = {}; + this.temp_var = this.parent ? this.parent.temp_var : '_a'; + return this; }; - }; - // Does this scope reference any variables that need to be declared in the - // given function body? - Scope.prototype.has_declarations = function has_declarations(body) { - return body === this.expressions && this.declared_variables().length; - }; - // Does this scope reference any assignments that need to be declared at the - // top of the given function body? - Scope.prototype.has_assignments = function has_assignments(body) { - return body === this.expressions && this.assigned_variables().length; - }; - // Return the list of variables first declared in current scope. - Scope.prototype.declared_variables = function declared_variables() { - var _a, _b, key, val; - return (function() { + // Look up a variable in lexical scope, or declare it if not found. + Scope.prototype.find = function find(name) { + if (this.check(name)) { + return true; + } + this.variables[name] = 'var'; + return false; + }; + // Define a local variable as originating from a parameter in current scope + // -- no var required. + Scope.prototype.parameter = function parameter(name) { + return this.variables[name] = 'param'; + }; + // Just check to see if a variable has already been declared. + Scope.prototype.check = function check(name) { + if (this.variables[name]) { + return true; + } + return !!(this.parent && this.parent.check(name)); + }; + // You can reset a found variable on the immediate scope. + Scope.prototype.reset = function reset(name) { + return delete this.variables[name]; + }; + // Find an available, short, name for a compiler-generated variable. + Scope.prototype.free_variable = function free_variable() { + var ordinal; + while (this.check(this.temp_var)) { + ordinal = 1 + parseInt(this.temp_var.substr(1), 36); + this.temp_var = '_' + ordinal.toString(36).replace(/\d/g, 'a'); + } + this.variables[this.temp_var] = 'var'; + return this.temp_var; + }; + // Ensure that an assignment is made at the top of scope (or top-level + // scope, if requested). + Scope.prototype.assign = function assign(name, value, top_level) { + if (top_level && this.parent) { + return this.parent.assign(name, value, top_level); + } + return this.variables[name] = { + value: value, + assigned: true + }; + }; + // Does this scope reference any variables that need to be declared in the + // given function body? + Scope.prototype.has_declarations = function has_declarations(body) { + return body === this.expressions && this.declared_variables().length; + }; + // Does this scope reference any assignments that need to be declared at the + // top of the given function body? + Scope.prototype.has_assignments = function has_assignments(body) { + return body === this.expressions && this.assigned_variables().length; + }; + // Return the list of variables first declared in current scope. + Scope.prototype.declared_variables = function declared_variables() { + var _a, _b, key, val; + return (function() { + _a = []; _b = this.variables; + for (key in _b) { if (__hasProp.call(_b, key)) { + val = _b[key]; + if (val === 'var') { + _a.push(key); + } + }} + return _a; + }).call(this).sort(); + }; + // Return the list of variables that are supposed to be assigned at the top + // of scope. + Scope.prototype.assigned_variables = function assigned_variables() { + var _a, _b, key, val; _a = []; _b = this.variables; for (key in _b) { if (__hasProp.call(_b, key)) { val = _b[key]; - if (val === 'var') { - _a.push(key); + if (val.assigned) { + _a.push(key + ' = ' + val.value); } }} return _a; - }).call(this).sort(); - }; - // Return the list of variables that are supposed to be assigned at the top - // of scope. - Scope.prototype.assigned_variables = function assigned_variables() { - var _a, _b, key, val; - _a = []; _b = this.variables; - for (key in _b) { if (__hasProp.call(_b, key)) { - val = _b[key]; - if (val.assigned) { - _a.push(key + ' = ' + val.value); - } - }} - return _a; - }; - // Compile the string representing all of the declared variables for this scope. - Scope.prototype.compiled_declarations = function compiled_declarations() { - return this.declared_variables().join(', '); - }; - // Compile the string performing all of the variable assignments for this scope. - Scope.prototype.compiled_assignments = function compiled_assignments() { - return this.assigned_variables().join(', '); - }; + }; + // Compile the string representing all of the declared variables for this scope. + Scope.prototype.compiled_declarations = function compiled_declarations() { + return this.declared_variables().join(', '); + }; + // Compile the string performing all of the variable assignments for this scope. + Scope.prototype.compiled_assignments = function compiled_assignments() { + return this.assigned_variables().join(', '); + }; + return Scope; + }).call(this); })(); diff --git a/src/optparse.coffee b/src/optparse.coffee index b38a902e..b1efe9e4 100644 --- a/src/optparse.coffee +++ b/src/optparse.coffee @@ -1,39 +1,40 @@ # Create an OptionParser with a list of valid options, in the form: # [short-flag (optional), long-flag, description] # And an optional banner for the usage help. -op: exports.OptionParser: (rules, banner) -> - @banner: banner - @rules: build_rules(rules) - this +exports.OptionParser: class OptionParser -# Parse the argument array, populating an options object with all of the -# specified options, and returning it. options.arguments will be an array -# containing the remaning non-option arguments. -op::parse: (args) -> - options: {arguments: []} - args: normalize_arguments args - while arg: args.shift() - is_option: !!(arg.match(LONG_FLAG) or arg.match(SHORT_FLAG)) - matched_rule: no + constructor: (rules, banner) -> + @banner: banner + @rules: build_rules(rules) + + # Parse the argument array, populating an options object with all of the + # specified options, and returning it. options.arguments will be an array + # containing the remaning non-option arguments. + parse: (args) -> + options: {arguments: []} + args: normalize_arguments args + while arg: args.shift() + is_option: !!(arg.match(LONG_FLAG) or arg.match(SHORT_FLAG)) + matched_rule: no + for rule in @rules + if rule.letter is arg or rule.flag is arg + options[rule.name]: if rule.has_argument then args.shift() else true + matched_rule: yes + break + throw new Error "unrecognized option: " + arg if is_option and not matched_rule + options.arguments.push arg unless is_option + options + + # Return the help text for this OptionParser, for --help and such. + help: -> + lines: ['Available options:'] + lines.unshift @banner + '\n' if @banner for rule in @rules - if rule.letter is arg or rule.flag is arg - options[rule.name]: if rule.has_argument then args.shift() else true - matched_rule: yes - break - throw new Error "unrecognized option: " + arg if is_option and not matched_rule - options.arguments.push arg unless is_option - options - -# Return the help text for this OptionParser, for --help and such. -op::help: -> - lines: ['Available options:'] - lines.unshift @banner + '\n' if @banner - for rule in @rules - spaces: 15 - rule.flag.length - spaces: if spaces > 0 then (' ' for i in [0..spaces]).join('') else '' - let_part: if rule.letter then rule.letter + ', ' else ' ' - lines.push ' ' + let_part + rule.flag + spaces + rule.description - lines.join('\n') + spaces: 15 - rule.flag.length + spaces: if spaces > 0 then (' ' for i in [0..spaces]).join('') else '' + let_part: if rule.letter then rule.letter + ', ' else ' ' + lines.push ' ' + let_part + rule.flag + spaces + rule.description + lines.join('\n') # Regex matchers for option flags. LONG_FLAG: /^(--\w[\w\-]+)/ diff --git a/src/rewriter.coffee b/src/rewriter.coffee index 2c4a7e01..ae5689c1 100644 --- a/src/rewriter.coffee +++ b/src/rewriter.coffee @@ -1,10 +1,5 @@ this.exports: this unless process? -# In order to keep the grammar simple, the stream of tokens that the Lexer -# emits is rewritten by the Rewriter, smoothing out ambiguities, mis-nested -# indentation, and single-line flavors of expressions. -exports.Rewriter: re: -> - # Tokens that must be balanced. BALANCED_PAIRS: [['(', ')'], ['[', ']'], ['{', '}'], ['INDENT', 'OUTDENT'], ['PARAM_START', 'PARAM_END'], ['CALL_START', 'CALL_END'], @@ -39,217 +34,222 @@ for pair in BALANCED_PAIRS SINGLE_LINERS: ['ELSE', "->", "=>", 'TRY', 'FINALLY', 'THEN'] SINGLE_CLOSERS: ['TERMINATOR', 'CATCH', 'FINALLY', 'ELSE', 'OUTDENT', 'LEADING_WHEN'] -# Rewrite the token stream in multiple passes, one logical filter at -# a time. This could certainly be changed into a single pass through the -# stream, with a big ol' efficient switch, but it's much nicer like this. -re::rewrite: (tokens) -> - @tokens: tokens - @adjust_comments() - @remove_leading_newlines() - @remove_mid_expression_newlines() - @move_commas_outside_outdents() - @close_open_calls_and_indexes() - @add_implicit_indentation() - @add_implicit_parentheses() - @ensure_balance(BALANCED_PAIRS) - @rewrite_closing_parens() - @tokens +# In order to keep the grammar simple, the stream of tokens that the Lexer +# emits is rewritten by the Rewriter, smoothing out ambiguities, mis-nested +# indentation, and single-line flavors of expressions. +exports.Rewriter: class Rewriter -# Rewrite the token stream, looking one token ahead and behind. -# Allow the return value of the block to tell us how many tokens to move -# forwards (or backwards) in the stream, to make sure we don't miss anything -# as the stream changes length under our feet. -re::scan_tokens: (block) -> - i: 0 - while true - break unless @tokens[i] - move: block(@tokens[i - 1], @tokens[i], @tokens[i + 1], i) - i += move - true + # Rewrite the token stream in multiple passes, one logical filter at + # a time. This could certainly be changed into a single pass through the + # stream, with a big ol' efficient switch, but it's much nicer like this. + rewrite: (tokens) -> + @tokens: tokens + @adjust_comments() + @remove_leading_newlines() + @remove_mid_expression_newlines() + @move_commas_outside_outdents() + @close_open_calls_and_indexes() + @add_implicit_indentation() + @add_implicit_parentheses() + @ensure_balance(BALANCED_PAIRS) + @rewrite_closing_parens() + @tokens -# Massage newlines and indentations so that comments don't have to be -# correctly indented, or appear on their own line. -re::adjust_comments: -> - @scan_tokens (prev, token, post, i) => - return 1 unless token[0] is 'COMMENT' - before: @tokens[i - 2] - after: @tokens[i + 2] - if before and after and - ((before[0] is 'INDENT' and after[0] is 'OUTDENT') or - (before[0] is 'OUTDENT' and after[0] is 'INDENT')) and - before[1] is after[1] - @tokens.splice(i + 2, 1) - @tokens.splice(i - 2, 1) - return 0 - else if prev and prev[0] is 'TERMINATOR' and after and after[0] is 'INDENT' - @tokens.splice(i + 2, 1) - @tokens[i - 1]: after - return 1 - else if prev and prev[0] isnt 'TERMINATOR' and prev[0] isnt 'INDENT' and prev[0] isnt 'OUTDENT' - @tokens.splice(i, 0, ['TERMINATOR', "\n", prev[2]]) - return 2 - else - return 1 - -# Leading newlines would introduce an ambiguity in the grammar, so we -# dispatch them here. -re::remove_leading_newlines: -> - @tokens.shift() if @tokens[0][0] is 'TERMINATOR' - -# Some blocks occur in the middle of expressions -- when we're expecting -# this, remove their trailing newlines. -re::remove_mid_expression_newlines: -> - @scan_tokens (prev, token, post, i) => - return 1 unless post and EXPRESSION_CLOSE.indexOf(post[0]) >= 0 and token[0] is 'TERMINATOR' - @tokens.splice(i, 1) - return 0 - -# Make sure that we don't accidentally break trailing commas, which need -# to go on the outside of expression closers. -re::move_commas_outside_outdents: -> - @scan_tokens (prev, token, post, i) => - @tokens.splice(i, 1, token) if token[0] is 'OUTDENT' and prev[0] is ',' - return 1 - -# We've tagged the opening parenthesis of a method call, and the opening -# bracket of an indexing operation. Match them with their close. -re::close_open_calls_and_indexes: -> - parens: [0] - brackets: [0] - @scan_tokens (prev, token, post, i) => - switch token[0] - when 'CALL_START' then parens.push(0) - when 'INDEX_START' then brackets.push(0) - when '(' then parens[parens.length - 1] += 1 - when '[' then brackets[brackets.length - 1] += 1 - when ')' - if parens[parens.length - 1] is 0 - parens.pop() - token[0]: 'CALL_END' - else - parens[parens.length - 1] -= 1 - when ']' - if brackets[brackets.length - 1] == 0 - brackets.pop() - token[0]: 'INDEX_END' - else - brackets[brackets.length - 1] -= 1 - return 1 - -# Methods may be optionally called without parentheses, for simple cases. -# Insert the implicit parentheses here, so that the parser doesn't have to -# deal with them. -re::add_implicit_parentheses: -> - stack: [0] - @scan_tokens (prev, token, post, i) => - tag: token[0] - stack.push(0) if tag is 'INDENT' - if tag is 'OUTDENT' - last: stack.pop() - stack[stack.length - 1] += last - if IMPLICIT_END.indexOf(tag) >= 0 or !post? - return 1 if tag is 'INDENT' and prev and IMPLICIT_BLOCK.indexOf(prev[0]) >= 0 - if stack[stack.length - 1] > 0 or tag is 'INDENT' - idx: if tag is 'OUTDENT' then i + 1 else i - stack_pointer: if tag is 'INDENT' then 2 else 1 - for tmp in [0...stack[stack.length - stack_pointer]] - @tokens.splice(idx, 0, ['CALL_END', ')', token[2]]) - size: stack[stack.length - stack_pointer] + 1 - stack[stack.length - stack_pointer]: 0 - return size - return 1 unless prev and IMPLICIT_FUNC.indexOf(prev[0]) >= 0 and IMPLICIT_CALL.indexOf(tag) >= 0 - @tokens.splice(i, 0, ['CALL_START', '(', token[2]]) - stack[stack.length - 1] += 1 - return 2 - -# Because our grammar is LALR(1), it can't handle some single-line -# expressions that lack ending delimiters. Use the lexer to add the implicit -# blocks, so it doesn't need to. -# ')' can close a single-line block, but we need to make sure it's balanced. -re::add_implicit_indentation: -> - @scan_tokens (prev, token, post, i) => - return 1 unless SINGLE_LINERS.indexOf(token[0]) >= 0 and post[0] isnt 'INDENT' and - not (token[0] is 'ELSE' and post[0] is 'IF') - starter: token[0] - @tokens.splice(i + 1, 0, ['INDENT', 2, token[2]]) - idx: i + 1 - parens: 0 + # Rewrite the token stream, looking one token ahead and behind. + # Allow the return value of the block to tell us how many tokens to move + # forwards (or backwards) in the stream, to make sure we don't miss anything + # as the stream changes length under our feet. + scan_tokens: (block) -> + i: 0 while true - idx += 1 - tok: @tokens[idx] - pre: @tokens[idx - 1] - if (not tok or - (SINGLE_CLOSERS.indexOf(tok[0]) >= 0 and tok[1] isnt ';') or - (pre[0] is ',' and tok[0] is 'PARAM_START') or - (tok[0] is ')' && parens is 0)) and - not (starter is 'ELSE' and tok[0] is 'ELSE') - insertion: if pre[0] is "," then idx - 1 else idx - @tokens.splice(insertion, 0, ['OUTDENT', 2, token[2]]) - break - parens += 1 if tok[0] is '(' - parens -= 1 if tok[0] is ')' - return 1 unless token[0] is 'THEN' - @tokens.splice(i, 1) - return 0 + break unless @tokens[i] + move: block(@tokens[i - 1], @tokens[i], @tokens[i + 1], i) + i += move + true -# Ensure that all listed pairs of tokens are correctly balanced throughout -# the course of the token stream. -re::ensure_balance: (pairs) -> - levels: {} - @scan_tokens (prev, token, post, i) => - for pair in pairs - [open, close]: pair - levels[open] ||= 0 - levels[open] += 1 if token[0] is open - levels[open] -= 1 if token[0] is close - throw new Error("too many " + token[1]) if levels[open] < 0 - return 1 - unclosed: key for key, value of levels when value > 0 - throw new Error("unclosed " + unclosed[0]) if unclosed.length - -# We'd like to support syntax like this: -# el.click((event) -> -# el.hide()) -# In order to accomplish this, move outdents that follow closing parens -# inwards, safely. The steps to accomplish this are: -# -# 1. Check that all paired tokens are balanced and in order. -# 2. Rewrite the stream with a stack: if you see an '(' or INDENT, add it -# to the stack. If you see an ')' or OUTDENT, pop the stack and replace -# it with the inverse of what we've just popped. -# 3. Keep track of "debt" for tokens that we fake, to make sure we end -# up balanced in the end. -# -re::rewrite_closing_parens: -> - stack: [] - debt: {} - (debt[key]: 0) for key, val of INVERSES - @scan_tokens (prev, token, post, i) => - tag: token[0] - inv: INVERSES[token[0]] - # Push openers onto the stack. - if EXPRESSION_START.indexOf(tag) >= 0 - stack.push(token) - return 1 - # The end of an expression, check stack and debt for a pair. - else if EXPRESSION_TAIL.indexOf(tag) >= 0 - # If the tag is already in our debt, swallow it. - if debt[inv] > 0 - debt[inv] -= 1 - @tokens.splice(i, 1) + # Massage newlines and indentations so that comments don't have to be + # correctly indented, or appear on their own line. + adjust_comments: -> + @scan_tokens (prev, token, post, i) => + return 1 unless token[0] is 'COMMENT' + before: @tokens[i - 2] + after: @tokens[i + 2] + if before and after and + ((before[0] is 'INDENT' and after[0] is 'OUTDENT') or + (before[0] is 'OUTDENT' and after[0] is 'INDENT')) and + before[1] is after[1] + @tokens.splice(i + 2, 1) + @tokens.splice(i - 2, 1) return 0 + else if prev and prev[0] is 'TERMINATOR' and after and after[0] is 'INDENT' + @tokens.splice(i + 2, 1) + @tokens[i - 1]: after + return 1 + else if prev and prev[0] isnt 'TERMINATOR' and prev[0] isnt 'INDENT' and prev[0] isnt 'OUTDENT' + @tokens.splice(i, 0, ['TERMINATOR', "\n", prev[2]]) + return 2 else - # Pop the stack of open delimiters. - match: stack.pop() - mtag: match[0] - # Continue onwards if it's the expected tag. - if tag is INVERSES[mtag] - return 1 - else - # Unexpected close, insert correct close, adding to the debt. - debt[mtag] += 1 - val: if mtag is 'INDENT' then match[1] else INVERSES[mtag] - @tokens.splice(i, 0, [INVERSES[mtag], val]) - return 1 - else + return 1 + + # Leading newlines would introduce an ambiguity in the grammar, so we + # dispatch them here. + remove_leading_newlines: -> + @tokens.shift() if @tokens[0][0] is 'TERMINATOR' + + # Some blocks occur in the middle of expressions -- when we're expecting + # this, remove their trailing newlines. + remove_mid_expression_newlines: -> + @scan_tokens (prev, token, post, i) => + return 1 unless post and EXPRESSION_CLOSE.indexOf(post[0]) >= 0 and token[0] is 'TERMINATOR' + @tokens.splice(i, 1) + return 0 + + # Make sure that we don't accidentally break trailing commas, which need + # to go on the outside of expression closers. + move_commas_outside_outdents: -> + @scan_tokens (prev, token, post, i) => + @tokens.splice(i, 1, token) if token[0] is 'OUTDENT' and prev[0] is ',' return 1 + + # We've tagged the opening parenthesis of a method call, and the opening + # bracket of an indexing operation. Match them with their close. + close_open_calls_and_indexes: -> + parens: [0] + brackets: [0] + @scan_tokens (prev, token, post, i) => + switch token[0] + when 'CALL_START' then parens.push(0) + when 'INDEX_START' then brackets.push(0) + when '(' then parens[parens.length - 1] += 1 + when '[' then brackets[brackets.length - 1] += 1 + when ')' + if parens[parens.length - 1] is 0 + parens.pop() + token[0]: 'CALL_END' + else + parens[parens.length - 1] -= 1 + when ']' + if brackets[brackets.length - 1] == 0 + brackets.pop() + token[0]: 'INDEX_END' + else + brackets[brackets.length - 1] -= 1 + return 1 + + # Methods may be optionally called without parentheses, for simple cases. + # Insert the implicit parentheses here, so that the parser doesn't have to + # deal with them. + add_implicit_parentheses: -> + stack: [0] + @scan_tokens (prev, token, post, i) => + tag: token[0] + stack.push(0) if tag is 'INDENT' + if tag is 'OUTDENT' + last: stack.pop() + stack[stack.length - 1] += last + if IMPLICIT_END.indexOf(tag) >= 0 or !post? + return 1 if tag is 'INDENT' and prev and IMPLICIT_BLOCK.indexOf(prev[0]) >= 0 + if stack[stack.length - 1] > 0 or tag is 'INDENT' + idx: if tag is 'OUTDENT' then i + 1 else i + stack_pointer: if tag is 'INDENT' then 2 else 1 + for tmp in [0...stack[stack.length - stack_pointer]] + @tokens.splice(idx, 0, ['CALL_END', ')', token[2]]) + size: stack[stack.length - stack_pointer] + 1 + stack[stack.length - stack_pointer]: 0 + return size + return 1 unless prev and IMPLICIT_FUNC.indexOf(prev[0]) >= 0 and IMPLICIT_CALL.indexOf(tag) >= 0 + @tokens.splice(i, 0, ['CALL_START', '(', token[2]]) + stack[stack.length - 1] += 1 + return 2 + + # Because our grammar is LALR(1), it can't handle some single-line + # expressions that lack ending delimiters. Use the lexer to add the implicit + # blocks, so it doesn't need to. + # ')' can close a single-line block, but we need to make sure it's balanced. + add_implicit_indentation: -> + @scan_tokens (prev, token, post, i) => + return 1 unless SINGLE_LINERS.indexOf(token[0]) >= 0 and post[0] isnt 'INDENT' and + not (token[0] is 'ELSE' and post[0] is 'IF') + starter: token[0] + @tokens.splice(i + 1, 0, ['INDENT', 2, token[2]]) + idx: i + 1 + parens: 0 + while true + idx += 1 + tok: @tokens[idx] + pre: @tokens[idx - 1] + if (not tok or + (SINGLE_CLOSERS.indexOf(tok[0]) >= 0 and tok[1] isnt ';') or + (pre[0] is ',' and tok[0] is 'PARAM_START') or + (tok[0] is ')' && parens is 0)) and + not (starter is 'ELSE' and tok[0] is 'ELSE') + insertion: if pre[0] is "," then idx - 1 else idx + @tokens.splice(insertion, 0, ['OUTDENT', 2, token[2]]) + break + parens += 1 if tok[0] is '(' + parens -= 1 if tok[0] is ')' + return 1 unless token[0] is 'THEN' + @tokens.splice(i, 1) + return 0 + + # Ensure that all listed pairs of tokens are correctly balanced throughout + # the course of the token stream. + ensure_balance: (pairs) -> + levels: {} + @scan_tokens (prev, token, post, i) => + for pair in pairs + [open, close]: pair + levels[open] ||= 0 + levels[open] += 1 if token[0] is open + levels[open] -= 1 if token[0] is close + throw new Error("too many " + token[1]) if levels[open] < 0 + return 1 + unclosed: key for key, value of levels when value > 0 + throw new Error("unclosed " + unclosed[0]) if unclosed.length + + # We'd like to support syntax like this: + # el.click((event) -> + # el.hide()) + # In order to accomplish this, move outdents that follow closing parens + # inwards, safely. The steps to accomplish this are: + # + # 1. Check that all paired tokens are balanced and in order. + # 2. Rewrite the stream with a stack: if you see an '(' or INDENT, add it + # to the stack. If you see an ')' or OUTDENT, pop the stack and replace + # it with the inverse of what we've just popped. + # 3. Keep track of "debt" for tokens that we fake, to make sure we end + # up balanced in the end. + # + rewrite_closing_parens: -> + stack: [] + debt: {} + (debt[key]: 0) for key, val of INVERSES + @scan_tokens (prev, token, post, i) => + tag: token[0] + inv: INVERSES[token[0]] + # Push openers onto the stack. + if EXPRESSION_START.indexOf(tag) >= 0 + stack.push(token) + return 1 + # The end of an expression, check stack and debt for a pair. + else if EXPRESSION_TAIL.indexOf(tag) >= 0 + # If the tag is already in our debt, swallow it. + if debt[inv] > 0 + debt[inv] -= 1 + @tokens.splice(i, 1) + return 0 + else + # Pop the stack of open delimiters. + match: stack.pop() + mtag: match[0] + # Continue onwards if it's the expected tag. + if tag is INVERSES[mtag] + return 1 + else + # Unexpected close, insert correct close, adding to the debt. + debt[mtag] += 1 + val: if mtag is 'INDENT' then match[1] else INVERSES[mtag] + @tokens.splice(i, 0, [INVERSES[mtag], val]) + return 1 + else + return 1 diff --git a/src/scope.coffee b/src/scope.coffee index 48b35248..c15528d6 100644 --- a/src/scope.coffee +++ b/src/scope.coffee @@ -7,69 +7,70 @@ this.exports: this unless process? # Initialize a scope with its parent, for lookups up the chain, # as well as the Expressions body where it should declare its variables, # and the function that it wraps. -Scope: exports.Scope: (parent, expressions, method) -> - [@parent, @expressions, @method]: [parent, expressions, method] - @variables: {} - @temp_var: if @parent then @parent.temp_var else '_a' - this +exports.Scope: class Scope -# Look up a variable in lexical scope, or declare it if not found. -Scope::find: (name) -> - return true if @check name - @variables[name]: 'var' - false + constructor: (parent, expressions, method) -> + [@parent, @expressions, @method]: [parent, expressions, method] + @variables: {} + @temp_var: if @parent then @parent.temp_var else '_a' -# Define a local variable as originating from a parameter in current scope -# -- no var required. -Scope::parameter: (name) -> - @variables[name]: 'param' + # Look up a variable in lexical scope, or declare it if not found. + find: (name) -> + return true if @check name + @variables[name]: 'var' + false -# Just check to see if a variable has already been declared. -Scope::check: (name) -> - return true if @variables[name] - !!(@parent and @parent.check(name)) + # Define a local variable as originating from a parameter in current scope + # -- no var required. + parameter: (name) -> + @variables[name]: 'param' -# You can reset a found variable on the immediate scope. -Scope::reset: (name) -> - delete @variables[name] + # Just check to see if a variable has already been declared. + check: (name) -> + return true if @variables[name] + !!(@parent and @parent.check(name)) -# Find an available, short, name for a compiler-generated variable. -Scope::free_variable: -> - while @check @temp_var - ordinal: 1 + parseInt @temp_var.substr(1), 36 - @temp_var: '_' + ordinal.toString(36).replace(/\d/g, 'a') - @variables[@temp_var]: 'var' - @temp_var + # You can reset a found variable on the immediate scope. + reset: (name) -> + delete @variables[name] -# Ensure that an assignment is made at the top of scope (or top-level -# scope, if requested). -Scope::assign: (name, value, top_level) -> - return @parent.assign(name, value, top_level) if top_level and @parent - @variables[name]: {value: value, assigned: true} + # Find an available, short, name for a compiler-generated variable. + free_variable: -> + while @check @temp_var + ordinal: 1 + parseInt @temp_var.substr(1), 36 + @temp_var: '_' + ordinal.toString(36).replace(/\d/g, 'a') + @variables[@temp_var]: 'var' + @temp_var -# Does this scope reference any variables that need to be declared in the -# given function body? -Scope::has_declarations: (body) -> - body is @expressions and @declared_variables().length + # Ensure that an assignment is made at the top of scope (or top-level + # scope, if requested). + assign: (name, value, top_level) -> + return @parent.assign(name, value, top_level) if top_level and @parent + @variables[name]: {value: value, assigned: true} -# Does this scope reference any assignments that need to be declared at the -# top of the given function body? -Scope::has_assignments: (body) -> - body is @expressions and @assigned_variables().length + # Does this scope reference any variables that need to be declared in the + # given function body? + has_declarations: (body) -> + body is @expressions and @declared_variables().length -# Return the list of variables first declared in current scope. -Scope::declared_variables: -> - (key for key, val of @variables when val is 'var').sort() + # Does this scope reference any assignments that need to be declared at the + # top of the given function body? + has_assignments: (body) -> + body is @expressions and @assigned_variables().length -# Return the list of variables that are supposed to be assigned at the top -# of scope. -Scope::assigned_variables: -> - key + ' = ' + val.value for key, val of @variables when val.assigned + # Return the list of variables first declared in current scope. + declared_variables: -> + (key for key, val of @variables when val is 'var').sort() -# Compile the string representing all of the declared variables for this scope. -Scope::compiled_declarations: -> - @declared_variables().join ', ' + # Return the list of variables that are supposed to be assigned at the top + # of scope. + assigned_variables: -> + key + ' = ' + val.value for key, val of @variables when val.assigned -# Compile the string performing all of the variable assignments for this scope. -Scope::compiled_assignments: -> - @assigned_variables().join ', ' + # Compile the string representing all of the declared variables for this scope. + compiled_declarations: -> + @declared_variables().join ', ' + + # Compile the string performing all of the variable assignments for this scope. + compiled_assignments: -> + @assigned_variables().join ', '