From 90f96af720a142af7a353e78ed2616c74e6af896 Mon Sep 17 00:00:00 2001 From: Jeremy Ashkenas Date: Mon, 31 May 2010 14:42:30 -0400 Subject: [PATCH] Simpler stab at a fix for issue #397. Allow CoffeeScript-only keywords to be used as accessors, or in assignment. --- lib/lexer.js | 29 ++++++---- lib/rewriter.js | 137 ++------------------------------------------ src/lexer.coffee | 28 +++++---- src/rewriter.coffee | 65 +-------------------- 4 files changed, 43 insertions(+), 216 deletions(-) diff --git a/lib/lexer.js b/lib/lexer.js index e8d49c64..dedb1c24 100644 --- a/lib/lexer.js +++ b/lib/lexer.js @@ -1,5 +1,5 @@ (function(){ - var ASSIGNMENT, CALLABLE, CODE, COFFEE_ALIASES, COFFEE_KEYWORDS, COMMENT, COMMENT_CLEANER, HALF_ASSIGNMENTS, HEREDOC, HEREDOC_INDENT, IDENTIFIER, INTERPOLATION, JS_CLEANER, JS_FORBIDDEN, JS_KEYWORDS, KEYWORDS, LAST_DENT, LAST_DENTS, LINE_BREAK, Lexer, MULTILINER, MULTI_DENT, NOT_REGEX, NO_NEWLINE, NUMBER, OPERATOR, REGEX_END, REGEX_ESCAPE, REGEX_INTERPOLATION, REGEX_START, RESERVED, Rewriter, STRING_NEWLINES, WHITESPACE, _a, _b, _c, balanced_string, compact, count, helpers, include, starts; + var ASSIGNED, ASSIGNMENT, CALLABLE, CODE, COFFEE_ALIASES, COFFEE_KEYWORDS, COMMENT, COMMENT_CLEANER, CONVERSIONS, HALF_ASSIGNMENTS, HEREDOC, HEREDOC_INDENT, IDENTIFIER, INTERPOLATION, JS_CLEANER, JS_FORBIDDEN, JS_KEYWORDS, LAST_DENT, LAST_DENTS, LINE_BREAK, Lexer, MULTILINER, MULTI_DENT, NOT_REGEX, NO_NEWLINE, NUMBER, OPERATOR, REGEX_END, REGEX_ESCAPE, REGEX_INTERPOLATION, REGEX_START, RESERVED, Rewriter, STRING_NEWLINES, WHITESPACE, _a, _b, _c, balanced_string, compact, count, helpers, include, starts; var __slice = Array.prototype.slice; // The CoffeeScript Lexer. Uses a series of token-matching regexes to attempt // matches against the beginning of the source code. When a match is found, @@ -126,13 +126,13 @@ // referenced as property names here, so you can still do `jQuery.is()` even // though `is` means `===` otherwise. Lexer.prototype.identifier_token = function() { - var accessed, id, operator, tag; + var forced_identifier, id, tag; if (!(id = this.match(IDENTIFIER, 1))) { return false; } - accessed = this.tag_accessor(); + forced_identifier = this.tag_accessor() || this.match(ASSIGNED, 1); tag = 'IDENTIFIER'; - if (!accessed && include(KEYWORDS, id)) { + if (include(JS_KEYWORDS, id) || (!forced_identifier && include(COFFEE_KEYWORDS, id))) { tag = id.toUpperCase(); } if (include(RESERVED, id)) { @@ -142,9 +142,9 @@ tag = 'LEADING_WHEN'; } this.i += id.length; - if (!(accessed)) { - if ((operator = Rewriter.alias_operator(id))) { - tag = (id = operator); + if (!(forced_identifier)) { + if (include(COFFEE_ALIASES, id)) { + tag = (id = CONVERSIONS[id]); } if (this.prev() && this.prev()[0] === 'ASSIGN' && include(HALF_ASSIGNMENTS, tag)) { return this.tag_half_assignment(tag); @@ -154,6 +154,7 @@ return true; }; // Matches numbers, including decimals, hex, and exponential notation. + // Be careful not to interfere with ranges-in-progress. Lexer.prototype.number_token = function() { var number; if (!(number = this.match(NUMBER, 1))) { @@ -650,9 +651,6 @@ // be used standalone, but you can reference them as an attached property. COFFEE_ALIASES = ["and", "or", "is", "isnt", "not"]; COFFEE_KEYWORDS = COFFEE_ALIASES.concat(["then", "unless", "until", "yes", "no", "on", "off", "of", "by", "where", "when"]); - // The combined list of keywords is the superset that gets passed verbatim to - // the parser. - KEYWORDS = JS_KEYWORDS.concat(COFFEE_KEYWORDS); // The list of keywords that are reserved by JavaScript, but not used, or are // used by CoffeeScript internally. We throw an error when these are encountered, // to avoid having a JavaScript error at runtime. @@ -672,7 +670,7 @@ MULTI_DENT = /^((\n([ \t]*))+)(\.)?/; LAST_DENTS = /\n([ \t]*)/g; LAST_DENT = /\n([ \t]*)/; - ASSIGNMENT = /^(:|=)$/; + ASSIGNMENT = /^[:=]$/; // Regex-matching-regexes. REGEX_START = /^\/[^\/ ]/; REGEX_INTERPOLATION = /([^\\]\$[a-zA-Z_@]|[^\\]\$\{.*[^\\]\})/; @@ -685,6 +683,7 @@ COMMENT_CLEANER = /(^[ \t]*#|\n[ \t]*$)/mg; NO_NEWLINE = /^([+\*&|\/\-%=<>:!.\\][<>=&|]*|and|or|is|isnt|not|delete|typeof|instanceof)$/; HEREDOC_INDENT = /(\n+([ \t]*)|^([ \t]+))/g; + ASSIGNED = /^([a-zA-Z\$_]\w*[ \t]*?[:=])/; // Tokens which a regular expression will never immediately follow, but which // a division operator might. // See: http://www.mozilla.org/js/language/js20-2002-04/rationale/syntax.html#regular-expressions @@ -700,4 +699,12 @@ LINE_BREAK = ['INDENT', 'OUTDENT', 'TERMINATOR']; // Half-assignments... HALF_ASSIGNMENTS = ['-', '+', '/', '*', '%', '||', '&&', '?']; + // Conversions from CoffeeScript operators into JavaScript ones. + CONVERSIONS = { + 'and': '&&', + 'or': '||', + 'is': '==', + 'isnt': '!=', + 'not': '!' + }; })(); diff --git a/lib/rewriter.js b/lib/rewriter.js index b824e454..d86d0cd0 100644 --- a/lib/rewriter.js +++ b/lib/rewriter.js @@ -1,10 +1,10 @@ (function(){ - var BALANCED_PAIRS, CONVERSIONS, EXPRESSION_CLOSE, EXPRESSION_END, EXPRESSION_START, IMPLICIT_BLOCK, IMPLICIT_CALL, IMPLICIT_END, IMPLICIT_FUNC, INVERSES, Rewriter, SINGLE_CLOSERS, SINGLE_LINERS, _a, _b, _c, _d, _e, _f, _g, _h, _i, _j, _k, _l, _m, exits, helpers, include, pair; - var __hasProp = Object.prototype.hasOwnProperty, __slice = Array.prototype.slice, __bind = function(func, obj, args) { + var BALANCED_PAIRS, EXPRESSION_CLOSE, EXPRESSION_END, EXPRESSION_START, IMPLICIT_BLOCK, IMPLICIT_CALL, IMPLICIT_END, IMPLICIT_FUNC, INVERSES, Rewriter, SINGLE_CLOSERS, SINGLE_LINERS, _a, _b, _c, _d, _e, _f, _g, _h, _i, _j, _k, _l, _m, helpers, include, pair; + var __slice = Array.prototype.slice, __bind = function(func, obj, args) { return function() { return func.apply(obj || {}, args ? args.concat(__slice.call(arguments, 0)) : arguments); }; - }; + }, __hasProp = Object.prototype.hasOwnProperty; // The CoffeeScript language has a good deal of optional syntax, implicit syntax, // and shorthand syntax. This can greatly complicate a grammar and bloat // the resulting parse table. Instead of making the parser handle it all, we take @@ -22,32 +22,6 @@ // Import the helpers we need. _b = helpers; include = _b.include; - // Helper method to check if the given stream of tokens matches the exit conditions - exits = function(prev, token, post, cond) { - var _c, _d, _e, _f, args, k, length, match, pair, v; - length = 0; - match = 0; - args = { - prev: prev, - token: token, - post: post - }; - _c = args; - for (k in _c) { if (__hasProp.call(_c, k)) { - v = _c[k]; - if (k in cond) { - length += 1; - _e = cond[k]; - for (_d = 0, _f = _e.length; _d < _f; _d++) { - pair = _e[_d]; - if (v[0] === pair[0] && v[1] === pair[1]) { - match += 1; - } - } - } - }} - return match === length; - }; // The **Rewriter** class is used by the [Lexer](lexer.html), directly against // its internal array of tokens. exports.Rewriter = (function() { @@ -65,7 +39,6 @@ this.close_open_calls_and_indexes(); this.add_implicit_indentation(); this.add_implicit_parentheses(); - this.rewrite_object_keys(); this.ensure_balance(BALANCED_PAIRS); this.rewrite_closing_parens(); return this.tokens; @@ -271,74 +244,6 @@ return 0; }, this)); }; - // Allow reserved words to be used as object keys. We scan the token stream - // until we enter an object. Any token before an assignment is considered the - // key which we rewrite back to an `IDENTIFIER`. - Rewriter.prototype.rewrite_object_keys = function() { - var levels; - levels = []; - return this.scan_tokens(__bind(function(prev, token, post, i) { - var _c, _d, _e, after, alias, balanced, last, pair, popped; - if (token[0] === '{') { - levels.push({ - rewrite: true, - cond: { - token: [['}', '}']] - } - }); - } else if (levels.length) { - popped = false; - while ((last = levels[levels.length - 1]) && exits(prev, token, post, last.cond)) { - levels.pop(); - popped = true; - } - if (!popped) { - balanced = false; - _d = BALANCED_PAIRS; - for (_c = 0, _e = _d.length; _c < _e; _c++) { - pair = _d[_c]; - if (post && post[0] === pair[0]) { - levels.push({ - rewrite: false, - cond: { - token: [[pair[1], pair[2] || post[1]]] - } - }); - balanced = true; - } - } - if (token[0] === 'ASSIGN') { - if (last.rewrite) { - prev[0] = 'IDENTIFIER'; - } - if (last.rewrite && (alias = Rewriter.alias_operator(prev[1], true))) { - prev[1] = alias; - } - if (!balanced) { - after = this.tokens[i + 2]; - if (post && post[0] === '->' && after && after[0] === 'INDENT') { - levels.push({ - rewrite: false, - cond: { - prev: [['OUTDENT', after[1]]], - token: [['TERMINATOR', '\n']] - } - }); - } else if (last.rewrite) { - levels.push({ - rewrite: false, - cond: { - token: [[',', ','], ['TERMINATOR', '\n'], ['}', '}']] - } - }); - } - } - } - } - } - return 1; - }, this)); - }; // Ensure that all listed pairs of tokens are correctly balanced throughout // the course of the token stream. Rewriter.prototype.ensure_balance = function(pairs) { @@ -439,36 +344,12 @@ } }, this)); }; - // Rewriter Properties - // ---------------- - // Alias an identifier to a Coffee operator or vice versa. - Rewriter.alias_operator = function(id, reverse) { - var _c, _d, k, v; - if (!reverse) { - _c = CONVERSIONS; - for (k in _c) { if (__hasProp.call(_c, k)) { - if (id === k) { - return CONVERSIONS[k]; - } - }} - } - if (reverse) { - _d = CONVERSIONS; - for (k in _d) { if (__hasProp.call(_d, k)) { - v = _d[k]; - if (id === v) { - return k; - } - }} - } - return false; - }; return Rewriter; - }).call(this); + })(); // Constants // --------- // List of the token pairs that must be balanced. - BALANCED_PAIRS = [['(', ')', ')'], ['[', ']', ']'], ['{', '}', '}'], ['INDENT', 'OUTDENT'], ['PARAM_START', 'PARAM_END', ')'], ['CALL_START', 'CALL_END', ')'], ['INDEX_START', 'INDEX_END', ']'], ['SOAKED_INDEX_START', 'SOAKED_INDEX_END', ']']]; + BALANCED_PAIRS = [['(', ')'], ['[', ']'], ['{', '}'], ['INDENT', 'OUTDENT'], ['PARAM_START', 'PARAM_END'], ['CALL_START', 'CALL_END'], ['INDEX_START', 'INDEX_END'], ['SOAKED_INDEX_START', 'SOAKED_INDEX_END']]; // The inverse mappings of `BALANCED_PAIRS` we're trying to fix up, so we can // look things up from either end. INVERSES = {}; @@ -510,12 +391,4 @@ // The grammar can't disambiguate them, so we insert the implicit indentation. SINGLE_LINERS = ['ELSE', "->", "=>", 'TRY', 'FINALLY', 'THEN']; SINGLE_CLOSERS = ['TERMINATOR', 'CATCH', 'FINALLY', 'ELSE', 'OUTDENT', 'LEADING_WHEN']; - // Conversions from CoffeeScript operators into JavaScript ones. - CONVERSIONS = { - 'and': '&&', - 'or': '||', - 'is': '==', - 'isnt': '!=', - 'not': '!' - }; })(); diff --git a/src/lexer.coffee b/src/lexer.coffee index af59d60b..f7210be4 100644 --- a/src/lexer.coffee +++ b/src/lexer.coffee @@ -89,14 +89,14 @@ exports.Lexer: class Lexer # though `is` means `===` otherwise. identifier_token: -> return false unless id: @match IDENTIFIER, 1 - accessed: @tag_accessor() + forced_identifier: @tag_accessor() or @match ASSIGNED, 1 tag: 'IDENTIFIER' - tag: id.toUpperCase() if not accessed and include(KEYWORDS, id) - @identifier_error id if include RESERVED, id - tag: 'LEADING_WHEN' if tag is 'WHEN' and include LINE_BREAK, @tag() + tag: id.toUpperCase() if include(JS_KEYWORDS, id) or (not forced_identifier and include(COFFEE_KEYWORDS, id)) + @identifier_error id if include RESERVED, id + tag: 'LEADING_WHEN' if tag is 'WHEN' and include LINE_BREAK, @tag() @i: + id.length - unless accessed - tag: id: operator if (operator: Rewriter.alias_operator id) + unless forced_identifier + tag: id: CONVERSIONS[id] if include COFFEE_ALIASES, id return @tag_half_assignment tag if @prev() and @prev()[0] is 'ASSIGN' and include HALF_ASSIGNMENTS, tag @token tag, id true @@ -468,10 +468,6 @@ COFFEE_KEYWORDS: COFFEE_ALIASES.concat [ "of", "by", "where", "when" ] -# The combined list of keywords is the superset that gets passed verbatim to -# the parser. -KEYWORDS: JS_KEYWORDS.concat COFFEE_KEYWORDS - # The list of keywords that are reserved by JavaScript, but not used, or are # used by CoffeeScript internally. We throw an error when these are encountered, # to avoid having a JavaScript error at runtime. @@ -496,7 +492,7 @@ CODE : /^((-|=)>)/ MULTI_DENT : /^((\n([ \t]*))+)(\.)?/ LAST_DENTS : /\n([ \t]*)/g LAST_DENT : /\n([ \t]*)/ -ASSIGNMENT : /^(:|=)$/ +ASSIGNMENT : /^[:=]$/ # Regex-matching-regexes. REGEX_START : /^\/[^\/ ]/ @@ -511,6 +507,7 @@ STRING_NEWLINES : /\n[ \t]*/g COMMENT_CLEANER : /(^[ \t]*#|\n[ \t]*$)/mg NO_NEWLINE : /^([+\*&|\/\-%=<>:!.\\][<>=&|]*|and|or|is|isnt|not|delete|typeof|instanceof)$/ HEREDOC_INDENT : /(\n+([ \t]*)|^([ \t]+))/g +ASSIGNED : /^([a-zA-Z\$_]\w*[ \t]*?[:=])/ # Tokens which a regular expression will never immediately follow, but which # a division operator might. @@ -534,3 +531,12 @@ LINE_BREAK: ['INDENT', 'OUTDENT', 'TERMINATOR'] # Half-assignments... HALF_ASSIGNMENTS: ['-', '+', '/', '*', '%', '||', '&&', '?'] + +# Conversions from CoffeeScript operators into JavaScript ones. +CONVERSIONS: { + 'and': '&&' + 'or': '||' + 'is': '==' + 'isnt': '!=' + 'not': '!' +} diff --git a/src/rewriter.coffee b/src/rewriter.coffee index 76d0d1fc..e9d90f91 100644 --- a/src/rewriter.coffee +++ b/src/rewriter.coffee @@ -15,17 +15,6 @@ else # Import the helpers we need. {include}: helpers -# Helper method to check if the given stream of tokens matches the exit conditions -exits: (prev, token, post, cond) -> - length: 0 - match: 0 - args: {prev, token, post} - for k, v of args when k in cond - length: + 1 - for pair in cond[k] - match: + 1 if v[0] is pair[0] and v[1] is pair[1] - return match is length - # The **Rewriter** class is used by the [Lexer](lexer.html), directly against # its internal array of tokens. exports.Rewriter: class Rewriter @@ -43,7 +32,6 @@ exports.Rewriter: class Rewriter @close_open_calls_and_indexes() @add_implicit_indentation() @add_implicit_parentheses() - @rewrite_object_keys() @ensure_balance BALANCED_PAIRS @rewrite_closing_parens() @tokens @@ -193,35 +181,6 @@ exports.Rewriter: class Rewriter @tokens.splice i, 1 return 0 - # Allow reserved words to be used as object keys. We scan the token stream - # until we enter an object. Any token before an assignment is considered the - # key which we rewrite back to an `IDENTIFIER`. - rewrite_object_keys: -> - levels: [] - @scan_tokens (prev, token, post, i) => - if token[0] is '{' - levels.push {rewrite: yes, cond: {token: [['}','}']]}} - else if levels.length - popped: no - while (last: levels[levels.length - 1]) and exits prev, token, post, last.cond - levels.pop() - popped: yes - if not popped - balanced: no - for pair in BALANCED_PAIRS when post and post[0] is pair[0] - levels.push {rewrite: no, cond: {token: [[pair[1], pair[2] or post[1]]]}} - balanced: yes - if token[0] is 'ASSIGN' - prev[0]: 'IDENTIFIER' if last.rewrite - prev[1]: alias if last.rewrite and (alias: Rewriter.alias_operator prev[1], yes) - if not balanced - after: @tokens[i + 2] - if post and post[0] is '->' and after and after[0] is 'INDENT' - levels.push {rewrite: no, cond: {prev: [['OUTDENT', after[1]]], token: [['TERMINATOR', '\n']]}} - else if last.rewrite - levels.push {rewrite: no, cond: {token: [[',', ','], ['TERMINATOR', '\n'], ['}', '}']]}} - return 1 - # Ensure that all listed pairs of tokens are correctly balanced throughout # the course of the token stream. ensure_balance: (pairs) -> @@ -290,22 +249,13 @@ exports.Rewriter: class Rewriter else return 1 - # Rewriter Properties - # ---------------- - - # Alias an identifier to a Coffee operator or vice versa. - @alias_operator: (id, reverse) -> - (return CONVERSIONS[k]) for k of CONVERSIONS when id is k if not reverse - (return k) for k, v of CONVERSIONS when id is v if reverse - false - # Constants # --------- # List of the token pairs that must be balanced. -BALANCED_PAIRS: [['(', ')', ')'], ['[', ']', ']'], ['{', '}', '}'], ['INDENT', 'OUTDENT'], - ['PARAM_START', 'PARAM_END', ')'], ['CALL_START', 'CALL_END', ')'], - ['INDEX_START', 'INDEX_END', ']'], ['SOAKED_INDEX_START', 'SOAKED_INDEX_END', ']']] +BALANCED_PAIRS: [['(', ')'], ['[', ']'], ['{', '}'], ['INDENT', 'OUTDENT'], + ['PARAM_START', 'PARAM_END'], ['CALL_START', 'CALL_END'], + ['INDEX_START', 'INDEX_END'], ['SOAKED_INDEX_START', 'SOAKED_INDEX_END']] # The inverse mappings of `BALANCED_PAIRS` we're trying to fix up, so we can # look things up from either end. @@ -343,12 +293,3 @@ IMPLICIT_END: ['IF', 'UNLESS', 'FOR', 'WHILE', 'UNTIL', 'TERMINATOR', 'INDENT' # The grammar can't disambiguate them, so we insert the implicit indentation. SINGLE_LINERS: ['ELSE', "->", "=>", 'TRY', 'FINALLY', 'THEN'] SINGLE_CLOSERS: ['TERMINATOR', 'CATCH', 'FINALLY', 'ELSE', 'OUTDENT', 'LEADING_WHEN'] - -# Conversions from CoffeeScript operators into JavaScript ones. -CONVERSIONS: { - 'and': '&&' - 'or': '||' - 'is': '==' - 'isnt': '!=' - 'not': '!' -}