Simpler stab at a fix for issue #397. Allow CoffeeScript-only keywords to be used as accessors, or in assignment.

2010-05-31 14:42:30 -04:00 · 2010-05-31 14:42:30 -04:00 · 90f96af720
parent 710b2b5fdc
commit 90f96af720
4 changed files with 43 additions and 216 deletions
--- a/lib/lexer.js
+++ b/lib/lexer.js
@ -1,5 +1,5 @@
 (function(){
-  var ASSIGNMENT, CALLABLE, CODE, COFFEE_ALIASES, COFFEE_KEYWORDS, COMMENT, COMMENT_CLEANER, HALF_ASSIGNMENTS, HEREDOC, HEREDOC_INDENT, IDENTIFIER, INTERPOLATION, JS_CLEANER, JS_FORBIDDEN, JS_KEYWORDS, KEYWORDS, LAST_DENT, LAST_DENTS, LINE_BREAK, Lexer, MULTILINER, MULTI_DENT, NOT_REGEX, NO_NEWLINE, NUMBER, OPERATOR, REGEX_END, REGEX_ESCAPE, REGEX_INTERPOLATION, REGEX_START, RESERVED, Rewriter, STRING_NEWLINES, WHITESPACE, _a, _b, _c, balanced_string, compact, count, helpers, include, starts;
+  var ASSIGNED, ASSIGNMENT, CALLABLE, CODE, COFFEE_ALIASES, COFFEE_KEYWORDS, COMMENT, COMMENT_CLEANER, CONVERSIONS, HALF_ASSIGNMENTS, HEREDOC, HEREDOC_INDENT, IDENTIFIER, INTERPOLATION, JS_CLEANER, JS_FORBIDDEN, JS_KEYWORDS, LAST_DENT, LAST_DENTS, LINE_BREAK, Lexer, MULTILINER, MULTI_DENT, NOT_REGEX, NO_NEWLINE, NUMBER, OPERATOR, REGEX_END, REGEX_ESCAPE, REGEX_INTERPOLATION, REGEX_START, RESERVED, Rewriter, STRING_NEWLINES, WHITESPACE, _a, _b, _c, balanced_string, compact, count, helpers, include, starts;
  var __slice = Array.prototype.slice;
  // The CoffeeScript Lexer. Uses a series of token-matching regexes to attempt
  // matches against the beginning of the source code. When a match is found,
@ -126,13 +126,13 @@
    // referenced as property names here, so you can still do `jQuery.is()` even
    // though `is` means `===` otherwise.
    Lexer.prototype.identifier_token = function() {
-      var accessed, id, operator, tag;
+      var forced_identifier, id, tag;
      if (!(id = this.match(IDENTIFIER, 1))) {
        return false;
      }
-      accessed = this.tag_accessor();
+      forced_identifier = this.tag_accessor() || this.match(ASSIGNED, 1);
      tag = 'IDENTIFIER';
-      if (!accessed && include(KEYWORDS, id)) {
+      if (include(JS_KEYWORDS, id) || (!forced_identifier && include(COFFEE_KEYWORDS, id))) {
        tag = id.toUpperCase();
      }
      if (include(RESERVED, id)) {
@ -142,9 +142,9 @@
        tag = 'LEADING_WHEN';
      }
      this.i += id.length;
-      if (!(accessed)) {
-        if ((operator = Rewriter.alias_operator(id))) {
-          tag = (id = operator);
+      if (!(forced_identifier)) {
+        if (include(COFFEE_ALIASES, id)) {
+          tag = (id = CONVERSIONS[id]);
        }
        if (this.prev() && this.prev()[0] === 'ASSIGN' && include(HALF_ASSIGNMENTS, tag)) {
          return this.tag_half_assignment(tag);
@ -154,6 +154,7 @@
      return true;
    };
    // Matches numbers, including decimals, hex, and exponential notation.
+    // Be careful not to interfere with ranges-in-progress.
    Lexer.prototype.number_token = function() {
      var number;
      if (!(number = this.match(NUMBER, 1))) {
@ -650,9 +651,6 @@
  // be used standalone, but you can reference them as an attached property.
  COFFEE_ALIASES = ["and", "or", "is", "isnt", "not"];
  COFFEE_KEYWORDS = COFFEE_ALIASES.concat(["then", "unless", "until", "yes", "no", "on", "off", "of", "by", "where", "when"]);
-  // The combined list of keywords is the superset that gets passed verbatim to
-  // the parser.
-  KEYWORDS = JS_KEYWORDS.concat(COFFEE_KEYWORDS);
  // The list of keywords that are reserved by JavaScript, but not used, or are
  // used by CoffeeScript internally. We throw an error when these are encountered,
  // to avoid having a JavaScript error at runtime.
@ -672,7 +670,7 @@
  MULTI_DENT = /^((\n([ \t]*))+)(\.)?/;
  LAST_DENTS = /\n([ \t]*)/g;
  LAST_DENT = /\n([ \t]*)/;
-  ASSIGNMENT = /^(:|=)$/;
+  ASSIGNMENT = /^[:=]$/;
  // Regex-matching-regexes.
  REGEX_START = /^\/[^\/ ]/;
  REGEX_INTERPOLATION = /([^\\]\$[a-zA-Z_@]|[^\\]\$\{.*[^\\]\})/;
@ -685,6 +683,7 @@
  COMMENT_CLEANER = /(^[ \t]*#|\n[ \t]*$)/mg;
  NO_NEWLINE = /^([+\*&|\/\-%=<>:!.\\][<>=&|]*|and|or|is|isnt|not|delete|typeof|instanceof)$/;
  HEREDOC_INDENT = /(\n+([ \t]*)|^([ \t]+))/g;
+  // Matches an identifier that is about to be assigned to: a name, optional
+  // spaces or tabs, then `:` or `=`. `identifier_token` uses a match here to
+  // keep CoffeeScript-only keywords as plain identifiers in that position.
+  // The negative lookahead stops the first `=` of an `==` comparison from
+  // counting as an assignment, so `yes == x` still lexes `yes` as a keyword.
+  ASSIGNED = /^([a-zA-Z\$_]\w*[ \t]*?[:=])(?!=)/;
  // Tokens which a regular expression will never immediately follow, but which
  // a division operator might.
  // See: http://www.mozilla.org/js/language/js20-2002-04/rationale/syntax.html#regular-expressions
@ -700,4 +699,12 @@
  LINE_BREAK = ['INDENT', 'OUTDENT', 'TERMINATOR'];
  // Half-assignments...
  HALF_ASSIGNMENTS = ['-', '+', '/', '*', '%', '||', '&&', '?'];
+  // Conversions from CoffeeScript operators into JavaScript ones. Keyed by the
+  // word operators listed in `COFFEE_ALIASES`; `identifier_token` replaces both
+  // the tag and the value of such a word with the mapped symbol, unless the
+  // word was forced to be a plain identifier (accessor or assignment target).
+  CONVERSIONS = {
+    'and': '&&',
+    'or': '||',
+    'is': '==',
+    'isnt': '!=',
+    'not': '!'
+  };
 })();
--- a/lib/rewriter.js
+++ b/lib/rewriter.js
@ -1,10 +1,10 @@
 (function(){
-  var BALANCED_PAIRS, CONVERSIONS, EXPRESSION_CLOSE, EXPRESSION_END, EXPRESSION_START, IMPLICIT_BLOCK, IMPLICIT_CALL, IMPLICIT_END, IMPLICIT_FUNC, INVERSES, Rewriter, SINGLE_CLOSERS, SINGLE_LINERS, _a, _b, _c, _d, _e, _f, _g, _h, _i, _j, _k, _l, _m, exits, helpers, include, pair;
-  var __hasProp = Object.prototype.hasOwnProperty, __slice = Array.prototype.slice, __bind = function(func, obj, args) {
+  var BALANCED_PAIRS, EXPRESSION_CLOSE, EXPRESSION_END, EXPRESSION_START, IMPLICIT_BLOCK, IMPLICIT_CALL, IMPLICIT_END, IMPLICIT_FUNC, INVERSES, Rewriter, SINGLE_CLOSERS, SINGLE_LINERS, _a, _b, _c, _d, _e, _f, _g, _h, _i, _j, _k, _l, _m, helpers, include, pair;
+  var __slice = Array.prototype.slice, __bind = function(func, obj, args) {
    return function() {
      return func.apply(obj || {}, args ? args.concat(__slice.call(arguments, 0)) : arguments);
    };
-  };
+  }, __hasProp = Object.prototype.hasOwnProperty;
  // The CoffeeScript language has a good deal of optional syntax, implicit syntax,
  // and shorthand syntax. This can greatly complicate a grammar and bloat
  // the resulting parse table. Instead of making the parser handle it all, we take
@ -22,32 +22,6 @@
  // Import the helpers we need.
  _b = helpers;
  include = _b.include;
-  // Helper method to check if the given stream of tokens matches the exit conditions
-  exits = function(prev, token, post, cond) {
-    var _c, _d, _e, _f, args, k, length, match, pair, v;
-    length = 0;
-    match = 0;
-    args = {
-      prev: prev,
-      token: token,
-      post: post
-    };
-    _c = args;
-    for (k in _c) { if (__hasProp.call(_c, k)) {
-      v = _c[k];
-      if (k in cond) {
-        length += 1;
-        _e = cond[k];
-        for (_d = 0, _f = _e.length; _d < _f; _d++) {
-          pair = _e[_d];
-          if (v[0] === pair[0] && v[1] === pair[1]) {
-            match += 1;
-          }
-        }
-      }
-    }}
-    return match === length;
-  };
  // The **Rewriter** class is used by the [Lexer](lexer.html), directly against
  // its internal array of tokens.
  exports.Rewriter = (function() {
@ -65,7 +39,6 @@
      this.close_open_calls_and_indexes();
      this.add_implicit_indentation();
      this.add_implicit_parentheses();
-      this.rewrite_object_keys();
      this.ensure_balance(BALANCED_PAIRS);
      this.rewrite_closing_parens();
      return this.tokens;
@ -271,74 +244,6 @@
          return 0;
        }, this));
    };
-    // Allow reserved words to be used as object keys. We scan the token stream
-    // until we enter an object. Any token before an assignment is considered the
-    // key which we rewrite back to an `IDENTIFIER`.
-    Rewriter.prototype.rewrite_object_keys = function() {
-      var levels;
-      levels = [];
-      return this.scan_tokens(__bind(function(prev, token, post, i) {
-          var _c, _d, _e, after, alias, balanced, last, pair, popped;
-          if (token[0] === '{') {
-            levels.push({
-              rewrite: true,
-              cond: {
-                token: [['}', '}']]
-              }
-            });
-          } else if (levels.length) {
-            popped = false;
-            while ((last = levels[levels.length - 1]) && exits(prev, token, post, last.cond)) {
-              levels.pop();
-              popped = true;
-            }
-            if (!popped) {
-              balanced = false;
-              _d = BALANCED_PAIRS;
-              for (_c = 0, _e = _d.length; _c < _e; _c++) {
-                pair = _d[_c];
-                if (post && post[0] === pair[0]) {
-                  levels.push({
-                    rewrite: false,
-                    cond: {
-                      token: [[pair[1], pair[2] || post[1]]]
-                    }
-                  });
-                  balanced = true;
-                }
-              }
-              if (token[0] === 'ASSIGN') {
-                if (last.rewrite) {
-                  prev[0] = 'IDENTIFIER';
-                }
-                if (last.rewrite && (alias = Rewriter.alias_operator(prev[1], true))) {
-                  prev[1] = alias;
-                }
-                if (!balanced) {
-                  after = this.tokens[i + 2];
-                  if (post && post[0] === '->' && after && after[0] === 'INDENT') {
-                    levels.push({
-                      rewrite: false,
-                      cond: {
-                        prev: [['OUTDENT', after[1]]],
-                        token: [['TERMINATOR', '\n']]
-                      }
-                    });
-                  } else if (last.rewrite) {
-                    levels.push({
-                      rewrite: false,
-                      cond: {
-                        token: [[',', ','], ['TERMINATOR', '\n'], ['}', '}']]
-                      }
-                    });
-                  }
-                }
-              }
-            }
-          }
-          return 1;
-        }, this));
-    };
    // Ensure that all listed pairs of tokens are correctly balanced throughout
    // the course of the token stream.
    Rewriter.prototype.ensure_balance = function(pairs) {
@ -439,36 +344,12 @@
          }
        }, this));
    };
-    // Rewriter Properties
-    // ----------------
-    // Alias an identifier to a Coffee operator or vice versa.
-    Rewriter.alias_operator = function(id, reverse) {
-      var _c, _d, k, v;
-      if (!reverse) {
-        _c = CONVERSIONS;
-        for (k in _c) { if (__hasProp.call(_c, k)) {
-          if (id === k) {
-            return CONVERSIONS[k];
-          }
-        }}
-      }
-      if (reverse) {
-        _d = CONVERSIONS;
-        for (k in _d) { if (__hasProp.call(_d, k)) {
-          v = _d[k];
-          if (id === v) {
-            return k;
-          }
-        }}
-      }
-      return false;
-    };
    return Rewriter;
-  }).call(this);
+  })();
  // Constants
  // ---------
  // List of the token pairs that must be balanced.
-  BALANCED_PAIRS = [['(', ')', ')'], ['[', ']', ']'], ['{', '}', '}'], ['INDENT', 'OUTDENT'], ['PARAM_START', 'PARAM_END', ')'], ['CALL_START', 'CALL_END', ')'], ['INDEX_START', 'INDEX_END', ']'], ['SOAKED_INDEX_START', 'SOAKED_INDEX_END', ']']];
+  BALANCED_PAIRS = [['(', ')'], ['[', ']'], ['{', '}'], ['INDENT', 'OUTDENT'], ['PARAM_START', 'PARAM_END'], ['CALL_START', 'CALL_END'], ['INDEX_START', 'INDEX_END'], ['SOAKED_INDEX_START', 'SOAKED_INDEX_END']];
  // The inverse mappings of `BALANCED_PAIRS` we're trying to fix up, so we can
  // look things up from either end.
  INVERSES = {};
@ -510,12 +391,4 @@
  // The grammar can't disambiguate them, so we insert the implicit indentation.
  SINGLE_LINERS = ['ELSE', "->", "=>", 'TRY', 'FINALLY', 'THEN'];
  SINGLE_CLOSERS = ['TERMINATOR', 'CATCH', 'FINALLY', 'ELSE', 'OUTDENT', 'LEADING_WHEN'];
-  // Conversions from CoffeeScript operators into JavaScript ones.
-  CONVERSIONS = {
-    'and': '&&',
-    'or': '||',
-    'is': '==',
-    'isnt': '!=',
-    'not': '!'
-  };
 })();
--- a/src/lexer.coffee
+++ b/src/lexer.coffee
@ -89,14 +89,14 @@ exports.Lexer: class Lexer
  # though `is` means `===` otherwise.
  identifier_token: ->
    return false unless id: @match IDENTIFIER, 1
-    accessed: @tag_accessor()
+    forced_identifier: @tag_accessor() or @match ASSIGNED, 1
    tag: 'IDENTIFIER'
-    tag: id.toUpperCase()     if not accessed and include(KEYWORDS, id)
-    @identifier_error id      if include RESERVED, id
-    tag: 'LEADING_WHEN'       if tag is 'WHEN' and include LINE_BREAK, @tag()
+    tag: id.toUpperCase() if include(JS_KEYWORDS, id) or (not forced_identifier and include(COFFEE_KEYWORDS, id))
+    @identifier_error id  if include RESERVED, id
+    tag: 'LEADING_WHEN'   if tag is 'WHEN' and include LINE_BREAK, @tag()
    @i: + id.length
-    unless accessed
-      tag: id: operator                if (operator: Rewriter.alias_operator id)
+    unless forced_identifier
+      tag: id: CONVERSIONS[id]         if include COFFEE_ALIASES, id
      return @tag_half_assignment tag  if @prev() and @prev()[0] is 'ASSIGN' and include HALF_ASSIGNMENTS, tag
    @token tag, id
    true
@ -468,10 +468,6 @@ COFFEE_KEYWORDS: COFFEE_ALIASES.concat [
  "of", "by", "where", "when"
 ]

-# The combined list of keywords is the superset that gets passed verbatim to
-# the parser.
-KEYWORDS: JS_KEYWORDS.concat COFFEE_KEYWORDS
-
 # The list of keywords that are reserved by JavaScript, but not used, or are
 # used by CoffeeScript internally. We throw an error when these are encountered,
 # to avoid having a JavaScript error at runtime.
@ -496,7 +492,7 @@ CODE          : /^((-|=)>)/
 MULTI_DENT    : /^((\n([ \t]*))+)(\.)?/
 LAST_DENTS    : /\n([ \t]*)/g
 LAST_DENT     : /\n([ \t]*)/
-ASSIGNMENT    : /^(:|=)$/
+ASSIGNMENT    : /^[:=]$/

 # Regex-matching-regexes.
 REGEX_START        : /^\/[^\/ ]/
@ -511,6 +507,7 @@ STRING_NEWLINES : /\n[ \t]*/g
 COMMENT_CLEANER : /(^[ \t]*#|\n[ \t]*$)/mg
 NO_NEWLINE      : /^([+\*&|\/\-%=<>:!.\\][<>=&|]*|and|or|is|isnt|not|delete|typeof|instanceof)$/
 HEREDOC_INDENT  : /(\n+([ \t]*)|^([ \t]+))/g
+# Matches an identifier that is about to be assigned to: a name, optional
+# spaces or tabs, then `:` or `=`. `identifier_token` uses a match here to
+# keep CoffeeScript-only keywords as plain identifiers in that position.
+# The negative lookahead stops the first `=` of an `==` comparison from
+# counting as an assignment, so `yes == x` still lexes `yes` as a keyword.
+ASSIGNED        : /^([a-zA-Z\$_]\w*[ \t]*?[:=])(?!=)/

 # Tokens which a regular expression will never immediately follow, but which
 # a division operator might.
@ -534,3 +531,12 @@ LINE_BREAK: ['INDENT', 'OUTDENT', 'TERMINATOR']

 # Half-assignments...
 HALF_ASSIGNMENTS: ['-', '+', '/', '*', '%', '||', '&&', '?']
+
+# Conversions from CoffeeScript operators into JavaScript ones. Keyed by the
+# word operators listed in `COFFEE_ALIASES`; `identifier_token` replaces both
+# the tag and the value of such a word with the mapped symbol, unless the
+# word was forced to be a plain identifier (accessor or assignment target).
+CONVERSIONS: {
+  'and':  '&&'
+  'or':   '||'
+  'is':   '=='
+  'isnt': '!='
+  'not':  '!'
+}
--- a/src/rewriter.coffee
+++ b/src/rewriter.coffee
@ -15,17 +15,6 @@ else
 # Import the helpers we need.
 {include}: helpers

-# Helper method to check if the given stream of tokens matches the exit conditions
-exits: (prev, token, post, cond) ->
-  length: 0
-  match:  0
-  args:   {prev, token, post}
-  for k, v of args when k in cond
-    length: + 1
-    for pair in cond[k]
-      match: + 1 if v[0] is pair[0] and v[1] is pair[1]
-  return match is length
-
 # The **Rewriter** class is used by the [Lexer](lexer.html), directly against
 # its internal array of tokens.
 exports.Rewriter: class Rewriter
@ -43,7 +32,6 @@ exports.Rewriter: class Rewriter
    @close_open_calls_and_indexes()
    @add_implicit_indentation()
    @add_implicit_parentheses()
-    @rewrite_object_keys()
    @ensure_balance BALANCED_PAIRS
    @rewrite_closing_parens()
    @tokens
@ -193,35 +181,6 @@ exports.Rewriter: class Rewriter
      @tokens.splice i, 1
      return 0

-  # Allow reserved words to be used as object keys. We scan the token stream
-  # until we enter an object. Any token before an assignment is considered the
-  # key which we rewrite back to an `IDENTIFIER`.
-  rewrite_object_keys: ->
-    levels: []
-    @scan_tokens (prev, token, post, i) =>
-      if token[0] is '{'
-        levels.push {rewrite: yes, cond: {token: [['}','}']]}}
-      else if levels.length
-        popped: no
-        while (last: levels[levels.length - 1]) and exits prev, token, post, last.cond
-          levels.pop()
-          popped: yes
-        if not popped
-          balanced: no
-          for pair in BALANCED_PAIRS when post and post[0] is pair[0]
-            levels.push {rewrite: no, cond: {token: [[pair[1], pair[2] or post[1]]]}}
-            balanced: yes
-          if token[0] is 'ASSIGN'
-            prev[0]: 'IDENTIFIER' if last.rewrite
-            prev[1]: alias        if last.rewrite and (alias: Rewriter.alias_operator prev[1], yes)
-            if not balanced
-              after: @tokens[i + 2]
-              if post and post[0] is '->' and after and after[0] is 'INDENT'
-                levels.push {rewrite: no, cond: {prev: [['OUTDENT', after[1]]], token: [['TERMINATOR', '\n']]}}
-              else if last.rewrite
-                levels.push {rewrite: no, cond: {token: [[',', ','], ['TERMINATOR', '\n'], ['}', '}']]}}
-      return 1
-
  # Ensure that all listed pairs of tokens are correctly balanced throughout
  # the course of the token stream.
  ensure_balance: (pairs) ->
@ -290,22 +249,13 @@ exports.Rewriter: class Rewriter
      else
        return 1

-  # Rewriter Properties
-  # ----------------
-
-  # Alias an identifier to a Coffee operator or vice versa.
-  @alias_operator: (id, reverse) ->
-    (return CONVERSIONS[k]) for k    of CONVERSIONS when id is k if not reverse
-    (return k)              for k, v of CONVERSIONS when id is v if reverse
-    false
-
 # Constants
 # ---------

 # List of the token pairs that must be balanced.
-BALANCED_PAIRS: [['(', ')', ')'], ['[', ']', ']'], ['{', '}', '}'], ['INDENT', 'OUTDENT'],
-  ['PARAM_START', 'PARAM_END', ')'], ['CALL_START', 'CALL_END', ')'],
-  ['INDEX_START', 'INDEX_END', ']'], ['SOAKED_INDEX_START', 'SOAKED_INDEX_END', ']']]
+BALANCED_PAIRS: [['(', ')'], ['[', ']'], ['{', '}'], ['INDENT', 'OUTDENT'],
+  ['PARAM_START', 'PARAM_END'], ['CALL_START', 'CALL_END'],
+  ['INDEX_START', 'INDEX_END'], ['SOAKED_INDEX_START', 'SOAKED_INDEX_END']]

 # The inverse mappings of `BALANCED_PAIRS` we're trying to fix up, so we can
 # look things up from either end.
@ -343,12 +293,3 @@ IMPLICIT_END:   ['IF', 'UNLESS', 'FOR', 'WHILE', 'UNTIL', 'TERMINATOR', 'INDENT'
 # The grammar can't disambiguate them, so we insert the implicit indentation.
 SINGLE_LINERS: ['ELSE', "->", "=>", 'TRY', 'FINALLY', 'THEN']
 SINGLE_CLOSERS: ['TERMINATOR', 'CATCH', 'FINALLY', 'ELSE', 'OUTDENT', 'LEADING_WHEN']
-
-# Conversions from CoffeeScript operators into JavaScript ones.
-CONVERSIONS: {
-  'and':  '&&'
-  'or':   '||'
-  'is':   '=='
-  'isnt': '!='
-  'not':  '!'
-}