Simpler stab at a fix for issue #397. Allow CoffeeScript-only keywords to be used as accessors, or in assignment.

This commit is contained in:
Jeremy Ashkenas 2010-05-31 14:42:30 -04:00
parent 710b2b5fdc
commit 90f96af720
4 changed files with 43 additions and 216 deletions

View File

@ -1,5 +1,5 @@
(function(){
var ASSIGNMENT, CALLABLE, CODE, COFFEE_ALIASES, COFFEE_KEYWORDS, COMMENT, COMMENT_CLEANER, HALF_ASSIGNMENTS, HEREDOC, HEREDOC_INDENT, IDENTIFIER, INTERPOLATION, JS_CLEANER, JS_FORBIDDEN, JS_KEYWORDS, KEYWORDS, LAST_DENT, LAST_DENTS, LINE_BREAK, Lexer, MULTILINER, MULTI_DENT, NOT_REGEX, NO_NEWLINE, NUMBER, OPERATOR, REGEX_END, REGEX_ESCAPE, REGEX_INTERPOLATION, REGEX_START, RESERVED, Rewriter, STRING_NEWLINES, WHITESPACE, _a, _b, _c, balanced_string, compact, count, helpers, include, starts;
var ASSIGNED, ASSIGNMENT, CALLABLE, CODE, COFFEE_ALIASES, COFFEE_KEYWORDS, COMMENT, COMMENT_CLEANER, CONVERSIONS, HALF_ASSIGNMENTS, HEREDOC, HEREDOC_INDENT, IDENTIFIER, INTERPOLATION, JS_CLEANER, JS_FORBIDDEN, JS_KEYWORDS, LAST_DENT, LAST_DENTS, LINE_BREAK, Lexer, MULTILINER, MULTI_DENT, NOT_REGEX, NO_NEWLINE, NUMBER, OPERATOR, REGEX_END, REGEX_ESCAPE, REGEX_INTERPOLATION, REGEX_START, RESERVED, Rewriter, STRING_NEWLINES, WHITESPACE, _a, _b, _c, balanced_string, compact, count, helpers, include, starts;
var __slice = Array.prototype.slice;
// The CoffeeScript Lexer. Uses a series of token-matching regexes to attempt
// matches against the beginning of the source code. When a match is found,
@ -126,13 +126,13 @@
// referenced as property names here, so you can still do `jQuery.is()` even
// though `is` means `===` otherwise.
Lexer.prototype.identifier_token = function() {
var accessed, id, operator, tag;
var forced_identifier, id, tag;
if (!(id = this.match(IDENTIFIER, 1))) {
return false;
}
accessed = this.tag_accessor();
forced_identifier = this.tag_accessor() || this.match(ASSIGNED, 1);
tag = 'IDENTIFIER';
if (!accessed && include(KEYWORDS, id)) {
if (include(JS_KEYWORDS, id) || (!forced_identifier && include(COFFEE_KEYWORDS, id))) {
tag = id.toUpperCase();
}
if (include(RESERVED, id)) {
@ -142,9 +142,9 @@
tag = 'LEADING_WHEN';
}
this.i += id.length;
if (!(accessed)) {
if ((operator = Rewriter.alias_operator(id))) {
tag = (id = operator);
if (!(forced_identifier)) {
if (include(COFFEE_ALIASES, id)) {
tag = (id = CONVERSIONS[id]);
}
if (this.prev() && this.prev()[0] === 'ASSIGN' && include(HALF_ASSIGNMENTS, tag)) {
return this.tag_half_assignment(tag);
@ -154,6 +154,7 @@
return true;
};
// Matches numbers, including decimals, hex, and exponential notation.
// Be careful not to interfere with ranges-in-progress.
Lexer.prototype.number_token = function() {
var number;
if (!(number = this.match(NUMBER, 1))) {
@ -650,9 +651,6 @@
// be used standalone, but you can reference them as an attached property.
COFFEE_ALIASES = ["and", "or", "is", "isnt", "not"];
COFFEE_KEYWORDS = COFFEE_ALIASES.concat(["then", "unless", "until", "yes", "no", "on", "off", "of", "by", "where", "when"]);
// The combined list of keywords is the superset that gets passed verbatim to
// the parser.
KEYWORDS = JS_KEYWORDS.concat(COFFEE_KEYWORDS);
// The list of keywords that are reserved by JavaScript, but not used, or are
// used by CoffeeScript internally. We throw an error when these are encountered,
// to avoid having a JavaScript error at runtime.
@ -672,7 +670,7 @@
MULTI_DENT = /^((\n([ \t]*))+)(\.)?/;
LAST_DENTS = /\n([ \t]*)/g;
LAST_DENT = /\n([ \t]*)/;
ASSIGNMENT = /^(:|=)$/;
ASSIGNMENT = /^[:=]$/;
// Regex-matching-regexes.
REGEX_START = /^\/[^\/ ]/;
REGEX_INTERPOLATION = /([^\\]\$[a-zA-Z_@]|[^\\]\$\{.*[^\\]\})/;
@ -685,6 +683,7 @@
COMMENT_CLEANER = /(^[ \t]*#|\n[ \t]*$)/mg;
NO_NEWLINE = /^([+\*&|\/\-%=<>:!.\\][<>=&|]*|and|or|is|isnt|not|delete|typeof|instanceof)$/;
HEREDOC_INDENT = /(\n+([ \t]*)|^([ \t]+))/g;
ASSIGNED = /^([a-zA-Z\$_]\w*[ \t]*?[:=])/;
// Tokens which a regular expression will never immediately follow, but which
// a division operator might.
// See: http://www.mozilla.org/js/language/js20-2002-04/rationale/syntax.html#regular-expressions
@ -700,4 +699,12 @@
LINE_BREAK = ['INDENT', 'OUTDENT', 'TERMINATOR'];
// Half-assignments...
HALF_ASSIGNMENTS = ['-', '+', '/', '*', '%', '||', '&&', '?'];
// Conversions from CoffeeScript operators into JavaScript ones.
// The keys are exactly the words listed in `COFFEE_ALIASES`. When
// `identifier_token` lexes one of those words and it has not been forced to
// stay an identifier, both the token's tag and its value are replaced with the
// operator string looked up here.
CONVERSIONS = {
'and': '&&',
'or': '||',
'is': '==',
'isnt': '!=',
'not': '!'
};
})();

View File

@ -1,10 +1,10 @@
(function(){
var BALANCED_PAIRS, CONVERSIONS, EXPRESSION_CLOSE, EXPRESSION_END, EXPRESSION_START, IMPLICIT_BLOCK, IMPLICIT_CALL, IMPLICIT_END, IMPLICIT_FUNC, INVERSES, Rewriter, SINGLE_CLOSERS, SINGLE_LINERS, _a, _b, _c, _d, _e, _f, _g, _h, _i, _j, _k, _l, _m, exits, helpers, include, pair;
var __hasProp = Object.prototype.hasOwnProperty, __slice = Array.prototype.slice, __bind = function(func, obj, args) {
var BALANCED_PAIRS, EXPRESSION_CLOSE, EXPRESSION_END, EXPRESSION_START, IMPLICIT_BLOCK, IMPLICIT_CALL, IMPLICIT_END, IMPLICIT_FUNC, INVERSES, Rewriter, SINGLE_CLOSERS, SINGLE_LINERS, _a, _b, _c, _d, _e, _f, _g, _h, _i, _j, _k, _l, _m, helpers, include, pair;
var __slice = Array.prototype.slice, __bind = function(func, obj, args) {
return function() {
return func.apply(obj || {}, args ? args.concat(__slice.call(arguments, 0)) : arguments);
};
};
}, __hasProp = Object.prototype.hasOwnProperty;
// The CoffeeScript language has a good deal of optional syntax, implicit syntax,
// and shorthand syntax. This can greatly complicate a grammar and bloat
// the resulting parse table. Instead of making the parser handle it all, we take
@ -22,32 +22,6 @@
// Import the helpers we need.
_b = helpers;
include = _b.include;
// Decide whether the tokens around the current scan position satisfy an exit
// condition. `cond` maps a position name (`prev`, `token` or `post`) to a list
// of `[tag, value]` pairs. Each position named in `cond` adds one to the
// expected total, and each listed pair that equals that position's token (same
// tag and same value) adds one to the matched total. The answer is true when
// the two totals agree.
exits = function(prev, token, post, cond) {
  var candidate, expected, idx, matched, name, patterns, positions, total;
  positions = {
    prev: prev,
    token: token,
    post: post
  };
  expected = 0;
  matched = 0;
  for (name in positions) {
    // Positions the condition does not mention are ignored entirely.
    if (!__hasProp.call(positions, name) || !(name in cond)) {
      continue;
    }
    expected += 1;
    candidate = positions[name];
    patterns = cond[name];
    total = patterns.length;
    for (idx = 0; idx < total; idx++) {
      if (candidate[0] === patterns[idx][0] && candidate[1] === patterns[idx][1]) {
        matched += 1;
      }
    }
  }
  return matched === expected;
};
// The **Rewriter** class is used by the [Lexer](lexer.html), directly against
// its internal array of tokens.
exports.Rewriter = (function() {
@ -65,7 +39,6 @@
this.close_open_calls_and_indexes();
this.add_implicit_indentation();
this.add_implicit_parentheses();
this.rewrite_object_keys();
this.ensure_balance(BALANCED_PAIRS);
this.rewrite_closing_parens();
return this.tokens;
@ -271,74 +244,6 @@
return 0;
}, this));
};
// Allow reserved words to be used as object keys. We scan the token stream
// until we enter an object. Any token before an assignment is considered the
// key which we rewrite back to an `IDENTIFIER`.
//
// State is kept in `levels`, a stack of `{rewrite, cond}` records. `rewrite`
// says whether a key found at this level should be turned back into an
// identifier; `cond` is the exit condition (checked with `exits`) that pops
// the record off the stack again.
Rewriter.prototype.rewrite_object_keys = function() {
var levels;
levels = [];
return this.scan_tokens(__bind(function(prev, token, post, i) {
var _c, _d, _e, after, alias, balanced, last, pair, popped;
if (token[0] === '{') {
// Entering an object literal: keys are rewritten until the matching `}`.
levels.push({
rewrite: true,
cond: {
token: [['}', '}']]
}
});
} else if (levels.length) {
popped = false;
// Pop every level whose exit condition is met by the current position.
// After the loop `last` holds whatever is now on top of the stack.
while ((last = levels[levels.length - 1]) && exits(prev, token, post, last.cond)) {
levels.pop();
popped = true;
}
if (!popped) {
balanced = false;
_d = BALANCED_PAIRS;
for (_c = 0, _e = _d.length; _c < _e; _c++) {
pair = _d[_c];
// The next token opens a balanced pair: push a non-rewriting level that
// lasts until the pair's closing token. The expected closing value is
// the pair's third entry when present, otherwise the opener's own value.
if (post && post[0] === pair[0]) {
levels.push({
rewrite: false,
cond: {
token: [[pair[1], pair[2] || post[1]]]
}
});
balanced = true;
}
}
if (token[0] === 'ASSIGN') {
// `prev` is the token being assigned to. In a rewriting level it is
// retagged as an identifier.
if (last.rewrite) {
prev[0] = 'IDENTIFIER';
}
// If its value is a converted operator, restore the original word via
// the reverse lookup of `alias_operator`.
if (last.rewrite && (alias = Rewriter.alias_operator(prev[1], true))) {
prev[1] = alias;
}
if (!balanced) {
after = this.tokens[i + 2];
if (post && post[0] === '->' && after && after[0] === 'INDENT') {
// The assigned value is a `->` function followed by an indented body:
// suspend rewriting until the OUTDENT carrying the same value as that
// INDENT is immediately followed by a newline TERMINATOR.
levels.push({
rewrite: false,
cond: {
prev: [['OUTDENT', after[1]]],
token: [['TERMINATOR', '\n']]
}
});
} else if (last.rewrite) {
// Any other value: suspend rewriting until a `,`, a newline TERMINATOR
// or the closing `}` is reached.
levels.push({
rewrite: false,
cond: {
token: [[',', ','], ['TERMINATOR', '\n'], ['}', '}']]
}
});
}
}
}
}
}
// NOTE(review): presumably tells `scan_tokens` to advance by one token;
// confirm against `scan_tokens`, which is not visible here.
return 1;
}, this));
};
// Ensure that all listed pairs of tokens are correctly balanced throughout
// the course of the token stream.
Rewriter.prototype.ensure_balance = function(pairs) {
@ -439,36 +344,12 @@
}
}, this));
};
// Rewriter Properties
// ----------------
// Translate between the keys and values of `CONVERSIONS`.
// With `reverse` falsy, `id` is compared against the keys and the matching
// value is returned; with `reverse` truthy, `id` is compared against the
// values and the matching key is returned. Returns `false` when nothing
// matches.
Rewriter.alias_operator = function(id, reverse) {
  var name;
  for (name in CONVERSIONS) {
    if (!__hasProp.call(CONVERSIONS, name)) {
      continue;
    }
    if (reverse) {
      if (CONVERSIONS[name] === id) {
        return name;
      }
    } else if (name === id) {
      return CONVERSIONS[name];
    }
  }
  return false;
};
return Rewriter;
}).call(this);
})();
// Constants
// ---------
// List of the token pairs that must be balanced.
BALANCED_PAIRS = [['(', ')', ')'], ['[', ']', ']'], ['{', '}', '}'], ['INDENT', 'OUTDENT'], ['PARAM_START', 'PARAM_END', ')'], ['CALL_START', 'CALL_END', ')'], ['INDEX_START', 'INDEX_END', ']'], ['SOAKED_INDEX_START', 'SOAKED_INDEX_END', ']']];
BALANCED_PAIRS = [['(', ')'], ['[', ']'], ['{', '}'], ['INDENT', 'OUTDENT'], ['PARAM_START', 'PARAM_END'], ['CALL_START', 'CALL_END'], ['INDEX_START', 'INDEX_END'], ['SOAKED_INDEX_START', 'SOAKED_INDEX_END']];
// The inverse mappings of `BALANCED_PAIRS` we're trying to fix up, so we can
// look things up from either end.
INVERSES = {};
@ -510,12 +391,4 @@
// The grammar can't disambiguate them, so we insert the implicit indentation.
SINGLE_LINERS = ['ELSE', "->", "=>", 'TRY', 'FINALLY', 'THEN'];
SINGLE_CLOSERS = ['TERMINATOR', 'CATCH', 'FINALLY', 'ELSE', 'OUTDENT', 'LEADING_WHEN'];
// Conversions from CoffeeScript operators into JavaScript ones.
CONVERSIONS = {
'and': '&&',
'or': '||',
'is': '==',
'isnt': '!=',
'not': '!'
};
})();

View File

@ -89,14 +89,14 @@ exports.Lexer: class Lexer
# though `is` means `===` otherwise.
identifier_token: ->
return false unless id: @match IDENTIFIER, 1
accessed: @tag_accessor()
forced_identifier: @tag_accessor() or @match ASSIGNED, 1
tag: 'IDENTIFIER'
tag: id.toUpperCase() if not accessed and include(KEYWORDS, id)
@identifier_error id if include RESERVED, id
tag: 'LEADING_WHEN' if tag is 'WHEN' and include LINE_BREAK, @tag()
tag: id.toUpperCase() if include(JS_KEYWORDS, id) or (not forced_identifier and include(COFFEE_KEYWORDS, id))
@identifier_error id if include RESERVED, id
tag: 'LEADING_WHEN' if tag is 'WHEN' and include LINE_BREAK, @tag()
@i: + id.length
unless accessed
tag: id: operator if (operator: Rewriter.alias_operator id)
unless forced_identifier
tag: id: CONVERSIONS[id] if include COFFEE_ALIASES, id
return @tag_half_assignment tag if @prev() and @prev()[0] is 'ASSIGN' and include HALF_ASSIGNMENTS, tag
@token tag, id
true
@ -468,10 +468,6 @@ COFFEE_KEYWORDS: COFFEE_ALIASES.concat [
"of", "by", "where", "when"
]
# The combined list of keywords is the superset that gets passed verbatim to
# the parser.
KEYWORDS: JS_KEYWORDS.concat COFFEE_KEYWORDS
# The list of keywords that are reserved by JavaScript, but not used, or are
# used by CoffeeScript internally. We throw an error when these are encountered,
# to avoid having a JavaScript error at runtime.
@ -496,7 +492,7 @@ CODE : /^((-|=)>)/
MULTI_DENT : /^((\n([ \t]*))+)(\.)?/
LAST_DENTS : /\n([ \t]*)/g
LAST_DENT : /\n([ \t]*)/
ASSIGNMENT : /^(:|=)$/
ASSIGNMENT : /^[:=]$/
# Regex-matching-regexes.
REGEX_START : /^\/[^\/ ]/
@ -511,6 +507,7 @@ STRING_NEWLINES : /\n[ \t]*/g
COMMENT_CLEANER : /(^[ \t]*#|\n[ \t]*$)/mg
NO_NEWLINE : /^([+\*&|\/\-%=<>:!.\\][<>=&|]*|and|or|is|isnt|not|delete|typeof|instanceof)$/
HEREDOC_INDENT : /(\n+([ \t]*)|^([ \t]+))/g
ASSIGNED : /^([a-zA-Z\$_]\w*[ \t]*?[:=])/
# Tokens which a regular expression will never immediately follow, but which
# a division operator might.
@ -534,3 +531,12 @@ LINE_BREAK: ['INDENT', 'OUTDENT', 'TERMINATOR']
# Half-assignments...
HALF_ASSIGNMENTS: ['-', '+', '/', '*', '%', '||', '&&', '?']
# Conversions from CoffeeScript operators into JavaScript ones.
CONVERSIONS: {
'and': '&&'
'or': '||'
'is': '=='
'isnt': '!='
'not': '!'
}

View File

@ -15,17 +15,6 @@ else
# Import the helpers we need.
{include}: helpers
# Helper method to check if the given stream of tokens matches the exit conditions
# `cond` maps a position name (`prev`, `token` or `post`) to a list of
# `[tag, value]` pairs. `length` counts the positions that `cond` mentions and
# `match` counts the listed pairs equal to the token at that position; the
# helper returns whether the two counts are the same.
exits: (prev, token, post, cond) ->
length: 0
match: 0
args: {prev, token, post}
# Only positions that appear as keys of `cond` are examined.
for k, v of args when k in cond
length: + 1
for pair in cond[k]
# A pair matches when both the tag and the value are identical.
match: + 1 if v[0] is pair[0] and v[1] is pair[1]
return match is length
# The **Rewriter** class is used by the [Lexer](lexer.html), directly against
# its internal array of tokens.
exports.Rewriter: class Rewriter
@ -43,7 +32,6 @@ exports.Rewriter: class Rewriter
@close_open_calls_and_indexes()
@add_implicit_indentation()
@add_implicit_parentheses()
@rewrite_object_keys()
@ensure_balance BALANCED_PAIRS
@rewrite_closing_parens()
@tokens
@ -193,35 +181,6 @@ exports.Rewriter: class Rewriter
@tokens.splice i, 1
return 0
# Allow reserved words to be used as object keys. We scan the token stream
# until we enter an object. Any token before an assignment is considered the
# key which we rewrite back to an `IDENTIFIER`.
#
# `levels` is a stack of `{rewrite, cond}` records: `rewrite` says whether a
# key found at this level is turned back into an identifier, and `cond` is the
# exit condition (checked with `exits`) that pops the record again.
rewrite_object_keys: ->
levels: []
@scan_tokens (prev, token, post, i) =>
if token[0] is '{'
# Entering an object literal: rewrite keys until the matching `}`.
levels.push {rewrite: yes, cond: {token: [['}','}']]}}
else if levels.length
popped: no
# Pop every level whose exit condition the current position satisfies.
while (last: levels[levels.length - 1]) and exits prev, token, post, last.cond
levels.pop()
popped: yes
if not popped
balanced: no
# The next token opens a balanced pair: suspend rewriting until its closer.
for pair in BALANCED_PAIRS when post and post[0] is pair[0]
levels.push {rewrite: no, cond: {token: [[pair[1], pair[2] or post[1]]]}}
balanced: yes
if token[0] is 'ASSIGN'
# `prev` is the token being assigned to; in a rewriting level it is retagged
# and, if its value is a converted operator, restored to the original word.
prev[0]: 'IDENTIFIER' if last.rewrite
prev[1]: alias if last.rewrite and (alias: Rewriter.alias_operator prev[1], yes)
if not balanced
after: @tokens[i + 2]
# A `->` value with an indented body suspends rewriting until the OUTDENT
# carrying that INDENT's value is followed by a newline TERMINATOR.
if post and post[0] is '->' and after and after[0] is 'INDENT'
levels.push {rewrite: no, cond: {prev: [['OUTDENT', after[1]]], token: [['TERMINATOR', '\n']]}}
# Any other value suspends rewriting until `,`, a newline TERMINATOR or `}`.
else if last.rewrite
levels.push {rewrite: no, cond: {token: [[',', ','], ['TERMINATOR', '\n'], ['}', '}']]}}
# NOTE(review): presumably tells `scan_tokens` to advance by one token; confirm.
return 1
# Ensure that all listed pairs of tokens are correctly balanced throughout
# the course of the token stream.
ensure_balance: (pairs) ->
@ -290,22 +249,13 @@ exports.Rewriter: class Rewriter
else
return 1
# Rewriter Properties
# ----------------
# Alias an identifier to a Coffee operator or vice versa.
# Without `reverse`, `id` is compared against the keys of `CONVERSIONS` and the
# matching value is returned. With `reverse`, `id` is compared against the
# values and the matching key is returned. Returns `false` when nothing matches.
@alias_operator: (id, reverse) ->
(return CONVERSIONS[k]) for k of CONVERSIONS when id is k if not reverse
(return k) for k, v of CONVERSIONS when id is v if reverse
false
# Constants
# ---------
# List of the token pairs that must be balanced.
BALANCED_PAIRS: [['(', ')', ')'], ['[', ']', ']'], ['{', '}', '}'], ['INDENT', 'OUTDENT'],
['PARAM_START', 'PARAM_END', ')'], ['CALL_START', 'CALL_END', ')'],
['INDEX_START', 'INDEX_END', ']'], ['SOAKED_INDEX_START', 'SOAKED_INDEX_END', ']']]
BALANCED_PAIRS: [['(', ')'], ['[', ']'], ['{', '}'], ['INDENT', 'OUTDENT'],
['PARAM_START', 'PARAM_END'], ['CALL_START', 'CALL_END'],
['INDEX_START', 'INDEX_END'], ['SOAKED_INDEX_START', 'SOAKED_INDEX_END']]
# The inverse mappings of `BALANCED_PAIRS` we're trying to fix up, so we can
# look things up from either end.
@ -343,12 +293,3 @@ IMPLICIT_END: ['IF', 'UNLESS', 'FOR', 'WHILE', 'UNTIL', 'TERMINATOR', 'INDENT'
# The grammar can't disambiguate them, so we insert the implicit indentation.
SINGLE_LINERS: ['ELSE', "->", "=>", 'TRY', 'FINALLY', 'THEN']
SINGLE_CLOSERS: ['TERMINATOR', 'CATCH', 'FINALLY', 'ELSE', 'OUTDENT', 'LEADING_WHEN']
# Conversions from CoffeeScript operators into JavaScript ones.
CONVERSIONS: {
'and': '&&'
'or': '||'
'is': '=='
'isnt': '!='
'not': '!'
}