Allowing keywords to be used as identifiers when they immediately follow an accessor (e.g. `obj.is`, `obj.not`), because we've got more keywords than JavaScript does.

This commit is contained in:
Jeremy Ashkenas 2010-02-16 08:43:58 -05:00
parent 2f389f1d51
commit a3c8c0b492
3 changed files with 36 additions and 22 deletions

View File

@ -1,5 +1,5 @@
(function(){
var ASSIGNMENT, BEFORE_WHEN, CALLABLE, CODE, COMMENT, COMMENT_CLEANER, HEREDOC, HEREDOC_INDENT, IDENTIFIER, JS, JS_CLEANER, KEYWORDS, LAST_DENT, LAST_DENTS, MULTILINER, MULTI_DENT, NOT_REGEX, NO_NEWLINE, NUMBER, OPERATOR, REGEX, Rewriter, STRING, STRING_NEWLINES, WHITESPACE, lex;
var ACCESSORS, ASSIGNMENT, BEFORE_WHEN, CALLABLE, CODE, COMMENT, COMMENT_CLEANER, HEREDOC, HEREDOC_INDENT, IDENTIFIER, JS, JS_CLEANER, KEYWORDS, LAST_DENT, LAST_DENTS, MULTILINER, MULTI_DENT, NOT_REGEX, NO_NEWLINE, NUMBER, OPERATOR, REGEX, Rewriter, STRING, STRING_NEWLINES, WHITESPACE, lex;
if ((typeof process !== "undefined" && process !== null)) {
Rewriter = require('./rewriter').Rewriter;
} else {
@ -41,6 +41,9 @@
NOT_REGEX = ['IDENTIFIER', 'NUMBER', 'REGEX', 'STRING', ')', '++', '--', ']', '}', 'FALSE', 'NULL', 'TRUE'];
// Tokens which could legitimately be invoked or indexed.
CALLABLE = ['IDENTIFIER', 'SUPER', ')', ']', '}', 'STRING'];
// Tokens that indicate an access -- keywords immediately following will be
// treated as identifiers.
ACCESSORS = ['PROPERTY_ACCESS', 'PROTOTYPE_ACCESS', 'SOAK_ACCESS', '@'];
// Tokens that, when immediately preceding a 'WHEN', indicate that it's leading.
BEFORE_WHEN = ['INDENT', 'OUTDENT', 'TERMINATOR'];
// Scan by attempting to match tokens one character at a time. Slow and steady.
@ -57,8 +60,6 @@
// The stack of all indent levels we are currently within.
this.tokens = [];
// Collection of all parsed tokens in the form [:TOKEN_TYPE, value]
this.spaced = null;
// The last token that has a space following it.
while (this.i < this.code.length) {
this.chunk = this.code.slice(this.i);
this.extract_next_token();
@ -105,16 +106,10 @@
if (!((id = this.match(IDENTIFIER, 1)))) {
return false;
}
// Keywords are special identifiers tagged with their own name,
// 'if' will result in an ['IF', "if"] token.
tag = KEYWORDS.indexOf(id) >= 0 ? id.toUpperCase() : 'IDENTIFIER';
if (tag === 'WHEN' && BEFORE_WHEN.indexOf(this.tag()) >= 0) {
tag = 'LEADING_WHEN';
}
if (tag === 'IDENTIFIER' && this.value() === '::') {
if (this.value() === '::') {
this.tag(1, 'PROTOTYPE_ACCESS');
}
if (tag === 'IDENTIFIER' && this.value() === '.' && !(this.value(2) === '.')) {
if (this.value() === '.' && !(this.value(2) === '.')) {
if (this.tag(2) === '?') {
this.tag(1, 'SOAK_ACCESS');
this.tokens.splice(-2, 1);
@ -122,6 +117,13 @@
this.tag(1, 'PROPERTY_ACCESS');
}
}
tag = 'IDENTIFIER';
if (KEYWORDS.indexOf(id) >= 0 && !(ACCESSORS.indexOf(this.tag()) >= 0)) {
tag = id.toUpperCase();
}
if (tag === 'WHEN' && BEFORE_WHEN.indexOf(this.tag()) >= 0) {
tag = 'LEADING_WHEN';
}
this.token(tag, id);
this.i += id.length;
return true;
@ -245,7 +247,7 @@
if (!((space = this.match(WHITESPACE, 1)))) {
return false;
}
this.spaced = this.value();
this.tokens[this.tokens.length - 1].spaced = true;
this.i += space.length;
return true;
};
@ -279,7 +281,7 @@
if (value === ';') {
tag = 'TERMINATOR';
}
if (this.value() !== this.spaced && CALLABLE.indexOf(this.tag()) >= 0) {
if (!this.tokens[this.tokens.length - 1].spaced && CALLABLE.indexOf(this.tag()) >= 0) {
if (value === '(') {
tag = 'CALL_START';
}

View File

@ -61,6 +61,10 @@ NOT_REGEX: [
# Tokens which could legitimately be invoked or indexed.
CALLABLE: ['IDENTIFIER', 'SUPER', ')', ']', '}', 'STRING']
# Tokens that indicate an access -- keywords immediately following will be
# treated as identifiers.
ACCESSORS: ['PROPERTY_ACCESS', 'PROTOTYPE_ACCESS', 'SOAK_ACCESS', '@']
# Tokens that, when immediately preceding a 'WHEN', indicate that it's leading.
BEFORE_WHEN: ['INDENT', 'OUTDENT', 'TERMINATOR']
@ -72,7 +76,6 @@ lex::tokenize: (code) ->
@indent : 0 # The current indent level.
@indents : [] # The stack of all indent levels we are currently within.
@tokens : [] # Collection of all parsed tokens in the form [:TOKEN_TYPE, value]
@spaced : null # The last token that has a space following it.
while @i < @code.length
@chunk: @code.slice(@i)
@extract_next_token()
@ -98,17 +101,16 @@ lex::extract_next_token: ->
# Matches identifying literals: variables, keywords, method names, etc.
lex::identifier_token: ->
return false unless id: @match IDENTIFIER, 1
# Keywords are special identifiers tagged with their own name,
# 'if' will result in an ['IF', "if"] token.
tag: if KEYWORDS.indexOf(id) >= 0 then id.toUpperCase() else 'IDENTIFIER'
tag: 'LEADING_WHEN' if tag is 'WHEN' and BEFORE_WHEN.indexOf(@tag()) >= 0
@tag(1, 'PROTOTYPE_ACCESS') if tag is 'IDENTIFIER' and @value() is '::'
if tag is 'IDENTIFIER' and @value() is '.' and !(@value(2) is '.')
@tag(1, 'PROTOTYPE_ACCESS') if @value() is '::'
if @value() is '.' and not (@value(2) is '.')
if @tag(2) is '?'
@tag(1, 'SOAK_ACCESS')
@tokens.splice(-2, 1)
else
@tag(1, 'PROPERTY_ACCESS')
tag: 'IDENTIFIER'
tag: id.toUpperCase() if KEYWORDS.indexOf(id) >= 0 and not (ACCESSORS.indexOf(@tag()) >= 0)
tag: 'LEADING_WHEN' if tag is 'WHEN' and BEFORE_WHEN.indexOf(@tag()) >= 0
@token(tag, id)
@i += id.length
true
@ -199,7 +201,7 @@ lex::outdent_token: (move_out) ->
# Matches and consumes non-meaningful whitespace.
lex::whitespace_token: ->
return false unless space: @match WHITESPACE, 1
@spaced: @value()
@tokens[@tokens.length - 1].spaced: true
@i += space.length
true
@ -224,7 +226,8 @@ lex::literal_token: ->
value ||= @chunk.substr(0, 1)
tag: if value.match(ASSIGNMENT) then 'ASSIGN' else value
tag: 'TERMINATOR' if value == ';'
if @value() isnt @spaced and CALLABLE.indexOf(@tag()) >= 0
if not @tokens[@tokens.length - 1].spaced and CALLABLE.indexOf(@tag()) >= 0
tag: 'CALL_START' if value is '('
tag: 'INDEX_START' if value is '['
@token tag, value

View File

@ -51,3 +51,12 @@ bob: {
puts bob.hello() is "Hello Bob"
puts bob[10] is 'number'
obj: {
'is': -> yes
'not': -> no
}
puts obj.is()
puts not obj.not()