Allowing keywords to be used as identifiers when they immediately follow a property access (e.g. `obj.is`, `obj.not`), because we've got more keywords than JavaScript does.

This commit is contained in:
Jeremy Ashkenas 2010-02-16 08:43:58 -05:00
parent 2f389f1d51
commit a3c8c0b492
3 changed files with 36 additions and 22 deletions

View File

@ -1,5 +1,5 @@
(function(){ (function(){
var ASSIGNMENT, BEFORE_WHEN, CALLABLE, CODE, COMMENT, COMMENT_CLEANER, HEREDOC, HEREDOC_INDENT, IDENTIFIER, JS, JS_CLEANER, KEYWORDS, LAST_DENT, LAST_DENTS, MULTILINER, MULTI_DENT, NOT_REGEX, NO_NEWLINE, NUMBER, OPERATOR, REGEX, Rewriter, STRING, STRING_NEWLINES, WHITESPACE, lex; var ACCESSORS, ASSIGNMENT, BEFORE_WHEN, CALLABLE, CODE, COMMENT, COMMENT_CLEANER, HEREDOC, HEREDOC_INDENT, IDENTIFIER, JS, JS_CLEANER, KEYWORDS, LAST_DENT, LAST_DENTS, MULTILINER, MULTI_DENT, NOT_REGEX, NO_NEWLINE, NUMBER, OPERATOR, REGEX, Rewriter, STRING, STRING_NEWLINES, WHITESPACE, lex;
if ((typeof process !== "undefined" && process !== null)) { if ((typeof process !== "undefined" && process !== null)) {
Rewriter = require('./rewriter').Rewriter; Rewriter = require('./rewriter').Rewriter;
} else { } else {
@ -41,6 +41,9 @@
NOT_REGEX = ['IDENTIFIER', 'NUMBER', 'REGEX', 'STRING', ')', '++', '--', ']', '}', 'FALSE', 'NULL', 'TRUE']; NOT_REGEX = ['IDENTIFIER', 'NUMBER', 'REGEX', 'STRING', ')', '++', '--', ']', '}', 'FALSE', 'NULL', 'TRUE'];
// Tokens which could legitimately be invoked or indexed. // Tokens which could legitimately be invoked or indexed.
CALLABLE = ['IDENTIFIER', 'SUPER', ')', ']', '}', 'STRING']; CALLABLE = ['IDENTIFIER', 'SUPER', ')', ']', '}', 'STRING'];
// Tokens that indicate an access -- keywords immediately following will be
// treated as identifiers.
ACCESSORS = ['PROPERTY_ACCESS', 'PROTOTYPE_ACCESS', 'SOAK_ACCESS', '@'];
// Tokens that, when immediately preceding a 'WHEN', indicate that it's leading. // Tokens that, when immediately preceding a 'WHEN', indicate that it's leading.
BEFORE_WHEN = ['INDENT', 'OUTDENT', 'TERMINATOR']; BEFORE_WHEN = ['INDENT', 'OUTDENT', 'TERMINATOR'];
// Scan by attempting to match tokens one character at a time. Slow and steady. // Scan by attempting to match tokens one character at a time. Slow and steady.
@ -57,8 +60,6 @@
// The stack of all indent levels we are currently within. // The stack of all indent levels we are currently within.
this.tokens = []; this.tokens = [];
// Collection of all parsed tokens in the form [:TOKEN_TYPE, value] // Collection of all parsed tokens in the form [:TOKEN_TYPE, value]
this.spaced = null;
// The last token that has a space following it.
while (this.i < this.code.length) { while (this.i < this.code.length) {
this.chunk = this.code.slice(this.i); this.chunk = this.code.slice(this.i);
this.extract_next_token(); this.extract_next_token();
@ -105,16 +106,10 @@
if (!((id = this.match(IDENTIFIER, 1)))) { if (!((id = this.match(IDENTIFIER, 1)))) {
return false; return false;
} }
// Keywords are special identifiers tagged with their own name, if (this.value() === '::') {
// 'if' will result in an ['IF', "if"] token.
tag = KEYWORDS.indexOf(id) >= 0 ? id.toUpperCase() : 'IDENTIFIER';
if (tag === 'WHEN' && BEFORE_WHEN.indexOf(this.tag()) >= 0) {
tag = 'LEADING_WHEN';
}
if (tag === 'IDENTIFIER' && this.value() === '::') {
this.tag(1, 'PROTOTYPE_ACCESS'); this.tag(1, 'PROTOTYPE_ACCESS');
} }
if (tag === 'IDENTIFIER' && this.value() === '.' && !(this.value(2) === '.')) { if (this.value() === '.' && !(this.value(2) === '.')) {
if (this.tag(2) === '?') { if (this.tag(2) === '?') {
this.tag(1, 'SOAK_ACCESS'); this.tag(1, 'SOAK_ACCESS');
this.tokens.splice(-2, 1); this.tokens.splice(-2, 1);
@ -122,6 +117,13 @@
this.tag(1, 'PROPERTY_ACCESS'); this.tag(1, 'PROPERTY_ACCESS');
} }
} }
tag = 'IDENTIFIER';
if (KEYWORDS.indexOf(id) >= 0 && !(ACCESSORS.indexOf(this.tag()) >= 0)) {
tag = id.toUpperCase();
}
if (tag === 'WHEN' && BEFORE_WHEN.indexOf(this.tag()) >= 0) {
tag = 'LEADING_WHEN';
}
this.token(tag, id); this.token(tag, id);
this.i += id.length; this.i += id.length;
return true; return true;
@ -245,7 +247,7 @@
if (!((space = this.match(WHITESPACE, 1)))) { if (!((space = this.match(WHITESPACE, 1)))) {
return false; return false;
} }
this.spaced = this.value(); this.tokens[this.tokens.length - 1].spaced = true;
this.i += space.length; this.i += space.length;
return true; return true;
}; };
@ -279,7 +281,7 @@
if (value === ';') { if (value === ';') {
tag = 'TERMINATOR'; tag = 'TERMINATOR';
} }
if (this.value() !== this.spaced && CALLABLE.indexOf(this.tag()) >= 0) { if (!this.tokens[this.tokens.length - 1].spaced && CALLABLE.indexOf(this.tag()) >= 0) {
if (value === '(') { if (value === '(') {
tag = 'CALL_START'; tag = 'CALL_START';
} }

View File

@ -61,6 +61,10 @@ NOT_REGEX: [
# Tokens which could legitimately be invoked or indexed. # Tokens which could legitimately be invoked or indexed.
CALLABLE: ['IDENTIFIER', 'SUPER', ')', ']', '}', 'STRING'] CALLABLE: ['IDENTIFIER', 'SUPER', ')', ']', '}', 'STRING']
# Tokens that indicate an access -- keywords immediately following will be
# treated as identifiers.
ACCESSORS: ['PROPERTY_ACCESS', 'PROTOTYPE_ACCESS', 'SOAK_ACCESS', '@']
# Tokens that, when immediately preceding a 'WHEN', indicate that it's leading. # Tokens that, when immediately preceding a 'WHEN', indicate that it's leading.
BEFORE_WHEN: ['INDENT', 'OUTDENT', 'TERMINATOR'] BEFORE_WHEN: ['INDENT', 'OUTDENT', 'TERMINATOR']
@ -72,7 +76,6 @@ lex::tokenize: (code) ->
@indent : 0 # The current indent level. @indent : 0 # The current indent level.
@indents : [] # The stack of all indent levels we are currently within. @indents : [] # The stack of all indent levels we are currently within.
@tokens : [] # Collection of all parsed tokens in the form [:TOKEN_TYPE, value] @tokens : [] # Collection of all parsed tokens in the form [:TOKEN_TYPE, value]
@spaced : null # The last token that has a space following it.
while @i < @code.length while @i < @code.length
@chunk: @code.slice(@i) @chunk: @code.slice(@i)
@extract_next_token() @extract_next_token()
@ -98,17 +101,16 @@ lex::extract_next_token: ->
# Matches identifying literals: variables, keywords, method names, etc. # Matches identifying literals: variables, keywords, method names, etc.
lex::identifier_token: -> lex::identifier_token: ->
return false unless id: @match IDENTIFIER, 1 return false unless id: @match IDENTIFIER, 1
# Keywords are special identifiers tagged with their own name, @tag(1, 'PROTOTYPE_ACCESS') if @value() is '::'
# 'if' will result in an ['IF', "if"] token. if @value() is '.' and not (@value(2) is '.')
tag: if KEYWORDS.indexOf(id) >= 0 then id.toUpperCase() else 'IDENTIFIER'
tag: 'LEADING_WHEN' if tag is 'WHEN' and BEFORE_WHEN.indexOf(@tag()) >= 0
@tag(1, 'PROTOTYPE_ACCESS') if tag is 'IDENTIFIER' and @value() is '::'
if tag is 'IDENTIFIER' and @value() is '.' and !(@value(2) is '.')
if @tag(2) is '?' if @tag(2) is '?'
@tag(1, 'SOAK_ACCESS') @tag(1, 'SOAK_ACCESS')
@tokens.splice(-2, 1) @tokens.splice(-2, 1)
else else
@tag(1, 'PROPERTY_ACCESS') @tag(1, 'PROPERTY_ACCESS')
tag: 'IDENTIFIER'
tag: id.toUpperCase() if KEYWORDS.indexOf(id) >= 0 and not (ACCESSORS.indexOf(@tag()) >= 0)
tag: 'LEADING_WHEN' if tag is 'WHEN' and BEFORE_WHEN.indexOf(@tag()) >= 0
@token(tag, id) @token(tag, id)
@i += id.length @i += id.length
true true
@ -199,7 +201,7 @@ lex::outdent_token: (move_out) ->
# Matches and consumes non-meaningful whitespace. # Matches and consumes non-meaningful whitespace.
lex::whitespace_token: -> lex::whitespace_token: ->
return false unless space: @match WHITESPACE, 1 return false unless space: @match WHITESPACE, 1
@spaced: @value() @tokens[@tokens.length - 1].spaced: true
@i += space.length @i += space.length
true true
@ -224,7 +226,8 @@ lex::literal_token: ->
value ||= @chunk.substr(0, 1) value ||= @chunk.substr(0, 1)
tag: if value.match(ASSIGNMENT) then 'ASSIGN' else value tag: if value.match(ASSIGNMENT) then 'ASSIGN' else value
tag: 'TERMINATOR' if value == ';' tag: 'TERMINATOR' if value == ';'
if @value() isnt @spaced and CALLABLE.indexOf(@tag()) >= 0
if not @tokens[@tokens.length - 1].spaced and CALLABLE.indexOf(@tag()) >= 0
tag: 'CALL_START' if value is '(' tag: 'CALL_START' if value is '('
tag: 'INDEX_START' if value is '[' tag: 'INDEX_START' if value is '['
@token tag, value @token tag, value

View File

@ -51,3 +51,12 @@ bob: {
puts bob.hello() is "Hello Bob" puts bob.hello() is "Hello Bob"
puts bob[10] is 'number' puts bob[10] is 'number'
obj: {
'is': -> yes
'not': -> no
}
puts obj.is()
puts not obj.not()