mirror of
https://github.com/jashkenas/coffeescript.git
synced 2022-11-09 12:23:24 -05:00
a little further on with the lexer
This commit is contained in:
parent
854c796fd6
commit
eff2f4b520
2 changed files with 84 additions and 7 deletions
|
@ -10,9 +10,9 @@
|
|||
// Token matching regexes.
|
||||
lex.IDENTIFIER = /^([a-zA-Z$_](\w|\$)*)/;
|
||||
lex.NUMBER = /^(\b((0(x|X)[0-9a-fA-F]+)|([0-9]+(\.[0-9]+)?(e[+\-]?[0-9]+)?)))\b/i;
|
||||
lex.STRING = /^(""|''|"(.*?)([^\\]|\\\\)"|'(.*?)([^\\]|\\\\)')/m;
|
||||
lex.HEREDOC = /^("{6}|'{6}|"{3}\n?(.*?)\n?([ \t]*)"{3}|'{3}\n?(.*?)\n?([ \t]*)'{3})/m;
|
||||
lex.JS = /^(``|`(.*?)([^\\]|\\\\)`)/m;
|
||||
lex.STRING = /^(""|''|"([\s\S]*?)([^\\]|\\\\)"|'([\s\S]*?)([^\\]|\\\\)')/;
|
||||
lex.HEREDOC = /^("{6}|'{6}|"{3}\n?([\s\S]*?)\n?([ \t]*)"{3}|'{3}\n?([\s\S]*?)\n?([ \t]*)'{3})/;
|
||||
lex.JS = /^(``|`([\s\S]*?)([^\\]|\\\\)`)/;
|
||||
lex.OPERATOR = /^([+\*&|\/\-%=<>:!?]+)/;
|
||||
lex.WHITESPACE = /^([ \t]+)/;
|
||||
lex.COMMENT = /^(((\n?[ \t]*)?#.*$)+)/;
|
||||
|
@ -92,5 +92,54 @@
|
|||
}
|
||||
return this.literal_token();
|
||||
};
|
||||
// Look at a tag in the current token stream.
|
||||
lex.prototype.tag = function tag(index, tag) {
|
||||
var tok;
|
||||
if (!((tok = this.tokens[index || -1]))) {
|
||||
return null;
|
||||
}
|
||||
if ((typeof tag !== "undefined" && tag !== null)) {
|
||||
return (tok[0] = tag);
|
||||
}
|
||||
return tok[0];
|
||||
};
|
||||
// Look at a value in the current token stream.
|
||||
lex.prototype.value = function value(index, val) {
|
||||
var tok;
|
||||
if (!((tok = this.tokens[index || -1]))) {
|
||||
return null;
|
||||
}
|
||||
if ((typeof val !== "undefined" && val !== null)) {
|
||||
return (tok[1] = val);
|
||||
}
|
||||
return tok[1];
|
||||
};
|
||||
// Tokenizers ==========================================================
|
||||
// Matches identifying literals: variables, keywords, method names, etc.
|
||||
lex.prototype.identifier_token = function identifier_token() {
|
||||
var id, match, tag;
|
||||
match = this.chunk.match(lex.IDENTIFIER);
|
||||
if (!(match && (id = match[1]))) {
|
||||
return false;
|
||||
}
|
||||
// Keywords are special identifiers tagged with their own name,
|
||||
// 'if' will result in an ['IF', "if"] token.
|
||||
tag = this.KEYWORDS.indexOf(id) >= 0 ? id.toUpperCase() : 'IDENTIFIER';
|
||||
if (tag === 'WHEN' && (this.tag() === 'OUTDENT' || this.tag() === 'INDENT')) {
|
||||
tag = 'LEADING_WHEN';
|
||||
}
|
||||
if (tag === 'IDENTIFIER' && this.value() === '::') {
|
||||
this.tag(-1, 'PROTOTYPE_ACCESS');
|
||||
}
|
||||
if (tag === 'IDENTIFIER' && this.value() === '.' && !(this.value(-2) === '.')) {
|
||||
if (this.tag(-2) === '?') {
|
||||
this.tag(-1, 'SOAK_ACCESS');
|
||||
this.tokens.splice(-2, 1);
|
||||
} else {
|
||||
this.tag(-1, 'PROPERTY_ACCESS');
|
||||
}
|
||||
}
|
||||
this.token(tag, id);
|
||||
return this.i += id.length;
|
||||
};
|
||||
})();
|
|
@ -22,9 +22,9 @@ lex.KEYWORDS: [
|
|||
# Token matching regexes.
|
||||
lex.IDENTIFIER : /^([a-zA-Z$_](\w|\$)*)/
|
||||
lex.NUMBER : /^(\b((0(x|X)[0-9a-fA-F]+)|([0-9]+(\.[0-9]+)?(e[+\-]?[0-9]+)?)))\b/i
|
||||
lex.STRING : /^(""|''|"(.*?)([^\\]|\\\\)"|'(.*?)([^\\]|\\\\)')/m
|
||||
lex.HEREDOC : /^("{6}|'{6}|"{3}\n?(.*?)\n?([ \t]*)"{3}|'{3}\n?(.*?)\n?([ \t]*)'{3})/m
|
||||
lex.JS : /^(``|`(.*?)([^\\]|\\\\)`)/m
|
||||
lex.STRING : /^(""|''|"([\s\S]*?)([^\\]|\\\\)"|'([\s\S]*?)([^\\]|\\\\)')/
|
||||
lex.HEREDOC : /^("{6}|'{6}|"{3}\n?([\s\S]*?)\n?([ \t]*)"{3}|'{3}\n?([\s\S]*?)\n?([ \t]*)'{3})/
|
||||
lex.JS : /^(``|`([\s\S]*?)([^\\]|\\\\)`)/
|
||||
lex.OPERATOR : /^([+\*&|\/\-%=<>:!?]+)/
|
||||
lex.WHITESPACE : /^([ \t]+)/
|
||||
lex.COMMENT : /^(((\n?[ \t]*)?#.*$)+)/
|
||||
|
@ -84,9 +84,37 @@ lex::extract_next_token: ->
|
|||
return if this.whitespace_token()
|
||||
return this.literal_token()
|
||||
|
||||
# Look at a tag in the current token stream.
|
||||
lex::tag: (index, tag) ->
|
||||
return unless tok: this.tokens[index || -1]
|
||||
return tok[0]: tag if tag?
|
||||
tok[0]
|
||||
|
||||
# Look at a value in the current token stream.
|
||||
lex::value: (index, val) ->
|
||||
return unless tok: this.tokens[index || -1]
|
||||
return tok[1]: val if val?
|
||||
tok[1]
|
||||
|
||||
# Tokenizers ==========================================================
|
||||
|
||||
|
||||
# Matches identifying literals: variables, keywords, method names, etc.
|
||||
lex::identifier_token: ->
|
||||
match: this.chunk.match(lex.IDENTIFIER)
|
||||
return false unless match and id: match[1]
|
||||
# Keywords are special identifiers tagged with their own name,
|
||||
# 'if' will result in an ['IF', "if"] token.
|
||||
tag: if this.KEYWORDS.indexOf(id) >= 0 then id.toUpperCase() else 'IDENTIFIER'
|
||||
tag: 'LEADING_WHEN' if tag is 'WHEN' and (this.tag() is 'OUTDENT' or this.tag() is 'INDENT')
|
||||
this.tag(-1, 'PROTOTYPE_ACCESS') if tag is 'IDENTIFIER' and this.value() is '::'
|
||||
if tag is 'IDENTIFIER' and this.value() is '.' and !(this.value(-2) is '.')
|
||||
if this.tag(-2) is '?'
|
||||
this.tag(-1, 'SOAK_ACCESS')
|
||||
this.tokens.splice(-2, 1)
|
||||
else
|
||||
this.tag(-1, 'PROPERTY_ACCESS')
|
||||
this.token(tag, id)
|
||||
this.i += id.length
|
||||
|
||||
|
||||
|
||||
|
|
Loading…
Reference in a new issue