Hewing closer to JS' syntactic resynchronization for regexp lexing.

This commit is contained in:
Jeremy Ashkenas 2010-11-09 22:39:15 -05:00
parent 71db1fc142
commit 841463da8e
3 changed files with 19 additions and 8 deletions

View File

@ -1,5 +1,5 @@
(function() {
var ASSIGNED, BOOL, CALLABLE, CODE, COFFEE_ALIASES, COFFEE_KEYWORDS, COMMENT, COMPARE, COMPOUND_ASSIGN, HEREDOC, HEREDOC_INDENT, HEREGEX, HEREGEX_OMIT, IDENTIFIER, INDEXABLE, JSTOKEN, JS_FORBIDDEN, JS_KEYWORDS, LEADING_SPACES, LINE_BREAK, LINE_CONTINUER, LOGIC, Lexer, MATH, MULTILINER, MULTI_DENT, NOT_REGEX, NO_NEWLINE, NUMBER, OPERATOR, REGEX, RELATION, RESERVED, Rewriter, SHIFT, SIMPLESTR, TRAILING_SPACES, UNARY, WHITESPACE, compact, count, last, op, starts, _ref;
var ASSIGNED, BOOL, CALLABLE, CODE, COFFEE_ALIASES, COFFEE_KEYWORDS, COMMENT, COMPARE, COMPOUND_ASSIGN, HEREDOC, HEREDOC_INDENT, HEREGEX, HEREGEX_OMIT, IDENTIFIER, INDEXABLE, JSTOKEN, JS_FORBIDDEN, JS_KEYWORDS, LEADING_SPACES, LINE_BREAK, LINE_CONTINUER, LOGIC, Lexer, MATH, MULTILINER, MULTI_DENT, NOT_REGEX, NOT_SPACED_REGEX, NO_NEWLINE, NUMBER, OPERATOR, REGEX, RELATION, RESERVED, Rewriter, SHIFT, SIMPLESTR, TRAILING_SPACES, UNARY, WHITESPACE, compact, count, last, op, starts, _ref;
var __indexOf = Array.prototype.indexOf || function(item) {
for (var i = 0, l = this.length; i < l; i++) {
if (this[i] === item) return i;
@ -200,14 +200,15 @@
return script.length;
};
Lexer.prototype.regexToken = function() {
var match, regex, _ref;
var match, prev, regex, _ref;
if (this.chunk.charAt(0) !== '/') {
return 0;
}
if (match = HEREGEX.exec(this.chunk)) {
return this.heregexToken(match);
}
if (_ref = this.tag(), __indexOf.call(NOT_REGEX, _ref) >= 0) {
prev = last(this.tokens);
if (prev && (_ref = prev[0], __indexOf.call((prev.spaced ? NOT_REGEX : NOT_SPACED_REGEX), _ref) >= 0)) {
return 0;
}
if (!(match = REGEX.exec(this.chunk))) {
@ -641,6 +642,7 @@
// Lists of token tags (plain strings) consulted by the lexer.
RELATION = ['IN', 'OF', 'INSTANCEOF'];
BOOL = ['TRUE', 'FALSE', 'NULL', 'UNDEFINED'];
// Tags after which a leading `/` is not lexed as a regex literal:
// regexToken returns 0 when the previous token's tag is found here.
NOT_REGEX = ['NUMBER', 'REGEX', 'BOOL', '++', '--', ']'];
// Superset of NOT_REGEX that regexToken uses instead when the previous
// token's `spaced` flag is falsy; it additionally rules out a regex
// after ')', '}' and 'THIS'.
NOT_SPACED_REGEX = NOT_REGEX.concat(')', '}', 'THIS');
// NOTE(review): presumably the tags that may be followed by a call or an
// index operation; the code using these two lists is outside this hunk.
CALLABLE = ['IDENTIFIER', 'STRING', 'REGEX', ')', ']', '}', '?', '::', '@', 'THIS', 'SUPER'];
// INDEXABLE is CALLABLE plus 'NUMBER' and 'BOOL'.
INDEXABLE = CALLABLE.concat('NUMBER', 'BOOL');
LINE_BREAK = ['INDENT', 'OUTDENT', 'TERMINATOR'];

View File

@ -195,7 +195,8 @@ exports.Lexer = class Lexer
regexToken: ->
return 0 if @chunk.charAt(0) isnt '/'
return @heregexToken match if match = HEREGEX.exec @chunk
return 0 if @tag() in NOT_REGEX
prev = last @tokens
return 0 if prev and (prev[0] in (if prev.spaced then NOT_REGEX else NOT_SPACED_REGEX))
return 0 unless match = REGEX.exec @chunk
[regex] = match
@token 'REGEX', if regex is '//' then '/(?:)/' else regex
@ -644,6 +645,10 @@ BOOL = ['TRUE', 'FALSE', 'NULL', 'UNDEFINED']
# Our list is shorter, due to sans-parentheses method calls.
NOT_REGEX = ['NUMBER', 'REGEX', 'BOOL', '++', '--', ']']
# If the previous token is not spaced, there are more preceding tokens that
# force a division parse:
NOT_SPACED_REGEX = NOT_REGEX.concat ')', '}', 'THIS'
# Tokens which could legitimately be invoked or indexed. An opening
# parenthesis or bracket following these tokens will be recorded as the start
# of a function invocation or indexing operation.

View File

@ -37,9 +37,13 @@ eq '\\\\#{}\\\\\\\"', ///
eq /// /// + '', '/(?:)/'
#584: Unescaped slashes in character classes.
ok /:\/[/]goog/.test 'http://google.com'
#764: Should be indexable.
eq /0/['source'], ///#{0}///['source']
# If the `/` is not preceded by whitespace, lexing should be stricter: directly after `)` it is division, not a regex.
i = 5
eq (1000)/200/i, 1
#584: Unescaped slashes in character classes.
ok /:\/[/]goog/.test 'http://google.com'