Hewing closer to JS' syntactic resynchronization for regexp lexing.
This commit is contained in:
parent
71db1fc142
commit
841463da8e
|
@ -1,5 +1,5 @@
|
||||||
(function() {
|
(function() {
|
||||||
var ASSIGNED, BOOL, CALLABLE, CODE, COFFEE_ALIASES, COFFEE_KEYWORDS, COMMENT, COMPARE, COMPOUND_ASSIGN, HEREDOC, HEREDOC_INDENT, HEREGEX, HEREGEX_OMIT, IDENTIFIER, INDEXABLE, JSTOKEN, JS_FORBIDDEN, JS_KEYWORDS, LEADING_SPACES, LINE_BREAK, LINE_CONTINUER, LOGIC, Lexer, MATH, MULTILINER, MULTI_DENT, NOT_REGEX, NO_NEWLINE, NUMBER, OPERATOR, REGEX, RELATION, RESERVED, Rewriter, SHIFT, SIMPLESTR, TRAILING_SPACES, UNARY, WHITESPACE, compact, count, last, op, starts, _ref;
|
var ASSIGNED, BOOL, CALLABLE, CODE, COFFEE_ALIASES, COFFEE_KEYWORDS, COMMENT, COMPARE, COMPOUND_ASSIGN, HEREDOC, HEREDOC_INDENT, HEREGEX, HEREGEX_OMIT, IDENTIFIER, INDEXABLE, JSTOKEN, JS_FORBIDDEN, JS_KEYWORDS, LEADING_SPACES, LINE_BREAK, LINE_CONTINUER, LOGIC, Lexer, MATH, MULTILINER, MULTI_DENT, NOT_REGEX, NOT_SPACED_REGEX, NO_NEWLINE, NUMBER, OPERATOR, REGEX, RELATION, RESERVED, Rewriter, SHIFT, SIMPLESTR, TRAILING_SPACES, UNARY, WHITESPACE, compact, count, last, op, starts, _ref;
|
||||||
var __indexOf = Array.prototype.indexOf || function(item) {
|
var __indexOf = Array.prototype.indexOf || function(item) {
|
||||||
for (var i = 0, l = this.length; i < l; i++) {
|
for (var i = 0, l = this.length; i < l; i++) {
|
||||||
if (this[i] === item) return i;
|
if (this[i] === item) return i;
|
||||||
|
@ -200,14 +200,15 @@
|
||||||
return script.length;
|
return script.length;
|
||||||
};
|
};
|
||||||
Lexer.prototype.regexToken = function() {
|
Lexer.prototype.regexToken = function() {
|
||||||
var match, regex, _ref;
|
var match, prev, regex, _ref;
|
||||||
if (this.chunk.charAt(0) !== '/') {
|
if (this.chunk.charAt(0) !== '/') {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
if (match = HEREGEX.exec(this.chunk)) {
|
if (match = HEREGEX.exec(this.chunk)) {
|
||||||
return this.heregexToken(match);
|
return this.heregexToken(match);
|
||||||
}
|
}
|
||||||
if (_ref = this.tag(), __indexOf.call(NOT_REGEX, _ref) >= 0) {
|
prev = last(this.tokens);
|
||||||
|
if (prev && (_ref = prev[0], __indexOf.call((prev.spaced ? NOT_REGEX : NOT_SPACED_REGEX), _ref) >= 0)) {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
if (!(match = REGEX.exec(this.chunk))) {
|
if (!(match = REGEX.exec(this.chunk))) {
|
||||||
|
@ -641,6 +642,7 @@
|
||||||
RELATION = ['IN', 'OF', 'INSTANCEOF'];
|
RELATION = ['IN', 'OF', 'INSTANCEOF'];
|
||||||
BOOL = ['TRUE', 'FALSE', 'NULL', 'UNDEFINED'];
|
BOOL = ['TRUE', 'FALSE', 'NULL', 'UNDEFINED'];
|
||||||
NOT_REGEX = ['NUMBER', 'REGEX', 'BOOL', '++', '--', ']'];
|
NOT_REGEX = ['NUMBER', 'REGEX', 'BOOL', '++', '--', ']'];
|
||||||
|
NOT_SPACED_REGEX = NOT_REGEX.concat(')', '}', 'THIS');
|
||||||
CALLABLE = ['IDENTIFIER', 'STRING', 'REGEX', ')', ']', '}', '?', '::', '@', 'THIS', 'SUPER'];
|
CALLABLE = ['IDENTIFIER', 'STRING', 'REGEX', ')', ']', '}', '?', '::', '@', 'THIS', 'SUPER'];
|
||||||
INDEXABLE = CALLABLE.concat('NUMBER', 'BOOL');
|
INDEXABLE = CALLABLE.concat('NUMBER', 'BOOL');
|
||||||
LINE_BREAK = ['INDENT', 'OUTDENT', 'TERMINATOR'];
|
LINE_BREAK = ['INDENT', 'OUTDENT', 'TERMINATOR'];
|
||||||
|
|
|
@ -195,7 +195,8 @@ exports.Lexer = class Lexer
|
||||||
regexToken: ->
|
regexToken: ->
|
||||||
return 0 if @chunk.charAt(0) isnt '/'
|
return 0 if @chunk.charAt(0) isnt '/'
|
||||||
return @heregexToken match if match = HEREGEX.exec @chunk
|
return @heregexToken match if match = HEREGEX.exec @chunk
|
||||||
return 0 if @tag() in NOT_REGEX
|
prev = last @tokens
|
||||||
|
return 0 if prev and (prev[0] in (if prev.spaced then NOT_REGEX else NOT_SPACED_REGEX))
|
||||||
return 0 unless match = REGEX.exec @chunk
|
return 0 unless match = REGEX.exec @chunk
|
||||||
[regex] = match
|
[regex] = match
|
||||||
@token 'REGEX', if regex is '//' then '/(?:)/' else regex
|
@token 'REGEX', if regex is '//' then '/(?:)/' else regex
|
||||||
|
@ -644,6 +645,10 @@ BOOL = ['TRUE', 'FALSE', 'NULL', 'UNDEFINED']
|
||||||
# Our list is shorter, due to sans-parentheses method calls.
|
# Our list is shorter, due to sans-parentheses method calls.
|
||||||
NOT_REGEX = ['NUMBER', 'REGEX', 'BOOL', '++', '--', ']']
|
NOT_REGEX = ['NUMBER', 'REGEX', 'BOOL', '++', '--', ']']
|
||||||
|
|
||||||
|
# If the previous token is not spaced, there are more preceding tokens that
|
||||||
|
# force a division parse:
|
||||||
|
NOT_SPACED_REGEX = NOT_REGEX.concat ')', '}', 'THIS'
|
||||||
|
|
||||||
# Tokens which could legitimately be invoked or indexed. An opening
|
# Tokens which could legitimately be invoked or indexed. An opening
|
||||||
# parenthesis or bracket following these tokens will be recorded as the start
|
# parenthesis or bracket following these tokens will be recorded as the start
|
||||||
# of a function invocation or indexing operation.
|
# of a function invocation or indexing operation.
|
||||||
|
|
|
@ -37,9 +37,13 @@ eq '\\\\#{}\\\\\\\"', ///
|
||||||
eq /// /// + '', '/(?:)/'
|
eq /// /// + '', '/(?:)/'
|
||||||
|
|
||||||
|
|
||||||
#584: Unescaped slashes in character classes.
|
|
||||||
ok /:\/[/]goog/.test 'http://google.com'
|
|
||||||
|
|
||||||
|
|
||||||
#764: Should be indexable.
|
#764: Should be indexable.
|
||||||
eq /0/['source'], ///#{0}///['source']
|
eq /0/['source'], ///#{0}///['source']
|
||||||
|
|
||||||
|
|
||||||
|
# If not preceded by whitespace, should be stricter.
|
||||||
|
i = 5
|
||||||
|
eq (1000)/200/i, 1
|
||||||
|
|
||||||
|
#584: Unescaped slashes in character classes.
|
||||||
|
ok /:\/[/]goog/.test 'http://google.com'
|
||||||
|
|
Loading…
Reference in New Issue