lexer: made REGEX more efficient

This commit is contained in:
satyr 2010-10-18 07:43:29 +09:00
parent 8d0a0e8ab1
commit 87560d943c
2 changed files with 18 additions and 11 deletions

View File

@@ -179,7 +179,7 @@
return true; return true;
}; };
Lexer.prototype.regexToken = function() { Lexer.prototype.regexToken = function() {
var match; var match, regex;
if (this.chunk.charAt(0) !== '/') { if (this.chunk.charAt(0) !== '/') {
return false; return false;
} }
@@ -192,8 +192,9 @@
if (!(match = REGEX.exec(this.chunk))) { if (!(match = REGEX.exec(this.chunk))) {
return false; return false;
} }
this.token('REGEX', match[0]); regex = match[0];
this.i += match[0].length; this.token('REGEX', regex === '//' ? '/(?:)/' : regex);
this.i += regex.length;
return true; return true;
}; };
Lexer.prototype.heregexToken = function(match) { Lexer.prototype.heregexToken = function(match) {
@@ -610,7 +611,7 @@
MULTI_DENT = /^(?:\n[ \t]*)+/; MULTI_DENT = /^(?:\n[ \t]*)+/;
SIMPLESTR = /^'[^\\']*(?:\\.[^\\']*)*'/; SIMPLESTR = /^'[^\\']*(?:\\.[^\\']*)*'/;
JSTOKEN = /^`[^\\`]*(?:\\.[^\\`]*)*`/; JSTOKEN = /^`[^\\`]*(?:\\.[^\\`]*)*`/;
REGEX = /^\/(?!\s)(?:[^[\/\n\\]+|\\[\s\S]|\[([^\]\n\\]+|\\[\s\S])*])+\/[imgy]{0,4}(?![A-Za-z])/; REGEX = /^\/(?!\s)[^[\/\n\\]*(?:(?:\\[\s\S]|\[[^\]\n\\]*(?:\\[\s\S][^\]\n\\]*)*])[^[\/\n\\]*)*\/[imgy]{0,4}(?![A-Za-z])/;
HEREGEX = /^\/{3}([\s\S]+?)\/{3}([imgy]{0,4})(?![A-Za-z])/; HEREGEX = /^\/{3}([\s\S]+?)\/{3}([imgy]{0,4})(?![A-Za-z])/;
HEREGEX_OMIT = /\s+(?:#.*)?/g; HEREGEX_OMIT = /\s+(?:#.*)?/g;
MULTILINER = /\n/g; MULTILINER = /\n/g;

View File

@@ -189,8 +189,9 @@ exports.Lexer = class Lexer
return @heregexToken match if match = HEREGEX.exec @chunk return @heregexToken match if match = HEREGEX.exec @chunk
return false if include NOT_REGEX, @tag() return false if include NOT_REGEX, @tag()
return false unless match = REGEX.exec @chunk return false unless match = REGEX.exec @chunk
@token 'REGEX', match[0] [regex] = match
@i += match[0].length @token 'REGEX', if regex is '//' then '/(?:)/' else regex
@i += regex.length
true true
# Matches experimental, multiline and extended regular expression literals. # Matches experimental, multiline and extended regular expression literals.
@@ -559,11 +560,16 @@ JSTOKEN = /^`[^\\`]*(?:\\.[^\\`]*)*`/
# Regex-matching-regexes. # Regex-matching-regexes.
REGEX = /// ^ REGEX = /// ^
/ (?!\s) # disallow leading whitespace / (?! \s ) # disallow leading whitespace
(?: [^ [ / \n \\ ]+ # every other thing [^ [ / \n \\ ]* # every other thing
| \\[\s\S] # anything escaped (?:
| \[ ( [^ \] \n \\ ]+ | \\[\s\S] )* ] # character class (?: \\[\s\S] # anything escaped
)+ | \[ # character class
[^ \] \n \\ ]*
(?: \\[\s\S] [^ \] \n \\ ]* )*
]
) [^ [ / \n \\ ]*
)*
/ [imgy]{0,4} (?![A-Za-z]) / [imgy]{0,4} (?![A-Za-z])
/// ///
HEREGEX = /^\/{3}([\s\S]+?)\/{3}([imgy]{0,4})(?![A-Za-z])/ HEREGEX = /^\/{3}([\s\S]+?)\/{3}([imgy]{0,4})(?![A-Za-z])/