lexer: made REGEX more efficient

2010-10-18 07:43:29 +09:00 · 2010-10-18 07:43:29 +09:00 · 87560d943c
parent 8d0a0e8ab1
commit 87560d943c
2 changed files with 18 additions and 11 deletions
--- a/lib/lexer.js
+++ b/lib/lexer.js
@@ -179,7 +179,7 @@
      return true;
    };
    Lexer.prototype.regexToken = function() {
-      var match;
+      var match, regex;
      if (this.chunk.charAt(0) !== '/') {
        return false;
      }
@@ -192,8 +192,9 @@
      if (!(match = REGEX.exec(this.chunk))) {
        return false;
      }
-      this.token('REGEX', match[0]);
+      regex = match[0];
-      this.i += match[0].length;
+      this.token('REGEX', regex === '//' ? '/(?:)/' : regex);
+      this.i += regex.length;
      return true;
    };
    Lexer.prototype.heregexToken = function(match) {
@@ -610,7 +611,7 @@
  MULTI_DENT = /^(?:\n[ \t]*)+/;
  SIMPLESTR = /^'[^\\']*(?:\\.[^\\']*)*'/;
  JSTOKEN = /^`[^\\`]*(?:\\.[^\\`]*)*`/;
-  REGEX = /^\/(?!\s)(?:[^[\/\n\\]+|\\[\s\S]|\[([^\]\n\\]+|\\[\s\S])*])+\/[imgy]{0,4}(?![A-Za-z])/;
+  REGEX = /^\/(?!\s)[^[\/\n\\]*(?:(?:\\[\s\S]|\[[^\]\n\\]*(?:\\[\s\S][^\]\n\\]*)*])[^[\/\n\\]*)*\/[imgy]{0,4}(?![A-Za-z])/;
  HEREGEX = /^\/{3}([\s\S]+?)\/{3}([imgy]{0,4})(?![A-Za-z])/;
  HEREGEX_OMIT = /\s+(?:#.*)?/g;
  MULTILINER = /\n/g;
--- a/src/lexer.coffee
+++ b/src/lexer.coffee
@@ -189,8 +189,9 @@ exports.Lexer = class Lexer
    return @heregexToken match if match = HEREGEX.exec @chunk
    return false if include NOT_REGEX, @tag()
    return false unless match = REGEX.exec @chunk
-    @token 'REGEX', match[0]
+    [regex] = match
-    @i += match[0].length
+    @token 'REGEX', if regex is '//' then '/(?:)/' else regex
+    @i += regex.length
    true
  # Matches experimental, multiline and extended regular expression literals.
@@ -559,11 +560,16 @@ JSTOKEN    = /^`[^\\`]*(?:\\.[^\\`]*)*`/
 # Regex-matching-regexes.
 REGEX = /// ^
-  / (?!\s)                                 # disallow leading whitespace
+  / (?! \s )       # disallow leading whitespace
-  (?: [^ [ / \n \\ ]+                      # every other thing
+  [^ [ / \n \\ ]*  # every other thing
-    | \\[\s\S]                             # anything escaped
+  (?:
-    | \[ ( [^ \] \n \\ ]+ | \\[\s\S] )* ]  # character class
+    (?: \\[\s\S]   # anything escaped
-  )+
+      | \[         # character class
+           [^ \] \n \\ ]*
+           (?: \\[\s\S] [^ \] \n \\ ]* )*
+         ]
+    ) [^ [ / \n \\ ]*
+  )*
  / [imgy]{0,4} (?![A-Za-z])
 ///
 HEREGEX      = /^\/{3}([\s\S]+?)\/{3}([imgy]{0,4})(?![A-Za-z])/