Adding a starts() helper to avoid substring() calls for simple matches.

2010-03-06 16:24:06 -05:00 · 2010-03-06 16:24:06 -05:00 · a5e3617015
parent c4ad6d1ee6
commit a5e3617015
2 changed files with 37 additions and 25 deletions
--- a/lib/lexer.js
+++ b/lib/lexer.js
@ -1,5 +1,5 @@
 (function(){
-  var ACCESSORS, ASSIGNMENT, BEFORE_WHEN, CALLABLE, CODE, COFFEE_KEYWORDS, COMMENT, COMMENT_CLEANER, HEREDOC, HEREDOC_INDENT, IDENTIFIER, INTERPOLATION, JS_CLEANER, JS_FORBIDDEN, JS_KEYWORDS, KEYWORDS, LAST_DENT, LAST_DENTS, Lexer, MULTILINER, MULTI_DENT, NOT_REGEX, NO_NEWLINE, NUMBER, OPERATOR, REGEX, RESERVED, Rewriter, STRING_NEWLINES, WHITESPACE, compact, count, include;
+  var ACCESSORS, ASSIGNMENT, BEFORE_WHEN, CALLABLE, CODE, COFFEE_KEYWORDS, COMMENT, COMMENT_CLEANER, HEREDOC, HEREDOC_INDENT, IDENTIFIER, INTERPOLATION, JS_CLEANER, JS_FORBIDDEN, JS_KEYWORDS, KEYWORDS, LAST_DENT, LAST_DENTS, Lexer, MULTILINER, MULTI_DENT, NOT_REGEX, NO_NEWLINE, NUMBER, OPERATOR, REGEX, RESERVED, Rewriter, STRING_NEWLINES, WHITESPACE, compact, count, include, starts;
  // The CoffeeScript Lexer. Uses a series of token-matching regexes to attempt
  // matches against the beginning of the source code. When a match is found,
  // a token is produced, we consume the match, and start again. Tokens are in the
@ -219,27 +219,30 @@
    // a series of delimiters, all of which must be balanced correctly within the
    // token's contents.
    Lexer.prototype.balanced_token = function balanced_token() {
-      var _a, _b, delimited, each, i, levels, type;
+      var _a, _b, _c, _d, close, delimited, i, levels, open, pair;
      delimited = Array.prototype.slice.call(arguments, 0);
      levels = [];
      i = 0;
      while (i < this.chunk.length) {
        _a = delimited;
-        for (type = 0, _b = _a.length; type < _b; type++) {
-          each = _a[type];
-          if (levels.length && this.chunk.substring(i, i + 1) === '\\') {
+        for (_b = 0, _c = _a.length; _b < _c; _b++) {
+          pair = _a[_b];
+          _d = pair;
+          open = _d[0];
+          close = _d[1];
+          if (levels.length && starts(this.chunk, '\\', i)) {
            i += 1;
            break;
-          } else if (levels.length && this.chunk.substring(i, i + each[1].length) === each[1] && levels[levels.length - 1] === type) {
+          } else if (levels.length && starts(this.chunk, close, i) && levels[levels.length - 1] === pair) {
            levels.pop();
-            i += each[1].length - 1;
+            i += close.length - 1;
            if (!(levels.length)) {
              i += 1;
            }
            break;
-          } else if (this.chunk.substring(i, i + each[0].length) === each[0]) {
-            levels.push(type);
-            i += each[0].length - 1;
+          } else if (starts(this.chunk, open, i)) {
+            levels.push(pair);
+            i += open.length - 1;
            break;
          }
        }
@ -451,7 +454,7 @@
    //     "Hello ${name.capitalize()}."
    Lexer.prototype.interpolate_string = function interpolate_string(str) {
      var _a, _b, _c, _d, _e, _f, _g, _h, _i, _j, _k, _l, _m, before, each, group, i, inner, interp, lexer, match, nested, prev, quote, tok, tokens;
-      if (str.length < 3 || str.substring(0, 1) !== '"') {
+      if (str.length < 3 || !starts(str, '"')) {
        return this.token('STRING', str);
      } else {
        lexer = new Lexer();
@ -465,7 +468,7 @@
            group = _a[0];
            before = _a[1];
            interp = _a[2];
-            if (before.substring(before.length - 1) === '\\') {
+            if (starts(before, '\\', before.length - 1)) {
              prev = before.substring(0, before.length - 1);
              if (before.length) {
                tokens.push(['STRING', quote + prev + "$" + interp + quote]);
@ -474,7 +477,7 @@
              if (before.length) {
                tokens.push(['STRING', quote + before + quote]);
              }
-              if (interp.substring(0, 1) === '{') {
+              if (starts(interp, '{')) {
                inner = interp.substring(1, interp.length - 1);
                nested = lexer.tokenize("(" + inner + ")", {
                  rewrite: false,
@ -483,7 +486,7 @@
                nested.pop();
                tokens.push(['TOKENS', nested]);
              } else {
-                if (interp.substring(0, 1) === '@') {
+                if (starts(interp, '@')) {
                  interp = "this." + (interp.substring(1));
                }
                tokens.push(['IDENTIFIER', interp]);
@ -579,6 +582,10 @@
  include = function include(list, value) {
    return list.indexOf(value) >= 0;
  };
+  // Test whether `string` contains `literal` at offset `start` (the very beginning of the string when `start` is omitted).
+  starts = function starts(string, literal, start) {
+    return string.substring(start, (start || 0) + literal.length) === literal;
+  };
  // Trim out all falsy values from an array.
  compact = function compact(array) {
    var _a, _b, _c, _d, item;
--- a/src/lexer.coffee
+++ b/src/lexer.coffee
@ -204,18 +204,19 @@ exports.Lexer: class Lexer
    levels: []
    i: 0
    while i < @chunk.length
-      for each, type in delimited
-        if levels.length and @chunk.substring(i, i + 1) is '\\'
+      for pair in delimited
+        [open, close]: pair
+        if levels.length and starts @chunk, '\\', i
          i += 1
          break
-        else if levels.length and @chunk.substring(i, i + each[1].length) is each[1] and levels[levels.length - 1] is type
+        else if levels.length and starts(@chunk, close, i) and levels[levels.length - 1] is pair
          levels.pop()
-          i += each[1].length - 1
+          i += close.length - 1
          i += 1 unless levels.length
          break
-        else if @chunk.substring(i, i + each[0].length) is each[0]
-          levels.push(type)
-          i += each[0].length - 1
+        else if starts @chunk, open, i
+          levels.push(pair)
+          i += open.length - 1
          break
      break unless levels.length
      i += 1
@ -375,7 +376,7 @@ exports.Lexer: class Lexer
  #     "Hello ${name.capitalize()}."
  #
  interpolate_string: (str) ->
-    if str.length < 3 or str.substring(0, 1) isnt '"'
+    if str.length < 3 or not starts str, '"'
      @token 'STRING', str
    else
      lexer:  new Lexer()
@ -386,18 +387,18 @@ exports.Lexer: class Lexer
        match: str.match INTERPOLATION
        if match
          [group, before, interp]: match
-          if before.substring(before.length - 1) is '\\'
+          if starts before, '\\', before.length - 1
            prev: before.substring(0, before.length - 1)
            tokens.push ['STRING', "$quote$prev$$interp$quote"] if before.length
          else
            tokens.push ['STRING', "$quote$before$quote"] if before.length
-            if interp.substring(0, 1) is '{'
+            if starts interp, '{'
              inner: interp.substring(1, interp.length - 1)
              nested: lexer.tokenize "($inner)", {rewrite: no, line: @line}
              nested.pop()
              tokens.push ['TOKENS', nested]
            else
-              interp: "this.${ interp.substring(1) }" if interp.substring(0, 1) is '@'
+              interp: "this.${ interp.substring(1) }" if starts interp, '@'
              tokens.push ['IDENTIFIER', interp]
          str: str.substring(group.length)
        else
@ -452,6 +453,10 @@ exports.Lexer: class Lexer
 include: (list, value) ->
  list.indexOf(value) >= 0

+# Test whether `string` contains `literal` at offset `start` (the very beginning of the string when `start` is omitted).
+starts: (string, literal, start) ->
+  string.substring(start, (start or 0) + literal.length) is literal
+
 # Trim out all falsy values from an array.
 compact: (array) -> item for item in array when item