Adding a starts() helper to avoid substring() calls for simple matches.

This commit is contained in:
Jeremy Ashkenas 2010-03-06 16:24:06 -05:00
parent c4ad6d1ee6
commit a5e3617015
2 changed files with 37 additions and 25 deletions

View File

@ -1,5 +1,5 @@
(function(){
var ACCESSORS, ASSIGNMENT, BEFORE_WHEN, CALLABLE, CODE, COFFEE_KEYWORDS, COMMENT, COMMENT_CLEANER, HEREDOC, HEREDOC_INDENT, IDENTIFIER, INTERPOLATION, JS_CLEANER, JS_FORBIDDEN, JS_KEYWORDS, KEYWORDS, LAST_DENT, LAST_DENTS, Lexer, MULTILINER, MULTI_DENT, NOT_REGEX, NO_NEWLINE, NUMBER, OPERATOR, REGEX, RESERVED, Rewriter, STRING_NEWLINES, WHITESPACE, compact, count, include;
var ACCESSORS, ASSIGNMENT, BEFORE_WHEN, CALLABLE, CODE, COFFEE_KEYWORDS, COMMENT, COMMENT_CLEANER, HEREDOC, HEREDOC_INDENT, IDENTIFIER, INTERPOLATION, JS_CLEANER, JS_FORBIDDEN, JS_KEYWORDS, KEYWORDS, LAST_DENT, LAST_DENTS, Lexer, MULTILINER, MULTI_DENT, NOT_REGEX, NO_NEWLINE, NUMBER, OPERATOR, REGEX, RESERVED, Rewriter, STRING_NEWLINES, WHITESPACE, compact, count, include, starts;
// The CoffeeScript Lexer. Uses a series of token-matching regexes to attempt
// matches against the beginning of the source code. When a match is found,
// a token is produced, we consume the match, and start again. Tokens are in the
@ -219,27 +219,30 @@
// a series of delimiters, all of which must be balanced correctly within the
// token's contents.
Lexer.prototype.balanced_token = function balanced_token() {
var _a, _b, delimited, each, i, levels, type;
var _a, _b, _c, _d, close, delimited, i, levels, open, pair;
delimited = Array.prototype.slice.call(arguments, 0);
levels = [];
i = 0;
while (i < this.chunk.length) {
_a = delimited;
for (type = 0, _b = _a.length; type < _b; type++) {
each = _a[type];
if (levels.length && this.chunk.substring(i, i + 1) === '\\') {
for (_b = 0, _c = _a.length; _b < _c; _b++) {
pair = _a[_b];
_d = pair;
open = _d[0];
close = _d[1];
if (levels.length && starts(this.chunk, '\\', i)) {
i += 1;
break;
} else if (levels.length && this.chunk.substring(i, i + each[1].length) === each[1] && levels[levels.length - 1] === type) {
} else if (levels.length && starts(this.chunk, close, i) && levels[levels.length - 1] === pair) {
levels.pop();
i += each[1].length - 1;
i += close.length - 1;
if (!(levels.length)) {
i += 1;
}
break;
} else if (this.chunk.substring(i, i + each[0].length) === each[0]) {
levels.push(type);
i += each[0].length - 1;
} else if (starts(this.chunk, open, i)) {
levels.push(pair);
i += open.length - 1;
break;
}
}
@ -451,7 +454,7 @@
// "Hello ${name.capitalize()}."
Lexer.prototype.interpolate_string = function interpolate_string(str) {
var _a, _b, _c, _d, _e, _f, _g, _h, _i, _j, _k, _l, _m, before, each, group, i, inner, interp, lexer, match, nested, prev, quote, tok, tokens;
if (str.length < 3 || str.substring(0, 1) !== '"') {
if (str.length < 3 || !starts(str, '"')) {
return this.token('STRING', str);
} else {
lexer = new Lexer();
@ -465,7 +468,7 @@
group = _a[0];
before = _a[1];
interp = _a[2];
if (before.substring(before.length - 1) === '\\') {
if (starts(before, '\\', before.length - 1)) {
prev = before.substring(0, before.length - 1);
if (before.length) {
tokens.push(['STRING', quote + prev + "$" + interp + quote]);
@ -474,7 +477,7 @@
if (before.length) {
tokens.push(['STRING', quote + before + quote]);
}
if (interp.substring(0, 1) === '{') {
if (starts(interp, '{')) {
inner = interp.substring(1, interp.length - 1);
nested = lexer.tokenize("(" + inner + ")", {
rewrite: false,
@ -483,7 +486,7 @@
nested.pop();
tokens.push(['TOKENS', nested]);
} else {
if (interp.substring(0, 1) === '@') {
if (starts(interp, '@')) {
interp = "this." + (interp.substring(1));
}
tokens.push(['IDENTIFIER', interp]);
@ -579,6 +582,10 @@
include = function include(list, value) {
  // `indexOf` reports -1 when `value` is absent, so any other result
  // means the value is present in `list`.
  var position;
  position = list.indexOf(value);
  return position !== -1;
};
// Test whether `literal` appears in `string` at offset `start` (offset 0
// when `start` is omitted), comparing only a slice of the literal's length.
starts = function starts(string, literal, start) {
  var end, slice;
  // A falsy `start` is read as 0 both here and by `substring` itself.
  end = (start || 0) + literal.length;
  slice = string.substring(start, end);
  return slice === literal;
};
// Trim out all falsy values from an array.
compact = function compact(array) {
var _a, _b, _c, _d, item;

View File

@ -204,18 +204,19 @@ exports.Lexer: class Lexer
levels: []
i: 0
while i < @chunk.length
for each, type in delimited
if levels.length and @chunk.substring(i, i + 1) is '\\'
for pair in delimited
[open, close]: pair
if levels.length and starts @chunk, '\\', i
i += 1
break
else if levels.length and @chunk.substring(i, i + each[1].length) is each[1] and levels[levels.length - 1] is type
else if levels.length and starts(@chunk, close, i) and levels[levels.length - 1] is pair
levels.pop()
i += each[1].length - 1
i += close.length - 1
i += 1 unless levels.length
break
else if @chunk.substring(i, i + each[0].length) is each[0]
levels.push(type)
i += each[0].length - 1
else if starts @chunk, open, i
levels.push(pair)
i += open.length - 1
break
break unless levels.length
i += 1
@ -375,7 +376,7 @@ exports.Lexer: class Lexer
# "Hello ${name.capitalize()}."
#
interpolate_string: (str) ->
if str.length < 3 or str.substring(0, 1) isnt '"'
if str.length < 3 or not starts str, '"'
@token 'STRING', str
else
lexer: new Lexer()
@ -386,18 +387,18 @@ exports.Lexer: class Lexer
match: str.match INTERPOLATION
if match
[group, before, interp]: match
if before.substring(before.length - 1) is '\\'
if starts before, '\\', before.length - 1
prev: before.substring(0, before.length - 1)
tokens.push ['STRING', "$quote$prev$$interp$quote"] if before.length
else
tokens.push ['STRING', "$quote$before$quote"] if before.length
if interp.substring(0, 1) is '{'
if starts interp, '{'
inner: interp.substring(1, interp.length - 1)
nested: lexer.tokenize "($inner)", {rewrite: no, line: @line}
nested.pop()
tokens.push ['TOKENS', nested]
else
interp: "this.${ interp.substring(1) }" if interp.substring(0, 1) is '@'
interp: "this.${ interp.substring(1) }" if starts interp, '@'
tokens.push ['IDENTIFIER', interp]
str: str.substring(group.length)
else
@ -452,6 +453,10 @@ exports.Lexer: class Lexer
include: (list, value) ->
  # `indexOf` yields a non-negative position only when `value` is present.
  position: list.indexOf value
  position >= 0
# Test whether `literal` appears in `string` at offset `start` (offset 0
# when `start` is omitted), comparing only a slice of the literal's length.
starts: (string, literal, start) ->
  end: (start or 0) + literal.length
  slice: string.substring start, end
  slice is literal
# Build a new array holding only the truthy items of `array`, in their
# original order.
compact: (array) ->
  kept: []
  for item in array
    kept.push(item) if item
  kept