diff --git a/lib/lexer.js b/lib/lexer.js index fab26120..1986ba76 100644 --- a/lib/lexer.js +++ b/lib/lexer.js @@ -1,5 +1,5 @@ (function(){ - var ACCESSORS, ASSIGNMENT, BEFORE_WHEN, CALLABLE, CODE, COFFEE_KEYWORDS, COMMENT, COMMENT_CLEANER, HEREDOC, HEREDOC_INDENT, IDENTIFIER, INTERPOLATION, JS_CLEANER, JS_FORBIDDEN, JS_KEYWORDS, KEYWORDS, LAST_DENT, LAST_DENTS, Lexer, MULTILINER, MULTI_DENT, NOT_REGEX, NO_NEWLINE, NUMBER, OPERATOR, REGEX, RESERVED, Rewriter, STRING_NEWLINES, WHITESPACE, compact, count, include, starts; + var ACCESSORS, ASSIGNMENT, BEFORE_WHEN, CALLABLE, CODE, COFFEE_KEYWORDS, COMMENT, COMMENT_CLEANER, HEREDOC, HEREDOC_INDENT, IDENTIFIER, INTERPOLATION, JS_CLEANER, JS_FORBIDDEN, JS_KEYWORDS, KEYWORDS, LAST_DENT, LAST_DENTS, Lexer, MULTILINER, MULTI_DENT, NOT_REGEX, NO_NEWLINE, NUMBER, OPERATOR, RESERVED, Rewriter, STRING_NEWLINES, WHITESPACE, compact, count, include, starts; // The CoffeeScript Lexer. Uses a series of token-matching regexes to attempt // matches against the beginning of the source code. When a match is found, // a token is produced, we consume the match, and start again. Tokens are in the @@ -129,18 +129,18 @@ // Matches strings, including multi-line strings. Ensures that quotation marks // are balanced within the string's contents, and within nested interpolations. Lexer.prototype.string_token = function string_token() { - var string; + var merge, string, supress; if (!(starts(this.chunk, '"') || starts(this.chunk, "'"))) { return false; } - string = this.balanced_token(['"', '"'], ['${', '}']); + string = this.balanced_token((supress = false), ['"', '"'], ['${', '}']); if (!(string)) { - string = this.balanced_token(["'", "'"]); + string = this.balanced_token((supress = false), ["'", "'"]); } if (!(string)) { return false; } - this.interpolate_string(string.replace(STRING_NEWLINES, " \\\n")); + this.interpolate_string(string.replace(STRING_NEWLINES, " \\\n"), (merge = true)); this.line += count(string, "\n"); this.i += string.length; return true; @@ -160,11 +160,11 @@ }; // Matches JavaScript interpolated directly into the source via backticks. Lexer.prototype.js_token = function js_token() { - var script; + var script, supress; if (!(starts(this.chunk, '`'))) { return false; } - if (!((script = this.balanced_token(['`', '`'])))) { + if (!((script = this.balanced_token((supress = false), ['`', '`'])))) { return false; } this.token('JS', script.replace(JS_CLEANER, '')); @@ -175,23 +175,57 @@ // to distinguish from division, so we borrow some basic heuristics from // JavaScript and Ruby. Lexer.prototype.regex_token = function regex_token() { - var regex; - if (!((regex = this.match(REGEX, 1)))) { + var _a, _b, _c, _d, _e, each, flags, i, index, interp_tokens, merge, regex, str, supress; + if (!((regex = this.balanced_token((supress = true), ['/', '/'])))) { + return false; + } + if (regex.length < 3 || regex.match(/^\/\s+|\n/)) { return false; } if (include(NOT_REGEX, this.tag())) { return false; } - this.token('REGEX', regex); + flags = ['i', 'm', 'g', 'y']; + while (((index = flags.indexOf(this.chunk.substr(regex.length, 1)))) >= 0) { + regex += flags[index]; + flags.splice(index, 1); + } + if (((0 < (_e = regex.indexOf('${'))) && (_e < regex.indexOf('}'))) || regex.match(/[^\\]\$[a-zA-Z_@]/)) { + _a = regex.substring(1).split('/'); + str = _a[0]; + flags = _a[1]; + str = str.replace(/\\[^\$]/g, function(escaped) { + return '\\' + escaped; + }); + this.tokens = this.tokens.concat([['(', '('], ['NEW', 'new'], ['IDENTIFIER', 'RegExp'], ['CALL_START', '(']]); + interp_tokens = this.interpolate_string("\"" + str + "\"", (merge = false)); + _b = interp_tokens; + for (i = 0, _c = _b.length; i < _c; i++) { + each = _b[i]; + if ((_d = each[0]) === 'TOKENS') { + this.tokens = this.tokens.concat(each[1]); + } else if (_d === 'STRING') { + this.token(each[0], each[1].substring(0, 1) + each[1].substring(1, each[1].length - 1).replace(/"/g, '\\"') + each[1].substring(0, 1)); + } else { + this.token(each[0], each[1]); + } + if (i < interp_tokens.length - 1) { + this.token('+', '+'); + } + } + this.tokens = this.tokens.concat([[',', ','], ['STRING', "'" + flags + "'"], [')', ')'], [')', ')']]); + } else { + this.token('REGEX', regex); + } this.i += regex.length; return true; }; // Matches a token in which which the passed delimiter pairs must be correctly // balanced (ie. strings, JS literals). - Lexer.prototype.balanced_token = function balanced_token() { + Lexer.prototype.balanced_token = function balanced_token(supress) { var delimited; - delimited = Array.prototype.slice.call(arguments, 0); - return this.balanced_string.apply(this, [this.chunk].concat(delimited)); + delimited = Array.prototype.slice.call(arguments, 1); + return this.balanced_string.apply(this, [this.chunk].concat([supress]).concat(delimited)); }; // Matches and conumes comments. We pass through comments into JavaScript, // so they're treated as real tokens, like any other part of the language. @@ -395,9 +429,9 @@ // a series of delimiters, all of which must be nested correctly within the // contents of the string. This method allows us to have strings within // interpolations within strings etc... - Lexer.prototype.balanced_string = function balanced_string(str) { + Lexer.prototype.balanced_string = function balanced_string(str, supress) { var _a, _b, _c, _d, close, delimited, i, levels, open, pair; - delimited = Array.prototype.slice.call(arguments, 1); + delimited = Array.prototype.slice.call(arguments, 2); levels = []; i = 0; while (i < str.length) { @@ -429,7 +463,10 @@ i += 1; } if (levels.length) { - throw new Error("SyntaxError: Unterminated " + (levels.pop()[0]) + " starting on line " + (this.line + 1)); + if (!(supress)) { + throw new Error("SyntaxError: Unterminated " + (levels.pop()[0]) + " starting on line " + (this.line + 1)); + } + return false; } if (i === 0) { return false; @@ -444,8 +481,8 @@ // If it encounters an interpolation, this method will recursively create a // new Lexer, tokenize the interpolated contents, and merge them into the // token stream. - Lexer.prototype.interpolate_string = function interpolate_string(str) { - var _a, _b, _c, _d, _e, each, expr, group, i, inner, interp, lexer, match, nested, pi, quote, tokens; + Lexer.prototype.interpolate_string = function interpolate_string(str, merge) { + var _a, _b, _c, _d, _e, _f, _g, each, expr, group, has_string, i, inner, interp, lexer, match, nested, pi, quote, supress, tokens; if (str.length < 3 || !starts(str, '"')) { return this.token('STRING', str); } else { @@ -466,14 +503,14 @@ interp = "this." + (interp.substring(1)); } if (pi < i) { - tokens.push(['STRING', quote + (str.substring(pi, i)) + quote]); + tokens.push(['STRING', '' + quote + (str.substring(pi, i)) + quote]); } tokens.push(['IDENTIFIER', interp]); i += group.length - 1; pi = i + 1; - } else if (((expr = this.balanced_string(str.substring(i), ['${', '}'])))) { + } else if (((expr = this.balanced_string(str.substring(i), (supress = false), ['${', '}'])))) { if (pi < i) { - tokens.push(['STRING', quote + (str.substring(pi, i)) + quote]); + tokens.push(['STRING', '' + quote + (str.substring(pi, i)) + quote]); } inner = expr.substring(2, expr.length - 1); if (inner.length) { @@ -484,7 +521,7 @@ nested.pop(); tokens.push(['TOKENS', nested]); } else { - tokens.push(['STRING', quote + quote]); + tokens.push(['STRING', '' + quote + quote]); } i += expr.length - 1; pi = i + 1; @@ -492,19 +529,27 @@ i += 1; } if (pi < i && pi < str.length - 1) { - tokens.push(['STRING', quote + (str.substring(pi, i)) + quote]); + tokens.push(['STRING', '' + quote + (str.substring(pi, i)) + quote]); } - _c = []; _d = tokens; - for (i = 0, _e = _d.length; i < _e; i++) { - each = _d[i]; - _c.push((function() { + _c = tokens; + for (_d = 0, _e = _c.length; _d < _e; _d++) { + each = _c[_d]; + each[0] === 'STRING' ? ((has_string = true)) : null; + } + if (!has_string) { + tokens.unshift(['STRING', "''"]); + } + if (((typeof merge !== "undefined" && merge !== null) ? merge : true)) { + _f = tokens; + for (i = 0, _g = _f.length; i < _g; i++) { + each = _f[i]; each[0] === 'TOKENS' ? (this.tokens = this.tokens.concat(each[1])) : this.token(each[0], each[1]); if (i < tokens.length - 1) { - return this.token('+', '+'); + this.token('+', '+'); } - }).call(this)); + } } - return _c; + return tokens; } }; // Helpers @@ -568,7 +613,7 @@ // be used as identifiers or properties. JS_FORBIDDEN = JS_KEYWORDS.concat(RESERVED); // Token matching regexes. - IDENTIFIER = /^([a-zA-Z$_](\w|\$)*)/; + IDENTIFIER = /^([a-zA-Z\$_](\w|\$)*)/; NUMBER = /^(\b((0(x|X)[0-9a-fA-F]+)|([0-9]+(\.[0-9]+)?(e[+\-]?[0-9]+)?)))\b/i; HEREDOC = /^("{6}|'{6}|"{3}\n?([\s\S]*?)\n?([ \t]*)"{3}|'{3}\n?([\s\S]*?)\n?([ \t]*)'{3})/; INTERPOLATION = /^\$([a-zA-Z_@]\w*(\.\w+)*)/; @@ -576,7 +621,6 @@ WHITESPACE = /^([ \t]+)/; COMMENT = /^(((\n?[ \t]*)?#[^\n]*)+)/; CODE = /^((-|=)>)/; - REGEX = /^(\/(\S.*?)?([^\\]|\\\\)\/[imgy]{0,4})/; MULTI_DENT = /^((\n([ \t]*))+)(\.)?/; LAST_DENTS = /\n([ \t]*)/g; LAST_DENT = /\n([ \t]*)/; diff --git a/lib/nodes.js b/lib/nodes.js index 3ea376a6..bca3d9a8 100644 --- a/lib/nodes.js +++ b/lib/nodes.js @@ -289,7 +289,7 @@ idt += TAB var end, idt; idt = this.is_statement() ? this.idt() : ''; end = this.is_statement() ? ';' : ''; - return idt + this.value + end; + return '' + idt + this.value + end; }; LiteralNode.prototype.toString = function toString(idt) { return " \"" + this.value + "\""; @@ -762,7 +762,7 @@ idt += TAB props = props.empty() ? '' : props.compile(o) + '\n'; extension = extension ? this.idt() + extension.compile(o) + ';\n' : ''; returns = ret ? '\n' + this.idt() + 'return ' + this.variable.compile(o) + ';' : ''; - return construct + extension + props + returns; + return '' + construct + extension + props + returns; }; return ClassNode; }).call(this); @@ -1540,7 +1540,7 @@ idt += TAB indent: this.idt(), chain_child: true })) : " else {\n" + (Expressions.wrap([this.else_body]).compile(o)) + "\n" + this.tab + "}"; - return if_part + else_part; + return '' + if_part + else_part; }; // Compile the IfNode as a ternary operator. IfNode.prototype.compile_ternary = function compile_ternary(o) { @@ -1559,7 +1559,7 @@ idt += TAB // with Git. TRAILING_WHITESPACE = /\s+$/gm; // Keep this identifier regex in sync with the Lexer. - IDENTIFIER = /^[a-zA-Z$_](\w|\$)*$/; + IDENTIFIER = /^[a-zA-Z\$_](\w|\$)*$/; // Utility Functions // ----------------- // Merge objects, returning a fresh copy with attributes from both sides. diff --git a/src/lexer.coffee b/src/lexer.coffee index 30ba0d9b..b8822a51 100644 --- a/src/lexer.coffee +++ b/src/lexer.coffee @@ -96,10 +96,10 @@ exports.Lexer: class Lexer # are balanced within the string's contents, and within nested interpolations. string_token: -> return false unless starts(@chunk, '"') or starts(@chunk, "'") - string: @balanced_token ['"', '"'], ['${', '}'] - string: @balanced_token ["'", "'"] unless string + string: @balanced_token supress: false, ['"', '"'], ['${', '}'] + string: @balanced_token supress: false, ["'", "'"] unless string return false unless string - @interpolate_string string.replace STRING_NEWLINES, " \\\n" + @interpolate_string string.replace(STRING_NEWLINES, " \\\n"), merge: true @line += count string, "\n" @i += string.length true @@ -117,7 +117,7 @@ exports.Lexer: class Lexer # Matches JavaScript interpolated directly into the source via backticks. js_token: -> return false unless starts @chunk, '`' - return false unless script: @balanced_token ['`', '`'] + return false unless script: @balanced_token supress: false, ['`', '`'] @token 'JS', script.replace(JS_CLEANER, '') @i += script.length true @@ -126,16 +126,34 @@ exports.Lexer: class Lexer # to distinguish from division, so we borrow some basic heuristics from # JavaScript and Ruby. regex_token: -> - return false unless regex: @match REGEX, 1 + return false unless regex: @balanced_token supress: true, ['/', '/'] + return false if regex.length < 3 or regex.match /^\/\s+|\n/ return false if include NOT_REGEX, @tag() - @token 'REGEX', regex + flags: ['i', 'm', 'g', 'y'] + while (index: flags.indexOf @chunk.substr regex.length, 1) >= 0 + regex += flags[index] + flags.splice index, 1 + if (0 < regex.indexOf('${') < regex.indexOf('}')) or regex.match /[^\\]\$[a-zA-Z_@]/ + [str, flags]: regex.substring(1).split('/') + str: str.replace /\\[^\$]/g, (escaped) -> '\\' + escaped + @tokens: @tokens.concat [['(', '('], ['NEW', 'new'], ['IDENTIFIER', 'RegExp'], ['CALL_START', '(']] + interp_tokens: @interpolate_string "\"$str\"", merge: false + for each, i in interp_tokens + switch each[0] + when 'TOKENS' then @tokens: @tokens.concat each[1] + when 'STRING' then @token each[0], each[1].substring(0, 1) + each[1].substring(1, each[1].length - 1).replace(/"/g, '\\"') + each[1].substring(0, 1) + else @token each[0], each[1] + @token '+', '+' if i < interp_tokens.length - 1 + @tokens: @tokens.concat [[',', ','], ['STRING', "'$flags'"], [')', ')'], [')', ')']] + else + @token 'REGEX', regex @i += regex.length true # Matches a token in which which the passed delimiter pairs must be correctly # balanced (ie. strings, JS literals). - balanced_token: (delimited...) -> - @balanced_string @chunk, delimited... + balanced_token: (supress, delimited...) -> + @balanced_string @chunk, supress, delimited... # Matches and conumes comments. We pass through comments into JavaScript, # so they're treated as real tokens, like any other part of the language. @@ -297,7 +315,7 @@ exports.Lexer: class Lexer # a series of delimiters, all of which must be nested correctly within the # contents of the string. This method allows us to have strings within # interpolations within strings etc... - balanced_string: (str, delimited...) -> + balanced_string: (str, supress, delimited...) -> levels: [] i: 0 while i < str.length @@ -317,7 +335,9 @@ exports.Lexer: class Lexer break break unless levels.length i += 1 - throw new Error "SyntaxError: Unterminated ${levels.pop()[0]} starting on line ${@line + 1}" if levels.length + if levels.length + throw new Error "SyntaxError: Unterminated ${levels.pop()[0]} starting on line ${@line + 1}" unless supress + return false return false if i is 0 return str.substring(0, i) @@ -331,7 +351,7 @@ exports.Lexer: class Lexer # If it encounters an interpolation, this method will recursively create a # new Lexer, tokenize the interpolated contents, and merge them into the # token stream. - interpolate_string: (str) -> + interpolate_string: (str, merge) -> if str.length < 3 or not starts str, '"' @token 'STRING', str else @@ -349,7 +369,7 @@ exports.Lexer: class Lexer tokens.push ['IDENTIFIER', interp] i += group.length - 1 pi: i + 1 - else if (expr: @balanced_string str.substring(i), ['${', '}']) + else if (expr: @balanced_string str.substring(i), supress: false, ['${', '}']) tokens.push ['STRING', "$quote${ str.substring(pi, i) }$quote"] if pi < i inner: expr.substring(2, expr.length - 1) if inner.length @@ -362,12 +382,16 @@ exports.Lexer: class Lexer pi: i + 1 i += 1 tokens.push ['STRING', "$quote${ str.substring(pi, i) }$quote"] if pi < i and pi < str.length - 1 - for each, i in tokens - if each[0] is 'TOKENS' - @tokens: @tokens.concat each[1] - else - @token each[0], each[1] - @token '+', '+' if i < tokens.length - 1 + (has_string: yes) for each in tokens when each[0] is 'STRING' + tokens.unshift ['STRING', "''"] if not has_string + if (merge ? true) + for each, i in tokens + if each[0] is 'TOKENS' + @tokens: @tokens.concat each[1] + else + @token each[0], each[1] + @token '+', '+' if i < tokens.length - 1 + tokens # Helpers # ------- @@ -440,7 +464,7 @@ RESERVED: [ JS_FORBIDDEN: JS_KEYWORDS.concat RESERVED # Token matching regexes. -IDENTIFIER : /^([a-zA-Z$_](\w|\$)*)/ +IDENTIFIER : /^([a-zA-Z\$_](\w|\$)*)/ NUMBER : /^(\b((0(x|X)[0-9a-fA-F]+)|([0-9]+(\.[0-9]+)?(e[+\-]?[0-9]+)?)))\b/i HEREDOC : /^("{6}|'{6}|"{3}\n?([\s\S]*?)\n?([ \t]*)"{3}|'{3}\n?([\s\S]*?)\n?([ \t]*)'{3})/ INTERPOLATION : /^\$([a-zA-Z_@]\w*(\.\w+)*)/ @@ -448,7 +472,6 @@ OPERATOR : /^([+\*&|\/\-%=<>:!?]+)/ WHITESPACE : /^([ \t]+)/ COMMENT : /^(((\n?[ \t]*)?#[^\n]*)+)/ CODE : /^((-|=)>)/ -REGEX : /^(\/(\S.*?)?([^\\]|\\\\)\/[imgy]{0,4})/ MULTI_DENT : /^((\n([ \t]*))+)(\.)?/ LAST_DENTS : /\n([ \t]*)/g LAST_DENT : /\n([ \t]*)/ diff --git a/src/nodes.coffee b/src/nodes.coffee index 142928f3..bcd18331 100644 --- a/src/nodes.coffee +++ b/src/nodes.coffee @@ -1192,7 +1192,7 @@ TAB: ' ' TRAILING_WHITESPACE: /\s+$/gm # Keep this identifier regex in sync with the Lexer. -IDENTIFIER: /^[a-zA-Z$_](\w|\$)*$/ +IDENTIFIER: /^[a-zA-Z\$_](\w|\$)*$/ # Utility Functions # ----------------- diff --git a/test/test_regexp_interpolation.coffee b/test/test_regexp_interpolation.coffee new file mode 100644 index 00000000..c5b2c74b --- /dev/null +++ b/test/test_regexp_interpolation.coffee @@ -0,0 +1,17 @@ +name: 'Bob' + +ok not not '"Bob"'.match(/^"${name}"$/i) +ok '"Bobby"'.match(/^"${name}"$/i) is null + +ok not not 'Bob'.match(/^$name$/) +ok 'Bobby'.match(/^$name/) + +ok 'Bobby'.match(/${"${"${"$name"}"}"}/imgy) + +ok '$a$b$c'.match(/\$A\$B\$C/i) + +a: 1 +b: 2 +c: 3 + +ok '123'.match(/$a$b$c/i) diff --git a/test/test_string_interpolation.coffee b/test/test_string_interpolation.coffee index 5c454544..a69c2780 100644 --- a/test/test_string_interpolation.coffee +++ b/test/test_string_interpolation.coffee @@ -61,3 +61,8 @@ ok "Where is ${"the nested ${obj["name"]}"}?" is 'Where is the nested Joe?' ok "Hello ${world ? "$hello"}" is 'Hello World' ok "Hello ${"${"${obj["name"]}" + '!'}"}" is 'Hello Joe!' + +a: 1 +b: 2 +c: 3 +ok "$a$b$c" is '123'