From 42aa8d256c861e8cbe756fefae99f631ba600d3e Mon Sep 17 00:00:00 2001 From: xixixao Date: Tue, 26 Nov 2013 19:29:13 +0000 Subject: [PATCH] Handle backslashes at the end of heredocs --- lib/coffee-script/lexer.js | 19 ++++++++----------- src/lexer.coffee | 22 +++++++++++----------- test/strings.coffee | 29 +++++++++++++++++++++++++++-- 3 files changed, 46 insertions(+), 24 deletions(-) diff --git a/lib/coffee-script/lexer.js b/lib/coffee-script/lexer.js index 580798f6..a5fdc62f 100644 --- a/lib/coffee-script/lexer.js +++ b/lib/coffee-script/lexer.js @@ -173,7 +173,7 @@ return 0; } string = match[0]; - this.token('STRING', this.removeNewlines(string), 0, string.length); + this.token('STRING', this.escapeLines(string), 0, string.length); break; case '"': if (!(string = this.balancedString(this.chunk, '"'))) { @@ -185,7 +185,7 @@ lexedLength: string.length }); } else { - this.token('STRING', this.removeNewlines(string), 0, string.length); + this.token('STRING', this.escapeLines(string), 0, string.length); } break; default: @@ -198,13 +198,14 @@ }; Lexer.prototype.heredocToken = function() { - var doc, heredoc, match, quote; + var doc, heredoc, match, quote, trimmed; if (!(match = HEREDOC.exec(this.chunk))) { return 0; } heredoc = match[0]; quote = heredoc.charAt(0); - doc = this.sanitizeHeredoc(match[2], { + trimmed = match[2].replace(/(([^\\]|\\\\)\s*)\n[^\n\S]*$/, '$1'); + doc = this.sanitizeHeredoc(trimmed, { quote: quote, indent: null }); @@ -762,10 +763,6 @@ return LINE_CONTINUER.test(this.chunk) || ((_ref2 = this.tag()) === '\\' || _ref2 === '.' || _ref2 === '?.' || _ref2 === '?::' || _ref2 === 'UNARY' || _ref2 === 'MATH' || _ref2 === '+' || _ref2 === '-' || _ref2 === 'SHIFT' || _ref2 === 'RELATION' || _ref2 === 'COMPARE' || _ref2 === 'LOGIC' || _ref2 === 'THROW' || _ref2 === 'EXTENDS'); }; - Lexer.prototype.removeNewlines = function(str) { - return this.escapeLines(str.replace(/^(.)\s*\n\s*/, '$1').replace(/\s*\n\s*(.)$/, '$1')); - }; - Lexer.prototype.escapeLines = function(str, heredoc) { str = str.replace(/\\[^\S\n]*(\n|\\)\s*/g, function(escaped, character) { if (character === '\n') { @@ -777,7 +774,7 @@ if (heredoc) { return str.replace(MULTILINER, '\\n'); } else { - return str.replace(/\s*\n\s*/g, ' '); + return str.replace(/^(.)\s*\n\s*/, '$1').replace(/\s*\n\s*(.)$/, '$1').replace(/\s*\n\s*/g, ' '); } }; @@ -855,13 +852,13 @@ NUMBER = /^0b[01]+|^0o[0-7]+|^0x[\da-f]+|^\d*\.?\d+(?:e[+-]?\d+)?/i; - HEREDOC = /^("""|''')([\s\S]*?)(?:\n[^\n\S]*)?\1/; + HEREDOC = /^("""|''')(([\s\S]*?([^\\]|\\\\))?)\1/; OPERATOR = /^(?:[-=]>|[-+*\/%<>&|^!?=]=|>>>=?|([-+:])\1|([&|<>])\2=?|\?(\.|::)|\.{2,3})/; WHITESPACE = /^[^\n\S]+/; - COMMENT = /^###([^#][\s\S]*?)(?:###[^\n\S]*|(?:###)$)|^(?:\s*#(?!##[^#]).*)+/; + COMMENT = /^###([^#][\s\S]*?)(?:###[^\n\S]*|###$)|^(?:\s*#(?!##[^#]).*)+/; CODE = /^[-=]>/; diff --git a/src/lexer.coffee b/src/lexer.coffee index 78e0e889..5340c11f 100644 --- a/src/lexer.coffee +++ b/src/lexer.coffee @@ -190,13 +190,13 @@ exports.Lexer = class Lexer when "'" return 0 unless match = SIMPLESTR.exec @chunk string = match[0] - @token 'STRING', @removeNewlines(string), 0, string.length + @token 'STRING', @escapeLines(string), 0, string.length when '"' return 0 unless string = @balancedString @chunk, '"' if 0 < string.indexOf '#{', 1 @interpolateString string[1...-1], strOffset: 1, lexedLength: string.length else - @token 'STRING', @removeNewlines(string), 0, string.length + @token 'STRING', @escapeLines(string), 0, string.length else return 0 if octalEsc = /^(?:\\.|[^\\])*\\(?:0[0-7]|[1-7])/.test string @@ -209,7 +209,9 @@ exports.Lexer = class Lexer return 0 unless match = HEREDOC.exec @chunk heredoc = match[0] quote = heredoc.charAt 0 - doc = @sanitizeHeredoc match[2], quote: quote, indent: null + # Trim last newline if it's not escaped + trimmed = match[2].replace /(([^\\]|\\\\)\s*)\n[^\n\S]*$/, '$1' + doc = @sanitizeHeredoc trimmed, quote: quote, indent: null if quote is '"' and 0 <= doc.indexOf '#{' @interpolateString doc, heredoc: yes, strOffset: 3, lexedLength: heredoc.length else @@ -684,11 +686,6 @@ exports.Lexer = class Lexer @tag() in ['\\', '.', '?.', '?::', 'UNARY', 'MATH', '+', '-', 'SHIFT', 'RELATION' 'COMPARE', 'LOGIC', 'THROW', 'EXTENDS'] - # Remove newlines from beginning and end of string literals. - # `str` includes quotes. - removeNewlines: (str) -> - @escapeLines str.replace(/^(.)\s*\n\s*/, '$1').replace(/\s*\n\s*(.)$/, '$1') - # Converts newlines for string literals. escapeLines: (str, heredoc) -> # Ignore escaped backslashes and remove escaped newlines @@ -697,7 +694,10 @@ exports.Lexer = class Lexer if heredoc str.replace MULTILINER, '\\n' else - str.replace /\s*\n\s*/g, ' ' + # Trim leading and trailing whitespace, string includes quotes + str.replace(/^(.)\s*\n\s*/, '$1') + .replace(/\s*\n\s*(.)$/, '$1') + .replace(/\s*\n\s*/g, ' ') # Constructs a string token by escaping quotes and newlines. makeString: (body, quote, heredoc) -> @@ -779,7 +779,7 @@ NUMBER = /// ^ \d*\.?\d+ (?:e[+-]?\d+)? # decimal ///i -HEREDOC = /// ^ ("""|''') ([\s\S]*?) (?:\n[^\n\S]*)? \1 /// +HEREDOC = /// ^ ("""|''') (( [\s\S]*? ([^\\]|\\\\) )?) \1 /// OPERATOR = /// ^ ( ?: [-=]> # function @@ -793,7 +793,7 @@ OPERATOR = /// ^ ( WHITESPACE = /^[^\n\S]+/ -COMMENT = /^###([^#][\s\S]*?)(?:###[^\n\S]*|(?:###)$)|^(?:\s*#(?!##[^#]).*)+/ +COMMENT = /^###([^#][\s\S]*?)(?:###[^\n\S]*|###$)|^(?:\s*#(?!##[^#]).*)+/ CODE = /^[-=]>/ diff --git a/test/strings.coffee b/test/strings.coffee index ee276c20..536c1c15 100644 --- a/test/strings.coffee +++ b/test/strings.coffee @@ -63,6 +63,15 @@ test "#3229, multiline strings", -> eq ' \ ok', ' ok' + # #1273, empty strings. + eq '\ + ', '' + eq ' + ', '' + eq ' + ', '' + eq ' ', ' ' + # Same behavior in interpolated strings. eq "interpolation #{1} follows #{2} \ @@ -79,6 +88,10 @@ test "#3229, multiline strings", -> next line', 'escaped backslash at EOL\\ next line' eq '\\ next line', '\\ next line' + eq '\\ + ', '\\' + eq '\\\\\\ + ', '\\\\\\' eq "#{1}\\ after interpolation", '1\\ after interpolation' eq 'escaped backslash before slash\\ \ @@ -120,11 +133,14 @@ test "#3249, escape newlines in heredocs with backslashes", -> normal indentation """, 'Set whitespace <- this is ignorednone\n normal indentation' - # Changed from #647 + # Changed from #647, trailing backslash. eq ''' Hello, World\ ''', 'Hello, World' + eq ''' + \\ + ''', '\\' # Backslash at the beginning of a literal string. eq '''\ @@ -151,6 +167,9 @@ test "#3249, escape newlines in heredocs with backslashes", -> escaped backslash at EOL\\ next line ''', 'escaped backslash at EOL\\\n next line' + eq '''\\ + + ''', '\\\n' # Backslashes at beginning of lines. eq '''first line @@ -158,7 +177,7 @@ test "#3249, escape newlines in heredocs with backslashes", -> eq """first line\ \ backslash at BOL""", 'first line\ backslash at BOL' -# Edge case. + # Edge cases. eq '''lone \ @@ -166,6 +185,8 @@ test "#3249, escape newlines in heredocs with backslashes", -> backslash''', 'lone\n\n backslash' + eq '''\ + ''', '' #647 eq "''Hello, World\\''", ''' @@ -175,6 +196,10 @@ eq '""Hello, World\\""', """ "\"Hello, World\\\"" """ +test "#1273, escaping quotes at the end of heredocs.", -> + # """\""" no longer compiles + eq """\\""", '\\' + a = """ basic heredoc on two lines