From 42aa8d256c861e8cbe756fefae99f631ba600d3e Mon Sep 17 00:00:00 2001 From: xixixao Date: Tue, 26 Nov 2013 19:29:13 +0000 Subject: [PATCH 1/4] Handle backslashes at the end of heredocs --- lib/coffee-script/lexer.js | 19 ++++++++----------- src/lexer.coffee | 22 +++++++++++----------- test/strings.coffee | 29 +++++++++++++++++++++++++++-- 3 files changed, 46 insertions(+), 24 deletions(-) diff --git a/lib/coffee-script/lexer.js b/lib/coffee-script/lexer.js index 580798f6..a5fdc62f 100644 --- a/lib/coffee-script/lexer.js +++ b/lib/coffee-script/lexer.js @@ -173,7 +173,7 @@ return 0; } string = match[0]; - this.token('STRING', this.removeNewlines(string), 0, string.length); + this.token('STRING', this.escapeLines(string), 0, string.length); break; case '"': if (!(string = this.balancedString(this.chunk, '"'))) { @@ -185,7 +185,7 @@ lexedLength: string.length }); } else { - this.token('STRING', this.removeNewlines(string), 0, string.length); + this.token('STRING', this.escapeLines(string), 0, string.length); } break; default: @@ -198,13 +198,14 @@ }; Lexer.prototype.heredocToken = function() { - var doc, heredoc, match, quote; + var doc, heredoc, match, quote, trimmed; if (!(match = HEREDOC.exec(this.chunk))) { return 0; } heredoc = match[0]; quote = heredoc.charAt(0); - doc = this.sanitizeHeredoc(match[2], { + trimmed = match[2].replace(/(([^\\]|\\\\)\s*)\n[^\n\S]*$/, '$1'); + doc = this.sanitizeHeredoc(trimmed, { quote: quote, indent: null }); @@ -762,10 +763,6 @@ return LINE_CONTINUER.test(this.chunk) || ((_ref2 = this.tag()) === '\\' || _ref2 === '.' || _ref2 === '?.' || _ref2 === '?::' || _ref2 === 'UNARY' || _ref2 === 'MATH' || _ref2 === '+' || _ref2 === '-' || _ref2 === 'SHIFT' || _ref2 === 'RELATION' || _ref2 === 'COMPARE' || _ref2 === 'LOGIC' || _ref2 === 'THROW' || _ref2 === 'EXTENDS'); }; - Lexer.prototype.removeNewlines = function(str) { - return this.escapeLines(str.replace(/^(.)\s*\n\s*/, '$1').replace(/\s*\n\s*(.)$/, '$1')); - }; - Lexer.prototype.escapeLines = function(str, heredoc) { str = str.replace(/\\[^\S\n]*(\n|\\)\s*/g, function(escaped, character) { if (character === '\n') { @@ -777,7 +774,7 @@ if (heredoc) { return str.replace(MULTILINER, '\\n'); } else { - return str.replace(/\s*\n\s*/g, ' '); + return str.replace(/^(.)\s*\n\s*/, '$1').replace(/\s*\n\s*(.)$/, '$1').replace(/\s*\n\s*/g, ' '); } }; @@ -855,13 +852,13 @@ NUMBER = /^0b[01]+|^0o[0-7]+|^0x[\da-f]+|^\d*\.?\d+(?:e[+-]?\d+)?/i; - HEREDOC = /^("""|''')([\s\S]*?)(?:\n[^\n\S]*)?\1/; + HEREDOC = /^("""|''')(([\s\S]*?([^\\]|\\\\))?)\1/; OPERATOR = /^(?:[-=]>|[-+*\/%<>&|^!?=]=|>>>=?|([-+:])\1|([&|<>])\2=?|\?(\.|::)|\.{2,3})/; WHITESPACE = /^[^\n\S]+/; - COMMENT = /^###([^#][\s\S]*?)(?:###[^\n\S]*|(?:###)$)|^(?:\s*#(?!##[^#]).*)+/; + COMMENT = /^###([^#][\s\S]*?)(?:###[^\n\S]*|###$)|^(?:\s*#(?!##[^#]).*)+/; CODE = /^[-=]>/; diff --git a/src/lexer.coffee b/src/lexer.coffee index 78e0e889..5340c11f 100644 --- a/src/lexer.coffee +++ b/src/lexer.coffee @@ -190,13 +190,13 @@ exports.Lexer = class Lexer when "'" return 0 unless match = SIMPLESTR.exec @chunk string = match[0] - @token 'STRING', @removeNewlines(string), 0, string.length + @token 'STRING', @escapeLines(string), 0, string.length when '"' return 0 unless string = @balancedString @chunk, '"' if 0 < string.indexOf '#{', 1 @interpolateString string[1...-1], strOffset: 1, lexedLength: string.length else - @token 'STRING', @removeNewlines(string), 0, string.length + @token 'STRING', @escapeLines(string), 0, string.length else return 0 if octalEsc = /^(?:\\.|[^\\])*\\(?:0[0-7]|[1-7])/.test string @@ -209,7 +209,9 @@ exports.Lexer = class Lexer return 0 unless match = HEREDOC.exec @chunk heredoc = match[0] quote = heredoc.charAt 0 - doc = @sanitizeHeredoc match[2], quote: quote, indent: null + # Trim last newline if it's not escaped + trimmed = match[2].replace /(([^\\]|\\\\)\s*)\n[^\n\S]*$/, '$1' + doc = @sanitizeHeredoc trimmed, quote: quote, indent: null if quote is '"' and 0 <= doc.indexOf '#{' @interpolateString doc, heredoc: yes, strOffset: 3, lexedLength: heredoc.length else @@ -684,11 +686,6 @@ exports.Lexer = class Lexer @tag() in ['\\', '.', '?.', '?::', 'UNARY', 'MATH', '+', '-', 'SHIFT', 'RELATION' 'COMPARE', 'LOGIC', 'THROW', 'EXTENDS'] - # Remove newlines from beginning and end of string literals. - # `str` includes quotes. - removeNewlines: (str) -> - @escapeLines str.replace(/^(.)\s*\n\s*/, '$1').replace(/\s*\n\s*(.)$/, '$1') - # Converts newlines for string literals. escapeLines: (str, heredoc) -> # Ignore escaped backslashes and remove escaped newlines @@ -697,7 +694,10 @@ exports.Lexer = class Lexer if heredoc str.replace MULTILINER, '\\n' else - str.replace /\s*\n\s*/g, ' ' + # Trim leading and trailing whitespace, string includes quotes + str.replace(/^(.)\s*\n\s*/, '$1') + .replace(/\s*\n\s*(.)$/, '$1') + .replace(/\s*\n\s*/g, ' ') # Constructs a string token by escaping quotes and newlines. makeString: (body, quote, heredoc) -> @@ -779,7 +779,7 @@ NUMBER = /// ^ \d*\.?\d+ (?:e[+-]?\d+)? # decimal ///i -HEREDOC = /// ^ ("""|''') ([\s\S]*?) (?:\n[^\n\S]*)? \1 /// +HEREDOC = /// ^ ("""|''') (( [\s\S]*? ([^\\]|\\\\) )?) \1 /// OPERATOR = /// ^ ( ?: [-=]> # function @@ -793,7 +793,7 @@ OPERATOR = /// ^ ( WHITESPACE = /^[^\n\S]+/ -COMMENT = /^###([^#][\s\S]*?)(?:###[^\n\S]*|(?:###)$)|^(?:\s*#(?!##[^#]).*)+/ +COMMENT = /^###([^#][\s\S]*?)(?:###[^\n\S]*|###$)|^(?:\s*#(?!##[^#]).*)+/ CODE = /^[-=]>/ diff --git a/test/strings.coffee b/test/strings.coffee index ee276c20..536c1c15 100644 --- a/test/strings.coffee +++ b/test/strings.coffee @@ -63,6 +63,15 @@ test "#3229, multiline strings", -> eq ' \ ok', ' ok' + # #1273, empty strings. + eq '\ + ', '' + eq ' + ', '' + eq ' + ', '' + eq ' ', ' ' + # Same behavior in interpolated strings. eq "interpolation #{1} follows #{2} \ @@ -79,6 +88,10 @@ test "#3229, multiline strings", -> next line', 'escaped backslash at EOL\\ next line' eq '\\ next line', '\\ next line' + eq '\\ + ', '\\' + eq '\\\\\\ + ', '\\\\\\' eq "#{1}\\ after interpolation", '1\\ after interpolation' eq 'escaped backslash before slash\\ \ @@ -120,11 +133,14 @@ test "#3249, escape newlines in heredocs with backslashes", -> normal indentation """, 'Set whitespace <- this is ignorednone\n normal indentation' - # Changed from #647 + # Changed from #647, trailing backslash. eq ''' Hello, World\ ''', 'Hello, World' + eq ''' + \\ + ''', '\\' # Backslash at the beginning of a literal string. eq '''\ @@ -151,6 +167,9 @@ test "#3249, escape newlines in heredocs with backslashes", -> escaped backslash at EOL\\ next line ''', 'escaped backslash at EOL\\\n next line' + eq '''\\ + + ''', '\\\n' # Backslashes at beginning of lines. eq '''first line @@ -158,7 +177,7 @@ test "#3249, escape newlines in heredocs with backslashes", -> eq """first line\ \ backslash at BOL""", 'first line\ backslash at BOL' -# Edge case. + # Edge cases. eq '''lone \ @@ -166,6 +185,8 @@ test "#3249, escape newlines in heredocs with backslashes", -> backslash''', 'lone\n\n backslash' + eq '''\ + ''', '' #647 eq "''Hello, World\\''", ''' @@ -175,6 +196,10 @@ eq '""Hello, World\\""', """ "\"Hello, World\\\"" """ +test "#1273, escaping quotes at the end of heredocs.", -> + # """\""" no longer compiles + eq """\\""", '\\' + a = """ basic heredoc on two lines From b11d956d5331e1c104c668776d6387b61d0a7526 Mon Sep 17 00:00:00 2001 From: xixixao Date: Wed, 27 Nov 2013 12:58:14 +0000 Subject: [PATCH 2/4] Added compilation regression test --- test/compilation.coffee | 3 +++ 1 file changed, 3 insertions(+) diff --git a/test/compilation.coffee b/test/compilation.coffee index 93c4bc36..7190edf6 100644 --- a/test/compilation.coffee +++ b/test/compilation.coffee @@ -68,6 +68,9 @@ test "#1026", -> test "#1050", -> cantCompile "### */ ###" +test "#1273: escaping quotes at the end of heredocs", -> + cantCompile '"""\\"""' # """\""" + test "#1106: __proto__ compilation", -> object = eq @["__proto__"] = true From a61b6ee925bb6127e9e2226d7683a5b0eeacc70c Mon Sep 17 00:00:00 2001 From: xixixao Date: Wed, 27 Nov 2013 20:29:45 +0000 Subject: [PATCH 3/4] Fixed leading whitespace before interpolation in simple strings --- lib/coffee-script/lexer.js | 43 +++++++++++++++++++------------------- src/lexer.coffee | 31 +++++++++++++-------------- test/strings.coffee | 8 +++++++ 3 files changed, 44 insertions(+), 38 deletions(-) diff --git a/lib/coffee-script/lexer.js b/lib/coffee-script/lexer.js index a5fdc62f..b5e1f01c 100644 --- a/lib/coffee-script/lexer.js +++ b/lib/coffee-script/lexer.js @@ -166,30 +166,25 @@ }; Lexer.prototype.stringToken = function() { - var match, octalEsc, string; - switch (this.chunk.charAt(0)) { + var octalEsc, quote, string, trimmed; + switch (quote = this.chunk.charAt(0)) { case "'": - if (!(match = SIMPLESTR.exec(this.chunk))) { - return 0; - } - string = match[0]; - this.token('STRING', this.escapeLines(string), 0, string.length); + string = SIMPLESTR.exec(this.chunk)[0]; break; case '"': - if (!(string = this.balancedString(this.chunk, '"'))) { - return 0; - } - if (0 < string.indexOf('#{', 1)) { - this.interpolateString(string.slice(1, -1), { - strOffset: 1, - lexedLength: string.length - }); - } else { - this.token('STRING', this.escapeLines(string), 0, string.length); - } - break; - default: - return 0; + string = this.balancedString(this.chunk, '"'); + } + if (!string) { + return 0; + } + trimmed = this.removeNewlines(string.slice(1, -1)); + if (quote === '"' && 0 < string.indexOf('#{', 1)) { + this.interpolateString(trimmed, { + strOffset: 1, + lexedLength: string.length + }); + } else { + this.token('STRING', quote + this.escapeLines(trimmed) + quote, 0, string.length); } if (octalEsc = /^(?:\\.|[^\\])*\\(?:0[0-7]|[1-7])/.test(string)) { this.error("octal escape sequences " + string + " are not allowed"); @@ -763,6 +758,10 @@ return LINE_CONTINUER.test(this.chunk) || ((_ref2 = this.tag()) === '\\' || _ref2 === '.' || _ref2 === '?.' || _ref2 === '?::' || _ref2 === 'UNARY' || _ref2 === 'MATH' || _ref2 === '+' || _ref2 === '-' || _ref2 === 'SHIFT' || _ref2 === 'RELATION' || _ref2 === 'COMPARE' || _ref2 === 'LOGIC' || _ref2 === 'THROW' || _ref2 === 'EXTENDS'); }; + Lexer.prototype.removeNewlines = function(str) { + return str.replace(/^\s*\n\s*/, '').replace(/([^\\]|\\\\)\s*\n\s*$/, '$1'); + }; + Lexer.prototype.escapeLines = function(str, heredoc) { str = str.replace(/\\[^\S\n]*(\n|\\)\s*/g, function(escaped, character) { if (character === '\n') { @@ -774,7 +773,7 @@ if (heredoc) { return str.replace(MULTILINER, '\\n'); } else { - return str.replace(/^(.)\s*\n\s*/, '$1').replace(/\s*\n\s*(.)$/, '$1').replace(/\s*\n\s*/g, ' '); + return str.replace(/\s*\n\s*/g, ' '); } }; diff --git a/src/lexer.coffee b/src/lexer.coffee index 5340c11f..385afd3c 100644 --- a/src/lexer.coffee +++ b/src/lexer.coffee @@ -186,19 +186,15 @@ exports.Lexer = class Lexer # Matches strings, including multi-line strings. Ensures that quotation marks # are balanced within the string's contents, and within nested interpolations. stringToken: -> - switch @chunk.charAt 0 - when "'" - return 0 unless match = SIMPLESTR.exec @chunk - string = match[0] - @token 'STRING', @escapeLines(string), 0, string.length - when '"' - return 0 unless string = @balancedString @chunk, '"' - if 0 < string.indexOf '#{', 1 - @interpolateString string[1...-1], strOffset: 1, lexedLength: string.length - else - @token 'STRING', @escapeLines(string), 0, string.length - else - return 0 + switch quote = @chunk.charAt 0 + when "'" then [string] = SIMPLESTR.exec @chunk + when '"' then string = @balancedString @chunk, '"' + return 0 unless string + trimmed = @removeNewlines string[1...-1] + if quote is '"' and 0 < string.indexOf '#{', 1 + @interpolateString trimmed, strOffset: 1, lexedLength: string.length + else + @token 'STRING', quote + @escapeLines(trimmed) + quote, 0, string.length if octalEsc = /^(?:\\.|[^\\])*\\(?:0[0-7]|[1-7])/.test string @error "octal escape sequences #{string} are not allowed" string.length @@ -686,6 +682,11 @@ exports.Lexer = class Lexer @tag() in ['\\', '.', '?.', '?::', 'UNARY', 'MATH', '+', '-', 'SHIFT', 'RELATION' 'COMPARE', 'LOGIC', 'THROW', 'EXTENDS'] + # Remove newlines from beginning and (non escaped) from end of string literals. + removeNewlines: (str) -> + str.replace(/^\s*\n\s*/, '') + .replace(/([^\\]|\\\\)\s*\n\s*$/, '$1') + # Converts newlines for string literals. escapeLines: (str, heredoc) -> # Ignore escaped backslashes and remove escaped newlines @@ -695,9 +696,7 @@ exports.Lexer = class Lexer str.replace MULTILINER, '\\n' else # Trim leading and trailing whitespace, string includes quotes - str.replace(/^(.)\s*\n\s*/, '$1') - .replace(/\s*\n\s*(.)$/, '$1') - .replace(/\s*\n\s*/g, ' ') + str.replace /\s*\n\s*/g, ' ' # Constructs a string token by escaping quotes and newlines. makeString: (body, quote, heredoc) -> diff --git a/test/strings.coffee b/test/strings.coffee index 536c1c15..803981ac 100644 --- a/test/strings.coffee +++ b/test/strings.coffee @@ -81,6 +81,9 @@ test "#3229, multiline strings", -> 'string ' + "inside interpolation" }", "a string inside interpolation" + eq " + #{1} + ", '1' # Handle escaped backslashes correctly. eq '\\', `'\\'` @@ -155,6 +158,11 @@ test "#3249, escape newlines in heredocs with backslashes", -> too #{3}\ ! """, 'interpolation 1\n follows 2 too 3!' + eq """ + + #{1} #{2} + + """, '\n1 2\n' # TODO: uncomment when #2388 is fixed # eq """a heredoc #{ From 5e4cca90a3a38cb9a33225fd83923f5a60744925 Mon Sep 17 00:00:00 2001 From: xixixao Date: Wed, 27 Nov 2013 20:41:32 +0000 Subject: [PATCH 4/4] Fix #3264, missing leading whitespace before interpolation in heredoc --- lib/coffee-script/lexer.js | 4 ---- src/lexer.coffee | 6 ------ 2 files changed, 10 deletions(-) diff --git a/lib/coffee-script/lexer.js b/lib/coffee-script/lexer.js index b5e1f01c..2c486d83 100644 --- a/lib/coffee-script/lexer.js +++ b/lib/coffee-script/lexer.js @@ -597,10 +597,6 @@ offsetInChunk = offsetInChunk || 0; strOffset = strOffset || 0; lexedLength = lexedLength || str.length; - if (heredoc && str.length > 0 && str[0] === '\n') { - str = str.slice(1); - strOffset++; - } tokens = []; pi = 0; i = -1; diff --git a/src/lexer.coffee b/src/lexer.coffee index 385afd3c..72b63143 100644 --- a/src/lexer.coffee +++ b/src/lexer.coffee @@ -526,11 +526,6 @@ exports.Lexer = class Lexer strOffset = strOffset || 0 lexedLength = lexedLength || str.length - # Clip leading \n from heredoc - if heredoc and str.length > 0 and str[0] == '\n' - str = str[1...] - strOffset++ - # Parse the string. tokens = [] pi = 0 @@ -695,7 +690,6 @@ exports.Lexer = class Lexer if heredoc str.replace MULTILINER, '\\n' else - # Trim leading and trailing whitespace, string includes quotes str.replace /\s*\n\s*/g, ' ' # Constructs a string token by escaping quotes and newlines.