diff --git a/lib/coffeescript/lexer.js b/lib/coffeescript/lexer.js index fa388049..2b8d1436 100644 --- a/lib/coffeescript/lexer.js +++ b/lib/coffeescript/lexer.js @@ -1,6 +1,6 @@ // Generated by CoffeeScript 2.0.0-beta1 (function() { - var BOM, BOOL, CALLABLE, CODE, COFFEE_ALIASES, COFFEE_ALIAS_MAP, COFFEE_KEYWORDS, COMMENT, COMPARE, COMPOUND_ASSIGN, HERECOMMENT_ILLEGAL, HEREDOC_DOUBLE, HEREDOC_INDENT, HEREDOC_SINGLE, HEREGEX, HEREGEX_OMIT, HERE_JSTOKEN, IDENTIFIER, INDENTABLE_CLOSERS, INDEXABLE, INVERSES, JSTOKEN, JS_KEYWORDS, LEADING_BLANK_LINE, LINE_BREAK, LINE_CONTINUER, Lexer, MATH, MULTI_DENT, NOT_REGEX, NUMBER, OPERATOR, POSSIBLY_DIVISION, REGEX, REGEX_FLAGS, REGEX_ILLEGAL, REGEX_INVALID_ESCAPE, RELATION, RESERVED, Rewriter, SHIFT, SIMPLE_STRING_OMIT, STRICT_PROSCRIBED, STRING_DOUBLE, STRING_INVALID_ESCAPE, STRING_OMIT, STRING_SINGLE, STRING_START, TRAILING_BLANK_LINE, TRAILING_SPACES, UNARY, UNARY_MATH, VALID_FLAGS, WHITESPACE, compact, count, invertLiterate, isForFrom, isUnassignable, key, locationDataToString, repeat, starts, throwSyntaxError, + var BOM, BOOL, CALLABLE, CODE, COFFEE_ALIASES, COFFEE_ALIAS_MAP, COFFEE_KEYWORDS, COMMENT, COMPARE, COMPOUND_ASSIGN, HERECOMMENT_ILLEGAL, HEREDOC_DOUBLE, HEREDOC_INDENT, HEREDOC_SINGLE, HEREGEX, HEREGEX_OMIT, HERE_JSTOKEN, IDENTIFIER, INDENTABLE_CLOSERS, INDEXABLE, INVERSES, JSTOKEN, JS_KEYWORDS, LEADING_BLANK_LINE, LINE_BREAK, LINE_CONTINUER, Lexer, MATH, MULTI_DENT, NOT_REGEX, NUMBER, OPERATOR, POSSIBLY_DIVISION, REGEX, REGEX_FLAGS, REGEX_ILLEGAL, REGEX_INVALID_ESCAPE, RELATION, RESERVED, Rewriter, SHIFT, SIMPLE_STRING_OMIT, STRICT_PROSCRIBED, STRING_DOUBLE, STRING_INVALID_ESCAPE, STRING_OMIT, STRING_SINGLE, STRING_START, TRAILING_BLANK_LINE, TRAILING_SPACES, UNARY, UNARY_MATH, UNICODE_CODE_POINT_ESCAPE, VALID_FLAGS, WHITESPACE, compact, count, invertLiterate, isForFrom, isUnassignable, key, locationDataToString, repeat, starts, throwSyntaxError, indexOf = [].indexOf || function(item) { for (var i = 0, l = this.length; i < l; i++) { if (i in this && this[i] === item) return i; } return -1; }; ({Rewriter, INVERSES} = require('./rewriter')); @@ -284,7 +284,9 @@ indentRegex = RegExp(`\\n${indent}`, "g"); } this.mergeInterpolationTokens(tokens, {delimiter}, (value, i) => { - value = this.formatString(value); + value = this.formatString(value, { + delimiter: quote + }); if (indentRegex) { value = value.replace(indentRegex, '\n'); } @@ -298,7 +300,9 @@ }); } else { this.mergeInterpolationTokens(tokens, {delimiter}, (value, i) => { - value = this.formatString(value); + value = this.formatString(value, { + delimiter: quote + }); value = value.replace(SIMPLE_STRING_OMIT, function(match, offset) { if ((i === 0 && offset === 0) || (i === $ && offset + match.length === value.length)) { return ''; @@ -362,6 +366,9 @@ isRegex: true, offsetInChunk: 1 }); + body = this.formatRegex(body, { + delimiter: '/' + }); index = regex.length; prev = this.prev(); if (prev) { @@ -759,7 +766,7 @@ tokensToPush = value; break; case 'NEOSTRING': - converted = fn(token[1], i); + converted = fn.call(this, token[1], i); if (converted.length === 0) { if (i === 0) { firstEmptyStringIndex = this.tokens.length; @@ -882,24 +889,62 @@ return LINE_CONTINUER.test(this.chunk) || ((ref = this.tag()) === '\\' || ref === '.' || ref === '?.' || ref === '?::' || ref === 'UNARY' || ref === 'MATH' || ref === 'UNARY_MATH' || ref === '+' || ref === '-' || ref === '**' || ref === 'SHIFT' || ref === 'RELATION' || ref === 'COMPARE' || ref === '&' || ref === '^' || ref === '|' || ref === '&&' || ref === '||' || ref === 'BIN?' || ref === 'THROW' || ref === 'EXTENDS'); } - formatString(str) { - return str.replace(STRING_OMIT, '$1'); + formatString(str, options) { + return this.replaceUnicodeCodePointEscapes(str.replace(STRING_OMIT, '$1'), options); } formatHeregex(str) { - return str.replace(HEREGEX_OMIT, '$1$2'); + return this.formatRegex(str.replace(HEREGEX_OMIT, '$1$2'), { + delimiter: '///' + }); + } + + formatRegex(str, options) { + return this.replaceUnicodeCodePointEscapes(str, options); + } + + unicodeCodePointToUnicodeEscapes(codePoint) { + var high, low, toUnicodeEscape; + toUnicodeEscape = function(val) { + var str; + str = val.toString(16); + return `\\u${repeat('0', 4 - str.length)}${str}`; + }; + if (codePoint < 0x10000) { + return toUnicodeEscape(codePoint); + } + high = Math.floor((codePoint - 0x10000) / 0x400) + 0xD800; + low = (codePoint - 0x10000) % 0x400 + 0xDC00; + return `${toUnicodeEscape(high)}${toUnicodeEscape(low)}`; + } + + replaceUnicodeCodePointEscapes(str, options) { + return str.replace(UNICODE_CODE_POINT_ESCAPE, (match, escapedBackslash, codePointHex, offset) => { + var codePointDecimal; + if (escapedBackslash) { + return escapedBackslash; + } + codePointDecimal = parseInt(codePointHex, 16); + if (codePointDecimal > 0x10ffff) { + this.error("unicode code point escapes greater than \\u{10ffff} are not allowed", { + offset: offset + options.delimiter.length, + length: codePointHex.length + 4 + }); + } + return this.unicodeCodePointToUnicodeEscapes(codePointDecimal); + }); } validateEscapes(str, options = {}) { - var before, hex, invalidEscape, invalidEscapeRegex, match, message, octal, ref, unicode; + var before, hex, invalidEscape, invalidEscapeRegex, match, message, octal, ref, unicode, unicodeCodePoint; invalidEscapeRegex = options.isRegex ? REGEX_INVALID_ESCAPE : STRING_INVALID_ESCAPE; match = invalidEscapeRegex.exec(str); if (!match) { return; } - match[0], before = match[1], octal = match[2], hex = match[3], unicode = match[4]; + match[0], before = match[1], octal = match[2], hex = match[3], unicodeCodePoint = match[4], unicode = match[5]; message = octal ? "octal escape sequences are not allowed" : "invalid escape sequence"; - invalidEscape = `\\${octal || hex || unicode}`; + invalidEscape = `\\${octal || hex || unicodeCodePoint || unicode}`; return this.error(`${message} ${invalidEscape}`, { offset: ((ref = options.offsetInChunk) != null ? ref : 0) + match.index + before.length, length: invalidEscape.length @@ -1060,7 +1105,7 @@ REGEX_FLAGS = /^\w*/; - VALID_FLAGS = /^(?!.*(.).*\1)[imgy]*$/; + VALID_FLAGS = /^(?!.*(.).*\1)[imguy]*$/; HEREGEX = /^(?:[^\\\/#]|\\[\s\S]|\/(?!\/\/)|\#(?!\{))*/; @@ -1074,9 +1119,11 @@ LINE_CONTINUER = /^\s*(?:,|\??\.(?![.\d])|::)/; - STRING_INVALID_ESCAPE = /((?:^|[^\\])(?:\\\\)*)\\(?:(0[0-7]|[1-7])|(x(?![\da-fA-F]{2}).{0,2})|(u(?![\da-fA-F]{4}).{0,4}))/; + STRING_INVALID_ESCAPE = /((?:^|[^\\])(?:\\\\)*)\\(?:(0[0-7]|[1-7])|(x(?![\da-fA-F]{2}).{0,2})|(u\{(?![\da-fA-F]{1,}\})[^}]*\}?)|(u(?!\{|[\da-fA-F]{4}).{0,4}))/; - REGEX_INVALID_ESCAPE = /((?:^|[^\\])(?:\\\\)*)\\(?:(0[0-7])|(x(?![\da-fA-F]{2}).{0,2})|(u(?![\da-fA-F]{4}).{0,4}))/; + REGEX_INVALID_ESCAPE = /((?:^|[^\\])(?:\\\\)*)\\(?:(0[0-7])|(x(?![\da-fA-F]{2}).{0,2})|(u\{(?![\da-fA-F]{1,}\})[^}]*\}?)|(u(?!\{|[\da-fA-F]{4}).{0,4}))/; + + UNICODE_CODE_POINT_ESCAPE = /(\\\\)|\\u\{([\da-fA-F]+)\}/g; LEADING_BLANK_LINE = /^[^\n\S]*\n/; diff --git a/lib/coffeescript/nodes.js b/lib/coffeescript/nodes.js index b3356353..7cf2535a 100644 --- a/lib/coffeescript/nodes.js +++ b/lib/coffeescript/nodes.js @@ -3706,7 +3706,7 @@ return expr.compileToFragments(o); } fragments = expr.compileToFragments(o, LEVEL_PAREN); - bare = o.level < LEVEL_OP && (expr instanceof Op || expr instanceof Call || (expr instanceof For && expr.returns)); + bare = o.level < LEVEL_OP && (expr instanceof Op || expr instanceof Call || (expr instanceof For && expr.returns)) && (o.level < LEVEL_COND || fragments.length <= 3); if (bare) { return fragments; } else { diff --git a/lib/coffeescript/repl.js b/lib/coffeescript/repl.js index bf842612..3db47d3a 100644 --- a/lib/coffeescript/repl.js +++ b/lib/coffeescript/repl.js @@ -16,7 +16,13 @@ replDefaults = { prompt: 'coffee> ', - historyFile: process.env.HOME ? path.join(process.env.HOME, '.coffee_history') : void 0, + historyFile: (function() { + var historyPath; + historyPath = process.env.XDG_CACHE_HOME || process.env.HOME; + if (historyPath) { + return path.join(historyPath, '.coffee_history'); + } + })(), historyMaxInputSize: 10240, "eval": function(input, context, filename, cb) { var Assign, Block, Literal, Value, ast, err, js, referencedVars, token, tokens; diff --git a/src/lexer.coffee b/src/lexer.coffee index c824daa5..dcd42e2b 100644 --- a/src/lexer.coffee +++ b/src/lexer.coffee @@ -272,14 +272,14 @@ exports.Lexer = class Lexer indent = attempt if indent is null or 0 < attempt.length < indent.length indentRegex = /// \n#{indent} ///g if indent @mergeInterpolationTokens tokens, {delimiter}, (value, i) => - value = @formatString value + value = @formatString value, delimiter: quote value = value.replace indentRegex, '\n' if indentRegex value = value.replace LEADING_BLANK_LINE, '' if i is 0 value = value.replace TRAILING_BLANK_LINE, '' if i is $ value else @mergeInterpolationTokens tokens, {delimiter}, (value, i) => - value = @formatString value + value = @formatString value, delimiter: quote value = value.replace SIMPLE_STRING_OMIT, (match, offset) -> if (i is 0 and offset is 0) or (i is $ and offset + match.length is value.length) @@ -329,6 +329,7 @@ exports.Lexer = class Lexer when match = REGEX.exec @chunk [regex, body, closed] = match @validateEscapes body, isRegex: yes, offsetInChunk: 1 + body = @formatRegex body, delimiter: '/' index = regex.length prev = @prev() if prev @@ -653,7 +654,7 @@ exports.Lexer = class Lexer tokensToPush = value when 'NEOSTRING' # Convert 'NEOSTRING' into 'STRING'. - converted = fn token[1], i + converted = fn.call this, token[1], i # Optimize out empty strings. We ensure that the tokens stream always # starts with a string token, though, to make sure that the result # really is a string. @@ -787,11 +788,37 @@ exports.Lexer = class Lexer '**', 'SHIFT', 'RELATION', 'COMPARE', '&', '^', '|', '&&', '||', 'BIN?', 'THROW', 'EXTENDS'] - formatString: (str) -> - str.replace STRING_OMIT, '$1' + formatString: (str, options) -> + @replaceUnicodeCodePointEscapes str.replace(STRING_OMIT, '$1'), options formatHeregex: (str) -> - str.replace HEREGEX_OMIT, '$1$2' + @formatRegex str.replace(HEREGEX_OMIT, '$1$2'), delimiter: '///' + + formatRegex: (str, options) -> + @replaceUnicodeCodePointEscapes str, options + + unicodeCodePointToUnicodeEscapes: (codePoint) -> + toUnicodeEscape = (val) -> + str = val.toString 16 + "\\u#{repeat '0', 4 - str.length}#{str}" + return toUnicodeEscape(codePoint) if codePoint < 0x10000 + # surrogate pair + high = Math.floor((codePoint - 0x10000) / 0x400) + 0xD800 + low = (codePoint - 0x10000) % 0x400 + 0xDC00 + "#{toUnicodeEscape(high)}#{toUnicodeEscape(low)}" + + # Replace \u{...} with \uxxxx[\uxxxx] in strings and regexes + replaceUnicodeCodePointEscapes: (str, options) -> + str.replace UNICODE_CODE_POINT_ESCAPE, (match, escapedBackslash, codePointHex, offset) => + return escapedBackslash if escapedBackslash + + codePointDecimal = parseInt codePointHex, 16 + if codePointDecimal > 0x10ffff + @error "unicode code point escapes greater than \\u{10ffff} are not allowed", + offset: offset + options.delimiter.length + length: codePointHex.length + 4 + + @unicodeCodePointToUnicodeEscapes codePointDecimal # Validates escapes in strings and regexes. validateEscapes: (str, options = {}) -> @@ -802,13 +829,13 @@ exports.Lexer = class Lexer STRING_INVALID_ESCAPE match = invalidEscapeRegex.exec str return unless match - [[], before, octal, hex, unicode] = match + [[], before, octal, hex, unicodeCodePoint, unicode] = match message = if octal "octal escape sequences are not allowed" else "invalid escape sequence" - invalidEscape = "\\#{octal or hex or unicode}" + invalidEscape = "\\#{octal or hex or unicodeCodePoint or unicode}" @error "#{message} #{invalidEscape}", offset: (options.offsetInChunk ? 0) + match.index + before.length length: invalidEscape.length @@ -995,7 +1022,7 @@ REGEX = /// ^ /// REGEX_FLAGS = /^\w*/ -VALID_FLAGS = /^(?!.*(.).*\1)[imgy]*$/ +VALID_FLAGS = /^(?!.*(.).*\1)[imguy]*$/ HEREGEX = /// ^(?: [^\\/#] | \\[\s\S] | /(?!//) | \#(?!\{) )* /// @@ -1019,7 +1046,8 @@ STRING_INVALID_ESCAPE = /// \\ ( ?: (0[0-7]|[1-7]) # octal escape | (x(?![\da-fA-F]{2}).{0,2}) # hex escape - | (u(?![\da-fA-F]{4}).{0,4}) # unicode escape + | (u\{(?![\da-fA-F]{1,}\})[^}]*\}?) # unicode code point escape + | (u(?!\{|[\da-fA-F]{4}).{0,4}) # unicode escape ) /// REGEX_INVALID_ESCAPE = /// @@ -1027,10 +1055,17 @@ REGEX_INVALID_ESCAPE = /// \\ ( ?: (0[0-7]) # octal escape | (x(?![\da-fA-F]{2}).{0,2}) # hex escape - | (u(?![\da-fA-F]{4}).{0,4}) # unicode escape + | (u\{(?![\da-fA-F]{1,}\})[^}]*\}?) # unicode code point escape + | (u(?!\{|[\da-fA-F]{4}).{0,4}) # unicode escape ) /// +UNICODE_CODE_POINT_ESCAPE = /// + ( \\\\ ) # make sure the escape isn’t escaped + | + \\u\{ ( [\da-fA-F]+ ) \} +///g + LEADING_BLANK_LINE = /^[^\n\S]*\n/ TRAILING_BLANK_LINE = /\n[^\n\S]*$/ diff --git a/src/nodes.coffee b/src/nodes.coffee index 35bdc520..fbfd2764 100644 --- a/src/nodes.coffee +++ b/src/nodes.coffee @@ -2797,7 +2797,8 @@ exports.Parens = class Parens extends Base return expr.compileToFragments o fragments = expr.compileToFragments o, LEVEL_PAREN bare = o.level < LEVEL_OP and (expr instanceof Op or expr instanceof Call or - (expr instanceof For and expr.returns)) + (expr instanceof For and expr.returns)) and (o.level < LEVEL_COND or + fragments.length <= 3) if bare then fragments else @wrapInParentheses fragments #### StringWithInterpolations diff --git a/test/control_flow.coffee b/test/control_flow.coffee index dc91c099..9b5e3b03 100644 --- a/test/control_flow.coffee +++ b/test/control_flow.coffee @@ -198,6 +198,17 @@ test "#748: trailing reserved identifiers", -> nonce eq nonce, result +test 'if-else within an assignment, condition parenthesized', -> + result = if (1 is 1) then 'correct' + eq result, 'correct' + + result = if ('whatever' ? no) then 'correct' + eq result, 'correct' + + f = -> 'wrong' + result = if (f?()) then 'correct' else 'wrong' + eq result, 'correct' + # Postfix test "#3056: multiple postfix conditionals", -> diff --git a/test/error_messages.coffee b/test/error_messages.coffee index dcddb638..63a1d4ed 100644 --- a/test/error_messages.coffee +++ b/test/error_messages.coffee @@ -1483,3 +1483,65 @@ test "setter keyword before static method", -> set @foo = -> ^^^ ''' + +test "#4248: Unicode code point escapes", -> + assertErrorFormat ''' + "a + #{b} \\u{G02} + c" + ''', ''' + [stdin]:2:8: error: invalid escape sequence \\u{G02} + #{b} \\u{G02} + ^\^^^^^^ + ''' + assertErrorFormat ''' + /a\\u{}b/ + ''', ''' + [stdin]:1:3: error: invalid escape sequence \\u{} + /a\\u{}b/ + ^\^^^ + ''' + assertErrorFormat ''' + ///a \\u{01abc/// + ''', ''' + [stdin]:1:6: error: invalid escape sequence \\u{01abc + ///a \\u{01abc/// + ^\^^^^^^^ + ''' + + assertErrorFormat ''' + /\\u{123} \\u{110000}/ + ''', ''' + [stdin]:1:10: error: unicode code point escapes greater than \\u{10ffff} are not allowed + /\\u{123} \\u{110000}/ + \ ^\^^^^^^^^^ + ''' + + assertErrorFormat ''' + ///abc\\\\\\u{123456}///u + ''', ''' + [stdin]:1:9: error: unicode code point escapes greater than \\u{10ffff} are not allowed + ///abc\\\\\\u{123456}///u + \ \^\^^^^^^^^^ + ''' + + assertErrorFormat ''' + """ + \\u{123} + a + \\u{00110000} + #{ 'b' } + """ + ''', ''' + [stdin]:4:5: error: unicode code point escapes greater than \\u{10ffff} are not allowed + \\u{00110000} + ^\^^^^^^^^^^^ + ''' + + assertErrorFormat ''' + '\\u{a}\\u{1111110000}' + ''', ''' + [stdin]:1:7: error: unicode code point escapes greater than \\u{10ffff} are not allowed + '\\u{a}\\u{1111110000}' + \ ^\^^^^^^^^^^^^^ + ''' diff --git a/test/regexps.coffee b/test/regexps.coffee index 0bbab0c9..0061a5a1 100644 --- a/test/regexps.coffee +++ b/test/regexps.coffee @@ -6,6 +6,12 @@ # * Regexen # * Heregexen +# Helper function +toJS = (str) -> + CoffeeScript.compile str, bare: yes + .replace /^\s+|\s+$/g, '' # Trim leading/trailing whitespace + + test "basic regular expression literals", -> ok 'a'.match(/a/) ok 'a'.match /a/ @@ -286,3 +292,32 @@ test "#3795: Escape otherwise invalid characters", -> ok ///#{a}\
///.test 'a\u2029' ok ///#{a}\0 1///.test 'a\x001' + +test "#4248: Unicode code point escapes", -> + ok /a\u{1ab}c/u.test 'a\u01abc' + ok ///#{ 'a' }\u{000001ab}c///u.test 'a\u{1ab}c' + ok ///a\u{000001ab}c///u.test 'a\u{1ab}c' + ok /a\u{12345}c/u.test 'a\ud808\udf45c' + + # and now without u flag + ok /a\u{1ab}c/.test 'a\u01abc' + ok ///#{ 'a' }\u{000001ab}c///.test 'a\u{1ab}c' + ok ///a\u{000001ab}c///.test 'a\u{1ab}c' + ok /a\u{12345}c/.test 'a\ud808\udf45c' + + # rewrite code point escapes + input = """ + /\\u{bcdef}\\u{abc}/u + """ + output = """ + /\\udab3\\uddef\\u0abc/u; + """ + eq toJS(input), output + + input = """ + ///#{ 'a' }\\u{bcdef}/// + """ + output = """ + /a\\udab3\\uddef/; + """ + eq toJS(input), output diff --git a/test/strict.coffee b/test/strict.coffee index 7a4e5b0e..6f9626d5 100644 --- a/test/strict.coffee +++ b/test/strict.coffee @@ -140,10 +140,10 @@ test "`Future Reserved Word`s, `eval` and `arguments` restrictions", -> check "#{keyword} *= 1" check "#{keyword} /= 1" check "#{keyword} ?= 1" - check "{keyword}++" - check "++{keyword}" - check "{keyword}--" - check "--{keyword}" + check "#{keyword}++" + check "++#{keyword}" + check "#{keyword}--" + check "--#{keyword}" destruct = (keyword, check = strict) -> check "{#{keyword}}" check "o = {#{keyword}}" diff --git a/test/strings.coffee b/test/strings.coffee index 0f1975e2..1cd17efa 100644 --- a/test/strings.coffee +++ b/test/strings.coffee @@ -7,6 +7,12 @@ # * Strings # * Heredocs +# Helper function +toJS = (str) -> + CoffeeScript.compile str, bare: yes + .replace /^\s+|\s+$/g, '' # Trim leading/trailing whitespace + + test "backslash escapes", -> eq "\\/\\\\", /\/\\/.source @@ -400,3 +406,33 @@ test "#4314: Whitespace less than or equal to stripped indentation", -> eq '1 2 3 4 5 end\na 0 b', """ #{1} #{2} #{3} #{4} #{5} end a #{0} b""" + +test "#4248: Unicode code point escapes", -> + eq '\u01ab\u00cd', '\u{1ab}\u{cd}' + eq '\u01ab', '\u{000001ab}' + eq 'a\u01ab', "#{ 'a' }\u{1ab}" + eq '\u01abc', '''\u{01ab}c''' + eq '\u01abc', """\u{1ab}#{ 'c' }""" + eq '\udab3\uddef', '\u{bcdef}' + eq '\udab3\uddef', '\u{0000bcdef}' + eq 'a\udab3\uddef', "#{ 'a' }\u{bcdef}" + eq '\udab3\uddefc', '''\u{0bcdef}c''' + eq '\udab3\uddefc', """\u{bcdef}#{ 'c' }""" + eq '\\u{123456}', "#{'\\'}#{'u{123456}'}" + + # rewrite code point escapes + input = """ + '\\u{bcdef}\\u{abc}' + """ + output = """ + '\\udab3\\uddef\\u0abc'; + """ + eq toJS(input), output + + input = """ + "#{ 'a' }\\u{bcdef}" + """ + output = """ + "a\\udab3\\uddef"; + """ + eq toJS(input), output