1
0
Fork 0
mirror of https://github.com/jashkenas/coffeescript.git synced 2022-11-09 12:23:24 -05:00

Keep Unicode code point escapes as is when possible (#4520)

This commit is contained in:
Julian Rosse 2017-04-25 12:15:08 -05:00 committed by Simon Lydell
parent 07ae1edb44
commit 7ef5cb4a1f
4 changed files with 37 additions and 23 deletions

View file

@ -1,11 +1,11 @@
// Generated by CoffeeScript 2.0.0-beta1 // Generated by CoffeeScript 2.0.0-beta1
(function() { (function() {
var BOM, BOOL, CALLABLE, CODE, COFFEE_ALIASES, COFFEE_ALIAS_MAP, COFFEE_KEYWORDS, COMMENT, COMPARE, COMPOUND_ASSIGN, HERECOMMENT_ILLEGAL, HEREDOC_DOUBLE, HEREDOC_INDENT, HEREDOC_SINGLE, HEREGEX, HEREGEX_OMIT, HERE_JSTOKEN, IDENTIFIER, INDENTABLE_CLOSERS, INDEXABLE, INVERSES, JSTOKEN, JS_KEYWORDS, LEADING_BLANK_LINE, LINE_BREAK, LINE_CONTINUER, Lexer, MATH, MULTI_DENT, NOT_REGEX, NUMBER, OPERATOR, POSSIBLY_DIVISION, REGEX, REGEX_FLAGS, REGEX_ILLEGAL, REGEX_INVALID_ESCAPE, RELATION, RESERVED, Rewriter, SHIFT, SIMPLE_STRING_OMIT, STRICT_PROSCRIBED, STRING_DOUBLE, STRING_INVALID_ESCAPE, STRING_OMIT, STRING_SINGLE, STRING_START, TRAILING_BLANK_LINE, TRAILING_SPACES, UNARY, UNARY_MATH, UNICODE_CODE_POINT_ESCAPE, VALID_FLAGS, WHITESPACE, compact, count, invertLiterate, isForFrom, isUnassignable, key, locationDataToString, repeat, starts, throwSyntaxError, var BOM, BOOL, CALLABLE, CODE, COFFEE_ALIASES, COFFEE_ALIAS_MAP, COFFEE_KEYWORDS, COMMENT, COMPARE, COMPOUND_ASSIGN, HERECOMMENT_ILLEGAL, HEREDOC_DOUBLE, HEREDOC_INDENT, HEREDOC_SINGLE, HEREGEX, HEREGEX_OMIT, HERE_JSTOKEN, IDENTIFIER, INDENTABLE_CLOSERS, INDEXABLE, INVERSES, JSTOKEN, JS_KEYWORDS, LEADING_BLANK_LINE, LINE_BREAK, LINE_CONTINUER, Lexer, MATH, MULTI_DENT, NOT_REGEX, NUMBER, OPERATOR, POSSIBLY_DIVISION, REGEX, REGEX_FLAGS, REGEX_ILLEGAL, REGEX_INVALID_ESCAPE, RELATION, RESERVED, Rewriter, SHIFT, SIMPLE_STRING_OMIT, STRICT_PROSCRIBED, STRING_DOUBLE, STRING_INVALID_ESCAPE, STRING_OMIT, STRING_SINGLE, STRING_START, TRAILING_BLANK_LINE, TRAILING_SPACES, UNARY, UNARY_MATH, UNICODE_CODE_POINT_ESCAPE, VALID_FLAGS, WHITESPACE, compact, count, invertLiterate, isForFrom, isUnassignable, key, locationDataToString, merge, repeat, starts, throwSyntaxError,
indexOf = [].indexOf; indexOf = [].indexOf;
({Rewriter, INVERSES} = require('./rewriter')); ({Rewriter, INVERSES} = require('./rewriter'));
({count, starts, compact, repeat, invertLiterate, locationDataToString, throwSyntaxError} = require('./helpers')); ({count, starts, compact, repeat, invertLiterate, merge, locationDataToString, throwSyntaxError} = require('./helpers'));
exports.Lexer = Lexer = class Lexer { exports.Lexer = Lexer = class Lexer {
tokenize(code, opts = {}) { tokenize(code, opts = {}) {
@ -366,9 +366,6 @@
isRegex: true, isRegex: true,
offsetInChunk: 1 offsetInChunk: 1
}); });
body = this.formatRegex(body, {
delimiter: '/'
});
index = regex.length; index = regex.length;
prev = this.prev(); prev = this.prev();
if (prev) { if (prev) {
@ -398,8 +395,13 @@
}); });
break; break;
case !(regex || tokens.length === 1): case !(regex || tokens.length === 1):
if (body == null) { if (body) {
body = this.formatHeregex(tokens[0][1]); body = this.formatRegex(body, {
flags,
delimiter: '/'
});
} else {
body = this.formatHeregex(tokens[0][1], {flags});
} }
this.token('REGEX', `${this.makeDelimitedLiteral(body, { this.token('REGEX', `${this.makeDelimitedLiteral(body, {
delimiter: '/' delimiter: '/'
@ -412,7 +414,9 @@
this.mergeInterpolationTokens(tokens, { this.mergeInterpolationTokens(tokens, {
delimiter: '"', delimiter: '"',
double: true double: true
}, this.formatHeregex); }, (str) => {
return this.formatHeregex(str, {flags});
});
if (flags) { if (flags) {
this.token(',', ',', index - 1, 0); this.token(',', ',', index - 1, 0);
this.token('STRING', '"' + flags + '"', index - 1, flags.length); this.token('STRING', '"' + flags + '"', index - 1, flags.length);
@ -893,10 +897,10 @@
return this.replaceUnicodeCodePointEscapes(str.replace(STRING_OMIT, '$1'), options); return this.replaceUnicodeCodePointEscapes(str.replace(STRING_OMIT, '$1'), options);
} }
formatHeregex(str) { formatHeregex(str, options) {
return this.formatRegex(str.replace(HEREGEX_OMIT, '$1$2'), { return this.formatRegex(str.replace(HEREGEX_OMIT, '$1$2'), merge(options, {
delimiter: '///' delimiter: '///'
}); }));
} }
formatRegex(str, options) { formatRegex(str, options) {
@ -919,6 +923,8 @@
} }
replaceUnicodeCodePointEscapes(str, options) { replaceUnicodeCodePointEscapes(str, options) {
var shouldReplace;
shouldReplace = (options.flags != null) && indexOf.call(options.flags, 'u') < 0;
return str.replace(UNICODE_CODE_POINT_ESCAPE, (match, escapedBackslash, codePointHex, offset) => { return str.replace(UNICODE_CODE_POINT_ESCAPE, (match, escapedBackslash, codePointHex, offset) => {
var codePointDecimal; var codePointDecimal;
if (escapedBackslash) { if (escapedBackslash) {
@ -931,6 +937,9 @@
length: codePointHex.length + 4 length: codePointHex.length + 4
}); });
} }
if (!shouldReplace) {
return match;
}
return this.unicodeCodePointToUnicodeEscapes(codePointDecimal); return this.unicodeCodePointToUnicodeEscapes(codePointDecimal);
}); });
} }

View file

@ -12,7 +12,7 @@
{Rewriter, INVERSES} = require './rewriter' {Rewriter, INVERSES} = require './rewriter'
# Import the helpers we need. # Import the helpers we need.
{count, starts, compact, repeat, invertLiterate, {count, starts, compact, repeat, invertLiterate, merge,
locationDataToString, throwSyntaxError} = require './helpers' locationDataToString, throwSyntaxError} = require './helpers'
# The Lexer Class # The Lexer Class
@ -330,7 +330,6 @@ exports.Lexer = class Lexer
when match = REGEX.exec @chunk when match = REGEX.exec @chunk
[regex, body, closed] = match [regex, body, closed] = match
@validateEscapes body, isRegex: yes, offsetInChunk: 1 @validateEscapes body, isRegex: yes, offsetInChunk: 1
body = @formatRegex body, delimiter: '/'
index = regex.length index = regex.length
prev = @prev() prev = @prev()
if prev if prev
@ -349,13 +348,17 @@ exports.Lexer = class Lexer
when not VALID_FLAGS.test flags when not VALID_FLAGS.test flags
@error "invalid regular expression flags #{flags}", offset: index, length: flags.length @error "invalid regular expression flags #{flags}", offset: index, length: flags.length
when regex or tokens.length is 1 when regex or tokens.length is 1
body ?= @formatHeregex tokens[0][1] if body
body = @formatRegex body, { flags, delimiter: '/' }
else
body = @formatHeregex tokens[0][1], { flags }
@token 'REGEX', "#{@makeDelimitedLiteral body, delimiter: '/'}#{flags}", 0, end, origin @token 'REGEX', "#{@makeDelimitedLiteral body, delimiter: '/'}#{flags}", 0, end, origin
else else
@token 'REGEX_START', '(', 0, 0, origin @token 'REGEX_START', '(', 0, 0, origin
@token 'IDENTIFIER', 'RegExp', 0, 0 @token 'IDENTIFIER', 'RegExp', 0, 0
@token 'CALL_START', '(', 0, 0 @token 'CALL_START', '(', 0, 0
@mergeInterpolationTokens tokens, {delimiter: '"', double: yes}, @formatHeregex @mergeInterpolationTokens tokens, {delimiter: '"', double: yes}, (str) =>
@formatHeregex str, { flags }
if flags if flags
@token ',', ',', index - 1, 0 @token ',', ',', index - 1, 0
@token 'STRING', '"' + flags + '"', index - 1, flags.length @token 'STRING', '"' + flags + '"', index - 1, flags.length
@ -792,8 +795,8 @@ exports.Lexer = class Lexer
formatString: (str, options) -> formatString: (str, options) ->
@replaceUnicodeCodePointEscapes str.replace(STRING_OMIT, '$1'), options @replaceUnicodeCodePointEscapes str.replace(STRING_OMIT, '$1'), options
formatHeregex: (str) -> formatHeregex: (str, options) ->
@formatRegex str.replace(HEREGEX_OMIT, '$1$2'), delimiter: '///' @formatRegex str.replace(HEREGEX_OMIT, '$1$2'), merge(options, delimiter: '///')
formatRegex: (str, options) -> formatRegex: (str, options) ->
@replaceUnicodeCodePointEscapes str, options @replaceUnicodeCodePointEscapes str, options
@ -808,8 +811,9 @@ exports.Lexer = class Lexer
low = (codePoint - 0x10000) % 0x400 + 0xDC00 low = (codePoint - 0x10000) % 0x400 + 0xDC00
"#{toUnicodeEscape(high)}#{toUnicodeEscape(low)}" "#{toUnicodeEscape(high)}#{toUnicodeEscape(low)}"
# Replace \u{...} with \uxxxx[\uxxxx] in strings and regexes # Replace \u{...} with \uxxxx[\uxxxx] in regexes without `u` flag
replaceUnicodeCodePointEscapes: (str, options) -> replaceUnicodeCodePointEscapes: (str, options) ->
shouldReplace = options.flags? and 'u' not in options.flags
str.replace UNICODE_CODE_POINT_ESCAPE, (match, escapedBackslash, codePointHex, offset) => str.replace UNICODE_CODE_POINT_ESCAPE, (match, escapedBackslash, codePointHex, offset) =>
return escapedBackslash if escapedBackslash return escapedBackslash if escapedBackslash
@ -818,6 +822,7 @@ exports.Lexer = class Lexer
@error "unicode code point escapes greater than \\u{10ffff} are not allowed", @error "unicode code point escapes greater than \\u{10ffff} are not allowed",
offset: offset + options.delimiter.length offset: offset + options.delimiter.length
length: codePointHex.length + 4 length: codePointHex.length + 4
return match unless shouldReplace
@unicodeCodePointToUnicodeEscapes codePointDecimal @unicodeCodePointToUnicodeEscapes codePointDecimal

View file

@ -305,12 +305,12 @@ test "#4248: Unicode code point escapes", ->
ok ///a\u{000001ab}c///.test 'a\u{1ab}c' ok ///a\u{000001ab}c///.test 'a\u{1ab}c'
ok /a\u{12345}c/.test 'a\ud808\udf45c' ok /a\u{12345}c/.test 'a\ud808\udf45c'
# rewrite code point escapes # rewrite code point escapes unless u flag is set
input = """ input = """
/\\u{bcdef}\\u{abc}/u /\\u{bcdef}\\u{abc}/u
""" """
output = """ output = """
/\\udab3\\uddef\\u0abc/u; /\\u{bcdef}\\u{abc}/u;
""" """
eq toJS(input), output eq toJS(input), output

View file

@ -420,12 +420,12 @@ test "#4248: Unicode code point escapes", ->
eq '\udab3\uddefc', """\u{bcdef}#{ 'c' }""" eq '\udab3\uddefc', """\u{bcdef}#{ 'c' }"""
eq '\\u{123456}', "#{'\\'}#{'u{123456}'}" eq '\\u{123456}', "#{'\\'}#{'u{123456}'}"
# rewrite code point escapes # don't rewrite code point escapes
input = """ input = """
'\\u{bcdef}\\u{abc}' '\\u{bcdef}\\u{abc}'
""" """
output = """ output = """
'\\udab3\\uddef\\u0abc'; '\\u{bcdef}\\u{abc}';
""" """
eq toJS(input), output eq toJS(input), output
@ -433,6 +433,6 @@ test "#4248: Unicode code point escapes", ->
"#{ 'a' }\\u{bcdef}" "#{ 'a' }\\u{bcdef}"
""" """
output = """ output = """
"a\\udab3\\uddef"; "a\\u{bcdef}";
""" """
eq toJS(input), output eq toJS(input), output