1
0
Fork 0
mirror of https://github.com/jashkenas/coffeescript.git synced 2022-11-09 12:23:24 -05:00

Keep unicode code point escapes as is when possible (#4520)

This commit is contained in:
Julian Rosse 2017-04-25 12:15:08 -05:00 committed by Simon Lydell
parent 07ae1edb44
commit 7ef5cb4a1f
4 changed files with 37 additions and 23 deletions

View file

@ -1,11 +1,11 @@
// Generated by CoffeeScript 2.0.0-beta1
(function() {
var BOM, BOOL, CALLABLE, CODE, COFFEE_ALIASES, COFFEE_ALIAS_MAP, COFFEE_KEYWORDS, COMMENT, COMPARE, COMPOUND_ASSIGN, HERECOMMENT_ILLEGAL, HEREDOC_DOUBLE, HEREDOC_INDENT, HEREDOC_SINGLE, HEREGEX, HEREGEX_OMIT, HERE_JSTOKEN, IDENTIFIER, INDENTABLE_CLOSERS, INDEXABLE, INVERSES, JSTOKEN, JS_KEYWORDS, LEADING_BLANK_LINE, LINE_BREAK, LINE_CONTINUER, Lexer, MATH, MULTI_DENT, NOT_REGEX, NUMBER, OPERATOR, POSSIBLY_DIVISION, REGEX, REGEX_FLAGS, REGEX_ILLEGAL, REGEX_INVALID_ESCAPE, RELATION, RESERVED, Rewriter, SHIFT, SIMPLE_STRING_OMIT, STRICT_PROSCRIBED, STRING_DOUBLE, STRING_INVALID_ESCAPE, STRING_OMIT, STRING_SINGLE, STRING_START, TRAILING_BLANK_LINE, TRAILING_SPACES, UNARY, UNARY_MATH, UNICODE_CODE_POINT_ESCAPE, VALID_FLAGS, WHITESPACE, compact, count, invertLiterate, isForFrom, isUnassignable, key, locationDataToString, repeat, starts, throwSyntaxError,
var BOM, BOOL, CALLABLE, CODE, COFFEE_ALIASES, COFFEE_ALIAS_MAP, COFFEE_KEYWORDS, COMMENT, COMPARE, COMPOUND_ASSIGN, HERECOMMENT_ILLEGAL, HEREDOC_DOUBLE, HEREDOC_INDENT, HEREDOC_SINGLE, HEREGEX, HEREGEX_OMIT, HERE_JSTOKEN, IDENTIFIER, INDENTABLE_CLOSERS, INDEXABLE, INVERSES, JSTOKEN, JS_KEYWORDS, LEADING_BLANK_LINE, LINE_BREAK, LINE_CONTINUER, Lexer, MATH, MULTI_DENT, NOT_REGEX, NUMBER, OPERATOR, POSSIBLY_DIVISION, REGEX, REGEX_FLAGS, REGEX_ILLEGAL, REGEX_INVALID_ESCAPE, RELATION, RESERVED, Rewriter, SHIFT, SIMPLE_STRING_OMIT, STRICT_PROSCRIBED, STRING_DOUBLE, STRING_INVALID_ESCAPE, STRING_OMIT, STRING_SINGLE, STRING_START, TRAILING_BLANK_LINE, TRAILING_SPACES, UNARY, UNARY_MATH, UNICODE_CODE_POINT_ESCAPE, VALID_FLAGS, WHITESPACE, compact, count, invertLiterate, isForFrom, isUnassignable, key, locationDataToString, merge, repeat, starts, throwSyntaxError,
indexOf = [].indexOf;
({Rewriter, INVERSES} = require('./rewriter'));
({count, starts, compact, repeat, invertLiterate, locationDataToString, throwSyntaxError} = require('./helpers'));
({count, starts, compact, repeat, invertLiterate, merge, locationDataToString, throwSyntaxError} = require('./helpers'));
exports.Lexer = Lexer = class Lexer {
tokenize(code, opts = {}) {
@ -366,9 +366,6 @@
isRegex: true,
offsetInChunk: 1
});
body = this.formatRegex(body, {
delimiter: '/'
});
index = regex.length;
prev = this.prev();
if (prev) {
@ -398,8 +395,13 @@
});
break;
case !(regex || tokens.length === 1):
if (body == null) {
body = this.formatHeregex(tokens[0][1]);
if (body) {
body = this.formatRegex(body, {
flags,
delimiter: '/'
});
} else {
body = this.formatHeregex(tokens[0][1], {flags});
}
this.token('REGEX', `${this.makeDelimitedLiteral(body, {
delimiter: '/'
@ -412,7 +414,9 @@
this.mergeInterpolationTokens(tokens, {
delimiter: '"',
double: true
}, this.formatHeregex);
}, (str) => {
return this.formatHeregex(str, {flags});
});
if (flags) {
this.token(',', ',', index - 1, 0);
this.token('STRING', '"' + flags + '"', index - 1, flags.length);
@ -893,10 +897,10 @@
return this.replaceUnicodeCodePointEscapes(str.replace(STRING_OMIT, '$1'), options);
}
formatHeregex(str) {
return this.formatRegex(str.replace(HEREGEX_OMIT, '$1$2'), {
formatHeregex(str, options) {
return this.formatRegex(str.replace(HEREGEX_OMIT, '$1$2'), merge(options, {
delimiter: '///'
});
}));
}
formatRegex(str, options) {
@ -919,6 +923,8 @@
}
replaceUnicodeCodePointEscapes(str, options) {
var shouldReplace;
shouldReplace = (options.flags != null) && indexOf.call(options.flags, 'u') < 0;
return str.replace(UNICODE_CODE_POINT_ESCAPE, (match, escapedBackslash, codePointHex, offset) => {
var codePointDecimal;
if (escapedBackslash) {
@ -931,6 +937,9 @@
length: codePointHex.length + 4
});
}
if (!shouldReplace) {
return match;
}
return this.unicodeCodePointToUnicodeEscapes(codePointDecimal);
});
}

View file

@ -12,7 +12,7 @@
{Rewriter, INVERSES} = require './rewriter'
# Import the helpers we need.
{count, starts, compact, repeat, invertLiterate,
{count, starts, compact, repeat, invertLiterate, merge,
locationDataToString, throwSyntaxError} = require './helpers'
# The Lexer Class
@ -330,7 +330,6 @@ exports.Lexer = class Lexer
when match = REGEX.exec @chunk
[regex, body, closed] = match
@validateEscapes body, isRegex: yes, offsetInChunk: 1
body = @formatRegex body, delimiter: '/'
index = regex.length
prev = @prev()
if prev
@ -349,13 +348,17 @@ exports.Lexer = class Lexer
when not VALID_FLAGS.test flags
@error "invalid regular expression flags #{flags}", offset: index, length: flags.length
when regex or tokens.length is 1
body ?= @formatHeregex tokens[0][1]
if body
body = @formatRegex body, { flags, delimiter: '/' }
else
body = @formatHeregex tokens[0][1], { flags }
@token 'REGEX', "#{@makeDelimitedLiteral body, delimiter: '/'}#{flags}", 0, end, origin
else
@token 'REGEX_START', '(', 0, 0, origin
@token 'IDENTIFIER', 'RegExp', 0, 0
@token 'CALL_START', '(', 0, 0
@mergeInterpolationTokens tokens, {delimiter: '"', double: yes}, @formatHeregex
@mergeInterpolationTokens tokens, {delimiter: '"', double: yes}, (str) =>
@formatHeregex str, { flags }
if flags
@token ',', ',', index - 1, 0
@token 'STRING', '"' + flags + '"', index - 1, flags.length
@ -792,8 +795,8 @@ exports.Lexer = class Lexer
formatString: (str, options) ->
@replaceUnicodeCodePointEscapes str.replace(STRING_OMIT, '$1'), options
formatHeregex: (str) ->
@formatRegex str.replace(HEREGEX_OMIT, '$1$2'), delimiter: '///'
formatHeregex: (str, options) ->
@formatRegex str.replace(HEREGEX_OMIT, '$1$2'), merge(options, delimiter: '///')
formatRegex: (str, options) ->
@replaceUnicodeCodePointEscapes str, options
@ -808,8 +811,9 @@ exports.Lexer = class Lexer
low = (codePoint - 0x10000) % 0x400 + 0xDC00
"#{toUnicodeEscape(high)}#{toUnicodeEscape(low)}"
# Replace \u{...} with \uxxxx[\uxxxx] in strings and regexes
# Replace \u{...} with \uxxxx[\uxxxx] in regexes without `u` flag
replaceUnicodeCodePointEscapes: (str, options) ->
shouldReplace = options.flags? and 'u' not in options.flags
str.replace UNICODE_CODE_POINT_ESCAPE, (match, escapedBackslash, codePointHex, offset) =>
return escapedBackslash if escapedBackslash
@ -818,6 +822,7 @@ exports.Lexer = class Lexer
@error "unicode code point escapes greater than \\u{10ffff} are not allowed",
offset: offset + options.delimiter.length
length: codePointHex.length + 4
return match unless shouldReplace
@unicodeCodePointToUnicodeEscapes codePointDecimal

View file

@ -305,12 +305,12 @@ test "#4248: Unicode code point escapes", ->
ok ///a\u{000001ab}c///.test 'a\u{1ab}c'
ok /a\u{12345}c/.test 'a\ud808\udf45c'
# rewrite code point escapes
# rewrite code point escapes unless u flag is set
input = """
/\\u{bcdef}\\u{abc}/u
"""
output = """
/\\udab3\\uddef\\u0abc/u;
/\\u{bcdef}\\u{abc}/u;
"""
eq toJS(input), output

View file

@ -420,12 +420,12 @@ test "#4248: Unicode code point escapes", ->
eq '\udab3\uddefc', """\u{bcdef}#{ 'c' }"""
eq '\\u{123456}', "#{'\\'}#{'u{123456}'}"
# rewrite code point escapes
# don't rewrite code point escapes
input = """
'\\u{bcdef}\\u{abc}'
"""
output = """
'\\udab3\\uddef\\u0abc';
'\\u{bcdef}\\u{abc}';
"""
eq toJS(input), output
@ -433,6 +433,6 @@ test "#4248: Unicode code point escapes", ->
"#{ 'a' }\\u{bcdef}"
"""
output = """
"a\\udab3\\uddef";
"a\\u{bcdef}";
"""
eq toJS(input), output