mirror of
https://github.com/jashkenas/coffeescript.git
synced 2022-11-09 12:23:24 -05:00
Keep unicode code point escapes as is when possible (#4520)
This commit is contained in:
parent
07ae1edb44
commit
7ef5cb4a1f
4 changed files with 37 additions and 23 deletions
|
@ -1,11 +1,11 @@
|
|||
// Generated by CoffeeScript 2.0.0-beta1
|
||||
(function() {
|
||||
var BOM, BOOL, CALLABLE, CODE, COFFEE_ALIASES, COFFEE_ALIAS_MAP, COFFEE_KEYWORDS, COMMENT, COMPARE, COMPOUND_ASSIGN, HERECOMMENT_ILLEGAL, HEREDOC_DOUBLE, HEREDOC_INDENT, HEREDOC_SINGLE, HEREGEX, HEREGEX_OMIT, HERE_JSTOKEN, IDENTIFIER, INDENTABLE_CLOSERS, INDEXABLE, INVERSES, JSTOKEN, JS_KEYWORDS, LEADING_BLANK_LINE, LINE_BREAK, LINE_CONTINUER, Lexer, MATH, MULTI_DENT, NOT_REGEX, NUMBER, OPERATOR, POSSIBLY_DIVISION, REGEX, REGEX_FLAGS, REGEX_ILLEGAL, REGEX_INVALID_ESCAPE, RELATION, RESERVED, Rewriter, SHIFT, SIMPLE_STRING_OMIT, STRICT_PROSCRIBED, STRING_DOUBLE, STRING_INVALID_ESCAPE, STRING_OMIT, STRING_SINGLE, STRING_START, TRAILING_BLANK_LINE, TRAILING_SPACES, UNARY, UNARY_MATH, UNICODE_CODE_POINT_ESCAPE, VALID_FLAGS, WHITESPACE, compact, count, invertLiterate, isForFrom, isUnassignable, key, locationDataToString, repeat, starts, throwSyntaxError,
|
||||
var BOM, BOOL, CALLABLE, CODE, COFFEE_ALIASES, COFFEE_ALIAS_MAP, COFFEE_KEYWORDS, COMMENT, COMPARE, COMPOUND_ASSIGN, HERECOMMENT_ILLEGAL, HEREDOC_DOUBLE, HEREDOC_INDENT, HEREDOC_SINGLE, HEREGEX, HEREGEX_OMIT, HERE_JSTOKEN, IDENTIFIER, INDENTABLE_CLOSERS, INDEXABLE, INVERSES, JSTOKEN, JS_KEYWORDS, LEADING_BLANK_LINE, LINE_BREAK, LINE_CONTINUER, Lexer, MATH, MULTI_DENT, NOT_REGEX, NUMBER, OPERATOR, POSSIBLY_DIVISION, REGEX, REGEX_FLAGS, REGEX_ILLEGAL, REGEX_INVALID_ESCAPE, RELATION, RESERVED, Rewriter, SHIFT, SIMPLE_STRING_OMIT, STRICT_PROSCRIBED, STRING_DOUBLE, STRING_INVALID_ESCAPE, STRING_OMIT, STRING_SINGLE, STRING_START, TRAILING_BLANK_LINE, TRAILING_SPACES, UNARY, UNARY_MATH, UNICODE_CODE_POINT_ESCAPE, VALID_FLAGS, WHITESPACE, compact, count, invertLiterate, isForFrom, isUnassignable, key, locationDataToString, merge, repeat, starts, throwSyntaxError,
|
||||
indexOf = [].indexOf;
|
||||
|
||||
({Rewriter, INVERSES} = require('./rewriter'));
|
||||
|
||||
({count, starts, compact, repeat, invertLiterate, locationDataToString, throwSyntaxError} = require('./helpers'));
|
||||
({count, starts, compact, repeat, invertLiterate, merge, locationDataToString, throwSyntaxError} = require('./helpers'));
|
||||
|
||||
exports.Lexer = Lexer = class Lexer {
|
||||
tokenize(code, opts = {}) {
|
||||
|
@ -366,9 +366,6 @@
|
|||
isRegex: true,
|
||||
offsetInChunk: 1
|
||||
});
|
||||
body = this.formatRegex(body, {
|
||||
delimiter: '/'
|
||||
});
|
||||
index = regex.length;
|
||||
prev = this.prev();
|
||||
if (prev) {
|
||||
|
@ -398,8 +395,13 @@
|
|||
});
|
||||
break;
|
||||
case !(regex || tokens.length === 1):
|
||||
if (body == null) {
|
||||
body = this.formatHeregex(tokens[0][1]);
|
||||
if (body) {
|
||||
body = this.formatRegex(body, {
|
||||
flags,
|
||||
delimiter: '/'
|
||||
});
|
||||
} else {
|
||||
body = this.formatHeregex(tokens[0][1], {flags});
|
||||
}
|
||||
this.token('REGEX', `${this.makeDelimitedLiteral(body, {
|
||||
delimiter: '/'
|
||||
|
@ -412,7 +414,9 @@
|
|||
this.mergeInterpolationTokens(tokens, {
|
||||
delimiter: '"',
|
||||
double: true
|
||||
}, this.formatHeregex);
|
||||
}, (str) => {
|
||||
return this.formatHeregex(str, {flags});
|
||||
});
|
||||
if (flags) {
|
||||
this.token(',', ',', index - 1, 0);
|
||||
this.token('STRING', '"' + flags + '"', index - 1, flags.length);
|
||||
|
@ -893,10 +897,10 @@
|
|||
return this.replaceUnicodeCodePointEscapes(str.replace(STRING_OMIT, '$1'), options);
|
||||
}
|
||||
|
||||
formatHeregex(str) {
|
||||
return this.formatRegex(str.replace(HEREGEX_OMIT, '$1$2'), {
|
||||
formatHeregex(str, options) {
|
||||
return this.formatRegex(str.replace(HEREGEX_OMIT, '$1$2'), merge(options, {
|
||||
delimiter: '///'
|
||||
});
|
||||
}));
|
||||
}
|
||||
|
||||
formatRegex(str, options) {
|
||||
|
@ -919,6 +923,8 @@
|
|||
}
|
||||
|
||||
replaceUnicodeCodePointEscapes(str, options) {
|
||||
var shouldReplace;
|
||||
shouldReplace = (options.flags != null) && indexOf.call(options.flags, 'u') < 0;
|
||||
return str.replace(UNICODE_CODE_POINT_ESCAPE, (match, escapedBackslash, codePointHex, offset) => {
|
||||
var codePointDecimal;
|
||||
if (escapedBackslash) {
|
||||
|
@ -931,6 +937,9 @@
|
|||
length: codePointHex.length + 4
|
||||
});
|
||||
}
|
||||
if (!shouldReplace) {
|
||||
return match;
|
||||
}
|
||||
return this.unicodeCodePointToUnicodeEscapes(codePointDecimal);
|
||||
});
|
||||
}
|
||||
|
|
|
@ -12,7 +12,7 @@
|
|||
{Rewriter, INVERSES} = require './rewriter'
|
||||
|
||||
# Import the helpers we need.
|
||||
{count, starts, compact, repeat, invertLiterate,
|
||||
{count, starts, compact, repeat, invertLiterate, merge,
|
||||
locationDataToString, throwSyntaxError} = require './helpers'
|
||||
|
||||
# The Lexer Class
|
||||
|
@ -330,7 +330,6 @@ exports.Lexer = class Lexer
|
|||
when match = REGEX.exec @chunk
|
||||
[regex, body, closed] = match
|
||||
@validateEscapes body, isRegex: yes, offsetInChunk: 1
|
||||
body = @formatRegex body, delimiter: '/'
|
||||
index = regex.length
|
||||
prev = @prev()
|
||||
if prev
|
||||
|
@ -349,13 +348,17 @@ exports.Lexer = class Lexer
|
|||
when not VALID_FLAGS.test flags
|
||||
@error "invalid regular expression flags #{flags}", offset: index, length: flags.length
|
||||
when regex or tokens.length is 1
|
||||
body ?= @formatHeregex tokens[0][1]
|
||||
if body
|
||||
body = @formatRegex body, { flags, delimiter: '/' }
|
||||
else
|
||||
body = @formatHeregex tokens[0][1], { flags }
|
||||
@token 'REGEX', "#{@makeDelimitedLiteral body, delimiter: '/'}#{flags}", 0, end, origin
|
||||
else
|
||||
@token 'REGEX_START', '(', 0, 0, origin
|
||||
@token 'IDENTIFIER', 'RegExp', 0, 0
|
||||
@token 'CALL_START', '(', 0, 0
|
||||
@mergeInterpolationTokens tokens, {delimiter: '"', double: yes}, @formatHeregex
|
||||
@mergeInterpolationTokens tokens, {delimiter: '"', double: yes}, (str) =>
|
||||
@formatHeregex str, { flags }
|
||||
if flags
|
||||
@token ',', ',', index - 1, 0
|
||||
@token 'STRING', '"' + flags + '"', index - 1, flags.length
|
||||
|
@ -792,8 +795,8 @@ exports.Lexer = class Lexer
|
|||
formatString: (str, options) ->
|
||||
@replaceUnicodeCodePointEscapes str.replace(STRING_OMIT, '$1'), options
|
||||
|
||||
formatHeregex: (str) ->
|
||||
@formatRegex str.replace(HEREGEX_OMIT, '$1$2'), delimiter: '///'
|
||||
formatHeregex: (str, options) ->
|
||||
@formatRegex str.replace(HEREGEX_OMIT, '$1$2'), merge(options, delimiter: '///')
|
||||
|
||||
formatRegex: (str, options) ->
|
||||
@replaceUnicodeCodePointEscapes str, options
|
||||
|
@ -808,8 +811,9 @@ exports.Lexer = class Lexer
|
|||
low = (codePoint - 0x10000) % 0x400 + 0xDC00
|
||||
"#{toUnicodeEscape(high)}#{toUnicodeEscape(low)}"
|
||||
|
||||
# Replace \u{...} with \uxxxx[\uxxxx] in strings and regexes
|
||||
# Replace \u{...} with \uxxxx[\uxxxx] in regexes without `u` flag
|
||||
replaceUnicodeCodePointEscapes: (str, options) ->
|
||||
shouldReplace = options.flags? and 'u' not in options.flags
|
||||
str.replace UNICODE_CODE_POINT_ESCAPE, (match, escapedBackslash, codePointHex, offset) =>
|
||||
return escapedBackslash if escapedBackslash
|
||||
|
||||
|
@ -818,6 +822,7 @@ exports.Lexer = class Lexer
|
|||
@error "unicode code point escapes greater than \\u{10ffff} are not allowed",
|
||||
offset: offset + options.delimiter.length
|
||||
length: codePointHex.length + 4
|
||||
return match unless shouldReplace
|
||||
|
||||
@unicodeCodePointToUnicodeEscapes codePointDecimal
|
||||
|
||||
|
|
|
@ -305,12 +305,12 @@ test "#4248: Unicode code point escapes", ->
|
|||
ok ///a\u{000001ab}c///.test 'a\u{1ab}c'
|
||||
ok /a\u{12345}c/.test 'a\ud808\udf45c'
|
||||
|
||||
# rewrite code point escapes
|
||||
# rewrite code point escapes unless u flag is set
|
||||
input = """
|
||||
/\\u{bcdef}\\u{abc}/u
|
||||
"""
|
||||
output = """
|
||||
/\\udab3\\uddef\\u0abc/u;
|
||||
/\\u{bcdef}\\u{abc}/u;
|
||||
"""
|
||||
eq toJS(input), output
|
||||
|
||||
|
|
|
@ -420,12 +420,12 @@ test "#4248: Unicode code point escapes", ->
|
|||
eq '\udab3\uddefc', """\u{bcdef}#{ 'c' }"""
|
||||
eq '\\u{123456}', "#{'\\'}#{'u{123456}'}"
|
||||
|
||||
# rewrite code point escapes
|
||||
# don't rewrite code point escapes
|
||||
input = """
|
||||
'\\u{bcdef}\\u{abc}'
|
||||
"""
|
||||
output = """
|
||||
'\\udab3\\uddef\\u0abc';
|
||||
'\\u{bcdef}\\u{abc}';
|
||||
"""
|
||||
eq toJS(input), output
|
||||
|
||||
|
@ -433,6 +433,6 @@ test "#4248: Unicode code point escapes", ->
|
|||
"#{ 'a' }\\u{bcdef}"
|
||||
"""
|
||||
output = """
|
||||
"a\\udab3\\uddef";
|
||||
"a\\u{bcdef}";
|
||||
"""
|
||||
eq toJS(input), output
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue