mirror of
https://github.com/jashkenas/coffeescript.git
synced 2022-11-09 12:23:24 -05:00
Keep unicode code point escapes as is when possible (#4520)
This commit is contained in:
parent
07ae1edb44
commit
7ef5cb4a1f
4 changed files with 37 additions and 23 deletions
|
@ -1,11 +1,11 @@
|
||||||
// Generated by CoffeeScript 2.0.0-beta1
|
// Generated by CoffeeScript 2.0.0-beta1
|
||||||
(function() {
|
(function() {
|
||||||
var BOM, BOOL, CALLABLE, CODE, COFFEE_ALIASES, COFFEE_ALIAS_MAP, COFFEE_KEYWORDS, COMMENT, COMPARE, COMPOUND_ASSIGN, HERECOMMENT_ILLEGAL, HEREDOC_DOUBLE, HEREDOC_INDENT, HEREDOC_SINGLE, HEREGEX, HEREGEX_OMIT, HERE_JSTOKEN, IDENTIFIER, INDENTABLE_CLOSERS, INDEXABLE, INVERSES, JSTOKEN, JS_KEYWORDS, LEADING_BLANK_LINE, LINE_BREAK, LINE_CONTINUER, Lexer, MATH, MULTI_DENT, NOT_REGEX, NUMBER, OPERATOR, POSSIBLY_DIVISION, REGEX, REGEX_FLAGS, REGEX_ILLEGAL, REGEX_INVALID_ESCAPE, RELATION, RESERVED, Rewriter, SHIFT, SIMPLE_STRING_OMIT, STRICT_PROSCRIBED, STRING_DOUBLE, STRING_INVALID_ESCAPE, STRING_OMIT, STRING_SINGLE, STRING_START, TRAILING_BLANK_LINE, TRAILING_SPACES, UNARY, UNARY_MATH, UNICODE_CODE_POINT_ESCAPE, VALID_FLAGS, WHITESPACE, compact, count, invertLiterate, isForFrom, isUnassignable, key, locationDataToString, repeat, starts, throwSyntaxError,
|
var BOM, BOOL, CALLABLE, CODE, COFFEE_ALIASES, COFFEE_ALIAS_MAP, COFFEE_KEYWORDS, COMMENT, COMPARE, COMPOUND_ASSIGN, HERECOMMENT_ILLEGAL, HEREDOC_DOUBLE, HEREDOC_INDENT, HEREDOC_SINGLE, HEREGEX, HEREGEX_OMIT, HERE_JSTOKEN, IDENTIFIER, INDENTABLE_CLOSERS, INDEXABLE, INVERSES, JSTOKEN, JS_KEYWORDS, LEADING_BLANK_LINE, LINE_BREAK, LINE_CONTINUER, Lexer, MATH, MULTI_DENT, NOT_REGEX, NUMBER, OPERATOR, POSSIBLY_DIVISION, REGEX, REGEX_FLAGS, REGEX_ILLEGAL, REGEX_INVALID_ESCAPE, RELATION, RESERVED, Rewriter, SHIFT, SIMPLE_STRING_OMIT, STRICT_PROSCRIBED, STRING_DOUBLE, STRING_INVALID_ESCAPE, STRING_OMIT, STRING_SINGLE, STRING_START, TRAILING_BLANK_LINE, TRAILING_SPACES, UNARY, UNARY_MATH, UNICODE_CODE_POINT_ESCAPE, VALID_FLAGS, WHITESPACE, compact, count, invertLiterate, isForFrom, isUnassignable, key, locationDataToString, merge, repeat, starts, throwSyntaxError,
|
||||||
indexOf = [].indexOf;
|
indexOf = [].indexOf;
|
||||||
|
|
||||||
({Rewriter, INVERSES} = require('./rewriter'));
|
({Rewriter, INVERSES} = require('./rewriter'));
|
||||||
|
|
||||||
({count, starts, compact, repeat, invertLiterate, locationDataToString, throwSyntaxError} = require('./helpers'));
|
({count, starts, compact, repeat, invertLiterate, merge, locationDataToString, throwSyntaxError} = require('./helpers'));
|
||||||
|
|
||||||
exports.Lexer = Lexer = class Lexer {
|
exports.Lexer = Lexer = class Lexer {
|
||||||
tokenize(code, opts = {}) {
|
tokenize(code, opts = {}) {
|
||||||
|
@ -366,9 +366,6 @@
|
||||||
isRegex: true,
|
isRegex: true,
|
||||||
offsetInChunk: 1
|
offsetInChunk: 1
|
||||||
});
|
});
|
||||||
body = this.formatRegex(body, {
|
|
||||||
delimiter: '/'
|
|
||||||
});
|
|
||||||
index = regex.length;
|
index = regex.length;
|
||||||
prev = this.prev();
|
prev = this.prev();
|
||||||
if (prev) {
|
if (prev) {
|
||||||
|
@ -398,8 +395,13 @@
|
||||||
});
|
});
|
||||||
break;
|
break;
|
||||||
case !(regex || tokens.length === 1):
|
case !(regex || tokens.length === 1):
|
||||||
if (body == null) {
|
if (body) {
|
||||||
body = this.formatHeregex(tokens[0][1]);
|
body = this.formatRegex(body, {
|
||||||
|
flags,
|
||||||
|
delimiter: '/'
|
||||||
|
});
|
||||||
|
} else {
|
||||||
|
body = this.formatHeregex(tokens[0][1], {flags});
|
||||||
}
|
}
|
||||||
this.token('REGEX', `${this.makeDelimitedLiteral(body, {
|
this.token('REGEX', `${this.makeDelimitedLiteral(body, {
|
||||||
delimiter: '/'
|
delimiter: '/'
|
||||||
|
@ -412,7 +414,9 @@
|
||||||
this.mergeInterpolationTokens(tokens, {
|
this.mergeInterpolationTokens(tokens, {
|
||||||
delimiter: '"',
|
delimiter: '"',
|
||||||
double: true
|
double: true
|
||||||
}, this.formatHeregex);
|
}, (str) => {
|
||||||
|
return this.formatHeregex(str, {flags});
|
||||||
|
});
|
||||||
if (flags) {
|
if (flags) {
|
||||||
this.token(',', ',', index - 1, 0);
|
this.token(',', ',', index - 1, 0);
|
||||||
this.token('STRING', '"' + flags + '"', index - 1, flags.length);
|
this.token('STRING', '"' + flags + '"', index - 1, flags.length);
|
||||||
|
@ -893,10 +897,10 @@
|
||||||
return this.replaceUnicodeCodePointEscapes(str.replace(STRING_OMIT, '$1'), options);
|
return this.replaceUnicodeCodePointEscapes(str.replace(STRING_OMIT, '$1'), options);
|
||||||
}
|
}
|
||||||
|
|
||||||
formatHeregex(str) {
|
formatHeregex(str, options) {
|
||||||
return this.formatRegex(str.replace(HEREGEX_OMIT, '$1$2'), {
|
return this.formatRegex(str.replace(HEREGEX_OMIT, '$1$2'), merge(options, {
|
||||||
delimiter: '///'
|
delimiter: '///'
|
||||||
});
|
}));
|
||||||
}
|
}
|
||||||
|
|
||||||
formatRegex(str, options) {
|
formatRegex(str, options) {
|
||||||
|
@ -919,6 +923,8 @@
|
||||||
}
|
}
|
||||||
|
|
||||||
replaceUnicodeCodePointEscapes(str, options) {
|
replaceUnicodeCodePointEscapes(str, options) {
|
||||||
|
var shouldReplace;
|
||||||
|
shouldReplace = (options.flags != null) && indexOf.call(options.flags, 'u') < 0;
|
||||||
return str.replace(UNICODE_CODE_POINT_ESCAPE, (match, escapedBackslash, codePointHex, offset) => {
|
return str.replace(UNICODE_CODE_POINT_ESCAPE, (match, escapedBackslash, codePointHex, offset) => {
|
||||||
var codePointDecimal;
|
var codePointDecimal;
|
||||||
if (escapedBackslash) {
|
if (escapedBackslash) {
|
||||||
|
@ -931,6 +937,9 @@
|
||||||
length: codePointHex.length + 4
|
length: codePointHex.length + 4
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
if (!shouldReplace) {
|
||||||
|
return match;
|
||||||
|
}
|
||||||
return this.unicodeCodePointToUnicodeEscapes(codePointDecimal);
|
return this.unicodeCodePointToUnicodeEscapes(codePointDecimal);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
|
@ -12,7 +12,7 @@
|
||||||
{Rewriter, INVERSES} = require './rewriter'
|
{Rewriter, INVERSES} = require './rewriter'
|
||||||
|
|
||||||
# Import the helpers we need.
|
# Import the helpers we need.
|
||||||
{count, starts, compact, repeat, invertLiterate,
|
{count, starts, compact, repeat, invertLiterate, merge,
|
||||||
locationDataToString, throwSyntaxError} = require './helpers'
|
locationDataToString, throwSyntaxError} = require './helpers'
|
||||||
|
|
||||||
# The Lexer Class
|
# The Lexer Class
|
||||||
|
@ -330,7 +330,6 @@ exports.Lexer = class Lexer
|
||||||
when match = REGEX.exec @chunk
|
when match = REGEX.exec @chunk
|
||||||
[regex, body, closed] = match
|
[regex, body, closed] = match
|
||||||
@validateEscapes body, isRegex: yes, offsetInChunk: 1
|
@validateEscapes body, isRegex: yes, offsetInChunk: 1
|
||||||
body = @formatRegex body, delimiter: '/'
|
|
||||||
index = regex.length
|
index = regex.length
|
||||||
prev = @prev()
|
prev = @prev()
|
||||||
if prev
|
if prev
|
||||||
|
@ -349,13 +348,17 @@ exports.Lexer = class Lexer
|
||||||
when not VALID_FLAGS.test flags
|
when not VALID_FLAGS.test flags
|
||||||
@error "invalid regular expression flags #{flags}", offset: index, length: flags.length
|
@error "invalid regular expression flags #{flags}", offset: index, length: flags.length
|
||||||
when regex or tokens.length is 1
|
when regex or tokens.length is 1
|
||||||
body ?= @formatHeregex tokens[0][1]
|
if body
|
||||||
|
body = @formatRegex body, { flags, delimiter: '/' }
|
||||||
|
else
|
||||||
|
body = @formatHeregex tokens[0][1], { flags }
|
||||||
@token 'REGEX', "#{@makeDelimitedLiteral body, delimiter: '/'}#{flags}", 0, end, origin
|
@token 'REGEX', "#{@makeDelimitedLiteral body, delimiter: '/'}#{flags}", 0, end, origin
|
||||||
else
|
else
|
||||||
@token 'REGEX_START', '(', 0, 0, origin
|
@token 'REGEX_START', '(', 0, 0, origin
|
||||||
@token 'IDENTIFIER', 'RegExp', 0, 0
|
@token 'IDENTIFIER', 'RegExp', 0, 0
|
||||||
@token 'CALL_START', '(', 0, 0
|
@token 'CALL_START', '(', 0, 0
|
||||||
@mergeInterpolationTokens tokens, {delimiter: '"', double: yes}, @formatHeregex
|
@mergeInterpolationTokens tokens, {delimiter: '"', double: yes}, (str) =>
|
||||||
|
@formatHeregex str, { flags }
|
||||||
if flags
|
if flags
|
||||||
@token ',', ',', index - 1, 0
|
@token ',', ',', index - 1, 0
|
||||||
@token 'STRING', '"' + flags + '"', index - 1, flags.length
|
@token 'STRING', '"' + flags + '"', index - 1, flags.length
|
||||||
|
@ -792,8 +795,8 @@ exports.Lexer = class Lexer
|
||||||
formatString: (str, options) ->
|
formatString: (str, options) ->
|
||||||
@replaceUnicodeCodePointEscapes str.replace(STRING_OMIT, '$1'), options
|
@replaceUnicodeCodePointEscapes str.replace(STRING_OMIT, '$1'), options
|
||||||
|
|
||||||
formatHeregex: (str) ->
|
formatHeregex: (str, options) ->
|
||||||
@formatRegex str.replace(HEREGEX_OMIT, '$1$2'), delimiter: '///'
|
@formatRegex str.replace(HEREGEX_OMIT, '$1$2'), merge(options, delimiter: '///')
|
||||||
|
|
||||||
formatRegex: (str, options) ->
|
formatRegex: (str, options) ->
|
||||||
@replaceUnicodeCodePointEscapes str, options
|
@replaceUnicodeCodePointEscapes str, options
|
||||||
|
@ -808,8 +811,9 @@ exports.Lexer = class Lexer
|
||||||
low = (codePoint - 0x10000) % 0x400 + 0xDC00
|
low = (codePoint - 0x10000) % 0x400 + 0xDC00
|
||||||
"#{toUnicodeEscape(high)}#{toUnicodeEscape(low)}"
|
"#{toUnicodeEscape(high)}#{toUnicodeEscape(low)}"
|
||||||
|
|
||||||
# Replace \u{...} with \uxxxx[\uxxxx] in strings and regexes
|
# Replace \u{...} with \uxxxx[\uxxxx] in regexes without `u` flag
|
||||||
replaceUnicodeCodePointEscapes: (str, options) ->
|
replaceUnicodeCodePointEscapes: (str, options) ->
|
||||||
|
shouldReplace = options.flags? and 'u' not in options.flags
|
||||||
str.replace UNICODE_CODE_POINT_ESCAPE, (match, escapedBackslash, codePointHex, offset) =>
|
str.replace UNICODE_CODE_POINT_ESCAPE, (match, escapedBackslash, codePointHex, offset) =>
|
||||||
return escapedBackslash if escapedBackslash
|
return escapedBackslash if escapedBackslash
|
||||||
|
|
||||||
|
@ -818,6 +822,7 @@ exports.Lexer = class Lexer
|
||||||
@error "unicode code point escapes greater than \\u{10ffff} are not allowed",
|
@error "unicode code point escapes greater than \\u{10ffff} are not allowed",
|
||||||
offset: offset + options.delimiter.length
|
offset: offset + options.delimiter.length
|
||||||
length: codePointHex.length + 4
|
length: codePointHex.length + 4
|
||||||
|
return match unless shouldReplace
|
||||||
|
|
||||||
@unicodeCodePointToUnicodeEscapes codePointDecimal
|
@unicodeCodePointToUnicodeEscapes codePointDecimal
|
||||||
|
|
||||||
|
|
|
@ -305,12 +305,12 @@ test "#4248: Unicode code point escapes", ->
|
||||||
ok ///a\u{000001ab}c///.test 'a\u{1ab}c'
|
ok ///a\u{000001ab}c///.test 'a\u{1ab}c'
|
||||||
ok /a\u{12345}c/.test 'a\ud808\udf45c'
|
ok /a\u{12345}c/.test 'a\ud808\udf45c'
|
||||||
|
|
||||||
# rewrite code point escapes
|
# rewrite code point escapes unless u flag is set
|
||||||
input = """
|
input = """
|
||||||
/\\u{bcdef}\\u{abc}/u
|
/\\u{bcdef}\\u{abc}/u
|
||||||
"""
|
"""
|
||||||
output = """
|
output = """
|
||||||
/\\udab3\\uddef\\u0abc/u;
|
/\\u{bcdef}\\u{abc}/u;
|
||||||
"""
|
"""
|
||||||
eq toJS(input), output
|
eq toJS(input), output
|
||||||
|
|
||||||
|
|
|
@ -420,12 +420,12 @@ test "#4248: Unicode code point escapes", ->
|
||||||
eq '\udab3\uddefc', """\u{bcdef}#{ 'c' }"""
|
eq '\udab3\uddefc', """\u{bcdef}#{ 'c' }"""
|
||||||
eq '\\u{123456}', "#{'\\'}#{'u{123456}'}"
|
eq '\\u{123456}', "#{'\\'}#{'u{123456}'}"
|
||||||
|
|
||||||
# rewrite code point escapes
|
# don't rewrite code point escapes
|
||||||
input = """
|
input = """
|
||||||
'\\u{bcdef}\\u{abc}'
|
'\\u{bcdef}\\u{abc}'
|
||||||
"""
|
"""
|
||||||
output = """
|
output = """
|
||||||
'\\udab3\\uddef\\u0abc';
|
'\\u{bcdef}\\u{abc}';
|
||||||
"""
|
"""
|
||||||
eq toJS(input), output
|
eq toJS(input), output
|
||||||
|
|
||||||
|
@ -433,6 +433,6 @@ test "#4248: Unicode code point escapes", ->
|
||||||
"#{ 'a' }\\u{bcdef}"
|
"#{ 'a' }\\u{bcdef}"
|
||||||
"""
|
"""
|
||||||
output = """
|
output = """
|
||||||
"a\\udab3\\uddef";
|
"a\\u{bcdef}";
|
||||||
"""
|
"""
|
||||||
eq toJS(input), output
|
eq toJS(input), output
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue