mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
ASCII-incompatible escape
* lib/cgi/util.rb (escapeHTML, unescapeHTML): consider ASCII-incompatible encodings. [Fix GH-1239] git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@53754 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
parent
16e613fcc3
commit
8e46f401b2
3 changed files with 50 additions and 10 deletions
|
@ -1,3 +1,8 @@
|
||||||
|
Sat Feb 6 22:30:57 2016 Nobuyoshi Nakada <nobu@ruby-lang.org>
|
||||||
|
|
||||||
|
* lib/cgi/util.rb (escapeHTML, unescapeHTML): consider
|
||||||
|
ASCII-incompatible encodings. [Fix GH-1239]
|
||||||
|
|
||||||
Sat Feb 6 20:44:24 2016 Nobuyoshi Nakada <nobu@ruby-lang.org>
|
Sat Feb 6 20:44:24 2016 Nobuyoshi Nakada <nobu@ruby-lang.org>
|
||||||
|
|
||||||
* configure.in: check __int64_t and __int128_t for RUBY_DEFINT on
|
* configure.in: check __int64_t and __int128_t for RUBY_DEFINT on
|
||||||
|
|
|
@ -35,6 +35,18 @@ module CGI::Util
|
||||||
# CGI::escapeHTML('Usage: foo "bar" <baz>')
|
# CGI::escapeHTML('Usage: foo "bar" <baz>')
|
||||||
# # => "Usage: foo &quot;bar&quot; &lt;baz&gt;"
|
# # => "Usage: foo &quot;bar&quot; &lt;baz&gt;"
|
||||||
def escapeHTML(string)
|
def escapeHTML(string)
|
||||||
|
enc = string.encoding
|
||||||
|
unless enc.ascii_compatible?
|
||||||
|
if enc.dummy?
|
||||||
|
origenc = enc
|
||||||
|
enc = Encoding::Converter.asciicompat_encoding(enc)
|
||||||
|
string = enc ? string.encode(enc) : string.b
|
||||||
|
end
|
||||||
|
table = Hash[TABLE_FOR_ESCAPE_HTML__.map {|pair|pair.map {|s|s.encode(enc)}}]
|
||||||
|
string = string.gsub(/#{"['&\"<>]".encode(enc)}/, table)
|
||||||
|
string.encode!(origenc) if origenc
|
||||||
|
return string
|
||||||
|
end
|
||||||
string.gsub(/['&\"<>]/, TABLE_FOR_ESCAPE_HTML__)
|
string.gsub(/['&\"<>]/, TABLE_FOR_ESCAPE_HTML__)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -47,10 +59,14 @@ module CGI::Util
|
||||||
# CGI::unescapeHTML("Usage: foo &quot;bar&quot; &lt;baz&gt;")
|
# CGI::unescapeHTML("Usage: foo &quot;bar&quot; &lt;baz&gt;")
|
||||||
# # => "Usage: foo \"bar\" <baz>"
|
# # => "Usage: foo \"bar\" <baz>"
|
||||||
def unescapeHTML(string)
|
def unescapeHTML(string)
|
||||||
return string unless string.include? '&'
|
|
||||||
enc = string.encoding
|
enc = string.encoding
|
||||||
if enc != Encoding::UTF_8 && [Encoding::UTF_16BE, Encoding::UTF_16LE, Encoding::UTF_32BE, Encoding::UTF_32LE].include?(enc)
|
unless enc.ascii_compatible?
|
||||||
return string.gsub(Regexp.new('&(apos|amp|quot|gt|lt|#[0-9]+|#x[0-9A-Fa-f]+);'.encode(enc))) do
|
if enc.dummy?
|
||||||
|
origenc = enc
|
||||||
|
enc = Encoding::Converter.asciicompat_encoding(enc)
|
||||||
|
string = enc ? string.encode(enc) : string.b
|
||||||
|
end
|
||||||
|
string = string.gsub(Regexp.new('&(apos|amp|quot|gt|lt|#[0-9]+|#x[0-9A-Fa-f]+);'.encode(enc))) do
|
||||||
case $1.encode(Encoding::US_ASCII)
|
case $1.encode(Encoding::US_ASCII)
|
||||||
when 'apos' then "'".encode(enc)
|
when 'apos' then "'".encode(enc)
|
||||||
when 'amp' then '&'.encode(enc)
|
when 'amp' then '&'.encode(enc)
|
||||||
|
@ -61,8 +77,15 @@ module CGI::Util
|
||||||
when /\A#x([0-9a-f]+)\z/i then $1.hex.chr(enc)
|
when /\A#x([0-9a-f]+)\z/i then $1.hex.chr(enc)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
string.encode!(origenc) if origenc
|
||||||
|
return string
|
||||||
end
|
end
|
||||||
asciicompat = Encoding.compatible?(string, "a")
|
return string unless string.include? '&'
|
||||||
|
charlimit = case enc
|
||||||
|
when Encoding::UTF_8; 0x10ffff
|
||||||
|
when Encoding::ISO_8859_1; 256
|
||||||
|
else 128
|
||||||
|
end
|
||||||
string.gsub(/&(apos|amp|quot|gt|lt|\#[0-9]+|\#[xX][0-9A-Fa-f]+);/) do
|
string.gsub(/&(apos|amp|quot|gt|lt|\#[0-9]+|\#[xX][0-9A-Fa-f]+);/) do
|
||||||
match = $1.dup
|
match = $1.dup
|
||||||
case match
|
case match
|
||||||
|
@ -73,18 +96,14 @@ module CGI::Util
|
||||||
when 'lt' then '<'
|
when 'lt' then '<'
|
||||||
when /\A#0*(\d+)\z/
|
when /\A#0*(\d+)\z/
|
||||||
n = $1.to_i
|
n = $1.to_i
|
||||||
if enc == Encoding::UTF_8 or
|
if n < charlimit
|
||||||
enc == Encoding::ISO_8859_1 && n < 256 or
|
|
||||||
asciicompat && n < 128
|
|
||||||
n.chr(enc)
|
n.chr(enc)
|
||||||
else
|
else
|
||||||
"&##{$1};"
|
"&##{$1};"
|
||||||
end
|
end
|
||||||
when /\A#x([0-9a-f]+)\z/i
|
when /\A#x([0-9a-f]+)\z/i
|
||||||
n = $1.hex
|
n = $1.hex
|
||||||
if enc == Encoding::UTF_8 or
|
if n < charlimit
|
||||||
enc == Encoding::ISO_8859_1 && n < 256 or
|
|
||||||
asciicompat && n < 128
|
|
||||||
n.chr(enc)
|
n.chr(enc)
|
||||||
else
|
else
|
||||||
"&#x#{$1};"
|
"&#x#{$1};"
|
||||||
|
|
|
@ -98,6 +98,22 @@ class CGIUtilTest < Test::Unit::TestCase
|
||||||
assert_equal("'&\"><", CGI::unescapeHTML("&#39;&amp;&quot;&gt;&lt;"))
|
assert_equal("'&\"><", CGI::unescapeHTML("&#39;&amp;&quot;&gt;&lt;"))
|
||||||
end
|
end
|
||||||
|
|
||||||
|
Encoding.list.each do |enc|
|
||||||
|
begin
|
||||||
|
escaped = "&#39;&amp;&quot;&gt;&lt;".encode(enc)
|
||||||
|
unescaped = "'&\"><".encode(enc)
|
||||||
|
rescue Encoding::ConverterNotFoundError
|
||||||
|
next
|
||||||
|
else
|
||||||
|
define_method("test_cgi_escapeHTML:#{enc.name}") do
|
||||||
|
assert_equal(escaped, CGI::escapeHTML(unescaped))
|
||||||
|
end
|
||||||
|
define_method("test_cgi_unescapeHTML:#{enc.name}") do
|
||||||
|
assert_equal(unescaped, CGI::unescapeHTML(escaped))
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
def test_cgi_unescapeHTML_uppercasecharacter
|
def test_cgi_unescapeHTML_uppercasecharacter
|
||||||
assert_equal("\xE3\x81\x82\xE3\x81\x84\xE3\x81\x86", CGI::unescapeHTML("&#x3042;&#X3044;&#x3046;"))
|
assert_equal("\xE3\x81\x82\xE3\x81\x84\xE3\x81\x86", CGI::unescapeHTML("&#x3042;&#X3044;&#x3046;"))
|
||||||
end
|
end
|
||||||
|
|
Loading…
Reference in a new issue