1
0
Fork 0
mirror of https://github.com/ruby/ruby.git synced 2022-11-09 12:17:21 -05:00

[ruby/uri] Add URI::Generic#decoded_#{user,password}

URI::Generic#{user,password} return the encoded values, which are
not that useful if you want to do authentication with them.
Automatic decoding by default would break backwards compatibility.
Optional automatic decoding via a keyword to URI.parse would
require threading the option through at least 3 other methods, and
would make semantics confusing (would user= and password= then take
encoded or unencoded values?) or require more work.  Thus, adding
these as separate methods seemed the simplest approach.

Unfortunately, URI lacks a method for correct decoding.  Unlike in
www form components, + in earlier parts of the URI such as the
userinfo section is treated verbatim and not as an encoded space.
Add URI.#{en,de}code_uri_component methods, which are almost the
same as URI.#{en,de}code_www_form_component, but without the
special SP => + handling.

Implements [Feature #9045]

https://github.com/ruby/uri/commit/16cfc4e92f
This commit is contained in:
Jeremy Evans 2021-03-04 14:05:18 -08:00 committed by git
parent 054ae999dc
commit fbebfe1697
4 changed files with 110 additions and 14 deletions

View file

@ -295,6 +295,7 @@ module URI
# Fill the encoding table: every byte value maps to its "%XX" escape
# (two upper-case hex digits).  Unary minus (String#-@) makes both keys
# and values frozen, deduplicated strings.
256.times do |i|
TBLENCWWWCOMP_[-i.chr] = -('%%%02X' % i)
end
# Snapshot the table BEFORE the space override below, so the URI-component
# table still maps ' ' to "%20".
TBLENCURICOMP_ = TBLENCWWWCOMP_.dup.freeze
# The www-form table alone writes a space as '+'.
TBLENCWWWCOMP_[' '] = '+'
TBLENCWWWCOMP_.freeze
TBLDECWWWCOMP_ = {} # :nodoc:
@ -320,16 +321,7 @@ module URI
#
# See URI.decode_www_form_component, URI.encode_www_form.
def self.encode_www_form_component(str, enc=nil)
  # +str+ is the value to encode (converted with #to_s by the helper) and
  # +enc+ is an optional target encoding; the result is the escaped String
  # returned by the shared encoder.
  #
  # All of the work is delegated in a single pass.  Encoding +str+ here
  # first and then handing that result to the helper would escape it twice
  # (the "+" written for a space would become "%2B" and every "%" would
  # become "%25").
  #
  # The pattern matches everything except * - . 0-9 A-Z _ a-z, and
  # TBLENCWWWCOMP_ supplies the replacement for each matched byte.
  _encode_uri_component(/[^*\-.0-9A-Z_a-z]/, TBLENCWWWCOMP_, str, enc)
end
# Decodes given +str+ of URL-encoded form data.
@ -338,10 +330,43 @@ module URI
#
# See URI.encode_www_form_component, URI.decode_www_form.
def self.decode_www_form_component(str, enc=Encoding::UTF_8)
  # +str+ is the encoded text and +enc+ the encoding applied to the result.
  #
  # Validation of %-escapes (ArgumentError on a malformed one) and the
  # actual decoding both live in the shared helper, so nothing is checked
  # or decoded here a second time.
  #
  # The pattern matches "+" as well as "%XX", which is what makes this the
  # form-data variant: "+" is looked up in the decoding table too.
  _decode_uri_component(/\+|%\h\h/, str, enc)
end
# Percent-encodes +str+ for use as a URI component.
#
# Unlike the www-form variant, SP is written as %20 rather than +.
# +enc+ is passed through to the shared encoder unchanged.
def self.encode_uri_component(str, enc=nil)
  # Everything except * - . 0-9 A-Z _ a-z gets replaced.
  to_escape = /[^*\-.0-9A-Z_a-z]/
  _encode_uri_component(to_escape, TBLENCURICOMP_, str, enc)
end
# Decodes the percent-escapes in +str+ and tags the result with +enc+.
#
# A literal + is left alone; it is not turned into SP.
def self.decode_uri_component(str, enc=Encoding::UTF_8)
  # Only "%XX" sequences are substituted, so "+" never reaches the table.
  percent_escape = /%\h\h/
  _decode_uri_component(percent_escape, str, enc)
end
# Shared percent-encoding routine.
#
# +regexp+ selects the substrings to replace and +table+ supplies the
# replacement for each match.  +str+ is converted with #to_s and copied,
# so the caller's object is never modified.  When +enc+ is given and is
# not ASCII-8BIT, a non-binary string is transcoded to +enc+ before
# escaping.  Returns the escaped copy tagged as US-ASCII.
def self._encode_uri_component(regexp, table, str, enc)
  binary = Encoding::ASCII_8BIT
  buffer = str.to_s.dup

  unless buffer.encoding == binary
    needs_transcode = enc && enc != binary
    if needs_transcode
      # First pass replaces invalid/undefined sequences; second pass
      # converts to the target, writing unrepresentable characters as
      # "&#<ordinal>;".
      buffer.encode!(Encoding::UTF_8, invalid: :replace, undef: :replace)
      buffer.encode!(enc, fallback: ->(x){"&##{x.ord};"})
    end
    # From here on the text is handled as raw bytes.
    buffer.force_encoding(binary)
  end

  buffer.gsub!(regexp, table)
  buffer.force_encoding(Encoding::US_ASCII)
end
# Hide the shared encoder from the public URI API; the public entry points
# are URI.encode_www_form_component and URI.encode_uri_component.
private_class_method :_encode_uri_component
# Shared percent-decoding routine.
#
# Raises ArgumentError when +str+ contains a "%" that is not followed by
# two hex digits.  Otherwise every match of +regexp+ in a binary copy of
# +str+ is replaced through TBLDECWWWCOMP_, and the result is tagged with
# +enc+.
def self._decode_uri_component(regexp, str, enc)
  if /%(?!\h\h)/.match?(str)
    raise ArgumentError, "invalid %-encoding (#{str})"
  end

  raw = str.b
  decoded = raw.gsub(regexp, TBLDECWWWCOMP_)
  decoded.force_encoding(enc)
end
# Hide the shared decoder from the public URI API; the public entry points
# are URI.decode_www_form_component and URI.decode_uri_component.
private_class_method :_decode_uri_component
# Generates URL-encoded form data from given +enum+.
#
# This generates application/x-www-form-urlencoded data defined in HTML5

View file

@ -564,16 +564,26 @@ module URI
end
end
# Returns the user component as stored in @user (without URI decoding).
def user
@user
end
# Returns the password component as stored in @password (without URI
# decoding).
def password
@password
end
# Returns the user component with its percent-escapes decoded by
# URI.decode_uri_component, or +nil+ when no user is set.
def decoded_user
  encoded = @user
  return unless encoded

  URI.decode_uri_component(encoded)
end
# Returns the password component with its percent-escapes decoded by
# URI.decode_uri_component, or +nil+ when no password is set.
def decoded_password
  encoded = @password
  return unless encoded

  URI.decode_uri_component(encoded)
end
#
# Checks the host +v+ component for RFC2396 compliance
# and against the URI::Parser Regexp for :HOST.

View file

@ -130,6 +130,58 @@ class TestCommon < Test::Unit::TestCase
assert_nothing_raised(ArgumentError){URI.decode_www_form_component("x"*(1024*1024))}
end
# Exercises URI.encode_uri_component with plain ASCII, strings tagged with
# several source encodings, explicit target encodings, and invalid bytes.
def test_encode_uri_component
# ASCII range: only * - . 0-9 A-Z _ a-z stay literal; SP is expected as
# %20 and + as %2B.
assert_equal("%00%20%21%22%23%24%25%26%27%28%29*%2B%2C-.%2F09%3A%3B%3C%3D%3E%3F%40" \
"AZ%5B%5C%5D%5E_%60az%7B%7C%7D%7E",
URI.encode_uri_component("\x00 !\"\#$%&'()*+,-./09:;<=>?@AZ[\\]^_`az{|}~"))
# No target encoding: the source string's bytes are escaped as they are,
# whatever encoding the string is tagged with.
assert_equal("%95A", URI.encode_uri_component(
"\x95\x41".force_encoding(Encoding::Shift_JIS)))
assert_equal("0B", URI.encode_uri_component(
"\x30\x42".force_encoding(Encoding::UTF_16BE)))
assert_equal("%1B%24B%24%22%1B%28B", URI.encode_uri_component(
"\e$B$\"\e(B".force_encoding(Encoding::ISO_2022_JP)))
# UTF-8 source with an explicit target encoding.
assert_equal("%E3%81%82", URI.encode_uri_component(
"\u3042", Encoding::ASCII_8BIT))
assert_equal("%82%A0", URI.encode_uri_component(
"\u3042", Encoding::Windows_31J))
assert_equal("%E3%81%82", URI.encode_uri_component(
"\u3042", Encoding::UTF_8))
# Shift_JIS source with an explicit target encoding.
assert_equal("%82%A0", URI.encode_uri_component(
"\u3042".encode("sjis"), Encoding::ASCII_8BIT))
assert_equal("%A4%A2", URI.encode_uri_component(
"\u3042".encode("sjis"), Encoding::EUC_JP))
assert_equal("%E3%81%82", URI.encode_uri_component(
"\u3042".encode("sjis"), Encoding::UTF_8))
assert_equal("B0", URI.encode_uri_component(
"\u3042".encode("sjis"), Encoding::UTF_16LE))
# The expected value here is the escaped form of "&#730;".
assert_equal("%26%23730%3B", URI.encode_uri_component(
"\u02DA", Encoding::WINDOWS_1252))
# Invalid byte sequences: expected to come out as escaped U+FFFD
# (%EF%BF%BD).
assert_equal("%EF%BF%BD%EF%BF%BD", URI.encode_uri_component(
"\xE3\x81\xFF", "utf-8"))
assert_equal("%E6%9F%8A%EF%BF%BD%EF%BF%BD", URI.encode_uri_component(
"\x95\x41\xff\xff".force_encoding(Encoding::Shift_JIS), "utf-8"))
end
# Exercises URI.decode_uri_component: escapes are decoded, a literal + is
# kept, the requested encoding is applied, and malformed escapes raise.
def test_decode_uri_component
# ASCII range: %20 decodes to SP while the literal + stays a +.
assert_equal(" +!\"\#$%&'()*+,-./09:;<=>?@AZ[\\]^_`az{|}~",
URI.decode_uri_component(
"%20+%21%22%23%24%25%26%27%28%29*%2B%2C-.%2F09%3A%3B%3C%3D%3E%3F%40" \
"AZ%5B%5C%5D%5E_%60az%7B%7C%7D%7E"))
# The second argument selects the encoding of the result.
assert_equal("\xA1\xA2".force_encoding(Encoding::EUC_JP),
URI.decode_uri_component("%A1%A2", "EUC-JP"))
# Raw UTF-8 bytes and escapes may be mixed in one input.
assert_equal("\xE3\x81\x82\xE3\x81\x82".force_encoding("UTF-8"),
URI.decode_uri_component("\xE3\x81\x82%E3%81%82".force_encoding("UTF-8")))
# A "%" that is not followed by two hex digits is rejected.
assert_raise(ArgumentError){URI.decode_uri_component("%")}
assert_raise(ArgumentError){URI.decode_uri_component("%a")}
assert_raise(ArgumentError){URI.decode_uri_component("x%a_")}
# A 1 MiB input without escapes must not raise.
assert_nothing_raised(ArgumentError){URI.decode_uri_component("x"*(1024*1024))}
end
def test_encode_www_form
assert_equal("a=1", URI.encode_www_form("a" => "1"))
assert_equal("a=1", URI.encode_www_form(a: 1))

View file

@ -50,6 +50,15 @@ class URI::TestParser < Test::Unit::TestCase
assert_raise(URI::InvalidURIError) { URI.parse('https://www.example.com/search?q=%XX') }
end
# Parses a URI whose userinfo contains percent-escapes and checks both the
# raw and the decoded accessors.
def test_parse_auth
  parsed = URI.parse(
    "http://al%40ice:p%40s%25sword@example.com/dir%2Fname/subdir?foo=bar%40example.com"
  )

  # The plain readers hand back the escapes exactly as written.
  assert_equal("al%40ice", parsed.user)
  assert_equal("p%40s%25sword", parsed.password)

  # The decoded readers resolve %40 to "@" and %25 to "%".
  assert_equal("al@ice", parsed.decoded_user)
  assert_equal("p@s%sword", parsed.decoded_password)
end
def test_raise_bad_uri_for_integer
assert_raise(URI::InvalidURIError) do
URI.parse(1)