mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
[ruby/uri] Add URI::Generic#decoded_#{user,password}
URI::Generic#{user,password} return the encoded values, which are not that useful if you want to do authentication with them. Automatic decoding by default would break backwards compatibility. Optional automatic decoding via a keyword to URI.parse would require threading the option through at least 3 other methods, and would make semantics confusing (user= takes encoded or unencoded password?) or require more work. Thus, adding this as a separate method seemed the simplest approach.

Unfortunately, URI lacks a method for correct decoding. Unlike in www form components, + in earlier parts of the URI such as the userinfo section is treated verbatim and not as an encoded space. Add URI.#{en,de}code_uri_component methods, which are almost the same as URI.#{en,de}code_www_form_component, but without the special SP => + handling.

Implements [Feature #9045]

https://github.com/ruby/uri/commit/16cfc4e92f
This commit is contained in:
parent
054ae999dc
commit
fbebfe1697
4 changed files with 110 additions and 14 deletions
|
@ -295,6 +295,7 @@ module URI
|
|||
256.times do |i|
|
||||
TBLENCWWWCOMP_[-i.chr] = -('%%%02X' % i)
|
||||
end
|
||||
TBLENCURICOMP_ = TBLENCWWWCOMP_.dup.freeze
|
||||
TBLENCWWWCOMP_[' '] = '+'
|
||||
TBLENCWWWCOMP_.freeze
|
||||
TBLDECWWWCOMP_ = {} # :nodoc:
|
||||
|
@ -320,16 +321,7 @@ module URI
|
|||
#
|
||||
# See URI.decode_www_form_component, URI.encode_www_form.
|
||||
def self.encode_www_form_component(str, enc=nil)
  # Delegate to the shared encoder. TBLENCWWWCOMP_ is the table variant in
  # which SP maps to '+'. The escaping must happen exactly once: running the
  # inline gsub! and then the helper would escape the output a second time.
  _encode_uri_component(/[^*\-.0-9A-Z_a-z]/, TBLENCWWWCOMP_, str, enc)
end
|
||||
|
||||
# Decodes given +str+ of URL-encoded form data.
|
||||
|
@ -338,10 +330,43 @@ module URI
|
|||
#
|
||||
# See URI.encode_www_form_component, URI.decode_www_form.
|
||||
def self.decode_www_form_component(str, enc=Encoding::UTF_8)
  # Delegate to the shared decoder, which performs the %-encoding validity
  # check itself. The pattern here also matches '+', so '+' is looked up in
  # the decode table together with the %XX escapes.
  _decode_uri_component(/\+|%\h\h/, str, enc)
end
|
||||
|
||||
# Encodes +str+ using URL encoding.
#
# This encodes SP to %20 instead of +. It uses the same escape pattern as
# URI.encode_www_form_component but with TBLENCURICOMP_, the copy of the
# escape table taken before the SP => '+' entry is installed.
#
# +str+ and +enc+ are passed through unchanged to the shared encoder.
#
# See URI.decode_uri_component.
def self.encode_uri_component(str, enc=nil)
  _encode_uri_component(/[^*\-.0-9A-Z_a-z]/, TBLENCURICOMP_, str, enc)
end
|
||||
|
||||
# Decodes given +str+ of URL-encoded data.
#
# This does not decode + to SP: only %XX escapes are matched, so a literal
# '+' is left as-is.
#
# +str+ and +enc+ are passed through unchanged to the shared decoder.
#
# See URI.encode_uri_component.
def self.decode_uri_component(str, enc=Encoding::UTF_8)
  _decode_uri_component(/%\h\h/, str, enc)
end
|
||||
|
||||
# Shared implementation behind the public encode_*_component methods.
#
# +regexp+ selects what to escape and +table+ supplies the replacement for
# each match. +str+ is converted with to_s and duplicated, so the caller's
# object is never modified. Unless +str+ is already ASCII-8BIT, it is first
# transcoded to +enc+ (when +enc+ is given and is not ASCII-8BIT) and then
# treated as raw bytes.
#
# Returns the escaped string tagged as US-ASCII.
def self._encode_uri_component(regexp, table, str, enc)
  str = str.to_s.dup
  if str.encoding != Encoding::ASCII_8BIT
    if enc && enc != Encoding::ASCII_8BIT
      # Go through UTF-8 first, replacing invalid or undefined sequences,
      # before transcoding to the requested encoding.
      str.encode!(Encoding::UTF_8, invalid: :replace, undef: :replace)
      # Characters that +enc+ cannot represent become "&#<codepoint>;".
      str.encode!(enc, fallback: ->(x){"&##{x.ord};"})
    end
    str.force_encoding(Encoding::ASCII_8BIT)
  end
  # gsub! returns nil when nothing matched, so it must not be the method's
  # last expression.
  str.gsub!(regexp, table)
  str.force_encoding(Encoding::US_ASCII)
end
|
||||
private_class_method :_encode_uri_component
|
||||
|
||||
# Shared implementation behind the public decode_*_component methods.
#
# Raises ArgumentError if +str+ contains a '%' that is not followed by two
# hex digits. Otherwise every match of +regexp+ in a binary copy of +str+
# is replaced via TBLDECWWWCOMP_, and the result is tagged with +enc+.
def self._decode_uri_component(regexp, str, enc)
  raise ArgumentError, "invalid %-encoding (#{str})" if /%(?!\h\h)/.match?(str)
  str.b.gsub(regexp, TBLDECWWWCOMP_).force_encoding(enc)
end
|
||||
private_class_method :_decode_uri_component
|
||||
|
||||
# Generates URL-encoded form data from given +enum+.
|
||||
#
|
||||
# This generates application/x-www-form-urlencoded data defined in HTML5
|
||||
|
|
|
@ -564,16 +564,26 @@ module URI
|
|||
end
|
||||
end
|
||||
|
||||
# Returns the user component (without URI decoding).
def user
  @user
end
|
||||
|
||||
# Returns the password component (without URI decoding).
def password
  @password
end
|
||||
|
||||
# Returns the user component after URI decoding, or nil when no user
# component is set.
def decoded_user
  URI.decode_uri_component(@user) if @user
end
|
||||
|
||||
# Returns the password component after URI decoding, or nil when no
# password component is set.
def decoded_password
  URI.decode_uri_component(@password) if @password
end
|
||||
|
||||
#
|
||||
# Checks the host +v+ component for RFC2396 compliance
|
||||
# and against the URI::Parser Regexp for :HOST.
|
||||
|
|
|
@ -130,6 +130,58 @@ class TestCommon < Test::Unit::TestCase
|
|||
assert_nothing_raised(ArgumentError){URI.decode_www_form_component("x"*(1024*1024))}
|
||||
end
|
||||
|
||||
# Exercises URI.encode_uri_component over the ASCII range, over strings in
# several source encodings, and with an explicit target encoding.
def test_encode_uri_component
  # ASCII: SP is escaped as %20 and '+' as %2B; '*', '-', '.', '_', digits
  # and letters pass through.
  assert_equal("%00%20%21%22%23%24%25%26%27%28%29*%2B%2C-.%2F09%3A%3B%3C%3D%3E%3F%40" \
               "AZ%5B%5C%5D%5E_%60az%7B%7C%7D%7E",
               URI.encode_uri_component("\x00 !\"\#$%&'()*+,-./09:;<=>?@AZ[\\]^_`az{|}~"))

  # No target encoding: the string's bytes are escaped as they are.
  assert_equal("%95A", URI.encode_uri_component(
                 "\x95\x41".force_encoding(Encoding::Shift_JIS)))
  assert_equal("0B", URI.encode_uri_component(
                 "\x30\x42".force_encoding(Encoding::UTF_16BE)))
  assert_equal("%1B%24B%24%22%1B%28B", URI.encode_uri_component(
                 "\e$B$\"\e(B".force_encoding(Encoding::ISO_2022_JP)))

  # UTF-8 source with an explicit target encoding.
  assert_equal("%E3%81%82", URI.encode_uri_component(
                 "\u3042", Encoding::ASCII_8BIT))
  assert_equal("%82%A0", URI.encode_uri_component(
                 "\u3042", Encoding::Windows_31J))
  assert_equal("%E3%81%82", URI.encode_uri_component(
                 "\u3042", Encoding::UTF_8))

  # Shift_JIS source with an explicit target encoding.
  assert_equal("%82%A0", URI.encode_uri_component(
                 "\u3042".encode("sjis"), Encoding::ASCII_8BIT))
  assert_equal("%A4%A2", URI.encode_uri_component(
                 "\u3042".encode("sjis"), Encoding::EUC_JP))
  assert_equal("%E3%81%82", URI.encode_uri_component(
                 "\u3042".encode("sjis"), Encoding::UTF_8))
  assert_equal("B0", URI.encode_uri_component(
                 "\u3042".encode("sjis"), Encoding::UTF_16LE))

  # A character the target encoding cannot represent is emitted as
  # "&#730;" and then escaped.
  assert_equal("%26%23730%3B", URI.encode_uri_component(
                 "\u02DA", Encoding::WINDOWS_1252))

  # invalid
  assert_equal("%EF%BF%BD%EF%BF%BD", URI.encode_uri_component(
                 "\xE3\x81\xFF", "utf-8"))
  assert_equal("%E6%9F%8A%EF%BF%BD%EF%BF%BD", URI.encode_uri_component(
                 "\x95\x41\xff\xff".force_encoding(Encoding::Shift_JIS), "utf-8"))
end
|
||||
|
||||
# Exercises URI.decode_uri_component: ASCII escapes, an explicit result
# encoding, mixed raw and escaped bytes, and malformed %-sequences.
def test_decode_uri_component
  # A literal '+' in the input stays '+'; only %XX escapes are decoded.
  assert_equal(" +!\"\#$%&'()*+,-./09:;<=>?@AZ[\\]^_`az{|}~",
               URI.decode_uri_component(
                 "%20+%21%22%23%24%25%26%27%28%29*%2B%2C-.%2F09%3A%3B%3C%3D%3E%3F%40" \
                 "AZ%5B%5C%5D%5E_%60az%7B%7C%7D%7E"))
  assert_equal("\xA1\xA2".force_encoding(Encoding::EUC_JP),
               URI.decode_uri_component("%A1%A2", "EUC-JP"))
  assert_equal("\xE3\x81\x82\xE3\x81\x82".force_encoding("UTF-8"),
               URI.decode_uri_component("\xE3\x81\x82%E3%81%82".force_encoding("UTF-8")))

  # A '%' not followed by two hex digits is rejected.
  assert_raise(ArgumentError){URI.decode_uri_component("%")}
  assert_raise(ArgumentError){URI.decode_uri_component("%a")}
  assert_raise(ArgumentError){URI.decode_uri_component("x%a_")}
  # A 1 MiB input without escapes must not raise.
  assert_nothing_raised(ArgumentError){URI.decode_uri_component("x"*(1024*1024))}
end
|
||||
|
||||
def test_encode_www_form
|
||||
assert_equal("a=1", URI.encode_www_form("a" => "1"))
|
||||
assert_equal("a=1", URI.encode_www_form(a: 1))
|
||||
|
|
|
@ -50,6 +50,15 @@ class URI::TestParser < Test::Unit::TestCase
|
|||
assert_raise(URI::InvalidURIError) { URI.parse('https://www.example.com/search?q=%XX') }
|
||||
end
|
||||
|
||||
# user/password keep the percent-encoded userinfo as parsed, while
# decoded_user/decoded_password return it with %XX escapes resolved.
def test_parse_auth
  str = "http://al%40ice:p%40s%25sword@example.com/dir%2Fname/subdir?foo=bar%40example.com"
  uri = URI.parse(str)
  assert_equal "al%40ice", uri.user
  assert_equal "p%40s%25sword", uri.password
  assert_equal "al@ice", uri.decoded_user
  assert_equal "p@s%sword", uri.decoded_password
end
|
||||
|
||||
def test_raise_bad_uri_for_integer
|
||||
assert_raise(URI::InvalidURIError) do
|
||||
URI.parse(1)
|
||||
|
|
Loading…
Reference in a new issue