mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
* lib/uri/common.rb (URI#{en,de}code_www_form_component):
renamed from URI#{en,de}code_www_component. [ruby-dev:40672] * lib/uri/common.rb (URI#encode_www_form_component): %-encoded element should have always two hex. * lib/uri/common.rb (URI#encode_www_form_component): better treatment for ASCII incompatible encodings and encodings whose lead byte may use 7bit. * lib/uri/common.rb (URI#decode_www_form_component): add %20. * lib/uri/common.rb (URI#decode_www_form_component): add result's encoding as 2nd argument. * lib/uri/common.rb (URI#decode_www_form): added. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@26962 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
parent
4067fd028d
commit
f626a17d8c
3 changed files with 106 additions and 33 deletions
19
ChangeLog
19
ChangeLog
|
@ -1,3 +1,22 @@
|
|||
Thu Mar 18 00:00:58 2010 NARUSE, Yui <naruse@ruby-lang.org>
|
||||
|
||||
* lib/uri/common.rb (URI#{en,de}code_www_form_component):
|
||||
renamed from URI#{en,de}code_www_component. [ruby-dev:40672]
|
||||
|
||||
* lib/uri/common.rb (URI#encode_www_form_component): %-encoded
|
||||
element should have always two hex.
|
||||
|
||||
* lib/uri/common.rb (URI#encode_www_form_component):
|
||||
better treatment for ASCII incompatible encodings and
|
||||
encodings whose lead byte may use 7bit.
|
||||
|
||||
* lib/uri/common.rb (URI#decode_www_form_component): add %20.
|
||||
|
||||
* lib/uri/common.rb (URI#decode_www_form_component): add
|
||||
result's encoding as 2nd argument.
|
||||
|
||||
* lib/uri/common.rb (URI#decode_www_form): added.
|
||||
|
||||
Wed Mar 17 16:25:53 2010 Nobuyoshi Nakada <nobu@ruby-lang.org>
|
||||
|
||||
* hash.c (rb_hash_aset): allow recursive key. [ruby-core:24648]
|
||||
|
|
|
@ -729,49 +729,60 @@ module URI
|
|||
#
|
||||
# This refers http://www.w3.org/TR/html5/forms.html#url-encoded-form-data
|
||||
#
|
||||
# See URI.decode_www_component(str), URI.encode_www_form(enum)
|
||||
def self.encode_www_component(str)
|
||||
# See URI.decode_www_form_component, URI.encode_www_form
|
||||
def self.encode_www_form_component(str)
|
||||
if TBLENCWWWCOMP_.empty?
|
||||
256.times do |i|
|
||||
case i
|
||||
when 0x20
|
||||
TBLENCWWWCOMP_[' '] = '+'
|
||||
when 0x2A, 0x2D, 0x2E, 0x30..0x39, 0x41..0x5A, 0x5F, 0x61..0x7A
|
||||
# when 0x2A, 0x2D, 0x2E, 0x30..0x39, 0x41..0x5A, 0x5F, 0x61..0x7A
|
||||
else
|
||||
TBLENCWWWCOMP_[i.chr] = '%%%X' % i
|
||||
TBLENCWWWCOMP_[i.chr] = '%%%02X' % i
|
||||
end
|
||||
end
|
||||
TBLENCWWWCOMP_.freeze
|
||||
end
|
||||
str = str.to_s.dup
|
||||
enc = str.encoding
|
||||
str.force_encoding(Encoding::ASCII_8BIT)
|
||||
str.gsub!(/[^*\-.0-9A-Z_a-z]/, TBLENCWWWCOMP_)
|
||||
str.force_encoding(enc)
|
||||
str = str.to_s
|
||||
case str.encoding
|
||||
when Encoding::ASCII_8BIT, Encoding::US_ASCII, Encoding::UTF_8
|
||||
str = str.dup.force_encoding(Encoding::ASCII_8BIT)
|
||||
str.gsub!(/[^*\-.0-9A-Z_a-z]/, TBLENCWWWCOMP_)
|
||||
when Encoding::UTF_16BE, Encoding::UTF_16LE, Encoding::UTF_32BE, Encoding::UTF_32LE
|
||||
reg = Regexp.new('[^*\-.0-9A-Z_a-z]+'.encode(str.encoding))
|
||||
str = str.gsub(reg){
|
||||
$&.force_encoding(Encoding::ASCII_8BIT).gsub(/./, TBLENCWWWCOMP_).
|
||||
force_encoding(str.encoding)
|
||||
}
|
||||
else
|
||||
if str.encoding.ascii_compatible?
|
||||
str = str.gsub(/[^*\-.0-9A-Z_a-z]+/){
|
||||
$&.force_encoding(Encoding::ASCII_8BIT).gsub(/./, TBLENCWWWCOMP_)}
|
||||
else
|
||||
str = str.force_encoding(Encoding::ASCII_8BIT).gsub(/./, TBLENCWWWCOMP_)
|
||||
end
|
||||
end
|
||||
str.force_encoding(Encoding::US_ASCII)
|
||||
end
|
||||
|
||||
# Decode given +str+ of URL-encoded form data.
|
||||
#
|
||||
# This decods + to SP.
|
||||
#
|
||||
# See URI.encode_www_component(str)
|
||||
def self.decode_www_component(str)
|
||||
# See URI.encode_www_form_component, URI.decode_www_form
|
||||
def self.decode_www_form_component(str, enc=Encoding::UTF_8)
|
||||
if TBLDECWWWCOMP_.empty?
|
||||
256.times do |i|
|
||||
case i
|
||||
when 0x20
|
||||
TBLDECWWWCOMP_['+'] = ' '
|
||||
else
|
||||
h, l = i>>4, i&15
|
||||
TBLDECWWWCOMP_['%%%X%X' % [h, l]] = i.chr
|
||||
TBLDECWWWCOMP_['%%%x%X' % [h, l]] = i.chr
|
||||
TBLDECWWWCOMP_['%%%X%x' % [h, l]] = i.chr
|
||||
TBLDECWWWCOMP_['%%%x%x' % [h, l]] = i.chr
|
||||
end
|
||||
h, l = i>>4, i&15
|
||||
TBLDECWWWCOMP_['%%%X%X' % [h, l]] = i.chr
|
||||
TBLDECWWWCOMP_['%%%x%X' % [h, l]] = i.chr
|
||||
TBLDECWWWCOMP_['%%%X%x' % [h, l]] = i.chr
|
||||
TBLDECWWWCOMP_['%%%x%x' % [h, l]] = i.chr
|
||||
TBLDECWWWCOMP_['+'] = ' ' if i == 0x20
|
||||
end
|
||||
TBLDECWWWCOMP_.freeze
|
||||
end
|
||||
str.gsub(/\+|%\h\h/, TBLDECWWWCOMP_)
|
||||
str.gsub(/\+|%\h\h/, TBLDECWWWCOMP_).force_encoding(Encoding::UTF_8)
|
||||
end
|
||||
|
||||
# Generate URL-encoded form data from given +enum+.
|
||||
|
@ -779,7 +790,7 @@ module URI
|
|||
# This generates application/x-www-form-urlencoded data defined in HTML5
|
||||
# from given an Enumerable object.
|
||||
#
|
||||
# This internally uses URI.encode_www_component(str).
|
||||
# This internally uses URI.encode_www_form_component(str).
|
||||
#
|
||||
# This doesn't convert encodings of give items, so convert them before call
|
||||
# this method if you want to send data as other than original encoding or
|
||||
|
@ -789,7 +800,7 @@ module URI
|
|||
#
|
||||
# This refers http://www.w3.org/TR/html5/forms.html#url-encoded-form-data
|
||||
#
|
||||
# See URI.encode_www_component(str)
|
||||
# See URI.encode_www_form_component, URI.decode_www_form
|
||||
def self.encode_www_form(enum)
|
||||
str = nil
|
||||
enum.each do |k,v|
|
||||
|
@ -798,12 +809,43 @@ module URI
|
|||
else
|
||||
str = ''.force_encoding(Encoding::US_ASCII)
|
||||
end
|
||||
str << encode_www_component(k)
|
||||
str << encode_www_form_component(k)
|
||||
str << '='
|
||||
str << encode_www_component(v)
|
||||
str << encode_www_form_component(v)
|
||||
end
|
||||
str
|
||||
end
|
||||
|
||||
# Decode URL-encoded form data from given +str+.
|
||||
#
|
||||
# This decodes application/x-www-form-urlencoded data
|
||||
# and returns array of key-value array.
|
||||
# This internally uses URI.decode_www_form_component.
|
||||
#
|
||||
# _charset_ hack is not supported now because the mapping from given charset
|
||||
# to Ruby's encoding is not clear yet.
|
||||
# see also http://www.w3.org/TR/html5/syntax.html#character-encodings-0
|
||||
#
|
||||
# This refers http://www.w3.org/TR/html5/forms.html#url-encoded-form-data
|
||||
#
|
||||
# ary = URI.decode_www_form("a=1&a=2&b=3")
|
||||
# p ary #=> [['a', '1'], ['a', '2'], ['b', '3']]
|
||||
# p ary.assoc('a').last #=> '1'
|
||||
# p ary.assoc('b').last #=> '3'
|
||||
# p ary.rassoc('a').last #=> '2'
|
||||
# p Hash[ary] # => {"a"=>"2", "b"=>"3"}
|
||||
#
|
||||
# See URI.decode_www_form_component, URI.encode_www_form
|
||||
def self.decode_www_form(str, enc=Encoding::UTF_8)
|
||||
ary = []
|
||||
unless /\A\??(?<query>[^=;&]*=[^;&]*(?:[;&][^=;&]*=[^;&]*)*)\z/ =~ str
|
||||
raise ArgumentError, "invalid data of application/x-www-form-urlencoded (#{str})"
|
||||
end
|
||||
query.scan(/([^=;&]+)=([^;&]*)/) do
|
||||
ary << [decode_www_form_component($1, enc), decode_www_form_component($2, enc)]
|
||||
end
|
||||
ary
|
||||
end
|
||||
end
|
||||
|
||||
module Kernel
|
||||
|
|
|
@ -50,16 +50,22 @@ class TestCommon < Test::Unit::TestCase
|
|||
assert_raise(NoMethodError) { Object.new.URI("http://www.ruby-lang.org/") }
|
||||
end
|
||||
|
||||
def test_encode_www_component
|
||||
assert_equal("+%21%22%23%24%25%26%27%28%29*%2B%2C-.%2F09%3A%3B%3C%3D%3E%3F%40" \
|
||||
def test_encode_www_form_component
|
||||
assert_equal("%00+%21%22%23%24%25%26%27%28%29*%2B%2C-.%2F09%3A%3B%3C%3D%3E%3F%40" \
|
||||
"AZ%5B%5C%5D%5E_%60az%7B%7C%7D%7E",
|
||||
URI.encode_www_component(" !\"\#$%&'()*+,-./09:;<=>?@AZ[\\]^_`az{|}~"))
|
||||
URI.encode_www_form_component("\x00 !\"\#$%&'()*+,-./09:;<=>?@AZ[\\]^_`az{|}~"))
|
||||
assert_equal("%95%41", URI.encode_www_form_component(
|
||||
"\x95\x41".force_encoding(Encoding::Shift_JIS)))
|
||||
assert_equal("%30%42", URI.encode_www_form_component(
|
||||
"\x30\x42".force_encoding(Encoding::UTF_16BE)))
|
||||
assert_equal("%30%42", URI.encode_www_form_component(
|
||||
"\x30\x42".force_encoding(Encoding::ISO_2022_JP)))
|
||||
end
|
||||
|
||||
def test_decode_www_component
|
||||
assert_equal(" !\"\#$%&'()*+,-./09:;<=>?@AZ[\\]^_`az{|}~",
|
||||
URI.decode_www_component(
|
||||
"+%21%22%23%24%25%26%27%28%29*%2B%2C-.%2F09%3A%3B%3C%3D%3E%3F%40" \
|
||||
def test_decode_www_form_component
|
||||
assert_equal(" !\"\#$%&'()*+,-./09:;<=>?@AZ[\\]^_`az{|}~",
|
||||
URI.decode_www_form_component(
|
||||
"%20+%21%22%23%24%25%26%27%28%29*%2B%2C-.%2F09%3A%3B%3C%3D%3E%3F%40" \
|
||||
"AZ%5B%5C%5D%5E_%60az%7B%7C%7D%7E"))
|
||||
end
|
||||
|
||||
|
@ -74,6 +80,12 @@ class TestCommon < Test::Unit::TestCase
|
|||
assert_equal(expected, URI.encode_www_form([["a", "1"], ["\u3042", "\u6F22"]]))
|
||||
assert_equal(expected, URI.encode_www_form([[:a, 1], [:"\u3042", "\u6F22"]]))
|
||||
end
|
||||
|
||||
def test_decode_www_form
|
||||
assert_equal([%w[a 1], %w[a 2]], URI.decode_www_form("a=1&a=2"))
|
||||
assert_equal([%w[a 1], ["\u3042", "\u6F22"]],
|
||||
URI.decode_www_form("a=1&%E3%81%82=%E6%BC%A2"))
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
|
|
Loading…
Reference in a new issue