mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
* lib/uri/common.rb (URI#{en,de}code_www_form_component):
renamed from URI#{en,de}code_www_component. [ruby-dev:40672] * lib/uri/common.rb (URI#encode_www_form_component): %-encoded element should have always two hex. * lib/uri/common.rb (URI#encode_www_form_component): better treatment for ASCII incompatible encodings and encodings whose lead byte may use 7bit. * lib/uri/common.rb (URI#decode_www_form_component): add %20. * lib/uri/common.rb (URI#decode_www_form_component): add result's encoding as 2nd argument. * lib/uri/common.rb (URI#decode_www_form): added. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@26962 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
parent
4067fd028d
commit
f626a17d8c
3 changed files with 106 additions and 33 deletions
19
ChangeLog
19
ChangeLog
|
@ -1,3 +1,22 @@
|
||||||
|
Thu Mar 18 00:00:58 2010 NARUSE, Yui <naruse@ruby-lang.org>
|
||||||
|
|
||||||
|
* lib/uri/common.rb (URI#{en,de}code_www_form_component):
|
||||||
|
renamed from URI#{en,de}code_www_component. [ruby-dev:40672]
|
||||||
|
|
||||||
|
* lib/uri/common.rb (URI#encode_www_form_component): %-encoded
|
||||||
|
element should have always two hex.
|
||||||
|
|
||||||
|
* lib/uri/common.rb (URI#encode_www_form_component):
|
||||||
|
better treatment for ASCII incompatible encodings and
|
||||||
|
encodings whose lead byte may use 7bit.
|
||||||
|
|
||||||
|
* lib/uri/common.rb (URI#decode_www_form_component): add %20.
|
||||||
|
|
||||||
|
* lib/uri/common.rb (URI#decode_www_form_component): add
|
||||||
|
result's encoding as 2nd argument.
|
||||||
|
|
||||||
|
* lib/uri/common.rb (URI#decode_www_form): added.
|
||||||
|
|
||||||
Wed Mar 17 16:25:53 2010 Nobuyoshi Nakada <nobu@ruby-lang.org>
|
Wed Mar 17 16:25:53 2010 Nobuyoshi Nakada <nobu@ruby-lang.org>
|
||||||
|
|
||||||
* hash.c (rb_hash_aset): allow recursive key. [ruby-core:24648]
|
* hash.c (rb_hash_aset): allow recursive key. [ruby-core:24648]
|
||||||
|
|
|
@ -729,49 +729,60 @@ module URI
|
||||||
#
|
#
|
||||||
# This refers to http://www.w3.org/TR/html5/forms.html#url-encoded-form-data
|
# This refers to http://www.w3.org/TR/html5/forms.html#url-encoded-form-data
|
||||||
#
|
#
|
||||||
# See URI.decode_www_component(str), URI.encode_www_form(enum)
|
# See URI.decode_www_form_component, URI.encode_www_form
|
||||||
def self.encode_www_component(str)
|
def self.encode_www_form_component(str)
|
||||||
if TBLENCWWWCOMP_.empty?
|
if TBLENCWWWCOMP_.empty?
|
||||||
256.times do |i|
|
256.times do |i|
|
||||||
case i
|
case i
|
||||||
when 0x20
|
when 0x20
|
||||||
TBLENCWWWCOMP_[' '] = '+'
|
TBLENCWWWCOMP_[' '] = '+'
|
||||||
when 0x2A, 0x2D, 0x2E, 0x30..0x39, 0x41..0x5A, 0x5F, 0x61..0x7A
|
# when 0x2A, 0x2D, 0x2E, 0x30..0x39, 0x41..0x5A, 0x5F, 0x61..0x7A
|
||||||
else
|
else
|
||||||
TBLENCWWWCOMP_[i.chr] = '%%%X' % i
|
TBLENCWWWCOMP_[i.chr] = '%%%02X' % i
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
TBLENCWWWCOMP_.freeze
|
TBLENCWWWCOMP_.freeze
|
||||||
end
|
end
|
||||||
str = str.to_s.dup
|
str = str.to_s
|
||||||
enc = str.encoding
|
case str.encoding
|
||||||
str.force_encoding(Encoding::ASCII_8BIT)
|
when Encoding::ASCII_8BIT, Encoding::US_ASCII, Encoding::UTF_8
|
||||||
str.gsub!(/[^*\-.0-9A-Z_a-z]/, TBLENCWWWCOMP_)
|
str = str.dup.force_encoding(Encoding::ASCII_8BIT)
|
||||||
str.force_encoding(enc)
|
str.gsub!(/[^*\-.0-9A-Z_a-z]/, TBLENCWWWCOMP_)
|
||||||
|
when Encoding::UTF_16BE, Encoding::UTF_16LE, Encoding::UTF_32BE, Encoding::UTF_32LE
|
||||||
|
reg = Regexp.new('[^*\-.0-9A-Z_a-z]+'.encode(str.encoding))
|
||||||
|
str = str.gsub(reg){
|
||||||
|
$&.force_encoding(Encoding::ASCII_8BIT).gsub(/./, TBLENCWWWCOMP_).
|
||||||
|
force_encoding(str.encoding)
|
||||||
|
}
|
||||||
|
else
|
||||||
|
if str.encoding.ascii_compatible?
|
||||||
|
str = str.gsub(/[^*\-.0-9A-Z_a-z]+/){
|
||||||
|
$&.force_encoding(Encoding::ASCII_8BIT).gsub(/./, TBLENCWWWCOMP_)}
|
||||||
|
else
|
||||||
|
str = str.force_encoding(Encoding::ASCII_8BIT).gsub(/./, TBLENCWWWCOMP_)
|
||||||
|
end
|
||||||
|
end
|
||||||
|
str.force_encoding(Encoding::US_ASCII)
|
||||||
end
|
end
|
||||||
|
|
||||||
# Decode given +str+ of URL-encoded form data.
|
# Decode given +str+ of URL-encoded form data.
|
||||||
#
|
#
|
||||||
# This decodes + to SP.
|
# This decodes + to SP.
|
||||||
#
|
#
|
||||||
# See URI.encode_www_component(str)
|
# See URI.encode_www_form_component, URI.decode_www_form
|
||||||
def self.decode_www_component(str)
|
def self.decode_www_form_component(str, enc=Encoding::UTF_8)
|
||||||
if TBLDECWWWCOMP_.empty?
|
if TBLDECWWWCOMP_.empty?
|
||||||
256.times do |i|
|
256.times do |i|
|
||||||
case i
|
h, l = i>>4, i&15
|
||||||
when 0x20
|
TBLDECWWWCOMP_['%%%X%X' % [h, l]] = i.chr
|
||||||
TBLDECWWWCOMP_['+'] = ' '
|
TBLDECWWWCOMP_['%%%x%X' % [h, l]] = i.chr
|
||||||
else
|
TBLDECWWWCOMP_['%%%X%x' % [h, l]] = i.chr
|
||||||
h, l = i>>4, i&15
|
TBLDECWWWCOMP_['%%%x%x' % [h, l]] = i.chr
|
||||||
TBLDECWWWCOMP_['%%%X%X' % [h, l]] = i.chr
|
TBLDECWWWCOMP_['+'] = ' ' if i == 0x20
|
||||||
TBLDECWWWCOMP_['%%%x%X' % [h, l]] = i.chr
|
|
||||||
TBLDECWWWCOMP_['%%%X%x' % [h, l]] = i.chr
|
|
||||||
TBLDECWWWCOMP_['%%%x%x' % [h, l]] = i.chr
|
|
||||||
end
|
|
||||||
end
|
end
|
||||||
TBLDECWWWCOMP_.freeze
|
TBLDECWWWCOMP_.freeze
|
||||||
end
|
end
|
||||||
str.gsub(/\+|%\h\h/, TBLDECWWWCOMP_)
|
str.gsub(/\+|%\h\h/, TBLDECWWWCOMP_).force_encoding(Encoding::UTF_8)
|
||||||
end
|
end
|
||||||
|
|
||||||
# Generate URL-encoded form data from given +enum+.
|
# Generate URL-encoded form data from given +enum+.
|
||||||
|
@ -779,7 +790,7 @@ module URI
|
||||||
# This generates application/x-www-form-urlencoded data defined in HTML5
|
# This generates application/x-www-form-urlencoded data defined in HTML5
|
||||||
# from a given Enumerable object.
|
# from a given Enumerable object.
|
||||||
#
|
#
|
||||||
# This internally uses URI.encode_www_component(str).
|
# This internally uses URI.encode_www_form_component(str).
|
||||||
#
|
#
|
||||||
# This doesn't convert encodings of given items, so convert them before calling
|
# This doesn't convert encodings of given items, so convert them before calling
|
||||||
# this method if you want to send data as other than original encoding or
|
# this method if you want to send data as other than original encoding or
|
||||||
|
@ -789,7 +800,7 @@ module URI
|
||||||
#
|
#
|
||||||
# This refers to http://www.w3.org/TR/html5/forms.html#url-encoded-form-data
|
# This refers to http://www.w3.org/TR/html5/forms.html#url-encoded-form-data
|
||||||
#
|
#
|
||||||
# See URI.encode_www_component(str)
|
# See URI.encode_www_form_component, URI.decode_www_form
|
||||||
def self.encode_www_form(enum)
|
def self.encode_www_form(enum)
|
||||||
str = nil
|
str = nil
|
||||||
enum.each do |k,v|
|
enum.each do |k,v|
|
||||||
|
@ -798,12 +809,43 @@ module URI
|
||||||
else
|
else
|
||||||
str = ''.force_encoding(Encoding::US_ASCII)
|
str = ''.force_encoding(Encoding::US_ASCII)
|
||||||
end
|
end
|
||||||
str << encode_www_component(k)
|
str << encode_www_form_component(k)
|
||||||
str << '='
|
str << '='
|
||||||
str << encode_www_component(v)
|
str << encode_www_form_component(v)
|
||||||
end
|
end
|
||||||
str
|
str
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# Decode URL-encoded form data from given +str+.
|
||||||
|
#
|
||||||
|
# This decodes application/x-www-form-urlencoded data
|
||||||
|
# and returns an array of key-value arrays.
|
||||||
|
# This internally uses URI.decode_www_form_component.
|
||||||
|
#
|
||||||
|
# _charset_ hack is not supported now because the mapping from given charset
|
||||||
|
# to Ruby's encoding is not clear yet.
|
||||||
|
# see also http://www.w3.org/TR/html5/syntax.html#character-encodings-0
|
||||||
|
#
|
||||||
|
# This refers to http://www.w3.org/TR/html5/forms.html#url-encoded-form-data
|
||||||
|
#
|
||||||
|
# ary = URI.decode_www_form("a=1&a=2&b=3")
|
||||||
|
# p ary #=> [['a', '1'], ['a', '2'], ['b', '3']]
|
||||||
|
# p ary.assoc('a').last #=> '1'
|
||||||
|
# p ary.assoc('b').last #=> '3'
|
||||||
|
# p ary.rassoc('a').last #=> '2'
|
||||||
|
# p Hash[ary] # => {"a"=>"2", "b"=>"3"}
|
||||||
|
#
|
||||||
|
# See URI.decode_www_form_component, URI.encode_www_form
|
||||||
|
def self.decode_www_form(str, enc=Encoding::UTF_8)
|
||||||
|
ary = []
|
||||||
|
unless /\A\??(?<query>[^=;&]*=[^;&]*(?:[;&][^=;&]*=[^;&]*)*)\z/ =~ str
|
||||||
|
raise ArgumentError, "invalid data of application/x-www-form-urlencoded (#{str})"
|
||||||
|
end
|
||||||
|
query.scan(/([^=;&]+)=([^;&]*)/) do
|
||||||
|
ary << [decode_www_form_component($1, enc), decode_www_form_component($2, enc)]
|
||||||
|
end
|
||||||
|
ary
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
module Kernel
|
module Kernel
|
||||||
|
|
|
@ -50,16 +50,22 @@ class TestCommon < Test::Unit::TestCase
|
||||||
assert_raise(NoMethodError) { Object.new.URI("http://www.ruby-lang.org/") }
|
assert_raise(NoMethodError) { Object.new.URI("http://www.ruby-lang.org/") }
|
||||||
end
|
end
|
||||||
|
|
||||||
def test_encode_www_component
|
def test_encode_www_form_component
|
||||||
assert_equal("+%21%22%23%24%25%26%27%28%29*%2B%2C-.%2F09%3A%3B%3C%3D%3E%3F%40" \
|
assert_equal("%00+%21%22%23%24%25%26%27%28%29*%2B%2C-.%2F09%3A%3B%3C%3D%3E%3F%40" \
|
||||||
"AZ%5B%5C%5D%5E_%60az%7B%7C%7D%7E",
|
"AZ%5B%5C%5D%5E_%60az%7B%7C%7D%7E",
|
||||||
URI.encode_www_component(" !\"\#$%&'()*+,-./09:;<=>?@AZ[\\]^_`az{|}~"))
|
URI.encode_www_form_component("\x00 !\"\#$%&'()*+,-./09:;<=>?@AZ[\\]^_`az{|}~"))
|
||||||
|
assert_equal("%95%41", URI.encode_www_form_component(
|
||||||
|
"\x95\x41".force_encoding(Encoding::Shift_JIS)))
|
||||||
|
assert_equal("%30%42", URI.encode_www_form_component(
|
||||||
|
"\x30\x42".force_encoding(Encoding::UTF_16BE)))
|
||||||
|
assert_equal("%30%42", URI.encode_www_form_component(
|
||||||
|
"\x30\x42".force_encoding(Encoding::ISO_2022_JP)))
|
||||||
end
|
end
|
||||||
|
|
||||||
def test_decode_www_component
|
def test_decode_www_form_component
|
||||||
assert_equal(" !\"\#$%&'()*+,-./09:;<=>?@AZ[\\]^_`az{|}~",
|
assert_equal(" !\"\#$%&'()*+,-./09:;<=>?@AZ[\\]^_`az{|}~",
|
||||||
URI.decode_www_component(
|
URI.decode_www_form_component(
|
||||||
"+%21%22%23%24%25%26%27%28%29*%2B%2C-.%2F09%3A%3B%3C%3D%3E%3F%40" \
|
"%20+%21%22%23%24%25%26%27%28%29*%2B%2C-.%2F09%3A%3B%3C%3D%3E%3F%40" \
|
||||||
"AZ%5B%5C%5D%5E_%60az%7B%7C%7D%7E"))
|
"AZ%5B%5C%5D%5E_%60az%7B%7C%7D%7E"))
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -74,6 +80,12 @@ class TestCommon < Test::Unit::TestCase
|
||||||
assert_equal(expected, URI.encode_www_form([["a", "1"], ["\u3042", "\u6F22"]]))
|
assert_equal(expected, URI.encode_www_form([["a", "1"], ["\u3042", "\u6F22"]]))
|
||||||
assert_equal(expected, URI.encode_www_form([[:a, 1], [:"\u3042", "\u6F22"]]))
|
assert_equal(expected, URI.encode_www_form([[:a, 1], [:"\u3042", "\u6F22"]]))
|
||||||
end
|
end
|
||||||
|
|
||||||
|
def test_decode_www_form
|
||||||
|
assert_equal([%w[a 1], %w[a 2]], URI.decode_www_form("a=1&a=2"))
|
||||||
|
assert_equal([%w[a 1], ["\u3042", "\u6F22"]],
|
||||||
|
URI.decode_www_form("a=1&%E3%81%82=%E6%BC%A2"))
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue