1
0
Fork 0
mirror of https://github.com/ruby/ruby.git synced 2022-11-09 12:17:21 -05:00

* lib/uri/common.rb (URI#{en,de}code_www_form_component):

renamed from URI#{en,de}code_www_component. [ruby-dev:40672]

* lib/uri/common.rb (URI#encode_www_form_component): %-encoded
  element should have always two hex.

* lib/uri/common.rb (URI#encode_www_form_component):
  better treatment for ASCII incompatible encodings and
  encodings whose lead byte may use 7bit.

* lib/uri/common.rb (URI#decode_www_form_component): add %20.

* lib/uri/common.rb (URI#decode_www_form_component): add
  result's encoding as 2nd argument.

* lib/uri/common.rb (URI#decode_www_form): added.

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@26962 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
naruse 2010-03-17 15:35:38 +00:00
parent 4067fd028d
commit f626a17d8c
3 changed files with 106 additions and 33 deletions

View file

@ -1,3 +1,22 @@
Thu Mar 18 00:00:58 2010 NARUSE, Yui <naruse@ruby-lang.org>
* lib/uri/common.rb (URI#{en,de}code_www_form_component):
renamed from URI#{en,de}code_www_component. [ruby-dev:40672]
* lib/uri/common.rb (URI#encode_www_form_component): %-encoded
element should have always two hex.
* lib/uri/common.rb (URI#encode_www_form_component):
better treatment for ASCII incompatible encodings and
encodings whose lead byte may use 7bit.
* lib/uri/common.rb (URI#decode_www_form_component): add %20.
* lib/uri/common.rb (URI#decode_www_form_component): add
result's encoding as 2nd argument.
* lib/uri/common.rb (URI#decode_www_form): added.
Wed Mar 17 16:25:53 2010 Nobuyoshi Nakada <nobu@ruby-lang.org>
* hash.c (rb_hash_aset): allow recursive key. [ruby-core:24648]

View file

@ -729,49 +729,60 @@ module URI
#
# This refers to http://www.w3.org/TR/html5/forms.html#url-encoded-form-data
#
# See URI.decode_www_form_component, URI.encode_www_form
#
# +str+ is converted with +to_s+ and is never modified; the result is a new
# US-ASCII string.  How much of the input gets escaped depends on its
# encoding:
#
# * ASCII-8BIT, US-ASCII, UTF-8: every byte outside <tt>*-.0-9A-Za-z_</tt>
#   is escaped (SP becomes +).
# * UTF-16/UTF-32 and other ASCII-compatible encodings: every run of
#   characters outside that set is escaped byte by byte.
# * any other (ASCII-incompatible) encoding: every byte is escaped.
def self.encode_www_form_component(str)
  # Build the byte -> escape table on first use.  It holds an entry for
  # every byte value (even the "safe" ones) because the multibyte branches
  # below escape whole runs of raw bytes.
  if TBLENCWWWCOMP_.empty?
    256.times do |i|
      TBLENCWWWCOMP_[i.chr] = (i == 0x20 ? '+' : format('%%%02X', i))
    end
    TBLENCWWWCOMP_.freeze
  end

  str = str.to_s
  enc = str.encoding
  binary = Encoding::ASCII_8BIT

  encoded =
    case enc
    when Encoding::ASCII_8BIT, Encoding::US_ASCII, Encoding::UTF_8
      str.dup.force_encoding(binary).gsub(/[^*\-.0-9A-Z_a-z]/, TBLENCWWWCOMP_)
    when Encoding::UTF_16BE, Encoding::UTF_16LE, Encoding::UTF_32BE, Encoding::UTF_32LE
      unsafe = Regexp.new('[^*\-.0-9A-Z_a-z]+'.encode(enc))
      str.gsub(unsafe) do |run|
        # /m so that a newline byte inside the run is escaped as well.
        run.force_encoding(binary).gsub(/./m, TBLENCWWWCOMP_).force_encoding(enc)
      end
    else
      if enc.ascii_compatible?
        str.gsub(/[^*\-.0-9A-Z_a-z]+/) do |run|
          run.force_encoding(binary).gsub(/./m, TBLENCWWWCOMP_)
        end
      else
        # Work on a copy: force_encoding on +str+ itself would re-tag the
        # caller's string (and raise if it is frozen).
        str.dup.force_encoding(binary).gsub(/./m, TBLENCWWWCOMP_)
      end
    end

  encoded.force_encoding(Encoding::US_ASCII)
end
# Decode given +str+ of URL-encoded form data.
#
# This decodes + to SP and every %XX escape (upper- or lower-case hex
# digits) to the byte it denotes.  Text that is not an escape is copied
# through unchanged.  The returned string is new and is tagged with +enc+
# (UTF-8 when omitted); +str+ itself is not modified.
#
# See URI.encode_www_form_component, URI.decode_www_form
def self.decode_www_form_component(str, enc=Encoding::UTF_8)
  # Build the escape -> byte table on first use: all four hex-case
  # spellings of each byte value, plus '+' for SP.
  if TBLDECWWWCOMP_.empty?
    256.times do |i|
      byte = i.chr
      hi, lo = i >> 4, i & 15
      %w[%%%X%X %%%x%X %%%X%x %%%x%x].each do |fmt|
        TBLDECWWWCOMP_[fmt % [hi, lo]] = byte
      end
    end
    TBLDECWWWCOMP_['+'] = ' '
    TBLDECWWWCOMP_.freeze
  end

  # Substitute on a binary copy so that raw non-ASCII text mixed with
  # decoded high bytes cannot raise Encoding::CompatibilityError, then tag
  # the bytes with the encoding the caller asked for.
  str.dup.force_encoding(Encoding::ASCII_8BIT).
    gsub(/\+|%\h\h/, TBLDECWWWCOMP_).force_encoding(enc)
end
# Generate URL-encoded form data from given +enum+.
@ -779,7 +790,7 @@ module URI
# This generates application/x-www-form-urlencoded data defined in HTML5
# from given an Enumerable object.
#
# This internally uses URI.encode_www_component(str).
# This internally uses URI.encode_www_form_component(str).
#
# This doesn't convert the encodings of the given items, so convert them before calling
# this method if you want to send data as other than original encoding or
@ -789,7 +800,7 @@ module URI
#
# This refers to http://www.w3.org/TR/html5/forms.html#url-encoded-form-data
#
# See URI.encode_www_component(str)
# See URI.encode_www_form_component, URI.decode_www_form
def self.encode_www_form(enum)
str = nil
enum.each do |k,v|
@ -798,12 +809,43 @@ module URI
else
str = ''.force_encoding(Encoding::US_ASCII)
end
str << encode_www_component(k)
str << encode_www_form_component(k)
str << '='
str << encode_www_component(v)
str << encode_www_form_component(v)
end
str
end
# Decode URL-encoded form data from given +str+.
#
# This decodes application/x-www-form-urlencoded data
# and returns an array of [key, value] arrays, in input order.
# Pairs are separated by & or ;, and one leading ? is ignored.
# Keys and values are decoded with URI.decode_www_form_component, which
# receives +enc+ as the result encoding.
#
# An empty +str+ yields an empty array.  A pair with an empty key
# (e.g. "=1") is kept as ["", "1"].  Any other input that is not a
# sequence of key=value pairs raises ArgumentError.
#
# _charset_ hack is not supported now because the mapping from given charset
# to Ruby's encoding is not clear yet.
# see also http://www.w3.org/TR/html5/syntax.html#character-encodings-0
#
# This refers to http://www.w3.org/TR/html5/forms.html#url-encoded-form-data
#
# ary = URI.decode_www_form("a=1&a=2&b=3")
# p ary #=> [['a', '1'], ['a', '2'], ['b', '3']]
# p ary.assoc('a').last #=> '1'
# p ary.assoc('b').last #=> '3'
# p ary.rassoc('2').first #=> 'a'
# p Hash[ary] # => {"a"=>"2", "b"=>"3"}
#
# See URI.decode_www_form_component, URI.encode_www_form
def self.decode_www_form(str, enc=Encoding::UTF_8)
  return [] if str.empty?
  unless /\A\??(?<query>[^=;&]*=[^;&]*(?:[;&][^=;&]*=[^;&]*)*)\z/ =~ str
    raise ArgumentError, "invalid data of application/x-www-form-urlencoded (#{str})"
  end
  # Validation above guarantees every segment contains '='; splitting on the
  # first '=' keeps empty keys and lets values contain further '=' signs.
  query.split(/[;&]/).map do |pair|
    key, _, val = pair.partition('=')
    [decode_www_form_component(key, enc), decode_www_form_component(val, enc)]
  end
end
end
module Kernel

View file

@ -50,16 +50,22 @@ class TestCommon < Test::Unit::TestCase
assert_raise(NoMethodError) { Object.new.URI("http://www.ruby-lang.org/") }
end
# NOTE(review): this hunk shows both the removed test_encode_www_component
# lines and the added test_encode_www_form_component lines.
# The added assertions expect NUL to be escaped as %00, SP as '+', and the
# Shift_JIS, UTF-16BE and ISO-2022-JP inputs to have both of their bytes
# escaped with two hex digits each.
def test_encode_www_component
assert_equal("+%21%22%23%24%25%26%27%28%29*%2B%2C-.%2F09%3A%3B%3C%3D%3E%3F%40" \
def test_encode_www_form_component
assert_equal("%00+%21%22%23%24%25%26%27%28%29*%2B%2C-.%2F09%3A%3B%3C%3D%3E%3F%40" \
"AZ%5B%5C%5D%5E_%60az%7B%7C%7D%7E",
URI.encode_www_component(" !\"\#$%&'()*+,-./09:;<=>?@AZ[\\]^_`az{|}~"))
URI.encode_www_form_component("\x00 !\"\#$%&'()*+,-./09:;<=>?@AZ[\\]^_`az{|}~"))
assert_equal("%95%41", URI.encode_www_form_component(
"\x95\x41".force_encoding(Encoding::Shift_JIS)))
assert_equal("%30%42", URI.encode_www_form_component(
"\x30\x42".force_encoding(Encoding::UTF_16BE)))
assert_equal("%30%42", URI.encode_www_form_component(
"\x30\x42".force_encoding(Encoding::ISO_2022_JP)))
end
# NOTE(review): this hunk shows both the removed test_decode_www_component
# lines and the added test_decode_www_form_component lines.
# The added input starts with both %20 and '+', so the test exercises the
# two spellings of SP in addition to the %XX escapes for ASCII punctuation.
def test_decode_www_component
assert_equal(" !\"\#$%&'()*+,-./09:;<=>?@AZ[\\]^_`az{|}~",
URI.decode_www_component(
"+%21%22%23%24%25%26%27%28%29*%2B%2C-.%2F09%3A%3B%3C%3D%3E%3F%40" \
def test_decode_www_form_component
assert_equal(" !\"\#$%&'()*+,-./09:;<=>?@AZ[\\]^_`az{|}~",
URI.decode_www_form_component(
"%20+%21%22%23%24%25%26%27%28%29*%2B%2C-.%2F09%3A%3B%3C%3D%3E%3F%40" \
"AZ%5B%5C%5D%5E_%60az%7B%7C%7D%7E"))
end
@ -74,6 +80,12 @@ class TestCommon < Test::Unit::TestCase
assert_equal(expected, URI.encode_www_form([["a", "1"], ["\u3042", "\u6F22"]]))
assert_equal(expected, URI.encode_www_form([[:a, 1], [:"\u3042", "\u6F22"]]))
end
# URI.decode_www_form must return the [key, value] pairs in input order,
# keep repeated keys, and decode %-escaped UTF-8 keys and values.
def test_decode_www_form
  expectations = {
    "a=1&a=2" => [%w[a 1], %w[a 2]],
    "a=1&%E3%81%82=%E6%BC%A2" => [%w[a 1], ["\u3042", "\u6F22"]],
  }
  expectations.each do |query, pairs|
    assert_equal(pairs, URI.decode_www_form(query))
  end
end
end