mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
* lib/uri/common.rb (URI.encode_www_form_component):
convert strings of HTML5 ASCII incompatible encoding to UTF-8. * lib/uri/common.rb (URI.encode_www_form_component): "\x83\x41" of Shift_JIS should be encoded as "%83A". This follows real implementations. * lib/uri/common.rb (URI.decode_www_form_component): use given encoding for force_encoding. [ruby-dev:40721] git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@27001 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
parent
66390013a1
commit
83c2f60b88
3 changed files with 34 additions and 30 deletions
13
ChangeLog
13
ChangeLog
|
@ -1,3 +1,16 @@
|
|||
Sun Mar 21 00:46:29 2010 NARUSE, Yui <naruse@ruby-lang.org>
|
||||
|
||||
* lib/uri/common.rb (URI.encode_www_form_component):
|
||||
convert strings of HTML5 ASCII incompatible encoding
|
||||
to UTF-8.
|
||||
|
||||
* lib/uri/common.rb (URI.encode_www_form_component):
|
||||
"\x83\x41" of Shift_JIS should be encoded as "%83A".
|
||||
This follows real implementations.
|
||||
|
||||
* lib/uri/common.rb (URI.decode_www_form_component):
|
||||
use given encoding for force_encoding. [ruby-dev:40721]
|
||||
|
||||
Sun Mar 21 21:09:17 2010 Tanaka Akira <akr@fsij.org>
|
||||
|
||||
* lib/resolv-replace.rb: specify super class for rdoc.
|
||||
|
|
|
@ -722,6 +722,10 @@ module URI
|
|||
# :nodoc:
|
||||
TBLDECWWWCOMP_ = {}
|
||||
|
||||
# :nodoc:
|
||||
HTML5ASCIIINCOMPAT = [Encoding::UTF_7, Encoding::UTF_16BE, Encoding::UTF_16LE,
|
||||
Encoding::UTF_32BE, Encoding::UTF_32LE]
|
||||
|
||||
# Encode given +str+ to URL-encoded form data.
|
||||
#
|
||||
# This doesn't convert *, -, ., 0-9, A-Z, _, a-z,
|
||||
|
@ -733,35 +737,19 @@ module URI
|
|||
def self.encode_www_form_component(str)
|
||||
if TBLENCWWWCOMP_.empty?
|
||||
256.times do |i|
|
||||
case i
|
||||
when 0x20
|
||||
TBLENCWWWCOMP_[' '] = '+'
|
||||
# when 0x2A, 0x2D, 0x2E, 0x30..0x39, 0x41..0x5A, 0x5F, 0x61..0x7A
|
||||
else
|
||||
TBLENCWWWCOMP_[i.chr] = '%%%02X' % i
|
||||
end
|
||||
TBLENCWWWCOMP_[i.chr] = '%%%02X' % i
|
||||
end
|
||||
TBLENCWWWCOMP_[' '] = '+'
|
||||
TBLENCWWWCOMP_.freeze
|
||||
end
|
||||
str = str.to_s
|
||||
case str.encoding
|
||||
when Encoding::ASCII_8BIT, Encoding::US_ASCII, Encoding::UTF_8
|
||||
str = str.dup.force_encoding(Encoding::ASCII_8BIT)
|
||||
str.gsub!(/[^*\-.0-9A-Z_a-z]/, TBLENCWWWCOMP_)
|
||||
when Encoding::UTF_16BE, Encoding::UTF_16LE, Encoding::UTF_32BE, Encoding::UTF_32LE
|
||||
reg = Regexp.new('[^*\-.0-9A-Z_a-z]+'.encode(str.encoding))
|
||||
str = str.gsub(reg){
|
||||
$&.force_encoding(Encoding::ASCII_8BIT).gsub(/./, TBLENCWWWCOMP_).
|
||||
force_encoding(str.encoding)
|
||||
}
|
||||
if HTML5ASCIIINCOMPAT.include?(str.encoding)
|
||||
str = str.encode(Encoding::UTF_8)
|
||||
else
|
||||
if str.encoding.ascii_compatible?
|
||||
str = str.gsub(/[^*\-.0-9A-Z_a-z]+/){
|
||||
$&.force_encoding(Encoding::ASCII_8BIT).gsub(/./, TBLENCWWWCOMP_)}
|
||||
else
|
||||
str = str.force_encoding(Encoding::ASCII_8BIT).gsub(/./, TBLENCWWWCOMP_)
|
||||
end
|
||||
str = str.dup
|
||||
end
|
||||
str.force_encoding(Encoding::ASCII_8BIT)
|
||||
str.gsub!(/[^*\-.0-9A-Z_a-z]/, TBLENCWWWCOMP_)
|
||||
str.force_encoding(Encoding::US_ASCII)
|
||||
end
|
||||
|
||||
|
@ -778,11 +766,11 @@ module URI
|
|||
TBLDECWWWCOMP_['%%%x%X' % [h, l]] = i.chr
|
||||
TBLDECWWWCOMP_['%%%X%x' % [h, l]] = i.chr
|
||||
TBLDECWWWCOMP_['%%%x%x' % [h, l]] = i.chr
|
||||
TBLDECWWWCOMP_['+'] = ' ' if i == 0x20
|
||||
end
|
||||
TBLDECWWWCOMP_['+'] = ' '
|
||||
TBLDECWWWCOMP_.freeze
|
||||
end
|
||||
str.gsub(/\+|%\h\h/, TBLDECWWWCOMP_).force_encoding(Encoding::UTF_8)
|
||||
str.gsub(/\+|%\h\h/, TBLDECWWWCOMP_).force_encoding(enc)
|
||||
end
|
||||
|
||||
# Generate URL-encoded form data from given +enum+.
|
||||
|
@ -794,7 +782,8 @@ module URI
|
|||
#
|
||||
# This doesn't convert encodings of given items, so convert them before calling
|
||||
# this method if you want to send data as other than original encoding or
|
||||
# mixed encoding data.
|
||||
# mixed encoding data. (Strings which are encoded in an HTML5 ASCII incompatible
|
||||
# encoding are converted to UTF-8.)
|
||||
#
|
||||
# This doesn't treat files. When you send a file, use multipart/form-data.
|
||||
#
|
||||
|
|
|
@ -54,12 +54,12 @@ class TestCommon < Test::Unit::TestCase
|
|||
assert_equal("%00+%21%22%23%24%25%26%27%28%29*%2B%2C-.%2F09%3A%3B%3C%3D%3E%3F%40" \
|
||||
"AZ%5B%5C%5D%5E_%60az%7B%7C%7D%7E",
|
||||
URI.encode_www_form_component("\x00 !\"\#$%&'()*+,-./09:;<=>?@AZ[\\]^_`az{|}~"))
|
||||
assert_equal("%95%41", URI.encode_www_form_component(
|
||||
assert_equal("%95A", URI.encode_www_form_component(
|
||||
"\x95\x41".force_encoding(Encoding::Shift_JIS)))
|
||||
assert_equal("%30%42", URI.encode_www_form_component(
|
||||
assert_equal("%E3%81%82", URI.encode_www_form_component(
|
||||
"\x30\x42".force_encoding(Encoding::UTF_16BE)))
|
||||
assert_equal("%30%42", URI.encode_www_form_component(
|
||||
"\x30\x42".force_encoding(Encoding::ISO_2022_JP)))
|
||||
assert_equal("%1B%24B%24%22%1B%28B", URI.encode_www_form_component(
|
||||
"\e$B$\"\e(B".force_encoding(Encoding::ISO_2022_JP)))
|
||||
end
|
||||
|
||||
def test_decode_www_form_component
|
||||
|
@ -67,6 +67,8 @@ class TestCommon < Test::Unit::TestCase
|
|||
URI.decode_www_form_component(
|
||||
"%20+%21%22%23%24%25%26%27%28%29*%2B%2C-.%2F09%3A%3B%3C%3D%3E%3F%40" \
|
||||
"AZ%5B%5C%5D%5E_%60az%7B%7C%7D%7E"))
|
||||
assert_equal("\xA1\xA2".force_encoding(Encoding::EUC_JP),
|
||||
URI.decode_www_form_component("%A1%A2", "EUC-JP"))
|
||||
end
|
||||
|
||||
def test_encode_www_form
|
||||
|
|
Loading…
Add table
Reference in a new issue