From 3c419de10b113aceea7db10eb0d4521086696bc9 Mon Sep 17 00:00:00 2001 From: xibbar Date: Wed, 24 Sep 2008 14:27:53 +0000 Subject: [PATCH] * lib/cgi/core.rb (CGI::new, CGI::{accept_charset,accept_charset=}) : accept parameters either in a hash, string as a block. add the encoding validation process. * test/cgi/test_cgi_core.rb : test for query encoding check. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@19530 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- ChangeLog | 7 +++ lib/cgi/core.rb | 110 +++++++++++++++++++++++++++++++++----- test/cgi/test_cgi_core.rb | 38 +++++++++++++ 3 files changed, 143 insertions(+), 12 deletions(-) diff --git a/ChangeLog b/ChangeLog index d9255584ca..f07343a4e2 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,10 @@ +Wed Sep 24 23:12:49 2008 Takeyuki Fujioka + + * lib/cgi/core.rb (CGI::new, CGI::{accept_charset,accept_charset=}) : + accept parameters either in a hash, + string as a block. add the encoding validation process. + * test/cgi/test_cgi_core.rb : test for query encoding check. 
+ Wed Sep 24 22:58:18 2008 NAKAMURA Usaku * string.c (rb_str_rstrip_bang): raise exception when the encoding of diff --git a/lib/cgi/core.rb b/lib/cgi/core.rb index 163af7fb44..b99f5a1c7d 100644 --- a/lib/cgi/core.rb +++ b/lib/cgi/core.rb @@ -462,12 +462,12 @@ class CGI body.rewind /Content-Disposition:.* filename=(?:"((?:\\.|[^\"])*)"|([^;\s]*))/i.match(head) - filename = ($1 or $2 or "") - if /Mac/i =~ env_table['HTTP_USER_AGENT'] and - /Mozilla/i =~ env_table['HTTP_USER_AGENT'] and - /MSIE/i !~ env_table['HTTP_USER_AGENT'] - filename = CGI::unescape(filename) - end + filename = ($1 or $2 or "") + if /Mac/i =~ env_table['HTTP_USER_AGENT'] and + /Mozilla/i =~ env_table['HTTP_USER_AGENT'] and + /MSIE/i !~ env_table['HTTP_USER_AGENT'] + filename = CGI::unescape(filename) + end /Content-Type: ([^\s]*)/i.match(head) content_type = ($1 or "") @@ -598,8 +598,21 @@ class CGI stdinput.read(Integer(env_table['CONTENT_LENGTH'])) or '' else read_from_cmdline - end + end.dup.force_encoding(@accept_charset) ) + if @accept_charset!="ASCII-8BIT" || @accept_charset!=Encoding::ASCII_8BIT + @params.each do |key,values| + values.each do |value| + unless value.valid_encoding? + if @accept_charset_error_block + @accept_charset_error_block.call(key,value) + else + raise InvalidEncoding,"Accept-Charset encoding error" + end + end + end + end + end end @cookies = CGI::Cookie::parse((env_table['HTTP_COOKIE'] or env_table['COOKIE'])) @@ -646,10 +659,66 @@ class CGI end # QueryExtension + # InvalidEncoding Exception class + class InvalidEncoding < Exception; end - # Creates a new CGI instance. + # @@accept_charset is default accept character set. 
+ # The default value is "UTF-8". + # If you want to change the default accept character set + # used when creating a new CGI instance, set it like this: + # + # CGI.accept_charset = "EUC-JP" # - # +type+ specifies which version of HTML to load the HTML generation + + @@accept_charset="UTF-8" + + def self.accept_charset + @@accept_charset + end + + def self.accept_charset=(accept_charset) + @@accept_charset=accept_charset + end + + # Create a new CGI instance. + # + # CGI accepts constructor parameters as a hash or a string, and optionally a block. + # A string is the same as passing :tag_maker in a hash. + # + # CGI.new("html3") #=> CGI.new(:tag_maker=>"html3") + # + # If you pass a string, @accept_charset cannot be changed; + # use the hash form instead. + # + # == accept_charset + # + # :accept_charset specifies the encoding of the received query string. + # ( Default value is @@accept_charset. ) + # If a value is not valid in that encoding, CGI::InvalidEncoding is raised (unless a block is given). + # + # Example. Suppose @@accept_charset # => "UTF-8" + # + # when not specified: + # + # cgi=CGI.new # @accept_charset # => "UTF-8" + # + # when specified "EUC-JP": + # + # cgi=CGI.new(:accept_charset => "EUC-JP") # => "EUC-JP" + # + # == block + # + # When you pass a block, it is called with the name and value of each + # parameter whose encoding is invalid, instead of raising. Example: + # + # encoding_error={} + # cgi=CGI.new(:accept_charset=>"EUC-JP") do |name,value| + # encoding_error[name] = value + # end + # + # == tag_maker + # + # :tag_maker specifies which version of HTML to load the HTML generation # methods for. The following versions of HTML are supported: # # html3:: HTML 3.x @@ -664,8 +733,25 @@ class CGI # it will run in "offline" mode. In this mode, it reads its parameters # from the command line or (failing that) from standard input. Otherwise, # cookies and other parameters are parsed automatically from the standard - # CGI locations, which varies according to the REQUEST_METHOD.
- def initialize(type = "query") + # CGI locations, which varies according to the REQUEST_METHOD. It works this: + # + # CGI.new(:tag_maker=>"html3") + # + # This will be obsolete: + # + # CGI.new("html3") + # + attr_reader :accept_charset + def initialize(options = {},&block) + @accept_charset_error_block=block if block_given? + @options={:accept_charset=>@@accept_charset} + case options + when Hash + @options.merge!(options) + when String + @options[:tag_maker]=options + end + @accept_charset=@options[:accept_charset] if defined?(MOD_RUBY) && !ENV.key?("GATEWAY_INTERFACE") Apache.request.setup_cgi_env end @@ -677,7 +763,7 @@ class CGI @output_cookies = nil @output_hidden = nil - case type + case @options[:tag_maker] when "html3" require 'cgi/html' extend Html3 diff --git a/test/cgi/test_cgi_core.rb b/test/cgi/test_cgi_core.rb index 6f420199d3..1298ae792d 100755 --- a/test/cgi/test_cgi_core.rb +++ b/test/cgi/test_cgi_core.rb @@ -99,6 +99,44 @@ class CGICoreTest < Test::Unit::TestCase assert_equal([], cgi.params['*notfound*']) end + def test_cgi_core_params_encoding_check + query_str = 'str=%BE%BE%B9%BE' + @environ = { + 'REQUEST_METHOD' => 'POST', + 'CONTENT_LENGTH' => query_str.length.to_s, + 'SERVER_SOFTWARE' => 'Apache 2.2.0', + 'SERVER_PROTOCOL' => 'HTTP/1.1', + } + ENV.update(@environ) + $stdin = StringIO.new + $stdin << query_str + $stdin.rewind + if RUBY_VERSION>="1.9.0" + hash={} + cgi = CGI.new(:accept_charset=>"UTF-8"){|key,val|hash[key]=val} + ## cgi[] + assert_equal("\xBE\xBE\xB9\xBE".force_encoding("ASCII-8BIT"), cgi['str']) + ## cgi.params + assert_equal(["\xBE\xBE\xB9\xBE".force_encoding("ASCII-8BIT")], cgi.params['str']) + ## accept-charset error + assert_equal({"str"=>"\xBE\xBE\xB9\xBE".force_encoding("ASCII-8BIT")},hash) + + $stdin.rewind + assert_raises(CGI::InvalidEncoding) do + cgi = CGI.new(:accept_charset=>"UTF-8") + end + + $stdin.rewind + cgi = CGI.new(:accept_charset=>"EUC-JP") + ## cgi[] + 
assert_equal("\xBE\xBE\xB9\xBE".force_encoding("EUC-JP"), cgi['str']) + ## cgi.params + assert_equal(["\xBE\xBE\xB9\xBE".force_encoding("EUC-JP")], cgi.params['str']) + else + assert(true) + end + end + def test_cgi_core_cookie @environ = {