* lib/soap/**/*.rb, lib/wsdl/**/*.rb, lib/xsd/**/*.rb: changed license; GPL2 -> Ruby's.
* lib/soap/rpc/driver.rb, lib/soap/wsdlDriver.rb, lib/soap/streamHandler.rb: add interface to streamhandler.
* lib/soap/marshal.rb: raise error if parse fails.
* lib/soap/netHttpClient.rb: add https support. Patched by Oliver M. Bolzer.
* lib/soap/netHttpClient.rb: dump HTTP response message body by itself.
* lib/soap/rpc/driver.rb, lib/soap/rpc/proxy.rb, lib/soap/wsdlDriver.rb: add driver#mandatorycharset interface to force using a charset when parsing a response from a buggy server.
* lib/soap/encodingstyle/soapHandler.rb: support Apache Axis's half typed multi-ref array.
* lib/soap/mapping/factory.rb, lib/soap/mapping/registry.rb: map a SOAPStruct that has multiple accessors with the same name to an array.
* lib/soap/rpc/element.rb: fixed illegal parameter order.
* lib/soap/rpc/element.rb: element name of response message could have the name other than 'return'.
* lib/wsdl/operation.rb, lib/wsdl/operationBinding.rb, lib/wsdl/soap/classDefCreator.rb, lib/wsdl/soap/methodDefCreator.rb, lib/wsdl/soap/methodDefCreatorSupport.rb: WSDL/1.1 allows multiple fault definitions in an operation. [ruby-talk:84948]
* test/wsdl/multiplefault.wsdl, test/wsdl/test_multiplefault.rb: add test for above fix.
* lib/wsdl/soap/complexType.rb: support WSDL array definition with maxOccurs="unbounded".
* lib/xsd/charset.rb: use cp932 under emx. Patched by Siena. / SHINAGAWA, Norihide in [ruby-dev:21972]
* lib/xsd/xmlparser/parser.rb: set @charset nil by default. Nil means 'follow encoding declaration in XML'.
* sample/soap/digraph.rb, sample/wsdl/amazon/wsdlDriver.rb, sample/wsdl/googleSearch/sampleClient.rb, sample/wsdl/googleSearch/wsdlDriver.rb, test/wsdl/test_emptycomplextype.rb, test/wsdl/marshal/test_wsdlmarshal.rb, test/xsd/test_xmlschemaparser.rb: use File.open(...) { |f| f.read } instead of File.open(...).read. [ruby-dev:21964]
* test/wsdl/emptycomplextype.wsdl, test/wsdl/test_emptycomplextype.rb: simplify the test case.
* test/wsdl/axisArray/*: add tests for axis's array encoding.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@5022 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2003-11-25 02:31:33 -05:00
|
|
|
# XSD4R - Charset handling library.
|
2005-09-15 10:33:53 -04:00
|
|
|
# Copyright (C) 2001, 2003, 2005 NAKAMURA, Hiroshi <nahi@ruby-lang.org>.
|
2003-09-24 11:18:44 -04:00
|
|
|
|
* lib/soap/**/*.rb, lib/wsdl/**/*.rb, lib/xsd/**/*.rb: changed license; GPL2 -> Ruby's.
* lib/soap/rpc/driver.rb, lib/soap/wsdlDriver.rb, lib/soap/streamHandler.rb: add interface to streamhandler.
* lib/soap/marshal.rb: raise error if parse fails.
* lib/soap/netHttpClient.rb: add https support. Patched by Oliver M. Bolzer.
* lib/soap/netHttpClient.rb: dump HTTP response message body by itself.
* lib/soap/rpc/driver.rb, lib/soap/rpc/proxy.rb, lib/soap/wsdlDriver.rb: add driver#mandatorycharset interface to force using a charset when parsing a response from a buggy server.
* lib/soap/encodingstyle/soapHandler.rb: support Apache Axis's half typed multi-ref array.
* lib/soap/mapping/factory.rb, lib/soap/mapping/registry.rb: map a SOAPStruct that has multiple accessors with the same name to an array.
* lib/soap/rpc/element.rb: fixed illegal parameter order.
* lib/soap/rpc/element.rb: element name of response message could have the name other than 'return'.
* lib/wsdl/operation.rb, lib/wsdl/operationBinding.rb, lib/wsdl/soap/classDefCreator.rb, lib/wsdl/soap/methodDefCreator.rb, lib/wsdl/soap/methodDefCreatorSupport.rb: WSDL/1.1 allows multiple fault definitions in an operation. [ruby-talk:84948]
* test/wsdl/multiplefault.wsdl, test/wsdl/test_multiplefault.rb: add test for above fix.
* lib/wsdl/soap/complexType.rb: support WSDL array definition with maxOccurs="unbounded".
* lib/xsd/charset.rb: use cp932 under emx. Patched by Siena. / SHINAGAWA, Norihide in [ruby-dev:21972]
* lib/xsd/xmlparser/parser.rb: set @charset nil by default. Nil means 'follow encoding declaration in XML'.
* sample/soap/digraph.rb, sample/wsdl/amazon/wsdlDriver.rb, sample/wsdl/googleSearch/sampleClient.rb, sample/wsdl/googleSearch/wsdlDriver.rb, test/wsdl/test_emptycomplextype.rb, test/wsdl/marshal/test_wsdlmarshal.rb, test/xsd/test_xmlschemaparser.rb: use File.open(...) { |f| f.read } instead of File.open(...).read. [ruby-dev:21964]
* test/wsdl/emptycomplextype.wsdl, test/wsdl/test_emptycomplextype.rb: simplify the test case.
* test/wsdl/axisArray/*: add tests for axis's array encoding.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@5022 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2003-11-25 02:31:33 -05:00
|
|
|
# This program is copyrighted free software by NAKAMURA, Hiroshi. You can
|
|
|
|
# redistribute it and/or modify it under the same terms of Ruby's license;
|
|
|
|
# either the dual license version in 2003, or any later version.
|
2003-09-24 11:18:44 -04:00
|
|
|
|
|
|
|
|
|
|
|
module XSD
|
|
|
|
|
|
|
|
|
|
|
|
module Charset
|
2005-09-15 10:33:53 -04:00
|
|
|
# Seed the internal encoding from the interpreter-wide $KCODE setting.
# Charset.init may replace it with 'UTF8' when a Unicode-capable backend loads.
@internal_encoding = $KCODE
|
2003-09-24 11:18:44 -04:00
|
|
|
|
|
|
|
# Root of the error hierarchy declared here; CharsetError derives from it.
class XSDError < StandardError
end
|
|
|
|
# Common parent of the charset-specific errors (UnknownCharsetError,
# CharsetConversionError).
class CharsetError < XSDError
end
|
|
|
|
# Raised by Charset.is_ces when asked about a charset code it does not handle.
class UnknownCharsetError < CharsetError
end
|
|
|
|
# Raised by Charset.encoding_conv when no converter is registered for the
# requested [from, to] pair.
class CharsetConversionError < CharsetError
end
|
|
|
|
|
|
|
|
public
|
|
|
|
|
|
|
|
###
|
|
|
|
## Maps
|
|
|
|
#
|
|
|
|
# Converter registry.  Keys are two-element arrays [from, to] of encoding
# names ('UTF8', 'EUC', 'SJIS', ...); values are callables (Proc or Method
# objects) that take the string to convert.  Filled in by Charset.init.
EncodingConvertMap = {}
|
|
|
|
# Fills EncodingConvertMap with every converter that can be set up in this
# process.
#
# * UTF-8 <-> ISO-8859-1 is always registered (plain unpack/pack).
# * 'xsd/iconvcharset' is tried first.  When it loads, all UTF8/EUC/SJIS
#   pairs are routed through IconvCharset.safe_iconv and the internal
#   encoding becomes 'UTF8'.
# * When it cannot be loaded, 'nkf' (EUC <-> SJIS) and 'uconv'
#   (UTF8 <-> EUC/SJIS) are tried independently; a missing library is
#   silently skipped.  Loading 'uconv' also switches the internal encoding
#   to 'UTF8'.
def Charset.init
  # CharsetMap spells the ISO-8859-1 name 'X_ISO_8859_1', and that is the
  # name Charset.charset_str hands to Charset.encoding_conv.  These
  # converters used to be registered only under 'X_ISO8859_1', so a lookup
  # driven by the 'iso-8859-1' label never found them.  Register both
  # spellings; the legacy one stays for callers that pass it directly.
  utf8_to_latin1 = Proc.new { |str| str.unpack('U*').pack('C*') }
  latin1_to_utf8 = Proc.new { |str| str.unpack('C*').pack('U*') }
  ['X_ISO_8859_1', 'X_ISO8859_1'].each do |latin1|
    EncodingConvertMap[['UTF8', latin1]] = utf8_to_latin1
    EncodingConvertMap[[latin1, 'UTF8']] = latin1_to_utf8
  end
  begin
    require 'xsd/iconvcharset'
    @internal_encoding = 'UTF8'
    # Windows-like platforms and emx use the cp932 name for Shift_JIS.
    sjtag = (/(mswin|bccwin|mingw|cygwin|emx)/ =~ RUBY_PLATFORM) ? 'cp932' :
      'shift_jis'
    # NOTE(review): safe_iconv appears to take (to, from, str), judging by
    # the map keys each call is stored under -- confirm in xsd/iconvcharset.
    EncodingConvertMap[['UTF8', 'EUC' ]] =
      Proc.new { |str| IconvCharset.safe_iconv("euc-jp", "utf-8", str) }
    EncodingConvertMap[['EUC' , 'UTF8']] =
      Proc.new { |str| IconvCharset.safe_iconv("utf-8", "euc-jp", str) }
    EncodingConvertMap[['EUC' , 'SJIS']] =
      Proc.new { |str| IconvCharset.safe_iconv(sjtag, "euc-jp", str) }
    EncodingConvertMap[['UTF8', 'SJIS']] =
      Proc.new { |str| IconvCharset.safe_iconv(sjtag, "utf-8", str) }
    EncodingConvertMap[['SJIS', 'UTF8']] =
      Proc.new { |str| IconvCharset.safe_iconv("utf-8", sjtag, str) }
    EncodingConvertMap[['SJIS', 'EUC' ]] =
      Proc.new { |str| IconvCharset.safe_iconv("euc-jp", sjtag, str) }
  rescue LoadError
    # No iconv wrapper: fall back to whichever of nkf / uconv is installed.
    begin
      require 'nkf'
      EncodingConvertMap[['EUC' , 'SJIS']] =
        Proc.new { |str| NKF.nkf('-sXm0', str) }
      EncodingConvertMap[['SJIS', 'EUC' ]] =
        Proc.new { |str| NKF.nkf('-eXm0', str) }
    rescue LoadError
    end

    begin
      require 'uconv'
      @internal_encoding = 'UTF8'
      EncodingConvertMap[['UTF8', 'EUC' ]] = Uconv.method(:u8toeuc)
      EncodingConvertMap[['UTF8', 'SJIS']] = Uconv.method(:u8tosjis)
      EncodingConvertMap[['EUC' , 'UTF8']] = Uconv.method(:euctou8)
      EncodingConvertMap[['SJIS', 'UTF8']] = Uconv.method(:sjistou8)
    rescue LoadError
    end
  end
end
|
|
|
|
# Register the available converters once, while the module body is evaluated.
self.init
|
|
|
|
|
|
|
|
# Maps the library's encoding names (keys) to charset labels (values).
# Charset.charset_label reads it key -> value; Charset.charset_str searches
# it value -> key.  'X_UNKNOWN' deliberately has no label (nil).
CharsetMap = {
|
|
|
|
'NONE' => 'us-ascii',
|
|
|
|
'EUC' => 'euc-jp',
|
|
|
|
'SJIS' => 'shift_jis',
|
|
|
|
'UTF8' => 'utf-8',
|
2005-09-15 10:33:53 -04:00
|
|
|
'X_ISO_8859_1' => 'iso-8859-1',
|
|
|
|
'X_UNKNOWN' => nil,
|
2003-09-24 11:18:44 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
###
|
|
|
|
## handlers
|
|
|
|
#
|
|
|
|
# Reader for the encoding name the library currently treats as internal.
#
# Returns whatever is stored in @internal_encoding (the $KCODE value at load
# time, 'UTF8' after a Unicode backend loaded, or a value set through
# Charset.encoding=).
def Charset.encoding
  @internal_encoding
end
|
|
|
|
|
2003-12-03 23:05:51 -05:00
|
|
|
# Writer for the internal encoding used by the conversion helpers.
#
# encoding:: the new value; it is stored as given, with no validation.
#
# Emits a message through warn when $DEBUG is set.
def Charset.encoding=(encoding)
|
* lib/{soap,wsdl,xsd}, test/{soap,wsdl,xsd}: imported soap4r/1.5.4.
== SOAP client and server ==
=== for both client side and server side ===
* improved document/literal service support.
style(rpc,document)/use(encoding, literal) combination are all
supported. for the detail about combination, see
test/soap/test_style.rb.
* let WSDLEncodedRegistry#soap2obj map SOAP/OM to Ruby according to
WSDL as well as obj2soap. closes #70.
* let SOAP::Mapping::Object handle XML attribute for doc/lit service.
you can set/get XML attribute via accessor methods which as a name
'xmlattr_' prefixed (<foo name="bar"/> -> Foo#xmlattr_name).
=== client side ===
* WSDLDriver capitalized name operation bug fixed. from
1.5.3-ruby1.8.2, operation which has capitalized name (such as
KeywordSearchRequest in AWS) is defined as a method having
uncapitalized name. (converted with GenSupport.safemethodname
to handle operation name 'foo-bar'). it introduced serious
incompatibility; in the past, it was defined as a capitalized.
define capitalized method as well under that circumstance.
* added new factory interface 'WSDLDriverFactory#create_rpc_driver'
to create RPC::Driver, not WSDLDriver (RPC::Driver and WSDLDriver
are merged). 'WSDLDriverFactory#create_driver' still creates
WSDLDriver for compatibility but it warns that the method is
deprecated. please use create_rpc_driver instead of create_driver.
* allow to use an URI object as an endpoint_url even with net/http,
not http-access2.
=== server side ===
* added mod_ruby support to SOAP::CGIStub. rename a CGI script
server.cgi to server.rb and let mod_ruby's RubyHandler handles the
script. CGIStub detects if it's running under mod_ruby environment
or not.
* added fcgi support to SOAP::CGIStub. see the sample at
sample/soap/calc/server.fcgi. (almost same as server.cgi but has
fcgi handler at the bottom.)
* allow to return a SOAPFault object to respond customized SOAP fault.
* added the interface 'generate_explicit_type' for server side
(CGIStub, HTTPServer). call 'self.generate_explicit_type = true'
if you want to return simplified XML even if it's rpc/encoded
service.
== WSDL ==
=== WSDL definition ===
* improved XML Schema support such as extension, restriction,
simpleType, complexType + simpleContent, ref, length, import,
include.
* reduced "unknown element/attribute" warnings (warn only 1 time for
each QName).
* importing XSD file at schemaLocation with xsd:import.
=== code generation from WSDL ===
* generator crashed when there's '-' in defined element/attribute
name.
* added ApacheMap WSDL definition.
* sample/{soap,wsdl}: removed.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@8500 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2005-05-22 09:03:38 -04:00
|
|
|
# Diagnostic only: reported when the interpreter runs in debug mode.
warn("xsd charset is set to #{encoding}") if $DEBUG
|
2005-09-15 10:33:53 -04:00
|
|
|
@internal_encoding = encoding
|
2003-12-03 23:05:51 -05:00
|
|
|
end
|
|
|
|
|
2005-09-15 10:33:53 -04:00
|
|
|
# Charset label for the current internal encoding, as resolved by
# Charset.charset_label (nil when CharsetMap has no entry for it).
def Charset.xml_encoding_label
  current = @internal_encoding
  charset_label(current)
end
|
|
|
|
|
|
|
|
# Converts +str+ from the internal encoding to the encoding named by the
# charset label +charset+.
#
# str::     the text to convert.
# charset:: a charset label; translated to an encoding name with
#           Charset.charset_str.
#
# Returns the result of Charset.encoding_conv, which raises
# CharsetConversionError when no converter is registered for the pair.
def Charset.encoding_to_xml(str, charset)
  target = charset_str(charset)
  encoding_conv(str, @internal_encoding, target)
end
|
|
|
|
|
|
|
|
# Converts +str+ from the encoding named by the charset label +charset+ to
# the internal encoding.
#
# str::     the text to convert.
# charset:: a charset label; translated to an encoding name with
#           Charset.charset_str.
#
# Returns the result of Charset.encoding_conv, which raises
# CharsetConversionError when no converter is registered for the pair.
def Charset.encoding_from_xml(str, charset)
  source = charset_str(charset)
  encoding_conv(str, source, @internal_encoding)
end
|
|
|
|
|
|
|
|
# Converts +str+ between two encodings given by their internal names.
#
# str::      the text to convert.
# enc_from:: name of the encoding +str+ is in.
# enc_to::   name of the encoding wanted.
#
# +str+ is returned untouched when both names are equal or when either one
# is 'NONE'.  Otherwise the converter stored in EncodingConvertMap under
# [enc_from, enc_to] is called with +str+ and its result is returned.
#
# Raises CharsetConversionError when no converter is registered.
def Charset.encoding_conv(str, enc_from, enc_to)
  # Nothing to do: same encoding on both sides, or one side is unspecified.
  return str if enc_from == enc_to
  return str if enc_from == 'NONE' || enc_to == 'NONE'

  converter = EncodingConvertMap[[enc_from, enc_to]]
  unless converter
    raise CharsetConversionError.new(
      "Converter not found: #{enc_from} -> #{enc_to}")
  end
  converter.call(str)
end
|
|
|
|
|
|
|
|
# Looks up the charset label for an internal encoding name.
#
# encoding:: an encoding name; it is upcased before the lookup.
#
# Returns the CharsetMap value for that name, or nil when there is none.
def Charset.charset_label(encoding)
  name = encoding.upcase
  CharsetMap[name]
end
|
|
|
|
|
|
|
|
# Reverse lookup: finds the internal encoding name for a charset label.
#
# label:: a charset label; it is downcased before the lookup.
#
# Returns the CharsetMap key whose value equals the label, or 'X_UNKNOWN'
# when no entry matches.
def Charset.charset_str(label)
  wanted = label.downcase
  # Hash#key is used where the Hash offers it, Hash#index otherwise.
  name = CharsetMap.respond_to?(:key) ? CharsetMap.key(wanted) : CharsetMap.index(wanted)
  name || 'X_UNKNOWN'
end
|
|
|
|
|
2003-09-28 05:33:59 -04:00
|
|
|
# Earlier, unrestricted definition, kept for reference:
# us_ascii = '[\x00-\x7F]'
|
|
|
|
# One byte: tab, LF, CR, or anything in \x20-\x7F.
us_ascii = '[\x9\xa\xd\x20-\x7F]' # XML 1.0 restricted.
|
2005-09-15 10:33:53 -04:00
|
|
|
# Anchored with \A and \z, so it matches only when the whole string (possibly
# empty) consists of such bytes.
# NOTE(review): the third argument "NONE" is presumably the regexp character
# code setting, so bytes are matched individually -- confirm against the
# Regexp.new signature of the Ruby version in use.
USASCIIRegexp = Regexp.new("\\A#{us_ascii}*\\z", nil, "NONE")
|
2003-09-28 05:33:59 -04:00
|
|
|
|
2003-09-24 11:18:44 -04:00
|
|
|
# Two-byte sequence: lead byte \x8E or \xA1-\xFE, trail byte \xA1-\xFE.
twobytes_euc = '(?:[\x8E\xA1-\xFE][\xA1-\xFE])'
|
|
|
|
# Three-byte sequence: \x8F followed by two bytes in \xA1-\xFE.
threebytes_euc = '(?:\x8F[\xA1-\xFE][\xA1-\xFE])'
|
2005-09-15 10:33:53 -04:00
|
|
|
# One character: a restricted-ASCII byte or one of the sequences above.
character_euc = "(?:#{us_ascii}|#{twobytes_euc}|#{threebytes_euc})"
|
|
|
|
# Matches only when the whole string is a run of such characters.
EUCRegexp = Regexp.new("\\A#{character_euc}*\\z", nil, "NONE")
|
2003-09-24 11:18:44 -04:00
|
|
|
|
|
|
|
# Earlier, unrestricted definition, kept for reference:
# onebyte_sjis = '[\x00-\x7F\xA1-\xDF]'
|
|
|
|
# One byte: tab, LF, CR, \x20-\x7F, or \xA1-\xDF.
onebyte_sjis = '[\x9\xa\xd\x20-\x7F\xA1-\xDF]' # XML 1.0 restricted.
|
|
|
|
# Two-byte sequence: lead byte \x81-\x9F or \xE0-\xFC, trail byte \x40-\x7E
# or \x80-\xFC.
twobytes_sjis = '(?:[\x81-\x9F\xE0-\xFC][\x40-\x7E\x80-\xFC])'
|
2005-09-15 10:33:53 -04:00
|
|
|
# One character: either of the two forms above.
character_sjis = "(?:#{onebyte_sjis}|#{twobytes_sjis})"
|
|
|
|
# Matches only when the whole string is a run of such characters.
SJISRegexp = Regexp.new("\\A#{character_sjis}*\\z", nil, "NONE")
|
2003-09-24 11:18:44 -04:00
|
|
|
|
|
|
|
# Bit layouts of the UTF-8 forms; the one-byte form reuses us_ascii.
# 0xxxxxxx
|
|
|
|
# 110yyyyy 10xxxxxx
|
|
|
|
# NOTE(review): the lead-byte ranges below check structure only; lead bytes
# \xC0/\xC1 (overlong forms) are accepted as well -- confirm that is intended.
twobytes_utf8 = '(?:[\xC0-\xDF][\x80-\xBF])'
|
|
|
|
# 1110zzzz 10yyyyyy 10xxxxxx
|
|
|
|
threebytes_utf8 = '(?:[\xE0-\xEF][\x80-\xBF][\x80-\xBF])'
|
|
|
|
# 11110uuu 10uuuzzz 10yyyyyy 10xxxxxx
|
|
|
|
fourbytes_utf8 = '(?:[\xF0-\xF7][\x80-\xBF][\x80-\xBF][\x80-\xBF])'
|
2005-09-15 10:33:53 -04:00
|
|
|
# One character: a restricted-ASCII byte or a 2-, 3- or 4-byte sequence.
character_utf8 =
|
|
|
|
"(?:#{us_ascii}|#{twobytes_utf8}|#{threebytes_utf8}|#{fourbytes_utf8})"
|
|
|
|
# Matches only when the whole string is a run of such characters.
UTF8Regexp = Regexp.new("\\A#{character_utf8}*\\z", nil, "NONE")
|
2003-09-24 11:18:44 -04:00
|
|
|
|
2003-09-28 05:33:59 -04:00
|
|
|
# Tests +str+ against USASCIIRegexp.
#
# Returns the match offset (0, since the pattern is anchored at the start)
# when the whole string matches, nil otherwise.
def Charset.is_us_ascii(str)
  USASCIIRegexp =~ str
end
|
|
|
|
|
2003-09-24 11:18:44 -04:00
|
|
|
# Tests +str+ against UTF8Regexp.
#
# Returns the match offset (0, since the pattern is anchored at the start)
# when the whole string matches, nil otherwise.
def Charset.is_utf8(str)
  UTF8Regexp =~ str
end
|
|
|
|
|
|
|
|
# Tests +str+ against EUCRegexp.
#
# Returns the match offset (0, since the pattern is anchored at the start)
# when the whole string matches, nil otherwise.
def Charset.is_euc(str)
  EUCRegexp =~ str
end
|
|
|
|
|
|
|
|
# Tests +str+ against SJISRegexp.
#
# Returns the match offset (0, since the pattern is anchored at the start)
# when the whole string matches, nil otherwise.
def Charset.is_sjis(str)
  SJISRegexp =~ str
end
|
|
|
|
|
|
|
|
# Checks whether +str+ is well-formed in the character encoding scheme
# named by +code+.
#
# str::  the string to test.
# code:: 'NONE', 'UTF8', 'EUC' or 'SJIS'; defaults to $KCODE.
#
# Returns what the matching is_us_ascii / is_utf8 / is_euc / is_sjis helper
# returns.  Raises UnknownCharsetError for any other +code+.
def Charset.is_ces(str, code = $KCODE)
  # The literal stays on the left so the comparison is the same String
  # equality the original case/when performed.
  if 'NONE' == code
    is_us_ascii(str)
  elsif 'UTF8' == code
    is_utf8(str)
  elsif 'EUC' == code
    is_euc(str)
  elsif 'SJIS' == code
    is_sjis(str)
  else
    raise UnknownCharsetError.new("Unknown charset: #{code}")
  end
end
|
|
|
|
end
|
|
|
|
|
|
|
|
|
|
|
|
end
|