1
0
Fork 0
mirror of https://github.com/ruby/ruby.git synced 2022-11-09 12:17:21 -05:00
ruby--ruby/lib/xsd/charset.rb
nahi 29c5ab0b77 * lib/{soap,wsdl,xsd}, test/{soap,wsdl,xsd}: imported soap4r/1.5.5.
#nnn is a ticket number at http://dev.ctor.org/soap4r

          * SOAP

            * allow to configure an envelope namespace of SOAP request. (#124)
                TemporaryNamespace = 'http://www.w3.org/2003/05/soap-envelope'
                @client.options["soap.envelope.requestnamespace"] =
                  TemporaryNamespace
                @client.options["soap.envelope.responsenamespace"] =
                  TemporaryNamespace
                @client.do_proc(...)

            * let SOAP request XML indent space configurable.  see
              "soap.envelope.no_indent" option. (#130)

            * let external CES configurable.
              ex. client["soap.mapping.external_ces"] = 'SJIS'.  $KCODE is used
              by default. (#133)
                external CES ::= CES used in Ruby object of client and server
                internal CES ::= CES used in SOAP/OM

            * add iso-8859-1 external CES support. (#106)

            * fixed illegal 'qualified' handling of elements.  it caused
              ASP.NET interoperability problem. (#144)

            * added 'soap.envelope.use_numeric_character_reference' (boolean)
              option to let query XML use numeric character reference in XML,
              not plain UTF-8 character.  !GoogleSearch server seems to not
              allow plain UTF-8 character since 2005-08-15 update. (#147)

            * SOAP::Header::SimpleHeader (de)serialization throws an exception
              on !SimpleHeader.on_(in|out)bound when header is a String.  so we
              could not use a simple single element headerItem.  fixed.  thanks
              to emil. (#129)

            * out parameter of rpc operation did not work.  (#132)

            * follow HTTP redirect only if using http-access2.  (#125) (#145)

            * add a workaround for importing a WSDL whose path begins with
              drive letter.  (#115)

          * WSDL

            * SOAP Data which is defined as a simpletype was not mapped
              correctly to Ruby obj when using wsdl2ruby.rb generated classdef
              file. (#123)

            * rpc/literal support. (#118)

            * re-implemented local element qualify/unqualify control.  handles
              elementFormDefault and form in WSDL.  (#119)

            * Array of an element which has simpleType causes a crash. (#128)

            * parameterOrder may not contain return part so it can be shorter
              than parts size.  Thanks to Hugh.  (#139)

          * Samples

            * added !BasicAuth client sample. (#117)

            * added Base64 client/server sample.

            * added Flickr SOAP interface client sample. (#122)

            * added !SalesForce client sample. (#135)

            * updated Thawte CA certificate for !GoogleAdWords sample.

            * updated a client script with the newer version made by Johan.
              thanks!

            * shortened long file names. (#120)

            * fixed typo in authheader sample. (#129)

            * updated deprecated method usage.  (#138)


git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@9169 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2005-09-15 14:33:53 +00:00

187 lines
5.2 KiB
Ruby

# XSD4R - Charset handling library.
# Copyright (C) 2001, 2003, 2005 NAKAMURA, Hiroshi <nahi@ruby-lang.org>.
# This program is copyrighted free software by NAKAMURA, Hiroshi. You can
# redistribute it and/or modify it under the same terms of Ruby's license;
# either the dual license version in 2003, or any later version.
module XSD
module Charset
# Name of the encoding treated as "internal".  It starts out as the
# interpreter-wide $KCODE value; Charset.init and Charset.encoding= may
# replace it later.
@internal_encoding = $KCODE

# Root of the error hierarchy declared in this file.
class XSDError < StandardError
end

# Common parent of the charset-specific errors below.
class CharsetError < XSDError
end

# Raised by Charset.is_ces when asked about a charset name it does not know.
class UnknownCharsetError < CharsetError
end

# Raised by Charset.encoding_conv when no converter is registered for the
# requested (from, to) pair.
class CharsetConversionError < CharsetError
end

public

###
## Maps
#

# Converter registry.  Keys are [from, to] pairs of encoding names; values
# are callables taking the string to convert.  Filled in by Charset.init.
EncodingConvertMap = Hash.new
# Fills EncodingConvertMap with every converter this process can provide and
# switches the internal encoding to 'UTF8' when a Unicode-capable backend
# (xsd/iconvcharset or uconv) loads.
#
# The ISO-8859-1 <-> UTF-8 converters rely only on pack/unpack and are always
# registered.  For the Japanese encodings xsd/iconvcharset is tried first;
# only if it cannot be loaded are nkf (EUC <-> SJIS) and uconv
# (UTF8 <-> EUC/SJIS) tried, each one independently and each one optional.
# A missing library is not an error: the corresponding pairs simply stay
# unregistered.
def Charset.init
  utf8_to_latin1 = Proc.new { |str| str.unpack('U*').pack('C*') }
  latin1_to_utf8 = Proc.new { |str| str.unpack('C*').pack('U*') }

  # CharsetMap (and therefore charset_str) names iso-8859-1 'X_ISO_8859_1'.
  # The converters must be registered under that exact name, otherwise
  # encoding_conv can never find them for an iso-8859-1 document.
  EncodingConvertMap[['UTF8', 'X_ISO_8859_1']] = utf8_to_latin1
  EncodingConvertMap[['X_ISO_8859_1', 'UTF8']] = latin1_to_utf8
  # Previous spelling of the key, kept so that any caller passing it to
  # encoding_conv directly keeps working.
  EncodingConvertMap[['UTF8', 'X_ISO8859_1']] = utf8_to_latin1
  EncodingConvertMap[['X_ISO8859_1', 'UTF8']] = latin1_to_utf8

  begin
    require 'xsd/iconvcharset'
    @internal_encoding = 'UTF8'
    # Shift_JIS is addressed as cp932 on the Windows-like platforms matched
    # here and as shift_jis everywhere else.
    sjtag = (/(mswin|bccwin|mingw|cygwin|emx)/ =~ RUBY_PLATFORM) ? 'cp932' :
      'shift_jis'
    # As used below, safe_iconv takes the target encoding first, then the
    # source encoding, then the string.
    EncodingConvertMap[['UTF8', 'EUC' ]] =
      Proc.new { |str| IconvCharset.safe_iconv("euc-jp", "utf-8", str) }
    EncodingConvertMap[['EUC' , 'UTF8']] =
      Proc.new { |str| IconvCharset.safe_iconv("utf-8", "euc-jp", str) }
    EncodingConvertMap[['EUC' , 'SJIS']] =
      Proc.new { |str| IconvCharset.safe_iconv(sjtag, "euc-jp", str) }
    EncodingConvertMap[['UTF8', 'SJIS']] =
      Proc.new { |str| IconvCharset.safe_iconv(sjtag, "utf-8", str) }
    EncodingConvertMap[['SJIS', 'UTF8']] =
      Proc.new { |str| IconvCharset.safe_iconv("utf-8", sjtag, str) }
    EncodingConvertMap[['SJIS', 'EUC' ]] =
      Proc.new { |str| IconvCharset.safe_iconv("euc-jp", sjtag, str) }
  rescue LoadError
    # No iconv backend: fall back to whichever of nkf / uconv is installed.
    begin
      require 'nkf'
      EncodingConvertMap[['EUC' , 'SJIS']] =
        Proc.new { |str| NKF.nkf('-sXm0', str) }
      EncodingConvertMap[['SJIS', 'EUC' ]] =
        Proc.new { |str| NKF.nkf('-eXm0', str) }
    rescue LoadError
      # nkf is optional.
    end

    begin
      require 'uconv'
      @internal_encoding = 'UTF8'
      EncodingConvertMap[['UTF8', 'EUC' ]] = Uconv.method(:u8toeuc)
      EncodingConvertMap[['UTF8', 'SJIS']] = Uconv.method(:u8tosjis)
      EncodingConvertMap[['EUC' , 'UTF8']] = Uconv.method(:euctou8)
      EncodingConvertMap[['SJIS', 'UTF8']] = Uconv.method(:sjistou8)
    rescue LoadError
      # uconv is optional.
    end
  end
end
# Register the available converters as soon as the module body is evaluated.
Charset.init

# Internal encoding name => charset label.  charset_label reads it by key;
# charset_str searches it by value.
CharsetMap = {
  'NONE'         => 'us-ascii',
  'EUC'          => 'euc-jp',
  'SJIS'         => 'shift_jis',
  'UTF8'         => 'utf-8',
  'X_ISO_8859_1' => 'iso-8859-1',
  'X_UNKNOWN'    => nil
}
###
## handlers
#
# Returns the name of the encoding currently treated as internal, i.e. the
# current value of @internal_encoding.
def Charset.encoding
@internal_encoding
end
# Replaces the internal encoding name with +encoding+.  When $DEBUG is set
# the change is also reported through warn.
def Charset.encoding=(encoding)
  if $DEBUG
    warn("xsd charset is set to #{encoding}")
  end
  @internal_encoding = encoding
end
# Returns the charset label that charset_label associates with the current
# internal encoding.
def Charset.xml_encoding_label
  current = @internal_encoding
  charset_label(current)
end
# Converts +str+ from the internal encoding to the encoding identified by
# the charset label +charset+.  The label is first translated to an encoding
# name with charset_str; the conversion itself (and any
# CharsetConversionError) comes from encoding_conv.
def Charset.encoding_to_xml(str, charset)
  target = charset_str(charset)
  encoding_conv(str, @internal_encoding, target)
end
# Converts +str+ from the encoding identified by the charset label
# +charset+ to the internal encoding.  The label is first translated to an
# encoding name with charset_str; the conversion itself (and any
# CharsetConversionError) comes from encoding_conv.
def Charset.encoding_from_xml(str, charset)
  source = charset_str(charset)
  encoding_conv(str, source, @internal_encoding)
end
# Converts +str+ from encoding +enc_from+ to encoding +enc_to+.
#
# +str+ is returned untouched when both names are equal or when either one
# is 'NONE'.  Otherwise the converter registered in EncodingConvertMap for
# the [enc_from, enc_to] pair is called with +str+ and its result returned.
#
# Raises CharsetConversionError when no converter is registered for the pair.
def Charset.encoding_conv(str, enc_from, enc_to)
  nothing_to_do =
    (enc_from == enc_to) || (enc_from == 'NONE') || (enc_to == 'NONE')
  return str if nothing_to_do

  converter = EncodingConvertMap[[enc_from, enc_to]]
  unless converter
    raise CharsetConversionError.new(
      "Converter not found: #{enc_from} -> #{enc_to}")
  end
  converter.call(str)
end
# Looks up the charset label for an encoding name.  The name is upcased
# before the lookup, so it is matched case-insensitively against the
# CharsetMap keys.  Returns nil when CharsetMap has no such key.
def Charset.charset_label(encoding)
  normalized = encoding.upcase
  CharsetMap[normalized]
end
# Reverse lookup of charset_label: returns the CharsetMap key whose value is
# the downcased +label+, or 'X_UNKNOWN' when no entry has that value.
# Hash#key is used when the Hash provides it, Hash#index otherwise.
def Charset.charset_str(label)
  found =
    if CharsetMap.respond_to?(:key)
      CharsetMap.key(label.downcase)
    else
      CharsetMap.index(label.downcase)
    end
  found || 'X_UNKNOWN'
end
# Patterns used by the is_* predicates to test whether a whole string is
# composed only of byte sequences allowed in a given encoding.  The
# lower-case locals are fragments; each *Regexp constant wraps a fragment as
# \A(fragment)*\z so that it must cover the entire string (the empty string
# matches as well).
# NOTE(review): the third Regexp.new argument "NONE" is presumably the
# Ruby 1.8 kcode parameter selecting byte-wise matching -- confirm against
# the Ruby version this file is run on.
#
# Single bytes.  Unrestricted 7-bit variant kept for reference:
# us_ascii = '[\x00-\x7F]'
us_ascii = '[\x9\xa\xd\x20-\x7F]' # XML 1.0 restricted.
USASCIIRegexp = Regexp.new("\\A#{us_ascii}*\\z", nil, "NONE")
# EUC: a lead byte of 0x8E or 0xA1-0xFE followed by one byte in 0xA1-0xFE,
# or 0x8F followed by two bytes in 0xA1-0xFE.
twobytes_euc = '(?:[\x8E\xA1-\xFE][\xA1-\xFE])'
threebytes_euc = '(?:\x8F[\xA1-\xFE][\xA1-\xFE])'
character_euc = "(?:#{us_ascii}|#{twobytes_euc}|#{threebytes_euc})"
EUCRegexp = Regexp.new("\\A#{character_euc}*\\z", nil, "NONE")
# Shift_JIS: the restricted single bytes plus 0xA1-0xDF, or a lead byte in
# 0x81-0x9F / 0xE0-0xFC followed by a byte in 0x40-0x7E / 0x80-0xFC.
# Unrestricted single-byte variant kept for reference:
# onebyte_sjis = '[\x00-\x7F\xA1-\xDF]'
onebyte_sjis = '[\x9\xa\xd\x20-\x7F\xA1-\xDF]' # XML 1.0 restricted.
twobytes_sjis = '(?:[\x81-\x9F\xE0-\xFC][\x40-\x7E\x80-\xFC])'
character_sjis = "(?:#{onebyte_sjis}|#{twobytes_sjis})"
SJISRegexp = Regexp.new("\\A#{character_sjis}*\\z", nil, "NONE")
# UTF-8 bit layouts, one per sequence length.
# NOTE(review): these patterns check lead/continuation byte ranges only;
# they do not appear to exclude overlong forms such as lead bytes 0xC0/0xC1.
# 0xxxxxxx
# 110yyyyy 10xxxxxx
twobytes_utf8 = '(?:[\xC0-\xDF][\x80-\xBF])'
# 1110zzzz 10yyyyyy 10xxxxxx
threebytes_utf8 = '(?:[\xE0-\xEF][\x80-\xBF][\x80-\xBF])'
# 11110uuu 10uuuzzz 10yyyyyy 10xxxxxx
fourbytes_utf8 = '(?:[\xF0-\xF7][\x80-\xBF][\x80-\xBF][\x80-\xBF])'
character_utf8 =
"(?:#{us_ascii}|#{twobytes_utf8}|#{threebytes_utf8}|#{fourbytes_utf8})"
UTF8Regexp = Regexp.new("\\A#{character_utf8}*\\z", nil, "NONE")
# Tests +str+ against USASCIIRegexp.  Returns the result of Regexp#=~:
# the match position (0, since the pattern is anchored with \A) when the
# whole string matches, nil otherwise.
def Charset.is_us_ascii(str)
USASCIIRegexp =~ str
end
# Tests +str+ against UTF8Regexp.  Returns the result of Regexp#=~:
# the match position (0, since the pattern is anchored with \A) when the
# whole string matches, nil otherwise.
def Charset.is_utf8(str)
UTF8Regexp =~ str
end
# Tests +str+ against EUCRegexp.  Returns the result of Regexp#=~:
# the match position (0, since the pattern is anchored with \A) when the
# whole string matches, nil otherwise.
def Charset.is_euc(str)
EUCRegexp =~ str
end
# Tests +str+ against SJISRegexp.  Returns the result of Regexp#=~:
# the match position (0, since the pattern is anchored with \A) when the
# whole string matches, nil otherwise.
def Charset.is_sjis(str)
SJISRegexp =~ str
end
# Checks +str+ against the pattern for the character encoding scheme named
# by +code+ (defaults to $KCODE), delegating to the matching is_* predicate
# and returning whatever that predicate returns.
#
# Recognized names are 'NONE', 'UTF8', 'EUC' and 'SJIS'; any other value
# raises UnknownCharsetError.
def Charset.is_ces(str, code = $KCODE)
  predicate =
    case code
    when 'NONE' then :is_us_ascii
    when 'UTF8' then :is_utf8
    when 'EUC'  then :is_euc
    when 'SJIS' then :is_sjis
    else
      raise UnknownCharsetError.new("Unknown charset: #{code}")
    end
  __send__(predicate, str)
end
end
end