mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
20fa0df5cd
such as HTTP_PROXY. * lib/soap/property.rb: property implementation. * lib/soap/streamHandler.rb, lib/soap/wsdlDriver.rb, lib/soap/rpc/driver.rb: use soap/property.rb. * lib/wsdl/importer.rb, lib/soap/wsdlDriver.rb, lib/soap/rpc/driver.rb: use SOAP::Env. * lib/soap/netHttpClient.rb: add basic_auth, ssl_config, and cookie management interface, but ignored for now. * lib/xsd/charset.rb: add XSD::Charset.encoding= interface to set wiredump charset explicitly. it was fixed to 'utf-8' when iconv or uconv module was found. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@5104 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
167 lines
4.7 KiB
Ruby
167 lines
4.7 KiB
Ruby
# XSD4R - Charset handling library.
# Copyright (C) 2001, 2003 NAKAMURA, Hiroshi <nahi@ruby-lang.org>.

# This program is copyrighted free software by NAKAMURA, Hiroshi. You can
# redistribute it and/or modify it under the same terms of Ruby's license;
# either the dual license version in 2003, or any later version.


module XSD


# Charset bookkeeping and conversion helpers for XSD4R.
#
# Encodings are named with the short tags 'NONE', 'EUC', 'SJIS' and 'UTF8'
# (the keys of CharsetMap); XML charset labels such as 'utf-8' are the
# corresponding values.
#
# NOTE(review): the initial encoding is taken from $KCODE. On Ruby 1.9 and
# later $KCODE reads as nil, so unless a converter library sets 'UTF8' during
# init, callers presumably have to assign Charset.encoding= themselves --
# confirm against the callers.
module Charset
  @encoding = $KCODE

  class XSDError < StandardError; end
  class CharsetError < XSDError; end
  class UnknownCharsetError < CharsetError; end
  class CharsetConversionError < CharsetError; end

public

  ###
  ## Maps
  #

  # Converter registry: [enc_from, enc_to] => callable taking the string to
  # convert. Filled in by Charset.init according to the libraries available.
  EncodingConvertMap = {}

  # Registers the converters offered by whichever conversion library can be
  # loaded. 'xsd/iconvcharset' is tried first; only when it is missing are
  # 'nkf' (EUC <-> SJIS) and 'uconv' (UTF8 <-> EUC/SJIS) tried. A missing
  # library is not an error: its converters are simply not registered.
  # The encoding is switched to 'UTF8' when iconv or uconv is found.
  def Charset.init
    begin
      require 'xsd/iconvcharset'
      @encoding = 'UTF8'
      # Windows-like platforms use the cp932 flavour of Shift_JIS.
      sjtag = (/(mswin|bccwin|mingw|cygwin|emx)/ =~ RUBY_PLATFORM) ? 'cp932' : 'shift_jis'
      # safe_iconv is called with the target charset first, then the source.
      EncodingConvertMap[['UTF8', 'EUC' ]] = Proc.new { |str| IconvCharset.safe_iconv("euc-jp", "utf-8", str) }
      EncodingConvertMap[['EUC' , 'UTF8']] = Proc.new { |str| IconvCharset.safe_iconv("utf-8", "euc-jp", str) }
      EncodingConvertMap[['EUC' , 'SJIS']] = Proc.new { |str| IconvCharset.safe_iconv(sjtag, "euc-jp", str) }
      EncodingConvertMap[['UTF8', 'SJIS']] = Proc.new { |str| IconvCharset.safe_iconv(sjtag, "utf-8", str) }
      EncodingConvertMap[['SJIS', 'UTF8']] = Proc.new { |str| IconvCharset.safe_iconv("utf-8", sjtag, str) }
      EncodingConvertMap[['SJIS', 'EUC' ]] = Proc.new { |str| IconvCharset.safe_iconv("euc-jp", sjtag, str) }
    rescue LoadError
      begin
        require 'nkf'
        EncodingConvertMap[['EUC' , 'SJIS']] = Proc.new { |str| NKF.nkf('-sXm0', str) }
        EncodingConvertMap[['SJIS', 'EUC' ]] = Proc.new { |str| NKF.nkf('-eXm0', str) }
      rescue LoadError
        # nkf is optional.
      end

      begin
        require 'uconv'
        @encoding = 'UTF8'
        EncodingConvertMap[['UTF8', 'EUC' ]] = Uconv.method(:u8toeuc)
        EncodingConvertMap[['UTF8', 'SJIS']] = Uconv.method(:u8tosjis)
        EncodingConvertMap[['EUC' , 'UTF8']] = Uconv.method(:euctou8)
        EncodingConvertMap[['SJIS', 'UTF8']] = Uconv.method(:sjistou8)
      rescue LoadError
        # uconv is optional.
      end
    end
  end
  self.init

  # Encoding tag => XML charset label.
  CharsetMap = {
    'NONE' => 'us-ascii',
    'EUC' => 'euc-jp',
    'SJIS' => 'shift_jis',
    'UTF8' => 'utf-8',
  }


  ###
  ## handlers
  #

  # Returns the current encoding tag.
  def Charset.encoding
    @encoding
  end

  # Sets the current encoding tag; reports the change on STDERR when $DEBUG
  # is set.
  def Charset.encoding=(encoding)
    STDERR.puts("xsd charset is set to #{encoding}") if $DEBUG
    @encoding = encoding
  end

  # Returns the XML charset label of the current encoding.
  def Charset.encoding_label
    charset_label(@encoding)
  end

  # Converts +str+ from the current encoding to the one named by the XML
  # charset label +charset+.
  def Charset.encoding_to_xml(str, charset)
    encoding_conv(str, @encoding, charset_str(charset))
  end

  # Converts +str+ from the encoding named by the XML charset label
  # +charset+ to the current encoding.
  def Charset.encoding_from_xml(str, charset)
    encoding_conv(str, charset_str(charset), @encoding)
  end

  # Converts +str+ from +enc_from+ to +enc_to+ (encoding tags).
  #
  # The string is returned untouched when both tags are equal or when either
  # side is 'NONE'. Raises CharsetConversionError when no converter is
  # registered for the pair.
  def Charset.encoding_conv(str, enc_from, enc_to)
    if enc_from == enc_to || enc_from == 'NONE' || enc_to == 'NONE'
      str
    elsif converter = EncodingConvertMap[[enc_from, enc_to]]
      converter.call(str)
    else
      raise CharsetConversionError.new(
        "Converter not found: #{ enc_from } -> #{ enc_to }")
    end
  end

  # Maps an encoding tag (case-insensitive) to its XML charset label, or nil
  # when the tag is unknown.
  def Charset.charset_label(encoding)
    CharsetMap[encoding.upcase]
  end

  # Maps an XML charset label (case-insensitive) back to its encoding tag, or
  # nil when the label is unknown.
  def Charset.charset_str(label)
    wanted = label.downcase
    # Hash#index was deprecated in Ruby 1.9 and removed in 3.0; Hash#key is
    # its replacement. Keep the old call for interpreters without Hash#key.
    if CharsetMap.respond_to?(:key)
      CharsetMap.key(wanted)
    else
      CharsetMap.index(wanted)
    end
  end

  # Matches +regexp+ against the raw bytes of +str+. The validation regexps
  # below are byte-oriented, so on encoding-aware Rubies the string is
  # matched through a binary-tagged copy; otherwise a non-binary string
  # containing high bytes would raise instead of yielding a result.
  def Charset.match_bytes(regexp, str)
    if str.respond_to?(:force_encoding)
      str = str.dup.force_encoding('ASCII-8BIT')
    end
    regexp =~ str
  end
  private_class_method :match_bytes

  # Builds a byte-oriented ("no encoding") regexp. The three-argument form
  # of Regexp.new is gone in Ruby 3.3, so Regexp::NOENCODING is used
  # wherever it exists.
  byte_regexp =
    if defined?(Regexp::NOENCODING)
      lambda { |source| Regexp.new(source, Regexp::NOENCODING) }
    else
      lambda { |source| Regexp.new(source, nil, "NONE") }
    end

  # us_ascii = '[\x00-\x7F]'
  us_ascii = '[\x9\xa\xd\x20-\x7F]'  # XML 1.0 restricted.
  USASCIIRegexp = byte_regexp.call("\\A#{ us_ascii }*\\z")

  twobytes_euc = '(?:[\x8E\xA1-\xFE][\xA1-\xFE])'
  threebytes_euc = '(?:\x8F[\xA1-\xFE][\xA1-\xFE])'
  character_euc = "(?:#{ us_ascii }|#{ twobytes_euc }|#{ threebytes_euc })"
  EUCRegexp = byte_regexp.call("\\A#{ character_euc }*\\z")

  # onebyte_sjis = '[\x00-\x7F\xA1-\xDF]'
  onebyte_sjis = '[\x9\xa\xd\x20-\x7F\xA1-\xDF]'  # XML 1.0 restricted.
  twobytes_sjis = '(?:[\x81-\x9F\xE0-\xFC][\x40-\x7E\x80-\xFC])'
  character_sjis = "(?:#{ onebyte_sjis }|#{ twobytes_sjis })"
  SJISRegexp = byte_regexp.call("\\A#{ character_sjis }*\\z")

  # 0xxxxxxx
  # 110yyyyy 10xxxxxx
  twobytes_utf8 = '(?:[\xC0-\xDF][\x80-\xBF])'
  # 1110zzzz 10yyyyyy 10xxxxxx
  threebytes_utf8 = '(?:[\xE0-\xEF][\x80-\xBF][\x80-\xBF])'
  # 11110uuu 10uuuzzz 10yyyyyy 10xxxxxx
  fourbytes_utf8 = '(?:[\xF0-\xF7][\x80-\xBF][\x80-\xBF][\x80-\xBF])'
  character_utf8 = "(?:#{ us_ascii }|#{ twobytes_utf8 }|#{ threebytes_utf8 }|#{ fourbytes_utf8 })"
  UTF8Regexp = byte_regexp.call("\\A#{ character_utf8 }*\\z")

  # Each predicate returns 0 (the match position of the anchored regexp)
  # when every byte of +str+ fits the encoding, and nil otherwise.

  def Charset.is_us_ascii(str)
    match_bytes(USASCIIRegexp, str)
  end

  def Charset.is_utf8(str)
    match_bytes(UTF8Regexp, str)
  end

  def Charset.is_euc(str)
    match_bytes(EUCRegexp, str)
  end

  def Charset.is_sjis(str)
    match_bytes(SJISRegexp, str)
  end

  # Checks +str+ against the encoding tag +code+ (defaults to $KCODE).
  # Raises UnknownCharsetError for any tag other than 'NONE', 'UTF8', 'EUC'
  # or 'SJIS'.
  def Charset.is_ces(str, code = $KCODE)
    case code
    when 'NONE'
      is_us_ascii(str)
    when 'UTF8'
      is_utf8(str)
    when 'EUC'
      is_euc(str)
    when 'SJIS'
      is_sjis(str)
    else
      raise UnknownCharsetError.new("Unknown charset: #{ code }")
    end
  end
end


end
|