mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
Applied Nobu's patch to the XML document encoding structure in REXML. It passes all of REXML's native tests as well as a couple of others, and should fix potential threading issues.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@8293 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
parent
ff866f0a8f
commit
47bd6a4f37
9 changed files with 98 additions and 114 deletions
|
@ -1,6 +1,16 @@
|
|||
# -*- mode: ruby; ruby-indent-level: 2; indent-tabs-mode: t; tab-width: 2 -*- vim: sw=2 ts=2
|
||||
module REXML
|
||||
module Encoding
|
||||
@@uconv_available = false
|
||||
@encoding_methods = {}
|
||||
def self.register(enc, &block)
|
||||
@encoding_methods[enc] = block
|
||||
end
|
||||
def self.apply(obj, enc)
|
||||
@encoding_methods[enc][obj]
|
||||
end
|
||||
def self.encoding_method(enc)
|
||||
@encoding_methods[enc]
|
||||
end
|
||||
|
||||
# Native, default format is UTF-8, so it is declared here rather than in
|
||||
# an encodings/ definition.
|
||||
|
@ -18,26 +28,24 @@ module REXML
|
|||
if enc and enc != UTF_8
|
||||
@encoding = enc.upcase
|
||||
begin
|
||||
load 'rexml/encodings/ICONV.rb'
|
||||
instance_eval @@__REXML_encoding_methods
|
||||
Iconv::iconv( UTF_8, @encoding, "" )
|
||||
require 'rexml/encodings/ICONV.rb'
|
||||
Encoding.apply(self, "ICONV")
|
||||
rescue LoadError, Exception => err
|
||||
raise "Bad encoding name #@encoding" unless @encoding =~ /^[\w-]+$/
|
||||
raise ArgumentError, "Bad encoding name #@encoding" unless @encoding =~ /^[\w-]+$/
|
||||
@encoding.untaint
|
||||
enc_file = File.join( "rexml", "encodings", "#@encoding.rb" )
|
||||
begin
|
||||
load enc_file
|
||||
instance_eval @@__REXML_encoding_methods
|
||||
require enc_file
|
||||
Encoding.apply(self, @encoding)
|
||||
rescue LoadError
|
||||
puts $!.message
|
||||
raise Exception.new( "No decoder found for encoding #@encoding. Please install iconv." )
|
||||
puts $!.message
|
||||
raise ArgumentError, "No decoder found for encoding #@encoding. Please install iconv."
|
||||
end
|
||||
end
|
||||
else
|
||||
enc = UTF_8
|
||||
@encoding = enc.upcase
|
||||
load 'rexml/encodings/UTF-8.rb'
|
||||
instance_eval @@__REXML_encoding_methods
|
||||
@encoding = UTF_8
|
||||
require 'rexml/encodings/UTF-8.rb'
|
||||
Encoding.apply(self, @encoding)
|
||||
end
|
||||
ensure
|
||||
$VERBOSE = old_verbosity
|
||||
|
|
|
@ -1,37 +1,20 @@
|
|||
begin
|
||||
require 'iconv'
|
||||
require 'uconv'
|
||||
|
||||
module REXML
|
||||
module Encoding
|
||||
@@__REXML_encoding_methods =<<-EOL
|
||||
def decode(str)
|
||||
return Iconv::iconv("utf-8", "euc-jp", str)[0]
|
||||
end
|
||||
|
||||
def encode content
|
||||
return Iconv::iconv("euc-jp", "utf-8", content)[0]
|
||||
end
|
||||
EOL
|
||||
module REXML
|
||||
module Encoding
|
||||
def decode_eucjp(str)
|
||||
Uconv::euctou8(str)
|
||||
end
|
||||
end
|
||||
rescue LoadError
|
||||
begin
|
||||
require 'uconv'
|
||||
|
||||
module REXML
|
||||
module Encoding
|
||||
@@__REXML_encoding_methods =<<-EOL
|
||||
def decode(str)
|
||||
return Uconv::euctou8(str)
|
||||
end
|
||||
def encode_eucjp content
|
||||
Uconv::u8toeuc(content)
|
||||
end
|
||||
|
||||
def encode content
|
||||
return Uconv::u8toeuc(content)
|
||||
end
|
||||
EOL
|
||||
register("EUC-JP") do |obj|
|
||||
class << obj
|
||||
alias decode decode_eucjp
|
||||
alias encode encode_eucjp
|
||||
end
|
||||
end
|
||||
rescue LoadError
|
||||
raise "uconv or iconv is required for Japanese encoding support."
|
||||
end
|
||||
end
|
||||
|
|
|
@ -3,14 +3,20 @@ raise LoadError unless defined? Iconv
|
|||
|
||||
module REXML
|
||||
module Encoding
|
||||
@@__REXML_encoding_methods =<<-EOL
|
||||
def decode( str )
|
||||
return Iconv::iconv("utf-8", @encoding, str)[0]
|
||||
def decode_iconv(str)
|
||||
Iconv.conv(UTF_8, @encoding, str)
|
||||
end
|
||||
|
||||
def encode( content )
|
||||
return Iconv::iconv(@encoding, "utf-8", content)[0]
|
||||
def encode_iconv(content)
|
||||
Iconv.conv(@encoding, UTF_8, content)
|
||||
end
|
||||
|
||||
register("ICONV") do |obj|
|
||||
Iconv.conv(UTF_8, obj.encoding, nil)
|
||||
class << obj
|
||||
alias decode decode_iconv
|
||||
alias encode encode_iconv
|
||||
end
|
||||
end
|
||||
EOL
|
||||
end
|
||||
end
|
||||
|
|
|
@ -1,25 +1,7 @@
|
|||
require 'rexml/encodings/US-ASCII'
|
||||
|
||||
module REXML
|
||||
module Encoding
|
||||
@@__REXML_encoding_methods = %q~
|
||||
# Convert from UTF-8
|
||||
def encode content
|
||||
array_utf8 = content.unpack('U*')
|
||||
array_enc = []
|
||||
array_utf8.each do |num|
|
||||
if num <= 0xFF
|
||||
array_enc << num
|
||||
else
|
||||
# Numeric entity (&#nnnn;); shard by Stefan Scholl
|
||||
array_enc.concat "&\##{num};".unpack('C*')
|
||||
end
|
||||
end
|
||||
array_enc.pack('C*')
|
||||
end
|
||||
|
||||
# Convert to UTF-8
|
||||
def decode(str)
|
||||
str.unpack('C*').pack('U*')
|
||||
end
|
||||
~
|
||||
register("ISO-8859-1", &encoding_method("US-ASCII"))
|
||||
end
|
||||
end
|
||||
|
|
|
@ -1,37 +1,22 @@
|
|||
begin
|
||||
require 'iconv'
|
||||
require 'uconv'
|
||||
|
||||
module REXML
|
||||
module Encoding
|
||||
@@__REXML_encoding_methods =<<-EOL
|
||||
def decode(str)
|
||||
return Iconv::iconv("utf-8", "shift_jis", str)[0]
|
||||
end
|
||||
|
||||
def encode content
|
||||
return Iconv::iconv("shift_jis", "utf-8", content)[0]
|
||||
end
|
||||
EOL
|
||||
module REXML
|
||||
module Encoding
|
||||
def decode_sjis content
|
||||
Uconv::u8tosjis(content)
|
||||
end
|
||||
end
|
||||
rescue LoadError
|
||||
begin
|
||||
require 'uconv'
|
||||
|
||||
module REXML
|
||||
module Encoding
|
||||
@@__REXML_encoding_methods =<<-EOL
|
||||
def encode(content)
|
||||
Uconv::u8tosjis(content)
|
||||
end
|
||||
def encode_sjis(str)
|
||||
Uconv::sjistou8(str)
|
||||
end
|
||||
|
||||
def decode(str)
|
||||
Uconv::sjistou8(str)
|
||||
end
|
||||
EOL
|
||||
b = proc do |obj|
|
||||
class << obj
|
||||
alias decode decode_sjis
|
||||
alias encode encode_sjis
|
||||
end
|
||||
end
|
||||
rescue LoadError
|
||||
raise "uconv or iconv is required for Japanese encoding support."
|
||||
register("SHIFT-JIS", &b)
|
||||
register("SHIFT_JIS", &b)
|
||||
end
|
||||
end
|
||||
|
|
|
@ -1,7 +1,6 @@
|
|||
module REXML
|
||||
module Encoding
|
||||
@@__REXML_encoding_methods =<<-EOL
|
||||
def encode content
|
||||
def encode_unile content
|
||||
array_utf8 = content.unpack("U*")
|
||||
array_enc = []
|
||||
array_utf8.each do |num|
|
||||
|
@ -16,7 +15,7 @@ module REXML
|
|||
array_enc.pack('C*')
|
||||
end
|
||||
|
||||
def decode(str)
|
||||
def decode_unile(str)
|
||||
array_enc=str.unpack('C*')
|
||||
array_utf8 = []
|
||||
2.step(array_enc.size-1, 2){|i|
|
||||
|
@ -24,6 +23,12 @@ module REXML
|
|||
}
|
||||
array_utf8.pack('U*')
|
||||
end
|
||||
EOL
|
||||
|
||||
register(UNILE) do |obj|
|
||||
class << obj
|
||||
alias decode decode_unile
|
||||
alias encode encode_unile
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
@ -1,8 +1,7 @@
|
|||
module REXML
|
||||
module Encoding
|
||||
@@__REXML_encoding_methods = %q~
|
||||
# Convert from UTF-8
|
||||
def encode content
|
||||
def encode_ascii content
|
||||
array_utf8 = content.unpack('U*')
|
||||
array_enc = []
|
||||
array_utf8.each do |num|
|
||||
|
@ -17,9 +16,15 @@ module REXML
|
|||
end
|
||||
|
||||
# Convert to UTF-8
|
||||
def decode(str)
|
||||
def decode_ascii(str)
|
||||
str.unpack('C*').pack('U*')
|
||||
end
|
||||
~
|
||||
|
||||
register("US-ASCII") do |obj|
|
||||
class << obj
|
||||
alias decode decode_ascii
|
||||
alias encode encode_ascii
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
@ -1,7 +1,6 @@
|
|||
module REXML
|
||||
module Encoding
|
||||
@@__REXML_encoding_methods =<<-EOL
|
||||
def encode content
|
||||
def encode_utf16 content
|
||||
array_utf8 = content.unpack("U*")
|
||||
array_enc = []
|
||||
array_utf8.each do |num|
|
||||
|
@ -16,7 +15,7 @@ module REXML
|
|||
array_enc.pack('C*')
|
||||
end
|
||||
|
||||
def decode(str)
|
||||
def decode_utf16(str)
|
||||
array_enc=str.unpack('C*')
|
||||
array_utf8 = []
|
||||
2.step(array_enc.size-1, 2){|i|
|
||||
|
@ -24,6 +23,12 @@ module REXML
|
|||
}
|
||||
array_utf8.pack('U*')
|
||||
end
|
||||
EOL
|
||||
|
||||
register(UTF_16) do |obj|
|
||||
class << obj
|
||||
alias decode decode_utf16
|
||||
alias encode encode_utf16
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
@ -1,13 +1,18 @@
|
|||
module REXML
|
||||
module Encoding
|
||||
@@__REXML_encoding_methods =<<-EOL
|
||||
def encode content
|
||||
def encode_utf8 content
|
||||
content
|
||||
end
|
||||
|
||||
def decode(str)
|
||||
def decode_utf8(str)
|
||||
str
|
||||
end
|
||||
EOL
|
||||
|
||||
register(UTF_8) do |obj|
|
||||
class << obj
|
||||
alias decode decode_utf8
|
||||
alias encode encode_utf8
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
Loading…
Reference in a new issue