1
0
Fork 0
mirror of https://github.com/ruby/ruby.git synced 2022-11-09 12:17:21 -05:00

Applied Nobu's patch to the XML document encoding structure in REXML. It
passes all of REXML's native tests as well as a couple of others, and should
fix potential threading issues.


git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@8293 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
ser 2005-04-09 17:03:32 +00:00
parent ff866f0a8f
commit 47bd6a4f37
9 changed files with 98 additions and 114 deletions

View file

@ -1,6 +1,16 @@
# -*- mode: ruby; ruby-indent-level: 2; indent-tabs-mode: t; tab-width: 2 -*- vim: sw=2 ts=2
module REXML
module Encoding
@@uconv_available = false
# Registry of encoding handlers, keyed by encoding name. Entries are added by
# Encoding.register and looked up by Encoding.apply / Encoding.encoding_method.
@encoding_methods = {}
# Stores +block+ as the handler for the encoding named +enc+, replacing any
# handler already stored under that name. Returns the stored block.
def self.register(enc, &block)
  @encoding_methods.store(enc, block)
end
# Looks up the handler registered under +enc+ and invokes it with +obj+,
# returning whatever the handler returns.
def self.apply(obj, enc)
  handler = @encoding_methods[enc]
  handler.call(obj)
end
# Returns the handler registered under +enc+, or nil when none is registered.
def self.encoding_method(enc)
  handlers = @encoding_methods
  handlers[enc]
end
# Native, default format is UTF-8, so it is declared here rather than in
# an encodings/ definition.
@ -18,26 +28,24 @@ module REXML
if enc and enc != UTF_8
@encoding = enc.upcase
begin
load 'rexml/encodings/ICONV.rb'
instance_eval @@__REXML_encoding_methods
Iconv::iconv( UTF_8, @encoding, "" )
require 'rexml/encodings/ICONV.rb'
Encoding.apply(self, "ICONV")
rescue LoadError, Exception => err
raise "Bad encoding name #@encoding" unless @encoding =~ /^[\w-]+$/
raise ArgumentError, "Bad encoding name #@encoding" unless @encoding =~ /^[\w-]+$/
@encoding.untaint
enc_file = File.join( "rexml", "encodings", "#@encoding.rb" )
begin
load enc_file
instance_eval @@__REXML_encoding_methods
require enc_file
Encoding.apply(self, @encoding)
rescue LoadError
puts $!.message
raise Exception.new( "No decoder found for encoding #@encoding. Please install iconv." )
puts $!.message
raise ArgumentError, "No decoder found for encoding #@encoding. Please install iconv."
end
end
else
enc = UTF_8
@encoding = enc.upcase
load 'rexml/encodings/UTF-8.rb'
instance_eval @@__REXML_encoding_methods
@encoding = UTF_8
require 'rexml/encodings/UTF-8.rb'
Encoding.apply(self, @encoding)
end
ensure
$VERBOSE = old_verbosity

View file

@ -1,37 +1,20 @@
begin
require 'iconv'
require 'uconv'
module REXML
module Encoding
@@__REXML_encoding_methods =<<-EOL
def decode(str)
return Iconv::iconv("utf-8", "euc-jp", str)[0]
end
def encode content
return Iconv::iconv("euc-jp", "utf-8", content)[0]
end
EOL
module REXML
module Encoding
# Converts +str+ from EUC-JP to UTF-8 by delegating to Uconv.euctou8.
def decode_eucjp(str)
  Uconv.euctou8(str)
end
end
rescue LoadError
begin
require 'uconv'
module REXML
module Encoding
@@__REXML_encoding_methods =<<-EOL
def decode(str)
return Uconv::euctou8(str)
end
# Converts UTF-8 +content+ to EUC-JP by delegating to Uconv.u8toeuc.
def encode_eucjp(content)
  Uconv.u8toeuc(content)
end
def encode content
return Uconv::u8toeuc(content)
end
EOL
# Register the EUC-JP handler. The block receives the object being configured
# and, on that object's singleton class only, aliases decode/encode to the
# EUC-JP conversion methods.
register("EUC-JP") do |obj|
class << obj
alias decode decode_eucjp
alias encode encode_eucjp
end
end
rescue LoadError
raise "uconv or iconv is required for Japanese encoding support."
end
end

View file

@ -3,14 +3,20 @@ raise LoadError unless defined? Iconv
module REXML
module Encoding
@@__REXML_encoding_methods =<<-EOL
def decode( str )
return Iconv::iconv("utf-8", @encoding, str)[0]
def decode_iconv(str)
Iconv.conv(UTF_8, @encoding, str)
end
def encode( content )
return Iconv::iconv(@encoding, "utf-8", content)[0]
def encode_iconv(content)
Iconv.conv(@encoding, UTF_8, content)
end
# Register the generic Iconv-backed handler under the name "ICONV".
register("ICONV") do |obj|
# NOTE(review): converting nil from obj.encoding to UTF_8 looks like a probe so
# that an encoding Iconv cannot handle raises before the aliases are installed
# -- confirm against Iconv.conv's behaviour.
Iconv.conv(UTF_8, obj.encoding, nil)
# Alias decode/encode on this object's singleton class only.
class << obj
alias decode decode_iconv
alias encode encode_iconv
end
end
EOL
end
end

View file

@ -1,25 +1,7 @@
require 'rexml/encodings/US-ASCII'
module REXML
module Encoding
@@__REXML_encoding_methods = %q~
# Convert from UTF-8
def encode content
array_utf8 = content.unpack('U*')
array_enc = []
array_utf8.each do |num|
if num <= 0xFF
array_enc << num
else
# Numeric entity (&#nnnn;); shard by Stefan Scholl
array_enc.concat "&\##{num};".unpack('C*')
end
end
array_enc.pack('C*')
end
# Convert to UTF-8
def decode(str)
str.unpack('C*').pack('U*')
end
~
# Reuse the handler already registered for US-ASCII as the ISO-8859-1 handler.
register("ISO-8859-1", &encoding_method("US-ASCII"))
end
end

View file

@ -1,37 +1,22 @@
begin
require 'iconv'
require 'uconv'
module REXML
module Encoding
@@__REXML_encoding_methods =<<-EOL
def decode(str)
return Iconv::iconv("utf-8", "shift_jis", str)[0]
end
def encode content
return Iconv::iconv("shift_jis", "utf-8", content)[0]
end
EOL
module REXML
module Encoding
# Converts Shift_JIS-encoded +content+ to UTF-8.
#
# Decoding must use Uconv.sjistou8 (Shift_JIS -> UTF-8); calling u8tosjis here
# would run the conversion in the encode direction.
def decode_sjis(content)
  Uconv::sjistou8(content)
end
end
rescue LoadError
begin
require 'uconv'
module REXML
module Encoding
@@__REXML_encoding_methods =<<-EOL
def encode(content)
Uconv::u8tosjis(content)
end
# Converts UTF-8 +str+ to Shift_JIS.
#
# Encoding must use Uconv.u8tosjis (UTF-8 -> Shift_JIS); calling sjistou8 here
# would run the conversion in the decode direction.
def encode_sjis(str)
  Uconv::u8tosjis(str)
end
def decode(str)
Uconv::sjistou8(str)
end
EOL
# Handler for Shift_JIS: on the given object's singleton class, alias
# decode/encode to the Shift_JIS conversion methods. Kept in a local so the
# same proc can be registered under more than one name.
b = proc do |obj|
class << obj
alias decode decode_sjis
alias encode encode_sjis
end
end
rescue LoadError
raise "uconv or iconv is required for Japanese encoding support."
# Register the handler proc +b+ under both the hyphenated and the underscored
# spelling of the encoding name.
register("SHIFT-JIS", &b)
register("SHIFT_JIS", &b)
end
end

View file

@ -1,7 +1,6 @@
module REXML
module Encoding
@@__REXML_encoding_methods =<<-EOL
def encode content
def encode_unile content
array_utf8 = content.unpack("U*")
array_enc = []
array_utf8.each do |num|
@ -16,7 +15,7 @@ module REXML
array_enc.pack('C*')
end
def decode(str)
def decode_unile(str)
array_enc=str.unpack('C*')
array_utf8 = []
2.step(array_enc.size-1, 2){|i|
@ -24,6 +23,12 @@ module REXML
}
array_utf8.pack('U*')
end
EOL
# Register the handler for the encoding named by the UNILE constant: on the
# given object's singleton class, alias decode/encode to the *_unile methods.
register(UNILE) do |obj|
class << obj
alias decode decode_unile
alias encode encode_unile
end
end
end
end

View file

@ -1,8 +1,7 @@
module REXML
module Encoding
@@__REXML_encoding_methods = %q~
# Convert from UTF-8
def encode content
def encode_ascii content
array_utf8 = content.unpack('U*')
array_enc = []
array_utf8.each do |num|
@ -17,9 +16,15 @@ module REXML
end
# Convert to UTF-8
def decode(str)
def decode_ascii(str)
str.unpack('C*').pack('U*')
end
~
# Register the US-ASCII handler: on the given object's singleton class, alias
# decode/encode to the *_ascii methods.
register("US-ASCII") do |obj|
class << obj
alias decode decode_ascii
alias encode encode_ascii
end
end
end
end

View file

@ -1,7 +1,6 @@
module REXML
module Encoding
@@__REXML_encoding_methods =<<-EOL
def encode content
def encode_utf16 content
array_utf8 = content.unpack("U*")
array_enc = []
array_utf8.each do |num|
@ -16,7 +15,7 @@ module REXML
array_enc.pack('C*')
end
def decode(str)
def decode_utf16(str)
array_enc=str.unpack('C*')
array_utf8 = []
2.step(array_enc.size-1, 2){|i|
@ -24,6 +23,12 @@ module REXML
}
array_utf8.pack('U*')
end
EOL
# Register the handler for the encoding named by the UTF_16 constant: on the
# given object's singleton class, alias decode/encode to the *_utf16 methods.
register(UTF_16) do |obj|
class << obj
alias decode decode_utf16
alias encode encode_utf16
end
end
end
end

View file

@ -1,13 +1,18 @@
module REXML
module Encoding
@@__REXML_encoding_methods =<<-EOL
def encode content
# Identity conversion: returns +content+ itself, unmodified.
def encode_utf8(content)
  content
end
def decode(str)
# Identity conversion: returns +str+ itself, unmodified.
def decode_utf8(str)
  str
end
EOL
# Register the handler for the encoding named by the UTF_8 constant: on the
# given object's singleton class, alias decode/encode to the pass-through
# *_utf8 methods.
register(UTF_8) do |obj|
class << obj
alias decode decode_utf8
alias encode encode_utf8
end
end
end
end