mirror of
				https://github.com/ruby/ruby.git
				synced 2022-11-09 12:17:21 -05:00 
			
		
		
		
	Applied Nobu's patch to the XML document encoding structure in REXML. It
passes all of REXML's native tests as well as a couple of others, and should fix potential threading issues.

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@8293 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
		
							parent
							
								
									ff866f0a8f
								
							
						
					
					
						commit
						47bd6a4f37
					
				
					 9 changed files with 98 additions and 114 deletions
				
			
		| 
						 | 
				
			
			@ -1,6 +1,16 @@
 | 
			
		|||
# -*- mode: ruby; ruby-indent-level: 2; indent-tabs-mode: t; tab-width: 2 -*- vim: sw=2 ts=2
 | 
			
		||||
module REXML
 | 
			
		||||
	module Encoding
 | 
			
		||||
		@@uconv_available = false
 | 
			
		||||
               @encoding_methods = {}
 | 
			
		||||
               def self.register(enc, &block)
 | 
			
		||||
                       @encoding_methods[enc] = block
 | 
			
		||||
               end
 | 
			
		||||
               def self.apply(obj, enc)
 | 
			
		||||
                       @encoding_methods[enc][obj]
 | 
			
		||||
               end
 | 
			
		||||
               def self.encoding_method(enc)
 | 
			
		||||
                       @encoding_methods[enc]
 | 
			
		||||
               end
 | 
			
		||||
 | 
			
		||||
		# Native, default format is UTF-8, so it is declared here rather than in
 | 
			
		||||
		# an encodings/ definition.
 | 
			
		||||
| 
						 | 
				
			
			@ -18,26 +28,24 @@ module REXML
 | 
			
		|||
				if enc and enc != UTF_8
 | 
			
		||||
					@encoding = enc.upcase
 | 
			
		||||
					begin
 | 
			
		||||
            load 'rexml/encodings/ICONV.rb'
 | 
			
		||||
						instance_eval @@__REXML_encoding_methods
 | 
			
		||||
						Iconv::iconv( UTF_8, @encoding, "" )
 | 
			
		||||
                                               require 'rexml/encodings/ICONV.rb'
 | 
			
		||||
                                               Encoding.apply(self, "ICONV")
 | 
			
		||||
					rescue LoadError, Exception => err
 | 
			
		||||
						raise "Bad encoding name #@encoding" unless @encoding =~ /^[\w-]+$/
 | 
			
		||||
                                               raise ArgumentError, "Bad encoding name #@encoding" unless @encoding =~ /^[\w-]+$/
 | 
			
		||||
						@encoding.untaint 
 | 
			
		||||
						enc_file = File.join( "rexml", "encodings", "#@encoding.rb" )
 | 
			
		||||
						begin
 | 
			
		||||
              load enc_file
 | 
			
		||||
							instance_eval @@__REXML_encoding_methods
 | 
			
		||||
                                                       require enc_file
 | 
			
		||||
                                                       Encoding.apply(self, @encoding)
 | 
			
		||||
						rescue LoadError
 | 
			
		||||
              puts $!.message
 | 
			
		||||
							raise Exception.new( "No decoder found for encoding #@encoding.  Please install iconv." )
 | 
			
		||||
                                                       puts $!.message
 | 
			
		||||
                                                       raise ArgumentError, "No decoder found for encoding #@encoding.  Please install iconv."
 | 
			
		||||
						end
 | 
			
		||||
					end
 | 
			
		||||
				else
 | 
			
		||||
					enc = UTF_8
 | 
			
		||||
					@encoding = enc.upcase
 | 
			
		||||
          load 'rexml/encodings/UTF-8.rb' 
 | 
			
		||||
					instance_eval @@__REXML_encoding_methods
 | 
			
		||||
                                       @encoding = UTF_8
 | 
			
		||||
                                       require 'rexml/encodings/UTF-8.rb'
 | 
			
		||||
                                       Encoding.apply(self, @encoding)
 | 
			
		||||
				end
 | 
			
		||||
			ensure
 | 
			
		||||
				$VERBOSE = old_verbosity
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1,37 +1,20 @@
 | 
			
		|||
begin
 | 
			
		||||
  require 'iconv'
 | 
			
		||||
require 'uconv'
 | 
			
		||||
 | 
			
		||||
  module REXML
 | 
			
		||||
    module Encoding
 | 
			
		||||
      @@__REXML_encoding_methods =<<-EOL
 | 
			
		||||
      def decode(str)
 | 
			
		||||
        return Iconv::iconv("utf-8", "euc-jp", str)[0]
 | 
			
		||||
      end
 | 
			
		||||
 | 
			
		||||
      def encode content
 | 
			
		||||
        return Iconv::iconv("euc-jp", "utf-8", content)[0]
 | 
			
		||||
      end
 | 
			
		||||
      EOL
 | 
			
		||||
module REXML
 | 
			
		||||
  module Encoding
 | 
			
		||||
    def decode_eucjp(str)
 | 
			
		||||
      Uconv::euctou8(str)
 | 
			
		||||
    end
 | 
			
		||||
  end
 | 
			
		||||
rescue LoadError
 | 
			
		||||
  begin
 | 
			
		||||
    require 'uconv'
 | 
			
		||||
 | 
			
		||||
    module REXML
 | 
			
		||||
      module Encoding
 | 
			
		||||
        @@__REXML_encoding_methods =<<-EOL
 | 
			
		||||
        def decode(str)
 | 
			
		||||
          return Uconv::euctou8(str)
 | 
			
		||||
        end
 | 
			
		||||
    def encode_eucjp content
 | 
			
		||||
      Uconv::u8toeuc(content)
 | 
			
		||||
    end
 | 
			
		||||
 | 
			
		||||
        def encode content
 | 
			
		||||
          return Uconv::u8toeuc(content)
 | 
			
		||||
        end
 | 
			
		||||
        EOL
 | 
			
		||||
    register("EUC-JP") do |obj|
 | 
			
		||||
      class << obj
 | 
			
		||||
        alias decode decode_eucjp
 | 
			
		||||
        alias encode encode_eucjp
 | 
			
		||||
      end
 | 
			
		||||
    end
 | 
			
		||||
  rescue LoadError
 | 
			
		||||
		raise "uconv or iconv is required for Japanese encoding support."
 | 
			
		||||
  end
 | 
			
		||||
end
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -3,14 +3,20 @@ raise LoadError unless defined? Iconv
 | 
			
		|||
 | 
			
		||||
module REXML
 | 
			
		||||
  module Encoding
 | 
			
		||||
    @@__REXML_encoding_methods =<<-EOL
 | 
			
		||||
    def decode( str )
 | 
			
		||||
      return Iconv::iconv("utf-8", @encoding, str)[0]
 | 
			
		||||
    def decode_iconv(str)
 | 
			
		||||
      Iconv.conv(UTF_8, @encoding, str)
 | 
			
		||||
    end
 | 
			
		||||
 | 
			
		||||
    def encode( content )
 | 
			
		||||
      return Iconv::iconv(@encoding, "utf-8", content)[0]
 | 
			
		||||
    def encode_iconv(content)
 | 
			
		||||
      Iconv.conv(@encoding, UTF_8, content)
 | 
			
		||||
    end
 | 
			
		||||
 | 
			
		||||
    register("ICONV") do |obj|
 | 
			
		||||
      Iconv.conv(UTF_8, obj.encoding, nil)
 | 
			
		||||
      class << obj
 | 
			
		||||
        alias decode decode_iconv
 | 
			
		||||
        alias encode encode_iconv
 | 
			
		||||
      end
 | 
			
		||||
    end
 | 
			
		||||
    EOL
 | 
			
		||||
  end
 | 
			
		||||
end
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1,25 +1,7 @@
 | 
			
		|||
require 'rexml/encodings/US-ASCII'
 | 
			
		||||
 | 
			
		||||
module REXML
 | 
			
		||||
  module Encoding
 | 
			
		||||
    @@__REXML_encoding_methods = %q~
 | 
			
		||||
    # Convert from UTF-8
 | 
			
		||||
    def encode content
 | 
			
		||||
      array_utf8 = content.unpack('U*')
 | 
			
		||||
      array_enc = []
 | 
			
		||||
      array_utf8.each do |num|
 | 
			
		||||
        if num <= 0xFF
 | 
			
		||||
          array_enc << num
 | 
			
		||||
        else
 | 
			
		||||
          # Numeric entity (&#nnnn;); shard by  Stefan Scholl
 | 
			
		||||
          array_enc.concat "&\##{num};".unpack('C*')
 | 
			
		||||
        end
 | 
			
		||||
      end
 | 
			
		||||
      array_enc.pack('C*')
 | 
			
		||||
    end
 | 
			
		||||
 | 
			
		||||
    # Convert to UTF-8
 | 
			
		||||
    def decode(str)
 | 
			
		||||
      str.unpack('C*').pack('U*')
 | 
			
		||||
    end
 | 
			
		||||
    ~
 | 
			
		||||
    register("ISO-8859-1", &encoding_method("US-ASCII"))
 | 
			
		||||
  end
 | 
			
		||||
end
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1,37 +1,22 @@
 | 
			
		|||
begin
 | 
			
		||||
  require 'iconv'
 | 
			
		||||
require 'uconv'
 | 
			
		||||
 | 
			
		||||
  module REXML
 | 
			
		||||
    module Encoding
 | 
			
		||||
      @@__REXML_encoding_methods =<<-EOL
 | 
			
		||||
      def decode(str)
 | 
			
		||||
        return Iconv::iconv("utf-8", "shift_jis", str)[0]
 | 
			
		||||
      end
 | 
			
		||||
 | 
			
		||||
      def encode content
 | 
			
		||||
        return Iconv::iconv("shift_jis", "utf-8", content)[0]
 | 
			
		||||
      end
 | 
			
		||||
      EOL
 | 
			
		||||
module REXML
 | 
			
		||||
  module Encoding
 | 
			
		||||
    def decode_sjis content
 | 
			
		||||
      Uconv::u8tosjis(content)
 | 
			
		||||
    end
 | 
			
		||||
  end
 | 
			
		||||
rescue LoadError
 | 
			
		||||
  begin 
 | 
			
		||||
    require 'uconv'
 | 
			
		||||
 | 
			
		||||
    module REXML
 | 
			
		||||
      module Encoding
 | 
			
		||||
        @@__REXML_encoding_methods =<<-EOL
 | 
			
		||||
        def encode(content)
 | 
			
		||||
          Uconv::u8tosjis(content)
 | 
			
		||||
        end
 | 
			
		||||
    def encode_sjis(str)
 | 
			
		||||
      Uconv::sjistou8(str)
 | 
			
		||||
    end
 | 
			
		||||
 | 
			
		||||
        def decode(str)
 | 
			
		||||
          Uconv::sjistou8(str)
 | 
			
		||||
        end
 | 
			
		||||
        EOL
 | 
			
		||||
    b = proc do |obj|
 | 
			
		||||
      class << obj
 | 
			
		||||
        alias decode decode_sjis
 | 
			
		||||
        alias encode encode_sjis
 | 
			
		||||
      end
 | 
			
		||||
    end
 | 
			
		||||
  rescue LoadError
 | 
			
		||||
    raise "uconv or iconv is required for Japanese encoding support."
 | 
			
		||||
    register("SHIFT-JIS", &b)
 | 
			
		||||
    register("SHIFT_JIS", &b)
 | 
			
		||||
  end
 | 
			
		||||
end
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1,7 +1,6 @@
 | 
			
		|||
module REXML
 | 
			
		||||
  module Encoding
 | 
			
		||||
    @@__REXML_encoding_methods =<<-EOL
 | 
			
		||||
    def encode content
 | 
			
		||||
    def encode_unile content
 | 
			
		||||
      array_utf8 = content.unpack("U*")
 | 
			
		||||
      array_enc = []
 | 
			
		||||
      array_utf8.each do |num|
 | 
			
		||||
| 
						 | 
				
			
			@ -16,7 +15,7 @@ module REXML
 | 
			
		|||
      array_enc.pack('C*')
 | 
			
		||||
    end
 | 
			
		||||
 | 
			
		||||
    def decode(str)
 | 
			
		||||
    def decode_unile(str)
 | 
			
		||||
      array_enc=str.unpack('C*')
 | 
			
		||||
      array_utf8 = []
 | 
			
		||||
      2.step(array_enc.size-1, 2){|i| 
 | 
			
		||||
| 
						 | 
				
			
			@ -24,6 +23,12 @@ module REXML
 | 
			
		|||
      }
 | 
			
		||||
      array_utf8.pack('U*')
 | 
			
		||||
    end
 | 
			
		||||
    EOL
 | 
			
		||||
 | 
			
		||||
    register(UNILE) do |obj|
 | 
			
		||||
      class << obj
 | 
			
		||||
        alias decode decode_unile
 | 
			
		||||
        alias encode encode_unile
 | 
			
		||||
      end
 | 
			
		||||
    end
 | 
			
		||||
  end
 | 
			
		||||
end
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1,8 +1,7 @@
 | 
			
		|||
module REXML
 | 
			
		||||
  module Encoding
 | 
			
		||||
    @@__REXML_encoding_methods = %q~
 | 
			
		||||
    # Convert from UTF-8
 | 
			
		||||
    def encode content
 | 
			
		||||
    def encode_ascii content
 | 
			
		||||
      array_utf8 = content.unpack('U*')
 | 
			
		||||
      array_enc = []
 | 
			
		||||
      array_utf8.each do |num|
 | 
			
		||||
| 
						 | 
				
			
			@ -17,9 +16,15 @@ module REXML
 | 
			
		|||
    end
 | 
			
		||||
 | 
			
		||||
    # Convert to UTF-8
 | 
			
		||||
    def decode(str)
 | 
			
		||||
    def decode_ascii(str)
 | 
			
		||||
      str.unpack('C*').pack('U*')
 | 
			
		||||
    end
 | 
			
		||||
    ~
 | 
			
		||||
 | 
			
		||||
    register("US-ASCII") do |obj|
 | 
			
		||||
      class << obj
 | 
			
		||||
        alias decode decode_ascii
 | 
			
		||||
        alias encode encode_ascii
 | 
			
		||||
      end
 | 
			
		||||
    end
 | 
			
		||||
  end
 | 
			
		||||
end
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1,7 +1,6 @@
 | 
			
		|||
module REXML
 | 
			
		||||
  module Encoding
 | 
			
		||||
    @@__REXML_encoding_methods =<<-EOL
 | 
			
		||||
    def encode content
 | 
			
		||||
    def encode_utf16 content
 | 
			
		||||
      array_utf8 = content.unpack("U*")
 | 
			
		||||
      array_enc = []
 | 
			
		||||
      array_utf8.each do |num|
 | 
			
		||||
| 
						 | 
				
			
			@ -16,7 +15,7 @@ module REXML
 | 
			
		|||
      array_enc.pack('C*')
 | 
			
		||||
    end
 | 
			
		||||
 | 
			
		||||
    def decode(str)
 | 
			
		||||
    def decode_utf16(str)
 | 
			
		||||
      array_enc=str.unpack('C*')
 | 
			
		||||
      array_utf8 = []
 | 
			
		||||
      2.step(array_enc.size-1, 2){|i| 
 | 
			
		||||
| 
						 | 
				
			
			@ -24,6 +23,12 @@ module REXML
 | 
			
		|||
      }
 | 
			
		||||
      array_utf8.pack('U*')
 | 
			
		||||
    end
 | 
			
		||||
    EOL
 | 
			
		||||
 | 
			
		||||
    register(UTF_16) do |obj|
 | 
			
		||||
      class << obj
 | 
			
		||||
        alias decode decode_utf16
 | 
			
		||||
        alias encode encode_utf16
 | 
			
		||||
      end
 | 
			
		||||
    end
 | 
			
		||||
  end
 | 
			
		||||
end
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1,13 +1,18 @@
 | 
			
		|||
module REXML
 | 
			
		||||
  module Encoding
 | 
			
		||||
    @@__REXML_encoding_methods =<<-EOL
 | 
			
		||||
    def encode content
 | 
			
		||||
    def encode_utf8 content
 | 
			
		||||
      content
 | 
			
		||||
    end
 | 
			
		||||
 | 
			
		||||
    def decode(str)
 | 
			
		||||
    def decode_utf8(str)
 | 
			
		||||
      str
 | 
			
		||||
    end
 | 
			
		||||
    EOL
 | 
			
		||||
 | 
			
		||||
    register(UTF_8) do |obj|
 | 
			
		||||
      class << obj
 | 
			
		||||
        alias decode decode_utf8
 | 
			
		||||
        alias encode encode_utf8
 | 
			
		||||
      end
 | 
			
		||||
    end
 | 
			
		||||
  end
 | 
			
		||||
end
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue