2005-04-09 13:03:32 -04:00
|
|
|
# -*- mode: ruby; ruby-indent-level: 2; indent-tabs-mode: t; tab-width: 2 -*- vim: sw=2 ts=2
|
2003-06-09 21:31:01 -04:00
|
|
|
module REXML
	# Mixin that gives an object an +encoding+ attribute and, through a small
	# registry of per-encoding blocks, a way to install the matching
	# encoder/decoder on that object.
	module Encoding
		# Registry mapping an encoding name to the block registered for it.
		@encoding_methods = {}

		# Registers +block+ as the handler for the encoding named +enc+.
		# A later registration under the same name replaces the earlier one.
		def self.register(enc, &block)
			@encoding_methods[enc] = block
		end

		# Calls the block registered for +enc+, passing it +obj+, and returns
		# the block's result.
		#
		# Raises ArgumentError when nothing is registered under +enc+.
		def self.apply(obj, enc)
			handler = @encoding_methods[enc]
			raise ArgumentError, "No encoding method registered for #{enc}" unless handler
			handler[obj]
		end

		# Returns the block registered for +enc+, or +nil+ if there is none.
		def self.encoding_method(enc)
			@encoding_methods[enc]
		end

		# Native, default format is UTF-8, so it is declared here rather than in
		# an encodings/ definition.
		# Frozen because UTF_8 is assigned directly to @encoding and handed out
		# through the reader; a caller mutating it must not alter the constant.
		UTF_8 = 'UTF-8'.freeze
		UTF_16 = 'UTF-16'.freeze
		UNILE = 'UNILE'.freeze

		# Name of the encoding currently selected for this object (upper-cased
		# by the writer below).
		attr_reader :encoding

		# Selects the encoding +enc+ for this object.
		#
		# +enc+ is upper-cased and stored in @encoding; +nil+ selects UTF_8.
		# The file rexml/encodings/<NAME>.rb is then required and the handler
		# registered under that name is applied to +self+. If that fails, the
		# same is attempted with "ICONV" as a fallback.
		#
		# Assigning the value @encoding already holds is a no-op.
		#
		# Raises ArgumentError if +enc+ contains anything other than word
		# characters and hyphens (it is interpolated into a file path), or if
		# neither the named encoding nor the ICONV fallback could be set up.
		def encoding=( enc )
			old_verbosity = $VERBOSE
			begin
				# Silence warnings emitted while loading encoding definitions.
				$VERBOSE = false
				return if defined?(@encoding) && enc == @encoding
				if enc
					raise ArgumentError, "Bad encoding name #{enc}" unless /\A[\w-]+\z/ =~ enc
					name = enc.upcase
					# Object#untaint no longer exists on Ruby >= 3.2.
					name.untaint if name.respond_to?(:untaint)
					@encoding = name
				else
					@encoding = UTF_8
				end
				err = nil
				[@encoding, "ICONV"].each do |candidate|
					begin
						require File.join("rexml", "encodings", "#{candidate}.rb")
						return Encoding.apply(self, candidate)
					rescue ScriptError, StandardError => err
						# Remember the failure and fall through to the next candidate.
						# Deliberately not Exception: Interrupt, SystemExit and
						# NoMemoryError must keep propagating.
					end
				end
				# Keep stdout clean; surface the underlying reason only to users
				# who asked for warnings.
				$stderr.puts err.message if old_verbosity && err
				raise ArgumentError, "No decoder found for encoding #@encoding. Please install iconv."
			ensure
				$VERBOSE = old_verbosity
			end
		end

		# Guesses the encoding of the raw XML text +str+.
		#
		# Returns UTF_16 when +str+ starts with the bytes FE FF, UNILE when it
		# starts with FF FE, otherwise the upper-cased name found in the
		# encoding="..." pseudo-attribute of the XML declaration, and UTF_8 when
		# there is no such declaration.
		def check_encoding str
			# We have to recognize UTF-16, LSB UTF-16, and UTF-8
			# unpack yields Integers on every Ruby version; String#[] does not.
			first, second = str.unpack("C2")
			return UTF_16 if first == 254 && second == 255
			return UNILE if first == 255 && second == 254
			# Match on a byte-level view so that input which is not valid in its
			# tagged encoding (e.g. Latin-1 bytes tagged UTF-8) cannot raise.
			raw = str.respond_to?(:force_encoding) ? str.dup.force_encoding("ASCII-8BIT") : str
			if raw =~ /^\s*<\?xml\s+version\s*=\s*(['"]).*?\1\s+encoding\s*=\s*(["'])(.*?)\2/m
				return $3.upcase
			end
			UTF_8
		end
	end
end
|