mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
* lib/rexml/encoding.rb: use Ruby native encoding mechanism. [ruby-dev:42464]
* lib/rexml/encodings/: remove. * lib/rexml/document.rb, lib/rexml/formatters/default.rb, lib/rexml/output.rb, lib/rexml/parseexception.rb, lib/rexml/parsers/baseparser.rb, lib/rexml/source.rb, lib/rexml/xmldecl.rb: use Ruby's native Encoding object. * test/rexml/, test/rss/: follow the above encoding changes. * NEWS: add REXML's incompatible change about encoding. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@29646 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
parent
767fe5170d
commit
994f066f76
28 changed files with 126 additions and 489 deletions
|
@ -1,71 +1,67 @@
|
|||
# -*- mode: ruby; ruby-indent-level: 2; indent-tabs-mode: t; tab-width: 2 -*- vim: sw=2 ts=2
|
||||
module REXML
|
||||
module Encoding
|
||||
@encoding_methods = {}
|
||||
# Records +block+ in @encoding_methods under the key +enc+, replacing any
# entry previously stored for that key. Returns the stored block.
def self.register(enc, &block)
  @encoding_methods.store(enc, block)
end
|
||||
# Fetches the entry stored in @encoding_methods for +enc+ and invokes it
# with +obj+, returning whatever that entry returns.
def self.apply(obj, enc)
  handler = @encoding_methods[enc]
  handler[obj]
end
|
||||
# Returns the entry stored in @encoding_methods for +enc+, or nil when
# nothing has been stored under that key.
def self.encoding_method(enc)
  registry = @encoding_methods
  registry[enc]
end
|
||||
|
||||
# Native, default format is UTF-8, so it is declared here rather than in
|
||||
# an encodings/ definition.
|
||||
UTF_8 = 'UTF-8'
|
||||
UTF_16 = 'UTF-16'
|
||||
UNILE = 'UNILE'
|
||||
|
||||
# ID ---> Encoding name
|
||||
attr_reader :encoding
|
||||
def encoding=( enc )
|
||||
old_verbosity = $VERBOSE
|
||||
begin
|
||||
$VERBOSE = false
|
||||
enc = enc.nil? ? nil : enc.upcase
|
||||
return false if defined? @encoding and enc == @encoding
|
||||
if enc and enc != UTF_8
|
||||
@encoding = enc
|
||||
raise ArgumentError, "Bad encoding name #@encoding" unless @encoding =~ /^[\w-]+$/
|
||||
@encoding.untaint
|
||||
begin
|
||||
require 'rexml/encodings/ICONV.rb'
|
||||
Encoding.apply(self, "ICONV")
|
||||
rescue LoadError, Exception
|
||||
begin
|
||||
enc_file = File.join( "rexml", "encodings", "#@encoding.rb" )
|
||||
require enc_file
|
||||
Encoding.apply(self, @encoding)
|
||||
rescue LoadError => err
|
||||
puts err.message
|
||||
raise ArgumentError, "No decoder found for encoding #@encoding. Please install iconv."
|
||||
end
|
||||
end
|
||||
else
|
||||
@encoding = UTF_8
|
||||
require 'rexml/encodings/UTF-8.rb'
|
||||
Encoding.apply(self, @encoding)
|
||||
end
|
||||
ensure
|
||||
$VERBOSE = old_verbosity
|
||||
# ID ---> Encoding object
|
||||
attr_reader :encoding
|
||||
# Sets @encoding, the encoding read by #encode and #decode.
#
# encoding:: an ::Encoding object, an encoding name (String) resolved
#            through find_encoding, or nil, which selects UTF-8.
#
# Returns false when the requested encoding equals the current one,
# true otherwise.
# Raises ArgumentError ("Bad encoding name ...") when a name cannot be
# resolved.
def encoding=(encoding)
  if encoding.is_a?(String)
    original_encoding = encoding
    begin
      encoding = find_encoding(encoding)
    rescue ArgumentError
      # ::Encoding.find raises for unknown names instead of returning nil;
      # treat that the same as a nil result so the error below is uniform.
      encoding = nil
    end
    unless encoding
      raise ArgumentError, "Bad encoding name #{original_encoding}"
    end
  end
  return false if defined?(@encoding) && encoding == @encoding
  # nil (or an explicit UTF-8) falls back to UTF-8.
  @encoding = encoding || ::Encoding::UTF_8
  true
end
|
||||
|
||||
# Detects the encoding of the XML document held in +xml+.
#
# A leading UTF-16 byte order mark decides the encoding and is removed
# from +xml+ in place. Otherwise the encoding pseudo-attribute of the XML
# declaration is consulted; "utf-16" (any case) without a BOM is taken as
# big endian. Without a BOM or a declared encoding the result is UTF-8.
#
# xml:: String; modified only when a BOM is stripped.
#
# Returns ::Encoding::UTF_16BE, ::Encoding::UTF_16LE, ::Encoding::UTF_8,
# or whatever find_encoding returns for the declared name.
def check_encoding(xml)
  # We have to recognize UTF-16, LSB UTF-16, and UTF-8
  #
  # The BOM is examined byte-wise: comparing string slices against a
  # "\xfe\xff" literal depends on the encoding tags of both strings and
  # misses the BOM when they differ (e.g. a binary-tagged buffer).
  case [xml.getbyte(0), xml.getbyte(1)]
  when [0xfe, 0xff]
    xml.replace(xml.byteslice(2..-1))
    ::Encoding::UTF_16BE
  when [0xff, 0xfe]
    xml.replace(xml.byteslice(2..-1))
    ::Encoding::UTF_16LE
  else
    if /\A\s*<\?xml\s+version\s*=\s*(['"]).*?\1
        \s+encoding\s*=\s*(["'])(.*?)\2/mx =~ xml
      encoding_name = $3
      if /\Autf-16\z/i =~ encoding_name
        ::Encoding::UTF_16BE
      else
        find_encoding(encoding_name)
      end
    else
      ::Encoding::UTF_8
    end
  end
end
|
||||
|
||||
# Returns +string+ transcoded into the encoding currently held in
# @encoding.
def encode(string)
  target = @encoding
  string.encode(target)
end
|
||||
|
||||
# Returns +string+ converted to UTF-8, reading its bytes as the encoding
# currently held in @encoding.
def decode(string)
  source = @encoding
  string.encode(::Encoding::UTF_8, source)
end
|
||||
|
||||
private
|
||||
# Resolves an encoding name to one of Ruby's ::Encoding objects.
#
# Three spellings are rewritten before the lookup: "UTF-16" (any case)
# becomes "UTF-16BE", "shift-jis" (any case) becomes "Shift_JIS", and
# "CP-<digits>" becomes "CP<digits>".
#
# name:: encoding name (String).
#
# Returns the matching ::Encoding, or nil when Ruby does not know the
# name, so callers can test the result instead of rescuing.
def find_encoding(name)
  case name
  when /\Autf-16\z/i
    name = "UTF-16BE"
  when /\Ashift-jis\z/i
    name = "Shift_JIS"
  when /\ACP-(\d+)\z/
    name = "CP#{$1}"
  end
  ::Encoding.find(name)
rescue ArgumentError
  # ::Encoding.find raises ArgumentError for unknown encoding names.
  nil
end
|
||||
end
|
||||
end
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue