mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
994f066f76
* lib/rexml/encodings/: remove. * lib/rexml/document.rb, lib/rexml/formatters/default.rb, lib/rexml/output.rb, lib/rexml/parseexception.rb, lib/rexml/parsers/baseparser.rb, lib/rexml/source.rb, lib/rexml/xmldecl.rb: use Ruby's native Encoding object. * test/rexml/, test/rss/: follow the above encoding changes. * NEWS: add REXML's incompatible change about encoding. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@29646 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
67 lines
1.5 KiB
Ruby
67 lines
1.5 KiB
Ruby
# -*- mode: ruby; ruby-indent-level: 2; indent-tabs-mode: t; tab-width: 2 -*- vim: sw=2 ts=2
|
|
module REXML
  # Mixin that records which character encoding an XML document uses and
  # transcodes strings between that encoding and UTF-8.
  module Encoding
    # The current encoding as a ::Encoding object (nil until #encoding= has
    # been called).
    attr_reader :encoding

    # Sets the current encoding.
    #
    # +encoding+ may be a ::Encoding object, an encoding name (String), or
    # nil/false, which selects UTF-8.
    #
    # Returns false when the value equals the encoding already stored,
    # true when the stored encoding was (re)assigned.
    #
    # Raises ArgumentError ("Bad encoding name ...") when a String does not
    # name an encoding known to Ruby.
    def encoding=(encoding)
      if encoding.is_a?(String)
        requested_name = encoding
        encoding = find_encoding(requested_name)
        raise ArgumentError, "Bad encoding name #{requested_name}" unless encoding
      end
      return false if defined?(@encoding) && encoding == @encoding

      @encoding = encoding || ::Encoding::UTF_8
      true
    end

    # Guesses the encoding of the XML text +xml+ and returns it as a
    # ::Encoding object.
    #
    # * A leading UTF-16 byte-order mark selects UTF-16BE / UTF-16LE and the
    #   two BOM bytes are removed from +xml+ in place.
    # * Otherwise the +encoding+ pseudo-attribute of the XML declaration is
    #   used ("UTF-16" without a BOM is taken to be big-endian).
    # * Without a declaration the result is UTF-8.
    #
    # Raises ArgumentError when the declaration names an unknown encoding.
    def check_encoding(xml)
      # Compare raw byte values: String#== is false for non-ASCII strings
      # whose encoding tags differ, so a BOM in a binary-tagged buffer would
      # never equal a "\xfe\xff" literal.
      first_byte = xml.getbyte(0)
      second_byte = xml.getbyte(1)

      if first_byte == 0xFE && second_byte == 0xFF
        drop_leading_bytes(xml, 2)
        ::Encoding::UTF_16BE
      elsif first_byte == 0xFF && second_byte == 0xFE
        drop_leading_bytes(xml, 2)
        ::Encoding::UTF_16LE
      else
        declared = declared_encoding_name(xml)
        return ::Encoding::UTF_8 unless declared

        find_encoding(declared) ||
          raise(ArgumentError, "Bad encoding name #{declared}")
      end
    end

    # Transcodes +string+ from its own encoding into the current encoding.
    def encode(string)
      string.encode(@encoding)
    end

    # Transcodes +string+, interpreting its bytes as the current encoding,
    # into UTF-8.
    def decode(string)
      string.encode(::Encoding::UTF_8, @encoding)
    end

    private

    # Resolves an encoding name to a ::Encoding object, accepting a few
    # spellings Ruby itself does not know ("Shift-JIS", "CP-932") and
    # mapping a bare "UTF-16" to UTF-16BE. Aliases match case-insensitively.
    #
    # Returns nil when the name is unknown.
    def find_encoding(name)
      canonical =
        case name
        when /\AUTF-16\z/i then "UTF-16BE"
        when /\Ashift-jis\z/i then "Shift_JIS"
        when /\ACP-(\d+)\z/i then "CP#{Regexp.last_match(1)}"
        else name
        end
      ::Encoding.find(canonical)
    rescue ArgumentError
      # ::Encoding.find raises for unknown names; callers expect nil.
      nil
    end

    # Returns the value of the +encoding+ pseudo-attribute of the XML
    # declaration at the start of +xml+, or nil when there is none.
    def declared_encoding_name(xml)
      declaration = /\A\s*<\?xml\s+version\s*=\s*(?<vquote>['"]).*?\k<vquote>
                     \s+encoding\s*=\s*(?<equote>["'])(?<name>.*?)\k<equote>/mx
      found = declaration.match(xml)
      found && found[:name]
    end

    # Removes the first +count+ bytes of +string+ in place. The string is
    # temporarily tagged as binary so that bytes, not characters, are
    # removed; its original encoding tag is restored afterwards.
    def drop_leading_bytes(string, count)
      tagged = string.encoding
      string.force_encoding(::Encoding::BINARY)
      string[0, count] = ""
    ensure
      string.force_encoding(tagged) if tagged
    end
  end
end
|