mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
* Changes to the encoding mechanism. If iconv is found, it is used first
  for encoding changes. This should be the case on all 1.8 installations.
  When it isn't found (<1.6), the native REXML encoding mechanism is used.
  This cleaned out some files and tightened up the code a bit, and iconv
  should be faster than the pure Ruby code.
* Changed deprecated assert_not_nil to assert throughout the tests.
* Parse exceptions are a little more verbose, and extend RuntimeError.
* Bug fixes to XPathParser.
* The Light API is still shifting, like the sands of the desert.
* Fixed a new Ruby 1.8.0 warning, added some speed optimizations, and
  tightened error reporting in the base parser.

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@4737 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
parent
662532be00
commit
7d21c237cc
23 changed files with 185 additions and 224 deletions
|
@@ -2,61 +2,49 @@ module REXML
|
|||
module Encoding
|
||||
# Class-level flag, initialised to false. Nothing visible in this view reads
# or changes it.
@@uconv_available = false
|
||||
|
||||
# Registry filled by Encoding.claim and searched by check_encoding: each key is
# a pattern matched against the document text, each value is the name of the
# encoding that pattern identifies.
ENCODING_CLAIMS = { }
|
||||
|
||||
# Registers a pattern in ENCODING_CLAIMS that identifies +encoding_str+.
#
# encoding_str:: encoding name stored as the value for the pattern
# match::        key to register under; when omitted (nil), a case-insensitive
#                Regexp is built that looks for an XML declaration whose
#                encoding attribute equals +encoding_str+
def Encoding.claim( encoding_str, match=nil )
|
||||
if match
|
||||
ENCODING_CLAIMS[ match ] = encoding_str
|
||||
else
|
||||
# NOTE(review): "<?" is not escaped here, so the regexp treats "<" as
# optional instead of requiring a literal "<?". encoding_str is also
# interpolated without Regexp.escape, so any regexp metacharacters in the
# name are interpreted as such -- confirm both are intended.
ENCODING_CLAIMS[ /^\s*<?xml\s*version=(['"]).*?\1\s*encoding=(["'])#{encoding_str}\2/i ] = encoding_str
|
||||
end
|
||||
end
|
||||
|
||||
# Native, default format is UTF-8, so it is declared here rather than in
|
||||
# an encodings/ definition.
|
||||
UTF_8 = 'UTF-8'
|
||||
# Register the default XML-declaration pattern for UTF-8 in ENCODING_CLAIMS.
claim( UTF_8 )
|
||||
# Name returned by check_encoding when the text starts with bytes 254, 255.
UTF_16 = 'UTF-16'
|
||||
# Name returned by check_encoding when the text starts with bytes 255, 254
# (described there as "LSB UTF-16").
UNILE = 'UNILE'
|
||||
|
||||
# ID ---> Encoding name
|
||||
# Reader for @encoding, which encoding= stores in upper case.
attr_reader :encoding
|
||||
# Sets the encoding of the including object and loads the code that
# implements conversion for it.
#
# enc:: encoding name, or nil to select UTF-8
#
# Raises Exception when neither the iconv wrapper nor a dedicated
# rexml/encodings/<NAME>.rb file can be loaded for a non-UTF-8 encoding.
#
# NOTE(review): this span comes from a rendered diff. The three statements
# directly below (default to UTF-8, upcase, require) look like the pre-commit
# body, and everything from old_verbosity onwards looks like its replacement;
# they are unlikely to be meant to run together -- confirm against the real
# file.
def encoding=( enc )
|
||||
enc = UTF_8 unless enc
|
||||
@encoding = enc.upcase
|
||||
require "rexml/encodings/#@encoding" unless @encoding == UTF_8
|
||||
# Warnings are silenced while encoding files are loaded; the ensure clause at
# the bottom restores the previous setting on every exit path.
old_verbosity = $VERBOSE
|
||||
begin
|
||||
$VERBOSE = false
|
||||
# Nothing to do when the requested encoding is already the current one.
return if defined? @encoding and enc == @encoding
|
||||
if enc and enc != UTF_8
|
||||
@encoding = enc.upcase
|
||||
begin
|
||||
# Try iconv first: load the wrapper, then run an empty conversion so that
# an encoding iconv cannot handle fails here rather than later.
load 'rexml/encodings/ICONV.rb'
|
||||
Iconv::iconv( UTF_8, @encoding, "" )
|
||||
# Any failure (not just a missing file) falls back to a per-encoding file.
# err is not used in this view.
rescue LoadError, Exception => err
|
||||
enc_file = File.join( "rexml", "encodings", "#@encoding.rb" )
|
||||
begin
|
||||
load enc_file
|
||||
rescue LoadError
|
||||
raise Exception.new( "No decoder found for encoding #@encoding. Please install iconv." )
|
||||
end
|
||||
end
|
||||
else
|
||||
# nil/false or UTF-8 itself: use the UTF-8 implementation file.
enc = UTF_8
|
||||
@encoding = enc.upcase
|
||||
load 'rexml/encodings/UTF-8.rb'
|
||||
end
|
||||
ensure
|
||||
$VERBOSE = old_verbosity
|
||||
end
|
||||
end
|
||||
|
||||
# Works out which encoding the document text +str+ is in and returns its name.
#
# str:: the start of the document text
#
# NOTE(review): this span comes from a rendered diff and appears to contain
# two implementations back to back. As displayed, the first one always
# returns or raises by the time "return rv[1]" is reached, so the byte-order
# checks and second regexp below it can never run.
def check_encoding str
|
||||
# First registered pattern that matches the text, as a [pattern, name] pair,
# or nil when none matches.
rv = ENCODING_CLAIMS.find{|k,v| str =~ k }
|
||||
# Raise an exception if there is a declared encoding and we don't
|
||||
# recognize it
|
||||
unless rv
|
||||
if str =~ /^\s*<?xml\s*version=(['"]).*?\1\s*encoding=(["'])(.*?)\2/
|
||||
raise "A matching encoding handler was not found for encoding '#{$3}', or the encoding handler failed to load due to a missing support library (such as uconv)."
|
||||
else
|
||||
# No encoding declared at all: fall back to UTF-8.
return UTF_8
|
||||
end
|
||||
end
|
||||
return rv[1]
|
||||
# We have to recognize UTF-16, LSB UTF-16, and UTF-8
|
||||
# NOTE(review): these comparisons treat str[0] and str[1] as integer byte
# values, which presumably relies on Ruby 1.8 String#[] semantics -- confirm
# for the Ruby version in use.
return UTF_16 if str[0] == 254 && str[1] == 255
|
||||
return UNILE if str[0] == 255 && str[1] == 254
|
||||
# NOTE(review): the backreference after version= is \2, but group 2 has not
# been opened at that point (the first regexp above uses \1 there), and the
# next line returns $1, which is the quote character captured by group 1;
# the encoding name is captured by group 3 -- likely meant \1 and $3.
str =~ /^\s*<?xml\s*version=(['"]).*?\2\s*encoding=(["'])(.*?)\2/um
|
||||
return $1.upcase if $1
|
||||
return UTF_8
|
||||
end
|
||||
|
||||
# Returns +str+ unchanged.
# Presumably a default that the encoding file loaded by encoding= replaces
# with a real conversion -- confirm against the files under rexml/encodings.
def to_utf_8(str)
|
||||
return str
|
||||
end
|
||||
|
||||
# Returns +content+ unchanged.
# Presumably a default that the encoding file loaded by encoding= replaces
# with a real conversion -- confirm against the files under rexml/encodings.
def from_utf_8 content
|
||||
return content
|
||||
end
|
||||
end
|
||||
|
||||
# Runs once when the module body is evaluated: finds every
# rexml/encodings/*_decl.rb file on the load path and requires it.
# Presumably those files register their encodings through Encoding.claim --
# confirm against the *_decl.rb files themselves.
module Encodingses
|
||||
encodings = []
|
||||
$:.each do |incl_dir|
|
||||
# Only glob for declaration files when this load-path entry actually has a
# rexml/encodings directory.
if Dir[ File.join(incl_dir, 'rexml', 'encodings') ].size > 0
|
||||
encodings |= Dir[ File.join(incl_dir, 'rexml', 'encodings', '*_decl.rb') ]
|
||||
end
|
||||
# Reduce full paths to bare file names and drop duplicates, so the same
# file found under several load-path entries is listed once. This runs on
# every iteration of the loop.
encodings.collect!{ |f| File.basename(f) }
|
||||
encodings.uniq!
|
||||
end
|
||||
# The names still carry their ".rb" extension when passed to require.
encodings.each { |enc| require "rexml/encodings/#{enc}" }
|
||||
end
|
||||
end
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue