1
0
Fork 0
mirror of https://github.com/ruby/ruby.git synced 2022-11-09 12:17:21 -05:00

Fixes ticket:110 (more UTF-16 problems)

A missing `require` for UndefinedNamespaceException was causing errors in
some cases.


git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@13816 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
ser 2007-11-04 04:52:08 +00:00
parent b3ab1dbf34
commit 06f2b5b1d8
4 changed files with 18 additions and 6 deletions

View file

@ -56,8 +56,13 @@ module REXML
def check_encoding str def check_encoding str
# We have to recognize UTF-16, LSB UTF-16, and UTF-8 # We have to recognize UTF-16, LSB UTF-16, and UTF-8
return UTF_16 if /\A\xfe\xff/n =~ str if str[0] == 0xfe && str[1] == 0xff
return UNILE if /\A\xff\xfe/n =~ str str[0,2] = ""
return UTF_16
elsif str[0] == 0xff && str[1] == 0xfe
str[0,2] = ""
return UNILE
end
str =~ /^\s*<\?xml\s+version\s*=\s*(['"]).*?\1\s+encoding\s*=\s*(["'])(.*?)\2/um str =~ /^\s*<\?xml\s+version\s*=\s*(['"]).*?\1\s+encoding\s*=\s*(["'])(.*?)\2/um
return $3.upcase if $3 return $3.upcase if $3
return UTF_8 return UTF_8

View file

@ -1,4 +1,5 @@
require 'rexml/parseexception' require 'rexml/parseexception'
require 'rexml/undefinednamespaceexception'
require 'rexml/source' require 'rexml/source'
require 'set' require 'set'
@ -191,6 +192,7 @@ module REXML
end end
return [ :end_document ] if empty? return [ :end_document ] if empty?
return @stack.shift if @stack.size > 0 return @stack.shift if @stack.size > 0
#STDERR.puts @source.encoding
@source.read if @source.buffer.size<2 @source.read if @source.buffer.size<2
#STDERR.puts "BUFFER = #{@source.buffer.inspect}" #STDERR.puts "BUFFER = #{@source.buffer.inspect}"
if @document_status == nil if @document_status == nil

View file

@ -1,4 +1,5 @@
require 'rexml/validation/validationexception' require 'rexml/validation/validationexception'
require 'rexml/undefinednamespaceexception'
module REXML module REXML
module Parsers module Parsers

View file

@ -135,6 +135,7 @@ module REXML
def initialize(arg, block_size=500, encoding=nil) def initialize(arg, block_size=500, encoding=nil)
@er_source = @source = arg @er_source = @source = arg
@to_utf = false @to_utf = false
# Determining the encoding is a deceptively difficult issue to resolve. # Determining the encoding is a deceptively difficult issue to resolve.
# First, we check the first two bytes for UTF-16. Then we # First, we check the first two bytes for UTF-16. Then we
# assume that the encoding is at least ASCII enough for the '>', and # assume that the encoding is at least ASCII enough for the '>', and
@ -146,13 +147,16 @@ module REXML
str = @source.read( 2 ) str = @source.read( 2 )
if encoding if encoding
self.encoding = encoding self.encoding = encoding
elsif /\A(?:\xfe\xff|\xff\xfe)/n =~ str elsif 0xfe == str[0] && 0xff == str[1]
self.encoding = check_encoding( str ) @line_break = "\000>"
elsif (0xef == str[0] && 0xbb == str[1]) elsif 0xff == str[0] && 0xfe == str[1]
@line_break = ">\000"
elsif 0xef == str[0] && 0xbb == str[1]
str += @source.read(1) str += @source.read(1)
str = '' if (0xbf == str[2]) str = '' if (0xbf == str[2])
@line_break = ">"
else else
@line_break = '>' @line_break = ">"
end end
super str+@source.readline( @line_break ) super str+@source.readline( @line_break )
end end