mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
Fixes ticket:110 (more UTF-16 problems)
A missing require for UndefinedNamespaceException was causing errors in some cases.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@13816 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
parent
b3ab1dbf34
commit
06f2b5b1d8
4 changed files with 18 additions and 6 deletions
|
@ -56,8 +56,13 @@ module REXML
|
||||||
|
|
||||||
def check_encoding str
|
def check_encoding str
|
||||||
# We have to recognize UTF-16, LSB UTF-16, and UTF-8
|
# We have to recognize UTF-16, LSB UTF-16, and UTF-8
|
||||||
return UTF_16 if /\A\xfe\xff/n =~ str
|
if str[0] == 0xfe && str[1] == 0xff
|
||||||
return UNILE if /\A\xff\xfe/n =~ str
|
str[0,2] = ""
|
||||||
|
return UTF_16
|
||||||
|
elsif str[0] == 0xff && str[1] == 0xfe
|
||||||
|
str[0,2] = ""
|
||||||
|
return UNILE
|
||||||
|
end
|
||||||
str =~ /^\s*<\?xml\s+version\s*=\s*(['"]).*?\1\s+encoding\s*=\s*(["'])(.*?)\2/um
|
str =~ /^\s*<\?xml\s+version\s*=\s*(['"]).*?\1\s+encoding\s*=\s*(["'])(.*?)\2/um
|
||||||
return $3.upcase if $3
|
return $3.upcase if $3
|
||||||
return UTF_8
|
return UTF_8
|
||||||
|
|
|
@ -1,4 +1,5 @@
|
||||||
require 'rexml/parseexception'
|
require 'rexml/parseexception'
|
||||||
|
require 'rexml/undefinednamespaceexception'
|
||||||
require 'rexml/source'
|
require 'rexml/source'
|
||||||
require 'set'
|
require 'set'
|
||||||
|
|
||||||
|
@ -191,6 +192,7 @@ module REXML
|
||||||
end
|
end
|
||||||
return [ :end_document ] if empty?
|
return [ :end_document ] if empty?
|
||||||
return @stack.shift if @stack.size > 0
|
return @stack.shift if @stack.size > 0
|
||||||
|
#STDERR.puts @source.encoding
|
||||||
@source.read if @source.buffer.size<2
|
@source.read if @source.buffer.size<2
|
||||||
#STDERR.puts "BUFFER = #{@source.buffer.inspect}"
|
#STDERR.puts "BUFFER = #{@source.buffer.inspect}"
|
||||||
if @document_status == nil
|
if @document_status == nil
|
||||||
|
|
|
@ -1,4 +1,5 @@
|
||||||
require 'rexml/validation/validationexception'
|
require 'rexml/validation/validationexception'
|
||||||
|
require 'rexml/undefinednamespaceexception'
|
||||||
|
|
||||||
module REXML
|
module REXML
|
||||||
module Parsers
|
module Parsers
|
||||||
|
|
|
@ -135,6 +135,7 @@ module REXML
|
||||||
def initialize(arg, block_size=500, encoding=nil)
|
def initialize(arg, block_size=500, encoding=nil)
|
||||||
@er_source = @source = arg
|
@er_source = @source = arg
|
||||||
@to_utf = false
|
@to_utf = false
|
||||||
|
|
||||||
# Determining the encoding is a deceptively difficult issue to resolve.
|
# Determining the encoding is a deceptively difficult issue to resolve.
|
||||||
# First, we check the first two bytes for UTF-16. Then we
|
# First, we check the first two bytes for UTF-16. Then we
|
||||||
# assume that the encoding is at least ASCII enough for the '>', and
|
# assume that the encoding is at least ASCII enough for the '>', and
|
||||||
|
@ -146,13 +147,16 @@ module REXML
|
||||||
str = @source.read( 2 )
|
str = @source.read( 2 )
|
||||||
if encoding
|
if encoding
|
||||||
self.encoding = encoding
|
self.encoding = encoding
|
||||||
elsif /\A(?:\xfe\xff|\xff\xfe)/n =~ str
|
elsif 0xfe == str[0] && 0xff == str[1]
|
||||||
self.encoding = check_encoding( str )
|
@line_break = "\000>"
|
||||||
elsif (0xef == str[0] && 0xbb == str[1])
|
elsif 0xff == str[0] && 0xfe == str[1]
|
||||||
|
@line_break = ">\000"
|
||||||
|
elsif 0xef == str[0] && 0xbb == str[1]
|
||||||
str += @source.read(1)
|
str += @source.read(1)
|
||||||
str = '' if (0xbf == str[2])
|
str = '' if (0xbf == str[2])
|
||||||
|
@line_break = ">"
|
||||||
else
|
else
|
||||||
@line_break = '>'
|
@line_break = ">"
|
||||||
end
|
end
|
||||||
super str+@source.readline( @line_break )
|
super str+@source.readline( @line_break )
|
||||||
end
|
end
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue