mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
* lib/rexml/source.rb (REXML::IOSource#initialize): Reduce
@line_break initialize code. It should be done only in #encoding=. * lib/rexml/parsers/baseparser.rb: Don't set UTF-16 encoding to source by encoding="UTF-16" in XML declaration because UTF-16XX source encoding should be set in Source#initialize or IOSource#initialize. They should handle BOM. Parser should not concern itself with it. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@37361 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
parent
100b3be9ae
commit
2a42c1bd3a
3 changed files with 50 additions and 14 deletions
10
ChangeLog
10
ChangeLog
|
@ -1,3 +1,13 @@
|
|||
Sun Oct 28 21:25:11 2012 Kouhei Sutou <kou@cozmixng.org>
|
||||
|
||||
* lib/rexml/source.rb (REXML::IOSource#initialize): Reduce
|
||||
@line_break initialize code. It should be done only in #encoding=.
|
||||
* lib/rexml/parsers/baseparser.rb: Don't set UTF-16 encoding to
|
||||
source by encoding="UTF-16" in XML declaration because UTF-16XX
|
||||
source encoding should be set in Source#initialize or
|
||||
IOSource#initialize. They should handle BOM. Parser should not
|
||||
concern itself with it.
|
||||
|
||||
Sun Oct 28 21:18:37 2012 Kouhei Sutou <kou@cozmixng.org>
|
||||
|
||||
* test/rexml/test_document.rb: Add tests for parsing XML encoded
|
||||
|
|
|
@ -212,7 +212,9 @@ module REXML
|
|||
version = version[1] unless version.nil?
|
||||
encoding = ENCODING.match(results)
|
||||
encoding = encoding[1] unless encoding.nil?
|
||||
@source.encoding = encoding
|
||||
if need_source_encoding_update?(encoding)
|
||||
@source.encoding = encoding
|
||||
end
|
||||
standalone = STANDALONE.match(results)
|
||||
standalone = standalone[1] unless standalone.nil?
|
||||
return [ :xmldecl, version, encoding, standalone ]
|
||||
|
@ -493,6 +495,13 @@ module REXML
|
|||
end
|
||||
rv
|
||||
end
|
||||
|
||||
private
|
||||
def need_source_encoding_update?(xml_declaration_encoding)
|
||||
return false if xml_declaration_encoding.nil?
|
||||
return false if /\AUTF-16\z/i =~ xml_declaration_encoding
|
||||
true
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
@ -144,22 +144,39 @@ module REXML
|
|||
# if there is one. If there isn't one, the file MUST be UTF-8, as per
|
||||
# the XML spec. If there is one, we can determine the encoding from
|
||||
# it.
|
||||
@buffer = ""
|
||||
str = @source.read( 2 ) || ''
|
||||
if encoding
|
||||
self.encoding = encoding
|
||||
elsif str[0,2] == "\xfe\xff"
|
||||
@line_break = "\000>"
|
||||
elsif str[0,2] == "\xff\xfe"
|
||||
@line_break = ">\000"
|
||||
elsif str[0,2] == "\xef\xbb"
|
||||
str += @source.read(1)
|
||||
str = '' if (str[2,1] == "\xBF")
|
||||
@line_break = ">"
|
||||
super("", encoding)
|
||||
else
|
||||
@line_break = ">"
|
||||
need_super_with_line = false
|
||||
str = @source.read( 2 ) || ''
|
||||
str.force_encoding("ASCII-8BIT")
|
||||
if str[0, 2] == "\xfe\xff"
|
||||
@source.binmode
|
||||
@source.set_encoding("UTF-16BE")
|
||||
super("", "UTF-16BE")
|
||||
elsif str[0, 2] == "\xff\xfe"
|
||||
@source.binmode
|
||||
@source.set_encoding("UTF-16LE")
|
||||
super("", "UTF-16LE")
|
||||
elsif str[0, 2] == "\xef\xbb"
|
||||
str += @source.read(1)
|
||||
if str[2, 1] == "\xBF"
|
||||
@source.set_encoding("UTF-8")
|
||||
super("", "UTF-8")
|
||||
else
|
||||
need_super_with_line = true
|
||||
end
|
||||
else
|
||||
need_super_with_line = true
|
||||
end
|
||||
if need_super_with_line
|
||||
if @source.eof?
|
||||
super(str)
|
||||
else
|
||||
super(str + @source.readline(">"))
|
||||
end
|
||||
end
|
||||
end
|
||||
super( @source.eof? ? str : str+@source.readline( @line_break ) )
|
||||
|
||||
if !@to_utf and
|
||||
@buffer.respond_to?(:force_encoding) and
|
||||
|
|
Loading…
Add table
Reference in a new issue