1
0
Fork 0
mirror of https://github.com/ruby/ruby.git synced 2022-11-09 12:17:21 -05:00

* lib/rexml/parsers/baseparser.rb: Fix a bug that UTF-8 is used

for UTF-16XX encoded XML that doesn't have encoding="UTF-16" in
  XML declaration.
* test/rexml/test_document.rb: Add tests for the above change.


git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@37363 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
kou 2012-10-28 12:42:37 +00:00
parent a750424cab
commit a7322e3678
3 changed files with 32 additions and 0 deletions

View file

@ -1,3 +1,10 @@
Sun Oct 28 21:40:13 2012 Kouhei Sutou <kou@cozmixng.org>
* lib/rexml/parsers/baseparser.rb: Fix a bug that UTF-8 is used
for UTF-16XX encoded XML that doesn't have encoding="UTF-16" in
XML declaration.
* test/rexml/test_document.rb: Add tests for the above change.
Sun Oct 28 21:37:34 2012 Kouhei Sutou <kou@cozmixng.org>
* test/rexml/test_document.rb: Group tests that they parse

View file

@ -215,6 +215,9 @@ module REXML
if need_source_encoding_update?(encoding)
@source.encoding = encoding
end
if encoding.nil? and /\AUTF-16(?:BE|LE)\z/i =~ @source.encoding
encoding = "UTF-16"
end
standalone = STANDALONE.match(results)
standalone = standalone[1] unless standalone.nil?
return [ :xmldecl, version, encoding, standalone ]

View file

@ -246,5 +246,27 @@ EOX
assert_equal("UTF-16", document.encoding)
end
end
# Covers input that starts with a UTF-16 byte order mark while its XML
# declaration carries no encoding attribute. Both tests expect the parsed
# document to report "UTF-16" as its encoding.
class NoEncodingTest < self
  # Little-endian case: the BOM and the body are both encoded as UTF-16LE
  # and passed to the parser as raw bytes.
  def test_utf_16le
    source = <<-EOX
<?xml version="1.0"?>
<message>Hello world!</message>
EOX
    body = source.encode("UTF-16LE").force_encoding("ASCII-8BIT")
    byte_order_mark = "\ufeff".encode("UTF-16LE").force_encoding("ASCII-8BIT")
    parsed = REXML::Document.new(byte_order_mark + body)
    assert_equal("UTF-16", parsed.encoding)
  end

  # Big-endian case: the BOM and the body are both encoded as UTF-16BE
  # and passed to the parser as raw bytes.
  def test_utf_16be
    source = <<-EOX
<?xml version="1.0"?>
<message>Hello world!</message>
EOX
    body = source.encode("UTF-16BE").force_encoding("ASCII-8BIT")
    byte_order_mark = "\ufeff".encode("UTF-16BE").force_encoding("ASCII-8BIT")
    parsed = REXML::Document.new(byte_order_mark + body)
    assert_equal("UTF-16", parsed.encoding)
  end
end
end
end