1
0
Fork 0
mirror of https://github.com/ruby/ruby.git synced 2022-11-09 12:17:21 -05:00

* lib/rexml/encoding.rb (REXML::Encoding#encoding=): store @encoding

as a String that holds the name of the encoding.
  This partially reverts r29646.

* lib/rexml/document.rb: follow above.

* lib/rexml/output.rb: ditto.

* lib/rexml/parsers/baseparser.rb: ditto.

* lib/rexml/source.rb: ditto.

* lib/rexml/xmldecl.rb: ditto.

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@31008 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
naruse 2011-03-02 15:36:48 +00:00
parent cddcffb8f9
commit f25ff846f6
11 changed files with 114 additions and 147 deletions

View file

@ -1,3 +1,19 @@
Thu Mar 3 00:36:29 2011 NARUSE, Yui <naruse@ruby-lang.org>
* lib/rexml/encoding.rb (REXML::Encoding#encoding=): store @encoding
as a String that holds the name of the encoding.
This partially reverts r29646.
* lib/rexml/document.rb: follow above.
* lib/rexml/output.rb: ditto.
* lib/rexml/parsers/baseparser.rb: ditto.
* lib/rexml/source.rb: ditto.
* lib/rexml/xmldecl.rb: ditto.
Wed Mar 2 23:19:56 2011 Nobuyoshi Nakada <nobu@ruby-lang.org> Wed Mar 2 23:19:56 2011 Nobuyoshi Nakada <nobu@ruby-lang.org>
* string.c (str_byte_substr): return nil for negative length. * string.c (str_byte_substr): return nil for negative length.

View file

@ -184,7 +184,7 @@ module REXML
# that IE's limited abilities can handle. This hack inserts a space # that IE's limited abilities can handle. This hack inserts a space
# before the /> on empty tags. Defaults to false # before the /> on empty tags. Defaults to false
def write( output=$stdout, indent=-1, transitive=false, ie_hack=false ) def write( output=$stdout, indent=-1, transitive=false, ie_hack=false )
if xml_decl.encoding != ::Encoding::UTF_8 && !output.kind_of?(Output) if xml_decl.encoding != 'UTF-8' && !output.kind_of?(Output)
output = Output.new( output, xml_decl.encoding ) output = Output.new( output, xml_decl.encoding )
end end
formatter = if indent > -1 formatter = if indent > -1

View file

@ -1,8 +1,9 @@
module REXML module REXML
module Encoding module Encoding
# ID ---> Encoding object # ID ---> Encoding name
attr_reader :encoding attr_reader :encoding
def encoding=(encoding) def encoding=(encoding)
encoding = encoding.name if encoding.is_a?(Encoding)
if encoding.is_a?(String) if encoding.is_a?(String)
original_encoding = encoding original_encoding = encoding
encoding = find_encoding(encoding) encoding = find_encoding(encoding)
@ -11,35 +12,25 @@ module REXML
end end
end end
return false if defined?(@encoding) and encoding == @encoding return false if defined?(@encoding) and encoding == @encoding
if encoding and encoding != ::Encoding::UTF_8 if encoding
@encoding = encoding @encoding = encoding.upcase
else else
@encoding = ::Encoding::UTF_8 @encoding = 'UTF-8'
end end
true true
end end
def check_encoding(xml) def check_encoding(xml)
# We have to recognize UTF-16, LSB UTF-16, and UTF-8 # We have to recognize UTF-16BE, UTF-16LE, and UTF-8
if xml[0, 2] == "\xfe\xff" if xml[0, 2] == "\xfe\xff"
xml[0, 2] = "" xml[0, 2] = ""
::Encoding::UTF_16BE return 'UTF-16BE'
elsif xml[0, 2] == "\xff\xfe" elsif xml[0, 2] == "\xff\xfe"
xml[0, 2] = "" xml[0, 2] = ""
::Encoding::UTF_16LE return 'UTF-16LE'
else
if /\A\s*<\?xml\s+version\s*=\s*(['"]).*?\1
\s+encoding\s*=\s*(["'])(.*?)\2/mx =~ xml
encoding_name = $3
if /\Autf-16\z/i =~ encoding_name
::Encoding::UTF_16BE
else
find_encoding(encoding_name)
end
else
::Encoding::UTF_8
end
end end
xml =~ /^\s*<\?xml\s+version\s*=\s*(['"]).*?\1\s+encoding\s*=\s*(["'])(.*?)\2/m
return $3 ? $3.upcase : 'UTF-8'
end end
def encode(string) def encode(string)
@ -53,14 +44,19 @@ module REXML
private private
def find_encoding(name) def find_encoding(name)
case name case name
when "UTF-16"
name = "UTF-16BE"
when /\Ashift-jis\z/i when /\Ashift-jis\z/i
name = "Shift_JIS" return "SHIFT_JIS"
when /\ACP-(\d+)\z/ when /\ACP-(\d+)\z/
name = "CP#{$1}" name = "CP#{$1}"
when /\AUTF-8\z/i
return name
end end
::Encoding.find(name) begin
::Encoding::Converter.search_convpath(name, 'UTF-8')
rescue ::Encoding::ConverterNotFoundError
return nil
end
name
end end
end end
end end

View file

@ -22,7 +22,7 @@ module REXML
case node case node
when Document when Document
if node.xml_decl.encoding != ::Encoding::UTF_8 && !output.kind_of?(Output) if node.xml_decl.encoding != 'UTF-8' && !output.kind_of?(Output)
output = Output.new( output, node.xml_decl.encoding ) output = Output.new( output, node.xml_decl.encoding )
end end
write_document( node, output ) write_document( node, output )

View file

@ -10,7 +10,7 @@ module REXML
@output = real_IO @output = real_IO
self.encoding = encd self.encoding = encd
@to_utf = (@encoding != ::Encoding::UTF_8) @to_utf = encd != 'UTF-8'
end end
def <<( content ) def <<( content )

View file

@ -248,7 +248,7 @@ module REXML
@document_status = :after_doctype @document_status = :after_doctype
@source.read if @source.buffer.size<2 @source.read if @source.buffer.size<2
md = @source.match(/\s*/um, true) md = @source.match(/\s*/um, true)
if @source.encoding == ::Encoding::UTF_8 if @source.encoding == "UTF-8"
@source.buffer.force_encoding(::Encoding::UTF_8) @source.buffer.force_encoding(::Encoding::UTF_8)
end end
end end

View file

@ -54,14 +54,12 @@ module REXML
def encoding=(enc) def encoding=(enc)
return unless super return unless super
@line_break = encode( '>' ) @line_break = encode( '>' )
if @encoding != ::Encoding::UTF_8 if @encoding != 'UTF-8'
@buffer = decode(@buffer) @buffer = decode(@buffer)
@to_utf = true @to_utf = true
else else
@to_utf = false @to_utf = false
if @buffer.respond_to? :force_encoding @buffer.force_encoding ::Encoding::UTF_8
@buffer.force_encoding ::Encoding::UTF_8
end
end end
end end

View file

@ -109,20 +109,9 @@ module REXML
end end
private private
def normalized_encoding_name(_encoding)
if _encoding == ::Encoding::UTF_16BE
"UTF-16"
else
return _encoding.name
end
end
def content(enc) def content(enc)
rv = "version='#@version'" rv = "version='#@version'"
if @writeencoding || enc.to_s !~ /\Autf-8\z/i rv << " encoding='#{enc}'" if @writeencoding || enc !~ /utf-8/i
encoding_name = normalized_encoding_name(enc)
rv << " encoding='#{encoding_name}'"
end
rv << " standalone='#@standalone'" if @standalone rv << " standalone='#@standalone'" if @standalone
rv rv
end end

View file

@ -241,7 +241,7 @@ DELIMITER
end end
doc = REXML::Document.new(source_iso) doc = REXML::Document.new(source_iso)
assert_equal('ISO-8859-1', doc.xml_decl.encoding.to_s) assert_equal('ISO-8859-1', doc.xml_decl.encoding)
assert_equal(koln_utf, doc.root.text) assert_equal(koln_utf, doc.root.text)
doc.write(out="") doc.write(out="")
assert_equal(source_iso, out ) assert_equal(source_iso, out )
@ -255,23 +255,21 @@ DELIMITER
<position><aktuell datum="01-10-11">Technik</aktuell></position> <position><aktuell datum="01-10-11">Technik</aktuell></position>
<hauptspalte> <hauptspalte>
<headline>Technik</headline> <headline>Technik</headline>
Die Technik ist das Rückgrat der meisten Geschäftsprozesse bei Home of the Brave. Deshalb sollen hier alle relevanten technischen Abläufe, Daten und Einrichtungen beschrieben werden, damit jeder im Bedarfsfall die nötigen Informationen, Anweisungen und Verhaltensempfehlungen nachlesen und/oder abrufen kann. Die Technik ist das R\xFCckgrat der meisten Gesch\xFCftsprozesse bei Home of the Brave. Deshalb sollen hier alle relevanten technischen Abl\xFCufe, Daten und Einrichtungen beschrieben werden, damit jeder im Bedarfsfall die n\xFCtigen Informationen, Anweisungen und Verhaltensempfehlungen nachlesen und/oder abrufen kann.
</hauptspalte> </hauptspalte>
<nebenspalte> <nebenspalte>
<link ziel="Flash/">Flash</link><umbruch/> <link ziel="Flash/">Flash</link><umbruch/>
Nützliches von Flashern r Flasher.<umbruch/> N\xFCtzliches von Flashern f\xFCr Flasher.<umbruch/>
<link neu="ja" ziel="Cvs/">CVS-FAQ</link><umbruch/> <link neu="ja" ziel="Cvs/">CVS-FAQ</link><umbruch/>
FAQ zur Benutzung von CVS bei HOB FAQ zur Benutzung von CVS bei HOB
</nebenspalte> </nebenspalte>
</intranet> </intranet>
EOF EOF
tn = XPath.first(doc, "//nebenspalte/text()[2]") tn = XPath.first(doc, "//nebenspalte/text()[2]")
expected_iso = "Nützliches von Flashern für Flasher." expected_iso = "N\xFCtzliches von Flashern f\xFCr Flasher."
expected_utf = expected_iso.unpack('C*').pack('U*') expected_utf = expected_iso.unpack('C*').pack('U*')
if expected_utf.respond_to? :encode expected_iso.force_encoding(::Encoding::ISO_8859_1)
expected_iso.force_encoding("iso-8859-1") expected_utf.force_encoding(::Encoding::UTF_8)
expected_utf.force_encoding(::Encoding::UTF_8)
end
assert_equal(expected_utf, tn.to_s.strip) assert_equal(expected_utf, tn.to_s.strip)
f = REXML::Formatters::Default.new f = REXML::Formatters::Default.new
f.write( tn, Output.new(o = "", "ISO-8859-1") ) f.write( tn, Output.new(o = "", "ISO-8859-1") )

View file

@ -230,34 +230,12 @@ class Tester < Test::Unit::TestCase
doc = Document.new(docin) doc = Document.new(docin)
doc.write(test="") doc.write(test="")
assert_equal(31, doc.doctype.size) assert_equal(31, doc.doctype.size)
# Here's a little ditty from Tobias...
src = <<-EOL
<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.0//EN"
"http://www.w3.org/TR/SVG/DTD/svg10.dtd"
[
<!-- <!ENTITY % fast-slow "0 0 .5 1">-->
<!--<!ENTITY % slow-fast ".5 0 1 1">-->
<!ENTITY hover_ani
'<animateTransform attributeName="transform"
type="scale" restart="whenNotActive" values="1;0.96"
dur="0.5s" calcMode="spline" keySplines="0 0 .5 1"
fill="freeze" begin="mouseover"/>
<animateTransform attributeName="transform"
type="scale" restart="whenNotActive" values="0.96;1"
dur="0.5s" calcMode="spline" keySplines=".5 0 1 1"
fill="freeze" begin="mouseover+0.5s"/>'
>
]
> <a/>
EOL
end end
def test_document def test_document
# Testing cloning # Testing cloning
source = "<element/>" source = "<element/>"
doc = Document.new source doc = Document.new source
doc2 = Document.new doc
# Testing Root # Testing Root
assert_equal doc.root.name.to_s, "element" assert_equal doc.root.name.to_s, "element"
@ -642,11 +620,10 @@ class Tester < Test::Unit::TestCase
end end
def test_line def test_line
doc = Document.new File.new(fixture_path("bad.xml")) Document.new File.new(fixture_path("bad.xml"))
assert_fail "There should have been an error" assert_fail "There should have been an error"
rescue Exception rescue Exception
# We should get here # We should get here
er = $!
assert($!.line == 5, "Should have been an error on line 5, "+ assert($!.line == 5, "Should have been an error on line 5, "+
"but was reported as being on line #{$!.line}" ) "but was reported as being on line #{$!.line}" )
end end
@ -664,13 +641,11 @@ class Tester < Test::Unit::TestCase
def test_exception def test_exception
source = SourceFactory.create_from "<a/>" source = SourceFactory.create_from "<a/>"
p = ParseException.new( "dummy message", source ) p = ParseException.new( "dummy message", source )
s = p.to_s
begin begin
raise "dummy" raise "dummy"
rescue Exception rescue Exception
p.continued_exception = $! p.continued_exception = $!
end end
s = p.to_s
end end
def test_bad_content def test_bad_content
@ -682,7 +657,7 @@ class Tester < Test::Unit::TestCase
assert_equal "content>content", tree_gt.elements[1].text assert_equal "content>content", tree_gt.elements[1].text
# This isn't # This isn't
begin begin
tree_lt = Document.new in_lt Document.new in_lt
assert_fail "Should have gotten a parse error" assert_fail "Should have gotten a parse error"
rescue ParseException rescue ParseException
end end
@ -856,8 +831,6 @@ EOL
def test_attlist_write def test_attlist_write
file=File.new(fixture_path("foo.xml")) file=File.new(fixture_path("foo.xml"))
doc=Document.new file doc=Document.new file
root = doc.root
out = '' out = ''
doc.write(out) doc.write(out)
end end
@ -865,7 +838,7 @@ EOL
def test_more_namespaces def test_more_namespaces
assert_raise( REXML::UndefinedNamespaceException, assert_raise( REXML::UndefinedNamespaceException,
%Q{Should have gotten an Undefined Namespace error} ) { %Q{Should have gotten an Undefined Namespace error} ) {
doc1 = Document.new("<r><p><n:c/></p></r>") Document.new("<r><p><n:c/></p></r>")
} }
doc2 = Document.new("<r xmlns:n='1'><p><n:c/></p></r>") doc2 = Document.new("<r xmlns:n='1'><p><n:c/></p></r>")
es = XPath.match(doc2, '//c') es = XPath.match(doc2, '//c')
@ -916,7 +889,7 @@ EOL
end end
def test_oses_with_bad_EOLs def test_oses_with_bad_EOLs
d = Document.new("\n\n\n<?xml version='1.0'?>\n\n\n<a/>\n\n") Document.new("\n\n\n<?xml version='1.0'?>\n\n\n<a/>\n\n")
end end
# Contributed (with patch to fix bug) by Kouhei # Contributed (with patch to fix bug) by Kouhei
@ -1024,7 +997,6 @@ EOL
document.write(s) document.write(s)
end end
def test_write_cdata def test_write_cdata
src = "<a>A</a>" src = "<a>A</a>"
doc = REXML::Document.new( src ) doc = REXML::Document.new( src )
@ -1045,7 +1017,7 @@ EOL
<x:b x:n="foo"/> <x:b x:n="foo"/>
</a> </a>
EOL EOL
d = REXML::Document.new( source ) d = Document.new( source )
assert_equal( 'foo', REXML::XPath.first(d.root, "//x:b/@x:n").value ) assert_equal( 'foo', REXML::XPath.first(d.root, "//x:b/@x:n").value )
assert_equal( nil, REXML::XPath.first(d.root, "//x:b/@x:n", {})) assert_equal( nil, REXML::XPath.first(d.root, "//x:b/@x:n", {}))
end end
@ -1233,17 +1205,17 @@ EOL
def test_ticket_21 def test_ticket_21
src = "<foo bar=value/>" src = "<foo bar=value/>"
assert_raise( ParseException, "invalid XML should be caught" ) { assert_raise( ParseException, "invalid XML should be caught" ) {
d = REXML::Document.new(src) Document.new(src)
} }
begin begin
d = REXML::Document.new(src) Document.new(src)
rescue rescue
assert_match( /missing attribute quote/, $!.message ) assert_match( /missing attribute quote/, $!.message )
end end
end end
def test_ticket_63 def test_ticket_63
d = REXML::Document.new(File.new(fixture_path("t63-1.xml"))) Document.new(File.new(fixture_path("t63-1.xml")))
end end
def test_ticket_75 def test_ticket_75
@ -1275,9 +1247,9 @@ EOL
def test_ticket_88 def test_ticket_88
doc = REXML::Document.new("<?xml version=\"1.0\" encoding=\"shift_jis\"?>") doc = REXML::Document.new("<?xml version=\"1.0\" encoding=\"shift_jis\"?>")
assert_equal("<?xml version='1.0' encoding='Shift_JIS'?>", doc.to_s) assert_equal("<?xml version='1.0' encoding='SHIFT_JIS'?>", doc.to_s)
doc = REXML::Document.new("<?xml version = \"1.0\" encoding = \"shift_jis\"?>") doc = REXML::Document.new("<?xml version = \"1.0\" encoding = \"shift_jis\"?>")
assert_equal("<?xml version='1.0' encoding='Shift_JIS'?>", doc.to_s) assert_equal("<?xml version='1.0' encoding='SHIFT_JIS'?>", doc.to_s)
end end
def test_ticket_85 def test_ticket_85
@ -1295,8 +1267,6 @@ ENDXML
</bar> </bar>
</foo>" </foo>"
zml = "<foo><bar><bob name='jimmy'/></bar></foo>"
# The pretty printer ignores all whitespace, anyway so output1 == output2 # The pretty printer ignores all whitespace, anyway so output1 == output2
f = REXML::Formatters::Pretty.new( 2 ) f = REXML::Formatters::Pretty.new( 2 )
d = Document.new( xml, :ignore_whitespace_nodes=>:all ) d = Document.new( xml, :ignore_whitespace_nodes=>:all )
@ -1358,7 +1328,7 @@ ENDXML
# Per .2.5 Node Tests of XPath spec # Per .2.5 Node Tests of XPath spec
assert_raise( REXML::UndefinedNamespaceException, assert_raise( REXML::UndefinedNamespaceException,
%Q{Should have gotten an Undefined Namespace error} ) { %Q{Should have gotten an Undefined Namespace error} ) {
d = Document.new("<a><n:b/></a>") Document.new("<a><n:b/></a>")
} }
end end

View file

@ -18,7 +18,7 @@ class EncodingTester < Test::Unit::TestCase
def test_encoded_in_encoded_out def test_encoded_in_encoded_out
doc = Document.new( @encoded ) doc = Document.new( @encoded )
doc.write( out="" ) doc.write( out="" )
out.force_encoding('binary') if out.respond_to? :force_encoding out.force_encoding(::Encoding::ASCII_8BIT)
assert_equal( @encoded, out ) assert_equal( @encoded, out )
end end
@ -26,12 +26,12 @@ class EncodingTester < Test::Unit::TestCase
def test_encoded_in_change_out def test_encoded_in_change_out
doc = Document.new( @encoded ) doc = Document.new( @encoded )
doc.xml_decl.encoding = "UTF-8" doc.xml_decl.encoding = "UTF-8"
assert_equal( ::Encoding::UTF_8, doc.encoding ) assert_equal("UTF-8", doc.encoding)
REXML::Formatters::Default.new.write( doc.root, out="" ) REXML::Formatters::Default.new.write( doc.root, out="" )
out.force_encoding('binary') if out.respond_to? :force_encoding out.force_encoding(::Encoding::ASCII_8BIT)
assert_equal( @not_encoded, out ) assert_equal( @not_encoded, out )
char = XPath.first( doc, "/a/b/text()" ).to_s char = XPath.first( doc, "/a/b/text()" ).to_s
char.force_encoding('binary') if char.respond_to? :force_encoding char.force_encoding(::Encoding::ASCII_8BIT)
assert_equal( "ĉ", char ) assert_equal( "ĉ", char )
end end
@ -39,7 +39,7 @@ class EncodingTester < Test::Unit::TestCase
def test_encoded_in_different_out def test_encoded_in_different_out
doc = Document.new( @encoded ) doc = Document.new( @encoded )
REXML::Formatters::Default.new.write( doc.root, Output.new( out="", "UTF-8" ) ) REXML::Formatters::Default.new.write( doc.root, Output.new( out="", "UTF-8" ) )
out.force_encoding('binary') if out.respond_to? :force_encoding out.force_encoding(::Encoding::ASCII_8BIT)
assert_equal( @not_encoded, out ) assert_equal( @not_encoded, out )
end end
@ -47,9 +47,9 @@ class EncodingTester < Test::Unit::TestCase
def test_in_change_out def test_in_change_out
doc = Document.new( @not_encoded ) doc = Document.new( @not_encoded )
doc.xml_decl.encoding = "ISO-8859-3" doc.xml_decl.encoding = "ISO-8859-3"
assert_equal( ::Encoding::ISO_8859_3, doc.encoding ) assert_equal("ISO-8859-3", doc.encoding)
doc.write( out="" ) doc.write( out="" )
out.force_encoding('binary') if out.respond_to? :force_encoding out.force_encoding(::Encoding::ASCII_8BIT)
assert_equal( @encoded, out ) assert_equal( @encoded, out )
end end
@ -57,7 +57,7 @@ class EncodingTester < Test::Unit::TestCase
def test_in_different_out def test_in_different_out
doc = Document.new( @not_encoded ) doc = Document.new( @not_encoded )
doc.write( Output.new( out="", "ISO-8859-3" ) ) doc.write( Output.new( out="", "ISO-8859-3" ) )
out.force_encoding('binary') if out.respond_to? :force_encoding out.force_encoding(::Encoding::ASCII_8BIT)
assert_equal( @encoded, out ) assert_equal( @encoded, out )
end end
@ -66,10 +66,10 @@ class EncodingTester < Test::Unit::TestCase
def test_in_different_access def test_in_different_access
doc = Document.new <<-EOL doc = Document.new <<-EOL
<?xml version='1.0' encoding='ISO-8859-1'?> <?xml version='1.0' encoding='ISO-8859-1'?>
<a a="ÿ">ÿ</a> <a a="\xFF">\xFF</a>
EOL EOL
expect = "\303\277" expect = "\303\277"
expect.force_encoding('UTF-8') if expect.respond_to? :force_encoding expect.force_encoding(::Encoding::UTF_8)
assert_equal( expect, doc.elements['a'].attributes['a'] ) assert_equal( expect, doc.elements['a'].attributes['a'] )
assert_equal( expect, doc.elements['a'].text ) assert_equal( expect, doc.elements['a'].text )
end end
@ -86,7 +86,7 @@ class EncodingTester < Test::Unit::TestCase
def test_ticket_110 def test_ticket_110
utf16 = REXML::Document.new(File.new(fixture_path("ticket_110_utf16.xml"))) utf16 = REXML::Document.new(File.new(fixture_path("ticket_110_utf16.xml")))
assert_equal( ::Encoding::UTF_16BE, utf16.encoding ) assert_equal(utf16.encoding, "UTF-16")
assert( utf16[0].kind_of?(REXML::XMLDecl)) assert( utf16[0].kind_of?(REXML::XMLDecl))
end end
end end