* lib/rexml/encoding.rb (REXML::Encoding#encoding=): store @encoding

as a String holding the name of the encoding. This partially reverts r29646. * lib/rexml/document.rb: follow above. * lib/rexml/output.rb: ditto. * lib/rexml/parsers/baseparser.rb: ditto. * lib/rexml/source.rb: ditto. * lib/rexml/xmldecl.rb: ditto. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@31008 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2022-11-09 12:17:21 -05:00 · 2011-03-02 15:36:48 +00:00 · 2011-03-02 15:36:48 +00:00 · f25ff846f6
commit f25ff846f6
parent cddcffb8f9
11 changed files with 114 additions and 147 deletions
--- a/16
+++ b/16
@ -1,3 +1,19 @@
 Thu Mar  3 00:36:29 2011  NARUSE, Yui  <naruse@ruby-lang.org>
 	* lib/rexml/encoding.rb (REXML::Encoding#encoding=): store @encoding
 	  a String which means the name of the encoding.
 	  this partially revert r29646.
 	* lib/rexml/document.rb: follow above.
 	* lib/rexml/output.rb: ditto.
 	* lib/rexml/parsers/baseparser.rb: ditto.
 	* lib/rexml/source.rb: ditto.
 	* lib/rexml/xmldecl.rb: ditto.
 Wed Mar  2 23:19:56 2011  Nobuyoshi Nakada  <nobu@ruby-lang.org>
 	* string.c (str_byte_substr): return nil for negative length.
--- a/lib/rexml/document.rb
+++ b/lib/rexml/document.rb
@ -184,7 +184,7 @@ module REXML
    #   that IE's limited abilities can handle.  This hack inserts a space
    #   before the /> on empty tags.  Defaults to false
    def write( output=$stdout, indent=-1, transitive=false, ie_hack=false )
-      if xml_decl.encoding != ::Encoding::UTF_8 && !output.kind_of?(Output)
+      if xml_decl.encoding != 'UTF-8' && !output.kind_of?(Output)
        output = Output.new( output, xml_decl.encoding )
      end
      formatter = if indent > -1
--- a/lib/rexml/encoding.rb
+++ b/lib/rexml/encoding.rb
@ -1,8 +1,9 @@
 module REXML
  module Encoding
-    # ID ---> Encoding object
+    # ID ---> Encoding name
    attr_reader :encoding
    def encoding=(encoding)
      encoding = encoding.name if encoding.is_a?(Encoding)
      if encoding.is_a?(String)
        original_encoding = encoding
        encoding = find_encoding(encoding)
@ -11,35 +12,25 @@ module REXML
        end
      end
      return false if defined?(@encoding) and encoding == @encoding
-      if encoding and encoding != ::Encoding::UTF_8
+      if encoding
-        @encoding = encoding
+        @encoding = encoding.upcase
      else
-        @encoding = ::Encoding::UTF_8
+        @encoding = 'UTF-8'
      end
      true
    end
    def check_encoding(xml)
-      # We have to recognize UTF-16, LSB UTF-16, and UTF-8
+      # We have to recognize UTF-16BE, UTF-16LE, and UTF-8
      if xml[0, 2] == "\xfe\xff"
        xml[0, 2] = ""
-        ::Encoding::UTF_16BE
+        return 'UTF-16BE'
      elsif xml[0, 2] == "\xff\xfe"
        xml[0, 2] = ""
-        ::Encoding::UTF_16LE
+        return 'UTF-16LE'
      else
        if /\A\s*<\?xml\s+version\s*=\s*(['"]).*?\1
            \s+encoding\s*=\s*(["'])(.*?)\2/mx =~ xml
          encoding_name = $3
          if /\Autf-16\z/i =~ encoding_name
            ::Encoding::UTF_16BE
          else
            find_encoding(encoding_name)
          end
        else
          ::Encoding::UTF_8
        end
      end
      xml =~ /^\s*<\?xml\s+version\s*=\s*(['"]).*?\1\s+encoding\s*=\s*(["'])(.*?)\2/m
      return $3 ? $3.upcase : 'UTF-8'
    end
    def encode(string)
@ -53,14 +44,19 @@ module REXML
    private
    def find_encoding(name)
      case name
      when "UTF-16"
        name = "UTF-16BE"
      when /\Ashift-jis\z/i
-        name = "Shift_JIS"
+        return "SHIFT_JIS"
      when /\ACP-(\d+)\z/
        name = "CP#{$1}"
      when /\AUTF-8\z/i
        return name
      end
-      ::Encoding.find(name)
+      begin
        ::Encoding::Converter.search_convpath(name, 'UTF-8')
      rescue ::Encoding::ConverterNotFoundError
        return nil
      end
      name
    end
  end
 end
--- a/lib/rexml/formatters/default.rb
+++ b/lib/rexml/formatters/default.rb
@ -22,7 +22,7 @@ module REXML
        case node
        when Document
-          if node.xml_decl.encoding != ::Encoding::UTF_8 && !output.kind_of?(Output)
+          if node.xml_decl.encoding != 'UTF-8' && !output.kind_of?(Output)
            output = Output.new( output, node.xml_decl.encoding )
          end
          write_document( node, output )
--- a/lib/rexml/output.rb
+++ b/lib/rexml/output.rb
@ -10,7 +10,7 @@ module REXML
      @output = real_IO
      self.encoding = encd
-      @to_utf = (@encoding != ::Encoding::UTF_8)
+      @to_utf = encd != 'UTF-8'
    end
    def <<( content )
--- a/lib/rexml/parsers/baseparser.rb
+++ b/lib/rexml/parsers/baseparser.rb
@ -248,7 +248,7 @@ module REXML
            @document_status = :after_doctype
            @source.read if @source.buffer.size<2
            md = @source.match(/\s*/um, true)
-            if @source.encoding == ::Encoding::UTF_8
+            if @source.encoding == "UTF-8"
              @source.buffer.force_encoding(::Encoding::UTF_8)
            end
          end
--- a/lib/rexml/source.rb
+++ b/lib/rexml/source.rb
@ -54,14 +54,12 @@ module REXML
    def encoding=(enc)
      return unless super
      @line_break = encode( '>' )
-      if @encoding != ::Encoding::UTF_8
+      if @encoding != 'UTF-8'
        @buffer = decode(@buffer)
        @to_utf = true
      else
        @to_utf = false
-        if @buffer.respond_to? :force_encoding
+        @buffer.force_encoding ::Encoding::UTF_8
          @buffer.force_encoding ::Encoding::UTF_8
        end
      end
    end
--- a/lib/rexml/xmldecl.rb
+++ b/lib/rexml/xmldecl.rb
@ -109,20 +109,9 @@ module REXML
    end
    private
    def normalized_encoding_name(_encoding)
      if _encoding == ::Encoding::UTF_16BE
        "UTF-16"
      else
        return _encoding.name
      end
    end
    def content(enc)
      rv = "version='#@version'"
-      if @writeencoding || enc.to_s !~ /\Autf-8\z/i
+      rv << " encoding='#{enc}'" if @writeencoding || enc !~ /utf-8/i
        encoding_name = normalized_encoding_name(enc)
        rv << " encoding='#{encoding_name}'"
      end
      rv << " standalone='#@standalone'" if @standalone
      rv
    end
--- a/test/rexml/test_contrib.rb
+++ b/test/rexml/test_contrib.rb
@ -241,7 +241,7 @@ DELIMITER
    end
    doc = REXML::Document.new(source_iso)
-    assert_equal('ISO-8859-1', doc.xml_decl.encoding.to_s)
+    assert_equal('ISO-8859-1', doc.xml_decl.encoding)
    assert_equal(koln_utf, doc.root.text)
    doc.write(out="")
    assert_equal(source_iso, out )
@ -255,23 +255,21 @@ DELIMITER
 <position><aktuell datum="01-10-11">Technik</aktuell></position>
 <hauptspalte>
 <headline>Technik</headline>
-Die Technik ist das Rückgrat der meisten Geschäftsprozesse bei Home of the Brave. Deshalb sollen hier alle relevanten technischen Abläufe, Daten und Einrichtungen beschrieben werden, damit jeder im Bedarfsfall die nötigen Informationen, Anweisungen und Verhaltensempfehlungen nachlesen und/oder abrufen kann.
+Die Technik ist das R\xFCckgrat der meisten Gesch\xFCftsprozesse bei Home of the Brave. Deshalb sollen hier alle relevanten technischen Abl\xFCufe, Daten und Einrichtungen beschrieben werden, damit jeder im Bedarfsfall die n\xFCtigen Informationen, Anweisungen und Verhaltensempfehlungen nachlesen und/oder abrufen kann.
 </hauptspalte>
 <nebenspalte>
  <link ziel="Flash/">Flash</link><umbruch/>
-  Nützliches von Flashern für Flasher.<umbruch/>
+  N\xFCtzliches von Flashern f\xFCr Flasher.<umbruch/>
  <link neu="ja" ziel="Cvs/">CVS-FAQ</link><umbruch/>
  FAQ zur Benutzung von CVS bei HOB
 </nebenspalte>
 </intranet>
 EOF
    tn = XPath.first(doc, "//nebenspalte/text()[2]")
-    expected_iso = "Nützliches von Flashern für Flasher."
+    expected_iso = "N\xFCtzliches von Flashern f\xFCr Flasher."
-                expected_utf = expected_iso.unpack('C*').pack('U*')
+    expected_utf = expected_iso.unpack('C*').pack('U*')
-                if expected_utf.respond_to? :encode
+    expected_iso.force_encoding(::Encoding::ISO_8859_1)
-      expected_iso.force_encoding("iso-8859-1")
+    expected_utf.force_encoding(::Encoding::UTF_8)
      expected_utf.force_encoding(::Encoding::UTF_8)
                end
    assert_equal(expected_utf, tn.to_s.strip)
    f = REXML::Formatters::Default.new
    f.write( tn, Output.new(o = "", "ISO-8859-1") )
--- a/test/rexml/test_core.rb
+++ b/test/rexml/test_core.rb
@ -230,34 +230,12 @@ class Tester < Test::Unit::TestCase
    doc = Document.new(docin)
    doc.write(test="")
    assert_equal(31, doc.doctype.size)
    # Here's a little ditty from Tobias...
    src = <<-EOL
    <!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.0//EN"
    "http://www.w3.org/TR/SVG/DTD/svg10.dtd"
    [
    <!-- <!ENTITY % fast-slow "0 0  .5 1">-->
    <!--<!ENTITY % slow-fast ".5 0  1 1">-->
    <!ENTITY hover_ani
    '<animateTransform attributeName="transform"
    type="scale" restart="whenNotActive" values="1;0.96"
    dur="0.5s" calcMode="spline" keySplines="0 0  .5 1"
    fill="freeze" begin="mouseover"/>
    <animateTransform  attributeName="transform"
    type="scale" restart="whenNotActive" values="0.96;1"
    dur="0.5s" calcMode="spline" keySplines=".5 0  1 1"
    fill="freeze" begin="mouseover+0.5s"/>'
    >
    ]
    > <a/>
    EOL
  end
  def test_document
    # Testing cloning
    source = "<element/>"
    doc = Document.new source
    doc2 = Document.new doc
    # Testing Root
    assert_equal doc.root.name.to_s, "element"
@ -642,11 +620,10 @@ class Tester < Test::Unit::TestCase
  end
  def test_line
-    doc = Document.new File.new(fixture_path("bad.xml"))
+    Document.new File.new(fixture_path("bad.xml"))
    assert_fail "There should have been an error"
  rescue Exception
    # We should get here
    er = $!
    assert($!.line == 5, "Should have been an error on line 5, "+
      "but was reported as being on line #{$!.line}" )
  end
@ -664,13 +641,11 @@ class Tester < Test::Unit::TestCase
  def test_exception
    source = SourceFactory.create_from "<a/>"
    p = ParseException.new( "dummy message", source )
    s = p.to_s
    begin
      raise "dummy"
    rescue Exception
      p.continued_exception = $!
    end
    s = p.to_s
  end
  def test_bad_content
@ -682,7 +657,7 @@ class Tester < Test::Unit::TestCase
    assert_equal "content>content", tree_gt.elements[1].text
    # This isn't
    begin
-      tree_lt = Document.new in_lt
+      Document.new in_lt
      assert_fail "Should have gotten a parse error"
    rescue ParseException
    end
@ -856,8 +831,6 @@ EOL
  def test_attlist_write
    file=File.new(fixture_path("foo.xml"))
    doc=Document.new file
    root = doc.root 
    out = ''
    doc.write(out)
  end
@ -865,7 +838,7 @@ EOL
  def test_more_namespaces
    assert_raise( REXML::UndefinedNamespaceException,
                   %Q{Should have gotten an Undefined Namespace error} )  {
-      doc1 = Document.new("<r><p><n:c/></p></r>")
+      Document.new("<r><p><n:c/></p></r>")
    }
    doc2 = Document.new("<r xmlns:n='1'><p><n:c/></p></r>")
    es = XPath.match(doc2, '//c')
@ -916,7 +889,7 @@ EOL
  end
  def test_oses_with_bad_EOLs
-    d = Document.new("\n\n\n<?xml version='1.0'?>\n\n\n<a/>\n\n")
+    Document.new("\n\n\n<?xml version='1.0'?>\n\n\n<a/>\n\n")
  end
  # Contributed (with patch to fix bug) by Kouhei
@ -1024,7 +997,6 @@ EOL
    document.write(s)
  end
  def test_write_cdata
    src = "<a>A</a>"
    doc = REXML::Document.new( src )
@ -1045,7 +1017,7 @@ EOL
      <x:b x:n="foo"/>
    </a>
    EOL
-    d = REXML::Document.new( source )
+    d = Document.new( source )
    assert_equal( 'foo', REXML::XPath.first(d.root, "//x:b/@x:n").value )
    assert_equal( nil, REXML::XPath.first(d.root, "//x:b/@x:n", {}))
  end
@ -1233,17 +1205,17 @@ EOL
  def test_ticket_21
    src = "<foo bar=value/>"
    assert_raise( ParseException, "invalid XML should be caught" ) {
-      d = REXML::Document.new(src)
+      Document.new(src)
    }
    begin
-      d = REXML::Document.new(src)
+      Document.new(src)
    rescue
      assert_match( /missing attribute quote/, $!.message )
    end
  end
  def test_ticket_63
-    d = REXML::Document.new(File.new(fixture_path("t63-1.xml")))
+    Document.new(File.new(fixture_path("t63-1.xml")))
  end
  def test_ticket_75
@ -1275,9 +1247,9 @@ EOL
  def test_ticket_88
    doc = REXML::Document.new("<?xml version=\"1.0\" encoding=\"shift_jis\"?>")
-    assert_equal("<?xml version='1.0' encoding='Shift_JIS'?>", doc.to_s)
+    assert_equal("<?xml version='1.0' encoding='SHIFT_JIS'?>", doc.to_s)
    doc = REXML::Document.new("<?xml version = \"1.0\" encoding = \"shift_jis\"?>")
-    assert_equal("<?xml version='1.0' encoding='Shift_JIS'?>", doc.to_s)
+    assert_equal("<?xml version='1.0' encoding='SHIFT_JIS'?>", doc.to_s)
  end
  def test_ticket_85
@ -1295,8 +1267,6 @@ ENDXML
  </bar>
 </foo>"
    zml = "<foo><bar><bob name='jimmy'/></bar></foo>"
    # The pretty printer ignores all whitespace, anyway so output1 == output2
    f = REXML::Formatters::Pretty.new( 2 )
    d = Document.new( xml, :ignore_whitespace_nodes=>:all )
@ -1358,7 +1328,7 @@ ENDXML
    # Per .2.5 Node Tests of XPath spec
    assert_raise( REXML::UndefinedNamespaceException,
                   %Q{Should have gotten an Undefined Namespace error} )  {
-      d = Document.new("<a><n:b/></a>") 
+      Document.new("<a><n:b/></a>")
    }
  end
--- a/test/rexml/test_encoding.rb
+++ b/test/rexml/test_encoding.rb
@ -18,7 +18,7 @@ class EncodingTester < Test::Unit::TestCase
  def test_encoded_in_encoded_out
    doc = Document.new( @encoded )
    doc.write( out="" )
-    out.force_encoding('binary') if out.respond_to? :force_encoding
+    out.force_encoding(::Encoding::ASCII_8BIT)
    assert_equal( @encoded, out )
  end
@ -26,12 +26,12 @@ class EncodingTester < Test::Unit::TestCase
  def test_encoded_in_change_out
    doc = Document.new( @encoded )
    doc.xml_decl.encoding = "UTF-8"
-    assert_equal( ::Encoding::UTF_8, doc.encoding )
+    assert_equal("UTF-8", doc.encoding)
    REXML::Formatters::Default.new.write( doc.root, out="" )
-    out.force_encoding('binary') if out.respond_to? :force_encoding
+    out.force_encoding(::Encoding::ASCII_8BIT)
    assert_equal( @not_encoded, out )
    char = XPath.first( doc, "/a/b/text()" ).to_s
-    char.force_encoding('binary') if char.respond_to? :force_encoding
+    char.force_encoding(::Encoding::ASCII_8BIT)
    assert_equal( "ĉ", char )
  end
@ -39,7 +39,7 @@ class EncodingTester < Test::Unit::TestCase
  def test_encoded_in_different_out
    doc = Document.new( @encoded )
    REXML::Formatters::Default.new.write( doc.root, Output.new( out="", "UTF-8" ) )
-    out.force_encoding('binary') if out.respond_to? :force_encoding
+    out.force_encoding(::Encoding::ASCII_8BIT)
    assert_equal( @not_encoded, out )
  end
@ -47,9 +47,9 @@ class EncodingTester < Test::Unit::TestCase
  def test_in_change_out
    doc = Document.new( @not_encoded )
    doc.xml_decl.encoding = "ISO-8859-3"
-    assert_equal( ::Encoding::ISO_8859_3, doc.encoding )
+    assert_equal("ISO-8859-3", doc.encoding)
    doc.write( out="" )
-    out.force_encoding('binary') if out.respond_to? :force_encoding
+    out.force_encoding(::Encoding::ASCII_8BIT)
    assert_equal( @encoded, out )
  end
@ -57,7 +57,7 @@ class EncodingTester < Test::Unit::TestCase
  def test_in_different_out
    doc = Document.new( @not_encoded )
    doc.write( Output.new( out="", "ISO-8859-3" ) )
-    out.force_encoding('binary') if out.respond_to? :force_encoding
+    out.force_encoding(::Encoding::ASCII_8BIT)
    assert_equal( @encoded, out )
  end
@ -66,10 +66,10 @@ class EncodingTester < Test::Unit::TestCase
  def test_in_different_access
    doc = Document.new <<-EOL
    <?xml version='1.0' encoding='ISO-8859-1'?>
-    <a a="ÿ">ÿ</a>
+    <a a="\xFF">\xFF</a>
    EOL
    expect = "\303\277"
-    expect.force_encoding('UTF-8') if expect.respond_to? :force_encoding
+    expect.force_encoding(::Encoding::UTF_8)
    assert_equal( expect, doc.elements['a'].attributes['a'] )
    assert_equal( expect, doc.elements['a'].text )
  end
@ -86,7 +86,7 @@ class EncodingTester < Test::Unit::TestCase
  def test_ticket_110
    utf16 = REXML::Document.new(File.new(fixture_path("ticket_110_utf16.xml")))
-    assert_equal( ::Encoding::UTF_16BE, utf16.encoding )
+    assert_equal(utf16.encoding, "UTF-16")
    assert( utf16[0].kind_of?(REXML::XMLDecl))
  end
 end