Merges upstream changes for REXML v3.1.7

http://www.germane-software.com/repos/rexml/tags/3.1.7 r1278@bean: ser | 2007-06-07 00:53:06 -0400 Fixed a double-encoding bug. This was a regression, related to ticket:48. r1292@bean: ser | 2007-07-25 08:19:36 -0400 r1279@bean: ser | 2007-06-09 23:19:02 -0400 Fixes ticket:89 -- encoding CP-1252 was broken. ISO-8859-15 had the same problem. Also in this patch is a fix to merge.rb (unused, but it should at least contain no errors), and a unit test for ticket:88. r1293@bean: ser | 2007-07-25 08:19:37 -0400 r1281@bean: ser | 2007-07-24 11:08:48 -0400 Addresses ticket:85 This is a major rewrite of the XML formatting code. The XML writers have all been extracted out of the classes and put into their own class containers. This makes writing parsers easier, and cleaner. There are three formatters, which correspond to the previous three XML writing modes: REXML::Formatters::Default Prints the XML document exactly as it was parsed REXML::Formatters::Pretty Pretty prints the XML document, destroying whitespace in the document REXML::Formatters::Transitive Pretty prints the XML document, preserving whitespace All of the write() functions have been deprecated (some are still used, but these will also go away) except the write() function on Document, which is left for convenience. To pretty print an XML document the canonical way: formatter = REXML::Formatters::Pretty.new( 5 ) # indent by 5 spaces formatter.write( document, output ) r1294@bean: ser | 2007-07-25 08:19:38 -0400 r1283@bean: ser | 2007-07-24 19:53:30 -0400 This goes with the previous commit. r1295@bean: ser | 2007-07-25 08:19:39 -0400 r1285@bean: ser | 2007-07-24 20:02:07 -0400 And THIS goes with the previous two patches. Dammit. r1296@bean: ser | 2007-07-25 08:19:40 -0400 r1287@bean: ser | 2007-07-24 20:12:25 -0400 Applied patch from Jeff Barczewski. Note that this changes what the values of the name and IDs are from the previous behavior -- the values no longer include the quotes. 
This is the correct behavior, so I'm leaving it in, but it is not backwards compatible. Also fixes the serializer so that it outputs the doctype in a correct format (needed as a result of this change). r1297@bean: ser | 2007-07-25 08:38:38 -0400 Version update git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@12844 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2022-11-09 12:17:21 -05:00 · 2007-07-25 12:47:23 +00:00 · 2007-07-25 12:47:23 +00:00 · 1d8c98a486
commit 1d8c98a486
parent fead3c93e7
23 changed files with 1689 additions and 1328 deletions
--- a/lib/rexml/attribute.rb
+++ b/lib/rexml/attribute.rb
@ -18,16 +18,32 @@ module REXML
 		PATTERN = /\s*(#{NAME_STR})\s*=\s*(["'])(.*?)\2/um
 		# Constructor.
    # FIXME: The parser doesn't catch illegal characters in attributes
    #
    # first:: 
    #   Either: an Attribute, which this new attribute will become a
    #   clone of; or a String, which is the name of this attribute
    # second::
    #   If +first+ is an Attribute, then this may be an Element, or nil.
    #   If nil, then the Element parent of this attribute is the parent
    #   of the +first+ Attribute.  If the first argument is a String, 
    #   then this must also be a String, and is the content of the attribute.  
    #   If this is the content, it must be fully normalized (contain no
    #   illegal characters).
    # parent::
    #   Ignored unless +first+ is a String; otherwise, may be the Element 
    #   parent of this attribute, or nil.
    #
 		#
 		#  Attribute.new( attribute_to_clone )
-		#  Attribute.new( source )
+		#  Attribute.new( attribute_to_clone, parent_element )
 		#  Attribute.new( "attr", "attr_value" )
 		#  Attribute.new( "attr", "attr_value", parent_element )
 		def initialize( first, second=nil, parent=nil )
 			@normalized = @unnormalized = @element = nil
 			if first.kind_of? Attribute
 				self.name = first.expanded_name
-				@value = first.value
+				@unnormalized = first.value
 				if second.kind_of? Element
 					@element = second
 				else
@ -36,7 +52,7 @@ module REXML
 			elsif first.kind_of? String
 				@element = parent if parent.kind_of? Element
 				self.name = first
-				@value = second.to_s
+				@normalized = second.to_s
 			else
 				raise "illegal argument #{first.class.name} to Attribute constructor"
 			end
@ -72,7 +88,7 @@ module REXML
 		# Returns true if other is an Attribute and has the same name and value,
 		# false otherwise.
 		def ==( other )
-			other.kind_of?(Attribute) and other.name==name and other.value==@value
+			other.kind_of?(Attribute) and other.name==name and other.value==value
 		end
 		# Creates (and returns) a hash from both the name and value
@ -87,7 +103,11 @@ module REXML
 		#  b = Attribute.new( "ns:x", "y" )
 		#  b.to_string     # -> "ns:x='y'"
 		def to_string
-			"#@expanded_name='#{to_s().gsub(/'/, '&apos;')}'"
+			if @element and @element.context and @element.context[:attribute_quote] == :quote
 				%Q^#@expanded_name="#{to_s().gsub(/"/, '&quote;')}"^
 			else
 				"#@expanded_name='#{to_s().gsub(/'/, '&apos;')}'"
 			end
 		end
 		# Returns the attribute value, with entities replaced
@ -100,8 +120,9 @@ module REXML
 				doctype = doc.doctype if doc
 			end
 			@normalized = Text::normalize( @unnormalized, doctype )
 			@unnormalized = nil
-			@normalized = Text::normalize( @value, doctype )
+      @normalized
 		end
 		# Returns the UNNORMALIZED value of this attribute.  That is, entities
@ -113,8 +134,9 @@ module REXML
 				doc = @element.document
 				doctype = doc.doctype if doc
 			end
 			@unnormalized = Text::unnormalize( @normalized, doctype )
 			@normalized = nil
-			@unnormalized = Text::unnormalize( @value, doctype )
+      @unnormalized
 		end
 		# Returns a copy of this attribute
--- a/lib/rexml/cdata.rb
+++ b/lib/rexml/cdata.rb
@ -39,31 +39,26 @@ module REXML
      @string
    end
    # == DEPRECATED
    # See the rexml/formatters package
    #
 		# Generates XML output of this object
 		#
 		# output::
 		#   Where to write the string.  Defaults to $stdout
 		# indent::
-		#   An integer.  If -1, no indenting will be used; otherwise, the
+    #   The amount to indent this node by
 		#   indentation will be this number of spaces, and children will be
 		#   indented an additional amount.  Defaults to -1.
 		# transitive::
-		#   If transitive is true and indent is >= 0, then the output will be
+    #   Ignored
 		#   pretty-printed in such a way that the added whitespace does not affect
 		#   the absolute *value* of the document -- that is, it leaves the value
 		#   and number of Text nodes in the document unchanged.
 		# ie_hack::
-		#   Internet Explorer is the worst piece of crap to have ever been
+    #   Ignored
 		#   written, with the possible exception of Windows itself.  Since IE is
 		#   unable to parse proper XML, we have to provide a hack to generate XML
 		#   that IE's limited abilities can handle.  This hack inserts a space 
 		#   before the /> on empty tags.
 		#
 		# _Examples_
 		#  c = CData.new( " Some text " )
 		#  c.write( $stdout )     #->  <![CDATA[ Some text ]]>
 		def write( output=$stdout, indent=-1, transitive=false, ie_hack=false )
-      #indent( output, indent ) unless transitive
+      Kernel.warn( "#{self.class.name}.write is deprecated" )
 			indent( output, indent )
 			output << START
 			output << @string
 			output << STOP
--- a/lib/rexml/comment.rb
+++ b/lib/rexml/comment.rb
@ -34,6 +34,9 @@ module REXML
 			Comment.new self
 		end
    # == DEPRECATED
    # See REXML::Formatters
    #
 		# output::
 		#	 Where to write the string
 		# indent::
@ -45,6 +48,7 @@ module REXML
 		# ie_hack::
 		#	 Needed for conformity to the child API, but not used by this class.
 		def write( output, indent=-1, transitive=false, ie_hack=false )
      Kernel.warn("Comment.write is deprecated.  See REXML::Formatters")
 			indent( output, indent )
 			output << START
 			output << @string
--- a/lib/rexml/doctype.rb
+++ b/lib/rexml/doctype.rb
@ -98,38 +98,30 @@ module REXML
    # output::
    #   Where to write the string
    # indent::
-    #   An integer.  If -1, no indenting will be used; otherwise, the
+    #   An integer.  If -1, no indentation will be used; otherwise, the
    #   indentation will be this number of spaces, and children will be
    #   indented an additional amount.
    # transitive::
-    #   If transitive is true and indent is >= 0, then the output will be
+    #   Ignored
    #   pretty-printed in such a way that the added whitespace does not affect
    #   the absolute *value* of the document -- that is, it leaves the value
    #   and number of Text nodes in the document unchanged.
    # ie_hack::
-    #   Internet Explorer is the worst piece of crap to have ever been
+    #   Ignored
    #   written, with the possible exception of Windows itself.  Since IE is
    #   unable to parse proper XML, we have to provide a hack to generate XML
    #   that IE's limited abilities can handle.  This hack inserts a space 
    #   before the /> on empty tags.
    #
    def write( output, indent=0, transitive=false, ie_hack=false )
      f = REXML::Formatters::Default.new
      indent( output, indent )
      output << START
      output << ' '
      output << @name
      output << " #@external_id" if @external_id
-      output << " #@long_name" if @long_name
+      output << " #{@long_name.inspect}" if @long_name
-      output << " #@uri" if @uri
+      output << " #{@uri.inspect}" if @uri
      unless @children.empty?
        next_indent = indent + 1
        output << ' ['
        child = nil    # speed
        @children.each { |child|
          output << "\n"
-          child.write( output, next_indent )
+          f.write( child, output )
        }
        #output << '   '*next_indent
        output << "\n]"
      end
      output << STOP
@ -219,8 +211,10 @@ module REXML
      @string+'>'
    end
    # == DEPRECATED
    # See REXML::Formatters
    #
    # Appends this object's string form (+to_s+) to +output+.
    #
    # output::
    #   Receiver of the text; must respond to <TT>&lt;&lt;</TT>.
    # indent::
    #   Number of three-space indentation units written first.  Nothing is
    #   written for values of zero or less.
    def write( output, indent )
      if indent > 0
        output << ('   ' * indent)
      end
      output << to_s
    end
  end
@ -264,7 +258,6 @@ module REXML
    end
    # Writes +to_s+ onto +output+, preceded by optional indentation.
    #
    # output::
    #   Anything that responds to <TT>&lt;&lt;</TT>.
    # indent::
    #   How many three-space units to emit before the text.  The default of
    #   -1 (like any value below 1) emits no indentation.
    def write( output, indent=-1 )
      leading = indent > 0 ? '   ' * indent : nil
      output << leading if leading
      output << to_s
    end
--- a/lib/rexml/document.rb
+++ b/lib/rexml/document.rb
@ -31,9 +31,6 @@ module REXML
 	  # to be sources of valid XML documents.
 	  # @param context if supplied, contains the context of the document;
 	  # this should be a Hash.
 	  # NOTE that I'm not sure what the context is for; I cloned it out of
 	  # the Electric XML API (in which it also seems to do nothing), and it
 	  # is now legacy.  It may do something, someday... it may disappear.
 		def initialize( source = nil, context = {} )
 			super()
 			@context = context
@ -142,14 +139,53 @@ module REXML
 			xml_decl().stand_alone?
 		end
-		# Write the XML tree out, optionally with indent.  This writes out the
+    # Write the XML tree out.  This writes the entire XML document, including
-		# entire XML document, including XML declarations, doctype declarations,
+    # declarations and processing instructions.
-		# and processing instructions (if any are given).
+    #
 		# A controversial point is whether Document should always write the XML
 		# declaration (<?xml version='1.0'?>) whether or not one is given by the
 		# user (or source document).  REXML does not write one if one was not
 		# specified, because it adds unnecessary bandwidth to applications such
 		# as XML-RPC.
    #
    # _Examples_
    #   Document.new("<a><b/></a>").serialize
    #
    #   output_string = ""
    #   tr = Transitive.new( output_string )
    #   Document.new("<a><b/></a>").serialize( tr )
    #
    # formatter::
    #   One of the rexml/formatters classes.  If none is given, then the Pretty
    #   formatter will be used to dump the XML to the STDOUT.
    # Dumps the whole document to standard output through a formatter.
    #
    # formatter::
    #   An object responding to <TT>write( node, output )</TT>, such as one
    #   of the REXML::Formatters classes.  When nil, a
    #   REXML::Formatters::Pretty with its default settings is used.
    #
    # The destination is always $stdout: formatters receive their output
    # object at write time, so there is nothing else to take it from.  If
    # the declared encoding is not UTF-8, $stdout is wrapped in an Output so
    # the text is transcoded on the way out.
    #
    # Returns whatever the formatter's +write+ returns.
    def serialize( formatter = nil )
      output = $stdout
      if xml_decl.encoding != "UTF-8" && !output.kind_of?(Output)
        output = Output.new( output, xml_decl.encoding )
      end
      formatter = REXML::Formatters::Pretty.new if formatter.nil?
      formatter.write( self, output )
    end
    # Write the XML tree out, optionally with indent.  This writes out the
 		# entire XML document, including XML declarations, doctype declarations,
 		# and processing instructions (if any are given).
    #
 		# A controversial point is whether Document should always write the XML
 		# declaration (<?xml version='1.0'?>) whether or not one is given by the
 		# user (or source document).  REXML does not write one if one was not
 		# specified, because it adds unnecessary bandwidth to applications such
 		# as XML-RPC.
    #
    # See also the classes in the rexml/formatters package for the proper way
    # to change the default formatting of XML output
 		#
 		#
 		# output::
@ -160,7 +196,7 @@ module REXML
 		#   indentation will be twice this number of spaces, and children will be
 		#   indented an additional amount.  For a value of 3, every item will be 
    #   indented 3 more levels, or 6 more spaces (2 * 3). Defaults to -1
-		# transitive::
+		# trans::
 		#   If transitive is true and indent is >= 0, then the output will be
 		#   pretty-printed in such a way that the added whitespace does not affect
 		#   the absolute *value* of the document -- that is, it leaves the value
@ -171,14 +207,20 @@ module REXML
 		#   unable to parse proper XML, we have to provide a hack to generate XML
 		#   that IE's limited abilities can handle.  This hack inserts a space 
 		#   before the /> on empty tags.  Defaults to false
-		def write( output=$stdout, indent=-1, transitive=false, ie_hack=false )
+		def write( output=$stdout, indent=-1, trans=false, ie_hack=false )
-			output = Output.new( output, xml_decl.encoding ) if xml_decl.encoding != "UTF-8" && !output.kind_of?(Output)
+      if xml_decl.encoding != "UTF-8" && !output.kind_of?(Output)
-			@children.each { |node|
+        output = Output.new( output, xml_decl.encoding )
-				indent( output, indent ) if node.node_type == :element
+      end
-				if node.write( output, indent, transitive, ie_hack )
+      formatter = if indent > -1
-          output << "\n" unless indent<0 or node == @children[-1]
+          if transitive
            REXML::Formatters::Transitive.new( indent, ie_hack )
          else
            REXML::Formatters::Pretty.new( indent, ie_hack )
          end
        else
          REXML::Formatters::Default.new( ie_hack )
        end
-			}
+      formatter.write( self, output )
 		end
--- a/lib/rexml/element.rb
+++ b/lib/rexml/element.rb
--- a/lib/rexml/encoding.rb
+++ b/lib/rexml/encoding.rb
@ -58,8 +58,8 @@ module REXML
      # We have to recognize UTF-16, LSB UTF-16, and UTF-8
      return UTF_16 if /\A\xfe\xff/n =~ str
      return UNILE if /\A\xff\xfe/n =~ str
-      str =~ /^\s*<?xml\s*version=(['"]).*?\2\s*encoding=(["'])(.*?)\2/um
+      str =~ /^\s*<\?xml\s+version\s*=\s*(['"]).*?\1\s+encoding\s*=\s*(["'])(.*?)\2/um
-      return $1.upcase if $1
+      return $3.upcase if $3
      return UTF_8
    end
  end
--- a/lib/rexml/encodings/CP-1252.rb
+++ b/lib/rexml/encodings/CP-1252.rb
@ -3,9 +3,15 @@
 #
 module REXML
  module Encoding
-    @@__REXML_encoding_methods = %q~
+  	register( "CP-1252" ) do |o|
  		class << o
  			alias encode encode_cp1252
 			alias decode decode_cp1252
  		end
  	end
    # Convert from UTF-8
-    def encode content
+    def encode_cp1252(content)
      array_utf8 = content.unpack('U*')
      array_enc = []
      array_utf8.each do |num|
@ -54,7 +60,7 @@ module REXML
    end
    # Convert to UTF-8
-    def decode(str)
+    def decode_cp1252(str)
      array_latin9 = str.unpack('C*')
      array_enc = []
      array_latin9.each do |num|
@ -93,6 +99,5 @@ module REXML
      end
      array_enc.pack('U*')
    end
    ~
  end
 end
--- a/lib/rexml/encodings/ISO-8859-15.rb
+++ b/lib/rexml/encodings/ISO-8859-15.rb
@ -3,9 +3,13 @@
 #
 module REXML
  module Encoding
-    @@__REXML_encoding_methods = %q~
+  	register("ISO-8859-15") do |o|
  		alias encode to_iso_8859_15
 		alias decode from_iso_8859_15
  	end
    # Convert from UTF-8
-    def to_iso_8859_15 content
+    def to_iso_8859_15(content)
      array_utf8 = content.unpack('U*')
      array_enc = []
      array_utf8.each do |num|
@ -64,6 +68,5 @@ module REXML
      end
      array_enc.pack('U*')
    end
    ~
  end
 end
--- a/lib/rexml/entity.rb
+++ b/lib/rexml/entity.rb
@ -89,6 +89,12 @@ module REXML
 		# Write out a fully formed, correct entity definition (assuming the Entity
 		# object itself is valid.)
    #
    # out::
    #   An object implementing <TT>&lt;&lt;</TT> to which the entity will be
    #   output
    # indent::
    #   *DEPRECATED* and ignored
 		def write out, indent=-1
 			out << '<!ENTITY '
 			out << '% ' if @reference
--- a/lib/rexml/formatters/default.rb
+++ b/lib/rexml/formatters/default.rb
@ -0,0 +1,109 @@
 module REXML
   module Formatters
     # Formatter that serializes nodes without adding any whitespace of its
     # own.  The single exception is the optional +ie_hack+ space inside
     # empty-element tags.
     class Default
       # Builds a formatter.
       #
       # ie_hack::
       #   When true, one space is written before the "/" that closes an
       #   empty element, so that IE's XML parser does not choke on it.
       def initialize( ie_hack=false )
         @ie_hack = ie_hack
       end

       # Serializes +node+ onto +output+, choosing the routine by the class
       # of the node.
       #
       # node::
       #   The node to write.
       # output::
       #   Anything responding to <TT>&lt;&lt;</TT>.  Hand in an Output object
       #   to control the output encoding.  For a Document whose declared
       #   encoding is not UTF-8, a plain +output+ is wrapped in an Output
       #   automatically.
       #
       # Raises an Exception ("XML FORMATTING ERROR") when the node matches
       # none of the known classes.
       def write( node, output )
         # The branches are tried top to bottom and the first match wins, so
         # their order must not be changed.
         case node

         when Document
           encoding = node.xml_decl.encoding
           unless encoding == "UTF-8" || output.kind_of?(Output)
             output = Output.new( output, encoding )
           end
           write_document( node, output )

         when Element
           write_element( node, output )

         when Declaration, ElementDecl, NotationDecl, ExternalEntity, Entity,
              Attribute, AttlistDecl
           # These node types still serialize themselves.
           node.write( output, -1 )

         when Instruction
           write_instruction( node, output )

         when DocType, XMLDecl
           node.write( output )

         when Comment
           write_comment( node, output )

         when CData
           write_cdata( node, output )

         when Text
           write_text( node, output )

         else
           raise Exception, "XML FORMATTING ERROR"

         end
       end

       protected

       # Writes every child of the document, in order.
       def write_document( node, output )
         node.children.each do |kid|
           write( kid, output )
         end
       end

       # Writes the start tag with its attributes, then either closes it as
       # an empty element or writes the children followed by the end tag.
       def write_element( node, output )
         output << "<#{node.expanded_name}"

         attributes = node.attributes
         unless attributes.empty?
           attributes.each_attribute do |attribute|
             output << " "
             attribute.write( output )
           end
         end

         kids = node.children
         if kids.empty?
           output << " " if @ie_hack
           output << "/"
         else
           output << ">"
           kids.each do |kid|
             write( kid, output )
           end
           output << "</#{node.expanded_name}"
         end
         # Both forms finish with the same closing bracket.
         output << ">"
       end

       # Writes the text node's string form unchanged.
       def write_text( node, output )
         output << node.to_s
       end

       # Writes the comment text between the comment delimiters.
       def write_comment( node, output )
         output << Comment::START
         output << node.to_s()
         output << Comment::STOP
       end

       # Writes the CDATA content between the CDATA delimiters.
       def write_cdata( node, output )
         output << CData::START
         output << node.to_s()
         output << CData::STOP
       end

       # Writes a processing instruction as target, one space, then content.
       # The first backslash is stripped from each delimiter constant before
       # it is written.
       def write_instruction( node, output )
         output << Instruction::START.sub( /\\/u, '' )
         output << node.target
         output << ' '
         output << node.content
         output << Instruction::STOP.sub( /\\/u, '' )
       end
     end
   end
 end
--- a/lib/rexml/formatters/pretty.rb
+++ b/lib/rexml/formatters/pretty.rb
@ -0,0 +1,134 @@
 require 'rexml/formatters/default'
 module REXML
   module Formatters
     # Pretty-prints an XML document.  This destroys whitespace in text nodes
     # and will insert carriage returns and indentations.
     #
     # TODO: Add an option to print attributes on new lines
     class Pretty < Default

       # If compact is set to true, then the formatter will attempt to use as
       # little space as possible
       attr_accessor :compact
       # The width of a page.  Used for formatting text
       attr_accessor :width

       # Create a new pretty printer.
       #
       # indentation::
       #   An integer greater than 0.  The indentation of each level will be
       #   this number of spaces.  If this is < 1, the behavior of this object
       #   is undefined.  Defaults to 2.
       # ie_hack::
       #   If true, the printer will insert whitespace before closing empty
       #   tags, thereby allowing Internet Explorer's feeble XML parser to
       #   function. Defaults to false.
       #
       # The page width starts at 80 and compact mode starts switched off;
       # change either through the accessors.
       def initialize( indentation=2, ie_hack=false )
         @indentation = indentation
         @level = 0
         @ie_hack = ie_hack
         @width = 80
         @compact = false
       end

       protected
       # Writes an element at the current indentation level.  Children go on
       # their own lines one level deeper, unless compact mode manages to fit
       # an all-text body on the same line as the tags.
       def write_element(node, output)
         output << ' '*@level
         output << "<#{node.expanded_name}"

         node.attributes.each_attribute do |attr|
           output << " "
           attr.write( output )
         end unless node.attributes.empty?

         if node.children.empty?
           output << " " if @ie_hack
           output << "/"
         else
           output << ">"
           # If compact and all children are text, and if the formatted output
           # is less than the specified width, then try to print everything on
           # one line
           skip = false
           if compact and node.children.all? { |c| c.kind_of?(Text) }
             string = ""
             # Render the text without indentation; it will sit directly
             # between the start and end tags.
             saved_level = @level
             @level = 0
             begin
               node.children.each { |child| write( child, string ) }
             ensure
               @level = saved_level
             end
             if string.length + @level < @width
               output << string
               skip = true
             end
           end
           unless skip
             output << "\n"
             @level += @indentation
             node.children.each { |child|
               # Whitespace-only text nodes are dropped entirely.
               next if child.kind_of?(Text) and child.to_s.strip.length == 0
               write( child, output )
               output << "\n"
             }
             @level -= @indentation
             output << ' '*@level
           end
           output << "</#{node.expanded_name}"
         end
         output << ">"
       end

       # Writes a text node with every run of whitespace collapsed to one
       # space, wrapped to the page width and indented to the current level.
       # Works on a copy, so the string held by the node is never modified.
       def write_text( node, output )
         s = node.to_s.gsub(/\s/, ' ').squeeze(' ')
         s = wrap(s, @width - @level)
         s = indent_text(s, @level, " ", true)
         output << (' '*@level + s)
       end

       # Writes a comment, indented to the current level.
       def write_comment( node, output)
         output << ' ' * @level
         super
       end

       # Writes a CDATA section, indented to the current level.
       def write_cdata( node, output)
         output << ' ' * @level
         super
       end

       # Writes the top-level children, separated by newlines.
       def write_document( node, output )
         # Ok, this is a bit odd.  All XML documents have an XML declaration,
         # but it may not write itself if the user didn't specifically add it,
         # either through the API or in the input document.  If it doesn't write
         # itself, then we don't need a carriage return... which makes this
         # logic more complex.
         # NOTE(review): the +writethis+ call assumes the first child is an
         # XML declaration whenever the second-child test is reached -- confirm.
         node.children.each { |child|
           next if child == node.children[-1] and child.instance_of?(Text)
           unless child == node.children[0] or child.instance_of?(Text) or
             (child == node.children[1] and !node.children[0].writethis)
             output << "\n"
           end
           write( child, output )
         }
       end

       private
       # Inserts +level+ copies of +style+ after every newline in +string+.
       # The string is returned unchanged when +level+ is negative.
       # +indentfirstline+ is accepted but not used.
       def indent_text(string, level=1, style="\t", indentfirstline=true)
         return string if level < 0
         string.gsub(/\n/, "\n#{style*level}")
       end

       # Breaks +string+ into lines by cutting at the last space found at or
       # before +width+.  A stretch with no such space cannot be broken and is
       # left as one (over-long) line.
       def wrap(string, width)
         parts = []
         while string.length > width and place = string.rindex(' ', width)
           parts << string[0...place]
           string = string[place+1..-1]
         end
         parts << string
         parts.join("\n")
       end
     end
   end
 end
--- a/lib/rexml/formatters/transitive.rb
+++ b/lib/rexml/formatters/transitive.rb
@ -0,0 +1,56 @@
 require 'rexml/formatters/pretty'
 module REXML
   module Formatters
     # The Transitive formatter writes an XML document that parses to an
     # identical document as the source document.  This means that no extra
     # whitespace nodes are inserted, and whitespace within text nodes is
     # preserved.  Within these constraints, the document is pretty-printed,
     # with whitespace inserted into the metadata to introduce formatting.
     #
     # Note that this is only useful if the original XML is not already
     # formatted.  Since this formatter does not alter whitespace nodes, the
     # results of formatting already formatted XML will be odd.
     class Transitive < Default
       # Create a new transitive printer.
       #
       # indentation::
       #   The number of spaces added per nesting level.  Defaults to 2.
       # ie_hack::
       #   Accepted so that this formatter can be constructed with the same
       #   two arguments as the other formatters.  The value is stored but
       #   needs no special handling: a line break is always written before
       #   the "/" of an empty tag, so whitespace is already present there.
       #   Defaults to false.
       def initialize( indentation=2, ie_hack=false )
         @indentation = indentation
         @level = 0
         @ie_hack = ie_hack
       end

       protected
       # Writes an element with the formatting whitespace placed inside the
       # tags: a newline plus indentation goes just before the closing
       # bracket of the start tag and of the end tag.  No whitespace is added
       # between tags.
       def write_element( node, output )
         output << "<#{node.expanded_name}"

         node.attributes.each_attribute do |attr|
           output << " "
           attr.write( output )
         end unless node.attributes.empty?

         output << "\n"
         output << ' '*@level
         if node.children.empty?
           output << "/"
         else
           output << ">"
           @level += @indentation
           node.children.each { |child|
             write( child, output )
           }
           @level -= @indentation
           output << "</#{node.expanded_name}"
           output << "\n"
           output << ' '*@level
         end
         output << ">"
       end

       # Writes the text node's string form unchanged, preserving whitespace.
       def write_text( node, output )
         output << node.to_s()
       end
     end
   end
 end
--- a/lib/rexml/instruction.rb
+++ b/lib/rexml/instruction.rb
@ -38,7 +38,11 @@ module REXML
 			Instruction.new self
 		end
    # == DEPRECATED
    # See the rexml/formatters package
    #
 		def write writer, indent=-1, transitive=false, ie_hack=false
      Kernel.warn( "#{self.class.name}.write is deprecated" )
 			indent(writer, indent)
 			writer << START.sub(/\\/u, '')
 			writer << @target
--- a/lib/rexml/node.rb
+++ b/lib/rexml/node.rb
@ -18,10 +18,19 @@ module REXML
 			@parent[ ind - 1 ]
 		end
-		def to_s indent=-1
+    # indent::
-			rv = ""
+    #   *DEPRECATED* This parameter is now ignored.  See the formatters in the
-			write rv,indent
+    #   REXML::Formatters package for changing the output style.
-			rv
+		def to_s indent=nil
      unless indent.nil?
        Kernel.warn( "#{self.class.name}.to_s(indent) parameter is deprecated" )
        f = REXML::Formatters::Pretty.new( indent )
        f.write( self, rv, indent )
      else
        f = REXML::Formatters::Default.new
        f.write( self, rv = "" )
      end
      return rv
 		end
 		def indent to, ind
--- a/lib/rexml/parsers/baseparser.rb
+++ b/lib/rexml/parsers/baseparser.rb
@ -53,7 +53,7 @@ module REXML
      STANDALONE = /\bstandalone\s*=\s["'](.*?)['"]/um
      ENTITY_START = /^\s*<!ENTITY/
-      IDENTITY = /^([!\*\w\-]+)(\s+#{NCNAME_STR})?(\s+["'].*?['"])?(\s+['"].*?["'])?/u
+      IDENTITY = /^([!\*\w\-]+)(\s+#{NCNAME_STR})?(\s+["'](.*?)['"])?(\s+['"](.*?)["'])?/u
      ELEMENTDECL_START = /^\s*<!ELEMENT/um
      ELEMENTDECL_PATTERN = /^\s*(<!ELEMENT.*?)>/um
      SYSTEMENTITY = /^\s*(%.*?;)\s*$/um
@ -217,10 +217,10 @@ module REXML
            close = md[2]
            identity =~ IDENTITY
            name = $1
-            raise REXML::ParseException("DOCTYPE is missing a name") if name.nil?
+            raise REXML::ParseException.new("DOCTYPE is missing a name") if name.nil?
            pub_sys = $2.nil? ? nil : $2.strip
-            long_name = $3.nil? ? nil : $3.strip
+            long_name = $4.nil? ? nil : $4.strip
-            uri = $4.nil? ? nil : $4.strip
+            uri = $6.nil? ? nil : $6.strip
            args = [ :start_doctype, name, pub_sys, long_name, uri ]
            if close == ">"
              @document_status = :after_doctype
--- a/lib/rexml/parsers/sax2parser.rb
+++ b/lib/rexml/parsers/sax2parser.rb
@ -94,6 +94,8 @@ module REXML
 					when :end_document
 						handle( :end_document )
 						break
                                        when :start_doctype
                                                handle( :doctype, *event[1..-1])
 					when :end_doctype
 						context = context[1]
 					when :start_element
@ -167,7 +169,7 @@ module REXML
          when :entitydecl
            @entities[ event[1] ] = event[2] if event.size == 3
 						handle( *event )
-					when :processing_instruction, :comment, :doctype, :attlistdecl, 
+					when :processing_instruction, :comment, :attlistdecl, 
 						:elementdecl, :cdata, :notationdecl, :xmldecl
 						handle( *event )
 					end
--- a/lib/rexml/parsers/xpathparser.rb
+++ b/lib/rexml/parsers/xpathparser.rb
@ -551,7 +551,7 @@ module REXML
          end
        end
        #puts "BEFORE WITH '#{rest}'"
-        rest = LocationPath(rest, n) if rest =~ /^[\/\.\@\[\w_*]/
+        rest = LocationPath(rest, n) if rest =~ /\A[\/\.\@\[\w_*]/
        parsed.concat(n)
        return rest
      end
--- a/lib/rexml/rexml.rb
+++ b/lib/rexml/rexml.rb
@ -10,8 +10,8 @@
 #
 # Main page:: http://www.germane-software.com/software/rexml
 # Author:: Sean Russell <serATgermaneHYPHENsoftwareDOTcom>
-# Version:: 3.1.5
+# Version:: 3.1.7
-# Date:: 2006/250
+# Date:: 2007/206
 # 
 # This API documentation can be downloaded from the REXML home page, or can
 # be accessed online[http://www.germane-software.com/software/rexml_doc]
@ -20,9 +20,10 @@
 # or can be accessed 
 # online[http://www.germane-software.com/software/rexml/docs/tutorial.html]
 module REXML
-	COPYRIGHT = "Copyright © 2001-2006 Sean Russell <ser@germane-software.com>"
+	COPYRIGHT = "Copyright © 2001-2007 Sean Russell <ser@germane-software.com>"
-  DATE = "2006/250"
+  DATE = "2007/206"
-  VERSION = "3.1.5"
+  VERSION = "3.1.7"
 	REVISION = "$Revision$".gsub(/\$Revision:|\$/,'').strip
  Copyright = COPYRIGHT
  Version = VERSION
--- a/lib/rexml/source.rb
+++ b/lib/rexml/source.rb
@ -1,139 +1,139 @@
 require 'rexml/encoding'
 module REXML
-	# Generates Source-s.  USE THIS CLASS.
+  # Generates Source-s.  USE THIS CLASS.
-	class SourceFactory
+  class SourceFactory
-		# Generates a Source object
+    # Generates a Source object
-		# @param arg Either a String, or an IO
+    # @param arg Either a String, or an IO
-		# @return a Source, or nil if a bad argument was given
+    # @return a Source, or nil if a bad argument was given
-		def SourceFactory::create_from(arg)
+    def SourceFactory::create_from(arg)
      if arg.kind_of? String
-			  Source.new(arg)
+        Source.new(arg)
      elsif arg.respond_to? :read and
            arg.respond_to? :readline and
            arg.respond_to? :nil? and
            arg.respond_to? :eof?
-				IOSource.new(arg)
+        IOSource.new(arg)
      elsif arg.kind_of? Source
        arg
      else
        raise "#{source.class} is not a valid input stream.  It must walk \n"+
        "like either a String, IO, or Source."
      end
-		end
+    end
-	end
+  end
-	# A Source can be searched for patterns, and wraps buffers and other
+  # A Source can be searched for patterns, and wraps buffers and other
-	# objects and provides consumption of text
+  # objects and provides consumption of text
-	class Source
+  class Source
-		include Encoding
+    include Encoding
-		# The current buffer (what we're going to read next)
+    # The current buffer (what we're going to read next)
-		attr_reader :buffer
+    attr_reader :buffer
-		# The line number of the last consumed text
+    # The line number of the last consumed text
-		attr_reader :line
+    attr_reader :line
-		attr_reader :encoding
+    attr_reader :encoding
-		# Constructor
+    # Constructor
-		# @param arg must be a String, and should be a valid XML document
+    # @param arg must be a String, and should be a valid XML document
    # @param encoding if non-null, sets the encoding of the source to this
    # value, overriding all encoding detection
-		def initialize(arg, encoding=nil)
+    def initialize(arg, encoding=nil)
-			@orig = @buffer = arg
+      @orig = @buffer = arg
      if encoding
        self.encoding = encoding
      else
        self.encoding = check_encoding( @buffer )
      end
-			@line = 0
+      @line = 0
-		end
+    end
-		# Inherited from Encoding
+    # Inherited from Encoding
-		# Overridden to support optimized en/decoding
+    # Overridden to support optimized en/decoding
-		def encoding=(enc)
+    def encoding=(enc)
-			return unless super
+      return unless super
-			@line_break = encode( '>' )
+      @line_break = encode( '>' )
-			if enc != UTF_8
+      if enc != UTF_8
-				@buffer = decode(@buffer)
+        @buffer = decode(@buffer)
-				@to_utf = true
+        @to_utf = true
-			else
+      else
-				@to_utf = false
+        @to_utf = false
-			end
+      end
-		end
+    end
-		# Scans the source for a given pattern.  Note, that this is not your
+    # Scans the source for a given pattern.  Note, that this is not your
-		# usual scan() method.  For one thing, the pattern argument has some
+    # usual scan() method.  For one thing, the pattern argument has some
-		# requirements; for another, the source can be consumed.  You can easily
+    # requirements; for another, the source can be consumed.  You can easily
-		# confuse this method.  Originally, the patterns were easier
+    # confuse this method.  Originally, the patterns were easier
-		# to construct and this method more robust, because this method 
+    # to construct and this method more robust, because this method 
-		# generated search regexes on the fly; however, this was 
+    # generated search regexes on the fly; however, this was 
-		# computationally expensive and slowed down the entire REXML package 
+    # computationally expensive and slowed down the entire REXML package 
-		# considerably, since this is by far the most commonly called method.
+    # considerably, since this is by far the most commonly called method.
-		# @param pattern must be a Regexp, and must be in the form of
+    # @param pattern must be a Regexp, and must be in the form of
-		# /^\s*(#{your pattern, with no groups})(.*)/.  The first group
+    # /^\s*(#{your pattern, with no groups})(.*)/.  The first group
-		# will be returned; the second group is used if the consume flag is
+    # will be returned; the second group is used if the consume flag is
-		# set.
+    # set.
-		# @param consume if true, the pattern returned will be consumed, leaving
+    # @param consume if true, the pattern returned will be consumed, leaving
-		# everything after it in the Source.
+    # everything after it in the Source.
-		# @return the pattern, if found, or nil if the Source is empty or the
+    # @return the pattern, if found, or nil if the Source is empty or the
-		# pattern is not found.
+    # pattern is not found.
-		def scan(pattern, cons=false)
+    def scan(pattern, cons=false)
-			return nil if @buffer.nil?
+      return nil if @buffer.nil?
-			rv = @buffer.scan(pattern)
+      rv = @buffer.scan(pattern)
-			@buffer = $' if cons and rv.size>0
+      @buffer = $' if cons and rv.size>0
-			rv
+      rv
-		end
+    end
-		def read
+    def read
-		end
+    end
-		def consume( pattern )
+    def consume( pattern )
-			@buffer = $' if pattern.match( @buffer )
+      @buffer = $' if pattern.match( @buffer )
-		end
+    end
-		def match_to( char, pattern )
+    def match_to( char, pattern )
-			return pattern.match(@buffer)
+      return pattern.match(@buffer)
-		end
+    end
-		def match_to_consume( char, pattern )
+    def match_to_consume( char, pattern )
-			md = pattern.match(@buffer)
+      md = pattern.match(@buffer)
-			@buffer = $'
+      @buffer = $'
-			return md
+      return md
-		end
+    end
-		def match(pattern, cons=false)
+    def match(pattern, cons=false)
-			md = pattern.match(@buffer)
+      md = pattern.match(@buffer)
-			@buffer = $' if cons and md
+      @buffer = $' if cons and md
-			return md
+      return md
-		end
+    end
-		# @return true if the Source is exhausted
+    # @return true if the Source is exhausted
-		def empty?
+    def empty?
-			@buffer == ""
+      @buffer == ""
-		end
+    end
    def position
      @orig.index( @buffer )
    end
-		# @return the current line in the source
+    # @return the current line in the source
-		def current_line
+    def current_line
-			lines = @orig.split
+      lines = @orig.split
-			res = lines.grep @buffer[0..30]
+      res = lines.grep @buffer[0..30]
-			res = res[-1] if res.kind_of? Array
+      res = res[-1] if res.kind_of? Array
-			lines.index( res ) if res
+      lines.index( res ) if res
-		end
+    end
-	end
+  end
-	# A Source that wraps an IO.  See the Source class for method
+  # A Source that wraps an IO.  See the Source class for method
-	# documentation
+  # documentation
-	class IOSource < Source
+  class IOSource < Source
-		#attr_reader :block_size
+    #attr_reader :block_size
    # block_size has been deprecated
-		def initialize(arg, block_size=500, encoding=nil)
+    def initialize(arg, block_size=500, encoding=nil)
-			@er_source = @source = arg
+      @er_source = @source = arg
-			@to_utf = false
+      @to_utf = false
      # Determining the encoding is a deceptively difficult issue to resolve.
      # First, we check the first two bytes for UTF-16.  Then we
      # assume that the encoding is at least ASCII enough for the '>', and
@ -147,86 +147,89 @@ module REXML
        self.encoding = encoding
      elsif /\A(?:\xfe\xff|\xff\xfe)/n =~ str
        self.encoding = check_encoding( str )
      elsif (0xef == str[0] && 0xbb == str[1])
        str += @source.read(1)
        str = '' if (0xbf == str[2])
      else
        @line_break = '>'
      end
      super str+@source.readline( @line_break )
    end
-		def scan(pattern, cons=false)
+    def scan(pattern, cons=false)
-			rv = super
+      rv = super
-			# You'll notice that this next section is very similar to the same
+      # You'll notice that this next section is very similar to the same
-			# section in match(), but just a liiittle different.  This is
+      # section in match(), but just a liiittle different.  This is
-			# because it is a touch faster to do it this way with scan()
+      # because it is a touch faster to do it this way with scan()
-			# than the way match() does it; enough faster to warrent duplicating
+      # than the way match() does it; enough faster to warrent duplicating
-			# some code
+      # some code
-			if rv.size == 0
+      if rv.size == 0
-				until @buffer =~ pattern or @source.nil?
+        until @buffer =~ pattern or @source.nil?
-					begin
+          begin
-						# READLINE OPT
+            # READLINE OPT
-						#str = @source.read(@block_size)
+            #str = @source.read(@block_size)
-						str = @source.readline(@line_break)
+            str = @source.readline(@line_break)
-						str = decode(str) if @to_utf and str
+            str = decode(str) if @to_utf and str
-						@buffer << str
+            @buffer << str
          rescue Iconv::IllegalSequence
            raise
-					rescue
+          rescue
-						@source = nil
+            @source = nil
-					end
+          end
-				end
+        end
-				rv = super
+        rv = super
-			end
+      end
-			rv.taint
+      rv.taint
-			rv
+      rv
-		end
+    end
-		def read
+    def read
-			begin
+      begin
        str = @source.readline(@line_break)
-				str = decode(str) if @to_utf and str 
+        str = decode(str) if @to_utf and str 
-				@buffer << str
+        @buffer << str
-			rescue Exception, NameError
+      rescue Exception, NameError
-				@source = nil
+        @source = nil
-			end
+      end
-		end
+    end
-		def consume( pattern )
+    def consume( pattern )
-			match( pattern, true )
+      match( pattern, true )
-		end
+    end
-		def match( pattern, cons=false )
+    def match( pattern, cons=false )
-			rv = pattern.match(@buffer)
+      rv = pattern.match(@buffer)
-			@buffer = $' if cons and rv
+      @buffer = $' if cons and rv
-			while !rv and @source
+      while !rv and @source
-				begin
+        begin
          str = @source.readline(@line_break)
-					str = decode(str) if @to_utf and str
+          str = decode(str) if @to_utf and str
-					@buffer << str
+          @buffer << str
-					rv = pattern.match(@buffer)
+          rv = pattern.match(@buffer)
-					@buffer = $' if cons and rv
+          @buffer = $' if cons and rv
-				rescue
+        rescue
-					@source = nil
+          @source = nil
-				end
+        end
-			end
+      end
-			rv.taint
+      rv.taint
-			rv
+      rv
-		end
+    end
-		
+    
-		def empty?
+    def empty?
-			super and ( @source.nil? || @source.eof? )
+      super and ( @source.nil? || @source.eof? )
-		end
+    end
    def position
      @er_source.stat.pipe? ? 0 : @er_source.pos
    end
-		# @return the current line in the source
+    # @return the current line in the source
-		def current_line
+    def current_line
      begin
-        pos = @er_source.pos				# The byte position in the source
+        pos = @er_source.pos        # The byte position in the source
-        lineno = @er_source.lineno	# The XML < position in the source
+        lineno = @er_source.lineno  # The XML < position in the source
        @er_source.rewind
-        line = 0										# The \r\n position in the source
+        line = 0                    # The \r\n position in the source
        begin
          while @er_source.pos < pos
            @er_source.readline
@ -238,7 +241,7 @@ module REXML
        pos = -1
        line = -1
      end
-			[pos, lineno, line]
+      [pos, lineno, line]
-		end
+    end
-	end
+  end
 end
--- a/lib/rexml/text.rb
+++ b/lib/rexml/text.rb
@ -211,16 +211,17 @@ module REXML
      return new_string
    end
    # == DEPRECATED
    # See REXML::Formatters
    #
    def write( writer, indent=-1, transitive=false, ie_hack=false ) 
-      s = to_s()
+      Kernel.warn("#{self.class.name}.write is deprecated.  See REXML::Formatters")
-      if not (@parent and @parent.whitespace) then
+      formatter = if indent > -1
-        s = wrap(s, 60, false) if @parent and @parent.context[:wordwrap] == :all
+          REXML::Formatters::Pretty.new( indent )
-        if @parent and not @parent.context[:indentstyle].nil? and indent > 0 and s.count("\n") > 0
+        else
-          s = indent_text(s, indent, @parent.context[:indentstyle], false)
+          REXML::Formatters::Default.new
        end
-        s.squeeze!(" \n\t") if @parent and !@parent.whitespace
+      formatter.write( self, writer )
      end
      writer << s
    end
    # FIXME
--- a/lib/rexml/xmldecl.rb
+++ b/lib/rexml/xmldecl.rb
@ -13,7 +13,7 @@ module REXML
 		STOP = '\?>';
 		attr_accessor :version, :standalone
-    attr_reader :writeencoding
+    attr_reader :writeencoding, :writethis
 		def initialize(version=DEFAULT_VERSION, encoding=nil, standalone=nil)
      @writethis = true
@ -37,9 +37,14 @@ module REXML
 			XMLDecl.new(self)
 		end
-		def write writer, indent=-1, transitive=false, ie_hack=false
+    # indent::
    #   Ignored.  There must be no whitespace before an XML declaration
    # transitive::
    #   Ignored
    # ie_hack::
    #   Ignored
 		def write(writer, indent=-1, transitive=false, ie_hack=false)
      return nil unless @writethis or writer.kind_of? Output
 			indent( writer, indent )
 			writer << START.sub(/\\/u, '')
      if writer.kind_of? Output
        writer << " #{content writer.encoding}"
--- a/lib/rexml/xpath_parser.rb
+++ b/lib/rexml/xpath_parser.rb
@ -352,7 +352,8 @@ module REXML
        when :following_sibling
          #puts "FOLLOWING_SIBLING 1: nodeset = #{nodeset}"
          results = []
-          for node in nodeset
+          nodeset.each do |node|
            next if node.parent.nil?
            all_siblings = node.parent.children
            current_index = all_siblings.index( node )
            following_siblings = all_siblings[ current_index+1 .. -1 ]
@ -363,13 +364,14 @@ module REXML
        when :preceding_sibling
          results = []
-          for node in nodeset
+          nodeset.each do |node|
            next if node.parent.nil?
            all_siblings = node.parent.children
            current_index = all_siblings.index( node )
-            preceding_siblings = all_siblings[ 0 .. current_index-1 ].reverse
+            preceding_siblings = all_siblings[ 0, current_index ].reverse
-            #results += expr( path_stack.dclone, preceding_siblings )
+            results += preceding_siblings
          end
-          nodeset = preceding_siblings || []
+          nodeset = results
          node_types = ELEMENTS
        when :preceding