* lib/rexml: Merge fixes since 1.8.6 made solely on the ruby_1_8_6

branch. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/branches/ruby_1_8@16067 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2022-11-09 12:17:21 -05:00 · 2008-04-18 07:07:11 +00:00 · 2008-04-18 07:07:11 +00:00 · 2f1fa7e402
commit 2f1fa7e402
parent 40e7794993
28 changed files with 1785 additions and 1373 deletions
--- a/5
+++ b/5
@ -1,3 +1,8 @@
+Fri Apr 18 16:01:37 2008  Akinori MUSHA  <knu@iDaemons.org>
+
+	* lib/rexml: Merge fixes since 1.8.6 made solely on the ruby_1_8_6
+	  branch.
+
 Fri Apr 18 07:56:18 2008  Hidetoshi NAGAI  <nagai@ai.kyutech.ac.jp>

 	* ext/tk/lib/tk.rb, ext/tk/lib/tk/scrollbar.rb, ext/tk/lib/tk/scale.rb:
--- a/lib/rexml/attribute.rb
+++ b/lib/rexml/attribute.rb
@ -18,25 +18,41 @@ module REXML
 		PATTERN = /\s*(#{NAME_STR})\s*=\s*(["'])(.*?)\2/um

 		# Constructor.
+    # FIXME: The parser doesn't catch illegal characters in attributes
+    #
+    # first:: 
+    #   Either: an Attribute, which this new attribute will become a
+    #   clone of; or a String, which is the name of this attribute
+    # second::
+    #   If +first+ is an Attribute, then this may be an Element, or nil.
+    #   If nil, then the Element parent of this attribute is the parent
+    #   of the +first+ Attribute.  If the first argument is a String, 
+    #   then this must also be a String, and is the content of the attribute.  
+    #   If this is the content, it must be fully normalized (contain no
+    #   illegal characters).
+    # parent::
+    #   Ignored unless +first+ is a String; otherwise, may be the Element 
+    #   parent of this attribute, or nil.
+    #
 		#
 		#  Attribute.new( attribute_to_clone )
-		#  Attribute.new( source )
+		#  Attribute.new( attribute_to_clone, parent_element )
 		#  Attribute.new( "attr", "attr_value" )
 		#  Attribute.new( "attr", "attr_value", parent_element )
 		def initialize( first, second=nil, parent=nil )
 			@normalized = @unnormalized = @element = nil
 			if first.kind_of? Attribute
 				self.name = first.expanded_name
-				@value = first.value
+				@unnormalized = first.value
 				if second.kind_of? Element
 					@element = second
 				else
 					@element = first.element
 				end
 			elsif first.kind_of? String
-				@element = parent if parent.kind_of? Element
+				@element = parent
 				self.name = first
-				@value = second.to_s
+				@normalized = second.to_s
 			else
 				raise "illegal argument #{first.class.name} to Attribute constructor"
 			end
@ -72,7 +88,7 @@ module REXML
 		# Returns true if other is an Attribute and has the same name and value,
 		# false otherwise.
 		def ==( other )
-			other.kind_of?(Attribute) and other.name==name and other.value==@value
+			other.kind_of?(Attribute) and other.name==name and other.value==value
 		end

 		# Creates (and returns) a hash from both the name and value
@ -87,8 +103,12 @@ module REXML
 		#  b = Attribute.new( "ns:x", "y" )
 		#  b.to_string     # -> "ns:x='y'"
 		def to_string
+			if @element and @element.context and @element.context[:attribute_quote] == :quote
+				%Q^#@expanded_name="#{to_s().gsub(/"/, '&quote;')}"^
+			else
 				"#@expanded_name='#{to_s().gsub(/'/, '&apos;')}'"
 			end
+		end

 		# Returns the attribute value, with entities replaced
 		def to_s
@ -100,8 +120,9 @@ module REXML
 				doctype = doc.doctype if doc
 			end

+			@normalized = Text::normalize( @unnormalized, doctype )
 			@unnormalized = nil
-			@normalized = Text::normalize( @value, doctype )
+      @normalized
 		end

 		# Returns the UNNORMALIZED value of this attribute.  That is, entities
@ -113,8 +134,9 @@ module REXML
 				doc = @element.document
 				doctype = doc.doctype if doc
 			end
+			@unnormalized = Text::unnormalize( @normalized, doctype )
 			@normalized = nil
-			@unnormalized = Text::unnormalize( @value, doctype )
+      @unnormalized
 		end

 		# Returns a copy of this attribute
--- a/lib/rexml/cdata.rb
+++ b/lib/rexml/cdata.rb
@ -39,31 +39,26 @@ module REXML
      @string
    end

+    # == DEPRECATED
+    # See the rexml/formatters package
+    #
 		# Generates XML output of this object
 		#
 		# output::
 		#   Where to write the string.  Defaults to $stdout
 		# indent::
-		#   An integer.  If -1, no indenting will be used; otherwise, the
-		#   indentation will be this number of spaces, and children will be
-		#   indented an additional amount.  Defaults to -1.
+    #   The amount to indent this node by
 		# transitive::
-		#   If transitive is true and indent is >= 0, then the output will be
-		#   pretty-printed in such a way that the added whitespace does not affect
-		#   the absolute *value* of the document -- that is, it leaves the value
-		#   and number of Text nodes in the document unchanged.
+    #   Ignored
 		# ie_hack::
-		#   Internet Explorer is the worst piece of crap to have ever been
-		#   written, with the possible exception of Windows itself.  Since IE is
-		#   unable to parse proper XML, we have to provide a hack to generate XML
-		#   that IE's limited abilities can handle.  This hack inserts a space 
-		#   before the /> on empty tags.
+    #   Ignored
 		#
 		# _Examples_
 		#  c = CData.new( " Some text " )
 		#  c.write( $stdout )     #->  <![CDATA[ Some text ]]>
 		def write( output=$stdout, indent=-1, transitive=false, ie_hack=false )
-      #indent( output, indent ) unless transitive
+      Kernel.warn( "#{self.class.name}.write is deprecated" )
+			indent( output, indent )
 			output << START
 			output << @string
 			output << STOP
--- a/lib/rexml/comment.rb
+++ b/lib/rexml/comment.rb
@ -34,6 +34,9 @@ module REXML
 			Comment.new self
 		end

+    # == DEPRECATED
+    # See REXML::Formatters
+    #
 		# output::
 		#	 Where to write the string
 		# indent::
@ -45,6 +48,7 @@ module REXML
 		# ie_hack::
 		#	 Needed for conformity to the child API, but not used by this class.
 		def write( output, indent=-1, transitive=false, ie_hack=false )
+      Kernel.warn("Comment.write is deprecated.  See REXML::Formatters")
 			indent( output, indent )
 			output << START
 			output << @string
--- a/lib/rexml/doctype.rb
+++ b/lib/rexml/doctype.rb
@ -98,38 +98,30 @@ module REXML
    # output::
    #   Where to write the string
    # indent::
-    #   An integer.  If -1, no indenting will be used; otherwise, the
+    #   An integer.  If -1, no indentation will be used; otherwise, the
    #   indentation will be this number of spaces, and children will be
    #   indented an additional amount.
    # transitive::
-    #   If transitive is true and indent is >= 0, then the output will be
-    #   pretty-printed in such a way that the added whitespace does not affect
-    #   the absolute *value* of the document -- that is, it leaves the value
-    #   and number of Text nodes in the document unchanged.
+    #   Ignored
    # ie_hack::
-    #   Internet Explorer is the worst piece of crap to have ever been
-    #   written, with the possible exception of Windows itself.  Since IE is
-    #   unable to parse proper XML, we have to provide a hack to generate XML
-    #   that IE's limited abilities can handle.  This hack inserts a space 
-    #   before the /> on empty tags.
-    #
+    #   Ignored
    def write( output, indent=0, transitive=false, ie_hack=false )
+      f = REXML::Formatters::Default.new
      indent( output, indent )
      output << START
      output << ' '
      output << @name
      output << " #@external_id" if @external_id
-      output << " #@long_name" if @long_name
-      output << " #@uri" if @uri
+      output << " #{@long_name.inspect}" if @long_name
+      output << " #{@uri.inspect}" if @uri
      unless @children.empty?
        next_indent = indent + 1
        output << ' ['
        child = nil    # speed
        @children.each { |child|
          output << "\n"
-          child.write( output, next_indent )
+          f.write( child, output )
        }
-        #output << '   '*next_indent
        output << "\n]"
      end
      output << STOP
@ -219,8 +211,10 @@ module REXML
      @string+'>'
    end

+    # == DEPRECATED
+    # See REXML::Formatters
+    #
    def write( output, indent )
-      output << ('   '*indent) if indent > 0
      output << to_s
    end
  end
@ -264,7 +258,6 @@ module REXML
    end

    def write( output, indent=-1 )
-      output << ('   '*indent) if indent > 0
      output << to_s
    end
    
--- a/lib/rexml/document.rb
+++ b/lib/rexml/document.rb
@ -31,9 +31,6 @@ module REXML
 	  # to be sources of valid XML documents.
 	  # @param context if supplied, contains the context of the document;
 	  # this should be a Hash.
-	  # NOTE that I'm not sure what the context is for; I cloned it out of
-	  # the Electric XML API (in which it also seems to do nothing), and it
-	  # is now legacy.  It may do something, someday... it may disappear.
 		def initialize( source = nil, context = {} )
 			super()
 			@context = context
@ -69,6 +66,7 @@ module REXML
 		def add( child )
 			if child.kind_of? XMLDecl
 				@children.unshift child
+        child.parent = self
 			elsif child.kind_of? DocType
        # Find first Element or DocType node and insert the decl right 
        # before it.  If there is no such node, just insert the child at the
@ -145,21 +143,32 @@ module REXML
    # Write the XML tree out, optionally with indent.  This writes out the
    # entire XML document, including XML declarations, doctype declarations,
    # and processing instructions (if any are given).
+    #
    # A controversial point is whether Document should always write the XML
    # declaration (<?xml version='1.0'?>) whether or not one is given by the
    # user (or source document).  REXML does not write one if one was not
    # specified, because it adds unneccessary bandwidth to applications such
    # as XML-RPC.
    #
+    # See also the classes in the rexml/formatters package for the proper way
+    # to change the default formatting of XML output
+    #
+    # _Examples_
+    #   Document.new("<a><b/></a>").serialize
+    #
+    #   output_string = ""
+    #   tr = Transitive.new( output_string )
+    #   Document.new("<a><b/></a>").serialize( tr )
    #
    # output::
    #	  output an object which supports '<< string'; this is where the
    #   document will be written.
    # indent::
    #   An integer.  If -1, no indenting will be used; otherwise, the
-		#   indentation will be this number of spaces, and children will be
-		#   indented an additional amount.  Defaults to -1
-		# transitive::
+    #   indentation will be twice this number of spaces, and children will be
+    #   indented an additional amount.  For a value of 3, every item will be 
+    #   indented 3 more levels, or 6 more spaces (2 * 3). Defaults to -1
+    # trans::
    #   If transitive is true and indent is >= 0, then the output will be
    #   pretty-printed in such a way that the added whitespace does not affect
    #   the absolute *value* of the document -- that is, it leaves the value
@ -170,14 +179,20 @@ module REXML
    #   unable to parse proper XML, we have to provide a hack to generate XML
    #   that IE's limited abilities can handle.  This hack inserts a space 
    #   before the /> on empty tags.  Defaults to false
-		def write( output=$stdout, indent=-1, transitive=false, ie_hack=false )
-			output = Output.new( output, xml_decl.encoding ) if xml_decl.encoding != "UTF-8" && !output.kind_of?(Output)
-			@children.each { |node|
-				indent( output, indent ) if node.node_type == :element
-				if node.write( output, indent, transitive, ie_hack )
-          output << "\n" unless indent<0 or node == @children[-1]
+		def write( output=$stdout, indent=-1, trans=false, ie_hack=false )
+      if xml_decl.encoding != "UTF-8" && !output.kind_of?(Output)
+        output = Output.new( output, xml_decl.encoding )
      end
-			}
+      formatter = if indent > -1
+          if trans
+            REXML::Formatters::Transitive.new( indent, ie_hack )
+          else
+            REXML::Formatters::Pretty.new( indent, ie_hack )
+          end
+        else
+          REXML::Formatters::Default.new( ie_hack )
+        end
+      formatter.write( self, output )
 		end

 		
--- a/lib/rexml/element.rb
+++ b/lib/rexml/element.rb
@ -295,14 +295,9 @@ module REXML
    def add_element element, attrs=nil
      raise "First argument must be either an element name, or an Element object" if element.nil?
      el = @elements.add(element)
-			if attrs.kind_of? Hash
      attrs.each do |key, value|
-					el.attributes[key]=value if key =~ /^xmlns:/
-				end
-				attrs.each do |key, value|
-					el.attributes[key]=value if key !~ /^xmlns:/
-				end
-			end
+        el.attributes[key]=Attribute.new(key,value,self)
+      end	if attrs.kind_of? Hash
      el
    end

@ -558,6 +553,7 @@ module REXML
    def attribute( name, namespace=nil )
      prefix = nil
      prefix = namespaces.index(namespace) if namespace
+      prefix = nil if prefix == 'xmlns'
      attributes.get_attribute( "#{prefix ? prefix + ':' : ''}#{name}" )
    end

@ -577,7 +573,8 @@ module REXML
    # value:: 
    #   Required if +key+ is a String, and ignored if the first argument is
    #   an Attribute.  This is a String, and is used as the value
-		#   of the new Attribute.
+    #   of the new Attribute.  This should be the unnormalized value of the
+    #   attribute (without entities).
    # Returns:: the Attribute added
    #  e = Element.new 'e'
    #  e.add_attribute( 'a', 'b' )               #-> <e a='b'/>
@ -649,6 +646,9 @@ module REXML
      find_all { |child| child.kind_of? Text }.freeze
    end

+    # == DEPRECATED
+    # See REXML::Formatters
+    #
    # Writes out this element, and recursively, all children.
    # output::
    #	  output an object which supports '<< string'; this is where the
@ -672,37 +672,17 @@ module REXML
    #  doc.write( out )     #-> doc is written to the string 'out'
    #  doc.write( $stdout ) #-> doc written to the console
    def write(writer=$stdout, indent=-1, transitive=false, ie_hack=false)
-			#print "ID:#{indent}"
-			writer << "<#@expanded_name"
-
-			@attributes.each_attribute do |attr|
-				writer << " "
-				attr.write( writer, indent )
-			end unless @attributes.empty?
-
-			if @children.empty?
-        if transitive and indent>-1
-          writer << "\n"
-          indent( writer, indent )
-        elsif ie_hack
-          writer << " " 
-        end
-				writer << "/" 
+      Kernel.warn("#{self.class.name}.write is deprecated.  See REXML::Formatters")
+      formatter = if indent > -1
+          if transitive
+            REXML::Formatters::Transitive.new( indent, ie_hack )
          else
-				if transitive and indent>-1 and !@children[0].kind_of? Text
-					writer << "\n"
-					indent writer, indent+1
+            REXML::Formatters::Pretty.new( indent, ie_hack )
          end
-				writer << ">"
-				write_children( writer, indent, transitive, ie_hack )
-				writer << "</#{expanded_name}"
+        else
+          REXML::Formatters::Default.new( ie_hack )
        end
-			if transitive and indent>-1 and !@children.empty?
-				writer << "\n"
-				indent -= 1 if next_sibling.nil?
-				indent(writer, indent)
-			end
-			writer << ">"
+      formatter.write( self, output )
    end


@ -730,29 +710,6 @@ module REXML
        return if max>0 and num == max
      }
    end
-
-		# A private helper method
-		def write_children( writer, indent, transitive, ie_hack )
-			cr = (indent < 0) ? '' : "\n"
-			if indent == -1
-				each { |child| child.write( writer, indent, transitive, ie_hack ) }
-			else
-				next_indent = indent+1
-				last_child=nil
-				each { |child|
-					unless child.kind_of? Text or last_child.kind_of? Text or transitive
-						writer << cr
-						indent(writer, next_indent)
-					end
-					child.write( writer, next_indent, transitive, ie_hack )
-					last_child = child
-				}
-				unless last_child.kind_of? Text or transitive
-					writer << cr
-					indent( writer, indent )
-				end
-			end
-		end
  end

  ########################################################################
@ -898,15 +855,15 @@ module REXML
    #   Source (see Element.initialize).  If not supplied or nil, a
    #   new, default Element will be constructed
    # Returns:: the added Element
-		#  a = Element.new 'a'
-		#  a.elements.add Element.new 'b'  #-> <a><b/></a>
-		#  a.elements.add 'c'              #-> <a><b/><c/></a>
+    #  a = Element.new('a')
+    #  a.elements.add(Element.new('b'))  #-> <a><b/></a>
+    #  a.elements.add('c')               #-> <a><b/><c/></a>
    def add element=nil
      rv = nil
      if element.nil?
-				Element.new "", self, @element.context
+        Element.new("", self, @element.context)
      elsif not element.kind_of?(Element)
-				Element.new element, self, @element.context
+        Element.new(element, self, @element.context)
      else
        @element << element
        element.context = @element.context
@ -1006,10 +963,11 @@ module REXML
    # name:: an XPath attribute name.  Namespaces are relevant here.
    # Returns:: 
    #   the String value of the matching attribute, or +nil+ if no
-		#   matching attribute was found.
+    #   matching attribute was found.  This is the unnormalized value
+    #   (with entities expanded).
    # 
-		#  doc = Document.new "<a foo:att='1' bar:att='2' att='3'/>"
-		#  doc.root.attributes['att']         #-> '3'
+    #  doc = Document.new "<a foo:att='1' bar:att='2' att='&lt;'/>"
+    #  doc.root.attributes['att']         #-> '<'
    #  doc.root.attributes['bar:att']     #-> '2'
    def [](name)
      attr = get_attribute(name)
@ -1119,7 +1077,15 @@ module REXML
        delete attr
        return
      end
-			value = Attribute.new(name, value) unless value.kind_of? Attribute
+      element_document = @element.document
+      unless value.kind_of? Attribute
+        if @element.document and @element.document.doctype
+          value = Text::normalize( value, @element.document.doctype )
+        else
+          value = Text::normalize( value, nil )
+        end
+        value = Attribute.new(name, value)
+      end
      value.element = @element
      old_attr = fetch(value.name, nil)
      if old_attr.nil?
--- a/lib/rexml/encoding.rb
+++ b/lib/rexml/encoding.rb
@ -56,10 +56,15 @@ module REXML

    def check_encoding str
      # We have to recognize UTF-16, LSB UTF-16, and UTF-8
-      return UTF_16 if /\A\xfe\xff/n =~ str
-      return UNILE if /\A\xff\xfe/n =~ str
-      str =~ /^\s*<?xml\s*version\s*=\s*(['"]).*?\2\s*encoding\s*=\s*(["'])(.*?)\2/um
-      return $1.upcase if $1
+      if str[0] == 0xfe && str[1] == 0xff
+        str[0,2] = ""
+        return UTF_16
+      elsif str[0] == 0xff && str[1] == 0xfe
+        str[0,2] = ""
+        return UNILE
+      end
+      str =~ /^\s*<\?xml\s+version\s*=\s*(['"]).*?\1\s+encoding\s*=\s*(["'])(.*?)\2/um
+      return $3.upcase if $3
      return UTF_8
    end
  end
--- a/lib/rexml/encodings/CP-1252.rb
+++ b/lib/rexml/encodings/CP-1252.rb
@ -3,9 +3,15 @@
 #
 module REXML
  module Encoding
-    @@__REXML_encoding_methods = %q~
+  	register( "CP-1252" ) do |o|
+  		class << o
+  			alias encode encode_cp1252
+			alias decode decode_cp1252
+  		end
+  	end
+
    # Convert from UTF-8
-    def encode content
+    def encode_cp1252(content)
      array_utf8 = content.unpack('U*')
      array_enc = []
      array_utf8.each do |num|
@ -54,7 +60,7 @@ module REXML
    end
    
    # Convert to UTF-8
-    def decode(str)
+    def decode_cp1252(str)
      array_latin9 = str.unpack('C*')
      array_enc = []
      array_latin9.each do |num|
@ -93,6 +99,5 @@ module REXML
      end
      array_enc.pack('U*')
    end
-    ~
  end
 end
--- a/lib/rexml/encodings/ISO-8859-15.rb
+++ b/lib/rexml/encodings/ISO-8859-15.rb
@ -3,9 +3,13 @@
 #
 module REXML
  module Encoding
-    @@__REXML_encoding_methods = %q~
+  	register("ISO-8859-15") do |o|
+  		alias encode to_iso_8859_15
+      alias decode from_iso_8859_15
+  	end
+
    # Convert from UTF-8
-    def to_iso_8859_15 content
+    def to_iso_8859_15(content)
      array_utf8 = content.unpack('U*')
      array_enc = []
      array_utf8.each do |num|
@ -64,6 +68,5 @@ module REXML
      end
      array_enc.pack('U*')
    end
-    ~
  end
 end
--- a/lib/rexml/encodings/UTF-16.rb
+++ b/lib/rexml/encodings/UTF-16.rb
@ -16,7 +16,7 @@ module REXML
    end

    def decode_utf16(str)
-      str = str[2..-1] if /^\376\377/ =~ str
+      str = str[2..-1] if /^\376\377/n =~ str
      array_enc=str.unpack('C*')
      array_utf8 = []
      0.step(array_enc.size-1, 2){|i| 
--- a/lib/rexml/entity.rb
+++ b/lib/rexml/entity.rb
@ -89,6 +89,12 @@ module REXML

 		# Write out a fully formed, correct entity definition (assuming the Entity
 		# object itself is valid.)
+    #
+    # out::
+    #   An object implementing <TT>&lt;&lt;</TT> to which the entity will be
+    #   output
+    # indent::
+    #   *DEPRECATED* and ignored
 		def write out, indent=-1
 			out << '<!ENTITY '
 			out << '% ' if @reference
--- a/lib/rexml/formatters/default.rb
+++ b/lib/rexml/formatters/default.rb
@ -0,0 +1,109 @@
+module REXML
+  module Formatters
+    class Default
+      # Prints out the XML document with no formatting -- except if ie_hack is
+      # set.
+      #
+      # ie_hack::
+      #   If set to true, then inserts whitespace before the close of an empty
+      #   tag, so that IE's bad XML parser doesn't choke.
+      def initialize( ie_hack=false )
+        @ie_hack = ie_hack
+      end
+
+      # Writes the node to some output.
+      #
+      # node::
+      #   The node to write
+      # output::
+      #   A class implementing <TT>&lt;&lt;</TT>.  Pass in an Output object to
+      #   change the output encoding.
+      def write( node, output )
+        case node
+
+        when Document 
+          if node.xml_decl.encoding != "UTF-8" && !output.kind_of?(Output)
+            output = Output.new( output, node.xml_decl.encoding )
+          end
+          write_document( node, output )
+
+        when Element
+          write_element( node, output )
+
+        when Declaration, ElementDecl, NotationDecl, ExternalEntity, Entity,
+             Attribute, AttlistDecl
+          node.write( output,-1 )
+
+        when Instruction
+          write_instruction( node, output )
+
+        when DocType, XMLDecl
+          node.write( output )
+
+        when Comment
+          write_comment( node, output )
+
+        when CData
+          write_cdata( node, output )
+
+        when Text
+          write_text( node, output )
+
+        else
+          raise Exception.new("XML FORMATTING ERROR")
+
+        end
+      end
+
+      protected
+      def write_document( node, output )
+        node.children.each { |child| write( child, output ) }
+      end
+
+      def write_element( node, output )
+        output << "<#{node.expanded_name}"
+
+        node.attributes.each_attribute do |attr|
+          output << " "
+          attr.write( output )
+        end unless node.attributes.empty?
+
+        if node.children.empty?
+          output << " " if @ie_hack
+          output << "/" 
+        else
+          output << ">"
+          node.children.each { |child|
+            write( child, output )
+          }
+          output << "</#{node.expanded_name}"
+        end
+        output << ">"
+      end
+
+      def write_text( node, output )
+        output << node.to_s()
+      end
+
+      def write_comment( node, output )
+        output << Comment::START
+        output << node.to_s
+        output << Comment::STOP
+      end
+
+      def write_cdata( node, output )
+        output << CData::START
+        output << node.to_s
+        output << CData::STOP
+      end
+
+      def write_instruction( node, output )
+        output << Instruction::START.sub(/\\/u, '')
+        output << node.target
+        output << ' '
+        output << node.content
+        output << Instruction::STOP.sub(/\\/u, '')
+      end
+    end
+  end
+end
--- a/lib/rexml/formatters/pretty.rb
+++ b/lib/rexml/formatters/pretty.rb
@ -0,0 +1,137 @@
+require 'rexml/formatters/default'
+
+module REXML
+  module Formatters
+    # Pretty-prints an XML document.  This destroys whitespace in text nodes
+    # and will insert carriage returns and indentations.
+    #
+    # TODO: Add an option to print attributes on new lines
+    class Pretty < Default
+
+      # If compact is set to true, then the formatter will attempt to use as
+      # little space as possible
+      attr_accessor :compact
+      # The width of a page.  Used for formatting text
+      attr_accessor :width
+
+      # Create a new pretty printer.
+      #
+      # output::
+      #   An object implementing '<<(String)', to which the output will be written.
+      # indentation::
+      #   An integer greater than 0.  The indentation of each level will be
+      #   this number of spaces.  If this is < 1, the behavior of this object
+      #   is undefined.  Defaults to 2.
+      # ie_hack::
+      #   If true, the printer will insert whitespace before closing empty
+      #   tags, thereby allowing Internet Explorer's feeble XML parser to
+      #   function. Defaults to false.
+      def initialize( indentation=2, ie_hack=false )
+        @indentation = indentation
+        @level = 0
+        @ie_hack = ie_hack
+        @width = 80
+      end
+
+      protected
+      def write_element(node, output)
+        output << ' '*@level
+        output << "<#{node.expanded_name}"
+
+        node.attributes.each_attribute do |attr|
+          output << " "
+          attr.write( output )
+        end unless node.attributes.empty?
+
+        if node.children.empty?
+          if @ie_hack
+            output << " "
+          end
+          output << "/" 
+        else
+          output << ">"
+          # If compact and all children are text, and if the formatted output
+          # is less than the specified width, then try to print everything on
+          # one line
+          skip = false
+          if compact
+            if node.children.inject(true) {|s,c| s & c.kind_of?(Text)}
+              string = ""
+              old_level = @level
+              @level = 0
+              node.children.each { |child| write( child, string ) }
+              @level = old_level
+              if string.length < @width
+                output << string
+                skip = true
+              end
+            end
+          end
+          unless skip
+            output << "\n"
+            @level += @indentation
+            node.children.each { |child|
+              next if child.kind_of?(Text) and child.to_s.strip.length == 0
+              write( child, output )
+              output << "\n"
+            }
+            @level -= @indentation
+            output << ' '*@level
+          end
+          output << "</#{node.expanded_name}"
+        end
+        output << ">"
+      end
+
+      def write_text( node, output )
+        s = node.to_s()
+        s.gsub!(/\s/,' ')
+        s.squeeze!(" ")
+        s = wrap(s, 80-@level)
+        s = indent_text(s, @level, " ", true)
+        output << (' '*@level + s)
+      end
+
+      def write_comment( node, output)
+        output << ' ' * @level
+        super
+      end
+
+      def write_cdata( node, output)
+        output << ' ' * @level
+        super
+      end
+
+      def write_document( node, output )
+        # Ok, this is a bit odd.  All XML documents have an XML declaration,
+        # but it may not write itself if the user didn't specifically add it,
+        # either through the API or in the input document.  If it doesn't write
+        # itself, then we don't need a carriage return... which makes this
+        # logic more complex.
+        node.children.each { |child|
+          next if child == node.children[-1] and child.instance_of?(Text)
+          unless child == node.children[0] or child.instance_of?(Text) or
+            (child == node.children[1] and !node.children[0].writethis)
+            output << "\n"
+          end
+          write( child, output )
+        }
+      end
+
+      private
+      def indent_text(string, level=1, style="\t", indentfirstline=true)
+        return string if level < 0
+        string.gsub(/\n/, "\n#{style*level}")
+      end
+
+      def wrap(string, width)
+        # Recursively wrap string at width.
+        return string if string.length <= width
+        place = string.rindex(' ', width) # Position in string with last ' ' before cutoff
+        return string[0,place] + "\n" + wrap(string[place+1..-1], width)
+      end
+
+    end
+  end
+end
+
--- a/lib/rexml/formatters/transitive.rb
+++ b/lib/rexml/formatters/transitive.rb
@ -0,0 +1,56 @@
+require 'rexml/formatters/pretty'
+
+module REXML
+  module Formatters
+    # The Transitive formatter writes an XML document that parses to an
+    # identical document as the source document.  This means that no extra
+    # whitespace nodes are inserted, and whitespace within text nodes is
+    # preserved.  Within these constraints, the document is pretty-printed,
+    # with whitespace inserted into the metadata to introduce formatting.
+    #
+    # Note that this is only useful if the original XML is not already
+    # formatted.  Since this formatter does not alter whitespace nodes, the
+    # results of formatting already formatted XML will be odd.
+    class Transitive < Default
+      def initialize( indentation=2 )
+        @indentation = indentation
+        @level = 0
+      end
+
+      protected
+      def write_element( node, output )
+        output << "<#{node.expanded_name}"
+
+        node.attributes.each_attribute do |attr|
+          output << " "
+          attr.write( output )
+        end unless node.attributes.empty?
+
+        output << "\n"
+        output << ' '*@level
+        if node.children.empty?
+          output << "/" 
+        else
+          output << ">"
+          # If compact and all children are text, and if the formatted output
+          # is less than the specified width, then try to print everything on
+          # one line
+          skip = false
+          @level += @indentation
+          node.children.each { |child|
+            write( child, output )
+          }
+          @level -= @indentation
+          output << "</#{node.expanded_name}"
+          output << "\n"
+          output << ' '*@level
+        end
+        output << ">"
+      end
+
+      def write_text( node, output )
+        output << node.to_s()
+      end
+    end
+  end
+end
--- a/lib/rexml/functions.rb
+++ b/lib/rexml/functions.rb
@ -339,7 +339,6 @@ module REXML
        object.to_f
      else
        str = string( object )
-        #puts "STRING OF #{object.inspect} = #{str}"
        # If XPath ever gets scientific notation...
        #if str =~ /^\s*-?(\d*\.?\d+|\d+\.)([Ee]\d*)?\s*$/
        if str =~ /^\s*-?(\d*\.?\d+|\d+\.)\s*$/
--- a/lib/rexml/instruction.rb
+++ b/lib/rexml/instruction.rb
@ -38,7 +38,11 @@ module REXML
 			Instruction.new self
 		end
 		
+    # == DEPRECATED
+    # See the rexml/formatters package
+    #
 		def write writer, indent=-1, transitive=false, ie_hack=false
+      Kernel.warn( "#{self.class.name}.write is deprecated" )
 			indent(writer, indent)
 			writer << START.sub(/\\/u, '')
 			writer << @target
--- a/lib/rexml/node.rb
+++ b/lib/rexml/node.rb
@ -1,4 +1,6 @@
 require "rexml/parseexception"
+require "rexml/formatters/pretty"
+require "rexml/formatters/default"

 module REXML
 	# Represents a node in the tree.  Nodes are never encountered except as
@ -18,10 +20,19 @@ module REXML
 			@parent[ ind - 1 ]
 		end

-		def to_s indent=-1
-			rv = ""
-			write rv,indent
-			rv
+    # indent::
+    #   *DEPRECATED* This parameter is now ignored.  See the formatters in the
+    #   REXML::Formatters package for changing the output style.
+		def to_s indent=nil
+      unless indent.nil?
+        Kernel.warn( "#{self.class.name}.to_s(indent) parameter is deprecated" )
+        f = REXML::Formatters::Pretty.new( indent )
+        f.write( self, rv, indent )
+      else
+        f = REXML::Formatters::Default.new
+        f.write( self, rv = "" )
+      end
+      return rv
 		end

 		def indent to, ind
--- a/lib/rexml/parsers/baseparser.rb
+++ b/lib/rexml/parsers/baseparser.rb
@ -1,5 +1,7 @@
 require 'rexml/parseexception'
+require 'rexml/undefinednamespaceexception'
 require 'rexml/source'
+require 'set'

 module REXML
  module Parsers
@ -24,7 +26,8 @@ module REXML
    # Nat Price gave me some good ideas for the API.
    class BaseParser
      NCNAME_STR= '[\w:][\-\w\d.]*'
-      NAME_STR= "(?:#{NCNAME_STR}:)?#{NCNAME_STR}"
+      NAME_STR= "(?:(#{NCNAME_STR}):)?(#{NCNAME_STR})"
+      UNAME_STR= "(?:#{NCNAME_STR}:)?#{NCNAME_STR}"

      NAMECHAR = '[\-\w\d\.:]'
      NAME = "([\\w:]#{NAMECHAR}*)"
@ -35,7 +38,7 @@ module REXML

      DOCTYPE_START = /\A\s*<!DOCTYPE\s/um
      DOCTYPE_PATTERN = /\s*<!DOCTYPE\s+(.*?)(\[|>)/um
-      ATTRIBUTE_PATTERN = /\s*(#{NAME_STR})\s*=\s*(["'])(.*?)\2/um
+      ATTRIBUTE_PATTERN = /\s*(#{NAME_STR})\s*=\s*(["'])(.*?)\4/um
      COMMENT_START = /\A<!--/u
      COMMENT_PATTERN = /<!--(.*?)-->/um
      CDATA_START = /\A<!\[CDATA\[/u
@ -45,7 +48,7 @@ module REXML
      XMLDECL_PATTERN = /<\?xml\s+(.*?)\?>/um
      INSTRUCTION_START = /\A<\?/u
      INSTRUCTION_PATTERN = /<\?(.*?)(\s+.*?)?\?>/um
-      TAG_MATCH = /^<((?>#{NAME_STR}))\s*((?>\s+#{NAME_STR}\s*=\s*(["']).*?\3)*)\s*(\/)?>/um
+      TAG_MATCH = /^<((?>#{NAME_STR}))\s*((?>\s+#{UNAME_STR}\s*=\s*(["']).*?\5)*)\s*(\/)?>/um
      CLOSE_MATCH = /^\s*<\/(#{NAME_STR})\s*>/um

      VERSION = /\bversion\s*=\s*["'](.*?)['"]/um
@ -53,7 +56,7 @@ module REXML
      STANDALONE = /\bstandalone\s*=\s["'](.*?)['"]/um

      ENTITY_START = /^\s*<!ENTITY/
-      IDENTITY = /^([!\*\w\-]+)(\s+#{NCNAME_STR})?(\s+["'].*?['"])?(\s+['"].*?["'])?/u
+      IDENTITY = /^([!\*\w\-]+)(\s+#{NCNAME_STR})?(\s+["'](.*?)['"])?(\s+['"](.*?)["'])?/u
      ELEMENTDECL_START = /^\s*<!ELEMENT/um
      ELEMENTDECL_PATTERN = /^\s*(<!ELEMENT.*?)>/um
      SYSTEMENTITY = /^\s*(%.*?;)\s*$/um
@ -133,6 +136,7 @@ module REXML
        @tags = []
        @stack = []
        @entities = []
+        @nsstack = []
      end

      def position
@ -188,6 +192,7 @@ module REXML
        end
        return [ :end_document ] if empty?
        return @stack.shift if @stack.size > 0
+        #STDERR.puts @source.encoding
        @source.read if @source.buffer.size<2
        #STDERR.puts "BUFFER = #{@source.buffer.inspect}"
        if @document_status == nil
@ -213,14 +218,15 @@ module REXML
            return [ :processing_instruction, *@source.match(INSTRUCTION_PATTERN, true)[1,2] ]
          when DOCTYPE_START
            md = @source.match( DOCTYPE_PATTERN, true )
+            @nsstack.unshift(curr_ns=Set.new)
            identity = md[1]
            close = md[2]
            identity =~ IDENTITY
            name = $1
-            raise REXML::ParseException("DOCTYPE is missing a name") if name.nil?
+            raise REXML::ParseException.new("DOCTYPE is missing a name") if name.nil?
            pub_sys = $2.nil? ? nil : $2.strip
-            long_name = $3.nil? ? nil : $3.strip
-            uri = $4.nil? ? nil : $4.strip
+            long_name = $4.nil? ? nil : $4.strip
+            uri = $6.nil? ? nil : $6.strip
            args = [ :start_doctype, name, pub_sys, long_name, uri ]
            if close == ">"
              @document_status = :after_doctype
@ -288,6 +294,9 @@ module REXML
                val = attdef[3]
                val = attdef[4] if val == "#FIXED "
                pairs[attdef[0]] = val
+                if attdef[0] =~ /^xmlns:(.*)/
+                  @nsstack[0] << $1
+                end
              end
            end
            return [ :attlistdecl, element, pairs, contents ]
@ -312,6 +321,7 @@ module REXML
        begin
          if @source.buffer[0] == ?<
            if @source.buffer[1] == ?/
+              @nsstack.shift
              last_tag = @tags.pop
              #md = @source.match_to_consume( '>', CLOSE_MATCH)
              md = @source.match( CLOSE_MATCH, true )
@ -345,19 +355,47 @@ module REXML
                raise REXML::ParseException.new("missing attribute quote", @source) if @source.match(MISSING_ATTRIBUTE_QUOTES )
                raise REXML::ParseException.new("malformed XML: missing tag start", @source) 
              end
-              attrs = []
-              if md[2].size > 0
-                attrs = md[2].scan( ATTRIBUTE_PATTERN )
+              attributes = {}
+              prefixes = Set.new
+              prefixes << md[2] if md[2]
+              @nsstack.unshift(curr_ns=Set.new)
+              if md[4].size > 0
+                attrs = md[4].scan( ATTRIBUTE_PATTERN )
                raise REXML::ParseException.new( "error parsing attributes: [#{attrs.join ', '}], excess = \"#$'\"", @source) if $' and $'.strip.size > 0
+                # ATTRIBUTE_PATTERN captures, in order: qualified name,
+                # prefix (or nil), local part, quote character, value.
+                attrs.each { |name, prefix, local_part, quote, value|
+                  if prefix == "xmlns"
+                    if local_part == "xml" && value != "http://www.w3.org/XML/1998/namespace"
+                      msg = "The 'xml' prefix must not be bound to any other namespace "+
+                      "(http://www.w3.org/TR/REC-xml-names/#ns-decl)"
+                      raise REXML::ParseException.new( msg, @source, self )
+                    elsif local_part == "xmlns"
+                      msg = "The 'xmlns' prefix must not be declared "+
+                      "(http://www.w3.org/TR/REC-xml-names/#ns-decl)"
+                      raise REXML::ParseException.new( msg, @source, self)
+                    end
+                    curr_ns << local_part
+                  elsif prefix
+                    prefixes << prefix unless prefix == "xml"
+                  end
+                  attributes[name] = value
+                }
              end
        
-              if md[4]
+              # Verify that all of the prefixes have been defined
+              for prefix in prefixes
+                unless @nsstack.find{|k| k.member?(prefix)}
+                  raise UndefinedNamespaceException.new(prefix,@source,self)
+                end
+              end
+
+              if md[6]
                @closed = md[1]
+                @nsstack.shift
              else
                @tags.push( md[1] )
              end
-              attributes = {}
-              attrs.each { |a,b,c| attributes[a] = c }
              return [ :start_element, md[1], attributes ]
            end
          else
@ -371,6 +409,8 @@ module REXML
            # return PullEvent.new( :text, md[1], unnormalized )
            return [ :text, md[1] ]
          end
+        rescue REXML::UndefinedNamespaceException
+          raise
        rescue REXML::ParseException
          raise
        rescue Exception, NameError => error
--- a/lib/rexml/parsers/sax2parser.rb
+++ b/lib/rexml/parsers/sax2parser.rb
@ -94,6 +94,8 @@ module REXML
 					when :end_document
 						handle( :end_document )
 						break
+          when :start_doctype
+            handle( :doctype, *event[1..-1])
 					when :end_doctype
 						context = context[1]
 					when :start_element
@ -167,7 +169,7 @@ module REXML
          when :entitydecl
            @entities[ event[1] ] = event[2] if event.size == 3
 						handle( *event )
-					when :processing_instruction, :comment, :doctype, :attlistdecl, 
+					when :processing_instruction, :comment, :attlistdecl, 
 						:elementdecl, :cdata, :notationdecl, :xmldecl
 						handle( *event )
 					end
--- a/lib/rexml/parsers/treeparser.rb
+++ b/lib/rexml/parsers/treeparser.rb
@ -1,4 +1,5 @@
 require 'rexml/validation/validationexception'
+require 'rexml/undefinednamespaceexception'

 module REXML
  module Parsers
@ -29,8 +30,7 @@ module REXML
              return
            when :start_element
              tag_stack.push(event[1])
-              # find the observers for namespaces
-              @build_context = @build_context.add_element( event[1], event[2] )
+              el = @build_context = @build_context.add_element( event[1], event[2] )
            when :end_element
              tag_stack.pop
              @build_context = @build_context.parent
@ -86,6 +86,8 @@ module REXML
          end
        rescue REXML::Validation::ValidationException
          raise
+        rescue REXML::UndefinedNamespaceException
+          raise
        rescue
          raise ParseException.new( $!.message, @parser.source, @parser, $! )
        end
--- a/lib/rexml/parsers/xpathparser.rb
+++ b/lib/rexml/parsers/xpathparser.rb
@ -551,7 +551,7 @@ module REXML
          end
        end
        #puts "BEFORE WITH '#{rest}'"
-        rest = LocationPath(rest, n) if rest =~ /^[\/\.\@\[\w_*]/
+        rest = LocationPath(rest, n) if rest =~ /\A[\/\.\@\[\w_*]/
        parsed.concat(n)
        return rest
      end
--- a/lib/rexml/rexml.rb
+++ b/lib/rexml/rexml.rb
@ -1,3 +1,4 @@
+# -*- encoding: utf-8 -*-
 # REXML is an XML toolkit for Ruby[http://www.ruby-lang.org], in Ruby.
 #
 # REXML is a _pure_ Ruby, XML 1.0 conforming,
@ -10,8 +11,9 @@
 #
 # Main page:: http://www.germane-software.com/software/rexml
 # Author:: Sean Russell <serATgermaneHYPHENsoftwareDOTcom>
-# Version:: 3.1.6
-# Date:: 2006/335
+# Version:: 3.1.7.2
+# Date:: 2007/275
+# Revision:: $Revision$
 # 
 # This API documentation can be downloaded from the REXML home page, or can
 # be accessed online[http://www.germane-software.com/software/rexml_doc]
@ -20,9 +22,10 @@
 # or can be accessed 
 # online[http://www.germane-software.com/software/rexml/docs/tutorial.html]
 module REXML
-	COPYRIGHT = "Copyright © 2001-2006 Sean Russell <ser@germane-software.com>"
-  DATE = "2006/335"
-  VERSION = "3.1.6"
+  COPYRIGHT = "Copyright \xC2\xA9 2001-2006 Sean Russell <ser@germane-software.com>"
+  VERSION = "3.1.7.2"
+  DATE = "2007/275"
+  REVISION = "$Revision$".gsub(/\$Revision:|\$/,'').strip

  Copyright = COPYRIGHT
  Version = VERSION
--- a/lib/rexml/source.rb
+++ b/lib/rexml/source.rb
@ -18,7 +18,7 @@ module REXML
        arg
      else
        raise "#{arg.class} is not a valid input stream.  It must walk \n"+
-        "like either a String, IO, or Source."
+          "like either a String, an IO, or a Source."
      end
    end
  end
@ -134,6 +134,7 @@ module REXML
    def initialize(arg, block_size=500, encoding=nil)
      @er_source = @source = arg
      @to_utf = false
+
      # Determining the encoding is a deceptively difficult issue to resolve.
      # First, we check the first two bytes for UTF-16.  Then we
      # assume that the encoding is at least ASCII enough for the '>', and
@ -145,10 +146,16 @@ module REXML
      str = @source.read( 2 )
      if encoding
        self.encoding = encoding
-      elsif /\A(?:\xfe\xff|\xff\xfe)/n =~ str
-        self.encoding = check_encoding( str )
+      elsif 0xfe == str[0] && 0xff == str[1]
+        @line_break = "\000>"
+      elsif 0xff == str[0] && 0xfe == str[1]
+        @line_break = ">\000"
+      elsif 0xef == str[0] && 0xbb == str[1]
+        str += @source.read(1)
+        str = '' if (0xbf == str[2])
+        @line_break = ">"
      else
-        @line_break = '>'
+        @line_break = ">"
      end
      super str+@source.readline( @line_break )
    end
--- a/lib/rexml/text.rb
+++ b/lib/rexml/text.rb
@ -211,16 +211,17 @@ module REXML
      return new_string
    end
 
+    # == DEPRECATED
+    # See REXML::Formatters.  Warns, then delegates to Formatters::Pretty when
+    # indent > -1, else Formatters::Default; transitive and ie_hack are unused.
     def write( writer, indent=-1, transitive=false, ie_hack=false ) 
-      s = to_s()
-      if not (@parent and @parent.whitespace) then
-        s = wrap(s, 60, false) if @parent and @parent.context[:wordwrap] == :all
-        if @parent and not @parent.context[:indentstyle].nil? and indent > 0 and s.count("\n") > 0
-          s = indent_text(s, indent, @parent.context[:indentstyle], false)
+      Kernel.warn("#{self.class.name}.write is deprecated.  See REXML::Formatters")
+      formatter = if indent > -1
+          REXML::Formatters::Pretty.new( indent )
+        else
+          REXML::Formatters::Default.new
         end
-        s.squeeze!(" \n\t") if @parent and !@parent.whitespace
-      end
-      writer << s
+      formatter.write( self, writer )
     end

    # FIXME
--- a/lib/rexml/undefinednamespaceexception.rb
+++ b/lib/rexml/undefinednamespaceexception.rb
@ -0,0 +1,13 @@
+require 'rexml/parseexception'
+module REXML
+  # Raised by the parser when an element or attribute uses a namespace
+  # prefix for which no declaration is in scope.
+  class UndefinedNamespaceException < ParseException
+    # prefix:: the undeclared namespace prefix
+    # source:: the source being parsed when the error was detected
+    # parser:: the parser that detected the error
+    def initialize( prefix, source, parser )
+      super( "Undefined prefix #{prefix} found", source, parser )
+    end
+  end
+end
--- a/lib/rexml/xmldecl.rb
+++ b/lib/rexml/xmldecl.rb
@ -13,7 +13,7 @@ module REXML
 		STOP = '\?>';

 		attr_accessor :version, :standalone
-    attr_reader :writeencoding
+    attr_reader :writeencoding, :writethis

 		def initialize(version=DEFAULT_VERSION, encoding=nil, standalone=nil)
      @writethis = true
@ -37,9 +37,14 @@ module REXML
 			XMLDecl.new(self)
 		end

-		def write writer, indent=-1, transitive=false, ie_hack=false
+    # indent::
+    #   Ignored.  There must be no whitespace before an XML declaration
+    # transitive::
+    #   Ignored
+    # ie_hack::
+    #   Ignored
+		def write(writer, indent=-1, transitive=false, ie_hack=false)
      return nil unless @writethis or writer.kind_of? Output
-			indent( writer, indent )
 			writer << START.sub(/\\/u, '')
      if writer.kind_of? Output
        writer << " #{content writer.encoding}"
--- a/lib/rexml/xpath_parser.rb
+++ b/lib/rexml/xpath_parser.rb
@ -160,6 +160,7 @@ module REXML
      node_types = ELEMENTS
      return nodeset if path_stack.length == 0 || nodeset.length == 0
      while path_stack.length > 0
+        #puts "#"*5
        #puts "Path stack = #{path_stack.inspect}"
        #puts "Nodeset is #{nodeset.inspect}"
        if nodeset.length == 0
@ -351,7 +352,8 @@ module REXML
        when :following_sibling
          #puts "FOLLOWING_SIBLING 1: nodeset = #{nodeset}"
          results = []
-          for node in nodeset
+          nodeset.each do |node|
+            next if node.parent.nil?
            all_siblings = node.parent.children
            current_index = all_siblings.index( node )
            following_siblings = all_siblings[ current_index+1 .. -1 ]
@ -362,13 +364,14 @@ module REXML

        when :preceding_sibling
          results = []
-          for node in nodeset
+          nodeset.each do |node|
+            next if node.parent.nil?
            all_siblings = node.parent.children
            current_index = all_siblings.index( node )
-            preceding_siblings = all_siblings[ 0 .. current_index-1 ].reverse
-            #results += expr( path_stack.dclone, preceding_siblings )
+            preceding_siblings = all_siblings[ 0, current_index ].reverse
+            results += preceding_siblings
          end
-          nodeset = preceding_siblings || []
+          nodeset = results
          node_types = ELEMENTS

        when :preceding
@ -389,15 +392,21 @@ module REXML
          node_types = ELEMENTS

        when :namespace
+          #puts "In :namespace"
          new_nodeset = []
          prefix = path_stack.shift
          for node in nodeset
            if (node.node_type == :element or node.node_type == :attribute)
-              if (node.node_type == :element)
+              if @namespaces
+                namespaces = @namespaces
+              elsif (node.node_type == :element)
                namespaces = node.namespaces
              else
                namespaces = node.element.namesapces
              end
+              #puts "Namespaces = #{namespaces.inspect}"
+              #puts "Prefix = #{prefix.inspect}"
+              #puts "Node.namespace = #{node.namespace}"
              if (node.namespace == namespaces[prefix])
                new_nodeset << node
              end