REXML changes backported from the 1.9 branch:

* Minor source documentation changes * Changes to the pretty-printing code, including the addition of the word-wrap submission. * Bug fix for missing quotations in NOTATION DTD items * Bug fixes and improvements to whitespace handling in text nodes * Refactoring and bug fixes in encoding support * Minor speed optimizations in the core parser * Bug fixes in the SAX2 parser * Copyright fixes * Version bump to REXML 3.0.0 * A change that caused speed degradation has been reversed * Addition of a value=() method in Text, for replacing the contents of a text node * Fixed the document order of the descendant-or-self axis in XPath git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/branches/ruby_1_8@6071 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2022-11-09 12:17:21 -05:00 · 2004-04-02 03:26:19 +00:00 · 2004-04-02 03:26:19 +00:00 · 7a07ba45a0
commit 7a07ba45a0
parent 354d68f80b
16 changed files with 141 additions and 91 deletions
--- a/lib/rexml/comment.rb
+++ b/lib/rexml/comment.rb
@ -39,7 +39,10 @@ module REXML
 		#   indentation will be this number of spaces, and children will be
 		#   indented an additional amount.
 		# transitive::
-		#   Who knows?
+		#   If transitive is true and indent is >= 0, then the output will be
 		#   pretty-printed in such a way that the added whitespace does not affect
 		#   the absolute *value* of the document -- that is, it leaves the value
 		#   and number of Text nodes in the document unchanged.
 		# ie_hack::
 		#   Internet Explorer is the worst piece of crap to have ever been
 		#   written, with the possible exception of Windows itself.  Since IE is
--- a/lib/rexml/doctype.rb
+++ b/lib/rexml/doctype.rb
@ -92,7 +92,10 @@ module REXML
 		#   indentation will be this number of spaces, and children will be
 		#   indented an additional amount.
 		# transitive::
-		#   Who knows?
+		#   If transitive is true and indent is >= 0, then the output will be
 		#   pretty-printed in such a way that the added whitespace does not affect
 		#   the absolute *value* of the document -- that is, it leaves the value
 		#   and number of Text nodes in the document unchanged.
 		# ie_hack::
 		#   Internet Explorer is the worst piece of crap to have ever been
 		#   written, with the possible exception of Windows itself.  Since IE is
@ -109,7 +112,7 @@ module REXML
 			output << " #@long_name" if @long_name
 			output << " #@uri" if @uri
 			unless @children.empty?
-				next_indent = indent + 2
+				next_indent = indent + 1
 				output << ' ['
 				child = nil		# speed
 				@children.each { |child|
@ -123,6 +126,10 @@ module REXML
 			output << STOP
 		end
    # Returns the context of this node's parent by delegating to
    # @parent.context.  No nil check is made on @parent, so calling this
    # while @parent is nil raises NoMethodError.
    def context
      @parent.context
    end
 		# Looks up +name+ in @entities and returns the +unnormalized+ value of
 		# the entry found there, or nil when no entry is registered under
 		# that name.
 		def entity( name )
 			declaration = @entities[name]
 			declaration.unnormalized if declaration
 		end
@ -185,7 +192,7 @@ module REXML
 		end
 		def to_s
-			"<!NOTATION #@name #@middle #@rest>"
+			"<!NOTATION #@name '#@middle #@rest'>"
 		end
 		def write( output, indent=-1 )
--- a/lib/rexml/document.rb
+++ b/lib/rexml/document.rb
@ -145,7 +145,10 @@ module REXML
 		#   indentation will be this number of spaces, and children will be
 		#   indented an additional amount.  Defaults to -1
 		# transitive::
-		#   What the heck does this do? Defaults to false
+		#   If transitive is true and indent is >= 0, then the output will be
 		#   pretty-printed in such a way that the added whitespace does not affect
 		#   the absolute *value* of the document -- that is, it leaves the value
 		#   and number of Text nodes in the document unchanged.
 		# ie_hack::
 		#   Internet Explorer is the worst piece of crap to have ever been
 		#   written, with the possible exception of Windows itself.  Since IE is
@ -191,7 +194,7 @@ module REXML
 							build_context[-1] << event[1]
 						else
 							build_context.add( 
-								Text.new( event[1], true, nil, true ) 
+								Text.new( event[1], build_context.whitespace, nil, true ) 
 							) unless (
 								event[1].strip.size==0 and 
 								build_context.ignore_whitespace_nodes
--- a/lib/rexml/dtd/entitydecl.rb
+++ b/lib/rexml/dtd/entitydecl.rb
@ -42,7 +42,7 @@ module REXML
 			end
 			def write( output, indent )
-				output << ('   '*indent) if indent > 0
+        indent( output, indent )
 				output << to_s
 			end
--- a/lib/rexml/dtd/notationdecl.rb
+++ b/lib/rexml/dtd/notationdecl.rb
@ -25,7 +25,7 @@ module REXML
 			end
 			def write( output, indent )
-				output << ('   '*indent) if indent > 0
+        indent( output, indent )
 				output << to_s
 			end
--- a/lib/rexml/element.rb
+++ b/lib/rexml/element.rb
@ -98,8 +98,9 @@ module REXML
 		# is the case if:
 		# 1. Neither :+respect_whitespace+ nor :+compress_whitespace+ has any value
 		# 2. The context has :+respect_whitespace+ set to :+all+ or
-		#    an array containing the name of this element, and :+compress_whitespace+
+		#    an array containing the name of this element, and 
-		#    isn't set to :+all+ or an array containing the name of this element.
+    #    :+compress_whitespace+ isn't set to :+all+ or an array containing the 
    #    name of this element.
 		# The evaluation is tested against +expanded_name+, and so is namespace
 		# sensitive.
 		def whitespace
@ -606,7 +607,9 @@ module REXML
 		#   indentation will be this number of spaces, and children will be
 		#   indented an additional amount.  Defaults to -1
 		# transitive::
-		#   What the heck does this do? Defaults to false
+		#   If transitive is true and indent is >= 0, then the output will be
 		#   pretty-printed in such a way that the added whitespace does not affect
 		#   the parse tree of the document
 		# ie_hack::
 		#   Internet Explorer is the worst piece of crap to have ever been
 		#   written, with the possible exception of Windows itself.  Since IE is
@ -632,7 +635,7 @@ module REXML
 			else
 				if transitive and indent>-1 and !@children[0].kind_of? Text
 					writer << "\n"
-					indent writer, indent+2
+					indent writer, indent+1
 				end
 				writer << ">"
 				write_children( writer, indent, transitive, ie_hack )
@ -640,7 +643,7 @@ module REXML
 			end
 			if transitive and indent>-1
 				writer << "\n"
-				indent -= 2 if next_sibling.nil?
+				indent -= 1 if next_sibling.nil?
 				indent(writer, indent)
 			end
 			writer << ">"
@ -661,12 +664,10 @@ module REXML
 		# A private helper method
 		def write_children( writer, indent, transitive, ie_hack )
 			cr = (indent < 0) ? '' : "\n"
 			#if size == 1 and @children[0].kind_of?(Text)
 			#	self[0].write( writer, -1 )
 			if indent == -1
 				each { |child| child.write( writer, indent, transitive, ie_hack ) }
 			else
-				next_indent = indent+2
+				next_indent = indent+1
 				last_child=nil
 				each { |child|
 					unless child.kind_of? Text or last_child.kind_of? Text or transitive
--- a/lib/rexml/encodings/ISO-8859-1.rb
+++ b/lib/rexml/encodings/ISO-8859-1.rb
@ -1,6 +1,6 @@
 module REXML
  module Encoding
-    @@__REXML_encoding_methods =<<-'EOL'
+    @@__REXML_encoding_methods = %q~
    # Convert from UTF-8
    def encode content
      array_utf8 = content.unpack('U*')
@ -20,6 +20,6 @@ module REXML
    def decode(str)
      str.unpack('C*').pack('U*')
    end
-    EOL
+    ~
  end
 end
--- a/lib/rexml/encodings/SHIFT_JIS.rb
+++ b/lib/rexml/encodings/SHIFT_JIS.rb
@ -1,33 +1 @@
-begin
+require 'rexml/encodings/SHIFT-JIS'
 	require 'uconv'
 	module REXML
 		module Encoding
 			def to_shift_jis content
 				Uconv::u8tosjis(content)
 			end
 			def from_shift_jis(str)
 				Uconv::sjistou8(str)
 			end
 		end
 	end
 rescue LoadError
  begin
 	require 'iconv'
 	module REXML
 		module Encoding
 			def from_shift_jis(str)
 				return Iconv::iconv("utf-8", "shift_jis", str).join('')
 			end
 			def to_shift_jis content
 				return Iconv::iconv("shift_jis", "utf-8", content).join('')
 			end
 		end
 	end
  rescue LoadError
 	raise "uconv or iconv is required for Japanese encoding support."
  end
 end
--- a/lib/rexml/encodings/US-ASCII.rb
+++ b/lib/rexml/encodings/US-ASCII.rb
@ -1,6 +1,6 @@
 module REXML
  module Encoding
-    @@__REXML_encoding_methods =<<-'EOL'
+    @@__REXML_encoding_methods = %q~
    # Convert from UTF-8
    def encode content
      array_utf8 = content.unpack('U*')
@ -20,6 +20,6 @@ module REXML
    def decode(str)
      str.unpack('C*').pack('U*')
    end
-    EOL
+    ~
  end
 end
--- a/lib/rexml/node.rb
+++ b/lib/rexml/node.rb
@ -25,7 +25,12 @@ module REXML
 		end
 		def indent to, ind
-			to << " "*ind unless ind<1
+ 			if @parent and @parent.context and not @parent.context[:indentstyle].nil? then
 				indentstyle = @parent.context[:indentstyle]
 			else
 				indentstyle = '  '
 			end
 			to << indentstyle*ind unless ind<1
 		end
 		def parent?
--- a/lib/rexml/parsers/baseparser.rb
+++ b/lib/rexml/parsers/baseparser.rb
@ -122,14 +122,14 @@ module REXML
 			# Returns true if there are no more events
 			def empty?
-				!has_next?
+        #puts "@source.empty? = #{@source.empty?}"
        #puts "@stack.empty? = #{@stack.empty?}"
        return (@source.empty? and @stack.empty?)
 			end
 			# Returns true if there are more events.  Synonymous with !empty?
 			def has_next?
-				return true if @closed
+        return !(@source.empty? and @stack.empty?)
 				@source.read if @source.buffer.size==0 and !@source.empty?
 				(!@source.empty? and @source.buffer.strip.size>0) or @stack.size>0 or @closed
 			end
 			# Push an event back on the head of the stream.  This method
@ -329,9 +329,12 @@ module REXML
 						end
 					else
 						md = @source.match( TEXT_PATTERN, true )
-						#md = @source.match_to_consume( '<', TEXT_PATTERN )
+            if md[0].length == 0
-						#@source.read
+              #puts "EMPTY = #{empty?}"
-						raise REXML::ParseException("no text to add") if md[0].length == 0
+              #puts "BUFFER = \"#{@source.buffer}\""
              @source.match( /(\s+)/, true )
            end
            #return [ :text, "" ] if md[0].length == 0
 						# unnormalized = Text::unnormalize( md[1], self )
 						# return PullEvent.new( :text, md[1], unnormalized )
 						return [ :text, md[1] ]
--- a/lib/rexml/parsers/sax2parser.rb
+++ b/lib/rexml/parsers/sax2parser.rb
@ -45,7 +45,7 @@ module REXML
 					if args.size == 2
 						args[1].each { |match| @procs << [args[0], match, blok] }
 					else
-						add( [args[0], /.*/, blok] )
+						add( [args[0], nil, blok] )
 					end
 				elsif args[0].kind_of? Array
 					if args.size == 2
@ -54,7 +54,7 @@ module REXML
 						args[0].each { |match| add( [ :start_element, match, blok ] ) }
 					end
 				else
-					add([nil, /.*/, args[0]])
+					add([nil, nil, args[0]])
 				end
 			end
@ -164,9 +164,10 @@ module REXML
 			def get_procs( symbol, name )
 				return nil if @procs.size == 0
 				@procs.find_all do |sym, match, block|
          #puts sym.inspect+"=="+symbol.inspect+ "\t"+match.inspect+"=="+name.inspect+ "\t"+( (sym.nil? or symbol == sym) and ((name.nil? and match.nil?) or match.nil? or ( (name == match) or (match.kind_of? Regexp and name =~ match)))).to_s
 					(
 						(sym.nil? or symbol == sym) and 
-						(name.nil? or (
+						((name.nil? and match.nil?) or match.nil? or (
 							(name == match) or
 							(match.kind_of? Regexp and name =~ match)
 							)
@ -179,7 +180,7 @@ module REXML
 				@listeners.find_all do |sym, match, block|
 					(
 						(sym.nil? or symbol == sym) and 
-						(name.nil? or (
+						((name.nil? and match.nil?) or match.nil? or (
 							(name == match) or
 							(match.kind_of? Regexp and name =~ match)
 							)
--- a/lib/rexml/rexml.rb
+++ b/lib/rexml/rexml.rb
@ -20,7 +20,7 @@
 # be accessed online at http://www.germane-software.com/software/rexml_doc
 # A tutorial is available in docs/tutorial.html
 module REXML
-	Copyright = "Copyright #{Time.now.year} Sean Russell <ser@germane-software.com>"
+	Copyright = "Copyright © 2001, 2002, 2003, 2004 Sean Russell <ser@germane-software.com>"
-	Date = "+2003/346"
+	Date = "+2004/088"
-	Version = "2.7.3"
+	Version = "3.0.0"
 end
--- a/lib/rexml/source.rb
+++ b/lib/rexml/source.rb
@ -31,7 +31,6 @@ module REXML
 		# Builds a source around +arg+.
 		# arg:: the value to read from; it is kept both as @orig and as the
 		#       working @buffer (the same object is assigned to both).
 		def initialize(arg)
 			@orig = @buffer = arg
 			# The encoding is whatever check_encoding reports for the buffer;
 			# it is applied through the encoding= writer.
 			self.encoding = check_encoding( @buffer )
 			# Decoding the buffer up front is disabled (left commented out).
 			#@buffer = decode(@buffer) unless @encoding == UTF_8
 			# Line counter starts at zero.
 			@line = 0
 		end
@ -96,7 +95,7 @@ module REXML
 		# @return true if the Source is exhausted
 		def empty?
-			@buffer.nil?
+			@buffer == ""
 		end
 		# @return the current line in the source
@ -113,17 +112,14 @@ module REXML
 	class IOSource < Source
 		#attr_reader :block_size
    # block_size has been deprecated
 		def initialize(arg, block_size=500)
 			@er_source = @source = arg
 			@to_utf = false
-			# READLINE OPT
+      # FIXME
-			# The following was commented out when IOSource started using readline
+      # This is broken.  If the user puts in enough carriage returns, this can fail
-			# to pull the data from the stream.
+      # to calculate the correct encoding.
-			#@block_size = block_size
+      super @source.read( 100 )
 			#super @source.read(@block_size)
 			@line_break = '>'
 			#super @source.readline( "\n" )
 			super @source.readline( @line_break )+@source.read
 			@line_break = encode( '>' )
 		end
--- a/lib/rexml/text.rb
+++ b/lib/rexml/text.rb
@ -164,9 +164,54 @@ module REXML
 			end
 			@unnormalized = Text::unnormalize( @string, doctype )
 		end
 		# Recursively wraps +string+ by replacing spaces with newlines so that,
 		# where possible, no line is longer than +width+ characters.
 		#
 		# string::     the text to wrap
 		# width::      the preferred maximum line length
 		# addnewline:: if true, a newline is prepended to the wrapped result
 		#              (only when the string actually needs wrapping)
 		#
 		# Returns +string+ itself when it already fits or contains no space to
 		# break on; otherwise returns a new wrapped String.
 		def wrap(string, width, addnewline=false)
 			return string if string.length <= width
 			# Prefer the last space at or before the cutoff.  When a single word
 			# is longer than +width+ there is no such space, so break at the
 			# first space after the cutoff rather than failing on a nil index.
 			place = string.rindex(' ', width) || string.index(' ', width)
 			return string if place.nil?
 			prefix = addnewline ? "\n" : ''
 			prefix + string[0,place] + "\n" + wrap(string[place+1..-1], width)
 		end
    # Sets the contents of this text node.  This expects the text to be 
    # unnormalized.  Carriage returns ("\r\n" and lone "\r") are converted
    # to "\n", the cached @unnormalized and @normalized values are cleared,
    # and the node is marked as not raw.
    #
    # Returns self when called as a regular method (e.g. through +send+).
    # With assignment syntax (<tt>node.value = x</tt>) Ruby always evaluates
    # the expression to the assigned value, regardless of what is returned.
    #
    #   e = Element.new( "a" )
    #   e.add_text( "foo" )   # <a>foo</a>
    #   e[0].value = "bar"    # <a>bar</a>
    #   e[0].value = "<a>"    # <a>&lt;a&gt;</a>
    def value=( val )
      @string = val.gsub( /\r\n?/, "\n" )
      @unnormalized = nil
      @normalized = nil
      @raw = false
      self
    end
 		# Prefixes every line of +string+ with +style+ repeated +level+ times
 		# and strips trailing whitespace from each line.
 		#
 		# string::          the text to indent
 		# level::           number of repetitions of +style+; a negative level
 		#                   returns +string+ unchanged
 		# style::           the string used for one level of indentation
 		# indentfirstline:: when false, leading and trailing whitespace is
 		#                   stripped from the final result, which removes the
 		#                   indentation of the first line
 		#
 		# NOTE(review): the trailing-whitespace strip also removes each line's
 		# newline, so the indented lines are concatenated without line breaks.
 		# That matches the existing behaviour and is preserved here -- confirm
 		# whether it is intended.
 		def indent(string, level=1, style="\t", indentfirstline=true)
 			return string if level < 0
 			indent_string = style * level
 			new_string = ''
 			# each_line rather than String#each, which only exists in Ruby 1.8.
 			string.each_line { |line|
 				new_string << (indent_string + line).sub(/[\s]+$/,'')
 			}
 			new_string.strip! unless indentfirstline
 			new_string
 		end
 		def write( writer, indent=-1, transitive=false, ie_hack=false ) 
-			writer << to_s()
+			s = to_s()
      if not (@parent and @parent.whitespace) then
        s = wrap(s, 60, false) if @parent and @parent.context[:wordwrap] == :all
        if @parent and not @parent.context[:indentstyle].nil? and indent > 0 and s.count("\n") > 0
          s = indent(s, indent, @parent.context[:indentstyle], false)
        end
        s.squeeze!(" \n\t") if @parent and !@parent.whitespace
      end
      writer << s
 		end
 		# Writes out text, substituting special characters beforehand.
--- a/lib/rexml/xpath_parser.rb
+++ b/lib/rexml/xpath_parser.rb
@ -29,7 +29,8 @@ module REXML
 		def parse path, nodeset
 			path_stack = @parser.parse( path )
-			#puts "PARSE: #{path} => #{path_stack.inspect}"
+      #puts "PARSE: #{path} => #{path_stack.inspect}"
      #puts "PARSE: nodeset = #{nodeset.collect{|x|x.to_s}.inspect}"
 			match( path_stack, nodeset )
 		end
@ -46,7 +47,7 @@ module REXML
 		def match( path_stack, nodeset ) 
 			while ( path_stack.size > 0 and nodeset.size > 0 ) 
-				#puts "PARSE: #{path_stack.inspect} '#{nodeset.collect{|n|n.type}.inspect}'"
+				#puts "PARSE: #{path_stack.inspect} '#{nodeset.collect{|n|n.class}.inspect}'"
 				nodeset = internal_parse( path_stack, nodeset )
 				#puts "NODESET: #{nodeset.size}"
 				#puts "PATH_STACK: #{path_stack.inspect}"
@ -55,8 +56,9 @@ module REXML
 		end
 		def internal_parse path_stack, nodeset
      #puts "INTERNAL_PARSE RETURNING WITH NO RESULTS" if nodeset.size == 0 or path_stack.size == 0
 			return nodeset if nodeset.size == 0 or path_stack.size == 0
-			#puts "INTERNAL_PARSE: #{path_stack.inspect}, #{nodeset.collect{|n| n.type}.inspect}"
+			#puts "INTERNAL_PARSE: #{path_stack.inspect}, #{nodeset.collect{|n| n.class}.inspect}"
 			case path_stack.shift
 			when :document
 				return [ nodeset[0].root.parent ]
@ -205,7 +207,7 @@ module REXML
 					Functions::index = index+1
 					#puts "Node #{node} and index=#{index+1}"
 					result = Predicate( predicate, node )
-					#puts "Predicate returned #{result} (#{result.type}) for #{node.type}"
+					#puts "Predicate returned #{result} (#{result.class}) for #{node.class}"
 					if result.kind_of? Numeric
 						#puts "#{result} == #{index} => #{result == index}"
 						new_nodeset << node if result == (index+1)
@ -285,6 +287,7 @@ module REXML
 		end
 		##########################################################
    # FIXME
 		# The next two methods are BAD MOJO!
 		# This is my achilles heel.  If anybody thinks of a better
 		# way of doing this, be my guest.  This really sucks, but 
@ -294,24 +297,39 @@ module REXML
 		def descendant_or_self( path_stack, nodeset )
 			rs = []
 			d_o_s( path_stack, nodeset, rs )
-			#puts "RS = #{rs.collect{|n|n.to_s}.inspect}"
+      #puts "RS = #{rs.collect{|n|n.to_s}.inspect}"
-			rs.flatten.compact
+      document_order(rs.flatten.compact)
 		end
 		def d_o_s( p, ns, r )
 			#puts r.collect{|n|n.to_s}.inspect
 			#puts ns.collect{|n|n.to_s}.inspect
 			nt = nil
 			ns.each_index do |i|
 				n = ns[i]
 				x = match( p.clone, [ n ] )
 				#puts "Got a match on #{p.inspect} for #{ns.collect{|n|n.to_s+"("+n.type.to_s+")"}.inspect}"
 				nt = n.node_type
-				d_o_s( p, n.children, x ) if nt == :element or nt == :document
+				d_o_s( p, n.children, x ) if nt == :element or nt == :document and n.children.size > 0
-				r[i,0] = [x] if x.size > 0
+        r.concat(x) if x.size > 0
 			end
 		end
    # Reorders an array of nodes so that they are in document order.
    #
    # Each node is keyed by the list of child indexes leading to it from
    # its topmost element ancestor (an attribute is keyed by its owning
    # element).  The keys are compared as arrays, element by element, so
    # that child 2 sorts before child 10 and an ancestor sorts before its
    # descendants.  Comparing the indexes joined into a single string would
    # order "10" before "2" and confuse [1, 12] with [11, 2].
    #
    # array_of_nodes:: the nodes to sort
    #
    # Returns a new array holding the same nodes in document order.
    def document_order( array_of_nodes )
      keyed = array_of_nodes.collect { |node|
        path = []
        np = node.node_type == :attribute ? node.element : node
        # Walk up while the parent is an element, recording this node's
        # position among its siblings at every level.
        while np.parent and np.parent.node_type == :element
          path << np.parent.children.index( np )
          np = np.parent
        end
        [ path.reverse, node ]
      }
      keyed.sort{ |s1, s2| s1[0] <=> s2[0] }.collect{ |s| s[1] }
    end
    def recurse( nodeset, &block )
      for node in nodeset
 	      yield node
@ -324,7 +342,7 @@ module REXML
 		def Predicate( predicate, node )
 			predicate = predicate.clone
 			#puts "#"*20
-			#puts "Predicate( #{predicate.inspect}, #{node.type} )"
+			#puts "Predicate( #{predicate.inspect}, #{node.class} )"
 			results = []
 			case (predicate[0])
 			when :and, :or, :eq, :neq, :lt, :lteq, :gt, :gteq