* Cross-patch from Ruby CVS; mostly Nabu edits.

* Fixes ticket:68. NOTE: this is an API change! Entity declarations in the doctype now generate events that carry two arguments, not one.
* Implements ticket:15, using gwrite's suggestion. This allows Element to be subclassed.
* Fixes namespace handling in XPath and Element. NOTE: this is an API change! Element.namespaces() now returns a hash of the namespace mappings that are relevant for that node.
* Fixes a bug in multiple decodings.
* Changeset 1230:1231 was bad. The default behavior is *not* to use the native REXML encodings, but rather to use ICONV. I'll have to think of a better way of managing translations, but the REXML codecs are (a) less reliable than ICONV and, more importantly, (b) slower. The real solution is to use ICONV by default, but allow users to specify that they want to use the pure Ruby codecs.
* Fixes ticket:61 (xpath_parser).
* Fixes ticket:63 (UTF-16; UNILE decoding was bad).
* Improves parsing error messages a little.
* Adds the ability to override the encoding detection in Source construction.
* Fixes an edge case in Functions::string, where document nodes weren't correctly converted.
* Fixes Functions::string() for Element and Document nodes.
* Fixes some problems in entity handling.
* Addresses ticket:66.
* Fixes ticket:71.
* Addresses ticket:78. NOTE: this also fixes what is technically another bug in REXML. REXML's XPath parser used to allow exponential notation in numbers. The XPath spec is specific about what a number is, and scientific notation is not included. Therefore, this has been fixed.

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/branches/ruby_1_8@11315 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2022-11-09 12:17:21 -05:00 · 2006-12-01 02:20:08 +00:00 · 2006-12-01 02:20:08 +00:00 · f114b85d89
commit f114b85d89
parent d2205c869e
14 changed files with 136 additions and 81 deletions
--- a/lib/rexml/element.rb
+++ b/lib/rexml/element.rb
@ -94,7 +94,7 @@ module REXML
 		#   new_a = d.root.clone
 		#   puts new_a  # => "<a/>"
 		def clone
-			Element.new self
+			self.class.new self
 		end
 		# Evaluates to the root node of the document that this element 
@ -200,9 +200,9 @@ module REXML
 		end
 		def namespaces
-			namespaces = []
+			namespaces = {}
 			namespaces = parent.namespaces if parent
-			namespaces |= attributes.namespaces
+			namespaces = namespaces.merge( attributes.namespaces )
 			return namespaces
 		end
@ -494,13 +494,12 @@ module REXML
 		#  doc.root.add_element 'c'    #-> '<a><b/>Elliott<c/></a>'
 		#  doc.root.text = 'Russell'   #-> '<a><b/>Russell<c/></a>'
 		#  doc.root.text = nil         #-> '<a><b/><c/></a>'
-		def text=( text )
+    def text=( text )
      if text.kind_of? String
        text = Text.new( text, whitespace(), nil, raw() )
      elsif text and !text.kind_of? Text
        text = Text.new( text.to_s, whitespace(), nil, raw() )
      end
 			old_text = get_text
 			if text.nil?
 				old_text.remove unless old_text.nil?
@ -557,13 +556,9 @@ module REXML
 		#################################################
 		def attribute( name, namespace=nil )
-			prefix = ''
+			prefix = nil
-			if namespace
+      prefix = namespaces.index(namespace) if namespace
-				prefix = attributes.prefixes.each { |prefix|
+			attributes.get_attribute( "#{prefix ? prefix + ':' : ''}#{name}" )
 					return "#{prefix}:" if namespace( prefix ) == namespace
 				} || ''
 			end
 			attributes.get_attribute( "#{prefix}#{name}" )
 		end
 		# Evaluates to +true+ if this element has any attributes set, false
@ -1172,16 +1167,16 @@ module REXML
 		end
 		def namespaces
-			namespaces = []
+			namespaces = {}
 			each_attribute do |attribute|
-				namespaces << attribute.value if attribute.prefix == 'xmlns' or attribute.name == 'xmlns'
+				namespaces[attribute.name] = attribute.value if attribute.prefix == 'xmlns' or attribute.name == 'xmlns'
 			end
 			if @element.document and @element.document.doctype
 				expn = @element.expanded_name
 				expn = @element.document.doctype.name if expn.size == 0
 				@element.document.doctype.attributes_of(expn).each {
 					|attribute|
-					namespaces << attribute.value if attribute.prefix == 'xmlns' or attribute.name == 'xmlns'
+					namespaces[attribute.name] = attribute.value if attribute.prefix == 'xmlns' or attribute.name == 'xmlns'
 				}
 			end
 			namespaces
--- a/lib/rexml/encoding.rb
+++ b/lib/rexml/encoding.rb
@ -24,21 +24,22 @@ module REXML
      old_verbosity = $VERBOSE
      begin
        $VERBOSE = false
-        return if defined? @encoding and enc == @encoding
+        enc = enc.nil? ? nil : enc.upcase
        return false if defined? @encoding and enc == @encoding
        if enc and enc != UTF_8
-          @encoding = enc.upcase
+          @encoding = enc
          raise ArgumentError, "Bad encoding name #@encoding" unless @encoding =~ /^[\w-]+$/
          @encoding.untaint 
          begin
            require 'rexml/encodings/ICONV.rb'
            Encoding.apply(self, "ICONV")
-          rescue LoadError, Exception => err
+          rescue LoadError, Exception
            raise ArgumentError, "Bad encoding name #@encoding" unless @encoding =~ /^[\w-]+$/
            @encoding.untaint 
            enc_file = File.join( "rexml", "encodings", "#@encoding.rb" )
            begin
              enc_file = File.join( "rexml", "encodings", "#@encoding.rb" )
              require enc_file
              Encoding.apply(self, @encoding)
-            rescue LoadError
+            rescue LoadError => err
-              puts $!.message
+              puts err.message
              raise ArgumentError, "No decoder found for encoding #@encoding.  Please install iconv."
            end
          end
@ -50,6 +51,7 @@ module REXML
      ensure
        $VERBOSE = old_verbosity
      end
      true
    end
    def check_encoding str
--- a/lib/rexml/encodings/UNILE.rb
+++ b/lib/rexml/encodings/UNILE.rb
@ -18,7 +18,7 @@ module REXML
    def decode_unile(str)
      array_enc=str.unpack('C*')
      array_utf8 = []
-      2.step(array_enc.size-1, 2){|i| 
+      0.step(array_enc.size-1, 2){|i| 
        array_utf8 << (array_enc.at(i) + array_enc.at(i+1)*0x100)
      }
      array_utf8.pack('U*')
--- a/lib/rexml/encodings/UTF-16.rb
+++ b/lib/rexml/encodings/UTF-16.rb
@ -16,9 +16,10 @@ module REXML
    end
    def decode_utf16(str)
      str = str[2..-1] if /^\376\377/ =~ str
      array_enc=str.unpack('C*')
      array_utf8 = []
-      2.step(array_enc.size-1, 2){|i| 
+      0.step(array_enc.size-1, 2){|i| 
        array_utf8 << (array_enc.at(i+1) + array_enc.at(i)*0x100)
      }
      array_utf8.pack('U*')
--- a/lib/rexml/functions.rb
+++ b/lib/rexml/functions.rb
@ -117,16 +117,30 @@ module REXML
      elsif defined? object.node_type
        if object.node_type == :attribute
          object.value
-        elsif object.node_type == :element
+        elsif object.node_type == :element || object.node_type == :document
-          object.text
+          string_value(object)
        else
          object.to_s
        end
      elsif object.nil?
        return ""
      else
        object.to_s
      end
    end
    # Builds the string value of the node +o+ by walking its children in
    # order: each text child contributes its +to_s+, each element child
    # contributes its own string value (computed recursively).  Children of
    # any other node type contribute nothing.
    #
    # o:: a node responding to +children+
    # Returns the concatenated String.
    def Functions::string_value( o )
      o.children.inject( "" ) { |collected, child|
        case child.node_type
        when :text
          collected << child.to_s
        when :element
          collected << string_value( child )
        end
        collected
      }
    end
    # UNTESTED
    def Functions::concat( *objects )
      objects.join
@ -139,7 +153,7 @@ module REXML
    # Fixed by Mike Stok
    def Functions::contains( string, test )
-      string(string).include? string(test)
+      string(string).include?(string(test))
    end
    # Kouhei fixed this 
@ -326,7 +340,9 @@ module REXML
      else
        str = string( object )
        #puts "STRING OF #{object.inspect} = #{str}"
-        if str =~ /^-?\.?\d/
+        # If XPath ever gets scientific notation...
        #if str =~ /^\s*-?(\d*\.?\d+|\d+\.)([Ee]\d*)?\s*$/
        if str =~ /^\s*-?(\d*\.?\d+|\d+\.)\s*$/
          str.to_f
        else
          (0.0 / 0.0)
--- a/lib/rexml/node.rb
+++ b/lib/rexml/node.rb
@ -55,10 +55,8 @@ module REXML
      return nil
    end
-    # Returns the index that +self+ has in its parent's elements array, so that
+    # Returns the position that +self+ holds in its parent's array, indexed
-    # the following equation holds true:
+    # from 1.
    #
    #   node == node.parent.elements[node.index_in_parent]
    def index_in_parent
      # parent.index is zero-based; this method reports a one-based position.
      zero_based = parent.index(self)
      zero_based + 1
    end
--- a/lib/rexml/parsers/baseparser.rb
+++ b/lib/rexml/parsers/baseparser.rb
@ -146,8 +146,6 @@ module REXML
      # Returns true if there are no more events, i.e. when both @source and
      # @stack report that they are empty.
      def empty?
        source_done = @source.empty?
        source_done && @stack.empty?
      end
@ -365,8 +363,6 @@ module REXML
          else
            md = @source.match( TEXT_PATTERN, true )
            if md[0].length == 0
              puts "EMPTY = #{empty?}"
              puts "BUFFER = \"#{@source.buffer}\""
              @source.match( /(\s+)/, true )
            end
            #STDERR.puts "GOT #{md[1].inspect}" unless md[0].length == 0
--- a/lib/rexml/parsers/sax2parser.rb
+++ b/lib/rexml/parsers/sax2parser.rb
@ -16,6 +16,10 @@ module REXML
 				@tag_stack = []
        @entities = {}
 			end
      # Returns the source of the wrapped parser; this simply delegates to
      # @parser.source.
      def source
        @parser.source
      end
      def add_listener( listener )
        @parser.add_listener( listener )
--- a/lib/rexml/parsers/treeparser.rb
+++ b/lib/rexml/parsers/treeparser.rb
@ -23,7 +23,8 @@ module REXML
            case event[0]
            when :end_document
              unless tag_stack.empty?
-                raise ParseException.new("No close tag for #{tag_stack.inspect}")
+                #raise ParseException.new("No close tag for #{tag_stack.inspect}")
                raise ParseException.new("No close tag for #{@build_context.xpath}")
              end
              return
            when :start_element
--- a/lib/rexml/rexml.rb
+++ b/lib/rexml/rexml.rb
@ -10,8 +10,8 @@
 #
 # Main page:: http://www.germane-software.com/software/rexml
 # Author:: Sean Russell <serATgermaneHYPHENsoftwareDOTcom>
-# Version:: 3.1.5
+# Version:: 3.1.6
-# Date:: 2006/250
+# Date:: 2006/335
 # 
 # This API documentation can be downloaded from the REXML home page, or can
 # be accessed online[http://www.germane-software.com/software/rexml_doc]
@ -21,8 +21,8 @@
 # online[http://www.germane-software.com/software/rexml/docs/tutorial.html]
 module REXML
 	COPYRIGHT = "Copyright © 2001-2006 Sean Russell <ser@germane-software.com>"
-  DATE = "2006/250"
+  DATE = "2006/335"
-  VERSION = "3.1.5"
+  VERSION = "3.1.6"
  Copyright = COPYRIGHT
  Version = VERSION
--- a/lib/rexml/sax2listener.rb
+++ b/lib/rexml/sax2listener.rb
@ -70,7 +70,7 @@ module REXML
 		#  ["open-hatch", "PUBLIC", "\"-//Textuality//TEXT Standard open-hatch boilerplate//EN\"", "\"http://www.textuality.com/boilerplate/OpenHatch.xml\""]
 		#  <!ENTITY hatch-pic SYSTEM "../grafix/OpenHatch.gif" NDATA gif>
 		#  ["hatch-pic", "SYSTEM", "\"../grafix/OpenHatch.gif\"", "\n\t\t\t\t\t\t\tNDATA gif", "gif"]
-		def entitydecl content
+		def entitydecl name, decl
 		end
 		# <!NOTATION ...>
 		def notationdecl content
--- a/lib/rexml/source.rb
+++ b/lib/rexml/source.rb
@ -6,7 +6,7 @@ module REXML
 		# Generates a Source object
 		# @param arg Either a String, or an IO
 		# @return a Source, or nil if a bad argument was given
-		def SourceFactory::create_from arg#, slurp=true
+		def SourceFactory::create_from(arg)
      if arg.kind_of? String
 			  Source.new(arg)
      elsif arg.respond_to? :read and
@ -35,16 +35,23 @@ module REXML
 		# Constructor
 		# @param arg must be a String, and should be a valid XML document
-		def initialize(arg)
+    # @param encoding if non-null, sets the encoding of the source to this
    # value, overriding all encoding detection
 		def initialize(arg, encoding=nil)
 			@orig = @buffer = arg
-			self.encoding = check_encoding( @buffer )
+      if encoding
        self.encoding = encoding
      else
        self.encoding = check_encoding( @buffer )
      end
 			@line = 0
 		end
 		# Inherited from Encoding
 		# Overridden to support optimized en/decoding
 		def encoding=(enc)
-			super
+			return unless super
 			@line_break = encode( '>' )
 			if enc != UTF_8
 				@buffer = decode(@buffer)
@ -124,7 +131,7 @@ module REXML
 		#attr_reader :block_size
    # block_size has been deprecated
-		def initialize(arg, block_size=500)
+		def initialize(arg, block_size=500, encoding=nil)
 			@er_source = @source = arg
 			@to_utf = false
      # Determining the encoding is a deceptively difficult issue to resolve.
@ -134,10 +141,12 @@ module REXML
      # if there is one.  If there isn't one, the file MUST be UTF-8, as per
      # the XML spec.  If there is one, we can determine the encoding from
      # it.
      @buffer = ""
      str = @source.read( 2 )
-      if /\A(?:\xfe\xff|\xff\xfe)/n =~ str
+      if encoding
        self.encoding = encoding
      elsif /\A(?:\xfe\xff|\xff\xfe)/n =~ str
        self.encoding = check_encoding( str )
        @line_break = encode( '>' )
      else
        @line_break = '>'
      end
@ -159,6 +168,8 @@ module REXML
 						str = @source.readline(@line_break)
 						str = decode(str) if @to_utf and str
 						@buffer << str
          rescue Iconv::IllegalSequence
            raise
 					rescue
 						@source = nil
 					end
--- a/lib/rexml/text.rb
+++ b/lib/rexml/text.rb
@ -42,6 +42,7 @@ module REXML
    # Use this field if you have entities defined for some text, and you don't
    # want REXML to escape that text in output.
    #   Text.new( "<&", false, nil, false ) #-> "&lt;&amp;"
    #   Text.new( "&lt;&amp;", false, nil, false ) #-> "&amp;lt;&amp;amp;"
    #   Text.new( "<&", false, nil, true )  #-> Parse exception
    #   Text.new( "&lt;&amp;", false, nil, true )  #-> "&lt;&amp;"
    #   # Assume that the entity "s" is defined to be "sean"
@ -172,17 +173,6 @@ module REXML
      end
      @unnormalized = Text::unnormalize( @string, doctype )
    end
     # Wraps +string+ by replacing spaces with newlines so that, where
     # possible, each line is at most +width+ characters long.
     #
     # string::     the text to wrap
     # width::      the desired maximum line length
     # addnewline:: if true, a newline is also prepended to the wrapped
     #              result; the flag is not passed on to the recursive calls
     #
     # A word longer than +width+ cannot be broken, so the break then falls
     # on the first space after it; if no space remains, the rest of the
     # string is returned unwrapped.  Returns the wrapped String.
     def wrap(string, width, addnewline=false)
       # Recursively wrap string at width.
       return string if string.length <= width
       # Prefer the last ' ' at or before the cutoff; if there is none
       # (over-long word), fall back to the first ' ' after the cutoff.
       place = string.rindex(' ', width) || string.index(' ', width)
       # No space anywhere: nothing can be wrapped.
       return string if place.nil?
       wrapped = string[0,place] + "\n" + wrap(string[place+1..-1], width)
       addnewline ? "\n" + wrapped : wrapped
     end
    # Sets the contents of this text node.  This expects the text to be 
    # unnormalized.  It returns self.
@ -198,17 +188,28 @@ module REXML
      @raw = false
    end
-     def indent_text(string, level=1, style="\t", indentfirstline=true)
+     def wrap(string, width, addnewline=false)
-      return string if level < 0
+       # Recursivly wrap string at width.
-       new_string = ''
+       return string if string.length <= width
-       string.each { |line|
+       place = string.rindex(' ', width) # Position in string with last ' ' before cutoff
-         indent_string = style * level
+       if addnewline then
-         new_line = (indent_string + line).sub(/[\s]+$/,'')
+         return "\n" + string[0,place] + "\n" + wrap(string[place+1..-1], width)
-         new_string << new_line
+       else
-       }
+         return string[0,place] + "\n" + wrap(string[place+1..-1], width)
-       new_string.strip! unless indentfirstline
+       end
       return new_string
     end
    # Prefixes every line of +string+ with +style+ repeated +level+ times.
    #
    # string::          the text to indent
    # level::           how many times +style+ is repeated per line; a
    #                   negative level returns +string+ unchanged
    # style::           the indentation unit (a tab by default)
    # indentfirstline:: if false, the result is stripped of leading and
    #                   trailing whitespace, which removes the indentation
    #                   of the first line
    #
    # Trailing whitespace, including the line terminator, is removed from
    # each line before it is appended to the result.  Returns a new String.
    def indent_text(string, level=1, style="\t", indentfirstline=true)
      return string if level < 0
      # The indentation is the same for every line, so build it once.
      indent_string = style * level
      new_string = ''
      # each_line rather than String#each: the latter exists only in Ruby 1.8.
      string.each_line { |line|
        new_string << (indent_string + line).sub(/[\s]+$/,'')
      }
      new_string.strip! unless indentfirstline
      return new_string
    end
    def write( writer, indent=-1, transitive=false, ie_hack=false ) 
      s = to_s()
@ -286,9 +287,10 @@ module REXML
    def Text::normalize( input, doctype=nil, entity_filter=nil )
      copy = input
      # Doing it like this rather than in a loop improves the speed
      #copy = copy.gsub( EREFERENCE, '&amp;' )
      copy = copy.gsub( "&", "&amp;" )
      if doctype
        # Replace all ampersands that aren't part of an entity
        copy = copy.gsub( EREFERENCE, '&amp;' )
        doctype.entities.each_value do |entity|
          copy = copy.gsub( entity.value, 
            "&#{entity.name};" ) if entity.value and 
@ -296,7 +298,6 @@ module REXML
        end
      else
        # Replace all ampersands that aren't part of an entity
        copy = copy.gsub( EREFERENCE, '&amp;' )
        DocType::DEFAULT_ENTITIES.each_value do |entity|
          copy = copy.gsub(entity.value, "&#{entity.name};" )
        end
--- a/lib/rexml/xpath_parser.rb
+++ b/lib/rexml/xpath_parser.rb
@ -162,6 +162,10 @@ module REXML
      while path_stack.length > 0
        #puts "Path stack = #{path_stack.inspect}"
        #puts "Nodeset is #{nodeset.inspect}"
        if nodeset.length == 0
          path_stack.clear
          return []
        end
        case (op = path_stack.shift)
        when :document
          nodeset = [ nodeset[0].root_node ]
@ -235,9 +239,11 @@ module REXML
            name = path_stack.shift
            for element in nodeset
              if element.node_type == :element
-                #puts element.name
+                #puts "Element name = #{element.name}"
-                attr = element.attribute( name, get_namespace(element, prefix) )
+                #puts "get_namespace( #{element.inspect}, #{prefix} ) = #{get_namespace(element, prefix)}"
-                new_nodeset << attr if attr
+                attrib = element.attribute( name, get_namespace(element, prefix) )
                #puts "attrib = #{attrib.inspect}"
                new_nodeset << attrib if attrib
              end
            end
          when :any
@ -299,8 +305,10 @@ module REXML
              #puts "Adding node #{node.inspect}" if result == (index+1)
              new_nodeset << node if result == (index+1)
            elsif result.instance_of? Array
-              #puts "Adding node #{node.inspect}" if result.size > 0
+              if result.size > 0 and result.inject(false) {|k,s| s or k}
-              new_nodeset << node if result.size > 0
+                #puts "Adding node #{node.inspect}" if result.size > 0
                new_nodeset << node if result.size > 0
              end
            else
              #puts "Adding node #{node.inspect}" if result
              new_nodeset << node if result
@ -381,9 +389,19 @@ module REXML
          node_types = ELEMENTS
        when :namespace
-          new_set = []
+          new_nodeset = []
          prefix = path_stack.shift
          for node in nodeset
-            new_nodeset << node.namespace if node.node_type == :element or node.node_type == :attribute
+            if (node.node_type == :element or node.node_type == :attribute)
              if (node.node_type == :element)
                namespaces = node.namespaces
              else
                namespaces = node.element.namesapces
              end
              if (node.namespace == namespaces[prefix])
                new_nodeset << node
              end
            end
          end
          nodeset = new_nodeset
@ -404,6 +422,18 @@ module REXML
          #puts "RES => #{res.inspect}"
          return res
        when :and
          left = expr( path_stack.shift, nodeset.dup, context )
          #puts "LEFT => #{left.inspect} (#{left.class.name})"
          if left == false || left.nil? || !left.inject(false) {|a,b| a | b}
            return []
          end
          right = expr( path_stack.shift, nodeset.dup, context )
          #puts "RIGHT => #{right.inspect} (#{right.class.name})"
          res = equality_relational_compare( left, op, right )
          #puts "RES => #{res.inspect}"
          return res
        when :div
          left = Functions::number(expr(path_stack.shift, nodeset, context)).to_f
          right = Functions::number(expr(path_stack.shift, nodeset, context)).to_f
@ -477,7 +507,7 @@ module REXML
    # The next two methods are BAD MOJO!
    # This is my achilles heel.  If anybody thinks of a better
    # way of doing this, be my guest.  This really sucks, but 
-    # it took me three days to get it to work at all.
+    # it is a wonder it works at all.
    # ########################################################
    def descendant_or_self( path_stack, nodeset )