diff --git a/lib/rexml/element.rb b/lib/rexml/element.rb index 435076420a..11e2039609 100644 --- a/lib/rexml/element.rb +++ b/lib/rexml/element.rb @@ -94,7 +94,7 @@ module REXML # new_a = d.root.clone # puts new_a # => "" def clone - Element.new self + self.class.new self end # Evaluates to the root node of the document that this element @@ -200,9 +200,9 @@ module REXML end def namespaces - namespaces = [] + namespaces = {} namespaces = parent.namespaces if parent - namespaces |= attributes.namespaces + namespaces = namespaces.merge( attributes.namespaces ) return namespaces end @@ -494,13 +494,12 @@ module REXML # doc.root.add_element 'c' #-> 'Elliott' # doc.root.text = 'Russell' #-> 'Russell' # doc.root.text = nil #-> '' - def text=( text ) + def text=( text ) if text.kind_of? String text = Text.new( text, whitespace(), nil, raw() ) elsif text and !text.kind_of? Text text = Text.new( text.to_s, whitespace(), nil, raw() ) end - old_text = get_text if text.nil? old_text.remove unless old_text.nil? @@ -557,13 +556,9 @@ module REXML ################################################# def attribute( name, namespace=nil ) - prefix = '' - if namespace - prefix = attributes.prefixes.each { |prefix| - return "#{prefix}:" if namespace( prefix ) == namespace - } || '' - end - attributes.get_attribute( "#{prefix}#{name}" ) + prefix = nil + prefix = namespaces.index(namespace) if namespace + attributes.get_attribute( "#{prefix ? prefix + ':' : ''}#{name}" ) end # Evaluates to +true+ if this element has any attributes set, false @@ -1172,16 +1167,16 @@ module REXML end def namespaces - namespaces = [] + namespaces = {} each_attribute do |attribute| - namespaces << attribute.value if attribute.prefix == 'xmlns' or attribute.name == 'xmlns' + namespaces[attribute.name] = attribute.value if attribute.prefix == 'xmlns' or attribute.name == 'xmlns' end if @element.document and @element.document.doctype expn = @element.expanded_name expn = @element.document.doctype.name if expn.size == 0 @element.document.doctype.attributes_of(expn).each { |attribute| - namespaces << attribute.value if attribute.prefix == 'xmlns' or attribute.name == 'xmlns' + namespaces[attribute.name] = attribute.value if attribute.prefix == 'xmlns' or attribute.name == 'xmlns' } end namespaces diff --git a/lib/rexml/encoding.rb b/lib/rexml/encoding.rb index f003d6cc3b..e35c3acf7c 100644 --- a/lib/rexml/encoding.rb +++ b/lib/rexml/encoding.rb @@ -24,21 +24,22 @@ module REXML old_verbosity = $VERBOSE begin $VERBOSE = false - return if defined? @encoding and enc == @encoding + enc = enc.nil? ? nil : enc.upcase + return false if defined? @encoding and enc == @encoding if enc and enc != UTF_8 - @encoding = enc.upcase + @encoding = enc + raise ArgumentError, "Bad encoding name #@encoding" unless @encoding =~ /^[\w-]+$/ + @encoding.untaint begin require 'rexml/encodings/ICONV.rb' Encoding.apply(self, "ICONV") - rescue LoadError, Exception => err - raise ArgumentError, "Bad encoding name #@encoding" unless @encoding =~ /^[\w-]+$/ - @encoding.untaint - enc_file = File.join( "rexml", "encodings", "#@encoding.rb" ) + rescue LoadError, Exception begin + enc_file = File.join( "rexml", "encodings", "#@encoding.rb" ) require enc_file Encoding.apply(self, @encoding) - rescue LoadError - puts $!.message + rescue LoadError => err + puts err.message raise ArgumentError, "No decoder found for encoding #@encoding. Please install iconv." end end @@ -50,6 +51,7 @@ module REXML ensure $VERBOSE = old_verbosity end + true end def check_encoding str diff --git a/lib/rexml/encodings/UNILE.rb b/lib/rexml/encodings/UNILE.rb index 0560a08361..d054140c40 100644 --- a/lib/rexml/encodings/UNILE.rb +++ b/lib/rexml/encodings/UNILE.rb @@ -18,7 +18,7 @@ module REXML def decode_unile(str) array_enc=str.unpack('C*') array_utf8 = [] - 2.step(array_enc.size-1, 2){|i| + 0.step(array_enc.size-1, 2){|i| array_utf8 << (array_enc.at(i) + array_enc.at(i+1)*0x100) } array_utf8.pack('U*') diff --git a/lib/rexml/encodings/UTF-16.rb b/lib/rexml/encodings/UTF-16.rb index 972169755e..792adfd44d 100644 --- a/lib/rexml/encodings/UTF-16.rb +++ b/lib/rexml/encodings/UTF-16.rb @@ -16,9 +16,10 @@ module REXML end def decode_utf16(str) + str = str[2..-1] if /^\376\377/ =~ str array_enc=str.unpack('C*') array_utf8 = [] - 2.step(array_enc.size-1, 2){|i| + 0.step(array_enc.size-1, 2){|i| array_utf8 << (array_enc.at(i+1) + array_enc.at(i)*0x100) } array_utf8.pack('U*') diff --git a/lib/rexml/functions.rb b/lib/rexml/functions.rb index d741dbdab7..cad4f6a8c9 100644 --- a/lib/rexml/functions.rb +++ b/lib/rexml/functions.rb @@ -117,16 +117,30 @@ module REXML elsif defined? object.node_type if object.node_type == :attribute object.value - elsif object.node_type == :element - object.text + elsif object.node_type == :element || object.node_type == :document + string_value(object) else object.to_s end + elsif object.nil? + return "" else object.to_s end end + def Functions::string_value( o ) + rv = "" + o.children.each { |e| + if e.node_type == :text + rv << e.to_s + elsif e.node_type == :element + rv << string_value( e ) + end + } + rv + end + # UNTESTED def Functions::concat( *objects ) objects.join @@ -139,7 +153,7 @@ module REXML # Fixed by Mike Stok def Functions::contains( string, test ) - string(string).include? string(test) + string(string).include?(string(test)) end # Kouhei fixed this @@ -326,7 +340,9 @@ module REXML else str = string( object ) #puts "STRING OF #{object.inspect} = #{str}" - if str =~ /^-?\.?\d/ + # If XPath ever gets scientific notation... + #if str =~ /^\s*-?(\d*\.?\d+|\d+\.)([Ee]\d*)?\s*$/ + if str =~ /^\s*-?(\d*\.?\d+|\d+\.)\s*$/ str.to_f else (0.0 / 0.0) diff --git a/lib/rexml/node.rb b/lib/rexml/node.rb index e5dec72a9d..7226e5be6c 100644 --- a/lib/rexml/node.rb +++ b/lib/rexml/node.rb @@ -55,10 +55,8 @@ module REXML return nil end - # Returns the index that +self+ has in its parent's elements array, so that - # the following equation holds true: - # - # node == node.parent.elements[node.index_in_parent] + # Returns the position that +self+ holds in its parent's array, indexed + # from 1. def index_in_parent parent.index(self)+1 end diff --git a/lib/rexml/parsers/baseparser.rb b/lib/rexml/parsers/baseparser.rb index c57ea58dc7..fecd801d6f 100644 --- a/lib/rexml/parsers/baseparser.rb +++ b/lib/rexml/parsers/baseparser.rb @@ -146,8 +146,6 @@ module REXML # Returns true if there are no more events def empty? - #STDERR.puts "@source.empty? = #{@source.empty?}" - #STDERR.puts "@stack.empty? = #{@stack.empty?}" return (@source.empty? and @stack.empty?) end @@ -365,8 +363,6 @@ module REXML else md = @source.match( TEXT_PATTERN, true ) if md[0].length == 0 - puts "EMPTY = #{empty?}" - puts "BUFFER = \"#{@source.buffer}\"" @source.match( /(\s+)/, true ) end #STDERR.puts "GOT #{md[1].inspect}" unless md[0].length == 0 diff --git a/lib/rexml/parsers/sax2parser.rb b/lib/rexml/parsers/sax2parser.rb index 61a216cec1..6c7fbe000a 100644 --- a/lib/rexml/parsers/sax2parser.rb +++ b/lib/rexml/parsers/sax2parser.rb @@ -16,6 +16,10 @@ module REXML @tag_stack = [] @entities = {} end + + def source + @parser.source + end def add_listener( listener ) @parser.add_listener( listener ) diff --git a/lib/rexml/parsers/treeparser.rb b/lib/rexml/parsers/treeparser.rb index 500a53f426..a53fa41925 100644 --- a/lib/rexml/parsers/treeparser.rb +++ b/lib/rexml/parsers/treeparser.rb @@ -23,7 +23,8 @@ module REXML case event[0] when :end_document unless tag_stack.empty? - raise ParseException.new("No close tag for #{tag_stack.inspect}") + #raise ParseException.new("No close tag for #{tag_stack.inspect}") + raise ParseException.new("No close tag for #{@build_context.xpath}") end return when :start_element diff --git a/lib/rexml/rexml.rb b/lib/rexml/rexml.rb index 68759ab3f8..bff1cd9815 100644 --- a/lib/rexml/rexml.rb +++ b/lib/rexml/rexml.rb @@ -10,8 +10,8 @@ # # Main page:: http://www.germane-software.com/software/rexml # Author:: Sean Russell -# Version:: 3.1.5 -# Date:: 2006/250 +# Version:: 3.1.6 +# Date:: 2006/335 # # This API documentation can be downloaded from the REXML home page, or can # be accessed online[http://www.germane-software.com/software/rexml_doc] @@ -21,8 +21,8 @@ # online[http://www.germane-software.com/software/rexml/docs/tutorial.html] module REXML COPYRIGHT = "Copyright © 2001-2006 Sean Russell " - DATE = "2006/250" - VERSION = "3.1.5" + DATE = "2006/335" + VERSION = "3.1.6" Copyright = COPYRIGHT Version = VERSION diff --git a/lib/rexml/sax2listener.rb b/lib/rexml/sax2listener.rb index 9a992917e6..8db1389d06 100644 --- a/lib/rexml/sax2listener.rb +++ b/lib/rexml/sax2listener.rb @@ -70,7 +70,7 @@ module REXML # ["open-hatch", "PUBLIC", "\"-//Textuality//TEXT Standard open-hatch boilerplate//EN\"", "\"http://www.textuality.com/boilerplate/OpenHatch.xml\""] # # ["hatch-pic", "SYSTEM", "\"../grafix/OpenHatch.gif\"", "\n\t\t\t\t\t\t\tNDATA gif", "gif"] - def entitydecl content + def entitydecl name, decl end # def notationdecl content diff --git a/lib/rexml/source.rb b/lib/rexml/source.rb index c51f504811..2fee99c0e9 100644 --- a/lib/rexml/source.rb +++ b/lib/rexml/source.rb @@ -6,7 +6,7 @@ module REXML # Generates a Source object # @param arg Either a String, or an IO # @return a Source, or nil if a bad argument was given - def SourceFactory::create_from arg#, slurp=true + def SourceFactory::create_from(arg) if arg.kind_of? String Source.new(arg) elsif arg.respond_to? :read and @@ -35,16 +35,23 @@ module REXML # Constructor # @param arg must be a String, and should be a valid XML document - def initialize(arg) + # @param encoding if non-null, sets the encoding of the source to this + # value, overriding all encoding detection + def initialize(arg, encoding=nil) @orig = @buffer = arg - self.encoding = check_encoding( @buffer ) + if encoding + self.encoding = encoding + else + self.encoding = check_encoding( @buffer ) + end @line = 0 end + # Inherited from Encoding # Overridden to support optimized en/decoding def encoding=(enc) - super + return unless super @line_break = encode( '>' ) if enc != UTF_8 @buffer = decode(@buffer) @@ -124,7 +131,7 @@ module REXML #attr_reader :block_size # block_size has been deprecated - def initialize(arg, block_size=500) + def initialize(arg, block_size=500, encoding=nil) @er_source = @source = arg @to_utf = false # Determining the encoding is a deceptively difficult issue to resolve. @@ -134,10 +141,12 @@ module REXML # if there is one. If there isn't one, the file MUST be UTF-8, as per # the XML spec. If there is one, we can determine the encoding from # it. + @buffer = "" str = @source.read( 2 ) - if /\A(?:\xfe\xff|\xff\xfe)/n =~ str + if encoding + self.encoding = encoding + elsif /\A(?:\xfe\xff|\xff\xfe)/n =~ str self.encoding = check_encoding( str ) - @line_break = encode( '>' ) else @line_break = '>' end @@ -159,6 +168,8 @@ module REXML str = @source.readline(@line_break) str = decode(str) if @to_utf and str @buffer << str + rescue Iconv::IllegalSequence + raise rescue @source = nil end diff --git a/lib/rexml/text.rb b/lib/rexml/text.rb index 55bc9f50f8..3de9170623 100644 --- a/lib/rexml/text.rb +++ b/lib/rexml/text.rb @@ -42,6 +42,7 @@ module REXML # Use this field if you have entities defined for some text, and you don't # want REXML to escape that text in output. # Text.new( "<&", false, nil, false ) #-> "<&" + # Text.new( "<&", false, nil, false ) #-> "&lt;&amp;" # Text.new( "<&", false, nil, true ) #-> Parse exception # Text.new( "<&", false, nil, true ) #-> "<&" # # Assume that the entity "s" is defined to be "sean" @@ -172,17 +173,6 @@ module REXML end @unnormalized = Text::unnormalize( @string, doctype ) end - - def wrap(string, width, addnewline=false) - # Recursivly wrap string at width. - return string if string.length <= width - place = string.rindex(' ', width) # Position in string with last ' ' before cutoff - if addnewline then - return "\n" + string[0,place] + "\n" + wrap(string[place+1..-1], width) - else - return string[0,place] + "\n" + wrap(string[place+1..-1], width) - end - end # Sets the contents of this text node. This expects the text to be # unnormalized. It returns self. @@ -198,17 +188,28 @@ module REXML @raw = false end - def indent_text(string, level=1, style="\t", indentfirstline=true) - return string if level < 0 - new_string = '' - string.each { |line| - indent_string = style * level - new_line = (indent_string + line).sub(/[\s]+$/,'') - new_string << new_line - } - new_string.strip! unless indentfirstline - return new_string + def wrap(string, width, addnewline=false) + # Recursivly wrap string at width. + return string if string.length <= width + place = string.rindex(' ', width) # Position in string with last ' ' before cutoff + if addnewline then + return "\n" + string[0,place] + "\n" + wrap(string[place+1..-1], width) + else + return string[0,place] + "\n" + wrap(string[place+1..-1], width) + end end + + def indent_text(string, level=1, style="\t", indentfirstline=true) + return string if level < 0 + new_string = '' + string.each { |line| + indent_string = style * level + new_line = (indent_string + line).sub(/[\s]+$/,'') + new_string << new_line + } + new_string.strip! unless indentfirstline + return new_string + end def write( writer, indent=-1, transitive=false, ie_hack=false ) s = to_s() @@ -286,9 +287,10 @@ module REXML def Text::normalize( input, doctype=nil, entity_filter=nil ) copy = input # Doing it like this rather than in a loop improves the speed + #copy = copy.gsub( EREFERENCE, '&' ) + copy = copy.gsub( "&", "&" ) if doctype # Replace all ampersands that aren't part of an entity - copy = copy.gsub( EREFERENCE, '&' ) doctype.entities.each_value do |entity| copy = copy.gsub( entity.value, "&#{entity.name};" ) if entity.value and @@ -296,7 +298,6 @@ module REXML end else # Replace all ampersands that aren't part of an entity - copy = copy.gsub( EREFERENCE, '&' ) DocType::DEFAULT_ENTITIES.each_value do |entity| copy = copy.gsub(entity.value, "&#{entity.name};" ) end diff --git a/lib/rexml/xpath_parser.rb b/lib/rexml/xpath_parser.rb index a813236e10..3393113d6a 100644 --- a/lib/rexml/xpath_parser.rb +++ b/lib/rexml/xpath_parser.rb @@ -162,6 +162,10 @@ module REXML while path_stack.length > 0 #puts "Path stack = #{path_stack.inspect}" #puts "Nodeset is #{nodeset.inspect}" + if nodeset.length == 0 + path_stack.clear + return [] + end case (op = path_stack.shift) when :document nodeset = [ nodeset[0].root_node ] @@ -235,9 +239,11 @@ module REXML name = path_stack.shift for element in nodeset if element.node_type == :element - #puts element.name - attr = element.attribute( name, get_namespace(element, prefix) ) - new_nodeset << attr if attr + #puts "Element name = #{element.name}" + #puts "get_namespace( #{element.inspect}, #{prefix} ) = #{get_namespace(element, prefix)}" + attrib = element.attribute( name, get_namespace(element, prefix) ) + #puts "attrib = #{attrib.inspect}" + new_nodeset << attrib if attrib end end when :any @@ -299,8 +305,10 @@ module REXML #puts "Adding node #{node.inspect}" if result == (index+1) new_nodeset << node if result == (index+1) elsif result.instance_of? Array - #puts "Adding node #{node.inspect}" if result.size > 0 - new_nodeset << node if result.size > 0 + if result.size > 0 and result.inject(false) {|k,s| s or k} + #puts "Adding node #{node.inspect}" if result.size > 0 + new_nodeset << node if result.size > 0 + end else #puts "Adding node #{node.inspect}" if result new_nodeset << node if result @@ -381,9 +389,19 @@ module REXML node_types = ELEMENTS when :namespace - new_set = [] + new_nodeset = [] + prefix = path_stack.shift for node in nodeset - new_nodeset << node.namespace if node.node_type == :element or node.node_type == :attribute + if (node.node_type == :element or node.node_type == :attribute) + if (node.node_type == :element) + namespaces = node.namespaces + else + namespaces = node.element.namesapces + end + if (node.namespace == namespaces[prefix]) + new_nodeset << node + end + end end nodeset = new_nodeset @@ -404,6 +422,18 @@ module REXML #puts "RES => #{res.inspect}" return res + when :and + left = expr( path_stack.shift, nodeset.dup, context ) + #puts "LEFT => #{left.inspect} (#{left.class.name})" + if left == false || left.nil? || !left.inject(false) {|a,b| a | b} + return [] + end + right = expr( path_stack.shift, nodeset.dup, context ) + #puts "RIGHT => #{right.inspect} (#{right.class.name})" + res = equality_relational_compare( left, op, right ) + #puts "RES => #{res.inspect}" + return res + when :div left = Functions::number(expr(path_stack.shift, nodeset, context)).to_f right = Functions::number(expr(path_stack.shift, nodeset, context)).to_f @@ -477,7 +507,7 @@ module REXML # The next two methods are BAD MOJO! # This is my achilles heel. If anybody thinks of a better # way of doing this, be my guest. This really sucks, but - # it took me three days to get it to work at all. + # it is a wonder it works at all. # ######################################################## def descendant_or_self( path_stack, nodeset )