If Integer, the Element indexed by that number will be
# removed.
# Returns:: the element that was removed.
# doc.delete_element "/a/b/c[@id='4']"
# doc.delete_element doc.elements["//k"]
# doc.delete_element 1
def delete_element(element)
  # Removal is delegated to the child-element collection, whose return
  # value is handed back to the caller unchanged.
  @elements.delete(element)
end
# Evaluates to +true+ if this element has at least one child Element
# doc = Document.new "<a><b/><c>Text</c></a>"
# doc.root.has_elements? # -> true
# doc.elements["/a/b"].has_elements? # -> false
# doc.elements["/a/c"].has_elements? # -> false
def has_elements?
  # The answer is simply the opposite of the collection's emptiness check.
  return false if @elements.empty?
  true
end
# Iterates through the child elements, yielding for each Element that
# has a particular attribute set.
# key::
# the name of the attribute to search for
# value::
# the value of the attribute
# max::
# (optional) causes this method to return after yielding
# for this number of matching children
# name::
# (optional) if supplied, this is an XPath that filters
# the children to check.
#
# doc = Document.new '<a><b id="1"/><c id="2"/><d id="1"/><e/></a>'
# # Yields b, c, d
# doc.root.each_element_with_attribute( 'id' ) {|e| p e}
# # Yields b, d
# doc.root.each_element_with_attribute( 'id', '1' ) {|e| p e}
# # Yields b
# doc.root.each_element_with_attribute( 'id', '1', 1 ) {|e| p e}
# # Yields d
# doc.root.each_element_with_attribute( 'id', '1', 0, 'd' ) {|e| p e}
def each_element_with_attribute( key, value=nil, max=0, name=nil, &block ) # :yields: Element
  # Without a +value+ any child that carries the attribute matches;
  # otherwise the attribute's value must be equal to +value+.
  matcher = proc do |candidate|
    found = candidate.attributes[key]
    value.nil? ? found != nil : found == value
  end
  each_with_something( matcher, max, name, &block )
end
# Iterates through the children, yielding for each Element that
# has a particular text set.
# text::
# the text to search for. If nil, or not supplied, will iterate
# over all +Element+ children that contain at least one +Text+ node.
# max::
# (optional) causes this method to return after yielding
# for this number of matching children
# name::
# (optional) if supplied, this is an XPath that filters
# the children to check.
#
# doc = Document.new '<a><b>b</b><c>b</c><d>d</d><e/></a>'
# # Yields b, c, d
# doc.each_element_with_text {|e|p e}
# # Yields b, c
# doc.each_element_with_text('b'){|e|p e}
# # Yields b
# doc.each_element_with_text('b', 1){|e|p e}
# # Yields d
# doc.each_element_with_text(nil, 0, 'd'){|e|p e}
def each_element_with_text( text=nil, max=0, name=nil, &block ) # :yields: Element
  # Without +text+ any child that reports having text matches; otherwise
  # the child's first text value must be equal to +text+.
  matcher = proc do |candidate|
    text.nil? ? candidate.has_text? : candidate.text == text
  end
  each_with_something( matcher, max, name, &block )
end
# Synonym for Element.elements.each
def each_element(xpath = nil, &block) # :yields: Element
  # Iteration, including the optional XPath filter, is done by the
  # child-element collection.
  @elements.each(xpath, &block)
end
# Synonym for Element.to_a
# This is a little slower than calling elements.each directly.
# xpath:: any XPath by which to search for elements in the tree
# Returns:: an array of Elements that match the supplied path
def get_elements(xpath)
  # The child-element collection builds the Array of matches.
  @elements.to_a(xpath)
end
# Returns the next sibling that is an element, or nil if there is
# no Element sibling after this one
# doc = Document.new '<a><b/>text<c/></a>'
# doc.root.elements['b'].next_element #-> <c/>
# doc.root.elements['c'].next_element #-> nil
def next_element
  node = next_sibling
  # Walk forward until an Element is found or the siblings run out.
  loop do
    break if node.nil? || node.kind_of?( Element )
    node = node.next_sibling
  end
  node
end
# Returns the previous sibling that is an element, or nil if there is
# no Element sibling prior to this one
# doc = Document.new '<a><b/>text<c/></a>'
# doc.root.elements['c'].previous_element #-> <b/>
# doc.root.elements['b'].previous_element #-> nil
def previous_element
  node = previous_sibling
  # Walk backward until an Element is found or the siblings run out.
  loop do
    break if node.nil? || node.kind_of?( Element )
    node = node.previous_sibling
  end
  node
end
#################################################
# Text #
#################################################
# Evaluates to +true+ if this element has at least one Text child
def has_text?
  # text() yields nil when there is no Text child.
  first_value = text()
  !first_value.nil?
end
# A convenience method which returns the String value of the _first_
# child text element, if one exists, and +nil+ otherwise.
#
# Note that an element may have multiple Text elements, perhaps
# separated by other children. Be aware that this method only returns
# the first Text node.
#
# This method returns the +value+ of the first text child node, which
# ignores the +raw+ setting, so always returns normalized text. See
# the Text::value documentation.
#
# doc = Document.new "<p>some text <b>this is bold!</b> more text</p>"
# # The element 'p' has two text elements, "some text " and " more text".
# doc.root.text #-> "some text "
def text( path = nil )
  # Unwrap the first Text node (if any) to its value.
  node = get_text(path)
  node.nil? ? nil : node.value
end
# Returns the first child Text node, if any, or +nil+ otherwise.
# This method returns the actual +Text+ node, rather than the String content.
# doc = Document.new "<p>some text <b>this is bold!</b> more text</p>"
# # The element 'p' has two text elements, "some text " and " more text".
# doc.root.get_text.value #-> "some text "
def get_text(path = nil)
  # No path: the first direct child that is a Text node.
  return @children.find { |node| node.kind_of?( Text ) } unless path

  # With a path: look up that element and ask it for its own first Text.
  target = @elements[ path ]
  target.nil? ? nil : target.get_text
end
# Sets the first Text child of this object. See text() for a
# discussion about Text children.
#
# If a Text child already exists, the child is replaced by this
# content. This means that Text content can be deleted by calling
# this method with a nil argument. In this case, the next Text
# child becomes the first Text child. In no case is the order of
# any siblings disturbed.
# text::
# If a String, a new Text child is created and added to
# this Element as the first Text child. If Text, the text is set
# as the first Child element. If nil, then any existing first Text
# child is removed.
# Returns:: this Element.
# doc = Document.new ''
# doc.root.text = 'Sean' #-> 'Sean'
# doc.root.text = 'Elliott' #-> 'Elliott'
# doc.root.add_element 'c' #-> 'Elliott'
# doc.root.text = 'Russell' #-> 'Russell'
# doc.root.text = nil #-> ''
def text=( text )
  # Normalise the argument: Strings and any other truthy non-Text object
  # are wrapped in a new Text node; Text, nil and false pass through.
  case text
  when String
    text = Text.new( text, whitespace(), nil, raw() )
  when Text, nil, false
    # used as given
  else
    text = Text.new( text.to_s, whitespace(), nil, raw() )
  end

  current = get_text
  if text.nil?
    # nil means "remove the first Text child, if there is one".
    current.remove unless current.nil?
  elsif current.nil?
    self << text
  else
    current.replace_with( text )
  end
  self
end
# A helper method to add a Text child. Actual Text instances can
# be added with regular Parent methods, such as add() and <<()
# text::
# if a String, a new Text instance is created and added
# to the parent. If Text, the object is added directly.
# Returns:: this Element
# e = Element.new('a') #->
# e.add_text 'foo' #-> foo
# e.add_text Text.new(' bar') #-> foo bar
# Note that at the end of this example, the branch has 3 nodes; the 'e'
# element and 2 Text node children.
def add_text( text )
  if text.kind_of? String
    if @children[-1].kind_of? Text
      # The last child is already a Text node: extend it in place rather
      # than adding a second adjacent Text node. The method still returns
      # this Element, as documented (it used to return nil on this path).
      @children[-1] << text
      return self
    end
    text = Text.new( text, whitespace(), nil, raw() )
  end
  # A nil argument adds nothing.
  self << text unless text.nil?
  return self
end
# Identifies the kind of node this object is.
# Returns:: the Symbol +:element+
def node_type
:element
end
# Builds a "/"-separated path from the topmost ancestor down to this
# node. Each step is whatever __to_xpath_helper returns for that node.
def xpath
  steps = [ __to_xpath_helper( self ) ]
  node = self
  # Climb the parent chain, prepending one step per ancestor.
  while (ancestor = node.parent)
    steps.unshift( __to_xpath_helper( ancestor ) )
    node = ancestor
  end
  steps.join( "/" )
end
#################################################
# Attributes #
#################################################
# Fetches an attribute by name, optionally restricted to a namespace.
# name:: the attribute name, without a prefix
# namespace::
#   (optional) a namespace value. The prefixes reported by
#   attributes.prefixes are searched for one that namespace(prefix) maps
#   to this value; the first such prefix qualifies the lookup name.
# Returns::
#   whatever attributes.get_attribute returns for the (possibly
#   prefixed) name
def attribute( name, namespace=nil )
  prefix = ''
  if namespace
    # +namespace+ is both a parameter and a method here; the call with an
    # argument resolves to the method. Use find so that a match yields
    # the prefix rather than returning from the whole method.
    match = attributes.prefixes.find { |candidate|
      namespace( candidate ) == namespace
    }
    prefix = "#{match}:" if match
  end
  attributes.get_attribute( "#{prefix}#{name}" )
end
# Evaluates to +true+ if this element has any attributes set, false
# otherwise.
def has_attributes?
  # The answer is the opposite of the attribute set's emptiness check.
  return false if @attributes.empty?
  true
end
# Adds an attribute to this element, overwriting any existing attribute
# by the same name.
# key::
# can be either an Attribute or a String. If an Attribute,
# the attribute is added to the list of Element attributes. If String,
# the argument is used as the name of the new attribute, and the value
# parameter must be supplied.
# value::
# Required if +key+ is a String, and ignored if the first argument is
# an Attribute. This is a String, and is used as the value
# of the new Attribute.
# Returns:: the Attribute added
# e = Element.new 'e'
# e.add_attribute( 'a', 'b' ) #->
# e.add_attribute( 'x:a', 'c' ) #->
# e.add_attribute Attribute.new('b', 'd') #->
def add_attribute( key, value=nil )
  # An Attribute object is appended as-is; +value+ is ignored then.
  return @attributes << key if key.kind_of?( Attribute )

  # Otherwise +key+ is the attribute name and +value+ its value.
  @attributes[key] = value
end
# Add multiple attributes to this element.
# hash:: is either a hash, or array of arrays
# el.add_attributes( {"name1"=>"value1", "name2"=>"value2"} )
# el.add_attributes( [ ["name1","value1"], ["name2","value2"] ] )
def add_attributes(hash)
  # Hashes supply name => value pairs; Arrays supply [name, value]
  # entries. Any other argument is ignored.
  case hash
  when Hash
    hash.each_pair { |attr_name, attr_value| @attributes[attr_name] = attr_value }
  when Array
    hash.each { |pair| @attributes[ pair[0] ] = pair[1] }
  end
end
# Removes an attribute
# key::
# either an Attribute or a String. In either case, the
# attribute is found by matching the attribute name to the argument,
# and then removed. If no attribute is found, no action is taken.
# Returns::
# the attribute removed, or nil if this Element did not contain
# a matching attribute
# e = Element.new('E')
# e.add_attribute( 'name', 'Sean' ) #->
# r = e.add_attribute( 'sur:name', 'Russell' ) #->
# e.delete_attribute( 'name' ) #->
# e.delete_attribute( r ) #->
def delete_attribute(key)
  # Look the attribute up first; there is nothing to do when it is absent.
  found = @attributes.get_attribute(key)
  found.nil? ? nil : found.remove
end
#################################################
# Other Utilities #
#################################################
# Get an array of all CData children.
# IMMUTABLE
def cdatas
  # Collect the CData children, then freeze the resulting Array.
  matches = find_all { |node| node.kind_of?( CData ) }
  matches.freeze
end
# Get an array of all Comment children.
# IMMUTABLE
def comments
  # Collect the Comment children, then freeze the resulting Array.
  matches = find_all { |node| node.kind_of?( Comment ) }
  matches.freeze
end
# Get an array of all Instruction children.
# IMMUTABLE
def instructions
  # Collect the Instruction children, then freeze the resulting Array.
  matches = find_all { |node| node.kind_of?( Instruction ) }
  matches.freeze
end
# Get an array of all Text children.
# IMMUTABLE
def texts
  # Collect the Text children, then freeze the resulting Array.
  matches = find_all { |node| node.kind_of?( Text ) }
  matches.freeze
end
# Writes out this element, and recursively, all children.
# output::
# output an object which supports '<< string'; this is where the
# document will be written.
# indent::
# An integer. If -1, no indenting will be used; otherwise, the
# indentation will be this number of spaces, and children will be
# indented an additional amount. Defaults to -1
# transitive::
# If transitive is true and indent is >= 0, then the output will be
# pretty-printed in such a way that the added whitespace does not affect
# the parse tree of the document
# ie_hack::
# Internet Explorer is the worst piece of crap to have ever been
# written, with the possible exception of Windows itself. Since IE is
# unable to parse proper XML, we have to provide a hack to generate XML
# that IE's limited abilities can handle. This hack inserts a space
# before the /> on empty tags. Defaults to false
#
# out = ''
# doc.write( out ) #-> doc is written to the string 'out'
# doc.write( $stdout ) #-> doc written to the console
def write(writer=$stdout, indent=-1, transitive=false, ie_hack=false)
  # Start tag: "<name" followed by each attribute, preceded by a space.
  # The closing ">" (or "/>") is emitted further down.
  writer << "<#@expanded_name"
  unless @attributes.empty?
    @attributes.each_attribute do |attr|
      writer << " "
      attr.write( writer, indent )
    end
  end

  if @children.empty?
    # Empty element: ends in "/>", optionally preceded by whitespace.
    if transitive and indent>-1
      writer << "\n"
      indent( writer, indent )
    elsif ie_hack
      writer << " "
    end
    writer << "/"
  else
    # In transitive mode the line break goes inside the start tag, before
    # its ">", unless the first child is Text.
    if transitive and indent>-1 and !@children[0].kind_of?( Text )
      writer << "\n"
      indent( writer, indent+1 )
    end
    writer << ">"
    write_children( writer, indent, transitive, ie_hack )
    # The end tag has to open with "</"; writing the bare name here
    # produced output such as "<a>...a>".
    writer << "</#{expanded_name}"
  end

  if transitive and indent>-1 and !@children.empty?
    writer << "\n"
    # The last sibling closes one indentation level further out.
    indent -= 1 if next_sibling.nil?
    indent(writer, indent)
  end
  # Final ">" of either the empty-element tag or the end tag.
  writer << ">"
end
private
# Builds the path step for +node+: its expanded name, followed by a
# 1-based "[n]" position when the parent holds more than one
# REXML::Element with that same expanded name.
def __to_xpath_helper node
  # Work on a copy: appending the position to the String returned by
  # expanded_name would otherwise modify the node's own name.
  rv = node.expanded_name.dup
  if node.parent
    results = node.parent.find_all {|n|
      n.kind_of?(REXML::Element) and n.expanded_name == node.expanded_name
    }
    if results.length > 1
      idx = results.index( node )
      rv << "[#{idx+1}]"
    end
  end
  rv
end
# A private helper method
def each_with_something( test, max=0, name=nil )
  matched = 0
  @elements.each( name ) do |candidate|
    # Count the match before handing it to the caller's block.
    if test.call( candidate )
      matched += 1
      yield candidate
    end
    # A positive +max+ ends the iteration once that many have matched.
    return if max > 0 && matched == max
  end
end
# A private helper method
def write_children( writer, indent, transitive, ie_hack )
  # indent == -1 means no pretty-printing: write each child as-is.
  if indent == -1
    return each { |node| node.write( writer, indent, transitive, ie_hack ) }
  end

  newline = indent < 0 ? '' : "\n"
  child_indent = indent + 1
  previous = nil
  each do |node|
    # Break and indent before a child only when neither it nor the
    # previous child is Text and transitive mode is off.
    breakable = !(node.kind_of?( Text ) || previous.kind_of?( Text ) || transitive)
    if breakable
      writer << newline
      indent( writer, child_indent )
    end
    node.write( writer, child_indent, transitive, ie_hack )
    previous = node
  end

  # Trailing break back to the parent's indentation, under the same rule.
  return nil if previous.kind_of?( Text ) || transitive
  writer << newline
  indent( writer, indent )
end
end
########################################################################
# ELEMENTS #
########################################################################
# A class which provides filtering of children for Elements, and
# XPath search support. You are expected to only encounter this class as
# the element.elements object. Therefore, you are
# _not_ expected to instantiate this yourself.
class Elements
  include Enumerable

  # Constructor
  # parent:: the Element whose children this collection exposes
  def initialize( parent )
    @element = parent
  end

  # Looks up a single element.
  # index::
  #   either an Integer or an XPath. An Integer selects the index'th
  #   child Element of the parent; counting starts at 1, not 0, and
  #   non-Element children are not counted. Anything else is passed to
  #   XPath::first together with the parent element.
  # name::
  #   optional, only used with an Integer index. When given, only child
  #   Elements for which has_name?( name ) holds are counted. Surrounding
  #   quotes on the name are stripped first.
  # Returns:: the matching Element, or nil if there is none
  # Raises:: RuntimeError if an Integer index is smaller than 1
  #  doc.root.elements[1]       #-> first child Element
  #  doc.root.elements['c']     #-> first match of the path 'c'
  #  doc.root.elements[2,'c']   #-> second child Element named 'c'
  def []( index, name=nil )
    return XPath::first( @element, index ) unless index.kind_of?( Integer )

    raise "index (#{index}) must be >= 1" if index < 1
    name = literalize(name) if name
    seen = 0
    @element.find do |node|
      next false unless node.kind_of?( Element )
      next false unless name.nil? || node.has_name?( name )
      (seen += 1) == index
    end
  end

  # Stores an element at the position selected by +index+.
  # index:: used to find an existing element; see []().
  # element:: the element to store
  # If []( index ) finds an element, it is replaced with +element+;
  # otherwise +element+ is added to the parent.
  # Returns:: the element that was found, or nil if none was
  def []=( index, element )
    existing = self[index]
    if existing.nil?
      @element.add element
    else
      existing.replace_with element
    end
    existing
  end

  # Returns +true+ if the parent has no +Element+ children, +false+
  # otherwise
  def empty?
    first_element = @element.find { |node| node.kind_of?( Element ) }
    first_element.nil?
  end

  # Returns the position of the supplied child among the parent's Element
  # children (starting at 1), or -1 if it is not one of them
  # element:: an +Element+ child
  def index( element )
    position = 0
    hit = @element.find do |node|
      if node.kind_of?( Element )
        position += 1
        node == element
      end
    end
    hit == element ? position : -1
  end

  # Deletes a child Element
  # element::
  #   Either an Element, which is deleted from the parent directly; or
  #   an Integer or XPath, which is resolved with []() and the result,
  #   if any, is asked to remove itself.
  # Returns::
  #   the result of the underlying delete/remove call, or nil if nothing
  #   matched
  def delete( element )
    return @element.delete( element ) if element.kind_of?( Element )

    target = self[element]
    target.remove if target
  end

  # Removes multiple elements. Only Element nodes are removed, whatever
  # else the XPath matches.
  # xpath:: all elements matching this String path are removed.
  # Returns:: an Array of the Elements that were removed
  def delete_all( xpath )
    removed = []
    # Collect first, delete afterwards, so the tree is not modified while
    # the XPath iteration is still running.
    XPath::each( @element, xpath ) do |node|
      removed << node if node.kind_of?( Element )
    end
    removed.each do |node|
      @element.delete node
      node.remove
    end
    removed
  end

  # Adds an element
  # element::
  #   if an Element, it is appended to the parent and given the parent's
  #   context. Otherwise a new Element is constructed from the argument
  #   (or from "" when it is nil or not supplied), with this collection
  #   and the parent's context passed to Element.new.
  # Returns:: the added Element
  def add( element=nil )
    if element.kind_of?( Element )
      @element << element
      element.context = @element.context
      return element
    end
    Element.new( element.nil? ? "" : element, self, @element.context )
  end
  alias :<< :add

  # Iterates through the Elements matched by XPath::each for the parent.
  # xpath::
  #   optional. Passed on to XPath::each. Matches that are not Elements
  #   are skipped, so only Elements are ever yielded.
  def each( xpath=nil, &block )
    XPath::each( @element, xpath ) do |node|
      yield node if node.kind_of?( Element )
    end
  end

  # Returns the number of +Element+ children of the parent object.
  # Children of other kinds are not counted.
  def size
    total = 0
    @element.each { |node| total += 1 if node.kind_of?( Element ) }
    total
  end

  # Returns an Array built from XPath.match on the parent.
  # xpath::
  #   optional. When supplied, the matches are filtered so that only
  #   Elements remain; when omitted, the XPath.match result is returned
  #   as it is.
  def to_a( xpath=nil )
    matches = XPath.match( @element, xpath )
    return matches unless xpath
    matches.find_all { |node| node.kind_of?( Element ) }
  end

  private
  # Private helper method. When the name starts with a single or double
  # quote, drops the first and last characters; otherwise returns the
  # name unchanged.
  def literalize( name )
    first = name[0]
    return name unless first == ?' || first == ?"
    name[1..-2]
  end
end
########################################################################
# ATTRIBUTES #
########################################################################
# A class that defines the set of Attributes of an Element and provides
# operations for accessing elements in that set.
class Attributes < Hash
  # Constructor
  # element:: the Element that owns this set of attributes
  def initialize( element )
    @element = element
  end

  # Fetches an attribute value. If you want to get the Attribute itself,
  # use get_attribute()
  # name:: an attribute name, optionally in prefix:name form
  # Returns::
  #   the value of the matching attribute, or +nil+ if no matching
  #   attribute was found.
  def [](name)
    attr = get_attribute(name)
    attr.nil? ? nil : attr.value
  end

  # Returns an Array of every Attribute node in this set.
  #
  # Attributes that share a name but differ in prefix are stored together
  # in a Hash under that name (see []=), so the stored values cannot just
  # be flattened; each_attribute is used to expand those groups.
  def to_a
    list = []
    each_attribute { |attr| list << attr }
    list
  end

  # Returns the number of attributes the owning Element contains.
  # Same-named attributes with different prefixes are counted
  # individually.
  def length
    c = 0
    each_attribute { c+=1 }
    c
  end
  alias :size :length

  # Iterates over the attributes of an Element. Yields actual Attribute
  # nodes, not String values.
  def each_attribute # :yields: attribute
    each_value do |val|
      if val.kind_of? Attribute
        yield val
      else
        # A group of same-named attributes, keyed by prefix.
        val.each_value { |atr| yield atr }
      end
    end
  end

  # Iterates over each attribute of an Element, yielding the expanded name
  # and value as a pair.
  def each
    each_attribute do |attr|
      yield attr.expanded_name, attr.value
    end
  end

  # Fetches an attribute
  # name::
  #   the name by which to search for the attribute. Can be a
  #   prefix:name namespace name.
  # Returns::
  #   The matching Attribute node (not its value), or nil if there was
  #   none.
  def get_attribute( name )
    attr = fetch( name, nil )
    if attr.nil?
      return nil if name.nil?
      # Nothing is stored under the full name: split off a prefix and
      # look under the local name instead.
      name =~ Namespace::NAMESPLIT
      prefix, n = $1, $2
      if prefix
        attr = fetch( n, nil )
        if attr.kind_of? Attribute
          # A single attribute is stored; it matches only when its prefix
          # agrees. On a mismatch fall through to the doctype lookup.
          return attr if prefix == attr.prefix
        elsif !attr.nil?
          # A group of same-named attributes, keyed by prefix.
          return attr[ prefix ]
        end
      end
      # Last resort: ask the document's doctype, if there is one, for a
      # value for this element/attribute pair (presumably a DTD-declared
      # default -- confirm against the doctype implementation).
      element_document = @element.document
      if element_document and element_document.doctype
        expn = @element.expanded_name
        expn = element_document.doctype.name if expn.size == 0
        attr_val = element_document.doctype.attribute_of(expn, name)
        return Attribute.new( name, attr_val ) if attr_val
      end
      return nil
    end
    # Found a group of same-named attributes under an unprefixed name:
    # pick the one whose prefix equals the owning element's prefix.
    if attr.kind_of? Hash
      attr = attr[ @element.prefix ]
    end
    return attr
  end

  # Sets an attribute, overwriting any existing attribute value by the
  # same name. Namespace is significant.
  # name:: the name of the attribute
  # value::
  #   the value of the attribute, or an Attribute. If nil, any existing
  #   matching attribute is deleted.
  # Returns:: the owning element
  # Raises::
  #   ParseException if an attribute with the same name but a different
  #   prefix exists, neither prefix is "xmlns", and both prefixes resolve
  #   to the same namespace on the owning element
  def []=( name, value )
    if value.nil? # Delete the named attribute
      attr = get_attribute(name)
      delete attr
      # Return the owning element on this path too, as documented.
      return @element
    end
    value = Attribute.new(name, value) unless value.kind_of? Attribute
    value.element = @element
    old_attr = fetch(value.name, nil)
    if old_attr.nil?
      store(value.name, value)
    elsif old_attr.kind_of? Hash
      # Already a group of same-named attributes: add or replace by prefix.
      old_attr[value.prefix] = value
    elsif old_attr.prefix != value.prefix
      # Check for conflicting namespaces
      raise ParseException.new(
        "Namespace conflict in adding attribute \"#{value.name}\": "+
        "Prefix \"#{old_attr.prefix}\" = "+
        "\"#{@element.namespace(old_attr.prefix)}\" and prefix "+
        "\"#{value.prefix}\" = \"#{@element.namespace(value.prefix)}\"") if
        value.prefix != "xmlns" and old_attr.prefix != "xmlns" and
        @element.namespace( old_attr.prefix ) ==
        @element.namespace( value.prefix )
      # Same name, different prefixes: keep both, keyed by prefix.
      store value.name, { old_attr.prefix => old_attr,
                          value.prefix => value }
    else
      store value.name, value
    end
    return @element
  end

  # Returns an array containing the names of all attributes whose prefix
  # is 'xmlns', i.e. the namespace prefixes declared by this set of
  # attributes. A plain 'xmlns' attribute (the default namespace
  # declaration) is not included. Attributes that the document's doctype
  # reports for this element are considered as well.
  def prefixes
    ns = []
    each_attribute do |attribute|
      ns << attribute.name if attribute.prefix == 'xmlns'
    end
    if @element.document and @element.document.doctype
      expn = @element.expanded_name
      expn = @element.document.doctype.name if expn.size == 0
      @element.document.doctype.attributes_of(expn).each {
        |attribute|
        ns << attribute.name if attribute.prefix == 'xmlns'
      }
    end
    ns
  end

  # Returns an array containing the values of all namespace declarations
  # in this set: attributes whose prefix or name is 'xmlns'. Attributes
  # that the document's doctype reports for this element are considered
  # as well.
  def namespaces
    namespaces = []
    each_attribute do |attribute|
      namespaces << attribute.value if attribute.prefix == 'xmlns' or attribute.name == 'xmlns'
    end
    if @element.document and @element.document.doctype
      expn = @element.expanded_name
      expn = @element.document.doctype.name if expn.size == 0
      @element.document.doctype.attributes_of(expn).each {
        |attribute|
        namespaces << attribute.value if attribute.prefix == 'xmlns' or attribute.name == 'xmlns'
      }
    end
    namespaces
  end

  # Removes an attribute
  # attribute::
  #   either a String, which is the name of the attribute to remove --
  #   namespaces are significant here -- or the attribute to remove.
  # Returns:: the owning element
  def delete( attribute )
    name = nil
    prefix = nil
    if attribute.kind_of? Attribute
      name = attribute.name
      prefix = attribute.prefix
    else
      attribute =~ Namespace::NAMESPLIT
      prefix, name = $1, $2
      prefix = '' unless prefix
    end
    old = fetch(name, nil)
    if old.kind_of? Hash # the supplied attribute is one of many
      old.delete(prefix)
      # Once a single attribute is left, store it directly again instead
      # of keeping a one-entry group.
      store name, old.values.first if old.size == 1
    elsif !old.nil? # the supplied attribute is a top-level one
      super(name)
    end
    @element
  end

  # Adds an attribute, overriding any existing attribute by the
  # same name. Namespaces are significant.
  # attribute:: An Attribute
  def add( attribute )
    self[attribute.name] = attribute
  end
  alias :<< :add

  # Deletes all attributes matching a name. Namespaces are significant.
  # name::
  #   A String; all attributes whose expanded name equals it are removed
  # Returns:: an Array of the Attributes that were removed
  def delete_all( name )
    rv = []
    # Collect first, then remove, so the set is not modified while it is
    # being iterated.
    each_attribute { |attribute|
      rv << attribute if attribute.expanded_name == name
    }
    rv.each{ |attr| attr.remove }
    return rv
  end
end
end