mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
Initial revision
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@3925 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
parent
ca02190d88
commit
ea7a527a2a
53 changed files with 6674 additions and 0 deletions
62
lib/rexml/attlistdecl.rb
Normal file
62
lib/rexml/attlistdecl.rb
Normal file
|
@ -0,0 +1,62 @@
|
|||
#vim:ts=2 sw=2 noexpandtab:
|
||||
require 'rexml/child'
|
||||
require 'rexml/source'
|
||||
|
||||
module REXML
|
||||
# This class needs:
|
||||
# * Documentation
|
||||
# * Work! Not all types of attlists are intelligently parsed, so we just
|
||||
# spew back out what we get in. This works, but it would be better if
|
||||
# we formatted the output ourselves.
|
||||
#
|
||||
# AttlistDecls provide *just* enough support to allow namespace
|
||||
# declarations. If you need some sort of generalized support, or have an
|
||||
# interesting idea about how to map the hideous, terrible design of DTD
|
||||
# AttlistDecls onto an intuitive Ruby interface, let me know. I'm desperate
|
||||
# for anything to make DTDs more palatable.
|
||||
class AttlistDecl < Child
  include Enumerable

  # Name of the element this attribute list was declared for.  It is the
  # first item of the array handed to the constructor, and it is the value
  # DocType#attributes_of and DocType#attribute_of compare against.
  attr_reader :element_name

  # Builds an AttlistDecl from already-parsed pieces.  There is no
  # convenience API: the declaration has to be parsed elsewhere and handed
  # over in one array.
  #
  # source::
  #   An Array of the form [ element_name, pairs, contents ], where
  #   +pairs+ maps attribute names to values (presumably a Hash -- it must
  #   answer [], keys and each) and +contents+ is the raw declaration text
  #   that #write echoes back.  Any non-Array argument is ignored and the
  #   declaration is left empty.
  def initialize(source)
    super()
    return unless source.kind_of?(Array)

    @element_name, @pairs, @contents = *source
  end

  # Looks up the value declared for an attribute.
  #  value = attlist_decl[ attribute_name ]
  def [](key)
    @pairs[key]
  end

  # Reports whether this declaration defines the given attribute.
  #  if attlist_decl.include? "xmlns:foobar"
  def include?(key)
    declared_names = @pairs.keys
    declared_names.include?(key)
  end

  # Iterates over the attribute name/value pairs:
  #  attlist_decl.each { |attribute_name, attribute_value| ... }
  def each(&block)
    @pairs.each(&block)
  end

  # Appends the declaration to +out+ exactly as it was received; +indent+
  # is accepted but not used.
  def write(out, indent = -1)
    out << @contents
  end

  # Symbol identifying this kind of node.
  def node_type
    :attlistdecl
  end
end
|
||||
end
|
151
lib/rexml/attribute.rb
Normal file
151
lib/rexml/attribute.rb
Normal file
|
@ -0,0 +1,151 @@
|
|||
require "rexml/namespace"
|
||||
require 'rexml/text'
|
||||
|
||||
module REXML
|
||||
# Defines an Element Attribute; i.e., an attribute=value pair, as in:
|
||||
# <element attribute="value"/>. Attributes can be in their own
|
||||
# namespaces. General users of REXML will not interact with the
|
||||
# Attribute class much.
|
||||
class Attribute
  include Node
  include Namespace

  # The element to which this attribute belongs
  attr_reader :element
  # The normalized value of this attribute.  That is, the attribute with
  # entities intact.
  attr_writer :normalized
  # Matches one name="value" (or name='value') pair; captures the name, the
  # quote character and the quoted text.
  PATTERN = /\s*(#{NAME_STR})\s*=\s*(["'])(.*?)\2/um

  # Constructor.
  #
  #  Attribute.new( attribute_to_clone )
  #  Attribute.new( attribute_to_clone, parent_element )
  #  Attribute.new( "attr", "attr_value" )
  #  Attribute.new( "attr", "attr_value", parent_element )
  #
  # When cloning, the owning element is +second+ if that is an Element and
  # the element of the cloned attribute otherwise.  Raises a RuntimeError
  # if +first+ is neither an Attribute nor a String.
  def initialize( first, second=nil, parent=nil )
    @normalized = @unnormalized = @element = nil
    if first.kind_of? Attribute
      self.name = first.expanded_name
      @value = first.value
      if second.kind_of? Element
        @element = second
      else
        @element = first.element
      end
    elsif first.kind_of? String
      @element = parent if parent.kind_of? Element
      self.name = first
      @value = second
    else
      # Object#type no longer exists; #class produces the same text.
      raise "illegal argument #{first.class} to Attribute constructor"
    end
  end

  # Returns the namespace prefix of the attribute.  An attribute without a
  # prefix of its own reports the prefix of its element, if it has one.
  #
  #  e = Element.new( "elns:myelement" )
  #  e.add_attribute( "nsa:a", "aval" )
  #  e.add_attribute( "b", "bval" )
  #  e.attributes.get_attribute( "a" ).prefix   # -> "nsa"
  #  e.attributes.get_attribute( "b" ).prefix   # -> "elns"
  #  a = Attribute.new( "x", "y" )
  #  a.prefix                                   # -> ""
  def prefix
    pf = super
    if pf == ""
      pf = @element.prefix if @element
    end
    pf
  end

  # Returns the namespace URL, if defined, or nil otherwise.  The lookup is
  # delegated to the owning element, so the attribute must belong to one.
  #
  #  e = Element.new("el")
  #  e.add_attributes({"xmlns:ns" => "http://url"})
  #  e.namespace( "ns" )              # -> "http://url"
  def namespace arg=nil
    arg = prefix if arg.nil?
    @element.namespace arg
  end

  # Returns true if other is an Attribute and has the same name and value,
  # false otherwise.
  def ==( other )
    other.kind_of?(Attribute) && other.name==name && other.value==@value
  end

  # Creates (and returns) a hash from both the name and value
  def hash
    name.hash + value.hash
  end

  # Returns this attribute out as XML source, expanding the name
  #
  #  a = Attribute.new( "x", "y" )
  #  a.to_string     # -> "x='y'"
  #  b = Attribute.new( "ns:x", "y" )
  #  b.to_string     # -> "ns:x='y'"
  def to_string
    # The value is wrapped in single quotes, so any single quote inside it
    # has to be written as the &apos; entity.
    "#@expanded_name='#{to_s().gsub(/'/, '&apos;')}'"
  end

  # Returns the attribute value in normalized form (see Text::normalize).
  # The result is cached until #value is called.
  def to_s
    return @normalized if @normalized

    doctype = nil
    if @element
      doc = @element.document
      doctype = doc.doctype if doc
    end

    @unnormalized = nil
    @value = @normalized = Text::normalize( @value, doctype )
  end

  # Returns the UNNORMALIZED value of this attribute.  That is, entities
  # have been expanded to their values.  The result is cached until #to_s
  # is called.
  def value
    # Without this early return the stored value would be run through
    # Text::unnormalize again on every call.
    return @unnormalized if @unnormalized

    doctype = nil
    if @element
      doc = @element.document
      doctype = doc.doctype if doc
    end
    @normalized = nil
    @value = @unnormalized = Text::unnormalize( @value, doctype )
  end

  # Returns a copy of this attribute
  def clone
    Attribute.new self
  end

  # Sets the element of which this object is an attribute.  Normally, this
  # is not directly called.
  #
  # Returns this attribute
  def element=( element )
    @element = element
    self
  end

  # Removes this Attribute from its element's attributes.  Does nothing if
  # the attribute has no element.
  #
  # This method is usually not called directly.
  def remove
    @element.attributes.delete self.name unless @element.nil?
  end

  # Writes this attribute (EG, puts "key='value'" to the output).  +indent+
  # is accepted but not used.
  def write( output, indent=-1 )
    output << to_string
  end

  # Symbol identifying this kind of node.
  def node_type
    :attribute
  end
end
|
||||
end
|
||||
#vim:ts=2 sw=2 noexpandtab:
|
68
lib/rexml/cdata.rb
Normal file
68
lib/rexml/cdata.rb
Normal file
|
@ -0,0 +1,68 @@
|
|||
require "rexml/text"
|
||||
|
||||
module REXML
|
||||
class CData < Text
  # Opening delimiter of a CDATA section.
  START = '<![CDATA['
  # Closing delimiter of a CDATA section.
  STOP = ']]>'
  # Pattern handed to Text as the "illegal" expression: the closing
  # delimiter may not occur inside the section.
  ILLEGAL = /(]]>)/

  # Constructor.  CData is data between <![CDATA[ ... ]]>
  #
  # _Examples_
  #  CData.new( source )
  #  CData.new( "Here is some CDATA" )
  #  CData.new( "Some unprocessed data", respect_whitespace_TF, parent_element )
  def initialize(first, whitespace = true, parent = nil)
    super(first, whitespace, parent, true, true, ILLEGAL)
  end

  # Make a copy of this object
  #
  # _Examples_
  #  c = CData.new( "Some text" )
  #  d = c.clone
  #  d.to_s        # -> "Some text"
  def clone
    CData.new(self)
  end

  # Returns the content of this CData object, without the delimiters.
  #
  # _Examples_
  #  c = CData.new( "Some text" )
  #  c.to_s        # -> "Some text"
  def to_s
    @string
  end

  # Generates XML output of this object: the indentation helper is invoked
  # first, then START, the raw content and STOP are appended in that order.
  #
  # output::
  #   Where to write the string.  Defaults to $stdout
  # indent::
  #   An integer handed to the indentation helper.  Defaults to -1.
  # transitive::
  #   Accepted so the signature matches the other node classes; not used
  #   by this method.
  # ie_hack::
  #   Accepted so the signature matches the other node classes; not used
  #   by this method.
  #
  # _Examples_
  #  c = CData.new( " Some text " )
  #  c.write( $stdout )     #->  <![CDATA[ Some text ]]>
  def write(output = $stdout, indent = -1, transitive = false, ie_hack = false)
    indent(output, indent)
    # Each piece is appended with its own statement; the return value of
    # << is not relied upon.
    output << START
    output << @string
    output << STOP
  end
end
|
||||
end
|
96
lib/rexml/child.rb
Normal file
96
lib/rexml/child.rb
Normal file
|
@ -0,0 +1,96 @@
|
|||
require "rexml/node"
|
||||
|
||||
module REXML
|
||||
##
|
||||
# A Child object is something contained by a parent, and this class
|
||||
# contains methods to support that. Most user code will not use this
|
||||
# class directly.
|
||||
class Child
  include Node
  attr_reader :parent # The Parent of this object

  # Constructor.  Any inheritors of this class should call super to make
  # sure this method is called.
  # parent::
  #   if supplied, self is added to it (which is expected to set the
  #   parent of this child in turn)
  def initialize(parent = nil)
    # Start without a parent; adding ourselves to the parent below is what
    # establishes the link.
    @parent = nil
    parent.add(self) if parent
  end

  # Replaces this object with another object.  Basically, calls
  # Parent.replace_child
  #
  # Returns:: self
  def replace_with(child)
    @parent.replace_child(self, child)
    self
  end

  # Removes this child from the parent.  Does nothing if there is no
  # parent.
  #
  # Returns:: self
  def remove
    @parent.delete(self) unless @parent.nil?
    self
  end

  # Sets the parent of this child to the supplied argument.
  #
  # other::
  #   Must be a Parent object.  If this object is equal to the existing
  #   parent of this child, no action is taken.  Otherwise, this child is
  #   removed from the current parent (if one exists) and +other+ is
  #   recorded as the new parent.
  def parent=(other)
    return @parent if @parent == other

    # Subclasses that skip Child#initialize never define @parent, hence the
    # defined? guard.
    if defined? @parent and @parent
      @parent.delete(self)
    end
    @parent = other
  end

  alias :next_sibling :next_sibling_node
  alias :previous_sibling :previous_sibling_node

  # Sets the next sibling of this child.  This can be used to insert a child
  # after some other child.
  #  a = Element.new("a")
  #  b = a.add_element("b")
  #  c = Element.new("c")
  #  b.next_sibling = c
  #  # => <a><b/><c/></a>
  def next_sibling=(other)
    parent.insert_after(self, other)
  end

  # Sets the previous sibling of this child.  This can be used to insert a
  # child before some other child.
  #  a = Element.new("a")
  #  b = a.add_element("b")
  #  c = Element.new("c")
  #  b.previous_sibling = c
  #  # => <a><c/><b/></a>
  def previous_sibling=(other)
    parent.insert_before(self, other)
  end

  # Returns:: the document this child belongs to, or nil if this child
  # belongs to no document
  def document
    parent.nil? ? nil : parent.document
  end

  # This doesn't yet handle encodings: the document's encoding is looked up
  # (so the child must belong to a document) but the plain to_s value is
  # returned.
  def bytes
    document.encoding

    to_s
  end
end
|
||||
end
|
79
lib/rexml/comment.rb
Normal file
79
lib/rexml/comment.rb
Normal file
|
@ -0,0 +1,79 @@
|
|||
require "rexml/child"
|
||||
|
||||
module REXML
|
||||
##
|
||||
# Represents an XML comment; that is, text between <!-- ... -->
|
||||
class Comment < Child
  include Comparable
  START = "<!--"
  STOP = "-->"

  attr_accessor :string     # The content text

  ##
  # Constructor.
  # @param first If String, the contents of this comment are set to the
  # argument.  If Comment, the argument's content is reused.  Any other
  # kind of argument leaves the content unset.
  # @param second If supplied, the Parent this comment is added to; it is
  # passed straight to Child#initialize.
  def initialize( first, second = nil )
    super(second)
    if first.kind_of? String
      @string = first
    elsif first.kind_of? Comment
      @string = first.string
    end
  end

  # Returns a new Comment with the same content text.
  def clone
    Comment.new self
  end

  # Writes the comment as START, the content text and STOP.
  #
  # output::
  #   Where to write the string
  # indent::
  #   An integer handed to the indentation helper.
  # transitive::
  #   Accepted so the signature matches the other node classes; not used
  #   by this method.
  # ie_hack::
  #   Accepted so the signature matches the other node classes; not used
  #   by this method.
  def write( output, indent=-1, transitive=false, ie_hack=false )
    indent( output, indent )
    output << START
    output << @string
    output << STOP
  end

  alias :to_s :string

  ##
  # Compares this Comment to another; the contents of the comment are used
  # in the comparison.  The receiver is the left-hand operand, as the
  # Comparable mixin requires: the result is negative when this comment's
  # text sorts before +other+'s.
  def <=>(other)
    @string <=> other.to_s
  end

  ##
  # Returns true if +other+ is a Comment with the same contents.
  def ==( other )
    other.kind_of?(Comment) &&
      (other <=> self) == 0
  end

  # Symbol identifying this kind of node.
  def node_type
    :comment
  end
end
|
||||
end
|
||||
#vim:ts=2 sw=2 noexpandtab:
|
182
lib/rexml/doctype.rb
Normal file
182
lib/rexml/doctype.rb
Normal file
|
@ -0,0 +1,182 @@
|
|||
require "rexml/parent"
|
||||
require "rexml/parseexception"
|
||||
require "rexml/namespace"
|
||||
require 'rexml/entity'
|
||||
require 'rexml/attlistdecl'
|
||||
require 'rexml/xmltokens'
|
||||
|
||||
module REXML
|
||||
# Represents an XML DOCTYPE declaration; that is, the contents of <!DOCTYPE
|
||||
# ... >. DOCTYPES can be used to declare the DTD of a document, as well as
|
||||
# being used to declare entities used in the document.
|
||||
class DocType < Parent
  include XMLTokens
  START = "<!DOCTYPE"
  STOP = ">"
  SYSTEM = "SYSTEM"
  PUBLIC = "PUBLIC"
  # The four entities every doctype starts out with.  This hash is shared
  # between instances until #add replaces it with a private copy.
  DEFAULT_ENTITIES = {
    'gt'=>EntityConst::GT,
    'lt'=>EntityConst::LT,
    'quot'=>EntityConst::QUOT,
    "apos"=>EntityConst::APOS
  }

  # name is the name of the doctype
  # external_id is the referenced DTD, if given
  # NOTE(review): nothing in this class assigns @namespaces -- confirm
  # whether the reader is still needed.
  attr_reader :name, :external_id, :entities, :namespaces

  # Constructor
  #
  #  dt = DocType.new( 'foo', '-//I/Hate/External/IDs' )
  #  # <!DOCTYPE foo '-//I/Hate/External/IDs'>
  #  dt = DocType.new( doctype_to_clone )
  #  # Incomplete.  Shallow clone of doctype (name and external id only)
  #  dt = DocType.new( [ name, external_id, long_name, uri ], some_document )
  #  # Creates a doctype from parsed pieces and adds it to the document
  #
  # Note that with a String as +first+ the second argument is taken as the
  # external id, not as a parent.
  def initialize( first, parent=nil )
    @entities = DEFAULT_ENTITIES
    @long_name = @uri = nil
    case first
    when String
      super()
      @name = first
      @external_id = parent
    when DocType
      super( parent )
      @name = first.name
      @external_id = first.external_id
    when Array
      super( parent )
      @name = first[0]
      @external_id = first[1]
      @long_name = first[2]
      @uri = first[3]
    end
  end

  # Symbol identifying this kind of node.
  def node_type
    :doctype
  end

  # Collects, as new Attribute objects, every attribute declared for
  # +element+ by the AttlistDecl children of this doctype.
  def attributes_of element
    collected = []
    each do |child|
      next unless child.kind_of?( AttlistDecl ) && child.element_name == element

      child.each { |key, val| collected << Attribute.new( key, val ) }
    end
    collected
  end

  # Returns the value declared for +attribute+ on +element+ by the first
  # AttlistDecl child that declares it, or nil if there is none.
  def attribute_of element, attribute
    declaration = find { |child|
      child.kind_of?( AttlistDecl ) &&
        child.element_name == element &&
        child.include?( attribute )
    }
    declaration ? declaration[attribute] : nil
  end

  # Returns a new DocType built from this one (see the constructor).
  def clone
    DocType.new self
  end

  # Writes the declaration: START, the name, then the external id, long
  # name and uri when present, then the children (one per line, between
  # square brackets) when there are any, then STOP.
  #
  # output::
  #   Where to write the string
  # indent::
  #   An integer handed to the indentation helper; children are written
  #   with this value plus two.
  # transitive::
  #   Accepted so the signature matches the other node classes; not used
  #   by this method.
  # ie_hack::
  #   Accepted so the signature matches the other node classes; not used
  #   by this method.
  def write( output, indent=0, transitive=false, ie_hack=false )
    indent( output, indent )
    output << START
    output << ' '
    output << @name
    output << " #@external_id" if @external_id
    output << " #@long_name" if @long_name
    output << " #@uri" if @uri
    if !@children.empty?
      child_indent = indent + 2
      output << ' ['
      @children.each do |node|
        output << "\n"
        node.write( output, child_indent )
      end
      output << "\n"
      output << "]"
    end
    output << STOP
  end

  # Returns the unnormalized value of the named entity, or nil if no such
  # entity is known.
  def entity( name )
    known = @entities[name]
    known.unnormalized if known
  end

  # Adds a child; an Entity child is also registered in #entities under
  # its name.
  def add child
    super(child)
    # Stop sharing the default table before it is written to.
    @entities = DEFAULT_ENTITIES.clone if @entities == DEFAULT_ENTITIES
    @entities[ child.name ] = child if child.kind_of? Entity
  end
end
|
||||
|
||||
# We don't really handle any of these since we're not a validating
|
||||
# parser, so we can be pretty dumb about them. All we need to be able
|
||||
# to do is spew them back out on a write()
|
||||
|
||||
# This is an abstract class. You never use this directly; it serves as a
|
||||
# parent class for the specific declarations.
|
||||
class Declaration < Child
  # src:: the text of the declaration, without its closing '>'
  def initialize( src )
    super()
    @string = src
  end

  # Returns the stored text followed by the closing '>'.
  def to_s
    @string + '>'
  end

  # Appends the declaration to +output+, preceded by +indent+ spaces when
  # +indent+ is positive.
  def write( output, indent )
    if indent > 0
      output << (' ' * indent)
    end
    output << to_s
  end
end
|
||||
|
||||
public
|
||||
# An element declaration inside a doctype.  It adds nothing to Declaration;
# the class exists so that this kind of declaration has a type of its own.
class ElementDecl < Declaration
  # src:: the text of the declaration, handed on to Declaration unchanged
  def initialize( src )
    super( src )
  end
end
|
||||
|
||||
# A notation declaration inside a doctype.  The three pieces are stored as
# given and joined again by #to_s.
class NotationDecl < Child
  # name::   the notation name
  # middle:: the text written after the name
  # rest::   the text written after +middle+
  def initialize name, middle, rest
    # Child#initialize sets up the parent link; every inheritor of Child is
    # expected to call it.
    super()
    @name = name
    @middle = middle
    @rest = rest
  end

  # Returns the declaration as "<!NOTATION name middle rest>".
  def to_s
    "<!NOTATION #@name #@middle #@rest>"
  end

  # Appends the declaration to +output+, preceded by +indent+ spaces when
  # +indent+ is positive.
  def write( output, indent=-1 )
    output << (' '*indent) if indent > 0
    output << to_s
  end
end
|
||||
end
|
237
lib/rexml/document.rb
Normal file
237
lib/rexml/document.rb
Normal file
|
@ -0,0 +1,237 @@
|
|||
require "rexml/element"
|
||||
require "rexml/xmldecl"
|
||||
require "rexml/source"
|
||||
require "rexml/comment"
|
||||
require "rexml/doctype"
|
||||
require "rexml/instruction"
|
||||
require "rexml/rexml"
|
||||
require "rexml/parseexception"
|
||||
require "rexml/output"
|
||||
require "rexml/parsers/baseparser"
|
||||
require "rexml/parsers/streamparser"
|
||||
|
||||
module REXML
|
||||
# Represents a full XML document, including PIs, a doctype, etc. A
|
||||
# Document has a single child that can be accessed by root().
|
||||
# Note that if you want to have an XML declaration written for a document
|
||||
# you create, you must add one; REXML documents do not write a default
|
||||
# declaration for you. See |DECLARATION| and |write|.
|
||||
class Document < Element
  # A convenient default XML declaration.  If you want an XML declaration,
  # the easiest way to add one is mydoc << Document::DECLARATION
  DECLARATION = XMLDecl.new( "1.0", "UTF-8" )

  # Constructor
  # @param source if supplied, must be a Document, or something the parser
  # accepts as a source of XML (a String or IO).  A Document has its
  # context reused and is passed on to the Element constructor; anything
  # else is parsed and the resulting nodes are added to this document.
  # @param context if supplied, contains the context of the document;
  # this should be a Hash.
  def initialize( source = nil, context = {} )
    super()
    @context = context
    return if source.nil?
    if source.kind_of? Document
      @context = source.context
      super source
    else
      build( source )
    end
  end

  # Symbol identifying this kind of node.
  def node_type
    :document
  end

  # Returns a new Document built from this one (see the constructor).
  def clone
    Document.new self
  end

  # According to the XML spec, a root node has no expanded name
  def expanded_name
    ''
  end

  alias :name :expanded_name

  # We override this, because XMLDecls and DocTypes must go at the start
  # of the document.  An XMLDecl becomes the first child; a DocType is
  # placed directly after a leading XMLDecl, or first otherwise.  Anything
  # else is added normally, and a RuntimeError is raised if that leaves the
  # document with more than one element.
  def add( child )
    if child.kind_of? XMLDecl
      # NOTE(review): unlike the DocType branch, this does not set
      # child.parent.  Setting it would move the shared DECLARATION constant
      # from one document to the next, so it is left as is -- confirm intent.
      @children.unshift child
    elsif child.kind_of? DocType
      if @children[0].kind_of? XMLDecl
        @children[1,0] = child
      else
        @children.unshift child
      end
      child.parent = self
    else
      rv = super
      raise "attempted adding second root element to document" if @elements.size > 1
      rv
    end
  end
  alias :<< :add

  # Adds an element as Element#add_element does, raising a RuntimeError if
  # that leaves the document with more than one element.
  def add_element(arg=nil, arg2=nil)
    rv = super
    raise "attempted adding second root element to document" if @elements.size > 1
    rv
  end

  # @return the root Element of the document, or nil if this document
  # has no element children.
  def root
    @children.find { |item| item.kind_of? Element }
  end

  # @return the DocType child of the document, if one exists,
  # and nil otherwise.
  def doctype
    @children.find { |item| item.kind_of? DocType }
  end

  # @return the XMLDecl of this document; if no XMLDecl has been
  # set, the default declaration is returned.
  def xml_decl
    rv = @children.find { |item| item.kind_of? XMLDecl }
    rv = DECLARATION if rv.nil?
    rv
  end

  # @return the XMLDecl version of this document as a String.
  # If no XMLDecl has been set, returns the default version.
  def version
    decl = xml_decl()
    # DEFAULT_VERSION is a constant of XMLDecl, so it is reached with ::
    decl.nil? ? XMLDecl::DEFAULT_VERSION : decl.version
  end

  # @return the XMLDecl encoding of this document as a String.
  # If no XMLDecl has been set, or it names no encoding, returns the
  # default encoding.
  def encoding
    decl = xml_decl()
    # The parentheses matter: both tests have to select the default.
    ( decl.nil? || decl.encoding.nil? ) ? XMLDecl::DEFAULT_ENCODING : decl.encoding
  end

  # @return the XMLDecl standalone value of this document.
  # If no XMLDecl has been set, returns the default setting.
  def stand_alone?
    decl = xml_decl()
    decl.nil? ? XMLDecl::DEFAULT_STANDALONE : decl.stand_alone?
  end

  # Write the XML tree out, optionally with indent.  This writes out the
  # entire XML document, including XML declarations, doctype declarations,
  # and processing instructions (if any are given).
  # A controversial point is whether Document should always write the XML
  # declaration (<?xml version='1.0'?>) whether or not one is given by the
  # user (or source document).  REXML does not write one if one was not
  # specified, because it adds unnecessary bandwidth to applications such
  # as XML-RPC.
  #
  # output::
  #   an object which supports '<< string'; this is where the document
  #   will be written.  It is wrapped in an Output when the declared
  #   encoding is not UTF-8.
  # indent::
  #   An integer, passed on to every child.  When it is not negative a
  #   newline is written after every child except the last.  Defaults
  #   to -1
  # transitive::
  #   Passed on unchanged to every child.  Defaults to false
  # ie_hack::
  #   Passed on unchanged to every child.  Defaults to false
  def write( output=$stdout, indent=-1, transitive=false, ie_hack=false )
    output = Output.new( output, xml_decl.encoding ) if xml_decl.encoding != "UTF-8"
    last_child = @children[-1]
    @children.each { |node|
      node.write( output, indent, transitive, ie_hack )
      # Identity, not ==: some node classes compare by content, and an
      # earlier node with the same content is not the last child.
      output << "\n" unless indent<0 or node.equal?( last_child )
    }
  end

  # Parses +source+ with a stream parser, reporting to +listener+.
  def Document::parse_stream( source, listener )
    Parsers::StreamParser.new( source, listener ).parse
  end

  private
  # Pulls events from a BaseParser over +source+ and adds the matching
  # nodes to the tree until the parser reports the end of the document.
  # +build_context+ is the node currently being filled: it descends into
  # every new element or doctype and returns to the parent when that
  # element or doctype ends.
  def build( source )
    build_context = self
    parser = Parsers::BaseParser.new( source )
    tag_stack = []
    in_doctype = false
    entities = nil
    while true
      event = parser.pull
      case event[0]
      when :end_document
        return
      when :start_element
        tag_stack.push(event[1])
        build_context = build_context.add_element( event[1], event[2] )
      when :end_element
        tag_stack.pop
        build_context = build_context.parent
      when :text
        # Text reported while inside a doctype is dropped.
        if not in_doctype
          if build_context[-1].instance_of? Text
            # Extend the preceding text node instead of adding another.
            build_context[-1] << event[1]
          else
            build_context.add(
              Text.new( event[1], true, nil, true )
            ) unless (
              event[1].strip.size == 0 and
              build_context.ignore_whitespace_nodes
            )
          end
        end
      when :comment
        c = Comment.new( event[1] )
        build_context.add( c )
      when :cdata
        c = CData.new( event[1] )
        build_context.add( c )
      when :processing_instruction
        build_context.add( Instruction.new( event[1], event[2] ) )
      when :end_doctype
        in_doctype = false
        entities.each { |k,_v| entities[k] = build_context.entities[k].value }
        build_context = build_context.parent
      when :start_doctype
        doctype = DocType.new( event[1..-1], build_context )
        build_context = doctype
        entities = {}
        in_doctype = true
      when :attlistdecl
        n = AttlistDecl.new( event[1..-1] )
        build_context.add( n )
      when :elementdecl
        n = ElementDecl.new( event[1] )
        build_context.add(n)
      when :entitydecl
        entities[ event[1] ] = event[2] unless event[2] =~ /PUBLIC|SYSTEM/
        build_context.add(Entity.new(event))
      when :notationdecl
        n = NotationDecl.new( *event[1..-1] )
        build_context.add( n )
      when :xmldecl
        x = XMLDecl.new( event[1], event[2], event[3] )
        build_context.add( x )
      end
    end
  end
end
|
||||
end
|
10
lib/rexml/dtd/attlistdecl.rb
Normal file
10
lib/rexml/dtd/attlistdecl.rb
Normal file
|
@ -0,0 +1,10 @@
|
|||
require "rexml/child"
|
||||
module REXML
|
||||
module DTD
|
||||
# Constants for recognizing an attribute-list declaration in DTD text.
class AttlistDecl < Child
  # Literal text that opens the declaration.
  START = "<!ATTLIST"
  # Matches START at the beginning of a line, after optional whitespace.
  START_RE = /^\s*#{START}/um
  # Captures one whole declaration: START through the next '>'.
  PATTERN_RE = /\s*(#{START}.*?>)/um
end
|
||||
end
|
||||
end
|
51
lib/rexml/dtd/dtd.rb
Normal file
51
lib/rexml/dtd/dtd.rb
Normal file
|
@ -0,0 +1,51 @@
|
|||
require "rexml/dtd/elementdecl"
|
||||
require "rexml/dtd/entitydecl"
|
||||
require "rexml/comment"
|
||||
require "rexml/dtd/notationdecl"
|
||||
require "rexml/dtd/attlistdecl"
|
||||
require "rexml/parent"
|
||||
|
||||
module REXML
|
||||
module DTD
|
||||
# Splits DTD text into declaration objects.
class Parser
  # Parses a DTD.
  #
  # input:: a String holding the DTD text, or a File to read it from
  #
  # Returns the Parent filled by parse_helper, or nil when +input+ is
  # neither a String nor a File.
  def Parser.parse( input )
    case input
    when String
      parse_helper input
    when File
      parse_helper input.read
    end
  end

  # Takes a String and parses it out: the declaration patterns are tried in
  # a fixed order, the object built from the match is appended to a new
  # Parent, and parsing continues with the text after the match.
  #
  # Raises a RuntimeError when text that is not just whitespace matches
  # none of the patterns.
  #
  # NOTE(review): EntityDecl, Comment and NotationDecl show no PATTERN_RE
  # constant in the code visible here, and the constructors of the
  # declaration classes do not all accept a MatchData -- confirm before
  # relying on those branches.
  def Parser.parse_helper( input )
    contents = Parent.new
    while input.size > 0
      case input
      when ElementDecl::PATTERN_RE
        # ElementDecl#initialize reads capture groups, so it needs the
        # MatchData, not the matched text.
        matchdata = $~
        input = $'
        contents << ElementDecl.new( matchdata )
      when AttlistDecl::PATTERN_RE
        matchdata = $~
        input = $'
        contents << AttlistDecl.new( matchdata )
      when EntityDecl::PATTERN_RE
        matchdata = $~
        input = $'
        contents << EntityDecl.new( matchdata )
      when Comment::PATTERN_RE
        matchdata = $~
        input = $'
        contents << Comment.new( matchdata )
      when NotationDecl::PATTERN_RE
        matchdata = $~
        input = $'
        contents << NotationDecl.new( matchdata )
      else
        # Nothing matched: trailing whitespace ends the parse, anything else
        # is an error (looping again could never make progress).
        break if input.strip.empty?
        raise "unrecognized DTD content: #{input[0, 40].inspect}"
      end
    end
    contents
  end
end
|
||||
end
|
||||
end
|
17
lib/rexml/dtd/elementdecl.rb
Normal file
17
lib/rexml/dtd/elementdecl.rb
Normal file
|
@ -0,0 +1,17 @@
|
|||
require "rexml/child"
|
||||
module REXML
|
||||
module DTD
|
||||
# An element declaration read from DTD text.
class ElementDecl < Child
  # Literal text that opens the declaration.
  START = "<!ELEMENT"
  # Matches START at the beginning of a line, after optional whitespace.
  START_RE = /^\s*#{START}/um
  # Matches one declaration.  Group 1 is the (optionally prefixed) element
  # name, group 2 everything between the name and the closing '>'.
  # This constant used to be assigned twice; only this later assignment
  # ever took effect, so the earlier one has been removed.
  PATTERN_RE = /^\s*#{START}\s+((?:[:\w_][-\.\w_]*:)?[-!\*\.\w_]*)(.*?)>/

  # match::
  #   the result of matching PATTERN_RE; anything that answers [1] with
  #   the name and [2] with the rest of the declaration
  def initialize match
    # Child#initialize sets up the parent link; every inheritor of Child is
    # expected to call it.
    super()
    @name = match[1]
    @rest = match[2]
  end
end
|
||||
end
|
||||
end
|
56
lib/rexml/dtd/entitydecl.rb
Normal file
56
lib/rexml/dtd/entitydecl.rb
Normal file
|
@ -0,0 +1,56 @@
|
|||
require "rexml/child"
|
||||
module REXML
|
||||
module DTD
|
||||
# An entity declaration read from DTD text.
class EntityDecl < Child
  # Literal text that opens the declaration.
  START = "<!ENTITY"
  # Matches START at the beginning of a line, after optional whitespace.
  START_RE = /^\s*#{START}/um
  # <!ENTITY [%] name PUBLIC "..." "...">
  PUBLIC = /^\s*#{START}\s+(?:%\s+)?(\w+)\s+PUBLIC\s+((["']).*?\3)\s+((["']).*?\5)\s*>/um
  # <!ENTITY [%] name SYSTEM "..." [NDATA name]>
  SYSTEM = /^\s*#{START}\s+(?:%\s+)?(\w+)\s+SYSTEM\s+((["']).*?\3)(?:\s+NDATA\s+\w+)?\s*>/um
  # <!ENTITY name "...">
  PLAIN = /^\s*#{START}\s+(\w+)\s+((["']).*?\3)\s*>/um
  # <!ENTITY % name "...">
  PERCENT = /^\s*#{START}\s+%\s+(\w+)\s+((["']).*?\3)\s*>/um

  # Reads one entity declaration from +src+.  The patterns are tried in the
  # order PUBLIC, SYSTEM, PLAIN, PERCENT; each is first tested with
  # src.match( pattern ) and then matched again with a second argument of
  # true (presumably consuming the text from the source -- confirm against
  # the Source class).
  #
  # Raises ParseException if none of the patterns match.
  def initialize src
    super()
    md = nil
    if src.match( PUBLIC )
      md = src.match( PUBLIC, true )
      @middle = "PUBLIC"
      @content = "#{md[2]} #{md[4]}"
    elsif src.match( SYSTEM )
      md = src.match( SYSTEM, true )
      @middle = "SYSTEM"
      @content = md[2]
    elsif src.match( PLAIN )
      md = src.match( PLAIN, true )
      @middle = ""
      @content = md[2]
    elsif src.match( PERCENT )
      md = src.match( PERCENT, true )
      @middle = ""
      @content = md[2]
    end
    raise ParseException.new("failed Entity match", src) if md.nil?
    @name = md[1]
  end

  # Returns the declaration as text: the name, the PUBLIC/SYSTEM keyword if
  # there is one, the quoted content and the closing '>'.
  #
  # NOTE(review): the '%' of a parameter entity is not recorded by the
  # constructor, so it cannot be written back here.
  def to_s
    rv = "<!ENTITY #@name "
    rv << "#@middle " if @middle.size > 0
    rv << @content
    # The patterns consume the closing '>' without capturing it, so it has
    # to be put back to produce a complete declaration.
    rv << '>'
    rv
  end

  # Appends the declaration to +output+, preceded by +indent+ spaces when
  # +indent+ is positive.
  def write( output, indent )
    output << (' '*indent) if indent > 0
    output << to_s
  end

  # Matches a declaration in +source+ and reports the matched text, with
  # runs of repeated whitespace characters squeezed, to +listener+.
  #
  # NOTE(review): no PATTERN_RE constant is defined in this class as shown,
  # and the message sent to the listener is the downcased inspect string of
  # the class -- both look unfinished; confirm the intended pattern and
  # callback name before using this method.
  def EntityDecl.parse_source source, listener
    md = source.match( PATTERN_RE, true )
    thing = md[0].squeeze " \t\n\r"
    listener.send inspect.downcase, thing
  end
end
|
||||
end
|
||||
end
|
39
lib/rexml/dtd/notationdecl.rb
Normal file
39
lib/rexml/dtd/notationdecl.rb
Normal file
|
@ -0,0 +1,39 @@
|
|||
require "rexml/child"
|
||||
module REXML
  module DTD
    # A DTD <!NOTATION ...> declaration: a name, the PUBLIC or SYSTEM
    # keyword, and the quoted identifier that follows it.
    class NotationDecl < Child
      START = "<!NOTATION"
      START_RE = /^\s*#{START}/um
      PUBLIC = /^\s*#{START}\s+(\w[\w-]*)\s+(PUBLIC)\s+((["']).*?\4)\s*>/um
      SYSTEM = /^\s*#{START}\s+(\w[\w-]*)\s+(SYSTEM)\s+((["']).*?\4)\s*>/um

      # src:: an object answering match(pattern) and match(pattern, true).
      #
      # Raises ParseException when neither PUBLIC nor SYSTEM applies.
      def initialize src
        super()
        # Probe PUBLIC first, then SYSTEM; the first pattern that applies is
        # matched a second time with the extra argument set.
        pattern = [ PUBLIC, SYSTEM ].find { |candidate| src.match( candidate ) }
        unless pattern
          raise ParseException.new( "error parsing notation: no matching pattern", src )
        end
        md = src.match( pattern, true )
        @name, @middle, @rest = md[1], md[2], md[3]
      end

      # The declaration rendered back as text.
      def to_s
        "<!NOTATION #{@name} #{@middle} #{@rest}>"
      end

      # Writes the declaration to +output+, preceded by +indent+ spaces when
      # +indent+ is positive.
      def write( output, indent )
        output << (' ' * indent) if indent > 0
        output << to_s
      end

      # Matches PATTERN_RE against +source+, collapses runs of whitespace in
      # the matched text and sends it to +listener+ under the downcased
      # class name.
      def NotationDecl.parse_source source, listener
        matched = source.match( PATTERN_RE, true )
        listener.send( inspect.downcase, matched[0].squeeze( " \t\n\r" ) )
      end
    end
  end
end
|
1147
lib/rexml/element.rb
Normal file
1147
lib/rexml/element.rb
Normal file
File diff suppressed because it is too large
Load diff
62
lib/rexml/encoding.rb
Normal file
62
lib/rexml/encoding.rb
Normal file
|
@ -0,0 +1,62 @@
|
|||
module REXML
  # Mixin that detects the encoding announced by a document and provides the
  # identity conversions for the native format, UTF-8.
  module Encoding
    @@uconv_available = false

    # Maps a Regexp recognising the start of a document to the name of the
    # encoding that document uses.
    ENCODING_CLAIMS = { }

    # Registers +encoding_str+ as a known encoding.
    #
    # encoding_str:: the encoding name
    # match::        optional Regexp identifying documents in this encoding;
    #                when omitted, a pattern matching an XML declaration
    #                whose encoding attribute equals +encoding_str+ is used.
    def Encoding.claim( encoding_str, match=nil )
      if match
        ENCODING_CLAIMS[ match ] = encoding_str
      else
        # The '?' of "<?xml" must be escaped; unescaped it makes the '<'
        # optional and the pattern can never match a real declaration.
        ENCODING_CLAIMS[ /^\s*<\?xml\s*version=(['"]).*?\1\s*encoding=(["'])#{encoding_str}\2/ ] = encoding_str
      end
    end

    # Native, default format is UTF-8, so it is declared here rather than in
    # an encodings/ definition.
    UTF_8 = 'UTF-8'
    claim( UTF_8 )

    # ID ---> Encoding name
    attr_reader :encoding

    # Sets the encoding (UTF-8 when +enc+ is nil), upcased, and loads the
    # matching rexml/encodings/ file for anything other than UTF-8.
    def encoding=( enc )
      enc = UTF_8 unless enc
      @encoding = enc.upcase
      require "rexml/encodings/#@encoding" unless @encoding == UTF_8
    end

    # Returns the name of the claimed encoding whose pattern matches +str+.
    # Returns UTF-8 when +str+ declares no encoding at all.
    #
    # Raises RuntimeError when +str+ declares an encoding nobody claimed.
    def check_encoding str
      rv = ENCODING_CLAIMS.find{|k,v| str =~ k }
      # Raise an exception if there is a declared encoding and we don't
      # recognize it
      unless rv
        if str =~ /^\s*<\?xml\s*version=(['"]).*?\1\s*encoding=(["'])(.*?)\2/
          raise "A matching encoding handler was not found for encoding '#{$3}', or the encoding handler failed to load due to a missing support library (such as uconv)."
        else
          return UTF_8
        end
      end
      return rv[1]
    end

    # Identity conversion: the native format already is UTF-8.
    def to_utf_8(str)
      return str
    end

    # Identity conversion: the native format already is UTF-8.
    def from_utf_8 content
      return content
    end
  end

  # Loads every rexml/encodings/*_decl.rb file found on the load path when
  # this file is evaluated.
  module Encodingses
    encodings = []
    $:.each do |incl_dir|
      if Dir[ File.join(incl_dir, 'rexml', 'encodings') ].size > 0
        encodings |= Dir[ File.join(incl_dir, 'rexml', 'encodings', '*_decl.rb') ]
      end
      # Only the file name is kept, so the same declaration file found
      # under several load-path entries is required once.
      encodings.collect!{ |f| File.basename(f) }
      encodings.uniq!
    end
    encodings.each { |enc| require "rexml/encodings/#{enc}" }
  end
end
|
17
lib/rexml/encodings/EUC-JP.rb
Normal file
17
lib/rexml/encodings/EUC-JP.rb
Normal file
|
@ -0,0 +1,17 @@
|
|||
begin
  require 'uconv'

  module REXML
    module Encoding
      # Converts EUC-JP text to UTF-8 using Uconv.
      def from_euc_jp(str)
        Uconv.euctou8(str)
      end

      # Converts UTF-8 text to EUC-JP using Uconv.
      def to_euc_jp content
        Uconv.u8toeuc(content)
      end
    end
  end
rescue LoadError
  # uconv could not be loaded; report it with an explicit message.
  raise "uconv is required for Japanese encoding support."
end
|
6
lib/rexml/encodings/EUC-JP_decl.rb
Normal file
6
lib/rexml/encodings/EUC-JP_decl.rb
Normal file
|
@ -0,0 +1,6 @@
|
|||
# Declares the EUC-JP encoding to REXML::Encoding.
module REXML
|
||||
module Encoding
|
||||
# Name under which the encoding is registered.
EUC_JP = 'EUC-JP'
|
||||
# No matcher is passed here, only the encoding name.
claim( EUC_JP )
|
||||
end
|
||||
end
|
23
lib/rexml/encodings/ISO-8859-1.rb
Normal file
23
lib/rexml/encodings/ISO-8859-1.rb
Normal file
|
@ -0,0 +1,23 @@
|
|||
module REXML
  module Encoding
    # Convert from UTF-8.  Code points up to 0xFF become a single byte;
    # anything larger is written as a numeric character reference.
    def to_iso_8859_1 content
      bytes = []
      content.unpack('U*').each do |code_point|
        if code_point > 0xFF
          # Numeric entity (&#nnnn;); shared by Stefan Scholl
          bytes.concat "&\##{code_point};".unpack('C*')
        else
          bytes << code_point
        end
      end
      bytes.pack('C*')
    end

    # Convert to UTF-8: every input byte is taken as one code point.
    def from_iso_8859_1(str)
      code_points = str.unpack('C*')
      code_points.pack('U*')
    end
  end
end
|
6
lib/rexml/encodings/ISO-8859-1_decl.rb
Normal file
6
lib/rexml/encodings/ISO-8859-1_decl.rb
Normal file
|
@ -0,0 +1,6 @@
|
|||
# Declares the ISO-8859-1 encoding to REXML::Encoding.
module REXML
|
||||
module Encoding
|
||||
# Name under which the encoding is registered.
ISO_8859_1 = 'ISO-8859-1'
|
||||
# No matcher is passed here, only the encoding name.
claim( ISO_8859_1 )
|
||||
end
|
||||
end
|
17
lib/rexml/encodings/Shift-JIS.rb
Normal file
17
lib/rexml/encodings/Shift-JIS.rb
Normal file
|
@ -0,0 +1,17 @@
|
|||
begin
  require 'uconv'

  module REXML
    module Encoding
      # Converts UTF-8 text to Shift-JIS using Uconv.
      def to_shift_jis content
        Uconv.u8tosjis(content)
      end

      # Converts Shift-JIS text to UTF-8 using Uconv.
      def from_shift_jis(str)
        Uconv.sjistou8(str)
      end
    end
  end
rescue LoadError
  # uconv could not be loaded; report it with an explicit message.
  raise "uconv is required for Japanese encoding support."
end
|
6
lib/rexml/encodings/Shift-JIS_decl.rb
Normal file
6
lib/rexml/encodings/Shift-JIS_decl.rb
Normal file
|
@ -0,0 +1,6 @@
|
|||
# Declares both spellings of the Shift-JIS encoding name to REXML::Encoding.
module REXML
|
||||
module Encoding
|
||||
# Hyphenated spelling.
claim( 'Shift-JIS' )
|
||||
# Underscore spelling.
claim( 'Shift_JIS' )
|
||||
end
|
||||
end
|
17
lib/rexml/encodings/Shift_JIS.rb
Normal file
17
lib/rexml/encodings/Shift_JIS.rb
Normal file
|
@ -0,0 +1,17 @@
|
|||
begin
  require 'uconv'

  module REXML
    module Encoding
      # Converts UTF-8 text to Shift_JIS using Uconv.
      def to_shift_jis content
        Uconv.u8tosjis(content)
      end

      # Converts Shift_JIS text to UTF-8 using Uconv.
      def from_shift_jis(str)
        Uconv.sjistou8(str)
      end
    end
  end
rescue LoadError
  # uconv could not be loaded; report it with an explicit message.
  raise "uconv is required for Japanese encoding support."
end
|
27
lib/rexml/encodings/UNILE.rb
Normal file
27
lib/rexml/encodings/UNILE.rb
Normal file
|
@ -0,0 +1,27 @@
|
|||
module REXML
  module Encoding
    # Convert from UTF-8 to two bytes per character, low byte first.
    # Code points above 0xFFFF cannot be represented in two bytes and are
    # written as '?'.  No byte-order mark is emitted.
    def to_unile content
      array_utf8 = content.unpack("U*")
      array_enc = []
      array_utf8.each do |num|
        if ((num>>16) > 0)
          # 0x3F is '?'.  The ?? character literal is a String on
          # Ruby >= 1.9 and cannot be packed with 'C'.
          array_enc << 0x3F
          array_enc << 0
        else
          array_enc << (num & 0xFF)
          array_enc << (num >> 8)
        end
      end
      array_enc.pack('C*')
    end

    # Convert to UTF-8.  The first two bytes are skipped (the UNILE claim
    # pattern matches on the leading bytes 0xFF 0xFE); every following
    # pair is read low byte first.  A dangling final byte is ignored.
    def from_unile(str)
      array_enc=str.unpack('C*')
      array_utf8 = []
      2.step(array_enc.size-2, 2){|i|
        array_utf8 << (array_enc.at(i) + array_enc.at(i+1)*0x100)
      }
      array_utf8.pack('U*')
    end
  end
end
|
6
lib/rexml/encodings/UNILE_decl.rb
Normal file
6
lib/rexml/encodings/UNILE_decl.rb
Normal file
|
@ -0,0 +1,6 @@
|
|||
# Declares the UNILE encoding to REXML::Encoding.
module REXML
|
||||
module Encoding
|
||||
# Name under which the encoding is registered.
UNILE = 'UNILE'
|
||||
# Claimed by an explicit matcher: input starting with bytes 0xFF 0xFE.
claim( UNILE, /^\377\376/ )
|
||||
end
|
||||
end
|
23
lib/rexml/encodings/US-ASCII.rb
Normal file
23
lib/rexml/encodings/US-ASCII.rb
Normal file
|
@ -0,0 +1,23 @@
|
|||
module REXML
  module Encoding
    # Convert from UTF-8.  US-ASCII only covers code points up to 0x7F;
    # everything above is written as a numeric character reference so the
    # result contains 7-bit bytes only.
    def to_us_ascii content
      array_utf8 = content.unpack('U*')
      array_enc = []
      array_utf8.each do |num|
        if num <= 0x7F
          array_enc << num
        else
          # Numeric entity (&#nnnn;); shared by Stefan Scholl
          array_enc.concat "&\##{num};".unpack('C*')
        end
      end
      array_enc.pack('C*')
    end

    # Convert to UTF-8: every input byte is taken as one code point.
    def from_us_ascii(str)
      str.unpack('C*').pack('U*')
    end
  end
end
|
6
lib/rexml/encodings/US-ASCII_decl.rb
Normal file
6
lib/rexml/encodings/US-ASCII_decl.rb
Normal file
|
@ -0,0 +1,6 @@
|
|||
# Declares the US-ASCII encoding to REXML::Encoding.
module REXML
|
||||
module Encoding
|
||||
# Name under which the encoding is registered.
US_ASCII = 'US-ASCII'
|
||||
# No matcher is passed here, only the encoding name.
claim( US_ASCII )
|
||||
end
|
||||
end
|
27
lib/rexml/encodings/UTF-16.rb
Normal file
27
lib/rexml/encodings/UTF-16.rb
Normal file
|
@ -0,0 +1,27 @@
|
|||
module REXML
  module Encoding
    # Convert from UTF-8 to two bytes per character, high byte first.
    # Code points above 0xFFFF are not encoded as surrogate pairs; they are
    # written as '?'.  No byte-order mark is emitted.
    def to_utf_16 content
      array_utf8 = content.unpack("U*")
      array_enc = []
      array_utf8.each do |num|
        if ((num>>16) > 0)
          array_enc << 0
          # 0x3F is '?'.  The ?? character literal is a String on
          # Ruby >= 1.9 and cannot be packed with 'C'.
          array_enc << 0x3F
        else
          array_enc << (num >> 8)
          array_enc << (num & 0xFF)
        end
      end
      array_enc.pack('C*')
    end

    # Convert to UTF-8.  The first two bytes are skipped (the UTF-16 claim
    # pattern matches on the leading bytes 0xFE 0xFF); every following
    # pair is read high byte first.  A dangling final byte is ignored.
    def from_utf_16(str)
      array_enc=str.unpack('C*')
      array_utf8 = []
      2.step(array_enc.size-2, 2){|i|
        array_utf8 << (array_enc.at(i+1) + array_enc.at(i)*0x100)
      }
      array_utf8.pack('U*')
    end
  end
end
|
6
lib/rexml/encodings/UTF-16_decl.rb
Normal file
6
lib/rexml/encodings/UTF-16_decl.rb
Normal file
|
@ -0,0 +1,6 @@
|
|||
# Declares the UTF-16 encoding to REXML::Encoding.
module REXML
|
||||
module Encoding
|
||||
# Name under which the encoding is registered.
UTF_16 = 'UTF-16'
|
||||
# Claimed by an explicit matcher: input starting with bytes 0xFE 0xFF.
claim( UTF_16, /^\376\377/ )
|
||||
end
|
||||
end
|
159
lib/rexml/entity.rb
Normal file
159
lib/rexml/entity.rb
Normal file
|
@ -0,0 +1,159 @@
|
|||
require 'rexml/child'
|
||||
require 'rexml/source'
|
||||
require 'rexml/xmltokens'
|
||||
|
||||
module REXML
  # God, I hate DTDs.  I really do.  Why this idiot standard still
  # plagues us is beyond me.
  class Entity < Child
    include XMLTokens
    # The constants below are regular-expression fragments (as Strings)
    # assembled into ENTITYDECL.
    PUBIDCHAR = "\x20\x0D\x0Aa-zA-Z0-9\\-()+,./:=?;!*@$_%#"
    SYSTEMLITERAL = %Q{((?:"[^"]*")|(?:'[^']*'))}
    PUBIDLITERAL = %Q{("[#{PUBIDCHAR}']*"|'[#{PUBIDCHAR}]*')}
    EXTERNALID = "(?:(?:(SYSTEM)\\s+#{SYSTEMLITERAL})|(?:(PUBLIC)\\s+#{PUBIDLITERAL}\\s+#{SYSTEMLITERAL}))"
    NDATADECL = "\\s+NDATA\\s+#{NAME}"
    PEREFERENCE = "%#{NAME};"
    ENTITYVALUE = %Q{((?:"(?:[^%&"]|#{PEREFERENCE}|#{REFERENCE})*")|(?:'([^%&']|#{PEREFERENCE}|#{REFERENCE})*'))}
    PEDEF = "(?:#{ENTITYVALUE}|#{EXTERNALID})"
    ENTITYDEF = "(?:#{ENTITYVALUE}|(?:#{EXTERNALID}(#{NDATADECL})?))"
    PEDECL = "<!ENTITY\\s+(%)\\s+#{NAME}\\s+#{PEDEF}\\s*>"
    GEDECL = "<!ENTITY\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
    # NOTE(review): the alternation binds looser than the leading \s*, so
    # the optional whitespace only applies to the GEDECL branch -- confirm
    # whether that is intended before changing it.
    ENTITYDECL = /\s*(?:#{GEDECL})|(?:#{PEDECL})/um

    attr_reader :name, :external, :ref, :ndata, :pubid

    # Create a new entity.  Simple entities can be constructed by passing a
    # name, value to the constructor; this creates a generic, plain entity
    # reference. For anything more complicated, you have to pass a Source to
    # the constructor with the entity definition, or use the accessor methods.
    # +WARNING+: There is no validation of entity state except when the entity
    # is read from a stream.  If you start poking around with the accessors,
    # you can easily create a non-conformant Entity.  The best thing to do is
    # dump the stupid DTDs and use XMLSchema instead.
    #
    #  e = Entity.new( 'amp', '&' )
    #
    # When +stream+ is an Array it is read positionally: index 1 is the
    # name, index 2 is either the value or the keyword SYSTEM/PUBLIC
    # (followed by the identifiers), and a trailing '%' element marks a
    # parameter entity.  Note that the trailing '%' is popped off the Array
    # that was passed in.
    def initialize stream, value=nil, parent=nil, reference=false
      super(parent)
      @ndata = @pubid = @value = @external = nil
      if stream.kind_of? Array
        @name = stream[1]
        if stream[-1] == '%'
          @reference = true
          stream.pop
        else
          @reference = false
        end
        if stream[2] =~ /SYSTEM|PUBLIC/
          @external = stream[2]
          if @external == 'SYSTEM'
            @ref = stream[3]
            @ndata = stream[4] if stream.size == 5
          else
            @pubid = stream[3]
            @ref = stream[4]
          end
        else
          @value = stream[2]
        end
      else
        @reference = reference
        @external = nil
        @name = stream
        @value = value
      end
    end

    # Evaluates whether the given string matches an entity definition,
    # returning true if so, and false otherwise.
    def Entity::matches? string
      (ENTITYDECL =~ string) == 0
    end

    # Evaluates to the unnormalized value of this entity; that is, replacing
    # all entities -- both %ent; and &ent; entities.  This differs from
    # +value()+ in that +value+ only replaces %ent; entities.
    def unnormalized
      v = value()
      return nil if v.nil?
      @unnormalized = Text::unnormalize(v, parent)
      @unnormalized
    end

    #once :unnormalized

    # Returns the value of this entity unprocessed -- raw.  This is the
    # normalized value; that is, with all %ent; and &ent; entities intact
    def normalized
      @value
    end

    # Write out a fully formed, correct entity definition (assuming the Entity
    # object itself is valid.)
    #
    # out::    receives the text through <<
    # indent:: accepted but not used by this method
    def write out, indent=-1
      out << '<!ENTITY '
      out << '% ' if @reference
      out << @name
      out << ' '
      if @external
        out << @external << ' '
        if @pubid
          # Quote with whichever quote character the literal does not contain.
          q = @pubid.include?('"')?"'":'"'
          out << q << @pubid << q << ' '
        end
        q = @ref.include?('"')?"'":'"'
        out << q << @ref << q
        out << ' NDATA ' << @ndata if @ndata
      else
        q = @value.include?('"')?"'":'"'
        out << q << @value << q
      end
      out << '>'
    end

    # Returns this entity as a string.  See write().
    def to_s
      rv = ''
      write rv
      rv
    end

    PEREFERENCE_RE = /#{PEREFERENCE}/um
    # Returns the value of this entity.  At the moment, only internal entities
    # are processed.  If the value contains internal references (IE,
    # %blah;), those are replaced with their values.  IE, if the doctype
    # contains:
    #  <!ENTITY % foo "bar">
    #  <!ENTITY yada "nanoo %foo; nanoo">
    # then:
    #  doctype.entity('yada').value   #-> "nanoo bar nanoo"
    #
    # Returns nil when the entity has no internal value.
    def value
      if @value
        matches = @value.scan(PEREFERENCE_RE)
        rv = @value.clone
        if @parent
          matches.each do |entity_reference|
            entity_value = @parent.entity( entity_reference[0] )
            # scan yields an Array of captures per reference.  Interpolating
            # the Array itself produces its inspect form on Ruby >= 1.9, so
            # the captures are joined explicitly to rebuild the name.
            rv.gsub!( /%#{entity_reference.join};/um, entity_value )
          end
        end
        return rv
      end
      nil
    end
  end

  # This is a set of entity constants -- the ones defined in the XML
  # specification.  These are +gt+, +lt+, +amp+, +quot+ and +apos+.
  module EntityConst
    # +>+
    GT = Entity.new( 'gt', '>' )
    # +<+
    LT = Entity.new( 'lt', '<' )
    # +&+
    AMP = Entity.new( 'amp', '&' )
    # +"+
    QUOT = Entity.new( 'quot', '"' )
    # +'+
    APOS = Entity.new( 'apos', "'" )
  end
end
|
360
lib/rexml/functions.rb
Normal file
360
lib/rexml/functions.rb
Normal file
|
@ -0,0 +1,360 @@
|
|||
module REXML
  # If you add a method, keep in mind two things:
  # (1) the first argument will always be a list of nodes from which to
  # filter.  In the case of context methods (such as position), the function
  # should return an array with a value for each child in the array.
  # (2) all method calls from XML will have "-" replaced with "_".
  # Therefore, in XML, "local-name()" is identical (and actually becomes)
  # "local_name()"
  module Functions
    # Evaluation context shared by every function: the current node, its
    # position, the size of the current node list, plus variable and
    # namespace bindings.
    @@node = nil
    @@index = nil
    @@size = nil
    @@variables = {}
    @@namespace_context = {}

    def Functions::node=(value); @@node = value; end
    def Functions::index=(value); @@index = value; end
    def Functions::size=(value); @@size = value; end
    def Functions::variables=(value); @@variables = value; end
    def Functions::namespace_context=(value)
      @@namespace_context = value
    end
    def Functions::node; @@node; end
    def Functions::index; @@index; end
    def Functions::size; @@size; end
    def Functions::variables; @@variables; end
    def Functions::namespace_context; @@namespace_context; end

    # True when the context node is a text node, nil otherwise.
    def Functions::text( )
      return true if @@node.node_type == :text
    end

    # Size of the current node list.
    def Functions::last( )
      @@size
    end

    # Position of the context node in the current node list.
    def Functions::position( )
      @@index
    end

    def Functions::count( node_set )
      node_set.size
    end

    # Since REXML is non-validating, this method is not implemented as it
    # requires a DTD
    def Functions::id( object )
    end

    # UNTESTED
    def Functions::local_name( node_set=nil )
      get_namespace( node_set ) do |node|
        return node.local_name
      end
    end

    def Functions::namespace_uri( node_set=nil )
      get_namespace( node_set ) {|node| node.namespace}
    end

    def Functions::name( node_set=nil )
      get_namespace( node_set ) do |node|
        node.expanded_name
      end
    end

    # Helper method.  Yields the context node (when +node_set+ is nil), the
    # argument itself when it answers +namespace+ with a true value, or
    # otherwise every member of an Enumerable argument that answers
    # +namespace+.
    def Functions::get_namespace( node_set = nil )
      if node_set == nil
        yield @@node if defined? @@node.namespace
      else
        # Plain collections such as Array have no #namespace method; ask
        # before calling so they reach the Enumerable branch below.
        if node_set.respond_to?( :namespace ) and node_set.namespace
          yield node_set
        else
          return unless node_set.kind_of? Enumerable
          node_set.each { |node| yield node if defined? node.namespace }
        end
      end
    end

    # A node-set is converted to a string by returning the string-value of the
    # node in the node-set that is first in document order. If the node-set is
    # empty, an empty string is returned.
    #
    # A number is converted to a string as follows
    #
    # NaN is converted to the string NaN
    #
    # positive zero is converted to the string 0
    #
    # negative zero is converted to the string 0
    #
    # positive infinity is converted to the string Infinity
    #
    # negative infinity is converted to the string -Infinity
    #
    # if the number is an integer, the number is represented in decimal form
    # as a Number with no decimal point and no leading zeros, preceded by a
    # minus sign (-) if the number is negative
    #
    # otherwise, the number is represented in decimal form as a Number
    # including a decimal point with at least one digit before the decimal
    # point and at least one digit after the decimal point, preceded by a
    # minus sign (-) if the number is negative; there must be no leading zeros
    # before the decimal point apart possibly from the one required digit
    # immediately before the decimal point; beyond the one required digit
    # after the decimal point there must be as many, but only as many, more
    # digits as are needed to uniquely distinguish the number from all other
    # IEEE 754 numeric values.
    #
    # The boolean false value is converted to the string false. The boolean
    # true value is converted to the string true.
    #
    # An object of a type other than the four basic types is converted to a
    # string in a way that is dependent on that type.
    def Functions::string( object=nil )
      #object = @context unless object
      if object.instance_of? Array
        string( object[0] )
      elsif defined? object.node_type
        if object.node_type == :attribute
          object.value
        elsif object.node_type == :element
          object.text
        else
          object.to_s
        end
      else
        object.to_s
      end
    end

    # UNTESTED
    def Functions::concat( *objects )
      objects.join
    end

    # Fixed by Mike Stok
    def Functions::starts_with( string, test )
      string(string).index(string(test)) == 0
    end

    # Fixed by Mike Stok
    def Functions::contains( string, test )
      string(string).include? string(test)
    end

    # Kouhei fixed this
    # Returns the part of +string+ before the first occurrence of +test+,
    # or "" when +test+ does not occur.
    def Functions::substring_before( string, test )
      ruby_string = string(string)
      ruby_index = ruby_string.index(string(test))
      if ruby_index.nil?
        ""
      else
        ruby_string[ 0...ruby_index ]
      end
    end

    # Kouhei fixed this too
    # Returns the part of +string+ after the first occurrence of +test+,
    # or "" when +test+ does not occur.
    def Functions::substring_after( string, test )
      ruby_string = string(string)
      test_string = string(test)
      ruby_index = ruby_string.index(test_string)
      if ruby_index.nil?
        ""
      else
        # Skip the whole separator, not just its first character.
        ruby_string[ ruby_index+test_string.length..-1 ]
      end
    end

    # Take equal portions of Mike Stok and Sean Russell; mix
    # vigorously, and pour into a tall, chilled glass.  Serves 10,000.
    def Functions::substring( string, start, length=nil )
      ruby_string = string(string)
      ruby_length = if length.nil?
                      ruby_string.length.to_f
                    else
                      number(length)
                    end
      ruby_start = number(start)

      # Handle the special cases
      return '' if (
        ruby_length.nan? or
        ruby_start.nan? or
        ruby_start.infinite?
      )

      infinite_length = ruby_length.infinite? == 1
      ruby_length = ruby_string.length if infinite_length

      # Now, get the bounds.  The XPath bounds are 1..length; the ruby bounds
      # are 0..length.  Therefore, we have to offset the bounds by one.
      ruby_start = ruby_start.round - 1
      ruby_length = ruby_length.round

      if ruby_start < 0
        ruby_length += ruby_start unless infinite_length
        ruby_start = 0
      end
      return '' if ruby_length <= 0
      ruby_string[ruby_start,ruby_length]
    end

    # UNTESTED
    def Functions::string_length( string )
      string(string).length
    end

    # UNTESTED
    # Strips leading and trailing whitespace and collapses internal runs of
    # whitespace to one space.  An Array argument is normalized member by
    # member (nil/false members yield nil); with no argument the string
    # value of the context node is used.
    def Functions::normalize_space( string=nil )
      string = string(@@node) if string.nil?
      if string.kind_of? Array
        string.collect{|x| x.to_s.strip.gsub(/\s+/um, ' ') if x}
      else
        string.to_s.strip.gsub(/\s+/um, ' ')
      end
    end

    # This is entirely Mike Stok's beast
    def Functions::translate( string, tr1, tr2 )
      # Work on code points throughout so that the lookup keys have the
      # same type as the characters being translated, whatever String#[]
      # returns on the running Ruby.
      from = string(tr1).unpack('U*')
      to = string(tr2).unpack('U*')

      # the map is our translation table.
      #
      # if a character occurs more than once in the
      # from string then we ignore the second &
      # subsequent mappings
      #
      # if a character maps to nil then we delete it
      # in the output.  This happens if the from
      # string is longer than the to string
      #
      # there's nothing about - or ^ being special in
      # http://www.w3.org/TR/xpath#function-translate
      # so we don't build ranges or negated classes

      map = Hash.new
      from.each_with_index { |from_char, pos|
        # to[pos] is nil once +to+ is exhausted, which marks a deletion.
        map[from_char] = to[pos] unless map.has_key? from_char
      }

      string(string).unpack('U*').collect { |c|
        if map.has_key? c then map[c] else c end
      }.compact.pack('U*')
    end

    # UNTESTED
    def Functions::boolean( object=nil )
      if object.kind_of? String
        if object =~ /\d+/u
          return object.to_f != 0
        else
          return object.size > 0
        end
      elsif object.kind_of? Array
        object = object.find{|x| x and true}
      end
      return object ? true : false
    end

    # UNTESTED
    def Functions::not( object )
      not boolean( object )
    end

    # UNTESTED
    def Functions::true( )
      true
    end

    # UNTESTED
    def Functions::false( )
      false
    end

    # UNTESTED
    # Walks from the context node up through its parents and compares
    # +language+ with the first xml:lang attribute found on an element.
    def Functions::lang( language )
      lang = false
      node = @@node
      attr = nil
      until node.nil?
        if node.node_type == :element
          attr = node.attributes["xml:lang"]
          unless attr.nil?
            lang = compare_language(string(language), attr)
            break
          end
        end
        node = node.parent
      end
      lang
    end

    # True when +lang2+ starts with +lang1+, ignoring case.
    def Functions::compare_language lang1, lang2
      lang2.downcase.index(lang1.downcase) == 0
    end

    # a string that consists of optional whitespace followed by an optional
    # minus sign followed by a Number followed by whitespace is converted to
    # the IEEE 754 number that is nearest (according to the IEEE 754
    # round-to-nearest rule) to the mathematical value represented by the
    # string; any other string is converted to NaN
    #
    # boolean true is converted to 1; boolean false is converted to 0
    #
    # a node-set is first converted to a string as if by a call to the string
    # function and then converted in the same way as a string argument
    #
    # an object of a type other than the four basic types is converted to a
    # number in a way that is dependent on that type
    #
    # NOTE(review): strings that are not numbers currently convert through
    # String#to_f (giving 0.0), not to NaN as described above.
    def Functions::number( object=nil )
      # Only a missing argument falls back to the context node; an explicit
      # false must reach the boolean branch below.
      object = @@node if object.nil?
      if object == true
        Float(1)
      elsif object == false
        Float(0)
      elsif object.kind_of? Array
        string( object ).to_f
      elsif object.kind_of? Float
        object
      else
        object.to_s.to_f
      end
    end

    # Sum of the numeric values of the string values of +nodes+.  A single
    # object is treated as a one-member node-set.
    def Functions::sum( nodes )
      nodes = [nodes] unless nodes.kind_of? Array
      nodes.inject(0) { |total, node| total + number(string(node)) }
    end

    def Functions::floor( number )
      number(number).floor
    end

    def Functions::ceiling( number )
      number(number).ceil
    end

    # Rounds to the nearest integer; values that cannot be rounded (the
    # FloatDomainError case) are returned as they are.
    def Functions::round( number )
      begin
        number(number).round
      rescue FloatDomainError
        number(number)
      end
    end

    # Unknown function names are reported on standard output and then
    # evaluated as an XPath against the context node.
    def Functions::method_missing( id )
      puts "METHOD MISSING #{id.id2name}"
      XPath.match( @@node, id.id2name )
    end
  end
end
|
62
lib/rexml/instruction.rb
Normal file
62
lib/rexml/instruction.rb
Normal file
|
@ -0,0 +1,62 @@
|
|||
require "rexml/child"
|
||||
require "rexml/source"
|
||||
|
||||
module REXML
  # Represents an XML Instruction; IE, <? ... ?>
  # TODO: Add parent arg (3rd arg) to constructor
  class Instruction < Child
    # Regular-expression sources; write() removes the backslash to obtain
    # the literal delimiters.
    START = '<\?'
    STOP = '\?>'

    # target is the "name" of the Instruction; IE, the "tag" in <?tag ...?>
    # content is everything else.
    attr_accessor :target, :content

    # Constructs a new Instruction
    # @param target can be one of a number of things.  If String, then
    # the target of this instruction is set to this.  If an Instruction,
    # then the Instruction is shallowly cloned (target and content are
    # copied).
    # @param content The content String when +target+ is a String.  When
    # +target+ is an Instruction, this argument is handed to Child's
    # constructor instead.
    #
    # NOTE(review): any other kind of +target+ leaves the object
    # uninitialised (neither branch runs) -- confirm whether a Source was
    # meant to be supported.
    def initialize(target, content=nil)
      if target.kind_of? String
        super()
        @target = target
        @content = content
      elsif target.kind_of? Instruction
        super(content)
        @target = target.target
        @content = target.content
      end
      # Keep a stripped copy rather than stripping in place, so the
      # caller's string (or the cloned instruction's content) is untouched.
      @content = @content.strip if @content
    end

    def clone
      Instruction.new self
    end

    # Writes the instruction as <?target content?>.  The space and the
    # content are left out when there is no content.
    def write writer, indent=-1, transitive=false, ie_hack=false
      indent(writer, indent)
      writer << START.sub(/\\/u, '')
      writer << @target
      if @content
        writer << ' '
        writer << @content
      end
      writer << STOP.sub(/\\/u, '')
    end

    # @return true if other is an Instruction, and the content and target
    # of the other matches the target and content of this object.
    def ==( other )
      other.kind_of? Instruction and
      other.target == @target and
      other.content == @content
    end

    def node_type
      :processing_instruction
    end
  end
end
|
232
lib/rexml/light/node.rb
Normal file
232
lib/rexml/light/node.rb
Normal file
|
@ -0,0 +1,232 @@
|
|||
require 'rexml/xmltokens'
|
||||
require 'rexml/light/node'
|
||||
|
||||
# Development model
|
||||
# document = Node.new
|
||||
|
||||
# Add an element "foo" to the document
|
||||
# foo = document << "foo"
|
||||
# # Set attribute "attr" on foo
|
||||
# foo["attr"] = "la"
|
||||
# # Set another attribute in a different namespace
|
||||
# foo["attr", "namespace"] = "too"
|
||||
# # Swap foo into another namespace
|
||||
# foo.namespace = "blah"
|
||||
# # Add a couple of element nodes to foo
|
||||
# foo << "a"
|
||||
# foo << "b"
|
||||
# # Access the children of foo in various ways
|
||||
# a = foo[0]
|
||||
# foo.each { |child|
|
||||
# #...
|
||||
# }
|
||||
# # Add text to foo
|
||||
# # Add instruction
|
||||
# # Add comment
|
||||
# # Get the root of the document
|
||||
# document == a.root
|
||||
# # Write the document out
|
||||
# puts document.to_s
|
||||
module REXML
|
||||
module Light
|
||||
# Represents a tagged XML element. Elements are characterized by
|
||||
# having children, attributes, and names, and can themselves be
|
||||
# children.
|
||||
class Node < Array
|
||||
alias :_old_get :[]
|
||||
alias :_old_put :[]=
|
||||
|
||||
NAMESPLIT = /^(?:(#{XMLTokens::NCNAME_STR}):)?(#{XMLTokens::NCNAME_STR})/u
|
||||
# Create a new element.
|
||||
# Builds the node from an event-style Array.  A String becomes
# [ :text, string ]; nil becomes a :start_document node.
def initialize node=nil
|
||||
if node.kind_of? String
|
||||
node = [ :text, node ]
|
||||
elsif node.nil?
|
||||
node = [ :start_document, nil, nil ]
|
||||
end
|
||||
replace( node )
|
||||
# Insert one extra slot at index 1 and clear it; parent/parent= use slot 1.
_old_put( 1, 0, 1 )
|
||||
_old_put( 1, nil )
|
||||
end
|
||||
|
||||
# Length of the underlying Array minus four (children are read from index 4
# on by each).  Calls el!() first.
def size
|
||||
el!()
|
||||
super-4
|
||||
end
|
||||
|
||||
# Yields every entry from index 4 onwards, in order.  Calls el!() first.
def each( &block )
|
||||
el!()
|
||||
size.times { |x| yield( at(x+4) ) }
|
||||
end
|
||||
|
||||
# Returns the entry stored in slot 2.  Calls el!() first.
def name
|
||||
el!()
|
||||
at(2)
|
||||
end
|
||||
|
||||
# Sets the node's name, prefixed with the prefix looked up for +ns+ when a
# namespace is given.
#
# The name lives in slot 2 (where #name reads it); slot 1 holds the parent
# and must not be overwritten.
def name=( name_str, ns=nil )
  el!()
  pfx = ''
  pfx = "#{prefix(ns)}:" if ns
  _old_put(2, "#{pfx}#{name_str}")
end
|
||||
|
||||
# Stores +node+ in slot 1, which parent reads back.
def parent=( node )
|
||||
_old_put(1,node)
|
||||
end
|
||||
|
||||
# Returns @name after running namesplit, which splits slot 2 with NAMESPLIT.
def local_name
|
||||
el!()
|
||||
namesplit
|
||||
@name
|
||||
end
|
||||
|
||||
# Replaces the local part of the name, keeping the cached prefix.
#
# The name lives in slot 2 (slot 1 is the parent).  The "prefix:" part is
# only written when a non-empty prefix is cached, so an unprefixed node
# does not end up named ":name".
def local_name=( name_str )
  el!()
  pfx = ''
  pfx = "#@prefix:" if @prefix and @prefix.size > 0
  _old_put( 2, "#{pfx}#{name_str}" )
end
|
||||
|
||||
# Delegates to prefix_of( self, namespace ) after calling el!().
def prefix( namespace=nil )
|
||||
el!()
|
||||
prefix_of( self, namespace )
|
||||
end
|
||||
|
||||
# Delegates to namespace_of( self, prefix ); +prefix+ defaults to the result
# of calling prefix() with no argument.
def namespace( prefix=prefix() )
|
||||
el!()
|
||||
namespace_of( self, prefix )
|
||||
end
|
||||
|
||||
# Moves the node into +namespace+ by rewriting its name with the prefix
# looked up for that namespace.
#
# The name lives in slot 2 (slot 1 is the parent).
# NOTE(review): @name is only set by namesplit; confirm it has run before
# this is called, otherwise the local part written here is empty.
def namespace=( namespace )
  el!()
  @prefix = prefix( namespace )
  pfx = ''
  pfx = "#@prefix:" if @prefix.size > 0
  _old_put(2, "#{pfx}#@name")
end
|
||||
|
||||
# Attribute or child lookup.
#
# reference:: a String reads the attribute of that name from the hash in
#             slot 3 (prefixed with the prefix for +ns+ when given); a
#             Range or Integer addresses children, which start at slot 4.
#
# NOTE(review): negative Integer references and negative range starts are
# still offset by four, as before -- confirm the intended semantics.
def []( reference, ns=nil )
  el!()
  if reference.kind_of? String
    pfx = ''
    pfx = "#{prefix(ns)}:" if ns
    at(3)["#{pfx}#{reference}"]
  elsif reference.kind_of? Range
    # A non-negative end is a child index and needs the same offset as the
    # start; a negative end already counts from the back of the array.
    last = reference.end
    last += 4 if last and last >= 0
    _old_get( Range.new(4+reference.begin, last, reference.exclude_end?) )
  else
    _old_get( 4+reference )
  end
end
|
||||
|
||||
# Doesn't handle namespaces yet
|
||||
def []=( reference, ns, value=nil )
|
||||
el!()
|
||||
if reference.kind_of? String
|
||||
value = ns unless value
|
||||
at( 3 )[reference] = value
|
||||
elsif reference.kind_of? Range
|
||||
_old_put( Range.new(3+reference.begin, reference.end, reference.exclude_end?), ns )
|
||||
else
|
||||
if value
|
||||
_old_put( 4+reference, ns, value )
|
||||
else
|
||||
_old_put( 4+reference, ns )
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
# Append a child to this element, optionally under a provided namespace.
|
||||
# The namespace argument is ignored if the element argument is an Element
|
||||
# object. Otherwise, the element argument is a string, the namespace (if
|
||||
# provided) is the namespace the element is created in.
|
||||
def << element
|
||||
if text?
|
||||
at(-1) << element
|
||||
else
|
||||
newnode = Node.new( element )
|
||||
newnode.parent = self
|
||||
self.push( newnode )
|
||||
end
|
||||
at(-1)
|
||||
end
|
||||
|
||||
def node_type
|
||||
self[0]
|
||||
end
|
||||
|
||||
def text=( foo )
|
||||
replace = at(4).kind_of? String ? 1 : 0
|
||||
self._old_put(4,replace, normalizefoo)
|
||||
end
|
||||
|
||||
def root
|
||||
context = self
|
||||
context = context.at(1) while context.at(1)
|
||||
end
|
||||
|
||||
def element?
|
||||
at(0) == :start_element
|
||||
end
|
||||
|
||||
def has_name?( name, namespace = '' )
|
||||
el!()
|
||||
at(3) == name and namespace() == namespace
|
||||
end
|
||||
|
||||
def children
|
||||
el!()
|
||||
self
|
||||
end
|
||||
|
||||
def parent
|
||||
at(1)
|
||||
end
|
||||
|
||||
def text?
|
||||
at(0) == :text
|
||||
end
|
||||
|
||||
def to_s
|
||||
|
||||
end
|
||||
|
||||
def el!
|
||||
if text?()
|
||||
_old_put( 0, :start_element )
|
||||
push({})
|
||||
end
|
||||
end
|
||||
|
||||
private
|
||||
|
||||
def namesplit
|
||||
return if @name.defined?
|
||||
at(2) =~ NAMESPLIT
|
||||
@prefix = '' || $1
|
||||
@name = $2
|
||||
end
|
||||
|
||||
def namespace_of( node, prefix=nil )
|
||||
if not prefix
|
||||
name = at(2)
|
||||
name =~ NAMESPLIT
|
||||
prefix = $1
|
||||
end
|
||||
to_find = 'xmlns'
|
||||
to_find = "xmlns:#{prefix}" if not prefix.nil?
|
||||
ns = at(3)[ to_find ]
|
||||
ns ? ns : namespace_of( @node[0], prefix )
|
||||
end
|
||||
|
||||
def prefix_of( node, namespace=nil )
|
||||
if not namespace
|
||||
name = node.name
|
||||
name =~ NAMESPLIT
|
||||
$1
|
||||
else
|
||||
ns = at(3).find { |k,v| v == namespace }
|
||||
ns ? ns : prefix_of( node.parent, namespace )
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
47
lib/rexml/namespace.rb
Normal file
47
lib/rexml/namespace.rb
Normal file
|
@ -0,0 +1,47 @@
|
|||
require 'rexml/xmltokens'
|
||||
|
||||
module REXML
|
||||
# Adds named attributes to an object.
|
||||
module Namespace
  # +name+ is the local part and +expanded_name+ the full string that was
  # assigned; both are only set once name= has been called.
  attr_reader :name, :expanded_name
  # The namespace prefix; name= stores "" when the name carries none.
  attr_accessor :prefix
  include XMLTokens
  NAMESPLIT = /^(?:(#{NCNAME_STR}):)?(#{NCNAME_STR})/u

  # Sets the name and the expanded name, splitting off any prefix.
  def name=( name )
    @expanded_name = name
    name =~ NAMESPLIT
    found_prefix, local_part = $1, $2
    if found_prefix
      @prefix = found_prefix
    else
      @prefix = ""
      @namespace = ""
    end
    @name = local_part
  end

  # Compares names, optionally WITH namespaces.
  # other:: the name to compare against; if it contains a colon it is
  #         compared with the fully expanded (prefixed) name.
  # ns::    when given, both the namespace and the local name must match.
  def has_name?( other, ns=nil )
    return (namespace() == ns and name() == other) if ns
    return fully_expanded_name == other if other.include? ":"
    name == other
  end

  alias :local_name :name

  # Fully expand the name, even if the prefix wasn't specified in the
  # source file.
  def fully_expanded_name
    pfx = prefix
    pfx.size > 0 ? "#{pfx}:#@name" : @name
  end
end
|
||||
end
|
35
lib/rexml/node.rb
Normal file
35
lib/rexml/node.rb
Normal file
|
@ -0,0 +1,35 @@
|
|||
require "rexml/parseexception"
|
||||
|
||||
module REXML
|
||||
# Represents a node in the tree. Nodes are never encountered except as
|
||||
# superclasses of other objects. Nodes have siblings.
|
||||
module Node
  # @return the sibling that follows this node in @parent, or nil when
  # there is no parent or no following sibling
  def next_sibling_node
    return nil if @parent.nil?
    position = @parent.index(self)
    @parent[ position + 1 ]
  end

  # @return the sibling that precedes this node in @parent, or nil when
  # there is no parent or this node comes first
  def previous_sibling_node
    return nil if @parent.nil?
    position = @parent.index(self)
    position == 0 ? nil : @parent[ position - 1 ]
  end

  # Renders the node by handing a fresh String to +write+, which the
  # including class must provide.
  def to_s indent=-1
    buffer = ""
    write( buffer, indent )
    buffer
  end

  # Appends +ind+ spaces to +to+; does nothing when +ind+ is below 1.
  def indent to, ind
    return if ind < 1
    to << " " * ind
  end

  # A plain node holds no children.
  def parent?
    false
  end
end
|
||||
end
|
22
lib/rexml/output.rb
Normal file
22
lib/rexml/output.rb
Normal file
|
@ -0,0 +1,22 @@
|
|||
require 'rexml/encoding'
|
||||
|
||||
module REXML
|
||||
# Wraps an output object and encodes everything appended to it, unless
# the requested encoding is UTF_8.
class Output
  include Encoding
  attr_reader :encoding

  # real_IO:: any object responding to <<
  # encd::    name of the target encoding
  def initialize real_IO, encd="iso-8859-1"
    @output = real_IO
    self.encoding = encd

    # Bind encode/decode to the converters for this encoding on this
    # instance only.  The previous eval'd +alias+ re-pointed the methods
    # for every Output object and evaluated a string built from the
    # encoding name.
    # NOTE(review): assumes encoding= has made to_<name>/from_<name>
    # available, exactly as the eval'd alias did -- confirm in Encoding.
    suffix = encoding.tr('-', '_').downcase
    singleton = class << self; self; end
    singleton.send( :alias_method, :encode, "to_#{suffix}" )
    singleton.send( :alias_method, :decode, "from_#{suffix}" )
    @to_utf = encd != UTF_8
  end

  # Appends +content+ to the wrapped output, encoding it first unless the
  # target encoding is UTF_8.
  def <<( content )
    @output << (@to_utf ? encode(content) : content)
  end
end
|
||||
end
|
165
lib/rexml/parent.rb
Normal file
165
lib/rexml/parent.rb
Normal file
|
@ -0,0 +1,165 @@
|
|||
require "rexml/child"
|
||||
|
||||
module REXML
|
||||
# A parent has children, and has methods for accessing them. The Parent
|
||||
# class is never encountered except as the superclass for some other
|
||||
# object.
|
||||
class Parent < Child
  include Enumerable

  # Constructor
  # @param parent if supplied, will be set as the parent of this object
  def initialize parent=nil
    super(parent)
    @children = []
  end

  # Appends +object+ as the last child and makes this its parent.
  # @return the object that was added
  def add( object )
    object.parent = self
    @children << object
    object
  end

  alias :push :add
  alias :<< :push

  # Inserts +object+ as the first child and makes this its parent.
  def unshift( object )
    object.parent = self
    @children.unshift object
  end

  # Removes +object+ from the children and clears its parent.  Does
  # nothing (and returns nil) when it is not a child.
  def delete( object )
    return unless @children.include? object
    @children.delete object
    object.parent = nil
  end

  def each(&block)
    @children.each(&block)
  end

  def delete_if( &block )
    @children.delete_if(&block)
  end

  def delete_at( index )
    @children.delete_at index
  end

  def each_index( &block )
    @children.each_index(&block)
  end

  # Fetches a child at a given index
  # @param index the Integer index of the child to fetch
  def []( index )
    @children[index]
  end

  alias :each_child :each

  # Set an index entry.  See Array.[]=
  # @param index the index of the element to set
  # @param opt either the object to set, or an Integer length
  # @param child if opt is an Integer, this is the child to set
  # @return the child that was stored
  def []=( *args )
    args[-1].parent = self
    @children[*args[0..-2]] = args[-1]
  end

  # Inserts an child before another child
  # @param child1 this is either an xpath or an Element.  If an Element,
  # child2 will be inserted before child1 in the child list of the parent.
  # If an xpath, child2 will be inserted before the first child to match
  # the xpath.
  # @param child2 the child to insert
  # @return the parent (self)
  def insert_before( child1, child2 )
    if child1.kind_of? String
      child1 = XPath.first( self, child1 )
      child1.parent.insert_before child1, child2
    else
      ind = index(child1) or
        raise ArgumentError, "reference node is not a child of this parent"
      child2.parent.delete(child2) if child2.parent
      @children[ind,0] = child2
      child2.parent = self
    end
    self
  end

  # Inserts an child after another child
  # @param child1 this is either an xpath or an Element.  If an Element,
  # child2 will be inserted after child1 in the child list of the parent.
  # If an xpath, child2 will be inserted after the first child to match
  # the xpath.
  # @param child2 the child to insert
  # @return the parent (self)
  def insert_after( child1, child2 )
    if child1.kind_of? String
      child1 = XPath.first( self, child1 )
      child1.parent.insert_after child1, child2
    else
      ind = index(child1) or
        raise ArgumentError, "reference node is not a child of this parent"
      child2.parent.delete(child2) if child2.parent
      @children[ind+1,0] = child2
      child2.parent = self
    end
    self
  end

  # @return a copy of the child list
  def to_a
    @children.dup
  end

  # Fetches the index of a given child
  # @param child the child to get the index of
  # @return the index of the child, or nil if the object is not a child
  # of this parent.
  def index( child )
    count = -1
    # Children are matched by #hash, as before; the counter is only
    # returned when a match was actually found.
    found = @children.find { |i| count += 1 ; i.hash == child.hash }
    found.nil? ? nil : count
  end

  # @return the number of children of this parent
  def size
    @children.size
  end

  # Replaces one child with another, making sure the nodelist is correct
  # @param to_replace the child to replace (must be a Child)
  # @param replacement the child to insert into the nodelist (must be a
  # Child)
  def replace_child( to_replace, replacement )
    ind = @children.index( to_replace )
    return if ind.nil?
    # Overwrite the slot first, so the result does not depend on whether
    # Child#parent= also removes the node from its old parent.
    @children[ind] = replacement
    to_replace.parent = nil
    replacement.parent = self
  end

  # Deeply clones this object.  This creates a complete duplicate of this
  # Parent, including all descendants.
  # NOTE(review): relies on +clone+ giving the copy its own child list;
  # a plain Object#clone would share @children with the original.
  def deep_clone
    cl = clone()
    each do |child|
      if child.kind_of? Parent
        cl << child.deep_clone
      else
        cl << child.clone
      end
    end
    cl
  end

  alias :children :to_a

  def parent?
    true
  end
end
|
||||
end
|
44
lib/rexml/parseexception.rb
Normal file
44
lib/rexml/parseexception.rb
Normal file
|
@ -0,0 +1,44 @@
|
|||
module REXML
|
||||
# Raised for parse errors.  Optionally carries the source being read, the
# parser that failed, and an underlying exception that triggered it.
class ParseException < Exception
  attr_accessor :source, :parser, :continued_exception

  # message::   the error text
  # source::    object answering +current_line+ and +buffer+ (optional)
  # parser::    object answering +context+ (optional)
  # exception:: the exception being wrapped (optional)
  def initialize( message, source=nil, parser=nil, exception=nil )
    super(message)
    @source = source
    @parser = parser
    @continued_exception = exception
  end

  # Builds the full report: the wrapped exception (if any), this
  # exception's message, then source and parser context.
  def to_s
    # Always build into a fresh string: appending to the wrapped
    # exception's own message would grow it on every call.
    err = "".dup
    if @continued_exception
      # Quote the original exception
      err << @continued_exception.message.to_s
      err << "\n"
      # An exception that was never raised has no backtrace.
      trace = @continued_exception.backtrace
      err << trace[0..3].join("\n") if trace
      err << "\n...\n"
    end

    # This exception's own message
    err << super

    # Add contextual information
    err << "\n#{@source.current_line}\nLast 80 unconsumed characters:\n#{@source.buffer[0, 80].gsub(/\n/, ' ')}\n" if @source
    err << "\nContext:\n#{@parser.context}" if @parser
    err
  end

  # First element of the source's current_line, or nil without a source.
  def position
    @source.current_line[0] if @source
  end

  # Third element of the source's current_line, or nil without a source.
  def line
    @source.current_line[2] if @source
  end

  # The source's current_line, or nil without a source.
  def context
    @source.current_line if @source
  end
end
|
||||
end
|
391
lib/rexml/parsers/baseparser.rb
Normal file
391
lib/rexml/parsers/baseparser.rb
Normal file
|
@ -0,0 +1,391 @@
|
|||
require 'rexml/parseexception'
|
||||
require 'rexml/source'
|
||||
|
||||
module REXML
|
||||
module Parsers
|
||||
# = Using the Pull Parser
|
||||
# <em>This API is experimental, and subject to change.</em>
|
||||
# parser = PullParser.new( "<a>text<b att='val'/>txet</a>" )
|
||||
# while parser.has_next?
|
||||
# res = parser.next
|
||||
# puts res[1]['att'] if res.start_tag? and res[0] == 'b'
|
||||
# end
|
||||
# See the PullEvent class for information on the content of the results.
|
||||
# The data is identical to the arguments passed for the various events to
|
||||
# the StreamListener API.
|
||||
#
|
||||
# Notice that:
|
||||
# parser = PullParser.new( "<a>BAD DOCUMENT" )
|
||||
# while parser.has_next?
|
||||
# res = parser.next
|
||||
# raise res[1] if res.error?
|
||||
# end
|
||||
#
|
||||
# Nat Price gave me some good ideas for the API.
|
||||
class BaseParser
|
||||
NCNAME_STR= '[\w:][-\w\d.]*'
|
||||
NAME_STR= "(?:#{NCNAME_STR}:)?#{NCNAME_STR}"
|
||||
|
||||
NAMECHAR = '[-\w\d\.:]'
|
||||
NAME = "([\\w:]#{NAMECHAR}*)"
|
||||
NMTOKEN = "(?:#{NAMECHAR})+"
|
||||
NMTOKENS = "#{NMTOKEN}(\\s+#{NMTOKEN})*"
|
||||
REFERENCE = "(?:&#{NAME};|&#\\d+;|&#x[0-9a-fA-F]+;)"
|
||||
|
||||
DOCTYPE_START = /\A\s*<!DOCTYPE\s/um
|
||||
DOCTYPE_PATTERN = /\s*<!DOCTYPE\s+(.*?)(\[|>)/um
|
||||
ATTRIBUTE_PATTERN = /\s*(#{NAME_STR})\s*=\s*(["'])(.*?)\2/um
|
||||
COMMENT_START = /\A<!--/u
|
||||
COMMENT_PATTERN = /<!--(.*?)-->/um
|
||||
CDATA_START = /\A<!\[CDATA\[/u
|
||||
CDATA_PATTERN = /<!\[CDATA\[(.*?)\]\]>/um
|
||||
XMLDECL_START = /\A<\?xml\s/u;
|
||||
XMLDECL_PATTERN = /<\?xml\s+(.*?)\?>*/um
|
||||
INSTRUCTION_START = /\A<\?/u
|
||||
INSTRUCTION_PATTERN = /<\?(.*?)(\s+.*?)?\?>/um
|
||||
TAG_MATCH = /^<((?>#{NAME_STR}))\s*((?>\s+#{NAME_STR}\s*=\s*(["']).*?\3)*)\s*(\/)?>/um
|
||||
CLOSE_MATCH = /^\s*<\/(#{NAME_STR})\s*>/um
|
||||
|
||||
VERSION = /\bversion\s*=\s*["'](.*?)['"]/um
|
||||
ENCODING = /\bencoding=["'](.*?)['"]/um
|
||||
STANDALONE = /\bstandalone=["'](.*?)['"]/um
|
||||
|
||||
ENTITY_START = /^\s*<!ENTITY/
|
||||
IDENTITY = /^([!\*\w-]+)(\s+#{NCNAME_STR})?(\s+["'].*?['"])?(\s+['"].*?["'])?/u
|
||||
ELEMENTDECL_START = /^\s*<!ELEMENT/um
|
||||
ELEMENTDECL_PATTERN = /^\s*(<!ELEMENT.*?)>/um
|
||||
ENUMERATION = "\\(\\s*#{NMTOKEN}(?:\\s*\\|\\s*#{NMTOKEN})*\\s*\\)"
|
||||
NOTATIONTYPE = "NOTATION\\s+\\(\\s*#{NAME}(?:\\s*\\|\\s*#{NAME})*\\s*\\)"
|
||||
ENUMERATEDTYPE = "(?:(?:#{NOTATIONTYPE})|(?:#{ENUMERATION}))"
|
||||
ATTTYPE = "(CDATA|ID|IDREF|IDREFS|ENTITY|ENTITIES|NMTOKEN|NMTOKENS|#{ENUMERATEDTYPE})"
|
||||
ATTVALUE = "(?:\"((?:[^<&\"]|#{REFERENCE})*)\")|(?:'((?:[^<&']|#{REFERENCE})*)')"
|
||||
DEFAULTDECL = "(#REQUIRED|#IMPLIED|(?:(#FIXED\\s+)?#{ATTVALUE}))"
|
||||
ATTDEF = "\\s+#{NAME}\\s+#{ATTTYPE}\\s+#{DEFAULTDECL}"
|
||||
ATTLISTDECL_START = /^\s*<!ATTLIST/um
|
||||
ATTLISTDECL_PATTERN = /^\s*<!ATTLIST\s+#{NAME}(?:#{ATTDEF})*\s*>/um
|
||||
NOTATIONDECL_START = /^\s*<!NOTATION/um
|
||||
PUBLIC = /^\s*<!NOTATION\s+(\w[-\w]*)\s+(PUBLIC)\s+((["']).*?\4)\s*>/um
|
||||
SYSTEM = /^\s*<!NOTATION\s+(\w[-\w]*)\s+(SYSTEM)\s+((["']).*?\4)\s*>/um
|
||||
|
||||
TEXT_PATTERN = /\A([^<]*)/um
|
||||
|
||||
# Entity constants
|
||||
PUBIDCHAR = "\x20\x0D\x0Aa-zA-Z0-9-()+,./:=?;!*@$_%#"
|
||||
SYSTEMLITERAL = %Q{((?:"[^"]*")|(?:'[^']*'))}
|
||||
PUBIDLITERAL = %Q{("[#{PUBIDCHAR}']*"|'[#{PUBIDCHAR}]*')}
|
||||
EXTERNALID = "(?:(?:(SYSTEM)\\s+#{SYSTEMLITERAL})|(?:(PUBLIC)\\s+#{PUBIDLITERAL}\\s+#{SYSTEMLITERAL}))"
|
||||
NDATADECL = "\\s+NDATA\\s+#{NAME}"
|
||||
PEREFERENCE = "%#{NAME};"
|
||||
ENTITYVALUE = %Q{((?:"(?:[^%&"]|#{PEREFERENCE}|#{REFERENCE})*")|(?:'([^%&']|#{PEREFERENCE}|#{REFERENCE})*'))}
|
||||
PEDEF = "(?:#{ENTITYVALUE}|#{EXTERNALID})"
|
||||
ENTITYDEF = "(?:#{ENTITYVALUE}|(?:#{EXTERNALID}(#{NDATADECL})?))"
|
||||
PEDECL = "<!ENTITY\\s+(%)\\s+#{NAME}\\s+#{PEDEF}\\s*>"
|
||||
GEDECL = "<!ENTITY\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
|
||||
ENTITYDECL = /\s*(?:#{GEDECL})|(?:#{PEDECL})/um
|
||||
|
||||
EREFERENCE = /&(?!#{NAME};)/
|
||||
|
||||
# The predefined entities.  Each entry is
#   [ pattern matching the escaped form, escaped form, literal character ]
# normalize replaces element 2 by element 1; unnormalize replaces matches
# of element 0 by element 2.  The escaped forms had been reduced to the
# bare characters, which turned both directions into no-ops.
DEFAULT_ENTITIES = {
  'gt' => [/&gt;/, '&gt;', '>'],
  'lt' => [/&lt;/, '&lt;', '<'],
  'quot' => [/&quot;/, '&quot;', '"'],
  "apos" => [/&apos;/, "&apos;", "'"]
}
|
||||
|
||||
# Creates a parser reading from +source+; all set-up is done by stream=.
def initialize( source )
  self.stream = source
end
|
||||
|
||||
# Points the parser at a new input and resets all parsing state.
# source:: a String (wrapped in a Source), an IO (wrapped in an IOSource)
#          or a Source, which is used directly.
# Raises RuntimeError for any other kind of object.
def stream=( source )
  if source.kind_of? String
    @source = Source.new(source)
  elsif source.kind_of? IO
    @source = IOSource.new(source)
  elsif source.kind_of? Source
    @source = source
  else
    # Object#type no longer exists; #class reports the same information.
    raise "#{source.class} is not a valid input stream. It must be \n"+
      "either a String, IO, or Source."
  end
  @closed = nil
  @document_status = nil
  @tags = []
  @stack = []
  @entities = []
end
|
||||
|
||||
# Returns true if there are no more events; the exact negation of
# has_next?.
def empty?
  !has_next?
end
|
||||
|
||||
# Returns a true value if there are more events.  Synonymous with !empty?
# Events remain while the source still holds non-blank text, while events
# are queued on @stack, or while @closed is set.
def has_next?
  if @source.buffer.size == 0 && !@source.empty?
    @source.read
  end
  input_left = !@source.empty? && @source.buffer.strip.size > 0
  input_left || @stack.size > 0 || @closed
end
|
||||
|
||||
# Push an event back on the head of the stream.  This method
# has (theoretically) infinite depth.
# token:: the event to return from the next call to pull
def unshift token
  @stack.unshift(token)
end
|
||||
|
||||
# Peek at the +depth+ event in the stack.  The first element on the stack
# is at depth 0.  If +depth+ is -1, will parse to the end of the input
# stream and return the last event that was pulled.
# Be aware that this causes the stream to be parsed up to the +depth+
# event, so you can effectively pre-parse the entire document (pull the
# entire thing into memory) using this method.
# Raises RuntimeError when +depth+ is below -1.
def peek depth=0
  # %Q so that the offending value really appears in the message.
  raise %Q[Illegal argument "#{depth}"] if depth < -1
  temp = []
  if depth == -1
    temp.push(pull()) until empty?
  else
    # pull hands back queued events first, so they pass through temp and
    # are put back, in order, below.
    while @stack.size+temp.size < depth+1
      temp.push(pull())
    end
  end
  @stack += temp if temp.size > 0
  @stack[depth]
end
|
||||
|
||||
# Returns the next event as an Array whose first element is the event
# type symbol (:start_element, :end_element, :text, :comment, :cdata,
# :processing_instruction, :xmldecl, :start_doctype, :end_doctype,
# :elementdecl, :entitydecl, :attlistdecl, :notationdecl, :end_document)
# followed by that event's data.
#
# @document_status tracks the prolog: nil before the first markup has been
# examined, :in_doctype inside an internal DTD subset, :after_doctype
# afterwards.
#
# NOTE(review): @source.match( pattern, true ) is presumably the consuming
# form of match -- confirm against Source.
def pull
  return [ :end_document ] if empty?
  # An empty-element tag was returned last time; emit its end event now.
  if @closed
    x, @closed = @closed, nil
    return [ :end_element, x ]
  end
  # Events pushed back by unshift/peek take priority over new input.
  return @stack.shift if @stack.size > 0
  @source.read if @source.buffer.size==0
  if @document_status == nil
    @source.match( /^\s*/um, true )
    word = @source.match( /^\s*(<.*?)>/um )
    word = word[1] unless word.nil?
    case word
    when COMMENT_START
      return [ :comment, @source.match( COMMENT_PATTERN, true )[1] ]
    when XMLDECL_START
      results = @source.match( XMLDECL_PATTERN, true )[1]
      version = VERSION.match( results )
      version = version[1] unless version.nil?
      encoding = ENCODING.match(results)
      encoding = encoding[1] unless encoding.nil?
      # The source is told about the declared encoding (nil if absent).
      @source.encoding = encoding
      standalone = STANDALONE.match(results)
      standalone = standalone[1] unless standalone.nil?
      return [ :xmldecl, version, encoding, standalone]
    when INSTRUCTION_START
      return [ :processing_instruction, *@source.match(INSTRUCTION_PATTERN, true)[1,2] ]
    when DOCTYPE_START
      md = @source.match( DOCTYPE_PATTERN, true )
      identity = md[1]
      close = md[2]
      identity =~ IDENTITY
      name = $1
      raise "DOCTYPE is missing a name" if name.nil?
      pub_sys = $2.nil? ? nil : $2.strip
      long_name = $3.nil? ? nil : $3.strip
      uri = $4.nil? ? nil : $4.strip
      args = [ :start_doctype, name, pub_sys, long_name, uri ]
      if close == ">"
        # No internal subset: queue the matching :end_doctype right away.
        @document_status = :after_doctype
        @source.read if @source.buffer.size==0
        md = @source.match(/^\s*/um, true)
        @stack << [ :end_doctype ]
      else
        @document_status = :in_doctype
      end
      return args
    else
      # Anything else ends the prolog; fall through to content parsing.
      @document_status = :after_doctype
      @source.read if @source.buffer.size==0
      md = @source.match(/\s*/um, true)
    end
  end
  if @document_status == :in_doctype
    # NOTE(review): md is used without a nil check and this section sits
    # outside the begin/rescue below, so a failed match would surface as a
    # NoMethodError rather than a ParseException.
    md = @source.match(/\s*(.*?>)/um)
    case md[1]
    when ELEMENTDECL_START
      return [ :elementdecl, @source.match( ELEMENTDECL_PATTERN, true )[1] ]
    when ENTITY_START
      match = @source.match( ENTITYDECL, true ).to_a.compact
      match[0] = :entitydecl
      ref = false
      if match[1] == '%'
        ref = true
        match.delete_at 1
      end
      # Now we have to sort out what kind of entity reference this is
      if match[2] == 'SYSTEM'
        # External reference
        match[3] = match[3][1..-2] # PUBID
        match.delete_at(4) if match.size > 4 # Chop out NDATA decl
        # match is [ :entity, name, SYSTEM, pubid(, ndata)? ]
      elsif match[2] == 'PUBLIC'
        # External reference
        match[3] = match[3][1..-2] # PUBID
        match[4] = match[4][1..-2] # HREF
        # match is [ :entity, name, PUBLIC, pubid, href ]
      else
        # Internal entity: strip the quotes from the value
        match[2] = match[2][1..-2]
        match.pop if match.size == 4
        # match is [ :entity, name, value ]
      end
      # A parameter entity is marked by a trailing '%'
      match << '%' if ref
      return match
    when ATTLISTDECL_START
      md = @source.match( ATTLISTDECL_PATTERN, true )
      raise REXML::ParseException.new( "Bad ATTLIST declaration!", @source ) if md.nil?
      element = md[1]
      contents = md[0]

      pairs = {}
      # NOTE(review): ATTDEF is a String; String#scan matches a String
      # argument literally on current Rubies, so this would find nothing
      # -- confirm whether a Regexp was intended.
      values = md[0].scan( ATTDEF )
      values.each do |attdef|
        unless attdef[3] == "#IMPLIED"
          attdef.compact!
          val = attdef[3]
          val = attdef[4] if val == "#FIXED "
          pairs[attdef[0]] = val
        end
      end
      return [ :attlistdecl, element, pairs, contents ]
    when NOTATIONDECL_START
      md = nil
      if @source.match( PUBLIC )
        md = @source.match( PUBLIC, true )
      elsif @source.match( SYSTEM )
        md = @source.match( SYSTEM, true )
      else
        raise REXML::ParseException.new( "error parsing notation: no matching pattern", @source )
      end
      return [ :notationdecl, md[1], md[2], md[3] ]
    when /^\s*]\s*>/um
      @document_status = :after_doctype
      @source.match( /^\s*]\s*>/um, true )
      return [ :end_doctype ]
    end
  end
  begin
    if @source.buffer[0] == ?<
      if @source.buffer[1] == ?/
        # End tag: it must close the most recently opened element.
        last_tag = @tags.pop
        md = @source.match( CLOSE_MATCH, true )
        raise REXML::ParseException.new( "Missing end tag for '#{last_tag}' "+
          "(got \"#{md[1]}\")", @source) unless last_tag == md[1]
        return [ :end_element, last_tag ]
      elsif @source.buffer[1] == ?!
        md = @source.match(/\A(\s*[^>]*>)/um)
        #puts "SOURCE BUFFER = #{source.buffer}, #{source.buffer.size}"
        raise REXML::ParseException.new("Malformed node", @source) unless md
        case md[1]
        when CDATA_START
          return [ :cdata, @source.match( CDATA_PATTERN, true )[1] ]
        when COMMENT_START
          return [ :comment, @source.match( COMMENT_PATTERN, true )[1] ]
        else
          raise REXML::ParseException.new( "Declarations can only occur "+
            "in the doctype declaration.", @source)
        end
      elsif @source.buffer[1] == ??
        md = @source.match( INSTRUCTION_PATTERN, true )
        return [ :processing_instruction, md[1], md[2] ]
      else
        # Get the next tag
        md = @source.match(TAG_MATCH, true)
        raise REXML::ParseException.new("malformed XML: missing tag start", @source) unless md
        attrs = []
        if md[2].size > 0
          attrs = md[2].scan( ATTRIBUTE_PATTERN )
          raise REXML::ParseException.new( "error parsing attributes: [#{attrs.join ', '}], excess = \"#$'\"", @source) if $' and $'.strip.size > 0
        end

        if md[4]
          # Empty-element tag: remember the name so that the next call
          # returns the :end_element event.
          @closed = md[1]
        else
          @tags.push( md[1] )
        end
        attributes = {}
        # Each scan result is [ name, quote character, value ]
        attrs.each { |a,b,c| attributes[a] = c }
        return [ :start_element, md[1], attributes ]
      end
    else
      md = @source.match(TEXT_PATTERN, true)
      raise "no text to add" if md[0].length == 0
      # unnormalized = Text::unnormalize( md[1], self )
      # return PullEvent.new( :text, md[1], unnormalized )
      return [ :text, md[1] ]
    end
  rescue REXML::ParseException
    raise $!
  # NOTE(review): rescuing Exception also wraps interrupts and exit
  # requests in a ParseException -- confirm that this is intended.
  rescue Exception, NameError => error
    raise REXML::ParseException.new( "Exception parsing",
      @source, self, error )
  end
  return [ :dummy ]
end
|
||||
|
||||
# Looks up the replacement text for the entity named +reference+.
# entities:: optional Hash of name => value, consulted first
# Falls back to the literal character in DEFAULT_ENTITIES.  The value is
# passed through unnormalize before it is returned; nil is returned when
# the entity is unknown.
def entity( reference, entities )
  value = entities ? entities[ reference ] : nil
  unless value
    default = DEFAULT_ENTITIES[ reference ]
    value = default && default[2]
  end
  return nil unless value
  unnormalize( value, entities )
end
|
||||
|
||||
# Escapes all possible entities
# input::         the text to escape (left untouched; a copy is returned)
# entities::      optional Hash of name => value; every occurrence of a
#                 value is replaced by the reference &name;
# entity_filter:: optional collection of entity names that must not be
#                 substituted
def normalize( input, entities=nil, entity_filter=nil )
  # dup rather than clone, so that a frozen input can still be processed
  copy = input.dup
  # Doing it like this rather than in a loop improves the speed
  # Escape every ampersand that does not already start a reference.
  copy.gsub!( EREFERENCE, '&amp;' )
  entities.each do |key, value|
    copy.gsub!( value, "&#{key};" ) unless entity_filter and
                                entity_filter.include?(key)
  end if entities
  copy.gsub!( EREFERENCE, '&amp;' )
  DEFAULT_ENTITIES.each do |key, value|
    # value is [ pattern, escaped form, literal character ]
    copy.gsub!( value[2], value[1] )
  end
  copy
end
|
||||
|
||||
# Unescapes all possible entities
# string::   the text to unescape (left untouched; a copy is returned)
# entities:: optional Hash of name => value for declared entities
# filter::   optional collection of entity names to leave unexpanded
# Line ends are normalised to "\n" first; numeric character references
# become UTF-8 characters, then declared and predefined entities are
# expanded, and &amp; is handled last.
def unnormalize( string, entities=nil, filter=nil )
  rv = string.dup
  rv.gsub!( /\r\n?/, "\n" )
  # REFERENCE is a String, so it has to be compiled before scanning; a
  # String argument to scan is matched literally.
  matches = rv.scan( /#{REFERENCE}/ )
  return rv if matches.size == 0
  # Numeric references: &#65; or &#x41;, leading zeros ignored
  rv.gsub!( /&#0*((?:\d+)|(?:x[a-fA-F0-9]+));/ ) {|m|
    m=$1
    m = "0#{m}" if m[0] == ?x
    [Integer(m)].pack('U*')
  }
  # Keep only the names of the named references
  matches.collect!{|x|x[0]}.compact!
  if matches.size > 0
    matches.each do |entity_reference|
      unless filter and filter.include?(entity_reference)
        entity_value = entity( entity_reference, entities )
        if entity_value
          re = /&#{Regexp.escape( entity_reference )};/
          # Block form: the value is inserted verbatim, backslashes too
          rv.gsub!( re ) { entity_value }
        end
      end
    end
    matches.each do |entity_reference|
      unless filter and filter.include?(entity_reference)
        er = DEFAULT_ENTITIES[entity_reference]
        rv.gsub!( er[0], er[2] ) if er
      end
    end
    # Last, so that text such as &amp;lt; ends up as the literal "&lt;"
    rv.gsub!( /&amp;/, '&' )
  end
  rv
end
|
||||
end
|
||||
end
|
||||
end
|
56
lib/rexml/parsers/lightparser.rb
Normal file
56
lib/rexml/parsers/lightparser.rb
Normal file
|
@ -0,0 +1,56 @@
|
|||
require 'rexml/parsers/streamparser'
|
||||
require 'rexml/parsers/baseparser'
|
||||
require 'rexml/light/node'
|
||||
|
||||
module REXML
|
||||
module Parsers
|
||||
# Builds a tree of REXML::Light::Node objects from the events of a
# BaseParser.
class LightParser
  # stream:: the input, handed unchanged to BaseParser
  def initialize stream
    @stream = stream
    @parser = REXML::Parsers::BaseParser.new( stream )
  end

  # Rewinds the stream and hands it to the parser again.
  def rewind
    @stream.rewind
    @parser.stream = @stream
  end

  # Pulls events until :end_document and returns the :document root node.
  # Start events open a new context node, end events return to its
  # parent, and every other event becomes a child of the current context.
  def parse
    root = REXML::Light::Node.new([ :document ])
    context = root
    until (event = @parser.pull)[0] == :end_document
      kind = event[0]
      if kind == :end_doctype or kind == :end_element
        context = context.parent
        next
      end
      child = REXML::Light::Node.new(event)
      context << child
      child.parent = context
      context = child if kind == :start_element or kind == :start_doctype
    end
    root
  end
end
|
||||
|
||||
# An element is an array. The array contains:
|
||||
# 0 The parent element
|
||||
# 1 The tag name
|
||||
# 2 A hash of attributes
|
||||
# 3..-1 The child elements
|
||||
# An element is an array of size > 3
|
||||
# Text is a String
|
||||
# PIs are [ :processing_instruction, target, data ]
|
||||
# Comments are [ :comment, data ]
|
||||
# DocTypes are DocType structs
|
||||
# The root is an array with XMLDecls, Text, DocType, Array, Text
|
||||
end
|
||||
end
|
143
lib/rexml/parsers/pullparser.rb
Normal file
143
lib/rexml/parsers/pullparser.rb
Normal file
|
@ -0,0 +1,143 @@
|
|||
require 'rexml/parseexception'
|
||||
require 'rexml/parsers/baseparser'
|
||||
require 'rexml/xmltokens'
|
||||
|
||||
module REXML
|
||||
module Parsers
|
||||
# = Using the Pull Parser
|
||||
# <em>This API is experimental, and subject to change.</em>
|
||||
# parser = PullParser.new( "<a>text<b att='val'/>txet</a>" )
|
||||
# while parser.has_next?
|
||||
# res = parser.next
|
||||
# puts res[1]['att'] if res.start_tag? and res[0] == 'b'
|
||||
# end
|
||||
# See the PullEvent class for information on the content of the results.
|
||||
# The data is identical to the arguments passed for the various events to
|
||||
# the StreamListener API.
|
||||
#
|
||||
# Notice that:
|
||||
# parser = PullParser.new( "<a>BAD DOCUMENT" )
|
||||
# while parser.has_next?
|
||||
# res = parser.next
|
||||
# raise res[1] if res.error?
|
||||
# end
|
||||
#
|
||||
# Nat Price gave me some good ideas for the API.
|
||||
class PullParser < BaseParser
  include XMLTokens

  # stream:: the input, passed on to BaseParser
  def initialize stream
    super
    # Internal entity declarations seen so far, name => value
    @entities = {}
  end

  # Yields every remaining event.
  def each
    yield pull while has_next?
  end

  # Like BaseParser#peek, with the result wrapped in a PullEvent.
  # NOTE(review): the inherited peek obtains its events through this
  # class's pull, so what it stores is presumably already a PullEvent --
  # confirm that the double wrapping is intended.
  def peek depth=0
    raw = super
    PullEvent.new( raw )
  end

  # Returns the next event as a PullEvent.  Internal entity declarations
  # are recorded on the way, and text events get their unnormalized text
  # appended as an extra element.
  def pull
    event = super
    kind = event[0]
    if kind == :entitydecl
      @entities[ event[1] ] = event[2] unless event[2] =~ /PUBLIC|SYSTEM/
    elsif kind == :text
      event << unnormalize( event[1], @entities )
    end
    PullEvent.new( event )
  end
end
|
||||
|
||||
# A parsing event.  The contents of the event are accessed as an +Array+,
# and the type is given either by the ...? methods, or by calling
# +event_type+.  The contents of this object vary from event to event,
# but are identical to the arguments passed to +StreamListener+s for each
# event.
class PullEvent
  # arg:: the raw event Array; its first element is the event type symbol
  #       and the remaining elements are the event's content.
  def initialize(arg)
    @contents = arg
  end

  # The +index+th item of the event's content (the type symbol is skipped).
  def []( index )
    @contents[index + 1]
  end

  # The type of this event: the symbol the parser put first in the event,
  # e.g. :start_element, :end_element, :text, :processing_instruction,
  # :comment, :start_doctype, :attlistdecl, :elementdecl, :entitydecl,
  # :notationdecl, :cdata or :xmldecl.
  def event_type
    @contents[0]
  end

  # Content: [ String tag_name, Hash attributes ]
  def start_element?
    event_type == :start_element
  end

  # Content: [ String tag_name ]
  def end_element?
    event_type == :end_element
  end

  # Content: [ String raw_text, String unnormalized_text ]
  def text?
    event_type == :text
  end

  # Content: [ String target, String data ]
  def instruction?
    event_type == :processing_instruction
  end

  # Content: [ String text ]
  def comment?
    event_type == :comment
  end

  # Content: [ String name, String pub_sys, String long_name, String uri ]
  def doctype?
    event_type == :start_doctype
  end

  # Content: [ String element_name, Hash pairs, String declaration ]
  def attlistdecl?
    event_type == :attlistdecl
  end

  # Content: [ String text ]
  def elementdecl?
    event_type == :elementdecl
  end

  # Due to the wonders of DTDs, an entity declaration can be just about
  # anything.  There's no way to normalize it; you'll have to interpret the
  # content yourself.  However, the following is true:
  #
  # * If the entity declaration is an internal entity:
  #   [ String name, String value ]
  def entitydecl?
    event_type == :entitydecl
  end

  # Content: [ String name, String "PUBLIC" or "SYSTEM", String quoted_id ]
  def notationdecl?
    event_type == :notationdecl
  end

  # True for an event of type :entity.
  def entity?
    event_type == :entity
  end

  # Content: [ String text ]
  def cdata?
    event_type == :cdata
  end

  # Content: [ String version, String encoding, String standalone ]
  def xmldecl?
    event_type == :xmldecl
  end

  # True for an event of type :error.
  def error?
    event_type == :error
  end

  # "type: [content, ...]"
  def inspect
    "#{@contents[0]}: #{@contents[1..-1].inspect}"
  end
end
|
||||
end
|
||||
end
|
204
lib/rexml/parsers/sax2parser.rb
Normal file
204
lib/rexml/parsers/sax2parser.rb
Normal file
|
@ -0,0 +1,204 @@
|
|||
module REXML
|
||||
module Parsers
|
||||
class SAX2Parser
|
||||
# Creates a SAX2 parser reading from +source+ (handed to BaseParser) with
# no observers registered and empty tag/namespace stacks.
def initialize(source)
  @parser = BaseParser.new(source)
  @listeners, @procs = [], []
  @namespace_stack, @tag_stack = [], []
  @has_listeners = false
end
|
||||
|
||||
# Listen arguments:
|
||||
#
|
||||
# Symbol, Array, Block
|
||||
# Listen to Symbol events on Array elements
|
||||
# Symbol, Block
|
||||
# Listen to Symbol events
|
||||
# Array, Listener
|
||||
# Listen to all events on Array elements
|
||||
# Array, Block
|
||||
# Listen to :start_element events on Array elements
|
||||
# Listener
|
||||
# Listen to All events
|
||||
#
|
||||
# Symbol can be one of: :start_element, :end_element,
|
||||
# :start_prefix_mapping, :end_prefix_mapping, :characters,
|
||||
# :processing_instruction, :doctype, :attlistdecl, :elementdecl,
|
||||
# :entitydecl, :notationdecl, :cdata, :xmldecl, :comment
|
||||
#
|
||||
# Array contains regular expressions or strings which will be matched
|
||||
# against fully qualified element names.
|
||||
#
|
||||
# Listener must implement the methods in SAX2Listener
|
||||
#
|
||||
# Block will be passed the same arguments as a SAX2Listener method would
|
||||
# be, where the method name is the same as the matched Symbol.
|
||||
# See the SAX2Listener for more information.
|
||||
# Registers an observer. Accepted argument shapes:
#
#   Symbol, Array, Block  - block for Symbol events on the named elements
#   Symbol, Block         - block for Symbol events on every element
#   Array, Listener       - listener for all events on the named elements
#   Array, Block          - block for :start_element on the named elements
#   Listener              - listener for all events
#
# Every registration goes through #add, so an identical
# [event, matcher, observer] triple is stored only once whichever form
# is used.
def listen( *args, &blok )
  if args[0].kind_of? Symbol
    if args.size == 2
      # Previously appended to @procs directly, skipping add's
      # duplicate check.
      args[1].each { |match| add( [args[0], match, blok] ) }
    else
      add( [args[0], /.*/, blok] )
    end
  elsif args[0].kind_of? Array
    if args.size == 2
      args[0].each { |match| add( [nil, match, args[1]] ) }
    else
      args[0].each { |match| add( [ :start_element, match, blok ] ) }
    end
  else
    add([nil, /.*/, args[0]])
  end
end
|
||||
|
||||
# Removes an observer. With +listener+ given, every registration of that
# listener is dropped and @has_listeners is cleared once none remain;
# otherwise every registration of the given block is dropped.
def deafen( listener=nil, &blok )
  return @procs.delete_if { |entry| entry[-1] == blok } unless listener
  @listeners.delete_if { |entry| entry[-1] == listener }
  @has_listeners = false if @listeners.empty?
end
|
||||
|
||||
# Pulls events from the underlying parser until :end_document and
# dispatches them to the registered procs and listeners.
#
# * :start_document observers are notified first.
# * For :start_element the tag is pushed on @tag_stack; if any observers
#   exist, the element's xmlns:prefix attributes are recorded in a new
#   @namespace_stack frame and reported as start_prefix_mapping, then
#   start_element is reported as (uri, local name, qualified name,
#   attributes).
# * For :end_element the reverse happens: end_element is reported, the
#   namespace frame is popped and its prefixes are reported as
#   end_prefix_mapping.
# * :text is passed through @parser.normalize and reported as :characters.
# * The remaining listed event types are forwarded unchanged via #handle.
def parse
  @procs.each { |sym, match, block| block.call if sym == :start_document }
  @listeners.each { |sym, match, block|
    block.start_document if sym == :start_document or sym.nil?
  }
  loop do
    event = @parser.pull
    case event[0]
    when :end_document
      handle( :end_document )
      break
    when :start_element
      @tag_stack.push(event[1])
      # find the observers for namespaces
      procs = get_procs( :start_prefix_mapping, event[1] )
      listeners = get_listeners( :start_prefix_mapping, event[1] )
      if procs or listeners
        # break out the namespace declarations
        # The attributes live in event[2]
        nsdecl = event[2].find_all { |n, value| n =~ /^xmlns:/ }
        nsdecl.collect! { |n, value| [ n[6..-1], value ] }
        @namespace_stack.push({})
        nsdecl.each do |n,v|
          @namespace_stack[-1][n] = v
          # notify observers of namespaces
          procs.each { |ob| ob.call( n, v ) } if procs
          listeners.each { |ob| ob.start_prefix_mapping(n, v) } if listeners
        end
      end
      event[1] =~ Namespace::NAMESPLIT
      prefix = $1
      local = $2
      uri = get_namespace(prefix)
      # find the observers for start_element
      procs = get_procs( :start_element, event[1] )
      listeners = get_listeners( :start_element, event[1] )
      # notify observers
      procs.each { |ob| ob.call( uri, local, event[1], event[2] ) } if procs
      listeners.each { |ob|
        ob.start_element( uri, local, event[1], event[2] )
      } if listeners
    when :end_element
      @tag_stack.pop
      event[1] =~ Namespace::NAMESPLIT
      prefix = $1
      local = $2
      uri = get_namespace(prefix)
      # find the observers for end_element
      procs = get_procs( :end_element, event[1] )
      listeners = get_listeners( :end_element, event[1] )
      # notify observers
      procs.each { |ob| ob.call( uri, local, event[1] ) } if procs
      listeners.each { |ob|
        ob.end_element( uri, local, event[1] )
      } if listeners

      namespace_mapping = @namespace_stack.pop
      # find the observers for namespaces
      procs = get_procs( :end_prefix_mapping, event[1] )
      listeners = get_listeners( :end_prefix_mapping, event[1] )
      if procs or listeners
        # Block parameters deliberately do not reuse the names of the
        # locals above.
        namespace_mapping.each do |ended_prefix, _ended_uri|
          # notify observers of namespaces
          procs.each { |ob| ob.call( ended_prefix ) } if procs
          listeners.each { |ob| ob.end_prefix_mapping(ended_prefix) } if listeners
        end
      end
    when :text
      normalized = @parser.normalize( event[1] )
      handle( :characters, normalized )
    when :processing_instruction, :comment, :doctype, :attlistdecl,
      :elementdecl, :entitydecl, :cdata, :notationdecl, :xmldecl
      handle( *event )
    end
  end
end
|
||||
|
||||
private
|
||||
# Dispatches a +symbol+ event, scoped to the tag currently on top of
# @tag_stack, to every matching proc (via call) and every matching
# listener (by sending it the method named after the symbol).
def handle( symbol, *arguments )
  current_tag = @tag_stack[-1]
  callbacks = get_procs( symbol, current_tag )
  observers = get_listeners( symbol, current_tag )
  # notify observers
  callbacks.each { |callback| callback.call( *arguments ) } if callbacks
  observers.each { |observer| observer.send( symbol.to_s, *arguments ) } if observers
end
|
||||
|
||||
# The following methods are duplicates, but it is faster than using
|
||||
# a helper
|
||||
# Returns the registered procs interested in +symbol+ events for the
# element +name+, or nil when no procs are registered at all.
#
# An entry matches when its event symbol is nil or equals +symbol+, and
# +name+ is nil, equals the entry's matcher, or matches it as a Regexp.
def get_procs( symbol, name )
  return nil if @procs.empty?
  interested = @procs.find_all do |sym, match, _block|
    next false unless sym.nil? or symbol == sym
    name.nil? or name == match or (match.kind_of?(Regexp) and name =~ match)
  end
  interested.collect { |entry| entry[-1] }
end
|
||||
# Returns the registered listeners interested in +symbol+ events for the
# element +name+, or nil when no listeners are registered at all.
#
# The matching rule is the same as for procs: the entry's symbol is nil
# or equal to +symbol+, and +name+ is nil, equal to the entry's matcher,
# or matched by it when the matcher is a Regexp.
def get_listeners( symbol, name )
  return nil if @listeners.empty?
  interested = @listeners.find_all do |sym, match, _listener|
    event_ok = (sym.nil? or symbol == sym)
    name_ok = (name.nil? or name == match or
               (match.kind_of?(Regexp) and name =~ match))
    event_ok and name_ok
  end
  interested.collect { |entry| entry[-1] }
end
|
||||
|
||||
# Stores an [event symbol, matcher, observer] entry unless an equal entry
# is already present. Entries whose observer is a Proc go to @procs;
# anything else goes to @listeners and sets @has_listeners.
def add( pair )
  observer_is_proc = pair[-1].kind_of?(Proc)
  registry = observer_is_proc ? @procs : @listeners
  outcome = (registry << pair unless registry.include?(pair))
  return outcome if observer_is_proc
  @has_listeners = true
end
|
||||
|
||||
# Resolves +prefix+ to its namespace URI using the declarations currently
# on @namespace_stack, or returns nil when the prefix is not declared.
#
# The stack is searched from the innermost (most recently pushed) frame
# outwards, so a prefix redeclared on a nested element shadows the outer
# declaration. The previous outermost-first search returned the outer URI.
def get_namespace( prefix )
  scope = @namespace_stack.reverse.find { |ns| not ns[prefix].nil? }
  scope[prefix] unless scope.nil?
end
|
||||
end
|
||||
end
|
||||
end
|
33
lib/rexml/parsers/streamparser.rb
Normal file
33
lib/rexml/parsers/streamparser.rb
Normal file
|
@ -0,0 +1,33 @@
|
|||
module REXML
  module Parsers
    # Pulls events from a BaseParser and forwards each one to a listener
    # object as a method call.
    class StreamParser
      # +source+ is handed to BaseParser; +listener+ receives the events.
      def initialize source, listener
        @listener = listener
        @parser = BaseParser.new( source )
      end

      # Reads events until :end_document and forwards them:
      #
      #   :start_element           -> listener.tag_start( name, attributes )
      #   :end_element             -> listener.tag_end( name )
      #   :text                    -> listener.text( unnormalized text )
      #   :processing_instruction  -> listener.instruction( event[1], event[2] )
      #   :comment, :doctype, :attlistdecl, :elementdecl, :entitydecl,
      #   :cdata, :notationdecl, :xmldecl
      #                            -> listener.<event type>( remaining items )
      #
      # Other event types are ignored.
      def parse
        while true
          event = @parser.pull
          kind = event[0]
          case kind
          when :end_document
            return
          when :text
            @listener.text( @parser.unnormalize( event[1] ) )
          when :start_element
            @listener.tag_start( event[1], event[2] )
          when :end_element
            @listener.tag_end( event[1] )
          when :processing_instruction
            @listener.instruction( *event[1,2] )
          when :comment, :doctype, :attlistdecl,
            :elementdecl, :entitydecl, :cdata, :notationdecl, :xmldecl
            @listener.send( kind.to_s, *event[1..-1] )
          end
        end
      end
    end
  end
end
|
52
lib/rexml/parsers/ultralightparser.rb
Normal file
52
lib/rexml/parsers/ultralightparser.rb
Normal file
|
@ -0,0 +1,52 @@
|
|||
require 'rexml/parsers/streamparser'
|
||||
require 'rexml/parsers/baseparser'
|
||||
|
||||
module REXML
  module Parsers
    # Builds a tree of plain nested arrays from the events of a BaseParser.
    class UltraLightParser
      # +stream+ is kept for #rewind and handed to BaseParser.
      def initialize stream
        @stream = stream
        @parser = REXML::Parsers::BaseParser.new( stream )
      end

      # Rewinds the stream and re-attaches it to the parser.
      def rewind
        @stream.rewind
        @parser.stream = @stream
      end

      # Consumes events until :end_document and returns the root array.
      #
      # :start_element and :doctype events open a new context: the event
      # array is appended to the current context, a reference to that
      # parent context is inserted at index 1, and the event array becomes
      # the current context. :end_element and :end_doctype return to the
      # parent. Every other event array is appended to the current context
      # unchanged.
      def parse
        root = context = []
        while true
          event = @parser.pull
          case event[0]
          when :end_document
            return root
          when :end_doctype, :end_element
            # index 1 of an opened node holds its parent context
            context = context[1]
          when :start_element, :doctype
            context << event
            event[1,0] = [context]
            context = event
          else
            context << event
          end
        end
      end
    end

    # Layout of the result, as built by #parse:
    #
    # * The root is an array holding the top-level event arrays in
    #   document order.
    # * A node opened by :start_element or :doctype is the pulled event
    #   array with its parent inserted at index 1:
    #     0      the event type symbol
    #     1      the parent context (the root array or another node)
    #     2..    the remaining fields of the pulled event, followed by the
    #            node's children in document order
    # * Every other event is stored exactly as pulled, event type first.
  end
end
|
598
lib/rexml/parsers/xpathparser.rb
Normal file
598
lib/rexml/parsers/xpathparser.rb
Normal file
|
@ -0,0 +1,598 @@
|
|||
require 'rexml/namespace'
|
||||
require 'rexml/xmltokens'
|
||||
|
||||
module REXML
|
||||
module Parsers
|
||||
# You don't want to use this class. Really. Use XPath, which is a wrapper
|
||||
# for this class. Believe me. You don't want to poke around in here.
|
||||
# There is strange, dark magic at work in this code. Beware. Go back! Go
|
||||
# back while you still can!
|
||||
class XPathParser
|
||||
include XMLTokens
|
||||
LITERAL = /^'([^']*)'|^"([^"]*)"/u
|
||||
|
||||
# Installs +namespaces+ as the namespace context of the Functions module
# and remembers it on this parser.
def namespaces=( namespaces )
  Functions.namespace_context = namespaces
  @namespaces = namespaces
end
|
||||
|
||||
# Parses the XPath expression +path+ and returns the flat token array
# produced by LocationPath.
#
# Whitespace directly after "(" or "[" and directly before ")" or "]" is
# ignorable and stripped first. The stripping is done on a copy, so the
# caller's string is left untouched and frozen strings are accepted.
def parse path
  path = path.gsub(/([\(\[])\s+/, '\1') # Strip ignorable spaces
  path.gsub!( /\s+([\]\)])/, '\1' )
  parsed = []
  LocationPath(path, parsed)
  parsed
end
|
||||
|
||||
# Parses +path+ as the body of a single predicate (it is wrapped in
# brackets and handed to Predicate) and returns the parsed tokens.
def predicate path
  tokens = []
  bracketed = "[#{path}]"
  Predicate( bracketed, tokens )
  tokens
end
|
||||
|
||||
# Renders a parsed path (token array) back to text. The array is consumed:
# tokens are shifted off +path+ as they are rendered.
#
#   axis symbol            -> "<axis>::", preceded by "/" unless first
#   :any                   -> "*"
#   :qname, prefix, name   -> "prefix:name", or "name" for an empty prefix
#   :predicate, tokens     -> "[" + predicate_to_string( tokens ) + " ]"
#   anything else          -> "UNKNOWN(<inspect>)", preceded by "/" unless first
def to_string( path )
  out = ""
  until path.size == 0
    token = path[0]
    case token
    when :ancestor, :ancestor_or_self, :attribute, :child, :descendant, :descendant_or_self, :following, :following_sibling, :namespace, :parent, :preceding, :preceding_sibling, :self
      path.shift
      out << "/" unless out.size == 0
      out << token.to_s << "::"
    when :any
      path.shift
      out << "*"
    when :qname
      path.shift
      prefix = path.shift
      name = path.shift
      out << prefix << ":" if prefix.size > 0
      out << name
    when :predicate
      path.shift
      out << '['
      out << predicate_to_string( path.shift )
      out << ' ]'
    else
      out << "/" unless out.size == 0
      out << "UNKNOWN(" << path.shift.inspect << ")"
    end
  end
  out
end
|
||||
|
||||
# Renders a parsed predicate expression (nested token arrays) as text,
# consuming +path+ while doing so. Runs of spaces in the result are
# squeezed to a single space.
#
#   [ op, left, right ]     -> " left op right "
#   [ :function, name, x ]  -> "name( " + rendering of x + " )"
#   [ :literal, value ]     -> " " + value.inspect + " "
#   anything else           -> rendered through to_string
def predicate_to_string( path )
  text = ""
  case path[0]
  when :and, :or, :mult, :plus, :minus, :neq, :eq, :lt, :gt, :lteq, :gteq, :div, :mod, :union
    operator = path.shift
    lhs = predicate_to_string( path.shift )
    rhs = predicate_to_string( path.shift )
    text << " " << lhs << " " << operator.to_s << " " << rhs << " "
  when :function
    path.shift
    fname = path.shift
    text << fname << "( "
    text << predicate_to_string( path.shift )
    text << " )"
  when :literal
    path.shift
    text << " " << path.shift.inspect << " "
  else
    text << " " << to_string( path ) << " "
  end
  text.squeeze(" ")
end
|
||||
|
||||
private
|
||||
#LocationPath
|
||||
# | RelativeLocationPath
|
||||
# | '/' RelativeLocationPath?
|
||||
# | '//' RelativeLocationPath
|
||||
# LocationPath
#   | RelativeLocationPath
#   | '/' RelativeLocationPath?
#   | '//' RelativeLocationPath
#
# A leading "/" emits :document; a leading "//" additionally emits
# :descendant_or_self, :node. Whatever remains is handed to
# RelativeLocationPath, whose result is returned. Returns nil when
# nothing remains after the leading slashes.
def LocationPath path, parsed
  path = path.strip
  if path[0] == ?/
    parsed << :document
    consumed = 1
    if path[1] == ?/
      parsed << :descendant_or_self << :node
      consumed = 2
    end
    path = path[consumed..-1]
  end
  return nil unless path.size > 0
  RelativeLocationPath( path, parsed )
end
|
||||
|
||||
#RelativeLocationPath
|
||||
# | Step
|
||||
# | (AXIS_NAME '::' | '@' | '') AxisSpecifier
|
||||
# NodeTest
|
||||
# Predicate
|
||||
# | '.' | '..' AbbreviatedStep
|
||||
# | RelativeLocationPath '/' Step
|
||||
# | RelativeLocationPath '//' Step
|
||||
AXIS = /^(ancestor|ancestor-or-self|attribute|child|descendant|descendant-or-self|following|following-sibling|namespace|parent|preceding|preceding-sibling|self)::/
|
||||
# Parses a sequence of steps separated by "/" or "//" and appends their
# tokens to +parsed+. Returns the unparsed remainder of +path+.
#
# Each step is either an abbreviated step ("." -> :self, :node and
# ".." -> :parent, :node) or an axis ("@" -> :attribute, an explicit
# "axis::" prefix, otherwise :child) followed by a NodeTest and optional
# Predicates. A "//" separator emits :descendant_or_self, :node. Parsing
# stops at the first character that is not a step separator.
def RelativeLocationPath path, parsed
  until path.size == 0
    if path[0] == ?.
      # (. or ..)
      if path[1] == ?.
        parsed << :parent << :node
        path = path[2..-1]
      else
        parsed << :self << :node
        path = path[1..-1]
      end
    else
      # (axis or @ or <child::>) nodetest predicate
      if path[0] == ?@
        parsed << :attribute
        path = path[1..-1]
      elsif path =~ AXIS
        parsed << $1.tr('-','_').intern
        path = $'
      else
        parsed << :child
      end

      step = []
      path = NodeTest( path, step )
      path = Predicate( path, step ) if path[0] == ?[
      parsed.concat( step )
    end

    break if path.size == 0
    # Anything other than a step separator ends the relative path.
    return path unless path[0] == ?/
    if path[1] == ?/
      parsed << :descendant_or_self << :node
      path = path[2..-1]
    else
      path = path[1..-1]
    end
  end
  path
end
|
||||
|
||||
# Returns a 1-1 map of the nodeset
|
||||
# The contents of the resulting array are either:
|
||||
# true/false, if a positive match
|
||||
# String, if a name match
|
||||
#NodeTest
|
||||
# | ('*' | NCNAME ':' '*' | QNAME) NameTest
|
||||
# | NODE_TYPE '(' ')' NodeType
|
||||
# | PI '(' LITERAL ')' PI
|
||||
# | '[' expr ']' Predicate
|
||||
NCNAMETEST= /^(#{NCNAME_STR}):\*/u
|
||||
QNAME = Namespace::NAMESPLIT
|
||||
NODE_TYPE = /^(comment|text|node)\(\s*\)/m
|
||||
PI = /^processing-instruction\(/
|
||||
# NodeTest
#   | ('*' | NCNAME ':' '*' | QNAME)   NameTest
#   | NODE_TYPE '(' ')'                NodeType
#   | PI '(' LITERAL? ')'              processing-instruction test
#
# Appends the tokens for the node test at the start of +path+ to +parsed+
# and returns the remaining text:
#
#   "*"                          -> :any
#   "comment()" etc.             -> the node type as a symbol
#   "processing-instruction(x)"  -> :processing_instruction, literal
#                                   (nil when no literal is given)
#   "prefix:*"                   -> :namespace, prefix
#   "prefix:name" / "name"       -> :qname, prefix ("" if absent), name
#
# Raises ParseException when a processing-instruction test has neither a
# literal nor a closing parenthesis, or is not closed after its literal.
def NodeTest path, parsed
  case path
  when /^\*/
    path = $'
    parsed << :any
  when NODE_TYPE
    type = $1
    path = $'
    parsed << type.tr('-', '_').intern
  when PI
    path = $'
    literal = nil
    if path =~ /^\s*\)/
      # No argument: consume the closing parenthesis so that whatever
      # follows the test is still parsed.
      path = $'
    else
      unless path =~ LITERAL
        raise ParseException.new("Missing literal or ')' in processing instruction test")
      end
      # LITERAL captures single-quoted text in $1 and double-quoted
      # text in $2.
      literal = $1 || $2
      path = $'
      raise ParseException.new("Missing ')' after processing instruction") if path[0] != ?)
      path = path[1..-1]
    end
    parsed << :processing_instruction
    parsed << literal
  when NCNAMETEST
    prefix = $1
    path = $'
    parsed << :namespace
    parsed << prefix
  when QNAME
    prefix = $1
    name = $2
    path = $'
    prefix = "" unless prefix
    parsed << :qname
    parsed << prefix
    parsed << name
  end
  return path
end
|
||||
|
||||
# Filters the supplied nodeset on the predicate(s)
|
||||
# Parses every bracketed predicate at the start of +path+. For each one,
# :predicate and an array holding its parsed expression are appended to
# +parsed+. Returns the text after the last predicate, or nil when +path+
# does not start with "[".
def Predicate path, parsed
  return nil unless path[0] == ?[
  bodies = []
  while path[0] == ?[
    path, group = get_group(path)
    # strip the surrounding brackets
    bodies << group[1..-2] if group
  end
  bodies.each do |body|
    tree = []
    parsed << :predicate << tree
    OrExpr(body, tree)
  end
  path
end
|
||||
|
||||
# The following return arrays of true/false, a 1-1 mapping of the
|
||||
# supplied nodeset, except for axe(), which returns a filtered
|
||||
# nodeset
|
||||
|
||||
#| OrExpr S 'or' S AndExpr
|
||||
#| AndExpr
|
||||
#| OrExpr S 'or' S AndExpr
#| AndExpr
#
# Parses a left-associative chain of AndExprs joined by " or ". The
# resulting tree replaces +parsed+ when +parsed+ is empty and is appended
# to it otherwise. Returns the unparsed remainder.
def OrExpr path, parsed
  tree = []
  rest = AndExpr( path, tree )
  unless rest == path
    while rest =~ /^\s*( or )/
      remainder = $'
      rhs = []
      tree = [ :or, tree, rhs ]
      rest = AndExpr( remainder, rhs )
    end
  end
  unless tree.empty?
    if parsed.empty?
      parsed.replace( tree )
    else
      parsed << tree
    end
  end
  rest
end
|
||||
|
||||
#| AndExpr S 'and' S EqualityExpr
|
||||
#| EqualityExpr
|
||||
#| AndExpr S 'and' S EqualityExpr
#| EqualityExpr
#
# Parses a left-associative chain of EqualityExprs joined by " and ". The
# resulting tree replaces +parsed+ when +parsed+ is empty and is appended
# to it otherwise. Returns the unparsed remainder.
def AndExpr path, parsed
  tree = []
  rest = EqualityExpr( path, tree )
  unless rest == path
    while rest =~ /^\s*( and )/
      remainder = $'
      rhs = []
      tree = [ :and, tree, rhs ]
      rest = EqualityExpr( remainder, rhs )
    end
  end
  unless tree.empty?
    if parsed.empty?
      parsed.replace( tree )
    else
      parsed << tree
    end
  end
  rest
end
|
||||
|
||||
#| EqualityExpr ('=' | '!=') RelationalExpr
|
||||
#| RelationalExpr
|
||||
#| EqualityExpr ('=' | '!=') RelationalExpr
#| RelationalExpr
#
# Parses a left-associative chain of RelationalExprs joined by "=" (:eq)
# or "!=" (:neq). The resulting tree replaces +parsed+ when +parsed+ is
# empty and is appended to it otherwise. Returns the unparsed remainder.
def EqualityExpr path, parsed
  tree = []
  rest = RelationalExpr( path, tree )
  unless rest == path
    while rest =~ /^\s*(!?=)\s*/
      operator, remainder = $1, $'
      rhs = []
      tree = [ (operator[0,1] == "!" ? :neq : :eq), tree, rhs ]
      rest = RelationalExpr( remainder, rhs )
    end
  end
  unless tree.empty?
    if parsed.empty?
      parsed.replace( tree )
    else
      parsed << tree
    end
  end
  rest
end
|
||||
|
||||
#| RelationalExpr ('<' | '>' | '<=' | '>=') AdditiveExpr
|
||||
#| AdditiveExpr
|
||||
#| RelationalExpr ('<' | '>' | '<=' | '>=') AdditiveExpr
#| AdditiveExpr
#
# Parses a left-associative chain of AdditiveExprs joined by a comparison
# operator (:lt, :gt, :lteq, :gteq). The resulting tree replaces +parsed+
# when +parsed+ is empty and is appended to it otherwise. Returns the
# unparsed remainder.
def RelationalExpr path, parsed
  tree = []
  rest = AdditiveExpr( path, tree )
  unless rest == path
    while rest =~ /^\s*([<>]=?)\s*/
      operator, remainder = $1, $'
      opname = (operator[0,1] == "<" ? "lt" : "gt")
      opname += "eq" if operator[-1,1] == "="
      rhs = []
      tree = [ opname.intern, tree, rhs ]
      rest = AdditiveExpr( remainder, rhs )
    end
  end
  unless tree.empty?
    if parsed.empty?
      parsed.replace( tree )
    else
      parsed << tree
    end
  end
  rest
end
|
||||
|
||||
#| AdditiveExpr ('+' | S '-') MultiplicativeExpr
|
||||
#| MultiplicativeExpr
|
||||
#| AdditiveExpr ('+' | S '-') MultiplicativeExpr
#| MultiplicativeExpr
#
# Parses a left-associative chain of MultiplicativeExprs joined by "+"
# (:plus) or " -" (:minus; the minus sign must be preceded by a space).
# The resulting tree replaces +parsed+ when +parsed+ is empty and is
# appended to it otherwise. Returns the unparsed remainder.
def AdditiveExpr path, parsed
  tree = []
  rest = MultiplicativeExpr( path, tree )
  unless rest == path
    while rest =~ /^\s*(\+| -)\s*/
      operator, remainder = $1, $'
      rhs = []
      tree = [ (operator[0,1] == "+" ? :plus : :minus), tree, rhs ]
      rest = MultiplicativeExpr( remainder, rhs )
    end
  end
  unless tree.empty?
    if parsed.empty?
      parsed.replace( tree )
    else
      parsed << tree
    end
  end
  rest
end
|
||||
|
||||
#| MultiplicativeExpr ('*' | S ('div' | 'mod') S) UnaryExpr
|
||||
#| UnaryExpr
|
||||
#| MultiplicativeExpr ('*' | S ('div' | 'mod') S) UnaryExpr
#| UnaryExpr
#
# Parses a left-associative chain of UnaryExprs joined by "*" (:mult),
# " div " (:div) or " mod " (:mod). The resulting tree replaces +parsed+
# when +parsed+ is empty and is appended to it otherwise. Returns the
# unparsed remainder.
def MultiplicativeExpr path, parsed
  tree = []
  rest = UnaryExpr( path, tree )
  unless rest == path
    while rest =~ /^\s*(\*| div | mod )\s*/
      operator, remainder = $1, $'
      kind = if operator[0,1] == "*"
               :mult
             elsif operator.include?( "div" )
               :div
             else
               :mod
             end
      rhs = []
      tree = [ kind, tree, rhs ]
      rest = UnaryExpr( remainder, rhs )
    end
  end
  unless tree.empty?
    if parsed.empty?
      parsed.replace( tree )
    else
      parsed << tree
    end
  end
  rest
end
|
||||
|
||||
#| '-' UnaryExpr
|
||||
#| UnionExpr
|
||||
#| '-' UnaryExpr
#| UnionExpr
#
# Strips any leading run of "-" signs; an odd number of them emits :neg.
# The UnionExpr that follows is parsed and its tokens are appended to
# +parsed+. Returns the unparsed remainder.
def UnaryExpr path, parsed
  path =~ /^(\-*)/
  minus_signs = $1
  path = $'
  parsed << :neg if minus_signs and minus_signs.size % 2 != 0

  operand = []
  path = UnionExpr( path, operand )
  parsed.concat( operand )
  path
end
|
||||
|
||||
#| UnionExpr '|' PathExpr
|
||||
#| PathExpr
|
||||
#| UnionExpr '|' PathExpr
#| PathExpr
#
# Parses a left-associative chain of PathExprs joined by "|" (:union).
# The resulting tree replaces +parsed+ when +parsed+ is empty and is
# appended to it otherwise. Returns the unparsed remainder.
def UnionExpr path, parsed
  tree = []
  rest = PathExpr( path, tree )
  unless rest == path
    while rest =~ /^\s*(\|)\s*/
      remainder = $'
      rhs = []
      tree = [ :union, tree, rhs ]
      rest = PathExpr( remainder, rhs )
    end
  end
  unless tree.empty?
    if parsed.empty?
      parsed.replace( tree )
    else
      parsed << tree
    end
  end
  rest
end
|
||||
|
||||
#| LocationPath
|
||||
#| FilterExpr ('/' | '//') RelativeLocationPath
|
||||
#| LocationPath
#| FilterExpr ('/' | '//') RelativeLocationPath
#
# Skips leading whitespace, then tries a FilterExpr. If that consumed
# input and is followed by "/", the rest is parsed as a relative location
# path. Otherwise, if the remaining text looks like the start of a
# location path, it is parsed by LocationPath. Returns the unparsed
# remainder.
def PathExpr path, parsed
  path =~ /^\s*/
  path = $'
  tokens = []
  rest = FilterExpr( path, tokens )
  if rest != path and rest and rest[0] == ?/
    # NOTE(review): this early return never copies +tokens+ into
    # +parsed+, so the filter expression and the path that follows it
    # are dropped. Kept as found - confirm whether that is intended.
    return RelativeLocationPath(rest, tokens)
  end
  rest = LocationPath(rest, tokens) if rest =~ /^[\/\.\@\[\w_*]/
  parsed.concat(tokens)
  rest
end
|
||||
|
||||
#| FilterExpr Predicate
|
||||
#| PrimaryExpr
|
||||
#| FilterExpr Predicate
#| PrimaryExpr
#
# Parses a PrimaryExpr and any predicates directly following it, appends
# the tokens to +parsed+ and returns the unparsed remainder (whatever
# PrimaryExpr or Predicate returned).
def FilterExpr path, parsed
  collected = []
  path = PrimaryExpr( path, collected )
  if path and path[0] == ?[
    path = Predicate( path, collected )
  end
  parsed.concat( collected )
  path
end
|
||||
|
||||
#| VARIABLE_REFERENCE
|
||||
#| '(' expr ')'
|
||||
#| LITERAL
|
||||
#| NUMBER
|
||||
#| FunctionCall
|
||||
VARIABLE_REFERENCE = /^\$(#{NAME_STR})/u
|
||||
NUMBER = /^(\d*\.?\d+)/
|
||||
# Names that look like function calls but are node-type tests. The
# alternation is grouped so the anchors apply to every name; ungrouped,
# any name merely containing "text" was treated as a node type.
NT = /^(?:comment|text|processing-instruction|node)$/

# PrimaryExpr
#   | VARIABLE_REFERENCE
#   | '(' expr ')'
#   | LITERAL
#   | NUMBER
#   | FunctionCall
#
# Appends the tokens of the primary expression at the start of +path+ to
# +parsed+ and returns the unparsed remainder:
#
#   "$name"           -> :variable, name
#   "fname(args)"     -> :function, fname, argument trees (via FunctionCall)
#   literal / number  -> :literal, text
#   "(expr)"          -> the tokens of the parsed sub-expression
#
# When the text starts with a node-type test such as "text()", nothing is
# consumed and +path+ is returned unchanged, so the caller can parse it as
# a location path. When nothing matches, +path+ is also returned unchanged.
def PrimaryExpr path, parsed
  case path
  when VARIABLE_REFERENCE
    varname = $1
    path = $'
    parsed << :variable
    parsed << varname
  when /^(\w[-\w]*)(?:\()/
    fname = $1
    after_paren = $'
    # Not a function call: hand the untouched text back. Returning nil
    # here made the caller lose the node test altogether.
    return path if fname =~ NT
    parsed << :function
    parsed << fname
    path = FunctionCall(after_paren, parsed)
  when LITERAL, NUMBER
    # LITERAL captures in $1 (single quotes) or $2 (double quotes);
    # NUMBER captures in $1.
    varname = $1.nil? ? $2 : $1
    path = $'
    parsed << :literal
    parsed << varname
  when /^\(/
    path, contents = get_group(path)
    contents = contents[1..-2]
    n = []
    OrExpr( contents, n )
    parsed.concat(n)
  end
  path
end
|
||||
|
||||
#| FUNCTION_NAME '(' ( expr ( ',' expr )* )? ')'
|
||||
#| FUNCTION_NAME '(' ( expr ( ',' expr )* )? ')'
#
# +rest+ is the text after the opening parenthesis of a function call.
# The arguments are split with parse_args, each is parsed with OrExpr,
# and the array of argument trees is appended to +parsed+. Returns the
# text following the closing parenthesis.
def FunctionCall rest, parsed
  remainder, raw_arguments = parse_args(rest)
  argset = raw_arguments.collect do |raw|
    tree = []
    OrExpr( raw, tree )
    tree
  end
  parsed << argset
  remainder
end
|
||||
|
||||
# get_group( '[foo]bar' ) -> ['bar', '[foo]']
|
||||
# Splits off the balanced group at the start of +string+.
#
#   get_group( '[foo]bar' ) -> ['bar', '[foo]']
#
# The first character decides the delimiters: "(" is closed by ")",
# anything else by "]". Nested groups are balanced. Delimiters inside a
# quoted literal ('...' or "...") are not counted, so a predicate such as
# [@a=']'] is kept whole. Returns nil when the group is never closed.
def get_group string
  opener = string[0,1]
  closer = (opener == "(" ? ")" : "]")
  depth = 0
  ind = 0
  quote = nil
  loop do
    ch = string[ind,1]
    if quote
      # inside a literal: only the matching quote is significant
      quote = nil if ch == quote
    elsif ch == opener
      depth += 1
    elsif ch == closer
      depth -= 1
    elsif ch == "'" or ch == '"'
      quote = ch
    end
    ind += 1
    break unless depth > 0 and ind < string.length
  end
  return nil unless depth == 0
  [string[ind..-1], string[0..ind-1]]
end
|
||||
|
||||
# Splits the argument list of a function call. +string+ is the text after
# the opening parenthesis. Returns [ text after the closing parenthesis,
# array of argument strings ], or nil when the call is never closed.
#
# Arguments are separated by top-level commas and stripped; empty
# arguments are dropped. Nested parentheses are balanced, and commas or
# parentheses inside a quoted literal ('...' or "...") are not counted.
def parse_args( string )
  arguments = []
  ind = 0
  depth = 1
  quote = nil
  loop do
    ch = string[ind,1]
    if quote
      quote = nil if ch == quote
    else
      case ch
      when "'", '"'
        quote = ch
      when "("
        depth += 1
      when ")"
        depth -= 1
        if depth == 0
          s = string[0,ind].strip
          arguments << s unless s == ""
          string = string[ind+1..-1]
        end
      when ","
        if depth == 1
          s = string[0,ind].strip
          arguments << s unless s == ""
          string = string[ind+1..-1]
          # Restart at the first character of the next argument. The
          # increment below brings this to 0; resetting to 0 here skipped
          # that character.
          ind = -1
        end
      end
    end
    ind += 1
    break unless depth > 0 and ind < string.length
  end
  return nil unless depth==0
  [string,arguments]
end
|
||||
end
|
||||
end
|
||||
end
|
266
lib/rexml/quickpath.rb
Normal file
266
lib/rexml/quickpath.rb
Normal file
|
@ -0,0 +1,266 @@
|
|||
require 'rexml/functions'
|
||||
require 'rexml/xmltokens'
|
||||
|
||||
module REXML
|
||||
class QuickPath
|
||||
include Functions
|
||||
include XMLTokens
|
||||
|
||||
EMPTY_HASH = {}
|
||||
|
||||
# Returns the first node matched by +path+ relative to +element+ (nil when
# nothing matches).
def QuickPath::first element, path, namespaces=EMPTY_HASH
  found = match(element, path, namespaces)
  found[0]
end
|
||||
|
||||
# Yields every node matched by +path+ relative to +element+. A nil or
# false +path+ is treated as "*".
def QuickPath::each element, path, namespaces=EMPTY_HASH, &block
  path ||= "*"
  match(element, path, namespaces).each( &block )
end
|
||||
|
||||
# Returns the nodes matched by +path+, evaluated relative to +element+.
# +namespaces+ becomes the namespace context of Functions.
#
#   "/"                -> [ parent of the root element ]
#   "/..."             -> filtered starting from the root element
#   "axis::..."        -> filtered starting from +element+ itself
#   "*", "[", name...  -> filtered starting from the children of +element+
#   anything else      -> filtered starting from +element+ itself
#
# Raises RuntimeError when +path+ is nil.
def QuickPath::match element, path, namespaces=EMPTY_HASH
  raise "nil is not a valid xpath" unless path
  Functions::namespace_context = namespaces
  case path
  when /^\/([^\/]|$)/u
    # match on root
    path = path[1..-1]
    return [element.root.parent] if path == ''
    filter([element.root], path)
  when /^[-\w]*::/u
    filter([element], path)
  when /^\*/u
    filter(element.to_a, path)
  when /^[\[!\w:]/u
    # match on child. The "[" must be escaped: an unescaped "[" inside a
    # character class opens a nested class on newer regexp engines and
    # the pattern no longer compiles.
    filter(element.to_a, path)
  else
    filter([element], path)
  end
end
|
||||
|
||||
# Given an array of nodes it filters the array based on the path. The
|
||||
# result is that when this method returns, the array will contain elements
|
||||
# which match the path
|
||||
# Given an array of nodes, filters it by the leading step of +path+ and
# recurses on the remainder. Returns the matching nodes; a nil or empty
# +path+, or an empty node array, is returned unchanged. For a plain
# element-name step the +elements+ array itself is modified in place.
# Returns [] when the path starts with nothing that is recognised.
def QuickPath::filter elements, path
  return elements if path.nil? or path == '' or elements.size == 0
  case path
  when /^\/\//u # Descendant
    axe( elements, "descendant-or-self", $' )
  when /^\/?\b(\w[-\w]*)\b::/u # Axe
    axe( elements, $1, $' )
  when /^\/(?=\b([:!\w][-\.\w]*:)?[-!\*\.\w]*\b([^:(]|$)|\*)/u # Child
    remainder = $'
    found = []
    elements.each do |element|
      found |= filter( element.to_a, remainder )
    end
    found
  when /^\/?(\w[-\w]*)\(/u # / Function
    function( elements, $1, $' )
  when Namespace::NAMESPLIT # Element name
    ns, name, remainder = $1, $2, $'
    elements.delete_if do |element|
      keep = (element.kind_of? Element and
              (element.expanded_name == name or
               (element.name == name and
                element.namespace == Functions.namespace_context[ns])))
      !keep
    end
    filter( elements, remainder )
  when /^\/\[/u
    found = []
    elements.each do |element|
      found |= predicate( element.to_a, path[1..-1] ) if element.kind_of? Element
    end
    found
  when /^\[/u # Predicate
    predicate( elements, path )
  when /^\/?\.\.\./u # Ancestor
    axe( elements, "ancestor", $' )
  when /^\/?\.\./u # Parent
    filter( elements.collect{|e|e.parent}, $' )
  when /^\/?\./u # Self
    filter( elements, $' )
  when /^\*/u # Any
    remainder = $'
    found = []
    elements.each do |element|
      found |= filter( [element], remainder ) if element.kind_of? Element
    end
    found
  else
    []
  end
end
|
||||
|
||||
# Applies the axis +axe_name+ to +elements+, filters the resulting nodes
# by +rest+ and returns them without duplicates. Unknown axis names yield
# an empty array.
#
# Supported names: descendant, descendant-or-self, ancestor,
# ancestor-or-self, self, child, attribute, parent, following-sibling and
# preceding-sibling. The XPath name "preceding-sibling" is accepted
# alongside the historical spelling "previous-sibling". Both sibling axes
# only look at the immediately adjacent sibling.
def QuickPath::axe( elements, axe_name, rest )
  matches = []
  matches = filter( elements.dup, rest ) if axe_name =~ /-or-self$/u
  case axe_name
  when /^descendant/u
    elements.each do |element|
      matches |= filter( element.to_a, "descendant-or-self::#{rest}" ) if element.kind_of? Element
    end
  when /^ancestor/u
    elements.each do |element|
      while element.parent
        matches << element.parent
        element = element.parent
      end
    end
    matches = filter( matches, rest )
  when "self"
    matches = filter( elements, rest )
  when "child"
    elements.each do |element|
      matches |= filter( element.to_a, rest ) if element.kind_of? Element
    end
  when "attribute"
    elements.each do |element|
      matches << element.attributes[ rest ] if element.kind_of? Element
    end
  when "parent"
    matches = filter(elements.collect{|element| element.parent}.uniq, rest)
  when "following-sibling"
    matches = filter(elements.collect{|element| element.next_sibling}.uniq,
      rest)
  when "preceding-sibling", "previous-sibling"
    matches = filter(elements.collect{|element|
      element.previous_sibling}.uniq, rest )
  end
  return matches.uniq
end
|
||||
|
||||
# A predicate filters a node-set with respect to an axis to produce a
|
||||
# new node-set. For each node in the node-set to be filtered, the
|
||||
# PredicateExpr is evaluated with that node as the context node, with
|
||||
# the number of nodes in the node-set as the context size, and with the
|
||||
# proximity position of the node in the node-set with respect to the
|
||||
# axis as the context position; if PredicateExpr evaluates to true for
|
||||
# that node, the node is included in the new node-set; otherwise, it is
|
||||
# not included.
|
||||
#
|
||||
# A PredicateExpr is evaluated by evaluating the Expr and converting
|
||||
# the result to a boolean. If the result is a number, the result will
|
||||
# be converted to true if the number is equal to the context position
|
||||
# and will be converted to false otherwise; if the result is not a
|
||||
# number, then the result will be converted as if by a call to the
|
||||
# boolean function. Thus a location path para[3] is equivalent to
|
||||
# para[position()=3].
|
||||
# Filters +elements+ by the bracketed predicate at the start of +path+,
# then filters the survivors by whatever follows the predicate.
#
# The predicate text is rewritten into a Ruby expression and evaluated
# once per node, with Functions.node set to the node and Functions.pair
# set to [ position, size ]. A node is kept when the result is true or a
# String, or when it is an Integer equal to the node's position.
#
# Raises RuntimeError when the predicate's brackets are unbalanced
# (previously this looped forever).
#
# SECURITY NOTE: the predicate text is passed to +eval+. Never call this
# with paths built from untrusted input.
def QuickPath::predicate( elements, path )
  ind = 1
  bcount = 1
  while bcount > 0
    raise "Unbalanced '[' in predicate: #{path}" if ind >= path.length
    bcount += 1 if path[ind] == ?[
    bcount -= 1 if path[ind] == ?]
    ind += 1
  end
  ind -= 1
  expression = path[1..ind-1]
  rest = path[ind+1..-1]

  # have to change 'a [=<>] b [=<>] c' into 'a [=<>] b and b [=<>] c'
  expression.gsub!( /([^\s(and)(or)<>=]+)\s*([<>=])\s*([^\s(and)(or)<>=]+)\s*([<>=])\s*([^\s(and)(or)<>=]+)/u ) {
    "#$1 #$2 #$3 and #$3 #$4 #$5"
  }
  # Let's do some Ruby trickery to avoid some work:
  expression.gsub!( /&/u, "&&" )
  expression.gsub!( /=/u, "==" )
  expression.gsub!( /@(\w[-\w.]*)/u ) {
    "attribute(\"#$1\")"
  }
  expression.gsub!( /\bmod\b/u, "%" )
  expression.gsub!( /\b(\w[-\w.]*\()/u ) {
    fname = $1
    fname.gsub( /-/u, "_" )
  }

  Functions.pair = [ 0, elements.size ]
  results = []
  elements.each do |element|
    Functions.pair[0] += 1
    Functions.node = element
    res = eval( expression )
    case res
    when true, String
      results << element
    when Integer
      # A numeric predicate selects by position. Integer is used rather
      # than Fixnum, which newer Ruby versions no longer define.
      results << element if Functions.pair[0] == res
    end
  end
  return filter( results, rest )
end
|
||||
|
||||
# Value of the attribute +name+ on the current Functions.node, or nil when
# that node is not an Element.
def QuickPath::attribute( name )
  if Functions.node.kind_of?( Element )
    Functions.node.attributes[name]
  end
end
|
||||
|
||||
# Name of the current Functions.node, or nil when that node is not an
# Element.
def QuickPath::name()
  if Functions.node.kind_of?( Element )
    Functions.node.name
  end
end
|
||||
|
||||
# Forwards any unknown class-level call to the Functions module, so that
# function names appearing in an eval'ed predicate resolve there.
#
# Raises a RuntimeError naming the method and its arguments when the
# forwarded call fails. Only StandardError is rewrapped: rescuing Exception
# would also turn SystemExit, Interrupt and similar into a RuntimeError.
def QuickPath::method_missing( id, *args )
  begin
    Functions.send( id.id2name, *args )
  rescue StandardError
    raise "METHOD: #{id.id2name}(#{args.join ', '})\n#{$!.message}"
  end
end
|
||||
|
||||
# Calls the Functions method +fname+ once for every node in +elements+ and
# returns the nodes for which the call answered +true+, or answered an
# Integer equal to the node's 1-based position in +elements+.
#
# elements:: node list to test
# fname::    name of the Functions method to send
# rest::     unparsed argument text, consumed by parse_args
def QuickPath::function( elements, fname, rest )
  args = parse_args( elements, rest )
  # pair is [ current 1-based position, size of the node list ]
  Functions.pair = [0, elements.size]
  results = []
  elements.each do |element|
    Functions.pair[0] += 1
    Functions.node = element
    res = Functions.send( fname, *args )
    case res
    when true
      results << element
    when Integer
      # Integer rather than Fixnum: Fixnum no longer exists in Ruby 3.2+.
      results << element if Functions.pair[0] == res
    end
  end
  return results
end
|
||||
|
||||
# Reads +string+ one character at a time, removing each character from
# +string+ in place, and collects call arguments until a ')' is met.
#
# element:: passed through to function() when a nested '(' is found
# string::  argument text; it is modified destructively
# Returns the argument list when a ')' is consumed, otherwise "" once the
# string is exhausted.
def QuickPath::parse_args( element, string )
  # /.*?(?:\)|,)/
  arguments = []
  buffer = ""
  while string and string != ""
    c = string[0]
    # Drop the character just read. NOTE(review): '.' does not match a
    # newline, so a leading "\n" would not be removed -- confirm inputs
    # never contain one.
    string.sub!(/^./u, "")
    case c
    when ?,
      # if depth = 1, then we start a new argument
      # NOTE(review): buffer is not cleared here, so a later argument would
      # also contain the text of the earlier ones -- confirm.
      arguments << evaluate( buffer )
      #arguments << evaluate( string[0..count] )
    when ?(
      # start a new method call
      # NOTE(review): the value returned by function() is discarded.
      function( element, buffer, string )
      buffer = ""
    when ?)
      # close the method call and return arguments
      # NOTE(review): text buffered since the last ',' is never evaluated.
      return arguments
    else
      buffer << c
    end
  end
  # Reached only when no ')' was seen.
  ""
end
|
||||
end
|
||||
end
|
26
lib/rexml/rexml.rb
Normal file
26
lib/rexml/rexml.rb
Normal file
|
@ -0,0 +1,26 @@
|
|||
# REXML is an XML parser for Ruby, in Ruby.
|
||||
#
|
||||
# URL: http://www.germane-software.com/software/rexml
|
||||
# Author: Sean Russell <ser@germane-software.com>
|
||||
# Version: 2.5.6
|
||||
# Date: +2003/054
|
||||
|
||||
|
||||
|
||||
#
|
||||
# Short Description:
|
||||
# Why did I write REXML? At the time of this writing, there were already
|
||||
# two XML parsers for Ruby. The first is a Ruby binding to a native XML
|
||||
# parser. This is a fast parser, using proven technology. However,
|
||||
# it isn't very portable. The second is a native Ruby implementation, but
|
||||
# I didn't like its API very much. I wrote REXML for myself, so that I'd
|
||||
# have an XML parser that had an intuitive API.
|
||||
#
|
||||
# API documentation can be downloaded from the REXML home page, or can
|
||||
# be accessed online at http://www.germane-software.com/software/rexml_doc
|
||||
# A tutorial is available in docs/tutorial.html
|
||||
module REXML
  # Copyright notice. The literal must be double-quoted: in a single-quoted
  # string "#{Time.now.year}" is emitted verbatim instead of the year.
  Copyright = "Copyright #{Time.now.year} Sean Russell <ser@germane-software.com>"
  # NOTE(review): these look like placeholders that a build step is meant
  # to substitute -- confirm before relying on their values.
  Date = "@ANT_DATE@"
  Version = "@ANT_VERSION@"
end
|
94
lib/rexml/sax2listener.rb
Normal file
94
lib/rexml/sax2listener.rb
Normal file
|
@ -0,0 +1,94 @@
|
|||
module REXML
  # A template for stream parser listeners. Every method below has an empty
  # body.
  #
  # Note that the declarations (attlistdecl, elementdecl, etc) are trivially
  # processed; REXML doesn't yet handle doctype entity declarations, so you
  # have to parse them out yourself.
  #
  # === Missing methods from SAX2
  #  ignorable_whitespace
  # === Methods extending SAX2
  # +WARNING+
  # These methods are certainly going to change, until DTDs are fully
  # supported.  Be aware of this.
  #  start_document
  #  end_document
  #  doctype
  #  elementdecl
  #  attlistdecl
  #  entitydecl
  #  notationdecl
  #  cdata
  #  xmldecl
  #  comment
  module SAX2Listener
    def start_document(); end

    def end_document(); end

    def start_prefix_mapping( prefix, uri ); end

    def end_prefix_mapping( prefix ); end

    def start_element( uri, localname, qname, attributes ); end

    def end_element( uri, localname, qname ); end

    def characters( text ); end

    def processing_instruction( target, data ); end

    # Handles a doctype declaration. Any attributes of the doctype which are
    # not supplied will be nil.
    # EG, <!DOCTYPE me PUBLIC "foo" "bar">
    # @p name the name of the doctype; EG, "me"
    # @p pub_sys "PUBLIC", "SYSTEM", or nil.  EG, "PUBLIC"
    # @p long_name the supplied long name, or nil.  EG, "foo"
    # @p uri the uri of the doctype, or nil.  EG, "bar"
    def doctype( name, pub_sys, long_name, uri ); end

    # If a doctype includes an ATTLIST declaration, it will cause this
    # method to be called.  The content is the declaration itself, unparsed.
    # EG, <!ATTLIST el attr CDATA #REQUIRED> will come to this method as
    # "el attr CDATA #REQUIRED".  This is the same for all of the .*decl
    # methods.
    def attlistdecl( element, pairs, contents ); end

    # <!ELEMENT ...>
    def elementdecl( content ); end

    # <!ENTITY ...>
    # The argument passed to this method is an array of the entity
    # declaration.  It can be in a number of formats, but in general it
    # returns (example, result):
    #  <!ENTITY % YN '"Yes"'>
    #  ["%", "YN", "'\"Yes\"'", "\""]
    #  <!ENTITY % YN 'Yes'>
    #  ["%", "YN", "'Yes'", "s"]
    #  <!ENTITY WhatHeSaid "He said %YN;">
    #  ["WhatHeSaid", "\"He said %YN;\"", "YN"]
    #  <!ENTITY open-hatch SYSTEM "http://www.textuality.com/boilerplate/OpenHatch.xml">
    #  ["open-hatch", "SYSTEM", "\"http://www.textuality.com/boilerplate/OpenHatch.xml\""]
    #  <!ENTITY open-hatch PUBLIC "-//Textuality//TEXT Standard open-hatch boilerplate//EN" "http://www.textuality.com/boilerplate/OpenHatch.xml">
    #  ["open-hatch", "PUBLIC", "\"-//Textuality//TEXT Standard open-hatch boilerplate//EN\"", "\"http://www.textuality.com/boilerplate/OpenHatch.xml\""]
    #  <!ENTITY hatch-pic SYSTEM "../grafix/OpenHatch.gif" NDATA gif>
    #  ["hatch-pic", "SYSTEM", "\"../grafix/OpenHatch.gif\"", "\n\t\t\t\t\t\t\tNDATA gif", "gif"]
    def entitydecl( content ); end

    # <!NOTATION ...>
    def notationdecl( content ); end

    # Called when <![CDATA[ ... ]]> is encountered in a document.
    # @p content "..."
    def cdata( content ); end

    # Called when an XML PI is encountered in the document.
    # EG: <?xml version="1.0" encoding="utf"?>
    # @p version the version attribute value.  EG, "1.0"
    # @p encoding the encoding attribute value, or nil.  EG, "utf"
    # @p standalone the standalone attribute value, or nil.  EG, nil
    def xmldecl( version, encoding, standalone ); end

    # Called when a comment is encountered.
    # @p comment The content of the comment
    def comment( comment ); end
  end
end
|
191
lib/rexml/source.rb
Normal file
191
lib/rexml/source.rb
Normal file
|
@ -0,0 +1,191 @@
|
|||
require 'rexml/encoding'
|
||||
|
||||
module REXML
|
||||
# Generates Source-s. USE THIS CLASS.
|
||||
class SourceFactory
  # Builds the Source wrapper that matches the kind of +arg+.
  # @param arg Either a String, or an IO
  # @return a Source for a String, an IOSource for an IO, or nil if a bad
  # argument was given
  def SourceFactory::create_from( arg ) #, slurp=true
    case arg
    when String then Source.new( arg )
    when IO     then IOSource.new( arg )
    end
  end
end
|
||||
|
||||
# A Source can be searched for patterns, and wraps buffers and other
|
||||
# objects and provides consumption of text
|
||||
class Source
|
||||
include Encoding
|
||||
# The current buffer (what we're going to read next)
|
||||
attr_reader :buffer
|
||||
# The line number of the last consumed text
|
||||
attr_reader :line
|
||||
attr_reader :encoding
|
||||
|
||||
# Constructor
|
||||
# @param arg must be a String, and should be a valid XML document
|
||||
# Keeps +arg+ both as the untouched original (@orig) and as the working
# buffer, selects the encoding reported by check_encoding, and starts the
# line counter at zero.
def initialize( arg )
  @buffer = arg
  @orig = @buffer
  self.encoding = check_encoding( @buffer )
  #@buffer = decode(@buffer) unless @encoding == UTF_8
  @line = 0
end
|
||||
|
||||
# Inherited from Encoding
|
||||
# Overridden to support optimized en/decoding
|
||||
# After delegating to the inherited setter, re-points +encode+ and +decode+
# at the to_<name> / from_<name> methods for the selected encoding (name
# lower-cased, '-' replaced by '_'), recomputes @line_break as the encoded
# '>', and decodes the current buffer when +enc+ is not UTF_8.
def encoding=(enc)
  super
  # The alias targets depend on the encoding name, so the two alias
  # statements are built as text and evaluated at run time.
  eval <<-EOL
    alias :encode :to_#{encoding.tr('-', '_').downcase}
    alias :decode :from_#{encoding.tr('-', '_').downcase}
  EOL
  @line_break = encode( '>' )
  # NOTE(review): this compares the raw argument +enc+, while the aliases
  # above use the +encoding+ reader -- confirm both always agree.
  if enc != UTF_8
    @buffer = decode(@buffer)
    @to_utf = true
  else
    @to_utf = false
  end
end
|
||||
|
||||
# Scans the source for a given pattern. Note, that this is not your
|
||||
# usual scan() method. For one thing, the pattern argument has some
|
||||
# requirements; for another, the source can be consumed. You can easily
|
||||
# confuse this method. Originally, the patterns were easier
|
||||
# to construct and this method more robust, because this method
|
||||
# generated search regexes on the fly; however, this was
|
||||
# computationally expensive and slowed down the entire REXML package
|
||||
# considerably, since this is by far the most commonly called method.
|
||||
# @param pattern must be a Regexp, and must be in the form of
|
||||
# /^\s*(#{your pattern, with no groups})(.*)/. The first group
|
||||
# will be returned; the second group is used if the consume flag is
|
||||
# set.
|
||||
# @param consume if true, the pattern returned will be consumed, leaving
|
||||
# everything after it in the Source.
|
||||
# @return the pattern, if found, or nil if the Source is empty or the
|
||||
# pattern is not found.
|
||||
# Runs String#scan over the buffer. When +consume+ is set and at least one
# match was found, the buffer is cut down to the text after the last match.
def scan( pattern, consume=false )
  if @buffer.nil?
    return nil
  end
  found = @buffer.scan( pattern )
  unless found.empty?
    @buffer = $' if consume
  end
  found
end
|
||||
|
||||
# Does nothing for a plain Source and returns nil.
def read(); end
|
||||
|
||||
# Matches +pattern+ against the buffer and returns the MatchData (or nil).
# When +consume+ is set and the match succeeded, the buffer becomes the
# text that follows the match.
def match( pattern, consume=false )
  found = pattern.match( @buffer )
  if consume and found
    @buffer = found.post_match
  end
  found
end
|
||||
|
||||
# @return true if the Source is exhausted
|
||||
# True when there is no buffer, or the buffer holds nothing but whitespace.
#
# String#strip always returns a String, so the former strip.nil? test could
# never succeed and a drained buffer was reported as non-empty.
def empty?
  @buffer.nil? or @buffer.strip.empty?
end
|
||||
|
||||
# @return the current line in the source
|
||||
# Splits the original text on whitespace, looks for pieces equal to the
# first 31 characters of the remaining buffer, and returns the index of the
# first piece equal to the last such hit, or nil when there is none.
# NOTE(review): String#split with no argument splits on whitespace, not on
# line breaks -- confirm this is the intended notion of "line".
def current_line
  pieces = @orig.split
  hit = pieces.grep( @buffer[0..30] )[-1]
  hit ? pieces.index( hit ) : nil
end
|
||||
end
|
||||
|
||||
# A Source that wraps an IO. See the Source class for method
|
||||
# documentation
|
||||
class IOSource < Source
|
||||
#attr_reader :block_size
|
||||
|
||||
# Wraps the IO +arg+. The first chunk, up to and including the first '>',
# is read immediately and handed to Source#initialize as the buffer.
# +block_size+ is accepted but unused.
def initialize( arg, block_size=500 )
  @source = arg
  @er_source = @source
  @to_utf = false
  # READLINE OPT
  # The following was commented out when IOSource started using readline
  # to pull the data from the stream.
  #@block_size = block_size
  #super @source.read(@block_size)
  @line_break = '>'
  first_chunk = @source.readline( @line_break )
  super( first_chunk )
end
|
||||
|
||||
# Like Source#scan, but when the buffer yields no match, further chunks are
# read from the IO (each up to the next @line_break) until the pattern
# matches or the IO is exhausted, and the scan is then repeated.
def scan pattern, consume=false
  rv = super
  # You'll notice that this next section is very similar to the same
  # section in match(), but just a liiittle different.  This is
  # because it is a touch faster to do it this way with scan()
  # than the way match() does it; enough faster to warrant duplicating
  # some code
  if rv.size == 0
    until @buffer =~ pattern or @source.nil?
      begin
        # READLINE OPT
        #str = @source.read(@block_size)
        str = @source.readline(@line_break)
        str = decode(str) if @to_utf and str
        @buffer << str
      rescue
        # Any failure while reading (normally end of file) ends the stream.
        @source = nil
      end
    end
    rv = super
  end
  # Object#taint was removed in Ruby 3.2; only mark the result where the
  # method still exists.
  rv.taint if rv.respond_to?( :taint )
  rv
end
|
||||
|
||||
# Appends the next chunk of the IO, up to and including the next '>', to
# the buffer, decoding it first when @to_utf is set. Any error while
# reading (normally end of file) drops the IO by setting @source to nil.
def read
  chunk = @source.readline( '>' )
  chunk = decode( chunk ) if @to_utf and chunk
  @buffer << chunk
rescue
  @source = nil
end
|
||||
|
||||
# Like Source#match, but while there is no match and the IO is still open,
# further chunks (each up to the next '>') are appended to the buffer and
# the match is retried. Returns the MatchData, or nil when the IO ran out
# without a match.
def match pattern, consume=false
  rv = pattern.match(@buffer)
  @buffer = $' if consume and rv
  while !rv and @source
    begin
      str = @source.readline('>')
      str = decode(str) if @to_utf and str
      @buffer << str
      rv = pattern.match(@buffer)
      @buffer = $' if consume and rv
    rescue
      # Any failure while reading (normally end of file) ends the stream.
      @source = nil
    end
  end
  # rv is nil when nothing matched, and Object#taint was removed in
  # Ruby 3.2, so only mark a real result where the method still exists.
  rv.taint if rv and rv.respond_to?( :taint )
  rv
end
|
||||
|
||||
# Exhausted only when the buffer is empty (per Source#empty?) and the IO
# has either been dropped or reports end of file.
def empty?
  buffer_done = super
  buffer_done && ( @source.nil? or @source.eof? )
end
|
||||
|
||||
# @return the current line in the source
|
||||
# @return [pos, lineno, line]: the byte position of the IO, the IO's own
# lineno counter, and the number of readline calls needed to get from the
# start of the IO to that byte position.
#
# Counting requires rewinding the IO. The position and lineno are restored
# afterwards; previously the IO was left wherever the counting stopped,
# which silently skipped input for any later read.
def current_line
  pos = @er_source.pos        # The byte position in the source
  lineno = @er_source.lineno  # The IO's line counter at that position
  @er_source.rewind
  line = 0                    # Number of lines read up to pos
  begin
    while @er_source.pos < pos
      @er_source.readline
      line += 1
    end
  rescue
    # Best effort: report whatever was counted before the failure.
  ensure
    begin
      @er_source.pos = pos
      @er_source.lineno = lineno
    rescue
      # Restoring is best effort as well.
    end
  end
  [pos, lineno, line]
end
|
||||
end
|
||||
end
|
89
lib/rexml/streamlistener.rb
Normal file
89
lib/rexml/streamlistener.rb
Normal file
|
@ -0,0 +1,89 @@
|
|||
module REXML
  # A template for stream parser listeners. Every method below has an empty
  # body.
  #
  # Note that the declarations (attlistdecl, elementdecl, etc) are trivially
  # processed; REXML doesn't yet handle doctype entity declarations, so you
  # have to parse them out yourself.
  module StreamListener
    # Called when a tag is encountered.
    # @p name the tag name
    # @p attrs an array of arrays of attribute/value pairs, suitable for
    # use with assoc or rassoc.  IE, <tag attr1="value1" attr2="value2">
    # will result in
    # tag_start( "tag", [["attr1","value1"],["attr2","value2"]])
    def tag_start( name, attrs ); end

    # Called when the end tag is reached.  In the case of <tag/>, tag_end
    # will be called immediately after tag_start
    # @p the name of the tag
    def tag_end( name ); end

    # Called when text is encountered in the document
    # @p text the text content.
    def text( text ); end

    # Called when an instruction is encountered.  EG: <?xsl sheet='foo'?>
    # @p name the instruction name; in the example, "xsl"
    # @p instruction the rest of the instruction.  In the example,
    # "sheet='foo'"
    def instruction( name, instruction ); end

    # Called when a comment is encountered.
    # @p comment The content of the comment
    def comment( comment ); end

    # Handles a doctype declaration. Any attributes of the doctype which are
    # not supplied will be nil.
    # EG, <!DOCTYPE me PUBLIC "foo" "bar">
    # @p name the name of the doctype; EG, "me"
    # @p pub_sys "PUBLIC", "SYSTEM", or nil.  EG, "PUBLIC"
    # @p long_name the supplied long name, or nil.  EG, "foo"
    # @p uri the uri of the doctype, or nil.  EG, "bar"
    def doctype( name, pub_sys, long_name, uri ); end

    # If a doctype includes an ATTLIST declaration, it will cause this
    # method to be called.  The content is the declaration itself, unparsed.
    # EG, <!ATTLIST el attr CDATA #REQUIRED> will come to this method as
    # "el attr CDATA #REQUIRED".  This is the same for all of the .*decl
    # methods.
    def attlistdecl( element_name, attributes, raw_content ); end

    # <!ELEMENT ...>
    def elementdecl( content ); end

    # <!ENTITY ...>
    # The argument passed to this method is an array of the entity
    # declaration.  It can be in a number of formats, but in general it
    # returns (example, result):
    #  <!ENTITY % YN '"Yes"'>
    #  ["%", "YN", "'\"Yes\"'", "\""]
    #  <!ENTITY % YN 'Yes'>
    #  ["%", "YN", "'Yes'", "s"]
    #  <!ENTITY WhatHeSaid "He said %YN;">
    #  ["WhatHeSaid", "\"He said %YN;\"", "YN"]
    #  <!ENTITY open-hatch SYSTEM "http://www.textuality.com/boilerplate/OpenHatch.xml">
    #  ["open-hatch", "SYSTEM", "\"http://www.textuality.com/boilerplate/OpenHatch.xml\""]
    #  <!ENTITY open-hatch PUBLIC "-//Textuality//TEXT Standard open-hatch boilerplate//EN" "http://www.textuality.com/boilerplate/OpenHatch.xml">
    #  ["open-hatch", "PUBLIC", "\"-//Textuality//TEXT Standard open-hatch boilerplate//EN\"", "\"http://www.textuality.com/boilerplate/OpenHatch.xml\""]
    #  <!ENTITY hatch-pic SYSTEM "../grafix/OpenHatch.gif" NDATA gif>
    #  ["hatch-pic", "SYSTEM", "\"../grafix/OpenHatch.gif\"", "\n\t\t\t\t\t\t\tNDATA gif", "gif"]
    def entitydecl( content ); end

    # <!NOTATION ...>
    def notationdecl( content ); end

    # Called when %foo; is encountered in a doctype declaration.
    # @p content "foo"
    def entity( content ); end

    # Called when <![CDATA[ ... ]]> is encountered in a document.
    # @p content "..."
    def cdata( content ); end

    # Called when an XML PI is encountered in the document.
    # EG: <?xml version="1.0" encoding="utf"?>
    # @p version the version attribute value.  EG, "1.0"
    # @p encoding the encoding attribute value, or nil.  EG, "utf"
    # @p standalone the standalone attribute value, or nil.  EG, nil
    def xmldecl( version, encoding, standalone ); end
  end
end
|
279
lib/rexml/text.rb
Normal file
279
lib/rexml/text.rb
Normal file
|
@ -0,0 +1,279 @@
|
|||
require 'rexml/entity'
|
||||
|
||||
module REXML
|
||||
# Represents text nodes in an XML document
|
||||
class Text < Child
|
||||
include Comparable
|
||||
# The order in which the substitutions occur. SPECIALS[i] is replaced by
# SUBSTITUTES[i]; '&' comes first so that the ampersands introduced by the
# later replacements are not escaped again.
SPECIALS = [ /&(?!#?[\w-]+;)/u, /</u, />/u, /"/u, /'/u, /\r/u ]
SUBSTITUTES = ['&amp;', '&lt;', '&gt;', '&quot;', '&apos;', '&#13;']
# Characters which are substituted in written strings. SETUTITSBUS[i] is
# replaced by SLAICEPS[i]; '&amp;' comes last so that a decoded ampersand
# cannot form a new reference.
SLAICEPS = [ '<', '>', '"', "'", '&' ]
SETUTITSBUS = [ /&lt;/u, /&gt;/u, /&quot;/u, /&apos;/u, /&amp;/u ]
|
||||
|
||||
# If +raw+ is true, then REXML leaves the value alone
|
||||
attr_accessor :raw
|
||||
|
||||
ILLEGAL = /(<|&(?!(#{Entity::NAME})|(#0*((?:\d+)|(?:x[a-fA-F0-9]+)));))/um
|
||||
# A numeric character reference, decimal (&#65;) or hexadecimal (&#x41;).
# Leading zeros are skipped; group 1 holds the digits, with the 'x' prefix
# kept for hexadecimal references.
NUMERICENTITY = /&#0*((?:\d+)|(?:x[a-fA-F0-9]+));/
|
||||
|
||||
# Constructor
|
||||
# +arg+ if a String, the content is set to the String. If a Text,
|
||||
# the object is shallowly cloned.
|
||||
#
|
||||
# +respect_whitespace+ (boolean, false) if true, whitespace is
|
||||
# respected
|
||||
#
|
||||
# +parent+ (nil) if this is a Parent object, the parent
|
||||
# will be set to this.
|
||||
#
|
||||
# +raw+ (nil) This argument can be given three values.
|
||||
# If true, then the value used to construct this object is expected to
|
||||
# contain no unescaped XML markup, and REXML will not change the text. If
|
||||
# this value is false, the string may contain any characters, and REXML will
|
||||
# escape any and all defined entities whose values are contained in the
|
||||
# text. If this value is nil (the default), then the raw value of the
|
||||
# parent will be used as the raw value for this node. If there is no raw
|
||||
# value for the parent, and no value is supplied, the default is false.
|
||||
# Text.new( "<&", false, nil, false ) #-> "&lt;&amp;"
|
||||
# Text.new( "<&", false, nil, true ) #-> IllegalArgumentException
|
||||
# Text.new( "&lt;&amp;", false, nil, true ) #-> "&lt;&amp;"
|
||||
# # Assume that the entity "s" is defined to be "sean"
|
||||
# # and that the entity "r" is defined to be "russell"
|
||||
# Text.new( "sean russell" ) #-> "&s; &r;"
|
||||
# Text.new( "sean russell", false, nil, true ) #-> "sean russell"
|
||||
#
|
||||
# +entity_filter+ (nil) This can be an array of entities to match in the
|
||||
# supplied text. This argument is only useful if +raw+ is set to false.
|
||||
# Text.new( "sean russell", false, nil, false, ["s"] ) #-> "&s; russell"
|
||||
# Text.new( "sean russell", false, nil, true, ["s"] ) #-> "sean russell"
|
||||
# In the last example, the +entity_filter+ argument is ignored.
|
||||
#
|
||||
# +pattern+ INTERNAL USE ONLY
|
||||
# Raises Exception when +arg+ is neither a String nor a Text, or when the
# node is raw and its string matches +illegal+.
def initialize(arg, respect_whitespace=false, parent=nil, raw=nil,
               entity_filter=nil, illegal=ILLEGAL )

  @raw = false

  if parent
    super( parent )
    @raw = parent.raw
  else
    @parent = nil
  end

  # An explicit +raw+ argument wins over the value taken from the parent.
  @raw = raw unless raw.nil?
  @entity_filter = entity_filter
  @normalized = @unnormalized = nil

  if arg.kind_of? String
    # dup rather than clone: clone keeps a frozen String frozen, and the
    # in-place edits below would then raise.
    @string = arg.dup
    @string.squeeze!(" \n\t") unless respect_whitespace
  elsif arg.kind_of? Text
    @string = arg.to_s
    @raw = arg.raw
  else
    # Object#type no longer exists; #class reports the same thing.
    raise Exception.new( "Illegal argument of type #{arg.class} for Text constructor (#{arg})" )
  end

  # Normalise line endings to "\n".
  @string.gsub!( /\r\n?/, "\n" )

  # check for illegal characters
  if @raw
    if @string =~ illegal
      raise Exception.new(
        "Illegal character '#{$1}' in raw string \"#{@string}\""
      )
    end
  end
end
|
||||
|
||||
# Node kind tag for text nodes: always :text.
def node_type() :text end
|
||||
|
||||
# True when the stored string has no characters at all.
def empty?
  @string.size.zero?
end
|
||||
|
||||
|
||||
# Builds a new Text from this one through the Text-argument form of the
# constructor.
def clone
  Text.new( self )
end
|
||||
|
||||
|
||||
# Appends text to this text node. The text is appended in the +raw+ mode
|
||||
# of this text node.
|
||||
# Line endings in +to_append+ are normalised to "\n" before appending.
# The cached results of to_s and value are discarded first; they were
# computed from the old string and would otherwise be returned unchanged
# after the append.
# Returns the stored string.
def <<( to_append )
  @normalized = @unnormalized = nil
  @string << to_append.gsub( /\r\n?/, "\n" )
end
|
||||
|
||||
|
||||
# +other+ a String or a Text
|
||||
# +returns+ the result of (to_s <=> arg.to_s)
|
||||
# Orders by string form: compares this node's to_s with other.to_s.
def <=>( other )
  mine = to_s
  mine <=> other.to_s
end
|
||||
|
||||
REFERENCE = /#{Entity::REFERENCE}/
|
||||
# Returns the string value of this text node. This string is always
|
||||
# escaped, meaning that it is a valid XML text node string, and all
|
||||
# entities that can be escaped, have been inserted. This method respects
|
||||
# the entity filter set in the constructor.
|
||||
#
|
||||
# # Assume that the entity "s" is defined to be "sean", and that the
|
||||
# # entity "r" is defined to be "russell"
|
||||
# t = Text.new( "< & sean russell", false, nil, false, ['s'] )
|
||||
# t.to_s #-> "&lt; &amp; &s; russell"
|
||||
# t = Text.new( "< & &s; russell", false, nil, false )
|
||||
# t.to_s #-> "&lt; &amp; &s; russell"
|
||||
# u = Text.new( "sean russell", false, nil, true )
|
||||
# u.to_s #-> "sean russell"
|
||||
def to_s
|
||||
return @string if @raw
|
||||
return @normalized if @normalized
|
||||
|
||||
doctype = nil
|
||||
if @parent
|
||||
doc = @parent.document
|
||||
doctype = doc.doctype if doc
|
||||
end
|
||||
|
||||
@normalized = Text::normalize( @string, doctype, @entity_filter )
|
||||
end
|
||||
|
||||
# Returns the string value of this text. This is the text without
|
||||
# entities, as it might be used programmatically, or printed to the
|
||||
# console. This ignores the 'raw' attribute setting, and any
|
||||
# entity_filter.
|
||||
#
|
||||
# # Assume that the entity "s" is defined to be "sean", and that the
|
||||
# # entity "r" is defined to be "russell"
|
||||
# t = Text.new( "< & sean russell", false, nil, false, ['s'] )
|
||||
# t.value #-> "< & sean russell"
|
||||
# t = Text.new( "< & &s; russell", false, nil, false )
|
||||
# t.value #-> "< & sean russell"
|
||||
# u = Text.new( "sean russell", false, nil, true )
|
||||
# u.value #-> "sean russell"
|
||||
# The result is computed once and cached in @unnormalized. The early return
# is required: without it the cached value was evaluated and discarded, and
# the string was unnormalized again on every call.
def value
  return @unnormalized if @unnormalized
  doctype = nil
  if @parent
    doc = @parent.document
    doctype = doc.doctype if doc
  end
  @unnormalized = Text::unnormalize( @string, doctype )
end
|
||||
|
||||
# Appends the escaped string form (to_s) to +writer+. The +indent+,
# +transitive+ and +ie_hack+ arguments are accepted but not used.
def write( writer, indent=-1, transitive=false, ie_hack=false )
  escaped = to_s
  writer << escaped
end
|
||||
|
||||
# Writes out text, substituting special characters beforehand.
|
||||
# +out+ A String, IO, or any other object supporting <<( String )
|
||||
# +input+ the text to substitute and the write out
|
||||
#
|
||||
# z=utf8.unpack("U*")
|
||||
# ascOut=""
|
||||
# z.each{|r|
|
||||
# if r < 0x100
|
||||
# ascOut.concat(r.chr)
|
||||
# else
|
||||
# ascOut.concat(sprintf("&#x%x;", r))
|
||||
# end
|
||||
# }
|
||||
# puts ascOut
|
||||
# Applies the six SPECIALS -> SUBSTITUTES replacements, in order, to a copy
# of +input+ and appends the result to +out+.
def write_with_substitution( out, input )
  copy = input.clone
  0.upto( 5 ) do |i|
    copy.gsub!( SPECIALS[i], SUBSTITUTES[i] )
  end
  out << copy
end
|
||||
|
||||
# Reads text, substituting entities
|
||||
# Returns a copy of +input+ with line endings normalised to "\n" and, when
# it contains '&', the predefined entities and numeric character references
# replaced by the characters they stand for.
#
# input::   the text to decode; it is not modified
# illegal:: optional Regexp; a match raises ParseException
def Text::read_with_substitution( input, illegal=nil )
  copy = input.clone

  if copy =~ illegal
    raise ParseException.new( "malformed text: Illegal character #$& in \"#{copy}\"" )
  end if illegal

  copy.gsub!( /\r\n?/, "\n" )
  if copy.include? ?&
    copy.gsub!( SETUTITSBUS[0], SLAICEPS[0] )
    copy.gsub!( SETUTITSBUS[1], SLAICEPS[1] )
    copy.gsub!( SETUTITSBUS[2], SLAICEPS[2] )
    copy.gsub!( SETUTITSBUS[3], SLAICEPS[3] )
    # Numeric references, decimal or hexadecimal (either letter case).
    # Decoded before &amp; so that "&amp;#65;" yields "&#65;" rather than
    # being decoded a second time into "A".
    copy.gsub!( /&#0*((?:\d+)|(?:x[a-fA-F0-9]+));/ ) {|m|
      m=$1
      #m='0' if m==''
      m = "0#{m}" if m[0] == ?x
      [Integer(m)].pack('U*')
    }
    # &amp; goes last: a decoded '&' must not form a new reference.
    copy.gsub!( SETUTITSBUS[4], SLAICEPS[4] )
  end
  copy
end
|
||||
|
||||
EREFERENCE = /&(?!#{Entity::NAME};)/
|
||||
# Escapes all possible entities
|
||||
# Returns an escaped copy of +input+.
#
# Every '&' that does not already start an entity reference becomes
# "&amp;". Then each entity value found in the text is replaced by its
# reference, using the entities of +doctype+ when one is given and
# DocType::DEFAULT_ENTITIES otherwise.
#
# entity_filter:: consulted only with a doctype; entities it includes are
#                 left unreplaced.
#                 NOTE(review): membership is tested with the entity object,
#                 while the constructor docs show a list of names -- confirm.
def Text::normalize( input, doctype=nil, entity_filter=nil )
  copy = input.clone
  # The replacement must be the escaped form; replacing '&' with '&' does
  # nothing.
  copy.gsub!( EREFERENCE, '&amp;' )
  if doctype
    doctype.entities.each_value do |entity|
      if entity.value and
         not( entity_filter and entity_filter.include?(entity) )
        copy.gsub!( entity.value, "&#{entity.name};" )
      end
    end
  else
    DocType::DEFAULT_ENTITIES.each_value do |entity|
      copy.gsub!(entity.value, "&#{entity.name};" )
    end
  end
  copy
end
|
||||
|
||||
# Unescapes all possible entities
|
||||
# Returns a copy of +string+ with line endings normalised to "\n" and its
# references replaced by their values.
#
# Numeric references are decoded first, then the named references found in
# the text (looked up in +doctype+, or in DocType::DEFAULT_ENTITIES when no
# doctype is given), and "&amp;" last, so that a decoded ampersand cannot
# form a new reference.
#
# filter::  names of entity references that must be left untouched
# illegal:: accepted but not used
def Text::unnormalize( string, doctype=nil, filter=nil, illegal=nil )
  rv = string.clone
  rv.gsub!( /\r\n?/, "\n" )
  matches = rv.scan REFERENCE
  return rv if matches.size == 0
  rv.gsub!( NUMERICENTITY ) {|m|
    m=$1
    m = "0#{m}" if m[0] == ?x
    [Integer(m)].pack('U*')
  }
  # Keep the first capture of each hit; hits without one are dropped.
  matches.collect!{|x|x[0]}.compact!
  if matches.size > 0
    if doctype
      matches.each do |entity_reference|
        unless filter and filter.include?(entity_reference)
          entity_value = doctype.entity( entity_reference )
          re = /&#{entity_reference};/
          rv.gsub!( re, entity_value ) if entity_value
        end
      end
    else
      matches.each do |entity_reference|
        unless filter and filter.include?(entity_reference)
          entity_value = DocType::DEFAULT_ENTITIES[ entity_reference ]
          re = /&#{entity_reference};/
          rv.gsub!( re, entity_value.value ) if entity_value
        end
      end
    end
    # The pattern must be the escaped form; replacing '&' with '&' does
    # nothing.
    rv.gsub!( /&amp;/, '&' )
  end
  rv
end
|
||||
end
|
||||
end
|
72
lib/rexml/xmldecl.rb
Normal file
72
lib/rexml/xmldecl.rb
Normal file
|
@ -0,0 +1,72 @@
|
|||
require 'rexml/encoding'
|
||||
require 'rexml/source'
|
||||
|
||||
module REXML
|
||||
# NEEDS DOCUMENTATION
|
||||
class XMLDecl < Child
|
||||
include Encoding
|
||||
|
||||
# Default declaration values.
DEFAULT_VERSION    = "1.0"
DEFAULT_ENCODING   = "UTF-8"
DEFAULT_STANDALONE = "no"
# Opening and closing markers of the declaration, with '?' escaped by a
# backslash; write() removes the backslash before emitting them.
START = '<\?xml'
STOP  = '\?>'
|
||||
|
||||
attr_accessor :version, :standalone
|
||||
|
||||
# Builds a declaration from explicit values or, when +version+ is itself an
# XMLDecl, as a copy of that declaration.
#
# version::    version string, or an XMLDecl to copy; nil falls back to
#              DEFAULT_VERSION
# encoding::   encoding name, or nil to leave the encoding attribute out of
#              the written declaration
# standalone:: standalone value, or nil
def initialize(version=DEFAULT_VERSION, encoding=nil, standalone=nil)
  @encoding_set = !encoding.nil?
  super()
  if version.kind_of? XMLDecl
    @version = version.version
    self.encoding = version.encoding
    @standalone = version.standalone
    # Carry over whether the source writes its encoding; +encoding+ is nil
    # in this form, so a copy used to lose the encoding attribute.
    @encoding_set = version.instance_variable_get( "@encoding_set" ) ? true : false
  else
    @version = version
    self.encoding = encoding
    @standalone = standalone
  end
  @version = DEFAULT_VERSION if @version.nil?
end
|
||||
|
||||
# Builds a new XMLDecl through the copy form of the constructor.
def clone()
  XMLDecl.new( self )
end
|
||||
|
||||
# Writes the declaration to +writer+: the indentation produced by indent(),
# then "<?xml", a space plus the attributes from content(), then "?>".
# +transitive+ and +ie_hack+ are accepted but not used.
def write( writer, indent=-1, transitive=false, ie_hack=false )
  indent( writer, indent )
  # START and STOP carry a regexp-escaping backslash that must not be output.
  opening = START.sub( /\\/u, '' )
  closing = STOP.sub( /\\/u, '' )
  writer << opening
  writer << " #{content}"
  writer << closing
end
|
||||
|
||||
# Two declarations are equal when +other+ is an XMLDecl with the same
# version, encoding and standalone values.
def ==( other )
  if other.kind_of?( XMLDecl )
    other.version == @version and
      other.encoding == self.encoding and
      other.standalone == @standalone
  else
    false
  end
end
|
||||
|
||||
# Overwrites this declaration's version, encoding and standalone values,
# recording whether an encoding was actually supplied.
def xmldecl( version, encoding, standalone )
  @version = version
  @encoding_set = encoding.nil? ? false : true
  self.encoding = encoding
  @standalone = standalone
end
|
||||
|
||||
# Node kind tag for XML declarations: always :xmldecl.
def node_type() :xmldecl end
|
||||
|
||||
alias :stand_alone? :standalone
|
||||
|
||||
private
|
||||
# The attribute text of the declaration: always the version, then the
# encoding when one was supplied, then standalone when it is set.
def content
  parts = [ "version='#@version'" ]
  parts << "encoding='#{encoding}'" if @encoding_set
  parts << "standalone='#@standalone'" if @standalone
  parts.join( " " )
end
|
||||
end
|
||||
end
|
18
lib/rexml/xmltokens.rb
Normal file
18
lib/rexml/xmltokens.rb
Normal file
|
@ -0,0 +1,18 @@
|
|||
module REXML
  # Regexp source fragments (plain Strings, meant to be interpolated into
  # patterns) used for parsing XML.  Not for general consumption.
  module XMLTokens
    NCNAME_STR = '[\w:][\-\w\d.]*'
    NAME_STR   = "(?:#{NCNAME_STR}:)?#{NCNAME_STR}"

    NAMECHAR  = '[\-\w\d\.:]'
    NAME      = "([\\w:]#{NAMECHAR}*)"
    NMTOKEN   = "(?:#{NAMECHAR})+"
    NMTOKENS  = "#{NMTOKEN}(\\s+#{NMTOKEN})*"
    REFERENCE = "(?:&#{NAME};|&#\\d+;|&#x[0-9a-fA-F]+;)"

    #REFERENCE = "(?:#{ENTITYREF}|#{CHARREF})"
    #ENTITYREF = "&#{NAME};"
    #CHARREF = "&#\\d+;|&#x[0-9a-fA-F]+;"
  end
end
|
62
lib/rexml/xpath.rb
Normal file
62
lib/rexml/xpath.rb
Normal file
|
@ -0,0 +1,62 @@
|
|||
require 'rexml/functions'
|
||||
require 'rexml/xpath_parser'
|
||||
|
||||
module REXML
|
||||
# Wrapper class. Use this class to access the XPath functions.
|
||||
class XPath
  include Functions
  EMPTY_HASH = {}

  # Finds and returns the first node that matches the supplied xpath.
  # element::
  #   The context element
  # path::
  #   The xpath to search for.  If not supplied or nil, returns the first
  #   node matching '*'.
  # namespaces::
  #   If supplied, a Hash which defines a namespace mapping.
  # variables::
  #   If supplied, a Hash handed to the parser as its variables.
  #
  #  XPath.first( node )
  #  XPath.first( doc, "//b" )
  #  XPath.first( node, "a/x:b", { "x"=>"http://doofus" } )
  def XPath::first element, path=nil, namespaces={}, variables={}
    match( element, path, namespaces, variables )[0]
  end

  # Iterates over nodes that match the given path, calling the supplied
  # block with the match.
  # element::
  #   The context element
  # path::
  #   The xpath to search for.  If not supplied or nil, defaults to '*'
  # namespaces::
  #   If supplied, a Hash which defines a namespace mapping
  # variables::
  #   If supplied, a Hash handed to the parser as its variables.
  #
  #  XPath.each( node ) { |el| ... }
  #  XPath.each( node, "/*[@attr='v']" ) { |el| ... }
  #  XPath.each( node, 'ancestor::x' ) { |el| ... }
  def XPath::each element, path=nil, namespaces={}, variables={}, &block
    match( element, path, namespaces, variables ).each( &block )
  end

  # Returns an array of nodes matching a given XPath.  The arguments are the
  # same as for XPath::first; a nil path defaults to '*', and a context that
  # is not already an Array is wrapped in one.
  #
  # XPath::first and XPath::each delegate here, so a parser is set up in
  # one place only.
  def XPath::match element, path=nil, namespaces={}, variables={}
    parser = XPathParser.new
    parser.namespaces = namespaces
    parser.variables = variables
    path = "*" unless path
    element = [element] unless element.kind_of? Array
    parser.parse(path,element)
  end
end
|
||||
end
|
530
lib/rexml/xpath_parser.rb
Normal file
530
lib/rexml/xpath_parser.rb
Normal file
|
@ -0,0 +1,530 @@
|
|||
require 'rexml/namespace'
|
||||
require 'rexml/xmltokens'
|
||||
require 'rexml/parsers/xpathparser'
|
||||
|
||||
# Ignore this class. It adds a __ne__ method, because Ruby doesn't seem to
|
||||
# understand object.send( "!=", foo ), whereas it *does* understand "<", "==",
|
||||
# and all of the other comparison methods. Stupid, and annoying, and not at
|
||||
# all POLS.
|
||||
class Object
  # Named stand-in for the != operator, so that "not equal" can be invoked
  # by method name. Returns the result of <tt>self != other</tt>.
  def __ne__(other)
    self != other
  end
end
|
||||
|
||||
module REXML
|
||||
# You don't want to use this class. Really. Use XPath, which is a wrapper
|
||||
# for this class. Believe me. You don't want to poke around in here.
|
||||
# There is strange, dark magic at work in this code. Beware. Go back! Go
|
||||
# back while you still can!
|
||||
class XPathParser
|
||||
include XMLTokens
|
||||
LITERAL = /^'([^']*)'|^"([^"]*)"/u
|
||||
|
||||
# Starts with empty namespace and variable tables and creates the
# low-level parser that turns XPath strings into path stacks.
def initialize
  @namespaces = {}
  @variables = {}
  @parser = REXML::Parsers::XPathParser.new
end
|
||||
|
||||
# Installs the prefix => URI mapping used by this parser. The same mapping
# is also handed to Functions as its namespace context.
def namespaces=(namespaces={})
  Functions.namespace_context = namespaces
  @namespaces = namespaces
end
|
||||
|
||||
# Installs the variable table used when resolving :variable steps. The
# same table is also handed to Functions.
def variables=(vars={})
  Functions.variables = vars
  @variables = vars
end
|
||||
|
||||
# Parses +path+ into a path stack and evaluates that stack against
# +nodeset+, returning whatever the evaluation produces.
def parse(path, nodeset)
  match(@parser.parse(path), nodeset)
end
|
||||
|
||||
# Parses +path+ as a predicate expression and evaluates it with Predicate
# against +nodeset+, returning the evaluation result.
def predicate(path, nodeset)
  Predicate(@parser.predicate(path), nodeset)
end
|
||||
|
||||
# Binds +value+ to +variable_name+ in this parser's variable table.
def []=(variable_name, value)
  @variables[variable_name] = value
end
|
||||
|
||||
private
|
||||
|
||||
# Repeatedly hands the head of +path_stack+ to internal_parse, feeding each
# result into the next round, until either the stack or the current
# nodeset is exhausted. Returns the last nodeset.
def match(path_stack, nodeset)
  # +size+ (not +empty?+) is used on the nodeset on purpose: the original
  # loop only ever asked the intermediate result for its size.
  until path_stack.size == 0 || nodeset.size == 0
    nodeset = internal_parse(path_stack, nodeset)
  end
  nodeset
end
|
||||
|
||||
# Evaluates the step at the head of +path_stack+ against +nodeset+.
#
# The leading token (and any operands that belong to it) is shifted off
# +path_stack+, so the caller's array is modified. Most steps return a new
# Array of nodes; :literal and :variable return a plain value instead.
# An unrecognised token, and :self, leave the nodeset untouched.
def internal_parse path_stack, nodeset
  return nodeset if nodeset.size == 0 || path_stack.size == 0

  case path_stack.shift
  when :document
    return [ nodeset[0].root.parent ]

  when :qname
    prefix = path_stack.shift
    name = path_stack.shift
    ns = @namespaces[prefix] || ''
    matched = nodeset.reject do |node|
      # FIXME (from the original author): this DOUBLES the time XPath
      # searches take. Note that +ns+ is deliberately shared across
      # iterations: once resolved from an element it is reused.
      ns = node.namespace( prefix ) if node.node_type == :element && ns == ''
      !(node.node_type == :element && node.name == name && node.namespace == ns)
    end
    return matched

  when :any
    return nodeset.select { |node| node.node_type == :element }

  when :self
    # Nothing to do; falls through to return the nodeset unchanged.

  when :processing_instruction
    target = path_stack.shift
    matched = nodeset.reject do |node|
      (node.node_type != :processing_instruction) ||
        ( !target.nil? && ( node.target != target ) )
    end
    return matched

  when :text
    return nodeset.select { |node| node.node_type == :text }

  when :comment
    return nodeset.select { |node| node.node_type == :comment }

  when :node
    return nodeset

  # FIXME (from the original author): I suspect the following XPath will
  # fail: /a/*/*[1]
  when :child
    new_nodeset = []
    nodeset.each do |node|
      new_nodeset += node.children if node.parent?
    end
    return new_nodeset

  when :literal
    literal = path_stack.shift
    # Numeric-looking literals become numbers: Float when a fractional
    # part was captured, Integer otherwise.
    if literal =~ /^\d+(\.\d+)?$/
      return ($1 ? literal.to_f : literal.to_i)
    end
    return literal

  when :attribute
    new_nodeset = []
    case path_stack.shift
    when :qname
      prefix = path_stack.shift
      name = path_stack.shift
      nodeset.each do |element|
        next unless element.node_type == :element
        attr = element.attribute( name, @namespaces[prefix] )
        new_nodeset << attr if attr
      end
    when :any
      # NOTE(review): the attributes are fetched but never added to
      # new_nodeset, so this branch always yields an empty result. Left
      # as-is because the Attributes API is not visible from this file;
      # confirm how to enumerate attributes before fixing.
      nodeset.each do |element|
        element.attributes if element.node_type == :element
      end
    end
    return new_nodeset

  when :parent
    return internal_parse( path_stack, nodeset.collect{|n| n.parent}.compact )

  when :ancestor
    new_nodeset = []
    nodeset.each do |node|
      while node.parent
        node = node.parent
        new_nodeset << node unless new_nodeset.include? node
      end
    end
    return new_nodeset

  when :ancestor_or_self
    new_nodeset = []
    nodeset.each do |node|
      next unless node.node_type == :element
      new_nodeset << node
      while node.parent
        node = node.parent
        # Was "includes?", which Array does not define.
        new_nodeset << node unless new_nodeset.include? node
      end
    end
    return new_nodeset

  when :predicate
    predicate = path_stack.shift
    new_nodeset = []
    Functions::size = nodeset.size
    nodeset.each_with_index do |node, index|
      position = index + 1
      Functions::node = node
      Functions::index = position
      result = Predicate( predicate, node )
      # A numeric result selects by position, an Array result selects
      # when non-empty, anything else is used for its truthiness.
      if result.kind_of? Numeric
        new_nodeset << node if result == position
      elsif result.instance_of? Array
        new_nodeset << node if result.size > 0
      else
        new_nodeset << node if result
      end
    end
    return new_nodeset

  when :descendant_or_self
    # The helper evaluates the whole remaining path itself, so the stack
    # is emptied afterwards to stop the caller from applying it again.
    rv = descendant_or_self( path_stack, nodeset )
    path_stack.clear
    return rv

  when :descendant
    results = []
    nodeset.each do |node|
      next unless node.parent?
      results += internal_parse( path_stack.clone.unshift( :descendant_or_self ),
                                 node.children )
    end
    return results

  when :following_sibling
    results = []
    nodeset.each do |node|
      parent = node.parent
      next if parent.nil?    # a parentless node has no siblings
      all_siblings = parent.children
      current_index = all_siblings.index( node )
      following_siblings = all_siblings[ current_index+1 .. -1 ]
      results += internal_parse( path_stack.clone, following_siblings )
    end
    return results

  when :preceding_sibling
    results = []
    nodeset.each do |node|
      parent = node.parent
      next if parent.nil?    # a parentless node has no siblings
      all_siblings = parent.children
      current_index = all_siblings.index( node )
      # Exclusive range: "0 .. current_index-1" turned into 0..-1 (the
      # whole list) for the first child.
      preceding_siblings = all_siblings[ 0...current_index ]
      results += internal_parse( path_stack.clone, preceding_siblings )
    end
    return results

  when :preceding
    new_nodeset = []
    nodeset.each do |node|
      new_nodeset += preceding( node )
    end
    return new_nodeset

  when :following
    new_nodeset = []
    nodeset.each do |node|
      new_nodeset += following( node )
    end
    return new_nodeset

  when :namespace
    # The accumulator used to be created under a different name
    # ("new_set") than the one appended to, which raised on nil.
    new_nodeset = []
    nodeset.each do |node|
      new_nodeset << node.namespace if node.node_type == :element || node.node_type == :attribute
    end
    return new_nodeset

  when :variable
    var_name = path_stack.shift
    return @variables[ var_name ]

  end
  nodeset
end
|
||||
|
||||
##########################################################
|
||||
# The next two methods are BAD MOJO!
|
||||
# This is my achilles heel. If anybody thinks of a better
|
||||
# way of doing this, be my guest. This really sucks, but
|
||||
# it took me three days to get it to work at all.
|
||||
# ########################################################
|
||||
|
||||
# Lets d_o_s fill a nested result structure for +nodeset+, then returns
# it as one flat Array with the nil padding removed.
def descendant_or_self(path_stack, nodeset)
  collected = []
  d_o_s(path_stack, nodeset, collected)
  collected.flatten.compact
end
|
||||
|
||||
# Recursive worker for descendant_or_self.
#
# For each node in +ns+ the path +p+ is matched against that node alone.
# The node's children are then processed recursively, with their results
# added to that same match list. A non-empty list is inserted into +r+ at
# the node's own index, so +r+ ends up nested and may contain nil gaps.
def d_o_s( p, ns, r )
  ns.each_with_index do |node, position|
    hits = match( p.clone, [ node ] )
    d_o_s( p, node.children, hits ) if node.parent?
    r[position, 0] = [hits] unless hits.size == 0
  end
end
|
||||
|
||||
# Depth-first walk: yields every node of +nodeset+ to the block and, for
# nodes whose node_type is :element, descends by iterating the node itself.
def recurse( nodeset, &block )
  nodeset.each do |node|
    yield node
    recurse( node, &block ) if node.node_type == :element
  end
end
|
||||
|
||||
|
||||
# Evaluates a parsed predicate against a single context node; the result may be a boolean, a number or a nodeset.
|
||||
# Evaluates the parsed expression +predicate+ with +node+ as context.
#
# +predicate+ is an Array whose first element selects the operation; it is
# cloned first, so the caller's array is not consumed. Comparison and
# logical operators go through equality_relational_compare, arithmetic
# operators coerce both operands with Functions::number, :function
# dispatches to the Functions method of that name (dashes become
# underscores), and anything else is matched as a path against +node+.
def Predicate( predicate, node )
  predicate = predicate.clone
  case predicate[0]
  when :and, :or, :eq, :neq, :lt, :lteq, :gt, :gteq
    op = predicate.shift
    left = Predicate( predicate.shift, node )
    right = Predicate( predicate.shift, node )
    return equality_relational_compare( left, op, right )

  when :union
    predicate.shift
    left = Predicate( predicate.shift, node )
    right = Predicate( predicate.shift, node )
    # Two nodesets are merged directly. Previously both operands were
    # coerced to numbers first, so a nodeset union could never result.
    return left | right if left.kind_of?(Array) && right.kind_of?(Array)
    # Any other operand mix keeps the former numeric behaviour.
    return Functions::number( left ) | Functions::number( right )

  when :div, :mod, :mult, :plus, :minus
    op = predicate.shift
    left = Functions::number( Predicate( predicate.shift, node ) )
    right = Functions::number( Predicate( predicate.shift, node ) )
    case op
    when :div
      return left.to_f / right.to_f
    when :mod
      return left % right
    when :mult
      return left * right
    when :plus
      return left + right
    when :minus
      return left - right
    end

  when :neg
    predicate.shift
    # NOTE(review): unlike :not, the remaining array (not its first
    # element) is evaluated here; kept as in the original.
    operand = Functions::number(Predicate( predicate, node ))
    return -operand

  when :not
    predicate.shift
    return !Predicate( predicate.shift, node )

  when :function
    predicate.shift
    func_name = predicate.shift.tr('-', '_')
    arguments = predicate.shift
    # Arguments are themselves expressions and are evaluated first.
    args = arguments.collect { |arg| Predicate( arg, node ) }
    return Functions.send( func_name, *args )

  else
    return match( predicate, [ node ] )

  end
end
|
||||
|
||||
# Builds a nodeset of all of the following nodes of the supplied node,
|
||||
# in document order
|
||||
# Collects the siblings that come after +node+ under its parent, each
# followed by whatever recurse yields beneath it. A node without a parent
# yields an empty Array.
def following( node )
  parent = node.parent
  return [] if parent.nil?
  siblings = parent.children
  later_siblings = siblings[ siblings.index( node )+1 .. -1 ]
  collected = []
  recurse( later_siblings ) { |n| collected << n }
  # The first collected node used to be shifted off here, which dropped
  # the nearest following sibling from the result.
  collected
end
|
||||
|
||||
# Builds a nodeset of all of the preceding nodes of the supplied node,
|
||||
# in reverse document order
|
||||
# Collects the siblings that come before +node+ under its parent, together
# with whatever recurse yields beneath them. The siblings are walked
# nearest-first and the collected list is reversed before returning.
# A node without a parent yields an empty Array.
def preceding( node )
  parent = node.parent
  return [] if parent.nil?
  siblings = parent.children
  # Exclusive range: "0 .. index-1" became 0..-1 for the first child and
  # selected every sibling, the node itself included.
  earlier_siblings = siblings[ 0...siblings.index( node ) ].reverse
  collected = []
  recurse( earlier_siblings ) { |n| collected << n }
  collected.reverse
end
|
||||
|
||||
# Applies the comparison +op+ to two evaluated operands, each of which may
# be a nodeset (Array) or a scalar.
#
# * Two nodesets: single-element sets are unwrapped and treated as
#   scalars; otherwise the result is true if any pair of string values
#   satisfies +op+.
# * One nodeset and one scalar: the scalar's string form decides whether
#   items are compared as booleans, numbers or strings; true if any item
#   satisfies +op+. The nodeset item is always the left-hand operand.
# * Two scalars: booleans win, then (for :eq/:neq) numbers, then strings;
#   every other operator compares numbers.
def equality_relational_compare( set1, op, set2 )
  if set1.kind_of?(Array) && set2.kind_of?(Array)
    unless set1.size == 1 && set2.size == 1
      set1.each do |left_item|
        left_text = left_item.to_s
        set2.each do |right_item|
          return true if compare( left_text, op, right_item.to_s )
        end
      end
      return false
    end
    set1 = set1[0]
    set2 = set2[0]
  end

  unless set1.kind_of?(Array) || set2.kind_of?(Array)
    # Neither operand is a nodeset: pick a common type and compare once.
    text1 = set1.to_s
    text2 = set2.to_s
    if text1 == 'true' || text1 == 'false' || text2 == 'true' || text2 == 'false'
      set1 = Functions::boolean( set1 )
      set2 = Functions::boolean( set2 )
    elsif op == :eq || op == :neq
      if text1 =~ /^\d+(\.\d+)?$/ || text2 =~ /^\d+(\.\d+)?$/
        set1 = Functions::number( text1 )
        set2 = Functions::number( text2 )
      else
        set1 = Functions::string( set1 )
        set2 = Functions::string( set2 )
      end
    else
      set1 = Functions::number( set1 )
      set2 = Functions::number( set2 )
    end
    return compare( set1, op, set2 )
  end

  # Exactly one operand is a nodeset; the other is reduced to its string.
  if set1.kind_of?(Array)
    items, scalar = set1, set2.to_s
  else
    items, scalar = set2, set1.to_s
  end

  if scalar == 'true' || scalar == 'false'
    wanted = Functions::boolean( scalar )
    items.each do |item|
      return true if compare( Functions::boolean(item), op, wanted )
    end
  elsif scalar =~ /^\d+(\.\d+)?$/
    wanted = Functions::number( scalar )
    items.each do |item|
      return true if compare( Functions::number(item), op, wanted )
    end
  else
    wanted = Functions::string( scalar )
    items.each do |item|
      return true if compare( Functions::string(item), op, wanted )
    end
  end
  false
end
|
||||
|
||||
# Applies the operator named by the symbol +op+ to +a+ and +b+.
# Comparison operators return the result of the corresponding Ruby
# operator; :and / :or return the value of <tt>a && b</tt> / <tt>a || b</tt>.
# Any other +op+ yields false.
def compare(a, op, b)
  return a == b if op == :eq
  return a != b if op == :neq
  return a < b  if op == :lt
  return a <= b if op == :lteq
  return a > b  if op == :gt
  return a >= b if op == :gteq
  return (a && b) if op == :and
  return (a || b) if op == :or
  false
end
|
||||
end
|
||||
end
|
Loading…
Reference in a new issue