1
0
Fork 0
mirror of https://github.com/ruby/ruby.git synced 2022-11-09 12:17:21 -05:00

* Fixed the inheritance bug in the pull parser that James Britt found.

* Indentation changes, and changed some exceptions to runtime
  exceptions.
* Backed out the patch that changed the versions
* Wasn't including Text class.
* Fixes issue:25 (Trac)
* Fixes ticket:3 (Issue38 in Roundup.)
* Numerous fixes in the XPath interpreter correcting, among other
  things, ordering bugs and some incorrect behavior.


git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/branches/ruby_1_8@8973 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
ser 2005-08-12 12:08:52 +00:00
parent 1b12d598f8
commit 69e5c7d297
15 changed files with 792 additions and 786 deletions

View file

@ -101,20 +101,20 @@ module REXML
end end
@unnormalized = nil @unnormalized = nil
@value = @normalized = Text::normalize( @value, doctype ) @normalized = Text::normalize( @value, doctype )
end end
# Returns the UNNORMALIZED value of this attribute. That is, entities # Returns the UNNORMALIZED value of this attribute. That is, entities
# have been expanded to their values # have been expanded to their values
def value def value
@unnormalized if @unnormalized return @unnormalized if @unnormalized
doctype = nil doctype = nil
if @element if @element
doc = @element.document doc = @element.document
doctype = doc.doctype if doc doctype = doc.doctype if doc
end end
@normalized = nil @normalized = nil
@value = @unnormalized = Text::unnormalize( @value, doctype ) @unnormalized = Text::unnormalize( @value, doctype )
end end
# Returns a copy of this attribute # Returns a copy of this attribute

View file

@ -16,166 +16,166 @@ module REXML
# Document has a single child that can be accessed by root(). # Document has a single child that can be accessed by root().
# Note that if you want to have an XML declaration written for a document # Note that if you want to have an XML declaration written for a document
# you create, you must add one; REXML documents do not write a default # you create, you must add one; REXML documents do not write a default
# declaration for you. See |DECLARATION| and |write|. # declaration for you. See |DECLARATION| and |write|.
class Document < Element class Document < Element
# A convenient default XML declaration. If you want an XML declaration, # A convenient default XML declaration. If you want an XML declaration,
# the easiest way to add one is mydoc << Document::DECLARATION # the easiest way to add one is mydoc << Document::DECLARATION
# +DEPRECATED+ # +DEPRECATED+
# Use: mydoc << XMLDecl.default # Use: mydoc << XMLDecl.default
DECLARATION = XMLDecl.default DECLARATION = XMLDecl.default
# Constructor # Constructor
# @param source if supplied, must be a Document, String, or IO. # @param source if supplied, must be a Document, String, or IO.
# Documents have their context and Element attributes cloned. # Documents have their context and Element attributes cloned.
# Strings are expected to be valid XML documents. IOs are expected # Strings are expected to be valid XML documents. IOs are expected
# to be sources of valid XML documents. # to be sources of valid XML documents.
# @param context if supplied, contains the context of the document; # @param context if supplied, contains the context of the document;
# this should be a Hash. # this should be a Hash.
# NOTE that I'm not sure what the context is for; I cloned it out of # NOTE that I'm not sure what the context is for; I cloned it out of
# the Electric XML API (in which it also seems to do nothing), and it # the Electric XML API (in which it also seems to do nothing), and it
# is now legacy. It may do something, someday... it may disappear. # is now legacy. It may do something, someday... it may disappear.
def initialize( source = nil, context = {} ) def initialize( source = nil, context = {} )
super() super()
@context = context @context = context
return if source.nil? return if source.nil?
if source.kind_of? Document if source.kind_of? Document
@context = source.context @context = source.context
super source super source
else else
build( source ) build( source )
end end
end end
def node_type def node_type
:document :document
end end
# Should be obvious # Should be obvious
def clone def clone
Document.new self Document.new self
end end
# According to the XML spec, a root node has no expanded name # According to the XML spec, a root node has no expanded name
def expanded_name def expanded_name
'' ''
#d = doc_type #d = doc_type
#d ? d.name : "UNDEFINED" #d ? d.name : "UNDEFINED"
end end
alias :name :expanded_name alias :name :expanded_name
# We override this, because XMLDecls and DocTypes must go at the start # We override this, because XMLDecls and DocTypes must go at the start
# of the document # of the document
def add( child ) def add( child )
if child.kind_of? XMLDecl if child.kind_of? XMLDecl
@children.unshift child @children.unshift child
elsif child.kind_of? DocType elsif child.kind_of? DocType
if @children[0].kind_of? XMLDecl if @children[0].kind_of? XMLDecl
@children[1,0] = child @children[1,0] = child
else else
@children.unshift child @children.unshift child
end end
child.parent = self child.parent = self
else else
rv = super rv = super
raise "attempted adding second root element to document" if @elements.size > 1 raise "attempted adding second root element to document" if @elements.size > 1
rv rv
end end
end end
alias :<< :add alias :<< :add
def add_element(arg=nil, arg2=nil) def add_element(arg=nil, arg2=nil)
rv = super rv = super
raise "attempted adding second root element to document" if @elements.size > 1 raise "attempted adding second root element to document" if @elements.size > 1
rv rv
end end
# @return the root Element of the document, or nil if this document # @return the root Element of the document, or nil if this document
# has no children. # has no children.
def root def root
elements[1] elements[1]
#self #self
#@children.find { |item| item.kind_of? Element } #@children.find { |item| item.kind_of? Element }
end end
# @return the DocType child of the document, if one exists, # @return the DocType child of the document, if one exists,
# and nil otherwise. # and nil otherwise.
def doctype def doctype
@children.find { |item| item.kind_of? DocType } @children.find { |item| item.kind_of? DocType }
end end
# @return the XMLDecl of this document; if no XMLDecl has been # @return the XMLDecl of this document; if no XMLDecl has been
# set, the default declaration is returned. # set, the default declaration is returned.
def xml_decl def xml_decl
rv = @children[0] rv = @children[0]
return rv if rv.kind_of? XMLDecl return rv if rv.kind_of? XMLDecl
rv = @children.unshift(XMLDecl.default)[0] rv = @children.unshift(XMLDecl.default)[0]
end end
# @return the XMLDecl version of this document as a String. # @return the XMLDecl version of this document as a String.
# If no XMLDecl has been set, returns the default version. # If no XMLDecl has been set, returns the default version.
def version def version
xml_decl().version xml_decl().version
end end
# @return the XMLDecl encoding of this document as a String. # @return the XMLDecl encoding of this document as a String.
# If no XMLDecl has been set, returns the default encoding. # If no XMLDecl has been set, returns the default encoding.
def encoding def encoding
xml_decl().encoding xml_decl().encoding
end end
# @return the XMLDecl standalone value of this document as a String. # @return the XMLDecl standalone value of this document as a String.
# If no XMLDecl has been set, returns the default setting. # If no XMLDecl has been set, returns the default setting.
def stand_alone? def stand_alone?
xml_decl().stand_alone? xml_decl().stand_alone?
end end
# Write the XML tree out, optionally with indent. This writes out the # Write the XML tree out, optionally with indent. This writes out the
# entire XML document, including XML declarations, doctype declarations, # entire XML document, including XML declarations, doctype declarations,
# and processing instructions (if any are given). # and processing instructions (if any are given).
# A controversial point is whether Document should always write the XML # A controversial point is whether Document should always write the XML
# declaration (<?xml version='1.0'?>) whether or not one is given by the # declaration (<?xml version='1.0'?>) whether or not one is given by the
# user (or source document). REXML does not write one if one was not # user (or source document). REXML does not write one if one was not
# specified, because it adds unnecessary bandwidth to applications such # specified, because it adds unnecessary bandwidth to applications such
# as XML-RPC. # as XML-RPC.
# #
# #
# output:: # output::
# output an object which supports '<< string'; this is where the # output an object which supports '<< string'; this is where the
# document will be written. # document will be written.
# indent:: # indent::
# An integer. If -1, no indenting will be used; otherwise, the # An integer. If -1, no indenting will be used; otherwise, the
# indentation will be this number of spaces, and children will be # indentation will be this number of spaces, and children will be
# indented an additional amount. Defaults to -1 # indented an additional amount. Defaults to -1
# transitive:: # transitive::
# If transitive is true and indent is >= 0, then the output will be # If transitive is true and indent is >= 0, then the output will be
# pretty-printed in such a way that the added whitespace does not affect # pretty-printed in such a way that the added whitespace does not affect
# the absolute *value* of the document -- that is, it leaves the value # the absolute *value* of the document -- that is, it leaves the value
# and number of Text nodes in the document unchanged. # and number of Text nodes in the document unchanged.
# ie_hack:: # ie_hack::
# Internet Explorer is the worst piece of crap to have ever been # Internet Explorer is the worst piece of crap to have ever been
# written, with the possible exception of Windows itself. Since IE is # written, with the possible exception of Windows itself. Since IE is
# unable to parse proper XML, we have to provide a hack to generate XML # unable to parse proper XML, we have to provide a hack to generate XML
# that IE's limited abilities can handle. This hack inserts a space # that IE's limited abilities can handle. This hack inserts a space
# before the /> on empty tags. Defaults to false # before the /> on empty tags. Defaults to false
def write( output=$stdout, indent_level=-1, transitive=false, ie_hack=false ) def write( output=$stdout, indent=-1, transitive=false, ie_hack=false )
output = Output.new( output, xml_decl.encoding ) if xml_decl.encoding != "UTF-8" && !output.kind_of?(Output) output = Output.new( output, xml_decl.encoding ) if xml_decl.encoding != "UTF-8" && !output.kind_of?(Output)
@children.each { |node| @children.each { |node|
indent( output, indent_level ) if node.node_type == :element indent( output, indent ) if node.node_type == :element
if node.write( output, indent_level, transitive, ie_hack ) if node.write( output, indent, transitive, ie_hack )
output << "\n" unless indent_level<0 or node == @children[-1] output << "\n" unless indent<0 or node == @children[-1]
end end
} }
end end
def Document::parse_stream( source, listener ) def Document::parse_stream( source, listener )
Parsers::StreamParser.new( source, listener ).parse Parsers::StreamParser.new( source, listener ).parse
end end
private private
def build( source ) def build( source )
Parsers::TreeParser.new( source, self ).parse Parsers::TreeParser.new( source, self ).parse
end end
end end
end end

View file

@ -36,8 +36,6 @@ module REXML
# If an Element, the object will be shallowly cloned; name, # If an Element, the object will be shallowly cloned; name,
# attributes, and namespaces will be copied. Children will +not+ be # attributes, and namespaces will be copied. Children will +not+ be
# copied. # copied.
# If a Source, the source will be scanned and parsed for an Element,
# and all child elements will be recursively parsed as well.
# parent:: # parent::
# if supplied, must be a Parent, and will be used as # if supplied, must be a Parent, and will be used as
# the parent of this object. # the parent of this object.
@ -223,7 +221,7 @@ module REXML
# b.namespace("y") # -> '2' # b.namespace("y") # -> '2'
def namespace(prefix=nil) def namespace(prefix=nil)
if prefix.nil? if prefix.nil?
prefix = self.prefix() prefix = prefix()
end end
if prefix == '' if prefix == ''
prefix = "xmlns" prefix = "xmlns"

View file

@ -339,6 +339,8 @@ module REXML
end end
def Functions::sum( nodes ) def Functions::sum( nodes )
nodes = [nodes] unless nodes.kind_of? Array
nodes.inject(0) { |r,n| r += number(string(n)) }
end end
def Functions::floor( number ) def Functions::floor( number )

View file

@ -38,8 +38,8 @@ module REXML
Instruction.new self Instruction.new self
end end
def write writer, indent_level=-1, transitive=false, ie_hack=false def write writer, indent=-1, transitive=false, ie_hack=false
indent(writer, indent_level) indent(writer, indent)
writer << START.sub(/\\/u, '') writer << START.sub(/\\/u, '')
writer << @target writer << @target
writer << ' ' writer << ' '

View file

@ -2,103 +2,103 @@ require 'rexml/parseexception'
require 'rexml/source' require 'rexml/source'
module REXML module REXML
module Parsers module Parsers
# = Using the Pull Parser # = Using the Pull Parser
# <em>This API is experimental, and subject to change.</em> # <em>This API is experimental, and subject to change.</em>
# parser = PullParser.new( "<a>text<b att='val'/>txet</a>" ) # parser = PullParser.new( "<a>text<b att='val'/>txet</a>" )
# while parser.has_next? # while parser.has_next?
# res = parser.next # res = parser.next
# puts res[1]['att'] if res.start_tag? and res[0] == 'b' # puts res[1]['att'] if res.start_tag? and res[0] == 'b'
# end # end
# See the PullEvent class for information on the content of the results. # See the PullEvent class for information on the content of the results.
# The data is identical to the arguments passed for the various events to # The data is identical to the arguments passed for the various events to
# the StreamListener API. # the StreamListener API.
# #
# Notice that: # Notice that:
# parser = PullParser.new( "<a>BAD DOCUMENT" ) # parser = PullParser.new( "<a>BAD DOCUMENT" )
# while parser.has_next? # while parser.has_next?
# res = parser.next # res = parser.next
# raise res[1] if res.error? # raise res[1] if res.error?
# end # end
# #
# Nat Price gave me some good ideas for the API. # Nat Price gave me some good ideas for the API.
class BaseParser class BaseParser
NCNAME_STR= '[\w:][\-\w\d.]*' NCNAME_STR= '[\w:][\-\w\d.]*'
NAME_STR= "(?:#{NCNAME_STR}:)?#{NCNAME_STR}" NAME_STR= "(?:#{NCNAME_STR}:)?#{NCNAME_STR}"
NAMECHAR = '[\-\w\d\.:]' NAMECHAR = '[\-\w\d\.:]'
NAME = "([\\w:]#{NAMECHAR}*)" NAME = "([\\w:]#{NAMECHAR}*)"
NMTOKEN = "(?:#{NAMECHAR})+" NMTOKEN = "(?:#{NAMECHAR})+"
NMTOKENS = "#{NMTOKEN}(\\s+#{NMTOKEN})*" NMTOKENS = "#{NMTOKEN}(\\s+#{NMTOKEN})*"
REFERENCE = "(?:&#{NAME};|&#\\d+;|&#x[0-9a-fA-F]+;)" REFERENCE = "(?:&#{NAME};|&#\\d+;|&#x[0-9a-fA-F]+;)"
REFERENCE_RE = /#{REFERENCE}/ REFERENCE_RE = /#{REFERENCE}/
DOCTYPE_START = /\A\s*<!DOCTYPE\s/um DOCTYPE_START = /\A\s*<!DOCTYPE\s/um
DOCTYPE_PATTERN = /\s*<!DOCTYPE\s+(.*?)(\[|>)/um DOCTYPE_PATTERN = /\s*<!DOCTYPE\s+(.*?)(\[|>)/um
ATTRIBUTE_PATTERN = /\s*(#{NAME_STR})\s*=\s*(["'])(.*?)\2/um ATTRIBUTE_PATTERN = /\s*(#{NAME_STR})\s*=\s*(["'])(.*?)\2/um
COMMENT_START = /\A<!--/u COMMENT_START = /\A<!--/u
COMMENT_PATTERN = /<!--(.*?)-->/um COMMENT_PATTERN = /<!--(.*?)-->/um
CDATA_START = /\A<!\[CDATA\[/u CDATA_START = /\A<!\[CDATA\[/u
CDATA_END = /^\s*\]\s*>/um CDATA_END = /^\s*\]\s*>/um
CDATA_PATTERN = /<!\[CDATA\[(.*?)\]\]>/um CDATA_PATTERN = /<!\[CDATA\[(.*?)\]\]>/um
XMLDECL_START = /\A<\?xml\s/u; XMLDECL_START = /\A<\?xml\s/u;
XMLDECL_PATTERN = /<\?xml\s+(.*?)\?>*/um XMLDECL_PATTERN = /<\?xml\s+(.*?)\?>*/um
INSTRUCTION_START = /\A<\?/u INSTRUCTION_START = /\A<\?/u
INSTRUCTION_PATTERN = /<\?(.*?)(\s+.*?)?\?>/um INSTRUCTION_PATTERN = /<\?(.*?)(\s+.*?)?\?>/um
TAG_MATCH = /^<((?>#{NAME_STR}))\s*((?>\s+#{NAME_STR}\s*=\s*(["']).*?\3)*)\s*(\/)?>/um TAG_MATCH = /^<((?>#{NAME_STR}))\s*((?>\s+#{NAME_STR}\s*=\s*(["']).*?\3)*)\s*(\/)?>/um
CLOSE_MATCH = /^\s*<\/(#{NAME_STR})\s*>/um CLOSE_MATCH = /^\s*<\/(#{NAME_STR})\s*>/um
VERSION = /\bversion\s*=\s*["'](.*?)['"]/um VERSION = /\bversion\s*=\s*["'](.*?)['"]/um
ENCODING = /\bencoding=["'](.*?)['"]/um ENCODING = /\bencoding=["'](.*?)['"]/um
STANDALONE = /\bstandalone=["'](.*?)['"]/um STANDALONE = /\bstandalone=["'](.*?)['"]/um
ENTITY_START = /^\s*<!ENTITY/ ENTITY_START = /^\s*<!ENTITY/
IDENTITY = /^([!\*\w\-]+)(\s+#{NCNAME_STR})?(\s+["'].*?['"])?(\s+['"].*?["'])?/u IDENTITY = /^([!\*\w\-]+)(\s+#{NCNAME_STR})?(\s+["'].*?['"])?(\s+['"].*?["'])?/u
ELEMENTDECL_START = /^\s*<!ELEMENT/um ELEMENTDECL_START = /^\s*<!ELEMENT/um
ELEMENTDECL_PATTERN = /^\s*(<!ELEMENT.*?)>/um ELEMENTDECL_PATTERN = /^\s*(<!ELEMENT.*?)>/um
SYSTEMENTITY = /^\s*(%.*?;)\s*$/um SYSTEMENTITY = /^\s*(%.*?;)\s*$/um
ENUMERATION = "\\(\\s*#{NMTOKEN}(?:\\s*\\|\\s*#{NMTOKEN})*\\s*\\)" ENUMERATION = "\\(\\s*#{NMTOKEN}(?:\\s*\\|\\s*#{NMTOKEN})*\\s*\\)"
NOTATIONTYPE = "NOTATION\\s+\\(\\s*#{NAME}(?:\\s*\\|\\s*#{NAME})*\\s*\\)" NOTATIONTYPE = "NOTATION\\s+\\(\\s*#{NAME}(?:\\s*\\|\\s*#{NAME})*\\s*\\)"
ENUMERATEDTYPE = "(?:(?:#{NOTATIONTYPE})|(?:#{ENUMERATION}))" ENUMERATEDTYPE = "(?:(?:#{NOTATIONTYPE})|(?:#{ENUMERATION}))"
ATTTYPE = "(CDATA|ID|IDREF|IDREFS|ENTITY|ENTITIES|NMTOKEN|NMTOKENS|#{ENUMERATEDTYPE})" ATTTYPE = "(CDATA|ID|IDREF|IDREFS|ENTITY|ENTITIES|NMTOKEN|NMTOKENS|#{ENUMERATEDTYPE})"
ATTVALUE = "(?:\"((?:[^<&\"]|#{REFERENCE})*)\")|(?:'((?:[^<&']|#{REFERENCE})*)')" ATTVALUE = "(?:\"((?:[^<&\"]|#{REFERENCE})*)\")|(?:'((?:[^<&']|#{REFERENCE})*)')"
DEFAULTDECL = "(#REQUIRED|#IMPLIED|(?:(#FIXED\\s+)?#{ATTVALUE}))" DEFAULTDECL = "(#REQUIRED|#IMPLIED|(?:(#FIXED\\s+)?#{ATTVALUE}))"
ATTDEF = "\\s+#{NAME}\\s+#{ATTTYPE}\\s+#{DEFAULTDECL}" ATTDEF = "\\s+#{NAME}\\s+#{ATTTYPE}\\s+#{DEFAULTDECL}"
ATTDEF_RE = /#{ATTDEF}/ ATTDEF_RE = /#{ATTDEF}/
ATTLISTDECL_START = /^\s*<!ATTLIST/um ATTLISTDECL_START = /^\s*<!ATTLIST/um
ATTLISTDECL_PATTERN = /^\s*<!ATTLIST\s+#{NAME}(?:#{ATTDEF})*\s*>/um ATTLISTDECL_PATTERN = /^\s*<!ATTLIST\s+#{NAME}(?:#{ATTDEF})*\s*>/um
NOTATIONDECL_START = /^\s*<!NOTATION/um NOTATIONDECL_START = /^\s*<!NOTATION/um
PUBLIC = /^\s*<!NOTATION\s+(\w[\-\w]*)\s+(PUBLIC)\s+((["']).*?\4)\s*>/um PUBLIC = /^\s*<!NOTATION\s+(\w[\-\w]*)\s+(PUBLIC)\s+((["']).*?\4)\s*>/um
SYSTEM = /^\s*<!NOTATION\s+(\w[\-\w]*)\s+(SYSTEM)\s+((["']).*?\4)\s*>/um SYSTEM = /^\s*<!NOTATION\s+(\w[\-\w]*)\s+(SYSTEM)\s+((["']).*?\4)\s*>/um
TEXT_PATTERN = /\A([^<]*)/um TEXT_PATTERN = /\A([^<]*)/um
# Entity constants # Entity constants
PUBIDCHAR = "\x20\x0D\x0Aa-zA-Z0-9\\-()+,./:=?;!*@$_%#" PUBIDCHAR = "\x20\x0D\x0Aa-zA-Z0-9\\-()+,./:=?;!*@$_%#"
SYSTEMLITERAL = %Q{((?:"[^"]*")|(?:'[^']*'))} SYSTEMLITERAL = %Q{((?:"[^"]*")|(?:'[^']*'))}
PUBIDLITERAL = %Q{("[#{PUBIDCHAR}']*"|'[#{PUBIDCHAR}]*')} PUBIDLITERAL = %Q{("[#{PUBIDCHAR}']*"|'[#{PUBIDCHAR}]*')}
EXTERNALID = "(?:(?:(SYSTEM)\\s+#{SYSTEMLITERAL})|(?:(PUBLIC)\\s+#{PUBIDLITERAL}\\s+#{SYSTEMLITERAL}))" EXTERNALID = "(?:(?:(SYSTEM)\\s+#{SYSTEMLITERAL})|(?:(PUBLIC)\\s+#{PUBIDLITERAL}\\s+#{SYSTEMLITERAL}))"
NDATADECL = "\\s+NDATA\\s+#{NAME}" NDATADECL = "\\s+NDATA\\s+#{NAME}"
PEREFERENCE = "%#{NAME};" PEREFERENCE = "%#{NAME};"
ENTITYVALUE = %Q{((?:"(?:[^%&"]|#{PEREFERENCE}|#{REFERENCE})*")|(?:'([^%&']|#{PEREFERENCE}|#{REFERENCE})*'))} ENTITYVALUE = %Q{((?:"(?:[^%&"]|#{PEREFERENCE}|#{REFERENCE})*")|(?:'([^%&']|#{PEREFERENCE}|#{REFERENCE})*'))}
PEDEF = "(?:#{ENTITYVALUE}|#{EXTERNALID})" PEDEF = "(?:#{ENTITYVALUE}|#{EXTERNALID})"
ENTITYDEF = "(?:#{ENTITYVALUE}|(?:#{EXTERNALID}(#{NDATADECL})?))" ENTITYDEF = "(?:#{ENTITYVALUE}|(?:#{EXTERNALID}(#{NDATADECL})?))"
PEDECL = "<!ENTITY\\s+(%)\\s+#{NAME}\\s+#{PEDEF}\\s*>" PEDECL = "<!ENTITY\\s+(%)\\s+#{NAME}\\s+#{PEDEF}\\s*>"
GEDECL = "<!ENTITY\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>" GEDECL = "<!ENTITY\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
ENTITYDECL = /\s*(?:#{GEDECL})|(?:#{PEDECL})/um ENTITYDECL = /\s*(?:#{GEDECL})|(?:#{PEDECL})/um
EREFERENCE = /&(?!#{NAME};)/ EREFERENCE = /&(?!#{NAME};)/
DEFAULT_ENTITIES = { DEFAULT_ENTITIES = {
'gt' => [/&gt;/, '&gt;', '>', />/], 'gt' => [/&gt;/, '&gt;', '>', />/],
'lt' => [/&lt;/, '&lt;', '<', /</], 'lt' => [/&lt;/, '&lt;', '<', /</],
'quot' => [/&quot;/, '&quot;', '"', /"/], 'quot' => [/&quot;/, '&quot;', '"', /"/],
"apos" => [/&apos;/, "&apos;", "'", /'/] "apos" => [/&apos;/, "&apos;", "'", /'/]
} }
def initialize( source ) def initialize( source )
self.stream = source self.stream = source
end end
def add_listener( listener ) def add_listener( listener )
if !defined?(@listeners) or !@listeners if !defined?(@listeners) or !@listeners
@ -119,315 +119,315 @@ module REXML
attr_reader :source attr_reader :source
def stream=( source ) def stream=( source )
if source.kind_of? String if source.kind_of? String
@source = Source.new(source) @source = Source.new(source)
elsif source.kind_of? IO elsif source.kind_of? IO
@source = IOSource.new(source) @source = IOSource.new(source)
elsif source.kind_of? Source elsif source.kind_of? Source
@source = source @source = source
elsif defined? StringIO and source.kind_of? StringIO elsif defined? StringIO and source.kind_of? StringIO
@source = IOSource.new(source) @source = IOSource.new(source)
else else
raise "#{source.class} is not a valid input stream. It must be \n"+ raise "#{source.class} is not a valid input stream. It must be \n"+
"either a String, IO, StringIO or Source." "either a String, IO, StringIO or Source."
end end
@closed = nil @closed = nil
@document_status = nil @document_status = nil
@tags = [] @tags = []
@stack = [] @stack = []
@entities = [] @entities = []
end end
# Returns true if there are no more events # Returns true if there are no more events
def empty? def empty?
#puts "@source.empty? = #{@source.empty?}" #puts "@source.empty? = #{@source.empty?}"
#puts "@stack.empty? = #{@stack.empty?}" #puts "@stack.empty? = #{@stack.empty?}"
return (@source.empty? and @stack.empty?) return (@source.empty? and @stack.empty?)
end end
# Returns true if there are more events. Synonymous with !empty? # Returns true if there are more events. Synonymous with !empty?
def has_next? def has_next?
return !(@source.empty? and @stack.empty?) return !(@source.empty? and @stack.empty?)
end end
# Push an event back on the head of the stream. This method # Push an event back on the head of the stream. This method
# has (theoretically) infinite depth. # has (theoretically) infinite depth.
def unshift token def unshift token
@stack.unshift(token) @stack.unshift(token)
end end
# Peek at the +depth+ event in the stack. The first element on the stack # Peek at the +depth+ event in the stack. The first element on the stack
# is at depth 0. If +depth+ is -1, will parse to the end of the input # is at depth 0. If +depth+ is -1, will parse to the end of the input
# stream and return the last event, which is always :end_document. # stream and return the last event, which is always :end_document.
# Be aware that this causes the stream to be parsed up to the +depth+ # Be aware that this causes the stream to be parsed up to the +depth+
# event, so you can effectively pre-parse the entire document (pull the # event, so you can effectively pre-parse the entire document (pull the
# entire thing into memory) using this method. # entire thing into memory) using this method.
def peek depth=0 def peek depth=0
raise %Q[Illegal argument "#{depth}"] if depth < -1 raise %Q[Illegal argument "#{depth}"] if depth < -1
temp = [] temp = []
if depth == -1 if depth == -1
temp.push(pull()) until empty? temp.push(pull()) until empty?
else else
while @stack.size+temp.size < depth+1 while @stack.size+temp.size < depth+1
temp.push(pull()) temp.push(pull())
end end
end end
@stack += temp if temp.size > 0 @stack += temp if temp.size > 0
@stack[depth] @stack[depth]
end end
# Returns the next event. This is a +PullEvent+ object. # Returns the next event. This is a +PullEvent+ object.
def pull def pull
if @closed if @closed
x, @closed = @closed, nil x, @closed = @closed, nil
return [ :end_element, x ] return [ :end_element, x ]
end end
return [ :end_document ] if empty? return [ :end_document ] if empty?
return @stack.shift if @stack.size > 0 return @stack.shift if @stack.size > 0
@source.read if @source.buffer.size<2 @source.read if @source.buffer.size<2
if @document_status == nil if @document_status == nil
@source.consume( /^\s*/um ) @source.consume( /^\s*/um )
word = @source.match( /(<[^>]*)>/um ) word = @source.match( /(<[^>]*)>/um )
word = word[1] unless word.nil? word = word[1] unless word.nil?
case word case word
when COMMENT_START when COMMENT_START
return [ :comment, @source.match( COMMENT_PATTERN, true )[1] ] return [ :comment, @source.match( COMMENT_PATTERN, true )[1] ]
when XMLDECL_START when XMLDECL_START
results = @source.match( XMLDECL_PATTERN, true )[1] results = @source.match( XMLDECL_PATTERN, true )[1]
version = VERSION.match( results ) version = VERSION.match( results )
version = version[1] unless version.nil? version = version[1] unless version.nil?
encoding = ENCODING.match(results) encoding = ENCODING.match(results)
encoding = encoding[1] unless encoding.nil? encoding = encoding[1] unless encoding.nil?
@source.encoding = encoding @source.encoding = encoding
standalone = STANDALONE.match(results) standalone = STANDALONE.match(results)
standalone = standalone[1] unless standalone.nil? standalone = standalone[1] unless standalone.nil?
return [ :xmldecl, version, encoding, standalone] return [ :xmldecl, version, encoding, standalone]
when INSTRUCTION_START when INSTRUCTION_START
return [ :processing_instruction, *@source.match(INSTRUCTION_PATTERN, true)[1,2] ] return [ :processing_instruction, *@source.match(INSTRUCTION_PATTERN, true)[1,2] ]
when DOCTYPE_START when DOCTYPE_START
md = @source.match( DOCTYPE_PATTERN, true ) md = @source.match( DOCTYPE_PATTERN, true )
identity = md[1] identity = md[1]
close = md[2] close = md[2]
identity =~ IDENTITY identity =~ IDENTITY
name = $1 name = $1
raise REXML::ParseException("DOCTYPE is missing a name") if name.nil? raise REXML::ParseException("DOCTYPE is missing a name") if name.nil?
pub_sys = $2.nil? ? nil : $2.strip pub_sys = $2.nil? ? nil : $2.strip
long_name = $3.nil? ? nil : $3.strip long_name = $3.nil? ? nil : $3.strip
uri = $4.nil? ? nil : $4.strip uri = $4.nil? ? nil : $4.strip
args = [ :start_doctype, name, pub_sys, long_name, uri ] args = [ :start_doctype, name, pub_sys, long_name, uri ]
if close == ">" if close == ">"
@document_status = :after_doctype @document_status = :after_doctype
@source.read if @source.buffer.size<2 @source.read if @source.buffer.size<2
md = @source.match(/^\s*/um, true) md = @source.match(/^\s*/um, true)
@stack << [ :end_doctype ] @stack << [ :end_doctype ]
else else
@document_status = :in_doctype @document_status = :in_doctype
end end
return args return args
else else
@document_status = :after_doctype @document_status = :after_doctype
@source.read if @source.buffer.size<2 @source.read if @source.buffer.size<2
md = @source.match(/\s*/um, true) md = @source.match(/\s*/um, true)
end end
end end
if @document_status == :in_doctype if @document_status == :in_doctype
md = @source.match(/\s*(.*?>)/um) md = @source.match(/\s*(.*?>)/um)
case md[1] case md[1]
when SYSTEMENTITY when SYSTEMENTITY
match = @source.match( SYSTEMENTITY, true )[1] match = @source.match( SYSTEMENTITY, true )[1]
return [ :externalentity, match ] return [ :externalentity, match ]
when ELEMENTDECL_START when ELEMENTDECL_START
return [ :elementdecl, @source.match( ELEMENTDECL_PATTERN, true )[1] ] return [ :elementdecl, @source.match( ELEMENTDECL_PATTERN, true )[1] ]
when ENTITY_START when ENTITY_START
match = @source.match( ENTITYDECL, true ).to_a.compact match = @source.match( ENTITYDECL, true ).to_a.compact
match[0] = :entitydecl match[0] = :entitydecl
ref = false ref = false
if match[1] == '%' if match[1] == '%'
ref = true ref = true
match.delete_at 1 match.delete_at 1
end end
# Now we have to sort out what kind of entity reference this is # Now we have to sort out what kind of entity reference this is
if match[2] == 'SYSTEM' if match[2] == 'SYSTEM'
# External reference # External reference
match[3] = match[3][1..-2] # PUBID match[3] = match[3][1..-2] # PUBID
match.delete_at(4) if match.size > 4 # Chop out NDATA decl match.delete_at(4) if match.size > 4 # Chop out NDATA decl
# match is [ :entity, name, SYSTEM, pubid(, ndata)? ] # match is [ :entity, name, SYSTEM, pubid(, ndata)? ]
elsif match[2] == 'PUBLIC' elsif match[2] == 'PUBLIC'
# External reference # External reference
match[3] = match[3][1..-2] # PUBID match[3] = match[3][1..-2] # PUBID
match[4] = match[4][1..-2] # HREF match[4] = match[4][1..-2] # HREF
# match is [ :entity, name, PUBLIC, pubid, href ] # match is [ :entity, name, PUBLIC, pubid, href ]
else else
match[2] = match[2][1..-2] match[2] = match[2][1..-2]
match.pop if match.size == 4 match.pop if match.size == 4
# match is [ :entity, name, value ] # match is [ :entity, name, value ]
end end
match << '%' if ref match << '%' if ref
return match return match
when ATTLISTDECL_START when ATTLISTDECL_START
md = @source.match( ATTLISTDECL_PATTERN, true ) md = @source.match( ATTLISTDECL_PATTERN, true )
raise REXML::ParseException.new( "Bad ATTLIST declaration!", @source ) if md.nil? raise REXML::ParseException.new( "Bad ATTLIST declaration!", @source ) if md.nil?
element = md[1] element = md[1]
contents = md[0] contents = md[0]
pairs = {} pairs = {}
values = md[0].scan( ATTDEF_RE ) values = md[0].scan( ATTDEF_RE )
values.each do |attdef| values.each do |attdef|
unless attdef[3] == "#IMPLIED" unless attdef[3] == "#IMPLIED"
attdef.compact! attdef.compact!
val = attdef[3] val = attdef[3]
val = attdef[4] if val == "#FIXED " val = attdef[4] if val == "#FIXED "
pairs[attdef[0]] = val pairs[attdef[0]] = val
end end
end end
return [ :attlistdecl, element, pairs, contents ] return [ :attlistdecl, element, pairs, contents ]
when NOTATIONDECL_START when NOTATIONDECL_START
md = nil md = nil
if @source.match( PUBLIC ) if @source.match( PUBLIC )
md = @source.match( PUBLIC, true ) md = @source.match( PUBLIC, true )
elsif @source.match( SYSTEM ) elsif @source.match( SYSTEM )
md = @source.match( SYSTEM, true ) md = @source.match( SYSTEM, true )
else else
raise REXML::ParseException.new( "error parsing notation: no matching pattern", @source ) raise REXML::ParseException.new( "error parsing notation: no matching pattern", @source )
end end
return [ :notationdecl, md[1], md[2], md[3] ] return [ :notationdecl, md[1], md[2], md[3] ]
when CDATA_END when CDATA_END
@document_status = :after_doctype @document_status = :after_doctype
@source.match( CDATA_END, true ) @source.match( CDATA_END, true )
return [ :end_doctype ] return [ :end_doctype ]
end end
end end
begin begin
if @source.buffer[0] == ?< if @source.buffer[0] == ?<
if @source.buffer[1] == ?/ if @source.buffer[1] == ?/
last_tag = @tags.pop last_tag = @tags.pop
#md = @source.match_to_consume( '>', CLOSE_MATCH) #md = @source.match_to_consume( '>', CLOSE_MATCH)
md = @source.match( CLOSE_MATCH, true ) md = @source.match( CLOSE_MATCH, true )
raise REXML::ParseException.new( "Missing end tag for "+ raise REXML::ParseException.new( "Missing end tag for "+
"'#{last_tag}' (got \"#{md[1]}\")", "'#{last_tag}' (got \"#{md[1]}\")",
@source) unless last_tag == md[1] @source) unless last_tag == md[1]
return [ :end_element, last_tag ] return [ :end_element, last_tag ]
elsif @source.buffer[1] == ?! elsif @source.buffer[1] == ?!
md = @source.match(/\A(\s*[^>]*>)/um) md = @source.match(/\A(\s*[^>]*>)/um)
#puts "SOURCE BUFFER = #{source.buffer}, #{source.buffer.size}" #puts "SOURCE BUFFER = #{source.buffer}, #{source.buffer.size}"
raise REXML::ParseException.new("Malformed node", @source) unless md raise REXML::ParseException.new("Malformed node", @source) unless md
if md[0][2] == ?- if md[0][2] == ?-
md = @source.match( COMMENT_PATTERN, true ) md = @source.match( COMMENT_PATTERN, true )
return [ :comment, md[1] ] if md return [ :comment, md[1] ] if md
else else
md = @source.match( CDATA_PATTERN, true ) md = @source.match( CDATA_PATTERN, true )
return [ :cdata, md[1] ] if md return [ :cdata, md[1] ] if md
end end
raise REXML::ParseException.new( "Declarations can only occur "+ raise REXML::ParseException.new( "Declarations can only occur "+
"in the doctype declaration.", @source) "in the doctype declaration.", @source)
elsif @source.buffer[1] == ?? elsif @source.buffer[1] == ??
md = @source.match( INSTRUCTION_PATTERN, true ) md = @source.match( INSTRUCTION_PATTERN, true )
return [ :processing_instruction, md[1], md[2] ] if md return [ :processing_instruction, md[1], md[2] ] if md
raise REXML::ParseException.new( "Bad instruction declaration", raise REXML::ParseException.new( "Bad instruction declaration",
@source) @source)
else else
# Get the next tag # Get the next tag
md = @source.match(TAG_MATCH, true) md = @source.match(TAG_MATCH, true)
raise REXML::ParseException.new("malformed XML: missing tag start", @source) unless md raise REXML::ParseException.new("malformed XML: missing tag start", @source) unless md
attrs = [] attrs = []
if md[2].size > 0 if md[2].size > 0
attrs = md[2].scan( ATTRIBUTE_PATTERN ) attrs = md[2].scan( ATTRIBUTE_PATTERN )
raise REXML::ParseException.new( "error parsing attributes: [#{attrs.join ', '}], excess = \"#$'\"", @source) if $' and $'.strip.size > 0 raise REXML::ParseException.new( "error parsing attributes: [#{attrs.join ', '}], excess = \"#$'\"", @source) if $' and $'.strip.size > 0
end end
if md[4] if md[4]
@closed = md[1] @closed = md[1]
else else
@tags.push( md[1] ) @tags.push( md[1] )
end end
attributes = {} attributes = {}
attrs.each { |a,b,c| attributes[a] = c } attrs.each { |a,b,c| attributes[a] = c }
return [ :start_element, md[1], attributes ] return [ :start_element, md[1], attributes ]
end end
else else
md = @source.match( TEXT_PATTERN, true ) md = @source.match( TEXT_PATTERN, true )
if md[0].length == 0 if md[0].length == 0
#puts "EMPTY = #{empty?}" #puts "EMPTY = #{empty?}"
#puts "BUFFER = \"#{@source.buffer}\"" #puts "BUFFER = \"#{@source.buffer}\""
@source.match( /(\s+)/, true ) @source.match( /(\s+)/, true )
end end
#return [ :text, "" ] if md[0].length == 0 #return [ :text, "" ] if md[0].length == 0
# unnormalized = Text::unnormalize( md[1], self ) # unnormalized = Text::unnormalize( md[1], self )
# return PullEvent.new( :text, md[1], unnormalized ) # return PullEvent.new( :text, md[1], unnormalized )
return [ :text, md[1] ] return [ :text, md[1] ]
end end
rescue REXML::ParseException rescue REXML::ParseException
raise raise
rescue Exception, NameError => error rescue Exception, NameError => error
raise REXML::ParseException.new( "Exception parsing", raise REXML::ParseException.new( "Exception parsing",
@source, self, (error ? error : $!) ) @source, self, (error ? error : $!) )
end end
return [ :dummy ] return [ :dummy ]
end end
# Looks up the replacement text for the entity named +reference+.
#
# reference:: the entity name used as the lookup key
# entities::  optional collection of declared entities, indexed by name;
#             consulted first when it is not nil
#
# When +entities+ has no value for the name, DEFAULT_ENTITIES is tried and
# element [2] of the matching entry is used.  Whatever value is found is
# passed through unnormalize (with the same +entities+) before being
# returned.  Returns nil when the name is unknown in both places.
def entity( reference, entities )
  value = entities[ reference ] if entities
  unless value
    default = DEFAULT_ENTITIES[ reference ]
    value = default[2] if default
  end
  unnormalize( value, entities ) if value
end
# Escapes all possible entities
#
# input::         the text to escape; it is cloned, never modified
# entities::      optional name => value pairs; every occurrence of a value
#                 is replaced by the reference "&name;"
# entity_filter:: optional collection of entity names that must NOT be
#                 substituted
#
# Returns the escaped copy.
def normalize( input, entities=nil, entity_filter=nil )
  copy = input.clone
  # Doing it like this rather than in a loop improves the speed
  copy.gsub!( EREFERENCE, '&amp;' )
  entities.each do |key, value|
    # The filter is checked against the entity name (key).  The previous
    # code tested an undefined local called `entity`, which raised as soon
    # as a filter was supplied.
    copy.gsub!( value, "&#{key};" ) unless entity_filter and
                                entity_filter.include?(key)
  end if entities
  # Second pass: EREFERENCE is applied again after the substitutions above.
  copy.gsub!( EREFERENCE, '&amp;' )
  # Each DEFAULT_ENTITIES entry supplies a pattern at [3] and the
  # replacement reference at [1].
  DEFAULT_ENTITIES.each do |key, value|
    copy.gsub!( value[3], value[1] )
  end
  copy
end
# Unescapes all possible entities
#
# string::   the text to unescape; it is cloned, never modified
# entities:: optional declared entities, handed on to #entity for lookup
# filter::   optional collection of entity names that must be left as
#            references
#
# Returns the unescaped copy.  Line ends ("\r\n" and lone "\r") are turned
# into "\n" first.
def unnormalize( string, entities=nil, filter=nil )
  rv = string.clone
  rv.gsub!( /\r\n?/, "\n" )
  matches = rv.scan( REFERENCE_RE )
  return rv if matches.size == 0
  # Numeric character references: decimal (&#65;) or hexadecimal (&#x41;).
  rv.gsub!( /&#0*((?:\d+)|(?:x[a-fA-F0-9]+));/ ) {
    m = $1
    # Integer() needs the "0x" prefix to read the digits as hexadecimal.
    m = "0#{m}" if m[0] == ?x
    [Integer(m)].pack('U*')
  }
  # Keep only the first capture of each match and drop the nils
  # (presumably the entity name, nil for numeric references; this depends
  # on how REFERENCE_RE is defined, confirm there).
  matches.collect!{|x|x[0]}.compact!
  if matches.size > 0
    matches.each do |entity_reference|
      unless filter and filter.include?(entity_reference)
        entity_value = entity( entity_reference, entities )
        if entity_value
          # The name is escaped so characters such as "." match literally,
          # and the block form inserts the value verbatim; a replacement
          # string would have its backslash sequences (\1, \\) interpreted.
          re = /&#{Regexp.escape( entity_reference )};/
          rv.gsub!( re ) { entity_value }
        end
      end
    end
    matches.each do |entity_reference|
      unless filter and filter.include?(entity_reference)
        # DEFAULT_ENTITIES entries supply the reference pattern at [0] and
        # the replacement text at [2].
        er = DEFAULT_ENTITIES[entity_reference]
        rv.gsub!( er[0], er[2] ) if er
      end
    end
    # &amp; is done last, so the ampersands it produces are not re-read
    # as the start of another reference.
    rv.gsub!( /&amp;/, '&' )
  end
  rv
end
end end
end end
end end
=begin =begin

View file

@ -1,96 +1,99 @@
require 'forwardable'
require 'rexml/parseexception' require 'rexml/parseexception'
require 'rexml/parsers/baseparser' require 'rexml/parsers/baseparser'
require 'rexml/xmltokens' require 'rexml/xmltokens'
module REXML module REXML
module Parsers module Parsers
# = Using the Pull Parser # = Using the Pull Parser
# <em>This API is experimental, and subject to change.</em> # <em>This API is experimental, and subject to change.</em>
# parser = PullParser.new( "<a>text<b att='val'/>txet</a>" ) # parser = PullParser.new( "<a>text<b att='val'/>txet</a>" )
# while parser.has_next? # while parser.has_next?
# res = parser.next # res = parser.next
#     puts res[1]['att'] if res.start_element? and res[0] == 'b'
# end # end
# See the PullEvent class for information on the content of the results. # See the PullEvent class for information on the content of the results.
# The data is identical to the arguments passed for the various events to # The data is identical to the arguments passed for the various events to
# the StreamListener API. # the StreamListener API.
# #
# Notice that: # Notice that:
# parser = PullParser.new( "<a>BAD DOCUMENT" ) # parser = PullParser.new( "<a>BAD DOCUMENT" )
# while parser.has_next? # while parser.has_next?
# res = parser.next # res = parser.next
# raise res[1] if res.error? # raise res[1] if res.error?
# end # end
# #
# Nat Price gave me some good ideas for the API. # Nat Price gave me some good ideas for the API.
class PullParser
  include XMLTokens
  extend Forwardable

  # Entity lookup is forwarded, arguments and all, to the wrapped parser.
  def_delegators( :@parser, :entity )

  # stream:: handed unchanged to BaseParser.new
  def initialize stream
    @entities = {}
    @listeners = nil
    @parser = BaseParser.new( stream )
    # Events that were peeked at or pushed back and not yet pulled,
    # oldest first.
    @my_stack = []
  end

  # Appends +listener+ to the listener list, creating the list on first use.
  def add_listener( listener )
    @listeners = [] unless @listeners
    @listeners << listener
  end

  # Yields every remaining event, in order, until none are left.
  def each
    while has_next?
      yield self.pull
    end
  end

  # True while an event can still be pulled.  Buffered events count: once
  # peek has moved the parser's last event into the buffer the parser
  # itself has nothing left, yet that event has not been delivered.
  def has_next?
    @my_stack.length > 0 or @parser.has_next?
  end

  # True only when nothing is buffered here and the wrapped parser also
  # reports empty.
  def empty?
    @my_stack.empty? and @parser.empty?
  end

  # Returns the event +depth+ positions ahead (0 = the next one) without
  # consuming it.  Events read to satisfy the request are buffered and go
  # through the same processing as pulled events, so a peeked event is
  # identical to the one pull returns later.
  def peek depth=0
    while @my_stack.length <= depth
      @my_stack.push( next_event )
    end
    @my_stack[depth]
  end

  # Returns the next event, taking buffered events first.
  def pull
    return @my_stack.shift if @my_stack.length > 0
    next_event
  end

  # Pushes +token+ back so that it is the next thing pull returns.  The
  # token is stored and returned as given.
  def unshift token
    @my_stack.unshift token
  end

  private

  # Reads one raw event from the wrapped parser and wraps it in a
  # PullEvent.  Entity declarations are recorded in @entities (unless the
  # value matches PUBLIC or SYSTEM), and text events get their
  # unnormalized form appended as an extra element.
  def next_event
    event = @parser.pull
    case event[0]
    when :entitydecl
      @entities[ event[1] ] =
        event[2] unless event[2] =~ /PUBLIC|SYSTEM/
    when :text
      event << @parser.unnormalize( event[1], @entities )
    end
    PullEvent.new( event )
  end
end
def entity reference # A parsing event. The contents of the event are accessed as an +Array?,
@parser.entity( reference ) # and the type is given either by the ...? methods, or by accessing the
# +type+ accessor. The contents of this object vary from event to event,
# but are identical to the arguments passed to +StreamListener+s for each
# event.
class PullEvent
# The type of this event. Will be one of :tag_start, :tag_end, :text,
# :processing_instruction, :comment, :doctype, :attlistdecl, :entitydecl,
# :notationdecl, :entity, :cdata, :xmldecl, or :error.
def initialize(arg)
@contents = arg
end end
def empty?
@parser.empty?
end
end
# A parsing event.  The contents of the event are accessed as an +Array+,
# and the type is given either by the ...? methods, or by calling the
# +event_type+ method.  The contents of this object vary from event to
# event, but are identical to the arguments passed to +StreamListener+s
# for each event.
class PullEvent
# The type of this event.  Will be one of :start_element, :end_element,
# :text, :processing_instruction, :comment, :start_doctype, :attlistdecl,
# :elementdecl, :entitydecl, :notationdecl, :entity, :cdata, :xmldecl, or
# :error.
def initialize(arg)
@contents = arg
end
def []( start, endd=nil) def []( start, endd=nil)
if start.kind_of? Range if start.kind_of? Range
@contents.slice( start.begin+1 .. start.end ) @contents.slice( start.begin+1 .. start.end )
@ -103,90 +106,90 @@ module REXML
else else
raise "Illegal argument #{start.inspect} (#{start.class})" raise "Illegal argument #{start.inspect} (#{start.class})"
end end
end end
# The symbol identifying the kind of event: the first element of the
# wrapped event array.
def event_type
  @contents[0]
end

# Content: [ String tag_name, Hash attributes ]
def start_element?
  @contents[0] == :start_element
end

# Content: [ String tag_name ]
def end_element?
  @contents[0] == :end_element
end

# Content: [ String raw_text, String unnormalized_text ]
def text?
  @contents[0] == :text
end

# Content: two strings captured from the instruction (presumably its
# target and its content; confirm against INSTRUCTION_PATTERN)
def instruction?
  @contents[0] == :processing_instruction
end

# Content: [ String text ]
def comment?
  @contents[0] == :comment
end

# Content: [ String name, String pub_sys, String long_name, String uri ]
def doctype?
  @contents[0] == :start_doctype
end

# Content: [ element, Hash attribute_name => value, contents ]
def attlistdecl?
  @contents[0] == :attlistdecl
end

# Content: [ String text ]
def elementdecl?
  @contents[0] == :elementdecl
end

# Due to the wonders of DTDs, an entity declaration can be just about
# anything.  There's no way to normalize it; you'll have to interpret the
# content yourself.  However, the following is true:
#
# * If the entity declaration is an internal entity:
#   [ String name, String value ]
# Content: [ String text ]
def entitydecl?
  @contents[0] == :entitydecl
end

# Content: three strings captured from the PUBLIC or SYSTEM notation match
def notationdecl?
  @contents[0] == :notationdecl
end

# Content: [ String text ]
def entity?
  @contents[0] == :entity
end

# Content: [ String text ]
def cdata?
  @contents[0] == :cdata
end

# Content: [ String version, String encoding, String standalone ]
def xmldecl?
  @contents[0] == :xmldecl
end

# True when this event reports an error.
def error?
  @contents[0] == :error
end
def inspect def inspect
@contents[0].to_s + ": " + @contents[1..-1].inspect @contents[0].to_s + ": " + @contents[1..-1].inspect
end end
end end
end end
end end

View file

@ -1,9 +1,11 @@
require 'rexml/parsers/baseparser' require 'rexml/parsers/baseparser'
require 'rexml/parseexception' require 'rexml/parseexception'
require 'rexml/namespace' require 'rexml/namespace'
require 'rexml/text'
module REXML module REXML
module Parsers module Parsers
# SAX2Parser
class SAX2Parser class SAX2Parser
def initialize source def initialize source
@parser = BaseParser.new(source) @parser = BaseParser.new(source)
@ -36,6 +38,10 @@ module REXML
# :start_prefix_mapping, :end_prefix_mapping, :characters, # :start_prefix_mapping, :end_prefix_mapping, :characters,
# :processing_instruction, :doctype, :attlistdecl, :elementdecl, # :processing_instruction, :doctype, :attlistdecl, :elementdecl,
# :entitydecl, :notationdecl, :cdata, :xmldecl, :comment # :entitydecl, :notationdecl, :cdata, :xmldecl, :comment
#
# There is an additional symbol that can be listened for: :progress.
# This will be called for every event generated, passing in the current
# stream position.
# #
# Array contains regular expressions or strings which will be matched # Array contains regular expressions or strings which will be matched
# against fully qualified element names. # against fully qualified element names.
@ -161,6 +167,7 @@ module REXML
:elementdecl, :cdata, :notationdecl, :xmldecl :elementdecl, :cdata, :notationdecl, :xmldecl
handle( *event ) handle( *event )
end end
handle( :progress, @parser.source.position )
end end
end end

View file

@ -11,7 +11,7 @@
# Main page:: http://www.germane-software.com/software/rexml # Main page:: http://www.germane-software.com/software/rexml
# Author:: Sean Russell <serATgermaneHYPHENsoftwareDOTcom> # Author:: Sean Russell <serATgermaneHYPHENsoftwareDOTcom>
# Version:: 3.1.3 # Version:: 3.1.3
# Date:: +2005/138 # Date:: 2005/224
# #
# This API documentation can be downloaded from the REXML home page, or can # This API documentation can be downloaded from the REXML home page, or can
# be accessed online[http://www.germane-software.com/software/rexml_doc] # be accessed online[http://www.germane-software.com/software/rexml_doc]
@ -20,7 +20,7 @@
# or can be accessed # or can be accessed
# online[http://www.germane-software.com/software/rexml/docs/tutorial.html] # online[http://www.germane-software.com/software/rexml/docs/tutorial.html]
module REXML
  # Library metadata.  Date is written as year/day-of-year.
  Copyright = "Copyright © 2001, 2002, 2003, 2004 Sean Russell <ser@germane-software.com>"
  Date = "2005/224"
  Version = "3.1.3"
end

View file

@ -90,5 +90,7 @@ module REXML
# @p comment The content of the comment # @p comment The content of the comment
def comment comment
  # Intentionally empty: this default handler ignores the comment and
  # returns nil.
end
# Default progress handler: accepts the reported position and does nothing.
# position:: the value reported by the caller (presumably the current
#            offset in the source; confirm against the parser)
def progress( position ); end
end end
end end

View file

@ -98,6 +98,10 @@ module REXML
@buffer == "" @buffer == ""
end end
# Offset of the remaining buffer (@buffer) within the original text (@orig).
#
# NOTE(review): this assumes @buffer is always the not-yet-consumed tail of
# @orig; confirm against the code that consumes the buffer.  Under that
# assumption the last occurrence is the right one: a suffix cannot occur
# any later than its own position, while `index` would report an earlier
# duplicate of the same text, and 0 for an empty buffer.
# Returns nil when @buffer does not occur in @orig at all.
def position
  @orig.rindex( @buffer )
end
# @return the current line in the source # @return the current line in the source
def current_line def current_line
lines = @orig.split lines = @orig.split
@ -194,6 +198,10 @@ module REXML
super and ( @source.nil? || @source.eof? ) super and ( @source.nil? || @source.eof? )
end end
# Reports the current offset, as given by #pos, of the object held in
# @er_source.
def position
  stream = @er_source
  return stream.pos
end
# @return the current line in the source # @return the current line in the source
def current_line def current_line
begin begin

View file

@ -82,10 +82,13 @@ module REXML
@event_arg = event_arg @event_arg = event_arg
end end
attr_reader :done?
attr_reader :event_type attr_reader :event_type
attr_accessor :event_arg attr_accessor :event_arg
# Reader for the @done flag; returns its value as stored.
def done?
  return @done
end
# True unless this event opens an element or an attribute, i.e. unless
# @event_type is :start_element or :start_attribute.
def single?
  @event_type != :start_element && @event_type != :start_attribute
end

View file

@ -2,71 +2,71 @@ require 'rexml/encoding'
require 'rexml/source' require 'rexml/source'
module REXML module REXML
# NEEDS DOCUMENTATION # NEEDS DOCUMENTATION
class XMLDecl < Child class XMLDecl < Child
include Encoding include Encoding
DEFAULT_VERSION = "1.0"; DEFAULT_VERSION = "1.0";
DEFAULT_ENCODING = "UTF-8"; DEFAULT_ENCODING = "UTF-8";
DEFAULT_STANDALONE = "no"; DEFAULT_STANDALONE = "no";
START = '<\?xml'; START = '<\?xml';
STOP = '\?>'; STOP = '\?>';
attr_accessor :version, :standalone attr_accessor :version, :standalone
attr_reader :writeencoding attr_reader :writeencoding
# Builds a declaration either from explicit values or as a copy of another
# XMLDecl.
#
# version::    a version string, or an XMLDecl whose version, encoding,
#              writeencoding flag and standalone value are copied
# encoding::   used only when +version+ is not an XMLDecl; a non-nil value
#              also sets the @writeencoding flag
# standalone:: used only when +version+ is not an XMLDecl
def initialize(version=DEFAULT_VERSION, encoding=nil, standalone=nil)
  @writethis = true
  @writeencoding = !encoding.nil?
  # Both branches used to start with the same parent initialisation.
  super()
  if version.kind_of? XMLDecl
    @version = version.version
    self.encoding = version.encoding
    @writeencoding = version.writeencoding
    @standalone = version.standalone
  else
    @version = version
    self.encoding = encoding
    @standalone = standalone
  end
  # An explicit nil version falls back to the default as well.
  @version = DEFAULT_VERSION if @version.nil?
end
# Returns a new XMLDecl built from this one via the copying form of the
# constructor.
def clone
  XMLDecl.new(self)
end
# Writes the declaration to +writer+.
#
# writer::     receives the text via <<
# indent::     passed on to the indent helper
# transitive:: not used by this method
# ie_hack::    not used by this method
#
# Returns nil without writing anything unless @writethis is set or the
# writer is an Output.
def write writer, indent=-1, transitive=false, ie_hack=false
  return nil unless @writethis or writer.kind_of? Output
  # The parameter shadows the indent helper of the same name; the call
  # below still reaches the helper because it has parentheses and
  # arguments.
  indent( writer, indent )
  writer << START.sub(/\\/u, '')
  # An Output writer supplies the encoding to declare; otherwise this
  # declaration's own encoding is used.
  enc = writer.kind_of?( Output ) ? writer.encoding : encoding
  writer << " #{content enc}"
  writer << STOP.sub(/\\/u, '')
end
# Two declarations are equal when +other+ is an XMLDecl with the same
# version, encoding and standalone value.
def ==( other )
  other.kind_of?(XMLDecl) and
    other.version == @version and
    other.encoding == self.encoding and
    other.standalone == @standalone
end
# Replaces the version, encoding and standalone values in one call.  The
# encoding goes through the encoding= writer, not straight into an
# instance variable.
def xmldecl version, encoding, standalone
  @version = version
  self.encoding = encoding
  @standalone = standalone
end
# The symbol identifying this kind of node.
def node_type
  :xmldecl
end
alias :stand_alone? :standalone alias :stand_alone? :standalone
alias :old_enc= :encoding= alias :old_enc= :encoding=
def encoding=( enc ) def encoding=( enc )
@ -98,12 +98,12 @@ module REXML
START.sub(/\\/u, '') + " ... " + STOP.sub(/\\/u, '') START.sub(/\\/u, '') + " ... " + STOP.sub(/\\/u, '')
end end
private private
# Builds the attribute list that goes between the start and stop markers.
#
# enc:: the encoding name to declare, or nil to leave it out
#
# The encoding is written when @writeencoding is set or when +enc+ is not
# some spelling of UTF-8.  standalone is written only when it has a value.
def content(enc)
  rv = "version='#@version'"
  # A nil encoding is skipped: encoding='' is not a legal encoding
  # declaration.
  rv << " encoding='#{enc}'" if enc and (@writeencoding || enc !~ /utf-8/i)
  rv << " standalone='#@standalone'" if @standalone
  rv
end
end end
end end

View file

@ -2,76 +2,65 @@ require 'rexml/functions'
require 'rexml/xpath_parser' require 'rexml/xpath_parser'
module REXML module REXML
# Wrapper class. Use this class to access the XPath functions. # Wrapper class. Use this class to access the XPath functions.
class XPath
  include Functions
  EMPTY_HASH = {}

  # Finds and returns the first node that matches the supplied xpath.
  # element::
  #   The context element
  # path::
  #   The xpath to search for.  If not supplied or nil, returns the first
  #   node matching '*'.
  # namespaces::
  #   If supplied, a Hash which defines a namespace mapping.
  # variables::
  #   If supplied, a Hash which is handed to the parser as its variables.
  #
  #  XPath.first( node )
  #  XPath.first( doc, "//b" )
  #  XPath.first( node, "a/x:b", { "x"=>"http://doofus" } )
  def XPath::first element, path=nil, namespaces={}, variables={}
    raise "The namespaces argument, if supplied, must be a hash object." unless namespaces.kind_of? Hash
    raise "The variables argument, if supplied, must be a hash object." unless variables.kind_of? Hash
    run_query( element, path, namespaces, variables ).flatten[0]
  end

  # Iterates over nodes that match the given path, calling the supplied
  # block with the match.
  # element::
  #   The context element
  # path::
  #   The xpath to search for.  If not supplied or nil, defaults to '*'
  # namespaces::
  #   If supplied, a Hash which defines a namespace mapping
  # variables::
  #   If supplied, a Hash which is handed to the parser as its variables.
  #
  #  XPath.each( node ) { |el| ... }
  #  XPath.each( node, "/*[@attr='v']" ) { |el| ... }
  #  XPath.each( node, 'ancestor::x' ) { |el| ... }
  def XPath::each element, path=nil, namespaces={}, variables={}, &block
    raise "The namespaces argument, if supplied, must be a hash object." unless namespaces.kind_of? Hash
    raise "The variables argument, if supplied, must be a hash object." unless variables.kind_of? Hash
    run_query( element, path, namespaces, variables ).each( &block )
  end

  # Returns an array of nodes matching a given XPath.  Unlike first and
  # each, this method does not check that +namespaces+ and +variables+ are
  # Hash objects.
  def XPath::match element, path=nil, namespaces={}, variables={}
    run_query( element, path, namespaces, variables )
  end

  # Shared by first, each and match: configures a fresh XPathParser,
  # defaults a nil path to "*", wraps a single context node in an Array and
  # returns whatever the parser's parse method yields.
  def XPath::run_query element, path, namespaces, variables
    parser = XPathParser.new
    parser.namespaces = namespaces
    parser.variables = variables
    path = "*" unless path
    element = [element] unless element.kind_of? Array
    parser.parse(path, element)
  end
  private_class_method :run_query
end
end end

View file

@ -76,6 +76,8 @@ module REXML
# Performs a depth-first (document order) XPath search, and returns the # Performs a depth-first (document order) XPath search, and returns the
# first match. This is the fastest, lightest way to return a single result. # first match. This is the fastest, lightest way to return a single result.
#
# FIXME: This method is incomplete!
def first( path_stack, node ) def first( path_stack, node )
#puts "#{depth}) Entering match( #{path.inspect}, #{tree.inspect} )" #puts "#{depth}) Entering match( #{path.inspect}, #{tree.inspect} )"
return nil if path.size == 0 return nil if path.size == 0
@ -123,14 +125,6 @@ module REXML
r = expr( path_stack, nodeset ) r = expr( path_stack, nodeset )
#puts "MAIN EXPR => #{r.inspect}" #puts "MAIN EXPR => #{r.inspect}"
r r
#while ( path_stack.size > 0 and nodeset.size > 0 )
# #puts "MATCH: #{path_stack.inspect} '#{nodeset.collect{|n|n.class}.inspect}'"
# nodeset = expr( path_stack, nodeset )
# #puts "NODESET: #{nodeset.inspect}"
# #puts "PATH_STACK: #{path_stack.inspect}"
#end
#nodeset
end end
private private