mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
* Fixed the inheritance bug in the pull parser that James Britt found.
* Indentation changes, and changed some exceptions to runtime exceptions. * Backed out the patch that changed the versions * Wasn't including Text class. * Fixes issue:25 (Trac) * Fixes ticket:3 (Issue38 in Roundup.) * Numerous fixes in the XPath interpreter correcting, among other things, ordering bugs and some incorrect behavior. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/branches/ruby_1_8@8973 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
parent
1b12d598f8
commit
69e5c7d297
15 changed files with 792 additions and 786 deletions
|
@ -101,20 +101,20 @@ module REXML
|
|||
end
|
||||
|
||||
@unnormalized = nil
|
||||
@value = @normalized = Text::normalize( @value, doctype )
|
||||
@normalized = Text::normalize( @value, doctype )
|
||||
end
|
||||
|
||||
# Returns the UNNORMALIZED value of this attribute. That is, entities
|
||||
# have been expanded to their values
|
||||
def value
|
||||
@unnormalized if @unnormalized
|
||||
return @unnormalized if @unnormalized
|
||||
doctype = nil
|
||||
if @element
|
||||
doc = @element.document
|
||||
doctype = doc.doctype if doc
|
||||
end
|
||||
@normalized = nil
|
||||
@value = @unnormalized = Text::unnormalize( @value, doctype )
|
||||
@unnormalized = Text::unnormalize( @value, doctype )
|
||||
end
|
||||
|
||||
# Returns a copy of this attribute
|
||||
|
|
|
@ -16,166 +16,166 @@ module REXML
|
|||
# Document has a single child that can be accessed by root().
|
||||
# Note that if you want to have an XML declaration written for a document
|
||||
# you create, you must add one; REXML documents do not write a default
|
||||
# declaration for you. See |DECLARATION| and |write|.
|
||||
class Document < Element
|
||||
# A convenient default XML declaration. If you want an XML declaration,
|
||||
# the easiest way to add one is mydoc << Document::DECLARATION
|
||||
# declaration for you. See |DECLARATION| and |write|.
|
||||
class Document < Element
|
||||
# A convenient default XML declaration. If you want an XML declaration,
|
||||
# the easiest way to add one is mydoc << Document::DECLARATION
|
||||
# +DEPRECATED+
|
||||
# Use: mydoc << XMLDecl.default
|
||||
DECLARATION = XMLDecl.default
|
||||
DECLARATION = XMLDecl.default
|
||||
|
||||
# Constructor
|
||||
# @param source if supplied, must be a Document, String, or IO.
|
||||
# Documents have their context and Element attributes cloned.
|
||||
# Strings are expected to be valid XML documents. IOs are expected
|
||||
# to be sources of valid XML documents.
|
||||
# @param context if supplied, contains the context of the document;
|
||||
# this should be a Hash.
|
||||
# NOTE that I'm not sure what the context is for; I cloned it out of
|
||||
# the Electric XML API (in which it also seems to do nothing), and it
|
||||
# is now legacy. It may do something, someday... it may disappear.
|
||||
def initialize( source = nil, context = {} )
|
||||
super()
|
||||
@context = context
|
||||
return if source.nil?
|
||||
if source.kind_of? Document
|
||||
@context = source.context
|
||||
super source
|
||||
else
|
||||
build( source )
|
||||
end
|
||||
end
|
||||
# Constructor
|
||||
# @param source if supplied, must be a Document, String, or IO.
|
||||
# Documents have their context and Element attributes cloned.
|
||||
# Strings are expected to be valid XML documents. IOs are expected
|
||||
# to be sources of valid XML documents.
|
||||
# @param context if supplied, contains the context of the document;
|
||||
# this should be a Hash.
|
||||
# NOTE that I'm not sure what the context is for; I cloned it out of
|
||||
# the Electric XML API (in which it also seems to do nothing), and it
|
||||
# is now legacy. It may do something, someday... it may disappear.
|
||||
def initialize( source = nil, context = {} )
|
||||
super()
|
||||
@context = context
|
||||
return if source.nil?
|
||||
if source.kind_of? Document
|
||||
@context = source.context
|
||||
super source
|
||||
else
|
||||
build( source )
|
||||
end
|
||||
end
|
||||
|
||||
def node_type
|
||||
:document
|
||||
end
|
||||
|
||||
# Should be obvious
|
||||
def clone
|
||||
Document.new self
|
||||
end
|
||||
# Should be obvious
|
||||
def clone
|
||||
Document.new self
|
||||
end
|
||||
|
||||
# According to the XML spec, a root node has no expanded name
|
||||
def expanded_name
|
||||
''
|
||||
#d = doc_type
|
||||
#d ? d.name : "UNDEFINED"
|
||||
end
|
||||
# According to the XML spec, a root node has no expanded name
|
||||
def expanded_name
|
||||
''
|
||||
#d = doc_type
|
||||
#d ? d.name : "UNDEFINED"
|
||||
end
|
||||
|
||||
alias :name :expanded_name
|
||||
alias :name :expanded_name
|
||||
|
||||
# We override this, because XMLDecls and DocTypes must go at the start
|
||||
# of the document
|
||||
def add( child )
|
||||
if child.kind_of? XMLDecl
|
||||
@children.unshift child
|
||||
elsif child.kind_of? DocType
|
||||
if @children[0].kind_of? XMLDecl
|
||||
@children[1,0] = child
|
||||
else
|
||||
@children.unshift child
|
||||
end
|
||||
child.parent = self
|
||||
else
|
||||
rv = super
|
||||
raise "attempted adding second root element to document" if @elements.size > 1
|
||||
rv
|
||||
end
|
||||
end
|
||||
alias :<< :add
|
||||
# We override this, because XMLDecls and DocTypes must go at the start
|
||||
# of the document
|
||||
def add( child )
|
||||
if child.kind_of? XMLDecl
|
||||
@children.unshift child
|
||||
elsif child.kind_of? DocType
|
||||
if @children[0].kind_of? XMLDecl
|
||||
@children[1,0] = child
|
||||
else
|
||||
@children.unshift child
|
||||
end
|
||||
child.parent = self
|
||||
else
|
||||
rv = super
|
||||
raise "attempted adding second root element to document" if @elements.size > 1
|
||||
rv
|
||||
end
|
||||
end
|
||||
alias :<< :add
|
||||
|
||||
def add_element(arg=nil, arg2=nil)
|
||||
rv = super
|
||||
raise "attempted adding second root element to document" if @elements.size > 1
|
||||
rv
|
||||
end
|
||||
def add_element(arg=nil, arg2=nil)
|
||||
rv = super
|
||||
raise "attempted adding second root element to document" if @elements.size > 1
|
||||
rv
|
||||
end
|
||||
|
||||
# @return the root Element of the document, or nil if this document
|
||||
# has no children.
|
||||
def root
|
||||
# @return the root Element of the document, or nil if this document
|
||||
# has no children.
|
||||
def root
|
||||
elements[1]
|
||||
#self
|
||||
#@children.find { |item| item.kind_of? Element }
|
||||
end
|
||||
end
|
||||
|
||||
# @return the DocType child of the document, if one exists,
|
||||
# and nil otherwise.
|
||||
def doctype
|
||||
@children.find { |item| item.kind_of? DocType }
|
||||
end
|
||||
# @return the DocType child of the document, if one exists,
|
||||
# and nil otherwise.
|
||||
def doctype
|
||||
@children.find { |item| item.kind_of? DocType }
|
||||
end
|
||||
|
||||
# @return the XMLDecl of this document; if no XMLDecl has been
|
||||
# set, the default declaration is returned.
|
||||
def xml_decl
|
||||
rv = @children[0]
|
||||
# @return the XMLDecl of this document; if no XMLDecl has been
|
||||
# set, the default declaration is returned.
|
||||
def xml_decl
|
||||
rv = @children[0]
|
||||
return rv if rv.kind_of? XMLDecl
|
||||
rv = @children.unshift(XMLDecl.default)[0]
|
||||
end
|
||||
end
|
||||
|
||||
# @return the XMLDecl version of this document as a String.
|
||||
# If no XMLDecl has been set, returns the default version.
|
||||
def version
|
||||
xml_decl().version
|
||||
end
|
||||
# @return the XMLDecl version of this document as a String.
|
||||
# If no XMLDecl has been set, returns the default version.
|
||||
def version
|
||||
xml_decl().version
|
||||
end
|
||||
|
||||
# @return the XMLDecl encoding of this document as a String.
|
||||
# If no XMLDecl has been set, returns the default encoding.
|
||||
def encoding
|
||||
xml_decl().encoding
|
||||
end
|
||||
# @return the XMLDecl encoding of this document as a String.
|
||||
# If no XMLDecl has been set, returns the default encoding.
|
||||
def encoding
|
||||
xml_decl().encoding
|
||||
end
|
||||
|
||||
# @return the XMLDecl standalone value of this document as a String.
|
||||
# If no XMLDecl has been set, returns the default setting.
|
||||
def stand_alone?
|
||||
xml_decl().stand_alone?
|
||||
end
|
||||
# @return the XMLDecl standalone value of this document as a String.
|
||||
# If no XMLDecl has been set, returns the default setting.
|
||||
def stand_alone?
|
||||
xml_decl().stand_alone?
|
||||
end
|
||||
|
||||
# Write the XML tree out, optionally with indent. This writes out the
|
||||
# entire XML document, including XML declarations, doctype declarations,
|
||||
# and processing instructions (if any are given).
|
||||
# A controversial point is whether Document should always write the XML
|
||||
# declaration (<?xml version='1.0'?>) whether or not one is given by the
|
||||
# user (or source document). REXML does not write one if one was not
|
||||
# specified, because it adds unneccessary bandwidth to applications such
|
||||
# as XML-RPC.
|
||||
#
|
||||
#
|
||||
# output::
|
||||
# output an object which supports '<< string'; this is where the
|
||||
# document will be written.
|
||||
# indent::
|
||||
# An integer. If -1, no indenting will be used; otherwise, the
|
||||
# indentation will be this number of spaces, and children will be
|
||||
# indented an additional amount. Defaults to -1
|
||||
# transitive::
|
||||
# If transitive is true and indent is >= 0, then the output will be
|
||||
# pretty-printed in such a way that the added whitespace does not affect
|
||||
# the absolute *value* of the document -- that is, it leaves the value
|
||||
# and number of Text nodes in the document unchanged.
|
||||
# ie_hack::
|
||||
# Internet Explorer is the worst piece of crap to have ever been
|
||||
# written, with the possible exception of Windows itself. Since IE is
|
||||
# unable to parse proper XML, we have to provide a hack to generate XML
|
||||
# that IE's limited abilities can handle. This hack inserts a space
|
||||
# before the /> on empty tags. Defaults to false
|
||||
def write( output=$stdout, indent_level=-1, transitive=false, ie_hack=false )
|
||||
output = Output.new( output, xml_decl.encoding ) if xml_decl.encoding != "UTF-8" && !output.kind_of?(Output)
|
||||
@children.each { |node|
|
||||
indent( output, indent_level ) if node.node_type == :element
|
||||
if node.write( output, indent_level, transitive, ie_hack )
|
||||
output << "\n" unless indent_level<0 or node == @children[-1]
|
||||
# Write the XML tree out, optionally with indent. This writes out the
|
||||
# entire XML document, including XML declarations, doctype declarations,
|
||||
# and processing instructions (if any are given).
|
||||
# A controversial point is whether Document should always write the XML
|
||||
# declaration (<?xml version='1.0'?>) whether or not one is given by the
|
||||
# user (or source document). REXML does not write one if one was not
|
||||
# specified, because it adds unneccessary bandwidth to applications such
|
||||
# as XML-RPC.
|
||||
#
|
||||
#
|
||||
# output::
|
||||
# output an object which supports '<< string'; this is where the
|
||||
# document will be written.
|
||||
# indent::
|
||||
# An integer. If -1, no indenting will be used; otherwise, the
|
||||
# indentation will be this number of spaces, and children will be
|
||||
# indented an additional amount. Defaults to -1
|
||||
# transitive::
|
||||
# If transitive is true and indent is >= 0, then the output will be
|
||||
# pretty-printed in such a way that the added whitespace does not affect
|
||||
# the absolute *value* of the document -- that is, it leaves the value
|
||||
# and number of Text nodes in the document unchanged.
|
||||
# ie_hack::
|
||||
# Internet Explorer is the worst piece of crap to have ever been
|
||||
# written, with the possible exception of Windows itself. Since IE is
|
||||
# unable to parse proper XML, we have to provide a hack to generate XML
|
||||
# that IE's limited abilities can handle. This hack inserts a space
|
||||
# before the /> on empty tags. Defaults to false
|
||||
def write( output=$stdout, indent=-1, transitive=false, ie_hack=false )
|
||||
output = Output.new( output, xml_decl.encoding ) if xml_decl.encoding != "UTF-8" && !output.kind_of?(Output)
|
||||
@children.each { |node|
|
||||
indent( output, indent ) if node.node_type == :element
|
||||
if node.write( output, indent, transitive, ie_hack )
|
||||
output << "\n" unless indent<0 or node == @children[-1]
|
||||
end
|
||||
}
|
||||
end
|
||||
}
|
||||
end
|
||||
|
||||
|
||||
def Document::parse_stream( source, listener )
|
||||
Parsers::StreamParser.new( source, listener ).parse
|
||||
end
|
||||
|
||||
def Document::parse_stream( source, listener )
|
||||
Parsers::StreamParser.new( source, listener ).parse
|
||||
end
|
||||
|
||||
private
|
||||
def build( source )
|
||||
private
|
||||
def build( source )
|
||||
Parsers::TreeParser.new( source, self ).parse
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
@ -36,8 +36,6 @@ module REXML
|
|||
# If an Element, the object will be shallowly cloned; name,
|
||||
# attributes, and namespaces will be copied. Children will +not+ be
|
||||
# copied.
|
||||
# If a Source, the source will be scanned and parsed for an Element,
|
||||
# and all child elements will be recursively parsed as well.
|
||||
# parent::
|
||||
# if supplied, must be a Parent, and will be used as
|
||||
# the parent of this object.
|
||||
|
@ -223,7 +221,7 @@ module REXML
|
|||
# b.namespace("y") # -> '2'
|
||||
def namespace(prefix=nil)
|
||||
if prefix.nil?
|
||||
prefix = self.prefix()
|
||||
prefix = prefix()
|
||||
end
|
||||
if prefix == ''
|
||||
prefix = "xmlns"
|
||||
|
|
|
@ -339,6 +339,8 @@ module REXML
|
|||
end
|
||||
|
||||
def Functions::sum( nodes )
|
||||
nodes = [nodes] unless nodes.kind_of? Array
|
||||
nodes.inject(0) { |r,n| r += number(string(n)) }
|
||||
end
|
||||
|
||||
def Functions::floor( number )
|
||||
|
|
|
@ -38,8 +38,8 @@ module REXML
|
|||
Instruction.new self
|
||||
end
|
||||
|
||||
def write writer, indent_level=-1, transitive=false, ie_hack=false
|
||||
indent(writer, indent_level)
|
||||
def write writer, indent=-1, transitive=false, ie_hack=false
|
||||
indent(writer, indent)
|
||||
writer << START.sub(/\\/u, '')
|
||||
writer << @target
|
||||
writer << ' '
|
||||
|
|
|
@ -2,103 +2,103 @@ require 'rexml/parseexception'
|
|||
require 'rexml/source'
|
||||
|
||||
module REXML
|
||||
module Parsers
|
||||
# = Using the Pull Parser
|
||||
# <em>This API is experimental, and subject to change.</em>
|
||||
# parser = PullParser.new( "<a>text<b att='val'/>txet</a>" )
|
||||
# while parser.has_next?
|
||||
# res = parser.next
|
||||
# puts res[1]['att'] if res.start_tag? and res[0] == 'b'
|
||||
# end
|
||||
# See the PullEvent class for information on the content of the results.
|
||||
# The data is identical to the arguments passed for the various events to
|
||||
# the StreamListener API.
|
||||
#
|
||||
# Notice that:
|
||||
# parser = PullParser.new( "<a>BAD DOCUMENT" )
|
||||
# while parser.has_next?
|
||||
# res = parser.next
|
||||
# raise res[1] if res.error?
|
||||
# end
|
||||
#
|
||||
# Nat Price gave me some good ideas for the API.
|
||||
class BaseParser
|
||||
NCNAME_STR= '[\w:][\-\w\d.]*'
|
||||
NAME_STR= "(?:#{NCNAME_STR}:)?#{NCNAME_STR}"
|
||||
module Parsers
|
||||
# = Using the Pull Parser
|
||||
# <em>This API is experimental, and subject to change.</em>
|
||||
# parser = PullParser.new( "<a>text<b att='val'/>txet</a>" )
|
||||
# while parser.has_next?
|
||||
# res = parser.next
|
||||
# puts res[1]['att'] if res.start_tag? and res[0] == 'b'
|
||||
# end
|
||||
# See the PullEvent class for information on the content of the results.
|
||||
# The data is identical to the arguments passed for the various events to
|
||||
# the StreamListener API.
|
||||
#
|
||||
# Notice that:
|
||||
# parser = PullParser.new( "<a>BAD DOCUMENT" )
|
||||
# while parser.has_next?
|
||||
# res = parser.next
|
||||
# raise res[1] if res.error?
|
||||
# end
|
||||
#
|
||||
# Nat Price gave me some good ideas for the API.
|
||||
class BaseParser
|
||||
NCNAME_STR= '[\w:][\-\w\d.]*'
|
||||
NAME_STR= "(?:#{NCNAME_STR}:)?#{NCNAME_STR}"
|
||||
|
||||
NAMECHAR = '[\-\w\d\.:]'
|
||||
NAME = "([\\w:]#{NAMECHAR}*)"
|
||||
NMTOKEN = "(?:#{NAMECHAR})+"
|
||||
NMTOKENS = "#{NMTOKEN}(\\s+#{NMTOKEN})*"
|
||||
REFERENCE = "(?:&#{NAME};|&#\\d+;|&#x[0-9a-fA-F]+;)"
|
||||
REFERENCE_RE = /#{REFERENCE}/
|
||||
NAMECHAR = '[\-\w\d\.:]'
|
||||
NAME = "([\\w:]#{NAMECHAR}*)"
|
||||
NMTOKEN = "(?:#{NAMECHAR})+"
|
||||
NMTOKENS = "#{NMTOKEN}(\\s+#{NMTOKEN})*"
|
||||
REFERENCE = "(?:&#{NAME};|&#\\d+;|&#x[0-9a-fA-F]+;)"
|
||||
REFERENCE_RE = /#{REFERENCE}/
|
||||
|
||||
DOCTYPE_START = /\A\s*<!DOCTYPE\s/um
|
||||
DOCTYPE_PATTERN = /\s*<!DOCTYPE\s+(.*?)(\[|>)/um
|
||||
ATTRIBUTE_PATTERN = /\s*(#{NAME_STR})\s*=\s*(["'])(.*?)\2/um
|
||||
COMMENT_START = /\A<!--/u
|
||||
COMMENT_PATTERN = /<!--(.*?)-->/um
|
||||
CDATA_START = /\A<!\[CDATA\[/u
|
||||
CDATA_END = /^\s*\]\s*>/um
|
||||
CDATA_PATTERN = /<!\[CDATA\[(.*?)\]\]>/um
|
||||
XMLDECL_START = /\A<\?xml\s/u;
|
||||
XMLDECL_PATTERN = /<\?xml\s+(.*?)\?>*/um
|
||||
INSTRUCTION_START = /\A<\?/u
|
||||
INSTRUCTION_PATTERN = /<\?(.*?)(\s+.*?)?\?>/um
|
||||
TAG_MATCH = /^<((?>#{NAME_STR}))\s*((?>\s+#{NAME_STR}\s*=\s*(["']).*?\3)*)\s*(\/)?>/um
|
||||
CLOSE_MATCH = /^\s*<\/(#{NAME_STR})\s*>/um
|
||||
DOCTYPE_START = /\A\s*<!DOCTYPE\s/um
|
||||
DOCTYPE_PATTERN = /\s*<!DOCTYPE\s+(.*?)(\[|>)/um
|
||||
ATTRIBUTE_PATTERN = /\s*(#{NAME_STR})\s*=\s*(["'])(.*?)\2/um
|
||||
COMMENT_START = /\A<!--/u
|
||||
COMMENT_PATTERN = /<!--(.*?)-->/um
|
||||
CDATA_START = /\A<!\[CDATA\[/u
|
||||
CDATA_END = /^\s*\]\s*>/um
|
||||
CDATA_PATTERN = /<!\[CDATA\[(.*?)\]\]>/um
|
||||
XMLDECL_START = /\A<\?xml\s/u;
|
||||
XMLDECL_PATTERN = /<\?xml\s+(.*?)\?>*/um
|
||||
INSTRUCTION_START = /\A<\?/u
|
||||
INSTRUCTION_PATTERN = /<\?(.*?)(\s+.*?)?\?>/um
|
||||
TAG_MATCH = /^<((?>#{NAME_STR}))\s*((?>\s+#{NAME_STR}\s*=\s*(["']).*?\3)*)\s*(\/)?>/um
|
||||
CLOSE_MATCH = /^\s*<\/(#{NAME_STR})\s*>/um
|
||||
|
||||
VERSION = /\bversion\s*=\s*["'](.*?)['"]/um
|
||||
ENCODING = /\bencoding=["'](.*?)['"]/um
|
||||
STANDALONE = /\bstandalone=["'](.*?)['"]/um
|
||||
VERSION = /\bversion\s*=\s*["'](.*?)['"]/um
|
||||
ENCODING = /\bencoding=["'](.*?)['"]/um
|
||||
STANDALONE = /\bstandalone=["'](.*?)['"]/um
|
||||
|
||||
ENTITY_START = /^\s*<!ENTITY/
|
||||
IDENTITY = /^([!\*\w\-]+)(\s+#{NCNAME_STR})?(\s+["'].*?['"])?(\s+['"].*?["'])?/u
|
||||
ELEMENTDECL_START = /^\s*<!ELEMENT/um
|
||||
ELEMENTDECL_PATTERN = /^\s*(<!ELEMENT.*?)>/um
|
||||
SYSTEMENTITY = /^\s*(%.*?;)\s*$/um
|
||||
ENUMERATION = "\\(\\s*#{NMTOKEN}(?:\\s*\\|\\s*#{NMTOKEN})*\\s*\\)"
|
||||
NOTATIONTYPE = "NOTATION\\s+\\(\\s*#{NAME}(?:\\s*\\|\\s*#{NAME})*\\s*\\)"
|
||||
ENUMERATEDTYPE = "(?:(?:#{NOTATIONTYPE})|(?:#{ENUMERATION}))"
|
||||
ATTTYPE = "(CDATA|ID|IDREF|IDREFS|ENTITY|ENTITIES|NMTOKEN|NMTOKENS|#{ENUMERATEDTYPE})"
|
||||
ATTVALUE = "(?:\"((?:[^<&\"]|#{REFERENCE})*)\")|(?:'((?:[^<&']|#{REFERENCE})*)')"
|
||||
DEFAULTDECL = "(#REQUIRED|#IMPLIED|(?:(#FIXED\\s+)?#{ATTVALUE}))"
|
||||
ATTDEF = "\\s+#{NAME}\\s+#{ATTTYPE}\\s+#{DEFAULTDECL}"
|
||||
ATTDEF_RE = /#{ATTDEF}/
|
||||
ATTLISTDECL_START = /^\s*<!ATTLIST/um
|
||||
ATTLISTDECL_PATTERN = /^\s*<!ATTLIST\s+#{NAME}(?:#{ATTDEF})*\s*>/um
|
||||
NOTATIONDECL_START = /^\s*<!NOTATION/um
|
||||
PUBLIC = /^\s*<!NOTATION\s+(\w[\-\w]*)\s+(PUBLIC)\s+((["']).*?\4)\s*>/um
|
||||
SYSTEM = /^\s*<!NOTATION\s+(\w[\-\w]*)\s+(SYSTEM)\s+((["']).*?\4)\s*>/um
|
||||
ENTITY_START = /^\s*<!ENTITY/
|
||||
IDENTITY = /^([!\*\w\-]+)(\s+#{NCNAME_STR})?(\s+["'].*?['"])?(\s+['"].*?["'])?/u
|
||||
ELEMENTDECL_START = /^\s*<!ELEMENT/um
|
||||
ELEMENTDECL_PATTERN = /^\s*(<!ELEMENT.*?)>/um
|
||||
SYSTEMENTITY = /^\s*(%.*?;)\s*$/um
|
||||
ENUMERATION = "\\(\\s*#{NMTOKEN}(?:\\s*\\|\\s*#{NMTOKEN})*\\s*\\)"
|
||||
NOTATIONTYPE = "NOTATION\\s+\\(\\s*#{NAME}(?:\\s*\\|\\s*#{NAME})*\\s*\\)"
|
||||
ENUMERATEDTYPE = "(?:(?:#{NOTATIONTYPE})|(?:#{ENUMERATION}))"
|
||||
ATTTYPE = "(CDATA|ID|IDREF|IDREFS|ENTITY|ENTITIES|NMTOKEN|NMTOKENS|#{ENUMERATEDTYPE})"
|
||||
ATTVALUE = "(?:\"((?:[^<&\"]|#{REFERENCE})*)\")|(?:'((?:[^<&']|#{REFERENCE})*)')"
|
||||
DEFAULTDECL = "(#REQUIRED|#IMPLIED|(?:(#FIXED\\s+)?#{ATTVALUE}))"
|
||||
ATTDEF = "\\s+#{NAME}\\s+#{ATTTYPE}\\s+#{DEFAULTDECL}"
|
||||
ATTDEF_RE = /#{ATTDEF}/
|
||||
ATTLISTDECL_START = /^\s*<!ATTLIST/um
|
||||
ATTLISTDECL_PATTERN = /^\s*<!ATTLIST\s+#{NAME}(?:#{ATTDEF})*\s*>/um
|
||||
NOTATIONDECL_START = /^\s*<!NOTATION/um
|
||||
PUBLIC = /^\s*<!NOTATION\s+(\w[\-\w]*)\s+(PUBLIC)\s+((["']).*?\4)\s*>/um
|
||||
SYSTEM = /^\s*<!NOTATION\s+(\w[\-\w]*)\s+(SYSTEM)\s+((["']).*?\4)\s*>/um
|
||||
|
||||
TEXT_PATTERN = /\A([^<]*)/um
|
||||
TEXT_PATTERN = /\A([^<]*)/um
|
||||
|
||||
# Entity constants
|
||||
PUBIDCHAR = "\x20\x0D\x0Aa-zA-Z0-9\\-()+,./:=?;!*@$_%#"
|
||||
SYSTEMLITERAL = %Q{((?:"[^"]*")|(?:'[^']*'))}
|
||||
PUBIDLITERAL = %Q{("[#{PUBIDCHAR}']*"|'[#{PUBIDCHAR}]*')}
|
||||
EXTERNALID = "(?:(?:(SYSTEM)\\s+#{SYSTEMLITERAL})|(?:(PUBLIC)\\s+#{PUBIDLITERAL}\\s+#{SYSTEMLITERAL}))"
|
||||
NDATADECL = "\\s+NDATA\\s+#{NAME}"
|
||||
PEREFERENCE = "%#{NAME};"
|
||||
ENTITYVALUE = %Q{((?:"(?:[^%&"]|#{PEREFERENCE}|#{REFERENCE})*")|(?:'([^%&']|#{PEREFERENCE}|#{REFERENCE})*'))}
|
||||
PEDEF = "(?:#{ENTITYVALUE}|#{EXTERNALID})"
|
||||
ENTITYDEF = "(?:#{ENTITYVALUE}|(?:#{EXTERNALID}(#{NDATADECL})?))"
|
||||
PEDECL = "<!ENTITY\\s+(%)\\s+#{NAME}\\s+#{PEDEF}\\s*>"
|
||||
GEDECL = "<!ENTITY\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
|
||||
ENTITYDECL = /\s*(?:#{GEDECL})|(?:#{PEDECL})/um
|
||||
# Entity constants
|
||||
PUBIDCHAR = "\x20\x0D\x0Aa-zA-Z0-9\\-()+,./:=?;!*@$_%#"
|
||||
SYSTEMLITERAL = %Q{((?:"[^"]*")|(?:'[^']*'))}
|
||||
PUBIDLITERAL = %Q{("[#{PUBIDCHAR}']*"|'[#{PUBIDCHAR}]*')}
|
||||
EXTERNALID = "(?:(?:(SYSTEM)\\s+#{SYSTEMLITERAL})|(?:(PUBLIC)\\s+#{PUBIDLITERAL}\\s+#{SYSTEMLITERAL}))"
|
||||
NDATADECL = "\\s+NDATA\\s+#{NAME}"
|
||||
PEREFERENCE = "%#{NAME};"
|
||||
ENTITYVALUE = %Q{((?:"(?:[^%&"]|#{PEREFERENCE}|#{REFERENCE})*")|(?:'([^%&']|#{PEREFERENCE}|#{REFERENCE})*'))}
|
||||
PEDEF = "(?:#{ENTITYVALUE}|#{EXTERNALID})"
|
||||
ENTITYDEF = "(?:#{ENTITYVALUE}|(?:#{EXTERNALID}(#{NDATADECL})?))"
|
||||
PEDECL = "<!ENTITY\\s+(%)\\s+#{NAME}\\s+#{PEDEF}\\s*>"
|
||||
GEDECL = "<!ENTITY\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
|
||||
ENTITYDECL = /\s*(?:#{GEDECL})|(?:#{PEDECL})/um
|
||||
|
||||
EREFERENCE = /&(?!#{NAME};)/
|
||||
EREFERENCE = /&(?!#{NAME};)/
|
||||
|
||||
DEFAULT_ENTITIES = {
|
||||
'gt' => [/>/, '>', '>', />/],
|
||||
'lt' => [/</, '<', '<', /</],
|
||||
'quot' => [/"/, '"', '"', /"/],
|
||||
"apos" => [/'/, "'", "'", /'/]
|
||||
}
|
||||
DEFAULT_ENTITIES = {
|
||||
'gt' => [/>/, '>', '>', />/],
|
||||
'lt' => [/</, '<', '<', /</],
|
||||
'quot' => [/"/, '"', '"', /"/],
|
||||
"apos" => [/'/, "'", "'", /'/]
|
||||
}
|
||||
|
||||
def initialize( source )
|
||||
self.stream = source
|
||||
end
|
||||
def initialize( source )
|
||||
self.stream = source
|
||||
end
|
||||
|
||||
def add_listener( listener )
|
||||
if !defined?(@listeners) or !@listeners
|
||||
|
@ -119,315 +119,315 @@ module REXML
|
|||
|
||||
attr_reader :source
|
||||
|
||||
def stream=( source )
|
||||
if source.kind_of? String
|
||||
@source = Source.new(source)
|
||||
elsif source.kind_of? IO
|
||||
@source = IOSource.new(source)
|
||||
elsif source.kind_of? Source
|
||||
@source = source
|
||||
elsif defined? StringIO and source.kind_of? StringIO
|
||||
@source = IOSource.new(source)
|
||||
else
|
||||
raise "#{source.class} is not a valid input stream. It must be \n"+
|
||||
"either a String, IO, StringIO or Source."
|
||||
end
|
||||
@closed = nil
|
||||
@document_status = nil
|
||||
@tags = []
|
||||
@stack = []
|
||||
@entities = []
|
||||
end
|
||||
def stream=( source )
|
||||
if source.kind_of? String
|
||||
@source = Source.new(source)
|
||||
elsif source.kind_of? IO
|
||||
@source = IOSource.new(source)
|
||||
elsif source.kind_of? Source
|
||||
@source = source
|
||||
elsif defined? StringIO and source.kind_of? StringIO
|
||||
@source = IOSource.new(source)
|
||||
else
|
||||
raise "#{source.class} is not a valid input stream. It must be \n"+
|
||||
"either a String, IO, StringIO or Source."
|
||||
end
|
||||
@closed = nil
|
||||
@document_status = nil
|
||||
@tags = []
|
||||
@stack = []
|
||||
@entities = []
|
||||
end
|
||||
|
||||
# Returns true if there are no more events
|
||||
def empty?
|
||||
# Returns true if there are no more events
|
||||
def empty?
|
||||
#puts "@source.empty? = #{@source.empty?}"
|
||||
#puts "@stack.empty? = #{@stack.empty?}"
|
||||
return (@source.empty? and @stack.empty?)
|
||||
end
|
||||
end
|
||||
|
||||
# Returns true if there are more events. Synonymous with !empty?
|
||||
def has_next?
|
||||
# Returns true if there are more events. Synonymous with !empty?
|
||||
def has_next?
|
||||
return !(@source.empty? and @stack.empty?)
|
||||
end
|
||||
end
|
||||
|
||||
# Push an event back on the head of the stream. This method
|
||||
# has (theoretically) infinite depth.
|
||||
def unshift token
|
||||
@stack.unshift(token)
|
||||
end
|
||||
# Push an event back on the head of the stream. This method
|
||||
# has (theoretically) infinite depth.
|
||||
def unshift token
|
||||
@stack.unshift(token)
|
||||
end
|
||||
|
||||
# Peek at the +depth+ event in the stack. The first element on the stack
|
||||
# is at depth 0. If +depth+ is -1, will parse to the end of the input
|
||||
# stream and return the last event, which is always :end_document.
|
||||
# Be aware that this causes the stream to be parsed up to the +depth+
|
||||
# event, so you can effectively pre-parse the entire document (pull the
|
||||
# entire thing into memory) using this method.
|
||||
def peek depth=0
|
||||
raise %Q[Illegal argument "#{depth}"] if depth < -1
|
||||
temp = []
|
||||
if depth == -1
|
||||
temp.push(pull()) until empty?
|
||||
else
|
||||
while @stack.size+temp.size < depth+1
|
||||
temp.push(pull())
|
||||
end
|
||||
end
|
||||
@stack += temp if temp.size > 0
|
||||
@stack[depth]
|
||||
end
|
||||
# Peek at the +depth+ event in the stack. The first element on the stack
|
||||
# is at depth 0. If +depth+ is -1, will parse to the end of the input
|
||||
# stream and return the last event, which is always :end_document.
|
||||
# Be aware that this causes the stream to be parsed up to the +depth+
|
||||
# event, so you can effectively pre-parse the entire document (pull the
|
||||
# entire thing into memory) using this method.
|
||||
def peek depth=0
|
||||
raise %Q[Illegal argument "#{depth}"] if depth < -1
|
||||
temp = []
|
||||
if depth == -1
|
||||
temp.push(pull()) until empty?
|
||||
else
|
||||
while @stack.size+temp.size < depth+1
|
||||
temp.push(pull())
|
||||
end
|
||||
end
|
||||
@stack += temp if temp.size > 0
|
||||
@stack[depth]
|
||||
end
|
||||
|
||||
# Returns the next event. This is a +PullEvent+ object.
|
||||
def pull
|
||||
if @closed
|
||||
x, @closed = @closed, nil
|
||||
return [ :end_element, x ]
|
||||
end
|
||||
return [ :end_document ] if empty?
|
||||
return @stack.shift if @stack.size > 0
|
||||
@source.read if @source.buffer.size<2
|
||||
if @document_status == nil
|
||||
@source.consume( /^\s*/um )
|
||||
word = @source.match( /(<[^>]*)>/um )
|
||||
word = word[1] unless word.nil?
|
||||
case word
|
||||
when COMMENT_START
|
||||
return [ :comment, @source.match( COMMENT_PATTERN, true )[1] ]
|
||||
when XMLDECL_START
|
||||
results = @source.match( XMLDECL_PATTERN, true )[1]
|
||||
version = VERSION.match( results )
|
||||
version = version[1] unless version.nil?
|
||||
encoding = ENCODING.match(results)
|
||||
encoding = encoding[1] unless encoding.nil?
|
||||
@source.encoding = encoding
|
||||
standalone = STANDALONE.match(results)
|
||||
standalone = standalone[1] unless standalone.nil?
|
||||
return [ :xmldecl, version, encoding, standalone]
|
||||
when INSTRUCTION_START
|
||||
return [ :processing_instruction, *@source.match(INSTRUCTION_PATTERN, true)[1,2] ]
|
||||
when DOCTYPE_START
|
||||
md = @source.match( DOCTYPE_PATTERN, true )
|
||||
identity = md[1]
|
||||
close = md[2]
|
||||
identity =~ IDENTITY
|
||||
name = $1
|
||||
raise REXML::ParseException("DOCTYPE is missing a name") if name.nil?
|
||||
pub_sys = $2.nil? ? nil : $2.strip
|
||||
long_name = $3.nil? ? nil : $3.strip
|
||||
uri = $4.nil? ? nil : $4.strip
|
||||
args = [ :start_doctype, name, pub_sys, long_name, uri ]
|
||||
if close == ">"
|
||||
@document_status = :after_doctype
|
||||
@source.read if @source.buffer.size<2
|
||||
md = @source.match(/^\s*/um, true)
|
||||
@stack << [ :end_doctype ]
|
||||
else
|
||||
@document_status = :in_doctype
|
||||
end
|
||||
return args
|
||||
else
|
||||
@document_status = :after_doctype
|
||||
@source.read if @source.buffer.size<2
|
||||
md = @source.match(/\s*/um, true)
|
||||
end
|
||||
end
|
||||
if @document_status == :in_doctype
|
||||
md = @source.match(/\s*(.*?>)/um)
|
||||
case md[1]
|
||||
when SYSTEMENTITY
|
||||
match = @source.match( SYSTEMENTITY, true )[1]
|
||||
return [ :externalentity, match ]
|
||||
# Returns the next event. This is a +PullEvent+ object.
|
||||
def pull
|
||||
if @closed
|
||||
x, @closed = @closed, nil
|
||||
return [ :end_element, x ]
|
||||
end
|
||||
return [ :end_document ] if empty?
|
||||
return @stack.shift if @stack.size > 0
|
||||
@source.read if @source.buffer.size<2
|
||||
if @document_status == nil
|
||||
@source.consume( /^\s*/um )
|
||||
word = @source.match( /(<[^>]*)>/um )
|
||||
word = word[1] unless word.nil?
|
||||
case word
|
||||
when COMMENT_START
|
||||
return [ :comment, @source.match( COMMENT_PATTERN, true )[1] ]
|
||||
when XMLDECL_START
|
||||
results = @source.match( XMLDECL_PATTERN, true )[1]
|
||||
version = VERSION.match( results )
|
||||
version = version[1] unless version.nil?
|
||||
encoding = ENCODING.match(results)
|
||||
encoding = encoding[1] unless encoding.nil?
|
||||
@source.encoding = encoding
|
||||
standalone = STANDALONE.match(results)
|
||||
standalone = standalone[1] unless standalone.nil?
|
||||
return [ :xmldecl, version, encoding, standalone]
|
||||
when INSTRUCTION_START
|
||||
return [ :processing_instruction, *@source.match(INSTRUCTION_PATTERN, true)[1,2] ]
|
||||
when DOCTYPE_START
|
||||
md = @source.match( DOCTYPE_PATTERN, true )
|
||||
identity = md[1]
|
||||
close = md[2]
|
||||
identity =~ IDENTITY
|
||||
name = $1
|
||||
raise REXML::ParseException("DOCTYPE is missing a name") if name.nil?
|
||||
pub_sys = $2.nil? ? nil : $2.strip
|
||||
long_name = $3.nil? ? nil : $3.strip
|
||||
uri = $4.nil? ? nil : $4.strip
|
||||
args = [ :start_doctype, name, pub_sys, long_name, uri ]
|
||||
if close == ">"
|
||||
@document_status = :after_doctype
|
||||
@source.read if @source.buffer.size<2
|
||||
md = @source.match(/^\s*/um, true)
|
||||
@stack << [ :end_doctype ]
|
||||
else
|
||||
@document_status = :in_doctype
|
||||
end
|
||||
return args
|
||||
else
|
||||
@document_status = :after_doctype
|
||||
@source.read if @source.buffer.size<2
|
||||
md = @source.match(/\s*/um, true)
|
||||
end
|
||||
end
|
||||
if @document_status == :in_doctype
|
||||
md = @source.match(/\s*(.*?>)/um)
|
||||
case md[1]
|
||||
when SYSTEMENTITY
|
||||
match = @source.match( SYSTEMENTITY, true )[1]
|
||||
return [ :externalentity, match ]
|
||||
|
||||
when ELEMENTDECL_START
|
||||
return [ :elementdecl, @source.match( ELEMENTDECL_PATTERN, true )[1] ]
|
||||
when ELEMENTDECL_START
|
||||
return [ :elementdecl, @source.match( ELEMENTDECL_PATTERN, true )[1] ]
|
||||
|
||||
when ENTITY_START
|
||||
match = @source.match( ENTITYDECL, true ).to_a.compact
|
||||
match[0] = :entitydecl
|
||||
ref = false
|
||||
if match[1] == '%'
|
||||
ref = true
|
||||
match.delete_at 1
|
||||
end
|
||||
# Now we have to sort out what kind of entity reference this is
|
||||
if match[2] == 'SYSTEM'
|
||||
# External reference
|
||||
match[3] = match[3][1..-2] # PUBID
|
||||
match.delete_at(4) if match.size > 4 # Chop out NDATA decl
|
||||
# match is [ :entity, name, SYSTEM, pubid(, ndata)? ]
|
||||
elsif match[2] == 'PUBLIC'
|
||||
# External reference
|
||||
match[3] = match[3][1..-2] # PUBID
|
||||
match[4] = match[4][1..-2] # HREF
|
||||
# match is [ :entity, name, PUBLIC, pubid, href ]
|
||||
else
|
||||
match[2] = match[2][1..-2]
|
||||
match.pop if match.size == 4
|
||||
# match is [ :entity, name, value ]
|
||||
end
|
||||
match << '%' if ref
|
||||
return match
|
||||
when ATTLISTDECL_START
|
||||
md = @source.match( ATTLISTDECL_PATTERN, true )
|
||||
raise REXML::ParseException.new( "Bad ATTLIST declaration!", @source ) if md.nil?
|
||||
element = md[1]
|
||||
contents = md[0]
|
||||
when ENTITY_START
|
||||
match = @source.match( ENTITYDECL, true ).to_a.compact
|
||||
match[0] = :entitydecl
|
||||
ref = false
|
||||
if match[1] == '%'
|
||||
ref = true
|
||||
match.delete_at 1
|
||||
end
|
||||
# Now we have to sort out what kind of entity reference this is
|
||||
if match[2] == 'SYSTEM'
|
||||
# External reference
|
||||
match[3] = match[3][1..-2] # PUBID
|
||||
match.delete_at(4) if match.size > 4 # Chop out NDATA decl
|
||||
# match is [ :entity, name, SYSTEM, pubid(, ndata)? ]
|
||||
elsif match[2] == 'PUBLIC'
|
||||
# External reference
|
||||
match[3] = match[3][1..-2] # PUBID
|
||||
match[4] = match[4][1..-2] # HREF
|
||||
# match is [ :entity, name, PUBLIC, pubid, href ]
|
||||
else
|
||||
match[2] = match[2][1..-2]
|
||||
match.pop if match.size == 4
|
||||
# match is [ :entity, name, value ]
|
||||
end
|
||||
match << '%' if ref
|
||||
return match
|
||||
when ATTLISTDECL_START
|
||||
md = @source.match( ATTLISTDECL_PATTERN, true )
|
||||
raise REXML::ParseException.new( "Bad ATTLIST declaration!", @source ) if md.nil?
|
||||
element = md[1]
|
||||
contents = md[0]
|
||||
|
||||
pairs = {}
|
||||
values = md[0].scan( ATTDEF_RE )
|
||||
values.each do |attdef|
|
||||
unless attdef[3] == "#IMPLIED"
|
||||
attdef.compact!
|
||||
val = attdef[3]
|
||||
val = attdef[4] if val == "#FIXED "
|
||||
pairs[attdef[0]] = val
|
||||
end
|
||||
end
|
||||
return [ :attlistdecl, element, pairs, contents ]
|
||||
when NOTATIONDECL_START
|
||||
md = nil
|
||||
if @source.match( PUBLIC )
|
||||
md = @source.match( PUBLIC, true )
|
||||
elsif @source.match( SYSTEM )
|
||||
md = @source.match( SYSTEM, true )
|
||||
else
|
||||
raise REXML::ParseException.new( "error parsing notation: no matching pattern", @source )
|
||||
end
|
||||
return [ :notationdecl, md[1], md[2], md[3] ]
|
||||
when CDATA_END
|
||||
@document_status = :after_doctype
|
||||
@source.match( CDATA_END, true )
|
||||
return [ :end_doctype ]
|
||||
end
|
||||
end
|
||||
begin
|
||||
if @source.buffer[0] == ?<
|
||||
if @source.buffer[1] == ?/
|
||||
last_tag = @tags.pop
|
||||
#md = @source.match_to_consume( '>', CLOSE_MATCH)
|
||||
md = @source.match( CLOSE_MATCH, true )
|
||||
raise REXML::ParseException.new( "Missing end tag for "+
|
||||
pairs = {}
|
||||
values = md[0].scan( ATTDEF_RE )
|
||||
values.each do |attdef|
|
||||
unless attdef[3] == "#IMPLIED"
|
||||
attdef.compact!
|
||||
val = attdef[3]
|
||||
val = attdef[4] if val == "#FIXED "
|
||||
pairs[attdef[0]] = val
|
||||
end
|
||||
end
|
||||
return [ :attlistdecl, element, pairs, contents ]
|
||||
when NOTATIONDECL_START
|
||||
md = nil
|
||||
if @source.match( PUBLIC )
|
||||
md = @source.match( PUBLIC, true )
|
||||
elsif @source.match( SYSTEM )
|
||||
md = @source.match( SYSTEM, true )
|
||||
else
|
||||
raise REXML::ParseException.new( "error parsing notation: no matching pattern", @source )
|
||||
end
|
||||
return [ :notationdecl, md[1], md[2], md[3] ]
|
||||
when CDATA_END
|
||||
@document_status = :after_doctype
|
||||
@source.match( CDATA_END, true )
|
||||
return [ :end_doctype ]
|
||||
end
|
||||
end
|
||||
begin
|
||||
if @source.buffer[0] == ?<
|
||||
if @source.buffer[1] == ?/
|
||||
last_tag = @tags.pop
|
||||
#md = @source.match_to_consume( '>', CLOSE_MATCH)
|
||||
md = @source.match( CLOSE_MATCH, true )
|
||||
raise REXML::ParseException.new( "Missing end tag for "+
|
||||
"'#{last_tag}' (got \"#{md[1]}\")",
|
||||
@source) unless last_tag == md[1]
|
||||
return [ :end_element, last_tag ]
|
||||
elsif @source.buffer[1] == ?!
|
||||
md = @source.match(/\A(\s*[^>]*>)/um)
|
||||
#puts "SOURCE BUFFER = #{source.buffer}, #{source.buffer.size}"
|
||||
raise REXML::ParseException.new("Malformed node", @source) unless md
|
||||
if md[0][2] == ?-
|
||||
md = @source.match( COMMENT_PATTERN, true )
|
||||
return [ :comment, md[1] ] if md
|
||||
else
|
||||
md = @source.match( CDATA_PATTERN, true )
|
||||
return [ :cdata, md[1] ] if md
|
||||
end
|
||||
raise REXML::ParseException.new( "Declarations can only occur "+
|
||||
"in the doctype declaration.", @source)
|
||||
elsif @source.buffer[1] == ??
|
||||
md = @source.match( INSTRUCTION_PATTERN, true )
|
||||
return [ :processing_instruction, md[1], md[2] ] if md
|
||||
raise REXML::ParseException.new( "Bad instruction declaration",
|
||||
@source)
|
||||
else
|
||||
# Get the next tag
|
||||
md = @source.match(TAG_MATCH, true)
|
||||
raise REXML::ParseException.new("malformed XML: missing tag start", @source) unless md
|
||||
attrs = []
|
||||
if md[2].size > 0
|
||||
attrs = md[2].scan( ATTRIBUTE_PATTERN )
|
||||
raise REXML::ParseException.new( "error parsing attributes: [#{attrs.join ', '}], excess = \"#$'\"", @source) if $' and $'.strip.size > 0
|
||||
end
|
||||
|
||||
if md[4]
|
||||
@closed = md[1]
|
||||
else
|
||||
@tags.push( md[1] )
|
||||
end
|
||||
attributes = {}
|
||||
attrs.each { |a,b,c| attributes[a] = c }
|
||||
return [ :start_element, md[1], attributes ]
|
||||
end
|
||||
else
|
||||
md = @source.match( TEXT_PATTERN, true )
|
||||
return [ :end_element, last_tag ]
|
||||
elsif @source.buffer[1] == ?!
|
||||
md = @source.match(/\A(\s*[^>]*>)/um)
|
||||
#puts "SOURCE BUFFER = #{source.buffer}, #{source.buffer.size}"
|
||||
raise REXML::ParseException.new("Malformed node", @source) unless md
|
||||
if md[0][2] == ?-
|
||||
md = @source.match( COMMENT_PATTERN, true )
|
||||
return [ :comment, md[1] ] if md
|
||||
else
|
||||
md = @source.match( CDATA_PATTERN, true )
|
||||
return [ :cdata, md[1] ] if md
|
||||
end
|
||||
raise REXML::ParseException.new( "Declarations can only occur "+
|
||||
"in the doctype declaration.", @source)
|
||||
elsif @source.buffer[1] == ??
|
||||
md = @source.match( INSTRUCTION_PATTERN, true )
|
||||
return [ :processing_instruction, md[1], md[2] ] if md
|
||||
raise REXML::ParseException.new( "Bad instruction declaration",
|
||||
@source)
|
||||
else
|
||||
# Get the next tag
|
||||
md = @source.match(TAG_MATCH, true)
|
||||
raise REXML::ParseException.new("malformed XML: missing tag start", @source) unless md
|
||||
attrs = []
|
||||
if md[2].size > 0
|
||||
attrs = md[2].scan( ATTRIBUTE_PATTERN )
|
||||
raise REXML::ParseException.new( "error parsing attributes: [#{attrs.join ', '}], excess = \"#$'\"", @source) if $' and $'.strip.size > 0
|
||||
end
|
||||
|
||||
if md[4]
|
||||
@closed = md[1]
|
||||
else
|
||||
@tags.push( md[1] )
|
||||
end
|
||||
attributes = {}
|
||||
attrs.each { |a,b,c| attributes[a] = c }
|
||||
return [ :start_element, md[1], attributes ]
|
||||
end
|
||||
else
|
||||
md = @source.match( TEXT_PATTERN, true )
|
||||
if md[0].length == 0
|
||||
#puts "EMPTY = #{empty?}"
|
||||
#puts "BUFFER = \"#{@source.buffer}\""
|
||||
@source.match( /(\s+)/, true )
|
||||
end
|
||||
#return [ :text, "" ] if md[0].length == 0
|
||||
# unnormalized = Text::unnormalize( md[1], self )
|
||||
# return PullEvent.new( :text, md[1], unnormalized )
|
||||
return [ :text, md[1] ]
|
||||
end
|
||||
rescue REXML::ParseException
|
||||
raise
|
||||
rescue Exception, NameError => error
|
||||
raise REXML::ParseException.new( "Exception parsing",
|
||||
@source, self, (error ? error : $!) )
|
||||
end
|
||||
return [ :dummy ]
|
||||
end
|
||||
# unnormalized = Text::unnormalize( md[1], self )
|
||||
# return PullEvent.new( :text, md[1], unnormalized )
|
||||
return [ :text, md[1] ]
|
||||
end
|
||||
rescue REXML::ParseException
|
||||
raise
|
||||
rescue Exception, NameError => error
|
||||
raise REXML::ParseException.new( "Exception parsing",
|
||||
@source, self, (error ? error : $!) )
|
||||
end
|
||||
return [ :dummy ]
|
||||
end
|
||||
|
||||
def entity( reference, entities )
|
||||
value = nil
|
||||
value = entities[ reference ] if entities
|
||||
if not value
|
||||
value = DEFAULT_ENTITIES[ reference ]
|
||||
value = value[2] if value
|
||||
end
|
||||
unnormalize( value, entities ) if value
|
||||
end
|
||||
def entity( reference, entities )
|
||||
value = nil
|
||||
value = entities[ reference ] if entities
|
||||
if not value
|
||||
value = DEFAULT_ENTITIES[ reference ]
|
||||
value = value[2] if value
|
||||
end
|
||||
unnormalize( value, entities ) if value
|
||||
end
|
||||
|
||||
# Escapes all possible entities
|
||||
def normalize( input, entities=nil, entity_filter=nil )
|
||||
copy = input.clone
|
||||
# Doing it like this rather than in a loop improves the speed
|
||||
copy.gsub!( EREFERENCE, '&' )
|
||||
entities.each do |key, value|
|
||||
copy.gsub!( value, "&#{key};" ) unless entity_filter and
|
||||
entity_filter.include?(entity)
|
||||
end if entities
|
||||
copy.gsub!( EREFERENCE, '&' )
|
||||
DEFAULT_ENTITIES.each do |key, value|
|
||||
copy.gsub!( value[3], value[1] )
|
||||
end
|
||||
copy
|
||||
end
|
||||
# Escapes all possible entities
|
||||
def normalize( input, entities=nil, entity_filter=nil )
|
||||
copy = input.clone
|
||||
# Doing it like this rather than in a loop improves the speed
|
||||
copy.gsub!( EREFERENCE, '&' )
|
||||
entities.each do |key, value|
|
||||
copy.gsub!( value, "&#{key};" ) unless entity_filter and
|
||||
entity_filter.include?(entity)
|
||||
end if entities
|
||||
copy.gsub!( EREFERENCE, '&' )
|
||||
DEFAULT_ENTITIES.each do |key, value|
|
||||
copy.gsub!( value[3], value[1] )
|
||||
end
|
||||
copy
|
||||
end
|
||||
|
||||
# Unescapes all possible entities
|
||||
def unnormalize( string, entities=nil, filter=nil )
|
||||
rv = string.clone
|
||||
rv.gsub!( /\r\n?/, "\n" )
|
||||
matches = rv.scan( REFERENCE_RE )
|
||||
return rv if matches.size == 0
|
||||
rv.gsub!( /�*((?:\d+)|(?:x[a-fA-F0-9]+));/ ) {|m|
|
||||
m=$1
|
||||
m = "0#{m}" if m[0] == ?x
|
||||
[Integer(m)].pack('U*')
|
||||
}
|
||||
matches.collect!{|x|x[0]}.compact!
|
||||
if matches.size > 0
|
||||
matches.each do |entity_reference|
|
||||
unless filter and filter.include?(entity_reference)
|
||||
entity_value = entity( entity_reference, entities )
|
||||
if entity_value
|
||||
re = /&#{entity_reference};/
|
||||
rv.gsub!( re, entity_value )
|
||||
end
|
||||
end
|
||||
end
|
||||
matches.each do |entity_reference|
|
||||
unless filter and filter.include?(entity_reference)
|
||||
er = DEFAULT_ENTITIES[entity_reference]
|
||||
rv.gsub!( er[0], er[2] ) if er
|
||||
end
|
||||
end
|
||||
rv.gsub!( /&/, '&' )
|
||||
end
|
||||
rv
|
||||
end
|
||||
end
|
||||
end
|
||||
# Unescapes all possible entities
|
||||
def unnormalize( string, entities=nil, filter=nil )
|
||||
rv = string.clone
|
||||
rv.gsub!( /\r\n?/, "\n" )
|
||||
matches = rv.scan( REFERENCE_RE )
|
||||
return rv if matches.size == 0
|
||||
rv.gsub!( /�*((?:\d+)|(?:x[a-fA-F0-9]+));/ ) {|m|
|
||||
m=$1
|
||||
m = "0#{m}" if m[0] == ?x
|
||||
[Integer(m)].pack('U*')
|
||||
}
|
||||
matches.collect!{|x|x[0]}.compact!
|
||||
if matches.size > 0
|
||||
matches.each do |entity_reference|
|
||||
unless filter and filter.include?(entity_reference)
|
||||
entity_value = entity( entity_reference, entities )
|
||||
if entity_value
|
||||
re = /&#{entity_reference};/
|
||||
rv.gsub!( re, entity_value )
|
||||
end
|
||||
end
|
||||
end
|
||||
matches.each do |entity_reference|
|
||||
unless filter and filter.include?(entity_reference)
|
||||
er = DEFAULT_ENTITIES[entity_reference]
|
||||
rv.gsub!( er[0], er[2] ) if er
|
||||
end
|
||||
end
|
||||
rv.gsub!( /&/, '&' )
|
||||
end
|
||||
rv
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
=begin
|
||||
|
|
|
@ -1,96 +1,99 @@
|
|||
require 'forwardable'
|
||||
|
||||
require 'rexml/parseexception'
|
||||
require 'rexml/parsers/baseparser'
|
||||
require 'rexml/xmltokens'
|
||||
|
||||
module REXML
|
||||
module Parsers
|
||||
# = Using the Pull Parser
|
||||
# <em>This API is experimental, and subject to change.</em>
|
||||
# parser = PullParser.new( "<a>text<b att='val'/>txet</a>" )
|
||||
# while parser.has_next?
|
||||
# res = parser.next
|
||||
# puts res[1]['att'] if res.start_tag? and res[0] == 'b'
|
||||
# end
|
||||
# See the PullEvent class for information on the content of the results.
|
||||
# The data is identical to the arguments passed for the various events to
|
||||
# the StreamListener API.
|
||||
#
|
||||
# Notice that:
|
||||
# parser = PullParser.new( "<a>BAD DOCUMENT" )
|
||||
# while parser.has_next?
|
||||
# res = parser.next
|
||||
# raise res[1] if res.error?
|
||||
# end
|
||||
#
|
||||
# Nat Price gave me some good ideas for the API.
|
||||
class PullParser
|
||||
include XMLTokens
|
||||
module Parsers
|
||||
# = Using the Pull Parser
|
||||
# <em>This API is experimental, and subject to change.</em>
|
||||
# parser = PullParser.new( "<a>text<b att='val'/>txet</a>" )
|
||||
# while parser.has_next?
|
||||
# res = parser.next
|
||||
# puts res[1]['att'] if res.start_tag? and res[0] == 'b'
|
||||
# end
|
||||
# See the PullEvent class for information on the content of the results.
|
||||
# The data is identical to the arguments passed for the various events to
|
||||
# the StreamListener API.
|
||||
#
|
||||
# Notice that:
|
||||
# parser = PullParser.new( "<a>BAD DOCUMENT" )
|
||||
# while parser.has_next?
|
||||
# res = parser.next
|
||||
# raise res[1] if res.error?
|
||||
# end
|
||||
#
|
||||
# Nat Price gave me some good ideas for the API.
|
||||
class PullParser
|
||||
include XMLTokens
|
||||
extend Forwardable
|
||||
|
||||
def initialize stream
|
||||
@entities = {}
|
||||
def_delegators( :@parser, :has_next? )
|
||||
def_delegators( :@parser, :entity )
|
||||
def_delegators( :@parser, :empty? )
|
||||
|
||||
def initialize stream
|
||||
@entities = {}
|
||||
@listeners = nil
|
||||
@parser = BaseParser.new( stream )
|
||||
end
|
||||
@my_stack = []
|
||||
end
|
||||
|
||||
def add_listener( listener )
|
||||
@listeners = [] unless @listeners
|
||||
@listeners << listener
|
||||
end
|
||||
|
||||
def each
|
||||
while has_next?
|
||||
yield self.pull
|
||||
end
|
||||
end
|
||||
|
||||
def peek depth=0
|
||||
PullEvent.new(@parser.peek(depth))
|
||||
end
|
||||
|
||||
def has_next?
|
||||
@parser.has_next?
|
||||
def each
|
||||
while has_next?
|
||||
yield self.pull
|
||||
end
|
||||
end
|
||||
|
||||
def pull
|
||||
event = @parser.pull
|
||||
case event[0]
|
||||
when :entitydecl
|
||||
@entities[ event[1] ] =
|
||||
event[2] unless event[2] =~ /PUBLIC|SYSTEM/
|
||||
when :text
|
||||
unnormalized = @parser.unnormalize( event[1], @entities )
|
||||
event << unnormalized
|
||||
end
|
||||
PullEvent.new( event )
|
||||
end
|
||||
def peek depth=0
|
||||
if @my_stack.length <= depth
|
||||
(depth - @my_stack.length + 1).times {
|
||||
e = PullEvent.new(@parser.pull)
|
||||
@my_stack.push(e)
|
||||
}
|
||||
end
|
||||
@my_stack[depth]
|
||||
end
|
||||
|
||||
def pull
|
||||
return @my_stack.shift if @my_stack.length > 0
|
||||
|
||||
event = @parser.pull
|
||||
case event[0]
|
||||
when :entitydecl
|
||||
@entities[ event[1] ] =
|
||||
event[2] unless event[2] =~ /PUBLIC|SYSTEM/
|
||||
when :text
|
||||
unnormalized = @parser.unnormalize( event[1], @entities )
|
||||
event << unnormalized
|
||||
end
|
||||
PullEvent.new( event )
|
||||
end
|
||||
|
||||
def unshift token
|
||||
@parser.unshift token
|
||||
@my_stack.unshift token
|
||||
end
|
||||
end
|
||||
|
||||
def entity reference
|
||||
@parser.entity( reference )
|
||||
# A parsing event. The contents of the event are accessed as an +Array?,
|
||||
# and the type is given either by the ...? methods, or by accessing the
|
||||
# +type+ accessor. The contents of this object vary from event to event,
|
||||
# but are identical to the arguments passed to +StreamListener+s for each
|
||||
# event.
|
||||
class PullEvent
|
||||
# The type of this event. Will be one of :tag_start, :tag_end, :text,
|
||||
# :processing_instruction, :comment, :doctype, :attlistdecl, :entitydecl,
|
||||
# :notationdecl, :entity, :cdata, :xmldecl, or :error.
|
||||
def initialize(arg)
|
||||
@contents = arg
|
||||
end
|
||||
|
||||
def empty?
|
||||
@parser.empty?
|
||||
end
|
||||
|
||||
end
|
||||
|
||||
# A parsing event. The contents of the event are accessed as an +Array?,
|
||||
# and the type is given either by the ...? methods, or by accessing the
|
||||
# +type+ accessor. The contents of this object vary from event to event,
|
||||
# but are identical to the arguments passed to +StreamListener+s for each
|
||||
# event.
|
||||
class PullEvent
|
||||
# The type of this event. Will be one of :tag_start, :tag_end, :text,
|
||||
# :processing_instruction, :comment, :doctype, :attlistdecl, :entitydecl,
|
||||
# :notationdecl, :entity, :cdata, :xmldecl, or :error.
|
||||
def initialize(arg)
|
||||
@contents = arg
|
||||
end
|
||||
|
||||
def []( start, endd=nil)
|
||||
if start.kind_of? Range
|
||||
@contents.slice( start.begin+1 .. start.end )
|
||||
|
@ -103,90 +106,90 @@ module REXML
|
|||
else
|
||||
raise "Illegal argument #{start.inspect} (#{start.class})"
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
def event_type
|
||||
@contents[0]
|
||||
end
|
||||
def event_type
|
||||
@contents[0]
|
||||
end
|
||||
|
||||
# Content: [ String tag_name, Hash attributes ]
|
||||
def start_element?
|
||||
@contents[0] == :start_element
|
||||
end
|
||||
# Content: [ String tag_name, Hash attributes ]
|
||||
def start_element?
|
||||
@contents[0] == :start_element
|
||||
end
|
||||
|
||||
# Content: [ String tag_name ]
|
||||
def end_element?
|
||||
@contents[0] == :end_element
|
||||
end
|
||||
# Content: [ String tag_name ]
|
||||
def end_element?
|
||||
@contents[0] == :end_element
|
||||
end
|
||||
|
||||
# Content: [ String raw_text, String unnormalized_text ]
|
||||
def text?
|
||||
@contents[0] == :text
|
||||
end
|
||||
# Content: [ String raw_text, String unnormalized_text ]
|
||||
def text?
|
||||
@contents[0] == :text
|
||||
end
|
||||
|
||||
# Content: [ String text ]
|
||||
def instruction?
|
||||
@contents[0] == :processing_instruction
|
||||
end
|
||||
# Content: [ String text ]
|
||||
def instruction?
|
||||
@contents[0] == :processing_instruction
|
||||
end
|
||||
|
||||
# Content: [ String text ]
|
||||
def comment?
|
||||
@contents[0] == :comment
|
||||
end
|
||||
# Content: [ String text ]
|
||||
def comment?
|
||||
@contents[0] == :comment
|
||||
end
|
||||
|
||||
# Content: [ String name, String pub_sys, String long_name, String uri ]
|
||||
def doctype?
|
||||
@contents[0] == :start_doctype
|
||||
end
|
||||
# Content: [ String name, String pub_sys, String long_name, String uri ]
|
||||
def doctype?
|
||||
@contents[0] == :start_doctype
|
||||
end
|
||||
|
||||
# Content: [ String text ]
|
||||
def attlistdecl?
|
||||
@contents[0] == :attlistdecl
|
||||
end
|
||||
# Content: [ String text ]
|
||||
def attlistdecl?
|
||||
@contents[0] == :attlistdecl
|
||||
end
|
||||
|
||||
# Content: [ String text ]
|
||||
def elementdecl?
|
||||
@contents[0] == :elementdecl
|
||||
end
|
||||
# Content: [ String text ]
|
||||
def elementdecl?
|
||||
@contents[0] == :elementdecl
|
||||
end
|
||||
|
||||
# Due to the wonders of DTDs, an entity declaration can be just about
|
||||
# anything. There's no way to normalize it; you'll have to interpret the
|
||||
# content yourself. However, the following is true:
|
||||
#
|
||||
# * If the entity declaration is an internal entity:
|
||||
# [ String name, String value ]
|
||||
# Content: [ String text ]
|
||||
def entitydecl?
|
||||
@contents[0] == :entitydecl
|
||||
end
|
||||
# Due to the wonders of DTDs, an entity declaration can be just about
|
||||
# anything. There's no way to normalize it; you'll have to interpret the
|
||||
# content yourself. However, the following is true:
|
||||
#
|
||||
# * If the entity declaration is an internal entity:
|
||||
# [ String name, String value ]
|
||||
# Content: [ String text ]
|
||||
def entitydecl?
|
||||
@contents[0] == :entitydecl
|
||||
end
|
||||
|
||||
# Content: [ String text ]
|
||||
def notationdecl?
|
||||
@contents[0] == :notationdecl
|
||||
end
|
||||
# Content: [ String text ]
|
||||
def notationdecl?
|
||||
@contents[0] == :notationdecl
|
||||
end
|
||||
|
||||
# Content: [ String text ]
|
||||
def entity?
|
||||
@contents[0] == :entity
|
||||
end
|
||||
# Content: [ String text ]
|
||||
def entity?
|
||||
@contents[0] == :entity
|
||||
end
|
||||
|
||||
# Content: [ String text ]
|
||||
def cdata?
|
||||
@contents[0] == :cdata
|
||||
end
|
||||
# Content: [ String text ]
|
||||
def cdata?
|
||||
@contents[0] == :cdata
|
||||
end
|
||||
|
||||
# Content: [ String version, String encoding, String standalone ]
|
||||
def xmldecl?
|
||||
@contents[0] == :xmldecl
|
||||
end
|
||||
# Content: [ String version, String encoding, String standalone ]
|
||||
def xmldecl?
|
||||
@contents[0] == :xmldecl
|
||||
end
|
||||
|
||||
def error?
|
||||
@contents[0] == :error
|
||||
end
|
||||
def error?
|
||||
@contents[0] == :error
|
||||
end
|
||||
|
||||
def inspect
|
||||
def inspect
|
||||
@contents[0].to_s + ": " + @contents[1..-1].inspect
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
@ -1,9 +1,11 @@
|
|||
require 'rexml/parsers/baseparser'
|
||||
require 'rexml/parseexception'
|
||||
require 'rexml/namespace'
|
||||
require 'rexml/text'
|
||||
|
||||
module REXML
|
||||
module Parsers
|
||||
# SAX2Parser
|
||||
class SAX2Parser
|
||||
def initialize source
|
||||
@parser = BaseParser.new(source)
|
||||
|
@ -36,6 +38,10 @@ module REXML
|
|||
# :start_prefix_mapping, :end_prefix_mapping, :characters,
|
||||
# :processing_instruction, :doctype, :attlistdecl, :elementdecl,
|
||||
# :entitydecl, :notationdecl, :cdata, :xmldecl, :comment
|
||||
#
|
||||
# There is an additional symbol that can be listened for: :progress.
|
||||
# This will be called for every event generated, passing in the current
|
||||
# stream position.
|
||||
#
|
||||
# Array contains regular expressions or strings which will be matched
|
||||
# against fully qualified element names.
|
||||
|
@ -161,6 +167,7 @@ module REXML
|
|||
:elementdecl, :cdata, :notationdecl, :xmldecl
|
||||
handle( *event )
|
||||
end
|
||||
handle( :progress, @parser.source.position )
|
||||
end
|
||||
end
|
||||
|
||||
|
|
|
@ -11,7 +11,7 @@
|
|||
# Main page:: http://www.germane-software.com/software/rexml
|
||||
# Author:: Sean Russell <serATgermaneHYPHENsoftwareDOTcom>
|
||||
# Version:: 3.1.3
|
||||
# Date:: +2005/138
|
||||
# Date:: 2005/224
|
||||
#
|
||||
# This API documentation can be downloaded from the REXML home page, or can
|
||||
# be accessed online[http://www.germane-software.com/software/rexml_doc]
|
||||
|
@ -20,7 +20,7 @@
|
|||
# or can be accessed
|
||||
# online[http://www.germane-software.com/software/rexml/docs/tutorial.html]
|
||||
module REXML
|
||||
Copyright = "Copyright © 2001-2005 Sean Russell <ser@germane-software.com>"
|
||||
Date = "+2005/138"
|
||||
Copyright = "Copyright © 2001, 2002, 2003, 2004 Sean Russell <ser@germane-software.com>"
|
||||
Date = "2005/224"
|
||||
Version = "3.1.3"
|
||||
end
|
||||
|
|
|
@ -90,5 +90,7 @@ module REXML
|
|||
# @p comment The content of the comment
|
||||
def comment comment
|
||||
end
|
||||
def progress position
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
@ -98,6 +98,10 @@ module REXML
|
|||
@buffer == ""
|
||||
end
|
||||
|
||||
def position
|
||||
@orig.index( @buffer )
|
||||
end
|
||||
|
||||
# @return the current line in the source
|
||||
def current_line
|
||||
lines = @orig.split
|
||||
|
@ -194,6 +198,10 @@ module REXML
|
|||
super and ( @source.nil? || @source.eof? )
|
||||
end
|
||||
|
||||
def position
|
||||
@er_source.pos
|
||||
end
|
||||
|
||||
# @return the current line in the source
|
||||
def current_line
|
||||
begin
|
||||
|
|
|
@ -82,10 +82,13 @@ module REXML
|
|||
@event_arg = event_arg
|
||||
end
|
||||
|
||||
attr_reader :done?
|
||||
attr_reader :event_type
|
||||
attr_accessor :event_arg
|
||||
|
||||
def done?
|
||||
@done
|
||||
end
|
||||
|
||||
def single?
|
||||
return (@event_type != :start_element and @event_type != :start_attribute)
|
||||
end
|
||||
|
|
|
@ -2,71 +2,71 @@ require 'rexml/encoding'
|
|||
require 'rexml/source'
|
||||
|
||||
module REXML
|
||||
# NEEDS DOCUMENTATION
|
||||
class XMLDecl < Child
|
||||
include Encoding
|
||||
# NEEDS DOCUMENTATION
|
||||
class XMLDecl < Child
|
||||
include Encoding
|
||||
|
||||
DEFAULT_VERSION = "1.0";
|
||||
DEFAULT_ENCODING = "UTF-8";
|
||||
DEFAULT_STANDALONE = "no";
|
||||
START = '<\?xml';
|
||||
STOP = '\?>';
|
||||
DEFAULT_VERSION = "1.0";
|
||||
DEFAULT_ENCODING = "UTF-8";
|
||||
DEFAULT_STANDALONE = "no";
|
||||
START = '<\?xml';
|
||||
STOP = '\?>';
|
||||
|
||||
attr_accessor :version, :standalone
|
||||
attr_accessor :version, :standalone
|
||||
attr_reader :writeencoding
|
||||
|
||||
def initialize(version=DEFAULT_VERSION, encoding=nil, standalone=nil)
|
||||
def initialize(version=DEFAULT_VERSION, encoding=nil, standalone=nil)
|
||||
@writethis = true
|
||||
@writeencoding = !encoding.nil?
|
||||
if version.kind_of? XMLDecl
|
||||
super()
|
||||
@version = version.version
|
||||
self.encoding = version.encoding
|
||||
if version.kind_of? XMLDecl
|
||||
super()
|
||||
@version = version.version
|
||||
self.encoding = version.encoding
|
||||
@writeencoding = version.writeencoding
|
||||
@standalone = version.standalone
|
||||
else
|
||||
super()
|
||||
@version = version
|
||||
self.encoding = encoding
|
||||
@standalone = standalone
|
||||
end
|
||||
@version = DEFAULT_VERSION if @version.nil?
|
||||
end
|
||||
@standalone = version.standalone
|
||||
else
|
||||
super()
|
||||
@version = version
|
||||
self.encoding = encoding
|
||||
@standalone = standalone
|
||||
end
|
||||
@version = DEFAULT_VERSION if @version.nil?
|
||||
end
|
||||
|
||||
def clone
|
||||
XMLDecl.new(self)
|
||||
end
|
||||
def clone
|
||||
XMLDecl.new(self)
|
||||
end
|
||||
|
||||
def write writer, indent_level=-1, transitive=false, ie_hack=false
|
||||
def write writer, indent=-1, transitive=false, ie_hack=false
|
||||
return nil unless @writethis or writer.kind_of? Output
|
||||
indent( writer, indent_level )
|
||||
writer << START.sub(/\\/u, '')
|
||||
indent( writer, indent )
|
||||
writer << START.sub(/\\/u, '')
|
||||
if writer.kind_of? Output
|
||||
writer << " #{content writer.encoding}"
|
||||
else
|
||||
writer << " #{content encoding}"
|
||||
end
|
||||
writer << STOP.sub(/\\/u, '')
|
||||
end
|
||||
writer << STOP.sub(/\\/u, '')
|
||||
end
|
||||
|
||||
def ==( other )
|
||||
other.kind_of?(XMLDecl) and
|
||||
other.version == @version and
|
||||
other.encoding == self.encoding and
|
||||
other.standalone == @standalone
|
||||
end
|
||||
def ==( other )
|
||||
other.kind_of?(XMLDecl) and
|
||||
other.version == @version and
|
||||
other.encoding == self.encoding and
|
||||
other.standalone == @standalone
|
||||
end
|
||||
|
||||
def xmldecl version, encoding, standalone
|
||||
@version = version
|
||||
self.encoding = encoding
|
||||
@standalone = standalone
|
||||
end
|
||||
def xmldecl version, encoding, standalone
|
||||
@version = version
|
||||
self.encoding = encoding
|
||||
@standalone = standalone
|
||||
end
|
||||
|
||||
def node_type
|
||||
:xmldecl
|
||||
end
|
||||
def node_type
|
||||
:xmldecl
|
||||
end
|
||||
|
||||
alias :stand_alone? :standalone
|
||||
alias :stand_alone? :standalone
|
||||
alias :old_enc= :encoding=
|
||||
|
||||
def encoding=( enc )
|
||||
|
@ -98,12 +98,12 @@ module REXML
|
|||
START.sub(/\\/u, '') + " ... " + STOP.sub(/\\/u, '')
|
||||
end
|
||||
|
||||
private
|
||||
def content(enc)
|
||||
rv = "version='#@version'"
|
||||
rv << " encoding='#{enc}'" if @writeencoding || enc !~ /utf-8/i
|
||||
rv << " standalone='#@standalone'" if @standalone
|
||||
rv
|
||||
end
|
||||
end
|
||||
private
|
||||
def content(enc)
|
||||
rv = "version='#@version'"
|
||||
rv << " encoding='#{enc}'" if @writeencoding || enc !~ /utf-8/i
|
||||
rv << " standalone='#@standalone'" if @standalone
|
||||
rv
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
@ -2,76 +2,65 @@ require 'rexml/functions'
|
|||
require 'rexml/xpath_parser'
|
||||
|
||||
module REXML
|
||||
# Wrapper class. Use this class to access the XPath functions.
|
||||
class XPath
|
||||
include Functions
|
||||
EMPTY_HASH = {}
|
||||
# Wrapper class. Use this class to access the XPath functions.
|
||||
class XPath
|
||||
include Functions
|
||||
EMPTY_HASH = {}
|
||||
|
||||
# Finds and returns the first node that matches the supplied xpath.
|
||||
# element::
|
||||
# The context element
|
||||
# path::
|
||||
# The xpath to search for. If not supplied or nil, returns the first
|
||||
# node matching '*'.
|
||||
# namespaces::
|
||||
# If supplied, a Hash which defines a namespace mapping.
|
||||
#
|
||||
# XPath.first( node )
|
||||
# XPath.first( doc, "//b"} )
|
||||
# XPath.first( node, "a/x:b", { "x"=>"http://doofus" } )
|
||||
# Finds and returns the first node that matches the supplied xpath.
|
||||
# element::
|
||||
# The context element
|
||||
# path::
|
||||
# The xpath to search for. If not supplied or nil, returns the first
|
||||
# node matching '*'.
|
||||
# namespaces::
|
||||
# If supplied, a Hash which defines a namespace mapping.
|
||||
#
|
||||
# XPath.first( node )
|
||||
# XPath.first( doc, "//b"} )
|
||||
# XPath.first( node, "a/x:b", { "x"=>"http://doofus" } )
|
||||
def XPath::first element, path=nil, namespaces={}, variables={}
|
||||
=begin
|
||||
raise "The namespaces argument, if supplied, must be a hash object." unless namespaces.kind_of? Hash
|
||||
raise "The variables argument, if supplied, must be a hash object." unless variables.kind_of? Hash
|
||||
parser = XPathParser.new
|
||||
parser.namespaces = namespaces
|
||||
parser.variables = variables
|
||||
path = "*" unless path
|
||||
parser.first( path, element );
|
||||
=end
|
||||
#=begin
|
||||
raise "The namespaces argument, if supplied, must be a hash object." unless namespaces.kind_of? Hash
|
||||
raise "The variables argument, if supplied, must be a hash object." unless variables.kind_of? Hash
|
||||
parser = XPathParser.new
|
||||
parser.namespaces = namespaces
|
||||
parser.variables = variables
|
||||
path = "*" unless path
|
||||
element = [element] unless element.kind_of? Array
|
||||
parser.parse(path, element).flatten[0]
|
||||
#=end
|
||||
end
|
||||
parser = XPathParser.new
|
||||
parser.namespaces = namespaces
|
||||
parser.variables = variables
|
||||
path = "*" unless path
|
||||
element = [element] unless element.kind_of? Array
|
||||
parser.parse(path, element).flatten[0]
|
||||
end
|
||||
|
||||
# Itterates over nodes that match the given path, calling the supplied
|
||||
# block with the match.
|
||||
# element::
|
||||
# The context element
|
||||
# path::
|
||||
# The xpath to search for. If not supplied or nil, defaults to '*'
|
||||
# namespaces::
|
||||
# If supplied, a Hash which defines a namespace mapping
|
||||
#
|
||||
# XPath.each( node ) { |el| ... }
|
||||
# XPath.each( node, '/*[@attr='v']' ) { |el| ... }
|
||||
# XPath.each( node, 'ancestor::x' ) { |el| ... }
|
||||
def XPath::each element, path=nil, namespaces={}, variables={}, &block
|
||||
# Itterates over nodes that match the given path, calling the supplied
|
||||
# block with the match.
|
||||
# element::
|
||||
# The context element
|
||||
# path::
|
||||
# The xpath to search for. If not supplied or nil, defaults to '*'
|
||||
# namespaces::
|
||||
# If supplied, a Hash which defines a namespace mapping
|
||||
#
|
||||
# XPath.each( node ) { |el| ... }
|
||||
# XPath.each( node, '/*[@attr='v']' ) { |el| ... }
|
||||
# XPath.each( node, 'ancestor::x' ) { |el| ... }
|
||||
def XPath::each element, path=nil, namespaces={}, variables={}, &block
|
||||
raise "The namespaces argument, if supplied, must be a hash object." unless namespaces.kind_of? Hash
|
||||
raise "The variables argument, if supplied, must be a hash object." unless variables.kind_of? Hash
|
||||
parser = XPathParser.new
|
||||
parser.namespaces = namespaces
|
||||
parser.variables = variables
|
||||
path = "*" unless path
|
||||
element = [element] unless element.kind_of? Array
|
||||
parser.parse(path, element).each( &block )
|
||||
end
|
||||
parser = XPathParser.new
|
||||
parser.namespaces = namespaces
|
||||
parser.variables = variables
|
||||
path = "*" unless path
|
||||
element = [element] unless element.kind_of? Array
|
||||
parser.parse(path, element).each( &block )
|
||||
end
|
||||
|
||||
# Returns an array of nodes matching a given XPath.
|
||||
def XPath::match element, path=nil, namespaces={}, variables={}
|
||||
parser = XPathParser.new
|
||||
parser.namespaces = namespaces
|
||||
parser.variables = variables
|
||||
path = "*" unless path
|
||||
element = [element] unless element.kind_of? Array
|
||||
parser.parse(path,element)
|
||||
end
|
||||
end
|
||||
# Returns an array of nodes matching a given XPath.
|
||||
def XPath::match element, path=nil, namespaces={}, variables={}
|
||||
parser = XPathParser.new
|
||||
parser.namespaces = namespaces
|
||||
parser.variables = variables
|
||||
path = "*" unless path
|
||||
element = [element] unless element.kind_of? Array
|
||||
parser.parse(path,element)
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
@ -76,6 +76,8 @@ module REXML
|
|||
|
||||
# Performs a depth-first (document order) XPath search, and returns the
|
||||
# first match. This is the fastest, lightest way to return a single result.
|
||||
#
|
||||
# FIXME: This method is incomplete!
|
||||
def first( path_stack, node )
|
||||
#puts "#{depth}) Entering match( #{path.inspect}, #{tree.inspect} )"
|
||||
return nil if path.size == 0
|
||||
|
@ -123,14 +125,6 @@ module REXML
|
|||
r = expr( path_stack, nodeset )
|
||||
#puts "MAIN EXPR => #{r.inspect}"
|
||||
r
|
||||
|
||||
#while ( path_stack.size > 0 and nodeset.size > 0 )
|
||||
# #puts "MATCH: #{path_stack.inspect} '#{nodeset.collect{|n|n.class}.inspect}'"
|
||||
# nodeset = expr( path_stack, nodeset )
|
||||
# #puts "NODESET: #{nodeset.inspect}"
|
||||
# #puts "PATH_STACK: #{path_stack.inspect}"
|
||||
#end
|
||||
#nodeset
|
||||
end
|
||||
|
||||
private
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue