* Fixed the inheritance bug in the pull parser that James Britt found.

* Indentation changes, and changed some exceptions to runtime exceptions.
* Backed out the patch that changed the versions
* Wasn't including Text class.
* Fixes issue:25 (Trac)
* Fixes ticket:3 (Issue38 in Roundup.)
* Numerous fixes in the XPath interpreter correcting, among other things, ordering bugs and some incorrect behavior.

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/branches/ruby_1_8@8973 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2022-11-09 12:17:21 -05:00 · 2005-08-12 12:08:52 +00:00 · 2005-08-12 12:08:52 +00:00 · 69e5c7d297
commit 69e5c7d297
parent 1b12d598f8
15 changed files with 792 additions and 786 deletions
--- a/lib/rexml/attribute.rb
+++ b/lib/rexml/attribute.rb
@@ -101,20 +101,20 @@ module REXML
 			end

 			@unnormalized = nil
-			@value = @normalized = Text::normalize( @value, doctype )
+			@normalized = Text::normalize( @value, doctype )
 		end

 		# Returns the UNNORMALIZED value of this attribute.  That is, entities
 		# have been expanded to their values
 		def value
-			@unnormalized if @unnormalized
+			return @unnormalized if @unnormalized
 			doctype = nil
 			if @element
 				doc = @element.document
 				doctype = doc.doctype if doc
 			end
 			@normalized = nil
-			@value = @unnormalized = Text::unnormalize( @value, doctype )
+			@unnormalized = Text::unnormalize( @value, doctype )
 		end

 		# Returns a copy of this attribute
--- a/lib/rexml/document.rb
+++ b/lib/rexml/document.rb
@@ -16,166 +16,166 @@ module REXML
  # Document has a single child that can be accessed by root().
  # Note that if you want to have an XML declaration written for a document
  # you create, you must add one; REXML documents do not write a default
-  # declaration for you.  See |DECLARATION| and |write|.
-  class Document < Element
-    # A convenient default XML declaration.  If you want an XML declaration,
-    # the easiest way to add one is mydoc << Document::DECLARATION
+	# declaration for you.  See |DECLARATION| and |write|.
+	class Document < Element
+		# A convenient default XML declaration.  If you want an XML declaration,
+		# the easiest way to add one is mydoc << Document::DECLARATION
    # +DEPRECATED+
    # Use: mydoc << XMLDecl.default
-    DECLARATION = XMLDecl.default
+		DECLARATION = XMLDecl.default

-    # Constructor
-    # @param source if supplied, must be a Document, String, or IO. 
-    # Documents have their context and Element attributes cloned.
-    # Strings are expected to be valid XML documents.  IOs are expected
-    # to be sources of valid XML documents.
-    # @param context if supplied, contains the context of the document;
-    # this should be a Hash.
-    # NOTE that I'm not sure what the context is for; I cloned it out of
-    # the Electric XML API (in which it also seems to do nothing), and it
-    # is now legacy.  It may do something, someday... it may disappear.
-    def initialize( source = nil, context = {} )
-      super()
-      @context = context
-      return if source.nil?
-      if source.kind_of? Document
-        @context = source.context
-        super source
-      else
-        build(  source )
-      end
-    end
+		# Constructor
+		# @param source if supplied, must be a Document, String, or IO. 
+		# Documents have their context and Element attributes cloned.
+	  # Strings are expected to be valid XML documents.  IOs are expected
+	  # to be sources of valid XML documents.
+	  # @param context if supplied, contains the context of the document;
+	  # this should be a Hash.
+	  # NOTE that I'm not sure what the context is for; I cloned it out of
+	  # the Electric XML API (in which it also seems to do nothing), and it
+	  # is now legacy.  It may do something, someday... it may disappear.
+		def initialize( source = nil, context = {} )
+			super()
+			@context = context
+			return if source.nil?
+			if source.kind_of? Document
+				@context = source.context
+				super source
+			else
+				build(  source )
+			end
+		end

    def node_type
      :document
    end

-    # Should be obvious
-    def clone
-      Document.new self
-    end
+		# Should be obvious
+		def clone
+			Document.new self
+		end

-    # According to the XML spec, a root node has no expanded name
-    def expanded_name
-      ''
-      #d = doc_type
-      #d ? d.name : "UNDEFINED"
-    end
+		# According to the XML spec, a root node has no expanded name
+		def expanded_name
+			''
+			#d = doc_type
+			#d ? d.name : "UNDEFINED"
+		end

-    alias :name :expanded_name
+		alias :name :expanded_name

-    # We override this, because XMLDecls and DocTypes must go at the start
-    # of the document
-    def add( child )
-      if child.kind_of? XMLDecl
-        @children.unshift child
-      elsif child.kind_of? DocType
-        if @children[0].kind_of? XMLDecl
-          @children[1,0] = child
-        else
-          @children.unshift child
-        end
-        child.parent = self
-      else
-        rv = super
-        raise "attempted adding second root element to document" if @elements.size > 1
-        rv
-      end
-    end
-    alias :<< :add
+		# We override this, because XMLDecls and DocTypes must go at the start
+		# of the document
+		def add( child )
+			if child.kind_of? XMLDecl
+				@children.unshift child
+			elsif child.kind_of? DocType
+				if @children[0].kind_of? XMLDecl
+					@children[1,0] = child
+				else
+					@children.unshift child
+				end
+				child.parent = self
+			else
+				rv = super
+				raise "attempted adding second root element to document" if @elements.size > 1
+				rv
+			end
+		end
+		alias :<< :add

-    def add_element(arg=nil, arg2=nil)
-      rv = super
-      raise "attempted adding second root element to document" if @elements.size > 1
-      rv
-    end
+		def add_element(arg=nil, arg2=nil)
+			rv = super
+			raise "attempted adding second root element to document" if @elements.size > 1
+			rv
+		end

-    # @return the root Element of the document, or nil if this document
-    # has no children.
-    def root
+		# @return the root Element of the document, or nil if this document
+		# has no children.
+		def root
      elements[1]
      #self
      #@children.find { |item| item.kind_of? Element }
-    end
+		end

-    # @return the DocType child of the document, if one exists,
-    # and nil otherwise.
-    def doctype
-      @children.find { |item| item.kind_of? DocType }
-    end
+		# @return the DocType child of the document, if one exists,
+		# and nil otherwise.
+		def doctype
+			@children.find { |item| item.kind_of? DocType }
+		end

-    # @return the XMLDecl of this document; if no XMLDecl has been
-    # set, the default declaration is returned.
-    def xml_decl
-      rv = @children[0]
+		# @return the XMLDecl of this document; if no XMLDecl has been
+		# set, the default declaration is returned.
+		def xml_decl
+			rv = @children[0]
      return rv if rv.kind_of? XMLDecl
      rv = @children.unshift(XMLDecl.default)[0]
-    end
+		end

-    # @return the XMLDecl version of this document as a String.
-    # If no XMLDecl has been set, returns the default version.
-    def version
-      xml_decl().version
-    end
+		# @return the XMLDecl version of this document as a String.
+		# If no XMLDecl has been set, returns the default version.
+		def version
+			xml_decl().version
+		end

-    # @return the XMLDecl encoding of this document as a String.
-    # If no XMLDecl has been set, returns the default encoding.
-    def encoding
-      xml_decl().encoding
-    end
+		# @return the XMLDecl encoding of this document as a String.
+		# If no XMLDecl has been set, returns the default encoding.
+		def encoding
+			xml_decl().encoding
+		end

-    # @return the XMLDecl standalone value of this document as a String.
-    # If no XMLDecl has been set, returns the default setting.
-    def stand_alone?
-      xml_decl().stand_alone?
-    end
+		# @return the XMLDecl standalone value of this document as a String.
+		# If no XMLDecl has been set, returns the default setting.
+		def stand_alone?
+			xml_decl().stand_alone?
+		end

-    # Write the XML tree out, optionally with indent.  This writes out the
-    # entire XML document, including XML declarations, doctype declarations,
-    # and processing instructions (if any are given).
-    # A controversial point is whether Document should always write the XML
-    # declaration (<?xml version='1.0'?>) whether or not one is given by the
-    # user (or source document).  REXML does not write one if one was not
-    # specified, because it adds unneccessary bandwidth to applications such
-    # as XML-RPC.
-    #
-    #
-    # output::
-    #    output an object which supports '<< string'; this is where the
-    #   document will be written.
-    # indent::
-    #   An integer.  If -1, no indenting will be used; otherwise, the
-    #   indentation will be this number of spaces, and children will be
-    #   indented an additional amount.  Defaults to -1
-    # transitive::
-    #   If transitive is true and indent is >= 0, then the output will be
-    #   pretty-printed in such a way that the added whitespace does not affect
-    #   the absolute *value* of the document -- that is, it leaves the value
-    #   and number of Text nodes in the document unchanged.
-    # ie_hack::
-    #   Internet Explorer is the worst piece of crap to have ever been
-    #   written, with the possible exception of Windows itself.  Since IE is
-    #   unable to parse proper XML, we have to provide a hack to generate XML
-    #   that IE's limited abilities can handle.  This hack inserts a space 
-    #   before the /> on empty tags.  Defaults to false
-    def write( output=$stdout, indent_level=-1, transitive=false, ie_hack=false )
-      output = Output.new( output, xml_decl.encoding ) if xml_decl.encoding != "UTF-8" && !output.kind_of?(Output)
-      @children.each { |node|
-        indent( output, indent_level ) if node.node_type == :element
-        if node.write( output, indent_level, transitive, ie_hack )
-          output << "\n" unless indent_level<0 or node == @children[-1]
+		# Write the XML tree out, optionally with indent.  This writes out the
+		# entire XML document, including XML declarations, doctype declarations,
+		# and processing instructions (if any are given).
+		# A controversial point is whether Document should always write the XML
+		# declaration (<?xml version='1.0'?>) whether or not one is given by the
+		# user (or source document).  REXML does not write one if one was not
+		# specified, because it adds unneccessary bandwidth to applications such
+		# as XML-RPC.
+		#
+		#
+		# output::
+		#	  output an object which supports '<< string'; this is where the
+		#   document will be written.
+		# indent::
+		#   An integer.  If -1, no indenting will be used; otherwise, the
+		#   indentation will be this number of spaces, and children will be
+		#   indented an additional amount.  Defaults to -1
+		# transitive::
+		#   If transitive is true and indent is >= 0, then the output will be
+		#   pretty-printed in such a way that the added whitespace does not affect
+		#   the absolute *value* of the document -- that is, it leaves the value
+		#   and number of Text nodes in the document unchanged.
+		# ie_hack::
+		#   Internet Explorer is the worst piece of crap to have ever been
+		#   written, with the possible exception of Windows itself.  Since IE is
+		#   unable to parse proper XML, we have to provide a hack to generate XML
+		#   that IE's limited abilities can handle.  This hack inserts a space 
+		#   before the /> on empty tags.  Defaults to false
+		def write( output=$stdout, indent=-1, transitive=false, ie_hack=false )
+			output = Output.new( output, xml_decl.encoding ) if xml_decl.encoding != "UTF-8" && !output.kind_of?(Output)
+			@children.each { |node|
+				indent( output, indent ) if node.node_type == :element
+				if node.write( output, indent, transitive, ie_hack )
+          output << "\n" unless indent<0 or node == @children[-1]
        end
-      }
-    end
+			}
+		end

-    
-    def Document::parse_stream( source, listener )
-      Parsers::StreamParser.new( source, listener ).parse
-    end
+		
+		def Document::parse_stream( source, listener )
+			Parsers::StreamParser.new( source, listener ).parse
+		end

-    private
-    def build( source )
+		private
+		def build( source )
      Parsers::TreeParser.new( source, self ).parse
-    end
-  end
+		end
+	end
 end
--- a/lib/rexml/element.rb
+++ b/lib/rexml/element.rb
@@ -36,8 +36,6 @@ module REXML
 		# 	If an Element, the object will be shallowly cloned; name, 
 		# 	attributes, and namespaces will be copied.  Children will +not+ be
 		# 	copied.
-		# 	If a Source, the source will be scanned and parsed for an Element,
-		# 	and all child elements will be recursively parsed as well.
 		# parent:: 
 		# 	if supplied, must be a Parent, and will be used as
 		# 	the parent of this object.
@@ -223,7 +221,7 @@ module REXML
 		#  b.namespace("y")      # -> '2'
 		def namespace(prefix=nil)
 			if prefix.nil?
-				prefix = self.prefix()
+				prefix = prefix()
 			end
 			if prefix == ''
 				prefix = "xmlns"
--- a/lib/rexml/functions.rb
+++ b/lib/rexml/functions.rb
@@ -339,6 +339,8 @@ module REXML
    end

    def Functions::sum( nodes )
+      nodes = [nodes] unless nodes.kind_of? Array
+      nodes.inject(0) { |r,n| r += number(string(n)) }
    end
    
    def Functions::floor( number )
--- a/lib/rexml/instruction.rb
+++ b/lib/rexml/instruction.rb
@@ -38,8 +38,8 @@ module REXML
 			Instruction.new self
 		end
 		
-		def write writer, indent_level=-1, transitive=false, ie_hack=false
-			indent(writer, indent_level)
+		def write writer, indent=-1, transitive=false, ie_hack=false
+			indent(writer, indent)
 			writer << START.sub(/\\/u, '')
 			writer << @target
 			writer << ' '
--- a/lib/rexml/parsers/baseparser.rb
+++ b/lib/rexml/parsers/baseparser.rb
@@ -2,103 +2,103 @@ require 'rexml/parseexception'
 require 'rexml/source'

 module REXML
-	module Parsers
-		# = Using the Pull Parser
-		# <em>This API is experimental, and subject to change.</em>
-		#  parser = PullParser.new( "<a>text<b att='val'/>txet</a>" )
-		#  while parser.has_next?
-		#    res = parser.next
-		#    puts res[1]['att'] if res.start_tag? and res[0] == 'b'
-		#  end
-		# See the PullEvent class for information on the content of the results.
-		# The data is identical to the arguments passed for the various events to
-		# the StreamListener API.
-		#
-		# Notice that:
-		#  parser = PullParser.new( "<a>BAD DOCUMENT" )
-		#  while parser.has_next?
-		#    res = parser.next
-		#    raise res[1] if res.error?
-		#  end
-		#
-		# Nat Price gave me some good ideas for the API.
-		class BaseParser
-			NCNAME_STR= '[\w:][\-\w\d.]*'
-			NAME_STR= "(?:#{NCNAME_STR}:)?#{NCNAME_STR}"
+  module Parsers
+    # = Using the Pull Parser
+    # <em>This API is experimental, and subject to change.</em>
+    #  parser = PullParser.new( "<a>text<b att='val'/>txet</a>" )
+    #  while parser.has_next?
+    #    res = parser.next
+    #    puts res[1]['att'] if res.start_tag? and res[0] == 'b'
+    #  end
+    # See the PullEvent class for information on the content of the results.
+    # The data is identical to the arguments passed for the various events to
+    # the StreamListener API.
+    #
+    # Notice that:
+    #  parser = PullParser.new( "<a>BAD DOCUMENT" )
+    #  while parser.has_next?
+    #    res = parser.next
+    #    raise res[1] if res.error?
+    #  end
+    #
+    # Nat Price gave me some good ideas for the API.
+    class BaseParser
+      NCNAME_STR= '[\w:][\-\w\d.]*'
+      NAME_STR= "(?:#{NCNAME_STR}:)?#{NCNAME_STR}"

-			NAMECHAR = '[\-\w\d\.:]'
-			NAME = "([\\w:]#{NAMECHAR}*)"
-			NMTOKEN = "(?:#{NAMECHAR})+"
-			NMTOKENS = "#{NMTOKEN}(\\s+#{NMTOKEN})*"
-			REFERENCE = "(?:&#{NAME};|&#\\d+;|&#x[0-9a-fA-F]+;)"
-			REFERENCE_RE = /#{REFERENCE}/
+      NAMECHAR = '[\-\w\d\.:]'
+      NAME = "([\\w:]#{NAMECHAR}*)"
+      NMTOKEN = "(?:#{NAMECHAR})+"
+      NMTOKENS = "#{NMTOKEN}(\\s+#{NMTOKEN})*"
+      REFERENCE = "(?:&#{NAME};|&#\\d+;|&#x[0-9a-fA-F]+;)"
+      REFERENCE_RE = /#{REFERENCE}/

-			DOCTYPE_START = /\A\s*<!DOCTYPE\s/um
-			DOCTYPE_PATTERN = /\s*<!DOCTYPE\s+(.*?)(\[|>)/um
-			ATTRIBUTE_PATTERN = /\s*(#{NAME_STR})\s*=\s*(["'])(.*?)\2/um
-			COMMENT_START = /\A<!--/u
-			COMMENT_PATTERN = /<!--(.*?)-->/um
-			CDATA_START = /\A<!\[CDATA\[/u
-			CDATA_END = /^\s*\]\s*>/um
-			CDATA_PATTERN = /<!\[CDATA\[(.*?)\]\]>/um
-			XMLDECL_START = /\A<\?xml\s/u;
-			XMLDECL_PATTERN = /<\?xml\s+(.*?)\?>*/um
-			INSTRUCTION_START = /\A<\?/u
-			INSTRUCTION_PATTERN = /<\?(.*?)(\s+.*?)?\?>/um
-			TAG_MATCH = /^<((?>#{NAME_STR}))\s*((?>\s+#{NAME_STR}\s*=\s*(["']).*?\3)*)\s*(\/)?>/um
-			CLOSE_MATCH = /^\s*<\/(#{NAME_STR})\s*>/um
+      DOCTYPE_START = /\A\s*<!DOCTYPE\s/um
+      DOCTYPE_PATTERN = /\s*<!DOCTYPE\s+(.*?)(\[|>)/um
+      ATTRIBUTE_PATTERN = /\s*(#{NAME_STR})\s*=\s*(["'])(.*?)\2/um
+      COMMENT_START = /\A<!--/u
+      COMMENT_PATTERN = /<!--(.*?)-->/um
+      CDATA_START = /\A<!\[CDATA\[/u
+      CDATA_END = /^\s*\]\s*>/um
+      CDATA_PATTERN = /<!\[CDATA\[(.*?)\]\]>/um
+      XMLDECL_START = /\A<\?xml\s/u;
+      XMLDECL_PATTERN = /<\?xml\s+(.*?)\?>*/um
+      INSTRUCTION_START = /\A<\?/u
+      INSTRUCTION_PATTERN = /<\?(.*?)(\s+.*?)?\?>/um
+      TAG_MATCH = /^<((?>#{NAME_STR}))\s*((?>\s+#{NAME_STR}\s*=\s*(["']).*?\3)*)\s*(\/)?>/um
+      CLOSE_MATCH = /^\s*<\/(#{NAME_STR})\s*>/um

-			VERSION = /\bversion\s*=\s*["'](.*?)['"]/um
-			ENCODING = /\bencoding=["'](.*?)['"]/um
-			STANDALONE = /\bstandalone=["'](.*?)['"]/um
+      VERSION = /\bversion\s*=\s*["'](.*?)['"]/um
+      ENCODING = /\bencoding=["'](.*?)['"]/um
+      STANDALONE = /\bstandalone=["'](.*?)['"]/um

-			ENTITY_START = /^\s*<!ENTITY/
-			IDENTITY = /^([!\*\w\-]+)(\s+#{NCNAME_STR})?(\s+["'].*?['"])?(\s+['"].*?["'])?/u
-			ELEMENTDECL_START = /^\s*<!ELEMENT/um
-			ELEMENTDECL_PATTERN = /^\s*(<!ELEMENT.*?)>/um
-			SYSTEMENTITY = /^\s*(%.*?;)\s*$/um
-			ENUMERATION = "\\(\\s*#{NMTOKEN}(?:\\s*\\|\\s*#{NMTOKEN})*\\s*\\)"
-			NOTATIONTYPE = "NOTATION\\s+\\(\\s*#{NAME}(?:\\s*\\|\\s*#{NAME})*\\s*\\)"
-			ENUMERATEDTYPE = "(?:(?:#{NOTATIONTYPE})|(?:#{ENUMERATION}))"
-			ATTTYPE = "(CDATA|ID|IDREF|IDREFS|ENTITY|ENTITIES|NMTOKEN|NMTOKENS|#{ENUMERATEDTYPE})"
-			ATTVALUE = "(?:\"((?:[^<&\"]|#{REFERENCE})*)\")|(?:'((?:[^<&']|#{REFERENCE})*)')"
-			DEFAULTDECL = "(#REQUIRED|#IMPLIED|(?:(#FIXED\\s+)?#{ATTVALUE}))"
-			ATTDEF = "\\s+#{NAME}\\s+#{ATTTYPE}\\s+#{DEFAULTDECL}"
-			ATTDEF_RE = /#{ATTDEF}/
-			ATTLISTDECL_START = /^\s*<!ATTLIST/um
-			ATTLISTDECL_PATTERN = /^\s*<!ATTLIST\s+#{NAME}(?:#{ATTDEF})*\s*>/um
-			NOTATIONDECL_START = /^\s*<!NOTATION/um
-			PUBLIC = /^\s*<!NOTATION\s+(\w[\-\w]*)\s+(PUBLIC)\s+((["']).*?\4)\s*>/um
-			SYSTEM = /^\s*<!NOTATION\s+(\w[\-\w]*)\s+(SYSTEM)\s+((["']).*?\4)\s*>/um
+      ENTITY_START = /^\s*<!ENTITY/
+      IDENTITY = /^([!\*\w\-]+)(\s+#{NCNAME_STR})?(\s+["'].*?['"])?(\s+['"].*?["'])?/u
+      ELEMENTDECL_START = /^\s*<!ELEMENT/um
+      ELEMENTDECL_PATTERN = /^\s*(<!ELEMENT.*?)>/um
+      SYSTEMENTITY = /^\s*(%.*?;)\s*$/um
+      ENUMERATION = "\\(\\s*#{NMTOKEN}(?:\\s*\\|\\s*#{NMTOKEN})*\\s*\\)"
+      NOTATIONTYPE = "NOTATION\\s+\\(\\s*#{NAME}(?:\\s*\\|\\s*#{NAME})*\\s*\\)"
+      ENUMERATEDTYPE = "(?:(?:#{NOTATIONTYPE})|(?:#{ENUMERATION}))"
+      ATTTYPE = "(CDATA|ID|IDREF|IDREFS|ENTITY|ENTITIES|NMTOKEN|NMTOKENS|#{ENUMERATEDTYPE})"
+      ATTVALUE = "(?:\"((?:[^<&\"]|#{REFERENCE})*)\")|(?:'((?:[^<&']|#{REFERENCE})*)')"
+      DEFAULTDECL = "(#REQUIRED|#IMPLIED|(?:(#FIXED\\s+)?#{ATTVALUE}))"
+      ATTDEF = "\\s+#{NAME}\\s+#{ATTTYPE}\\s+#{DEFAULTDECL}"
+      ATTDEF_RE = /#{ATTDEF}/
+      ATTLISTDECL_START = /^\s*<!ATTLIST/um
+      ATTLISTDECL_PATTERN = /^\s*<!ATTLIST\s+#{NAME}(?:#{ATTDEF})*\s*>/um
+      NOTATIONDECL_START = /^\s*<!NOTATION/um
+      PUBLIC = /^\s*<!NOTATION\s+(\w[\-\w]*)\s+(PUBLIC)\s+((["']).*?\4)\s*>/um
+      SYSTEM = /^\s*<!NOTATION\s+(\w[\-\w]*)\s+(SYSTEM)\s+((["']).*?\4)\s*>/um

-			TEXT_PATTERN = /\A([^<]*)/um
+      TEXT_PATTERN = /\A([^<]*)/um

-			# Entity constants
-			PUBIDCHAR = "\x20\x0D\x0Aa-zA-Z0-9\\-()+,./:=?;!*@$_%#"
-			SYSTEMLITERAL = %Q{((?:"[^"]*")|(?:'[^']*'))}
-			PUBIDLITERAL = %Q{("[#{PUBIDCHAR}']*"|'[#{PUBIDCHAR}]*')}
-			EXTERNALID = "(?:(?:(SYSTEM)\\s+#{SYSTEMLITERAL})|(?:(PUBLIC)\\s+#{PUBIDLITERAL}\\s+#{SYSTEMLITERAL}))"
-			NDATADECL = "\\s+NDATA\\s+#{NAME}"
-			PEREFERENCE = "%#{NAME};"
-			ENTITYVALUE = %Q{((?:"(?:[^%&"]|#{PEREFERENCE}|#{REFERENCE})*")|(?:'([^%&']|#{PEREFERENCE}|#{REFERENCE})*'))}
-			PEDEF = "(?:#{ENTITYVALUE}|#{EXTERNALID})"
-			ENTITYDEF = "(?:#{ENTITYVALUE}|(?:#{EXTERNALID}(#{NDATADECL})?))"
-			PEDECL = "<!ENTITY\\s+(%)\\s+#{NAME}\\s+#{PEDEF}\\s*>"
-			GEDECL = "<!ENTITY\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
-			ENTITYDECL = /\s*(?:#{GEDECL})|(?:#{PEDECL})/um
+      # Entity constants
+      PUBIDCHAR = "\x20\x0D\x0Aa-zA-Z0-9\\-()+,./:=?;!*@$_%#"
+      SYSTEMLITERAL = %Q{((?:"[^"]*")|(?:'[^']*'))}
+      PUBIDLITERAL = %Q{("[#{PUBIDCHAR}']*"|'[#{PUBIDCHAR}]*')}
+      EXTERNALID = "(?:(?:(SYSTEM)\\s+#{SYSTEMLITERAL})|(?:(PUBLIC)\\s+#{PUBIDLITERAL}\\s+#{SYSTEMLITERAL}))"
+      NDATADECL = "\\s+NDATA\\s+#{NAME}"
+      PEREFERENCE = "%#{NAME};"
+      ENTITYVALUE = %Q{((?:"(?:[^%&"]|#{PEREFERENCE}|#{REFERENCE})*")|(?:'([^%&']|#{PEREFERENCE}|#{REFERENCE})*'))}
+      PEDEF = "(?:#{ENTITYVALUE}|#{EXTERNALID})"
+      ENTITYDEF = "(?:#{ENTITYVALUE}|(?:#{EXTERNALID}(#{NDATADECL})?))"
+      PEDECL = "<!ENTITY\\s+(%)\\s+#{NAME}\\s+#{PEDEF}\\s*>"
+      GEDECL = "<!ENTITY\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
+      ENTITYDECL = /\s*(?:#{GEDECL})|(?:#{PEDECL})/um

-			EREFERENCE = /&(?!#{NAME};)/
+      EREFERENCE = /&(?!#{NAME};)/

-			DEFAULT_ENTITIES = { 
-				'gt' => [/&gt;/, '&gt;', '>', />/], 
-				'lt' => [/&lt;/, '&lt;', '<', /</], 
-				'quot' => [/&quot;/, '&quot;', '"', /"/], 
-				"apos" => [/&apos;/, "&apos;", "'", /'/] 
-			}
+      DEFAULT_ENTITIES = { 
+        'gt' => [/&gt;/, '&gt;', '>', />/], 
+        'lt' => [/&lt;/, '&lt;', '<', /</], 
+        'quot' => [/&quot;/, '&quot;', '"', /"/], 
+        "apos" => [/&apos;/, "&apos;", "'", /'/] 
+      }

-			def initialize( source )
-				self.stream = source
-			end
+      def initialize( source )
+        self.stream = source
+      end

      def add_listener( listener )
        if !defined?(@listeners) or !@listeners
@@ -119,315 +119,315 @@ module REXML

      attr_reader :source

-			def stream=( source )
-				if source.kind_of? String
-					@source = Source.new(source)
-				elsif source.kind_of? IO
-					@source = IOSource.new(source)
-				elsif source.kind_of? Source
-					@source = source
-				elsif defined? StringIO and source.kind_of? StringIO
-					@source = IOSource.new(source)
-				else
-					raise "#{source.class} is not a valid input stream.  It must be \n"+
-					"either a String, IO, StringIO or Source."
-				end
-				@closed = nil
-				@document_status = nil
-				@tags = []
-				@stack = []
-				@entities = []
-			end
+      def stream=( source )
+        if source.kind_of? String
+          @source = Source.new(source)
+        elsif source.kind_of? IO
+          @source = IOSource.new(source)
+        elsif source.kind_of? Source
+          @source = source
+        elsif defined? StringIO and source.kind_of? StringIO
+          @source = IOSource.new(source)
+        else
+          raise "#{source.class} is not a valid input stream.  It must be \n"+
+          "either a String, IO, StringIO or Source."
+        end
+        @closed = nil
+        @document_status = nil
+        @tags = []
+        @stack = []
+        @entities = []
+      end

-			# Returns true if there are no more events
-			def empty?
+      # Returns true if there are no more events
+      def empty?
        #puts "@source.empty? = #{@source.empty?}"
        #puts "@stack.empty? = #{@stack.empty?}"
        return (@source.empty? and @stack.empty?)
-			end
+      end

-			# Returns true if there are more events.  Synonymous with !empty?
-			def has_next?
+      # Returns true if there are more events.  Synonymous with !empty?
+      def has_next?
        return !(@source.empty? and @stack.empty?)
-			end
+      end

-			# Push an event back on the head of the stream.  This method
-			# has (theoretically) infinite depth.
-			def unshift token
-				@stack.unshift(token)
-			end
+      # Push an event back on the head of the stream.  This method
+      # has (theoretically) infinite depth.
+      def unshift token
+        @stack.unshift(token)
+      end

-			# Peek at the +depth+ event in the stack.  The first element on the stack
-			# is at depth 0.  If +depth+ is -1, will parse to the end of the input
-			# stream and return the last event, which is always :end_document.
-			# Be aware that this causes the stream to be parsed up to the +depth+ 
-			# event, so you can effectively pre-parse the entire document (pull the 
-			# entire thing into memory) using this method.  
-			def peek depth=0
-				raise %Q[Illegal argument "#{depth}"] if depth < -1
-				temp = []
-				if depth == -1
-					temp.push(pull()) until empty?
-				else
-					while @stack.size+temp.size < depth+1
-						temp.push(pull())
-					end
-				end
-				@stack += temp if temp.size > 0
-				@stack[depth]
-			end
+      # Peek at the +depth+ event in the stack.  The first element on the stack
+      # is at depth 0.  If +depth+ is -1, will parse to the end of the input
+      # stream and return the last event, which is always :end_document.
+      # Be aware that this causes the stream to be parsed up to the +depth+ 
+      # event, so you can effectively pre-parse the entire document (pull the 
+      # entire thing into memory) using this method.  
+      def peek depth=0
+        raise %Q[Illegal argument "#{depth}"] if depth < -1
+        temp = []
+        if depth == -1
+          temp.push(pull()) until empty?
+        else
+          while @stack.size+temp.size < depth+1
+            temp.push(pull())
+          end
+        end
+        @stack += temp if temp.size > 0
+        @stack[depth]
+      end

-			# Returns the next event.  This is a +PullEvent+ object.
-			def pull
-				if @closed
-					x, @closed = @closed, nil
-					return [ :end_element, x ]
-				end
-				return [ :end_document ] if empty?
-				return @stack.shift if @stack.size > 0
-				@source.read if @source.buffer.size<2
-				if @document_status == nil
-					@source.consume( /^\s*/um )
-					word = @source.match( /(<[^>]*)>/um )
-					word = word[1] unless word.nil?
-					case word
-					when COMMENT_START
-						return [ :comment, @source.match( COMMENT_PATTERN, true )[1] ]
-					when XMLDECL_START
-						results = @source.match( XMLDECL_PATTERN, true )[1]
-						version = VERSION.match( results )
-						version = version[1] unless version.nil?
-						encoding = ENCODING.match(results)
-						encoding = encoding[1] unless encoding.nil?
-						@source.encoding = encoding
-						standalone = STANDALONE.match(results)
-						standalone = standalone[1] unless standalone.nil?
-						return [ :xmldecl, version, encoding, standalone]
-					when INSTRUCTION_START
-						return [ :processing_instruction, *@source.match(INSTRUCTION_PATTERN, true)[1,2] ]
-					when DOCTYPE_START
-						md = @source.match( DOCTYPE_PATTERN, true )
-						identity = md[1]
-						close = md[2]
-						identity =~ IDENTITY
-						name = $1
-						raise REXML::ParseException("DOCTYPE is missing a name") if name.nil?
-						pub_sys = $2.nil? ? nil : $2.strip
-						long_name = $3.nil? ? nil : $3.strip
-						uri = $4.nil? ? nil : $4.strip
-						args = [ :start_doctype, name, pub_sys, long_name, uri ]
-						if close == ">"
-							@document_status = :after_doctype
-							@source.read if @source.buffer.size<2
-							md = @source.match(/^\s*/um, true)
-							@stack << [ :end_doctype ]
-						else
-							@document_status = :in_doctype
-						end
-						return args
-					else
-						@document_status = :after_doctype
-						@source.read if @source.buffer.size<2
-						md = @source.match(/\s*/um, true)
-					end
-				end
-				if @document_status == :in_doctype
-					md = @source.match(/\s*(.*?>)/um)
-					case md[1]
-					when SYSTEMENTITY 
-						match = @source.match( SYSTEMENTITY, true )[1]
-						return [ :externalentity, match ]
+      # Returns the next event.  This is a +PullEvent+ object.
+      def pull
+        if @closed
+          x, @closed = @closed, nil
+          return [ :end_element, x ]
+        end
+        return [ :end_document ] if empty?
+        return @stack.shift if @stack.size > 0
+        @source.read if @source.buffer.size<2
+        if @document_status == nil
+          @source.consume( /^\s*/um )
+          word = @source.match( /(<[^>]*)>/um )
+          word = word[1] unless word.nil?
+          case word
+          when COMMENT_START
+            return [ :comment, @source.match( COMMENT_PATTERN, true )[1] ]
+          when XMLDECL_START
+            results = @source.match( XMLDECL_PATTERN, true )[1]
+            version = VERSION.match( results )
+            version = version[1] unless version.nil?
+            encoding = ENCODING.match(results)
+            encoding = encoding[1] unless encoding.nil?
+            @source.encoding = encoding
+            standalone = STANDALONE.match(results)
+            standalone = standalone[1] unless standalone.nil?
+            return [ :xmldecl, version, encoding, standalone]
+          when INSTRUCTION_START
+            return [ :processing_instruction, *@source.match(INSTRUCTION_PATTERN, true)[1,2] ]
+          when DOCTYPE_START
+            md = @source.match( DOCTYPE_PATTERN, true )
+            identity = md[1]
+            close = md[2]
+            identity =~ IDENTITY
+            name = $1
+            raise REXML::ParseException("DOCTYPE is missing a name") if name.nil?
+            pub_sys = $2.nil? ? nil : $2.strip
+            long_name = $3.nil? ? nil : $3.strip
+            uri = $4.nil? ? nil : $4.strip
+            args = [ :start_doctype, name, pub_sys, long_name, uri ]
+            if close == ">"
+              @document_status = :after_doctype
+              @source.read if @source.buffer.size<2
+              md = @source.match(/^\s*/um, true)
+              @stack << [ :end_doctype ]
+            else
+              @document_status = :in_doctype
+            end
+            return args
+          else
+            @document_status = :after_doctype
+            @source.read if @source.buffer.size<2
+            md = @source.match(/\s*/um, true)
+          end
+        end
+        if @document_status == :in_doctype
+          md = @source.match(/\s*(.*?>)/um)
+          case md[1]
+          when SYSTEMENTITY 
+            match = @source.match( SYSTEMENTITY, true )[1]
+            return [ :externalentity, match ]

-					when ELEMENTDECL_START
-						return [ :elementdecl, @source.match( ELEMENTDECL_PATTERN, true )[1] ]
+          when ELEMENTDECL_START
+            return [ :elementdecl, @source.match( ELEMENTDECL_PATTERN, true )[1] ]

-					when ENTITY_START
-						match = @source.match( ENTITYDECL, true ).to_a.compact
-						match[0] = :entitydecl
-						ref = false
-						if match[1] == '%'
-							ref = true
-							match.delete_at 1
-						end
-						# Now we have to sort out what kind of entity reference this is
-						if match[2] == 'SYSTEM'
-							# External reference
-							match[3] = match[3][1..-2] # PUBID
-							match.delete_at(4) if match.size > 4 # Chop out NDATA decl
-							# match is [ :entity, name, SYSTEM, pubid(, ndata)? ]
-						elsif match[2] == 'PUBLIC'
-							# External reference
-							match[3] = match[3][1..-2] # PUBID
-							match[4] = match[4][1..-2] # HREF
-							# match is [ :entity, name, PUBLIC, pubid, href ]
-						else
-							match[2] = match[2][1..-2]
-							match.pop if match.size == 4
-							# match is [ :entity, name, value ]
-						end
-						match << '%' if ref
-						return match
-					when ATTLISTDECL_START
-						md = @source.match( ATTLISTDECL_PATTERN, true )
-						raise REXML::ParseException.new( "Bad ATTLIST declaration!", @source ) if md.nil?
-						element = md[1]
-						contents = md[0]
+          when ENTITY_START
+            match = @source.match( ENTITYDECL, true ).to_a.compact
+            match[0] = :entitydecl
+            ref = false
+            if match[1] == '%'
+              ref = true
+              match.delete_at 1
+            end
+            # Now we have to sort out what kind of entity reference this is
+            if match[2] == 'SYSTEM'
+              # External reference
+              match[3] = match[3][1..-2] # PUBID
+              match.delete_at(4) if match.size > 4 # Chop out NDATA decl
+              # match is [ :entity, name, SYSTEM, pubid(, ndata)? ]
+            elsif match[2] == 'PUBLIC'
+              # External reference
+              match[3] = match[3][1..-2] # PUBID
+              match[4] = match[4][1..-2] # HREF
+              # match is [ :entity, name, PUBLIC, pubid, href ]
+            else
+              match[2] = match[2][1..-2]
+              match.pop if match.size == 4
+              # match is [ :entity, name, value ]
+            end
+            match << '%' if ref
+            return match
+          when ATTLISTDECL_START
+            md = @source.match( ATTLISTDECL_PATTERN, true )
+            raise REXML::ParseException.new( "Bad ATTLIST declaration!", @source ) if md.nil?
+            element = md[1]
+            contents = md[0]

-						pairs = {}
-						values = md[0].scan( ATTDEF_RE )
-						values.each do |attdef|
-							unless attdef[3] == "#IMPLIED"
-								attdef.compact!
-								val = attdef[3]
-								val = attdef[4] if val == "#FIXED "
-								pairs[attdef[0]] = val
-							end
-						end
-						return [ :attlistdecl, element, pairs, contents ]
-					when NOTATIONDECL_START
-						md = nil
-						if @source.match( PUBLIC )
-							md = @source.match( PUBLIC, true )
-						elsif @source.match( SYSTEM )
-							md = @source.match( SYSTEM, true )
-						else
-							raise REXML::ParseException.new( "error parsing notation: no matching pattern", @source )
-						end
-						return [ :notationdecl, md[1], md[2], md[3] ]
-					when CDATA_END
-						@document_status = :after_doctype
-						@source.match( CDATA_END, true )
-						return [ :end_doctype ]
-					end
-				end
-				begin
-					if @source.buffer[0] == ?<
-						if @source.buffer[1] == ?/
-							last_tag = @tags.pop
-							#md = @source.match_to_consume( '>', CLOSE_MATCH)
-							md = @source.match( CLOSE_MATCH, true )
-							raise REXML::ParseException.new( "Missing end tag for "+
+            pairs = {}
+            values = md[0].scan( ATTDEF_RE )
+            values.each do |attdef|
+              unless attdef[3] == "#IMPLIED"
+                attdef.compact!
+                val = attdef[3]
+                val = attdef[4] if val == "#FIXED "
+                pairs[attdef[0]] = val
+              end
+            end
+            return [ :attlistdecl, element, pairs, contents ]
+          when NOTATIONDECL_START
+            md = nil
+            if @source.match( PUBLIC )
+              md = @source.match( PUBLIC, true )
+            elsif @source.match( SYSTEM )
+              md = @source.match( SYSTEM, true )
+            else
+              raise REXML::ParseException.new( "error parsing notation: no matching pattern", @source )
+            end
+            return [ :notationdecl, md[1], md[2], md[3] ]
+          when CDATA_END
+            @document_status = :after_doctype
+            @source.match( CDATA_END, true )
+            return [ :end_doctype ]
+          end
+        end
+        begin
+          if @source.buffer[0] == ?<
+            if @source.buffer[1] == ?/
+              last_tag = @tags.pop
+              #md = @source.match_to_consume( '>', CLOSE_MATCH)
+              md = @source.match( CLOSE_MATCH, true )
+              raise REXML::ParseException.new( "Missing end tag for "+
                "'#{last_tag}' (got \"#{md[1]}\")", 
                @source) unless last_tag == md[1]
-							return [ :end_element, last_tag ]
-						elsif @source.buffer[1] == ?!
-							md = @source.match(/\A(\s*[^>]*>)/um)
-							#puts "SOURCE BUFFER = #{source.buffer}, #{source.buffer.size}"
-							raise REXML::ParseException.new("Malformed node", @source) unless md
-							if md[0][2] == ?-
-								md = @source.match( COMMENT_PATTERN, true )
-								return [ :comment, md[1] ] if md
-							else
-								md = @source.match( CDATA_PATTERN, true )
-								return [ :cdata, md[1] ] if md
-							end
-							raise REXML::ParseException.new( "Declarations can only occur "+
-								"in the doctype declaration.", @source)
-						elsif @source.buffer[1] == ??
-							md = @source.match( INSTRUCTION_PATTERN, true )
-							return [ :processing_instruction, md[1], md[2] ] if md
-							raise REXML::ParseException.new( "Bad instruction declaration",
-								@source)
-						else
-							# Get the next tag
-							md = @source.match(TAG_MATCH, true)
-							raise REXML::ParseException.new("malformed XML: missing tag start", @source) unless md
-							attrs = []
-							if md[2].size > 0
-								attrs = md[2].scan( ATTRIBUTE_PATTERN )
-								raise REXML::ParseException.new( "error parsing attributes: [#{attrs.join ', '}], excess = \"#$'\"", @source) if $' and $'.strip.size > 0
-							end
-				
-							if md[4]
-								@closed = md[1]
-							else
-								@tags.push( md[1] )
-							end
-							attributes = {}
-							attrs.each { |a,b,c| attributes[a] = c }
-							return [ :start_element, md[1], attributes ]
-						end
-					else
-						md = @source.match( TEXT_PATTERN, true )
+              return [ :end_element, last_tag ]
+            elsif @source.buffer[1] == ?!
+              md = @source.match(/\A(\s*[^>]*>)/um)
+              #puts "SOURCE BUFFER = #{source.buffer}, #{source.buffer.size}"
+              raise REXML::ParseException.new("Malformed node", @source) unless md
+              if md[0][2] == ?-
+                md = @source.match( COMMENT_PATTERN, true )
+                return [ :comment, md[1] ] if md
+              else
+                md = @source.match( CDATA_PATTERN, true )
+                return [ :cdata, md[1] ] if md
+              end
+              raise REXML::ParseException.new( "Declarations can only occur "+
+                "in the doctype declaration.", @source)
+            elsif @source.buffer[1] == ??
+              md = @source.match( INSTRUCTION_PATTERN, true )
+              return [ :processing_instruction, md[1], md[2] ] if md
+              raise REXML::ParseException.new( "Bad instruction declaration",
+                @source)
+            else
+              # Get the next tag
+              md = @source.match(TAG_MATCH, true)
+              raise REXML::ParseException.new("malformed XML: missing tag start", @source) unless md
+              attrs = []
+              if md[2].size > 0
+                attrs = md[2].scan( ATTRIBUTE_PATTERN )
+                raise REXML::ParseException.new( "error parsing attributes: [#{attrs.join ', '}], excess = \"#$'\"", @source) if $' and $'.strip.size > 0
+              end
+        
+              if md[4]
+                @closed = md[1]
+              else
+                @tags.push( md[1] )
+              end
+              attributes = {}
+              attrs.each { |a,b,c| attributes[a] = c }
+              return [ :start_element, md[1], attributes ]
+            end
+          else
+            md = @source.match( TEXT_PATTERN, true )
            if md[0].length == 0
              #puts "EMPTY = #{empty?}"
              #puts "BUFFER = \"#{@source.buffer}\""
              @source.match( /(\s+)/, true )
            end
            #return [ :text, "" ] if md[0].length == 0
-						# unnormalized = Text::unnormalize( md[1], self )
-						# return PullEvent.new( :text, md[1], unnormalized )
-						return [ :text, md[1] ]
-					end
-				rescue REXML::ParseException
-					raise
-				rescue Exception, NameError => error
-					raise REXML::ParseException.new( "Exception parsing",
-						@source, self, (error ? error : $!) )
-				end
-				return [ :dummy ]
-			end
+            # unnormalized = Text::unnormalize( md[1], self )
+            # return PullEvent.new( :text, md[1], unnormalized )
+            return [ :text, md[1] ]
+          end
+        rescue REXML::ParseException
+          raise
+        rescue Exception, NameError => error
+          raise REXML::ParseException.new( "Exception parsing",
+            @source, self, (error ? error : $!) )
+        end
+        return [ :dummy ]
+      end

-			def entity( reference, entities )
-				value = nil
-				value = entities[ reference ] if entities
-				if not value
-					value = DEFAULT_ENTITIES[ reference ]
-					value = value[2] if value
-				end
-				unnormalize( value, entities ) if value
-			end
+      # Looks up the replacement text for the entity named +reference+.
+      #
+      # +entities+ is consulted first (when given); if it yields nothing, the
+      # lookup falls back to element 2 of the matching DEFAULT_ENTITIES entry.
+      # The value found is passed through #unnormalize before being returned.
+      # Returns nil when the reference is unknown.
+      def entity( reference, entities )
+        value = nil
+        value = entities[ reference ] if entities
+        if not value
+          value = DEFAULT_ENTITIES[ reference ]
+          value = value[2] if value
+        end
+        unnormalize( value, entities ) if value
+      end

-			# Escapes all possible entities
-			def normalize( input, entities=nil, entity_filter=nil )
-				copy = input.clone
-				# Doing it like this rather than in a loop improves the speed
-				copy.gsub!( EREFERENCE, '&amp;' )
-				entities.each do |key, value|
-					copy.gsub!( value, "&#{key};" ) unless entity_filter and 
-																			entity_filter.include?(entity)
-				end if entities
-				copy.gsub!( EREFERENCE, '&amp;' )
-				DEFAULT_ENTITIES.each do |key, value|
-					copy.gsub!( value[3], value[1] )
-				end
-				copy
-			end
+      # Escapes all possible entities.
+      #
+      # Works on a copy of +input+.  Matches of EREFERENCE are rewritten to
+      # '&amp;', each value in +entities+ is replaced by its "&key;"
+      # reference, and finally, for every DEFAULT_ENTITIES entry, matches of
+      # value[3] are replaced by value[1].
+      #
+      # input:: the String to escape; it is not modified
+      # entities:: optional collection yielding [name, value] pairs
+      # entity_filter:: optional collection of entity names that must not be
+      #                 substituted
+      def normalize( input, entities=nil, entity_filter=nil )
+        copy = input.clone
+        # Doing it like this rather than in a loop improves the speed
+        copy.gsub!( EREFERENCE, '&amp;' )
+        entities.each do |key, value|
+          # The filter is matched against the entity name (the block's key);
+          # names listed in it are left unsubstituted.
+          copy.gsub!( value, "&#{key};" ) unless entity_filter and 
+                                      entity_filter.include?(key)
+        end if entities
+        copy.gsub!( EREFERENCE, '&amp;' )
+        DEFAULT_ENTITIES.each do |key, value|
+          copy.gsub!( value[3], value[1] )
+        end
+        copy
+      end

-			# Unescapes all possible entities
-			def unnormalize( string, entities=nil, filter=nil )
-				rv = string.clone
-				rv.gsub!( /\r\n?/, "\n" )
-				matches = rv.scan( REFERENCE_RE )
-				return rv if matches.size == 0
-				rv.gsub!( /&#0*((?:\d+)|(?:x[a-fA-F0-9]+));/ ) {|m|
-					m=$1
-					m = "0#{m}" if m[0] == ?x
-					[Integer(m)].pack('U*')
-				}
-				matches.collect!{|x|x[0]}.compact!
-				if matches.size > 0
-					matches.each do |entity_reference|
-						unless filter and filter.include?(entity_reference)
-							entity_value = entity( entity_reference, entities )
-							if entity_value
-								re = /&#{entity_reference};/
-								rv.gsub!( re, entity_value )
-							end
-						end
-					end
-					matches.each do |entity_reference|
-						unless filter and filter.include?(entity_reference)
-							er = DEFAULT_ENTITIES[entity_reference]
-							rv.gsub!( er[0], er[2] ) if er
-						end
-					end
-					rv.gsub!( /&amp;/, '&' )
-				end
-				rv
-			end
-		end
-	end
+      # Unescapes all possible entities
+      #
+      # string:: the text to unescape; a copy is returned and the argument is
+      #          left untouched
+      # entities:: optional lookup of entity name => value, consulted through
+      #            #entity
+      # filter:: optional collection of entity names that are left as they are
+      def unnormalize( string, entities=nil, filter=nil )
+        rv = string.clone
+        # Line endings: "\r\n" and a lone "\r" both become "\n"
+        rv.gsub!( /\r\n?/, "\n" )
+        matches = rv.scan( REFERENCE_RE )
+        return rv if matches.size == 0
+        # Numeric character references (decimal or hexadecimal) are replaced
+        # by the character packed from that code point
+        rv.gsub!( /&#0*((?:\d+)|(?:x[a-fA-F0-9]+));/ ) {|m|
+          m=$1
+          # "x41" becomes "0x41" so that Integer() reads it as hexadecimal
+          m = "0#{m}" if m[0] == ?x
+          [Integer(m)].pack('U*')
+        }
+        # Keep only the first capture of each match, dropping nils
+        matches.collect!{|x|x[0]}.compact!
+        if matches.size > 0
+          # First pass: references that #entity can resolve
+          matches.each do |entity_reference|
+            unless filter and filter.include?(entity_reference)
+              entity_value = entity( entity_reference, entities )
+              if entity_value
+                re = /&#{entity_reference};/
+                rv.gsub!( re, entity_value )
+              end
+            end
+          end
+          # Second pass: the DEFAULT_ENTITIES table (er[0] is replaced by er[2])
+          matches.each do |entity_reference|
+            unless filter and filter.include?(entity_reference)
+              er = DEFAULT_ENTITIES[entity_reference]
+              rv.gsub!( er[0], er[2] ) if er
+            end
+          end
+          # '&amp;' is expanded last, after all other substitutions
+          rv.gsub!( /&amp;/, '&' )
+        end
+        rv
+      end
+    end
+  end
 end

 =begin
--- a/lib/rexml/parsers/pullparser.rb
+++ b/lib/rexml/parsers/pullparser.rb
@ -1,96 +1,99 @@
+require 'forwardable'
+
 require 'rexml/parseexception'
 require 'rexml/parsers/baseparser'
 require 'rexml/xmltokens'

 module REXML
-	module Parsers
-		# = Using the Pull Parser
-		# <em>This API is experimental, and subject to change.</em>
-		#  parser = PullParser.new( "<a>text<b att='val'/>txet</a>" )
-		#  while parser.has_next?
-		#    res = parser.next
-		#    puts res[1]['att'] if res.start_tag? and res[0] == 'b'
-		#  end
-		# See the PullEvent class for information on the content of the results.
-		# The data is identical to the arguments passed for the various events to
-		# the StreamListener API.
-		#
-		# Notice that:
-		#  parser = PullParser.new( "<a>BAD DOCUMENT" )
-		#  while parser.has_next?
-		#    res = parser.next
-		#    raise res[1] if res.error?
-		#  end
-		#
-		# Nat Price gave me some good ideas for the API.
-		class PullParser
-			include XMLTokens
+  module Parsers
+    # = Using the Pull Parser
+    # <em>This API is experimental, and subject to change.</em>
+    #  parser = PullParser.new( "<a>text<b att='val'/>txet</a>" )
+    #  while parser.has_next?
+    #    res = parser.next
+    #    puts res[1]['att'] if res.start_tag? and res[0] == 'b'
+    #  end
+    # See the PullEvent class for information on the content of the results.
+    # The data is identical to the arguments passed for the various events to
+    # the StreamListener API.
+    #
+    # Notice that:
+    #  parser = PullParser.new( "<a>BAD DOCUMENT" )
+    #  while parser.has_next?
+    #    res = parser.next
+    #    raise res[1] if res.error?
+    #  end
+    #
+    # Nat Price gave me some good ideas for the API.
+    class PullParser
+      include XMLTokens
+      extend Forwardable

-			def initialize stream
-				@entities = {}
+      def_delegators( :@parser, :has_next? )
+      def_delegators( :@parser, :entity )
+      def_delegators( :@parser, :empty? )
+
+      def initialize stream
+        @entities = {}
        @listeners = nil
        @parser = BaseParser.new( stream )
-			end
+        @my_stack = []
+      end

      def add_listener( listener )
        @listeners = [] unless @listeners
        @listeners << listener
      end

-			def each
-				while has_next?
-					yield self.pull
-				end
-			end
-
-			def peek depth=0
-				PullEvent.new(@parser.peek(depth))
-			end
-
-      def has_next?
-        @parser.has_next?
+      def each
+        while has_next?
+          yield self.pull
+        end
      end

-			def pull
-				event = @parser.pull
-				case event[0]
-				when :entitydecl
-					@entities[ event[1] ] = 
-						event[2] unless event[2] =~ /PUBLIC|SYSTEM/
-				when :text
-					unnormalized = @parser.unnormalize( event[1], @entities )
-					event << unnormalized
-				end
-				PullEvent.new( event )
-			end
+      # Returns the event +depth+ positions ahead without consuming it
+      # (depth 0 is the event the next #pull will return).
+      #
+      # Events are pulled from the underlying parser as needed and buffered
+      # in @my_stack.
+      #
+      # NOTE(review): events buffered here are wrapped straight from
+      # @parser.pull, so they skip the :entitydecl bookkeeping and the
+      # unnormalized text that #pull adds to events it reads itself --
+      # confirm whether that difference is intended.
+      def peek depth=0
+        if @my_stack.length <= depth
+          (depth - @my_stack.length + 1).times {
+            e = PullEvent.new(@parser.pull)
+            @my_stack.push(e)
+          }
+        end
+        @my_stack[depth]
+      end
+
+      # Returns the next event, consuming it.
+      #
+      # Anything already buffered in @my_stack (by #peek or #unshift) is
+      # returned first, exactly as stored.  Otherwise the next raw event is
+      # read from the underlying parser, post-processed and wrapped in a
+      # PullEvent.
+      def pull
+        return @my_stack.shift if @my_stack.length > 0
+
+        event = @parser.pull
+        case event[0]
+        when :entitydecl
+          # Remember the declared entity for later text expansion, unless
+          # event[2] matches PUBLIC or SYSTEM
+          @entities[ event[1] ] = 
+            event[2] unless event[2] =~ /PUBLIC|SYSTEM/
+        when :text
+          # Append the unescaped text as an extra element after the raw text
+          unnormalized = @parser.unnormalize( event[1], @entities )
+          event << unnormalized
+        end
+        PullEvent.new( event )
+      end

      def unshift token
-        @parser.unshift token
+        @my_stack.unshift token
      end
+    end

-      def entity reference
-        @parser.entity( reference )
+    # A parsing event.  The contents of the event are accessed as an +Array+,
+    # and the type is given either by the ...? methods, or by accessing the
+    # +type+ accessor.  The contents of this object vary from event to event,
+    # but are identical to the arguments passed to +StreamListener+s for each
+    # event.
+    class PullEvent
+      # The type of this event.  Will be one of :tag_start, :tag_end, :text,
+      # :processing_instruction, :comment, :doctype, :attlistdecl, :entitydecl,
+      # :notationdecl, :entity, :cdata, :xmldecl, or :error.
+      def initialize(arg)
+        @contents = arg
      end

-      def empty?
-        @parser.empty?
-      end
-
-		end
-
-		# A parsing event.  The contents of the event are accessed as an +Array?,
-		# and the type is given either by the ...? methods, or by accessing the
-		# +type+ accessor.  The contents of this object vary from event to event,
-		# but are identical to the arguments passed to +StreamListener+s for each
-		# event.
-		class PullEvent
-			# The type of this event.  Will be one of :tag_start, :tag_end, :text,
-			# :processing_instruction, :comment, :doctype, :attlistdecl, :entitydecl,
-			# :notationdecl, :entity, :cdata, :xmldecl, or :error.
-			def initialize(arg)
-				@contents = arg
-			end
-
      def []( start, endd=nil)
        if start.kind_of? Range
          @contents.slice( start.begin+1 .. start.end )
@ -103,90 +106,90 @@ module REXML
        else
          raise "Illegal argument #{start.inspect} (#{start.class})"
        end
-			end
+      end

-			def event_type
-				@contents[0]
-			end
+      def event_type
+        @contents[0]
+      end

-			# Content: [ String tag_name, Hash attributes ]
-			def start_element?
-				@contents[0] == :start_element
-			end
+      # Content: [ String tag_name, Hash attributes ]
+      def start_element?
+        @contents[0] == :start_element
+      end

-			# Content: [ String tag_name ]
-			def end_element?
-				@contents[0] == :end_element
-			end
+      # Content: [ String tag_name ]
+      def end_element?
+        @contents[0] == :end_element
+      end

-			# Content: [ String raw_text, String unnormalized_text ]
-			def text?
-				@contents[0] == :text
-			end
+      # Content: [ String raw_text, String unnormalized_text ]
+      def text?
+        @contents[0] == :text
+      end

-			# Content: [ String text ]
-			def instruction?
-				@contents[0] == :processing_instruction
-			end
+      # Content: [ String text ]
+      def instruction?
+        @contents[0] == :processing_instruction
+      end

-			# Content: [ String text ]
-			def comment?
-				@contents[0] == :comment
-			end
+      # Content: [ String text ]
+      def comment?
+        @contents[0] == :comment
+      end

-			# Content: [ String name, String pub_sys, String long_name, String uri ]
-			def doctype?
-				@contents[0] == :start_doctype
-			end
+      # Content: [ String name, String pub_sys, String long_name, String uri ]
+      def doctype?
+        @contents[0] == :start_doctype
+      end

-			# Content: [ String text ]
-			def attlistdecl?
-				@contents[0] == :attlistdecl
-			end
+      # Content: [ String text ]
+      def attlistdecl?
+        @contents[0] == :attlistdecl
+      end

-			# Content: [ String text ]
-			def elementdecl?
-				@contents[0] == :elementdecl
-			end
+      # Content: [ String text ]
+      def elementdecl?
+        @contents[0] == :elementdecl
+      end

-			# Due to the wonders of DTDs, an entity declaration can be just about
-			# anything.  There's no way to normalize it; you'll have to interpret the
-			# content yourself.  However, the following is true:
-			#
-			# * If the entity declaration is an internal entity:
-			#   [ String name, String value ]
-			# Content: [ String text ]
-			def entitydecl?
-				@contents[0] == :entitydecl
-			end
+      # Due to the wonders of DTDs, an entity declaration can be just about
+      # anything.  There's no way to normalize it; you'll have to interpret the
+      # content yourself.  However, the following is true:
+      #
+      # * If the entity declaration is an internal entity:
+      #   [ String name, String value ]
+      # Content: [ String text ]
+      def entitydecl?
+        @contents[0] == :entitydecl
+      end

-			# Content: [ String text ]
-			def notationdecl?
-				@contents[0] == :notationdecl
-			end
+      # Content: [ String text ]
+      def notationdecl?
+        @contents[0] == :notationdecl
+      end

-			# Content: [ String text ]
-			def entity?
-				@contents[0] == :entity
-			end
+      # Content: [ String text ]
+      def entity?
+        @contents[0] == :entity
+      end

-			# Content: [ String text ]
-			def cdata?
-				@contents[0] == :cdata
-			end
+      # Content: [ String text ]
+      def cdata?
+        @contents[0] == :cdata
+      end

-			# Content: [ String version, String encoding, String standalone ]
-			def xmldecl?
-				@contents[0] == :xmldecl
-			end
+      # Content: [ String version, String encoding, String standalone ]
+      def xmldecl?
+        @contents[0] == :xmldecl
+      end

-			def error?
-				@contents[0] == :error
-			end
+      def error?
+        @contents[0] == :error
+      end

-			def inspect
+      def inspect
        @contents[0].to_s + ": " + @contents[1..-1].inspect
-			end
-		end
-	end
+      end
+    end
+  end
 end
--- a/lib/rexml/parsers/sax2parser.rb
+++ b/lib/rexml/parsers/sax2parser.rb
@ -1,9 +1,11 @@
 require 'rexml/parsers/baseparser'
 require 'rexml/parseexception'
 require 'rexml/namespace'
+require 'rexml/text'

 module REXML
 	module Parsers
+    # SAX2Parser
 		class SAX2Parser
 			def initialize source
 				@parser = BaseParser.new(source)
@ -36,6 +38,10 @@ module REXML
 			# :start_prefix_mapping, :end_prefix_mapping, :characters,
 			# :processing_instruction, :doctype, :attlistdecl, :elementdecl,
 			# :entitydecl, :notationdecl, :cdata, :xmldecl, :comment
+      #
+      # There is an additional symbol that can be listened for: :progress.
+      # This will be called for every event generated, passing in the current 
+      # stream position.
 			#
 			# Array contains regular expressions or strings which will be matched
 			# against fully qualified element names.
@ -161,6 +167,7 @@ module REXML
 						:elementdecl, :cdata, :notationdecl, :xmldecl
 						handle( *event )
 					end
+          handle( :progress, @parser.source.position )
 				end
 			end

--- a/lib/rexml/rexml.rb
+++ b/lib/rexml/rexml.rb
@ -11,7 +11,7 @@
 # Main page:: http://www.germane-software.com/software/rexml
 # Author:: Sean Russell <serATgermaneHYPHENsoftwareDOTcom>
 # Version:: 3.1.3
-# Date:: +2005/138
+# Date:: 2005/224
 # 
 # This API documentation can be downloaded from the REXML home page, or can
 # be accessed online[http://www.germane-software.com/software/rexml_doc]
@ -20,7 +20,7 @@
 # or can be accessed 
 # online[http://www.germane-software.com/software/rexml/docs/tutorial.html]
 module REXML
-	Copyright = "Copyright © 2001-2005 Sean Russell <ser@germane-software.com>"
-	Date = "+2005/138"
+	Copyright = "Copyright © 2001, 2002, 2003, 2004 Sean Russell <ser@germane-software.com>"
+	Date = "2005/224"
 	Version = "3.1.3"
 end
--- a/lib/rexml/sax2listener.rb
+++ b/lib/rexml/sax2listener.rb
@ -90,5 +90,7 @@ module REXML
 		# @p comment The content of the comment
 		def comment comment
 		end
+    # Called when a progress notification is delivered
+    # @p position The current position in the source stream
+    def progress position
+    end
 	end	
 end
--- a/lib/rexml/source.rb
+++ b/lib/rexml/source.rb
@ -98,6 +98,10 @@ module REXML
 			@buffer == ""
 		end

+    # Returns the index at which the current contents of @buffer first occur
+    # in @orig.
+    # NOTE(review): presumably @buffer is the unconsumed tail of @orig, which
+    # would make this the current read offset; if the same text also occurs
+    # earlier in @orig, that earlier index is returned -- confirm.
+    def position
+      @orig.index( @buffer )
+    end
+
 		# @return the current line in the source
 		def current_line
 			lines = @orig.split
@ -194,6 +198,10 @@ module REXML
 			super and ( @source.nil? || @source.eof? )
 		end

+    # Returns the position reported by @er_source (via #pos).
+    def position
+      @er_source.pos
+    end
+
 		# @return the current line in the source
 		def current_line
      begin
--- a/lib/rexml/validation/validation.rb
+++ b/lib/rexml/validation/validation.rb
@ -82,10 +82,13 @@ module REXML
        @event_arg = event_arg
      end

-      attr_reader :done?
      attr_reader :event_type
      attr_accessor :event_arg

+      def done?
+        @done
+      end
+
      def single?
        return (@event_type != :start_element and @event_type != :start_attribute)
      end
--- a/lib/rexml/xmldecl.rb
+++ b/lib/rexml/xmldecl.rb
@ -2,71 +2,71 @@ require 'rexml/encoding'
 require 'rexml/source'

 module REXML
-  # NEEDS DOCUMENTATION
-  class XMLDecl < Child
-    include Encoding
+	# NEEDS DOCUMENTATION
+	class XMLDecl < Child
+		include Encoding

-    DEFAULT_VERSION = "1.0";
-    DEFAULT_ENCODING = "UTF-8";
-    DEFAULT_STANDALONE = "no";
-    START = '<\?xml';
-    STOP = '\?>';
+		DEFAULT_VERSION = "1.0";
+		DEFAULT_ENCODING = "UTF-8";
+		DEFAULT_STANDALONE = "no";
+		START = '<\?xml';
+		STOP = '\?>';

-    attr_accessor :version, :standalone
+		attr_accessor :version, :standalone
    attr_reader :writeencoding

-    def initialize(version=DEFAULT_VERSION, encoding=nil, standalone=nil)
+		def initialize(version=DEFAULT_VERSION, encoding=nil, standalone=nil)
      @writethis = true
      @writeencoding = !encoding.nil?
-      if version.kind_of? XMLDecl
-        super()
-        @version = version.version
-        self.encoding = version.encoding
+			if version.kind_of? XMLDecl
+				super()
+				@version = version.version
+				self.encoding = version.encoding
        @writeencoding = version.writeencoding
-        @standalone = version.standalone
-      else
-        super()
-        @version = version
-        self.encoding = encoding
-        @standalone = standalone
-      end
-      @version = DEFAULT_VERSION if @version.nil?
-    end
+				@standalone = version.standalone
+			else
+				super()
+				@version = version
+				self.encoding = encoding
+				@standalone = standalone
+			end
+			@version = DEFAULT_VERSION if @version.nil?
+		end

-    def clone
-      XMLDecl.new(self)
-    end
+		def clone
+			XMLDecl.new(self)
+		end

-    def write writer, indent_level=-1, transitive=false, ie_hack=false
+		def write writer, indent=-1, transitive=false, ie_hack=false
      return nil unless @writethis or writer.kind_of? Output
-      indent( writer, indent_level )
-      writer << START.sub(/\\/u, '')
+			indent( writer, indent )
+			writer << START.sub(/\\/u, '')
      if writer.kind_of? Output
        writer << " #{content writer.encoding}"
      else
        writer << " #{content encoding}"
      end
-      writer << STOP.sub(/\\/u, '')
-    end
+			writer << STOP.sub(/\\/u, '')
+		end

-    def ==( other )
-      other.kind_of?(XMLDecl) and
-      other.version == @version and
-      other.encoding == self.encoding and
-      other.standalone == @standalone
-    end
+		def ==( other )
+		  other.kind_of?(XMLDecl) and
+		  other.version == @version and
+		  other.encoding == self.encoding and
+		  other.standalone == @standalone
+		end

-    def xmldecl version, encoding, standalone
-      @version = version
-      self.encoding = encoding
-      @standalone = standalone
-    end
+		def xmldecl version, encoding, standalone
+			@version = version
+			self.encoding = encoding
+			@standalone = standalone
+		end

-    def node_type
-      :xmldecl
-    end
+		def node_type
+			:xmldecl
+		end

-    alias :stand_alone? :standalone
+		alias :stand_alone? :standalone
    alias :old_enc= :encoding=

    def encoding=( enc )
@ -98,12 +98,12 @@ module REXML
      START.sub(/\\/u, '') + " ... " + STOP.sub(/\\/u, '')
    end

-    private
-    def content(enc)
-      rv = "version='#@version'"
-      rv << " encoding='#{enc}'" if @writeencoding || enc !~ /utf-8/i
-      rv << " standalone='#@standalone'" if @standalone
-      rv
-    end
-  end
+		private
+		def content(enc)
+			rv = "version='#@version'"
+			rv << " encoding='#{enc}'" if @writeencoding || enc !~ /utf-8/i
+			rv << " standalone='#@standalone'" if @standalone
+			rv
+		end
+	end
 end
--- a/lib/rexml/xpath.rb
+++ b/lib/rexml/xpath.rb
@ -2,76 +2,65 @@ require 'rexml/functions'
 require 'rexml/xpath_parser'

 module REXML
-  # Wrapper class.  Use this class to access the XPath functions.
-  class XPath
-    include Functions
-    EMPTY_HASH = {}
+	# Wrapper class.  Use this class to access the XPath functions.
+	class XPath
+		include Functions
+		EMPTY_HASH = {}

-    # Finds and returns the first node that matches the supplied xpath.
-    # element::
-    #   The context element
-    # path::
-    #   The xpath to search for.  If not supplied or nil, returns the first
-    #   node matching '*'.
-    # namespaces::
-    #   If supplied, a Hash which defines a namespace mapping.
-    #
-    #  XPath.first( node )
-    #  XPath.first( doc, "//b"} )
-    #  XPath.first( node, "a/x:b", { "x"=>"http://doofus" } )
+		# Finds and returns the first node that matches the supplied xpath.
+		# element::
+		# 	The context element
+		# path::
+		# 	The xpath to search for.  If not supplied or nil, returns the first
+		# 	node matching '*'.
+		# namespaces::
+		# 	If supplied, a Hash which defines a namespace mapping.
+		#
+		#  XPath.first( node )
+		#  XPath.first( doc, "//b" )
+		#  XPath.first( node, "a/x:b", { "x"=>"http://doofus" } )
    def XPath::first element, path=nil, namespaces={}, variables={}
-=begin
      raise "The namespaces argument, if supplied, must be a hash object." unless namespaces.kind_of? Hash
      raise "The variables argument, if supplied, must be a hash object." unless variables.kind_of? Hash
-      parser = XPathParser.new
-      parser.namespaces = namespaces
-      parser.variables = variables
-      path = "*" unless path
-      parser.first( path, element );
-=end
-#=begin
-      raise "The namespaces argument, if supplied, must be a hash object." unless namespaces.kind_of? Hash
-      raise "The variables argument, if supplied, must be a hash object." unless variables.kind_of? Hash
-      parser = XPathParser.new
-      parser.namespaces = namespaces
-      parser.variables = variables
-      path = "*" unless path
-      element = [element] unless element.kind_of? Array
-      parser.parse(path, element).flatten[0]
-#=end
-    end
+			parser = XPathParser.new
+			parser.namespaces = namespaces
+			parser.variables = variables
+			path = "*" unless path
+			element = [element] unless element.kind_of? Array
+			parser.parse(path, element).flatten[0]
+		end

-    # Itterates over nodes that match the given path, calling the supplied
-    # block with the match.
-    # element::
-    #   The context element
-    # path::
-    #   The xpath to search for.  If not supplied or nil, defaults to '*'
-    # namespaces::
-    #   If supplied, a Hash which defines a namespace mapping
-    #
-    #  XPath.each( node ) { |el| ... }
-    #  XPath.each( node, '/*[@attr='v']' ) { |el| ... }
-    #  XPath.each( node, 'ancestor::x' ) { |el| ... }
-    def XPath::each element, path=nil, namespaces={}, variables={}, &block
+		# Iterates over nodes that match the given path, calling the supplied
+		# block with the match.
+		# element::
+		#   The context element
+		# path::
+		#   The xpath to search for.  If not supplied or nil, defaults to '*'
+		# namespaces::
+		# 	If supplied, a Hash which defines a namespace mapping
+		#
+		#  XPath.each( node ) { |el| ... }
+		#  XPath.each( node, '/*[@attr='v']' ) { |el| ... }
+		#  XPath.each( node, 'ancestor::x' ) { |el| ... }
+		def XPath::each element, path=nil, namespaces={}, variables={}, &block
      raise "The namespaces argument, if supplied, must be a hash object." unless namespaces.kind_of? Hash
      raise "The variables argument, if supplied, must be a hash object." unless variables.kind_of? Hash
-      parser = XPathParser.new
-      parser.namespaces = namespaces
-      parser.variables = variables
-      path = "*" unless path
-      element = [element] unless element.kind_of? Array
-      parser.parse(path, element).each( &block )
-    end
+			parser = XPathParser.new
+			parser.namespaces = namespaces
+			parser.variables = variables
+			path = "*" unless path
+			element = [element] unless element.kind_of? Array
+			parser.parse(path, element).each( &block )
+		end

-    # Returns an array of nodes matching a given XPath.  
-    def XPath::match element, path=nil, namespaces={}, variables={}
-      parser = XPathParser.new
-      parser.namespaces = namespaces
-      parser.variables = variables
-      path = "*" unless path
-      element = [element] unless element.kind_of? Array
-      parser.parse(path,element)
-    end
-  end
+		# Returns an array of nodes matching a given XPath.  
+		def XPath::match element, path=nil, namespaces={}, variables={}
+			parser = XPathParser.new
+			parser.namespaces = namespaces
+			parser.variables = variables
+			path = "*" unless path
+			element = [element] unless element.kind_of? Array
+			parser.parse(path,element)
+		end
+	end
 end
--- a/lib/rexml/xpath_parser.rb
+++ b/lib/rexml/xpath_parser.rb
@ -76,6 +76,8 @@ module REXML

    # Performs a depth-first (document order) XPath search, and returns the
    # first match.  This is the fastest, lightest way to return a single result.
+    #
+    # FIXME: This method is incomplete!
    def first( path_stack, node )
      #puts "#{depth}) Entering match( #{path.inspect}, #{tree.inspect} )"
      return nil if path.size == 0
@ -123,14 +125,6 @@ module REXML
      r = expr( path_stack, nodeset )
      #puts "MAIN EXPR => #{r.inspect}"
      r
-      
-      #while ( path_stack.size > 0 and nodeset.size > 0 ) 
-      #  #puts "MATCH: #{path_stack.inspect} '#{nodeset.collect{|n|n.class}.inspect}'"
-      #  nodeset = expr( path_stack, nodeset )
-      #  #puts "NODESET: #{nodeset.inspect}"
-      #  #puts "PATH_STACK: #{path_stack.inspect}"
-      #end
-      #nodeset
    end

    private