mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00

http://www.germane-software.com/projects/rexml/ticket/# where '#' is the issue or ticket number. * Fixes ticket:3 (Issue38 in Roundup.) However, this needs further testing. * Fixed a couple of bugs in the SAX2 parser, plus a bunch of other changes I don't remember. * More XPath ordering testing added * Fixed the documentation WRT the raw mode of text nodes (ticket:4) * Fixes roundup issue 43: substring-after bug. See: http://www.germane-software.com/cgi-bin/roundup/rexml/issue43 * Fixed issue44, Element#xpath * Patch submitted by an anonymous doner to allow parsing of Tempfiles. I was hoping that, by now, that whole Source thing would have been changed to use duck typing and avoid this sort of issue... but in the meantime, the patch has been applied. * Fixes ticket:30, XPath default namespace bug. The fix was provided by Lucas Nussbaum. * Aliases #size to #length, as per zdennis's request. * Fixes typo from previous commit * Fixes ticket #32 (and adds a unit test) * Merges a user-contributed patch for issue #40 * Changes Date, Version, and Copyright to upper case, to avoid conflicts with the Date class. * Minor, yet incomplete, documentation changes. * Resolves issue #34 (SAX parser change makes it impossible to parse IO feeds.) * Moves parser.source.position() to parser.position() * Improves the build script (less work for me to package a distribution) git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/branches/ruby_1_8@9776 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
196 lines
5.1 KiB
Ruby
196 lines
5.1 KiB
Ruby
require 'forwardable'
|
|
|
|
require 'rexml/parseexception'
|
|
require 'rexml/parsers/baseparser'
|
|
require 'rexml/xmltokens'
|
|
|
|
module REXML
|
|
module Parsers
|
|
# = Using the Pull Parser
|
|
# <em>This API is experimental, and subject to change.</em>
|
|
# parser = PullParser.new( "<a>text<b att='val'/>txet</a>" )
|
|
# while parser.has_next?
|
|
# res = parser.next
|
|
# puts res[1]['att'] if res.start_tag? and res[0] == 'b'
|
|
# end
|
|
# See the PullEvent class for information on the content of the results.
|
|
# The data is identical to the arguments passed for the various events to
|
|
# the StreamListener API.
|
|
#
|
|
# Notice that:
|
|
# parser = PullParser.new( "<a>BAD DOCUMENT" )
|
|
# while parser.has_next?
|
|
# res = parser.next
|
|
# raise res[1] if res.error?
|
|
# end
|
|
#
|
|
# Nat Price gave me some good ideas for the API.
|
|
class PullParser
|
|
include XMLTokens
|
|
extend Forwardable
|
|
|
|
def_delegators( :@parser, :has_next? )
|
|
def_delegators( :@parser, :entity )
|
|
def_delegators( :@parser, :empty? )
|
|
def_delegators( :@parser, :source )
|
|
|
|
def initialize stream
|
|
@entities = {}
|
|
@listeners = nil
|
|
@parser = BaseParser.new( stream )
|
|
@my_stack = []
|
|
end
|
|
|
|
def add_listener( listener )
|
|
@listeners = [] unless @listeners
|
|
@listeners << listener
|
|
end
|
|
|
|
def each
|
|
while has_next?
|
|
yield self.pull
|
|
end
|
|
end
|
|
|
|
def peek depth=0
|
|
if @my_stack.length <= depth
|
|
(depth - @my_stack.length + 1).times {
|
|
e = PullEvent.new(@parser.pull)
|
|
@my_stack.push(e)
|
|
}
|
|
end
|
|
@my_stack[depth]
|
|
end
|
|
|
|
def pull
|
|
return @my_stack.shift if @my_stack.length > 0
|
|
|
|
event = @parser.pull
|
|
case event[0]
|
|
when :entitydecl
|
|
@entities[ event[1] ] =
|
|
event[2] unless event[2] =~ /PUBLIC|SYSTEM/
|
|
when :text
|
|
unnormalized = @parser.unnormalize( event[1], @entities )
|
|
event << unnormalized
|
|
end
|
|
PullEvent.new( event )
|
|
end
|
|
|
|
def unshift token
|
|
@my_stack.unshift token
|
|
end
|
|
end
|
|
|
|
# A parsing event. The contents of the event are accessed as an +Array?,
|
|
# and the type is given either by the ...? methods, or by accessing the
|
|
# +type+ accessor. The contents of this object vary from event to event,
|
|
# but are identical to the arguments passed to +StreamListener+s for each
|
|
# event.
|
|
class PullEvent
|
|
# The type of this event. Will be one of :tag_start, :tag_end, :text,
|
|
# :processing_instruction, :comment, :doctype, :attlistdecl, :entitydecl,
|
|
# :notationdecl, :entity, :cdata, :xmldecl, or :error.
|
|
def initialize(arg)
|
|
@contents = arg
|
|
end
|
|
|
|
def []( start, endd=nil)
|
|
if start.kind_of? Range
|
|
@contents.slice( start.begin+1 .. start.end )
|
|
elsif start.kind_of? Numeric
|
|
if endd.nil?
|
|
@contents.slice( start+1 )
|
|
else
|
|
@contents.slice( start+1, endd )
|
|
end
|
|
else
|
|
raise "Illegal argument #{start.inspect} (#{start.class})"
|
|
end
|
|
end
|
|
|
|
def event_type
|
|
@contents[0]
|
|
end
|
|
|
|
# Content: [ String tag_name, Hash attributes ]
|
|
def start_element?
|
|
@contents[0] == :start_element
|
|
end
|
|
|
|
# Content: [ String tag_name ]
|
|
def end_element?
|
|
@contents[0] == :end_element
|
|
end
|
|
|
|
# Content: [ String raw_text, String unnormalized_text ]
|
|
def text?
|
|
@contents[0] == :text
|
|
end
|
|
|
|
# Content: [ String text ]
|
|
def instruction?
|
|
@contents[0] == :processing_instruction
|
|
end
|
|
|
|
# Content: [ String text ]
|
|
def comment?
|
|
@contents[0] == :comment
|
|
end
|
|
|
|
# Content: [ String name, String pub_sys, String long_name, String uri ]
|
|
def doctype?
|
|
@contents[0] == :start_doctype
|
|
end
|
|
|
|
# Content: [ String text ]
|
|
def attlistdecl?
|
|
@contents[0] == :attlistdecl
|
|
end
|
|
|
|
# Content: [ String text ]
|
|
def elementdecl?
|
|
@contents[0] == :elementdecl
|
|
end
|
|
|
|
# Due to the wonders of DTDs, an entity declaration can be just about
|
|
# anything. There's no way to normalize it; you'll have to interpret the
|
|
# content yourself. However, the following is true:
|
|
#
|
|
# * If the entity declaration is an internal entity:
|
|
# [ String name, String value ]
|
|
# Content: [ String text ]
|
|
def entitydecl?
|
|
@contents[0] == :entitydecl
|
|
end
|
|
|
|
# Content: [ String text ]
|
|
def notationdecl?
|
|
@contents[0] == :notationdecl
|
|
end
|
|
|
|
# Content: [ String text ]
|
|
def entity?
|
|
@contents[0] == :entity
|
|
end
|
|
|
|
# Content: [ String text ]
|
|
def cdata?
|
|
@contents[0] == :cdata
|
|
end
|
|
|
|
# Content: [ String version, String encoding, String standalone ]
|
|
def xmldecl?
|
|
@contents[0] == :xmldecl
|
|
end
|
|
|
|
def error?
|
|
@contents[0] == :error
|
|
end
|
|
|
|
def inspect
|
|
@contents[0].to_s + ": " + @contents[1..-1].inspect
|
|
end
|
|
end
|
|
end
|
|
end
|