1
0
Fork 0
mirror of https://github.com/ruby/ruby.git synced 2022-11-09 12:17:21 -05:00
ruby--ruby/lib/rexml/parsers/sax2parser.rb
ser 61fec2d310 r1278@bean: ser | 2007-06-07 00:53:06 -0400
Fixed a double-encoding bug.  This was a regression, related
  to ticket:48.
  r1292@bean:  ser | 2007-07-25 08:19:36 -0400
   r1279@bean:  ser | 2007-06-09 23:19:02 -0400
   Fixes ticket:89 -- encoding CP-1252 was broken.  ISO-8859-15 had the same
   problem.
   
   Also in this patch is a fix to merge.rb (unused, but it should at least
   contain no errors), and a unit test for ticket:88.
  
  r1293@bean:  ser | 2007-07-25 08:19:37 -0400
   r1281@bean:  ser | 2007-07-24 11:08:48 -0400
   Addresses ticket:85
   
   This is a major rewrite of the XML formatting code.  The XML writers have all
   been extracted out of the classes and put into their own class containers.
   This makes writing parsers easier, and cleaner.
   
   There are three formatters, which correspond to the previous three XML writing
   modes:
   
     REXML::Formatters::Default    
         Prints the XML document exactly as it was parsed
     REXML::Formatters::Pretty     
         Pretty prints the XML document, destroying whitespace in the document
     REXML::Formatters::Transitive 
         Pretty prints the XML document, preserving whitespace
   
   All of the write() functions have been deprecated (some are still used, but
   these will also go away) except the write() function on Document, which is left
   for convenience.  To pretty print an XML document the canonical way:
   
     formatter = REXML::Formatters::Pretty.new( 5 ) # indent by 5 spaces
     formatter.write( document, output )
   
  r1296@bean:  ser | 2007-07-25 08:19:40 -0400
   r1287@bean:  ser | 2007-07-24 20:12:25 -0400
   Applied patch from Jeff Barczewski.  Note that this changes what the values of
   the name and IDs are from the previous behavior -- the values no longer include
   the quotes.  This is the correct behavior, so I'm leaving it in, but it is not
   backwards compatible.  Also fixes the serializer so that it outputs the doctype
   in a correct format (needed as a result of this change).
  
  r1297@bean:  ser | 2007-07-25 08:38:38 -0400
  Version update
  r1298@bean:  ser | 2007-07-25 08:40:30 -0400
   r1291@bean (orig r12517):  ryan | 2007-06-11 20:38:57 -0400
   Fixed typo in code. Fixes bug #10420
  
 
 r1304@bean:  ser | 2007-07-27 22:34:18 -0400
 Whitespace changes only


git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/branches/ruby_1_8_6@12852 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2007-07-28 02:46:08 +00:00

238 lines
7.5 KiB
Ruby

require 'rexml/parsers/baseparser'
require 'rexml/parseexception'
require 'rexml/namespace'
require 'rexml/text'
module REXML
module Parsers
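# SAX2Parser: an event-driven (SAX2-style) parser. It pulls events from a
# BaseParser and dispatches them to the blocks and listener objects
# registered through #listen.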
class SAX2Parser
# Builds a SAX2 parser over +source+.
#
# +source+ is handed unchanged to BaseParser.new. All observer and
# bookkeeping state starts out empty.
def initialize( source )
  @parser = BaseParser.new( source )
  # Observer registrations: listener objects and blocks are kept apart.
  @listeners, @procs = [], []
  @has_listeners = false
  # One entry per open element / per pushed set of namespace declarations.
  @namespace_stack, @tag_stack = [], []
  # Entity name => replacement text, filled in as :entitydecl events arrive.
  @entities = {}
end
# Returns whatever the underlying parser reports as its source.
def source
  @parser.source
end
# Forwards +listener+ to the underlying parser's add_listener and returns
# its result. The registrations made through #listen are not touched here.
def add_listener( listener )
  @parser.add_listener( listener )
end
# Listen arguments:
#
# Symbol, Array, Block
# Listen to Symbol events on Array elements
# Symbol, Block
# Listen to Symbol events
# Array, Listener
# Listen to all events on Array elements
# Array, Block
# Listen to :start_element events on Array elements
# Listener
# Listen to All events
#
# Symbol can be one of: :start_element, :end_element,
# :start_prefix_mapping, :end_prefix_mapping, :characters,
# :processing_instruction, :doctype, :attlistdecl, :elementdecl,
# :entitydecl, :notationdecl, :cdata, :xmldecl, :comment
#
# There is an additional symbol that can be listened for: :progress.
# This will be called for every event generated, passing in the current
# stream position.
#
# Array contains regular expressions or strings which will be matched
# against fully qualified element names.
#
# Listener must implement the methods in SAX2Listener
#
# Block will be passed the same arguments as a SAX2Listener method would
# be, where the method name is the same as the matched Symbol.
# See the SAX2Listener for more information.
def listen( *args, &blok )
if args[0].kind_of? Symbol
if args.size == 2
args[1].each { |match| @procs << [args[0], match, blok] }
else
add( [args[0], nil, blok] )
end
elsif args[0].kind_of? Array
if args.size == 2
args[0].each { |match| add( [nil, match, args[1]] ) }
else
args[0].each { |match| add( [ :start_element, match, blok ] ) }
end
else
add([nil, nil, args[0]])
end
end
# Unregisters an observer.
#
# With a +listener+ argument, every listener registration whose handler
# == +listener+ is removed, and @has_listeners is cleared once none remain.
# Without one, every block registration whose handler == the given block is
# removed instead.
def deafen( listener=nil, &blok )
  unless listener
    return @procs.delete_if { |entry| entry[-1] == blok }
  end
  @listeners.delete_if { |entry| entry[-1] == listener }
  @has_listeners = false if @listeners.size == 0
end
# Drives the parse to completion. Events are pulled from the underlying
# parser one at a time and forwarded to the registered blocks and listener
# objects until :end_document is seen.
#
# Pull event -> notification(s):
#   :start_doctype  -> :doctype, with the remaining event fields
#   :end_doctype    -> nothing
#   :start_element  -> :start_prefix_mapping( prefix, uri ) for each xmlns
#                      declaration on the element, then
#                      :start_element( uri, localname, qname, attributes )
#   :end_element    -> :end_element( uri, localname, qname ), then
#                      :end_prefix_mapping( prefix ) for each prefix the
#                      element declared
#   :text           -> :characters, after expanding the entities declared so
#                      far and numeric character references
#   :entitydecl     -> remembered (when the event has exactly three fields)
#                      and forwarded unchanged
#   :processing_instruction, :comment, :attlistdecl, :elementdecl, :cdata,
#   :notationdecl, :xmldecl -> forwarded unchanged
#   :end_document   -> :end_document, then parsing stops
#
# A :progress notification carrying @parser.position follows every event
# except :end_document.
def parse
  # Announce the start of the document before pulling any event.
  @procs.each { |sym, match, block| block.call if sym == :start_document }
  @listeners.each { |sym, match, listener|
    listener.start_document if sym == :start_document || sym.nil?
  }
  # Deliberately `while true` rather than `loop`: `loop` would silently
  # swallow a StopIteration raised from inside an observer.
  while true
    event = @parser.pull
    case event[0]
    when :end_document
      handle( :end_document )
      break
    when :start_doctype
      handle( :doctype, *event[1..-1] )
    when :end_doctype
      # Nothing is reported for the end of the doctype.
    when :start_element
      @tag_stack.push( event[1] )
      # find the observers for namespaces
      procs = get_procs( :start_prefix_mapping, event[1] )
      listeners = get_listeners( :start_prefix_mapping, event[1] )
      if procs || listeners
        # The attributes live in event[2]; normalize their values in place.
        event[2].each { |n, v| event[2][n] = @parser.normalize( v ) }
        # Break out the namespace declarations. "xmlns:foo" yields the
        # prefix "foo"; a bare "xmlns" yields nil (the default namespace).
        nsdecl = event[2].find_all { |n, value| n =~ /^xmlns(:|$)/ }
        nsdecl.collect! { |n, value| [ n[6..-1], value ] }
        @namespace_stack.push( {} )
        nsdecl.each do |n, v|
          @namespace_stack[-1][n] = v
          # notify observers of namespaces
          procs.each { |ob| ob.call( n, v ) } if procs
          listeners.each { |ob| ob.start_prefix_mapping( n, v ) } if listeners
        end
      end
      event[1] =~ Namespace::NAMESPLIT
      prefix = $1
      local = $2
      uri = get_namespace( prefix )
      # find the observers for start_element
      procs = get_procs( :start_element, event[1] )
      listeners = get_listeners( :start_element, event[1] )
      # notify observers
      procs.each { |ob| ob.call( uri, local, event[1], event[2] ) } if procs
      listeners.each { |ob|
        ob.start_element( uri, local, event[1], event[2] )
      } if listeners
    when :end_element
      @tag_stack.pop
      event[1] =~ Namespace::NAMESPLIT
      prefix = $1
      local = $2
      uri = get_namespace( prefix )
      # find the observers for end_element
      procs = get_procs( :end_element, event[1] )
      listeners = get_listeners( :end_element, event[1] )
      # notify observers
      procs.each { |ob| ob.call( uri, local, event[1] ) } if procs
      listeners.each { |ob|
        ob.end_element( uri, local, event[1] )
      } if listeners

      # The element's own declarations go out of scope only after its
      # :end_element notification has been delivered.
      namespace_mapping = @namespace_stack.pop
      # find the observers for namespaces
      procs = get_procs( :end_prefix_mapping, event[1] )
      listeners = get_listeners( :end_prefix_mapping, event[1] )
      if procs || listeners
        namespace_mapping.each_key do |ns_prefix|
          # notify observers of namespaces
          procs.each { |ob| ob.call( ns_prefix ) } if procs
          listeners.each { |ob| ob.end_prefix_mapping( ns_prefix ) } if listeners
        end
      end
    when :text
      text = event[1]
      # Block-form gsub inserts the entity value literally. Passing the
      # value as a replacement *string* would interpret "\0", "\1", "\\"
      # inside it as back-references/escapes and corrupt the text.
      @entities.each { |key, value| text = text.gsub( "&#{key};" ) { value } }
      # Numeric character references: group 1 is either decimal digits or
      # "x" + hex digits; prefix a "0" so Integer() accepts the hex form.
      # Non-mutating gsub always yields a fresh string, so the event's own
      # payload is never modified (and may safely be frozen).
      text = text.gsub( Text::NUMERICENTITY ) {
        ref = $1
        ref = "0#{ref}" if ref[0, 1] == "x"
        [Integer( ref )].pack( 'U*' )
      }
      handle( :characters, text )
    when :entitydecl
      # Remember simple name/value declarations for later :text expansion.
      @entities[ event[1] ] = event[2] if event.size == 3
      handle( *event )
    when :processing_instruction, :comment, :attlistdecl,
         :elementdecl, :cdata, :notationdecl, :xmldecl
      handle( *event )
    end
    handle( :progress, @parser.position )
  end
end
private
# Delivers one notification to every observer interested in +symbol+ for
# the element currently on top of the tag stack (nil when no element is
# open). Blocks are called with +arguments+; listener objects receive the
# method named after +symbol+ with the same arguments. Blocks are notified
# before listener objects.
def handle( symbol, *arguments )
  current_tag = @tag_stack[-1]
  blocks = get_procs( symbol, current_tag )
  objects = get_listeners( symbol, current_tag )
  # notify observers
  blocks.each { |blk| blk.call( *arguments ) } if blocks
  return unless objects
  method_name = symbol.to_s
  objects.each { |obj| obj.send( method_name, *arguments ) }
end
# Returns the blocks registered for +symbol+ events on the element +name+,
# or nil when no block is registered at all.
#
# A registration applies when its symbol is nil or equals +symbol+, and its
# matcher is nil, equals +name+, or is a Regexp that matches +name+.
#
# This intentionally mirrors get_listeners instead of sharing a helper; the
# duplication was kept because it is faster than going through one.
def get_procs( symbol, name )
  return nil if @procs.size == 0
  applicable = @procs.select { |sym, match, block|
    (sym.nil? || symbol == sym) &&
      (match.nil? || name == match ||
       (match.kind_of?( Regexp ) && name =~ match))
  }
  applicable.map { |entry| entry[-1] }
end
# Returns the listener objects registered for +symbol+ events on the
# element +name+, or nil when no listener is registered at all.
#
# A registration applies when its symbol is nil or equals +symbol+, and its
# matcher is nil, equals +name+, or is a Regexp that matches +name+.
def get_listeners( symbol, name )
  return nil if @listeners.size == 0
  applicable = @listeners.select { |sym, match, block|
    (sym.nil? || symbol == sym) &&
      (match.nil? || name == match ||
       (match.kind_of?( Regexp ) && name =~ match))
  }
  applicable.map { |entry| entry[-1] }
end
# Stores one registration, an array whose last element is the handler.
# Handlers that respond to :call are kept with the blocks, everything else
# with the listener objects; a registration already present is not stored
# twice. Registering a listener object always sets @has_listeners.
def add( pair )
  handler = pair[-1]
  unless handler.respond_to?( :call )
    @listeners.push( pair ) unless @listeners.include?( pair )
    return @has_listeners = true
  end
  @procs.push( pair ) unless @procs.include?( pair )
end
# Resolves +prefix+ to the namespace URI currently in scope, or nil when
# the prefix is not bound. A nil +prefix+ looks up the default namespace,
# which is stored under the nil key.
#
# The stack is searched innermost-first so that a redeclaration on a nested
# element wins, and the search stops at the first hit instead of collecting
# every enclosing binding.
def get_namespace( prefix )
  @namespace_stack.reverse_each do |ns|
    uri = ns[prefix]
    return uri unless uri.nil?
  end
  nil
end
end
end
end