1
0
Fork 0
mirror of https://github.com/ruby/ruby.git synced 2022-11-09 12:17:21 -05:00
ruby--ruby/lib/rexml/parsers/sax2parser.rb
ser fa4bfa6af5 Merged from REXML main repository:
Fixes ticket:68.
  NOTE that this involves an API change!  Entity declarations in the doctype
  now generate events that carry two, not one, arguments.

Implements ticket:15, using gwrite's suggestion.  This allows Element to be
subclassed.

Two unrelated changes, because subversion is retarded and doesn't do
block-level commits:

  1) Fixed a typo bug in previous change for ticket:15
  2) Fixed namespaces handling in XPath and element.  

    ***** Note that this is an API change!!! *****

    Element.namespaces() now returns a hash of namespace mappings which are
    relevant for that node.

Fixes a bug in multiple decodings

The changeset 1230:1231 was bad.  The default behavior is *not* to use the
native REXML encodings by default, but rather to use ICONV by default.  I know
that this will piss some people off, but defaulting to the pure Ruby version
isn't the correct solution, and it breaks other encodings, so I've reverted it.

* Fixes ticket:61 (xpath_parser)
* Fixes ticket:63 (UTF-16; UNILE decoding was bad)
* Cleans up some tests, removing opportunities for test corruption
* Improves parsing error messages a little
* Adds the ability to override the encoding detection in Source construction
* Fixes an edge case in Functions::string, where document nodes weren't 
  correctly converted
* Fixes Functions::string() for Element and Document nodes
* Fixes some problems in entity handling

Addresses ticket:66

Fixes ticket:71

Addresses ticket:78
  NOTE: that this also fixes what is technically another bug in REXML.  REXML's
  XPath parser used to allow exponential notation in numbers.  The XPath spec
  is specific about what a number is, and scientific notation is not included.
  Therefore, this has been fixed.

Cross-ported a fix for ticket:88 from CVS.

Fixes ticket:80

Documentation cleanup.  Ticket:84

Applied Kou's fix for an un-trac'ed bug.

------------------------------------------------------------------------


git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@11548 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2007-01-20 03:56:02 +00:00

236 lines
7.4 KiB
Ruby

require 'rexml/parsers/baseparser'
require 'rexml/parseexception'
require 'rexml/namespace'
require 'rexml/text'
module REXML
module Parsers
# SAX2Parser
class SAX2Parser
def initialize source
@parser = BaseParser.new(source)
@listeners = []
@procs = []
@namespace_stack = []
@has_listeners = false
@tag_stack = []
@entities = {}
end
def source
@parser.source
end
def add_listener( listener )
@parser.add_listener( listener )
end
# Listen arguments:
#
# Symbol, Array, Block
# Listen to Symbol events on Array elements
# Symbol, Block
# Listen to Symbol events
# Array, Listener
# Listen to all events on Array elements
# Array, Block
# Listen to :start_element events on Array elements
# Listener
# Listen to All events
#
# Symbol can be one of: :start_element, :end_element,
# :start_prefix_mapping, :end_prefix_mapping, :characters,
# :processing_instruction, :doctype, :attlistdecl, :elementdecl,
# :entitydecl, :notationdecl, :cdata, :xmldecl, :comment
#
# There is an additional symbol that can be listened for: :progress.
# This will be called for every event generated, passing in the current
# stream position.
#
# Array contains regular expressions or strings which will be matched
# against fully qualified element names.
#
# Listener must implement the methods in SAX2Listener
#
# Block will be passed the same arguments as a SAX2Listener method would
# be, where the method name is the same as the matched Symbol.
# See the SAX2Listener for more information.
def listen( *args, &blok )
if args[0].kind_of? Symbol
if args.size == 2
args[1].each { |match| @procs << [args[0], match, blok] }
else
add( [args[0], nil, blok] )
end
elsif args[0].kind_of? Array
if args.size == 2
args[0].each { |match| add( [nil, match, args[1]] ) }
else
args[0].each { |match| add( [ :start_element, match, blok ] ) }
end
else
add([nil, nil, args[0]])
end
end
def deafen( listener=nil, &blok )
if listener
@listeners.delete_if {|item| item[-1] == listener }
@has_listeners = false if @listeners.size == 0
else
@procs.delete_if {|item| item[-1] == blok }
end
end
def parse
@procs.each { |sym,match,block| block.call if sym == :start_document }
@listeners.each { |sym,match,block|
block.start_document if sym == :start_document or sym.nil?
}
root = context = []
while true
event = @parser.pull
case event[0]
when :end_document
handle( :end_document )
break
when :end_doctype
context = context[1]
when :start_element
@tag_stack.push(event[1])
# find the observers for namespaces
procs = get_procs( :start_prefix_mapping, event[1] )
listeners = get_listeners( :start_prefix_mapping, event[1] )
if procs or listeners
# break out the namespace declarations
# The attributes live in event[2]
event[2].each {|n, v| event[2][n] = @parser.normalize(v)}
nsdecl = event[2].find_all { |n, value| n =~ /^xmlns(:|$)/ }
nsdecl.collect! { |n, value| [ n[6..-1], value ] }
@namespace_stack.push({})
nsdecl.each do |n,v|
@namespace_stack[-1][n] = v
# notify observers of namespaces
procs.each { |ob| ob.call( n, v ) } if procs
listeners.each { |ob| ob.start_prefix_mapping(n, v) } if listeners
end
end
event[1] =~ Namespace::NAMESPLIT
prefix = $1
local = $2
uri = get_namespace(prefix)
# find the observers for start_element
procs = get_procs( :start_element, event[1] )
listeners = get_listeners( :start_element, event[1] )
# notify observers
procs.each { |ob| ob.call( uri, local, event[1], event[2] ) } if procs
listeners.each { |ob|
ob.start_element( uri, local, event[1], event[2] )
} if listeners
when :end_element
@tag_stack.pop
event[1] =~ Namespace::NAMESPLIT
prefix = $1
local = $2
uri = get_namespace(prefix)
# find the observers for start_element
procs = get_procs( :end_element, event[1] )
listeners = get_listeners( :end_element, event[1] )
# notify observers
procs.each { |ob| ob.call( uri, local, event[1] ) } if procs
listeners.each { |ob|
ob.end_element( uri, local, event[1] )
} if listeners
namespace_mapping = @namespace_stack.pop
# find the observers for namespaces
procs = get_procs( :end_prefix_mapping, event[1] )
listeners = get_listeners( :end_prefix_mapping, event[1] )
if procs or listeners
namespace_mapping.each do |prefix, uri|
# notify observers of namespaces
procs.each { |ob| ob.call( prefix ) } if procs
listeners.each { |ob| ob.end_prefix_mapping(prefix) } if listeners
end
end
when :text
#normalized = @parser.normalize( event[1] )
#handle( :characters, normalized )
copy = event[1].clone
@entities.each { |key, value| copy = copy.gsub("&#{key};", value) }
copy.gsub!( Text::NUMERICENTITY ) {|m|
m=$1
m = "0#{m}" if m[0] == ?x
[Integer(m)].pack('U*')
}
handle( :characters, copy )
when :entitydecl
@entities[ event[1] ] = event[2] if event.size == 3
handle( *event )
when :processing_instruction, :comment, :doctype, :attlistdecl,
:elementdecl, :cdata, :notationdecl, :xmldecl
handle( *event )
end
handle( :progress, @parser.position )
end
end
private
def handle( symbol, *arguments )
tag = @tag_stack[-1]
procs = get_procs( symbol, tag )
listeners = get_listeners( symbol, tag )
# notify observers
procs.each { |ob| ob.call( *arguments ) } if procs
listeners.each { |l|
l.send( symbol.to_s, *arguments )
} if listeners
end
# The following methods are duplicates, but it is faster than using
# a helper
def get_procs( symbol, name )
return nil if @procs.size == 0
@procs.find_all do |sym, match, block|
#puts sym.inspect+"=="+symbol.inspect+ "\t"+match.inspect+"=="+name.inspect+ "\t"+( (sym.nil? or symbol == sym) and ((name.nil? and match.nil?) or match.nil? or ( (name == match) or (match.kind_of? Regexp and name =~ match)))).to_s
(
(sym.nil? or symbol == sym) and
((name.nil? and match.nil?) or match.nil? or (
(name == match) or
(match.kind_of? Regexp and name =~ match)
)
)
)
end.collect{|x| x[-1]}
end
def get_listeners( symbol, name )
return nil if @listeners.size == 0
@listeners.find_all do |sym, match, block|
(
(sym.nil? or symbol == sym) and
((name.nil? and match.nil?) or match.nil? or (
(name == match) or
(match.kind_of? Regexp and name =~ match)
)
)
)
end.collect{|x| x[-1]}
end
def add( pair )
if pair[-1].respond_to? :call
@procs << pair unless @procs.include? pair
else
@listeners << pair unless @listeners.include? pair
@has_listeners = true
end
end
def get_namespace( prefix )
uris = (@namespace_stack.find_all { |ns| not ns[prefix].nil? }) ||
(@namespace_stack.find { |ns| not ns[nil].nil? })
uris[-1][prefix] unless uris.nil? or 0 == uris.size
end
end
end
end