mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
7255981a4d
* Workin' in the coal mine, goin' down, down, down... r1003 | ser | 2004-06-08 22:24:08 -0400 (Tue, 08 Jun 2004) | 7 lines * Entirely rewrote the validation code; the finite state machine, while cool, didn't survive the encounter with Interleave. It was getting sort of hacky, too. The new mechanism is less elegant, but is basically still a FSM, and is more flexible without having to add hacks to extend it. Large chunks of the FSM may be reusable in other validation mechanisms. * Added interleave support r1004 | ser | 2004-06-09 07:24:17 -0400 (Wed, 09 Jun 2004) | 2 lines * Added suppert for mixed r1005 | ser | 2004-06-09 08:01:33 -0400 (Wed, 09 Jun 2004) | 3 lines * Added Kou's patch to normalize attribute values passed through the SAX2 and Stream parsers. r1006 | ser | 2004-06-09 08:12:35 -0400 (Wed, 09 Jun 2004) | 2 lines * Applied Kou's preceding-sibling patch, which fixes the order of the axe results r1009 | ser | 2004-06-20 11:02:55 -0400 (Sun, 20 Jun 2004) | 8 lines * Redesigned and rewrote the RelaxNG code. It isn't elegant, but it works. Particular problems encountered were interleave and ref. Interleave means I can't use a clean FSM design, and ref means the dirty FSM design has to be modified during validation. There's a lot of code that could be cleaned up in here. However, I'm pretty sure that this design is reasonably fast and space efficient. I'm not entirely convinced that it is correct; more tests are required. * This version adds support for defines and refs. r1011 | ser | 2004-06-20 11:20:07 -0400 (Sun, 20 Jun 2004) | 3 lines * Removed debugging output from unit test * Moved ">" in Element.inspect r1014 | ser | 2004-06-20 11:40:30 -0400 (Sun, 20 Jun 2004) | 2 lines * Minor big in missing includes for validation rules r1023 | ser | 2004-07-03 08:57:34 -0400 (Sat, 03 Jul 2004) | 2 lines * Fixed bug #34, typo in xpath_parser. r1024 | ser | 2004-07-03 10:22:08 -0400 (Sat, 03 Jul 2004) | 9 lines * Previous fix, (include? -> includes?) was incorrect. * Added another test for encoding * Started AnyName support in RelaxNG * Added Element#Attributes#to_a, so that it does something intelligent. This was needed by XPath, for '@*' * Fixed XPath so that @* works. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@6577 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
558 lines
17 KiB
Ruby
558 lines
17 KiB
Ruby
require 'rexml/namespace'
|
|
require 'rexml/xmltokens'
|
|
require 'rexml/parsers/xpathparser'
|
|
|
|
module REXML
|
|
# You don't want to use this class. Really. Use XPath, which is a wrapper
|
|
# for this class. Believe me. You don't want to poke around in here.
|
|
# There is strange, dark magic at work in this code. Beware. Go back! Go
|
|
# back while you still can!
|
|
class XPathParser
|
|
include XMLTokens
|
|
LITERAL = /^'([^']*)'|^"([^"]*)"/u
|
|
|
|
def initialize( )
|
|
@parser = REXML::Parsers::XPathParser.new
|
|
@namespaces = {}
|
|
@variables = {}
|
|
end
|
|
|
|
def namespaces=( namespaces={} )
|
|
Functions::namespace_context = namespaces
|
|
@namespaces = namespaces
|
|
end
|
|
|
|
def variables=( vars={} )
|
|
Functions::variables = vars
|
|
@variables = vars
|
|
end
|
|
|
|
def parse path, nodeset
|
|
path_stack = @parser.parse( path )
|
|
#puts "PARSE: #{path} => #{path_stack.inspect}"
|
|
#puts "PARSE: nodeset = #{nodeset.collect{|x|x.to_s}.inspect}"
|
|
match( path_stack, nodeset )
|
|
end
|
|
|
|
def predicate path, nodeset
|
|
path_stack = @parser.predicate( path )
|
|
return Predicate( path_stack, nodeset )
|
|
end
|
|
|
|
def []=( variable_name, value )
|
|
@variables[ variable_name ] = value
|
|
end
|
|
|
|
def match( path_stack, nodeset )
|
|
while ( path_stack.size > 0 and nodeset.size > 0 )
|
|
#puts "PARSE: #{path_stack.inspect} '#{nodeset.collect{|n|n.class}.inspect}'"
|
|
nodeset = internal_parse( path_stack, nodeset )
|
|
#puts "NODESET: #{nodeset}"
|
|
#puts "PATH_STACK: #{path_stack.inspect}"
|
|
end
|
|
nodeset
|
|
end
|
|
|
|
private
|
|
|
|
def internal_parse path_stack, nodeset
|
|
#puts "INTERNAL_PARSE RETURNING WITH NO RESULTS" if nodeset.size == 0 or path_stack.size == 0
|
|
return nodeset if nodeset.size == 0 or path_stack.size == 0
|
|
#puts "INTERNAL_PARSE: #{path_stack.inspect}, #{nodeset.collect{|n| n.class}.inspect}"
|
|
case path_stack.shift
|
|
when :document
|
|
return [ nodeset[0].root.parent ]
|
|
|
|
when :qname
|
|
prefix = path_stack.shift
|
|
name = path_stack.shift
|
|
#puts "QNAME #{prefix}#{prefix.size>0?':':''}#{name}"
|
|
n = nodeset.clone
|
|
ns = @namespaces[prefix]
|
|
ns = ns ? ns : ''
|
|
n.delete_if do |node|
|
|
# FIXME: This DOUBLES the time XPath searches take
|
|
ns = node.namespace( prefix ) if node.node_type == :element and ns == ''
|
|
#puts "NODE: '#{node.to_s}'; node.has_name?( #{name.inspect}, #{ns.inspect} ): #{ node.has_name?( name, ns )}; node.namespace() = #{node.namespace().inspect}; node.prefix = #{node.prefix().inspect}" if node.node_type == :element
|
|
!(node.node_type == :element and node.name == name and node.namespace == ns )
|
|
end
|
|
return n
|
|
|
|
when :any
|
|
n = nodeset.clone
|
|
n.delete_if { |node| node.node_type != :element }
|
|
return n
|
|
|
|
when :self
|
|
# THIS SPACE LEFT INTENTIONALLY BLANK
|
|
|
|
when :processing_instruction
|
|
target = path_stack.shift
|
|
n = nodeset.clone
|
|
n.delete_if do |node|
|
|
(node.node_type != :processing_instruction) or
|
|
( !target.nil? and ( node.target != target ) )
|
|
end
|
|
return n
|
|
|
|
when :text
|
|
#puts ":TEXT"
|
|
n = nodeset.clone
|
|
n.delete_if do |node|
|
|
#puts "#{node} :: #{node.node_type}"
|
|
node.node_type != :text
|
|
end
|
|
return n
|
|
|
|
when :comment
|
|
n = nodeset.clone
|
|
n.delete_if do |node|
|
|
node.node_type != :comment
|
|
end
|
|
return n
|
|
|
|
when :node
|
|
return nodeset
|
|
|
|
# FIXME: I suspect the following XPath will fail:
|
|
# /a/*/*[1]
|
|
when :child
|
|
#puts "CHILD"
|
|
new_nodeset = []
|
|
nt = nil
|
|
for node in nodeset
|
|
nt = node.node_type
|
|
new_nodeset += node.children if nt == :element or nt == :document
|
|
end
|
|
#path_stack[0,(path_stack.size-ps_clone.size)] = []
|
|
return new_nodeset
|
|
|
|
when :literal
|
|
literal = path_stack.shift
|
|
if literal =~ /^\d+(\.\d+)?$/
|
|
return ($1 ? literal.to_f : literal.to_i)
|
|
end
|
|
#puts "RETURNING '#{literal}'"
|
|
return literal
|
|
|
|
when :attribute
|
|
new_nodeset = []
|
|
case path_stack.shift
|
|
when :qname
|
|
prefix = path_stack.shift
|
|
name = path_stack.shift
|
|
for element in nodeset
|
|
if element.node_type == :element
|
|
#puts element.name
|
|
#puts "looking for attribute #{name} in '#{@namespaces[prefix]}'"
|
|
attr = element.attribute( name, @namespaces[prefix] )
|
|
#puts ":ATTRIBUTE: attr => #{attr}"
|
|
new_nodeset << attr if attr
|
|
end
|
|
end
|
|
when :any
|
|
#puts "ANY"
|
|
for element in nodeset
|
|
if element.node_type == :element
|
|
new_nodeset += element.attributes.to_a
|
|
end
|
|
end
|
|
end
|
|
#puts "RETURNING #{new_nodeset.collect{|n|n.to_s}.inspect}"
|
|
return new_nodeset
|
|
|
|
when :parent
|
|
return internal_parse( path_stack, nodeset.collect{|n| n.parent}.compact )
|
|
|
|
when :ancestor
|
|
#puts "ANCESTOR"
|
|
new_nodeset = []
|
|
for node in nodeset
|
|
while node.parent
|
|
node = node.parent
|
|
new_nodeset << node unless new_nodeset.include? node
|
|
end
|
|
end
|
|
#nodeset = new_nodeset.uniq
|
|
return new_nodeset
|
|
|
|
when :ancestor_or_self
|
|
new_nodeset = []
|
|
for node in nodeset
|
|
if node.node_type == :element
|
|
new_nodeset << node
|
|
while ( node.parent )
|
|
node = node.parent
|
|
new_nodeset << node unless new_nodeset.include? node
|
|
end
|
|
end
|
|
end
|
|
#nodeset = new_nodeset.uniq
|
|
return new_nodeset
|
|
|
|
when :predicate
|
|
#puts "@"*80
|
|
#puts "NODESET = #{nodeset.collect{|n|n.to_s}.inspect}"
|
|
predicate = path_stack.shift
|
|
new_nodeset = []
|
|
Functions::size = nodeset.size
|
|
nodeset.size.times do |index|
|
|
node = nodeset[index]
|
|
Functions::node = node
|
|
Functions::index = index+1
|
|
#puts "Node #{node} and index=#{index+1}"
|
|
result = Predicate( predicate, node )
|
|
#puts "Predicate returned #{result} (#{result.class}) for #{node.class}"
|
|
if result.kind_of? Numeric
|
|
#puts "#{result} == #{index} => #{result == index}"
|
|
new_nodeset << node if result == (index+1)
|
|
elsif result.instance_of? Array
|
|
new_nodeset << node if result.size > 0
|
|
else
|
|
new_nodeset << node if result
|
|
end
|
|
end
|
|
#puts "Nodeset after predicate #{predicate.inspect} has #{new_nodeset.size} nodes"
|
|
#puts "NODESET: #{new_nodeset.collect{|n|n.to_s}.inspect}"
|
|
return new_nodeset
|
|
|
|
when :descendant_or_self
|
|
rv = descendant_or_self( path_stack, nodeset )
|
|
path_stack.clear
|
|
return rv
|
|
|
|
when :descendant
|
|
#puts ":DESCENDANT"
|
|
results = []
|
|
nt = nil
|
|
for node in nodeset
|
|
nt = node.node_type
|
|
results += internal_parse( path_stack.clone.unshift( :descendant_or_self ),
|
|
node.children ) if nt == :element or nt == :document
|
|
end
|
|
return results
|
|
|
|
when :following_sibling
|
|
results = []
|
|
for node in nodeset
|
|
all_siblings = node.parent.children
|
|
current_index = all_siblings.index( node )
|
|
following_siblings = all_siblings[ current_index+1 .. -1 ]
|
|
results += internal_parse( path_stack.clone, following_siblings )
|
|
end
|
|
return results
|
|
|
|
when :preceding_sibling
|
|
results = []
|
|
for node in nodeset
|
|
all_siblings = node.parent.children
|
|
current_index = all_siblings.index( node )
|
|
preceding_siblings = all_siblings[ 0 .. current_index-1 ]
|
|
results += internal_parse( path_stack.clone, preceding_siblings )
|
|
end
|
|
return results
|
|
|
|
when :preceding
|
|
new_nodeset = []
|
|
for node in nodeset
|
|
new_nodeset += preceding( node )
|
|
end
|
|
return new_nodeset
|
|
|
|
when :following
|
|
new_nodeset = []
|
|
for node in nodeset
|
|
new_nodeset += following( node )
|
|
end
|
|
return new_nodeset
|
|
|
|
when :namespace
|
|
new_set = []
|
|
for node in nodeset
|
|
new_nodeset << node.namespace if node.node_type == :element or node.node_type == :attribute
|
|
end
|
|
return new_nodeset
|
|
|
|
when :variable
|
|
var_name = path_stack.shift
|
|
return @variables[ var_name ]
|
|
|
|
end
|
|
nodeset
|
|
end
|
|
|
|
##########################################################
|
|
# FIXME
|
|
# The next two methods are BAD MOJO!
|
|
# This is my achilles heel. If anybody thinks of a better
|
|
# way of doing this, be my guest. This really sucks, but
|
|
# it took me three days to get it to work at all.
|
|
# ########################################################
|
|
|
|
def descendant_or_self( path_stack, nodeset )
|
|
rs = []
|
|
d_o_s( path_stack, nodeset, rs )
|
|
#puts "RS = #{rs.collect{|n|n.to_s}.inspect}"
|
|
document_order(rs.flatten.compact)
|
|
end
|
|
|
|
def d_o_s( p, ns, r )
|
|
nt = nil
|
|
ns.each_index do |i|
|
|
n = ns[i]
|
|
x = match( p.clone, [ n ] )
|
|
nt = n.node_type
|
|
d_o_s( p, n.children, x ) if nt == :element or nt == :document and n.children.size > 0
|
|
r.concat(x) if x.size > 0
|
|
end
|
|
end
|
|
|
|
|
|
# Reorders an array of nodes so that they are in document order
|
|
# It tries to do this efficiently.
|
|
def document_order( array_of_nodes )
|
|
new_arry = []
|
|
array_of_nodes.each { |node|
|
|
node_idx = []
|
|
np = node.node_type == :attribute ? node.element : node
|
|
while np.parent and np.parent.node_type == :element
|
|
node_idx << np.parent.index( np )
|
|
np = np.parent
|
|
end
|
|
new_arry << [ node_idx.reverse.join, node ]
|
|
}
|
|
new_arry.sort{ |s1, s2| s1[0] <=> s2[0] }.collect{ |s| s[1] }
|
|
end
|
|
|
|
|
|
def recurse( nodeset, &block )
|
|
for node in nodeset
|
|
yield node
|
|
recurse( node, &block ) if node.node_type == :element
|
|
end
|
|
end
|
|
|
|
|
|
# Given a predicate, a node, and a context, evaluates to true or false.
|
|
def Predicate( predicate, node )
|
|
predicate = predicate.clone
|
|
#puts "#"*20
|
|
#puts "Predicate( #{predicate.inspect}, #{node.class} )"
|
|
results = []
|
|
case (predicate[0])
|
|
when :and, :or, :eq, :neq, :lt, :lteq, :gt, :gteq
|
|
eq = predicate.shift
|
|
left = Predicate( predicate.shift, node )
|
|
right = Predicate( predicate.shift, node )
|
|
#puts "LEFT = #{left.inspect}"
|
|
#puts "RIGHT = #{right.inspect}"
|
|
return equality_relational_compare( left, eq, right )
|
|
|
|
when :div, :mod, :mult, :plus, :minus
|
|
op = predicate.shift
|
|
left = Predicate( predicate.shift, node )
|
|
right = Predicate( predicate.shift, node )
|
|
#puts "LEFT = #{left.inspect}"
|
|
#puts "RIGHT = #{right.inspect}"
|
|
left = Functions::number( left )
|
|
right = Functions::number( right )
|
|
#puts "LEFT = #{left.inspect}"
|
|
#puts "RIGHT = #{right.inspect}"
|
|
case op
|
|
when :div
|
|
return left.to_f / right.to_f
|
|
when :mod
|
|
return left % right
|
|
when :mult
|
|
return left * right
|
|
when :plus
|
|
return left + right
|
|
when :minus
|
|
return left - right
|
|
end
|
|
|
|
when :union
|
|
predicate.shift
|
|
left = Predicate( predicate.shift, node )
|
|
right = Predicate( predicate.shift, node )
|
|
return (left | right)
|
|
|
|
when :neg
|
|
predicate.shift
|
|
operand = Functions::number(Predicate( predicate, node ))
|
|
return -operand
|
|
|
|
when :not
|
|
predicate.shift
|
|
return !Predicate( predicate.shift, node )
|
|
|
|
when :function
|
|
predicate.shift
|
|
func_name = predicate.shift.tr('-', '_')
|
|
arguments = predicate.shift
|
|
#puts "\nFUNCTION: #{func_name}"
|
|
#puts "ARGUMENTS: #{arguments.inspect} #{node.to_s}"
|
|
args = arguments.collect { |arg| Predicate( arg, node ) }
|
|
#puts "FUNCTION: #{func_name}( #{args.collect{|n|n.to_s}.inspect} )"
|
|
result = Functions.send( func_name, *args )
|
|
#puts "RESULTS: #{result.inspect}"
|
|
return result
|
|
|
|
else
|
|
return match( predicate, [ node ] )
|
|
|
|
end
|
|
end
|
|
|
|
# Builds a nodeset of all of the following nodes of the supplied node,
|
|
# in document order
|
|
def following( node )
|
|
all_siblings = node.parent.children
|
|
current_index = all_siblings.index( node )
|
|
following_siblings = all_siblings[ current_index+1 .. -1 ]
|
|
following = []
|
|
recurse( following_siblings ) { |node| following << node }
|
|
following.shift
|
|
#puts "following is returning #{puta following}"
|
|
following
|
|
end
|
|
|
|
# Builds a nodeset of all of the preceding nodes of the supplied node,
|
|
# in reverse document order
|
|
def preceding( node )
|
|
all_siblings = node.parent.children
|
|
current_index = all_siblings.index( node )
|
|
preceding_siblings = all_siblings[ 0 .. current_index-1 ]
|
|
|
|
preceding = []
|
|
recurse( preceding_siblings ) { |node| preceding.unshift( node ) }
|
|
preceding
|
|
end
|
|
|
|
def equality_relational_compare( set1, op, set2 )
|
|
#puts "#"*80
|
|
if set1.kind_of? Array and set2.kind_of? Array
|
|
#puts "#{set1.size} & #{set2.size}"
|
|
if set1.size == 1 and set2.size == 1
|
|
set1 = set1[0]
|
|
set2 = set2[0]
|
|
elsif set1.size == 0 or set2.size == 0
|
|
nd = set1.size==0 ? set2 : set1
|
|
nd.each { |il| return true if compare( il, op, nil ) }
|
|
else
|
|
set1.each do |i1|
|
|
i1 = i1.to_s
|
|
set2.each do |i2|
|
|
i2 = i2.to_s
|
|
return true if compare( i1, op, i2 )
|
|
end
|
|
end
|
|
return false
|
|
end
|
|
end
|
|
#puts "EQ_REL_COMP: #{set1.class.name} #{set1.inspect}, #{op}, #{set2.class.name} #{set2.inspect}"
|
|
#puts "COMPARING VALUES"
|
|
# If one is nodeset and other is number, compare number to each item
|
|
# in nodeset s.t. number op number(string(item))
|
|
# If one is nodeset and other is string, compare string to each item
|
|
# in nodeset s.t. string op string(item)
|
|
# If one is nodeset and other is boolean, compare boolean to each item
|
|
# in nodeset s.t. boolean op boolean(item)
|
|
if set1.kind_of? Array or set2.kind_of? Array
|
|
#puts "ISA ARRAY"
|
|
if set1.kind_of? Array
|
|
a = set1
|
|
b = set2.to_s
|
|
else
|
|
a = set2
|
|
b = set1.to_s
|
|
end
|
|
|
|
case b
|
|
when 'true', 'false'
|
|
b = Functions::boolean( b )
|
|
for v in a
|
|
v = Functions::boolean(v)
|
|
return true if compare( v, op, b )
|
|
end
|
|
when /^\d+(\.\d+)?$/
|
|
b = Functions::number( b )
|
|
#puts "B = #{b.inspect}"
|
|
for v in a
|
|
#puts "v = #{v.inspect}"
|
|
v = Functions::number(v)
|
|
#puts "v = #{v.inspect}"
|
|
#puts compare(v,op,b)
|
|
return true if compare( v, op, b )
|
|
end
|
|
else
|
|
#puts "Functions::string( #{b}(#{b.class.name}) ) = #{Functions::string(b)}"
|
|
b = Functions::string( b )
|
|
for v in a
|
|
#puts "v = #{v.class.name} #{v.inspect}"
|
|
v = Functions::string(v)
|
|
return true if compare( v, op, b )
|
|
end
|
|
end
|
|
else
|
|
# If neither is nodeset,
|
|
# If op is = or !=
|
|
# If either boolean, convert to boolean
|
|
# If either number, convert to number
|
|
# Else, convert to string
|
|
# Else
|
|
# Convert both to numbers and compare
|
|
s1 = set1.to_s
|
|
s2 = set2.to_s
|
|
#puts "EQ_REL_COMP: #{set1}=>#{s1}, #{set2}=>#{s2}"
|
|
if s1 == 'true' or s1 == 'false' or s2 == 'true' or s2 == 'false'
|
|
#puts "Functions::boolean(#{set1})=>#{Functions::boolean(set1)}"
|
|
#puts "Functions::boolean(#{set2})=>#{Functions::boolean(set2)}"
|
|
set1 = Functions::boolean( set1 )
|
|
set2 = Functions::boolean( set2 )
|
|
else
|
|
if op == :eq or op == :neq
|
|
if s1 =~ /^\d+(\.\d+)?$/ or s2 =~ /^\d+(\.\d+)?$/
|
|
set1 = Functions::number( s1 )
|
|
set2 = Functions::number( s2 )
|
|
else
|
|
set1 = Functions::string( set1 )
|
|
set2 = Functions::string( set2 )
|
|
end
|
|
else
|
|
set1 = Functions::number( set1 )
|
|
set2 = Functions::number( set2 )
|
|
end
|
|
end
|
|
#puts "EQ_REL_COMP: #{set1} #{op} #{set2}"
|
|
#puts ">>> #{compare( set1, op, set2 )}"
|
|
return compare( set1, op, set2 )
|
|
end
|
|
return false
|
|
end
|
|
|
|
def compare a, op, b
|
|
#puts "COMPARE #{a.to_s}(#{a.class.name}) #{op} #{b.to_s}(#{a.class.name})"
|
|
case op
|
|
when :eq
|
|
a == b
|
|
when :neq
|
|
a != b
|
|
when :lt
|
|
a < b
|
|
when :lteq
|
|
a <= b
|
|
when :gt
|
|
a > b
|
|
when :gteq
|
|
a >= b
|
|
when :and
|
|
a and b
|
|
when :or
|
|
a or b
|
|
else
|
|
false
|
|
end
|
|
end
|
|
end
|
|
end
|