2003-06-09 21:31:01 -04:00
|
|
|
require 'rexml/namespace'
|
|
|
|
require 'rexml/xmltokens'
|
|
|
|
|
|
|
|
module REXML
|
2003-12-08 21:41:33 -05:00
|
|
|
module Parsers
|
|
|
|
# You don't want to use this class. Really. Use XPath, which is a wrapper
|
|
|
|
# for this class. Believe me. You don't want to poke around in here.
|
|
|
|
# There is strange, dark magic at work in this code. Beware. Go back! Go
|
|
|
|
# back while you still can!
|
|
|
|
class XPathParser
|
|
|
|
include XMLTokens
|
|
|
|
# Matches a quoted string literal at the start of the input. Capture 1 holds
# the text of a single-quoted literal, capture 2 the text of a double-quoted
# one (only one of the two is set for any given match).
LITERAL = /^'([^']*)'|^"([^"]*)"/u
|
2003-06-09 21:31:01 -04:00
|
|
|
|
2003-12-08 21:41:33 -05:00
|
|
|
# Stores the supplied namespaces on this parser and also publishes the same
# object as the namespace context of Functions.
def namespaces=( namespaces )
  @namespaces = (Functions.namespace_context = namespaces)
end
|
2003-06-09 21:31:01 -04:00
|
|
|
|
2003-12-08 21:41:33 -05:00
|
|
|
# Parses an XPath expression into the nested-array token form built by
# OrExpr and the methods it calls.
#
# path:: the XPath expression as a String. The argument itself is left
#        untouched: whitespace is normalised on a copy, so frozen strings
#        and strings the caller keeps using are safe to pass in.
#
# Returns the Array of tokens that OrExpr filled in.
def parse path
  # Strip ignorable spaces directly after "(" / "[" and directly before
  # ")" / "]". String#gsub returns a new string, so the caller's object is
  # never modified.
  normalised = path.gsub(/([\(\[])\s+/, '\1').gsub(/\s+([\]\)])/, '\1')
  parsed = []
  OrExpr(normalised, parsed)
  parsed
end
|
2003-06-09 21:31:01 -04:00
|
|
|
|
2003-12-08 21:41:33 -05:00
|
|
|
# Parses +path+ as the body of a predicate: the text is wrapped in square
# brackets, handed to Predicate, and the tokens collected are returned.
def predicate path
  tokens = []
  bracketed = "[#{path}]"
  Predicate( bracketed, tokens )
  tokens
end
|
2003-06-09 21:31:01 -04:00
|
|
|
|
2004-06-09 22:01:04 -04:00
|
|
|
# Renders a parsed path as an XPath string in abbreviated syntax.
#
# path:: either a String (parsed first via #parse) or an Array of tokens.
#        The array is consumed (shifted empty) while it is rendered.
#
# Returns the rendered String; a "/" is prepended when a :document token
# was seen. Tokens that are not recognised are rendered as "UNKNOWN(...)".
def abbreviate( path )
  path = parse( path ) if path.kind_of?(String)
  # Axes that have no abbreviated form and are written out as "name::".
  long_axes = [ :following, :following_sibling,
                :ancestor, :ancestor_or_self, :descendant,
                :namespace, :preceding, :preceding_sibling ]
  out = ""
  rooted = false
  until path.size == 0
    token = path.shift
    case token
    when :node
      # node() contributes nothing in abbreviated form
    when :document
      rooted = true
    when :attribute
      out << "/" unless out.size == 0
      out << "@"
    when :child
      out << "/" unless out.size == 0
    when :descendant_or_self
      out << "/"
    when :self
      out << "."
    when :parent
      out << ".."
    when :any
      out << "*"
    when :text
      out << "text()"
    when *long_axes
      out << "/" unless out.size == 0
      out << token.to_s.tr("_", "-") << "::"
    when :qname
      prefix = path.shift
      name = path.shift
      out << (prefix + ":") unless prefix.size == 0
      out << name
    when :predicate
      out << '['
      out << predicate_to_string( path.shift ) { |sub| abbreviate( sub ) }
      out << ']'
    when :function
      out << path.shift
      out << "( "
      out << predicate_to_string( path.shift[0] ) { |sub| abbreviate( sub ) }
      out << " )"
    when :literal
      out << " \"#{path.shift}\" "
    else
      out << "/" unless out.size == 0
      out << "UNKNOWN(" << token.inspect << ")"
    end
  end
  rooted ? "/" + out : out
end
|
|
|
|
|
|
|
|
# Renders a parsed path as an XPath string in unabbreviated syntax, with
# every axis written out as "axis-name::".
#
# path:: either a String (parsed first via #parse) or an Array of tokens.
#        The array is consumed (shifted empty) while it is rendered.
#
# Returns the rendered String; a "/" is prepended when a :document token
# was seen. Any token not handled here is rendered as "UNKNOWN(...)".
def expand( path )
  path = parse( path ) if path.kind_of?(String)
  axes = [ :attribute, :child, :following, :following_sibling,
           :ancestor, :ancestor_or_self, :descendant, :descendant_or_self,
           :namespace, :preceding, :preceding_sibling, :self, :parent ]
  out = ""
  rooted = false
  until path.size == 0
    token = path.shift
    case token
    when :document
      rooted = true
    when :node
      out << "node()"
    when :any
      out << "*"
    when *axes
      out << "/" unless out.size == 0
      out << token.to_s.tr("_", "-") << "::"
    when :qname
      prefix = path.shift
      name = path.shift
      out << (prefix + ":") unless prefix.size == 0
      out << name
    when :predicate
      out << '['
      out << predicate_to_string( path.shift ) { |sub| expand( sub ) }
      out << ']'
    else
      out << "/" unless out.size == 0
      out << "UNKNOWN(" << token.inspect << ")"
    end
  end
  rooted ? "/" + out : out
end
|
2003-06-09 21:31:01 -04:00
|
|
|
|
2004-06-09 22:01:04 -04:00
|
|
|
# Renders one parsed predicate expression as XPath text.
#
# path::  an Array whose first element selects the form:
#         * a binary operator symbol followed by the left and right operand
#           arrays;
#         * :function followed by the function name and an argument array;
#         * :literal followed by the literal value;
#         * anything else is treated as a location path and handed to the
#           block.
#         Recognised forms are consumed (shifted) while they are rendered.
# block:: called with a location-path array; must return its String form
#         (#abbreviate and #expand pass themselves here).
#
# Returns the rendered String with runs of spaces collapsed to one space.
def predicate_to_string( path, &block )
  string = ""
  case path[0]
  when :and, :or, :mult, :plus, :minus, :neq, :eq, :lt, :gt, :lteq, :gteq, :div, :mod, :union
    symbol = path.shift
    # Map each operator token to its XPath spelling. The arithmetic
    # operators need their sign here too; rendering them by name would
    # produce text such as "a plus b", which is not XPath.
    op = case symbol
         when :eq    then "="
         when :lt    then "<"
         when :gt    then ">"
         when :lteq  then "<="
         when :gteq  then ">="
         when :neq   then "!="
         when :union then "|"
         when :mult  then "*"
         when :plus  then "+"
         when :minus then "-"
         else symbol.to_s # and, or, div, mod are spelled as their names
         end
    left = predicate_to_string( path.shift, &block )
    right = predicate_to_string( path.shift, &block )
    string << " " << left << " " << op << " " << right << " "
  when :function
    path.shift
    name = path.shift
    string << name
    string << "( "
    string << predicate_to_string( path.shift, &block )
    string << " )"
  when :literal
    path.shift
    string << " "
    string << path.shift.inspect
    string << " "
  else
    string << " "
    string << yield( path )
    string << " "
  end
  return string.squeeze(" ")
end
|
2003-06-09 21:31:01 -04:00
|
|
|
|
2003-12-08 21:41:33 -05:00
|
|
|
private
|
|
|
|
#LocationPath
|
|
|
|
# | RelativeLocationPath
|
|
|
|
# | '/' RelativeLocationPath?
|
|
|
|
# | '//' RelativeLocationPath
|
|
|
|
# Parses a location path. A leading "/" emits :document; a leading "//"
# additionally emits :descendant_or_self, :node. Whatever follows is handed
# to RelativeLocationPath.
#
# path::   the text to parse (surrounding whitespace is stripped first)
# parsed:: Array that receives the tokens
#
# Returns the result of RelativeLocationPath, or nil when nothing is left
# after the leading slashes.
def LocationPath path, parsed
  path = path.strip
  if path.start_with?("/")
    parsed << :document
    if path.start_with?("//")
      parsed << :descendant_or_self << :node
      path = path[2..-1]
    else
      path = path[1..-1]
    end
  end
  path.size > 0 ? RelativeLocationPath( path, parsed ) : nil
end
|
2003-06-09 21:31:01 -04:00
|
|
|
|
2003-12-08 21:41:33 -05:00
|
|
|
#RelativeLocationPath
|
|
|
|
# | Step
|
|
|
|
# | (AXIS_NAME '::' | '@' | '') AxisSpecifier
|
|
|
|
# NodeTest
|
|
|
|
# Predicate
|
|
|
|
# | '.' | '..' AbbreviatedStep
|
|
|
|
# | RelativeLocationPath '/' Step
|
|
|
|
# | RelativeLocationPath '//' Step
|
|
|
|
# Matches an explicit axis specifier (an axis name followed by "::") at the
# start of the input. Capture 1 is the axis name as written, with hyphens.
AXIS = /^(ancestor|ancestor-or-self|attribute|child|descendant|descendant-or-self|following|following-sibling|namespace|parent|preceding|preceding-sibling|self)::/
|
|
|
|
# Parses a sequence of steps separated by "/" or "//".
#
# Each step is either an abbreviated step ("." or "..") or an axis
# ("@", "name::", or the implicit child axis) followed by a node test and
# optional predicates. "//" between steps emits :descendant_or_self, :node.
#
# path::   the text to parse
# parsed:: Array that receives the tokens
#
# Returns the unparsed remainder: an empty string when everything was
# consumed, otherwise the text starting at the first character that is not
# a step separator.
def RelativeLocationPath path, parsed
  until path.size == 0
    if path.start_with?("..")
      parsed << :parent << :node
      path = path[2..-1]
    elsif path.start_with?(".")
      parsed << :self << :node
      path = path[1..-1]
    else
      # Axis: "@", an explicit "name::", or child by default.
      if path[0] == ?@
        parsed << :attribute
        path = path[1..-1]
      elsif path =~ AXIS
        parsed << $1.tr('-','_').intern
        path = $'
      else
        parsed << :child
      end

      # Node test and predicates are collected separately and appended
      # only once the whole step has been read.
      step = []
      path = NodeTest( path, step )
      path = Predicate( path, step ) if path[0] == ?[
      parsed.concat(step)
    end

    break if path.size == 0
    # Anything other than a separator ends the location path.
    return path unless path[0] == ?/
    if path[1] == ?/
      parsed << :descendant_or_self << :node
      path = path[2..-1]
    else
      path = path[1..-1]
    end
  end
  path
end
|
2003-06-09 21:31:01 -04:00
|
|
|
|
2003-12-08 21:41:33 -05:00
|
|
|
# Returns a 1-1 map of the nodeset
|
|
|
|
# The contents of the resulting array are either:
|
|
|
|
# true/false, if a positive match
|
|
|
|
# String, if a name match
|
|
|
|
#NodeTest
|
|
|
|
# | ('*' | NCNAME ':' '*' | QNAME) NameTest
|
|
|
|
# | NODE_TYPE '(' ')' NodeType
|
|
|
|
# | PI '(' LITERAL ')' PI
|
|
|
|
# | '[' expr ']' Predicate
|
|
|
|
# Matches a "prefix:*" name test at the start of the input; capture 1 is
# the prefix. NCNAME_STR is not defined in this part of the file
# (presumably it comes from the included XMLTokens -- confirm).
NCNAMETEST= /^(#{NCNAME_STR}):\*/u
|
|
|
|
# Name pattern reused from Namespace. NodeTest reads capture 1 of a match
# as the (optional) prefix and capture 2 as the name.
QNAME = Namespace::NAMESPLIT
|
|
|
|
# Matches the node-type tests comment(), text() and node() at the start of
# the input, allowing whitespace between the parentheses. Capture 1 is the
# type name.
NODE_TYPE = /^(comment|text|node)\(\s*\)/m
|
|
|
|
# Matches the opening "processing-instruction(" of a processing-instruction
# test. Only the name and the opening parenthesis are consumed by this
# pattern.
PI = /^processing-instruction\(/
|
|
|
|
# Parses a single node test at the start of +path+ and appends its tokens
# to +parsed+:
#
#   "*"                              -> :any
#   comment() / text() / node()      -> :comment / :text / :node
#   processing-instruction(LITERAL?) -> :processing_instruction, target
#                                       (target is '' when no literal given)
#   prefix:*                         -> :namespace, prefix
#   QName                            -> :qname, prefix ('' if none), name
#
# Returns the text remaining after the node test; +path+ is returned
# unchanged when none of the forms match.
#
# Raises ParseException when the argument of processing-instruction( is not
# followed by ")".
def NodeTest path, parsed
  case path
  when /^\*/
    path = $'
    parsed << :any
  when NODE_TYPE
    type = $1
    path = $'
    parsed << type.tr('-', '_').intern
  when PI
    path = $'
    literal = nil
    if path =~ /^\s*\)/
      # Empty argument list: consume the closing parenthesis so the caller
      # continues after the complete node test, as it does for NODE_TYPE.
      path = $'
    else
      if path =~ LITERAL
        # LITERAL captures single-quoted text in group 1 and double-quoted
        # text in group 2; accept either.
        literal = $1 || $2
        path = $'
      end
      # When no literal matched, path is unchanged and the check below
      # reports the malformed argument instead of failing on nil.
      raise ParseException.new("Missing ')' after processing instruction") if path[0] != ?)
      path = path[1..-1]
    end
    parsed << :processing_instruction
    parsed << (literal || '')
  when NCNAMETEST
    prefix = $1
    path = $'
    parsed << :namespace
    parsed << prefix
  when QNAME
    prefix = $1
    name = $2
    path = $'
    prefix = "" unless prefix
    parsed << :qname
    parsed << prefix
    parsed << name
  end
  return path
end
|
2003-06-09 21:31:01 -04:00
|
|
|
|
2003-12-08 21:41:33 -05:00
|
|
|
# Filters the supplied nodeset on the predicate(s)
|
|
|
|
# Parses the run of "[...]" groups at the start of +path+.
#
# All groups are first split off with get_group; afterwards each one has
# its outer brackets removed and is parsed with OrExpr, adding a
# :predicate marker followed by that group's token Array to +parsed+.
#
# Returns nil when +path+ does not start with "[", otherwise the text that
# follows the last group.
def Predicate path, parsed
  return nil unless path[0] == ?[

  bodies = []
  while path[0] == ?[
    path, group = get_group(path)
    bodies << group[1..-2] if group
  end

  bodies.each do |body|
    tokens = []
    parsed << :predicate << tokens
    OrExpr(body, tokens)
  end
  path
end
|
2003-06-09 21:31:01 -04:00
|
|
|
|
2003-12-08 21:41:33 -05:00
|
|
|
# The following return arrays of true/false, a 1-1 mapping of the
|
|
|
|
# supplied nodeset, except for axe(), which returns a filtered
|
|
|
|
# nodeset
|
2003-06-09 21:31:01 -04:00
|
|
|
|
2003-12-08 21:41:33 -05:00
|
|
|
#| OrExpr S 'or' S AndExpr
|
|
|
|
#| AndExpr
|
|
|
|
# Parses "AndExpr ( ' or ' AndExpr )*" into left-nested
# [:or, left, right] arrays.
#
# If +parsed+ is empty its contents are replaced by the result; otherwise
# the result is appended as one nested element. Nothing is added when no
# tokens were produced.
#
# Returns the text left over after the expression.
def OrExpr path, parsed
  tree = []
  remainder = AndExpr( path, tree )
  unless remainder == path
    while remainder =~ /^\s*( or )/
      tail = $'
      rhs = []
      tree = [ :or, tree, rhs ]
      remainder = AndExpr( tail, rhs )
    end
  end
  unless tree.size == 0
    if parsed.size == 0
      parsed.replace(tree)
    else
      parsed << tree
    end
  end
  remainder
end
|
2003-06-09 21:31:01 -04:00
|
|
|
|
2003-12-08 21:41:33 -05:00
|
|
|
#| AndExpr S 'and' S EqualityExpr
|
|
|
|
#| EqualityExpr
|
|
|
|
# Parses "EqualityExpr ( ' and ' EqualityExpr )*" into left-nested
# [:and, left, right] arrays.
#
# If +parsed+ is empty its contents are replaced by the result; otherwise
# the result is appended as one nested element. Nothing is added when no
# tokens were produced.
#
# Returns the text left over after the expression.
def AndExpr path, parsed
  tree = []
  remainder = EqualityExpr( path, tree )
  unless remainder == path
    while remainder =~ /^\s*( and )/
      tail = $'
      rhs = []
      tree = [ :and, tree, rhs ]
      remainder = EqualityExpr( tail, rhs )
    end
  end
  unless tree.size == 0
    if parsed.size == 0
      parsed.replace(tree)
    else
      parsed << tree
    end
  end
  remainder
end
|
2003-06-09 21:31:01 -04:00
|
|
|
|
2003-12-08 21:41:33 -05:00
|
|
|
#| EqualityExpr ('=' | '!=') RelationalExpr
|
|
|
|
#| RelationalExpr
|
|
|
|
# Parses "RelationalExpr ( ('=' | '!=') RelationalExpr )*" into left-nested
# [:eq, left, right] / [:neq, left, right] arrays.
#
# If +parsed+ is empty its contents are replaced by the result; otherwise
# the result is appended as one nested element. Nothing is added when no
# tokens were produced.
#
# Returns the text left over after the expression.
def EqualityExpr path, parsed
  tree = []
  remainder = RelationalExpr( path, tree )
  unless remainder == path
    while remainder =~ /^\s*(!?=)\s*/
      operator = $1
      tail = $'
      rhs = []
      tree = [ (operator[0] == ?! ? :neq : :eq), tree, rhs ]
      remainder = RelationalExpr( tail, rhs )
    end
  end
  unless tree.size == 0
    if parsed.size == 0
      parsed.replace(tree)
    else
      parsed << tree
    end
  end
  remainder
end
|
2003-06-09 21:31:01 -04:00
|
|
|
|
2003-12-08 21:41:33 -05:00
|
|
|
#| RelationalExpr ('<' | '>' | '<=' | '>=') AdditiveExpr
|
|
|
|
#| AdditiveExpr
|
|
|
|
# Parses "AdditiveExpr ( ('<' | '>' | '<=' | '>=') AdditiveExpr )*" into
# left-nested arrays headed by :lt, :gt, :lteq or :gteq.
#
# If +parsed+ is empty its contents are replaced by the result; otherwise
# the result is appended as one nested element. Nothing is added when no
# tokens were produced.
#
# Returns the text left over after the expression.
def RelationalExpr path, parsed
  tree = []
  remainder = AdditiveExpr( path, tree )
  unless remainder == path
    while remainder =~ /^\s*([<>]=?)\s*/
      operator = $1
      tail = $'
      # Build the token name from the direction plus an optional "eq".
      name = operator[0] == ?< ? "lt" : "gt"
      name += "eq" if operator[-1] == ?=
      rhs = []
      tree = [ name.intern, tree, rhs ]
      remainder = AdditiveExpr( tail, rhs )
    end
  end
  unless tree.size == 0
    if parsed.size == 0
      parsed.replace(tree)
    else
      parsed << tree
    end
  end
  remainder
end
|
2003-06-09 21:31:01 -04:00
|
|
|
|
2003-12-08 21:41:33 -05:00
|
|
|
#| AdditiveExpr ('+' | S '-') MultiplicativeExpr
|
|
|
|
#| MultiplicativeExpr
|
|
|
|
# Parses "MultiplicativeExpr ( ('+' | ' -') MultiplicativeExpr )*" into
# left-nested [:plus, left, right] / [:minus, left, right] arrays. A minus
# sign is only taken as an operator when a space precedes it.
#
# If +parsed+ is empty its contents are replaced by the result; otherwise
# the result is appended as one nested element. Nothing is added when no
# tokens were produced.
#
# Returns the text left over after the expression.
def AdditiveExpr path, parsed
  tree = []
  remainder = MultiplicativeExpr( path, tree )
  unless remainder == path
    while remainder =~ /^\s*(\+| -)\s*/
      operator = $1
      tail = $'
      rhs = []
      tree = [ (operator[0] == ?+ ? :plus : :minus), tree, rhs ]
      remainder = MultiplicativeExpr( tail, rhs )
    end
  end
  unless tree.size == 0
    if parsed.size == 0
      parsed.replace(tree)
    else
      parsed << tree
    end
  end
  remainder
end
|
2003-06-09 21:31:01 -04:00
|
|
|
|
2003-12-08 21:41:33 -05:00
|
|
|
#| MultiplicativeExpr ('*' | S ('div' | 'mod') S) UnaryExpr
|
|
|
|
#| UnaryExpr
|
|
|
|
# Parses "UnaryExpr ( ('*' | ' div ' | ' mod ') UnaryExpr )*" into
# left-nested arrays headed by :mult, :div or :mod.
#
# If +parsed+ is empty its contents are replaced by the result; otherwise
# the result is appended as one nested element. Nothing is added when no
# tokens were produced.
#
# Returns the text left over after the expression.
def MultiplicativeExpr path, parsed
  tree = []
  remainder = UnaryExpr( path, tree )
  unless remainder == path
    while remainder =~ /^\s*(\*| div | mod )\s*/
      operator = $1
      tail = $'
      kind = if operator[0] == ?*
               :mult
             elsif operator.include?( "div" )
               :div
             else
               :mod
             end
      rhs = []
      tree = [ kind, tree, rhs ]
      remainder = UnaryExpr( tail, rhs )
    end
  end
  unless tree.size == 0
    if parsed.size == 0
      parsed.replace(tree)
    else
      parsed << tree
    end
  end
  remainder
end
|
2003-06-09 21:31:01 -04:00
|
|
|
|
2003-12-08 21:41:33 -05:00
|
|
|
#| '-' UnaryExpr
|
|
|
|
#| UnionExpr
|
|
|
|
# Parses any number of leading "-" signs followed by a UnionExpr.
#
# An odd number of minus signs appends :neg to +parsed+; an even number
# cancels out. The operand's tokens are then appended directly to +parsed+.
#
# Returns the text left over after the operand.
def UnaryExpr path, parsed
  path =~ /^(\-*)/
  signs = $1
  path = $'
  parsed << :neg if signs and signs.size.odd?

  operand = []
  path = UnionExpr( path, operand )
  parsed.concat( operand )
  path
end
|
2003-06-09 21:31:01 -04:00
|
|
|
|
2003-12-08 21:41:33 -05:00
|
|
|
#| UnionExpr '|' PathExpr
|
|
|
|
#| PathExpr
|
|
|
|
# Parses "PathExpr ( '|' PathExpr )*" into left-nested
# [:union, left, right] arrays.
#
# If +parsed+ is empty its contents are replaced by the result; otherwise
# the result is appended as one nested element. Nothing is added when no
# tokens were produced.
#
# Returns the text left over after the expression.
def UnionExpr path, parsed
  tree = []
  remainder = PathExpr( path, tree )
  unless remainder == path
    while remainder =~ /^\s*(\|)\s*/
      tail = $'
      rhs = []
      tree = [ :union, tree, rhs ]
      remainder = PathExpr( tail, rhs )
    end
  end
  unless tree.size == 0
    if parsed.size == 0
      parsed.replace( tree )
    else
      parsed << tree
    end
  end
  remainder
end
|
2003-06-09 21:31:01 -04:00
|
|
|
|
2003-12-08 21:41:33 -05:00
|
|
|
#| LocationPath
|
|
|
|
#| FilterExpr ('/' | '//') RelativeLocationPath
|
|
|
|
# Parses a path expression: either a plain location path, or a filter
# expression optionally continued by "/" or "//" and a relative location
# path.
#
# path::   the text to parse (leading whitespace is skipped)
# parsed:: Array that receives the tokens
#
# Returns the text left over after the expression (this can be nil, because
# LocationPath returns nil when nothing follows a leading "/").
def PathExpr path, parsed
  path =~ /^\s*/
  path = $'
  n = []
  rest = FilterExpr( path, n )
  if rest != path && rest && rest[0] == ?/
    # A filter expression was consumed and is continued by "/step...".
    # Its tokens and those of the continuation are collected in n and must
    # reach the caller like in every other case; returning early without
    # copying them would lose the whole expression.
    rest = RelativeLocationPath(rest, n)
  elsif rest =~ /\A[\/\.\@\[\w_*]/
    rest = LocationPath(rest, n)
  end
  parsed.concat(n)
  rest
end
|
2003-06-09 21:31:01 -04:00
|
|
|
|
2003-12-08 21:41:33 -05:00
|
|
|
#| FilterExpr Predicate
|
|
|
|
#| PrimaryExpr
|
|
|
|
# Parses a primary expression followed by optional predicates.
#
# The tokens of both parts are gathered in a scratch array and appended to
# +parsed+ together at the end.
#
# Returns whatever the last parsing step returned as the unparsed
# remainder.
def FilterExpr path, parsed
  collected = []
  remainder = PrimaryExpr( path, collected )
  if remainder and remainder[0] == ?[
    remainder = Predicate(remainder, collected)
  end
  parsed.concat(collected)
  remainder
end
|
2003-06-09 21:31:01 -04:00
|
|
|
|
2003-12-08 21:41:33 -05:00
|
|
|
#| VARIABLE_REFERENCE
|
|
|
|
#| '(' expr ')'
|
|
|
|
#| LITERAL
|
|
|
|
#| NUMBER
|
|
|
|
#| FunctionCall
|
|
|
|
# Matches a "$name" variable reference at the start of the input; capture 1
# is the name without the "$". NAME_STR is not defined in this part of the
# file (presumably it comes from the included XMLTokens -- confirm).
VARIABLE_REFERENCE = /^\$(#{NAME_STR})/u
|
|
|
|
# Matches an unsigned decimal number at the start of the input, such as
# "12", "1.5" or ".5"; capture 1 is the number text.
NUMBER = /^(\d*\.?\d+)/
|
|
|
|
# Matches a name that is exactly one of the XPath node-type names.
# The alternatives are grouped so that both anchors apply to every name;
# ungrouped, "^" would bind only to "comment" and "$" only to "node", and
# any name merely containing "text" (for example "context") would match.
NT = /\A(?:comment|text|processing-instruction|node)\z/
|
|
|
|
def PrimaryExpr path, parsed
|
|
|
|
arry = []
|
|
|
|
case path
|
|
|
|
when VARIABLE_REFERENCE
|
|
|
|
varname = $1
|
|
|
|
path = $'
|
|
|
|
parsed << :variable
|
|
|
|
parsed << varname
|
|
|
|
#arry << @variables[ varname ]
|
|
|
|
when /^(\w[-\w]*)(?:\()/
|
|
|
|
#puts "PrimaryExpr :: Function >>> #$1 -- '#$''"
|
|
|
|
fname = $1
|
Merged in development from the main REXML repository.
* Fixed bug #34, typo in xpath_parser.
* Previous fix, (include? -> includes?) was incorrect.
* Added another test for encoding
* Started AnyName support in RelaxNG
* Added Element#Attributes#to_a, so that it does something intelligent.
This was needed by XPath, for '@*'
* Fixed XPath so that @* works.
* Added xmlgrep to the bin/ directory. A little tool allowing you to grep
for XPaths in an XML document.
* Fixed a CDATA pretty-printing bug. (#39)
* Fixed a buffering bug in Source.rb that affected the SAX parser
This bug was related to how REXML determines the encoding of a file, and
evinced itself by hanging on input when using the SAX parser.
* The unit test for the previous patch. Forgot to commit it.
* Minor pretty printing fix.
* Applied Curt Sampson's optimization improvements
* Issue #9; 3.1.3: The SAX parser was not denormalizing entity references
in incoming text. All declared internal entities, as well as numeric
entities, should now be denormalized. There was a related bug in that the
SAX parser was actually double-encoding entities; this is also fixed.
* bin/* programs should now be executable. Setting bin apps to executable
* Issue 14; 3.1.3: DTD events are now all being passed by StreamParser
Some of the DTD events were not being passed through by the stream parser.
* #26: Element#add_element(nil) now raises an error Changed XPath searches so
that if a non-Hash is passed, an error is raised Fixed a spurrious undefined
method error in encoding. #29: XPath ordering bug fixed by Mark Williams.
Incidentally, Mark supplied a superlative bug report, including a full unit
test. Then he went ahead and fixed the bug. It doesn't get any better than
this, folks.
* Fixed a broken link. Thanks to Dick Davies for pointing it out. Added
functions courtesy of Michael Neumann <mneumann@xxxx.de>.
Example code to follow.
* Added Michael's sample code. Merged the changes in from branches/xpath_V
* Fixed preceding:: and following:: axis Fixed the ordering bug that Martin
Fowler reported.
* Uncommented some code commented for testing Applied Nobu's changes to the
Encoding infrastructure, which should fix potential threading issues.
* Added more tests, and the missing syncenumerator class. Fixed the
inheritance bug in the pull parser that James Britt found. Indentation
changes, and changed some exceptions to runtime
exceptions.
* Changes by Matz, mostly of indent -> indent_level, to avoid
function/variable naming conflicts
* Tabs -> spaces (whitespace)
Note the addition of syncenumerator.rb. This is a stopgap, until I can work on
the class enough to get it accepted as a replacement for the SyncEnumerator
that comes with the Generator class. My version is orders of magnitude faster
than the Generator SyncEnumerator, but is currently missing a couple of
features of the original. Eventually, I expect this class to migrate to
another part of the source tree.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@8483 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2005-05-18 22:58:11 -04:00
|
|
|
tmp = $'
|
2003-12-08 21:41:33 -05:00
|
|
|
#puts "#{fname} =~ #{NT.inspect}"
|
Merged in development from the main REXML repository.
* Fixed bug #34, typo in xpath_parser.
* Previous fix, (include? -> includes?) was incorrect.
* Added another test for encoding
* Started AnyName support in RelaxNG
* Added Element#Attributes#to_a, so that it does something intelligent.
This was needed by XPath, for '@*'
* Fixed XPath so that @* works.
* Added xmlgrep to the bin/ directory. A little tool allowing you to grep
for XPaths in an XML document.
* Fixed a CDATA pretty-printing bug. (#39)
* Fixed a buffering bug in Source.rb that affected the SAX parser
This bug was related to how REXML determines the encoding of a file, and
evinced itself by hanging on input when using the SAX parser.
* The unit test for the previous patch. Forgot to commit it.
* Minor pretty printing fix.
* Applied Curt Sampson's optimization improvements
* Issue #9; 3.1.3: The SAX parser was not denormalizing entity references
in incoming text. All declared internal entities, as well as numeric
entities, should now be denormalized. There was a related bug in that the
SAX parser was actually double-encoding entities; this is also fixed.
* bin/* programs should now be executable. Setting bin apps to executable
* Issue 14; 3.1.3: DTD events are now all being passed by StreamParser
Some of the DTD events were not being passed through by the stream parser.
* #26: Element#add_element(nil) now raises an error Changed XPath searches so
that if a non-Hash is passed, an error is raised. Fixed a spurious undefined
method error in encoding. #29: XPath ordering bug fixed by Mark Williams.
Incidentally, Mark supplied a superlative bug report, including a full unit
test. Then he went ahead and fixed the bug. It doesn't get any better than
this, folks.
* Fixed a broken link. Thanks to Dick Davies for pointing it out. Added
functions courtesy of Michael Neumann <mneumann@xxxx.de>.
Example code to follow.
* Added Michael's sample code. Merged the changes in from branches/xpath_V
* Fixed preceding:: and following:: axis Fixed the ordering bug that Martin
Fowler reported.
* Uncommented some code commented for testing Applied Nobu's changes to the
Encoding infrastructure, which should fix potential threading issues.
* Added more tests, and the missing syncenumerator class. Fixed the
inheritance bug in the pull parser that James Britt found. Indentation
changes, and changed some exceptions to runtime
exceptions.
* Changes by Matz, mostly of indent -> indent_level, to avoid
function/variable naming conflicts
* Tabs -> spaces (whitespace)
Note the addition of syncenumerator.rb. This is a stopgap, until I can work on
the class enough to get it accepted as a replacement for the SyncEnumerator
that comes with the Generator class. My version is orders of magnitude faster
than the Generator SyncEnumerator, but is currently missing a couple of
features of the original. Eventually, I expect this class to migrate to
another part of the source tree.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@8483 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2005-05-18 22:58:11 -04:00
|
|
|
return path if fname =~ NT
|
|
|
|
path = tmp
|
2003-12-08 21:41:33 -05:00
|
|
|
parsed << :function
|
|
|
|
parsed << fname
|
|
|
|
path = FunctionCall(path, parsed)
|
2006-09-07 22:03:44 -04:00
|
|
|
when NUMBER
|
|
|
|
#puts "LITERAL or NUMBER: #$1"
|
|
|
|
varname = $1.nil? ? $2 : $1
|
|
|
|
path = $'
|
|
|
|
parsed << :literal
|
|
|
|
parsed << (varname.include?('.') ? varname.to_f : varname.to_i)
|
|
|
|
when LITERAL
|
2003-12-08 21:41:33 -05:00
|
|
|
#puts "LITERAL or NUMBER: #$1"
|
|
|
|
varname = $1.nil? ? $2 : $1
|
|
|
|
path = $'
|
|
|
|
parsed << :literal
|
|
|
|
parsed << varname
|
|
|
|
when /^\(/ #/
|
|
|
|
path, contents = get_group(path)
|
|
|
|
contents = contents[1..-2]
|
|
|
|
n = []
|
|
|
|
OrExpr( contents, n )
|
|
|
|
parsed.concat(n)
|
|
|
|
end
|
|
|
|
path
|
|
|
|
end
|
2003-06-09 21:31:01 -04:00
|
|
|
|
2003-12-08 21:41:33 -05:00
|
|
|
#| FUNCTION_NAME '(' ( expr ( ',' expr )* )? ')'
|
|
|
|
# Parses the argument list of a function call and records it in +parsed+.
#
# rest::   the text handed to parse_args to be split into arguments
# parsed:: the array being built by the caller; one element is appended to
#          it: an array holding, for every argument, the array that OrExpr
#          filled in for that argument
#
# Returns the first element of the pair produced by parse_args (the text
# left over after the argument list).
#
# NOTE(review): parse_args returns nil when the argument list is never
# closed; in that case this method raises NoMethodError -- confirm whether
# callers expect a dedicated parse error instead.
def FunctionCall rest, parsed
  path, arguments = parse_args(rest)
  # Every argument gets its own fresh result array, populated by OrExpr.
  argset = arguments.map do |argument|
    args = []
    OrExpr( argument, args )
    args
  end
  parsed << argset
  path
end
|
2003-06-09 21:31:01 -04:00
|
|
|
|
2003-12-08 21:41:33 -05:00
|
|
|
# get_group( '[foo]bar' ) -> ['bar', '[foo]']
|
|
|
|
# Splits a leading bracketed group off the front of +string+.
#
# The first character of +string+ is taken as the opening delimiter; the
# closing delimiter is ")" when the opener is "(" and "]" otherwise.
# Nested groups using the same delimiters are balanced. Delimiters that
# appear inside a single- or double-quoted literal are ignored, so a
# predicate such as [@a=']'] is kept in one piece.
#
# Returns a two element array [remainder, group], where +group+ includes
# both delimiters, or nil if the group is never closed.
def get_group string
  opener = string[0,1]
  closer = (opener == "(" ? ")" : "]")
  depth = 0
  quote = nil   # the quote character of the literal being skipped, if any
  ind = 0
  loop do
    char = string[ind,1]
    if quote
      # Inside a literal: only the matching quote character ends it.
      quote = nil if char == quote
    elsif char == opener
      depth += 1
    elsif char == closer
      depth -= 1
    elsif char == '"' || char == "'"
      quote = char
    end
    ind += 1
    break unless depth > 0 && ind < string.length
  end
  return nil unless depth == 0
  [string[ind..-1], string[0..ind-1]]
end
|
|
|
|
|
|
|
|
# Splits the argument list of a function call into its top-level arguments.
#
# The scan starts at parenthesis depth 1, so +string+ is presumably the text
# that follows the opening "(" of the call (TODO confirm against the caller).
# Arguments are separated by commas at depth 1; commas and parentheses that
# occur inside nested parentheses or inside single- or double-quoted literals
# do not split. Each argument is stripped of surrounding whitespace and empty
# arguments are dropped.
#
# Returns [rest, arguments], where +rest+ is the text after the closing ")"
# and +arguments+ is an array of strings, or nil if the closing ")" is never
# found.
def parse_args( string )
  arguments = []
  ind = 0
  inquot = false   # inside a "..." literal
  inapos = false   # inside a '...' literal
  depth = 1
  loop do
    case string[ind]
    when ?"
      inquot = !inquot unless inapos
    when ?'
      inapos = !inapos unless inquot
    when ?(
      depth += 1 unless inquot || inapos
    when ?)
      unless inquot || inapos
        depth -= 1
        if depth == 0
          # Closing parenthesis of the call: flush the last argument and
          # keep what follows as the remainder.
          s = string[0,ind].strip
          arguments << s unless s.empty?
          string = string[ind+1..-1]
        end
      end
    when ?,
      if depth == 1 && !inquot && !inapos
        # Top-level separator: flush the argument and restart the scan at
        # the beginning of the remaining text (ind becomes 0 below).
        s = string[0,ind].strip
        arguments << s unless s.empty?
        string = string[ind+1..-1]
        ind = -1
      end
    end
    ind += 1
    break unless depth > 0 && ind < string.length
  end
  return nil unless depth == 0
  [string, arguments]
end
|
|
|
|
end
|
|
|
|
end
|
2003-06-09 21:31:01 -04:00
|
|
|
end
|