# Mirror of https://github.com/ruby/ruby.git (synced 2022-11-09 12:17:21 -05:00)
# File: ruby--ruby/lib/rexml/parsers/xpathparser.rb
# 699 lines, 19 KiB, Ruby

require 'rexml/namespace'
require 'rexml/xmltokens'
module REXML
module Parsers
# You don't want to use this class. Really. Use XPath, which is a wrapper
# for this class. Believe me. You don't want to poke around in here.
# There is strange, dark magic at work in this code. Beware. Go back! Go
# back while you still can!
class XPathParser
include XMLTokens
# Matches a quoted string literal at the start of a line of input.
# Group 1 is the body of a single-quoted literal, group 2 the body of a
# double-quoted one; only one of the two is set for any given match.
LITERAL = /^'([^']*)'|^"([^"]*)"/u
# Records +namespaces+ on this parser and also assigns the same object to
# Functions.namespace_context (the Functions assignment happens first).
def namespaces=( namespaces )
  @namespaces = (Functions.namespace_context = namespaces)
end
# Parses an XPath expression into the nested-array form built by OrExpr.
#
# path:: the XPath expression as a String. It is never modified: the
#        whitespace normalization below works on a copy, so frozen strings
#        and strings the caller still needs are both safe to pass.
#
# Returns the Array that OrExpr filled in.
def parse path
  # Strip ignorable whitespace right after "(" / "[" and right before
  # ")" / "]". gsub (not gsub!) keeps the caller's string intact.
  normalized = path.gsub(/([\(\[])\s+/, '\1').gsub(/\s+([\]\)])/, '\1')
  parsed = []
  OrExpr(normalized, parsed)
  parsed
end
# Wraps +path+ in square brackets, runs it through Predicate and returns the
# array that Predicate populated.
def predicate path
  [].tap { |parsed| Predicate( "[#{path}]", parsed ) }
end
# Renders a parsed path in abbreviated XPath syntax.
#
# path:: either a String (parsed first via #parse) or an already parsed
#        Array. An Array argument is consumed: tokens are shifted off it
#        until it is empty.
#
# Returns the abbreviated expression as a String; a "/" is prepended when a
# :document token was seen.
def abbreviate( path )
  path = parse( path ) if path.kind_of?(String)
  out = ""
  rooted = false
  until path.empty?
    token = path.shift
    case token
    when :node
      # node() contributes nothing in abbreviated form
    when :attribute
      out << "/" unless out.empty?
      out << "@"
    when :child
      out << "/" unless out.empty?
    when :descendant_or_self
      out << "/"
    when :self
      out << "."
    when :parent
      out << ".."
    when :any
      out << "*"
    when :text
      out << "text()"
    when :following, :following_sibling,
         :ancestor, :ancestor_or_self, :descendant,
         :namespace, :preceding, :preceding_sibling
      # These axes have no abbreviation, so spell them out as "axis-name::"
      out << "/" unless out.empty?
      out << token.to_s.tr("_", "-") << "::"
    when :qname
      # A :qname token is followed by its prefix and its local name
      prefix, name = path.shift(2)
      out << "#{prefix}:" unless prefix.empty?
      out << name
    when :predicate
      inner = predicate_to_string( path.shift ) { |sub| abbreviate( sub ) }
      out << '[' << inner << ']'
    when :document
      rooted = true
    when :function
      out << path.shift
      out << "( "
      out << predicate_to_string( path.shift[0] ) { |sub| abbreviate( sub ) }
      out << " )"
    when :literal
      out << %Q{ "#{path.shift}" }
    else
      out << "/" unless out.empty?
      out << "UNKNOWN(" << token.inspect << ")"
    end
  end
  rooted ? "/" + out : out
end
# Renders a parsed path in unabbreviated XPath syntax, with every axis
# written as "axis-name::".
#
# path:: either a String (parsed first via #parse) or an already parsed
#        Array. An Array argument is consumed: tokens are shifted off it
#        until it is empty.
#
# Returns the expanded expression as a String; a "/" is prepended when a
# :document token was seen. Tokens without a dedicated branch are rendered
# as "UNKNOWN(<inspect>)".
def expand( path )
  path = parse( path ) if path.kind_of?(String)
  out = ""
  rooted = false
  until path.empty?
    token = path.shift
    case token
    when :node
      out << "node()"
    when :attribute, :child, :following, :following_sibling,
         :ancestor, :ancestor_or_self, :descendant, :descendant_or_self,
         :namespace, :preceding, :preceding_sibling, :self, :parent
      out << "/" unless out.empty?
      out << token.to_s.tr("_", "-") << "::"
    when :any
      out << "*"
    when :qname
      # A :qname token is followed by its prefix and its local name
      prefix, name = path.shift(2)
      out << "#{prefix}:" unless prefix.empty?
      out << name
    when :predicate
      inner = predicate_to_string( path.shift ) { |sub| expand( sub ) }
      out << '[' << inner << ']'
    when :document
      rooted = true
    else
      out << "/" unless out.empty?
      out << "UNKNOWN(" << token.inspect << ")"
    end
  end
  rooted ? "/" + out : out
end
# Turns a parsed predicate expression back into text.
#
# path::  a parsed expression Array; it is consumed (shifted) while rendering.
# block:: called with any sub-array that is not an operator, function or
#         literal node; it must return that part rendered as a String.
#
# Returns the rendered String with runs of spaces collapsed to one space.
def predicate_to_string( path, &block )
  rendered =
    case path[0]
    when :and, :or, :mult, :plus, :minus, :neq, :eq, :lt, :gt, :lteq, :gteq, :div, :mod, :union
      # Operators with a symbolic spelling; the others print as their own name
      spelling = {
        :eq => "=", :lt => "<", :gt => ">", :lteq => "<=",
        :gteq => ">=", :neq => "!=", :union => "|"
      }
      op = path.shift
      lhs = predicate_to_string( path.shift, &block )
      rhs = predicate_to_string( path.shift, &block )
      " #{lhs} #{spelling.fetch(op, op)} #{rhs} "
    when :function
      path.shift
      fname = path.shift
      "" << fname << "( " << predicate_to_string( path.shift, &block ) << " )"
    when :literal
      path.shift
      " #{path.shift.inspect} "
    else
      " " + yield( path ) + " "
    end
  rendered.squeeze(" ")
end
private
#LocationPath
# | RelativeLocationPath
# | '/' RelativeLocationPath?
# | '//' RelativeLocationPath
# Parses a location path, handling an optional leading "/" or "//".
#
# path::   the text to parse; surrounding whitespace is stripped first.
# parsed:: Array that receives the tokens. A leading "/" adds :document;
#          a leading "//" adds :document, :descendant_or_self, :node.
#
# Returns whatever RelativeLocationPath returns for the remaining text, or
# nil when nothing is left after the leading slashes.
def LocationPath path, parsed
  path = path.strip
  if path.start_with?("//")
    parsed << :document << :descendant_or_self << :node
    path = path[2..-1]
  elsif path.start_with?("/")
    parsed << :document
    path = path[1..-1]
  end
  return nil if path.empty?
  RelativeLocationPath( path, parsed )
end
#RelativeLocationPath
# | Step
# | (AXIS_NAME '::' | '@' | '') AxisSpecifier
# NodeTest
# Predicate
# | '.' | '..' AbbreviatedStep
# | RelativeLocationPath '/' Step
# | RelativeLocationPath '//' Step
# Matches an explicit axis specifier ("axis-name::") at the start of a line;
# group 1 is the axis name as written, hyphens included.
AXIS = /^(ancestor|ancestor-or-self|attribute|child|descendant|descendant-or-self|following|following-sibling|namespace|parent|preceding|preceding-sibling|self)::/
# Parses a sequence of location steps separated by "/" or "//".
#
# Each step is "." (:self, :node), ".." (:parent, :node), or an axis ("@",
# an explicit "name::" matched by AXIS, or the implicit :child axis)
# followed by a NodeTest and, if the text continues with "[", a Predicate.
# A "//" separator adds :descendant_or_self, :node between steps.
#
# path::   the text to parse.
# parsed:: Array that receives the step tokens.
#
# Returns the unconsumed remainder of +path+: the empty string when
# everything was consumed, otherwise the text starting at the first
# character that is not a "/" separator.
def RelativeLocationPath path, parsed
  until path.empty?
    if path.start_with?("..")
      parsed << :parent << :node
      path = path[2..-1]
    elsif path.start_with?(".")
      parsed << :self << :node
      path = path[1..-1]
    else
      if path.start_with?("@")
        parsed << :attribute
        path = path[1..-1]
      elsif (axis = AXIS.match(path))
        parsed << axis[1].tr('-','_').intern
        path = axis.post_match
      else
        parsed << :child
      end
      # Collect the node test and its predicates separately, then append
      step = []
      path = NodeTest( path, step )
      path = Predicate( path, step ) if path.start_with?("[")
      parsed.concat( step )
    end

    break if path.empty?
    # Anything other than a step separator ends the relative path
    return path unless path.start_with?("/")

    if path.start_with?("//")
      parsed << :descendant_or_self << :node
      path = path[2..-1]
    else
      path = path[1..-1]
    end
  end
  path
end
# Returns a 1-1 map of the nodeset
# The contents of the resulting array are either:
# true/false, if a positive match
# String, if a name match
#NodeTest
# | ('*' | NCNAME ':' '*' | QNAME) NameTest
# | NODE_TYPE '(' ')' NodeType
# | PI '(' LITERAL ')' PI
# | '[' expr ']' Predicate
# "prefix:*" name test; group 1 is the prefix.
NCNAMETEST= /^(#{NCNAME_STR}):\*/u
# Qualified-name pattern, shared with Namespace. NodeTest reads group 1 as
# the (optional) prefix and group 2 as the name.
QNAME = Namespace::NAMESPLIT
# Node-type test with empty parentheses: comment(), text() or node();
# group 1 is the type name.
NODE_TYPE = /^(comment|text|node)\(\s*\)/m
# Opening of a processing-instruction test, up to and including the "(".
PI = /^processing-instruction\(/
# Parses a single node test at the start of +path+.
#
# path::   the text to parse.
# parsed:: Array that receives the tokens:
#          * "*"                            -> :any
#          * comment() / text() / node()    -> :comment / :text / :node
#          * processing-instruction(...)    -> :processing_instruction, target
#            (target is '' when the parentheses are empty; both single- and
#            double-quoted targets are accepted)
#          * prefix:*                       -> :namespace, prefix
#          * [prefix:]name                  -> :qname, prefix ('' if none), name
#
# Returns the text following the node test (the input unchanged when no
# alternative matches).
#
# Raises ParseException when a processing-instruction test has neither a
# literal nor ")" after its "(", or lacks the closing ")" after the literal.
def NodeTest path, parsed
  case path
  when /^\*/
    path = $'
    parsed << :any
  when NODE_TYPE
    type = $1
    path = $'
    parsed << type.tr('-', '_').intern
  when PI
    path = $'
    literal = nil
    if path =~ /\A\s*\)/
      # processing-instruction(): consume the ")" so that following steps
      # and predicates are still parsed by the caller
      path = $'
    else
      unless path =~ LITERAL
        raise ParseException.new("Expected a literal or ')' after processing-instruction(")
      end
      # LITERAL captures single-quoted text in group 1, double-quoted in 2
      literal = $1 || $2
      path = $'
      raise ParseException.new("Missing ')' after processing instruction") if path[0] != ?)
      path = path[1..-1]
    end
    parsed << :processing_instruction
    parsed << (literal || '')
  when NCNAMETEST
    prefix = $1
    path = $'
    parsed << :namespace
    parsed << prefix
  when QNAME
    prefix = $1
    name = $2
    path = $'
    prefix = "" unless prefix
    parsed << :qname
    parsed << prefix
    parsed << name
  end
  return path
end
# Filters the supplied nodeset on the predicate(s)
# Parses one or more consecutive "[...]" predicates at the start of +path+.
#
# path::   the text to parse; must begin with "[" for anything to happen.
# parsed:: Array that receives, for every predicate, the token :predicate
#          followed by the Array that OrExpr filled from the bracket body.
#
# Returns nil when +path+ does not start with "[", otherwise the text that
# follows the last predicate. All bracket groups are extracted before any of
# them is handed to OrExpr.
def Predicate path, parsed
  return nil unless path[0] == "["
  bodies = []
  while path[0] == "["
    path, group = get_group(path)
    # Drop the surrounding brackets
    bodies << group[1..-2] if group
  end
  bodies.each do |body|
    sub = []
    parsed.push( :predicate, sub )
    OrExpr( body, sub )
  end
  path
end
# The following return arrays of true/false, a 1-1 mapping of the
# supplied nodeset, except for axe(), which returns a filtered
# nodeset
#| OrExpr S 'or' S AndExpr
#| AndExpr
# Parses a chain of AndExpr operands joined by " or ", building a
# left-nested [:or, left, right] tree.
#
# path::   the text to parse.
# parsed:: Array receiving the result. If it is empty it is replaced by the
#          tree; otherwise the tree is appended as one element. Nothing is
#          added when no operand produced any tokens.
#
# Returns the unconsumed remainder as returned by the last AndExpr call.
def OrExpr path, parsed
  tree = []
  remainder = AndExpr( path, tree )
  # Only look for an operator if the first operand consumed something
  unless remainder == path
    while (op = /^\s*( or )/.match(remainder))
      rhs = []
      tree = [ :or, tree, rhs ]
      remainder = AndExpr( op.post_match, rhs )
    end
  end
  unless tree.empty?
    parsed.empty? ? parsed.replace(tree) : parsed << tree
  end
  remainder
end
#| AndExpr S 'and' S EqualityExpr
#| EqualityExpr
# Parses a chain of EqualityExpr operands joined by " and ", building a
# left-nested [:and, left, right] tree.
#
# path::   the text to parse.
# parsed:: Array receiving the result. If it is empty it is replaced by the
#          tree; otherwise the tree is appended as one element. Nothing is
#          added when no operand produced any tokens.
#
# Returns the unconsumed remainder as returned by the last EqualityExpr call.
def AndExpr path, parsed
  tree = []
  remainder = EqualityExpr( path, tree )
  # Only look for an operator if the first operand consumed something
  if remainder != path
    loop do
      op = /^\s*( and )/.match(remainder)
      break unless op
      rhs = []
      tree = [ :and, tree, rhs ]
      remainder = EqualityExpr( op.post_match, rhs )
    end
  end
  return remainder if tree.empty?
  if parsed.empty?
    parsed.replace(tree)
  else
    parsed << tree
  end
  remainder
end
#| EqualityExpr ('=' | '!=') RelationalExpr
#| RelationalExpr
# Parses a chain of RelationalExpr operands joined by "=" or "!=", building
# a left-nested [:eq, left, right] / [:neq, left, right] tree.
#
# path::   the text to parse.
# parsed:: Array receiving the result. If it is empty it is replaced by the
#          tree; otherwise the tree is appended as one element. Nothing is
#          added when no operand produced any tokens.
#
# Returns the unconsumed remainder as returned by the last RelationalExpr
# call.
def EqualityExpr path, parsed
  tree = []
  remainder = RelationalExpr( path, tree )
  # Only look for an operator if the first operand consumed something
  unless remainder == path
    while (op = /^\s*(!?=)\s*/.match(remainder))
      rhs = []
      tree = [ (op[1].start_with?("!") ? :neq : :eq), tree, rhs ]
      remainder = RelationalExpr( op.post_match, rhs )
    end
  end
  unless tree.empty?
    parsed.empty? ? parsed.replace(tree) : parsed << tree
  end
  remainder
end
#| RelationalExpr ('<' | '>' | '<=' | '>=') AdditiveExpr
#| AdditiveExpr
# Parses a chain of AdditiveExpr operands joined by "<", ">", "<=" or ">=",
# building a left-nested [:lt | :gt | :lteq | :gteq, left, right] tree.
#
# path::   the text to parse.
# parsed:: Array receiving the result. If it is empty it is replaced by the
#          tree; otherwise the tree is appended as one element. Nothing is
#          added when no operand produced any tokens.
#
# Returns the unconsumed remainder as returned by the last AdditiveExpr call.
def RelationalExpr path, parsed
  # The operator pattern below can only capture these four spellings
  operators = { "<" => :lt, "<=" => :lteq, ">" => :gt, ">=" => :gteq }
  tree = []
  remainder = AdditiveExpr( path, tree )
  # Only look for an operator if the first operand consumed something
  unless remainder == path
    while (op = /^\s*([<>]=?)\s*/.match(remainder))
      rhs = []
      tree = [ operators[op[1]], tree, rhs ]
      remainder = AdditiveExpr( op.post_match, rhs )
    end
  end
  unless tree.empty?
    parsed.empty? ? parsed.replace(tree) : parsed << tree
  end
  remainder
end
#| AdditiveExpr ('+' | S '-') MultiplicativeExpr
#| MultiplicativeExpr
# Parses a chain of MultiplicativeExpr operands joined by "+" or " -" (the
# minus must be preceded by a space), building a left-nested
# [:plus, left, right] / [:minus, left, right] tree.
#
# path::   the text to parse.
# parsed:: Array receiving the result. If it is empty it is replaced by the
#          tree; otherwise the tree is appended as one element. Nothing is
#          added when no operand produced any tokens.
#
# Returns the unconsumed remainder as returned by the last
# MultiplicativeExpr call.
def AdditiveExpr path, parsed
  tree = []
  remainder = MultiplicativeExpr( path, tree )
  # Only look for an operator if the first operand consumed something
  if remainder != path
    loop do
      op = /^\s*(\+| -)\s*/.match(remainder)
      break unless op
      kind = op[1].start_with?("+") ? :plus : :minus
      rhs = []
      tree = [ kind, tree, rhs ]
      remainder = MultiplicativeExpr( op.post_match, rhs )
    end
  end
  return remainder if tree.empty?
  if parsed.empty?
    parsed.replace(tree)
  else
    parsed << tree
  end
  remainder
end
#| MultiplicativeExpr ('*' | S ('div' | 'mod') S) UnaryExpr
#| UnaryExpr
# Parses a chain of UnaryExpr operands joined by "*", " div " or " mod ",
# building a left-nested [:mult | :div | :mod, left, right] tree.
#
# path::   the text to parse.
# parsed:: Array receiving the result. If it is empty it is replaced by the
#          tree; otherwise the tree is appended as one element. Nothing is
#          added when no operand produced any tokens.
#
# Returns the unconsumed remainder as returned by the last UnaryExpr call.
def MultiplicativeExpr path, parsed
  tree = []
  remainder = UnaryExpr( path, tree )
  # Only look for an operator if the first operand consumed something
  unless remainder == path
    while (op = /^\s*(\*| div | mod )\s*/.match(remainder))
      kind =
        case op[1]
        when "*"     then :mult
        when " div " then :div
        else              :mod
        end
      rhs = []
      tree = [ kind, tree, rhs ]
      remainder = UnaryExpr( op.post_match, rhs )
    end
  end
  unless tree.empty?
    parsed.empty? ? parsed.replace(tree) : parsed << tree
  end
  remainder
end
#| '-' UnaryExpr
#| UnionExpr
# Parses an optional run of leading "-" signs followed by a UnionExpr.
#
# path::   the text to parse.
# parsed:: Array that receives :neg when the number of leading minus signs
#          is odd, followed by the tokens UnionExpr produced.
#
# Returns the unconsumed remainder as returned by UnionExpr.
def UnaryExpr path, parsed
  signs = /^(\-*)/.match(path)
  path = signs && signs.post_match
  # Pairs of minus signs cancel out; only an odd count negates
  parsed << :neg if signs && signs[1].size.odd?
  operand = []
  path = UnionExpr( path, operand )
  parsed.concat( operand )
  path
end
#| UnionExpr '|' PathExpr
#| PathExpr
# Parses a chain of PathExpr operands joined by "|", building a left-nested
# [:union, left, right] tree.
#
# path::   the text to parse.
# parsed:: Array receiving the result. If it is empty it is replaced by the
#          tree; otherwise the tree is appended as one element. Nothing is
#          added when no operand produced any tokens.
#
# Returns the unconsumed remainder as returned by the last PathExpr call.
def UnionExpr path, parsed
  tree = []
  remainder = PathExpr( path, tree )
  # Only look for an operator if the first operand consumed something
  if remainder != path
    loop do
      bar = /^\s*(\|)\s*/.match(remainder)
      break unless bar
      rhs = []
      tree = [ :union, tree, rhs ]
      remainder = PathExpr( bar.post_match, rhs )
    end
  end
  return remainder if tree.empty?
  if parsed.empty?
    parsed.replace( tree )
  else
    parsed << tree
  end
  remainder
end
#| LocationPath
#| FilterExpr ('/' | '//') RelativeLocationPath
# Parses a path expression: either a filter expression optionally followed
# by "/"-separated steps, or a plain location path.
#
# path::   the text to parse; leading whitespace is skipped.
# parsed:: Array to which all tokens produced here are appended, including
#          those of a filter expression that is followed by further steps.
#
# Returns the unconsumed remainder as returned by the last sub-parser.
def PathExpr path, parsed
  path = $' if path =~ /^\s*/
  n = []
  rest = FilterExpr( path, n )
  if rest != path && rest && rest[0] == ?/
    # FilterExpr consumed something and steps follow: parse them into the
    # same buffer so the filter tokens and the steps are emitted together.
    rest = RelativeLocationPath(rest, n)
  elsif rest =~ /\A[\/\.\@\[\w_*]/
    rest = LocationPath(rest, n)
  end
  parsed.concat(n)
  rest
end
#| FilterExpr Predicate
#| PrimaryExpr
# Parses a primary expression and, when the remaining text starts with "[",
# the predicates attached to it.
#
# path::   the text to parse.
# parsed:: Array to which the tokens from PrimaryExpr and Predicate are
#          appended.
#
# Returns the unconsumed remainder.
def FilterExpr path, parsed
  tokens = []
  remainder = PrimaryExpr( path, tokens )
  remainder = Predicate( remainder, tokens ) if remainder && remainder[0] == "["
  parsed.concat( tokens )
  remainder
end
#| VARIABLE_REFERENCE
#| '(' expr ')'
#| LITERAL
#| NUMBER
#| FunctionCall
# "$name" variable reference; group 1 is the name without the "$".
VARIABLE_REFERENCE = /^\$(#{NAME_STR})/u
# Unsigned decimal number such as "12", "1.5" or ".5"; at least one digit is
# required after an optional decimal point.
NUMBER = /^(\d*\.?\d+)/
# Names that look like function calls but are node-type tests. The
# alternatives are grouped so the anchors apply to every one of them;
# otherwise any name merely containing "text" or "processing-instruction"
# (or starting with "comment" / ending in "node") would match.
NT = /\A(?:comment|text|processing-instruction|node)\z/
# Parses a primary expression at the start of +path+.
#
# path::   the text to parse.
# parsed:: Array that receives the tokens:
#          * $name           -> :variable, name
#          * name(...)       -> :function, name, then whatever FunctionCall
#                               appends (node-type names matched by NT are
#                               not functions and are left untouched)
#          * number          -> :literal, Float if it contains ".", else Integer
#          * quoted string   -> :literal, the text between the quotes
#          * ( expr )        -> the tokens OrExpr produces for the contents
#
# Returns the text following the primary expression, or +path+ unchanged
# when nothing matched.
def PrimaryExpr path, parsed
  case path
  when VARIABLE_REFERENCE
    varname = $1
    path = $'
    parsed << :variable
    parsed << varname
  when /^(\w[-\w]*)(?:\()/
    fname = $1
    # Save the remainder now: the NT match below overwrites $'
    after_paren = $'
    # comment(, text(, ... are node tests, to be handled as a location step
    return path if fname =~ NT
    parsed << :function
    parsed << fname
    path = FunctionCall(after_paren, parsed)
  when NUMBER
    number = $1
    path = $'
    parsed << :literal
    parsed << (number.include?('.') ? number.to_f : number.to_i)
  when LITERAL
    # Group 1 is set for '...' literals, group 2 for "..." literals
    text = $1.nil? ? $2 : $1
    path = $'
    parsed << :literal
    parsed << text
  when /^\(/ #/
    path, contents = get_group(path)
    # Drop the surrounding parentheses
    contents = contents[1..-2]
    n = []
    OrExpr( contents, n )
    parsed.concat(n)
  end
  path
end
#| FUNCTION_NAME '(' ( expr ( ',' expr )* )? ')'
# Parses the argument list of a function call.
#
# rest::   the text following the function's opening parenthesis.
# parsed:: Array that receives one element: an Array holding, for each
#          argument string returned by parse_args, the Array OrExpr filled
#          in for it.
#
# Returns the text following the argument list, as reported by parse_args.
def FunctionCall rest, parsed
  remainder, raw_arguments = parse_args(rest)
  parsed << raw_arguments.map { |argument| [].tap { |tree| OrExpr( argument, tree ) } }
  remainder
end
# get_group( '[foo]bar' ) -> ['bar', '[foo]']
# Splits off the balanced bracket group that starts +string+.
#
# The first character is the opening delimiter; the closing delimiter is
# ")" when that character is "(" and "]" otherwise. Nested pairs of the same
# delimiters are tracked by depth. Quotes are not interpreted.
#
# Returns [text_after_group, group_including_delimiters], or nil when the
# group is never closed.
def get_group string
  opener = string[0, 1]
  closer = opener == "(" ? ")" : "]"
  nesting = 0
  pos = 0
  loop do
    char = string[pos, 1]
    if char == opener
      nesting += 1
    elsif char == closer
      nesting -= 1
    end
    pos += 1
    break unless nesting > 0 && pos < string.length
  end
  return nil unless nesting.zero?
  [string[pos..-1], string[0...pos]]
end
# Splits a function's argument list into its top-level arguments.
#
# string:: the text following the function's opening parenthesis, so the
#          scan starts at nesting depth 1.
#
# Arguments are separated by commas at depth 1. Commas and parentheses
# inside single- or double-quoted text are ignored, and nested parentheses
# raise the depth. Each argument is stripped; empty arguments are dropped.
#
# Returns [text_after_closing_paren, arguments], or nil when the closing
# parenthesis is never found.
def parse_args( string )
  collected = []
  pos = 0
  in_double = false
  in_single = false
  nesting = 1
  loop do
    char = string[pos]
    if char == '"'
      in_double = !in_double unless in_single
    elsif char == "'"
      in_single = !in_single unless in_double
    elsif !(in_double || in_single)
      if char == "("
        nesting += 1
      elsif char == ")"
        nesting -= 1
        if nesting.zero?
          # Closing parenthesis of the call: flush the last argument
          piece = string[0, pos].strip
          collected << piece unless piece == ""
          string = string[(pos + 1)..-1]
        end
      elsif char == "," && nesting == 1
        piece = string[0, pos].strip
        collected << piece unless piece == ""
        # Continue scanning from the start of the text after the comma
        string = string[(pos + 1)..-1]
        pos = -1
      end
    end
    pos += 1
    break unless nesting > 0 && pos < string.length
  end
  return nil unless nesting.zero?
  [string, collected]
end
end
end
end