mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
REXML changes backported from the 1.9 branch:
* Minor source documentation changes
* Changes to the pretty-printing code, including the addition of the word-wrap submission.
* Bug fix for missing quotations in NOTATION DTD items
* Bug fixes and improvements to whitespace handling in text nodes
* Refactoring and bug fixes in encoding support
* Minor speed optimizations in the core parser
* Bug fixes in the SAX2 parser
* Copyright fixes
* Version bump to REXML 3.0.0
* A change that caused speed degradation has been reversed
* Addition of a value=() method in Text, for replacing the contents of a text node
* Fixed the document order of the descendant-or-self axis in XPath

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/branches/ruby_1_8@6071 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
parent
354d68f80b
commit
7a07ba45a0
16 changed files with 141 additions and 91 deletions
|
@ -39,7 +39,10 @@ module REXML
|
||||||
# indentation will be this number of spaces, and children will be
|
# indentation will be this number of spaces, and children will be
|
||||||
# indented an additional amount.
|
# indented an additional amount.
|
||||||
# transitive::
|
# transitive::
|
||||||
# Who knows?
|
# If transitive is true and indent is >= 0, then the output will be
|
||||||
|
# pretty-printed in such a way that the added whitespace does not affect
|
||||||
|
# the absolute *value* of the document -- that is, it leaves the value
|
||||||
|
# and number of Text nodes in the document unchanged.
|
||||||
# ie_hack::
|
# ie_hack::
|
||||||
# Internet Explorer is the worst piece of crap to have ever been
|
# Internet Explorer is the worst piece of crap to have ever been
|
||||||
# written, with the possible exception of Windows itself. Since IE is
|
# written, with the possible exception of Windows itself. Since IE is
|
||||||
|
|
|
@ -92,7 +92,10 @@ module REXML
|
||||||
# indentation will be this number of spaces, and children will be
|
# indentation will be this number of spaces, and children will be
|
||||||
# indented an additional amount.
|
# indented an additional amount.
|
||||||
# transitive::
|
# transitive::
|
||||||
# Who knows?
|
# If transitive is true and indent is >= 0, then the output will be
|
||||||
|
# pretty-printed in such a way that the added whitespace does not affect
|
||||||
|
# the absolute *value* of the document -- that is, it leaves the value
|
||||||
|
# and number of Text nodes in the document unchanged.
|
||||||
# ie_hack::
|
# ie_hack::
|
||||||
# Internet Explorer is the worst piece of crap to have ever been
|
# Internet Explorer is the worst piece of crap to have ever been
|
||||||
# written, with the possible exception of Windows itself. Since IE is
|
# written, with the possible exception of Windows itself. Since IE is
|
||||||
|
@ -109,7 +112,7 @@ module REXML
|
||||||
output << " #@long_name" if @long_name
|
output << " #@long_name" if @long_name
|
||||||
output << " #@uri" if @uri
|
output << " #@uri" if @uri
|
||||||
unless @children.empty?
|
unless @children.empty?
|
||||||
next_indent = indent + 2
|
next_indent = indent + 1
|
||||||
output << ' ['
|
output << ' ['
|
||||||
child = nil # speed
|
child = nil # speed
|
||||||
@children.each { |child|
|
@children.each { |child|
|
||||||
|
@ -123,6 +126,10 @@ module REXML
|
||||||
output << STOP
|
output << STOP
|
||||||
end
|
end
|
||||||
|
|
||||||
|
def context
|
||||||
|
@parent.context
|
||||||
|
end
|
||||||
|
|
||||||
def entity( name )
|
def entity( name )
|
||||||
@entities[name].unnormalized if @entities[name]
|
@entities[name].unnormalized if @entities[name]
|
||||||
end
|
end
|
||||||
|
@ -185,7 +192,7 @@ module REXML
|
||||||
end
|
end
|
||||||
|
|
||||||
def to_s
|
def to_s
|
||||||
"<!NOTATION #@name #@middle #@rest>"
|
"<!NOTATION #@name '#@middle #@rest'>"
|
||||||
end
|
end
|
||||||
|
|
||||||
def write( output, indent=-1 )
|
def write( output, indent=-1 )
|
||||||
|
|
|
@ -145,7 +145,10 @@ module REXML
|
||||||
# indentation will be this number of spaces, and children will be
|
# indentation will be this number of spaces, and children will be
|
||||||
# indented an additional amount. Defaults to -1
|
# indented an additional amount. Defaults to -1
|
||||||
# transitive::
|
# transitive::
|
||||||
# What the heck does this do? Defaults to false
|
# If transitive is true and indent is >= 0, then the output will be
|
||||||
|
# pretty-printed in such a way that the added whitespace does not affect
|
||||||
|
# the absolute *value* of the document -- that is, it leaves the value
|
||||||
|
# and number of Text nodes in the document unchanged.
|
||||||
# ie_hack::
|
# ie_hack::
|
||||||
# Internet Explorer is the worst piece of crap to have ever been
|
# Internet Explorer is the worst piece of crap to have ever been
|
||||||
# written, with the possible exception of Windows itself. Since IE is
|
# written, with the possible exception of Windows itself. Since IE is
|
||||||
|
@ -191,7 +194,7 @@ module REXML
|
||||||
build_context[-1] << event[1]
|
build_context[-1] << event[1]
|
||||||
else
|
else
|
||||||
build_context.add(
|
build_context.add(
|
||||||
Text.new( event[1], true, nil, true )
|
Text.new( event[1], build_context.whitespace, nil, true )
|
||||||
) unless (
|
) unless (
|
||||||
event[1].strip.size==0 and
|
event[1].strip.size==0 and
|
||||||
build_context.ignore_whitespace_nodes
|
build_context.ignore_whitespace_nodes
|
||||||
|
|
|
@ -42,7 +42,7 @@ module REXML
|
||||||
end
|
end
|
||||||
|
|
||||||
def write( output, indent )
|
def write( output, indent )
|
||||||
output << (' '*indent) if indent > 0
|
indent( output, indent )
|
||||||
output << to_s
|
output << to_s
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
|
@ -25,7 +25,7 @@ module REXML
|
||||||
end
|
end
|
||||||
|
|
||||||
def write( output, indent )
|
def write( output, indent )
|
||||||
output << (' '*indent) if indent > 0
|
indent( output, indent )
|
||||||
output << to_s
|
output << to_s
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
|
@ -98,8 +98,9 @@ module REXML
|
||||||
# is the case if:
|
# is the case if:
|
||||||
# 1. Neither :+respect_whitespace+ nor :+compress_whitespace+ has any value
|
# 1. Neither :+respect_whitespace+ nor :+compress_whitespace+ has any value
|
||||||
# 2. The context has :+respect_whitespace+ set to :+all+ or
|
# 2. The context has :+respect_whitespace+ set to :+all+ or
|
||||||
# an array containing the name of this element, and :+compress_whitespace+
|
# an array containing the name of this element, and
|
||||||
# isn't set to :+all+ or an array containing the name of this element.
|
# :+compress_whitespace+ isn't set to :+all+ or an array containing the
|
||||||
|
# name of this element.
|
||||||
# The evaluation is tested against +expanded_name+, and so is namespace
|
# The evaluation is tested against +expanded_name+, and so is namespace
|
||||||
# sensitive.
|
# sensitive.
|
||||||
def whitespace
|
def whitespace
|
||||||
|
@ -606,7 +607,9 @@ module REXML
|
||||||
# indentation will be this number of spaces, and children will be
|
# indentation will be this number of spaces, and children will be
|
||||||
# indented an additional amount. Defaults to -1
|
# indented an additional amount. Defaults to -1
|
||||||
# transitive::
|
# transitive::
|
||||||
# What the heck does this do? Defaults to false
|
# If transitive is true and indent is >= 0, then the output will be
|
||||||
|
# pretty-printed in such a way that the added whitespace does not affect
|
||||||
|
# the parse tree of the document
|
||||||
# ie_hack::
|
# ie_hack::
|
||||||
# Internet Explorer is the worst piece of crap to have ever been
|
# Internet Explorer is the worst piece of crap to have ever been
|
||||||
# written, with the possible exception of Windows itself. Since IE is
|
# written, with the possible exception of Windows itself. Since IE is
|
||||||
|
@ -632,7 +635,7 @@ module REXML
|
||||||
else
|
else
|
||||||
if transitive and indent>-1 and !@children[0].kind_of? Text
|
if transitive and indent>-1 and !@children[0].kind_of? Text
|
||||||
writer << "\n"
|
writer << "\n"
|
||||||
indent writer, indent+2
|
indent writer, indent+1
|
||||||
end
|
end
|
||||||
writer << ">"
|
writer << ">"
|
||||||
write_children( writer, indent, transitive, ie_hack )
|
write_children( writer, indent, transitive, ie_hack )
|
||||||
|
@ -640,7 +643,7 @@ module REXML
|
||||||
end
|
end
|
||||||
if transitive and indent>-1
|
if transitive and indent>-1
|
||||||
writer << "\n"
|
writer << "\n"
|
||||||
indent -= 2 if next_sibling.nil?
|
indent -= 1 if next_sibling.nil?
|
||||||
indent(writer, indent)
|
indent(writer, indent)
|
||||||
end
|
end
|
||||||
writer << ">"
|
writer << ">"
|
||||||
|
@ -661,12 +664,10 @@ module REXML
|
||||||
# A private helper method
|
# A private helper method
|
||||||
def write_children( writer, indent, transitive, ie_hack )
|
def write_children( writer, indent, transitive, ie_hack )
|
||||||
cr = (indent < 0) ? '' : "\n"
|
cr = (indent < 0) ? '' : "\n"
|
||||||
#if size == 1 and @children[0].kind_of?(Text)
|
|
||||||
# self[0].write( writer, -1 )
|
|
||||||
if indent == -1
|
if indent == -1
|
||||||
each { |child| child.write( writer, indent, transitive, ie_hack ) }
|
each { |child| child.write( writer, indent, transitive, ie_hack ) }
|
||||||
else
|
else
|
||||||
next_indent = indent+2
|
next_indent = indent+1
|
||||||
last_child=nil
|
last_child=nil
|
||||||
each { |child|
|
each { |child|
|
||||||
unless child.kind_of? Text or last_child.kind_of? Text or transitive
|
unless child.kind_of? Text or last_child.kind_of? Text or transitive
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
module REXML
|
module REXML
|
||||||
module Encoding
|
module Encoding
|
||||||
@@__REXML_encoding_methods =<<-'EOL'
|
@@__REXML_encoding_methods = %q~
|
||||||
# Convert from UTF-8
|
# Convert from UTF-8
|
||||||
def encode content
|
def encode content
|
||||||
array_utf8 = content.unpack('U*')
|
array_utf8 = content.unpack('U*')
|
||||||
|
@ -20,6 +20,6 @@ module REXML
|
||||||
def decode(str)
|
def decode(str)
|
||||||
str.unpack('C*').pack('U*')
|
str.unpack('C*').pack('U*')
|
||||||
end
|
end
|
||||||
EOL
|
~
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
|
@ -1,33 +1 @@
|
||||||
begin
|
require 'rexml/encodings/SHIFT-JIS'
|
||||||
require 'uconv'
|
|
||||||
|
|
||||||
module REXML
|
|
||||||
module Encoding
|
|
||||||
def to_shift_jis content
|
|
||||||
Uconv::u8tosjis(content)
|
|
||||||
end
|
|
||||||
|
|
||||||
def from_shift_jis(str)
|
|
||||||
Uconv::sjistou8(str)
|
|
||||||
end
|
|
||||||
end
|
|
||||||
end
|
|
||||||
rescue LoadError
|
|
||||||
begin
|
|
||||||
require 'iconv'
|
|
||||||
module REXML
|
|
||||||
module Encoding
|
|
||||||
def from_shift_jis(str)
|
|
||||||
return Iconv::iconv("utf-8", "shift_jis", str).join('')
|
|
||||||
end
|
|
||||||
|
|
||||||
def to_shift_jis content
|
|
||||||
return Iconv::iconv("shift_jis", "utf-8", content).join('')
|
|
||||||
end
|
|
||||||
end
|
|
||||||
end
|
|
||||||
rescue LoadError
|
|
||||||
raise "uconv or iconv is required for Japanese encoding support."
|
|
||||||
end
|
|
||||||
|
|
||||||
end
|
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
module REXML
|
module REXML
|
||||||
module Encoding
|
module Encoding
|
||||||
@@__REXML_encoding_methods =<<-'EOL'
|
@@__REXML_encoding_methods = %q~
|
||||||
# Convert from UTF-8
|
# Convert from UTF-8
|
||||||
def encode content
|
def encode content
|
||||||
array_utf8 = content.unpack('U*')
|
array_utf8 = content.unpack('U*')
|
||||||
|
@ -20,6 +20,6 @@ module REXML
|
||||||
def decode(str)
|
def decode(str)
|
||||||
str.unpack('C*').pack('U*')
|
str.unpack('C*').pack('U*')
|
||||||
end
|
end
|
||||||
EOL
|
~
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
|
@ -25,7 +25,12 @@ module REXML
|
||||||
end
|
end
|
||||||
|
|
||||||
def indent to, ind
|
def indent to, ind
|
||||||
to << " "*ind unless ind<1
|
if @parent and @parent.context and not @parent.context[:indentstyle].nil? then
|
||||||
|
indentstyle = @parent.context[:indentstyle]
|
||||||
|
else
|
||||||
|
indentstyle = ' '
|
||||||
|
end
|
||||||
|
to << indentstyle*ind unless ind<1
|
||||||
end
|
end
|
||||||
|
|
||||||
def parent?
|
def parent?
|
||||||
|
|
|
@ -122,14 +122,14 @@ module REXML
|
||||||
|
|
||||||
# Returns true if there are no more events
|
# Returns true if there are no more events
|
||||||
def empty?
|
def empty?
|
||||||
!has_next?
|
#puts "@source.empty? = #{@source.empty?}"
|
||||||
|
#puts "@stack.empty? = #{@stack.empty?}"
|
||||||
|
return (@source.empty? and @stack.empty?)
|
||||||
end
|
end
|
||||||
|
|
||||||
# Returns true if there are more events. Synonymous with !empty?
|
# Returns true if there are more events. Synonymous with !empty?
|
||||||
def has_next?
|
def has_next?
|
||||||
return true if @closed
|
return !(@source.empty? and @stack.empty?)
|
||||||
@source.read if @source.buffer.size==0 and !@source.empty?
|
|
||||||
(!@source.empty? and @source.buffer.strip.size>0) or @stack.size>0 or @closed
|
|
||||||
end
|
end
|
||||||
|
|
||||||
# Push an event back on the head of the stream. This method
|
# Push an event back on the head of the stream. This method
|
||||||
|
@ -329,9 +329,12 @@ module REXML
|
||||||
end
|
end
|
||||||
else
|
else
|
||||||
md = @source.match( TEXT_PATTERN, true )
|
md = @source.match( TEXT_PATTERN, true )
|
||||||
#md = @source.match_to_consume( '<', TEXT_PATTERN )
|
if md[0].length == 0
|
||||||
#@source.read
|
#puts "EMPTY = #{empty?}"
|
||||||
raise REXML::ParseException("no text to add") if md[0].length == 0
|
#puts "BUFFER = \"#{@source.buffer}\""
|
||||||
|
@source.match( /(\s+)/, true )
|
||||||
|
end
|
||||||
|
#return [ :text, "" ] if md[0].length == 0
|
||||||
# unnormalized = Text::unnormalize( md[1], self )
|
# unnormalized = Text::unnormalize( md[1], self )
|
||||||
# return PullEvent.new( :text, md[1], unnormalized )
|
# return PullEvent.new( :text, md[1], unnormalized )
|
||||||
return [ :text, md[1] ]
|
return [ :text, md[1] ]
|
||||||
|
|
|
@ -45,7 +45,7 @@ module REXML
|
||||||
if args.size == 2
|
if args.size == 2
|
||||||
args[1].each { |match| @procs << [args[0], match, blok] }
|
args[1].each { |match| @procs << [args[0], match, blok] }
|
||||||
else
|
else
|
||||||
add( [args[0], /.*/, blok] )
|
add( [args[0], nil, blok] )
|
||||||
end
|
end
|
||||||
elsif args[0].kind_of? Array
|
elsif args[0].kind_of? Array
|
||||||
if args.size == 2
|
if args.size == 2
|
||||||
|
@ -54,7 +54,7 @@ module REXML
|
||||||
args[0].each { |match| add( [ :start_element, match, blok ] ) }
|
args[0].each { |match| add( [ :start_element, match, blok ] ) }
|
||||||
end
|
end
|
||||||
else
|
else
|
||||||
add([nil, /.*/, args[0]])
|
add([nil, nil, args[0]])
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -164,9 +164,10 @@ module REXML
|
||||||
def get_procs( symbol, name )
|
def get_procs( symbol, name )
|
||||||
return nil if @procs.size == 0
|
return nil if @procs.size == 0
|
||||||
@procs.find_all do |sym, match, block|
|
@procs.find_all do |sym, match, block|
|
||||||
|
#puts sym.inspect+"=="+symbol.inspect+ "\t"+match.inspect+"=="+name.inspect+ "\t"+( (sym.nil? or symbol == sym) and ((name.nil? and match.nil?) or match.nil? or ( (name == match) or (match.kind_of? Regexp and name =~ match)))).to_s
|
||||||
(
|
(
|
||||||
(sym.nil? or symbol == sym) and
|
(sym.nil? or symbol == sym) and
|
||||||
(name.nil? or (
|
((name.nil? and match.nil?) or match.nil? or (
|
||||||
(name == match) or
|
(name == match) or
|
||||||
(match.kind_of? Regexp and name =~ match)
|
(match.kind_of? Regexp and name =~ match)
|
||||||
)
|
)
|
||||||
|
@ -179,7 +180,7 @@ module REXML
|
||||||
@listeners.find_all do |sym, match, block|
|
@listeners.find_all do |sym, match, block|
|
||||||
(
|
(
|
||||||
(sym.nil? or symbol == sym) and
|
(sym.nil? or symbol == sym) and
|
||||||
(name.nil? or (
|
((name.nil? and match.nil?) or match.nil? or (
|
||||||
(name == match) or
|
(name == match) or
|
||||||
(match.kind_of? Regexp and name =~ match)
|
(match.kind_of? Regexp and name =~ match)
|
||||||
)
|
)
|
||||||
|
|
|
@ -20,7 +20,7 @@
|
||||||
# be accessed online at http://www.germane-software.com/software/rexml_doc
|
# be accessed online at http://www.germane-software.com/software/rexml_doc
|
||||||
# A tutorial is available in docs/tutorial.html
|
# A tutorial is available in docs/tutorial.html
|
||||||
module REXML
|
module REXML
|
||||||
Copyright = "Copyright #{Time.now.year} Sean Russell <ser@germane-software.com>"
|
Copyright = "Copyright © 2001, 2002, 2003, 2004 Sean Russell <ser@germane-software.com>"
|
||||||
Date = "+2003/346"
|
Date = "+2004/088"
|
||||||
Version = "2.7.3"
|
Version = "3.0.0"
|
||||||
end
|
end
|
||||||
|
|
|
@ -31,7 +31,6 @@ module REXML
|
||||||
def initialize(arg)
|
def initialize(arg)
|
||||||
@orig = @buffer = arg
|
@orig = @buffer = arg
|
||||||
self.encoding = check_encoding( @buffer )
|
self.encoding = check_encoding( @buffer )
|
||||||
#@buffer = decode(@buffer) unless @encoding == UTF_8
|
|
||||||
@line = 0
|
@line = 0
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -96,7 +95,7 @@ module REXML
|
||||||
|
|
||||||
# @return true if the Source is exhausted
|
# @return true if the Source is exhausted
|
||||||
def empty?
|
def empty?
|
||||||
@buffer.nil?
|
@buffer == ""
|
||||||
end
|
end
|
||||||
|
|
||||||
# @return the current line in the source
|
# @return the current line in the source
|
||||||
|
@ -113,17 +112,14 @@ module REXML
|
||||||
class IOSource < Source
|
class IOSource < Source
|
||||||
#attr_reader :block_size
|
#attr_reader :block_size
|
||||||
|
|
||||||
|
# block_size has been deprecated
|
||||||
def initialize(arg, block_size=500)
|
def initialize(arg, block_size=500)
|
||||||
@er_source = @source = arg
|
@er_source = @source = arg
|
||||||
@to_utf = false
|
@to_utf = false
|
||||||
# READLINE OPT
|
# FIXME
|
||||||
# The following was commented out when IOSource started using readline
|
# This is broken. If the user puts in enough carriage returns, this can fail
|
||||||
# to pull the data from the stream.
|
# to calculate the correct encoding.
|
||||||
#@block_size = block_size
|
super @source.read( 100 )
|
||||||
#super @source.read(@block_size)
|
|
||||||
@line_break = '>'
|
|
||||||
#super @source.readline( "\n" )
|
|
||||||
super @source.readline( @line_break )+@source.read
|
|
||||||
@line_break = encode( '>' )
|
@line_break = encode( '>' )
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
|
@ -164,9 +164,54 @@ module REXML
|
||||||
end
|
end
|
||||||
@unnormalized = Text::unnormalize( @string, doctype )
|
@unnormalized = Text::unnormalize( @string, doctype )
|
||||||
end
|
end
|
||||||
|
|
||||||
|
def wrap(string, width, addnewline=false)
|
||||||
|
# Recursivly wrap string at width.
|
||||||
|
return string if string.length <= width
|
||||||
|
place = string.rindex(' ', width) # Position in string with last ' ' before cutoff
|
||||||
|
if addnewline then
|
||||||
|
return "\n" + string[0,place] + "\n" + wrap(string[place+1..-1], width)
|
||||||
|
else
|
||||||
|
return string[0,place] + "\n" + wrap(string[place+1..-1], width)
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
# Sets the contents of this text node. This expects the text to be
|
||||||
|
# unnormalized. It returns self.
|
||||||
|
#
|
||||||
|
# e = Element.new( "a" )
|
||||||
|
# e.add_text( "foo" ) # <a>foo</a>
|
||||||
|
# e[0].value = "bar" # <a>bar</a>
|
||||||
|
# e[0].value = "<a>" # <a><a></a>
|
||||||
|
def value=( val )
|
||||||
|
@string = val.gsub( /\r\n?/, "\n" )
|
||||||
|
@unnormalized = nil
|
||||||
|
@normalized = nil
|
||||||
|
@raw = false
|
||||||
|
end
|
||||||
|
|
||||||
|
def indent(string, level=1, style="\t", indentfirstline=true)
|
||||||
|
return string if level < 0
|
||||||
|
new_string = ''
|
||||||
|
string.each { |line|
|
||||||
|
indent_string = style * level
|
||||||
|
new_line = (indent_string + line).sub(/[\s]+$/,'')
|
||||||
|
new_string << new_line
|
||||||
|
}
|
||||||
|
new_string.strip! unless indentfirstline
|
||||||
|
return new_string
|
||||||
|
end
|
||||||
|
|
||||||
def write( writer, indent=-1, transitive=false, ie_hack=false )
|
def write( writer, indent=-1, transitive=false, ie_hack=false )
|
||||||
writer << to_s()
|
s = to_s()
|
||||||
|
if not (@parent and @parent.whitespace) then
|
||||||
|
s = wrap(s, 60, false) if @parent and @parent.context[:wordwrap] == :all
|
||||||
|
if @parent and not @parent.context[:indentstyle].nil? and indent > 0 and s.count("\n") > 0
|
||||||
|
s = indent(s, indent, @parent.context[:indentstyle], false)
|
||||||
|
end
|
||||||
|
s.squeeze!(" \n\t") if @parent and !@parent.whitespace
|
||||||
|
end
|
||||||
|
writer << s
|
||||||
end
|
end
|
||||||
|
|
||||||
# Writes out text, substituting special characters beforehand.
|
# Writes out text, substituting special characters beforehand.
|
||||||
|
|
|
@ -29,7 +29,8 @@ module REXML
|
||||||
|
|
||||||
def parse path, nodeset
|
def parse path, nodeset
|
||||||
path_stack = @parser.parse( path )
|
path_stack = @parser.parse( path )
|
||||||
#puts "PARSE: #{path} => #{path_stack.inspect}"
|
#puts "PARSE: #{path} => #{path_stack.inspect}"
|
||||||
|
#puts "PARSE: nodeset = #{nodeset.collect{|x|x.to_s}.inspect}"
|
||||||
match( path_stack, nodeset )
|
match( path_stack, nodeset )
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -46,7 +47,7 @@ module REXML
|
||||||
|
|
||||||
def match( path_stack, nodeset )
|
def match( path_stack, nodeset )
|
||||||
while ( path_stack.size > 0 and nodeset.size > 0 )
|
while ( path_stack.size > 0 and nodeset.size > 0 )
|
||||||
#puts "PARSE: #{path_stack.inspect} '#{nodeset.collect{|n|n.type}.inspect}'"
|
#puts "PARSE: #{path_stack.inspect} '#{nodeset.collect{|n|n.class}.inspect}'"
|
||||||
nodeset = internal_parse( path_stack, nodeset )
|
nodeset = internal_parse( path_stack, nodeset )
|
||||||
#puts "NODESET: #{nodeset.size}"
|
#puts "NODESET: #{nodeset.size}"
|
||||||
#puts "PATH_STACK: #{path_stack.inspect}"
|
#puts "PATH_STACK: #{path_stack.inspect}"
|
||||||
|
@ -55,8 +56,9 @@ module REXML
|
||||||
end
|
end
|
||||||
|
|
||||||
def internal_parse path_stack, nodeset
|
def internal_parse path_stack, nodeset
|
||||||
|
#puts "INTERNAL_PARSE RETURNING WITH NO RESULTS" if nodeset.size == 0 or path_stack.size == 0
|
||||||
return nodeset if nodeset.size == 0 or path_stack.size == 0
|
return nodeset if nodeset.size == 0 or path_stack.size == 0
|
||||||
#puts "INTERNAL_PARSE: #{path_stack.inspect}, #{nodeset.collect{|n| n.type}.inspect}"
|
#puts "INTERNAL_PARSE: #{path_stack.inspect}, #{nodeset.collect{|n| n.class}.inspect}"
|
||||||
case path_stack.shift
|
case path_stack.shift
|
||||||
when :document
|
when :document
|
||||||
return [ nodeset[0].root.parent ]
|
return [ nodeset[0].root.parent ]
|
||||||
|
@ -205,7 +207,7 @@ module REXML
|
||||||
Functions::index = index+1
|
Functions::index = index+1
|
||||||
#puts "Node #{node} and index=#{index+1}"
|
#puts "Node #{node} and index=#{index+1}"
|
||||||
result = Predicate( predicate, node )
|
result = Predicate( predicate, node )
|
||||||
#puts "Predicate returned #{result} (#{result.type}) for #{node.type}"
|
#puts "Predicate returned #{result} (#{result.class}) for #{node.class}"
|
||||||
if result.kind_of? Numeric
|
if result.kind_of? Numeric
|
||||||
#puts "#{result} == #{index} => #{result == index}"
|
#puts "#{result} == #{index} => #{result == index}"
|
||||||
new_nodeset << node if result == (index+1)
|
new_nodeset << node if result == (index+1)
|
||||||
|
@ -285,6 +287,7 @@ module REXML
|
||||||
end
|
end
|
||||||
|
|
||||||
##########################################################
|
##########################################################
|
||||||
|
# FIXME
|
||||||
# The next two methods are BAD MOJO!
|
# The next two methods are BAD MOJO!
|
||||||
# This is my achilles heel. If anybody thinks of a better
|
# This is my achilles heel. If anybody thinks of a better
|
||||||
# way of doing this, be my guest. This really sucks, but
|
# way of doing this, be my guest. This really sucks, but
|
||||||
|
@ -294,24 +297,39 @@ module REXML
|
||||||
def descendant_or_self( path_stack, nodeset )
|
def descendant_or_self( path_stack, nodeset )
|
||||||
rs = []
|
rs = []
|
||||||
d_o_s( path_stack, nodeset, rs )
|
d_o_s( path_stack, nodeset, rs )
|
||||||
#puts "RS = #{rs.collect{|n|n.to_s}.inspect}"
|
#puts "RS = #{rs.collect{|n|n.to_s}.inspect}"
|
||||||
rs.flatten.compact
|
document_order(rs.flatten.compact)
|
||||||
end
|
end
|
||||||
|
|
||||||
def d_o_s( p, ns, r )
|
def d_o_s( p, ns, r )
|
||||||
#puts r.collect{|n|n.to_s}.inspect
|
|
||||||
#puts ns.collect{|n|n.to_s}.inspect
|
|
||||||
nt = nil
|
nt = nil
|
||||||
ns.each_index do |i|
|
ns.each_index do |i|
|
||||||
n = ns[i]
|
n = ns[i]
|
||||||
x = match( p.clone, [ n ] )
|
x = match( p.clone, [ n ] )
|
||||||
#puts "Got a match on #{p.inspect} for #{ns.collect{|n|n.to_s+"("+n.type.to_s+")"}.inspect}"
|
|
||||||
nt = n.node_type
|
nt = n.node_type
|
||||||
d_o_s( p, n.children, x ) if nt == :element or nt == :document
|
d_o_s( p, n.children, x ) if nt == :element or nt == :document and n.children.size > 0
|
||||||
r[i,0] = [x] if x.size > 0
|
r.concat(x) if x.size > 0
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
||||||
|
# Reorders an array of nodes so that they are in document order
|
||||||
|
# It tries to do this efficiently.
|
||||||
|
def document_order( array_of_nodes )
|
||||||
|
new_arry = []
|
||||||
|
array_of_nodes.each { |node|
|
||||||
|
node_idx = []
|
||||||
|
np = node.node_type == :attribute ? node.element : node
|
||||||
|
while np.parent and np.parent.node_type == :element
|
||||||
|
node_idx << np.parent.children.index( np )
|
||||||
|
np = np.parent
|
||||||
|
end
|
||||||
|
new_arry << [ node_idx.reverse.join, node ]
|
||||||
|
}
|
||||||
|
new_arry.sort{ |s1, s2| s1[0] <=> s2[0] }.collect{ |s| s[1] }
|
||||||
|
end
|
||||||
|
|
||||||
|
|
||||||
def recurse( nodeset, &block )
|
def recurse( nodeset, &block )
|
||||||
for node in nodeset
|
for node in nodeset
|
||||||
yield node
|
yield node
|
||||||
|
@ -324,7 +342,7 @@ module REXML
|
||||||
def Predicate( predicate, node )
|
def Predicate( predicate, node )
|
||||||
predicate = predicate.clone
|
predicate = predicate.clone
|
||||||
#puts "#"*20
|
#puts "#"*20
|
||||||
#puts "Predicate( #{predicate.inspect}, #{node.type} )"
|
#puts "Predicate( #{predicate.inspect}, #{node.class} )"
|
||||||
results = []
|
results = []
|
||||||
case (predicate[0])
|
case (predicate[0])
|
||||||
when :and, :or, :eq, :neq, :lt, :lteq, :gt, :gteq
|
when :and, :or, :eq, :neq, :lt, :lteq, :gt, :gteq
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue