mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
@@ Fix for the XPath descendant* result set ordering bug @@
@@ SAX2 listener bug fixes @@ @@ Undid a code change that caused a 10x speed regression @@ @@ Indentation fixes, and a new word wrapping feature for text nodes was contributed by Devin Bayer (documentation forthcoming; see the change logs for now) @@ The XPath bug fix is really ugly and inefficient, but I spent two days hacking at it and this was the best I could come up with. The SAX2 listener fixes had to do with crashes in certain conditions, like when there was a carriage return at the end of a document. Several people submitted patches for the speed regression; it is embarrassing how long it took me to get around to looking at this. To this day, I don't know where the offending code came from. Encoding fixes. Added a contributed word wrapping option for text formatting. Devin Bayer contributed this. Here's his comment: "Setting :wordwrapping to :all, wordwraps all text nodes longer than 60 characters. Setting :indentstyle to aString, make aString used as indentation, instead of the default ' '. And as long as :respect_whitespace isn't set for the element, multiline text nodes will be indented." git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@5696 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
parent
be3b9b1c5c
commit
95be40a06d
13 changed files with 110 additions and 50 deletions
|
@ -109,7 +109,7 @@ module REXML
|
|||
output << " #@long_name" if @long_name
|
||||
output << " #@uri" if @uri
|
||||
unless @children.empty?
|
||||
next_indent = indent + 2
|
||||
next_indent = indent + 1
|
||||
output << ' ['
|
||||
child = nil # speed
|
||||
@children.each { |child|
|
||||
|
@ -123,6 +123,10 @@ module REXML
|
|||
output << STOP
|
||||
end
|
||||
|
||||
def context
|
||||
@parent.context
|
||||
end
|
||||
|
||||
def entity( name )
|
||||
@entities[name].unnormalized if @entities[name]
|
||||
end
|
||||
|
|
|
@ -42,7 +42,7 @@ module REXML
|
|||
end
|
||||
|
||||
def write( output, indent )
|
||||
output << (' '*indent) if indent > 0
|
||||
indent( output, indent )
|
||||
output << to_s
|
||||
end
|
||||
|
||||
|
|
|
@ -25,7 +25,7 @@ module REXML
|
|||
end
|
||||
|
||||
def write( output, indent )
|
||||
output << (' '*indent) if indent > 0
|
||||
indent( output, indent )
|
||||
output << to_s
|
||||
end
|
||||
|
||||
|
|
|
@ -632,7 +632,7 @@ module REXML
|
|||
else
|
||||
if transitive and indent>-1 and !@children[0].kind_of? Text
|
||||
writer << "\n"
|
||||
indent writer, indent+2
|
||||
indent writer, indent+1
|
||||
end
|
||||
writer << ">"
|
||||
write_children( writer, indent, transitive, ie_hack )
|
||||
|
@ -640,7 +640,7 @@ module REXML
|
|||
end
|
||||
if transitive and indent>-1
|
||||
writer << "\n"
|
||||
indent -= 2 if next_sibling.nil?
|
||||
indent -= 1 if next_sibling.nil?
|
||||
indent(writer, indent)
|
||||
end
|
||||
writer << ">"
|
||||
|
@ -661,12 +661,10 @@ module REXML
|
|||
# A private helper method
|
||||
def write_children( writer, indent, transitive, ie_hack )
|
||||
cr = (indent < 0) ? '' : "\n"
|
||||
#if size == 1 and @children[0].kind_of?(Text)
|
||||
# self[0].write( writer, -1 )
|
||||
if indent == -1
|
||||
each { |child| child.write( writer, indent, transitive, ie_hack ) }
|
||||
else
|
||||
next_indent = indent+2
|
||||
next_indent = indent+1
|
||||
last_child=nil
|
||||
each { |child|
|
||||
unless child.kind_of? Text or last_child.kind_of? Text or transitive
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
module REXML
|
||||
module Encoding
|
||||
@@__REXML_encoding_methods =<<-'EOL'
|
||||
@@__REXML_encoding_methods = %q~
|
||||
# Convert from UTF-8
|
||||
def encode content
|
||||
array_utf8 = content.unpack('U*')
|
||||
|
@ -20,6 +20,6 @@ module REXML
|
|||
def decode(str)
|
||||
str.unpack('C*').pack('U*')
|
||||
end
|
||||
EOL
|
||||
~
|
||||
end
|
||||
end
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
module REXML
|
||||
module Encoding
|
||||
@@__REXML_encoding_methods =<<-'EOL'
|
||||
@@__REXML_encoding_methods = %q~
|
||||
# Convert from UTF-8
|
||||
def encode content
|
||||
array_utf8 = content.unpack('U*')
|
||||
|
@ -20,6 +20,6 @@ module REXML
|
|||
def decode(str)
|
||||
str.unpack('C*').pack('U*')
|
||||
end
|
||||
EOL
|
||||
~
|
||||
end
|
||||
end
|
||||
|
|
|
@ -25,7 +25,12 @@ module REXML
|
|||
end
|
||||
|
||||
def indent to, ind
|
||||
to << " "*ind unless ind<1
|
||||
if @parent and not @parent.context[:indentstyle].nil? then
|
||||
indentstyle = @parent.context[:indentstyle]
|
||||
else
|
||||
indentstyle = ' '
|
||||
end
|
||||
to << indentstyle*ind unless ind<1
|
||||
end
|
||||
|
||||
def parent?
|
||||
|
|
|
@ -122,14 +122,14 @@ module REXML
|
|||
|
||||
# Returns true if there are no more events
|
||||
def empty?
|
||||
!has_next?
|
||||
#puts "@source.empty? = #{@source.empty?}"
|
||||
#puts "@stack.empty? = #{@stack.empty?}"
|
||||
return (@source.empty? and @stack.empty?)
|
||||
end
|
||||
|
||||
# Returns true if there are more events. Synonymous with !empty?
|
||||
def has_next?
|
||||
return true if @closed
|
||||
@source.read if @source.buffer.size==0 and !@source.empty?
|
||||
(!@source.empty? and @source.buffer.strip.size>0) or @stack.size>0 or @closed
|
||||
return !(@source.empty? and @stack.empty?)
|
||||
end
|
||||
|
||||
# Push an event back on the head of the stream. This method
|
||||
|
@ -329,9 +329,12 @@ module REXML
|
|||
end
|
||||
else
|
||||
md = @source.match( TEXT_PATTERN, true )
|
||||
#md = @source.match_to_consume( '<', TEXT_PATTERN )
|
||||
#@source.read
|
||||
raise REXML::ParseException("no text to add") if md[0].length == 0
|
||||
if md[0].length == 0
|
||||
#puts "EMPTY = #{empty?}"
|
||||
#puts "BUFFER = \"#{@source.buffer}\""
|
||||
@source.match( /(\s+)/, true )
|
||||
end
|
||||
#return [ :text, "" ] if md[0].length == 0
|
||||
# unnormalized = Text::unnormalize( md[1], self )
|
||||
# return PullEvent.new( :text, md[1], unnormalized )
|
||||
return [ :text, md[1] ]
|
||||
|
|
|
@ -45,7 +45,7 @@ module REXML
|
|||
if args.size == 2
|
||||
args[1].each { |match| @procs << [args[0], match, blok] }
|
||||
else
|
||||
add( [args[0], /.*/, blok] )
|
||||
add( [args[0], nil, blok] )
|
||||
end
|
||||
elsif args[0].kind_of? Array
|
||||
if args.size == 2
|
||||
|
@ -54,7 +54,7 @@ module REXML
|
|||
args[0].each { |match| add( [ :start_element, match, blok ] ) }
|
||||
end
|
||||
else
|
||||
add([nil, /.*/, args[0]])
|
||||
add([nil, nil, args[0]])
|
||||
end
|
||||
end
|
||||
|
||||
|
@ -164,9 +164,10 @@ module REXML
|
|||
def get_procs( symbol, name )
|
||||
return nil if @procs.size == 0
|
||||
@procs.find_all do |sym, match, block|
|
||||
#puts sym.inspect+"=="+symbol.inspect+ "\t"+match.inspect+"=="+name.inspect+ "\t"+( (sym.nil? or symbol == sym) and ((name.nil? and match.nil?) or match.nil? or ( (name == match) or (match.kind_of? Regexp and name =~ match)))).to_s
|
||||
(
|
||||
(sym.nil? or symbol == sym) and
|
||||
(name.nil? or (
|
||||
((name.nil? and match.nil?) or match.nil? or (
|
||||
(name == match) or
|
||||
(match.kind_of? Regexp and name =~ match)
|
||||
)
|
||||
|
@ -179,7 +180,7 @@ module REXML
|
|||
@listeners.find_all do |sym, match, block|
|
||||
(
|
||||
(sym.nil? or symbol == sym) and
|
||||
(name.nil? or (
|
||||
((name.nil? and match.nil?) or match.nil? or (
|
||||
(name == match) or
|
||||
(match.kind_of? Regexp and name =~ match)
|
||||
)
|
||||
|
|
|
@ -21,6 +21,6 @@
|
|||
# A tutorial is available in docs/tutorial.html
|
||||
module REXML
|
||||
Copyright = "Copyright #{Time.now.year} Sean Russell <ser@germane-software.com>"
|
||||
Date = "+2003/346"
|
||||
Version = "2.7.3"
|
||||
Date = "+2004/044"
|
||||
Version = "2.7.4"
|
||||
end
|
||||
|
|
|
@ -31,7 +31,6 @@ module REXML
|
|||
def initialize(arg)
|
||||
@orig = @buffer = arg
|
||||
self.encoding = check_encoding( @buffer )
|
||||
#@buffer = decode(@buffer) unless @encoding == UTF_8
|
||||
@line = 0
|
||||
end
|
||||
|
||||
|
@ -96,7 +95,7 @@ module REXML
|
|||
|
||||
# @return true if the Source is exhausted
|
||||
def empty?
|
||||
@buffer.nil?
|
||||
@buffer == ""
|
||||
end
|
||||
|
||||
# @return the current line in the source
|
||||
|
@ -113,17 +112,14 @@ module REXML
|
|||
class IOSource < Source
|
||||
#attr_reader :block_size
|
||||
|
||||
# block_size has been deprecated
|
||||
def initialize(arg, block_size=500)
|
||||
@er_source = @source = arg
|
||||
@to_utf = false
|
||||
# READLINE OPT
|
||||
# The following was commented out when IOSource started using readline
|
||||
# to pull the data from the stream.
|
||||
#@block_size = block_size
|
||||
#super @source.read(@block_size)
|
||||
@line_break = '>'
|
||||
#super @source.readline( "\n" )
|
||||
super @source.readline( @line_break )+@source.read
|
||||
# FIXME
|
||||
# This is broken. If the user puts in enough carriage returns, this can fail
|
||||
# to calculate the correct encoding.
|
||||
super @source.read( 100 )
|
||||
@line_break = encode( '>' )
|
||||
end
|
||||
|
||||
|
|
|
@ -164,9 +164,44 @@ module REXML
|
|||
end
|
||||
@unnormalized = Text::unnormalize( @string, doctype )
|
||||
end
|
||||
|
||||
|
||||
def wrap(string, width, addnewline=false)
|
||||
# Recursively wrap string at width.
|
||||
return string if string.length <= width
|
||||
place = string.rindex(' ', width) # Position in string with last ' ' before cutoff
|
||||
if addnewline then
|
||||
return "\n" + string[0,place] + "\n" + wrap(string[place+1..-1], width)
|
||||
else
|
||||
return string[0,place] + "\n" + wrap(string[place+1..-1], width)
|
||||
end
|
||||
end
|
||||
|
||||
def indent(string, level=1, style="\t", indentfirstline=true)
|
||||
return string if level < 0
|
||||
new_string = ''
|
||||
string.each { |line|
|
||||
indent_string = style * level
|
||||
new_line = (indent_string + line).sub(/[\s]+$/,'')
|
||||
new_string << new_line
|
||||
}
|
||||
new_string.strip! unless indentfirstline
|
||||
return new_string
|
||||
end
|
||||
|
||||
def write( writer, indent=-1, transitive=false, ie_hack=false )
|
||||
writer << to_s()
|
||||
s = to_s()
|
||||
if not (@parent and @parent.whitespace) then
|
||||
s = wrap(s, 60, false) if @parent and @parent.context[:wordwrap] == :all
|
||||
if @parent and not @parent.context[:indentstyle].nil? then
|
||||
indentstyle = @parent.context[:indentstyle]
|
||||
else
|
||||
indentstyle = ' '
|
||||
end
|
||||
if s.count("\n") > 0 and indent > 0 then
|
||||
s = indent(s, indent, indentstyle, false)
|
||||
end
|
||||
end
|
||||
writer << s
|
||||
end
|
||||
|
||||
# Writes out text, substituting special characters beforehand.
|
||||
|
|
|
@ -29,7 +29,8 @@ module REXML
|
|||
|
||||
def parse path, nodeset
|
||||
path_stack = @parser.parse( path )
|
||||
#puts "PARSE: #{path} => #{path_stack.inspect}"
|
||||
#puts "PARSE: #{path} => #{path_stack.inspect}"
|
||||
#puts "PARSE: nodeset = #{nodeset.collect{|x|x.to_s}.inspect}"
|
||||
match( path_stack, nodeset )
|
||||
end
|
||||
|
||||
|
@ -46,7 +47,7 @@ module REXML
|
|||
|
||||
def match( path_stack, nodeset )
|
||||
while ( path_stack.size > 0 and nodeset.size > 0 )
|
||||
#puts "PARSE: #{path_stack.inspect} '#{nodeset.collect{|n|n.type}.inspect}'"
|
||||
#puts "PARSE: #{path_stack.inspect} '#{nodeset.collect{|n|n.class}.inspect}'"
|
||||
nodeset = internal_parse( path_stack, nodeset )
|
||||
#puts "NODESET: #{nodeset.size}"
|
||||
#puts "PATH_STACK: #{path_stack.inspect}"
|
||||
|
@ -55,8 +56,9 @@ module REXML
|
|||
end
|
||||
|
||||
def internal_parse path_stack, nodeset
|
||||
#puts "INTERNAL_PARSE RETURNING WITH NO RESULTS" if nodeset.size == 0 or path_stack.size == 0
|
||||
return nodeset if nodeset.size == 0 or path_stack.size == 0
|
||||
#puts "INTERNAL_PARSE: #{path_stack.inspect}, #{nodeset.collect{|n| n.type}.inspect}"
|
||||
#puts "INTERNAL_PARSE: #{path_stack.inspect}, #{nodeset.collect{|n| n.class}.inspect}"
|
||||
case path_stack.shift
|
||||
when :document
|
||||
return [ nodeset[0].root.parent ]
|
||||
|
@ -205,7 +207,7 @@ module REXML
|
|||
Functions::index = index+1
|
||||
#puts "Node #{node} and index=#{index+1}"
|
||||
result = Predicate( predicate, node )
|
||||
#puts "Predicate returned #{result} (#{result.type}) for #{node.type}"
|
||||
#puts "Predicate returned #{result} (#{result.class}) for #{node.class}"
|
||||
if result.kind_of? Numeric
|
||||
#puts "#{result} == #{index} => #{result == index}"
|
||||
new_nodeset << node if result == (index+1)
|
||||
|
@ -285,6 +287,7 @@ module REXML
|
|||
end
|
||||
|
||||
##########################################################
|
||||
# FIXME
|
||||
# The next two methods are BAD MOJO!
|
||||
# This is my achilles heel. If anybody thinks of a better
|
||||
# way of doing this, be my guest. This really sucks, but
|
||||
|
@ -294,24 +297,39 @@ module REXML
|
|||
def descendant_or_self( path_stack, nodeset )
|
||||
rs = []
|
||||
d_o_s( path_stack, nodeset, rs )
|
||||
#puts "RS = #{rs.collect{|n|n.to_s}.inspect}"
|
||||
rs.flatten.compact
|
||||
#puts "RS = #{rs.collect{|n|n.to_s}.inspect}"
|
||||
document_order(rs.flatten.compact)
|
||||
end
|
||||
|
||||
def d_o_s( p, ns, r )
|
||||
#puts r.collect{|n|n.to_s}.inspect
|
||||
#puts ns.collect{|n|n.to_s}.inspect
|
||||
nt = nil
|
||||
ns.each_index do |i|
|
||||
n = ns[i]
|
||||
x = match( p.clone, [ n ] )
|
||||
#puts "Got a match on #{p.inspect} for #{ns.collect{|n|n.to_s+"("+n.type.to_s+")"}.inspect}"
|
||||
nt = n.node_type
|
||||
d_o_s( p, n.children, x ) if nt == :element or nt == :document
|
||||
r[i,0] = [x] if x.size > 0
|
||||
d_o_s( p, n.children, x ) if nt == :element or nt == :document and n.children.size > 0
|
||||
r.concat(x) if x.size > 0
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
# Reorders an array of nodes so that they are in document order
|
||||
# It tries to do this efficiently.
|
||||
def document_order( array_of_nodes )
|
||||
new_arry = []
|
||||
array_of_nodes.each { |node|
|
||||
node_idx = []
|
||||
np = node.node_type == :attribute ? node.element : node
|
||||
while np.parent and np.parent.node_type == :element
|
||||
node_idx << np.parent.children.index( np )
|
||||
np = np.parent
|
||||
end
|
||||
new_arry << [ node_idx.reverse.join, node ]
|
||||
}
|
||||
new_arry.sort{ |s1, s2| s1[0] <=> s2[0] }.collect{ |s| s[1] }
|
||||
end
|
||||
|
||||
|
||||
def recurse( nodeset, &block )
|
||||
for node in nodeset
|
||||
yield node
|
||||
|
@ -324,7 +342,7 @@ module REXML
|
|||
def Predicate( predicate, node )
|
||||
predicate = predicate.clone
|
||||
#puts "#"*20
|
||||
#puts "Predicate( #{predicate.inspect}, #{node.type} )"
|
||||
#puts "Predicate( #{predicate.inspect}, #{node.class} )"
|
||||
results = []
|
||||
case (predicate[0])
|
||||
when :and, :or, :eq, :neq, :lt, :lteq, :gt, :gteq
|
||||
|
|
Loading…
Add table
Reference in a new issue