mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
Short summary:
This is a version bump to REXML 3.1.4 for Ruby HEAD. This change log is identical to the log for the 1.8 branch. It includes numerous bug fixes and is a pretty big patch, but is nonetheless a minor revision bump, since the API hasn't changed. For more information, see: http://www.germane-software.com/projects/rexml/milestone/3.1.4 For all tickets, see: http://www.germane-software.com/projects/rexml/ticket/# Where '#' is replaced with the ticket number. Changelog: * Fixed the documentation WRT the raw mode of text nodes (ticket #4) * Fixes roundup ticket #43: substring-after bug. * Fixed ticket #44, Element#xpath * Patch submitted by an anonymous donor to allow parsing of Tempfiles. I was hoping that, by now, that whole Source thing would have been changed to use duck typing and avoid this sort of ticket... but in the meantime, the patch has been applied. * Fixes ticket:30, XPath default namespace bug. The fix was provided by Lucas Nussbaum. * Aliases #size to #length, as per zdennis's request. * Fixes typo from previous commit * Fixes ticket #32, preceding-sibling fails attempting delete_if on nil nodeset * Merges a user-contributed patch for ticket #40 * Adds a forgotten-to-commit unit test for ticket #32 * Changes Date, Version, and Copyright to upper case, to avoid conflicts with the Date class. All of the other changes in the altered files are because Subversion doesn't allow block-level commits, like it should. English cased Version and Copyright are aliased to the upper case versions, for partial backward compatibility. * Resolves ticket #34, SAX parser change makes it impossible to parse IO feeds. * Moves parser.source.position() to parser.position() * Fixes ticket:48, repeated writes munging text content * Fixes ticket:46, adding methods for accessing notation DTD information. 
* Encodes some characters and removes a broken link in the documentation * Deals with carriage returns after XML declarations * Improved doctype handling * Whitespace handling changes * Applies a patch by David Tardon, which (incidentally) fixes ticket:50 * Closes #26, allowing anything that walks like an IO to be a source. * Ticket #31 - One unescape too many This wasn't really a bug, per se... "value" always returns a normalized string, and "value" is the method used to get the text() of an element. However, entities have no meaning in CDATA sections, so there's no justification for value to be normalizing the content of CData objects. This behavior has therefore been changed. * Ticket #45 -- Now parses notation declarations in DTDs properly. * Resolves ticket #49, Document.parse_stream returns ArgumentError * Adds documentation to clarify how XMLDecl works, to avoid invalid bug reports. * Addresses ticket #10, fixing the StreamParser API for DTDs. * Fixes ticket #42, XPath node-set function 'name' fails with relative node set parameter * Good patch by Aaron to fix ticket #53: REXML ignoring unbalanced tags at the end of a document. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@10092 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
parent
406c1cb485
commit
78d9dd71a6
23 changed files with 1385 additions and 1236 deletions
|
@ -101,20 +101,20 @@ module REXML
|
|||
end
|
||||
|
||||
@unnormalized = nil
|
||||
@value = @normalized = Text::normalize( @value, doctype )
|
||||
@normalized = Text::normalize( @value, doctype )
|
||||
end
|
||||
|
||||
# Returns the UNNORMALIZED value of this attribute. That is, entities
|
||||
# have been expanded to their values
|
||||
def value
|
||||
@unnormalized if @unnormalized
|
||||
return @unnormalized if @unnormalized
|
||||
doctype = nil
|
||||
if @element
|
||||
doc = @element.document
|
||||
doctype = doc.doctype if doc
|
||||
end
|
||||
@normalized = nil
|
||||
@value = @unnormalized = Text::unnormalize( @value, doctype )
|
||||
@unnormalized = Text::unnormalize( @value, doctype )
|
||||
end
|
||||
|
||||
# Returns a copy of this attribute
|
||||
|
|
|
@ -35,6 +35,10 @@ module REXML
|
|||
@string
|
||||
end
|
||||
|
||||
def value
|
||||
@string
|
||||
end
|
||||
|
||||
# Generates XML output of this object
|
||||
#
|
||||
# output::
|
||||
|
|
|
@ -129,9 +129,8 @@ module REXML
|
|||
output << "\n"
|
||||
child.write( output, next_indent )
|
||||
}
|
||||
output << "\n"
|
||||
#output << ' '*next_indent
|
||||
output << "]"
|
||||
output << "\n]"
|
||||
end
|
||||
output << STOP
|
||||
end
|
||||
|
@ -149,6 +148,59 @@ module REXML
|
|||
@entities = DEFAULT_ENTITIES.clone if @entities == DEFAULT_ENTITIES
|
||||
@entities[ child.name ] = child if child.kind_of? Entity
|
||||
end
|
||||
|
||||
# This method retrieves the public identifier identifying the document's
|
||||
# DTD.
|
||||
#
|
||||
# Method contributed by Henrik Martensson
|
||||
def public
|
||||
case @external_id
|
||||
when "SYSTEM"
|
||||
nil
|
||||
when "PUBLIC"
|
||||
strip_quotes(@long_name)
|
||||
end
|
||||
end
|
||||
|
||||
# This method retrieves the system identifier identifying the document's DTD
|
||||
#
|
||||
# Method contributed by Henrik Martensson
|
||||
def system
|
||||
case @external_id
|
||||
when "SYSTEM"
|
||||
strip_quotes(@long_name)
|
||||
when "PUBLIC"
|
||||
@uri.kind_of?(String) ? strip_quotes(@uri) : nil
|
||||
end
|
||||
end
|
||||
|
||||
# This method returns a list of notations that have been declared in the
|
||||
# _internal_ DTD subset. Notations in the external DTD subset are not
|
||||
# listed.
|
||||
#
|
||||
# Method contributed by Henrik Martensson
|
||||
def notations
|
||||
children().select {|node| node.kind_of?(REXML::NotationDecl)}
|
||||
end
|
||||
|
||||
# Retrieves a named notation. Only notations declared in the internal
|
||||
# DTD subset can be retrieved.
|
||||
#
|
||||
# Method contributed by Henrik Martensson
|
||||
def notation(name)
|
||||
notations.find { |notation_decl|
|
||||
notation_decl.name == name
|
||||
}
|
||||
end
|
||||
|
||||
private
|
||||
|
||||
# Method contributed by Henrik Martensson
|
||||
def strip_quotes(quoted_string)
|
||||
quoted_string =~ /^[\'\"].*[\´\"]$/ ?
|
||||
quoted_string[1, quoted_string.length-2] :
|
||||
quoted_string
|
||||
end
|
||||
end
|
||||
|
||||
# We don't really handle any of these since we're not a validating
|
||||
|
@ -190,24 +242,37 @@ module REXML
|
|||
end
|
||||
def write( output, indent )
|
||||
output << @entity
|
||||
output << "\n"
|
||||
end
|
||||
end
|
||||
|
||||
class NotationDecl < Child
|
||||
def initialize name, middle, rest
|
||||
attr_accessor :public, :system
|
||||
def initialize name, middle, pub, sys
|
||||
super(nil)
|
||||
@name = name
|
||||
@middle = middle
|
||||
@rest = rest
|
||||
@public = pub
|
||||
@system = sys
|
||||
end
|
||||
|
||||
def to_s
|
||||
"<!NOTATION #@name '#@middle #@rest'>"
|
||||
"<!NOTATION #@name #@middle#{
|
||||
@public ? ' ' + public.inspect : ''
|
||||
}#{
|
||||
@system ? ' ' +@system.inspect : ''
|
||||
}>"
|
||||
end
|
||||
|
||||
def write( output, indent=-1 )
|
||||
output << (' '*indent) if indent > 0
|
||||
output << to_s
|
||||
end
|
||||
|
||||
# This method retrieves the name of the notation.
|
||||
#
|
||||
# Method contributed by Henrik Martensson
|
||||
def name
|
||||
@name
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
@ -70,10 +70,22 @@ module REXML
|
|||
if child.kind_of? XMLDecl
|
||||
@children.unshift child
|
||||
elsif child.kind_of? DocType
|
||||
if @children[0].kind_of? XMLDecl
|
||||
@children[1,0] = child
|
||||
# Find first Element or DocType node and insert the decl right
|
||||
# before it. If there is no such node, just insert the child at the
|
||||
# end. If there is a child and it is a DocType, then replace it.
|
||||
insert_before_index = 0
|
||||
@children.find { |x|
|
||||
insert_before_index += 1
|
||||
x.kind_of?(Element) || x.kind_of?(DocType)
|
||||
}
|
||||
if @children[ insert_before_index ] # Not null = not end of list
|
||||
if @children[ insert_before_index ].kind_of DocType
|
||||
@children[ insert_before_index ] = child
|
||||
else
|
||||
@children.unshift child
|
||||
@children[ index_before_index-1, 0 ] = child
|
||||
end
|
||||
else # Insert at end of list
|
||||
@children[insert_before_index] = child
|
||||
end
|
||||
child.parent = self
|
||||
else
|
||||
|
@ -158,12 +170,12 @@ module REXML
|
|||
# unable to parse proper XML, we have to provide a hack to generate XML
|
||||
# that IE's limited abilities can handle. This hack inserts a space
|
||||
# before the /> on empty tags. Defaults to false
|
||||
def write( output=$stdout, indent_level=-1, transitive=false, ie_hack=false )
|
||||
def write( output=$stdout, indent=-1, transitive=false, ie_hack=false )
|
||||
output = Output.new( output, xml_decl.encoding ) if xml_decl.encoding != "UTF-8" && !output.kind_of?(Output)
|
||||
@children.each { |node|
|
||||
indent( output, indent_level ) if node.node_type == :element
|
||||
if node.write( output, indent_level, transitive, ie_hack )
|
||||
output << "\n" unless indent_level<0 or node == @children[-1]
|
||||
indent( output, indent ) if node.node_type == :element
|
||||
if node.write( output, indent, transitive, ie_hack )
|
||||
output << "\n" unless indent<0 or node == @children[-1]
|
||||
end
|
||||
}
|
||||
end
|
||||
|
|
|
@ -36,8 +36,6 @@ module REXML
|
|||
# If an Element, the object will be shallowly cloned; name,
|
||||
# attributes, and namespaces will be copied. Children will +not+ be
|
||||
# copied.
|
||||
# If a Source, the source will be scanned and parsed for an Element,
|
||||
# and all child elements will be recursively parsed as well.
|
||||
# parent::
|
||||
# if supplied, must be a Parent, and will be used as
|
||||
# the parent of this object.
|
||||
|
@ -223,7 +221,7 @@ module REXML
|
|||
# b.namespace("y") # -> '2'
|
||||
def namespace(prefix=nil)
|
||||
if prefix.nil?
|
||||
prefix = self.prefix()
|
||||
prefix = prefix()
|
||||
end
|
||||
if prefix == ''
|
||||
prefix = "xmlns"
|
||||
|
@ -715,7 +713,7 @@ module REXML
|
|||
|
||||
private
|
||||
def __to_xpath_helper node
|
||||
rv = node.expanded_name
|
||||
rv = node.expanded_name.clone
|
||||
if node.parent
|
||||
results = node.parent.find_all {|n|
|
||||
n.kind_of?(REXML::Element) and n.expanded_name == node.expanded_name
|
||||
|
@ -1226,5 +1224,20 @@ module REXML
|
|||
rv.each{ |attr| attr.remove }
|
||||
return rv
|
||||
end
|
||||
|
||||
# The +get_attribute_ns+ method retrieves an attribute by its namespace
|
||||
# and name. Thus it is possible to reliably identify an attribute
|
||||
# even if an XML processor has changed the prefix.
|
||||
#
|
||||
# Method contributed by Henrik Martensson
|
||||
def get_attribute_ns(namespace, name)
|
||||
each_attribute() { |attribute|
|
||||
if name == attribute.name &&
|
||||
namespace == attribute.namespace()
|
||||
return attribute
|
||||
end
|
||||
}
|
||||
nil
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
@ -25,22 +25,28 @@ module REXML
|
|||
begin
|
||||
$VERBOSE = false
|
||||
return if defined? @encoding and enc == @encoding
|
||||
if enc
|
||||
raise ArgumentError, "Bad encoding name #{enc}" unless /\A[\w-]+\z/n =~ enc
|
||||
@encoding = enc.upcase.untaint
|
||||
if enc and enc != UTF_8
|
||||
@encoding = enc.upcase
|
||||
begin
|
||||
require 'rexml/encodings/ICONV.rb'
|
||||
Encoding.apply(self, "ICONV")
|
||||
rescue LoadError, Exception => err
|
||||
raise ArgumentError, "Bad encoding name #@encoding" unless @encoding =~ /^[\w-]+$/
|
||||
@encoding.untaint
|
||||
enc_file = File.join( "rexml", "encodings", "#@encoding.rb" )
|
||||
begin
|
||||
require enc_file
|
||||
Encoding.apply(self, @encoding)
|
||||
rescue LoadError
|
||||
puts $!.message
|
||||
raise ArgumentError, "No decoder found for encoding #@encoding. Please install iconv."
|
||||
end
|
||||
end
|
||||
else
|
||||
@encoding = UTF_8
|
||||
require 'rexml/encodings/UTF-8.rb'
|
||||
Encoding.apply(self, @encoding)
|
||||
end
|
||||
err = nil
|
||||
[@encoding, "ICONV"].each do |enc|
|
||||
begin
|
||||
require File.join("rexml", "encodings", "#{enc}.rb")
|
||||
return Encoding.apply(self, enc)
|
||||
rescue LoadError, Exception => err
|
||||
end
|
||||
end
|
||||
puts err.message
|
||||
raise ArgumentError, "No decoder found for encoding #@encoding. Please install iconv."
|
||||
ensure
|
||||
$VERBOSE = old_verbosity
|
||||
end
|
||||
|
|
|
@ -67,11 +67,10 @@ module REXML
|
|||
if node_set == nil
|
||||
yield @@context[:node] if defined? @@context[:node].namespace
|
||||
else
|
||||
if node_set.namespace
|
||||
yield node_set
|
||||
else
|
||||
return unless node_set.kind_of? Enumerable
|
||||
if node_set.respond_to? :each
|
||||
node_set.each { |node| yield node if defined? node.namespace }
|
||||
elsif node_set.respond_to? :namespace
|
||||
yield node_set
|
||||
end
|
||||
end
|
||||
end
|
||||
|
@ -157,12 +156,9 @@ module REXML
|
|||
# Kouhei fixed this too
|
||||
def Functions::substring_after( string, test )
|
||||
ruby_string = string(string)
|
||||
ruby_index = ruby_string.index(string(test))
|
||||
if ruby_index.nil?
|
||||
test_string = string(test)
|
||||
return $1 if ruby_string =~ /#{test}(.*)/
|
||||
""
|
||||
else
|
||||
ruby_string[ ruby_index+1..-1 ]
|
||||
end
|
||||
end
|
||||
|
||||
# Take equal portions of Mike Stok and Sean Russell; mix
|
||||
|
@ -339,6 +335,8 @@ module REXML
|
|||
end
|
||||
|
||||
def Functions::sum( nodes )
|
||||
nodes = [nodes] unless nodes.kind_of? Array
|
||||
nodes.inject(0) { |r,n| r += number(string(n)) }
|
||||
end
|
||||
|
||||
def Functions::floor( number )
|
||||
|
|
|
@ -38,8 +38,8 @@ module REXML
|
|||
Instruction.new self
|
||||
end
|
||||
|
||||
def write writer, indent_level=-1, transitive=false, ie_hack=false
|
||||
indent(writer, indent_level)
|
||||
def write writer, indent=-1, transitive=false, ie_hack=false
|
||||
indent(writer, indent)
|
||||
writer << START.sub(/\\/u, '')
|
||||
writer << @target
|
||||
writer << ' '
|
||||
|
|
|
@ -31,9 +31,9 @@ module REXML
|
|||
end
|
||||
|
||||
def delete( object )
|
||||
return unless @children.include? object
|
||||
@children.delete object
|
||||
object.parent = nil
|
||||
found = false
|
||||
@children.delete_if {|c| c.equal?(object) and found = true }
|
||||
object.parent = nil if found
|
||||
end
|
||||
|
||||
def each(&block)
|
||||
|
@ -131,14 +131,15 @@ module REXML
|
|||
@children.size
|
||||
end
|
||||
|
||||
alias :length :size
|
||||
|
||||
# Replaces one child with another, making sure the nodelist is correct
|
||||
# @param to_replace the child to replace (must be a Child)
|
||||
# @param replacement the child to insert into the nodelist (must be a
|
||||
# Child)
|
||||
def replace_child( to_replace, replacement )
|
||||
ind = @children.index( to_replace )
|
||||
@children.map! {|c| c.equal?( to_replace ) ? replacement : c }
|
||||
to_replace.parent = nil
|
||||
@children[ind,0] = replacement
|
||||
replacement.parent = self
|
||||
end
|
||||
|
||||
|
|
|
@ -42,7 +42,7 @@ module REXML
|
|||
CDATA_END = /^\s*\]\s*>/um
|
||||
CDATA_PATTERN = /<!\[CDATA\[(.*?)\]\]>/um
|
||||
XMLDECL_START = /\A<\?xml\s/u;
|
||||
XMLDECL_PATTERN = /<\?xml\s+(.*?)\?>*/um
|
||||
XMLDECL_PATTERN = /<\?xml\s+(.*?)\?>/um
|
||||
INSTRUCTION_START = /\A<\?/u
|
||||
INSTRUCTION_PATTERN = /<\?(.*?)(\s+.*?)?\?>/um
|
||||
TAG_MATCH = /^<((?>#{NAME_STR}))\s*((?>\s+#{NAME_STR}\s*=\s*(["']).*?\3)*)\s*(\/)?>/um
|
||||
|
@ -68,8 +68,8 @@ module REXML
|
|||
ATTLISTDECL_START = /^\s*<!ATTLIST/um
|
||||
ATTLISTDECL_PATTERN = /^\s*<!ATTLIST\s+#{NAME}(?:#{ATTDEF})*\s*>/um
|
||||
NOTATIONDECL_START = /^\s*<!NOTATION/um
|
||||
PUBLIC = /^\s*<!NOTATION\s+(\w[\-\w]*)\s+(PUBLIC)\s+((["']).*?\4)\s*>/um
|
||||
SYSTEM = /^\s*<!NOTATION\s+(\w[\-\w]*)\s+(SYSTEM)\s+((["']).*?\4)\s*>/um
|
||||
PUBLIC = /^\s*<!NOTATION\s+(\w[\-\w]*)\s+(PUBLIC)\s+(["'])(.*?)\3(?:\s+(["'])(.*?)\5)?\s*>/um
|
||||
SYSTEM = /^\s*<!NOTATION\s+(\w[\-\w]*)\s+(SYSTEM)\s+(["'])(.*?)\3\s*>/um
|
||||
|
||||
TEXT_PATTERN = /\A([^<]*)/um
|
||||
|
||||
|
@ -120,18 +120,7 @@ module REXML
|
|||
attr_reader :source
|
||||
|
||||
def stream=( source )
|
||||
if source.kind_of? String
|
||||
@source = Source.new(source)
|
||||
elsif source.kind_of? IO
|
||||
@source = IOSource.new(source)
|
||||
elsif source.kind_of? Source
|
||||
@source = source
|
||||
elsif defined? StringIO and source.kind_of? StringIO
|
||||
@source = IOSource.new(source)
|
||||
else
|
||||
raise "#{source.class} is not a valid input stream. It must be \n"+
|
||||
"either a String, IO, StringIO or Source."
|
||||
end
|
||||
@source = SourceFactory.create_from( source )
|
||||
@closed = nil
|
||||
@document_status = nil
|
||||
@tags = []
|
||||
|
@ -139,10 +128,19 @@ module REXML
|
|||
@entities = []
|
||||
end
|
||||
|
||||
def position
|
||||
if @source.respond_to? :position
|
||||
@source.position
|
||||
else
|
||||
# FIXME
|
||||
0
|
||||
end
|
||||
end
|
||||
|
||||
# Returns true if there are no more events
|
||||
def empty?
|
||||
#puts "@source.empty? = #{@source.empty?}"
|
||||
#puts "@stack.empty? = #{@stack.empty?}"
|
||||
#STDERR.puts "@source.empty? = #{@source.empty?}"
|
||||
#STDERR.puts "@stack.empty? = #{@stack.empty?}"
|
||||
return (@source.empty? and @stack.empty?)
|
||||
end
|
||||
|
||||
|
@ -186,14 +184,17 @@ module REXML
|
|||
return [ :end_document ] if empty?
|
||||
return @stack.shift if @stack.size > 0
|
||||
@source.read if @source.buffer.size<2
|
||||
#STDERR.puts "BUFFER = #{@source.buffer.inspect}"
|
||||
if @document_status == nil
|
||||
@source.consume( /^\s*/um )
|
||||
word = @source.match( /(<[^>]*)>/um )
|
||||
#@source.consume( /^\s*/um )
|
||||
word = @source.match( /^((?:\s+)|(?:<[^>]*>))/um )
|
||||
word = word[1] unless word.nil?
|
||||
#STDERR.puts "WORD = #{word.inspect}"
|
||||
case word
|
||||
when COMMENT_START
|
||||
return [ :comment, @source.match( COMMENT_PATTERN, true )[1] ]
|
||||
when XMLDECL_START
|
||||
#STDERR.puts "XMLDECL"
|
||||
results = @source.match( XMLDECL_PATTERN, true )[1]
|
||||
version = VERSION.match( results )
|
||||
version = version[1] unless version.nil?
|
||||
|
@ -225,6 +226,7 @@ module REXML
|
|||
@document_status = :in_doctype
|
||||
end
|
||||
return args
|
||||
when /^\s+/
|
||||
else
|
||||
@document_status = :after_doctype
|
||||
@source.read if @source.buffer.size<2
|
||||
|
@ -288,12 +290,14 @@ module REXML
|
|||
md = nil
|
||||
if @source.match( PUBLIC )
|
||||
md = @source.match( PUBLIC, true )
|
||||
vals = [md[1],md[2],md[4],md[6]]
|
||||
elsif @source.match( SYSTEM )
|
||||
md = @source.match( SYSTEM, true )
|
||||
vals = [md[1],md[2],nil,md[4]]
|
||||
else
|
||||
raise REXML::ParseException.new( "error parsing notation: no matching pattern", @source )
|
||||
end
|
||||
return [ :notationdecl, md[1], md[2], md[3] ]
|
||||
return [ :notationdecl, *vals ]
|
||||
when CDATA_END
|
||||
@document_status = :after_doctype
|
||||
@source.match( CDATA_END, true )
|
||||
|
@ -312,7 +316,7 @@ module REXML
|
|||
return [ :end_element, last_tag ]
|
||||
elsif @source.buffer[1] == ?!
|
||||
md = @source.match(/\A(\s*[^>]*>)/um)
|
||||
#puts "SOURCE BUFFER = #{source.buffer}, #{source.buffer.size}"
|
||||
#STDERR.puts "SOURCE BUFFER = #{source.buffer}, #{source.buffer.size}"
|
||||
raise REXML::ParseException.new("Malformed node", @source) unless md
|
||||
if md[0][2] == ?-
|
||||
md = @source.match( COMMENT_PATTERN, true )
|
||||
|
@ -350,10 +354,11 @@ module REXML
|
|||
else
|
||||
md = @source.match( TEXT_PATTERN, true )
|
||||
if md[0].length == 0
|
||||
#puts "EMPTY = #{empty?}"
|
||||
#puts "BUFFER = \"#{@source.buffer}\""
|
||||
puts "EMPTY = #{empty?}"
|
||||
puts "BUFFER = \"#{@source.buffer}\""
|
||||
@source.match( /(\s+)/, true )
|
||||
end
|
||||
#STDERR.puts "GOT #{md[1].inspect}" unless md[0].length == 0
|
||||
#return [ :text, "" ] if md[0].length == 0
|
||||
# unnormalized = Text::unnormalize( md[1], self )
|
||||
# return PullEvent.new( :text, md[1], unnormalized )
|
||||
|
|
|
@ -1,3 +1,5 @@
|
|||
require 'forwardable'
|
||||
|
||||
require 'rexml/parseexception'
|
||||
require 'rexml/parsers/baseparser'
|
||||
require 'rexml/xmltokens'
|
||||
|
@ -25,11 +27,18 @@ module REXML
|
|||
# Nat Price gave me some good ideas for the API.
|
||||
class PullParser
|
||||
include XMLTokens
|
||||
extend Forwardable
|
||||
|
||||
def_delegators( :@parser, :has_next? )
|
||||
def_delegators( :@parser, :entity )
|
||||
def_delegators( :@parser, :empty? )
|
||||
def_delegators( :@parser, :source )
|
||||
|
||||
def initialize stream
|
||||
@entities = {}
|
||||
@listeners = nil
|
||||
@parser = BaseParser.new( stream )
|
||||
@my_stack = []
|
||||
end
|
||||
|
||||
def add_listener( listener )
|
||||
|
@ -44,14 +53,18 @@ module REXML
|
|||
end
|
||||
|
||||
def peek depth=0
|
||||
PullEvent.new(@parser.peek(depth))
|
||||
if @my_stack.length <= depth
|
||||
(depth - @my_stack.length + 1).times {
|
||||
e = PullEvent.new(@parser.pull)
|
||||
@my_stack.push(e)
|
||||
}
|
||||
end
|
||||
|
||||
def has_next?
|
||||
@parser.has_next?
|
||||
@my_stack[depth]
|
||||
end
|
||||
|
||||
def pull
|
||||
return @my_stack.shift if @my_stack.length > 0
|
||||
|
||||
event = @parser.pull
|
||||
case event[0]
|
||||
when :entitydecl
|
||||
|
@ -65,17 +78,8 @@ module REXML
|
|||
end
|
||||
|
||||
def unshift token
|
||||
@parser.unshift token
|
||||
@my_stack.unshift token
|
||||
end
|
||||
|
||||
def entity reference
|
||||
@parser.entity( reference )
|
||||
end
|
||||
|
||||
def empty?
|
||||
@parser.empty?
|
||||
end
|
||||
|
||||
end
|
||||
|
||||
# A parsing event. The contents of the event are accessed as an +Array+,
|
||||
|
|
|
@ -1,9 +1,11 @@
|
|||
require 'rexml/parsers/baseparser'
|
||||
require 'rexml/parseexception'
|
||||
require 'rexml/namespace'
|
||||
require 'rexml/text'
|
||||
|
||||
module REXML
|
||||
module Parsers
|
||||
# SAX2Parser
|
||||
class SAX2Parser
|
||||
def initialize source
|
||||
@parser = BaseParser.new(source)
|
||||
|
@ -37,6 +39,10 @@ module REXML
|
|||
# :processing_instruction, :doctype, :attlistdecl, :elementdecl,
|
||||
# :entitydecl, :notationdecl, :cdata, :xmldecl, :comment
|
||||
#
|
||||
# There is an additional symbol that can be listened for: :progress.
|
||||
# This will be called for every event generated, passing in the current
|
||||
# stream position.
|
||||
#
|
||||
# Array contains regular expressions or strings which will be matched
|
||||
# against fully qualified element names.
|
||||
#
|
||||
|
@ -161,6 +167,7 @@ module REXML
|
|||
:elementdecl, :cdata, :notationdecl, :xmldecl
|
||||
handle( *event )
|
||||
end
|
||||
handle( :progress, @parser.position )
|
||||
end
|
||||
end
|
||||
|
||||
|
|
|
@ -31,9 +31,13 @@ module REXML
|
|||
@listener.instruction( *event[1,2] )
|
||||
when :start_doctype
|
||||
@listener.doctype( *event[1..-1] )
|
||||
when :comment, :attlistdecl, :notationdecl, :elementdecl,
|
||||
:entitydecl, :cdata, :xmldecl, :attlistdecl
|
||||
when :end_doctype
|
||||
# FIXME: remove this condition for milestone:3.2
|
||||
@listener.doctype_end if @listener.respond_to? :doctype_end
|
||||
when :comment, :attlistdecl, :cdata, :xmldecl, :elementdecl
|
||||
@listener.send( event[0].to_s, *event[1..-1] )
|
||||
when :entitydecl, :notationdecl
|
||||
@listener.send( event[0].to_s, event[1..-1] )
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
@ -19,8 +19,12 @@ module REXML
|
|||
begin
|
||||
while true
|
||||
event = @parser.pull
|
||||
#STDERR.puts "TREEPARSER GOT #{event.inspect}"
|
||||
case event[0]
|
||||
when :end_document
|
||||
unless tag_stack.empty?
|
||||
raise ParseException.new("No close tag for #{tag_stack.inspect}")
|
||||
end
|
||||
return
|
||||
when :start_element
|
||||
tag_stack.push(event[1])
|
||||
|
@ -37,8 +41,8 @@ module REXML
|
|||
@build_context.add(
|
||||
Text.new(event[1], @build_context.whitespace, nil, true)
|
||||
) unless (
|
||||
event[1].strip.size==0 and
|
||||
@build_context.ignore_whitespace_nodes
|
||||
@build_context.ignore_whitespace_nodes and
|
||||
event[1].strip.size==0
|
||||
)
|
||||
end
|
||||
end
|
||||
|
|
|
@ -10,8 +10,8 @@
|
|||
#
|
||||
# Main page:: http://www.germane-software.com/software/rexml
|
||||
# Author:: Sean Russell <serATgermaneHYPHENsoftwareDOTcom>
|
||||
# Version:: 3.1.3
|
||||
# Date:: +2005/139
|
||||
# Version:: 3.1.4
|
||||
# Date:: 2006/104
|
||||
#
|
||||
# This API documentation can be downloaded from the REXML home page, or can
|
||||
# be accessed online[http://www.germane-software.com/software/rexml_doc]
|
||||
|
@ -20,7 +20,10 @@
|
|||
# or can be accessed
|
||||
# online[http://www.germane-software.com/software/rexml/docs/tutorial.html]
|
||||
module REXML
|
||||
Copyright = "Copyright © 2001-2005 Sean Russell <ser@germane-software.com>"
|
||||
Date = "+2005/139"
|
||||
Version = "3.1.3"
|
||||
COPYRIGHT = "Copyright © 2001-2006 Sean Russell <ser@germane-software.com>"
|
||||
DATE = "2006/104"
|
||||
VERSION = "3.1.4"
|
||||
|
||||
Copyright = COPYRIGHT
|
||||
Version = VERSION
|
||||
end
|
||||
|
|
|
@ -84,11 +84,14 @@ module REXML
|
|||
# @p version the version attribute value. EG, "1.0"
|
||||
# @p encoding the encoding attribute value, or nil. EG, "utf"
|
||||
# @p standalone the standalone attribute value, or nil. EG, nil
|
||||
# @p spaced the declaration is followed by a line break
|
||||
def xmldecl version, encoding, standalone
|
||||
end
|
||||
# Called when a comment is encountered.
|
||||
# @p comment The content of the comment
|
||||
def comment comment
|
||||
end
|
||||
def progress position
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
@ -8,11 +8,18 @@ module REXML
|
|||
# @return a Source, or nil if a bad argument was given
|
||||
def SourceFactory::create_from arg#, slurp=true
|
||||
if arg.kind_of? String
|
||||
source = Source.new(arg)
|
||||
elsif arg.kind_of? IO
|
||||
source = IOSource.new(arg)
|
||||
Source.new(arg)
|
||||
elsif arg.respond_to? :read and
|
||||
arg.respond_to? :readline and
|
||||
arg.respond_to? :nil? and
|
||||
arg.respond_to? :eof?
|
||||
IOSource.new(arg)
|
||||
elsif arg.kind_of? Source
|
||||
arg
|
||||
else
|
||||
raise "#{source.class} is not a valid input stream. It must walk \n"+
|
||||
"like either a String, IO, or Source."
|
||||
end
|
||||
source
|
||||
end
|
||||
end
|
||||
|
||||
|
@ -98,6 +105,10 @@ module REXML
|
|||
@buffer == ""
|
||||
end
|
||||
|
||||
def position
|
||||
@orig.index( @buffer )
|
||||
end
|
||||
|
||||
# @return the current line in the source
|
||||
def current_line
|
||||
lines = @orig.split
|
||||
|
@ -194,6 +205,10 @@ module REXML
|
|||
super and ( @source.nil? || @source.eof? )
|
||||
end
|
||||
|
||||
def position
|
||||
@er_source.stat.pipe? ? 0 : @er_source.pos
|
||||
end
|
||||
|
||||
# @return the current line in the source
|
||||
def current_line
|
||||
begin
|
||||
|
|
|
@ -39,6 +39,9 @@ module REXML
|
|||
# @p uri the uri of the doctype, or nil. EG, "bar"
|
||||
def doctype name, pub_sys, long_name, uri
|
||||
end
|
||||
# Called when the doctype is done
|
||||
def doctype_end
|
||||
end
|
||||
# If a doctype includes an ATTLIST declaration, it will cause this
|
||||
# method to be called. The content is the declaration itself, unparsed.
|
||||
# EG, <!ATTLIST el attr CDATA #REQUIRED> will come to this method as "el
|
||||
|
|
|
@ -39,8 +39,10 @@ module REXML
|
|||
# text. If this value is nil (the default), then the raw value of the
|
||||
# parent will be used as the raw value for this node. If there is no raw
|
||||
# value for the parent, and no value is supplied, the default is false.
|
||||
# Use this field if you have entities defined for some text, and you don't
|
||||
# want REXML to escape that text in output.
|
||||
# Text.new( "<&", false, nil, false ) #-> "<&"
|
||||
# Text.new( "<&", false, nil, true ) #-> IllegalArgumentException
|
||||
# Text.new( "<&", false, nil, true ) #-> Parse exception
|
||||
# Text.new( "<&", false, nil, true ) #-> "<&"
|
||||
# # Assume that the entity "s" is defined to be "sean"
|
||||
# # and that the entity "r" is defined to be "russell"
|
||||
|
@ -156,11 +158,11 @@ module REXML
|
|||
# # Assume that the entity "s" is defined to be "sean", and that the
|
||||
# # entity "r" is defined to be "russell"
|
||||
# t = Text.new( "< & sean russell", false, nil, false, ['s'] )
|
||||
# t.string #-> "< & sean russell"
|
||||
# t.value #-> "< & sean russell"
|
||||
# t = Text.new( "< & &s; russell", false, nil, false )
|
||||
# t.string #-> "< & sean russell"
|
||||
# t.value #-> "< & sean russell"
|
||||
# u = Text.new( "sean russell", false, nil, true )
|
||||
# u.string #-> "sean russell"
|
||||
# u.value #-> "sean russell"
|
||||
def value
|
||||
@unnormalized if @unnormalized
|
||||
doctype = nil
|
||||
|
@ -282,9 +284,10 @@ module REXML
|
|||
EREFERENCE = /&(?!#{Entity::NAME};)/
|
||||
# Escapes all possible entities
|
||||
def Text::normalize( input, doctype=nil, entity_filter=nil )
|
||||
copy = input.clone
|
||||
copy = input
|
||||
# Doing it like this rather than in a loop improves the speed
|
||||
if doctype
|
||||
# Replace all ampersands that aren't part of an entity
|
||||
copy = copy.gsub( EREFERENCE, '&' )
|
||||
doctype.entities.each_value do |entity|
|
||||
copy = copy.gsub( entity.value,
|
||||
|
@ -292,6 +295,7 @@ module REXML
|
|||
not( entity_filter and entity_filter.include?(entity) )
|
||||
end
|
||||
else
|
||||
# Replace all ampersands that aren't part of an entity
|
||||
copy = copy.gsub( EREFERENCE, '&' )
|
||||
DocType::DEFAULT_ENTITIES.each_value do |entity|
|
||||
copy = copy.gsub(entity.value, "&#{entity.name};" )
|
||||
|
|
|
@ -82,10 +82,13 @@ module REXML
|
|||
@event_arg = event_arg
|
||||
end
|
||||
|
||||
attr_reader :done?
|
||||
attr_reader :event_type
|
||||
attr_accessor :event_arg
|
||||
|
||||
def done?
|
||||
@done
|
||||
end
|
||||
|
||||
def single?
|
||||
return (@event_type != :start_element and @event_type != :start_attribute)
|
||||
end
|
||||
|
|
|
@ -37,9 +37,9 @@ module REXML
|
|||
XMLDecl.new(self)
|
||||
end
|
||||
|
||||
def write writer, indent_level=-1, transitive=false, ie_hack=false
|
||||
def write writer, indent=-1, transitive=false, ie_hack=false
|
||||
return nil unless @writethis or writer.kind_of? Output
|
||||
indent( writer, indent_level )
|
||||
indent( writer, indent )
|
||||
writer << START.sub(/\\/u, '')
|
||||
if writer.kind_of? Output
|
||||
writer << " #{content writer.encoding}"
|
||||
|
@ -80,6 +80,11 @@ module REXML
|
|||
self.dowrite
|
||||
end
|
||||
|
||||
# Only use this if you do not want the XML declaration to be written;
|
||||
# this object is ignored by the XML writer. Otherwise, instantiate your
|
||||
# own XMLDecl and add it to the document.
|
||||
#
|
||||
# Note that XML 1.1 documents *must* include an XML declaration
|
||||
def XMLDecl.default
|
||||
rv = XMLDecl.new( "1.0" )
|
||||
rv.nowrite
|
||||
|
|
|
@ -20,16 +20,6 @@ module REXML
|
|||
# XPath.first( doc, "//b" )
|
||||
# XPath.first( node, "a/x:b", { "x"=>"http://doofus" } )
|
||||
def XPath::first element, path=nil, namespaces={}, variables={}
|
||||
=begin
|
||||
raise "The namespaces argument, if supplied, must be a hash object." unless namespaces.kind_of? Hash
|
||||
raise "The variables argument, if supplied, must be a hash object." unless variables.kind_of? Hash
|
||||
parser = XPathParser.new
|
||||
parser.namespaces = namespaces
|
||||
parser.variables = variables
|
||||
path = "*" unless path
|
||||
parser.first( path, element );
|
||||
=end
|
||||
#=begin
|
||||
raise "The namespaces argument, if supplied, must be a hash object." unless namespaces.kind_of? Hash
|
||||
raise "The variables argument, if supplied, must be a hash object." unless variables.kind_of? Hash
|
||||
parser = XPathParser.new
|
||||
|
@ -38,7 +28,6 @@ module REXML
|
|||
path = "*" unless path
|
||||
element = [element] unless element.kind_of? Array
|
||||
parser.parse(path, element).flatten[0]
|
||||
#=end
|
||||
end
|
||||
|
||||
# Iterates over nodes that match the given path, calling the supplied
|
||||
|
|
|
@ -76,6 +76,8 @@ module REXML
|
|||
|
||||
# Performs a depth-first (document order) XPath search, and returns the
|
||||
# first match. This is the fastest, lightest way to return a single result.
|
||||
#
|
||||
# FIXME: This method is incomplete!
|
||||
def first( path_stack, node )
|
||||
#puts "#{depth}) Entering match( #{path.inspect}, #{tree.inspect} )"
|
||||
return nil if path.size == 0
|
||||
|
@ -123,14 +125,6 @@ module REXML
|
|||
r = expr( path_stack, nodeset )
|
||||
#puts "MAIN EXPR => #{r.inspect}"
|
||||
r
|
||||
|
||||
#while ( path_stack.size > 0 and nodeset.size > 0 )
|
||||
# #puts "MATCH: #{path_stack.inspect} '#{nodeset.collect{|n|n.class}.inspect}'"
|
||||
# nodeset = expr( path_stack, nodeset )
|
||||
# #puts "NODESET: #{nodeset.inspect}"
|
||||
# #puts "PATH_STACK: #{path_stack.inspect}"
|
||||
#end
|
||||
#nodeset
|
||||
end
|
||||
|
||||
private
|
||||
|
@ -158,9 +152,10 @@ module REXML
|
|||
#puts "IN QNAME"
|
||||
prefix = path_stack.shift
|
||||
name = path_stack.shift
|
||||
ns = @namespaces[prefix]
|
||||
ns = ns ? ns : ''
|
||||
default_ns = @namespaces[prefix]
|
||||
default_ns = default_ns ? default_ns : ''
|
||||
nodeset.delete_if do |node|
|
||||
ns = default_ns
|
||||
# FIXME: This DOUBLES the time XPath searches take
|
||||
ns = node.namespace( prefix ) if node.node_type == :element and ns == ''
|
||||
#puts "NS = #{ns.inspect}"
|
||||
|
@ -353,7 +348,7 @@ module REXML
|
|||
preceding_siblings = all_siblings[ 0 .. current_index-1 ].reverse
|
||||
#results += expr( path_stack.dclone, preceding_siblings )
|
||||
end
|
||||
nodeset = preceding_siblings
|
||||
nodeset = preceding_siblings || []
|
||||
node_types = ELEMENTS
|
||||
|
||||
when :preceding
|
||||
|
@ -385,10 +380,13 @@ module REXML
|
|||
return @variables[ var_name ]
|
||||
|
||||
# :and, :or, :eq, :neq, :lt, :lteq, :gt, :gteq
|
||||
# TODO: Special case for :or and :and -- not evaluate the right
|
||||
# operand if the left alone determines result (i.e. is true for
|
||||
# :or and false for :and).
|
||||
when :eq, :neq, :lt, :lteq, :gt, :gteq, :and, :or
|
||||
left = expr( path_stack.shift, nodeset, context )
|
||||
left = expr( path_stack.shift, nodeset.dup, context )
|
||||
#puts "LEFT => #{left.inspect} (#{left.class.name})"
|
||||
right = expr( path_stack.shift, nodeset, context )
|
||||
right = expr( path_stack.shift, nodeset.dup, context )
|
||||
#puts "RIGHT => #{right.inspect} (#{right.class.name})"
|
||||
res = equality_relational_compare( left, op, right )
|
||||
#puts "RES => #{res.inspect}"
|
||||
|
@ -472,8 +470,11 @@ module REXML
|
|||
|
||||
def descendant_or_self( path_stack, nodeset )
|
||||
rs = []
|
||||
#puts "#"*80
|
||||
#puts "PATH_STACK = #{path_stack.inspect}"
|
||||
#puts "NODESET = #{nodeset.collect{|n|n.inspect}.inspect}"
|
||||
d_o_s( path_stack, nodeset, rs )
|
||||
#puts "RS = #{rs.collect{|n|n.to_s}.inspect}"
|
||||
#puts "RS = #{rs.collect{|n|n.inspect}.inspect}"
|
||||
document_order(rs.flatten.compact)
|
||||
#rs.flatten.compact
|
||||
end
|
||||
|
|
Loading…
Add table
Reference in a new issue