1
0
Fork 0
mirror of https://github.com/ruby/ruby.git synced 2022-11-09 12:17:21 -05:00

* Cross-patch from Ruby CVS; mostly Nabu edits.

* Fixes ticket:68.

  ***** Note that this is an API change!!! *****

  NOTE that this involves an API change!  Entity declarations in the doctype now
  generate events that carry two, not one, arguments.

* Implements ticket:15, using gwrite's suggestion.  This allows Element to be
  subclassed.

* Fixed namespaces handling in XPath and element.

  ***** Note that this is an API change!!! *****

  Element.namespaces() now returns a hash of namespace mappings which are
  relevant for that node.

* Fixes a bug in multiple decodings

* The changeset 1230:1231 was bad.  The default behavior is *not* to use the
  native REXML encodings, but rather to use ICONV.  I'll have to think of a
  better way of managing translations, but the REXML codecs are (a) less
  reliable than ICONV and, more importantly, (b) slower.  The real solution
  is to use ICONV by default, but allow users to specify that they want to
  use the pure Ruby codecs.

* Fixes ticket:61 (xpath_parser)

* Fixes ticket:63 (UTF-16; UNILE decoding was bad)

* Improves parsing error messages a little

* Adds the ability to override the encoding detection in Source construction

* Fixes an edge case in Functions::string, where document nodes weren't
  correctly converted

  * Fixes Functions::string() for Element and Document nodes

  * Fixes some problems in entity handling

* Addresses ticket:66

* Fixes ticket:71

* Addresses ticket:78

    NOTE: this also fixes what is technically another bug in REXML.  REXML's
    XPath parser used to allow exponential notation in numbers.  The XPath spec
    is specific about what a number is, and scientific notation is not included.
    Therefore, this has been fixed.


git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/branches/ruby_1_8@11315 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
ser 2006-12-01 02:20:08 +00:00
parent d2205c869e
commit f114b85d89
14 changed files with 136 additions and 81 deletions

View file

@ -94,7 +94,7 @@ module REXML
# new_a = d.root.clone
# puts new_a # => "<a/>"
def clone
Element.new self
self.class.new self
end
# Evaluates to the root node of the document that this element
@ -200,9 +200,9 @@ module REXML
end
def namespaces
namespaces = []
namespaces = {}
namespaces = parent.namespaces if parent
namespaces |= attributes.namespaces
namespaces = namespaces.merge( attributes.namespaces )
return namespaces
end
@ -500,7 +500,6 @@ module REXML
elsif text and !text.kind_of? Text
text = Text.new( text.to_s, whitespace(), nil, raw() )
end
old_text = get_text
if text.nil?
old_text.remove unless old_text.nil?
@ -557,13 +556,9 @@ module REXML
#################################################
def attribute( name, namespace=nil )
prefix = ''
if namespace
prefix = attributes.prefixes.each { |prefix|
return "#{prefix}:" if namespace( prefix ) == namespace
} || ''
end
attributes.get_attribute( "#{prefix}#{name}" )
prefix = nil
prefix = namespaces.index(namespace) if namespace
attributes.get_attribute( "#{prefix ? prefix + ':' : ''}#{name}" )
end
# Evaluates to +true+ if this element has any attributes set, false
@ -1172,16 +1167,16 @@ module REXML
end
def namespaces
namespaces = []
namespaces = {}
each_attribute do |attribute|
namespaces << attribute.value if attribute.prefix == 'xmlns' or attribute.name == 'xmlns'
namespaces[attribute.name] = attribute.value if attribute.prefix == 'xmlns' or attribute.name == 'xmlns'
end
if @element.document and @element.document.doctype
expn = @element.expanded_name
expn = @element.document.doctype.name if expn.size == 0
@element.document.doctype.attributes_of(expn).each {
|attribute|
namespaces << attribute.value if attribute.prefix == 'xmlns' or attribute.name == 'xmlns'
namespaces[attribute.name] = attribute.value if attribute.prefix == 'xmlns' or attribute.name == 'xmlns'
}
end
namespaces

View file

@ -24,21 +24,22 @@ module REXML
old_verbosity = $VERBOSE
begin
$VERBOSE = false
return if defined? @encoding and enc == @encoding
enc = enc.nil? ? nil : enc.upcase
return false if defined? @encoding and enc == @encoding
if enc and enc != UTF_8
@encoding = enc.upcase
@encoding = enc
raise ArgumentError, "Bad encoding name #@encoding" unless @encoding =~ /^[\w-]+$/
@encoding.untaint
begin
require 'rexml/encodings/ICONV.rb'
Encoding.apply(self, "ICONV")
rescue LoadError, Exception => err
raise ArgumentError, "Bad encoding name #@encoding" unless @encoding =~ /^[\w-]+$/
@encoding.untaint
enc_file = File.join( "rexml", "encodings", "#@encoding.rb" )
rescue LoadError, Exception
begin
enc_file = File.join( "rexml", "encodings", "#@encoding.rb" )
require enc_file
Encoding.apply(self, @encoding)
rescue LoadError
puts $!.message
rescue LoadError => err
puts err.message
raise ArgumentError, "No decoder found for encoding #@encoding. Please install iconv."
end
end
@ -50,6 +51,7 @@ module REXML
ensure
$VERBOSE = old_verbosity
end
true
end
def check_encoding str

View file

@ -18,7 +18,7 @@ module REXML
def decode_unile(str)
array_enc=str.unpack('C*')
array_utf8 = []
2.step(array_enc.size-1, 2){|i|
0.step(array_enc.size-1, 2){|i|
array_utf8 << (array_enc.at(i) + array_enc.at(i+1)*0x100)
}
array_utf8.pack('U*')

View file

@ -16,9 +16,10 @@ module REXML
end
def decode_utf16(str)
str = str[2..-1] if /^\376\377/ =~ str
array_enc=str.unpack('C*')
array_utf8 = []
2.step(array_enc.size-1, 2){|i|
0.step(array_enc.size-1, 2){|i|
array_utf8 << (array_enc.at(i+1) + array_enc.at(i)*0x100)
}
array_utf8.pack('U*')

View file

@ -117,16 +117,30 @@ module REXML
elsif defined? object.node_type
if object.node_type == :attribute
object.value
elsif object.node_type == :element
object.text
elsif object.node_type == :element || object.node_type == :document
string_value(object)
else
object.to_s
end
elsif object.nil?
return ""
else
object.to_s
end
end
# Builds a string from the children of +o+, in order: each text child
# contributes its to_s, each element child contributes its own
# string_value (computed recursively), and every other kind of child is
# ignored.
def Functions::string_value( o )
  o.children.inject( "" ) do |collected, child|
    case child.node_type
    when :text
      collected << child.to_s
    when :element
      collected << string_value( child )
    else
      collected
    end
  end
end
# UNTESTED
def Functions::concat( *objects )
objects.join
@ -139,7 +153,7 @@ module REXML
# Fixed by Mike Stok
def Functions::contains( string, test )
string(string).include? string(test)
string(string).include?(string(test))
end
# Kouhei fixed this
@ -326,7 +340,9 @@ module REXML
else
str = string( object )
#puts "STRING OF #{object.inspect} = #{str}"
if str =~ /^-?\.?\d/
# If XPath ever gets scientific notation...
#if str =~ /^\s*-?(\d*\.?\d+|\d+\.)([Ee]\d*)?\s*$/
if str =~ /^\s*-?(\d*\.?\d+|\d+\.)\s*$/
str.to_f
else
(0.0 / 0.0)

View file

@ -55,10 +55,8 @@ module REXML
return nil
end
# Returns the index that +self+ has in its parent's elements array, so that
# the following equation holds true:
#
# node == node.parent.elements[node.index_in_parent]
# Returns the position that +self+ holds in its parent's array, indexed
# from 1.
def index_in_parent
  # One more than the index the parent reports for this node.
  reported = parent.index(self)
  reported + 1
end

View file

@ -146,8 +146,6 @@ module REXML
# True only when both @source and @stack report empty, i.e. there are no
# more events to hand out.
def empty?
  @source.empty? && @stack.empty?
end
@ -365,8 +363,6 @@ module REXML
else
md = @source.match( TEXT_PATTERN, true )
if md[0].length == 0
puts "EMPTY = #{empty?}"
puts "BUFFER = \"#{@source.buffer}\""
@source.match( /(\s+)/, true )
end
#STDERR.puts "GOT #{md[1].inspect}" unless md[0].length == 0

View file

@ -17,6 +17,10 @@ module REXML
@entities = {}
end
# Returns whatever the wrapped parser reports as its source.
def source
  return @parser.source
end
# Passes +listener+ on to the wrapped parser's add_listener and returns
# that call's result.
def add_listener( listener )
  @parser.add_listener listener
end

View file

@ -23,7 +23,8 @@ module REXML
case event[0]
when :end_document
unless tag_stack.empty?
raise ParseException.new("No close tag for #{tag_stack.inspect}")
#raise ParseException.new("No close tag for #{tag_stack.inspect}")
raise ParseException.new("No close tag for #{@build_context.xpath}")
end
return
when :start_element

View file

@ -10,8 +10,8 @@
#
# Main page:: http://www.germane-software.com/software/rexml
# Author:: Sean Russell <serATgermaneHYPHENsoftwareDOTcom>
# Version:: 3.1.5
# Date:: 2006/250
# Version:: 3.1.6
# Date:: 2006/335
#
# This API documentation can be downloaded from the REXML home page, or can
# be accessed online[http://www.germane-software.com/software/rexml_doc]
@ -21,8 +21,8 @@
# online[http://www.germane-software.com/software/rexml/docs/tutorial.html]
module REXML
COPYRIGHT = "Copyright © 2001-2006 Sean Russell <ser@germane-software.com>"
DATE = "2006/250"
VERSION = "3.1.5"
DATE = "2006/335"
VERSION = "3.1.6"
Copyright = COPYRIGHT
Version = VERSION

View file

@ -70,7 +70,7 @@ module REXML
# ["open-hatch", "PUBLIC", "\"-//Textuality//TEXT Standard open-hatch boilerplate//EN\"", "\"http://www.textuality.com/boilerplate/OpenHatch.xml\""]
# <!ENTITY hatch-pic SYSTEM "../grafix/OpenHatch.gif" NDATA gif>
# ["hatch-pic", "SYSTEM", "\"../grafix/OpenHatch.gif\"", "\n\t\t\t\t\t\t\tNDATA gif", "gif"]
def entitydecl content
def entitydecl name, decl
end
# <!NOTATION ...>
def notationdecl content

View file

@ -6,7 +6,7 @@ module REXML
# Generates a Source object
# @param arg Either a String, or an IO
# @return a Source, or nil if a bad argument was given
def SourceFactory::create_from arg#, slurp=true
def SourceFactory::create_from(arg)
if arg.kind_of? String
Source.new(arg)
elsif arg.respond_to? :read and
@ -35,16 +35,23 @@ module REXML
# Constructor
# @param arg must be a String, and should be a valid XML document
def initialize(arg)
# @param encoding if non-null, sets the encoding of the source to this
# value, overriding all encoding detection
def initialize(arg, encoding=nil)
@orig = @buffer = arg
if encoding
self.encoding = encoding
else
self.encoding = check_encoding( @buffer )
end
@line = 0
end
# Inherited from Encoding
# Overridden to support optimized en/decoding
def encoding=(enc)
super
return unless super
@line_break = encode( '>' )
if enc != UTF_8
@buffer = decode(@buffer)
@ -124,7 +131,7 @@ module REXML
#attr_reader :block_size
# block_size has been deprecated
def initialize(arg, block_size=500)
def initialize(arg, block_size=500, encoding=nil)
@er_source = @source = arg
@to_utf = false
# Determining the encoding is a deceptively difficult issue to resolve.
@ -134,10 +141,12 @@ module REXML
# if there is one. If there isn't one, the file MUST be UTF-8, as per
# the XML spec. If there is one, we can determine the encoding from
# it.
@buffer = ""
str = @source.read( 2 )
if /\A(?:\xfe\xff|\xff\xfe)/n =~ str
if encoding
self.encoding = encoding
elsif /\A(?:\xfe\xff|\xff\xfe)/n =~ str
self.encoding = check_encoding( str )
@line_break = encode( '>' )
else
@line_break = '>'
end
@ -159,6 +168,8 @@ module REXML
str = @source.readline(@line_break)
str = decode(str) if @to_utf and str
@buffer << str
rescue Iconv::IllegalSequence
raise
rescue
@source = nil
end

View file

@ -42,6 +42,7 @@ module REXML
# Use this field if you have entities defined for some text, and you don't
# want REXML to escape that text in output.
# Text.new( "<&", false, nil, false ) #-> "&lt;&amp;"
# Text.new( "&lt;&amp;", false, nil, false ) #-> "&amp;lt;&amp;amp;"
# Text.new( "<&", false, nil, true ) #-> Parse exception
# Text.new( "&lt;&amp;", false, nil, true ) #-> "&lt;&amp;"
# # Assume that the entity "s" is defined to be "sean"
@ -173,17 +174,6 @@ module REXML
@unnormalized = Text::unnormalize( @string, doctype )
end
# Recursively wraps +string+, breaking lines at the last space found at or
# before +width+.
#
# string::      the text to wrap
# width::       the position at or before which each line is broken
# addnewline::  when true and the string needs wrapping, a newline is also
#               prepended to the result; the flag is not passed on to the
#               recursive calls
#
# Returns +string+ unchanged when it already fits within +width+ or when it
# has no space to break at. A word longer than +width+ is kept whole and the
# break falls on the first space after it (previously this case raised a
# TypeError because no space preceded +width+).
def wrap(string, width, addnewline=false)
  return string if string.length <= width
  # Prefer the last space at or before the cutoff; fall back to the first
  # space after it so an over-long word does not leave +place+ nil.
  place = string.rindex(' ', width) || string.index(' ', width)
  return string if place.nil?
  wrapped = string[0,place] + "\n" + wrap(string[place+1..-1], width)
  addnewline ? "\n" + wrapped : wrapped
end
# Sets the contents of this text node. This expects the text to be
# unnormalized. It returns self.
#
@ -198,6 +188,17 @@ module REXML
@raw = false
end
# Recursively wraps +string+, breaking lines at the last space found at or
# before +width+.
#
# string::      the text to wrap
# width::       the position at or before which each line is broken
# addnewline::  when true and the string needs wrapping, a newline is also
#               prepended to the result; the flag is not passed on to the
#               recursive calls
#
# Returns +string+ unchanged when it already fits within +width+ or when it
# has no space to break at. A word longer than +width+ is kept whole and the
# break falls on the first space after it (previously this case raised a
# TypeError because no space preceded +width+).
def wrap(string, width, addnewline=false)
  return string if string.length <= width
  # Prefer the last space at or before the cutoff; fall back to the first
  # space after it so an over-long word does not leave +place+ nil.
  place = string.rindex(' ', width) || string.index(' ', width)
  return string if place.nil?
  wrapped = string[0,place] + "\n" + wrap(string[place+1..-1], width)
  addnewline ? "\n" + wrapped : wrapped
end
def indent_text(string, level=1, style="\t", indentfirstline=true)
return string if level < 0
new_string = ''
@ -286,9 +287,10 @@ module REXML
def Text::normalize( input, doctype=nil, entity_filter=nil )
copy = input
# Doing it like this rather than in a loop improves the speed
#copy = copy.gsub( EREFERENCE, '&amp;' )
copy = copy.gsub( "&", "&amp;" )
if doctype
# Replace all ampersands that aren't part of an entity
copy = copy.gsub( EREFERENCE, '&amp;' )
doctype.entities.each_value do |entity|
copy = copy.gsub( entity.value,
"&#{entity.name};" ) if entity.value and
@ -296,7 +298,6 @@ module REXML
end
else
# Replace all ampersands that aren't part of an entity
copy = copy.gsub( EREFERENCE, '&amp;' )
DocType::DEFAULT_ENTITIES.each_value do |entity|
copy = copy.gsub(entity.value, "&#{entity.name};" )
end

View file

@ -162,6 +162,10 @@ module REXML
while path_stack.length > 0
#puts "Path stack = #{path_stack.inspect}"
#puts "Nodeset is #{nodeset.inspect}"
if nodeset.length == 0
path_stack.clear
return []
end
case (op = path_stack.shift)
when :document
nodeset = [ nodeset[0].root_node ]
@ -235,9 +239,11 @@ module REXML
name = path_stack.shift
for element in nodeset
if element.node_type == :element
#puts element.name
attr = element.attribute( name, get_namespace(element, prefix) )
new_nodeset << attr if attr
#puts "Element name = #{element.name}"
#puts "get_namespace( #{element.inspect}, #{prefix} ) = #{get_namespace(element, prefix)}"
attrib = element.attribute( name, get_namespace(element, prefix) )
#puts "attrib = #{attrib.inspect}"
new_nodeset << attrib if attrib
end
end
when :any
@ -299,8 +305,10 @@ module REXML
#puts "Adding node #{node.inspect}" if result == (index+1)
new_nodeset << node if result == (index+1)
elsif result.instance_of? Array
if result.size > 0 and result.inject(false) {|k,s| s or k}
#puts "Adding node #{node.inspect}" if result.size > 0
new_nodeset << node if result.size > 0
end
else
#puts "Adding node #{node.inspect}" if result
new_nodeset << node if result
@ -381,9 +389,19 @@ module REXML
node_types = ELEMENTS
when :namespace
new_set = []
new_nodeset = []
prefix = path_stack.shift
for node in nodeset
new_nodeset << node.namespace if node.node_type == :element or node.node_type == :attribute
if (node.node_type == :element or node.node_type == :attribute)
if (node.node_type == :element)
namespaces = node.namespaces
else
namespaces = node.element.namesapces
end
if (node.namespace == namespaces[prefix])
new_nodeset << node
end
end
end
nodeset = new_nodeset
@ -404,6 +422,18 @@ module REXML
#puts "RES => #{res.inspect}"
return res
when :and
left = expr( path_stack.shift, nodeset.dup, context )
#puts "LEFT => #{left.inspect} (#{left.class.name})"
if left == false || left.nil? || !left.inject(false) {|a,b| a | b}
return []
end
right = expr( path_stack.shift, nodeset.dup, context )
#puts "RIGHT => #{right.inspect} (#{right.class.name})"
res = equality_relational_compare( left, op, right )
#puts "RES => #{res.inspect}"
return res
when :div
left = Functions::number(expr(path_stack.shift, nodeset, context)).to_f
right = Functions::number(expr(path_stack.shift, nodeset, context)).to_f
@ -477,7 +507,7 @@ module REXML
# The next two methods are BAD MOJO!
# This is my achilles heel. If anybody thinks of a better
# way of doing this, be my guest. This really sucks, but
# it took me three days to get it to work at all.
# it is a wonder it works at all.
# ########################################################
def descendant_or_self( path_stack, nodeset )