mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
r1479@bean: ser | 2008-01-19 14:26:31 -0500
r1483@bean: ser | 2008-01-19 14:47:23 -0500 Sam's fixes: * Don't blow up on empty documents * Add a test case for sorted attributes * Making the output predictable simplifies unit tests, and doesn't cost much given that most xml element have few attributes * Ruby 1.9 revision 14922 is more strict * Complete Ticket #134 * Fix for ticket #121 * Fix for ticket #124 * Fix for ticket #128 * Fix ticket #133 * Ticket #131 (Support Ruby 1.9) * Fix for ticket #127 * Fix for ticket #123 * Add missing data needed by test case r1481@bean (orig r1303): ser | 2008-01-19 17:22:32 -0500 Tagged for release r1482@bean (orig r1304): ser | 2008-01-19 17:27:10 -0500 Version bump git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@15141 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
parent
00190701e0
commit
66aeb2f708
8 changed files with 163 additions and 51 deletions
|
@ -17,6 +17,8 @@ module REXML
|
|||
attr_writer :normalized
|
||||
PATTERN = /\s*(#{NAME_STR})\s*=\s*(["'])(.*?)\2/um
|
||||
|
||||
NEEDS_A_SECOND_CHECK = /(<|&((#{Entity::NAME});|(#0*((?:\d+)|(?:x[a-fA-F0-9]+)));)?)/um
|
||||
|
||||
# Constructor.
|
||||
# FIXME: The parser doesn't catch illegal characters in attributes
|
||||
#
|
||||
|
@ -110,15 +112,16 @@ module REXML
|
|||
end
|
||||
end
|
||||
|
||||
# Returns the attribute value, with entities replaced
|
||||
def to_s
|
||||
return @normalized if @normalized
|
||||
|
||||
doctype = nil
|
||||
def doctype
|
||||
if @element
|
||||
doc = @element.document
|
||||
doctype = doc.doctype if doc
|
||||
end
|
||||
end
|
||||
|
||||
# Returns the attribute value, with entities replaced
|
||||
def to_s
|
||||
return @normalized if @normalized
|
||||
|
||||
@normalized = Text::normalize( @unnormalized, doctype )
|
||||
@unnormalized = nil
|
||||
|
@ -129,11 +132,6 @@ module REXML
|
|||
# have been expanded to their values
|
||||
def value
|
||||
return @unnormalized if @unnormalized
|
||||
doctype = nil
|
||||
if @element
|
||||
doc = @element.document
|
||||
doctype = doc.doctype if doc
|
||||
end
|
||||
@unnormalized = Text::unnormalize( @normalized, doctype )
|
||||
@normalized = nil
|
||||
@unnormalized
|
||||
|
@ -150,6 +148,11 @@ module REXML
|
|||
# Returns this attribute
|
||||
def element=( element )
|
||||
@element = element
|
||||
|
||||
if @normalized
|
||||
Text.check( @normalized, NEEDS_A_SECOND_CHECK, doctype )
|
||||
end
|
||||
|
||||
self
|
||||
end
|
||||
|
||||
|
|
|
@ -13,7 +13,7 @@ module REXML
|
|||
# CData.new( "Here is some CDATA" )
|
||||
# CData.new( "Some unprocessed data", respect_whitespace_TF, parent_element )
|
||||
def initialize( first, whitespace=true, parent=nil )
|
||||
super( first, whitespace, parent, true, true, ILLEGAL )
|
||||
super( first, whitespace, parent, false, true, ILLEGAL )
|
||||
end
|
||||
|
||||
# Make a copy of this object
|
||||
|
|
|
@ -558,7 +558,19 @@ module REXML
|
|||
prefix = namespaces.index(namespace) if namespace
|
||||
end
|
||||
prefix = nil if prefix == 'xmlns'
|
||||
attributes.get_attribute( "#{prefix ? prefix + ':' : ''}#{name}" )
|
||||
|
||||
ret_val =
|
||||
attributes.get_attribute( "#{prefix ? prefix + ':' : ''}#{name}" )
|
||||
|
||||
return ret_val unless ret_val.nil?
|
||||
return nil if prefix.nil?
|
||||
|
||||
# now check that prefix'es namespace is not the same as the
|
||||
# default namespace
|
||||
return nil unless ( namespaces[ prefix ] == namespaces[ 'xmlns' ] )
|
||||
|
||||
attributes.get_attribute( name )
|
||||
|
||||
end
|
||||
|
||||
# Evaluates to +true+ if this element has any attributes set, false
|
||||
|
@ -675,7 +687,7 @@ module REXML
|
|||
# out = ''
|
||||
# doc.write( out ) #-> doc is written to the string 'out'
|
||||
# doc.write( $stdout ) #-> doc written to the console
|
||||
def write(writer=$stdout, indent=-1, transitive=false, ie_hack=false)
|
||||
def write(output=$stdout, indent=-1, transitive=false, ie_hack=false)
|
||||
Kernel.warn("#{self.class.name}.write is deprecated. See REXML::Formatters")
|
||||
formatter = if indent > -1
|
||||
if transitive
|
||||
|
@ -1217,14 +1229,17 @@ module REXML
|
|||
#
|
||||
# Method contributed by Henrik Martensson
|
||||
def get_attribute_ns(namespace, name)
|
||||
result = nil
|
||||
each_attribute() { |attribute|
|
||||
if name == attribute.name &&
|
||||
namespace == attribute.namespace() &&
|
||||
( !namespace.empty? || !attribute.fully_expanded_name.index(':') )
|
||||
return attribute
|
||||
# foo will match xmlns:foo, but only if foo isn't also an attribute
|
||||
result = attribute if !result or !namespace.empty? or
|
||||
!attribute.fully_expanded_name.index(':')
|
||||
end
|
||||
}
|
||||
nil
|
||||
result
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
@ -63,7 +63,7 @@ module REXML
|
|||
def write_element( node, output )
|
||||
output << "<#{node.expanded_name}"
|
||||
|
||||
node.attributes.each_attribute do |attr|
|
||||
node.attributes.to_a.sort_by {|attr| attr.name}.each do |attr|
|
||||
output << " "
|
||||
attr.write( output )
|
||||
end unless node.attributes.empty?
|
||||
|
|
|
@ -25,7 +25,20 @@ module REXML
|
|||
#
|
||||
# Nat Price gave me some good ideas for the API.
|
||||
class BaseParser
|
||||
NCNAME_STR= '[\w:][\-\w\d.]*'
|
||||
if String.method_defined? :encode
|
||||
# Oniguruma / POSIX [understands unicode]
|
||||
LETTER = '[[:alpha:]]'
|
||||
DIGIT = '[[:digit:]]'
|
||||
else
|
||||
# Ruby < 1.9 [doesn't understand unicode]
|
||||
LETTER = 'a-zA-Z'
|
||||
DIGIT = '\d'
|
||||
end
|
||||
|
||||
COMBININGCHAR = '' # TODO
|
||||
EXTENDER = '' # TODO
|
||||
|
||||
NCNAME_STR= "[#{LETTER}_:][-#{LETTER}#{DIGIT}._:#{COMBININGCHAR}#{EXTENDER}]*"
|
||||
NAME_STR= "(?:(#{NCNAME_STR}):)?(#{NCNAME_STR})"
|
||||
UNAME_STR= "(?:#{NCNAME_STR}:)?#{NCNAME_STR}"
|
||||
|
||||
|
@ -33,7 +46,7 @@ module REXML
|
|||
NAME = "([\\w:]#{NAMECHAR}*)"
|
||||
NMTOKEN = "(?:#{NAMECHAR})+"
|
||||
NMTOKENS = "#{NMTOKEN}(\\s+#{NMTOKEN})*"
|
||||
REFERENCE = "(?:&#{NAME};|&#\\d+;|&#x[0-9a-fA-F]+;)"
|
||||
REFERENCE = "&(?:#{NAME};|#\\d+;|#x[0-9a-fA-F]+;)"
|
||||
REFERENCE_RE = /#{REFERENCE}/
|
||||
|
||||
DOCTYPE_START = /\A\s*<!DOCTYPE\s/um
|
||||
|
@ -340,6 +353,12 @@ module REXML
|
|||
raise REXML::ParseException.new("Malformed node", @source) unless md
|
||||
if md[0][2] == ?-
|
||||
md = @source.match( COMMENT_PATTERN, true )
|
||||
|
||||
case md[1]
|
||||
when /--/, /-$/
|
||||
raise REXML::ParseException.new("Malformed comment", @source)
|
||||
end
|
||||
|
||||
return [ :comment, md[1] ] if md
|
||||
else
|
||||
md = @source.match( CDATA_PATTERN, true )
|
||||
|
@ -384,6 +403,12 @@ module REXML
|
|||
elsif b
|
||||
prefixes << b unless b == "xml"
|
||||
end
|
||||
|
||||
if attributes.has_key? a
|
||||
msg = "Duplicate attribute #{a.inspect}"
|
||||
raise REXML::ParseException.new( msg, @source, self)
|
||||
end
|
||||
|
||||
attributes[a] = e
|
||||
}
|
||||
end
|
||||
|
@ -470,15 +495,12 @@ module REXML
|
|||
if entity_value
|
||||
re = /&#{entity_reference};/
|
||||
rv.gsub!( re, entity_value )
|
||||
else
|
||||
er = DEFAULT_ENTITIES[entity_reference]
|
||||
rv.gsub!( er[0], er[2] ) if er
|
||||
end
|
||||
end
|
||||
end
|
||||
matches.each do |entity_reference|
|
||||
unless filter and filter.include?(entity_reference)
|
||||
er = DEFAULT_ENTITIES[entity_reference]
|
||||
rv.gsub!( er[0], er[2] ) if er
|
||||
end
|
||||
end
|
||||
rv.gsub!( /&amp;/, '&' )
|
||||
end
|
||||
rv
|
||||
|
|
|
@ -11,8 +11,8 @@
|
|||
#
|
||||
# Main page:: http://www.germane-software.com/software/rexml
|
||||
# Author:: Sean Russell <serATgermaneHYPHENsoftwareDOTcom>
|
||||
# Version:: 3.1.7.2
|
||||
# Date:: 2007/275
|
||||
# Date:: 2008/019
|
||||
# Version:: 3.1.7.3
|
||||
#
|
||||
# This API documentation can be downloaded from the REXML home page, or can
|
||||
# be accessed online[http://www.germane-software.com/software/rexml_doc]
|
||||
|
@ -21,9 +21,9 @@
|
|||
# or can be accessed
|
||||
# online[http://www.germane-software.com/software/rexml/docs/tutorial.html]
|
||||
module REXML
|
||||
COPYRIGHT = "Copyright © 2001-2007 Sean Russell <ser@germane-software.com>"
|
||||
DATE = "2007/275"
|
||||
VERSION = "3.1.7.2"
|
||||
COPYRIGHT = "Copyright © 2001-2008 Sean Russell <ser@germane-software.com>"
|
||||
DATE = "2008/019"
|
||||
VERSION = "3.1.7.3"
|
||||
REVISION = "$Revision$".gsub(/\$Revision:|\$/,'').strip
|
||||
|
||||
Copyright = COPYRIGHT
|
||||
|
|
|
@ -147,7 +147,7 @@ module REXML
|
|||
# the XML spec. If there is one, we can determine the encoding from
|
||||
# it.
|
||||
@buffer = ""
|
||||
str = @source.read( 2 )
|
||||
str = @source.read( 2 ) || ''
|
||||
if encoding
|
||||
self.encoding = encoding
|
||||
elsif str[0,2] == "\xfe\xff"
|
||||
|
@ -161,7 +161,7 @@ module REXML
|
|||
else
|
||||
@line_break = ">"
|
||||
end
|
||||
super str+@source.readline( @line_break )
|
||||
super( @source.eof? ? str : str+@source.readline( @line_break ) )
|
||||
end
|
||||
|
||||
def scan(pattern, cons=false)
|
||||
|
@ -231,7 +231,7 @@ module REXML
|
|||
end
|
||||
|
||||
def position
|
||||
@er_source.stat.pipe? ? 0 : @er_source.pos
|
||||
@er_source.pos rescue 0
|
||||
end
|
||||
|
||||
# @return the current line in the source
|
||||
|
|
|
@ -18,8 +18,40 @@ module REXML
|
|||
# If +raw+ is true, then REXML leaves the value alone
|
||||
attr_accessor :raw
|
||||
|
||||
ILLEGAL = /(<|&(?!(#{Entity::NAME})|(#0*((?:\d+)|(?:x[a-fA-F0-9]+)));))/um
|
||||
NEEDS_A_SECOND_CHECK = /(<|&((#{Entity::NAME});|(#0*((?:\d+)|(?:x[a-fA-F0-9]+)));)?)/um
|
||||
NUMERICENTITY = /&#0*((?:\d+)|(?:x[a-fA-F0-9]+));/
|
||||
VALID_CHAR = [
|
||||
0x9, 0xA, 0xD,
|
||||
(0x20..0xD7FF),
|
||||
(0xE000..0xFFFD),
|
||||
(0x10000..0x10FFFF)
|
||||
]
|
||||
|
||||
if String.method_defined? :encode
|
||||
VALID_XML_CHARS = Regexp.new('^['+
|
||||
VALID_CHAR.map { |item|
|
||||
case item
|
||||
when Fixnum
|
||||
[item].pack('U').force_encoding('utf-8')
|
||||
when Range
|
||||
[item.first, '-'.ord, item.last].pack('UUU').force_encoding('utf-8')
|
||||
end
|
||||
}.join +
|
||||
']*$')
|
||||
else
|
||||
VALID_XML_CHARS = /^(
|
||||
[\x09\x0A\x0D\x20-\x7E] # ASCII
|
||||
| [\xC2-\xDF][\x80-\xBF] # non-overlong 2-byte
|
||||
| \xE0[\xA0-\xBF][\x80-\xBF] # excluding overlongs
|
||||
| [\xE1-\xEC\xEE][\x80-\xBF]{2} # straight 3-byte
|
||||
| \xEF[\x80-\xBE]{2} #
|
||||
| \xEF\xBF[\x80-\xBD] # excluding U+fffe and U+ffff
|
||||
| \xED[\x80-\x9F][\x80-\xBF] # excluding surrogates
|
||||
| \xF0[\x90-\xBF][\x80-\xBF]{2} # planes 1-3
|
||||
| [\xF1-\xF3][\x80-\xBF]{3} # planes 4-15
|
||||
| \xF4[\x80-\x8F][\x80-\xBF]{2} # plane 16
|
||||
)*$/x;
|
||||
end
|
||||
|
||||
# Constructor
|
||||
# +arg+ if a String, the content is set to the String. If a Text,
|
||||
|
@ -58,7 +90,7 @@ module REXML
|
|||
#
|
||||
# +pattern+ INTERNAL USE ONLY
|
||||
def initialize(arg, respect_whitespace=false, parent=nil, raw=nil,
|
||||
entity_filter=nil, illegal=ILLEGAL )
|
||||
entity_filter=nil, illegal=NEEDS_A_SECOND_CHECK )
|
||||
|
||||
@raw = false
|
||||
|
||||
|
@ -85,10 +117,54 @@ module REXML
|
|||
|
||||
@string.gsub!( /\r\n?/, "\n" )
|
||||
|
||||
# check for illegal characters
|
||||
if @raw
|
||||
if @string =~ illegal
|
||||
raise "Illegal character '#{$1}' in raw string \"#{@string}\""
|
||||
Text.check(@string, NEEDS_A_SECOND_CHECK, doctype) if @raw and @parent
|
||||
end
|
||||
|
||||
def parent= parent
|
||||
super(parent)
|
||||
Text.check(@string, NEEDS_A_SECOND_CHECK, doctype) if @raw and @parent
|
||||
end
|
||||
|
||||
# check for illegal characters
|
||||
def Text.check string, pattern, doctype
|
||||
|
||||
# illegal anywhere
|
||||
if string !~ VALID_XML_CHARS
|
||||
if String.method_defined? :encode
|
||||
string.chars.each do |c|
|
||||
case c.ord
|
||||
when *VALID_CHAR
|
||||
else
|
||||
raise "Illegal character #{c.inspect} in raw string \"#{string}\""
|
||||
end
|
||||
end
|
||||
else
|
||||
string.scan(/[\x00-\x7F]|[\x80-\xBF][\xC0-\xF0]*|[\xC0-\xF0]/) do |c|
|
||||
case c.unpack('U')
|
||||
when *VALID_CHAR
|
||||
else
|
||||
raise "Illegal character #{c.inspect} in raw string \"#{string}\""
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
# context sensitive
|
||||
string.scan(pattern).each do
|
||||
if $1[-1] != ?;
|
||||
raise "Illegal character '#{$1}' in raw string \"#{string}\""
|
||||
elsif $1[0] == ?&
|
||||
if $5 and $5[0] == ?#
|
||||
case ($5[1] == ?x ? $5[2..-1].to_i(16) : $5[1..-1].to_i)
|
||||
when *VALID_CHAR
|
||||
else
|
||||
raise "Illegal character '#{$1}' in raw string \"#{string}\""
|
||||
end
|
||||
elsif $3 and !SUBSTITUTES.include?($1)
|
||||
if !doctype or !doctype.entities.has_key?($3)
|
||||
raise "Undeclared entity '#{$1}' in raw string \"#{string}\""
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
@ -120,6 +196,13 @@ module REXML
|
|||
to_s() <=> other.to_s
|
||||
end
|
||||
|
||||
def doctype
|
||||
if @parent
|
||||
doc = @parent.document
|
||||
doc.doctype if doc
|
||||
end
|
||||
end
|
||||
|
||||
REFERENCE = /#{Entity::REFERENCE}/
|
||||
# Returns the string value of this text node. This string is always
|
||||
# escaped, meaning that it is a valid XML text node string, and all
|
||||
|
@ -138,12 +221,6 @@ module REXML
|
|||
return @string if @raw
|
||||
return @normalized if @normalized
|
||||
|
||||
doctype = nil
|
||||
if @parent
|
||||
doc = @parent.document
|
||||
doctype = doc.doctype if doc
|
||||
end
|
||||
|
||||
@normalized = Text::normalize( @string, doctype, @entity_filter )
|
||||
end
|
||||
|
||||
|
@ -165,12 +242,7 @@ module REXML
|
|||
# u = Text.new( "sean russell", false, nil, true )
|
||||
# u.value #-> "sean russell"
|
||||
def value
|
||||
@unnormalized if @unnormalized
|
||||
doctype = nil
|
||||
if @parent
|
||||
doc = @parent.document
|
||||
doctype = doc.doctype if doc
|
||||
end
|
||||
return @unnormalized if @unnormalized
|
||||
@unnormalized = Text::unnormalize( @string, doctype )
|
||||
end
|
||||
|
||||
|
@ -286,7 +358,7 @@ module REXML
|
|||
EREFERENCE = /&(?!#{Entity::NAME};)/
|
||||
# Escapes all possible entities
|
||||
def Text::normalize( input, doctype=nil, entity_filter=nil )
|
||||
copy = input
|
||||
copy = input.to_s
|
||||
# Doing it like this rather than in a loop improves the speed
|
||||
#copy = copy.gsub( EREFERENCE, '&amp;' )
|
||||
copy = copy.gsub( "&", "&amp;" )
|
||||
|
|
Loading…
Reference in a new issue