r1366@bean: ser | 2007-10-01 21:24:33 -0400

r1352@bean:  ser | 2007-07-29 11:33:07 -0400
  Implements namespace validation in the BaseParser.  This means that, as per
  the XML namespace spec, unbound prefixes raise UndefinedNamespaceException.
  Also, as per the namespace spec, the 'xml' prefix must be bound to
  http://www.w3.org/XML/1998/namespace, and the 'xmlns' prefix must not be
  declared in the XML.
 


git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@13595 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
ser 2007-10-02 01:46:32 +00:00
parent c1b7ca8fec
commit 9acf1749fb
4 changed files with 56 additions and 17 deletions

View File

@ -50,7 +50,7 @@ module REXML
@element = first.element
end
elsif first.kind_of? String
@element = parent if parent.kind_of? Element
@element = parent
self.name = first
@normalized = second.to_s
else

View File

@ -855,15 +855,15 @@ module REXML
# Source (see Element.initialize). If not supplied or nil, a
# new, default Element will be constructed
# Returns:: the added Element
# a = Element.new 'a'
# a.elements.add Element.new 'b' #-> <a><b/></a>
# a.elements.add 'c' #-> <a><b/><c/></a>
# a = Element.new('a')
# a.elements.add(Element.new('b')) #-> <a><b/></a>
# a.elements.add('c') #-> <a><b/><c/></a>
def add element=nil
rv = nil
if element.nil?
Element.new "", self, @element.context
Element.new("", self, @element.context)
elsif not element.kind_of?(Element)
Element.new element, self, @element.context
Element.new(element, self, @element.context)
else
@element << element
element.context = @element.context

View File

@ -1,5 +1,6 @@
require 'rexml/parseexception'
require 'rexml/source'
require 'set'
module REXML
module Parsers
@ -24,7 +25,8 @@ module REXML
# Nat Price gave me some good ideas for the API.
class BaseParser
NCNAME_STR= '[\w:][\-\w\d.]*'
NAME_STR= "(?:#{NCNAME_STR}:)?#{NCNAME_STR}"
NAME_STR= "(?:(#{NCNAME_STR}):)?(#{NCNAME_STR})"
UNAME_STR= "(?:#{NCNAME_STR}:)?#{NCNAME_STR}"
NAMECHAR = '[\-\w\d\.:]'
NAME = "([\\w:]#{NAMECHAR}*)"
@ -35,7 +37,7 @@ module REXML
DOCTYPE_START = /\A\s*<!DOCTYPE\s/um
DOCTYPE_PATTERN = /\s*<!DOCTYPE\s+(.*?)(\[|>)/um
ATTRIBUTE_PATTERN = /\s*(#{NAME_STR})\s*=\s*(["'])(.*?)\2/um
ATTRIBUTE_PATTERN = /\s*(#{NAME_STR})\s*=\s*(["'])(.*?)\4/um
COMMENT_START = /\A<!--/u
COMMENT_PATTERN = /<!--(.*?)-->/um
CDATA_START = /\A<!\[CDATA\[/u
@ -45,7 +47,7 @@ module REXML
XMLDECL_PATTERN = /<\?xml\s+(.*?)\?>/um
INSTRUCTION_START = /\A<\?/u
INSTRUCTION_PATTERN = /<\?(.*?)(\s+.*?)?\?>/um
TAG_MATCH = /^<((?>#{NAME_STR}))\s*((?>\s+#{NAME_STR}\s*=\s*(["']).*?\3)*)\s*(\/)?>/um
TAG_MATCH = /^<((?>#{NAME_STR}))\s*((?>\s+#{UNAME_STR}\s*=\s*(["']).*?\5)*)\s*(\/)?>/um
CLOSE_MATCH = /^\s*<\/(#{NAME_STR})\s*>/um
VERSION = /\bversion\s*=\s*["'](.*?)['"]/um
@ -133,6 +135,7 @@ module REXML
@tags = []
@stack = []
@entities = []
@nsstack = []
end
def position
@ -213,6 +216,7 @@ module REXML
return [ :processing_instruction, *@source.match(INSTRUCTION_PATTERN, true)[1,2] ]
when DOCTYPE_START
md = @source.match( DOCTYPE_PATTERN, true )
@nsstack.unshift(curr_ns=Set.new)
identity = md[1]
close = md[2]
identity =~ IDENTITY
@ -288,6 +292,9 @@ module REXML
val = attdef[3]
val = attdef[4] if val == "#FIXED "
pairs[attdef[0]] = val
if attdef[0] =~ /^xmlns:(.*)/
@nsstack[0] << $1
end
end
end
return [ :attlistdecl, element, pairs, contents ]
@ -312,6 +319,7 @@ module REXML
begin
if @source.buffer[0] == ?<
if @source.buffer[1] == ?/
@nsstack.shift
last_tag = @tags.pop
#md = @source.match_to_consume( '>', CLOSE_MATCH)
md = @source.match( CLOSE_MATCH, true )
@ -345,19 +353,47 @@ module REXML
raise REXML::ParseException.new("missing attribute quote", @source) if @source.match(MISSING_ATTRIBUTE_QUOTES )
raise REXML::ParseException.new("malformed XML: missing tag start", @source)
end
attrs = []
if md[2].size > 0
attrs = md[2].scan( ATTRIBUTE_PATTERN )
attributes = {}
prefixes = Set.new
prefixes << md[2] if md[2]
@nsstack.unshift(curr_ns=Set.new)
if md[4].size > 0
attrs = md[4].scan( ATTRIBUTE_PATTERN )
raise REXML::ParseException.new( "error parsing attributes: [#{attrs.join ', '}], excess = \"#$'\"", @source) if $' and $'.strip.size > 0
attrs.each { |a,b,c,d,e|
if b == "xmlns"
if c == "xml"
if d != "http://www.w3.org/XML/1998/namespace"
msg = "The 'xml' prefix must not be bound to any other namespace "+
"(http://www.w3.org/TR/REC-xml-names/#ns-decl)"
raise REXML::ParseException.new( msg, @source, self )
end
elsif c == "xmlns"
msg = "The 'xmlns' prefix must not be declared "+
"(http://www.w3.org/TR/REC-xml-names/#ns-decl)"
raise REXML::ParseException.new( msg, @source, self)
end
curr_ns << c
elsif b
prefixes << b unless b == "xml"
end
attributes[a] = e
}
end
if md[4]
# Verify that all of the prefixes have been defined
for prefix in prefixes
unless @nsstack.find{|k| k.member?(prefix)}
raise UndefinedNamespaceException.new(prefix,@source,self)
end
end
if md[6]
@closed = md[1]
@nsstack.shift
else
@tags.push( md[1] )
end
attributes = {}
attrs.each { |a,b,c| attributes[a] = c }
return [ :start_element, md[1], attributes ]
end
else
@ -371,6 +407,8 @@ module REXML
# return PullEvent.new( :text, md[1], unnormalized )
return [ :text, md[1] ]
end
rescue REXML::UndefinedNamespaceException
raise
rescue REXML::ParseException
raise
rescue Exception, NameError => error

View File

@ -29,8 +29,7 @@ module REXML
return
when :start_element
tag_stack.push(event[1])
# find the observers for namespaces
@build_context = @build_context.add_element( event[1], event[2] )
el = @build_context = @build_context.add_element( event[1], event[2] )
when :end_element
tag_stack.pop
@build_context = @build_context.parent
@ -86,6 +85,8 @@ module REXML
end
rescue REXML::Validation::ValidationException
raise
rescue REXML::UndefinedNamespaceException
raise
rescue
raise ParseException.new( $!.message, @parser.source, @parser, $! )
end