mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
r1366@bean: ser | 2007-10-01 21:24:33 -0400
r1352@bean: ser | 2007-07-29 11:33:07 -0400 Implements namespace validation in the baseparser. This means that, as per the XML namespace spec, unbound prefixes generate UndefinedNamespaceException. Also, as per the namespace spec, the 'xml' prefix must be bound to http://www.w3.org/XML/1998/namespace, and the 'xmlns' prefix must not be declared in the XML. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@13595 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
parent
c1b7ca8fec
commit
9acf1749fb
4 changed files with 56 additions and 17 deletions
|
@ -50,7 +50,7 @@ module REXML
|
||||||
@element = first.element
|
@element = first.element
|
||||||
end
|
end
|
||||||
elsif first.kind_of? String
|
elsif first.kind_of? String
|
||||||
@element = parent if parent.kind_of? Element
|
@element = parent
|
||||||
self.name = first
|
self.name = first
|
||||||
@normalized = second.to_s
|
@normalized = second.to_s
|
||||||
else
|
else
|
||||||
|
|
|
@ -855,15 +855,15 @@ module REXML
|
||||||
# Source (see Element.initialize). If not supplied or nil, a
|
# Source (see Element.initialize). If not supplied or nil, a
|
||||||
# new, default Element will be constructed
|
# new, default Element will be constructed
|
||||||
# Returns:: the added Element
|
# Returns:: the added Element
|
||||||
# a = Element.new 'a'
|
# a = Element.new('a')
|
||||||
# a.elements.add Element.new 'b' #-> <a><b/></a>
|
# a.elements.add(Element.new('b')) #-> <a><b/></a>
|
||||||
# a.elements.add 'c' #-> <a><b/><c/></a>
|
# a.elements.add('c') #-> <a><b/><c/></a>
|
||||||
def add element=nil
|
def add element=nil
|
||||||
rv = nil
|
rv = nil
|
||||||
if element.nil?
|
if element.nil?
|
||||||
Element.new "", self, @element.context
|
Element.new("", self, @element.context)
|
||||||
elsif not element.kind_of?(Element)
|
elsif not element.kind_of?(Element)
|
||||||
Element.new element, self, @element.context
|
Element.new(element, self, @element.context)
|
||||||
else
|
else
|
||||||
@element << element
|
@element << element
|
||||||
element.context = @element.context
|
element.context = @element.context
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
require 'rexml/parseexception'
|
require 'rexml/parseexception'
|
||||||
require 'rexml/source'
|
require 'rexml/source'
|
||||||
|
require 'set'
|
||||||
|
|
||||||
module REXML
|
module REXML
|
||||||
module Parsers
|
module Parsers
|
||||||
|
@ -24,7 +25,8 @@ module REXML
|
||||||
# Nat Price gave me some good ideas for the API.
|
# Nat Price gave me some good ideas for the API.
|
||||||
class BaseParser
|
class BaseParser
|
||||||
NCNAME_STR= '[\w:][\-\w\d.]*'
|
NCNAME_STR= '[\w:][\-\w\d.]*'
|
||||||
NAME_STR= "(?:#{NCNAME_STR}:)?#{NCNAME_STR}"
|
NAME_STR= "(?:(#{NCNAME_STR}):)?(#{NCNAME_STR})"
|
||||||
|
UNAME_STR= "(?:#{NCNAME_STR}:)?#{NCNAME_STR}"
|
||||||
|
|
||||||
NAMECHAR = '[\-\w\d\.:]'
|
NAMECHAR = '[\-\w\d\.:]'
|
||||||
NAME = "([\\w:]#{NAMECHAR}*)"
|
NAME = "([\\w:]#{NAMECHAR}*)"
|
||||||
|
@ -35,7 +37,7 @@ module REXML
|
||||||
|
|
||||||
DOCTYPE_START = /\A\s*<!DOCTYPE\s/um
|
DOCTYPE_START = /\A\s*<!DOCTYPE\s/um
|
||||||
DOCTYPE_PATTERN = /\s*<!DOCTYPE\s+(.*?)(\[|>)/um
|
DOCTYPE_PATTERN = /\s*<!DOCTYPE\s+(.*?)(\[|>)/um
|
||||||
ATTRIBUTE_PATTERN = /\s*(#{NAME_STR})\s*=\s*(["'])(.*?)\2/um
|
ATTRIBUTE_PATTERN = /\s*(#{NAME_STR})\s*=\s*(["'])(.*?)\4/um
|
||||||
COMMENT_START = /\A<!--/u
|
COMMENT_START = /\A<!--/u
|
||||||
COMMENT_PATTERN = /<!--(.*?)-->/um
|
COMMENT_PATTERN = /<!--(.*?)-->/um
|
||||||
CDATA_START = /\A<!\[CDATA\[/u
|
CDATA_START = /\A<!\[CDATA\[/u
|
||||||
|
@ -45,7 +47,7 @@ module REXML
|
||||||
XMLDECL_PATTERN = /<\?xml\s+(.*?)\?>/um
|
XMLDECL_PATTERN = /<\?xml\s+(.*?)\?>/um
|
||||||
INSTRUCTION_START = /\A<\?/u
|
INSTRUCTION_START = /\A<\?/u
|
||||||
INSTRUCTION_PATTERN = /<\?(.*?)(\s+.*?)?\?>/um
|
INSTRUCTION_PATTERN = /<\?(.*?)(\s+.*?)?\?>/um
|
||||||
TAG_MATCH = /^<((?>#{NAME_STR}))\s*((?>\s+#{NAME_STR}\s*=\s*(["']).*?\3)*)\s*(\/)?>/um
|
TAG_MATCH = /^<((?>#{NAME_STR}))\s*((?>\s+#{UNAME_STR}\s*=\s*(["']).*?\5)*)\s*(\/)?>/um
|
||||||
CLOSE_MATCH = /^\s*<\/(#{NAME_STR})\s*>/um
|
CLOSE_MATCH = /^\s*<\/(#{NAME_STR})\s*>/um
|
||||||
|
|
||||||
VERSION = /\bversion\s*=\s*["'](.*?)['"]/um
|
VERSION = /\bversion\s*=\s*["'](.*?)['"]/um
|
||||||
|
@ -133,6 +135,7 @@ module REXML
|
||||||
@tags = []
|
@tags = []
|
||||||
@stack = []
|
@stack = []
|
||||||
@entities = []
|
@entities = []
|
||||||
|
@nsstack = []
|
||||||
end
|
end
|
||||||
|
|
||||||
def position
|
def position
|
||||||
|
@ -213,6 +216,7 @@ module REXML
|
||||||
return [ :processing_instruction, *@source.match(INSTRUCTION_PATTERN, true)[1,2] ]
|
return [ :processing_instruction, *@source.match(INSTRUCTION_PATTERN, true)[1,2] ]
|
||||||
when DOCTYPE_START
|
when DOCTYPE_START
|
||||||
md = @source.match( DOCTYPE_PATTERN, true )
|
md = @source.match( DOCTYPE_PATTERN, true )
|
||||||
|
@nsstack.unshift(curr_ns=Set.new)
|
||||||
identity = md[1]
|
identity = md[1]
|
||||||
close = md[2]
|
close = md[2]
|
||||||
identity =~ IDENTITY
|
identity =~ IDENTITY
|
||||||
|
@ -288,6 +292,9 @@ module REXML
|
||||||
val = attdef[3]
|
val = attdef[3]
|
||||||
val = attdef[4] if val == "#FIXED "
|
val = attdef[4] if val == "#FIXED "
|
||||||
pairs[attdef[0]] = val
|
pairs[attdef[0]] = val
|
||||||
|
if attdef[0] =~ /^xmlns:(.*)/
|
||||||
|
@nsstack[0] << $1
|
||||||
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
return [ :attlistdecl, element, pairs, contents ]
|
return [ :attlistdecl, element, pairs, contents ]
|
||||||
|
@ -312,6 +319,7 @@ module REXML
|
||||||
begin
|
begin
|
||||||
if @source.buffer[0] == ?<
|
if @source.buffer[0] == ?<
|
||||||
if @source.buffer[1] == ?/
|
if @source.buffer[1] == ?/
|
||||||
|
@nsstack.shift
|
||||||
last_tag = @tags.pop
|
last_tag = @tags.pop
|
||||||
#md = @source.match_to_consume( '>', CLOSE_MATCH)
|
#md = @source.match_to_consume( '>', CLOSE_MATCH)
|
||||||
md = @source.match( CLOSE_MATCH, true )
|
md = @source.match( CLOSE_MATCH, true )
|
||||||
|
@ -345,19 +353,47 @@ module REXML
|
||||||
raise REXML::ParseException.new("missing attribute quote", @source) if @source.match(MISSING_ATTRIBUTE_QUOTES )
|
raise REXML::ParseException.new("missing attribute quote", @source) if @source.match(MISSING_ATTRIBUTE_QUOTES )
|
||||||
raise REXML::ParseException.new("malformed XML: missing tag start", @source)
|
raise REXML::ParseException.new("malformed XML: missing tag start", @source)
|
||||||
end
|
end
|
||||||
attrs = []
|
attributes = {}
|
||||||
if md[2].size > 0
|
prefixes = Set.new
|
||||||
attrs = md[2].scan( ATTRIBUTE_PATTERN )
|
prefixes << md[2] if md[2]
|
||||||
|
@nsstack.unshift(curr_ns=Set.new)
|
||||||
|
if md[4].size > 0
|
||||||
|
attrs = md[4].scan( ATTRIBUTE_PATTERN )
|
||||||
raise REXML::ParseException.new( "error parsing attributes: [#{attrs.join ', '}], excess = \"#$'\"", @source) if $' and $'.strip.size > 0
|
raise REXML::ParseException.new( "error parsing attributes: [#{attrs.join ', '}], excess = \"#$'\"", @source) if $' and $'.strip.size > 0
|
||||||
|
attrs.each { |a,b,c,d,e|
|
||||||
|
if b == "xmlns"
|
||||||
|
if c == "xml"
|
||||||
|
if d != "http://www.w3.org/XML/1998/namespace"
|
||||||
|
msg = "The 'xml' prefix must not be bound to any other namespace "+
|
||||||
|
"(http://www.w3.org/TR/REC-xml-names/#ns-decl)"
|
||||||
|
raise REXML::ParseException.new( msg, @source, self )
|
||||||
|
end
|
||||||
|
elsif c == "xmlns"
|
||||||
|
msg = "The 'xmlns' prefix must not be declared "+
|
||||||
|
"(http://www.w3.org/TR/REC-xml-names/#ns-decl)"
|
||||||
|
raise REXML::ParseException.new( msg, @source, self)
|
||||||
|
end
|
||||||
|
curr_ns << c
|
||||||
|
elsif b
|
||||||
|
prefixes << b unless b == "xml"
|
||||||
|
end
|
||||||
|
attributes[a] = e
|
||||||
|
}
|
||||||
end
|
end
|
||||||
|
|
||||||
if md[4]
|
# Verify that all of the prefixes have been defined
|
||||||
|
for prefix in prefixes
|
||||||
|
unless @nsstack.find{|k| k.member?(prefix)}
|
||||||
|
raise UndefinedNamespaceException.new(prefix,@source,self)
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
if md[6]
|
||||||
@closed = md[1]
|
@closed = md[1]
|
||||||
|
@nsstack.shift
|
||||||
else
|
else
|
||||||
@tags.push( md[1] )
|
@tags.push( md[1] )
|
||||||
end
|
end
|
||||||
attributes = {}
|
|
||||||
attrs.each { |a,b,c| attributes[a] = c }
|
|
||||||
return [ :start_element, md[1], attributes ]
|
return [ :start_element, md[1], attributes ]
|
||||||
end
|
end
|
||||||
else
|
else
|
||||||
|
@ -371,6 +407,8 @@ module REXML
|
||||||
# return PullEvent.new( :text, md[1], unnormalized )
|
# return PullEvent.new( :text, md[1], unnormalized )
|
||||||
return [ :text, md[1] ]
|
return [ :text, md[1] ]
|
||||||
end
|
end
|
||||||
|
rescue REXML::UndefinedNamespaceException
|
||||||
|
raise
|
||||||
rescue REXML::ParseException
|
rescue REXML::ParseException
|
||||||
raise
|
raise
|
||||||
rescue Exception, NameError => error
|
rescue Exception, NameError => error
|
||||||
|
|
|
@ -29,8 +29,7 @@ module REXML
|
||||||
return
|
return
|
||||||
when :start_element
|
when :start_element
|
||||||
tag_stack.push(event[1])
|
tag_stack.push(event[1])
|
||||||
# find the observers for namespaces
|
el = @build_context = @build_context.add_element( event[1], event[2] )
|
||||||
@build_context = @build_context.add_element( event[1], event[2] )
|
|
||||||
when :end_element
|
when :end_element
|
||||||
tag_stack.pop
|
tag_stack.pop
|
||||||
@build_context = @build_context.parent
|
@build_context = @build_context.parent
|
||||||
|
@ -86,6 +85,8 @@ module REXML
|
||||||
end
|
end
|
||||||
rescue REXML::Validation::ValidationException
|
rescue REXML::Validation::ValidationException
|
||||||
raise
|
raise
|
||||||
|
rescue REXML::UndefinedNamespaceException
|
||||||
|
raise
|
||||||
rescue
|
rescue
|
||||||
raise ParseException.new( $!.message, @parser.source, @parser, $! )
|
raise ParseException.new( $!.message, @parser.source, @parser, $! )
|
||||||
end
|
end
|
||||||
|
|
Loading…
Reference in a new issue