diff --git a/lib/rexml/attribute.rb b/lib/rexml/attribute.rb index 029035d675..89c1ada36c 100644 --- a/lib/rexml/attribute.rb +++ b/lib/rexml/attribute.rb @@ -50,7 +50,7 @@ module REXML @element = first.element end elsif first.kind_of? String - @element = parent if parent.kind_of? Element + @element = parent self.name = first @normalized = second.to_s else diff --git a/lib/rexml/element.rb b/lib/rexml/element.rb index da3fb57872..92612036a1 100644 --- a/lib/rexml/element.rb +++ b/lib/rexml/element.rb @@ -855,15 +855,15 @@ module REXML # Source (see Element.initialize). If not supplied or nil, a # new, default Element will be constructed # Returns:: the added Element - # a = Element.new 'a' - # a.elements.add Element.new 'b' #-> - # a.elements.add 'c' #-> + # a = Element.new('a') + # a.elements.add(Element.new('b')) #-> + # a.elements.add('c') #-> def add element=nil rv = nil if element.nil? - Element.new "", self, @element.context + Element.new("", self, @element.context) elsif not element.kind_of?(Element) - Element.new element, self, @element.context + Element.new(element, self, @element.context) else @element << element element.context = @element.context diff --git a/lib/rexml/parsers/baseparser.rb b/lib/rexml/parsers/baseparser.rb index 3782d61b2c..5f7a5ec43b 100644 --- a/lib/rexml/parsers/baseparser.rb +++ b/lib/rexml/parsers/baseparser.rb @@ -1,5 +1,6 @@ require 'rexml/parseexception' require 'rexml/source' +require 'set' module REXML module Parsers @@ -24,7 +25,8 @@ module REXML # Nat Price gave me some good ideas for the API. class BaseParser NCNAME_STR= '[\w:][\-\w\d.]*' - NAME_STR= "(?:#{NCNAME_STR}:)?#{NCNAME_STR}" + NAME_STR= "(?:(#{NCNAME_STR}):)?(#{NCNAME_STR})" + UNAME_STR= "(?:#{NCNAME_STR}:)?#{NCNAME_STR}" NAMECHAR = '[\-\w\d\.:]' NAME = "([\\w:]#{NAMECHAR}*)" @@ -35,7 +37,7 @@ module REXML DOCTYPE_START = /\A\s*)/um - ATTRIBUTE_PATTERN = /\s*(#{NAME_STR})\s*=\s*(["'])(.*?)\2/um + ATTRIBUTE_PATTERN = /\s*(#{NAME_STR})\s*=\s*(["'])(.*?)\4/um COMMENT_START = /\A/um CDATA_START = /\A/um INSTRUCTION_START = /\A<\?/u INSTRUCTION_PATTERN = /<\?(.*?)(\s+.*?)?\?>/um - TAG_MATCH = /^<((?>#{NAME_STR}))\s*((?>\s+#{NAME_STR}\s*=\s*(["']).*?\3)*)\s*(\/)?>/um + TAG_MATCH = /^<((?>#{NAME_STR}))\s*((?>\s+#{UNAME_STR}\s*=\s*(["']).*?\5)*)\s*(\/)?>/um CLOSE_MATCH = /^\s*<\/(#{NAME_STR})\s*>/um VERSION = /\bversion\s*=\s*["'](.*?)['"]/um @@ -133,6 +135,7 @@ module REXML @tags = [] @stack = [] @entities = [] + @nsstack = [] end def position @@ -213,6 +216,7 @@ module REXML return [ :processing_instruction, *@source.match(INSTRUCTION_PATTERN, true)[1,2] ] when DOCTYPE_START md = @source.match( DOCTYPE_PATTERN, true ) + @nsstack.unshift(curr_ns=Set.new) identity = md[1] close = md[2] identity =~ IDENTITY @@ -288,6 +292,9 @@ module REXML val = attdef[3] val = attdef[4] if val == "#FIXED " pairs[attdef[0]] = val + if attdef[0] =~ /^xmlns:(.*)/ + @nsstack[0] << $1 + end end end return [ :attlistdecl, element, pairs, contents ] @@ -312,6 +319,7 @@ module REXML begin if @source.buffer[0] == ?< if @source.buffer[1] == ?/ + @nsstack.shift last_tag = @tags.pop #md = @source.match_to_consume( '>', CLOSE_MATCH) md = @source.match( CLOSE_MATCH, true ) @@ -345,19 +353,47 @@ module REXML raise REXML::ParseException.new("missing attribute quote", @source) if @source.match(MISSING_ATTRIBUTE_QUOTES ) raise REXML::ParseException.new("malformed XML: missing tag start", @source) end - attrs = [] - if md[2].size > 0 - attrs = md[2].scan( ATTRIBUTE_PATTERN ) + attributes = {} + prefixes = Set.new + prefixes << md[2] if md[2] + @nsstack.unshift(curr_ns=Set.new) + if md[4].size > 0 + attrs = md[4].scan( ATTRIBUTE_PATTERN ) raise REXML::ParseException.new( "error parsing attributes: [#{attrs.join ', '}], excess = \"#$'\"", @source) if $' and $'.strip.size > 0 + attrs.each { |a,b,c,d,e| + if b == "xmlns" + if c == "xml" + if d != "http://www.w3.org/XML/1998/namespace" + msg = "The 'xml' prefix must not be bound to any other namespace "+ + "(http://www.w3.org/TR/REC-xml-names/#ns-decl)" + raise REXML::ParseException.new( msg, @source, self ) + end + elsif c == "xmlns" + msg = "The 'xmlns' prefix must not be declared "+ + "(http://www.w3.org/TR/REC-xml-names/#ns-decl)" + raise REXML::ParseException.new( msg, @source, self) + end + curr_ns << c + elsif b + prefixes << b unless b == "xml" + end + attributes[a] = e + } end - if md[4] + # Verify that all of the prefixes have been defined + for prefix in prefixes + unless @nsstack.find{|k| k.member?(prefix)} + raise UndefinedNamespaceException.new(prefix,@source,self) + end + end + + if md[6] @closed = md[1] + @nsstack.shift else @tags.push( md[1] ) end - attributes = {} - attrs.each { |a,b,c| attributes[a] = c } return [ :start_element, md[1], attributes ] end else @@ -371,6 +407,8 @@ module REXML # return PullEvent.new( :text, md[1], unnormalized ) return [ :text, md[1] ] end + rescue REXML::UndefinedNamespaceException + raise rescue REXML::ParseException raise rescue Exception, NameError => error diff --git a/lib/rexml/parsers/treeparser.rb b/lib/rexml/parsers/treeparser.rb index a53fa41925..ff8261cedf 100644 --- a/lib/rexml/parsers/treeparser.rb +++ b/lib/rexml/parsers/treeparser.rb @@ -29,8 +29,7 @@ module REXML return when :start_element tag_stack.push(event[1]) - # find the observers for namespaces - @build_context = @build_context.add_element( event[1], event[2] ) + el = @build_context = @build_context.add_element( event[1], event[2] ) when :end_element tag_stack.pop @build_context = @build_context.parent @@ -86,6 +85,8 @@ module REXML end rescue REXML::Validation::ValidationException raise + rescue REXML::UndefinedNamespaceException + raise rescue raise ParseException.new( $!.message, @parser.source, @parser, $! ) end