diff --git a/lib/rexml/attribute.rb b/lib/rexml/attribute.rb
index 029035d675..89c1ada36c 100644
--- a/lib/rexml/attribute.rb
+++ b/lib/rexml/attribute.rb
@@ -50,7 +50,7 @@ module REXML
@element = first.element
end
elsif first.kind_of? String
- @element = parent if parent.kind_of? Element
+ @element = parent
self.name = first
@normalized = second.to_s
else
diff --git a/lib/rexml/element.rb b/lib/rexml/element.rb
index da3fb57872..92612036a1 100644
--- a/lib/rexml/element.rb
+++ b/lib/rexml/element.rb
@@ -855,15 +855,15 @@ module REXML
# Source (see Element.initialize). If not supplied or nil, a
# new, default Element will be constructed
# Returns:: the added Element
- # a = Element.new 'a'
- # a.elements.add Element.new 'b' #->
- # a.elements.add 'c' #->
+ # a = Element.new('a')
+ # a.elements.add(Element.new('b')) #->
+ # a.elements.add('c') #->
def add element=nil
rv = nil
if element.nil?
- Element.new "", self, @element.context
+ Element.new("", self, @element.context)
elsif not element.kind_of?(Element)
- Element.new element, self, @element.context
+ Element.new(element, self, @element.context)
else
@element << element
element.context = @element.context
diff --git a/lib/rexml/parsers/baseparser.rb b/lib/rexml/parsers/baseparser.rb
index 3782d61b2c..5f7a5ec43b 100644
--- a/lib/rexml/parsers/baseparser.rb
+++ b/lib/rexml/parsers/baseparser.rb
@@ -1,5 +1,6 @@
require 'rexml/parseexception'
require 'rexml/source'
+require 'set'
module REXML
module Parsers
@@ -24,7 +25,8 @@ module REXML
# Nat Price gave me some good ideas for the API.
class BaseParser
NCNAME_STR= '[\w:][\-\w\d.]*'
- NAME_STR= "(?:#{NCNAME_STR}:)?#{NCNAME_STR}"
+ NAME_STR= "(?:(#{NCNAME_STR}):)?(#{NCNAME_STR})"
+ UNAME_STR= "(?:#{NCNAME_STR}:)?#{NCNAME_STR}"
NAMECHAR = '[\-\w\d\.:]'
NAME = "([\\w:]#{NAMECHAR}*)"
@@ -35,7 +37,7 @@ module REXML
DOCTYPE_START = /\A\s*)/um
- ATTRIBUTE_PATTERN = /\s*(#{NAME_STR})\s*=\s*(["'])(.*?)\2/um
+ ATTRIBUTE_PATTERN = /\s*(#{NAME_STR})\s*=\s*(["'])(.*?)\4/um
COMMENT_START = /\A/um
CDATA_START = /\A/um
INSTRUCTION_START = /\A<\?/u
INSTRUCTION_PATTERN = /<\?(.*?)(\s+.*?)?\?>/um
- TAG_MATCH = /^<((?>#{NAME_STR}))\s*((?>\s+#{NAME_STR}\s*=\s*(["']).*?\3)*)\s*(\/)?>/um
+ TAG_MATCH = /^<((?>#{NAME_STR}))\s*((?>\s+#{UNAME_STR}\s*=\s*(["']).*?\5)*)\s*(\/)?>/um
CLOSE_MATCH = /^\s*<\/(#{NAME_STR})\s*>/um
VERSION = /\bversion\s*=\s*["'](.*?)['"]/um
@@ -133,6 +135,7 @@ module REXML
@tags = []
@stack = []
@entities = []
+ @nsstack = []
end
def position
@@ -213,6 +216,7 @@ module REXML
return [ :processing_instruction, *@source.match(INSTRUCTION_PATTERN, true)[1,2] ]
when DOCTYPE_START
md = @source.match( DOCTYPE_PATTERN, true )
+ @nsstack.unshift(curr_ns=Set.new)
identity = md[1]
close = md[2]
identity =~ IDENTITY
@@ -288,6 +292,9 @@ module REXML
val = attdef[3]
val = attdef[4] if val == "#FIXED "
pairs[attdef[0]] = val
+ if attdef[0] =~ /^xmlns:(.*)/
+ @nsstack[0] << $1
+ end
end
end
return [ :attlistdecl, element, pairs, contents ]
@@ -312,6 +319,7 @@ module REXML
begin
if @source.buffer[0] == ?<
if @source.buffer[1] == ?/
+ @nsstack.shift
last_tag = @tags.pop
#md = @source.match_to_consume( '>', CLOSE_MATCH)
md = @source.match( CLOSE_MATCH, true )
@@ -345,19 +353,47 @@ module REXML
raise REXML::ParseException.new("missing attribute quote", @source) if @source.match(MISSING_ATTRIBUTE_QUOTES )
raise REXML::ParseException.new("malformed XML: missing tag start", @source)
end
- attrs = []
- if md[2].size > 0
- attrs = md[2].scan( ATTRIBUTE_PATTERN )
+ attributes = {}
+ prefixes = Set.new
+ prefixes << md[2] if md[2]
+ @nsstack.unshift(curr_ns=Set.new)
+ if md[4].size > 0
+ attrs = md[4].scan( ATTRIBUTE_PATTERN )
raise REXML::ParseException.new( "error parsing attributes: [#{attrs.join ', '}], excess = \"#$'\"", @source) if $' and $'.strip.size > 0
+ attrs.each { |a,b,c,d,e|
+ if b == "xmlns"
+ if c == "xml"
+ if d != "http://www.w3.org/XML/1998/namespace"
+ msg = "The 'xml' prefix must not be bound to any other namespace "+
+ "(http://www.w3.org/TR/REC-xml-names/#ns-decl)"
+ raise REXML::ParseException.new( msg, @source, self )
+ end
+ elsif c == "xmlns"
+ msg = "The 'xmlns' prefix must not be declared "+
+ "(http://www.w3.org/TR/REC-xml-names/#ns-decl)"
+ raise REXML::ParseException.new( msg, @source, self)
+ end
+ curr_ns << c
+ elsif b
+ prefixes << b unless b == "xml"
+ end
+ attributes[a] = e
+ }
end
- if md[4]
+ # Verify that all of the prefixes have been defined
+ for prefix in prefixes
+ unless @nsstack.find{|k| k.member?(prefix)}
+ raise UndefinedNamespaceException.new(prefix,@source,self)
+ end
+ end
+
+ if md[6]
@closed = md[1]
+ @nsstack.shift
else
@tags.push( md[1] )
end
- attributes = {}
- attrs.each { |a,b,c| attributes[a] = c }
return [ :start_element, md[1], attributes ]
end
else
@@ -371,6 +407,8 @@ module REXML
# return PullEvent.new( :text, md[1], unnormalized )
return [ :text, md[1] ]
end
+ rescue REXML::UndefinedNamespaceException
+ raise
rescue REXML::ParseException
raise
rescue Exception, NameError => error
diff --git a/lib/rexml/parsers/treeparser.rb b/lib/rexml/parsers/treeparser.rb
index a53fa41925..ff8261cedf 100644
--- a/lib/rexml/parsers/treeparser.rb
+++ b/lib/rexml/parsers/treeparser.rb
@@ -29,8 +29,7 @@ module REXML
return
when :start_element
tag_stack.push(event[1])
- # find the observers for namespaces
- @build_context = @build_context.add_element( event[1], event[2] )
+ el = @build_context = @build_context.add_element( event[1], event[2] )
when :end_element
tag_stack.pop
@build_context = @build_context.parent
@@ -86,6 +85,8 @@ module REXML
end
rescue REXML::Validation::ValidationException
raise
+ rescue REXML::UndefinedNamespaceException
+ raise
rescue
raise ParseException.new( $!.message, @parser.source, @parser, $! )
end