* Changes to the encoding mechanism. If iconv is found, it is used first
  for encoding changes. This should be the case on all 1.8 installations.
  When it isn't found (<1.6), the native REXML encoding mechanism is used.
  This cleaned out some files, and tightened up the code a bit; and iconv
  should be faster than the pure Ruby code.
* Changed deprecated assert_not_nil to assert throughout the tests.
* Parse exceptions are a little more verbose, and extend RuntimeError.
* Bug fixes to XPathParser
* The Light API is still shifting, like the sands of the desert.
* Fixed a new Ruby 1.8.0 warning, added some speed optimizations, and
  tightened error reporting in the base parser

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@4737 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2022-11-09 12:17:21 -05:00 · 2003-10-10 12:54:46 +00:00 · 2003-10-10 12:54:46 +00:00 · 7d21c237cc
commit 7d21c237cc
parent 662532be00
23 changed files with 185 additions and 224 deletions
--- a/lib/rexml/encoding.rb
+++ b/lib/rexml/encoding.rb
@ -2,61 +2,49 @@ module REXML
 	module Encoding
 		@@uconv_available = false

-		ENCODING_CLAIMS = { }
-
-		def Encoding.claim( encoding_str, match=nil )
-			if match
-				ENCODING_CLAIMS[ match ] = encoding_str
-			else
-				ENCODING_CLAIMS[ /^\s*<?xml\s*version=(['"]).*?\1\s*encoding=(["'])#{encoding_str}\2/i ] = encoding_str
-			end
-		end
-
 		# Native, default format is UTF-8, so it is declared here rather than in
 		# an encodings/ definition.
 		UTF_8 = 'UTF-8'
-		claim( UTF_8 )
+		UTF_16 = 'UTF-16'
+		UNILE = 'UNILE'

 		# ID ---> Encoding name
 		attr_reader :encoding
 		def encoding=( enc )
-                	enc = UTF_8 unless enc
+			old_verbosity = $VERBOSE
+			begin
+				$VERBOSE = false
+				return if defined? @encoding and enc == @encoding
+				if enc and enc != UTF_8
 					@encoding = enc.upcase
-                	require "rexml/encodings/#@encoding" unless @encoding == UTF_8
+					begin
+						load 'rexml/encodings/ICONV.rb'
+						Iconv::iconv( UTF_8, @encoding, "" )
+					rescue LoadError, Exception => err
+						enc_file = File.join( "rexml", "encodings", "#@encoding.rb" )
+						begin
+							load enc_file
+						rescue LoadError
+							raise Exception.new( "No decoder found for encoding #@encoding.  Please install iconv." )
+						end
+					end
+				else
+					enc = UTF_8
+					@encoding = enc.upcase
+					load 'rexml/encodings/UTF-8.rb'
+				end
+			ensure
+				$VERBOSE = old_verbosity
+			end
 		end

 		def check_encoding str
-			rv = ENCODING_CLAIMS.find{|k,v| str =~ k }
-			# Raise an exception if there is a declared encoding and we don't
-			# recognize it
-			unless rv
-				if str =~ /^\s*<?xml\s*version=(['"]).*?\1\s*encoding=(["'])(.*?)\2/
-					raise "A matching encoding handler was not found for encoding '#{$3}', or the encoding handler failed to load due to a missing support library (such as uconv)."
-				else
+			# We have to recognize UTF-16, LSB UTF-16, and UTF-8
+			return UTF_16 if str[0] == 254 && str[1] == 255
+			return UNILE if str[0] == 255 && str[1] == 254
+			str =~ /^\s*<?xml\s*version=(['"]).*?\2\s*encoding=(["'])(.*?)\2/um
+			return $1.upcase if $1
 			return UTF_8
 		end
 	end
-			return rv[1]
-		end
-
-		def to_utf_8(str)
-			return str
-		end
-
-		def from_utf_8 content
-			return content
-		end
-	end
-
-	module Encodingses
-		encodings = []
-		$:.each do |incl_dir|
-			if Dir[ File.join(incl_dir, 'rexml', 'encodings') ].size > 0
-				encodings |= Dir[ File.join(incl_dir, 'rexml', 'encodings', '*_decl.rb') ]
-			end
-			encodings.collect!{ |f| File.basename(f) }
-			encodings.uniq!
-		end
-		encodings.each { |enc| require "rexml/encodings/#{enc}" }
-	end
 end
--- a/lib/rexml/encodings/EUC-JP.rb
+++ b/lib/rexml/encodings/EUC-JP.rb
@ -3,11 +3,11 @@ begin

 	module REXML
 		module Encoding
-			def from_euc_jp(str)
+			def decode(str)
 				return Uconv::euctou8(str)
 			end

-			def to_euc_jp content
+			def encode content
 				return Uconv::u8toeuc(content)
 			end
 		end
@ -17,12 +17,12 @@ rescue LoadError
 		require 'iconv'
 		module REXML
 			module Encoding
-			def from_euc_jp(str)
-				return Iconv::iconv("utf-8", "euc-jp", str).join('')
+				def decode(str)
+					return Iconv::iconv("utf-8", "euc-jp", str)[0]
 				end

-			def to_euc_jp content
-				return Iconv::iconv("euc-jp", "utf-8", content).join('')
+				def encode content
+					return Iconv::iconv("euc-jp", "utf-8", content)[0]
 				end
 			end
 		end
--- a/lib/rexml/encodings/EUC-JP_decl.rb
+++ b/lib/rexml/encodings/EUC-JP_decl.rb
@ -1,6 +0,0 @@
-module REXML
-	module Encoding
-		EUC_JP = 'EUC-JP'
-		claim( EUC_JP )
-	end
-end
--- a/lib/rexml/encodings/ICONV.rb
+++ b/lib/rexml/encodings/ICONV.rb
@ -0,0 +1,14 @@
+require "iconv"
+raise LoadError unless defined? Iconv
+
+module REXML
+	module Encoding
+		def decode( str )
+			return Iconv::iconv(UTF_8, @encoding, str)[0]
+		end
+
+		def encode( content )
+			return Iconv::iconv(@encoding, UTF_8, content)[0]
+		end
+	end
+end
--- a/lib/rexml/encodings/ISO-8859-1.rb
+++ b/lib/rexml/encodings/ISO-8859-1.rb
@ -1,7 +1,7 @@
 module REXML
 	module Encoding
 		# Convert from UTF-8
-		def to_iso_8859_1 content
+		def encode content
 			array_utf8 = content.unpack('U*')
 			array_enc = []
 			array_utf8.each do |num|
@ -16,7 +16,7 @@ module REXML
 		end

 		# Convert to UTF-8
-		def from_iso_8859_1(str)
+		def decode(str)
 			str.unpack('C*').pack('U*')
 		end
 	end
--- a/lib/rexml/encodings/ISO-8859-1_decl.rb
+++ b/lib/rexml/encodings/ISO-8859-1_decl.rb
@ -1,6 +0,0 @@
-module REXML
-	module Encoding
-		ISO_8859_1 = 'ISO-8859-1'
-		claim( ISO_8859_1 )
-	end
-end
--- a/lib/rexml/encodings/Shift-JIS_decl.rb
+++ b/lib/rexml/encodings/Shift-JIS_decl.rb
@ -1,6 +0,0 @@
-module REXML
-	module Encoding
-		claim( 'Shift-JIS' )
-		claim( 'Shift_JIS' )
-	end
-end
--- a/lib/rexml/encodings/UNILE.rb
+++ b/lib/rexml/encodings/UNILE.rb
@ -1,6 +1,6 @@
 module REXML
 	module Encoding
-		def to_unile content
+		def encode content
 			array_utf8 = content.unpack("U*")
 			array_enc = []
 			array_utf8.each do |num|
@ -15,7 +15,7 @@ module REXML
 			array_enc.pack('C*')
 		end

-		def from_unile(str)
+		def decode(str)
 			array_enc=str.unpack('C*')
 			array_utf8 = []
 			2.step(array_enc.size-1, 2){|i| 
--- a/lib/rexml/encodings/UNILE_decl.rb
+++ b/lib/rexml/encodings/UNILE_decl.rb
@ -1,6 +0,0 @@
-module REXML
-	module Encoding
-		UNILE = 'UNILE'
-		claim( UNILE, /^\377\376/ )
-	end
-end
--- a/lib/rexml/encodings/US-ASCII.rb
+++ b/lib/rexml/encodings/US-ASCII.rb
@ -1,7 +1,7 @@
 module REXML
 	module Encoding
 		# Convert from UTF-8
-		def to_us_ascii content
+		def encode content
 			array_utf8 = content.unpack('U*')
 			array_enc = []
 			array_utf8.each do |num|
@ -16,7 +16,7 @@ module REXML
 		end

 		# Convert to UTF-8
-		def from_us_ascii(str)
+		def decode(str)
 			str.unpack('C*').pack('U*')
 		end
 	end
--- a/lib/rexml/encodings/US-ASCII_decl.rb
+++ b/lib/rexml/encodings/US-ASCII_decl.rb
@ -1,6 +0,0 @@
-module REXML
-	module Encoding
-		US_ASCII = 'US-ASCII'
-		claim( US_ASCII )
-	end
-end
--- a/lib/rexml/encodings/UTF-16.rb
+++ b/lib/rexml/encodings/UTF-16.rb
@ -1,6 +1,6 @@
 module REXML
 	module Encoding
-		def to_utf_16 content
+		def encode content
 			array_utf8 = content.unpack("U*")
 			array_enc = []
 			array_utf8.each do |num|
@ -15,7 +15,7 @@ module REXML
 			array_enc.pack('C*')
 		end

-		def from_utf_16(str)
+		def decode(str)
 			array_enc=str.unpack('C*')
 			array_utf8 = []
 			2.step(arrayEnc.size-1, 2){|i| 
--- a/lib/rexml/encodings/UTF-16_decl.rb
+++ b/lib/rexml/encodings/UTF-16_decl.rb
@ -1,6 +0,0 @@
-module REXML
-	module Encoding
-		UTF_16 = 'UTF-16'
-		claim( UTF_16, /^\376\377/ )
-	end
-end
--- a/lib/rexml/encodings/UTF-8.rb
+++ b/lib/rexml/encodings/UTF-8.rb
@ -0,0 +1,11 @@
+module REXML
+	module Encoding
+		def encode content
+			content
+		end
+
+		def decode(str)
+			str
+		end
+	end
+end
--- a/lib/rexml/light/node.rb
+++ b/lib/rexml/light/node.rb
@ -1,76 +1,58 @@
 require 'rexml/xmltokens'
 require 'rexml/light/node'

-# Development model
-# document = Node.new
-
-# Add an element "foo" to the document
-# foo = document << "foo"
-# # Set attribute "attr" on foo
-# foo["attr"] = "la"
-# # Set another attribute in a different namespace
-# foo["attr", "namespace"] = "too"
-# # Swap foo into another namespace
-# foo.namespace = "blah"
-# # Add a couple of element nodes to foo
-# foo << "a"
-# foo << "b"
-# # Access the children of foo in various ways
-# a = foo[0]
-# foo.each { |child|
-#         #...
-# }
-# # Add text to foo
-# # Add instruction
-# # Add comment
-# # Get the root of the document
-# document == a.root
-# # Write the document out
-# puts document.to_s
+# [ :element, parent, name, attributes, children* ]
+	# a = Node.new
+	# a << "B"		# => <a>B</a>
+	# a.b			# => <a>B<b/></a>
+	# a.b[1]			# => <a>B<b/><b/><a>
+	# a.b[1]["x"] = "y"	# => <a>B<b/><b x="y"/></a>
+	# a.b[0].c		# => <a>B<b><c/></b><b x="y"/></a>
+	# a.b.c << "D"		# => <a>B<b><c>D</c></b><b x="y"/></a>
 module REXML
 	module Light
 		# Represents a tagged XML element.  Elements are characterized by
 		# having children, attributes, and names, and can themselves be
 		# children.
-		class Node < Array
-			alias :_old_get :[]
-			alias :_old_put :[]=
-
+		class Node
 			NAMESPLIT = /^(?:(#{XMLTokens::NCNAME_STR}):)?(#{XMLTokens::NCNAME_STR})/u
+			PARENTS = [ :element, :document, :doctype ]
 			# Create a new element.
 			def initialize node=nil
+				@node = node
 				if node.kind_of? String
 					node = [ :text, node ]
 				elsif node.nil?
 					node = [ :document, nil, nil ]
 				elsif node[0] == :start_element
 					node[0] = :element
+				elsif node[0] == :start_doctype
+					node[0] = :doctype
+				elsif node[0] == :start_document
+					node[0] = :document
 				end
-				replace( node )
-				_old_put( 1, 0, 1 )
-				_old_put( 1, nil )
 			end

 			def size
-				el!()
-				super-4
+				if PARENTS.include? @node[0]
+					@node[-1].size
+				else
+					0
+				end
 			end

 			def each( &block )
-				el!()
 				size.times { |x| yield( at(x+4) ) }
 			end

 			def name
-				el!()
 				at(2)
 			end

 			def name=( name_str, ns=nil )
-				el!()
 				pfx = ''
 				pfx = "#{prefix(ns)}:" if ns
-				_old_put(1, "#{pfx}#{name_str}")
+				_old_put(2, "#{pfx}#{name_str}")
 			end

 			def parent=( node )
@ -78,28 +60,23 @@ module REXML
 			end

 			def local_name
-				el!()
 				namesplit
 				@name
 			end

 			def local_name=( name_str )
-				el!()
 				_old_put( 1, "#@prefix:#{name_str}" )
 			end

 			def prefix( namespace=nil )
-				el!()
 				prefix_of( self, namespace )
 			end

 			def namespace( prefix=prefix() )
-				el!()
 				namespace_of( self, prefix )
 			end

 			def namespace=( namespace )
-				el!()
 				@prefix = prefix( namespace )
 				pfx = ''
 				pfx = "#@prefix:" if @prefix.size > 0
@ -107,7 +84,6 @@ module REXML
 			end

 			def []( reference, ns=nil )
-				el!()
 				if reference.kind_of? String
 					pfx = ''
 					pfx = "#{prefix(ns)}:" if ns
@ -125,7 +101,6 @@ module REXML

 			# Doesn't handle namespaces yet
 			def []=( reference, ns, value=nil )
-				el!()
 				if reference.kind_of? String
 					value = ns unless value
 					at( 3 )[reference] = value
@ -170,12 +145,10 @@ module REXML
 			end

 			def has_name?( name, namespace = '' )
-				el!()
 				at(3) == name and namespace() == namespace
 			end

 			def children
-				el!()
 				self
 			end

@ -187,14 +160,6 @@ module REXML

 			end

-			def el!
-				if node_type() != :element and node_type() != :document
-					_old_put( 0, :element )
-					push({})
-				end
-				self
-			end
-
 			private

 			def namesplit
--- a/lib/rexml/output.rb
+++ b/lib/rexml/output.rb
@ -8,10 +8,6 @@ module REXML
 			@output = real_IO
 			self.encoding = encd

-			eval <<-EOL
-				alias :encode :to_#{encoding.tr('-', '_').downcase}
-				alias :decode :from_#{encoding.tr('-', '_').downcase}
-			EOL
 			@to_utf = encd == UTF_8 ? false : true
 		end

--- a/lib/rexml/parseexception.rb
+++ b/lib/rexml/parseexception.rb
@ -1,5 +1,5 @@
 module REXML
-	class ParseException < Exception
+	class ParseException < RuntimeError
 		attr_accessor :source, :parser, :continued_exception

 		def initialize( message, source=nil, parser=nil, exception=nil )
@ -12,9 +12,9 @@ module REXML
 		def to_s
 			# Quote the original exception, if there was one
 			if @continued_exception
-				err = @continued_exception.message
+				err = @continued_exception.inspect
 				err << "\n"
-				err << @continued_exception.backtrace[0..3].join("\n")
+				err << @continued_exception.backtrace.join("\n")
 				err << "\n...\n"
 			else
 				err = ""
@ -24,17 +24,24 @@ module REXML
 			err << super

 			# Add contextual information
-			err << "\n#{@source.current_line}\nLast 80 unconsumed characters:\n#{@source.buffer[0..80].gsub(/\n/, ' ')}\n" if @source
-			err << "\nContext:\n#{@parser.context}" if @parser
+			if @source
+				err << "\nLine: #{line}\n"
+				err << "Position: #{position}\n"
+				err << "Last 80 unconsumed characters:\n"
+				err << @source.buffer[0..80].gsub(/\n/, ' ')
+				err << "\n"
+				err << @source.buffer[0..80].unpack("U*").inspect
+			end
+			
 			err
 		end

 		def position
-			@source.current_line[0] if @source
+			@source.current_line[0] if @source and @source.current_line
 		end

 		def line
-			@source.current_line[2] if @source
+			@source.current_line[2] if @source and @source.current_line
 		end

 		def context
--- a/lib/rexml/parsers/baseparser.rb
+++ b/lib/rexml/parsers/baseparser.rb
@ -89,10 +89,10 @@ module REXML
 			EREFERENCE = /&(?!#{NAME};)/

 			DEFAULT_ENTITIES = { 
-				'gt' => [/&gt;/, '&gt;', '>'], 
-				'lt' => [/&lt;/, '&lt;', '<'], 
-				'quot' => [/&quot;/, '&quot;', '"'], 
-				"apos" => [/&apos;/, "&apos;", "'"] 
+				'gt' => [/&gt;/, '&gt;', '>', />/], 
+				'lt' => [/&lt;/, '&lt;', '<', /</], 
+				'quot' => [/&quot;/, '&quot;', '"', /"/], 
+				"apos" => [/&apos;/, "&apos;", "'", /'/] 
 			}

 			def initialize( source )
@ -126,6 +126,7 @@ module REXML

 			# Returns true if there are more events.  Synonymous with !empty?
 			def has_next?
+				return true if @closed
 				@source.read if @source.buffer.size==0 and !@source.empty?
 				(!@source.empty? and @source.buffer.strip.size>0) or @stack.size>0 or @closed
 			end
@ -143,7 +144,7 @@ module REXML
 			# event, so you can effectively pre-parse the entire document (pull the 
 			# entire thing into memory) using this method.  
 			def peek depth=0
-				raise 'Illegal argument "#{depth}"' if depth < -1
+				raise %Q[Illegal argument "#{depth}"] if depth < -1
 				temp = []
 				if depth == -1
 					temp.push(pull()) until empty?
@ -166,8 +167,9 @@ module REXML
 				return @stack.shift if @stack.size > 0
 				@source.read if @source.buffer.size==0
 				if @document_status == nil
-					@source.match( /^\s*/um, true )
-					word = @source.match( /^\s*(<.*?)>/um )
+					@source.consume( /^\s*/um )
+					word = @source.match( /(<.*?)>/um )
+					#word = @source.match_to( '>', /(<.*?)>/um )
 					word = word[1] unless word.nil?
 					case word
 					when COMMENT_START
@ -190,7 +192,7 @@ module REXML
 						close = md[2]
 						identity =~ IDENTITY
 						name = $1
-						raise "DOCTYPE is missing a name" if name.nil?
+						raise REXML::ParseException("DOCTYPE is missing a name") if name.nil?
 						pub_sys = $2.nil? ? nil : $2.strip
 						long_name = $3.nil? ? nil : $3.strip
 						uri = $4.nil? ? nil : $4.strip
@ -278,6 +280,7 @@ module REXML
 					if @source.buffer[0] == ?<
 						if @source.buffer[1] == ?/
 							last_tag = @tags.pop
+							#md = @source.match_to_consume( '>', CLOSE_MATCH)
 							md = @source.match( CLOSE_MATCH, true )
 							raise REXML::ParseException.new( "Missing end tag for '#{last_tag}' "+
 								"(got \"#{md[1]}\")", @source) unless last_tag == md[1]
@ -286,18 +289,20 @@ module REXML
 							md = @source.match(/\A(\s*[^>]*>)/um)
 							#puts "SOURCE BUFFER = #{source.buffer}, #{source.buffer.size}"
 							raise REXML::ParseException.new("Malformed node", @source) unless md
-							case md[1]
-							when CDATA_START
-								return [ :cdata, @source.match( CDATA_PATTERN, true )[1] ]
-							when COMMENT_START
-								return [ :comment, @source.match( COMMENT_PATTERN, true )[1] ]
+							if md[0][2] == ?-
+								md = @source.match( COMMENT_PATTERN, true )
+								return [ :comment, md[1] ] if md
 							else
+								md = @source.match( CDATA_PATTERN, true )
+								return [ :cdata, md[1] ] if md
+							end
 							raise REXML::ParseException.new( "Declarations can only occur "+
 								"in the doctype declaration.", @source)
-							end
 						elsif @source.buffer[1] == ??
 							md = @source.match( INSTRUCTION_PATTERN, true )
-							return [ :processing_instruction, md[1], md[2] ]
+							return [ :processing_instruction, md[1], md[2] ] if md
+							raise REXML::ParseException.new( "Bad instruction declaration",
+								@source)
 						else
 							# Get the next tag
 							md = @source.match(TAG_MATCH, true)
@ -319,16 +324,18 @@ module REXML
 						end
 					else
 						md = @source.match( TEXT_PATTERN, true )
-						raise "no text to add" if md[0].length == 0
+						#md = @source.match_to_consume( '<', TEXT_PATTERN )
+						#@source.read
+						raise REXML::ParseException("no text to add") if md[0].length == 0
 						# unnormalized = Text::unnormalize( md[1], self )
 						# return PullEvent.new( :text, md[1], unnormalized )
 						return [ :text, md[1] ]
 					end
 				rescue REXML::ParseException
-          raise $!
+					raise
 				rescue Exception, NameError => error
 					raise REXML::ParseException.new( "Exception parsing",
-						@source, self, error )
+						@source, self, (error ? error : $!) )
 				end
 				return [ :dummy ]
 			end
@ -354,7 +361,7 @@ module REXML
 				end if entities
 				copy.gsub!( EREFERENCE, '&amp;' )
 				DEFAULT_ENTITIES.each do |key, value|
-					copy.gsub!( value[2], value[1] )
+					copy.gsub!( value[3], value[1] )
 				end
 				copy
 			end
--- a/lib/rexml/parsers/lightparser.rb
+++ b/lib/rexml/parsers/lightparser.rb
@ -16,25 +16,25 @@ module REXML
      end

 			def parse
-				root = context = REXML::Light::Node.new([ :document ])
+				root = context = [ :document ]
 				while true
 					event = @parser.pull
 					case event[0]
 					when :end_document
 						break
 					when :end_doctype
-						context = context.parent
+						context = context[1]
 					when :start_element, :start_doctype
-						new_node = REXML::Light::Node.new(event)
+						new_node = event
 						context << new_node
-						new_node.parent = context
+						new_node[1,0] = [context]
 						context = new_node
 					when :end_element, :end_doctype
-						context = context.parent
+						context = context[1]
 					else
-						new_node = REXML::Light::Node.new(event)
+						new_node = event
 						context << new_node
-						new_node.parent = context
+						new_node[1,0] = [context]
 					end
 				end
 				root
--- a/lib/rexml/quickpath.rb
+++ b/lib/rexml/quickpath.rb
@ -31,7 +31,7 @@ module REXML
 				results = filter([element], path)
 			when /^\*/u
 				results = filter(element.to_a, path)
-			when /^[\[!\w:]/u
+			when /^[[!\w:]/u
 				# match on child
 				matches = []
 				children = element.to_a
--- a/lib/rexml/rexml.rb
+++ b/lib/rexml/rexml.rb
@ -21,6 +21,6 @@
 # A tutorial is available in docs/tutorial.html
 module REXML
 	Copyright = "Copyright #{Time.now.year} Sean Russell <ser@germane-software.com>"
-	Date = "+2003/110"
-	Version = "2.7.1"
+	Date = "+2003/283"
+	Version = "2.7.2"
 end
--- a/lib/rexml/source.rb
+++ b/lib/rexml/source.rb
@ -39,10 +39,6 @@ module REXML
 		# Overridden to support optimized en/decoding
 		def encoding=(enc)
 			super
-			eval <<-EOL
-				alias :encode :to_#{encoding.tr('-', '_').downcase}
-				alias :decode :from_#{encoding.tr('-', '_').downcase}
-			EOL
 			@line_break = encode( '>' )
 			if enc != UTF_8
 				@buffer = decode(@buffer)
@ -78,8 +74,22 @@ module REXML
 		def read
 		end

+		def consume( pattern )
+			@buffer = $' if pattern.match( @buffer )
+		end
+
+		def match_to( char, pattern )
+			return pattern.match(@buffer)
+		end
+
+		def match_to_consume( char, pattern )
+			md = pattern.match(@buffer)
+			@buffer = $'
+			return md
+		end
+
 		def match pattern, consume=false
-			md = pattern.match @buffer
+			md = pattern.match(@buffer)
 			@buffer = $' if consume and md
 			return md
 		end
@ -112,7 +122,9 @@ module REXML
 			#@block_size = block_size
 			#super @source.read(@block_size)
 			@line_break = '>'
-			super @source.readline( @line_break )
+			#super @source.readline( "\n" )
+			super @source.readline( @line_break )+@source.read
+			@line_break = encode( '>' )
 		end

 		def scan pattern, consume=false
@ -145,11 +157,15 @@ module REXML
 				str = @source.readline('>')
 				str = decode(str) if @to_utf and str 
 				@buffer << str
-			rescue
+			rescue Exception, NameError
 				@source = nil
 			end
 		end

+		def consume( pattern )
+			match( pattern, true )
+		end
+
 		def match pattern, consume=false
 			rv = pattern.match(@buffer)
 			@buffer = $' if consume and rv
--- a/lib/rexml/xpath_parser.rb
+++ b/lib/rexml/xpath_parser.rb
@ -2,16 +2,6 @@ require 'rexml/namespace'
 require 'rexml/xmltokens'
 require 'rexml/parsers/xpathparser'

-# Ignore this class.  It adds a __ne__ method, because Ruby doesn't seem to
-# understand object.send( "!=", foo ), whereas it *does* understand "<", "==",
-# and all of the other comparison methods.  Stupid, and annoying, and not at
-# all POLS.
-class Object
-	def __ne__(b)
-		self != b
-	end
-end
-
 module REXML
 	# You don't want to use this class.  Really.  Use XPath, which is a wrapper
 	# for this class.  Believe me.  You don't want to poke around in here.
@ -132,11 +122,10 @@ module REXML
 			when :child
 				#puts "CHILD"
 				new_nodeset = []
-				ps_clone = nil
+				nt = nil
 				for node in nodeset
-					#ps_clone = path_stack.clone
-					#new_nodeset += internal_parse( ps_clone, node.children ) if node.parent?
-					new_nodeset += node.children if node.parent?
+					nt = node.node_type
+					new_nodeset += node.children if nt == :element or nt == :document
 				end
 				#path_stack[0,(path_stack.size-ps_clone.size)] = []
 				return new_nodeset
@ -238,9 +227,11 @@ module REXML
 			when :descendant
 				#puts ":DESCENDANT"
 				results = []
+				nt = nil
 				for node in nodeset
+					nt = node.node_type
 					results += internal_parse( path_stack.clone.unshift( :descendant_or_self ),
-						node.children ) if node.parent?
+						node.children ) if nt == :element or nt == :document
 				end
 				return results

@ -310,11 +301,13 @@ module REXML
 		def d_o_s( p, ns, r )
 			#puts r.collect{|n|n.to_s}.inspect
 			#puts ns.collect{|n|n.to_s}.inspect
+			nt = nil
 			ns.each_index do |i|
 				n = ns[i]
 				x = match( p.clone, [ n ] )
 				#puts "Got a match on #{p.inspect} for #{ns.collect{|n|n.to_s+"("+n.type.to_s+")"}.inspect}"
-				d_o_s( p, n.children, x ) if n.parent?
+				nt = n.node_type
+				d_o_s( p, n.children, x ) if nt == :element or nt == :document
 				r[i,0] = [x] if x.size > 0
 			end
 		end