mirror of
				https://github.com/ruby/ruby.git
				synced 2022-11-09 12:17:21 -05:00 
			
		
		
		
	* Changes to the encoding mechanism. If iconv is found, it is used first
for encoding changes. This should be the case on all 1.8 installations. When it isn't found (<1.6), the native REXML encoding mechanism is used. This cleaned out some files, and tightened up the code a bit; and iconv should be faster than the pure Ruby code. * Changed deprecated assert_not_nil to assert throughout the tests. * Parse exceptions are a little more verbose, and extend RuntimeError. * Bug fixes to XPathParser * The Light API is still shifting, like the sands of the desert. * Fixed a new Ruby 1.8.0 warning, added some speed optimizations, and tightened error reporting in the base parser git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@4737 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
		
							parent
							
								
									662532be00
								
							
						
					
					
						commit
						7d21c237cc
					
				
					 23 changed files with 185 additions and 224 deletions
				
			
		| 
						 | 
				
			
			@ -2,61 +2,49 @@ module REXML
 | 
			
		|||
	module Encoding
 | 
			
		||||
		@@uconv_available = false
 | 
			
		||||
 | 
			
		||||
		ENCODING_CLAIMS = { }
 | 
			
		||||
 | 
			
		||||
		def Encoding.claim( encoding_str, match=nil )
 | 
			
		||||
			if match
 | 
			
		||||
				ENCODING_CLAIMS[ match ] = encoding_str
 | 
			
		||||
			else
 | 
			
		||||
				ENCODING_CLAIMS[ /^\s*<?xml\s*version=(['"]).*?\1\s*encoding=(["'])#{encoding_str}\2/i ] = encoding_str
 | 
			
		||||
			end
 | 
			
		||||
		end
 | 
			
		||||
 | 
			
		||||
		# Native, default format is UTF-8, so it is declared here rather than in
 | 
			
		||||
		# an encodings/ definition.
 | 
			
		||||
		UTF_8 = 'UTF-8'
 | 
			
		||||
		claim( UTF_8 )
 | 
			
		||||
		UTF_16 = 'UTF-16'
 | 
			
		||||
		UNILE = 'UNILE'
 | 
			
		||||
 | 
			
		||||
		# ID ---> Encoding name
 | 
			
		||||
		attr_reader :encoding
 | 
			
		||||
		def encoding=( enc )
 | 
			
		||||
                	enc = UTF_8 unless enc
 | 
			
		||||
                	@encoding = enc.upcase
 | 
			
		||||
                	require "rexml/encodings/#@encoding" unless @encoding == UTF_8
 | 
			
		||||
			old_verbosity = $VERBOSE
 | 
			
		||||
			begin
 | 
			
		||||
				$VERBOSE = false
 | 
			
		||||
				return if defined? @encoding and enc == @encoding
 | 
			
		||||
				if enc and enc != UTF_8
 | 
			
		||||
					@encoding = enc.upcase
 | 
			
		||||
					begin
 | 
			
		||||
						load 'rexml/encodings/ICONV.rb'
 | 
			
		||||
						Iconv::iconv( UTF_8, @encoding, "" )
 | 
			
		||||
					rescue LoadError, Exception => err
 | 
			
		||||
						enc_file = File.join( "rexml", "encodings", "#@encoding.rb" )
 | 
			
		||||
						begin
 | 
			
		||||
							load enc_file
 | 
			
		||||
						rescue LoadError
 | 
			
		||||
							raise Exception.new( "No decoder found for encoding #@encoding.  Please install iconv." )
 | 
			
		||||
						end
 | 
			
		||||
					end
 | 
			
		||||
				else
 | 
			
		||||
					enc = UTF_8
 | 
			
		||||
					@encoding = enc.upcase
 | 
			
		||||
					load 'rexml/encodings/UTF-8.rb'
 | 
			
		||||
				end
 | 
			
		||||
			ensure
 | 
			
		||||
				$VERBOSE = old_verbosity
 | 
			
		||||
			end
 | 
			
		||||
		end
 | 
			
		||||
 | 
			
		||||
		def check_encoding str
 | 
			
		||||
			rv = ENCODING_CLAIMS.find{|k,v| str =~ k }
 | 
			
		||||
			# Raise an exception if there is a declared encoding and we don't
 | 
			
		||||
			# recognize it
 | 
			
		||||
			unless rv
 | 
			
		||||
				if str =~ /^\s*<?xml\s*version=(['"]).*?\1\s*encoding=(["'])(.*?)\2/
 | 
			
		||||
					raise "A matching encoding handler was not found for encoding '#{$3}', or the encoding handler failed to load due to a missing support library (such as uconv)."
 | 
			
		||||
				else
 | 
			
		||||
					return UTF_8
 | 
			
		||||
				end
 | 
			
		||||
			end
 | 
			
		||||
			return rv[1]
 | 
			
		||||
			# We have to recognize UTF-16, LSB UTF-16, and UTF-8
 | 
			
		||||
			return UTF_16 if str[0] == 254 && str[1] == 255
 | 
			
		||||
			return UNILE if str[0] == 255 && str[1] == 254
 | 
			
		||||
			str =~ /^\s*<?xml\s*version=(['"]).*?\2\s*encoding=(["'])(.*?)\2/um
 | 
			
		||||
			return $1.upcase if $1
 | 
			
		||||
			return UTF_8
 | 
			
		||||
		end
 | 
			
		||||
 | 
			
		||||
		def to_utf_8(str)
 | 
			
		||||
			return str
 | 
			
		||||
		end
 | 
			
		||||
 | 
			
		||||
		def from_utf_8 content
 | 
			
		||||
			return content
 | 
			
		||||
		end
 | 
			
		||||
	end
 | 
			
		||||
 | 
			
		||||
	module Encodingses
 | 
			
		||||
		encodings = []
 | 
			
		||||
		$:.each do |incl_dir|
 | 
			
		||||
			if Dir[ File.join(incl_dir, 'rexml', 'encodings') ].size > 0
 | 
			
		||||
				encodings |= Dir[ File.join(incl_dir, 'rexml', 'encodings', '*_decl.rb') ]
 | 
			
		||||
			end
 | 
			
		||||
			encodings.collect!{ |f| File.basename(f) }
 | 
			
		||||
			encodings.uniq!
 | 
			
		||||
		end
 | 
			
		||||
		encodings.each { |enc| require "rexml/encodings/#{enc}" }
 | 
			
		||||
	end
 | 
			
		||||
end
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -3,30 +3,30 @@ begin
 | 
			
		|||
 | 
			
		||||
	module REXML
 | 
			
		||||
		module Encoding
 | 
			
		||||
			def from_euc_jp(str)
 | 
			
		||||
			def decode(str)
 | 
			
		||||
				return Uconv::euctou8(str)
 | 
			
		||||
			end
 | 
			
		||||
 | 
			
		||||
			def to_euc_jp content
 | 
			
		||||
			def encode content
 | 
			
		||||
				return Uconv::u8toeuc(content)
 | 
			
		||||
			end
 | 
			
		||||
		end
 | 
			
		||||
	end
 | 
			
		||||
rescue LoadError
 | 
			
		||||
  begin
 | 
			
		||||
	require 'iconv'
 | 
			
		||||
	module REXML
 | 
			
		||||
		module Encoding
 | 
			
		||||
			def from_euc_jp(str)
 | 
			
		||||
				return Iconv::iconv("utf-8", "euc-jp", str).join('')
 | 
			
		||||
			end
 | 
			
		||||
		require 'iconv'
 | 
			
		||||
		module REXML
 | 
			
		||||
			module Encoding
 | 
			
		||||
				def decode(str)
 | 
			
		||||
					return Iconv::iconv("utf-8", "euc-jp", str)[0]
 | 
			
		||||
				end
 | 
			
		||||
 | 
			
		||||
			def to_euc_jp content
 | 
			
		||||
				return Iconv::iconv("euc-jp", "utf-8", content).join('')
 | 
			
		||||
				def encode content
 | 
			
		||||
					return Iconv::iconv("euc-jp", "utf-8", content)[0]
 | 
			
		||||
				end
 | 
			
		||||
			end
 | 
			
		||||
		end
 | 
			
		||||
	end
 | 
			
		||||
  rescue LoadError
 | 
			
		||||
	raise "uconv or iconv is required for Japanese encoding support."
 | 
			
		||||
		raise "uconv or iconv is required for Japanese encoding support."
 | 
			
		||||
  end
 | 
			
		||||
end
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1,6 +0,0 @@
 | 
			
		|||
module REXML
 | 
			
		||||
	module Encoding
 | 
			
		||||
		EUC_JP = 'EUC-JP'
 | 
			
		||||
		claim( EUC_JP )
 | 
			
		||||
	end
 | 
			
		||||
end
 | 
			
		||||
							
								
								
									
										14
									
								
								lib/rexml/encodings/ICONV.rb
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										14
									
								
								lib/rexml/encodings/ICONV.rb
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,14 @@
 | 
			
		|||
require "iconv"
 | 
			
		||||
raise LoadError unless defined? Iconv
 | 
			
		||||
 | 
			
		||||
module REXML
 | 
			
		||||
	module Encoding
 | 
			
		||||
		def decode( str )
 | 
			
		||||
			return Iconv::iconv(UTF_8, @encoding, str)[0]
 | 
			
		||||
		end
 | 
			
		||||
 | 
			
		||||
		def encode( content )
 | 
			
		||||
			return Iconv::iconv(@encoding, UTF_8, content)[0]
 | 
			
		||||
		end
 | 
			
		||||
	end
 | 
			
		||||
end
 | 
			
		||||
| 
						 | 
				
			
			@ -1,7 +1,7 @@
 | 
			
		|||
module REXML
 | 
			
		||||
	module Encoding
 | 
			
		||||
		# Convert from UTF-8
 | 
			
		||||
		def to_iso_8859_1 content
 | 
			
		||||
		def encode content
 | 
			
		||||
			array_utf8 = content.unpack('U*')
 | 
			
		||||
			array_enc = []
 | 
			
		||||
			array_utf8.each do |num|
 | 
			
		||||
| 
						 | 
				
			
			@ -16,7 +16,7 @@ module REXML
 | 
			
		|||
		end
 | 
			
		||||
 | 
			
		||||
		# Convert to UTF-8
 | 
			
		||||
		def from_iso_8859_1(str)
 | 
			
		||||
		def decode(str)
 | 
			
		||||
			str.unpack('C*').pack('U*')
 | 
			
		||||
		end
 | 
			
		||||
	end
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1,6 +0,0 @@
 | 
			
		|||
module REXML
 | 
			
		||||
	module Encoding
 | 
			
		||||
		ISO_8859_1 = 'ISO-8859-1'
 | 
			
		||||
		claim( ISO_8859_1 )
 | 
			
		||||
	end
 | 
			
		||||
end
 | 
			
		||||
| 
						 | 
				
			
			@ -1,6 +0,0 @@
 | 
			
		|||
module REXML
 | 
			
		||||
	module Encoding
 | 
			
		||||
		claim( 'Shift-JIS' )
 | 
			
		||||
		claim( 'Shift_JIS' )
 | 
			
		||||
	end
 | 
			
		||||
end
 | 
			
		||||
| 
						 | 
				
			
			@ -1,6 +1,6 @@
 | 
			
		|||
module REXML
 | 
			
		||||
	module Encoding
 | 
			
		||||
		def to_unile content
 | 
			
		||||
		def encode content
 | 
			
		||||
			array_utf8 = content.unpack("U*")
 | 
			
		||||
			array_enc = []
 | 
			
		||||
			array_utf8.each do |num|
 | 
			
		||||
| 
						 | 
				
			
			@ -15,7 +15,7 @@ module REXML
 | 
			
		|||
			array_enc.pack('C*')
 | 
			
		||||
		end
 | 
			
		||||
 | 
			
		||||
		def from_unile(str)
 | 
			
		||||
		def decode(str)
 | 
			
		||||
			array_enc=str.unpack('C*')
 | 
			
		||||
			array_utf8 = []
 | 
			
		||||
			2.step(array_enc.size-1, 2){|i| 
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1,6 +0,0 @@
 | 
			
		|||
module REXML
 | 
			
		||||
	module Encoding
 | 
			
		||||
		UNILE = 'UNILE'
 | 
			
		||||
		claim( UNILE, /^\377\376/ )
 | 
			
		||||
	end
 | 
			
		||||
end
 | 
			
		||||
| 
						 | 
				
			
			@ -1,7 +1,7 @@
 | 
			
		|||
module REXML
 | 
			
		||||
	module Encoding
 | 
			
		||||
		# Convert from UTF-8
 | 
			
		||||
		def to_us_ascii content
 | 
			
		||||
		def encode content
 | 
			
		||||
			array_utf8 = content.unpack('U*')
 | 
			
		||||
			array_enc = []
 | 
			
		||||
			array_utf8.each do |num|
 | 
			
		||||
| 
						 | 
				
			
			@ -16,7 +16,7 @@ module REXML
 | 
			
		|||
		end
 | 
			
		||||
 | 
			
		||||
		# Convert to UTF-8
 | 
			
		||||
		def from_us_ascii(str)
 | 
			
		||||
		def decode(str)
 | 
			
		||||
			str.unpack('C*').pack('U*')
 | 
			
		||||
		end
 | 
			
		||||
	end
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1,6 +0,0 @@
 | 
			
		|||
module REXML
 | 
			
		||||
	module Encoding
 | 
			
		||||
		US_ASCII = 'US-ASCII'
 | 
			
		||||
		claim( US_ASCII )
 | 
			
		||||
	end
 | 
			
		||||
end
 | 
			
		||||
| 
						 | 
				
			
			@ -1,6 +1,6 @@
 | 
			
		|||
module REXML
 | 
			
		||||
	module Encoding
 | 
			
		||||
		def to_utf_16 content
 | 
			
		||||
		def encode content
 | 
			
		||||
			array_utf8 = content.unpack("U*")
 | 
			
		||||
			array_enc = []
 | 
			
		||||
			array_utf8.each do |num|
 | 
			
		||||
| 
						 | 
				
			
			@ -15,7 +15,7 @@ module REXML
 | 
			
		|||
			array_enc.pack('C*')
 | 
			
		||||
		end
 | 
			
		||||
 | 
			
		||||
		def from_utf_16(str)
 | 
			
		||||
		def decode(str)
 | 
			
		||||
			array_enc=str.unpack('C*')
 | 
			
		||||
			array_utf8 = []
 | 
			
		||||
			2.step(arrayEnc.size-1, 2){|i| 
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1,6 +0,0 @@
 | 
			
		|||
module REXML
 | 
			
		||||
	module Encoding
 | 
			
		||||
		UTF_16 = 'UTF-16'
 | 
			
		||||
		claim( UTF_16, /^\376\377/ )
 | 
			
		||||
	end
 | 
			
		||||
end
 | 
			
		||||
							
								
								
									
										11
									
								
								lib/rexml/encodings/UTF-8.rb
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										11
									
								
								lib/rexml/encodings/UTF-8.rb
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,11 @@
 | 
			
		|||
module REXML
 | 
			
		||||
	module Encoding
 | 
			
		||||
		def encode content
 | 
			
		||||
			content
 | 
			
		||||
		end
 | 
			
		||||
 | 
			
		||||
		def decode(str)
 | 
			
		||||
			str
 | 
			
		||||
		end
 | 
			
		||||
	end
 | 
			
		||||
end
 | 
			
		||||
| 
						 | 
				
			
			@ -1,76 +1,58 @@
 | 
			
		|||
require 'rexml/xmltokens'
 | 
			
		||||
require 'rexml/light/node'
 | 
			
		||||
 | 
			
		||||
# Development model
 | 
			
		||||
# document = Node.new
 | 
			
		||||
 | 
			
		||||
# Add an element "foo" to the document
 | 
			
		||||
# foo = document << "foo"
 | 
			
		||||
# # Set attribute "attr" on foo
 | 
			
		||||
# foo["attr"] = "la"
 | 
			
		||||
# # Set another attribute in a different namespace
 | 
			
		||||
# foo["attr", "namespace"] = "too"
 | 
			
		||||
# # Swap foo into another namespace
 | 
			
		||||
# foo.namespace = "blah"
 | 
			
		||||
# # Add a couple of element nodes to foo
 | 
			
		||||
# foo << "a"
 | 
			
		||||
# foo << "b"
 | 
			
		||||
# # Access the children of foo in various ways
 | 
			
		||||
# a = foo[0]
 | 
			
		||||
# foo.each { |child|
 | 
			
		||||
#         #...
 | 
			
		||||
# }
 | 
			
		||||
# # Add text to foo
 | 
			
		||||
# # Add instruction
 | 
			
		||||
# # Add comment
 | 
			
		||||
# # Get the root of the document
 | 
			
		||||
# document == a.root
 | 
			
		||||
# # Write the document out
 | 
			
		||||
# puts document.to_s
 | 
			
		||||
# [ :element, parent, name, attributes, children* ]
 | 
			
		||||
	# a = Node.new
 | 
			
		||||
	# a << "B"		# => <a>B</a>
 | 
			
		||||
	# a.b			# => <a>B<b/></a>
 | 
			
		||||
	# a.b[1]			# => <a>B<b/><b/><a>
 | 
			
		||||
	# a.b[1]["x"] = "y"	# => <a>B<b/><b x="y"/></a>
 | 
			
		||||
	# a.b[0].c		# => <a>B<b><c/></b><b x="y"/></a>
 | 
			
		||||
	# a.b.c << "D"		# => <a>B<b><c>D</c></b><b x="y"/></a>
 | 
			
		||||
module REXML
 | 
			
		||||
	module Light
 | 
			
		||||
		# Represents a tagged XML element.  Elements are characterized by
 | 
			
		||||
		# having children, attributes, and names, and can themselves be
 | 
			
		||||
		# children.
 | 
			
		||||
		class Node < Array
 | 
			
		||||
			alias :_old_get :[]
 | 
			
		||||
			alias :_old_put :[]=
 | 
			
		||||
 | 
			
		||||
		class Node
 | 
			
		||||
			NAMESPLIT = /^(?:(#{XMLTokens::NCNAME_STR}):)?(#{XMLTokens::NCNAME_STR})/u
 | 
			
		||||
			PARENTS = [ :element, :document, :doctype ]
 | 
			
		||||
			# Create a new element.
 | 
			
		||||
			def initialize node=nil
 | 
			
		||||
				@node = node
 | 
			
		||||
				if node.kind_of? String
 | 
			
		||||
					node = [ :text, node ]
 | 
			
		||||
				elsif node.nil?
 | 
			
		||||
					node = [ :document, nil, nil ]
 | 
			
		||||
				elsif node[0] == :start_element
 | 
			
		||||
					node[0] = :element
 | 
			
		||||
				elsif node[0] == :start_doctype
 | 
			
		||||
					node[0] = :doctype
 | 
			
		||||
				elsif node[0] == :start_document
 | 
			
		||||
					node[0] = :document
 | 
			
		||||
				end
 | 
			
		||||
				replace( node )
 | 
			
		||||
				_old_put( 1, 0, 1 )
 | 
			
		||||
				_old_put( 1, nil )
 | 
			
		||||
			end
 | 
			
		||||
 | 
			
		||||
			def size
 | 
			
		||||
				el!()
 | 
			
		||||
				super-4
 | 
			
		||||
				if PARENTS.include? @node[0]
 | 
			
		||||
					@node[-1].size
 | 
			
		||||
				else
 | 
			
		||||
					0
 | 
			
		||||
				end
 | 
			
		||||
			end
 | 
			
		||||
 | 
			
		||||
			def each( &block )
 | 
			
		||||
				el!()
 | 
			
		||||
				size.times { |x| yield( at(x+4) ) }
 | 
			
		||||
			end
 | 
			
		||||
 | 
			
		||||
			def name
 | 
			
		||||
				el!()
 | 
			
		||||
				at(2)
 | 
			
		||||
			end
 | 
			
		||||
 | 
			
		||||
			def name=( name_str, ns=nil )
 | 
			
		||||
				el!()
 | 
			
		||||
				pfx = ''
 | 
			
		||||
				pfx = "#{prefix(ns)}:" if ns
 | 
			
		||||
				_old_put(1, "#{pfx}#{name_str}")
 | 
			
		||||
				_old_put(2, "#{pfx}#{name_str}")
 | 
			
		||||
			end
 | 
			
		||||
 | 
			
		||||
			def parent=( node )
 | 
			
		||||
| 
						 | 
				
			
			@ -78,28 +60,23 @@ module REXML
 | 
			
		|||
			end
 | 
			
		||||
 | 
			
		||||
			def local_name
 | 
			
		||||
				el!()
 | 
			
		||||
				namesplit
 | 
			
		||||
				@name
 | 
			
		||||
			end
 | 
			
		||||
 | 
			
		||||
			def local_name=( name_str )
 | 
			
		||||
				el!()
 | 
			
		||||
				_old_put( 1, "#@prefix:#{name_str}" )
 | 
			
		||||
			end
 | 
			
		||||
 | 
			
		||||
			def prefix( namespace=nil )
 | 
			
		||||
				el!()
 | 
			
		||||
				prefix_of( self, namespace )
 | 
			
		||||
			end
 | 
			
		||||
 | 
			
		||||
			def namespace( prefix=prefix() )
 | 
			
		||||
				el!()
 | 
			
		||||
				namespace_of( self, prefix )
 | 
			
		||||
			end
 | 
			
		||||
 | 
			
		||||
			def namespace=( namespace )
 | 
			
		||||
				el!()
 | 
			
		||||
				@prefix = prefix( namespace )
 | 
			
		||||
				pfx = ''
 | 
			
		||||
				pfx = "#@prefix:" if @prefix.size > 0
 | 
			
		||||
| 
						 | 
				
			
			@ -107,7 +84,6 @@ module REXML
 | 
			
		|||
			end
 | 
			
		||||
 | 
			
		||||
			def []( reference, ns=nil )
 | 
			
		||||
				el!()
 | 
			
		||||
				if reference.kind_of? String
 | 
			
		||||
					pfx = ''
 | 
			
		||||
					pfx = "#{prefix(ns)}:" if ns
 | 
			
		||||
| 
						 | 
				
			
			@ -125,7 +101,6 @@ module REXML
 | 
			
		|||
 | 
			
		||||
			# Doesn't handle namespaces yet
 | 
			
		||||
			def []=( reference, ns, value=nil )
 | 
			
		||||
				el!()
 | 
			
		||||
				if reference.kind_of? String
 | 
			
		||||
					value = ns unless value
 | 
			
		||||
					at( 3 )[reference] = value
 | 
			
		||||
| 
						 | 
				
			
			@ -170,12 +145,10 @@ module REXML
 | 
			
		|||
			end
 | 
			
		||||
 | 
			
		||||
			def has_name?( name, namespace = '' )
 | 
			
		||||
				el!()
 | 
			
		||||
				at(3) == name and namespace() == namespace
 | 
			
		||||
			end
 | 
			
		||||
 | 
			
		||||
			def children
 | 
			
		||||
				el!()
 | 
			
		||||
				self
 | 
			
		||||
			end
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -187,14 +160,6 @@ module REXML
 | 
			
		|||
 | 
			
		||||
			end
 | 
			
		||||
 | 
			
		||||
			def el!
 | 
			
		||||
				if node_type() != :element and node_type() != :document
 | 
			
		||||
					_old_put( 0, :element )
 | 
			
		||||
					push({})
 | 
			
		||||
				end
 | 
			
		||||
				self
 | 
			
		||||
			end
 | 
			
		||||
 | 
			
		||||
			private
 | 
			
		||||
 | 
			
		||||
			def namesplit
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -8,10 +8,6 @@ module REXML
 | 
			
		|||
			@output = real_IO
 | 
			
		||||
			self.encoding = encd
 | 
			
		||||
 | 
			
		||||
			eval <<-EOL
 | 
			
		||||
				alias :encode :to_#{encoding.tr('-', '_').downcase}
 | 
			
		||||
				alias :decode :from_#{encoding.tr('-', '_').downcase}
 | 
			
		||||
			EOL
 | 
			
		||||
			@to_utf = encd == UTF_8 ? false : true
 | 
			
		||||
		end
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1,5 +1,5 @@
 | 
			
		|||
module REXML
 | 
			
		||||
	class ParseException < Exception
 | 
			
		||||
	class ParseException < RuntimeError
 | 
			
		||||
		attr_accessor :source, :parser, :continued_exception
 | 
			
		||||
 | 
			
		||||
		def initialize( message, source=nil, parser=nil, exception=nil )
 | 
			
		||||
| 
						 | 
				
			
			@ -12,9 +12,9 @@ module REXML
 | 
			
		|||
		def to_s
 | 
			
		||||
			# Quote the original exception, if there was one
 | 
			
		||||
			if @continued_exception
 | 
			
		||||
				err = @continued_exception.message
 | 
			
		||||
				err = @continued_exception.inspect
 | 
			
		||||
				err << "\n"
 | 
			
		||||
				err << @continued_exception.backtrace[0..3].join("\n")
 | 
			
		||||
				err << @continued_exception.backtrace.join("\n")
 | 
			
		||||
				err << "\n...\n"
 | 
			
		||||
			else
 | 
			
		||||
				err = ""
 | 
			
		||||
| 
						 | 
				
			
			@ -24,17 +24,24 @@ module REXML
 | 
			
		|||
			err << super
 | 
			
		||||
 | 
			
		||||
			# Add contextual information
 | 
			
		||||
			err << "\n#{@source.current_line}\nLast 80 unconsumed characters:\n#{@source.buffer[0..80].gsub(/\n/, ' ')}\n" if @source
 | 
			
		||||
			err << "\nContext:\n#{@parser.context}" if @parser
 | 
			
		||||
			if @source
 | 
			
		||||
				err << "\nLine: #{line}\n"
 | 
			
		||||
				err << "Position: #{position}\n"
 | 
			
		||||
				err << "Last 80 unconsumed characters:\n"
 | 
			
		||||
				err << @source.buffer[0..80].gsub(/\n/, ' ')
 | 
			
		||||
				err << "\n"
 | 
			
		||||
				err << @source.buffer[0..80].unpack("U*").inspect
 | 
			
		||||
			end
 | 
			
		||||
			
 | 
			
		||||
			err
 | 
			
		||||
		end
 | 
			
		||||
 | 
			
		||||
		def position
 | 
			
		||||
			@source.current_line[0] if @source
 | 
			
		||||
			@source.current_line[0] if @source and @source.current_line
 | 
			
		||||
		end
 | 
			
		||||
 | 
			
		||||
		def line
 | 
			
		||||
			@source.current_line[2] if @source
 | 
			
		||||
			@source.current_line[2] if @source and @source.current_line
 | 
			
		||||
		end
 | 
			
		||||
 | 
			
		||||
		def context
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -89,10 +89,10 @@ module REXML
 | 
			
		|||
			EREFERENCE = /&(?!#{NAME};)/
 | 
			
		||||
 | 
			
		||||
			DEFAULT_ENTITIES = { 
 | 
			
		||||
				'gt' => [/>/, '>', '>'], 
 | 
			
		||||
				'lt' => [/</, '<', '<'], 
 | 
			
		||||
				'quot' => [/"/, '"', '"'], 
 | 
			
		||||
				"apos" => [/'/, "'", "'"] 
 | 
			
		||||
				'gt' => [/>/, '>', '>', />/], 
 | 
			
		||||
				'lt' => [/</, '<', '<', /</], 
 | 
			
		||||
				'quot' => [/"/, '"', '"', /"/], 
 | 
			
		||||
				"apos" => [/'/, "'", "'", /'/] 
 | 
			
		||||
			}
 | 
			
		||||
 | 
			
		||||
			def initialize( source )
 | 
			
		||||
| 
						 | 
				
			
			@ -126,6 +126,7 @@ module REXML
 | 
			
		|||
 | 
			
		||||
			# Returns true if there are more events.  Synonymous with !empty?
 | 
			
		||||
			def has_next?
 | 
			
		||||
				return true if @closed
 | 
			
		||||
				@source.read if @source.buffer.size==0 and !@source.empty?
 | 
			
		||||
				(!@source.empty? and @source.buffer.strip.size>0) or @stack.size>0 or @closed
 | 
			
		||||
			end
 | 
			
		||||
| 
						 | 
				
			
			@ -143,7 +144,7 @@ module REXML
 | 
			
		|||
			# event, so you can effectively pre-parse the entire document (pull the 
 | 
			
		||||
			# entire thing into memory) using this method.  
 | 
			
		||||
			def peek depth=0
 | 
			
		||||
				raise 'Illegal argument "#{depth}"' if depth < -1
 | 
			
		||||
				raise %Q[Illegal argument "#{depth}"] if depth < -1
 | 
			
		||||
				temp = []
 | 
			
		||||
				if depth == -1
 | 
			
		||||
					temp.push(pull()) until empty?
 | 
			
		||||
| 
						 | 
				
			
			@ -166,8 +167,9 @@ module REXML
 | 
			
		|||
				return @stack.shift if @stack.size > 0
 | 
			
		||||
				@source.read if @source.buffer.size==0
 | 
			
		||||
				if @document_status == nil
 | 
			
		||||
					@source.match( /^\s*/um, true )
 | 
			
		||||
					word = @source.match( /^\s*(<.*?)>/um )
 | 
			
		||||
					@source.consume( /^\s*/um )
 | 
			
		||||
					word = @source.match( /(<.*?)>/um )
 | 
			
		||||
					#word = @source.match_to( '>', /(<.*?)>/um )
 | 
			
		||||
					word = word[1] unless word.nil?
 | 
			
		||||
					case word
 | 
			
		||||
					when COMMENT_START
 | 
			
		||||
| 
						 | 
				
			
			@ -190,7 +192,7 @@ module REXML
 | 
			
		|||
						close = md[2]
 | 
			
		||||
						identity =~ IDENTITY
 | 
			
		||||
						name = $1
 | 
			
		||||
						raise "DOCTYPE is missing a name" if name.nil?
 | 
			
		||||
						raise REXML::ParseException("DOCTYPE is missing a name") if name.nil?
 | 
			
		||||
						pub_sys = $2.nil? ? nil : $2.strip
 | 
			
		||||
						long_name = $3.nil? ? nil : $3.strip
 | 
			
		||||
						uri = $4.nil? ? nil : $4.strip
 | 
			
		||||
| 
						 | 
				
			
			@ -274,10 +276,11 @@ module REXML
 | 
			
		|||
						return [ :end_doctype ]
 | 
			
		||||
					end
 | 
			
		||||
				end
 | 
			
		||||
				begin 
 | 
			
		||||
				begin
 | 
			
		||||
					if @source.buffer[0] == ?<
 | 
			
		||||
						if @source.buffer[1] == ?/
 | 
			
		||||
							last_tag = @tags.pop
 | 
			
		||||
							#md = @source.match_to_consume( '>', CLOSE_MATCH)
 | 
			
		||||
							md = @source.match( CLOSE_MATCH, true )
 | 
			
		||||
							raise REXML::ParseException.new( "Missing end tag for '#{last_tag}' "+
 | 
			
		||||
								"(got \"#{md[1]}\")", @source) unless last_tag == md[1]
 | 
			
		||||
| 
						 | 
				
			
			@ -286,18 +289,20 @@ module REXML
 | 
			
		|||
							md = @source.match(/\A(\s*[^>]*>)/um)
 | 
			
		||||
							#puts "SOURCE BUFFER = #{source.buffer}, #{source.buffer.size}"
 | 
			
		||||
							raise REXML::ParseException.new("Malformed node", @source) unless md
 | 
			
		||||
							case md[1]
 | 
			
		||||
							when CDATA_START
 | 
			
		||||
								return [ :cdata, @source.match( CDATA_PATTERN, true )[1] ]
 | 
			
		||||
							when COMMENT_START
 | 
			
		||||
								return [ :comment, @source.match( COMMENT_PATTERN, true )[1] ]
 | 
			
		||||
							if md[0][2] == ?-
 | 
			
		||||
								md = @source.match( COMMENT_PATTERN, true )
 | 
			
		||||
								return [ :comment, md[1] ] if md
 | 
			
		||||
							else
 | 
			
		||||
								raise REXML::ParseException.new( "Declarations can only occur "+
 | 
			
		||||
								"in the doctype declaration.", @source)
 | 
			
		||||
								md = @source.match( CDATA_PATTERN, true )
 | 
			
		||||
								return [ :cdata, md[1] ] if md
 | 
			
		||||
							end
 | 
			
		||||
							raise REXML::ParseException.new( "Declarations can only occur "+
 | 
			
		||||
								"in the doctype declaration.", @source)
 | 
			
		||||
						elsif @source.buffer[1] == ??
 | 
			
		||||
							md = @source.match( INSTRUCTION_PATTERN, true )
 | 
			
		||||
							return [ :processing_instruction, md[1], md[2] ]
 | 
			
		||||
							return [ :processing_instruction, md[1], md[2] ] if md
 | 
			
		||||
							raise REXML::ParseException.new( "Bad instruction declaration",
 | 
			
		||||
								@source)
 | 
			
		||||
						else
 | 
			
		||||
							# Get the next tag
 | 
			
		||||
							md = @source.match(TAG_MATCH, true)
 | 
			
		||||
| 
						 | 
				
			
			@ -318,17 +323,19 @@ module REXML
 | 
			
		|||
							return [ :start_element, md[1], attributes ]
 | 
			
		||||
						end
 | 
			
		||||
					else
 | 
			
		||||
						md = @source.match(TEXT_PATTERN, true)
 | 
			
		||||
						raise "no text to add" if md[0].length == 0
 | 
			
		||||
						md = @source.match( TEXT_PATTERN, true )
 | 
			
		||||
						#md = @source.match_to_consume( '<', TEXT_PATTERN )
 | 
			
		||||
						#@source.read
 | 
			
		||||
						raise REXML::ParseException("no text to add") if md[0].length == 0
 | 
			
		||||
						# unnormalized = Text::unnormalize( md[1], self )
 | 
			
		||||
						# return PullEvent.new( :text, md[1], unnormalized )
 | 
			
		||||
						return [ :text, md[1] ]
 | 
			
		||||
					end
 | 
			
		||||
        rescue REXML::ParseException
 | 
			
		||||
          raise $!
 | 
			
		||||
				rescue REXML::ParseException
 | 
			
		||||
					raise
 | 
			
		||||
				rescue Exception, NameError => error
 | 
			
		||||
					raise REXML::ParseException.new( "Exception parsing",
 | 
			
		||||
						@source, self, error )
 | 
			
		||||
						@source, self, (error ? error : $!) )
 | 
			
		||||
				end
 | 
			
		||||
				return [ :dummy ]
 | 
			
		||||
			end
 | 
			
		||||
| 
						 | 
				
			
			@ -354,7 +361,7 @@ module REXML
 | 
			
		|||
				end if entities
 | 
			
		||||
				copy.gsub!( EREFERENCE, '&' )
 | 
			
		||||
				DEFAULT_ENTITIES.each do |key, value|
 | 
			
		||||
					copy.gsub!( value[2], value[1] )
 | 
			
		||||
					copy.gsub!( value[3], value[1] )
 | 
			
		||||
				end
 | 
			
		||||
				copy
 | 
			
		||||
			end
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -16,25 +16,25 @@ module REXML
 | 
			
		|||
      end
 | 
			
		||||
 | 
			
		||||
			def parse
 | 
			
		||||
				root = context = REXML::Light::Node.new([ :document ])
 | 
			
		||||
				root = context = [ :document ]
 | 
			
		||||
				while true
 | 
			
		||||
					event = @parser.pull
 | 
			
		||||
					case event[0]
 | 
			
		||||
					when :end_document
 | 
			
		||||
						break
 | 
			
		||||
					when :end_doctype
 | 
			
		||||
						context = context.parent
 | 
			
		||||
						context = context[1]
 | 
			
		||||
					when :start_element, :start_doctype
 | 
			
		||||
						new_node = REXML::Light::Node.new(event)
 | 
			
		||||
						new_node = event
 | 
			
		||||
						context << new_node
 | 
			
		||||
						new_node.parent = context
 | 
			
		||||
						new_node[1,0] = [context]
 | 
			
		||||
						context = new_node
 | 
			
		||||
					when :end_element, :end_doctype
 | 
			
		||||
						context = context.parent
 | 
			
		||||
						context = context[1]
 | 
			
		||||
					else
 | 
			
		||||
						new_node = REXML::Light::Node.new(event)
 | 
			
		||||
						new_node = event
 | 
			
		||||
						context << new_node
 | 
			
		||||
						new_node.parent = context
 | 
			
		||||
						new_node[1,0] = [context]
 | 
			
		||||
					end
 | 
			
		||||
				end
 | 
			
		||||
				root
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -31,7 +31,7 @@ module REXML
 | 
			
		|||
				results = filter([element], path)
 | 
			
		||||
			when /^\*/u
 | 
			
		||||
				results = filter(element.to_a, path)
 | 
			
		||||
			when /^[\[!\w:]/u
 | 
			
		||||
			when /^[[!\w:]/u
 | 
			
		||||
				# match on child
 | 
			
		||||
				matches = []
 | 
			
		||||
				children = element.to_a
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -21,6 +21,6 @@
 | 
			
		|||
# A tutorial is available in docs/tutorial.html
 | 
			
		||||
module REXML
 | 
			
		||||
	Copyright = "Copyright #{Time.now.year} Sean Russell <ser@germane-software.com>"
 | 
			
		||||
	Date = "+2003/110"
 | 
			
		||||
	Version = "2.7.1"
 | 
			
		||||
	Date = "+2003/283"
 | 
			
		||||
	Version = "2.7.2"
 | 
			
		||||
end
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -39,10 +39,6 @@ module REXML
 | 
			
		|||
		# Overridden to support optimized en/decoding
 | 
			
		||||
		def encoding=(enc)
 | 
			
		||||
			super
 | 
			
		||||
			eval <<-EOL
 | 
			
		||||
				alias :encode :to_#{encoding.tr('-', '_').downcase}
 | 
			
		||||
				alias :decode :from_#{encoding.tr('-', '_').downcase}
 | 
			
		||||
			EOL
 | 
			
		||||
			@line_break = encode( '>' )
 | 
			
		||||
			if enc != UTF_8
 | 
			
		||||
				@buffer = decode(@buffer)
 | 
			
		||||
| 
						 | 
				
			
			@ -78,8 +74,22 @@ module REXML
 | 
			
		|||
		def read
 | 
			
		||||
		end
 | 
			
		||||
 | 
			
		||||
		def consume( pattern )
 | 
			
		||||
			@buffer = $' if pattern.match( @buffer )
 | 
			
		||||
		end
 | 
			
		||||
 | 
			
		||||
		def match_to( char, pattern )
 | 
			
		||||
			return pattern.match(@buffer)
 | 
			
		||||
		end
 | 
			
		||||
 | 
			
		||||
		def match_to_consume( char, pattern )
 | 
			
		||||
			md = pattern.match(@buffer)
 | 
			
		||||
			@buffer = $'
 | 
			
		||||
			return md
 | 
			
		||||
		end
 | 
			
		||||
 | 
			
		||||
		def match pattern, consume=false
 | 
			
		||||
			md = pattern.match @buffer
 | 
			
		||||
			md = pattern.match(@buffer)
 | 
			
		||||
			@buffer = $' if consume and md
 | 
			
		||||
			return md
 | 
			
		||||
		end
 | 
			
		||||
| 
						 | 
				
			
			@ -112,7 +122,9 @@ module REXML
 | 
			
		|||
			#@block_size = block_size
 | 
			
		||||
			#super @source.read(@block_size)
 | 
			
		||||
			@line_break = '>'
 | 
			
		||||
			super @source.readline( @line_break )
 | 
			
		||||
			#super @source.readline( "\n" )
 | 
			
		||||
			super @source.readline( @line_break )+@source.read
 | 
			
		||||
			@line_break = encode( '>' )
 | 
			
		||||
		end
 | 
			
		||||
 | 
			
		||||
		def scan pattern, consume=false
 | 
			
		||||
| 
						 | 
				
			
			@ -145,11 +157,15 @@ module REXML
 | 
			
		|||
				str = @source.readline('>')
 | 
			
		||||
				str = decode(str) if @to_utf and str 
 | 
			
		||||
				@buffer << str
 | 
			
		||||
			rescue
 | 
			
		||||
			rescue Exception, NameError
 | 
			
		||||
				@source = nil
 | 
			
		||||
			end
 | 
			
		||||
		end
 | 
			
		||||
 | 
			
		||||
		def consume( pattern )
 | 
			
		||||
			match( pattern, true )
 | 
			
		||||
		end
 | 
			
		||||
 | 
			
		||||
		def match pattern, consume=false
 | 
			
		||||
			rv = pattern.match(@buffer)
 | 
			
		||||
			@buffer = $' if consume and rv
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -2,16 +2,6 @@ require 'rexml/namespace'
 | 
			
		|||
require 'rexml/xmltokens'
 | 
			
		||||
require 'rexml/parsers/xpathparser'
 | 
			
		||||
 | 
			
		||||
# Support shim; not meant for general use.  It gives every object a __ne__
# method because Ruby does not seem to accept object.send( "!=", foo ),
# although it *does* accept "<", "==" and the other comparison methods.
# __ne__ makes the inequality test reachable under an ordinary method name.
class Object
	# Returns the result of <tt>self != b</tt>.
	def __ne__(b)
		self != b
	end
end
 | 
			
		||||
 | 
			
		||||
module REXML
 | 
			
		||||
	# You don't want to use this class.  Really.  Use XPath, which is a wrapper
 | 
			
		||||
	# for this class.  Believe me.  You don't want to poke around in here.
 | 
			
		||||
| 
						 | 
				
			
			@ -132,11 +122,10 @@ module REXML
 | 
			
		|||
			when :child
 | 
			
		||||
				#puts "CHILD"
 | 
			
		||||
				new_nodeset = []
 | 
			
		||||
				ps_clone = nil
 | 
			
		||||
				nt = nil
 | 
			
		||||
				for node in nodeset
 | 
			
		||||
					#ps_clone = path_stack.clone
 | 
			
		||||
					#new_nodeset += internal_parse( ps_clone, node.children ) if node.parent?
 | 
			
		||||
					new_nodeset += node.children if node.parent?
 | 
			
		||||
					nt = node.node_type
 | 
			
		||||
					new_nodeset += node.children if nt == :element or nt == :document
 | 
			
		||||
				end
 | 
			
		||||
				#path_stack[0,(path_stack.size-ps_clone.size)] = []
 | 
			
		||||
				return new_nodeset
 | 
			
		||||
| 
						 | 
				
			
			@ -238,9 +227,11 @@ module REXML
 | 
			
		|||
			when :descendant
 | 
			
		||||
				#puts ":DESCENDANT"
 | 
			
		||||
				results = []
 | 
			
		||||
				nt = nil
 | 
			
		||||
				for node in nodeset
 | 
			
		||||
					nt = node.node_type
 | 
			
		||||
					results += internal_parse( path_stack.clone.unshift( :descendant_or_self ),
 | 
			
		||||
						node.children ) if node.parent?
 | 
			
		||||
						node.children ) if nt == :element or nt == :document
 | 
			
		||||
				end
 | 
			
		||||
				return results
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -310,11 +301,13 @@ module REXML
 | 
			
		|||
		# Recursive helper that gathers matches from a list of nodes and from
		# the children of those nodes.
		#
		# p::  the path; a fresh clone is handed to #match for every node
		# ns:: the nodes to visit
		# r::  accumulator array; it is modified in place
		#
		# For each node in +ns+ the path is matched against that node alone.
		# If the node's node_type is :element or :document, its children are
		# visited recursively with the node's own match list as accumulator.
		# A non-empty match list is then inserted into +r+ as one nested entry.
		def d_o_s( p, ns, r )
			ns.each_with_index do |n, i|
				x = match( p.clone, [ n ] )
				nt = n.node_type
				# Descend exactly once per node, and only into node types that
				# are checked here before calling #children.
				d_o_s( p, n.children, x ) if nt == :element || nt == :document
				# r[i,0] = [x] inserts x at index i without overwriting
				# anything already stored in r.
				r[i,0] = [x] if x.size > 0
			end
		end
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue