1
0
Fork 0
mirror of https://github.com/ruby/ruby.git synced 2022-11-09 12:17:21 -05:00

* Changes to the encoding mechanism. If iconv is found, it is used first
  for encoding changes.  This should be the case on all 1.8 installations.
  When it isn't found (<1.6), the native REXML encoding mechanism is used.
  This cleaned out some files, and tightened up the code a bit; and iconv
  should be faster than the pure Ruby code.
* Changed deprecated assert_not_nil to assert throughout the tests.
* Parse exceptions are a little more verbose, and extend RuntimeError.
* Bug fixes to XPathParser
* The Light API is still shifting, like the sands of the desert.
* Fixed a new Ruby 1.8.0 warning, added some speed optimizations, and
  tightened error reporting in the base parser


git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@4737 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
ser 2003-10-10 12:54:46 +00:00
parent 662532be00
commit 7d21c237cc
23 changed files with 185 additions and 224 deletions

View file

@ -2,61 +2,49 @@ module REXML
module Encoding module Encoding
@@uconv_available = false @@uconv_available = false
ENCODING_CLAIMS = { }
def Encoding.claim( encoding_str, match=nil )
if match
ENCODING_CLAIMS[ match ] = encoding_str
else
ENCODING_CLAIMS[ /^\s*<?xml\s*version=(['"]).*?\1\s*encoding=(["'])#{encoding_str}\2/i ] = encoding_str
end
end
# Native, default format is UTF-8, so it is declared here rather than in # Native, default format is UTF-8, so it is declared here rather than in
# an encodings/ definition. # an encodings/ definition.
UTF_8 = 'UTF-8' UTF_8 = 'UTF-8'
claim( UTF_8 ) UTF_16 = 'UTF-16'
UNILE = 'UNILE'
# ID ---> Encoding name # ID ---> Encoding name
attr_reader :encoding attr_reader :encoding
def encoding=( enc ) def encoding=( enc )
enc = UTF_8 unless enc old_verbosity = $VERBOSE
begin
$VERBOSE = false
return if defined? @encoding and enc == @encoding
if enc and enc != UTF_8
@encoding = enc.upcase @encoding = enc.upcase
require "rexml/encodings/#@encoding" unless @encoding == UTF_8 begin
load 'rexml/encodings/ICONV.rb'
Iconv::iconv( UTF_8, @encoding, "" )
rescue LoadError, Exception => err
enc_file = File.join( "rexml", "encodings", "#@encoding.rb" )
begin
load enc_file
rescue LoadError
raise Exception.new( "No decoder found for encoding #@encoding. Please install iconv." )
end
end
else
enc = UTF_8
@encoding = enc.upcase
load 'rexml/encodings/UTF-8.rb'
end
ensure
$VERBOSE = old_verbosity
end
end end
def check_encoding str def check_encoding str
rv = ENCODING_CLAIMS.find{|k,v| str =~ k } # We have to recognize UTF-16, LSB UTF-16, and UTF-8
# Raise an exception if there is a declared encoding and we don't return UTF_16 if str[0] == 254 && str[1] == 255
# recognize it return UNILE if str[0] == 255 && str[1] == 254
unless rv str =~ /^\s*<?xml\s*version=(['"]).*?\2\s*encoding=(["'])(.*?)\2/um
if str =~ /^\s*<?xml\s*version=(['"]).*?\1\s*encoding=(["'])(.*?)\2/ return $1.upcase if $1
raise "A matching encoding handler was not found for encoding '#{$3}', or the encoding handler failed to load due to a missing support library (such as uconv)."
else
return UTF_8 return UTF_8
end end
end end
return rv[1]
end
def to_utf_8(str)
return str
end
def from_utf_8 content
return content
end
end
module Encodingses
encodings = []
$:.each do |incl_dir|
if Dir[ File.join(incl_dir, 'rexml', 'encodings') ].size > 0
encodings |= Dir[ File.join(incl_dir, 'rexml', 'encodings', '*_decl.rb') ]
end
encodings.collect!{ |f| File.basename(f) }
encodings.uniq!
end
encodings.each { |enc| require "rexml/encodings/#{enc}" }
end
end end

View file

@ -3,11 +3,11 @@ begin
module REXML module REXML
module Encoding module Encoding
def from_euc_jp(str) def decode(str)
return Uconv::euctou8(str) return Uconv::euctou8(str)
end end
def to_euc_jp content def encode content
return Uconv::u8toeuc(content) return Uconv::u8toeuc(content)
end end
end end
@ -17,12 +17,12 @@ rescue LoadError
require 'iconv' require 'iconv'
module REXML module REXML
module Encoding module Encoding
def from_euc_jp(str) def decode(str)
return Iconv::iconv("utf-8", "euc-jp", str).join('') return Iconv::iconv("utf-8", "euc-jp", str)[0]
end end
def to_euc_jp content def encode content
return Iconv::iconv("euc-jp", "utf-8", content).join('') return Iconv::iconv("euc-jp", "utf-8", content)[0]
end end
end end
end end

View file

@ -1,6 +0,0 @@
module REXML
  module Encoding
    # Canonical name of the EUC-JP encoding.
    EUC_JP = 'EUC-JP'

    # Hand the name to Encoding.claim with no explicit match pattern.
    claim EUC_JP
  end
end

View file

@ -0,0 +1,14 @@
require "iconv"
raise LoadError unless defined? Iconv
module REXML
  # Conversion hooks implemented on top of Iconv.  Both methods read the
  # including object's @encoding and the UTF_8 constant of this module.
  module Encoding
    # Converts +str+ from @encoding into UTF-8 and returns the first
    # element of the array produced by Iconv.
    def decode(str)
      Iconv.iconv(UTF_8, @encoding, str).first
    end

    # Converts +content+ from UTF-8 into @encoding and returns the first
    # element of the array produced by Iconv.
    def encode(content)
      Iconv.iconv(@encoding, UTF_8, content).first
    end
  end
end

View file

@ -1,7 +1,7 @@
module REXML module REXML
module Encoding module Encoding
# Convert from UTF-8 # Convert from UTF-8
def to_iso_8859_1 content def encode content
array_utf8 = content.unpack('U*') array_utf8 = content.unpack('U*')
array_enc = [] array_enc = []
array_utf8.each do |num| array_utf8.each do |num|
@ -16,7 +16,7 @@ module REXML
end end
# Convert to UTF-8 # Convert to UTF-8
def from_iso_8859_1(str) def decode(str)
str.unpack('C*').pack('U*') str.unpack('C*').pack('U*')
end end
end end

View file

@ -1,6 +0,0 @@
module REXML
  module Encoding
    # Canonical name of the ISO-8859-1 encoding.
    ISO_8859_1 = 'ISO-8859-1'

    # Hand the name to Encoding.claim with no explicit match pattern.
    claim ISO_8859_1
  end
end

View file

@ -1,6 +0,0 @@
module REXML
  module Encoding
    # Both spellings of the encoding name (hyphen and underscore) are
    # passed to Encoding.claim, each without an explicit match pattern.
    claim 'Shift-JIS'
    claim 'Shift_JIS'
  end
end

View file

@ -1,6 +1,6 @@
module REXML module REXML
module Encoding module Encoding
def to_unile content def encode content
array_utf8 = content.unpack("U*") array_utf8 = content.unpack("U*")
array_enc = [] array_enc = []
array_utf8.each do |num| array_utf8.each do |num|
@ -15,7 +15,7 @@ module REXML
array_enc.pack('C*') array_enc.pack('C*')
end end
def from_unile(str) def decode(str)
array_enc=str.unpack('C*') array_enc=str.unpack('C*')
array_utf8 = [] array_utf8 = []
2.step(array_enc.size-1, 2){|i| 2.step(array_enc.size-1, 2){|i|

View file

@ -1,6 +0,0 @@
module REXML
  module Encoding
    # Name used for this encoding.
    UNILE = 'UNILE'

    # Passed to Encoding.claim together with an explicit pattern: the two
    # bytes 0xFF 0xFE (octal 377 376) at the start of a line.
    claim UNILE, /^\377\376/
  end
end

View file

@ -1,7 +1,7 @@
module REXML module REXML
module Encoding module Encoding
# Convert from UTF-8 # Convert from UTF-8
def to_us_ascii content def encode content
array_utf8 = content.unpack('U*') array_utf8 = content.unpack('U*')
array_enc = [] array_enc = []
array_utf8.each do |num| array_utf8.each do |num|
@ -16,7 +16,7 @@ module REXML
end end
# Convert to UTF-8 # Convert to UTF-8
def from_us_ascii(str) def decode(str)
str.unpack('C*').pack('U*') str.unpack('C*').pack('U*')
end end
end end

View file

@ -1,6 +0,0 @@
module REXML
  module Encoding
    # Canonical name of the US-ASCII encoding.
    US_ASCII = 'US-ASCII'

    # Hand the name to Encoding.claim with no explicit match pattern.
    claim US_ASCII
  end
end

View file

@ -1,6 +1,6 @@
module REXML module REXML
module Encoding module Encoding
def to_utf_16 content def encode content
array_utf8 = content.unpack("U*") array_utf8 = content.unpack("U*")
array_enc = [] array_enc = []
array_utf8.each do |num| array_utf8.each do |num|
@ -15,7 +15,7 @@ module REXML
array_enc.pack('C*') array_enc.pack('C*')
end end
def from_utf_16(str) def decode(str)
array_enc=str.unpack('C*') array_enc=str.unpack('C*')
array_utf8 = [] array_utf8 = []
2.step(arrayEnc.size-1, 2){|i| 2.step(arrayEnc.size-1, 2){|i|

View file

@ -1,6 +0,0 @@
module REXML
  module Encoding
    # Canonical name of the UTF-16 encoding.
    UTF_16 = 'UTF-16'

    # Passed to Encoding.claim together with an explicit pattern: the two
    # bytes 0xFE 0xFF (octal 376 377) at the start of a line.
    claim UTF_16, /^\376\377/
  end
end

View file

@ -0,0 +1,11 @@
module REXML
  # Pass-through conversion hooks: both methods hand their argument back
  # untouched.
  module Encoding
    # Returns +content+ itself, unchanged.
    def encode(content)
      content
    end

    # Returns +str+ itself, unchanged.
    def decode(str)
      str
    end
  end
end

View file

@ -1,76 +1,58 @@
require 'rexml/xmltokens' require 'rexml/xmltokens'
require 'rexml/light/node' require 'rexml/light/node'
# Development model # [ :element, parent, name, attributes, children* ]
# document = Node.new # a = Node.new
# a << "B" # => <a>B</a>
# Add an element "foo" to the document # a.b # => <a>B<b/></a>
# foo = document << "foo" # a.b[1] # => <a>B<b/><b/><a>
# # Set attribute "attr" on foo # a.b[1]["x"] = "y" # => <a>B<b/><b x="y"/></a>
# foo["attr"] = "la" # a.b[0].c # => <a>B<b><c/></b><b x="y"/></a>
# # Set another attribute in a different namespace # a.b.c << "D" # => <a>B<b><c>D</c></b><b x="y"/></a>
# foo["attr", "namespace"] = "too"
# # Swap foo into another namespace
# foo.namespace = "blah"
# # Add a couple of element nodes to foo
# foo << "a"
# foo << "b"
# # Access the children of foo in various ways
# a = foo[0]
# foo.each { |child|
# #...
# }
# # Add text to foo
# # Add instruction
# # Add comment
# # Get the root of the document
# document == a.root
# # Write the document out
# puts document.to_s
module REXML module REXML
module Light module Light
# Represents a tagged XML element. Elements are characterized by # Represents a tagged XML element. Elements are characterized by
# having children, attributes, and names, and can themselves be # having children, attributes, and names, and can themselves be
# children. # children.
class Node < Array class Node
alias :_old_get :[]
alias :_old_put :[]=
NAMESPLIT = /^(?:(#{XMLTokens::NCNAME_STR}):)?(#{XMLTokens::NCNAME_STR})/u NAMESPLIT = /^(?:(#{XMLTokens::NCNAME_STR}):)?(#{XMLTokens::NCNAME_STR})/u
PARENTS = [ :element, :document, :doctype ]
# Create a new element. # Create a new element.
def initialize node=nil def initialize node=nil
@node = node
if node.kind_of? String if node.kind_of? String
node = [ :text, node ] node = [ :text, node ]
elsif node.nil? elsif node.nil?
node = [ :document, nil, nil ] node = [ :document, nil, nil ]
elsif node[0] == :start_element elsif node[0] == :start_element
node[0] = :element node[0] = :element
elsif node[0] == :start_doctype
node[0] = :doctype
elsif node[0] == :start_document
node[0] = :document
end end
replace( node )
_old_put( 1, 0, 1 )
_old_put( 1, nil )
end end
def size def size
el!() if PARENTS.include? @node[0]
super-4 @node[-1].size
else
0
end
end end
def each( &block ) def each( &block )
el!()
size.times { |x| yield( at(x+4) ) } size.times { |x| yield( at(x+4) ) }
end end
def name def name
el!()
at(2) at(2)
end end
def name=( name_str, ns=nil ) def name=( name_str, ns=nil )
el!()
pfx = '' pfx = ''
pfx = "#{prefix(ns)}:" if ns pfx = "#{prefix(ns)}:" if ns
_old_put(1, "#{pfx}#{name_str}") _old_put(2, "#{pfx}#{name_str}")
end end
def parent=( node ) def parent=( node )
@ -78,28 +60,23 @@ module REXML
end end
def local_name def local_name
el!()
namesplit namesplit
@name @name
end end
def local_name=( name_str ) def local_name=( name_str )
el!()
_old_put( 1, "#@prefix:#{name_str}" ) _old_put( 1, "#@prefix:#{name_str}" )
end end
def prefix( namespace=nil ) def prefix( namespace=nil )
el!()
prefix_of( self, namespace ) prefix_of( self, namespace )
end end
def namespace( prefix=prefix() ) def namespace( prefix=prefix() )
el!()
namespace_of( self, prefix ) namespace_of( self, prefix )
end end
def namespace=( namespace ) def namespace=( namespace )
el!()
@prefix = prefix( namespace ) @prefix = prefix( namespace )
pfx = '' pfx = ''
pfx = "#@prefix:" if @prefix.size > 0 pfx = "#@prefix:" if @prefix.size > 0
@ -107,7 +84,6 @@ module REXML
end end
def []( reference, ns=nil ) def []( reference, ns=nil )
el!()
if reference.kind_of? String if reference.kind_of? String
pfx = '' pfx = ''
pfx = "#{prefix(ns)}:" if ns pfx = "#{prefix(ns)}:" if ns
@ -125,7 +101,6 @@ module REXML
# Doesn't handle namespaces yet # Doesn't handle namespaces yet
def []=( reference, ns, value=nil ) def []=( reference, ns, value=nil )
el!()
if reference.kind_of? String if reference.kind_of? String
value = ns unless value value = ns unless value
at( 3 )[reference] = value at( 3 )[reference] = value
@ -170,12 +145,10 @@ module REXML
end end
def has_name?( name, namespace = '' ) def has_name?( name, namespace = '' )
el!()
at(3) == name and namespace() == namespace at(3) == name and namespace() == namespace
end end
def children def children
el!()
self self
end end
@ -187,14 +160,6 @@ module REXML
end end
def el!
if node_type() != :element and node_type() != :document
_old_put( 0, :element )
push({})
end
self
end
private private
def namesplit def namesplit

View file

@ -8,10 +8,6 @@ module REXML
@output = real_IO @output = real_IO
self.encoding = encd self.encoding = encd
eval <<-EOL
alias :encode :to_#{encoding.tr('-', '_').downcase}
alias :decode :from_#{encoding.tr('-', '_').downcase}
EOL
@to_utf = encd == UTF_8 ? false : true @to_utf = encd == UTF_8 ? false : true
end end

View file

@ -1,5 +1,5 @@
module REXML module REXML
class ParseException < Exception class ParseException < RuntimeError
attr_accessor :source, :parser, :continued_exception attr_accessor :source, :parser, :continued_exception
def initialize( message, source=nil, parser=nil, exception=nil ) def initialize( message, source=nil, parser=nil, exception=nil )
@ -12,9 +12,9 @@ module REXML
def to_s def to_s
# Quote the original exception, if there was one # Quote the original exception, if there was one
if @continued_exception if @continued_exception
err = @continued_exception.message err = @continued_exception.inspect
err << "\n" err << "\n"
err << @continued_exception.backtrace[0..3].join("\n") err << @continued_exception.backtrace.join("\n")
err << "\n...\n" err << "\n...\n"
else else
err = "" err = ""
@ -24,17 +24,24 @@ module REXML
err << super err << super
# Add contextual information # Add contextual information
err << "\n#{@source.current_line}\nLast 80 unconsumed characters:\n#{@source.buffer[0..80].gsub(/\n/, ' ')}\n" if @source if @source
err << "\nContext:\n#{@parser.context}" if @parser err << "\nLine: #{line}\n"
err << "Position: #{position}\n"
err << "Last 80 unconsumed characters:\n"
err << @source.buffer[0..80].gsub(/\n/, ' ')
err << "\n"
err << @source.buffer[0..80].unpack("U*").inspect
end
err err
end end
def position def position
@source.current_line[0] if @source @source.current_line[0] if @source and @source.current_line
end end
def line def line
@source.current_line[2] if @source @source.current_line[2] if @source and @source.current_line
end end
def context def context

View file

@ -89,10 +89,10 @@ module REXML
EREFERENCE = /&(?!#{NAME};)/ EREFERENCE = /&(?!#{NAME};)/
DEFAULT_ENTITIES = { DEFAULT_ENTITIES = {
'gt' => [/&gt;/, '&gt;', '>'], 'gt' => [/&gt;/, '&gt;', '>', />/],
'lt' => [/&lt;/, '&lt;', '<'], 'lt' => [/&lt;/, '&lt;', '<', /</],
'quot' => [/&quot;/, '&quot;', '"'], 'quot' => [/&quot;/, '&quot;', '"', /"/],
"apos" => [/&apos;/, "&apos;", "'"] "apos" => [/&apos;/, "&apos;", "'", /'/]
} }
def initialize( source ) def initialize( source )
@ -126,6 +126,7 @@ module REXML
# Returns true if there are more events. Synonymous with !empty? # Returns true if there are more events. Synonymous with !empty?
def has_next? def has_next?
return true if @closed
@source.read if @source.buffer.size==0 and !@source.empty? @source.read if @source.buffer.size==0 and !@source.empty?
(!@source.empty? and @source.buffer.strip.size>0) or @stack.size>0 or @closed (!@source.empty? and @source.buffer.strip.size>0) or @stack.size>0 or @closed
end end
@ -143,7 +144,7 @@ module REXML
# event, so you can effectively pre-parse the entire document (pull the # event, so you can effectively pre-parse the entire document (pull the
# entire thing into memory) using this method. # entire thing into memory) using this method.
def peek depth=0 def peek depth=0
raise 'Illegal argument "#{depth}"' if depth < -1 raise %Q[Illegal argument "#{depth}"] if depth < -1
temp = [] temp = []
if depth == -1 if depth == -1
temp.push(pull()) until empty? temp.push(pull()) until empty?
@ -166,8 +167,9 @@ module REXML
return @stack.shift if @stack.size > 0 return @stack.shift if @stack.size > 0
@source.read if @source.buffer.size==0 @source.read if @source.buffer.size==0
if @document_status == nil if @document_status == nil
@source.match( /^\s*/um, true ) @source.consume( /^\s*/um )
word = @source.match( /^\s*(<.*?)>/um ) word = @source.match( /(<.*?)>/um )
#word = @source.match_to( '>', /(<.*?)>/um )
word = word[1] unless word.nil? word = word[1] unless word.nil?
case word case word
when COMMENT_START when COMMENT_START
@ -190,7 +192,7 @@ module REXML
close = md[2] close = md[2]
identity =~ IDENTITY identity =~ IDENTITY
name = $1 name = $1
raise "DOCTYPE is missing a name" if name.nil? raise REXML::ParseException("DOCTYPE is missing a name") if name.nil?
pub_sys = $2.nil? ? nil : $2.strip pub_sys = $2.nil? ? nil : $2.strip
long_name = $3.nil? ? nil : $3.strip long_name = $3.nil? ? nil : $3.strip
uri = $4.nil? ? nil : $4.strip uri = $4.nil? ? nil : $4.strip
@ -278,6 +280,7 @@ module REXML
if @source.buffer[0] == ?< if @source.buffer[0] == ?<
if @source.buffer[1] == ?/ if @source.buffer[1] == ?/
last_tag = @tags.pop last_tag = @tags.pop
#md = @source.match_to_consume( '>', CLOSE_MATCH)
md = @source.match( CLOSE_MATCH, true ) md = @source.match( CLOSE_MATCH, true )
raise REXML::ParseException.new( "Missing end tag for '#{last_tag}' "+ raise REXML::ParseException.new( "Missing end tag for '#{last_tag}' "+
"(got \"#{md[1]}\")", @source) unless last_tag == md[1] "(got \"#{md[1]}\")", @source) unless last_tag == md[1]
@ -286,18 +289,20 @@ module REXML
md = @source.match(/\A(\s*[^>]*>)/um) md = @source.match(/\A(\s*[^>]*>)/um)
#puts "SOURCE BUFFER = #{source.buffer}, #{source.buffer.size}" #puts "SOURCE BUFFER = #{source.buffer}, #{source.buffer.size}"
raise REXML::ParseException.new("Malformed node", @source) unless md raise REXML::ParseException.new("Malformed node", @source) unless md
case md[1] if md[0][2] == ?-
when CDATA_START md = @source.match( COMMENT_PATTERN, true )
return [ :cdata, @source.match( CDATA_PATTERN, true )[1] ] return [ :comment, md[1] ] if md
when COMMENT_START
return [ :comment, @source.match( COMMENT_PATTERN, true )[1] ]
else else
md = @source.match( CDATA_PATTERN, true )
return [ :cdata, md[1] ] if md
end
raise REXML::ParseException.new( "Declarations can only occur "+ raise REXML::ParseException.new( "Declarations can only occur "+
"in the doctype declaration.", @source) "in the doctype declaration.", @source)
end
elsif @source.buffer[1] == ?? elsif @source.buffer[1] == ??
md = @source.match( INSTRUCTION_PATTERN, true ) md = @source.match( INSTRUCTION_PATTERN, true )
return [ :processing_instruction, md[1], md[2] ] return [ :processing_instruction, md[1], md[2] ] if md
raise REXML::ParseException.new( "Bad instruction declaration",
@source)
else else
# Get the next tag # Get the next tag
md = @source.match(TAG_MATCH, true) md = @source.match(TAG_MATCH, true)
@ -318,17 +323,19 @@ module REXML
return [ :start_element, md[1], attributes ] return [ :start_element, md[1], attributes ]
end end
else else
md = @source.match(TEXT_PATTERN, true) md = @source.match( TEXT_PATTERN, true )
raise "no text to add" if md[0].length == 0 #md = @source.match_to_consume( '<', TEXT_PATTERN )
#@source.read
raise REXML::ParseException("no text to add") if md[0].length == 0
# unnormalized = Text::unnormalize( md[1], self ) # unnormalized = Text::unnormalize( md[1], self )
# return PullEvent.new( :text, md[1], unnormalized ) # return PullEvent.new( :text, md[1], unnormalized )
return [ :text, md[1] ] return [ :text, md[1] ]
end end
rescue REXML::ParseException rescue REXML::ParseException
raise $! raise
rescue Exception, NameError => error rescue Exception, NameError => error
raise REXML::ParseException.new( "Exception parsing", raise REXML::ParseException.new( "Exception parsing",
@source, self, error ) @source, self, (error ? error : $!) )
end end
return [ :dummy ] return [ :dummy ]
end end
@ -354,7 +361,7 @@ module REXML
end if entities end if entities
copy.gsub!( EREFERENCE, '&amp;' ) copy.gsub!( EREFERENCE, '&amp;' )
DEFAULT_ENTITIES.each do |key, value| DEFAULT_ENTITIES.each do |key, value|
copy.gsub!( value[2], value[1] ) copy.gsub!( value[3], value[1] )
end end
copy copy
end end

View file

@ -16,25 +16,25 @@ module REXML
end end
def parse def parse
root = context = REXML::Light::Node.new([ :document ]) root = context = [ :document ]
while true while true
event = @parser.pull event = @parser.pull
case event[0] case event[0]
when :end_document when :end_document
break break
when :end_doctype when :end_doctype
context = context.parent context = context[1]
when :start_element, :start_doctype when :start_element, :start_doctype
new_node = REXML::Light::Node.new(event) new_node = event
context << new_node context << new_node
new_node.parent = context new_node[1,0] = [context]
context = new_node context = new_node
when :end_element, :end_doctype when :end_element, :end_doctype
context = context.parent context = context[1]
else else
new_node = REXML::Light::Node.new(event) new_node = event
context << new_node context << new_node
new_node.parent = context new_node[1,0] = [context]
end end
end end
root root

View file

@ -31,7 +31,7 @@ module REXML
results = filter([element], path) results = filter([element], path)
when /^\*/u when /^\*/u
results = filter(element.to_a, path) results = filter(element.to_a, path)
when /^[\[!\w:]/u when /^[[!\w:]/u
# match on child # match on child
matches = [] matches = []
children = element.to_a children = element.to_a

View file

@ -21,6 +21,6 @@
# A tutorial is available in docs/tutorial.html # A tutorial is available in docs/tutorial.html
module REXML module REXML
Copyright = "Copyright #{Time.now.year} Sean Russell <ser@germane-software.com>" Copyright = "Copyright #{Time.now.year} Sean Russell <ser@germane-software.com>"
Date = "+2003/110" Date = "+2003/283"
Version = "2.7.1" Version = "2.7.2"
end end

View file

@ -39,10 +39,6 @@ module REXML
# Overridden to support optimized en/decoding # Overridden to support optimized en/decoding
def encoding=(enc) def encoding=(enc)
super super
eval <<-EOL
alias :encode :to_#{encoding.tr('-', '_').downcase}
alias :decode :from_#{encoding.tr('-', '_').downcase}
EOL
@line_break = encode( '>' ) @line_break = encode( '>' )
if enc != UTF_8 if enc != UTF_8
@buffer = decode(@buffer) @buffer = decode(@buffer)
@ -78,8 +74,22 @@ module REXML
def read def read
end end
def consume( pattern )
@buffer = $' if pattern.match( @buffer )
end
def match_to( char, pattern )
return pattern.match(@buffer)
end
def match_to_consume( char, pattern )
md = pattern.match(@buffer)
@buffer = $'
return md
end
def match pattern, consume=false def match pattern, consume=false
md = pattern.match @buffer md = pattern.match(@buffer)
@buffer = $' if consume and md @buffer = $' if consume and md
return md return md
end end
@ -112,7 +122,9 @@ module REXML
#@block_size = block_size #@block_size = block_size
#super @source.read(@block_size) #super @source.read(@block_size)
@line_break = '>' @line_break = '>'
super @source.readline( @line_break ) #super @source.readline( "\n" )
super @source.readline( @line_break )+@source.read
@line_break = encode( '>' )
end end
def scan pattern, consume=false def scan pattern, consume=false
@ -145,11 +157,15 @@ module REXML
str = @source.readline('>') str = @source.readline('>')
str = decode(str) if @to_utf and str str = decode(str) if @to_utf and str
@buffer << str @buffer << str
rescue rescue Exception, NameError
@source = nil @source = nil
end end
end end
def consume( pattern )
match( pattern, true )
end
def match pattern, consume=false def match pattern, consume=false
rv = pattern.match(@buffer) rv = pattern.match(@buffer)
@buffer = $' if consume and rv @buffer = $' if consume and rv

View file

@ -2,16 +2,6 @@ require 'rexml/namespace'
require 'rexml/xmltokens' require 'rexml/xmltokens'
require 'rexml/parsers/xpathparser' require 'rexml/parsers/xpathparser'
# Ignore this class. It adds a __ne__ method, because Ruby doesn't seem to
# understand object.send( "!=", foo ), whereas it *does* understand "<", "==",
# and all of the other comparison methods. Stupid, and annoying, and not at
# all POLS.
class Object
def __ne__(b)
self != b
end
end
module REXML module REXML
# You don't want to use this class. Really. Use XPath, which is a wrapper # You don't want to use this class. Really. Use XPath, which is a wrapper
# for this class. Believe me. You don't want to poke around in here. # for this class. Believe me. You don't want to poke around in here.
@ -132,11 +122,10 @@ module REXML
when :child when :child
#puts "CHILD" #puts "CHILD"
new_nodeset = [] new_nodeset = []
ps_clone = nil nt = nil
for node in nodeset for node in nodeset
#ps_clone = path_stack.clone nt = node.node_type
#new_nodeset += internal_parse( ps_clone, node.children ) if node.parent? new_nodeset += node.children if nt == :element or nt == :document
new_nodeset += node.children if node.parent?
end end
#path_stack[0,(path_stack.size-ps_clone.size)] = [] #path_stack[0,(path_stack.size-ps_clone.size)] = []
return new_nodeset return new_nodeset
@ -238,9 +227,11 @@ module REXML
when :descendant when :descendant
#puts ":DESCENDANT" #puts ":DESCENDANT"
results = [] results = []
nt = nil
for node in nodeset for node in nodeset
nt = node.node_type
results += internal_parse( path_stack.clone.unshift( :descendant_or_self ), results += internal_parse( path_stack.clone.unshift( :descendant_or_self ),
node.children ) if node.parent? node.children ) if nt == :element or nt == :document
end end
return results return results
@ -310,11 +301,13 @@ module REXML
def d_o_s( p, ns, r ) def d_o_s( p, ns, r )
#puts r.collect{|n|n.to_s}.inspect #puts r.collect{|n|n.to_s}.inspect
#puts ns.collect{|n|n.to_s}.inspect #puts ns.collect{|n|n.to_s}.inspect
nt = nil
ns.each_index do |i| ns.each_index do |i|
n = ns[i] n = ns[i]
x = match( p.clone, [ n ] ) x = match( p.clone, [ n ] )
#puts "Got a match on #{p.inspect} for #{ns.collect{|n|n.to_s+"("+n.type.to_s+")"}.inspect}" #puts "Got a match on #{p.inspect} for #{ns.collect{|n|n.to_s+"("+n.type.to_s+")"}.inspect}"
d_o_s( p, n.children, x ) if n.parent? nt = n.node_type
d_o_s( p, n.children, x ) if nt == :element or nt == :document
r[i,0] = [x] if x.size > 0 r[i,0] = [x] if x.size > 0
end end
end end