1
0
Fork 0
mirror of https://github.com/ruby/ruby.git synced 2022-11-09 12:17:21 -05:00

* Changes to the encoding mechanism. If iconv is found, it is used first
  for encoding changes.  This should be the case on all 1.8 installations.
  When it isn't found (<1.6), the native REXML encoding mechanism is used.
  This cleaned out some files, and tightened up the code a bit; and iconv
  should be faster than the pure Ruby code.
* Changed deprecated assert_not_nil to assert throughout the tests.
* Parse exceptions are a little more verbose, and extend RuntimeError.
* Bug fixes to XPathParser
* The Light API is still shifting, like the sands of the desert.
* Fixed a new Ruby 1.8.0 warning, added some speed optimizations, and
  tightened error reporting in the base parser


git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@4737 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
ser 2003-10-10 12:54:46 +00:00
parent 662532be00
commit 7d21c237cc
23 changed files with 185 additions and 224 deletions

View file

@ -2,61 +2,49 @@ module REXML
module Encoding module Encoding
@@uconv_available = false @@uconv_available = false
ENCODING_CLAIMS = { }
def Encoding.claim( encoding_str, match=nil )
if match
ENCODING_CLAIMS[ match ] = encoding_str
else
ENCODING_CLAIMS[ /^\s*<?xml\s*version=(['"]).*?\1\s*encoding=(["'])#{encoding_str}\2/i ] = encoding_str
end
end
# Native, default format is UTF-8, so it is declared here rather than in # Native, default format is UTF-8, so it is declared here rather than in
# an encodings/ definition. # an encodings/ definition.
UTF_8 = 'UTF-8' UTF_8 = 'UTF-8'
claim( UTF_8 ) UTF_16 = 'UTF-16'
UNILE = 'UNILE'
# ID ---> Encoding name # ID ---> Encoding name
attr_reader :encoding attr_reader :encoding
def encoding=( enc ) def encoding=( enc )
enc = UTF_8 unless enc old_verbosity = $VERBOSE
@encoding = enc.upcase begin
require "rexml/encodings/#@encoding" unless @encoding == UTF_8 $VERBOSE = false
return if defined? @encoding and enc == @encoding
if enc and enc != UTF_8
@encoding = enc.upcase
begin
load 'rexml/encodings/ICONV.rb'
Iconv::iconv( UTF_8, @encoding, "" )
rescue LoadError, Exception => err
enc_file = File.join( "rexml", "encodings", "#@encoding.rb" )
begin
load enc_file
rescue LoadError
raise Exception.new( "No decoder found for encoding #@encoding. Please install iconv." )
end
end
else
enc = UTF_8
@encoding = enc.upcase
load 'rexml/encodings/UTF-8.rb'
end
ensure
$VERBOSE = old_verbosity
end
end end
def check_encoding str def check_encoding str
rv = ENCODING_CLAIMS.find{|k,v| str =~ k } # We have to recognize UTF-16, LSB UTF-16, and UTF-8
# Raise an exception if there is a declared encoding and we don't return UTF_16 if str[0] == 254 && str[1] == 255
# recognize it return UNILE if str[0] == 255 && str[1] == 254
unless rv str =~ /^\s*<?xml\s*version=(['"]).*?\2\s*encoding=(["'])(.*?)\2/um
if str =~ /^\s*<?xml\s*version=(['"]).*?\1\s*encoding=(["'])(.*?)\2/ return $1.upcase if $1
raise "A matching encoding handler was not found for encoding '#{$3}', or the encoding handler failed to load due to a missing support library (such as uconv)." return UTF_8
else
return UTF_8
end
end
return rv[1]
end end
def to_utf_8(str)
return str
end
def from_utf_8 content
return content
end
end
module Encodingses
encodings = []
$:.each do |incl_dir|
if Dir[ File.join(incl_dir, 'rexml', 'encodings') ].size > 0
encodings |= Dir[ File.join(incl_dir, 'rexml', 'encodings', '*_decl.rb') ]
end
encodings.collect!{ |f| File.basename(f) }
encodings.uniq!
end
encodings.each { |enc| require "rexml/encodings/#{enc}" }
end end
end end

View file

@ -3,30 +3,30 @@ begin
module REXML module REXML
module Encoding module Encoding
def from_euc_jp(str) def decode(str)
return Uconv::euctou8(str) return Uconv::euctou8(str)
end end
def to_euc_jp content def encode content
return Uconv::u8toeuc(content) return Uconv::u8toeuc(content)
end end
end end
end end
rescue LoadError rescue LoadError
begin begin
require 'iconv' require 'iconv'
module REXML module REXML
module Encoding module Encoding
def from_euc_jp(str) def decode(str)
return Iconv::iconv("utf-8", "euc-jp", str).join('') return Iconv::iconv("utf-8", "euc-jp", str)[0]
end end
def to_euc_jp content def encode content
return Iconv::iconv("euc-jp", "utf-8", content).join('') return Iconv::iconv("euc-jp", "utf-8", content)[0]
end
end end
end end
end
rescue LoadError rescue LoadError
raise "uconv or iconv is required for Japanese encoding support." raise "uconv or iconv is required for Japanese encoding support."
end end
end end

View file

@ -1,6 +0,0 @@
module REXML
module Encoding
EUC_JP = 'EUC-JP'
claim( EUC_JP )
end
end

View file

@ -0,0 +1,14 @@
require "iconv"
raise LoadError unless defined? Iconv
module REXML
  module Encoding
    # Iconv-backed converter: translates +str+ from the encoding named by
    # @encoding into UTF-8. Only the first element of the iconv result is
    # handed back to the caller.
    def decode(str)
      converted = Iconv::iconv(UTF_8, @encoding, str)
      converted[0]
    end

    # Iconv-backed converter: translates UTF-8 +content+ into the encoding
    # named by @encoding. Only the first element of the iconv result is
    # handed back to the caller.
    def encode(content)
      converted = Iconv::iconv(@encoding, UTF_8, content)
      converted[0]
    end
  end
end

View file

@ -1,7 +1,7 @@
module REXML module REXML
module Encoding module Encoding
# Convert from UTF-8 # Convert from UTF-8
def to_iso_8859_1 content def encode content
array_utf8 = content.unpack('U*') array_utf8 = content.unpack('U*')
array_enc = [] array_enc = []
array_utf8.each do |num| array_utf8.each do |num|
@ -16,7 +16,7 @@ module REXML
end end
# Convert to UTF-8 # Convert to UTF-8
def from_iso_8859_1(str) def decode(str)
str.unpack('C*').pack('U*') str.unpack('C*').pack('U*')
end end
end end

View file

@ -1,6 +0,0 @@
module REXML
module Encoding
ISO_8859_1 = 'ISO-8859-1'
claim( ISO_8859_1 )
end
end

View file

@ -1,6 +0,0 @@
module REXML
module Encoding
claim( 'Shift-JIS' )
claim( 'Shift_JIS' )
end
end

View file

@ -1,6 +1,6 @@
module REXML module REXML
module Encoding module Encoding
def to_unile content def encode content
array_utf8 = content.unpack("U*") array_utf8 = content.unpack("U*")
array_enc = [] array_enc = []
array_utf8.each do |num| array_utf8.each do |num|
@ -15,7 +15,7 @@ module REXML
array_enc.pack('C*') array_enc.pack('C*')
end end
def from_unile(str) def decode(str)
array_enc=str.unpack('C*') array_enc=str.unpack('C*')
array_utf8 = [] array_utf8 = []
2.step(array_enc.size-1, 2){|i| 2.step(array_enc.size-1, 2){|i|

View file

@ -1,6 +0,0 @@
module REXML
module Encoding
UNILE = 'UNILE'
claim( UNILE, /^\377\376/ )
end
end

View file

@ -1,7 +1,7 @@
module REXML module REXML
module Encoding module Encoding
# Convert from UTF-8 # Convert from UTF-8
def to_us_ascii content def encode content
array_utf8 = content.unpack('U*') array_utf8 = content.unpack('U*')
array_enc = [] array_enc = []
array_utf8.each do |num| array_utf8.each do |num|
@ -16,7 +16,7 @@ module REXML
end end
# Convert to UTF-8 # Convert to UTF-8
def from_us_ascii(str) def decode(str)
str.unpack('C*').pack('U*') str.unpack('C*').pack('U*')
end end
end end

View file

@ -1,6 +0,0 @@
module REXML
module Encoding
US_ASCII = 'US-ASCII'
claim( US_ASCII )
end
end

View file

@ -1,6 +1,6 @@
module REXML module REXML
module Encoding module Encoding
def to_utf_16 content def encode content
array_utf8 = content.unpack("U*") array_utf8 = content.unpack("U*")
array_enc = [] array_enc = []
array_utf8.each do |num| array_utf8.each do |num|
@ -15,7 +15,7 @@ module REXML
array_enc.pack('C*') array_enc.pack('C*')
end end
def from_utf_16(str) def decode(str)
array_enc=str.unpack('C*') array_enc=str.unpack('C*')
array_utf8 = [] array_utf8 = []
2.step(arrayEnc.size-1, 2){|i| 2.step(arrayEnc.size-1, 2){|i|

View file

@ -1,6 +0,0 @@
module REXML
module Encoding
UTF_16 = 'UTF-16'
claim( UTF_16, /^\376\377/ )
end
end

View file

@ -0,0 +1,11 @@
module REXML
  module Encoding
    # UTF-8 is the native format, so encoding is a pass-through: the
    # argument is returned untouched.
    def encode(content)
      return content
    end

    # UTF-8 is the native format, so decoding is a pass-through: the
    # argument is returned untouched.
    def decode(str)
      return str
    end
  end
end

View file

@ -1,76 +1,58 @@
require 'rexml/xmltokens' require 'rexml/xmltokens'
require 'rexml/light/node' require 'rexml/light/node'
# Development model # [ :element, parent, name, attributes, children* ]
# document = Node.new # a = Node.new
# a << "B" # => <a>B</a>
# Add an element "foo" to the document # a.b # => <a>B<b/></a>
# foo = document << "foo" # a.b[1] # => <a>B<b/><b/><a>
# # Set attribute "attr" on foo # a.b[1]["x"] = "y" # => <a>B<b/><b x="y"/></a>
# foo["attr"] = "la" # a.b[0].c # => <a>B<b><c/></b><b x="y"/></a>
# # Set another attribute in a different namespace # a.b.c << "D" # => <a>B<b><c>D</c></b><b x="y"/></a>
# foo["attr", "namespace"] = "too"
# # Swap foo into another namespace
# foo.namespace = "blah"
# # Add a couple of element nodes to foo
# foo << "a"
# foo << "b"
# # Access the children of foo in various ways
# a = foo[0]
# foo.each { |child|
# #...
# }
# # Add text to foo
# # Add instruction
# # Add comment
# # Get the root of the document
# document == a.root
# # Write the document out
# puts document.to_s
module REXML module REXML
module Light module Light
# Represents a tagged XML element. Elements are characterized by # Represents a tagged XML element. Elements are characterized by
# having children, attributes, and names, and can themselves be # having children, attributes, and names, and can themselves be
# children. # children.
class Node < Array class Node
alias :_old_get :[]
alias :_old_put :[]=
NAMESPLIT = /^(?:(#{XMLTokens::NCNAME_STR}):)?(#{XMLTokens::NCNAME_STR})/u NAMESPLIT = /^(?:(#{XMLTokens::NCNAME_STR}):)?(#{XMLTokens::NCNAME_STR})/u
PARENTS = [ :element, :document, :doctype ]
# Create a new element. # Create a new element.
def initialize node=nil def initialize node=nil
@node = node
if node.kind_of? String if node.kind_of? String
node = [ :text, node ] node = [ :text, node ]
elsif node.nil? elsif node.nil?
node = [ :document, nil, nil ] node = [ :document, nil, nil ]
elsif node[0] == :start_element elsif node[0] == :start_element
node[0] = :element node[0] = :element
elsif node[0] == :start_doctype
node[0] = :doctype
elsif node[0] == :start_document
node[0] = :document
end end
replace( node )
_old_put( 1, 0, 1 )
_old_put( 1, nil )
end end
def size def size
el!() if PARENTS.include? @node[0]
super-4 @node[-1].size
else
0
end
end end
def each( &block ) def each( &block )
el!()
size.times { |x| yield( at(x+4) ) } size.times { |x| yield( at(x+4) ) }
end end
def name def name
el!()
at(2) at(2)
end end
def name=( name_str, ns=nil ) def name=( name_str, ns=nil )
el!()
pfx = '' pfx = ''
pfx = "#{prefix(ns)}:" if ns pfx = "#{prefix(ns)}:" if ns
_old_put(1, "#{pfx}#{name_str}") _old_put(2, "#{pfx}#{name_str}")
end end
def parent=( node ) def parent=( node )
@ -78,28 +60,23 @@ module REXML
end end
def local_name def local_name
el!()
namesplit namesplit
@name @name
end end
def local_name=( name_str ) def local_name=( name_str )
el!()
_old_put( 1, "#@prefix:#{name_str}" ) _old_put( 1, "#@prefix:#{name_str}" )
end end
def prefix( namespace=nil ) def prefix( namespace=nil )
el!()
prefix_of( self, namespace ) prefix_of( self, namespace )
end end
def namespace( prefix=prefix() ) def namespace( prefix=prefix() )
el!()
namespace_of( self, prefix ) namespace_of( self, prefix )
end end
def namespace=( namespace ) def namespace=( namespace )
el!()
@prefix = prefix( namespace ) @prefix = prefix( namespace )
pfx = '' pfx = ''
pfx = "#@prefix:" if @prefix.size > 0 pfx = "#@prefix:" if @prefix.size > 0
@ -107,7 +84,6 @@ module REXML
end end
def []( reference, ns=nil ) def []( reference, ns=nil )
el!()
if reference.kind_of? String if reference.kind_of? String
pfx = '' pfx = ''
pfx = "#{prefix(ns)}:" if ns pfx = "#{prefix(ns)}:" if ns
@ -125,7 +101,6 @@ module REXML
# Doesn't handle namespaces yet # Doesn't handle namespaces yet
def []=( reference, ns, value=nil ) def []=( reference, ns, value=nil )
el!()
if reference.kind_of? String if reference.kind_of? String
value = ns unless value value = ns unless value
at( 3 )[reference] = value at( 3 )[reference] = value
@ -170,12 +145,10 @@ module REXML
end end
def has_name?( name, namespace = '' ) def has_name?( name, namespace = '' )
el!()
at(3) == name and namespace() == namespace at(3) == name and namespace() == namespace
end end
def children def children
el!()
self self
end end
@ -187,14 +160,6 @@ module REXML
end end
def el!
if node_type() != :element and node_type() != :document
_old_put( 0, :element )
push({})
end
self
end
private private
def namesplit def namesplit

View file

@ -8,10 +8,6 @@ module REXML
@output = real_IO @output = real_IO
self.encoding = encd self.encoding = encd
eval <<-EOL
alias :encode :to_#{encoding.tr('-', '_').downcase}
alias :decode :from_#{encoding.tr('-', '_').downcase}
EOL
@to_utf = encd == UTF_8 ? false : true @to_utf = encd == UTF_8 ? false : true
end end

View file

@ -1,5 +1,5 @@
module REXML module REXML
class ParseException < Exception class ParseException < RuntimeError
attr_accessor :source, :parser, :continued_exception attr_accessor :source, :parser, :continued_exception
def initialize( message, source=nil, parser=nil, exception=nil ) def initialize( message, source=nil, parser=nil, exception=nil )
@ -12,9 +12,9 @@ module REXML
def to_s def to_s
# Quote the original exception, if there was one # Quote the original exception, if there was one
if @continued_exception if @continued_exception
err = @continued_exception.message err = @continued_exception.inspect
err << "\n" err << "\n"
err << @continued_exception.backtrace[0..3].join("\n") err << @continued_exception.backtrace.join("\n")
err << "\n...\n" err << "\n...\n"
else else
err = "" err = ""
@ -24,17 +24,24 @@ module REXML
err << super err << super
# Add contextual information # Add contextual information
err << "\n#{@source.current_line}\nLast 80 unconsumed characters:\n#{@source.buffer[0..80].gsub(/\n/, ' ')}\n" if @source if @source
err << "\nContext:\n#{@parser.context}" if @parser err << "\nLine: #{line}\n"
err << "Position: #{position}\n"
err << "Last 80 unconsumed characters:\n"
err << @source.buffer[0..80].gsub(/\n/, ' ')
err << "\n"
err << @source.buffer[0..80].unpack("U*").inspect
end
err err
end end
def position def position
@source.current_line[0] if @source @source.current_line[0] if @source and @source.current_line
end end
def line def line
@source.current_line[2] if @source @source.current_line[2] if @source and @source.current_line
end end
def context def context

View file

@ -89,10 +89,10 @@ module REXML
EREFERENCE = /&(?!#{NAME};)/ EREFERENCE = /&(?!#{NAME};)/
DEFAULT_ENTITIES = { DEFAULT_ENTITIES = {
'gt' => [/&gt;/, '&gt;', '>'], 'gt' => [/&gt;/, '&gt;', '>', />/],
'lt' => [/&lt;/, '&lt;', '<'], 'lt' => [/&lt;/, '&lt;', '<', /</],
'quot' => [/&quot;/, '&quot;', '"'], 'quot' => [/&quot;/, '&quot;', '"', /"/],
"apos" => [/&apos;/, "&apos;", "'"] "apos" => [/&apos;/, "&apos;", "'", /'/]
} }
def initialize( source ) def initialize( source )
@ -126,6 +126,7 @@ module REXML
# Returns true if there are more events. Synonymous with !empty? # Returns true if there are more events. Synonymous with !empty?
def has_next? def has_next?
return true if @closed
@source.read if @source.buffer.size==0 and !@source.empty? @source.read if @source.buffer.size==0 and !@source.empty?
(!@source.empty? and @source.buffer.strip.size>0) or @stack.size>0 or @closed (!@source.empty? and @source.buffer.strip.size>0) or @stack.size>0 or @closed
end end
@ -143,7 +144,7 @@ module REXML
# event, so you can effectively pre-parse the entire document (pull the # event, so you can effectively pre-parse the entire document (pull the
# entire thing into memory) using this method. # entire thing into memory) using this method.
def peek depth=0 def peek depth=0
raise 'Illegal argument "#{depth}"' if depth < -1 raise %Q[Illegal argument "#{depth}"] if depth < -1
temp = [] temp = []
if depth == -1 if depth == -1
temp.push(pull()) until empty? temp.push(pull()) until empty?
@ -166,8 +167,9 @@ module REXML
return @stack.shift if @stack.size > 0 return @stack.shift if @stack.size > 0
@source.read if @source.buffer.size==0 @source.read if @source.buffer.size==0
if @document_status == nil if @document_status == nil
@source.match( /^\s*/um, true ) @source.consume( /^\s*/um )
word = @source.match( /^\s*(<.*?)>/um ) word = @source.match( /(<.*?)>/um )
#word = @source.match_to( '>', /(<.*?)>/um )
word = word[1] unless word.nil? word = word[1] unless word.nil?
case word case word
when COMMENT_START when COMMENT_START
@ -190,7 +192,7 @@ module REXML
close = md[2] close = md[2]
identity =~ IDENTITY identity =~ IDENTITY
name = $1 name = $1
raise "DOCTYPE is missing a name" if name.nil? raise REXML::ParseException("DOCTYPE is missing a name") if name.nil?
pub_sys = $2.nil? ? nil : $2.strip pub_sys = $2.nil? ? nil : $2.strip
long_name = $3.nil? ? nil : $3.strip long_name = $3.nil? ? nil : $3.strip
uri = $4.nil? ? nil : $4.strip uri = $4.nil? ? nil : $4.strip
@ -274,10 +276,11 @@ module REXML
return [ :end_doctype ] return [ :end_doctype ]
end end
end end
begin begin
if @source.buffer[0] == ?< if @source.buffer[0] == ?<
if @source.buffer[1] == ?/ if @source.buffer[1] == ?/
last_tag = @tags.pop last_tag = @tags.pop
#md = @source.match_to_consume( '>', CLOSE_MATCH)
md = @source.match( CLOSE_MATCH, true ) md = @source.match( CLOSE_MATCH, true )
raise REXML::ParseException.new( "Missing end tag for '#{last_tag}' "+ raise REXML::ParseException.new( "Missing end tag for '#{last_tag}' "+
"(got \"#{md[1]}\")", @source) unless last_tag == md[1] "(got \"#{md[1]}\")", @source) unless last_tag == md[1]
@ -286,18 +289,20 @@ module REXML
md = @source.match(/\A(\s*[^>]*>)/um) md = @source.match(/\A(\s*[^>]*>)/um)
#puts "SOURCE BUFFER = #{source.buffer}, #{source.buffer.size}" #puts "SOURCE BUFFER = #{source.buffer}, #{source.buffer.size}"
raise REXML::ParseException.new("Malformed node", @source) unless md raise REXML::ParseException.new("Malformed node", @source) unless md
case md[1] if md[0][2] == ?-
when CDATA_START md = @source.match( COMMENT_PATTERN, true )
return [ :cdata, @source.match( CDATA_PATTERN, true )[1] ] return [ :comment, md[1] ] if md
when COMMENT_START
return [ :comment, @source.match( COMMENT_PATTERN, true )[1] ]
else else
raise REXML::ParseException.new( "Declarations can only occur "+ md = @source.match( CDATA_PATTERN, true )
"in the doctype declaration.", @source) return [ :cdata, md[1] ] if md
end end
raise REXML::ParseException.new( "Declarations can only occur "+
"in the doctype declaration.", @source)
elsif @source.buffer[1] == ?? elsif @source.buffer[1] == ??
md = @source.match( INSTRUCTION_PATTERN, true ) md = @source.match( INSTRUCTION_PATTERN, true )
return [ :processing_instruction, md[1], md[2] ] return [ :processing_instruction, md[1], md[2] ] if md
raise REXML::ParseException.new( "Bad instruction declaration",
@source)
else else
# Get the next tag # Get the next tag
md = @source.match(TAG_MATCH, true) md = @source.match(TAG_MATCH, true)
@ -318,17 +323,19 @@ module REXML
return [ :start_element, md[1], attributes ] return [ :start_element, md[1], attributes ]
end end
else else
md = @source.match(TEXT_PATTERN, true) md = @source.match( TEXT_PATTERN, true )
raise "no text to add" if md[0].length == 0 #md = @source.match_to_consume( '<', TEXT_PATTERN )
#@source.read
raise REXML::ParseException("no text to add") if md[0].length == 0
# unnormalized = Text::unnormalize( md[1], self ) # unnormalized = Text::unnormalize( md[1], self )
# return PullEvent.new( :text, md[1], unnormalized ) # return PullEvent.new( :text, md[1], unnormalized )
return [ :text, md[1] ] return [ :text, md[1] ]
end end
rescue REXML::ParseException rescue REXML::ParseException
raise $! raise
rescue Exception, NameError => error rescue Exception, NameError => error
raise REXML::ParseException.new( "Exception parsing", raise REXML::ParseException.new( "Exception parsing",
@source, self, error ) @source, self, (error ? error : $!) )
end end
return [ :dummy ] return [ :dummy ]
end end
@ -354,7 +361,7 @@ module REXML
end if entities end if entities
copy.gsub!( EREFERENCE, '&amp;' ) copy.gsub!( EREFERENCE, '&amp;' )
DEFAULT_ENTITIES.each do |key, value| DEFAULT_ENTITIES.each do |key, value|
copy.gsub!( value[2], value[1] ) copy.gsub!( value[3], value[1] )
end end
copy copy
end end

View file

@ -16,25 +16,25 @@ module REXML
end end
def parse def parse
root = context = REXML::Light::Node.new([ :document ]) root = context = [ :document ]
while true while true
event = @parser.pull event = @parser.pull
case event[0] case event[0]
when :end_document when :end_document
break break
when :end_doctype when :end_doctype
context = context.parent context = context[1]
when :start_element, :start_doctype when :start_element, :start_doctype
new_node = REXML::Light::Node.new(event) new_node = event
context << new_node context << new_node
new_node.parent = context new_node[1,0] = [context]
context = new_node context = new_node
when :end_element, :end_doctype when :end_element, :end_doctype
context = context.parent context = context[1]
else else
new_node = REXML::Light::Node.new(event) new_node = event
context << new_node context << new_node
new_node.parent = context new_node[1,0] = [context]
end end
end end
root root

View file

@ -31,7 +31,7 @@ module REXML
results = filter([element], path) results = filter([element], path)
when /^\*/u when /^\*/u
results = filter(element.to_a, path) results = filter(element.to_a, path)
when /^[\[!\w:]/u when /^[[!\w:]/u
# match on child # match on child
matches = [] matches = []
children = element.to_a children = element.to_a

View file

@ -21,6 +21,6 @@
# A tutorial is available in docs/tutorial.html # A tutorial is available in docs/tutorial.html
module REXML module REXML
Copyright = "Copyright #{Time.now.year} Sean Russell <ser@germane-software.com>" Copyright = "Copyright #{Time.now.year} Sean Russell <ser@germane-software.com>"
Date = "+2003/110" Date = "+2003/283"
Version = "2.7.1" Version = "2.7.2"
end end

View file

@ -39,10 +39,6 @@ module REXML
# Overridden to support optimized en/decoding # Overridden to support optimized en/decoding
def encoding=(enc) def encoding=(enc)
super super
eval <<-EOL
alias :encode :to_#{encoding.tr('-', '_').downcase}
alias :decode :from_#{encoding.tr('-', '_').downcase}
EOL
@line_break = encode( '>' ) @line_break = encode( '>' )
if enc != UTF_8 if enc != UTF_8
@buffer = decode(@buffer) @buffer = decode(@buffer)
@ -78,8 +74,22 @@ module REXML
def read def read
end end
def consume( pattern )
@buffer = $' if pattern.match( @buffer )
end
def match_to( char, pattern )
return pattern.match(@buffer)
end
def match_to_consume( char, pattern )
md = pattern.match(@buffer)
@buffer = $'
return md
end
def match pattern, consume=false def match pattern, consume=false
md = pattern.match @buffer md = pattern.match(@buffer)
@buffer = $' if consume and md @buffer = $' if consume and md
return md return md
end end
@ -112,7 +122,9 @@ module REXML
#@block_size = block_size #@block_size = block_size
#super @source.read(@block_size) #super @source.read(@block_size)
@line_break = '>' @line_break = '>'
super @source.readline( @line_break ) #super @source.readline( "\n" )
super @source.readline( @line_break )+@source.read
@line_break = encode( '>' )
end end
def scan pattern, consume=false def scan pattern, consume=false
@ -145,11 +157,15 @@ module REXML
str = @source.readline('>') str = @source.readline('>')
str = decode(str) if @to_utf and str str = decode(str) if @to_utf and str
@buffer << str @buffer << str
rescue rescue Exception, NameError
@source = nil @source = nil
end end
end end
def consume( pattern )
match( pattern, true )
end
def match pattern, consume=false def match pattern, consume=false
rv = pattern.match(@buffer) rv = pattern.match(@buffer)
@buffer = $' if consume and rv @buffer = $' if consume and rv

View file

@ -2,16 +2,6 @@ require 'rexml/namespace'
require 'rexml/xmltokens' require 'rexml/xmltokens'
require 'rexml/parsers/xpathparser' require 'rexml/parsers/xpathparser'
# Ignore this class. It adds a __ne__ method, because Ruby doesn't seem to
# understand object.send( "!=", foo ), whereas it *does* understand "<", "==",
# and all of the other comparison methods. Stupid, and annoying, and not at
# all POLS.
class Object
def __ne__(b)
self != b
end
end
module REXML module REXML
# You don't want to use this class. Really. Use XPath, which is a wrapper # You don't want to use this class. Really. Use XPath, which is a wrapper
# for this class. Believe me. You don't want to poke around in here. # for this class. Believe me. You don't want to poke around in here.
@ -132,11 +122,10 @@ module REXML
when :child when :child
#puts "CHILD" #puts "CHILD"
new_nodeset = [] new_nodeset = []
ps_clone = nil nt = nil
for node in nodeset for node in nodeset
#ps_clone = path_stack.clone nt = node.node_type
#new_nodeset += internal_parse( ps_clone, node.children ) if node.parent? new_nodeset += node.children if nt == :element or nt == :document
new_nodeset += node.children if node.parent?
end end
#path_stack[0,(path_stack.size-ps_clone.size)] = [] #path_stack[0,(path_stack.size-ps_clone.size)] = []
return new_nodeset return new_nodeset
@ -238,9 +227,11 @@ module REXML
when :descendant when :descendant
#puts ":DESCENDANT" #puts ":DESCENDANT"
results = [] results = []
nt = nil
for node in nodeset for node in nodeset
nt = node.node_type
results += internal_parse( path_stack.clone.unshift( :descendant_or_self ), results += internal_parse( path_stack.clone.unshift( :descendant_or_self ),
node.children ) if node.parent? node.children ) if nt == :element or nt == :document
end end
return results return results
@ -310,11 +301,13 @@ module REXML
def d_o_s( p, ns, r ) def d_o_s( p, ns, r )
#puts r.collect{|n|n.to_s}.inspect #puts r.collect{|n|n.to_s}.inspect
#puts ns.collect{|n|n.to_s}.inspect #puts ns.collect{|n|n.to_s}.inspect
nt = nil
ns.each_index do |i| ns.each_index do |i|
n = ns[i] n = ns[i]
x = match( p.clone, [ n ] ) x = match( p.clone, [ n ] )
#puts "Got a match on #{p.inspect} for #{ns.collect{|n|n.to_s+"("+n.type.to_s+")"}.inspect}" #puts "Got a match on #{p.inspect} for #{ns.collect{|n|n.to_s+"("+n.type.to_s+")"}.inspect}"
d_o_s( p, n.children, x ) if n.parent? nt = n.node_type
d_o_s( p, n.children, x ) if nt == :element or nt == :document
r[i,0] = [x] if x.size > 0 r[i,0] = [x] if x.size > 0
end end
end end