1
0
Fork 0
mirror of https://github.com/ruby/ruby.git synced 2022-11-09 12:17:21 -05:00
ruby--ruby/lib/rexml/source.rb
ser d15f41b0eb * Non-String attributes are now converted to Strings; this means code such as
elem.attributes["a"] = 1
  will not cause an error when dumping the XML.  It also means that:
  elem.attributes["a"]    # => "1", not 1
* Transitive indenting has been cleaned up.
* Fixed a potential bug in parsing non-ASCII encoded streams
* Fixed a bug where trying to fill in ParseException data was causing an
  IO error (stream closed)
* Changes to Text mean that Element (and Text) can be used outside of a
  Document context.
* In some rare cases, the base parser wasn't reading enough bytes from the
  stream for the parsing algorithm to work properly.  This has been fixed
  (this was Ruby bug #48426)


git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@6209 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2004-04-23 15:44:30 +00:00

208 lines
5.2 KiB
Ruby

require 'rexml/encoding'
module REXML
# Generates Source-s. USE THIS CLASS.
class SourceFactory
# Generates a Source object
# @param arg Either a String, or an IO
# @return a Source, or nil if a bad argument was given
def SourceFactory::create_from arg#, slurp=true
if arg.kind_of? String
source = Source.new(arg)
elsif arg.kind_of? IO
source = IOSource.new(arg)
end
source
end
end
# A Source can be searched for patterns, and wraps buffers and other
# objects and provides consumption of text
class Source
include Encoding
# The current buffer (what we're going to read next)
attr_reader :buffer
# The line number of the last consumed text
attr_reader :line
attr_reader :encoding
# Constructor
# @param arg must be a String, and should be a valid XML document
def initialize(arg)
@orig = @buffer = arg
self.encoding = check_encoding( @buffer )
@line = 0
end
# Inherited from Encoding
# Overridden to support optimized en/decoding
def encoding=(enc)
super
@line_break = encode( '>' )
if enc != UTF_8
@buffer = decode(@buffer)
@to_utf = true
else
@to_utf = false
end
end
# Scans the source for a given pattern. Note, that this is not your
# usual scan() method. For one thing, the pattern argument has some
# requirements; for another, the source can be consumed. You can easily
# confuse this method. Originally, the patterns were easier
# to construct and this method more robust, because this method
# generated search regexes on the fly; however, this was
# computationally expensive and slowed down the entire REXML package
# considerably, since this is by far the most commonly called method.
# @param pattern must be a Regexp, and must be in the form of
# /^\s*(#{your pattern, with no groups})(.*)/. The first group
# will be returned; the second group is used if the consume flag is
# set.
# @param consume if true, the pattern returned will be consumed, leaving
# everything after it in the Source.
# @return the pattern, if found, or nil if the Source is empty or the
# pattern is not found.
def scan(pattern, cons=false)
return nil if @buffer.nil?
rv = @buffer.scan(pattern)
@buffer = $' if cons and rv.size>0
rv
end
def read
end
def consume( pattern )
@buffer = $' if pattern.match( @buffer )
end
def match_to( char, pattern )
return pattern.match(@buffer)
end
def match_to_consume( char, pattern )
md = pattern.match(@buffer)
@buffer = $'
return md
end
def match(pattern, cons=false)
md = pattern.match(@buffer)
@buffer = $' if cons and md
return md
end
# @return true if the Source is exhausted
def empty?
@buffer == ""
end
# @return the current line in the source
def current_line
lines = @orig.split
res = lines.grep @buffer[0..30]
res = res[-1] if res.kind_of? Array
lines.index( res ) if res
end
end
# A Source that wraps an IO. See the Source class for method
# documentation
class IOSource < Source
#attr_reader :block_size
# block_size has been deprecated
def initialize(arg, block_size=500)
@er_source = @source = arg
@to_utf = false
# FIXME
# This is broken. If the user puts in enough carriage returns, this can fail
# to calculate the correct encoding.
super @source.read( 100 )
@line_break = encode( '>' )
end
def scan(pattern, cons=false)
rv = super
# You'll notice that this next section is very similar to the same
# section in match(), but just a liiittle different. This is
# because it is a touch faster to do it this way with scan()
# than the way match() does it; enough faster to warrent duplicating
# some code
if rv.size == 0
until @buffer =~ pattern or @source.nil?
begin
# READLINE OPT
#str = @source.read(@block_size)
str = @source.readline(@line_break)
str = decode(str) if @to_utf and str
@buffer << str
rescue
@source = nil
end
end
rv = super
end
rv.taint
rv
end
def read
begin
str = @source.readline(@line_break)
str = decode(str) if @to_utf and str
@buffer << str
rescue Exception, NameError
@source = nil
end
end
def consume( pattern )
match( pattern, true )
end
def match( pattern, cons=false )
rv = pattern.match(@buffer)
@buffer = $' if cons and rv
while !rv and @source
begin
str = @source.readline(@line_break)
str = decode(str) if @to_utf and str
@buffer << str
rv = pattern.match(@buffer)
@buffer = $' if cons and rv
rescue
@source = nil
end
end
rv.taint
rv
end
def empty?
super and ( @source.nil? || @source.eof? )
end
# @return the current line in the source
def current_line
begin
pos = @er_source.pos # The byte position in the source
lineno = @er_source.lineno # The XML < position in the source
@er_source.rewind
line = 0 # The \r\n position in the source
begin
while @er_source.pos < pos
@er_source.readline
line += 1
end
rescue
end
rescue IOError
pos = -1
line = -1
end
[pos, lineno, line]
end
end
end