mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
Initial revision
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@3925 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
parent
ca02190d88
commit
ea7a527a2a
53 changed files with 6674 additions and 0 deletions
191
lib/rexml/source.rb
Normal file
191
lib/rexml/source.rb
Normal file
|
@ -0,0 +1,191 @@
|
|||
require 'rexml/encoding'
|
||||
|
||||
module REXML
|
||||
# Generates Source-s. USE THIS CLASS.
|
||||
class SourceFactory
|
||||
# Generates a Source object
|
||||
# @param arg Either a String, or an IO
|
||||
# @return a Source, or nil if a bad argument was given
|
||||
def SourceFactory::create_from arg#, slurp=true
|
||||
if arg.kind_of? String
|
||||
source = Source.new(arg)
|
||||
elsif arg.kind_of? IO
|
||||
source = IOSource.new(arg)
|
||||
end
|
||||
source
|
||||
end
|
||||
end
|
||||
|
||||
# A Source can be searched for patterns, and wraps buffers and other
|
||||
# objects and provides consumption of text
|
||||
class Source
|
||||
include Encoding
|
||||
# The current buffer (what we're going to read next)
|
||||
attr_reader :buffer
|
||||
# The line number of the last consumed text
|
||||
attr_reader :line
|
||||
attr_reader :encoding
|
||||
|
||||
# Constructor
|
||||
# @param arg must be a String, and should be a valid XML document
|
||||
def initialize arg
|
||||
@orig = @buffer = arg
|
||||
self.encoding = check_encoding( @buffer )
|
||||
#@buffer = decode(@buffer) unless @encoding == UTF_8
|
||||
@line = 0
|
||||
end
|
||||
|
||||
# Inherited from Encoding
|
||||
# Overridden to support optimized en/decoding
|
||||
def encoding=(enc)
|
||||
super
|
||||
eval <<-EOL
|
||||
alias :encode :to_#{encoding.tr('-', '_').downcase}
|
||||
alias :decode :from_#{encoding.tr('-', '_').downcase}
|
||||
EOL
|
||||
@line_break = encode( '>' )
|
||||
if enc != UTF_8
|
||||
@buffer = decode(@buffer)
|
||||
@to_utf = true
|
||||
else
|
||||
@to_utf = false
|
||||
end
|
||||
end
|
||||
|
||||
# Scans the source for a given pattern. Note, that this is not your
|
||||
# usual scan() method. For one thing, the pattern argument has some
|
||||
# requirements; for another, the source can be consumed. You can easily
|
||||
# confuse this method. Originally, the patterns were easier
|
||||
# to construct and this method more robust, because this method
|
||||
# generated search regexes on the fly; however, this was
|
||||
# computationally expensive and slowed down the entire REXML package
|
||||
# considerably, since this is by far the most commonly called method.
|
||||
# @param pattern must be a Regexp, and must be in the form of
|
||||
# /^\s*(#{your pattern, with no groups})(.*)/. The first group
|
||||
# will be returned; the second group is used if the consume flag is
|
||||
# set.
|
||||
# @param consume if true, the pattern returned will be consumed, leaving
|
||||
# everything after it in the Source.
|
||||
# @return the pattern, if found, or nil if the Source is empty or the
|
||||
# pattern is not found.
|
||||
def scan pattern, consume=false
|
||||
return nil if @buffer.nil?
|
||||
rv = @buffer.scan(pattern)
|
||||
@buffer = $' if consume and rv.size>0
|
||||
rv
|
||||
end
|
||||
|
||||
def read
|
||||
end
|
||||
|
||||
def match pattern, consume=false
|
||||
md = pattern.match @buffer
|
||||
@buffer = $' if consume and md
|
||||
return md
|
||||
end
|
||||
|
||||
# @return true if the Source is exhausted
|
||||
def empty?
|
||||
@buffer.nil? or @buffer.strip.nil?
|
||||
end
|
||||
|
||||
# @return the current line in the source
|
||||
def current_line
|
||||
lines = @orig.split
|
||||
res = lines.grep @buffer[0..30]
|
||||
res = res[-1] if res.kind_of? Array
|
||||
lines.index( res ) if res
|
||||
end
|
||||
end
|
||||
|
||||
# A Source that wraps an IO. See the Source class for method
|
||||
# documentation
|
||||
class IOSource < Source
|
||||
#attr_reader :block_size
|
||||
|
||||
def initialize arg, block_size=500
|
||||
@er_source = @source = arg
|
||||
@to_utf = false
|
||||
# READLINE OPT
|
||||
# The following was commented out when IOSource started using readline
|
||||
# to pull the data from the stream.
|
||||
#@block_size = block_size
|
||||
#super @source.read(@block_size)
|
||||
@line_break = '>'
|
||||
super @source.readline( @line_break )
|
||||
end
|
||||
|
||||
def scan pattern, consume=false
|
||||
rv = super
|
||||
# You'll notice that this next section is very similar to the same
|
||||
# section in match(), but just a liiittle different. This is
|
||||
# because it is a touch faster to do it this way with scan()
|
||||
# than the way match() does it; enough faster to warrent duplicating
|
||||
# some code
|
||||
if rv.size == 0
|
||||
until @buffer =~ pattern or @source.nil?
|
||||
begin
|
||||
# READLINE OPT
|
||||
#str = @source.read(@block_size)
|
||||
str = @source.readline(@line_break)
|
||||
str = decode(str) if @to_utf and str
|
||||
@buffer << str
|
||||
rescue
|
||||
@source = nil
|
||||
end
|
||||
end
|
||||
rv = super
|
||||
end
|
||||
rv.taint
|
||||
rv
|
||||
end
|
||||
|
||||
def read
|
||||
begin
|
||||
str = @source.readline('>')
|
||||
str = decode(str) if @to_utf and str
|
||||
@buffer << str
|
||||
rescue
|
||||
@source = nil
|
||||
end
|
||||
end
|
||||
|
||||
def match pattern, consume=false
|
||||
rv = pattern.match(@buffer)
|
||||
@buffer = $' if consume and rv
|
||||
while !rv and @source
|
||||
begin
|
||||
str = @source.readline('>')
|
||||
str = decode(str) if @to_utf and str
|
||||
@buffer << str
|
||||
rv = pattern.match(@buffer)
|
||||
@buffer = $' if consume and rv
|
||||
rescue
|
||||
@source = nil
|
||||
end
|
||||
end
|
||||
rv.taint
|
||||
rv
|
||||
end
|
||||
|
||||
def empty?
|
||||
super and ( @source.nil? || @source.eof? )
|
||||
end
|
||||
|
||||
# @return the current line in the source
|
||||
def current_line
|
||||
pos = @er_source.pos # The byte position in the source
|
||||
lineno = @er_source.lineno # The XML < position in the source
|
||||
@er_source.rewind
|
||||
line = 0 # The \r\n position in the source
|
||||
begin
|
||||
while @er_source.pos < pos
|
||||
@er_source.readline
|
||||
line += 1
|
||||
end
|
||||
rescue
|
||||
end
|
||||
[pos, lineno, line]
|
||||
end
|
||||
end
|
||||
end
|
Loading…
Add table
Add a link
Reference in a new issue