mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
34a553da2e
formerly it was "\r\n". * lib/csv.rb: [CAUTION] API change * CSV::Row removed. a row is represented as just an Array. since CSV::Row was a subclass of Array, it won't hurt almost all programs except ones which depended on CSV::Row#match. * CSV::Cell removed. a cell is represented as just a String or nil(NULL). this change will cause widespread destruction. CSV.open("foo.csv", "r") do |row| row.each do |cell| if cell.is_null # Cell#is_null p "(NULL)" else p cell.data # Cell#data end end end must now be just: CSV.open("foo.csv", "r") do |row| row.each do |cell| if cell.nil? p "(NULL)" else p cell end end end * lib/csv.rb: [CAUTION] record separator(CR, LF, CR+LF) behavior change. CSV.open, CSV.parse, and CSV.generate now do not force opened file binmode. formerly it set binmode explicitly. with CSV.open, binmode of opened file depends on the given mode parameter "r", "w", "rb", and "wb". CSV.parse and CSV.generate open file with "r" and "w". setting mode properly is user's responsibility now. * lib/csv.rb: accepts String as a fs (field separator/column separator) and rs (record separator/row separator) * lib/csv.rb: added CSV.foreach(path, rs = nil, &block). CSV.foreach now does not handle "| cmd" as a path different from IO.foreach. needed? * test/csv/test_csv.rb: updated. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@6359 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
947 lines
24 KiB
Ruby
947 lines
24 KiB
Ruby
# CSV -- module for generating/parsing CSV data.
|
|
# Copyright (C) 2000-2004 NAKAMURA, Hiroshi <nakahiro@sarion.co.jp>.
|
|
|
|
# $Id$
|
|
|
|
# This program is copyrighted free software by NAKAMURA, Hiroshi. You can
|
|
# redistribute it and/or modify it under the same terms of Ruby's license;
|
|
# either the dual license version in 2003, or any later version.
|
|
|
|
|
|
class CSV
|
|
# Raised by the parser when the input is not well-formed CSV.
class IllegalFormatError < RuntimeError
end
|
|
|
|
# Opens the CSV file at +path+ for reading ('r', 'rb') or writing
# ('w', 'wb').  +fs+ and +rs+ are the field and record separators handed on
# to the reader or writer, and an optional block is forwarded as well.
#
# Raises ArgumentError for any other +mode+.
def CSV.open(path, mode, fs = ',', rs = nil, &block)
  case mode
  when 'r', 'rb'
    open_reader(path, mode, fs, rs, &block)
  when 'w', 'wb'
    open_writer(path, mode, fs, rs, &block)
  else
    raise ArgumentError, "'mode' must be 'r', 'rb', 'w', or 'wb'"
  end
end
|
|
|
|
# Reads the CSV file at +path+ with mode 'r' and ',' as the field separator,
# forwarding +rs+ and the given block to open_reader.
def CSV.foreach(path, rs = nil, &block)
  mode = 'r'
  fs = ','
  open_reader(path, mode, fs, rs, &block)
end
|
|
|
|
# Open a CSV formatted file for reading.
|
|
#
|
|
# EXAMPLE 1
|
|
# reader = CSV.parse('csvfile.csv')
|
|
# row1 = reader.shift
|
|
# row2 = reader.shift
|
|
# if row2.empty?
|
|
# p 'row2 not find.'
|
|
# end
|
|
# reader.close
|
|
#
|
|
# EXAMPLE 2
|
|
# CSV.parse('csvfile.csv') do |row|
|
|
# p row
|
|
# end
|
|
#
|
|
# ARGS
|
|
# filename: filename to parse.
|
|
# col_sep: Column separator. ?, by default. If you want to separate
|
|
# fields with semicolon, give ?; here.
|
|
# row_sep: Row separator. nil by default. nil means "\r\n or \n". If you
|
|
# want to separate records with \r, give ?\r here.
|
|
#
|
|
# RETURNS
|
|
# reader instance. To get parse result, see CSV::Reader#each.
|
|
#
|
|
def CSV.parse(path, fs = ',', rs = nil, &block)
  # The file is always opened with mode 'r'; CSV.open lets the caller pick
  # 'rb' instead.
  mode = 'r'
  open_reader(path, mode, fs, rs, &block)
end
|
|
|
|
# Open a CSV formatted file for writing.
|
|
#
|
|
# EXAMPLE 1
|
|
# writer = CSV.generate('csvfile.csv')
|
|
# writer << ['r1c1', 'r1c2'] << ['r2c1', 'r2c2'] << [nil, nil]
|
|
# writer.close
|
|
#
|
|
# EXAMPLE 2
|
|
# CSV.generate('csvfile.csv') do |writer|
|
|
# writer << ['r1c1', 'r1c2']
|
|
# writer << ['r2c1', 'r2c2']
|
|
# writer << [nil, nil]
|
|
# end
|
|
#
|
|
# ARGS
|
|
# filename: filename to generate.
|
|
# col_sep: Column separator. ?, by default. If you want to separate
|
|
# fields with semicolon, give ?; here.
|
|
# row_sep: Row separator. nil by default. nil means "\r\n or \n". If you
|
|
# want to separate records with \r, give ?\r here.
|
|
#
|
|
# RETURNS
|
|
# writer instance. See CSV::Writer#<< and CSV::Writer#add_row to know how
|
|
# to generate CSV string.
|
|
#
|
|
def CSV.generate(path, fs = ',', rs = nil, &block)
  # The file is always opened with mode 'w'; CSV.open lets the caller pick
  # 'wb' instead.
  mode = 'w'
  open_writer(path, mode, fs, rs, &block)
end
|
|
|
|
# Parse a line from given string. Bear in mind it parses ONE LINE. Rest of
|
|
# the string is ignored for example "a,b\r\nc,d" => ['a', 'b'] and the
|
|
# second line 'c,d' is ignored.
|
|
#
|
|
# If you don't know whether a target string to parse is exactly 1 line or
|
|
# not, use CSV.parse_row instead of this method.
|
|
def CSV.parse_line(src, fs = ',', rs = nil)
  # Separators may be given as char codes (?; style).  Integer is used for
  # the check because it covers Fixnum on old Rubies and still exists on
  # Rubies where the Fixnum constant has been removed.
  fs = fs.chr if fs.is_a?(Integer)
  rs = rs.chr if rs.is_a?(Integer)
  idx = 0
  res_type = :DT_COLSEP
  row = []
  begin
    # Keep reading cells for as long as each one ends with a column
    # separator; a row separator or the end of the string ends the line.
    while res_type.equal?(:DT_COLSEP)
      res_type, idx, cell = parse_body(src, idx, fs, rs)
      row << cell
    end
  rescue IllegalFormatError
    # A malformed line is reported as an empty row.
    return []
  end
  row
end
|
|
|
|
# Create a line from cells. each cell is stringified by to_s.
|
|
def CSV.generate_line(row, fs = ',', rs = nil)
  return '' if row.size == 0

  # Separators may be given as char codes (?; style).  Integer is used for
  # the check because it covers Fixnum on old Rubies and still exists on
  # Rubies where the Fixnum constant has been removed.
  fs = fs.chr if fs.is_a?(Integer)
  rs = rs.chr if rs.is_a?(Integer)

  result_str = ''
  last = row.size - 1
  0.upto(last) do |idx|
    generate_body(row[idx], result_str, fs, rs)
    # Cells are joined by the column separator; no row separator is added.
    generate_separator(:DT_COLSEP, result_str, fs, rs) if idx < last
  end
  result_str
end
|
|
|
|
# Parse a line from string. Consider using CSV.parse_line instead.
|
|
# To parse lines in CSV string, see EXAMPLE below.
|
|
#
|
|
# EXAMPLE
|
|
# src = "a,b\r\nc,d\r\ne,f"
|
|
# idx = 0
|
|
# begin
|
|
# parsed = []
|
|
# parsed_cells, idx = CSV.parse_row(src, idx, parsed)
|
|
# puts "Parsed #{ parsed_cells } cells."
|
|
# p parsed
|
|
# end while parsed_cells > 0
|
|
#
|
|
# ARGS
|
|
# src: a CSV data to be parsed. Must respond '[](idx)'.
|
|
# src[](idx) must return a char. (Not a string such as 'a', but 97).
|
|
# src[](idx_out_of_bounds) must return nil. A String satisfies this
|
|
# requirement.
|
|
# idx: index of parsing location of 'src'. 0 origin.
|
|
# out_dev: buffer for parsed cells. Must respond to '<<(String or nil)'.
|
|
# col_sep: Column separator. ?, by default. If you want to separate
|
|
# fields with semicolon, give ?; here.
|
|
# row_sep: Row separator. nil by default. nil means "\r\n or \n". If you
|
|
# want to separate records with \r, give ?\r here.
|
|
#
|
|
# RETURNS
|
|
# parsed_cells: num of parsed cells.
|
|
# idx: index of next parsing location of 'src'.
|
|
#
|
|
def CSV.parse_row(src, idx, out_dev, fs = ',', rs = nil)
  # Separators may be given as char codes (?; style).  Integer is used for
  # the check because it covers Fixnum on old Rubies and still exists on
  # Rubies where the Fixnum constant has been removed.
  fs = fs.chr if fs.is_a?(Integer)
  rs = rs.chr if rs.is_a?(Integer)
  idx_backup = idx
  parsed_cells = 0
  res_type = :DT_COLSEP
  begin
    until res_type.equal?(:DT_ROWSEP)
      res_type, idx, cell = parse_body(src, idx, fs, rs)
      if res_type.equal?(:DT_EOS)
        # Nothing was consumed at all: there is no row left to parse.
        return 0, 0 if idx == idx_backup
        # Otherwise the end of the data closes the current row.
        res_type = :DT_ROWSEP
      end
      parsed_cells += 1
      out_dev << cell
    end
  rescue IllegalFormatError
    # Malformed input is reported as (0, 0).  Cells appended to out_dev
    # before the error was detected are left in place.
    return 0, 0
  end
  return parsed_cells, idx
end
|
|
|
|
# Convert a line from cells data to string. Consider using CSV.generate_line
|
|
# instead. To generate multi-row CSV string, see EXAMPLE below.
|
|
#
|
|
# EXAMPLE
|
|
# def d(str)
|
|
# str # a cell is a plain String (nil means NULL); CSV::Cell was removed
|
|
# end
|
|
#
|
|
# row1 = [d('a'), d('b')]
|
|
# row2 = [d('c'), d('d')]
|
|
# row3 = [d('e'), d('f')]
|
|
# src = [row1, row2, row3]
|
|
# buf = ''
|
|
# src.each do |row|
|
|
# parsed_cells = CSV.generate_row(row, 2, buf)
|
|
# puts "Created #{ parsed_cells } cells."
|
|
# end
|
|
# p buf
|
|
#
|
|
# ARGS
|
|
# src: an Array of cells (String, or nil for NULL) to be converted to a CSV
# string. Must respond to 'size' and '[](idx)'.
|
|
# cells: num of cells in a line.
|
|
# out_dev: buffer for generated CSV string. Must respond to '<<(string)'.
|
|
# col_sep: Column separator. ?, by default. If you want to separate
|
|
# fields with semicolon, give ?; here.
|
|
# row_sep: Row separator. nil by default. nil means "\r\n or \n". If you
|
|
# want to separate records with \r, give ?\r here.
|
|
#
|
|
# RETURNS
|
|
# parsed_cells: num of converted cells.
|
|
#
|
|
def CSV.generate_row(src, cells, out_dev, fs = ',', rs = nil)
  # Separators may be given as char codes (?; style).  Integer is used for
  # the check because it covers Fixnum on old Rubies and still exists on
  # Rubies where the Fixnum constant has been removed.
  fs = fs.chr if fs.is_a?(Integer)
  rs = rs.chr if rs.is_a?(Integer)

  src_size = src.size
  if src_size == 0
    # An empty source only produces output when an empty row was asked for.
    generate_separator(:DT_ROWSEP, out_dev, fs, rs) if cells == 0
    return 0
  end

  # The first cell is always written, whatever the value of 'cells'.
  parsed_cells = 0
  generate_body(src[parsed_cells], out_dev, fs, rs)
  parsed_cells += 1
  while parsed_cells < cells and parsed_cells != src_size
    generate_separator(:DT_COLSEP, out_dev, fs, rs)
    generate_body(src[parsed_cells], out_dev, fs, rs)
    parsed_cells += 1
  end

  # The row is closed only when exactly 'cells' cells were written;
  # otherwise a column separator is emitted instead.
  if parsed_cells == cells
    generate_separator(:DT_ROWSEP, out_dev, fs, rs)
  else
    generate_separator(:DT_COLSEP, out_dev, fs, rs)
  end
  parsed_cells
end
|
|
|
|
# Private class methods.
|
|
class << self
|
|
private
|
|
|
|
# Opens +path+ with +mode+ and wraps the file in a CSV reader.
#
# With a block, every parsed row is yielded, the file is closed afterwards
# (even when the block raises) and nil is returned.  Without a block, a
# reader is returned that has been told to close the file when the reader
# itself is closed.
def open_reader(path, mode, fs, rs, &block)
  file = File.open(path, mode)
  if block
    begin
      CSV::Reader.parse(file, fs, rs) do |row|
        yield(row)
      end
    ensure
      file.close
    end
    return nil
  end

  reader = nil
  begin
    reader = CSV::Reader.create(file, fs, rs)
  ensure
    # If building the reader fails, nobody else holds the handle, so it is
    # released here instead of being leaked.
    file.close unless reader
  end
  reader.close_on_terminate
  reader
end
|
|
|
|
# Opens +path+ with +mode+ and wraps the file in a CSV writer.
#
# Without a block the writer is returned after being told to close the file
# when the writer itself is closed.  With a block the writer is yielded, the
# file is closed afterwards (even when the block raises) and nil is returned.
def open_writer(path, mode, fs, rs, &block)
  file = File.open(path, mode)
  unless block
    csv_writer = CSV::Writer.create(file, fs, rs)
    csv_writer.close_on_terminate
    return csv_writer
  end

  begin
    CSV::Writer.generate(file, fs, rs) { |csv_writer| yield(csv_writer) }
  ensure
    file.close
  end
  nil
end
|
|
|
|
# Parses one cell of +src+ starting at index +idx+.
#
# fs: field separator String.  rs: record separator String, or nil, in
# which case "\n" ends a record and a "\r" directly before it is dropped.
#
# Returns [type, next_idx, cell]:
#   type     - :DT_COLSEP (cell ended by fs), :DT_ROWSEP (ended by rs) or
#              :DT_EOS (ended by the end of src).
#   next_idx - index at which parsing should continue.
#   cell     - the cell text with quoting removed, or nil when there was
#              nothing at all between +idx+ and the terminator.
#
# Raises IllegalFormatError on malformed input: a quote inside an unquoted
# cell, data after a closing quote, an unterminated quoted cell, or a "\r"
# that is not followed by "\n" while rs is nil.
def parse_body(src, idx, fs, rs)
  fs_str = fs
  fs_size = fs_str.size
  fs_idx = 0                  # number of fs chars matched so far
  rs_str = rs || "\n"
  rs_size = rs_str.size
  rs_idx = 0                  # number of rs chars matched so far
  cell = ''
  state = :ST_START           # :ST_START -> :ST_DATA <-> :ST_QUOTE
  quoted = false              # true while inside "..."
  cr = false                  # true after an unquoted "\r" when rs is nil
  last_idx = idx              # start of the text not yet copied into cell
  while (c = src[idx])
    if c == ?"
      cell << src[last_idx, idx - last_idx]
      last_idx = idx
      raise IllegalFormatError if cr
      fs_idx = 0
      rs_idx = 0
      case state
      when :ST_DATA
        # A quote inside unquoted data is illegal; inside quoted data it
        # either closes the cell or starts an escaped quote.
        raise IllegalFormatError unless quoted
        last_idx += 1
        quoted = false
        state = :ST_QUOTE
      when :ST_QUOTE
        # Second quote of a "" pair: a literal quote, still inside quotes.
        cell << c.chr
        last_idx += 1
        quoted = true
        state = :ST_DATA
      else # :ST_START
        quoted = true
        last_idx += 1
        state = :ST_DATA
      end
    elsif c == fs_str[fs_idx]
      # Copy the pending text only when a separator match starts.  While a
      # multi-char separator is being matched last_idx stays on its first
      # char, so a completed separator is never copied into the cell and an
      # abandoned partial match is copied later as ordinary data.
      if fs_idx == 0
        cell << src[last_idx, idx - last_idx]
        last_idx = idx
      end
      fs_idx += 1
      rs_idx = 0
      if fs_idx == fs_size
        fs_idx = 0
        raise IllegalFormatError if cr
        case state
        when :ST_DATA
          # Inside quotes the separator is plain data; keep scanning.
          return :DT_COLSEP, idx + 1, cell unless quoted
        when :ST_QUOTE
          return :DT_COLSEP, idx + 1, cell
        else # :ST_START
          return :DT_COLSEP, idx + 1, nil
        end
      end
    elsif c == rs_str[rs_idx]
      # Same rule as for fs.  With rs == nil a pending "\r" has already
      # been excluded from the cell, so nothing is copied in that case.
      if rs_idx == 0 and !(rs.nil? and cr)
        cell << src[last_idx, idx - last_idx]
        last_idx = idx
      end
      rs_idx += 1
      fs_idx = 0
      if rs_idx == rs_size
        rs_idx = 0
        case state
        when :ST_DATA
          # Inside quotes the separator is plain data; keep scanning.
          return :DT_ROWSEP, idx + 1, cell unless quoted
        when :ST_QUOTE
          return :DT_ROWSEP, idx + 1, cell
        else # :ST_START
          return :DT_ROWSEP, idx + 1, nil
        end
      end
    elsif rs.nil? and c == ?\r
      # special \r treatment for backward compatibility
      raise IllegalFormatError if cr
      cell << src[last_idx, idx - last_idx]
      last_idx = idx
      if quoted
        state = :ST_DATA
      else
        cr = true
      end
    else
      fs_idx = 0
      rs_idx = 0
      if state.equal?(:ST_DATA) or state.equal?(:ST_START)
        raise IllegalFormatError if cr
        state = :ST_DATA
      else # :ST_QUOTE
        # Data directly after a closing quote.
        raise IllegalFormatError
      end
    end
    idx += 1
  end

  # End of src reached.
  return :DT_EOS, idx, nil if state.equal?(:ST_START)
  raise IllegalFormatError if quoted or cr
  cell << src[last_idx, idx - last_idx]
  return :DT_EOS, idx, cell
end
|
|
|
|
# Appends the CSV representation of one cell to +out_dev+.
#
# cell:    the value to write.  nil (NULL) writes nothing; anything else is
#          stringified with to_s.
# out_dev: output buffer; must respond to '<<(string)'.
# fs, rs:  field and record separator Strings (rs may be nil).
#
# The text is wrapped in double quotes, with embedded quotes doubled, when
# it is empty or contains a quote, fs, rs, "\r" or "\n".
def generate_body(cell, out_dev, fs, rs)
  return nil if cell.nil?

  # Stringify first so that non-String cells (numbers, symbols, ...) can be
  # written; work on a copy so the caller's object is never modified.
  text = cell.to_s
  row_data = text.dup
  if (row_data.gsub!('"', '""') or
      row_data.index(fs) or
      (rs and row_data.index(rs)) or
      (/[\r\n]/ =~ row_data) or
      text.empty?)
    out_dev << '"' << row_data << '"'
  else
    out_dev << row_data
  end
end
|
|
|
|
# Appends a separator to +out_dev+: +fs+ for :DT_COLSEP, +rs+ (or "\n" when
# rs is nil) for :DT_ROWSEP.  Any other +type+ writes nothing and yields nil.
def generate_separator(type, out_dev, fs, rs)
  if type == :DT_COLSEP
    out_dev << fs
  elsif type == :DT_ROWSEP
    out_dev << (rs || "\n")
  end
end
|
|
end
|
|
|
|
|
|
# CSV formatted string/stream reader.
|
|
#
|
|
# EXAMPLE
|
|
# read CSV lines until the first column is 'stop'.
|
|
#
|
|
# CSV::Reader.parse(File.open('bigdata', 'rb')) do |row|
|
|
# p row
|
|
# break if !row[0].nil? && row[0] == 'stop'
|
|
# end
|
|
#
|
|
class Reader
  include Enumerable

  # Parse CSV data and get lines. Given block is called for each parsed row.
  # Block value is always nil. Rows are not cached for performance reason.
  #
  # The reader is closed when parsing ends, including when the block raises
  # or breaks out early.
  def Reader.parse(str_or_readable, fs = ',', rs = nil)
    reader = create(str_or_readable, fs, rs)
    begin
      reader.each do |row|
        yield(row)
      end
    ensure
      reader.close
    end
    nil
  end

  # Returns reader instance: a StringReader for a String, an IOReader for
  # anything else.
  def Reader.create(str_or_readable, fs = ',', rs = nil)
    case str_or_readable
    when String
      StringReader.new(str_or_readable, fs, rs)
    else
      IOReader.new(str_or_readable, fs, rs)
    end
  end

  # Yields every remaining row as an Array of cells.  Returns nil.
  def each
    while true
      row = []
      parsed_cells = get_row(row)
      break if parsed_cells == 0
      yield(row)
    end
    nil
  end

  # Returns the next row as an Array of cells; the Array is empty when
  # get_row added no cell to it.
  def shift
    row = []
    get_row(row)
    row
  end

  # Closes the reader by invoking the terminate hook.
  def close
    terminate
  end

  private

  def initialize(dev)
    raise RuntimeError.new('Do not instanciate this class directly.')
  end

  # Must append the cells of the next row to +row+ and return the number of
  # cells parsed (0 stops iteration in each).
  def get_row(row)
    raise NotImplementedError.new('Method get_row must be defined in a derived class.')
  end

  def terminate
    # Define if needed.
  end
end
|
|
|
|
|
|
# Reader over CSV data held in a String.
class StringReader < Reader
  # string: the CSV data.  fs, rs: field and record separators handed to
  # CSV.parse_row.  A leading UTF-8 byte order mark is skipped.
  def initialize(string, fs = ',', rs = nil)
    @fs = fs
    @rs = rs
    @dev = string
    @idx = 0
    # Where Strings carry an encoding they are indexed by character, so the
    # BOM is given the encoding of the data before it is compared and its
    # own length (3 as raw bytes, 1 as a UTF-8 character) is what gets
    # skipped.
    bom = "\xef\xbb\xbf"
    if bom.respond_to?(:force_encoding) and @dev.respond_to?(:encoding)
      bom = bom.dup.force_encoding(@dev.encoding)
    end
    if @dev[0, bom.size] == bom
      @idx += bom.size
    end
  end

  private

  # Parses the next row into +row+ and returns the number of cells parsed.
  # Raises IllegalFormatError when nothing could be parsed although the end
  # of the string has not been reached.
  def get_row(row)
    parsed_cells, next_idx = CSV.parse_row(@dev, @idx, row, @fs, @rs)
    if parsed_cells == 0 and next_idx == 0 and @idx != @dev.size
      raise IllegalFormatError
    end
    # NOTE(review): at the end of the data parse_row returns index 0, so
    # @idx goes back to 0 here and a later call starts again from the very
    # beginning of the string (before any BOM) -- confirm this is intended.
    @idx = next_idx
    parsed_cells
  end
end
|
|
|
|
|
|
# Reader over CSV data read from an IO-like object through CSV::IOBuf.
class IOReader < Reader
  # io: object to read from.  fs, rs: field and record separators handed to
  # CSV.parse_row.  A leading UTF-8 byte order mark is skipped.
  def initialize(io, fs = ',', rs = nil)
    @io = io
    @fs = fs
    @rs = rs
    @dev = CSV::IOBuf.new(@io)
    @idx = 0
    # Compare the head of the stream as a String rather than element by
    # element: indexing a buffer yields char codes only on old Rubies and
    # one-character Strings elsewhere.  Where Strings carry an encoding the
    # BOM takes the encoding of the data read, and its own length (3 as raw
    # bytes, 1 as a UTF-8 character) is what gets skipped.
    bom = "\xef\xbb\xbf"
    head = @dev[0, 3]
    if head and bom.respond_to?(:force_encoding)
      bom = bom.dup.force_encoding(head.encoding)
    end
    if @dev[0, bom.size] == bom
      @idx += bom.size
    end
    @close_on_terminate = false
  end

  # Tell this reader to close the IO when terminated (Triggered by invoking
  # CSV::IOReader#close).
  def close_on_terminate
    @close_on_terminate = true
  end

  private

  # Parses the next row into +row+ and returns the number of cells parsed.
  # Raises IllegalFormatError when nothing could be parsed although the
  # stream is not at its end.
  def get_row(row)
    parsed_cells, next_idx = CSV.parse_row(@dev, @idx, row, @fs, @rs)
    if parsed_cells == 0 and next_idx == 0 and !@dev.is_eos?
      raise IllegalFormatError
    end
    # Release the consumed part of the buffer and keep @idx relative to
    # what is left.
    dropped = @dev.drop(next_idx)
    @idx = next_idx - dropped
    parsed_cells
  end

  # Closes the IO when requested through close_on_terminate, then closes
  # the buffer.
  def terminate
    @io.close if @close_on_terminate
    @dev.close if @dev
  end
end
|
|
|
|
|
|
# CSV formatted string/stream writer.
|
|
#
|
|
# EXAMPLE
|
|
# Write rows to 'csvout' file.
|
|
#
|
|
# outfile = File.open('csvout', 'wb')
|
|
# CSV::Writer.generate(outfile) do |csv|
|
|
# csv << ['c1', nil, '', '"', "\r\n", 'c2']
|
|
# # or
|
|
# csv.add_row [
#   'c1',
#   nil,      # NULL cell
#   '',
#   '"',
#   "\r\n",
#   'c2'
# ]
|
|
# ...
|
|
# ...
|
|
# end
|
|
#
|
|
# outfile.close
|
|
#
|
|
class Writer
  # Generate CSV. The block receives the writer instance, which is closed
  # once the block has returned. The result is always nil.
  def self.generate(str_or_writable, fs = ',', rs = nil)
    csv_writer = Writer.create(str_or_writable, fs, rs)
    yield(csv_writer)
    csv_writer.close
    nil
  end

  # Returns a writer for +str_or_writable+, which must handle '<<(string)'.
  def self.create(str_or_writable, fs = ',', rs = nil)
    BasicWriter.new(str_or_writable, fs, rs)
  end

  # Dumps one row to the device as a CSV record and returns the writer
  # itself so that calls can be chained. +row+ is an Array of cells.
  def <<(row)
    CSV.generate_row(row, row.size, @dev, @fs, @rs)
    self
  end
  alias_method :add_row, :<<

  # Closes the writer by invoking the terminate hook.
  def close
    terminate
  end

  private

  def initialize(dev)
    raise RuntimeError, 'Do not instanciate this class directly.'
  end

  def terminate
    # Define if needed.
  end
end
|
|
|
|
|
|
# Writer that appends generated CSV to any object handling '<<(string)'.
class BasicWriter < Writer
  # str_or_writable: output device.  fs, rs: field and record separators.
  # The device is not closed on terminate unless close_on_terminate is
  # called.
  def initialize(str_or_writable, fs = ',', rs = nil)
    @dev = str_or_writable
    @fs = fs
    @rs = rs
    @close_on_terminate = false
  end

  # Tell this writer to close the IO when terminated (Triggered by invoking
  # CSV::BasicWriter#close).
  def close_on_terminate
    @close_on_terminate = true
  end

  private

  # Closes the device, but only when that was requested.
  def terminate
    @dev.close if @close_on_terminate
  end
end
|
|
|
|
private
|
|
|
|
# Buffered stream.
|
|
#
|
|
# EXAMPLE 1 -- an IO.
|
|
# class MyBuf < StreamBuf
|
|
# # Do initialize myself before a super class. Super class might call my
|
|
# # method 'read'. (Could be awful for C++ user. :-)
|
|
# def initialize(s)
|
|
# @s = s
|
|
# super()
|
|
# end
|
|
#
|
|
# # define my own 'read' method.
|
|
# # CAUTION: Returning nil means EndOfStream.
|
|
# def read(size)
|
|
# @s.read(size)
|
|
# end
|
|
#
|
|
# # release buffers. in Ruby which has GC, you do not have to call this...
|
|
# def terminate
|
|
# @s = nil
|
|
# super()
|
|
# end
|
|
# end
|
|
#
|
|
# buf = MyBuf.new(STDIN)
|
|
# my_str = ''
|
|
# p buf[0, 0] # => '' (null string)
|
|
# p buf[0] # => 97 (char code of 'a')
|
|
# p buf[0, 1] # => 'a'
|
|
# my_str = buf[0, 5]
|
|
# p my_str # => 'abcde' (5 chars)
|
|
# p buf[0, 6] # => "abcde\n" (6 chars)
|
|
# p buf[0, 7] # => "abcde\n" (6 chars)
|
|
# p buf.drop(3) # => 3 (dropped chars)
|
|
# p buf.get(0, 2) # => 'de' (2 chars)
|
|
# p buf.is_eos? # => false (is not EOS here)
|
|
# p buf.drop(5) # => 3 (dropped chars)
|
|
# p buf.is_eos? # => true (is EOS here)
|
|
# p buf[0] # => nil (is EOS here)
|
|
#
|
|
# EXAMPLE 2 -- String.
|
|
# This is a conceptual example. No pros with this.
|
|
#
|
|
# class StrBuf < StreamBuf
|
|
# def initialize(s)
|
|
# @str = s
|
|
# @idx = 0
|
|
# super()
|
|
# end
|
|
#
|
|
# def read(size)
|
|
# str = @str[@idx, size]
|
|
# @idx += str.size
|
|
# str
|
|
# end
|
|
# end
|
|
#
|
|
class StreamBuf
  # Size passed to 'read' for each internal buffer.
  BufSize = 1024 * 8

  # get a char or a partial string from the stream.
  # idx: index of a string to specify a start point of a string to get.
  #      unlike String instance, idx < 0 returns nil.
  # n: size of a string to get.
  # returns char at idx if n == nil.
  # returns a partial string, from idx to (idx + n) if n != nil. at EOF,
  # the string size could not equal to arg n.
  def [](idx, n = nil)
    return nil if idx < 0
    if idx_is_eos?(idx)
      # Like a String, 'abc'[4, 1] returns nil and
      # 'abc'[3, 1] returns '' not nil.
      return '' if n && (@offset + idx == buf_size(@cur_buf))
      return nil
    end

    my_buf = @cur_buf
    my_offset = @offset
    next_idx = idx
    # Walk forward buffer by buffer, reading more data whenever the last
    # loaded buffer is passed; stop when no more data can be read.
    while my_offset + next_idx >= buf_size(my_buf)
      if my_buf == @buf_tail_idx
        break unless add_buf
      end
      next_idx = my_offset + next_idx - buf_size(my_buf)
      my_buf += 1
      my_offset = 0
    end

    loc = my_offset + next_idx
    if !n
      # Whatever String#[] gives for a single index (a char code on
      # Ruby 1.8).
      return @buf_list[my_buf][loc]
    elsif loc + n - 1 < buf_size(my_buf)
      return @buf_list[my_buf][loc, n] # String.
    else # should do loop instead of (tail) recursive call...
      # The range crosses a buffer border: take the rest of this buffer
      # and fetch the remainder recursively.
      res = @buf_list[my_buf][loc, BufSize]
      size_added = buf_size(my_buf) - loc
      if size_added > 0
        idx += size_added
        n -= size_added
        ret = self[idx, n]
        res << ret if ret
      end
      return res
    end
  end
  alias get []

  # drop a string from the stream.
  # returns dropped size. at EOF, dropped size might not equals to arg n.
  # Once you drop the head of the stream, access to the dropped part via []
  # or get returns nil.
  def drop(n)
    return 0 if is_eos?

    size_dropped = 0
    while n > 0
      # Once end of stream has been detected the tail buffer is the empty
      # marker pushed by add_buf: nothing is left to drop.  Without this
      # exit the loop would never finish when n exceeds the remaining data.
      break if @is_eos && (@cur_buf == @buf_tail_idx)

      if @offset + n < buf_size(@cur_buf)
        size_dropped += n
        @offset += n
        n = 0
      else
        # Drop the rest of the current buffer and move on to the next one,
        # reading it first when the current buffer was the last one loaded.
        size = buf_size(@cur_buf) - @offset
        size_dropped += size
        n -= size
        @offset = 0
        unless rel_buf
          break unless add_buf
          @cur_buf = @buf_tail_idx
        end
      end
    end
    size_dropped
  end

  # true when end of stream has been detected and no loaded data is left.
  def is_eos?
    return idx_is_eos?(0)
  end

  # WARN: Do not instantiate this class directly. Define your own class
  # which derives this class and define 'read' instance method.
  def initialize
    @buf_list = []
    @cur_buf = @buf_tail_idx = -1
    @offset = 0
    @is_eos = false
    add_buf
    @cur_buf = @buf_tail_idx
  end

  protected

  # Releases every buffer from the current one to the last one loaded.
  def terminate
    while (rel_buf); end
  end

  # protected method 'read' must be defined in derived classes.
  # CAUTION: Returning a string which size is not equal to 'size' means
  # EndOfStream. When it is not at EOS, you must block the callee, try to
  # read and return the sized string.
  def read(size) # raise EOFError
    raise NotImplementedError.new('Method read must be defined in a derived class.')
  end

  private

  def buf_size(idx)
    @buf_list[idx].size
  end

  # Reads the next buffer and appends it to the list.  Returns true when
  # data was added.  When 'read' returns nil or raises EOFError, an empty
  # marker buffer is appended, end of stream is recorded and false is
  # returned.  Any other error releases the buffers and is re-raised.
  def add_buf
    return false if @is_eos

    begin
      str_read = read(BufSize)
    rescue EOFError
      str_read = nil
    rescue
      terminate
      raise
    end

    if str_read.nil?
      @is_eos = true
      @buf_list.push('')
      @buf_tail_idx += 1
      false
    else
      @buf_list.push(str_read)
      @buf_tail_idx += 1
      true
    end
  end

  # Releases the current buffer.  Returns true after moving on to the next
  # loaded buffer, false when there was none (the current index is then -1).
  def rel_buf
    return false if @cur_buf < 0

    @buf_list[@cur_buf] = nil
    if @cur_buf == @buf_tail_idx
      @cur_buf = -1
      return false
    else
      @cur_buf += 1
      return true
    end
  end

  # NOTE: the idx argument is not used; the answer only depends on the
  # end-of-stream flag and on the current buffer position.
  def idx_is_eos?(idx)
    (@is_eos and ((@cur_buf < 0) or (@cur_buf == @buf_tail_idx)))
  end
end
|
|
|
|
# Buffered IO.
|
|
#
|
|
# EXAMPLE
|
|
# # File 'bigdata' could be a giga-byte size one!
|
|
# buf = CSV::IOBuf.new(File.open('bigdata', 'rb'))
|
|
# CSV::Reader.new(buf).each do |row|
|
|
# p row
|
|
# break if row[0] == 'admin'
|
|
# end
|
|
#
|
|
class IOBuf < StreamBuf
  # s: the object to read from; it must respond to 'read(size)'.
  # It has to be stored before calling super, because the superclass
  # constructor already reads the first buffer.
  def initialize(s)
    @s = s
    super()
  end

  # Releases the buffers held by this object. The wrapped object itself is
  # not closed here.
  def close
    terminate
  end

  private

  # Hands the read request to the wrapped object.
  def read(size)
    @s.read(size)
  end

  def terminate
    super
  end
end
|
|
end
|