mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
13e15cecf3
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@3964 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
1322 lines
32 KiB
Ruby
1322 lines
32 KiB
Ruby
# CSV -- module for generating/parsing CSV data.
|
|
|
|
# $Id$
|
|
|
|
# This module is copyrighted free software by NAKAMURA, Hiroshi.
|
|
# You can redistribute it and/or modify it under the same term as Ruby.
|
|
|
|
|
|
class CSV
|
|
public
|
|
|
|
# DESCRIPTION
|
|
# CSV::Cell -- Describes 1 cell of CSV.
|
|
#
|
|
class Cell
|
|
public
|
|
|
|
# Datum as string.
|
|
attr_accessor :data
|
|
|
|
# Is this datum null?
|
|
attr_accessor :is_null
|
|
|
|
# SYNOPSIS
|
|
# cell = CSV::Cell.new(data = '', is_null = true)
|
|
#
|
|
# ARGS
|
|
# data: datum as String
|
|
# is_null: is this datum null?
|
|
#
|
|
# RETURNS
|
|
# cell: Created instance.
|
|
#
|
|
# DESCRIPTION
|
|
# Create instance. If is_null is true, datum is stored in the instance
|
|
# created but it should be treated as 'NULL'.
|
|
#
|
|
def initialize(data = '', is_null = true)
|
|
@data = data
|
|
@is_null = is_null
|
|
end
|
|
|
|
# SYNOPSIS
|
|
# CSV::Cell#match(rhs)
|
|
#
|
|
# ARGS
|
|
# rhs: an instance of CSV::Cell to be compared.
|
|
#
|
|
# RETURNS
|
|
# true/false. See the souce if you want to know matching algorithm.
|
|
#
|
|
# DESCRIPTION
|
|
# Compare another cell with me. Bare in mind Null matches with Null
|
|
# using this method. Use CSV::Cell#== if you want Null never matches
|
|
# with other data including Null.
|
|
#
|
|
def match(rhs)
|
|
if @is_null and rhs.is_null
|
|
true
|
|
elsif @is_null or rhs.is_null
|
|
false
|
|
else
|
|
@data == rhs.data
|
|
end
|
|
end
|
|
|
|
# SYNOPSIS
|
|
# CSV::Cell#==(rhs)
|
|
#
|
|
# ARGS
|
|
# rhs: an instance of CSV::Cell to be compared.
|
|
#
|
|
# RETURNS
|
|
# true/false. See the souce if you want to know matching algorithm.
|
|
#
|
|
# DESCRIPTION
|
|
# Compare another cell with me. Bare in mind Null is not match with
|
|
# Null using this method. Null never matches with other data including
|
|
# Null. Use CSV::Cell#match if you want Null matches with Null.
|
|
#
|
|
def ==(rhs)
|
|
if @is_null or rhs.is_null
|
|
false
|
|
else
|
|
@data == rhs.data
|
|
end
|
|
end
|
|
end
|
|
|
|
|
|
# DESCRIPTION
|
|
# CSV::Row -- Describes a row of CSV. Each element must be a CSV::Cell.
|
|
#
|
|
class Row < Array
|
|
public
|
|
|
|
# SYNOPSIS
|
|
# CSV::Row#to_a
|
|
#
|
|
# RETURNS
|
|
# An Array of String.
|
|
#
|
|
# DESCRIPTION
|
|
# Convert CSV::Cell to String. Null is converted to nil.
|
|
#
|
|
def to_a
|
|
self.collect { |cell| cell.is_null ? nil : cell.data }
|
|
end
|
|
|
|
# SYNOPSIS
|
|
# CSV::Row#match(rhs)
|
|
#
|
|
# ARGS
|
|
# rhs: an Array of cells. Each cell is a instance of CSV::Cell.
|
|
#
|
|
# RETURNS
|
|
# true/false. See the souce if you want to know matching algorithm.
|
|
#
|
|
# DESCRIPTION
|
|
# Compare another row with me.
|
|
#
|
|
def match(rhs)
|
|
if self.size != rhs.size
|
|
return false
|
|
end
|
|
for idx in 0...(self.size)
|
|
unless self[idx].match(rhs[idx])
|
|
return false
|
|
end
|
|
end
|
|
true
|
|
end
|
|
end
|
|
|
|
|
|
# SYNOPSIS
|
|
# 1. reader = CSV.open(filename, 'r')
|
|
#
|
|
# 2. CSV.open(filename, 'r') do |row|
|
|
# ...
|
|
# end
|
|
#
|
|
# 3. writer = CSV.open(filename, 'w')
|
|
#
|
|
# 4. CSV.open(filename, 'w') do |writer|
|
|
# ...
|
|
# end
|
|
#
|
|
# ARGS
|
|
# filename: filename to open.
|
|
# mode: 'r' for read (parse)
|
|
# 'w' for write (generate)
|
|
# row: an Array of cells which is a parsed line.
|
|
# writer: Created writer instance. See CSV::Writer#<< and
|
|
# CSV::Writer#add_row to know how to generate CSV string.
|
|
#
|
|
# RETURNS
|
|
# reader: Create reader instance. To get parse result, see
|
|
# CSV::Reader#each.
|
|
# writer: Created writer instance. See CSV::Writer#<< and
|
|
# CSV::Writer#add_row to know how to generate CSV string.
|
|
#
|
|
# DESCRIPTION
|
|
# Open a CSV formatted file to read or write.
|
|
#
|
|
# EXAMPLE 1
|
|
# reader = CSV.open('csvfile.csv', 'r')
|
|
# row1 = reader.shift
|
|
# row2 = reader.shift
|
|
# if row2.empty?
|
|
# p 'row2 not find.'
|
|
# end
|
|
# reader.close
|
|
#
|
|
# EXAMPLE 2
|
|
# CSV.open('csvfile.csv', 'r') do |row|
|
|
# p row
|
|
# end
|
|
#
|
|
# EXAMPLE 3
|
|
# writer = CSV.open('csvfile.csv', 'w')
|
|
# writer << ['r1c1', 'r1c2'] << ['r2c1', 'r2c2'] << [nil, nil]
|
|
# writer.close
|
|
#
|
|
# EXAMPLE 4
|
|
# CSV.open('csvfile.csv', 'w') do |writer|
|
|
# writer << ['r1c1', 'r1c2']
|
|
# writer << ['r2c1', 'r2c2']
|
|
# writer << [nil, nil]
|
|
# end
|
|
#
|
|
def CSV.open(filename, mode, col_sep = ?,, &block)
|
|
if mode == 'r' or mode == 'rb'
|
|
open_reader(filename, col_sep, &block)
|
|
elsif mode == 'w' or mode == 'wb'
|
|
open_writer(filename, col_sep, &block)
|
|
else
|
|
raise ArgumentError.new("'mode' must be 'r', 'rb', 'w', or 'wb'")
|
|
end
|
|
end
|
|
|
|
def CSV.parse(filename, col_sep = ?,, &block)
|
|
open_reader(filename, col_sep, &block)
|
|
end
|
|
|
|
def CSV.generate(filename, col_sep = ?,, &block)
|
|
open_writer(filename, col_sep, &block)
|
|
end
|
|
|
|
# Private class methods.
|
|
class << self
|
|
private
|
|
def open_reader(filename, col_sep, &block)
|
|
file = File.open(filename, 'rb')
|
|
if block
|
|
begin
|
|
CSV::Reader.parse(file, col_sep) do |row|
|
|
yield(row)
|
|
end
|
|
ensure
|
|
file.close
|
|
end
|
|
nil
|
|
else
|
|
reader = CSV::Reader.create(file, col_sep)
|
|
reader.close_on_terminate
|
|
reader
|
|
end
|
|
end
|
|
|
|
def open_writer(filename, col_sep, &block)
|
|
file = File.open(filename, 'wb')
|
|
if block
|
|
begin
|
|
CSV::Writer.generate(file, col_sep) do |writer|
|
|
yield(writer)
|
|
end
|
|
ensure
|
|
file.close
|
|
end
|
|
nil
|
|
else
|
|
writer = CSV::Writer.create(file, col_sep)
|
|
writer.close_on_terminate
|
|
writer
|
|
end
|
|
end
|
|
end
|
|
|
|
|
|
# DESCRIPTION
|
|
# CSV::Reader -- CSV formatted string/stream reader.
|
|
#
|
|
# EXAMPLE
|
|
# Read CSV lines untill the first column is 'stop'.
|
|
#
|
|
# CSV::Reader.parse(File.open('bigdata', 'rb')) do |row|
|
|
# p row
|
|
# break if !row[0].is_null && row[0].data == 'stop'
|
|
# end
|
|
#
|
|
class Reader
|
|
include Enumerable
|
|
public
|
|
|
|
# SYNOPSIS
|
|
# reader = CSV::Reader.create(str_or_readable)
|
|
#
|
|
# ARGS
|
|
# str_or_readable: a CSV data to be parsed. A String or an IO.
|
|
#
|
|
# RETURNS
|
|
# reader: Created instance.
|
|
#
|
|
# DESCRIPTION
|
|
# Create instance. To get parse result, see CSV::Reader#each.
|
|
#
|
|
def Reader.create(str_or_readable, col_sep = ?,)
|
|
case str_or_readable
|
|
when IO
|
|
IOReader.new(str_or_readable, col_sep)
|
|
when String
|
|
StringReader.new(str_or_readable, col_sep)
|
|
else
|
|
IOReader.new(str_or_readable, col_sep)
|
|
end
|
|
end
|
|
|
|
# SYNOPSIS
|
|
# CSV::Reader.parse(str_or_readable) do |row|
|
|
# ...
|
|
# end
|
|
#
|
|
# ARGS
|
|
# str_or_readable: a CSV data to be parsed. A String or an IO.
|
|
# row: a CSV::Row; an Array of a CSV::Cell in a line.
|
|
#
|
|
# RETURNS
|
|
# nil
|
|
#
|
|
# DESCRIPTION
|
|
# Parse CSV data and get lines. Caller block is called for each line
|
|
# with an argument which is a chunk of cells in a row.
|
|
#
|
|
# Block value is always nil. Rows are not cached for performance
|
|
# reason.
|
|
#
|
|
def Reader.parse(str_or_readable, col_sep = ?,)
|
|
reader = create(str_or_readable, col_sep)
|
|
reader.each do |row|
|
|
yield(row)
|
|
end
|
|
reader.close
|
|
nil
|
|
end
|
|
|
|
# SYNOPSIS
|
|
# CSV::Reader#each do |row|
|
|
# ...
|
|
# end
|
|
#
|
|
# ARGS
|
|
# row: a CSV::Row; an Array of a CSV::Cell in a line.
|
|
#
|
|
# RETURNS
|
|
# nil
|
|
#
|
|
# DESCRIPTION
|
|
# Caller block is called for each line with an argument which is a chunk
|
|
# of cells in a row.
|
|
#
|
|
# Block value is always nil. Rows are not cached for performance
|
|
# reason.
|
|
#
|
|
def each
|
|
while true
|
|
row = Row.new
|
|
parsed_cells = get_row(row)
|
|
if parsed_cells == 0
|
|
break
|
|
end
|
|
yield(row)
|
|
end
|
|
nil
|
|
end
|
|
|
|
# SYNOPSIS
|
|
# cell = CSV::Reader#shift
|
|
#
|
|
# RETURNS
|
|
# cell: a CSV::Row; an Array of a CSV::Cell.
|
|
#
|
|
# DESCRIPTION
|
|
# Extract cells of next line.
|
|
#
|
|
def shift
|
|
row = Row.new
|
|
parsed_cells = get_row(row)
|
|
row
|
|
end
|
|
|
|
# SYNOPSIS
|
|
# CSV::Reader#close
|
|
#
|
|
# RETURNS
|
|
# nil
|
|
#
|
|
# DESCRIPTION
|
|
# Close this reader.
|
|
#
|
|
def close
|
|
terminate
|
|
end
|
|
|
|
private
|
|
def initialize(dev)
|
|
raise RuntimeError.new('Do not instanciate this class directly.')
|
|
end
|
|
|
|
def get_row(row)
|
|
raise NotImplementedError.new('Method get_row must be defined in a derived class.')
|
|
end
|
|
|
|
def terminate
|
|
# Define if needed.
|
|
end
|
|
end
|
|
|
|
|
|
# DESCRIPTION
|
|
# CSV::StringReader -- CSV formatted stream reader.
|
|
#
|
|
# EXAMPLE
|
|
# Read CSV lines untill the first column is 'stop'.
|
|
#
|
|
# CSV::Reader.parse(File.open('bigdata', 'rb')) do |row|
|
|
# p row
|
|
# break if !row[0].is_null && row[0].data == 'stop'
|
|
# end
|
|
#
|
|
class StringReader < Reader
|
|
public
|
|
|
|
# SYNOPSIS
|
|
# reader = CSV::StringReader.new(string)
|
|
#
|
|
# ARGS
|
|
# string: a CSV String to be parsed.
|
|
#
|
|
# RETURNS
|
|
# reader: Created instance.
|
|
#
|
|
# DESCRIPTION
|
|
# Create instance. To get parse result, see CSV::Reader#each.
|
|
#
|
|
def initialize(string, col_sep = ?,)
|
|
@col_sep = col_sep
|
|
@dev = string
|
|
@idx = 0
|
|
if @dev[0, 3] == "\xef\xbb\xbf"
|
|
@idx += 3
|
|
end
|
|
end
|
|
|
|
private
|
|
def get_row(row)
|
|
parsed_cells, next_idx = CSV.parse_row(@dev, @idx, row, @col_sep)
|
|
if parsed_cells == 0 && next_idx == 0 && @idx != @dev.size
|
|
raise IllegalFormatError.new
|
|
end
|
|
@idx = next_idx
|
|
parsed_cells
|
|
end
|
|
end
|
|
|
|
|
|
# DESCRIPTION
|
|
# CSV::IOReader -- CSV formatted stream reader.
|
|
#
|
|
# EXAMPLE
|
|
# Read CSV lines untill the first column is 'stop'.
|
|
#
|
|
# CSV::Reader.parse(File.open('bigdata', 'rb')) do |row|
|
|
# p row
|
|
# break if !row[0].is_null && row[0].data == 'stop'
|
|
# end
|
|
#
|
|
class IOReader < Reader
|
|
public
|
|
|
|
# SYNOPSIS
|
|
# reader = CSV::IOReader.new(io)
|
|
#
|
|
# ARGS
|
|
# io: a CSV data to be parsed. Must be an IO. (io#read is called.)
|
|
#
|
|
# RETURNS
|
|
# reader: Created instance.
|
|
#
|
|
# DESCRIPTION
|
|
# Create instance. To get parse result, see CSV::Reader#each.
|
|
#
|
|
def initialize(io, col_sep = ?,)
|
|
@io = io
|
|
@col_sep = col_sep
|
|
@dev = CSV::IOBuf.new(@io)
|
|
@idx = 0
|
|
if @dev[0] == 0xef and @dev[1] == 0xbb and @dev[2] == 0xbf
|
|
@idx += 3
|
|
end
|
|
@close_on_terminate = false
|
|
end
|
|
|
|
# SYNOPSIS
|
|
# CSV::IOReader#close_on_terminate
|
|
#
|
|
# RETURNS
|
|
# true
|
|
#
|
|
# DESCRIPTION
|
|
# Tell this reader to close the IO when terminated (Triggered by invoking
|
|
# CSV::IOReader#close).
|
|
#
|
|
def close_on_terminate
|
|
@close_on_terminate = true
|
|
end
|
|
|
|
private
|
|
def get_row(row)
|
|
parsed_cells, next_idx = CSV.parse_row(@dev, @idx, row, @col_sep)
|
|
if parsed_cells == 0 && next_idx == 0 && !@dev.is_eos?
|
|
raise IllegalFormatError.new
|
|
end
|
|
dropped = @dev.drop(next_idx)
|
|
@idx = next_idx - dropped
|
|
parsed_cells
|
|
end
|
|
|
|
def terminate
|
|
if @close_on_terminate
|
|
@io.close
|
|
end
|
|
|
|
if @dev
|
|
@dev.close
|
|
end
|
|
end
|
|
end
|
|
|
|
|
|
# DESCRIPTION
|
|
# CSV::Writer -- CSV formatted string/stream writer.
|
|
#
|
|
# EXAMPLE
|
|
# Write rows to 'csvout' file.
|
|
#
|
|
# outfile = File.open('csvout', 'wb')
|
|
# CSV::Writer.generate(outfile) do |csv|
|
|
# csv << ['c1', nil, '', '"', "\r\n", 'c2']
|
|
# # or
|
|
# csv.add_row [
|
|
# CSV::Cell.new('c1', false),
|
|
# CSV::Cell.new('dummy', true),
|
|
# CSV::Cell.new('', false),
|
|
# CSV::Cell.new('"', false),
|
|
# CSV::Cell.new("\r\n", false)
|
|
# CSV::Cell.new('c2', false)
|
|
# ]
|
|
# ...
|
|
# ...
|
|
# end
|
|
#
|
|
# outfile.close
|
|
#
|
|
class Writer
|
|
public
|
|
|
|
# SYNOPSIS
|
|
# writer = CSV::Writer.create(str_or_readable)
|
|
#
|
|
# ARGS
|
|
# str_or_writable: device for generated CSV string. Must respond to
|
|
# '<<(string)'.
|
|
#
|
|
# RETURNS
|
|
# writer: Created instance.
|
|
#
|
|
# DESCRIPTION
|
|
# Create instance. To add CSV data to generate CSV string, see
|
|
# CSV::Writer#<< or CSV::Writer#add_row.
|
|
#
|
|
def Writer.create(str_or_readable, col_sep = ?,)
|
|
BasicWriter.new(str_or_readable, col_sep)
|
|
end
|
|
|
|
# SYNOPSIS
|
|
# CSV::Writer.generate(str_or_writable) do |writer|
|
|
# ...
|
|
# end
|
|
#
|
|
# ARGS
|
|
# str_or_writable: device for generated CSV string. Must respond to
|
|
# '<<(string)'.
|
|
# writer: Created writer instance. See CSV::Writer#<< and
|
|
# CSV::Writer#add_row to know how to generate CSV string.
|
|
#
|
|
# RETURNS
|
|
# nil
|
|
#
|
|
# DESCRIPTION
|
|
# Create writer instance. Caller block is called with the new instance.
|
|
# To add CSV data to generate CSV string, see CSV::Writer#<< or
|
|
# CSV::Writer#add_row.
|
|
#
|
|
def Writer.generate(str_or_writable, col_sep = ?,)
|
|
writer = Writer.create(str_or_writable, col_sep)
|
|
yield(writer)
|
|
writer.close
|
|
nil
|
|
end
|
|
|
|
# SYNOPSIS
|
|
# CSV::Writer#<<(row)
|
|
#
|
|
# ARGS
|
|
# row: an Array of a String.
|
|
#
|
|
# RETURNS
|
|
# self
|
|
#
|
|
# DESCRIPTION
|
|
# Dump CSV stream to the device. Argument is an array of a String like
|
|
# ['c1', 'c2', 'c3'].
|
|
#
|
|
def <<(ary)
|
|
row = ary.collect { |item|
|
|
if item.is_a?(Cell)
|
|
item
|
|
elsif (item.nil?)
|
|
Cell.new('', true)
|
|
else
|
|
Cell.new(item.to_s, false)
|
|
end
|
|
}
|
|
CSV.generate_row(row, row.size, @dev, @col_sep)
|
|
self
|
|
end
|
|
|
|
# SYNOPSIS
|
|
# CSV::Writer#<<(row)
|
|
#
|
|
# ARGS
|
|
# row: an Array of a CSV::Cell.
|
|
#
|
|
# RETURNS
|
|
# self
|
|
#
|
|
# DESCRIPTION
|
|
# Dump CSV stream to the device. Argument is an array of a CSV::Cell
|
|
# like [CSV::Cell.new('c1', false), CSV::Cell.new('dummy', true)].
|
|
# (Formar is 'c1' and latter is Null.)
|
|
#
|
|
def add_row(row)
|
|
CSV.generate_row(row, row.size, @dev, @col_sep)
|
|
self
|
|
end
|
|
|
|
# SYNOPSIS
|
|
# CSV::Writer#close
|
|
#
|
|
# RETURNS
|
|
# nil
|
|
#
|
|
# DESCRIPTION
|
|
# Close this writer.
|
|
#
|
|
def close
|
|
terminate
|
|
end
|
|
|
|
private
|
|
def initialize(dev)
|
|
raise RuntimeError.new('Do not instanciate this class directly.')
|
|
end
|
|
|
|
def terminate
|
|
# Define if needed.
|
|
end
|
|
end
|
|
|
|
|
|
# DESCRIPTION
|
|
# CSV::BasicWriter -- CSV formatted string/stream writer using <<.
|
|
#
|
|
class BasicWriter < Writer
|
|
public
|
|
|
|
# SYNOPSIS
|
|
# writer = CSV::BasicWriter.new(str_or_writable)
|
|
#
|
|
# ARGS
|
|
# str_or_writable: device for generated CSV string. Must respond to
|
|
# '<<(string)'.
|
|
#
|
|
# RETURNS
|
|
# writer: Created instance.
|
|
#
|
|
# DESCRIPTION
|
|
# Create instance. To add CSV data to generate CSV string, see
|
|
# CSV::Writer#<< or CSV::Writer#add_row.
|
|
#
|
|
def initialize(str_or_writable, col_sep = ?,)
|
|
@col_sep = col_sep
|
|
@dev = str_or_writable
|
|
@close_on_terminate = false
|
|
end
|
|
|
|
# SYNOPSIS
|
|
# CSV::BasicWriter#close_on_terminate
|
|
#
|
|
# RETURNS
|
|
# true
|
|
#
|
|
# DESCRIPTION
|
|
# Tell this writer to close the IO when terminated (Triggered by invoking
|
|
# CSV::BasicWriter#close).
|
|
#
|
|
def close_on_terminate
|
|
@close_on_terminate = true
|
|
end
|
|
|
|
private
|
|
def terminate
|
|
if @close_on_terminate
|
|
@dev.close
|
|
end
|
|
end
|
|
end
|
|
|
|
# SYNOPSIS
|
|
# cells = CSV.parse_line(src, col_sep = ?,)
|
|
#
|
|
# ARGS
|
|
# src: a CSV String.
|
|
# col_sep: Column separator. ?, by default. If you want to separate
|
|
# fields with semicolon, give ?; here.
|
|
#
|
|
# RETURNS
|
|
# cells: an Array of parsed cells in first line. Each cell is a String.
|
|
#
|
|
# DESCRIPTION
|
|
# Parse one line from given string. Bare in mind it parses ONE LINE. Rest
|
|
# of the string is ignored for example "a,b\r\nc,d" => ['a', 'b'] and the
|
|
# second line 'c,d' is ignored.
|
|
#
|
|
# If you don't know whether a target string to parse is exactly 1 line or
|
|
# not, use CSV.parse_row instead of this method.
|
|
#
|
|
def CSV.parse_line(src, col_sep = ?,)
|
|
idx = 0
|
|
res_type = :DT_COLSEP
|
|
cells = Row.new
|
|
begin
|
|
while (res_type.equal?(:DT_COLSEP))
|
|
cell = Cell.new
|
|
res_type, idx = parse_body(src, idx, cell, col_sep)
|
|
cells.push(cell.is_null ? nil : cell.data)
|
|
end
|
|
rescue IllegalFormatError
|
|
return Row.new
|
|
end
|
|
cells
|
|
end
|
|
|
|
|
|
# SYNOPSIS
|
|
# str = CSV.generate_line(cells, col_sep = ?,)
|
|
#
|
|
# ARGS
|
|
# cells: an Array of cell to be converted to CSV string. Each cell must
|
|
# respond to 'to_s'.
|
|
# col_sep: Column separator. ?, by default. If you want to separate
|
|
# fields with semicolon, give ?; here.
|
|
#
|
|
# RETURNS
|
|
# str: a String of generated CSV string.
|
|
#
|
|
# DESCRIPTION
|
|
# Create a line from cells. Each cell is stringified by to_s.
|
|
#
|
|
def CSV.generate_line(cells, col_sep = ?,)
|
|
if (cells.size == 0)
|
|
return ''
|
|
end
|
|
res_type = :DT_COLSEP
|
|
result_str = ''
|
|
idx = 0
|
|
while true
|
|
cell = if (cells[idx].nil?)
|
|
Cell.new('', true)
|
|
else
|
|
Cell.new(cells[idx].to_s, false)
|
|
end
|
|
generate_body(cell, result_str, col_sep)
|
|
idx += 1
|
|
if (idx == cells.size)
|
|
break
|
|
end
|
|
generate_separator(:DT_COLSEP, result_str, col_sep)
|
|
end
|
|
result_str
|
|
end
|
|
|
|
# SYNOPSIS
|
|
# parsed_cells, idx = CSV.parse_row(src, idx, out_dev, col_sep = ?,)
|
|
#
|
|
# ARGS
|
|
# src: a CSV data to be parsed. Must respond '[](idx)'.
|
|
# src[](idx) must return a char. (Not a string such as 'a', but 97).
|
|
# src[](idx_out_of_bounds) must return nil. A String satisfies this
|
|
# requirement.
|
|
# idx: index of parsing location of 'src'. 0 origin.
|
|
# out_dev: buffer for parsed cells. Must respond '<<(CSV::Cell)'.
|
|
# col_sep: Column separator. ?, by default. If you want to separate
|
|
# fields with semicolon, give ?; here.
|
|
#
|
|
# RETURNS
|
|
# parsed_cells: num of parsed cells.
|
|
# idx: index of next parsing location of 'src'.
|
|
#
|
|
# DESCRIPTION
|
|
# Parse a line from string. To parse lines in CSV string, see EXAMPLE
|
|
# below.
|
|
#
|
|
# EXAMPLE
|
|
# src = "a,b\r\nc,d\r\ne,f"
|
|
# idx = 0
|
|
# begin
|
|
# parsed = []
|
|
# parsed_cells, idx = CSV.parse_row(src, idx, parsed)
|
|
# puts "Parsed #{ parsed_cells } cells."
|
|
# p parsed
|
|
# end while parsed_cells > 0
|
|
#
|
|
def CSV.parse_row(src, idx, out_dev, col_sep = ?,)
|
|
idx_backup = idx
|
|
parsed_cells = 0
|
|
res_type = :DT_COLSEP
|
|
begin
|
|
while (!res_type.equal?(:DT_ROWSEP))
|
|
cell = Cell.new
|
|
res_type, idx = parse_body(src, idx, cell, col_sep)
|
|
if res_type.equal?(:DT_EOS)
|
|
if idx == idx_backup #((parsed_cells == 0) && (cell.is_null))
|
|
return 0, 0
|
|
end
|
|
res_type = :DT_ROWSEP
|
|
end
|
|
parsed_cells += 1
|
|
out_dev << cell
|
|
end
|
|
rescue IllegalFormatError
|
|
return 0, 0
|
|
end
|
|
return parsed_cells, idx
|
|
end
|
|
|
|
# SYNOPSIS
|
|
# parsed_cells = CSV.generate_row(src, cells, out_dev, col_sep = ?,)
|
|
#
|
|
# ARGS
|
|
# src: an Array of CSV::Cell to be converted to CSV string. Must respond to
|
|
# 'size' and '[](idx)'. src[idx] must return CSV::Cell.
|
|
# cells: num of cells in a line.
|
|
# out_dev: buffer for generated CSV string. Must respond to '<<(string)'.
|
|
# col_sep: Column separator. ?, by default. If you want to separate
|
|
# fields with semicolon, give ?; here.
|
|
#
|
|
# RETURNS
|
|
# parsed_cells: num of converted cells.
|
|
#
|
|
# DESCRIPTION
|
|
# Convert a line from cells data to string. To generate multi-row CSV
|
|
# string, See EXAMPLE below.
|
|
#
|
|
# EXAMPLE
|
|
# def d(str)
|
|
# CSV::Cell.new(str, false)
|
|
# end
|
|
#
|
|
# row1 = [d('a'), d('b')]
|
|
# row2 = [d('c'), d('d')]
|
|
# row3 = [d('e'), d('f')]
|
|
# src = [row1, row2, row3]
|
|
# buf = ''
|
|
# src.each do |row|
|
|
# parsed_cells = CSV.generate_row(row, 2, buf)
|
|
# puts "Created #{ parsed_cells } cells."
|
|
# end
|
|
# p buf
|
|
#
|
|
def CSV.generate_row(src, cells, out_dev, col_sep = ?,)
|
|
src_size = src.size
|
|
if (src_size == 0)
|
|
if cells == 0
|
|
generate_separator(:DT_ROWSEP, out_dev, col_sep)
|
|
end
|
|
return 0
|
|
end
|
|
res_type = :DT_COLSEP
|
|
parsed_cells = 0
|
|
generate_body(src[parsed_cells], out_dev, col_sep)
|
|
parsed_cells += 1
|
|
while ((parsed_cells < cells) && (parsed_cells != src_size))
|
|
generate_separator(:DT_COLSEP, out_dev, col_sep)
|
|
generate_body(src[parsed_cells], out_dev, col_sep)
|
|
parsed_cells += 1
|
|
end
|
|
if (parsed_cells == cells)
|
|
generate_separator(:DT_ROWSEP, out_dev, col_sep)
|
|
else
|
|
generate_separator(:DT_COLSEP, out_dev, col_sep)
|
|
end
|
|
parsed_cells
|
|
end
|
|
|
|
private
|
|
class IllegalFormatError < RuntimeError; end
|
|
|
|
# Private class methods.
|
|
class << self
|
|
private
|
|
|
|
def parse_body(src, idx, cell, col_sep)
|
|
cell.is_null = false
|
|
state = :ST_START
|
|
quoted = false
|
|
cr = false
|
|
c = nil
|
|
while (c = src[idx])
|
|
idx += 1
|
|
result_state = :DT_UNKNOWN
|
|
if (c == col_sep)
|
|
if state.equal?(:ST_DATA)
|
|
if cr
|
|
raise IllegalFormatError.new
|
|
end
|
|
if (!quoted)
|
|
state = :ST_END
|
|
result_state = :DT_COLSEP
|
|
else
|
|
cell.data << c.chr
|
|
end
|
|
elsif state.equal?(:ST_QUOTE)
|
|
if cr
|
|
raise IllegalFormatError.new
|
|
end
|
|
state = :ST_END
|
|
result_state = :DT_COLSEP
|
|
else # :ST_START
|
|
cell.is_null = true
|
|
state = :ST_END
|
|
result_state = :DT_COLSEP
|
|
end
|
|
elsif (c == ?") # " for vim syntax hilighting.
|
|
if state.equal?(:ST_DATA)
|
|
if cr
|
|
raise IllegalFormatError.new
|
|
end
|
|
if quoted
|
|
quoted = false
|
|
state = :ST_QUOTE
|
|
else
|
|
raise IllegalFormatError.new
|
|
end
|
|
elsif state.equal?(:ST_QUOTE)
|
|
cell.data << c.chr
|
|
quoted = true
|
|
state = :ST_DATA
|
|
else # :ST_START
|
|
quoted = true
|
|
state = :ST_DATA
|
|
end
|
|
elsif (c == ?\r)
|
|
if cr
|
|
raise IllegalFormatError.new
|
|
end
|
|
if quoted
|
|
cell.data << c.chr
|
|
state = :ST_DATA
|
|
else
|
|
cr = true
|
|
end
|
|
elsif (c == ?\n)
|
|
if state.equal?(:ST_DATA)
|
|
if cr
|
|
state = :ST_END
|
|
result_state = :DT_ROWSEP
|
|
cr = false
|
|
else
|
|
if quoted
|
|
cell.data << c.chr
|
|
state = :ST_DATA
|
|
else
|
|
state = :ST_END
|
|
result_state = :DT_ROWSEP
|
|
end
|
|
end
|
|
elsif state.equal?(:ST_QUOTE)
|
|
state = :ST_END
|
|
result_state = :DT_ROWSEP
|
|
if cr
|
|
cr = false
|
|
end
|
|
else # :ST_START
|
|
cell.is_null = true
|
|
state = :ST_END
|
|
result_state = :DT_ROWSEP
|
|
end
|
|
else
|
|
if state.equal?(:ST_DATA) || state.equal?(:ST_START)
|
|
if cr
|
|
raise IllegalFormatError.new
|
|
end
|
|
cell.data << c.chr
|
|
state = :ST_DATA
|
|
else # :ST_QUOTE
|
|
raise IllegalFormatError.new
|
|
end
|
|
end
|
|
if state.equal?(:ST_END)
|
|
return result_state, idx;
|
|
end
|
|
end
|
|
if state.equal?(:ST_START)
|
|
cell.is_null = true
|
|
elsif state.equal?(:ST_QUOTE)
|
|
true # dummy for coverate; only a data
|
|
elsif quoted
|
|
raise IllegalFormatError.new
|
|
elsif cr
|
|
raise IllegalFormatError.new
|
|
end
|
|
return :DT_EOS, idx
|
|
end
|
|
|
|
def generate_body(cells, out_dev, col_sep)
|
|
row_data = cells.data.dup
|
|
if (!cells.is_null)
|
|
if (row_data.gsub!('"', '""') ||
|
|
row_data.include?(col_sep) ||
|
|
(/[\r\n]/ =~ row_data) || (cells.data.empty?))
|
|
out_dev << '"' << row_data << '"'
|
|
else
|
|
out_dev << row_data
|
|
end
|
|
end
|
|
end
|
|
|
|
def generate_separator(type, out_dev, col_sep)
|
|
case type
|
|
when :DT_COLSEP
|
|
out_dev << col_sep.chr
|
|
when :DT_ROWSEP
|
|
out_dev << "\r\n"
|
|
end
|
|
end
|
|
end
|
|
|
|
|
|
# DESCRIPTION
|
|
# CSV::StreamBuf -- a class for a bufferd stream.
|
|
#
|
|
# EXAMPLE 1 -- an IO.
|
|
# class MyBuf < StreamBuf
|
|
# # Do initialize myself before a super class. Super class might call my
|
|
# # method 'read'. (Could be awful for C++ user. :-)
|
|
# def initialize(s)
|
|
# @s = s
|
|
# super()
|
|
# end
|
|
#
|
|
# # define my own 'read' method.
|
|
# # CAUTION: Returning nil means EnfOfStream.
|
|
# def read(size)
|
|
# @s.read(size)
|
|
# end
|
|
#
|
|
# # release buffers. in Ruby which has GC, you do not have to call this...
|
|
# def terminate
|
|
# @s = nil
|
|
# super()
|
|
# end
|
|
# end
|
|
#
|
|
# buf = MyBuf.new(STDIN)
|
|
# my_str = ''
|
|
# p buf[0, 0] # => '' (null string)
|
|
# p buf[0] # => 97 (char code of 'a')
|
|
# p buf[0, 1] # => 'a'
|
|
# my_str = buf[0, 5]
|
|
# p my_str # => 'abcde' (5 chars)
|
|
# p buf[0, 6] # => "abcde\n" (6 chars)
|
|
# p buf[0, 7] # => "abcde\n" (6 chars)
|
|
# p buf.drop(3) # => 3 (dropped chars)
|
|
# p buf.get(0, 2) # => 'de' (2 chars)
|
|
# p buf.is_eos? # => false (is not EOS here)
|
|
# p buf.drop(5) # => 3 (dropped chars)
|
|
# p buf.is_eos? # => true (is EOS here)
|
|
# p buf[0] # => nil (is EOS here)
|
|
#
|
|
# EXAMPLE 2 -- String.
|
|
# This is a conceptual example. No pros with this.
|
|
#
|
|
# class StrBuf < StreamBuf
|
|
# def initialize(s)
|
|
# @str = s
|
|
# @idx = 0
|
|
# super()
|
|
# end
|
|
#
|
|
# def read(size)
|
|
# str = @str[@idx, size]
|
|
# @idx += str.size
|
|
# str
|
|
# end
|
|
# end
|
|
#
|
|
class StreamBuf # pure virtual. (do not instanciate it directly)
|
|
public
|
|
|
|
# SYNOPSIS
|
|
# char/str = CSV::StreamBuf#get(idx, n = nil)
|
|
# char/str = CSV::StreamBuf#[idx, n = nil]
|
|
#
|
|
# ARGS
|
|
# idx: index of a string to specify a start point of a string to get.
|
|
# Unlike String instance, idx < 0 returns nil.
|
|
# n: size of a string to get.
|
|
#
|
|
# RETURNS
|
|
# char: if n == nil. A char at idx.
|
|
# str: if n != nil. A partial string, from idx to (idx + size). At
|
|
# EOF, the string size could not equal to arg n.
|
|
#
|
|
# DESCRIPTION
|
|
# Get a char or a partial string from the stream.
|
|
#
|
|
def [](idx, n = nil)
|
|
if idx < 0
|
|
return nil
|
|
end
|
|
if (idx_is_eos?(idx))
|
|
if n and (@offset + idx == buf_size(@cur_buf))
|
|
# Like a String, 'abc'[4, 1] returns nil and
|
|
# 'abc'[3, 1] returns '' not nil.
|
|
return ''
|
|
else
|
|
return nil
|
|
end
|
|
end
|
|
my_buf = @cur_buf
|
|
my_offset = @offset
|
|
next_idx = idx
|
|
while (my_offset + next_idx >= buf_size(my_buf))
|
|
if (my_buf == @buf_tail_idx)
|
|
unless add_buf
|
|
break
|
|
end
|
|
end
|
|
next_idx = my_offset + next_idx - buf_size(my_buf)
|
|
my_buf += 1
|
|
my_offset = 0
|
|
end
|
|
loc = my_offset + next_idx
|
|
if !n
|
|
return @buf_list[my_buf][loc] # Fixnum of char code.
|
|
elsif (loc + n - 1 < buf_size(my_buf))
|
|
return @buf_list[my_buf][loc, n] # String.
|
|
else # should do loop insted of (tail) recursive call...
|
|
res = @buf_list[my_buf][loc, BufSize]
|
|
size_added = buf_size(my_buf) - loc
|
|
if size_added > 0
|
|
idx += size_added
|
|
n -= size_added
|
|
ret = self[idx, n]
|
|
if ret
|
|
res << ret
|
|
end
|
|
end
|
|
return res
|
|
end
|
|
end
|
|
alias get []
|
|
|
|
# SYNOPSIS
|
|
# size_dropped = CSV::StreamBuf#drop(n)
|
|
#
|
|
# ARGS
|
|
# n: drop size
|
|
#
|
|
# RETURNS
|
|
# size_dropped: droped size. At EOF, dropped size might not equals to arg n.
|
|
# 0 if n <= 0.
|
|
#
|
|
# DESCRIPTION
|
|
# Drop a string from the stream. Once you drop the head of the stream,
|
|
# access to the dropped part via [] or get returns nil.
|
|
#
|
|
def drop(n)
|
|
if is_eos?
|
|
return 0
|
|
end
|
|
size_dropped = 0
|
|
while (n > 0)
|
|
if (!@is_eos || (@cur_buf != @buf_tail_idx))
|
|
if (@offset + n < buf_size(@cur_buf))
|
|
size_dropped += n
|
|
@offset += n
|
|
n = 0
|
|
else
|
|
size = buf_size(@cur_buf) - @offset
|
|
size_dropped += size
|
|
n -= size
|
|
@offset = 0
|
|
unless rel_buf
|
|
unless add_buf
|
|
break
|
|
end
|
|
@cur_buf = @buf_tail_idx
|
|
end
|
|
end
|
|
end
|
|
end
|
|
size_dropped
|
|
end
|
|
|
|
# SYNOPSIS
|
|
# is_eos = CSV::StreamBuf#is_eos?
|
|
#
|
|
# RETURNS
|
|
# is_eos: true if end of the stream or false.
|
|
#
|
|
# DESCRIPTION
|
|
# Check EOF or not.
|
|
#
|
|
def is_eos?
|
|
return idx_is_eos?(0)
|
|
end
|
|
|
|
# SYNOPSIS
|
|
# N/A
|
|
#
|
|
# DESCRIPTION
|
|
# Do not instanciate this class directly. Define your own class which
|
|
# derives this class and define 'read' instance method.
|
|
#
|
|
def initialize
|
|
@buf_list = []
|
|
@cur_buf = @buf_tail_idx = -1
|
|
@offset = 0
|
|
@is_eos = false
|
|
add_buf
|
|
@cur_buf = @buf_tail_idx
|
|
end
|
|
|
|
protected
|
|
def terminate
|
|
while (rel_buf); end
|
|
end
|
|
|
|
# protected method 'read' must be defined in derived classes.
|
|
# CAUTION: Returning a string which size is not equal to 'size' means
|
|
# EnfOfStream. When it is not at EOS, you must block the callee, try to
|
|
# read and return the sized string.
|
|
def read(size) # raise EOFError
|
|
raise NotImplementedError.new('Method read must be defined in a derived class.')
|
|
end
|
|
|
|
private
|
|
|
|
def buf_size(idx)
|
|
@buf_list[idx].size
|
|
end
|
|
|
|
def add_buf
|
|
if @is_eos
|
|
return false
|
|
end
|
|
begin
|
|
str_read = read(BufSize)
|
|
rescue EOFError
|
|
str_read = nil
|
|
rescue
|
|
terminate
|
|
raise
|
|
end
|
|
if str_read.nil?
|
|
@is_eos = true
|
|
@buf_list.push('')
|
|
@buf_tail_idx += 1
|
|
false
|
|
else
|
|
@buf_list.push(str_read)
|
|
@buf_tail_idx += 1
|
|
true
|
|
end
|
|
end
|
|
|
|
def rel_buf
|
|
if (@cur_buf < 0)
|
|
return false
|
|
end
|
|
@buf_list[@cur_buf] = nil
|
|
if (@cur_buf == @buf_tail_idx)
|
|
@cur_buf = -1
|
|
return false
|
|
else
|
|
@cur_buf += 1
|
|
return true
|
|
end
|
|
end
|
|
|
|
def idx_is_eos?(idx)
|
|
(@is_eos && ((@cur_buf < 0) || (@cur_buf == @buf_tail_idx)))
|
|
end
|
|
|
|
BufSize = 1024 * 8
|
|
end
|
|
|
|
# DESCRIPTION
|
|
# CSV::IOBuf -- a class for a bufferd IO.
|
|
#
|
|
# EXAMPLE
|
|
# # File 'bigdata' could be a giga-byte size one!
|
|
# buf = CSV::IOBuf.new(File.open('bigdata', 'rb'))
|
|
# CSV::Reader.new(buf).each do |row|
|
|
# p row
|
|
# break if row[0].data == 'admin'
|
|
# end
|
|
#
|
|
class IOBuf < StreamBuf
|
|
public
|
|
def initialize(s)
|
|
@s = s
|
|
super()
|
|
end
|
|
|
|
def close
|
|
terminate
|
|
end
|
|
|
|
private
|
|
def read(size)
|
|
@s.read(size)
|
|
end
|
|
|
|
def terminate
|
|
super()
|
|
end
|
|
end
|
|
end
|