mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
ffc136a024
To parse Mac's CR-separated CSV, do it like this: CSV.open("mac.csv", "r", ?,,?\r) { |row| p row.to_a } The 3rd parameter in this example, ?, , is the column separator and the 4th, ?\r, is the row separator. The row separator is nil by default. A nil separator means "\r\n" or "\n". * test/csv/test_csv.rb: add tests for above feature. * test/csv/mac.csv: added. Sample CR separated CSV file. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@4553 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
1336 lines
33 KiB
Ruby
1336 lines
33 KiB
Ruby
# CSV -- module for generating/parsing CSV data.
|
|
|
|
# $Id$
|
|
|
|
# This module is copyrighted free software by NAKAMURA, Hiroshi.
|
|
# You can redistribute it and/or modify it under the same term as Ruby.
|
|
|
|
|
|
class CSV
|
|
public
|
|
|
|
# DESCRIPTION
#   CSV::Cell -- Describes 1 cell of CSV: a datum plus a flag telling
#   whether the datum has to be treated as NULL.
#
class Cell
  public

  # Datum as string.
  attr_accessor :data

  # Is this datum null?
  attr_accessor :is_null

  # SYNOPSIS
  #   cell = CSV::Cell.new(data = '', is_null = true)
  #
  # ARGS
  #   data: datum as String
  #   is_null: is this datum null?
  #
  # RETURNS
  #   cell: Created instance.
  #
  # DESCRIPTION
  #   Create instance.  If is_null is true, datum is stored in the instance
  #   created but it should be treated as 'NULL'.
  #
  def initialize(data = '', is_null = true)
    @data = data
    @is_null = is_null
  end

  # SYNOPSIS
  #   CSV::Cell#match(rhs)
  #
  # ARGS
  #   rhs: an instance of CSV::Cell to be compared.
  #
  # RETURNS
  #   true if both cells are Null, false if exactly one of them is Null,
  #   otherwise the result of comparing both data with ==.
  #
  # DESCRIPTION
  #   Compare another cell with me.  Bear in mind Null matches with Null
  #   using this method.  Use CSV::Cell#== if you want Null never matches
  #   with other data including Null.
  #
  def match(rhs)
    return true if @is_null and rhs.is_null
    return false if @is_null or rhs.is_null
    @data == rhs.data
  end

  # SYNOPSIS
  #   CSV::Cell#==(rhs)
  #
  # ARGS
  #   rhs: an instance of CSV::Cell to be compared.  Any object that does
  #        not respond to 'is_null' and 'data' is simply not equal.
  #
  # RETURNS
  #   false if rhs is not cell-like or if either side is Null, otherwise
  #   the result of comparing both data with ==.
  #
  # DESCRIPTION
  #   Compare another cell with me.  Bear in mind Null does not match with
  #   Null using this method.  Null never matches with other data including
  #   Null.  Use CSV::Cell#match if you want Null matches with Null.
  #
  def ==(rhs)
    # == is invoked implicitly by Array#include?, Array#index and friends
    # with arbitrary objects, so a non-cell must yield false, not raise.
    unless rhs.respond_to?(:is_null) and rhs.respond_to?(:data)
      return false
    end
    return false if @is_null or rhs.is_null
    @data == rhs.data
  end
end
|
|
|
|
|
|
# DESCRIPTION
#   CSV::Row -- Describes a row of CSV.  It is an Array whose elements
#   must each be a CSV::Cell.
#
class Row < Array
  public

  # SYNOPSIS
  #   CSV::Row#to_a
  #
  # RETURNS
  #   An Array of String.
  #
  # DESCRIPTION
  #   Convert each CSV::Cell to its String datum.  A Null cell is converted
  #   to nil.
  #
  def to_a
    map { |cell| cell.is_null ? nil : cell.data }
  end

  # SYNOPSIS
  #   CSV::Row#match(rhs)
  #
  # ARGS
  #   rhs: an Array of cells.  Each cell is an instance of CSV::Cell.
  #
  # RETURNS
  #   true if both rows have the same size and every cell matches the cell
  #   at the same position (see CSV::Cell#match), false otherwise.
  #
  # DESCRIPTION
  #   Compare another row with me.
  #
  def match(rhs)
    return false unless size == rhs.size
    each_with_index do |cell, position|
      return false unless cell.match(rhs[position])
    end
    true
  end
end
|
|
|
|
|
|
# SYNOPSIS
#   1. reader = CSV.open(filename, 'r')
#
#   2. CSV.open(filename, 'r') do |row|
#        ...
#      end
#
#   3. writer = CSV.open(filename, 'w')
#
#   4. CSV.open(filename, 'w') do |writer|
#        ...
#      end
#
# ARGS
#   filename: filename to open.
#   mode: 'r' or 'rb' for read (parse)
#         'w' or 'wb' for write (generate)
#   col_sep: Column separator.  ?, by default.
#   row_sep: Row separator.  nil by default.
#   row: an Array of cells which is a parsed line.
#   writer: Created writer instance.  See CSV::Writer#<< and
#     CSV::Writer#add_row to know how to generate CSV string.
#
# RETURNS
#   reader: Created reader instance.  To get parse result, see
#     CSV::Reader#each.
#   writer: Created writer instance.  See CSV::Writer#<< and
#     CSV::Writer#add_row to know how to generate CSV string.
#   nil when a block is given.
#
# DESCRIPTION
#   Open a CSV formatted file to read or write.  Any other mode raises
#   ArgumentError.
#
# EXAMPLE 1
#   reader = CSV.open('csvfile.csv', 'r')
#   row1 = reader.shift
#   row2 = reader.shift
#   if row2.empty?
#     p 'row2 not found.'
#   end
#   reader.close
#
# EXAMPLE 2
#   CSV.open('csvfile.csv', 'r') do |row|
#     p row
#   end
#
# EXAMPLE 3
#   writer = CSV.open('csvfile.csv', 'w')
#   writer << ['r1c1', 'r1c2'] << ['r2c1', 'r2c2'] << [nil, nil]
#   writer.close
#
# EXAMPLE 4
#   CSV.open('csvfile.csv', 'w') do |writer|
#     writer << ['r1c1', 'r1c2']
#     writer << ['r2c1', 'r2c2']
#     writer << [nil, nil]
#   end
#
def self.open(filename, mode, col_sep = ?,, row_sep = nil, &block)
  if mode == 'r' or mode == 'rb'
    return open_reader(filename, col_sep, row_sep, &block)
  end
  if mode == 'w' or mode == 'wb'
    return open_writer(filename, col_sep, row_sep, &block)
  end
  raise ArgumentError.new("'mode' must be 'r', 'rb', 'w', or 'wb'")
end
|
|
|
|
# Open the file +filename+ for reading as CSV.  Takes the same col_sep,
# row_sep and block arguments as CSV.open in read mode and returns what
# open_reader returns.
def self.parse(filename, col_sep = ?,, row_sep = nil, &block)
  open_reader(filename, col_sep, row_sep, &block)
end
|
|
|
|
# Open the file +filename+ for writing as CSV.  Takes the same col_sep,
# row_sep and block arguments as CSV.open in write mode and returns what
# open_writer returns.
def self.generate(filename, col_sep = ?,, row_sep = nil, &block)
  open_writer(filename, col_sep, row_sep, &block)
end
|
|
|
|
# Private class methods.
|
|
class << self
|
|
private
|
|
# Open +filename+ in binary read mode and parse it as CSV.
#
# With a block: every parsed row is yielded, the file is always closed
# afterwards and nil is returned.
#
# Without a block: a reader created by CSV::Reader.create is returned.  The
# reader is told to close the file when it is closed itself.  If the reader
# cannot be set up, the file is closed before the error propagates so that
# the handle does not leak.
def open_reader(filename, col_sep, row_sep, &block)
  file = File.open(filename, 'rb')
  if block
    begin
      CSV::Reader.parse(file, col_sep, row_sep) do |row|
        yield(row)
      end
    ensure
      file.close
    end
    nil
  else
    handed_over = false
    begin
      reader = CSV::Reader.create(file, col_sep, row_sep)
      reader.close_on_terminate
      # From here on the reader owns the file.
      handed_over = true
      reader
    ensure
      file.close unless handed_over
    end
  end
end
|
|
|
|
# Open +filename+ in binary write mode for CSV generation.
#
# Without a block: a writer created by CSV::Writer.create is returned.  The
# writer is told to close the file when it is closed itself.
#
# With a block: the writer is yielded, the file is always closed afterwards
# and nil is returned.
def open_writer(filename, col_sep, row_sep, &block)
  file = File.open(filename, 'wb')
  unless block
    writer = CSV::Writer.create(file, col_sep, row_sep)
    writer.close_on_terminate
    return writer
  end
  begin
    CSV::Writer.generate(file, col_sep, row_sep) { |writer| yield(writer) }
  ensure
    file.close
  end
  nil
end
|
|
end
|
|
|
|
|
|
# DESCRIPTION
#   CSV::Reader -- CSV formatted string/stream reader.  This is the abstract
#   base class; use CSV::Reader.create or CSV::Reader.parse to get a
#   concrete reader.
#
# EXAMPLE
#   Read CSV lines until the first column is 'stop'.
#
#   CSV::Reader.parse(File.open('bigdata', 'rb')) do |row|
#     p row
#     break if !row[0].is_null && row[0].data == 'stop'
#   end
#
class Reader
  include Enumerable
  public

  # SYNOPSIS
  #   reader = CSV::Reader.create(str_or_readable)
  #
  # ARGS
  #   str_or_readable: a CSV data to be parsed.  A String or an IO.
  #
  # RETURNS
  #   reader: Created instance.  A StringReader for a String, an IOReader
  #     for anything else.
  #
  # DESCRIPTION
  #   Create instance.  To get parse result, see CSV::Reader#each.
  #
  def self.create(str_or_readable, col_sep = ?,, row_sep = nil)
    case str_or_readable
    when String
      StringReader.new(str_or_readable, col_sep, row_sep)
    else
      # IO instances and any other readable object.
      IOReader.new(str_or_readable, col_sep, row_sep)
    end
  end

  # SYNOPSIS
  #   CSV::Reader.parse(str_or_readable) do |row|
  #     ...
  #   end
  #
  # ARGS
  #   str_or_readable: a CSV data to be parsed.  A String or an IO.
  #   row: a CSV::Row; an Array of a CSV::Cell in a line.
  #
  # RETURNS
  #   nil
  #
  # DESCRIPTION
  #   Parse CSV data and get lines.  Caller block is called for each line
  #   with an argument which is a chunk of cells in a row.
  #
  #   Block value is always nil.  Rows are not cached for performance
  #   reason.  The reader is closed even when the block leaves early with
  #   break or by raising.
  #
  def self.parse(str_or_readable, col_sep = ?,, row_sep = nil)
    reader = create(str_or_readable, col_sep, row_sep)
    begin
      reader.each do |row|
        yield(row)
      end
    ensure
      # Must run on break/raise too, otherwise the reader is never closed.
      reader.close
    end
    nil
  end

  # SYNOPSIS
  #   CSV::Reader#each do |row|
  #     ...
  #   end
  #
  # ARGS
  #   row: a CSV::Row; an Array of a CSV::Cell in a line.
  #
  # RETURNS
  #   nil
  #
  # DESCRIPTION
  #   Caller block is called for each line with an argument which is a chunk
  #   of cells in a row.  Iteration stops when get_row reports 0 parsed
  #   cells.
  #
  #   Block value is always nil.  Rows are not cached for performance
  #   reason.
  #
  def each
    while true
      row = Row.new
      break if get_row(row) == 0
      yield(row)
    end
    nil
  end

  # SYNOPSIS
  #   cell = CSV::Reader#shift
  #
  # RETURNS
  #   cell: a CSV::Row; an Array of a CSV::Cell.  Empty when no cell could
  #     be parsed.
  #
  # DESCRIPTION
  #   Extract cells of next line.
  #
  def shift
    row = Row.new
    get_row(row)
    row
  end

  # SYNOPSIS
  #   CSV::Reader#close
  #
  # RETURNS
  #   The value of the terminate hook (nil in this base class).
  #
  # DESCRIPTION
  #   Close this reader.
  #
  def close
    terminate
  end

  private

  # This class is abstract; derived classes define their own initialize.
  def initialize(dev)
    raise RuntimeError.new('Do not instanciate this class directly.')
  end

  # Fill +row+ with the cells of the next line and return the number of
  # parsed cells.  Derived classes must define it.
  def get_row(row)
    raise NotImplementedError.new('Method get_row must be defined in a derived class.')
  end

  # Hook invoked by close.
  def terminate
    # Define if needed.
  end
end
|
|
|
|
|
|
# DESCRIPTION
#   CSV::StringReader -- CSV formatted String reader.  CSV::Reader.create
#   returns an instance of this class when it is given a String.
#
# EXAMPLE
#   Read CSV lines until the first column is 'stop'.
#
#   CSV::Reader.parse("a,b\nstop,c\nd,e") do |row|
#     p row
#     break if !row[0].is_null && row[0].data == 'stop'
#   end
#
class StringReader < Reader
  public

  # SYNOPSIS
  #   reader = CSV::StringReader.new(string)
  #
  # ARGS
  #   string: a CSV String to be parsed.
  #   col_sep: Column separator.  ?, by default.
  #   row_sep: Row separator.  nil by default.
  #
  # RETURNS
  #   reader: Created instance.
  #
  # DESCRIPTION
  #   Create instance.  To get parse result, see CSV::Reader#each.  When the
  #   first 3 elements of the string equal "\xef\xbb\xbf", parsing starts
  #   right after them.
  #
  def initialize(string, col_sep = ?,, row_sep = nil)
    @col_sep = col_sep
    @row_sep = row_sep
    @dev = string
    @idx = (@dev[0, 3] == "\xef\xbb\xbf") ? 3 : 0
  end

  private

  # Parse the next row at the current index into +row+, remember where the
  # following row starts and return the number of parsed cells.  Raises
  # IllegalFormatError when CSV.parse_row reports a failure (0, 0) although
  # the current index is not at the end of the string.
  def get_row(row)
    parsed_cells, next_idx = CSV.parse_row(@dev, @idx, row, @col_sep, @row_sep)
    nothing_parsed = (parsed_cells == 0 && next_idx == 0)
    if nothing_parsed && @idx != @dev.size
      raise IllegalFormatError.new
    end
    @idx = next_idx
    parsed_cells
  end
end
|
|
|
|
|
|
# DESCRIPTION
#   CSV::IOReader -- CSV formatted stream reader.  The stream is read
#   through a CSV::IOBuf.
#
# EXAMPLE
#   Read CSV lines until the first column is 'stop'.
#
#   CSV::Reader.parse(File.open('bigdata', 'rb')) do |row|
#     p row
#     break if !row[0].is_null && row[0].data == 'stop'
#   end
#
class IOReader < Reader
  public

  # SYNOPSIS
  #   reader = CSV::IOReader.new(io)
  #
  # ARGS
  #   io: a CSV data to be parsed.  Must be an IO.  (io#read is called.)
  #   col_sep: Column separator.  ?, by default.
  #   row_sep: Row separator.  nil by default.
  #
  # RETURNS
  #   reader: Created instance.
  #
  # DESCRIPTION
  #   Create instance.  To get parse result, see CSV::Reader#each.  When the
  #   first 3 elements of the stream compare equal to 0xef, 0xbb and 0xbf,
  #   parsing starts right after them.
  #
  def initialize(io, col_sep = ?,, row_sep = nil)
    @io = io
    @col_sep = col_sep
    @row_sep = row_sep
    @close_on_terminate = false
    @dev = CSV::IOBuf.new(@io)
    @idx = 0
    @idx += 3 if @dev[0] == 0xef && @dev[1] == 0xbb && @dev[2] == 0xbf
  end

  # SYNOPSIS
  #   CSV::IOReader#close_on_terminate
  #
  # RETURNS
  #   true
  #
  # DESCRIPTION
  #   Tell this reader to close the IO when terminated (Triggered by invoking
  #   CSV::IOReader#close).
  #
  def close_on_terminate
    @close_on_terminate = true
  end

  private

  # Parse the next row at the current index into +row+ and return the number
  # of parsed cells.  The consumed part is dropped from the buffer, so the
  # index is kept relative to what remains.  Raises IllegalFormatError when
  # CSV.parse_row reports a failure (0, 0) although the buffer is not at the
  # end of the stream.
  def get_row(row)
    parsed_cells, next_idx = CSV.parse_row(@dev, @idx, row, @col_sep, @row_sep)
    nothing_parsed = (parsed_cells == 0 && next_idx == 0)
    if nothing_parsed && !@dev.is_eos?
      raise IllegalFormatError.new
    end
    @idx = next_idx - @dev.drop(next_idx)
    parsed_cells
  end

  # Close the IO first (only if requested via close_on_terminate), then
  # close the buffer.
  def terminate
    @io.close if @close_on_terminate

    @dev.close if @dev
  end
end
|
|
|
|
|
|
# DESCRIPTION
#   CSV::Writer -- CSV formatted string/stream writer.  This is the abstract
#   base class; use CSV::Writer.create or CSV::Writer.generate to get a
#   concrete writer.
#
# EXAMPLE
#   Write rows to 'csvout' file.
#
#   outfile = File.open('csvout', 'wb')
#   CSV::Writer.generate(outfile) do |csv|
#     csv << ['c1', nil, '', '"', "\r\n", 'c2']
#     # or
#     csv.add_row [
#       CSV::Cell.new('c1', false),
#       CSV::Cell.new('dummy', true),
#       CSV::Cell.new('', false),
#       CSV::Cell.new('"', false),
#       CSV::Cell.new("\r\n", false),
#       CSV::Cell.new('c2', false)
#     ]
#     ...
#     ...
#   end
#
#   outfile.close
#
class Writer
  public

  # SYNOPSIS
  #   writer = CSV::Writer.create(str_or_writable)
  #
  # ARGS
  #   str_or_readable: device for generated CSV string.  Must respond to
  #     '<<(string)'.  (Despite its name, this is the output device.)
  #
  # RETURNS
  #   writer: Created instance (a BasicWriter).
  #
  # DESCRIPTION
  #   Create instance.  To add CSV data to generate CSV string, see
  #   CSV::Writer#<< or CSV::Writer#add_row.
  #
  def self.create(str_or_readable, col_sep = ?,, row_sep = nil)
    BasicWriter.new(str_or_readable, col_sep, row_sep)
  end

  # SYNOPSIS
  #   CSV::Writer.generate(str_or_writable) do |writer|
  #     ...
  #   end
  #
  # ARGS
  #   str_or_writable: device for generated CSV string.  Must respond to
  #     '<<(string)'.
  #   writer: Created writer instance.  See CSV::Writer#<< and
  #     CSV::Writer#add_row to know how to generate CSV string.
  #
  # RETURNS
  #   nil
  #
  # DESCRIPTION
  #   Create writer instance.  Caller block is called with the new instance.
  #   To add CSV data to generate CSV string, see CSV::Writer#<< or
  #   CSV::Writer#add_row.  The writer is closed even when the block leaves
  #   early with break or by raising.
  #
  def self.generate(str_or_writable, col_sep = ?,, row_sep = nil)
    writer = Writer.create(str_or_writable, col_sep, row_sep)
    begin
      yield(writer)
    ensure
      # Must run on break/raise too, otherwise the writer is never closed.
      writer.close
    end
    nil
  end

  # SYNOPSIS
  #   CSV::Writer#<<(row)
  #
  # ARGS
  #   row: an Array of a String.
  #
  # RETURNS
  #   self
  #
  # DESCRIPTION
  #   Dump CSV stream to the device.  Argument is an array of a String like
  #   ['c1', 'c2', 'c3'].  A nil item is written as a Null cell, a CSV::Cell
  #   item is used as it is and any other item is stringified with to_s.
  #
  def <<(ary)
    row = ary.collect { |item|
      if item.is_a?(Cell)
        item
      elsif item.nil?
        Cell.new('', true)
      else
        Cell.new(item.to_s, false)
      end
    }
    CSV.generate_row(row, row.size, @dev, @col_sep, @row_sep)
    self
  end

  # SYNOPSIS
  #   CSV::Writer#add_row(row)
  #
  # ARGS
  #   row: an Array of a CSV::Cell.
  #
  # RETURNS
  #   self
  #
  # DESCRIPTION
  #   Dump CSV stream to the device.  Argument is an array of a CSV::Cell
  #   like [CSV::Cell.new('c1', false), CSV::Cell.new('dummy', true)].
  #   (Former is 'c1' and latter is Null.)
  #
  def add_row(row)
    CSV.generate_row(row, row.size, @dev, @col_sep, @row_sep)
    self
  end

  # SYNOPSIS
  #   CSV::Writer#close
  #
  # RETURNS
  #   The value of the terminate hook (nil in this base class).
  #
  # DESCRIPTION
  #   Close this writer.
  #
  def close
    terminate
  end

  private

  # This class is abstract; derived classes define their own initialize.
  def initialize(dev)
    raise RuntimeError.new('Do not instanciate this class directly.')
  end

  # Hook invoked by close.
  def terminate
    # Define if needed.
  end
end
|
|
|
|
|
|
# DESCRIPTION
#   CSV::BasicWriter -- CSV formatted string/stream writer using <<.
#
class BasicWriter < Writer
  public

  # SYNOPSIS
  #   writer = CSV::BasicWriter.new(str_or_writable)
  #
  # ARGS
  #   str_or_writable: device for generated CSV string.  Must respond to
  #     '<<(string)'.
  #   col_sep: Column separator.  ?, by default.
  #   row_sep: Row separator.  nil by default.
  #
  # RETURNS
  #   writer: Created instance.
  #
  # DESCRIPTION
  #   Create instance.  To add CSV data to generate CSV string, see
  #   CSV::Writer#<< or CSV::Writer#add_row.  The device is not closed on
  #   close unless close_on_terminate has been called.
  #
  def initialize(str_or_writable, col_sep = ?,, row_sep = nil)
    @dev = str_or_writable
    @col_sep = col_sep
    @row_sep = row_sep
    @close_on_terminate = false
  end

  # SYNOPSIS
  #   CSV::BasicWriter#close_on_terminate
  #
  # RETURNS
  #   true
  #
  # DESCRIPTION
  #   Tell this writer to close the IO when terminated (Triggered by invoking
  #   CSV::BasicWriter#close).
  #
  def close_on_terminate
    @close_on_terminate = true
  end

  private

  # Close the device, but only if that was requested.
  def terminate
    @dev.close if @close_on_terminate
  end
end
|
|
|
|
# SYNOPSIS
#   cells = CSV.parse_line(src, col_sep = ?,, row_sep = nil)
#
# ARGS
#   src: a CSV String.
#   col_sep: Column separator.  ?, by default.  If you want to separate
#     fields with semicolon, give ?; here.
#   row_sep: Row separator.  nil by default.  nil means "\r\n or \n".  If you
#     want to separate records with \r, give ?\r here.
#
# RETURNS
#   cells: a CSV::Row of parsed cells in first line.  Each cell is a String,
#     or nil for a Null cell.  An empty CSV::Row if the line is malformed.
#
# DESCRIPTION
#   Parse one line from given string.  Bear in mind it parses ONE LINE.  Rest
#   of the string is ignored for example "a,b\r\nc,d" => ['a', 'b'] and the
#   second line 'c,d' is ignored.
#
#   If you don't know whether a target string to parse is exactly 1 line or
#   not, use CSV.parse_row instead of this method.
#
def self.parse_line(src, col_sep = ?,, row_sep = nil)
  fields = Row.new
  position = 0
  delimiter = :DT_COLSEP
  # Keep reading cells for as long as the last one ended at a column
  # separator.
  while delimiter.equal?(:DT_COLSEP)
    cell = Cell.new
    delimiter, position = parse_body(src, position, cell, col_sep, row_sep)
    fields.push(cell.is_null ? nil : cell.data)
  end
  fields
rescue IllegalFormatError
  Row.new
end
|
|
|
|
|
|
# SYNOPSIS
#   str = CSV.generate_line(cells, col_sep = ?,, row_sep = nil)
#
# ARGS
#   cells: an Array of cell to be converted to CSV string.  Each cell must
#     respond to 'to_s'.  nil is written as a Null cell.
#   col_sep: Column separator.  ?, by default.  If you want to separate
#     fields with semicolon, give ?; here.
#   row_sep: Row separator.  nil by default.  nil means "\r\n or \n".  If you
#     want to separate records with \r, give ?\r here.
#
# RETURNS
#   str: a String of generated CSV string.  No row separator is appended.
#
# DESCRIPTION
#   Create a line from cells.  Each cell is stringified by to_s.
#
def self.generate_line(cells, col_sep = ?,, row_sep = nil)
  return '' if cells.size == 0
  line = ''
  cells.size.times do |position|
    # Column separators go between cells only.
    if position > 0
      generate_separator(:DT_COLSEP, line, col_sep, row_sep)
    end
    item = cells[position]
    cell = item.nil? ? Cell.new('', true) : Cell.new(item.to_s, false)
    generate_body(cell, line, col_sep, row_sep)
  end
  line
end
|
|
|
|
# SYNOPSIS
#   parsed_cells, idx = CSV.parse_row(src, idx, out_dev, col_sep = ?,, row_sep = nil)
#
# ARGS
#   src: a CSV data to be parsed.  Must respond '[](idx)'.
#     src[](idx) must return a char.  (Not a string such as 'a', but 97).
#     src[](idx_out_of_bounds) must return nil.  A String satisfies this
#     requirement.
#   idx: index of parsing location of 'src'.  0 origin.
#   out_dev: buffer for parsed cells.  Must respond '<<(CSV::Cell)'.
#   col_sep: Column separator.  ?, by default.  If you want to separate
#     fields with semicolon, give ?; here.
#   row_sep: Row separator.  nil by default.  nil means "\r\n or \n".  If you
#     want to separate records with \r, give ?\r here.
#
# RETURNS
#   parsed_cells: num of parsed cells.
#   idx: index of next parsing location of 'src'.
#   Both are 0 when nothing was left to parse or when the row is malformed
#   (cells parsed before the malformed one stay in out_dev).
#
# DESCRIPTION
#   Parse a line from string.  To parse lines in CSV string, see EXAMPLE
#   below.
#
# EXAMPLE
#   src = "a,b\r\nc,d\r\ne,f"
#   idx = 0
#   begin
#     parsed = []
#     parsed_cells, idx = CSV.parse_row(src, idx, parsed)
#     puts "Parsed #{ parsed_cells } cells."
#     p parsed
#   end while parsed_cells > 0
#
def self.parse_row(src, idx, out_dev, col_sep = ?,, row_sep = nil)
  start = idx
  count = 0
  row_done = false
  until row_done
    cell = Cell.new
    kind, idx = parse_body(src, idx, cell, col_sep, row_sep)
    if kind.equal?(:DT_EOS)
      # End of data without consuming anything: there is no row here.
      return [0, 0] if idx == start
      row_done = true
    elsif kind.equal?(:DT_ROWSEP)
      row_done = true
    end
    count += 1
    out_dev << cell
  end
  [count, idx]
rescue IllegalFormatError
  [0, 0]
end
|
|
|
|
# SYNOPSIS
#   parsed_cells = CSV.generate_row(src, cells, out_dev, col_sep = ?,, row_sep = nil)
#
# ARGS
#   src: an Array of CSV::Cell to be converted to CSV string.  Must respond to
#     'size' and '[](idx)'.  src[idx] must return CSV::Cell.
#   cells: num of cells in a line.
#   out_dev: buffer for generated CSV string.  Must respond to '<<(string)'.
#   col_sep: Column separator.  ?, by default.  If you want to separate
#     fields with semicolon, give ?; here.
#   row_sep: Row separator.  nil by default.  nil means "\r\n or \n".  If you
#     want to separate records with \r, give ?\r here.
#
# RETURNS
#   parsed_cells: num of converted cells.
#
# DESCRIPTION
#   Convert a line from cells data to string.  The line is terminated with
#   a row separator when exactly 'cells' cells were written; otherwise a
#   column separator is written so that the line can be continued.  To
#   generate multi-row CSV string, See EXAMPLE below.
#
# EXAMPLE
#   def d(str)
#     CSV::Cell.new(str, false)
#   end
#
#   row1 = [d('a'), d('b')]
#   row2 = [d('c'), d('d')]
#   row3 = [d('e'), d('f')]
#   src = [row1, row2, row3]
#   buf = ''
#   src.each do |row|
#     parsed_cells = CSV.generate_row(row, 2, buf)
#     puts "Created #{ parsed_cells } cells."
#   end
#   p buf
#
def self.generate_row(src, cells, out_dev, col_sep = ?,, row_sep = nil)
  available = src.size
  if available == 0
    # An empty source yields an empty line only if an empty line was asked for.
    generate_separator(:DT_ROWSEP, out_dev, col_sep, row_sep) if cells == 0
    return 0
  end
  # The first cell is always written, whatever 'cells' is.
  generate_body(src[0], out_dev, col_sep, row_sep)
  written = 1
  while written < cells && written != available
    generate_separator(:DT_COLSEP, out_dev, col_sep, row_sep)
    generate_body(src[written], out_dev, col_sep, row_sep)
    written += 1
  end
  closing = (written == cells) ? :DT_ROWSEP : :DT_COLSEP
  generate_separator(closing, out_dev, col_sep, row_sep)
  written
end
|
|
|
|
private
|
|
# Raised while parsing when the CSV data is malformed.
class IllegalFormatError < RuntimeError
end
|
|
|
|
# Private class methods.
|
|
class << self
|
|
private
|
|
|
|
# Parse one cell of +src+ starting at +idx+ and store the result in +cell+
# (the text is appended to cell.data, cell.is_null is set).
#
# Returns [type, next_idx] where type tells what ended the cell:
#   :DT_COLSEP -- a column separator
#   :DT_ROWSEP -- a row separator
#   :DT_EOS    -- the end of src
# and next_idx is the index right after the consumed part.
#
# Raises IllegalFormatError for malformed data: a quote inside an unquoted
# cell, text after a closing quote, an unterminated quoted cell or a stray
# "\r" (with the default row separator).
#
# States:
#   :ST_START -- nothing read for this cell yet (ending here means Null)
#   :ST_DATA  -- reading cell text, quoted or not
#   :ST_QUOTE -- just read a quote that closes the cell or starts a '""'
def parse_body(src, idx, cell, col_sep, row_sep)
  line_end = row_sep || ?\n
  cell.is_null = false
  state = :ST_START
  in_quotes = false
  # true after an unquoted "\r" that must be followed by "\n"
  pending_cr = false
  while (ch = src[idx])
    idx += 1
    if ch == col_sep
      case state
      when :ST_DATA
        raise IllegalFormatError.new if pending_cr
        return [:DT_COLSEP, idx] unless in_quotes
        cell.data << ch.chr
      when :ST_QUOTE
        raise IllegalFormatError.new if pending_cr
        return [:DT_COLSEP, idx]
      else # :ST_START
        cell.is_null = true
        return [:DT_COLSEP, idx]
      end
    elsif ch == ?"
      case state
      when :ST_DATA
        # A quote is only legal here as the end of a quoted cell.
        raise IllegalFormatError.new if pending_cr || !in_quotes
        in_quotes = false
        state = :ST_QUOTE
      when :ST_QUOTE
        # '""' inside a quoted cell is one literal quote.
        cell.data << ch.chr
        in_quotes = true
        state = :ST_DATA
      else # :ST_START
        in_quotes = true
        state = :ST_DATA
      end
    elsif row_sep.nil? && ch == ?\r
      raise IllegalFormatError.new if pending_cr
      if in_quotes
        cell.data << ch.chr
        state = :ST_DATA
      else
        pending_cr = true
      end
    elsif ch == line_end
      case state
      when :ST_DATA
        return [:DT_ROWSEP, idx] if pending_cr || !in_quotes
        cell.data << ch.chr
      when :ST_QUOTE
        return [:DT_ROWSEP, idx]
      else # :ST_START
        cell.is_null = true
        return [:DT_ROWSEP, idx]
      end
    else
      if state.equal?(:ST_QUOTE) || pending_cr
        raise IllegalFormatError.new
      end
      cell.data << ch.chr
      state = :ST_DATA
    end
  end
  # End of src reached.
  if state.equal?(:ST_START)
    cell.is_null = true
  elsif !state.equal?(:ST_QUOTE) && (in_quotes || pending_cr)
    raise IllegalFormatError.new
  end
  [:DT_EOS, idx]
end
|
|
|
|
# Append the CSV text of one cell to +out_dev+.  Despite its name, +cells+
# is a single cell (it must respond to 'data' and 'is_null').
#
# Nothing is written for a Null cell.  Otherwise the datum is written; it is
# wrapped in double quotes when it is empty or contains a double quote (each
# one is doubled), the column separator, the row separator, "\r" or "\n".
def generate_body(cells, out_dev, col_sep, row_sep)
  text = cells.data.dup
  return nil if cells.is_null
  # gsub! comes first: it doubles the quotes and is truthy only when it
  # replaced something.
  needs_quotes = text.gsub!('"', '""') ||
    text.include?(col_sep) ||
    (row_sep && text.index(row_sep)) ||
    (/[\r\n]/ =~ text) ||
    cells.data.empty?
  if needs_quotes
    out_dev << '"' << text << '"'
  else
    out_dev << text
  end
end
|
|
|
|
# Append a separator to +out_dev+: the column separator for :DT_COLSEP, the
# row separator (or "\r\n" when row_sep is nil) for :DT_ROWSEP.  Any other
# type writes nothing and returns nil.
def generate_separator(type, out_dev, col_sep, row_sep)
  if type.equal?(:DT_COLSEP)
    out_dev << col_sep.chr
  elsif type.equal?(:DT_ROWSEP)
    out_dev << (row_sep || "\r\n")
  end
end
|
|
end
|
|
|
|
|
|
# DESCRIPTION
#   CSV::StreamBuf -- a class for a buffered stream.  Data is pulled from
#   the 'read' method of a derived class in chunks of BufSize.
#
# EXAMPLE 1 -- an IO.
#   class MyBuf < StreamBuf
#     # Do initialize myself before a super class.  Super class might call my
#     # method 'read'.  (Could be awful for C++ user. :-)
#     def initialize(s)
#       @s = s
#       super()
#     end
#
#     # define my own 'read' method.
#     # CAUTION: Returning nil means EndOfStream.
#     def read(size)
#       @s.read(size)
#     end
#
#     # release buffers. in Ruby which has GC, you do not have to call this...
#     def terminate
#       @s = nil
#       super()
#     end
#   end
#
#   buf = MyBuf.new(STDIN)
#   my_str = ''
#   p buf[0, 0]               # => '' (null string)
#   p buf[0]                  # => 97 (char code of 'a')
#   p buf[0, 1]               # => 'a'
#   my_str = buf[0, 5]
#   p my_str                  # => 'abcde' (5 chars)
#   p buf[0, 6]               # => "abcde\n" (6 chars)
#   p buf[0, 7]               # => "abcde\n" (6 chars)
#   p buf.drop(3)             # => 3 (dropped chars)
#   p buf.get(0, 2)           # => 'de' (2 chars)
#   p buf.is_eos?             # => false (is not EOS here)
#   p buf.drop(5)             # => 3 (dropped chars)
#   p buf.is_eos?             # => true (is EOS here)
#   p buf[0]                  # => nil (is EOS here)
#
# EXAMPLE 2 -- String.
#   This is a conceptual example.  No pros with this.
#
#   class StrBuf < StreamBuf
#     def initialize(s)
#       @str = s
#       @idx = 0
#       super()
#     end
#
#     def read(size)
#       str = @str[@idx, size]
#       # nil is the EndOfStream marker; an empty string is not.
#       return nil if str.nil? or str.empty?
#       @idx += str.size
#       str
#     end
#   end
#
class StreamBuf # pure virtual. (do not instantiate it directly)
  public

  # SYNOPSIS
  #   char/str = CSV::StreamBuf#get(idx, n = nil)
  #   char/str = CSV::StreamBuf#[idx, n = nil]
  #
  # ARGS
  #   idx: index of a string to specify a start point of a string to get.
  #     Unlike String instance, idx < 0 returns nil.
  #   n: size of a string to get.
  #
  # RETURNS
  #   char: if n == nil.  A char at idx.
  #   str: if n != nil.  A partial string, from idx to (idx + size).  At
  #     EOF, the string size could not equal to arg n.
  #
  # DESCRIPTION
  #   Get a char or a partial string from the stream.  Further chunks are
  #   read on demand.
  #
  def [](idx, n = nil)
    return nil if idx < 0
    if idx_is_eos?(idx)
      # Like a String, 'abc'[4, 1] returns nil and
      # 'abc'[3, 1] returns '' not nil.
      return (n && (@offset + idx == buf_size(@cur_buf))) ? '' : nil
    end
    my_buf = @cur_buf
    my_offset = @offset
    next_idx = idx
    # Walk forward to the chunk that contains idx, reading new chunks when
    # the last one is passed.
    while my_offset + next_idx >= buf_size(my_buf)
      if my_buf == @buf_tail_idx
        break unless add_buf
      end
      next_idx = my_offset + next_idx - buf_size(my_buf)
      my_buf += 1
      my_offset = 0
    end
    loc = my_offset + next_idx
    if !n
      return @buf_list[my_buf][loc]           # Fixnum of char code.
    elsif loc + n - 1 < buf_size(my_buf)
      return @buf_list[my_buf][loc, n]        # String.
    else # should do loop instead of (tail) recursive call...
      # The request spans chunks: take the rest of this chunk and fetch
      # the remainder recursively.
      res = @buf_list[my_buf][loc, BufSize]
      size_added = buf_size(my_buf) - loc
      if size_added > 0
        idx += size_added
        n -= size_added
        ret = self[idx, n]
        res << ret if ret
      end
      return res
    end
  end
  alias get []

  # SYNOPSIS
  #   size_dropped = CSV::StreamBuf#drop(n)
  #
  # ARGS
  #   n: drop size
  #
  # RETURNS
  #   size_dropped: dropped size.  At EOF, dropped size might not equals to
  #     arg n.  0 if n <= 0.
  #
  # DESCRIPTION
  #   Drop a string from the stream.  Once you drop the head of the stream,
  #   access to the dropped part via [] or get returns nil.
  #
  def drop(n)
    return 0 if is_eos?
    size_dropped = 0
    while n > 0
      # Once the end-of-stream chunk is the current one nothing is left to
      # drop; stop here instead of looping forever with n unchanged.
      break if @is_eos && (@cur_buf == @buf_tail_idx)
      if @offset + n < buf_size(@cur_buf)
        size_dropped += n
        @offset += n
        n = 0
      else
        # Drop the rest of the current chunk and move on to the next one.
        size = buf_size(@cur_buf) - @offset
        size_dropped += size
        n -= size
        @offset = 0
        unless rel_buf
          break unless add_buf
          @cur_buf = @buf_tail_idx
        end
      end
    end
    size_dropped
  end

  # SYNOPSIS
  #   is_eos = CSV::StreamBuf#is_eos?
  #
  # RETURNS
  #   is_eos: true if end of the stream or false.
  #
  # DESCRIPTION
  #   Check EOF or not.
  #
  def is_eos?
    return idx_is_eos?(0)
  end

  # SYNOPSIS
  #   N/A
  #
  # DESCRIPTION
  #   Do not instantiate this class directly.  Define your own class which
  #   derives this class and define 'read' instance method.  The first chunk
  #   is read here, so a derived class must be ready for 'read' before it
  #   calls super.
  #
  def initialize
    @buf_list = []
    @cur_buf = @buf_tail_idx = -1
    @offset = 0
    @is_eos = false
    add_buf
    @cur_buf = @buf_tail_idx
  end

  protected

  # Release every chunk from the current one on.
  def terminate
    while (rel_buf); end
  end

  # protected method 'read' must be defined in derived classes.
  # CAUTION: Returning nil (or raising EOFError) means EndOfStream.  When
  # it is not at EOS, you must block the callee, try to read and return
  # the sized string.
  def read(size) # raise EOFError
    raise NotImplementedError.new('Method read must be defined in a derived class.')
  end

  private

  # Size of the chunk at position +idx+ of the chunk list.
  def buf_size(idx)
    @buf_list[idx].size
  end

  # Read the next chunk and append it to the chunk list.  Returns true if
  # data was read.  At end of stream an empty chunk is appended once, the
  # EOS flag is set and false is returned.  Any error other than EOFError
  # releases the chunks and is raised again.
  def add_buf
    return false if @is_eos
    begin
      str_read = read(BufSize)
    rescue EOFError
      str_read = nil
    rescue
      terminate
      raise
    end
    if str_read.nil?
      @is_eos = true
      @buf_list.push('')
      @buf_tail_idx += 1
      false
    else
      @buf_list.push(str_read)
      @buf_tail_idx += 1
      true
    end
  end

  # Release the current chunk and step to the next one.  Returns false when
  # there is no current chunk or when the released chunk was the last one
  # (the current position is then unset).
  def rel_buf
    return false if @cur_buf < 0
    @buf_list[@cur_buf] = nil
    if @cur_buf == @buf_tail_idx
      @cur_buf = -1
      return false
    else
      @cur_buf += 1
      return true
    end
  end

  # True when the end of the stream was seen and the current chunk is the
  # last one (or unset).  The argument is not used.
  def idx_is_eos?(idx)
    (@is_eos && ((@cur_buf < 0) || (@cur_buf == @buf_tail_idx)))
  end

  # Size of a chunk requested from 'read'.
  BufSize = 1024 * 8
end
|
|
|
|
# DESCRIPTION
#   CSV::IOBuf -- a class for a buffered IO.  A CSV::StreamBuf whose data
#   comes from the 'read' method of the given object.
#
# EXAMPLE
#   # File 'bigdata' could be a giga-byte size one!
#   buf = CSV::IOBuf.new(File.open('bigdata', 'rb'))
#   p buf[0, 5]
#   buf.drop(5)
#   buf.close
#
class IOBuf < StreamBuf
  public

  # s: the object to read from.  Must respond to 'read(size)'.
  def initialize(s)
    # Must be set before super(): StreamBuf#initialize already reads.
    @s = s
    super()
  end

  # Release the buffers.  The underlying object is not closed here.
  def close
    terminate
  end

  private

  # Data source for StreamBuf.
  def read(size)
    @s.read(size)
  end

  # Same as StreamBuf#terminate, redefined as a private method.
  def terminate
    super()
  end
end
|
|
end
|