1
0
Fork 0
mirror of https://github.com/ruby/ruby.git synced 2022-11-09 12:17:21 -05:00
ruby--ruby/lib/csv.rb
nahi ffc136a024 * lib/csv.rb: add extra parameter to specify row (record) separator character.
To parse Mac's CR separated CSV, do like this.
    CSV.open("mac.csv", "r", ?,,?\r) { |row| p row.to_a }
  The 3rd parameter in this example ?, is for column separator and the 4th ?\r
  is for row separator.  Row separator is nil by default.  Nil separator means
  "\r\n" or "\n".

* test/csv/test_csv.rb: add tests for above feature.

* test/csv/mac.csv: added.  Sample CR separated CSV file.


git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@4553 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2003-09-15 10:07:42 +00:00

1336 lines
33 KiB
Ruby

# CSV -- module for generating/parsing CSV data.
# $Id$
# This module is copyrighted free software by NAKAMURA, Hiroshi.
# You can redistribute it and/or modify it under the same term as Ruby.
class CSV
public
# DESCRIPTION
# CSV::Cell -- Describes 1 cell of CSV.
#
class Cell
  public

  # Datum as string.
  attr_accessor :data

  # Is this datum null?
  attr_accessor :is_null

  # SYNOPSIS
  #   cell = CSV::Cell.new(data = '', is_null = true)
  #
  # ARGS
  #   data: datum as String
  #   is_null: is this datum null?
  #
  # RETURNS
  #   cell: Created instance.
  #
  # DESCRIPTION
  #   Create instance.  If is_null is true, datum is stored in the instance
  #   created but it should be treated as 'NULL'.
  #
  def initialize(data = '', is_null = true)
    @data = data
    @is_null = is_null
  end

  # SYNOPSIS
  #   CSV::Cell#match(rhs)
  #
  # ARGS
  #   rhs: an instance of CSV::Cell to be compared.
  #
  # RETURNS
  #   true/false.  false when rhs does not respond to 'is_null' and 'data'.
  #
  # DESCRIPTION
  #   Compare another cell with me.  Bear in mind Null matches with Null
  #   using this method.  A Null never matches a non-Null; two non-Null cells
  #   match when their data are equal.  Use CSV::Cell#== if you want Null to
  #   never match other data including Null.
  #
  def match(rhs)
    # Anything that is not cell-like (nil, a String, ...) simply does not match.
    return false unless cell_like?(rhs)
    if @is_null and rhs.is_null
      true
    elsif @is_null or rhs.is_null
      false
    else
      @data == rhs.data
    end
  end

  # SYNOPSIS
  #   CSV::Cell#==(rhs)
  #
  # ARGS
  #   rhs: an instance of CSV::Cell to be compared.
  #
  # RETURNS
  #   true/false.  false when rhs does not respond to 'is_null' and 'data'.
  #
  # DESCRIPTION
  #   Compare another cell with me.  Bear in mind Null does not match Null
  #   using this method; Null never equals other data including Null.  Use
  #   CSV::Cell#match if you want Null to match Null.
  #
  def ==(rhs)
    # Comparing with a non-cell (e.g. cell == nil) is false, not an error.
    return false unless cell_like?(rhs)
    return false if @is_null || rhs.is_null
    @data == rhs.data
  end

  private

  # True when obj exposes the two readers the comparisons rely on.
  def cell_like?(obj)
    obj.respond_to?(:is_null) && obj.respond_to?(:data)
  end
end
# DESCRIPTION
# CSV::Row -- Describes a row of CSV. Each element must be a CSV::Cell.
#
class Row < Array
  public

  # SYNOPSIS
  #   CSV::Row#to_a
  #
  # RETURNS
  #   An Array of String (or nil).
  #
  # DESCRIPTION
  #   Convert each CSV::Cell to its String datum.  A Null cell becomes nil.
  #
  def to_a
    map { |cell| cell.is_null ? nil : cell.data }
  end

  # SYNOPSIS
  #   CSV::Row#match(rhs)
  #
  # ARGS
  #   rhs: an Array of cells.  Each cell is an instance of CSV::Cell.
  #
  # RETURNS
  #   true/false.
  #
  # DESCRIPTION
  #   Compare another row with me.  Rows match when they have the same size
  #   and every cell matches the cell at the same position in rhs (using the
  #   cell's own 'match' method).
  #
  def match(rhs)
    return false unless size == rhs.size
    each_with_index do |cell, pos|
      return false unless cell.match(rhs[pos])
    end
    true
  end
end
# SYNOPSIS
#   1. reader = CSV.open(filename, 'r')
#
#   2. CSV.open(filename, 'r') do |row|
#        ...
#      end
#
#   3. writer = CSV.open(filename, 'w')
#
#   4. CSV.open(filename, 'w') do |writer|
#        ...
#      end
#
# ARGS
#   filename: filename to open.
#   mode: 'r' or 'rb' for read (parse)
#         'w' or 'wb' for write (generate)
#   col_sep: column separator handed to the reader/writer.  ?, by default.
#   row_sep: row separator handed to the reader/writer.  nil by default.
#   row: (read mode, block form) an Array of cells which is a parsed line.
#   writer: (write mode, block form) Created writer instance.  See
#           CSV::Writer#<< and CSV::Writer#add_row to know how to generate
#           CSV string.
#
# RETURNS
#   reader: (read mode, no block) Created reader instance.  To get parse
#           result, see CSV::Reader#each.
#   writer: (write mode, no block) Created writer instance.
#   nil: when a block is given.
#
# RAISES
#   ArgumentError if mode is none of 'r', 'rb', 'w', 'wb'.
#
# DESCRIPTION
#   Open a CSV formatted file to read or write.
#
# EXAMPLE 1
#   reader = CSV.open('csvfile.csv', 'r')
#   row1 = reader.shift
#   row2 = reader.shift
#   if row2.empty?
#     p 'row2 not find.'
#   end
#   reader.close
#
# EXAMPLE 2
#   CSV.open('csvfile.csv', 'r') do |row|
#     p row
#   end
#
# EXAMPLE 3
#   writer = CSV.open('csvfile.csv', 'w')
#   writer << ['r1c1', 'r1c2'] << ['r2c1', 'r2c2'] << [nil, nil]
#   writer.close
#
# EXAMPLE 4
#   CSV.open('csvfile.csv', 'w') do |writer|
#     writer << ['r1c1', 'r1c2']
#     writer << ['r2c1', 'r2c2']
#     writer << [nil, nil]
#   end
#
def CSV.open(filename, mode, col_sep = ?,, row_sep = nil, &block)
  case mode
  when 'r', 'rb'
    open_reader(filename, col_sep, row_sep, &block)
  when 'w', 'wb'
    open_writer(filename, col_sep, row_sep, &block)
  else
    raise ArgumentError.new("'mode' must be 'r', 'rb', 'w', or 'wb'")
  end
end
# SYNOPSIS
# reader = CSV.parse(filename, col_sep = ?,, row_sep = nil)
# CSV.parse(filename, col_sep = ?,, row_sep = nil) do |row| ... end
#
# DESCRIPTION
# Open the named file for reading.  Simply delegates to the private
# open_reader helper, forwarding the separators and the optional block.
#
def CSV.parse(filename, col_sep = ?,, row_sep = nil, &block)
open_reader(filename, col_sep, row_sep, &block)
end
# SYNOPSIS
# writer = CSV.generate(filename, col_sep = ?,, row_sep = nil)
# CSV.generate(filename, col_sep = ?,, row_sep = nil) do |writer| ... end
#
# DESCRIPTION
# Open the named file for writing.  Simply delegates to the private
# open_writer helper, forwarding the separators and the optional block.
#
def CSV.generate(filename, col_sep = ?,, row_sep = nil, &block)
open_writer(filename, col_sep, row_sep, &block)
end
# Private class methods.
class << self
private
# Open 'filename' in binary read mode and parse it as CSV.
#
# With a block: every parsed row is yielded to the block, the file is
# closed afterwards (even if the block raises) and nil is returned.
# Without a block: a reader is returned which closes the file when the
# reader itself is closed.
def open_reader(filename, col_sep, row_sep, &block)
  file = File.open(filename, 'rb')
  unless block
    reader = CSV::Reader.create(file, col_sep, row_sep)
    reader.close_on_terminate
    return reader
  end
  begin
    CSV::Reader.parse(file, col_sep, row_sep) { |row| yield(row) }
  ensure
    file.close
  end
  nil
end
# Open 'filename' in binary write mode for CSV generation.
#
# With a block: the writer is yielded to the block, the file is closed
# afterwards (even if the block raises) and nil is returned.
# Without a block: a writer is returned which closes the file when the
# writer itself is closed.
def open_writer(filename, col_sep, row_sep, &block)
  file = File.open(filename, 'wb')
  unless block
    writer = CSV::Writer.create(file, col_sep, row_sep)
    writer.close_on_terminate
    return writer
  end
  begin
    CSV::Writer.generate(file, col_sep, row_sep) { |writer| yield(writer) }
  ensure
    file.close
  end
  nil
end
end
# DESCRIPTION
# CSV::Reader -- CSV formatted string/stream reader.
#
# EXAMPLE
# Read CSV lines until the first column is 'stop'.
#
# CSV::Reader.parse(File.open('bigdata', 'rb')) do |row|
# p row
# break if !row[0].is_null && row[0].data == 'stop'
# end
#
class Reader
  include Enumerable

  public

  # SYNOPSIS
  #   reader = CSV::Reader.create(str_or_readable, col_sep = ?,, row_sep = nil)
  #
  # ARGS
  #   str_or_readable: a CSV data to be parsed.  A String or an IO.
  #   col_sep: column separator.  ?, by default.
  #   row_sep: row separator.  nil by default.
  #
  # RETURNS
  #   reader: Created instance.  A StringReader for a String, an IOReader
  #           for anything else.
  #
  # DESCRIPTION
  #   Create instance.  To get parse result, see CSV::Reader#each.
  #
  def Reader.create(str_or_readable, col_sep = ?,, row_sep = nil)
    case str_or_readable
    when String
      StringReader.new(str_or_readable, col_sep, row_sep)
    else
      # IO objects and any other readable device go through the IO reader.
      IOReader.new(str_or_readable, col_sep, row_sep)
    end
  end

  # SYNOPSIS
  #   CSV::Reader.parse(str_or_readable) do |row|
  #     ...
  #   end
  #
  # ARGS
  #   str_or_readable: a CSV data to be parsed.  A String or an IO.
  #   row: a CSV::Row; an Array of a CSV::Cell in a line.
  #
  # RETURNS
  #   nil
  #
  # DESCRIPTION
  #   Parse CSV data and get lines.  Caller block is called for each line
  #   with an argument which is a chunk of cells in a row.
  #
  #   Block value is always nil.  Rows are not cached for performance
  #   reason.  The reader is closed when parsing ends, including when the
  #   block exits early with 'break' or raises.
  #
  def Reader.parse(str_or_readable, col_sep = ?,, row_sep = nil)
    reader = create(str_or_readable, col_sep, row_sep)
    begin
      reader.each do |row|
        yield(row)
      end
    ensure
      # Release the reader even if the caller's block breaks out or raises.
      reader.close
    end
    nil
  end

  # SYNOPSIS
  #   CSV::Reader#each do |row|
  #     ...
  #   end
  #
  # ARGS
  #   row: a CSV::Row; an Array of a CSV::Cell in a line.
  #
  # RETURNS
  #   nil
  #
  # DESCRIPTION
  #   Caller block is called for each line with an argument which is a chunk
  #   of cells in a row.  Iteration stops when get_row reports 0 parsed
  #   cells.
  #
  #   Block value is always nil.  Rows are not cached for performance
  #   reason.
  #
  def each
    while true
      row = Row.new
      parsed_cells = get_row(row)
      if parsed_cells == 0
        break
      end
      yield(row)
    end
    nil
  end

  # SYNOPSIS
  #   cell = CSV::Reader#shift
  #
  # RETURNS
  #   cell: a CSV::Row; an Array of a CSV::Cell.  Empty when no cell could be
  #         parsed.
  #
  # DESCRIPTION
  #   Extract cells of next line.
  #
  def shift
    row = Row.new
    get_row(row)
    row
  end

  # SYNOPSIS
  #   CSV::Reader#close
  #
  # RETURNS
  #   Whatever the subclass's terminate returns (nil for the default).
  #
  # DESCRIPTION
  #   Close this reader.
  #
  def close
    terminate
  end

  private

  # Abstract class: concrete readers must override initialize.
  def initialize(dev)
    raise RuntimeError.new('Do not instanciate this class directly.')
  end

  # Must fill 'row' with the cells of the next line and return the number of
  # cells parsed (0 when there is nothing more to read).
  def get_row(row)
    raise NotImplementedError.new('Method get_row must be defined in a derived class.')
  end

  # Hook invoked by close.
  def terminate
    # Define if needed.
  end
end
# DESCRIPTION
# CSV::StringReader -- CSV formatted string reader.
#
# EXAMPLE
# Read CSV lines until the first column is 'stop'.
#
# CSV::Reader.parse(File.open('bigdata', 'rb')) do |row|
# p row
# break if !row[0].is_null && row[0].data == 'stop'
# end
#
class StringReader < Reader
  public

  # SYNOPSIS
  #   reader = CSV::StringReader.new(string, col_sep = ?,, row_sep = nil)
  #
  # ARGS
  #   string: a CSV String to be parsed.
  #   col_sep: column separator.  ?, by default.
  #   row_sep: row separator.  nil by default.
  #
  # RETURNS
  #   reader: Created instance.
  #
  # DESCRIPTION
  #   Create instance.  To get parse result, see CSV::Reader#each.
  #   Parsing starts after the first 3 characters when they equal
  #   "\xef\xbb\xbf".
  #
  def initialize(string, col_sep = ?,, row_sep = nil)
    @col_sep = col_sep
    @row_sep = row_sep
    @dev = string
    @idx = (@dev[0, 3] == "\xef\xbb\xbf") ? 3 : 0
  end

  private

  # Parse the next row into 'row', remember where the following row starts
  # and return the number of parsed cells.  Raises IllegalFormatError when
  # parse_row reports failure (0, 0) before the end of the string.
  def get_row(row)
    count, following = CSV.parse_row(@dev, @idx, row, @col_sep, @row_sep)
    nothing_parsed = count == 0 && following == 0
    raise IllegalFormatError.new if nothing_parsed && @idx != @dev.size
    @idx = following
    count
  end
end
# DESCRIPTION
# CSV::IOReader -- CSV formatted stream reader.
#
# EXAMPLE
# Read CSV lines until the first column is 'stop'.
#
# CSV::Reader.parse(File.open('bigdata', 'rb')) do |row|
# p row
# break if !row[0].is_null && row[0].data == 'stop'
# end
#
class IOReader < Reader
  public

  # SYNOPSIS
  #   reader = CSV::IOReader.new(io, col_sep = ?,, row_sep = nil)
  #
  # ARGS
  #   io: a CSV data to be parsed.  Must be an IO.  (io#read is called.)
  #   col_sep: column separator.  ?, by default.
  #   row_sep: row separator.  nil by default.
  #
  # RETURNS
  #   reader: Created instance.
  #
  # DESCRIPTION
  #   Create instance.  To get parse result, see CSV::Reader#each.
  #   The io is wrapped in a CSV::IOBuf.  Parsing starts at index 3 when
  #   the first three elements of the buffer are 0xef, 0xbb, 0xbf.
  #
  def initialize(io, col_sep = ?,, row_sep = nil)
    @io = io
    @col_sep = col_sep
    @row_sep = row_sep
    @dev = CSV::IOBuf.new(@io)
    has_mark = @dev[0] == 0xef && @dev[1] == 0xbb && @dev[2] == 0xbf
    @idx = has_mark ? 3 : 0
    @close_on_terminate = false
  end

  # SYNOPSIS
  #   CSV::IOReader#close_on_terminate
  #
  # RETURNS
  #   true
  #
  # DESCRIPTION
  #   Tell this reader to close the IO when terminated (Triggered by invoking
  #   CSV::IOReader#close).
  #
  def close_on_terminate
    @close_on_terminate = true
  end

  private

  # Parse the next row into 'row' and return the number of parsed cells.
  # The consumed part of the buffer is dropped and the index is re-based by
  # the amount that was actually dropped.  Raises IllegalFormatError when
  # parse_row reports failure (0, 0) while the buffer is not at EOS.
  def get_row(row)
    count, following = CSV.parse_row(@dev, @idx, row, @col_sep, @row_sep)
    if count == 0 && following == 0 && !@dev.is_eos?
      raise IllegalFormatError.new
    end
    @idx = following - @dev.drop(following)
    count
  end

  # Close the IO if requested via close_on_terminate, then close the buffer.
  def terminate
    @io.close if @close_on_terminate
    @dev.close if @dev
  end
end
# DESCRIPTION
# CSV::Writer -- CSV formatted string/stream writer.
#
# EXAMPLE
# Write rows to 'csvout' file.
#
# outfile = File.open('csvout', 'wb')
# CSV::Writer.generate(outfile) do |csv|
# csv << ['c1', nil, '', '"', "\r\n", 'c2']
# # or
# csv.add_row [
# CSV::Cell.new('c1', false),
# CSV::Cell.new('dummy', true),
# CSV::Cell.new('', false),
# CSV::Cell.new('"', false),
# CSV::Cell.new("\r\n", false),
# CSV::Cell.new('c2', false)
# ]
# ...
# ...
# end
#
# outfile.close
#
class Writer
  public

  # SYNOPSIS
  #   writer = CSV::Writer.create(str_or_writable, col_sep = ?,, row_sep = nil)
  #
  # ARGS
  #   str_or_writable: device for generated CSV string.  Must respond to
  #                    '<<(string)'.  (The parameter is named
  #                    str_or_readable in the signature.)
  #   col_sep: column separator.  ?, by default.
  #   row_sep: row separator.  nil by default.
  #
  # RETURNS
  #   writer: Created instance (a BasicWriter).
  #
  # DESCRIPTION
  #   Create instance.  To add CSV data to generate CSV string, see
  #   CSV::Writer#<< or CSV::Writer#add_row.
  #
  def Writer.create(str_or_readable, col_sep = ?,, row_sep = nil)
    BasicWriter.new(str_or_readable, col_sep, row_sep)
  end

  # SYNOPSIS
  #   CSV::Writer.generate(str_or_writable) do |writer|
  #     ...
  #   end
  #
  # ARGS
  #   str_or_writable: device for generated CSV string.  Must respond to
  #                    '<<(string)'.
  #   writer: Created writer instance.  See CSV::Writer#<< and
  #           CSV::Writer#add_row to know how to generate CSV string.
  #
  # RETURNS
  #   nil
  #
  # DESCRIPTION
  #   Create writer instance.  Caller block is called with the new instance.
  #   To add CSV data to generate CSV string, see CSV::Writer#<< or
  #   CSV::Writer#add_row.  The writer is closed when the block finishes,
  #   including when it exits early with 'break' or raises.
  #
  def Writer.generate(str_or_writable, col_sep = ?,, row_sep = nil)
    writer = Writer.create(str_or_writable, col_sep, row_sep)
    begin
      yield(writer)
    ensure
      # Release the writer even if the caller's block breaks out or raises.
      writer.close
    end
    nil
  end

  # SYNOPSIS
  #   CSV::Writer#<<(row)
  #
  # ARGS
  #   row: an Array of a String.
  #
  # RETURNS
  #   self
  #
  # DESCRIPTION
  #   Dump CSV stream to the device.  Argument is an array of a String like
  #   ['c1', 'c2', 'c3'].  A CSV::Cell element is written as is, nil becomes
  #   a Null cell and any other element is stringified with to_s.
  #
  def <<(ary)
    row = ary.collect { |item|
      if item.is_a?(Cell)
        item
      elsif (item.nil?)
        Cell.new('', true)
      else
        Cell.new(item.to_s, false)
      end
    }
    CSV.generate_row(row, row.size, @dev, @col_sep, @row_sep)
    self
  end

  # SYNOPSIS
  #   CSV::Writer#add_row(row)
  #
  # ARGS
  #   row: an Array of a CSV::Cell.
  #
  # RETURNS
  #   self
  #
  # DESCRIPTION
  #   Dump CSV stream to the device.  Argument is an array of a CSV::Cell
  #   like [CSV::Cell.new('c1', false), CSV::Cell.new('dummy', true)].
  #   (Former is 'c1' and latter is Null.)
  #
  def add_row(row)
    CSV.generate_row(row, row.size, @dev, @col_sep, @row_sep)
    self
  end

  # SYNOPSIS
  #   CSV::Writer#close
  #
  # RETURNS
  #   Whatever the subclass's terminate returns (nil for the default).
  #
  # DESCRIPTION
  #   Close this writer.
  #
  def close
    terminate
  end

  private

  # Abstract class: concrete writers must override initialize.
  def initialize(dev)
    raise RuntimeError.new('Do not instanciate this class directly.')
  end

  # Hook invoked by close.
  def terminate
    # Define if needed.
  end
end
# DESCRIPTION
# CSV::BasicWriter -- CSV formatted string/stream writer using <<.
#
class BasicWriter < Writer
  public

  # SYNOPSIS
  #   writer = CSV::BasicWriter.new(str_or_writable, col_sep = ?,, row_sep = nil)
  #
  # ARGS
  #   str_or_writable: device for generated CSV string.  Must respond to
  #                    '<<(string)'.
  #   col_sep: column separator.  ?, by default.
  #   row_sep: row separator.  nil by default.
  #
  # RETURNS
  #   writer: Created instance.
  #
  # DESCRIPTION
  #   Create instance.  To add CSV data to generate CSV string, see
  #   CSV::Writer#<< or CSV::Writer#add_row.  The device is not closed on
  #   close unless close_on_terminate has been called.
  #
  def initialize(str_or_writable, col_sep = ?,, row_sep = nil)
    @dev = str_or_writable
    @col_sep = col_sep
    @row_sep = row_sep
    @close_on_terminate = false
  end

  # SYNOPSIS
  #   CSV::BasicWriter#close_on_terminate
  #
  # RETURNS
  #   true
  #
  # DESCRIPTION
  #   Tell this writer to close the IO when terminated (Triggered by invoking
  #   CSV::BasicWriter#close).
  #
  def close_on_terminate
    @close_on_terminate = true
  end

  private

  # Close the device only when close_on_terminate was requested.
  def terminate
    @dev.close if @close_on_terminate
  end
end
# SYNOPSIS
#   cells = CSV.parse_line(src, col_sep = ?,, row_sep = nil)
#
# ARGS
#   src: a CSV String.
#   col_sep: Column separator.  ?, by default.  If you want to separate
#            fields with semicolon, give ?; here.
#   row_sep: Row separator.  nil by default.  nil means "\r\n or \n".  If you
#            want to separate records with \r, give ?\r here.
#
# RETURNS
#   cells: an Array of parsed cells in first line.  Each cell is a String
#          (nil for a Null cell).  An empty row is returned when the line is
#          not well-formed.
#
# DESCRIPTION
#   Parse one line from given string.  Bear in mind it parses ONE LINE.  Rest
#   of the string is ignored for example "a,b\r\nc,d" => ['a', 'b'] and the
#   second line 'c,d' is ignored.
#
#   If you don't know whether a target string to parse is exactly 1 line or
#   not, use CSV.parse_row instead of this method.
#
def CSV.parse_line(src, col_sep = ?,, row_sep = nil)
  fields = Row.new
  pos = 0
  kind = :DT_COLSEP
  # Keep reading cells for as long as the previous one ended on a column
  # separator; a row separator or end of input finishes the line.
  while kind.equal?(:DT_COLSEP)
    cell = Cell.new
    kind, pos = parse_body(src, pos, cell, col_sep, row_sep)
    fields.push(cell.is_null ? nil : cell.data)
  end
  fields
rescue IllegalFormatError
  Row.new
end
# SYNOPSIS
#   str = CSV.generate_line(cells, col_sep = ?,, row_sep = nil)
#
# ARGS
#   cells: an Array of cell to be converted to CSV string.  Each cell must
#          respond to 'to_s'.  nil is written as a Null cell.
#   col_sep: Column separator.  ?, by default.  If you want to separate
#            fields with semicolon, give ?; here.
#   row_sep: Row separator.  nil by default.  If you want to separate
#            records with \r, give ?\r here.  It only influences quoting;
#            no row separator is appended to the result.
#
# RETURNS
#   str: a String of generated CSV string.  '' when cells is empty.
#
# DESCRIPTION
#   Create a line from cells.  Each cell is stringified by to_s.
#
def CSV.generate_line(cells, col_sep = ?,, row_sep = nil)
  return '' if cells.size == 0
  line = ''
  last = cells.size - 1
  0.upto(last) do |pos|
    item = cells[pos]
    cell = item.nil? ? Cell.new('', true) : Cell.new(item.to_s, false)
    generate_body(cell, line, col_sep, row_sep)
    # Separators go between cells only, never after the last one.
    generate_separator(:DT_COLSEP, line, col_sep, row_sep) unless pos == last
  end
  line
end
# SYNOPSIS
#   parsed_cells, idx = CSV.parse_row(src, idx, out_dev, col_sep = ?,, row_sep = nil)
#
# ARGS
#   src: a CSV data to be parsed.  Must respond '[](idx)'.
#        src[](idx) must return a char.  (Not a string such as 'a', but 97).
#        src[](idx_out_of_bounds) must return nil.  A String satisfies this
#        requirement.
#   idx: index of parsing location of 'src'.  0 origin.
#   out_dev: buffer for parsed cells.  Must respond '<<(CSV::Cell)'.
#   col_sep: Column separator.  ?, by default.  If you want to separate
#            fields with semicolon, give ?; here.
#   row_sep: Row separator.  nil by default.  nil means "\r\n or \n".  If you
#            want to separate records with \r, give ?\r here.
#
# RETURNS
#   parsed_cells: num of parsed cells.
#   idx: index of next parsing location of 'src'.
#   Both are 0 when nothing is left to parse or the data is not well-formed.
#
# DESCRIPTION
#   Parse a line from string.  To parse lines in CSV string, see EXAMPLE
#   below.
#
# EXAMPLE
#   src = "a,b\r\nc,d\r\ne,f"
#   idx = 0
#   begin
#     parsed = []
#     parsed_cells, idx = CSV.parse_row(src, idx, parsed)
#     puts "Parsed #{ parsed_cells } cells."
#     p parsed
#   end while parsed_cells > 0
#
def CSV.parse_row(src, idx, out_dev, col_sep = ?,, row_sep = nil)
  start = idx
  count = 0
  row_done = false
  until row_done
    cell = Cell.new
    kind, idx = parse_body(src, idx, cell, col_sep, row_sep)
    at_eos = kind.equal?(:DT_EOS)
    # End of input without consuming anything: there is no row here.
    return 0, 0 if at_eos && idx == start
    count += 1
    out_dev << cell
    # End of input after some data closes the row like a row separator.
    row_done = at_eos || kind.equal?(:DT_ROWSEP)
  end
  return count, idx
rescue IllegalFormatError
  return 0, 0
end
# SYNOPSIS
#   parsed_cells = CSV.generate_row(src, cells, out_dev, col_sep = ?,, row_sep = nil)
#
# ARGS
#   src: an Array of CSV::Cell to be converted to CSV string.  Must respond to
#        'size' and '[](idx)'.  src[idx] must return CSV::Cell.
#   cells: num of cells in a line.
#   out_dev: buffer for generated CSV string.  Must respond to '<<(string)'.
#   col_sep: Column separator.  ?, by default.  If you want to separate
#            fields with semicolon, give ?; here.
#   row_sep: Row separator.  nil by default.  nil means "\r\n" is written.
#            If you want to separate records with \r, give ?\r here.
#
# RETURNS
#   parsed_cells: num of converted cells.
#
# DESCRIPTION
#   Convert a line from cells data to string.  The line is closed with a row
#   separator when exactly 'cells' cells were written; otherwise a column
#   separator is written so that the line can be continued.  To generate
#   multi-row CSV string, See EXAMPLE below.
#
# EXAMPLE
#   def d(str)
#     CSV::Cell.new(str, false)
#   end
#
#   row1 = [d('a'), d('b')]
#   row2 = [d('c'), d('d')]
#   row3 = [d('e'), d('f')]
#   src = [row1, row2, row3]
#   buf = ''
#   src.each do |row|
#     parsed_cells = CSV.generate_row(row, 2, buf)
#     puts "Created #{ parsed_cells } cells."
#   end
#   p buf
#
def CSV.generate_row(src, cells, out_dev, col_sep = ?,, row_sep = nil)
  available = src.size
  if available == 0
    # An empty source only produces output when an empty line was asked for.
    generate_separator(:DT_ROWSEP, out_dev, col_sep, row_sep) if cells == 0
    return 0
  end
  generate_body(src[0], out_dev, col_sep, row_sep)
  written = 1
  until written >= cells || written == available
    generate_separator(:DT_COLSEP, out_dev, col_sep, row_sep)
    generate_body(src[written], out_dev, col_sep, row_sep)
    written += 1
  end
  closing = (written == cells) ? :DT_ROWSEP : :DT_COLSEP
  generate_separator(closing, out_dev, col_sep, row_sep)
  written
end
private
# Raised by the parsing code in this file when CSV input is not well-formed.
# CSV.parse_line and CSV.parse_row rescue it; the readers' get_row raises it.
class IllegalFormatError < RuntimeError; end
# Private class methods.
class << self
private
# Parse a single cell out of 'src', starting at index 'idx'.
#
# ARGS
# src: data to parse; src[idx] must return a char, or nil past the end.
# idx: index where the cell starts.
# cell: receives the result; characters are appended to cell.data and
# cell.is_null is set.
# col_sep: column separator char.
# row_sep: row separator char, or nil for the default handling where a row
# ends with "\n" optionally preceded by "\r".
#
# RETURNS
# [result_type, next_idx] where result_type is :DT_COLSEP, :DT_ROWSEP or
# :DT_EOS (input ended) and next_idx is the index after the consumed chars.
#
# RAISES
# IllegalFormatError when the input is not well-formed.
#
# States:
# :ST_START - nothing of the cell has been consumed yet.
# :ST_DATA  - inside cell data ('quoted' tells whether inside quotes).
# :ST_QUOTE - a '"' was just seen inside quoted data; it either closes the
# quotes or is the first half of an escaped '""'.
# :ST_END   - the cell is complete.
def parse_body(src, idx, cell, col_sep, row_sep)
# Char that terminates a row.
row_sep_end = row_sep || ?\n
cell.is_null = false
state = :ST_START
quoted = false
# Set when an unquoted "\r" was seen in default row_sep mode; the next char
# must then end the row.
cr = false
c = nil
while (c = src[idx])
idx += 1
result_state = :DT_UNKNOWN
if (c == col_sep)
if state.equal?(:ST_DATA)
if cr
raise IllegalFormatError.new
end
if (!quoted)
state = :ST_END
result_state = :DT_COLSEP
else
# A separator inside quotes is ordinary data.
cell.data << c.chr
end
elsif state.equal?(:ST_QUOTE)
if cr
raise IllegalFormatError.new
end
# The pending '"' was a closing quote.
state = :ST_END
result_state = :DT_COLSEP
else # :ST_START
# Nothing between separators: a Null cell.
cell.is_null = true
state = :ST_END
result_state = :DT_COLSEP
end
elsif (c == ?") # " for vim syntax hilighting.
if state.equal?(:ST_DATA)
if cr
raise IllegalFormatError.new
end
if quoted
quoted = false
state = :ST_QUOTE
else
# A quote in the middle of unquoted data is not allowed.
raise IllegalFormatError.new
end
elsif state.equal?(:ST_QUOTE)
# Second '"' of an escaped pair: store one quote and stay quoted.
cell.data << c.chr
quoted = true
state = :ST_DATA
else # :ST_START
# Opening quote.
quoted = true
state = :ST_DATA
end
elsif row_sep.nil? and c == ?\r
if cr
raise IllegalFormatError.new
end
if quoted
cell.data << c.chr
state = :ST_DATA
else
cr = true
end
elsif c == row_sep_end
if state.equal?(:ST_DATA)
if cr
state = :ST_END
result_state = :DT_ROWSEP
cr = false
else
if quoted
# A row separator inside quotes is ordinary data.
cell.data << c.chr
state = :ST_DATA
else
state = :ST_END
result_state = :DT_ROWSEP
end
end
elsif state.equal?(:ST_QUOTE)
state = :ST_END
result_state = :DT_ROWSEP
if cr
cr = false
end
else # :ST_START
cell.is_null = true
state = :ST_END
result_state = :DT_ROWSEP
end
else
if state.equal?(:ST_DATA) || state.equal?(:ST_START)
if cr
raise IllegalFormatError.new
end
cell.data << c.chr
state = :ST_DATA
else # :ST_QUOTE
# Only a separator or another '"' may follow a closing quote.
raise IllegalFormatError.new
end
end
if state.equal?(:ST_END)
return result_state, idx;
end
end
# Input ran out before a separator was seen.
if state.equal?(:ST_START)
cell.is_null = true
elsif state.equal?(:ST_QUOTE)
true # dummy for coverate; only a data
elsif quoted
# Quotes were never closed.
raise IllegalFormatError.new
elsif cr
# A "\r" that was not followed by the row terminator.
raise IllegalFormatError.new
end
return :DT_EOS, idx
end
# Append the CSV representation of one cell to out_dev.
#
# ARGS
#   cells: the single cell to write; must respond to 'data' and 'is_null'.
#   out_dev: output buffer; must respond to '<<(string)'.
#   col_sep: column separator.
#   row_sep: row separator or nil.
#
# RETURNS
#   out_dev, or nil for a Null cell (nothing is written for Null).
#
# The datum is wrapped in double quotes when it contains a double quote
# (which is doubled), the column separator, the row separator, "\r" or
# "\n", or when it is empty.
def generate_body(cells, out_dev, col_sep, row_sep)
  text = cells.data.dup
  return nil if cells.is_null
  must_quote = text.gsub!('"', '""') ||
               text.include?(col_sep) ||
               (row_sep && text.index(row_sep)) ||
               (/[\r\n]/ =~ text) ||
               cells.data.empty?
  if must_quote
    out_dev << '"' << text << '"'
  else
    out_dev << text
  end
end
# Append a separator to out_dev.
#
# ARGS
#   type: :DT_COLSEP to write the column separator (col_sep.chr), or
#         :DT_ROWSEP to write row_sep ("\r\n" when row_sep is nil).
#   out_dev: output buffer; must respond to '<<'.
#
# RETURNS
#   out_dev, or nil when type is neither of the two (nothing is written).
def generate_separator(type, out_dev, col_sep, row_sep)
  if type.equal?(:DT_COLSEP)
    out_dev << col_sep.chr
  elsif type.equal?(:DT_ROWSEP)
    out_dev << (row_sep || "\r\n")
  end
end
end
# DESCRIPTION
# CSV::StreamBuf -- a class for a buffered stream.
#
# EXAMPLE 1 -- an IO.
# class MyBuf < StreamBuf
# # Do initialize myself before a super class. Super class might call my
# # method 'read'. (Could be awful for C++ user. :-)
# def initialize(s)
# @s = s
# super()
# end
#
# # define my own 'read' method.
# # CAUTION: Returning nil means EndOfStream.
# def read(size)
# @s.read(size)
# end
#
# # release buffers. in Ruby which has GC, you do not have to call this...
# def terminate
# @s = nil
# super()
# end
# end
#
# buf = MyBuf.new(STDIN)
# my_str = ''
# p buf[0, 0] # => '' (null string)
# p buf[0] # => 97 (char code of 'a')
# p buf[0, 1] # => 'a'
# my_str = buf[0, 5]
# p my_str # => 'abcde' (5 chars)
# p buf[0, 6] # => "abcde\n" (6 chars)
# p buf[0, 7] # => "abcde\n" (6 chars)
# p buf.drop(3) # => 3 (dropped chars)
# p buf.get(0, 2) # => 'de' (2 chars)
# p buf.is_eos? # => false (is not EOS here)
# p buf.drop(5) # => 3 (dropped chars)
# p buf.is_eos? # => true (is EOS here)
# p buf[0] # => nil (is EOS here)
#
# EXAMPLE 2 -- String.
# This is a conceptual example. No pros with this.
#
# class StrBuf < StreamBuf
# def initialize(s)
# @str = s
# @idx = 0
# super()
# end
#
# def read(size)
# str = @str[@idx, size]
# @idx += str.size
# str
# end
# end
#
class StreamBuf # pure virtual. (do not instanciate it directly)
  public

  # SYNOPSIS
  #   char/str = CSV::StreamBuf#get(idx, n = nil)
  #   char/str = CSV::StreamBuf#[idx, n = nil]
  #
  # ARGS
  #   idx: index of a string to specify a start point of a string to get.
  #        Unlike String instance, idx < 0 returns nil.
  #   n: size of a string to get.
  #
  # RETURNS
  #   char: if n == nil.  A char at idx.
  #   str: if n != nil.  A partial string, from idx to (idx + size).  At
  #        EOF, the string size could not equal to arg n.
  #
  # DESCRIPTION
  #   Get a char or a partial string from the stream.  More data is read
  #   from the underlying stream on demand.
  #
  def [](idx, n = nil)
    if idx < 0
      return nil
    end
    if (idx_is_eos?(idx))
      if n and (@offset + idx == buf_size(@cur_buf))
        # Like a String, 'abc'[4, 1] returns nil and
        # 'abc'[3, 1] returns '' not nil.
        return ''
      else
        return nil
      end
    end
    # Walk forward from the current buffer to the buffer holding idx,
    # reading more buffers from the stream when running off the tail.
    my_buf = @cur_buf
    my_offset = @offset
    next_idx = idx
    while (my_offset + next_idx >= buf_size(my_buf))
      if (my_buf == @buf_tail_idx)
        unless add_buf
          break
        end
      end
      next_idx = my_offset + next_idx - buf_size(my_buf)
      my_buf += 1
      my_offset = 0
    end
    loc = my_offset + next_idx
    if !n
      return @buf_list[my_buf][loc] # Fixnum of char code.
    elsif (loc + n - 1 < buf_size(my_buf))
      return @buf_list[my_buf][loc, n] # String.
    else # should do loop insted of (tail) recursive call...
      # The requested range crosses a buffer boundary: take what this buffer
      # has and fetch the remainder recursively.
      res = @buf_list[my_buf][loc, BufSize]
      size_added = buf_size(my_buf) - loc
      if size_added > 0
        idx += size_added
        n -= size_added
        ret = self[idx, n]
        if ret
          res << ret
        end
      end
      return res
    end
  end
  alias get []

  # SYNOPSIS
  #   size_dropped = CSV::StreamBuf#drop(n)
  #
  # ARGS
  #   n: drop size
  #
  # RETURNS
  #   size_dropped: dropped size.  At EOF, dropped size might not equal to
  #                 arg n.  0 if n <= 0.
  #
  # DESCRIPTION
  #   Drop a string from the stream.  Once you drop the head of the stream,
  #   access to the dropped part via [] or get returns nil.
  #
  def drop(n)
    if is_eos?
      return 0
    end
    size_dropped = 0
    while (n > 0)
      # Once the end of the stream has been seen and only the (empty)
      # end-of-stream buffer is left, nothing more can be dropped.  Without
      # this exit the loop would spin forever with n still positive.
      break if @is_eos && (@cur_buf == @buf_tail_idx)
      if (@offset + n < buf_size(@cur_buf))
        size_dropped += n
        @offset += n
        n = 0
      else
        # Drop the rest of the current buffer and move on to the next one,
        # reading it from the stream if it is not buffered yet.
        size = buf_size(@cur_buf) - @offset
        size_dropped += size
        n -= size
        @offset = 0
        unless rel_buf
          unless add_buf
            break
          end
          @cur_buf = @buf_tail_idx
        end
      end
    end
    size_dropped
  end

  # SYNOPSIS
  #   is_eos = CSV::StreamBuf#is_eos?
  #
  # RETURNS
  #   is_eos: true if end of the stream or false.
  #
  # DESCRIPTION
  #   Check EOF or not.
  #
  def is_eos?
    return idx_is_eos?(0)
  end

  # SYNOPSIS
  #   N/A
  #
  # DESCRIPTION
  #   Do not instanciate this class directly.  Define your own class which
  #   derives this class and define 'read' instance method.  The first
  #   buffer is read here, so a subclass must be ready for 'read' to be
  #   called before calling super.
  #
  def initialize
    @buf_list = []
    @cur_buf = @buf_tail_idx = -1
    @offset = 0
    @is_eos = false
    add_buf
    @cur_buf = @buf_tail_idx
  end

  protected

  # Release every buffer that is still held.
  def terminate
    while (rel_buf); end
  end

  # protected method 'read' must be defined in derived classes.
  # CAUTION: Returning a string which size is not equal to 'size' means
  # EndOfStream.  When it is not at EOS, you must block the callee, try to
  # read and return the sized string.
  def read(size) # raise EOFError
    raise NotImplementedError.new('Method read must be defined in a derived class.')
  end

  private

  # Size of the buffer stored at position idx of the buffer list.
  def buf_size(idx)
    @buf_list[idx].size
  end

  # Read the next buffer from the stream and append it to the buffer list.
  # Returns true when data was added.  When 'read' returns nil or raises
  # EOFError, the stream is marked as finished, an empty buffer is appended
  # as the end marker and false is returned.  Any other error releases the
  # buffers and is re-raised.
  def add_buf
    if @is_eos
      return false
    end
    begin
      str_read = read(BufSize)
    rescue EOFError
      str_read = nil
    rescue
      terminate
      raise
    end
    if str_read.nil?
      @is_eos = true
      @buf_list.push('')
      @buf_tail_idx += 1
      false
    else
      @buf_list.push(str_read)
      @buf_tail_idx += 1
      true
    end
  end

  # Release the current buffer and advance to the next one.  Returns false
  # (and leaves no current buffer) when the released buffer was the tail.
  def rel_buf
    if (@cur_buf < 0)
      return false
    end
    @buf_list[@cur_buf] = nil
    if (@cur_buf == @buf_tail_idx)
      @cur_buf = -1
      return false
    else
      @cur_buf += 1
      return true
    end
  end

  # True when the end of the stream was seen and the current buffer is the
  # tail buffer (or there is no current buffer).  The idx argument is not
  # used.
  def idx_is_eos?(idx)
    (@is_eos && ((@cur_buf < 0) || (@cur_buf == @buf_tail_idx)))
  end

  # Number of bytes requested from 'read' at a time.
  BufSize = 1024 * 8
end
# DESCRIPTION
# CSV::IOBuf -- a class for a buffered IO.
#
# EXAMPLE
# # File 'bigdata' could be a giga-byte size one!
# buf = CSV::IOBuf.new(File.open('bigdata', 'rb'))
# CSV::Reader.new(buf).each do |row|
# p row
# break if row[0].data == 'admin'
# end
#
# StreamBuf whose data comes from an object responding to 'read(size)'.
class IOBuf < StreamBuf
public
# s: the source to read from.  It is stored before calling super because
# StreamBuf#initialize already reads the first buffer through 'read'.
def initialize(s)
@s = s
super()
end
# Release the buffers held by this instance (via terminate).  The
# underlying source is not closed here.
def close
terminate
end
private
# Fetch up to 'size' bytes by delegating to the source's own read.
def read(size)
@s.read(size)
end
# Delegates to StreamBuf#terminate.
def terminate
super()
end
end
end