1
0
Fork 0
mirror of https://github.com/ruby/ruby.git synced 2022-11-09 12:17:21 -05:00

* lib/csv.rb: add extra parameter to specify row (record) separator character.

To parse Mac's CR-separated CSV, do this:
    CSV.open("mac.csv", "r", ?,, ?\r) { |row| p row.to_a }
  The 3rd parameter in this example (?,) is the column separator and the 4th (?\r)
  is the row separator.  The row separator is nil by default.  A nil separator means
  "\r\n" or "\n".

* test/csv/test_csv.rb: add tests for above feature.

* test/csv/mac.csv: added.  Sample CR separated CSV file.


git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@4553 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
nahi 2003-09-15 10:07:42 +00:00
parent bd96b4c8cc
commit ffc136a024
4 changed files with 327 additions and 177 deletions

View file

@ -1,3 +1,16 @@
Mon Sep 15 19:02:52 2003 NAKAMURA, Hiroshi <nahi@ruby-lang.org>
* lib/csv.rb: add extra parameter to specify row (record) separator
character. To parse Mac's CR-separated CSV, do this:
CSV.open("mac.csv", "r", ?,, ?\r) { |row| p row.to_a }
The 3rd parameter in this example (?,) is the column separator and the
4th (?\r) is the row separator. The row separator is nil by default. A nil
separator means "\r\n" or "\n".
* test/csv/test_csv.rb: add tests for above feature.
* test/csv/mac.csv: added. Sample CR separated CSV file.
Fri Sep 12 22:41:48 2003 Michal Rokos <m.rokos@sh.cvut.cz> Fri Sep 12 22:41:48 2003 Michal Rokos <m.rokos@sh.cvut.cz>
* ext/openssl/ossl.c: move ASN.1 stuff to ossl_asn1.[ch] * ext/openssl/ossl.c: move ASN.1 stuff to ossl_asn1.[ch]

View file

@ -189,32 +189,32 @@ public
# writer << [nil, nil] # writer << [nil, nil]
# end # end
# #
def CSV.open(filename, mode, col_sep = ?,, &block) def CSV.open(filename, mode, col_sep = ?,, row_sep = nil, &block)
if mode == 'r' or mode == 'rb' if mode == 'r' or mode == 'rb'
open_reader(filename, col_sep, &block) open_reader(filename, col_sep, row_sep, &block)
elsif mode == 'w' or mode == 'wb' elsif mode == 'w' or mode == 'wb'
open_writer(filename, col_sep, &block) open_writer(filename, col_sep, row_sep, &block)
else else
raise ArgumentError.new("'mode' must be 'r', 'rb', 'w', or 'wb'") raise ArgumentError.new("'mode' must be 'r', 'rb', 'w', or 'wb'")
end end
end end
def CSV.parse(filename, col_sep = ?,, &block) def CSV.parse(filename, col_sep = ?,, row_sep = nil, &block)
open_reader(filename, col_sep, &block) open_reader(filename, col_sep, row_sep, &block)
end end
def CSV.generate(filename, col_sep = ?,, &block) def CSV.generate(filename, col_sep = ?,, row_sep = nil, &block)
open_writer(filename, col_sep, &block) open_writer(filename, col_sep, row_sep, &block)
end end
# Private class methods. # Private class methods.
class << self class << self
private private
def open_reader(filename, col_sep, &block) def open_reader(filename, col_sep, row_sep, &block)
file = File.open(filename, 'rb') file = File.open(filename, 'rb')
if block if block
begin begin
CSV::Reader.parse(file, col_sep) do |row| CSV::Reader.parse(file, col_sep, row_sep) do |row|
yield(row) yield(row)
end end
ensure ensure
@ -222,17 +222,17 @@ public
end end
nil nil
else else
reader = CSV::Reader.create(file, col_sep) reader = CSV::Reader.create(file, col_sep, row_sep)
reader.close_on_terminate reader.close_on_terminate
reader reader
end end
end end
def open_writer(filename, col_sep, &block) def open_writer(filename, col_sep, row_sep, &block)
file = File.open(filename, 'wb') file = File.open(filename, 'wb')
if block if block
begin begin
CSV::Writer.generate(file, col_sep) do |writer| CSV::Writer.generate(file, col_sep, row_sep) do |writer|
yield(writer) yield(writer)
end end
ensure ensure
@ -240,7 +240,7 @@ public
end end
nil nil
else else
writer = CSV::Writer.create(file, col_sep) writer = CSV::Writer.create(file, col_sep, row_sep)
writer.close_on_terminate writer.close_on_terminate
writer writer
end end
@ -275,14 +275,14 @@ public
# DESCRIPTION # DESCRIPTION
# Create instance. To get parse result, see CSV::Reader#each. # Create instance. To get parse result, see CSV::Reader#each.
# #
def Reader.create(str_or_readable, col_sep = ?,) def Reader.create(str_or_readable, col_sep = ?,, row_sep = nil)
case str_or_readable case str_or_readable
when IO when IO
IOReader.new(str_or_readable, col_sep) IOReader.new(str_or_readable, col_sep, row_sep)
when String when String
StringReader.new(str_or_readable, col_sep) StringReader.new(str_or_readable, col_sep, row_sep)
else else
IOReader.new(str_or_readable, col_sep) IOReader.new(str_or_readable, col_sep, row_sep)
end end
end end
@ -305,8 +305,8 @@ public
# Block value is always nil. Rows are not cached for performance # Block value is always nil. Rows are not cached for performance
# reason. # reason.
# #
def Reader.parse(str_or_readable, col_sep = ?,) def Reader.parse(str_or_readable, col_sep = ?,, row_sep = nil)
reader = create(str_or_readable, col_sep) reader = create(str_or_readable, col_sep, row_sep)
reader.each do |row| reader.each do |row|
yield(row) yield(row)
end end
@ -413,8 +413,9 @@ public
# DESCRIPTION # DESCRIPTION
# Create instance. To get parse result, see CSV::Reader#each. # Create instance. To get parse result, see CSV::Reader#each.
# #
def initialize(string, col_sep = ?,) def initialize(string, col_sep = ?,, row_sep = nil)
@col_sep = col_sep @col_sep = col_sep
@row_sep = row_sep
@dev = string @dev = string
@idx = 0 @idx = 0
if @dev[0, 3] == "\xef\xbb\xbf" if @dev[0, 3] == "\xef\xbb\xbf"
@ -424,7 +425,7 @@ public
private private
def get_row(row) def get_row(row)
parsed_cells, next_idx = CSV.parse_row(@dev, @idx, row, @col_sep) parsed_cells, next_idx = CSV.parse_row(@dev, @idx, row, @col_sep, @row_sep)
if parsed_cells == 0 && next_idx == 0 && @idx != @dev.size if parsed_cells == 0 && next_idx == 0 && @idx != @dev.size
raise IllegalFormatError.new raise IllegalFormatError.new
end end
@ -460,9 +461,10 @@ public
# DESCRIPTION # DESCRIPTION
# Create instance. To get parse result, see CSV::Reader#each. # Create instance. To get parse result, see CSV::Reader#each.
# #
def initialize(io, col_sep = ?,) def initialize(io, col_sep = ?,, row_sep = nil)
@io = io @io = io
@col_sep = col_sep @col_sep = col_sep
@row_sep = row_sep
@dev = CSV::IOBuf.new(@io) @dev = CSV::IOBuf.new(@io)
@idx = 0 @idx = 0
if @dev[0] == 0xef and @dev[1] == 0xbb and @dev[2] == 0xbf if @dev[0] == 0xef and @dev[1] == 0xbb and @dev[2] == 0xbf
@ -487,7 +489,7 @@ public
private private
def get_row(row) def get_row(row)
parsed_cells, next_idx = CSV.parse_row(@dev, @idx, row, @col_sep) parsed_cells, next_idx = CSV.parse_row(@dev, @idx, row, @col_sep, @row_sep)
if parsed_cells == 0 && next_idx == 0 && !@dev.is_eos? if parsed_cells == 0 && next_idx == 0 && !@dev.is_eos?
raise IllegalFormatError.new raise IllegalFormatError.new
end end
@ -549,8 +551,8 @@ public
# Create instance. To add CSV data to generate CSV string, see # Create instance. To add CSV data to generate CSV string, see
# CSV::Writer#<< or CSV::Writer#add_row. # CSV::Writer#<< or CSV::Writer#add_row.
# #
def Writer.create(str_or_readable, col_sep = ?,) def Writer.create(str_or_readable, col_sep = ?,, row_sep = nil)
BasicWriter.new(str_or_readable, col_sep) BasicWriter.new(str_or_readable, col_sep, row_sep)
end end
# SYNOPSIS # SYNOPSIS
@ -572,8 +574,8 @@ public
# To add CSV data to generate CSV string, see CSV::Writer#<< or # To add CSV data to generate CSV string, see CSV::Writer#<< or
# CSV::Writer#add_row. # CSV::Writer#add_row.
# #
def Writer.generate(str_or_writable, col_sep = ?,) def Writer.generate(str_or_writable, col_sep = ?,, row_sep = nil)
writer = Writer.create(str_or_writable, col_sep) writer = Writer.create(str_or_writable, col_sep, row_sep)
yield(writer) yield(writer)
writer.close writer.close
nil nil
@ -602,7 +604,7 @@ public
Cell.new(item.to_s, false) Cell.new(item.to_s, false)
end end
} }
CSV.generate_row(row, row.size, @dev, @col_sep) CSV.generate_row(row, row.size, @dev, @col_sep, @row_sep)
self self
end end
@ -621,7 +623,7 @@ public
# (Former is 'c1' and latter is Null.) # (Former is 'c1' and latter is Null.)
# #
def add_row(row) def add_row(row)
CSV.generate_row(row, row.size, @dev, @col_sep) CSV.generate_row(row, row.size, @dev, @col_sep, @row_sep)
self self
end end
@ -669,8 +671,9 @@ public
# Create instance. To add CSV data to generate CSV string, see # Create instance. To add CSV data to generate CSV string, see
# CSV::Writer#<< or CSV::Writer#add_row. # CSV::Writer#<< or CSV::Writer#add_row.
# #
def initialize(str_or_writable, col_sep = ?,) def initialize(str_or_writable, col_sep = ?,, row_sep = nil)
@col_sep = col_sep @col_sep = col_sep
@row_sep = row_sep
@dev = str_or_writable @dev = str_or_writable
@close_on_terminate = false @close_on_terminate = false
end end
@ -698,12 +701,14 @@ public
end end
# SYNOPSIS # SYNOPSIS
# cells = CSV.parse_line(src, col_sep = ?,) # cells = CSV.parse_line(src, col_sep = ?,, row_sep = nil)
# #
# ARGS # ARGS
# src: a CSV String. # src: a CSV String.
# col_sep: Column separator. ?, by default. If you want to separate # col_sep: Column separator. ?, by default. If you want to separate
# fields with semicolon, give ?; here. # fields with semicolon, give ?; here.
# row_sep: Row separator. nil by default. nil means "\r\n or \n". If you
# want to separate records with \r, give ?\r here.
# #
# RETURNS # RETURNS
# cells: an Array of parsed cells in first line. Each cell is a String. # cells: an Array of parsed cells in first line. Each cell is a String.
@ -716,14 +721,14 @@ public
# If you don't know whether a target string to parse is exactly 1 line or # If you don't know whether a target string to parse is exactly 1 line or
# not, use CSV.parse_row instead of this method. # not, use CSV.parse_row instead of this method.
# #
def CSV.parse_line(src, col_sep = ?,) def CSV.parse_line(src, col_sep = ?,, row_sep = nil)
idx = 0 idx = 0
res_type = :DT_COLSEP res_type = :DT_COLSEP
cells = Row.new cells = Row.new
begin begin
while (res_type.equal?(:DT_COLSEP)) while (res_type.equal?(:DT_COLSEP))
cell = Cell.new cell = Cell.new
res_type, idx = parse_body(src, idx, cell, col_sep) res_type, idx = parse_body(src, idx, cell, col_sep, row_sep)
cells.push(cell.is_null ? nil : cell.data) cells.push(cell.is_null ? nil : cell.data)
end end
rescue IllegalFormatError rescue IllegalFormatError
@ -734,13 +739,15 @@ public
# SYNOPSIS # SYNOPSIS
# str = CSV.generate_line(cells, col_sep = ?,) # str = CSV.generate_line(cells, col_sep = ?,, row_sep = nil)
# #
# ARGS # ARGS
# cells: an Array of cell to be converted to CSV string. Each cell must # cells: an Array of cell to be converted to CSV string. Each cell must
# respond to 'to_s'. # respond to 'to_s'.
# col_sep: Column separator. ?, by default. If you want to separate # col_sep: Column separator. ?, by default. If you want to separate
# fields with semicolon, give ?; here. # fields with semicolon, give ?; here.
# row_sep: Row separator. nil by default. nil means "\r\n or \n". If you
# want to separate records with \r, give ?\r here.
# #
# RETURNS # RETURNS
# str: a String of generated CSV string. # str: a String of generated CSV string.
@ -748,7 +755,7 @@ public
# DESCRIPTION # DESCRIPTION
# Create a line from cells. Each cell is stringified by to_s. # Create a line from cells. Each cell is stringified by to_s.
# #
def CSV.generate_line(cells, col_sep = ?,) def CSV.generate_line(cells, col_sep = ?,, row_sep = nil)
if (cells.size == 0) if (cells.size == 0)
return '' return ''
end end
@ -761,18 +768,18 @@ public
else else
Cell.new(cells[idx].to_s, false) Cell.new(cells[idx].to_s, false)
end end
generate_body(cell, result_str, col_sep) generate_body(cell, result_str, col_sep, row_sep)
idx += 1 idx += 1
if (idx == cells.size) if (idx == cells.size)
break break
end end
generate_separator(:DT_COLSEP, result_str, col_sep) generate_separator(:DT_COLSEP, result_str, col_sep, row_sep)
end end
result_str result_str
end end
# SYNOPSIS # SYNOPSIS
# parsed_cells, idx = CSV.parse_row(src, idx, out_dev, col_sep = ?,) # parsed_cells, idx = CSV.parse_row(src, idx, out_dev, col_sep = ?,, row_sep = nil)
# #
# ARGS # ARGS
# src: a CSV data to be parsed. Must respond '[](idx)'. # src: a CSV data to be parsed. Must respond '[](idx)'.
@ -783,6 +790,8 @@ public
# out_dev: buffer for parsed cells. Must respond '<<(CSV::Cell)'. # out_dev: buffer for parsed cells. Must respond '<<(CSV::Cell)'.
# col_sep: Column separator. ?, by default. If you want to separate # col_sep: Column separator. ?, by default. If you want to separate
# fields with semicolon, give ?; here. # fields with semicolon, give ?; here.
# row_sep: Row separator. nil by default. nil means "\r\n or \n". If you
# want to separate records with \r, give ?\r here.
# #
# RETURNS # RETURNS
# parsed_cells: num of parsed cells. # parsed_cells: num of parsed cells.
@ -802,14 +811,14 @@ public
# p parsed # p parsed
# end while parsed_cells > 0 # end while parsed_cells > 0
# #
def CSV.parse_row(src, idx, out_dev, col_sep = ?,) def CSV.parse_row(src, idx, out_dev, col_sep = ?,, row_sep = nil)
idx_backup = idx idx_backup = idx
parsed_cells = 0 parsed_cells = 0
res_type = :DT_COLSEP res_type = :DT_COLSEP
begin begin
while (!res_type.equal?(:DT_ROWSEP)) while (!res_type.equal?(:DT_ROWSEP))
cell = Cell.new cell = Cell.new
res_type, idx = parse_body(src, idx, cell, col_sep) res_type, idx = parse_body(src, idx, cell, col_sep, row_sep)
if res_type.equal?(:DT_EOS) if res_type.equal?(:DT_EOS)
if idx == idx_backup #((parsed_cells == 0) && (cell.is_null)) if idx == idx_backup #((parsed_cells == 0) && (cell.is_null))
return 0, 0 return 0, 0
@ -826,7 +835,7 @@ public
end end
# SYNOPSIS # SYNOPSIS
# parsed_cells = CSV.generate_row(src, cells, out_dev, col_sep = ?,) # parsed_cells = CSV.generate_row(src, cells, out_dev, col_sep = ?,, row_sep = nil)
# #
# ARGS # ARGS
# src: an Array of CSV::Cell to be converted to CSV string. Must respond to # src: an Array of CSV::Cell to be converted to CSV string. Must respond to
@ -835,6 +844,8 @@ public
# out_dev: buffer for generated CSV string. Must respond to '<<(string)'. # out_dev: buffer for generated CSV string. Must respond to '<<(string)'.
# col_sep: Column separator. ?, by default. If you want to separate # col_sep: Column separator. ?, by default. If you want to separate
# fields with semicolon, give ?; here. # fields with semicolon, give ?; here.
# row_sep: Row separator. nil by default. nil means "\r\n or \n". If you
# want to separate records with \r, give ?\r here.
# #
# RETURNS # RETURNS
# parsed_cells: num of converted cells. # parsed_cells: num of converted cells.
@ -859,27 +870,27 @@ public
# end # end
# p buf # p buf
# #
def CSV.generate_row(src, cells, out_dev, col_sep = ?,) def CSV.generate_row(src, cells, out_dev, col_sep = ?,, row_sep = nil)
src_size = src.size src_size = src.size
if (src_size == 0) if (src_size == 0)
if cells == 0 if cells == 0
generate_separator(:DT_ROWSEP, out_dev, col_sep) generate_separator(:DT_ROWSEP, out_dev, col_sep, row_sep)
end end
return 0 return 0
end end
res_type = :DT_COLSEP res_type = :DT_COLSEP
parsed_cells = 0 parsed_cells = 0
generate_body(src[parsed_cells], out_dev, col_sep) generate_body(src[parsed_cells], out_dev, col_sep, row_sep)
parsed_cells += 1 parsed_cells += 1
while ((parsed_cells < cells) && (parsed_cells != src_size)) while ((parsed_cells < cells) && (parsed_cells != src_size))
generate_separator(:DT_COLSEP, out_dev, col_sep) generate_separator(:DT_COLSEP, out_dev, col_sep, row_sep)
generate_body(src[parsed_cells], out_dev, col_sep) generate_body(src[parsed_cells], out_dev, col_sep, row_sep)
parsed_cells += 1 parsed_cells += 1
end end
if (parsed_cells == cells) if (parsed_cells == cells)
generate_separator(:DT_ROWSEP, out_dev, col_sep) generate_separator(:DT_ROWSEP, out_dev, col_sep, row_sep)
else else
generate_separator(:DT_COLSEP, out_dev, col_sep) generate_separator(:DT_COLSEP, out_dev, col_sep, row_sep)
end end
parsed_cells parsed_cells
end end
@ -891,7 +902,8 @@ private
class << self class << self
private private
def parse_body(src, idx, cell, col_sep) def parse_body(src, idx, cell, col_sep, row_sep)
row_sep_end = row_sep || ?\n
cell.is_null = false cell.is_null = false
state = :ST_START state = :ST_START
quoted = false quoted = false
@ -941,7 +953,7 @@ private
quoted = true quoted = true
state = :ST_DATA state = :ST_DATA
end end
elsif (c == ?\r) elsif row_sep.nil? and c == ?\r
if cr if cr
raise IllegalFormatError.new raise IllegalFormatError.new
end end
@ -951,7 +963,7 @@ private
else else
cr = true cr = true
end end
elsif (c == ?\n) elsif c == row_sep_end
if state.equal?(:ST_DATA) if state.equal?(:ST_DATA)
if cr if cr
state = :ST_END state = :ST_END
@ -1004,12 +1016,14 @@ private
return :DT_EOS, idx return :DT_EOS, idx
end end
def generate_body(cells, out_dev, col_sep) def generate_body(cells, out_dev, col_sep, row_sep)
row_data = cells.data.dup row_data = cells.data.dup
if (!cells.is_null) if (!cells.is_null)
if (row_data.gsub!('"', '""') || if (row_data.gsub!('"', '""') ||
row_data.include?(col_sep) || row_data.include?(col_sep) ||
(/[\r\n]/ =~ row_data) || (cells.data.empty?)) (row_sep && row_data.index(row_sep)) ||
(/[\r\n]/ =~ row_data) ||
(cells.data.empty?))
out_dev << '"' << row_data << '"' out_dev << '"' << row_data << '"'
else else
out_dev << row_data out_dev << row_data
@ -1017,12 +1031,12 @@ private
end end
end end
def generate_separator(type, out_dev, col_sep) def generate_separator(type, out_dev, col_sep, row_sep)
case type case type
when :DT_COLSEP when :DT_COLSEP
out_dev << col_sep.chr out_dev << col_sep.chr
when :DT_ROWSEP when :DT_ROWSEP
out_dev << "\r\n" out_dev << (row_sep || "\r\n")
end end
end end
end end

2
test/csv/mac.csv Normal file
View file

@ -0,0 +1,2 @@
"Avenches","aus Umgebung"
"Bad Hersfeld","Ausgrabung"
1 Avenches aus Umgebung
2 Bad Hersfeld Ausgrabung

View file

@ -1,4 +1,5 @@
require 'test/unit' require 'test/unit/testsuite'
require 'test/unit/testcase'
require 'tempfile' require 'tempfile'
require 'fileutils' require 'fileutils'
@ -12,130 +13,16 @@ class CSV
end end
end end
class TestCSV < Test::Unit::TestCase
class << self module CSVTestSupport
def d(data, is_null = false) def d(data, is_null = false)
CSV::Cell.new(data.to_s, is_null) CSV::Cell.new(data.to_s, is_null)
end end
end end
class TestCSVCell < Test::Unit::TestCase
@@colData = ['', nil, true, false, 'foo', '!' * 1000] @@colData = ['', nil, true, false, 'foo', '!' * 1000]
@@simpleCSVData = {
[nil] => '',
[''] => '""',
[nil, nil] => ',',
[nil, nil, nil] => ',,',
['foo'] => 'foo',
[','] => '","',
[',', ','] => '",",","',
[';'] => ';',
[';', ';'] => ';,;',
["\"\r", "\"\r"] => "\"\"\"\r\",\"\"\"\r\"",
["\"\n", "\"\n"] => "\"\"\"\n\",\"\"\"\n\"",
["\t"] => "\t",
["\t", "\t"] => "\t,\t",
['foo', 'bar'] => 'foo,bar',
['foo', '"bar"', 'baz'] => 'foo,"""bar""",baz',
['foo', 'foo,bar', 'baz'] => 'foo,"foo,bar",baz',
['foo', '""', 'baz'] => 'foo,"""""",baz',
['foo', '', 'baz'] => 'foo,"",baz',
['foo', nil, 'baz'] => 'foo,,baz',
[nil, 'foo', 'bar'] => ',foo,bar',
['foo', 'bar', nil] => 'foo,bar,',
['foo', "\r", 'baz'] => "foo,\"\r\",baz",
['foo', "\n", 'baz'] => "foo,\"\n\",baz",
['foo', "\r\n\r", 'baz'] => "foo,\"\r\n\r\",baz",
['foo', "\r\n", 'baz'] => "foo,\"\r\n\",baz",
['foo', "\r.\n", 'baz'] => "foo,\"\r.\n\",baz",
['foo', "\r\n\n", 'baz'] => "foo,\"\r\n\n\",baz",
['foo', '"', 'baz'] => 'foo,"""",baz',
}
@@fullCSVData = {
[d('', true)] => '',
[d('')] => '""',
[d('', true), d('', true)] => ',',
[d('', true), d('', true), d('', true)] => ',,',
[d('foo')] => 'foo',
[d('foo'), d('bar')] => 'foo,bar',
[d('foo'), d('"bar"'), d('baz')] => 'foo,"""bar""",baz',
[d('foo'), d('foo,bar'), d('baz')] => 'foo,"foo,bar",baz',
[d('foo'), d('""'), d('baz')] => 'foo,"""""",baz',
[d('foo'), d(''), d('baz')] => 'foo,"",baz',
[d('foo'), d('', true), d('baz')] => 'foo,,baz',
[d('foo'), d("\r"), d('baz')] => "foo,\"\r\",baz",
[d('foo'), d("\n"), d('baz')] => "foo,\"\n\",baz",
[d('foo'), d("\r\n"), d('baz')] => "foo,\"\r\n\",baz",
[d('foo'), d("\r.\n"), d('baz')] => "foo,\"\r.\n\",baz",
[d('foo'), d("\r\n\n"), d('baz')] => "foo,\"\r\n\n\",baz",
[d('foo'), d('"'), d('baz')] => 'foo,"""",baz',
}
@@fullCSVDataArray = @@fullCSVData.collect { |key, value| key }
def ssv2csv(ssvStr)
sepConv(ssvStr, ?;, ?,)
end
def csv2ssv(csvStr)
sepConv(csvStr, ?,, ?;)
end
def tsv2csv(tsvStr)
sepConv(tsvStr, ?\t, ?,)
end
def csv2tsv(csvStr)
sepConv(csvStr, ?,, ?\t)
end
def sepConv(srcStr, srcSep, destSep)
rows = CSV::Row.new
cols, idx = CSV.parse_row(srcStr, 0, rows, srcSep)
destStr = ''
cols = CSV.generate_row(rows, rows.size, destStr, destSep)
destStr
end
public
def setup
@tmpdir = File.join(Dir.tmpdir, "ruby_test_csv_tmp_#{$$}")
Dir.mkdir(@tmpdir)
@infile = File.join(@tmpdir, 'in.csv')
@infiletsv = File.join(@tmpdir, 'in.tsv')
@emptyfile = File.join(@tmpdir, 'empty.csv')
@outfile = File.join(@tmpdir, 'out.csv')
@bomfile = File.join(File.dirname(__FILE__), "bom.csv")
CSV.open(@infile, "w") do |writer|
@@fullCSVDataArray.each do |row|
writer.add_row(row)
end
end
CSV.open(@infiletsv, "w", ?\t) do |writer|
@@fullCSVDataArray.each do |row|
writer.add_row(row)
end
end
CSV.generate(@emptyfile) do |writer|
# Create empty file.
end
end
def teardown
FileUtils.rm_rf(@tmpdir)
end
def d(*arg)
TestCSV.d(*arg)
end
#### CSV::Cell unit test
def test_Cell_EQUAL # '==' def test_Cell_EQUAL # '=='
d1 = CSV::Cell.new('d', false) d1 = CSV::Cell.new('d', false)
@ -206,9 +93,11 @@ public
d3 = CSV::Cell.new(nil, false) d3 = CSV::Cell.new(nil, false)
assert_equal(d3.is_null, false, "Data: false.") assert_equal(d3.is_null, false, "Data: false.")
end end
end
#### CSV::Row unit test class TestCSVRow < Test::Unit::TestCase
include CSVTestSupport
def test_Row_s_match def test_Row_s_match
c1 = CSV::Row[d(1), d(2), d(3)] c1 = CSV::Row[d(1), d(2), d(3)]
@ -267,8 +156,126 @@ public
r = CSV::Row[] r = CSV::Row[]
assert_equal([], r.to_a, 'Empty') assert_equal([], r.to_a, 'Empty')
end end
end
class TestCSV < Test::Unit::TestCase
include CSVTestSupport
class << self
include CSVTestSupport
end
@@simpleCSVData = {
[nil] => '',
[''] => '""',
[nil, nil] => ',',
[nil, nil, nil] => ',,',
['foo'] => 'foo',
[','] => '","',
[',', ','] => '",",","',
[';'] => ';',
[';', ';'] => ';,;',
["\"\r", "\"\r"] => "\"\"\"\r\",\"\"\"\r\"",
["\"\n", "\"\n"] => "\"\"\"\n\",\"\"\"\n\"",
["\t"] => "\t",
["\t", "\t"] => "\t,\t",
['foo', 'bar'] => 'foo,bar',
['foo', '"bar"', 'baz'] => 'foo,"""bar""",baz',
['foo', 'foo,bar', 'baz'] => 'foo,"foo,bar",baz',
['foo', '""', 'baz'] => 'foo,"""""",baz',
['foo', '', 'baz'] => 'foo,"",baz',
['foo', nil, 'baz'] => 'foo,,baz',
[nil, 'foo', 'bar'] => ',foo,bar',
['foo', 'bar', nil] => 'foo,bar,',
['foo', "\r", 'baz'] => "foo,\"\r\",baz",
['foo', "\n", 'baz'] => "foo,\"\n\",baz",
['foo', "\r\n\r", 'baz'] => "foo,\"\r\n\r\",baz",
['foo', "\r\n", 'baz'] => "foo,\"\r\n\",baz",
['foo', "\r.\n", 'baz'] => "foo,\"\r.\n\",baz",
['foo', "\r\n\n", 'baz'] => "foo,\"\r\n\n\",baz",
['foo', '"', 'baz'] => 'foo,"""",baz',
}
@@fullCSVData = {
[d('', true)] => '',
[d('')] => '""',
[d('', true), d('', true)] => ',',
[d('', true), d('', true), d('', true)] => ',,',
[d('foo')] => 'foo',
[d('foo'), d('bar')] => 'foo,bar',
[d('foo'), d('"bar"'), d('baz')] => 'foo,"""bar""",baz',
[d('foo'), d('foo,bar'), d('baz')] => 'foo,"foo,bar",baz',
[d('foo'), d('""'), d('baz')] => 'foo,"""""",baz',
[d('foo'), d(''), d('baz')] => 'foo,"",baz',
[d('foo'), d('', true), d('baz')] => 'foo,,baz',
[d('foo'), d("\r"), d('baz')] => "foo,\"\r\",baz",
[d('foo'), d("\n"), d('baz')] => "foo,\"\n\",baz",
[d('foo'), d("\r\n"), d('baz')] => "foo,\"\r\n\",baz",
[d('foo'), d("\r.\n"), d('baz')] => "foo,\"\r.\n\",baz",
[d('foo'), d("\r\n\n"), d('baz')] => "foo,\"\r\n\n\",baz",
[d('foo'), d('"'), d('baz')] => 'foo,"""",baz',
}
@@fullCSVDataArray = @@fullCSVData.collect { |key, value| key }
def ssv2csv(ssvStr, row_sep = nil)
sepConv(ssvStr, ?;, ?,, row_sep)
end
def csv2ssv(csvStr, row_sep = nil)
sepConv(csvStr, ?,, ?;, row_sep)
end
def tsv2csv(tsvStr, row_sep = nil)
sepConv(tsvStr, ?\t, ?,, row_sep)
end
def csv2tsv(csvStr, row_sep = nil)
sepConv(csvStr, ?,, ?\t, row_sep)
end
def sepConv(srcStr, srcSep, destSep, row_sep = nil)
rows = CSV::Row.new
cols, idx = CSV.parse_row(srcStr, 0, rows, srcSep, row_sep)
destStr = ''
cols = CSV.generate_row(rows, rows.size, destStr, destSep, row_sep)
destStr
end
public
def setup
@tmpdir = File.join(Dir.tmpdir, "ruby_test_csv_tmp_#{$$}")
Dir.mkdir(@tmpdir)
@infile = File.join(@tmpdir, 'in.csv')
@infiletsv = File.join(@tmpdir, 'in.tsv')
@emptyfile = File.join(@tmpdir, 'empty.csv')
@outfile = File.join(@tmpdir, 'out.csv')
@bomfile = File.join(File.dirname(__FILE__), "bom.csv")
@macfile = File.join(File.dirname(__FILE__), "mac.csv")
CSV.open(@infile, "w") do |writer|
@@fullCSVDataArray.each do |row|
writer.add_row(row)
end
end
CSV.open(@infiletsv, "w", ?\t) do |writer|
@@fullCSVDataArray.each do |row|
writer.add_row(row)
end
end
CSV.generate(@emptyfile) do |writer|
# Create empty file.
end
end
def teardown
FileUtils.rm_rf(@tmpdir)
end
#### CSV::Reader unit test #### CSV::Reader unit test
def test_Reader_each def test_Reader_each
@ -725,6 +732,11 @@ public
assert_equal(0, cols) assert_equal(0, cols)
assert_equal("\r\n", buf, "Extra boundary check.") assert_equal("\r\n", buf, "Extra boundary check.")
buf = ''
cols = CSV.generate_row([], 0, buf, ?\t, ?|)
assert_equal(0, cols)
assert_equal("|", buf, "Extra boundary check.")
buf = '' buf = ''
cols = CSV.generate_row([d(1)], 2, buf) cols = CSV.generate_row([d(1)], 2, buf)
assert_equal('1,', buf) assert_equal('1,', buf)
@ -737,6 +749,10 @@ public
cols = CSV.generate_row([d(1)], 2, buf, ?\t) cols = CSV.generate_row([d(1)], 2, buf, ?\t)
assert_equal("1\t", buf) assert_equal("1\t", buf)
buf = ''
cols = CSV.generate_row([d(1)], 2, buf, ?\t, ?|)
assert_equal("1\t", buf)
buf = '' buf = ''
cols = CSV.generate_row([d(1), d(2)], 1, buf) cols = CSV.generate_row([d(1), d(2)], 1, buf)
assert_equal("1\r\n", buf) assert_equal("1\r\n", buf)
@ -749,6 +765,18 @@ public
cols = CSV.generate_row([d(1), d(2)], 1, buf, ?\t) cols = CSV.generate_row([d(1), d(2)], 1, buf, ?\t)
assert_equal("1\r\n", buf) assert_equal("1\r\n", buf)
buf = ''
cols = CSV.generate_row([d(1), d(2)], 1, buf, ?\t, ?\n)
assert_equal("1\n", buf)
buf = ''
cols = CSV.generate_row([d(1), d(2)], 1, buf, ?\t, ?\r)
assert_equal("1\r", buf)
buf = ''
cols = CSV.generate_row([d(1), d(2)], 1, buf, ?\t, ?|)
assert_equal("1|", buf)
@@fullCSVData.each do |col, str| @@fullCSVData.each do |col, str|
buf = '' buf = ''
cols = CSV.generate_row(col, col.size, buf) cols = CSV.generate_row(col, col.size, buf)
@ -770,6 +798,22 @@ public
assert_equal(str + "\r\n", tsv2csv(buf)) assert_equal(str + "\r\n", tsv2csv(buf))
end end
# row separator
@@fullCSVData.each do |col, str|
buf = ''
cols = CSV.generate_row(col, col.size, buf, ?,, ?|)
assert_equal(col.size, cols)
assert_equal(str + "|", buf)
end
# col and row separator
@@fullCSVData.each do |col, str|
buf = ''
cols = CSV.generate_row(col, col.size, buf, ?\t, ?|)
assert_equal(col.size, cols)
assert_equal(str + "|", tsv2csv(buf, ?|))
end
buf = '' buf = ''
toBe = '' toBe = ''
cols = 0 cols = 0
@ -809,6 +853,20 @@ public
end end
assert_equal(colsToBe, cols) assert_equal(colsToBe, cols)
assert_equal(toBe, buf) assert_equal(toBe, buf)
buf = ''
toBe = ''
cols = 0
colsToBe = 0
@@fullCSVData.each do |col, str|
lineBuf = ''
cols += CSV.generate_row(col, col.size, lineBuf, ?|)
buf << tsv2csv(lineBuf, ?|)
toBe << tsv2csv(lineBuf, ?|)
colsToBe += col.size
end
assert_equal(colsToBe, cols)
assert_equal(toBe, buf)
end end
def test_s_parse_line def test_s_parse_line
@ -901,6 +959,16 @@ public
assert_equal(cols, buf.size, "Reported size.") assert_equal(cols, buf.size, "Reported size.")
assert_equal(col.size, buf.size, "Size.") assert_equal(col.size, buf.size, "Size.")
assert(buf.match(col)) assert(buf.match(col))
# separator: |
buf = CSV::Row.new
cols, idx = CSV.parse_row(str + "|", 0, buf, ?,)
assert(!buf.match(col))
buf = CSV::Row.new
cols, idx = CSV.parse_row(str + "|", 0, buf, ?,, ?|)
assert_equal(cols, buf.size, "Reported size.")
assert_equal(col.size, buf.size, "Size.")
assert(buf.match(col))
end end
@@fullCSVData.each do |col, str| @@fullCSVData.each do |col, str|
@ -921,6 +989,15 @@ public
assert(buf.match(col)) assert(buf.match(col))
end end
@@fullCSVData.each do |col, str|
str = csv2tsv(str, ?|)
buf = CSV::Row.new
cols, idx = CSV.parse_row(str + "|", 0, buf, ?\t, ?|)
assert_equal(cols, buf.size, "Reported size.")
assert_equal(col.size, buf.size, "Size.")
assert(buf.match(col), str)
end
buf = CSV::Row.new buf = CSV::Row.new
cols, idx = CSV.parse_row("a,b,\"c\r\"", 0, buf) cols, idx = CSV.parse_row("a,b,\"c\r\"", 0, buf)
assert_equal(["a", "b", "c\r"], buf.to_a) assert_equal(["a", "b", "c\r"], buf.to_a)
@ -1086,6 +1163,24 @@ public
assert_equal(toBe.size, parsedCols) assert_equal(toBe.size, parsedCols)
assert_equal(toBe.size, parsed.size) assert_equal(toBe.size, parsed.size)
assert(parsed.match(toBe)) assert(parsed.match(toBe))
buf = ''
toBe = []
@@fullCSVData.each do |col, str|
buf << str << "|"
toBe.concat(col)
end
idx = 0
cols = 0
parsed = CSV::Row.new
parsedCols = 0
begin
cols, idx = CSV.parse_row(buf, idx, parsed, ?,, ?|)
parsedCols += cols
end while cols > 0
assert_equal(toBe.size, parsedCols)
assert_equal(toBe.size, parsed.size)
assert(parsed.match(toBe))
end end
def test_utf8 def test_utf8
@ -1104,6 +1199,22 @@ public
file.close file.close
end end
def test_macCR
rows = []
CSV.open(@macfile, "r", ?,, ?\r) do |row|
rows << row.to_a
end
assert_equal([["Avenches", "aus Umgebung"], ["Bad Hersfeld", "Ausgrabung"]], rows)
rows = []
file = File.open(@macfile)
CSV::Reader.parse(file.read, ?,, ?\r) do |row|
rows << row.to_a
end
assert_equal([["Avenches", "aus Umgebung"], ["Bad Hersfeld", "Ausgrabung"]], rows)
file.close
end
#### CSV unit test #### CSV unit test
@ -1518,3 +1629,13 @@ public
assert_equal(csvStrTerminated, buf) assert_equal(csvStrTerminated, buf)
end end
end end
if $0 == __FILE__
suite = Test::Unit::TestSuite.new('CSV')
ObjectSpace.each_object(Class) do |klass|
suite << klass.suite if (Test::Unit::TestCase > klass)
end
require 'test/unit/ui/console/testrunner'
Test::Unit::UI::Console::TestRunner.run(suite).passed?
end