1
0
Fork 0
mirror of https://github.com/ruby/ruby.git synced 2022-11-09 12:17:21 -05:00

* lib/csv.rb: fixed a few bugs in the handling of multi-character record/field separators.

* test/csv/test_csv.rb: added boundary tests for the above fix.


git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@6377 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
nahi 2004-05-20 17:24:04 +00:00
parent fc04396ea3
commit 406f506d59
4 changed files with 251 additions and 86 deletions

View file

@ -1,3 +1,9 @@
Fri May 21 02:21:11 2004 NAKAMURA, Hiroshi <nakahiro@sarion.co.jp>
* lib/csv.rb: fixed a few bugs around multi char record/field separator.
* test/csv/test_csv.rb: added boundary test for above feature.
Thu May 20 17:02:03 2004 Nobuyoshi Nakada <nobu@ruby-lang.org>
* lib/mkmf.rb (check_sizeof): define result size. [ruby-core:02911]

View file

@ -11,7 +11,7 @@
class CSV
class IllegalFormatError < RuntimeError; end
def CSV.open(path, mode, fs = ',', rs = nil, &block)
def CSV.open(path, mode, fs = nil, rs = nil, &block)
if mode == 'r' or mode == 'rb'
open_reader(path, mode, fs, rs, &block)
elsif mode == 'w' or mode == 'wb'
@ -51,7 +51,7 @@ class CSV
# RETURNS
# reader instance. To get parse result, see CSV::Reader#each.
#
def CSV.parse(path, fs = ',', rs = nil, &block)
def CSV.parse(path, fs = nil, rs = nil, &block)
open_reader(path, 'r', fs, rs, &block)
end
@ -80,7 +80,7 @@ class CSV
# writer instance. See CSV::Writer#<< and CSV::Writer#add_row to learn how
# to generate a CSV string.
#
def CSV.generate(path, fs = ',', rs = nil, &block)
def CSV.generate(path, fs = nil, rs = nil, &block)
open_writer(path, 'w', fs, rs, &block)
end
@ -90,8 +90,9 @@ class CSV
#
# If you don't know whether a target string to parse is exactly 1 line or
# not, use CSV.parse_row instead of this method.
def CSV.parse_line(src, fs = ',', rs = nil)
if !fs.nil? and fs.is_a?(Fixnum)
def CSV.parse_line(src, fs = nil, rs = nil)
fs ||= ','
if fs.is_a?(Fixnum)
fs = fs.chr
end
if !rs.nil? and rs.is_a?(Fixnum)
@ -101,7 +102,7 @@ class CSV
res_type = :DT_COLSEP
row = []
begin
while (res_type.equal?(:DT_COLSEP))
while res_type == :DT_COLSEP
res_type, idx, cell = parse_body(src, idx, fs, rs)
row << cell
end
@ -112,11 +113,12 @@ class CSV
end
# Create a line from cells. each cell is stringified by to_s.
def CSV.generate_line(row, fs = ',', rs = nil)
if (row.size == 0)
def CSV.generate_line(row, fs = nil, rs = nil)
if row.size == 0
return ''
end
if !fs.nil? and fs.is_a?(Fixnum)
fs ||= ','
if fs.is_a?(Fixnum)
fs = fs.chr
end
if !rs.nil? and rs.is_a?(Fixnum)
@ -165,8 +167,9 @@ class CSV
# parsed_cells: num of parsed cells.
# idx: index of next parsing location of 'src'.
#
def CSV.parse_row(src, idx, out_dev, fs = ',', rs = nil)
if !fs.nil? and fs.is_a?(Fixnum)
def CSV.parse_row(src, idx, out_dev, fs = nil, rs = nil)
fs ||= ','
if fs.is_a?(Fixnum)
fs = fs.chr
end
if !rs.nil? and rs.is_a?(Fixnum)
@ -176,9 +179,9 @@ class CSV
parsed_cells = 0
res_type = :DT_COLSEP
begin
while (!res_type.equal?(:DT_ROWSEP))
while res_type != :DT_ROWSEP
res_type, idx, cell = parse_body(src, idx, fs, rs)
if res_type.equal?(:DT_EOS)
if res_type == :DT_EOS
if idx == idx_backup #((parsed_cells == 0) and cell.nil?)
return 0, 0
end
@ -225,8 +228,9 @@ class CSV
# RETURNS
# parsed_cells: num of converted cells.
#
def CSV.generate_row(src, cells, out_dev, fs = ',', rs = nil)
if !fs.nil? and fs.is_a?(Fixnum)
def CSV.generate_row(src, cells, out_dev, fs = nil, rs = nil)
fs ||= ','
if fs.is_a?(Fixnum)
fs = fs.chr
end
if !rs.nil? and rs.is_a?(Fixnum)
@ -299,30 +303,46 @@ class CSV
def parse_body(src, idx, fs, rs)
fs_str = fs
fs_size = fs_str.size
fs_idx = 0
rs_str = rs || "\n"
rs_size = rs_str.size
rs_idx = 0
fs_idx = rs_idx = 0
cell = ''
state = :ST_START
quoted = false
cr = false
quoted = cr = false
c = nil
last_idx = idx
while (c = src[idx])
while c = src[idx]
unless quoted
fschar = (c == fs_str[fs_idx])
rschar = (c == rs_str[rs_idx])
# simple 1 char backtrack
if !fschar and c == fs_str[0]
fs_idx = 0
fschar = true
if state == :ST_START
state = :ST_DATA
elsif state == :ST_QUOTE
raise IllegalFormatError
end
end
if !rschar and c == rs_str[0]
rs_idx = 0
rschar = true
if state == :ST_START
state = :ST_DATA
elsif state == :ST_QUOTE
raise IllegalFormatError
end
end
end
if c == ?"
cell << src[last_idx, (idx - last_idx)]
last_idx = idx
fs_idx = rs_idx = 0
if cr
raise IllegalFormatError
end
if fs_idx != 0
fs_idx = 0
end
if rs_idx != 0
rs_idx = 0
end
if state.equal?(:ST_DATA)
cell << src[last_idx, (idx - last_idx)]
last_idx = idx
if state == :ST_DATA
if quoted
last_idx += 1
quoted = false
@ -330,7 +350,7 @@ class CSV
else
raise IllegalFormatError
end
elsif state.equal?(:ST_QUOTE)
elsif state == :ST_QUOTE
cell << c.chr
last_idx += 1
quoted = true
@ -340,62 +360,48 @@ class CSV
last_idx += 1
state = :ST_DATA
end
elsif c == fs_str[fs_idx]
fs_idx += 1
cell << src[last_idx, (idx - last_idx)]
last_idx = idx
if rs_idx != 0
rs_idx = 0
elsif fschar or rschar
if fschar
fs_idx += 1
end
if rschar
rs_idx += 1
end
sep = nil
if fs_idx == fs_size
fs_idx = 0
if state == :ST_START and rs_idx > 0 and fs_idx < rs_idx
state = :ST_DATA
end
cell << src[last_idx, (idx - last_idx - (fs_size - 1))]
last_idx = idx
fs_idx = rs_idx = 0
if cr
raise IllegalFormatError
end
if state.equal?(:ST_DATA)
if rs_idx != 0
cell << rs_str[0, rs_idx]
rs_idx = 0
end
if quoted
true # ToDo: delete; dummy line for coverage
else
return :DT_COLSEP, idx + 1, cell;
end
elsif state.equal?(:ST_QUOTE)
if rs_idx != 0
raise IllegalFormatError
end
return :DT_COLSEP, idx + 1, cell;
else # :ST_START
return :DT_COLSEP, idx + 1, nil
sep = :DT_COLSEP
elsif rs_idx == rs_size
if state == :ST_START and fs_idx > 0 and rs_idx < fs_idx
state = :ST_DATA
end
if !(rs.nil? and cr)
cell << src[last_idx, (idx - last_idx - (rs_size - 1))]
last_idx = idx
end
fs_idx = rs_idx = 0
sep = :DT_ROWSEP
end
elsif c == rs_str[rs_idx]
rs_idx += 1
unless (rs.nil? and cr)
cell << src[last_idx, (idx - last_idx)]
last_idx = idx
end
if fs_idx != 0
fs_idx = 0
end
if rs_idx == rs_size
rs_idx = 0
if state.equal?(:ST_DATA)
if quoted
true # ToDo: delete; dummy line for coverage
else
return :DT_ROWSEP, idx + 1, cell
end
elsif state.equal?(:ST_QUOTE)
return :DT_ROWSEP, idx + 1, cell
if sep
if state == :ST_DATA
return sep, idx + 1, cell;
elsif state == :ST_QUOTE
return sep, idx + 1, cell;
else # :ST_START
return :DT_ROWSEP, idx + 1, nil
return sep, idx + 1, nil
end
end
elsif rs.nil? and c == ?\r
# special \r treatment for backward compatibility
fs_idx = rs_idx = 0
if cr
raise IllegalFormatError
end
@ -407,13 +413,8 @@ class CSV
cr = true
end
else
if fs_idx != 0
fs_idx = 0
end
if rs_idx != 0
rs_idx = 0
end
if state.equal?(:ST_DATA) or state.equal?(:ST_START)
fs_idx = rs_idx = 0
if state == :ST_DATA or state == :ST_START
if cr
raise IllegalFormatError
end
@ -424,8 +425,12 @@ class CSV
end
idx += 1
end
if state.equal?(:ST_START)
return :DT_EOS, idx, nil
if state == :ST_START
if fs_idx > 0 or rs_idx > 0
state = :ST_DATA
else
return :DT_EOS, idx, nil
end
elsif quoted
raise IllegalFormatError
elsif cr
@ -440,6 +445,7 @@ class CSV
if cell.nil?
# empty
else
cell = cell.to_s
row_data = cell.dup
if (row_data.gsub!('"', '""') or
row_data.index(fs) or

View file

@ -639,6 +639,12 @@ public
buf = CSV.generate_line(col, ?\t)
assert_equal(str + "\n", tsv2csv(buf))
end
str = CSV.generate_line(['a', 'b'], nil, ?|)
assert_equal('a,b', str)
str = CSV.generate_line(['a', 'b'], nil, "a")
assert_equal('"a",b', str)
end
def test_s_generate_row
@ -818,6 +824,15 @@ public
assert_equal(col, row)
end
row = CSV.parse_line("a,b,c", nil, nil)
assert_equal(['a', 'b', 'c'], row)
row = CSV.parse_line("a,b,c", nil, ?b)
assert_equal(['a', nil], row)
row = CSV.parse_line("a,b,c", nil, "c")
assert_equal(['a', 'b', nil], row)
# Illegal format.
buf = []
row = CSV.parse_line("a,b,\"c\"\ra")
@ -923,6 +938,18 @@ public
assert_equal(col, buf, str)
end
buf = []
CSV.parse_row("a,b,c", 0, buf, nil, nil)
assert_equal(['a', 'b', 'c'], buf)
buf = []
CSV.parse_row("a,b,c", 0, buf, nil, ?b)
assert_equal(['a', nil], buf)
buf = []
CSV.parse_row("a,b,c", 0, buf, nil, "c")
assert_equal(['a', 'b', nil], buf)
buf = Array.new
cols, idx = CSV.parse_row("a,b,\"c\r\"", 0, buf)
assert_equal(["a", "b", "c\r"], buf.to_a)
@ -1577,4 +1604,130 @@ public
end
assert_equal(csvStrTerminated, buf)
end
# Exercises CSV::Writer.generate with multi-character field separators (fs)
# and record separators (rs), checking both the separators that are emitted
# and the quoting of cells that contain the field separator or a double quote.
def test_writer_fs_rs_generate
  # One empty row: only the record separator ("\n" when no rs is passed).
  out = ''
  CSV::Writer.generate(out, ",,") { |w| w << [] }
  assert_equal("\n", out)

  # Chained << calls: two empty rows give two record separators.
  out = ''
  CSV::Writer.generate(out, ",,") { |w| w << [] << [] }
  assert_equal("\n\n", out)

  # A single-cell row carries no field separator.
  out = ''
  CSV::Writer.generate(out, ",,") { |w| w << [1] }
  assert_equal("1\n", out)

  # A cell equal to the two-character fs is expected to be quoted.
  out = ''
  CSV::Writer.generate(out, ",,") do |w|
    [[1, 2, 3], [4, ",,", 5]].each { |cells| w << cells }
  end
  assert_equal("1,,2,,3\n4,,\",,\",,5\n", out)

  # Three-character fs and rs that share the ",," prefix: nil cells come out
  # empty and the ",," cell is expected unquoted.
  out = ''
  CSV::Writer.generate(out, ",,:", ",,;") do |w|
    [[nil, nil, nil], [nil, ",,", nil]].each { |cells| w << cells }
  end
  assert_equal(",,:,,:,,;,,:,,,,:,,;", out)

  # A cell holding both the fs and a double quote: quoted, with the quote doubled.
  out = ''
  CSV::Writer.generate(out, "---") do |w|
    [[1, 2, 3], [4, "---\"---", 5]].each { |cells| w << cells }
  end
  assert_equal("1---2---3\n4---\"---\"\"---\"---5\n", out)

  # A nil fs is expected to produce comma-separated output.
  out = ''
  CSV::Writer.generate(out, nil) do |w|
    [[1, 2, 3], [4, ",\",", 5]].each { |cells| w << cells }
  end
  assert_equal("1,2,3\n4,\",\"\",\",5\n", out)
end
# Exercises CSV::Reader.create / #shift with multi-character field separators
# (fs) and record separators (rs): separators sharing a prefix with each other
# or with cell data, quoted cells, input ending mid-separator, and malformed
# input after a closing quote.
def test_writer_fs_rs_parse
  # Two-character separators, two records read from the same reader.
  rdr = CSV::Reader.create('a||b--c||d', '||', '--')
  [['a', 'b'], ['c', 'd']].each { |cells| assert_equal(cells, rdr.shift) }

  # fs and rs share their first character.
  rdr = CSV::Reader.create("a@|b@-c@|d", "@|", "@-")
  [['a', 'b'], ['c', 'd']].each { |cells| assert_equal(cells, rdr.shift) }

  # Cell data repeats the common "ab" prefix of both separators.
  assert_equal(['ab', 'ab'],
               CSV::Reader.create("ababfsababrs", "abfs", "abrs").shift)

  # Same input with the first cell quoted.
  assert_equal(['ab', 'ab'],
               CSV::Reader.create('"ab"abfsababrs', "abfs", "abrs").shift)

  # A stray character between the closing quote and the fs is illegal.
  rdr = CSV::Reader.create('"ab"aabfsababrs', "abfs", "abrs")
  assert_raises(CSV::IllegalFormatError) { rdr.shift }

  # fs match while an rs match is in progress (nil fs, expected to act as ',').
  assert_equal(['ab', 'ab'],
               CSV::Reader.create("ab,ababrs", nil, "abrs").shift)
  assert_equal([nil, 'ab'],
               CSV::Reader.create(',ababrs', nil, "abrs").shift)
  assert_equal(['', 'ab'],
               CSV::Reader.create('"",ababrs', nil, "abrs").shift)
  assert_equal(['ab', 'ab'],
               CSV::Reader.create('ab,"ab"abrs', nil, "abrs").shift)

  # A stray character between the closing quote and the rs is illegal.
  rdr = CSV::Reader.create('ab,"ab"aabrs', nil, "abrs")
  assert_raises(CSV::IllegalFormatError) { rdr.shift }

  # rs match while an fs match is in progress.
  rdr = CSV::Reader.create("ab|abc", 'ab-', "ab|")
  [[nil], ['abc']].each { |cells| assert_equal(cells, rdr.shift) }

  # End of input while an fs and/or rs match is still partial: the partial
  # match is expected to be returned as cell data.
  assert_equal(['ab'], CSV::Reader.create("ab", 'ab-', "xyz").shift)
  assert_equal(['ab'], CSV::Reader.create("ab", 'xyz', "ab|").shift)
  assert_equal(['ab'], CSV::Reader.create("ab", 'ab-', "ab|").shift)

  # Three-character fs and rs sharing the ",," prefix, with a ",," cell.
  rdr = CSV::Reader.create(",,:,,:,,;,,:,,,,:,,;", ",,:", ",,;")
  [[nil, nil, nil], [nil, ",,", nil]].each do |cells|
    assert_equal(cells, rdr.shift)
  end
end
# Writes two small CSV files to @outfile and checks that CSV.foreach yields
# the same two records for each: first with "\n" between records and no
# separator argument, then with "\r" between records and "\r" passed in.
def test_foreach
  expected = [['1', '2', '3'], ['4', '5', '6']]

  # Records separated by "\n"; no separator argument given.
  File.open(@outfile, "w") { |io| io << "1,2,3\n4,5,6" }
  seen = []
  CSV.foreach(@outfile) { |record| seen << record }
  assert_equal(expected, seen)

  # Records separated by "\r"; "\r" passed as the second argument.
  File.open(@outfile, "w") { |io| io << "1,2,3\r4,5,6" }
  seen = []
  CSV.foreach(@outfile, "\r") { |record| seen << record }
  assert_equal(expected, seen)
end
end

View file

@ -1,11 +1,11 @@
#define RUBY_VERSION "1.9.0"
#define RUBY_RELEASE_DATE "2004-05-20"
#define RUBY_RELEASE_DATE "2004-05-21"
#define RUBY_VERSION_CODE 190
#define RUBY_RELEASE_CODE 20040520
#define RUBY_RELEASE_CODE 20040521
#define RUBY_VERSION_MAJOR 1
#define RUBY_VERSION_MINOR 9
#define RUBY_VERSION_TEENY 0
#define RUBY_RELEASE_YEAR 2004
#define RUBY_RELEASE_MONTH 5
#define RUBY_RELEASE_DAY 20
#define RUBY_RELEASE_DAY 21