From 406f506d59510743469db2421ecdf6460aace6af Mon Sep 17 00:00:00 2001 From: nahi Date: Thu, 20 May 2004 17:24:04 +0000 Subject: [PATCH] * lib/csv.rb: fixed a few bugs around multi char record/field separator. * test/csv/test_csv.rb: added boundary test for above feature. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@6377 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- ChangeLog | 6 ++ lib/csv.rb | 172 ++++++++++++++++++++++--------------------- test/csv/test_csv.rb | 153 ++++++++++++++++++++++++++++++++++++++ version.h | 6 +- 4 files changed, 251 insertions(+), 86 deletions(-) diff --git a/ChangeLog b/ChangeLog index 8d1a2446b0..8546735817 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +Fri May 21 02:21:11 2004 NAKAMURA, Hiroshi + + * lib/csv.rb: fixed a few bugs around multi char record/field separator. + + * test/csv/test_csv.rb: added boundary test for above feature. + Thu May 20 17:02:03 2004 Nobuyoshi Nakada * lib/mkmf.rb (check_sizeof): define result size. [ruby-core:02911] diff --git a/lib/csv.rb b/lib/csv.rb index 351976fd00..26fc6435eb 100644 --- a/lib/csv.rb +++ b/lib/csv.rb @@ -11,7 +11,7 @@ class CSV class IllegalFormatError < RuntimeError; end - def CSV.open(path, mode, fs = ',', rs = nil, &block) + def CSV.open(path, mode, fs = nil, rs = nil, &block) if mode == 'r' or mode == 'rb' open_reader(path, mode, fs, rs, &block) elsif mode == 'w' or mode == 'wb' @@ -51,7 +51,7 @@ class CSV # RETURNS # reader instance. To get parse result, see CSV::Reader#each. # - def CSV.parse(path, fs = ',', rs = nil, &block) + def CSV.parse(path, fs = nil, rs = nil, &block) open_reader(path, 'r', fs, rs, &block) end @@ -80,7 +80,7 @@ class CSV # writer instance. See CSV::Writer#<< and CSV::Writer#add_row to know how # to generate CSV string. # - def CSV.generate(path, fs = ',', rs = nil, &block) + def CSV.generate(path, fs = nil, rs = nil, &block) open_writer(path, 'w', fs, rs, &block) end @@ -90,8 +90,9 @@ class CSV # # If you don't know whether a target string to parse is exactly 1 line or # not, use CSV.parse_row instead of this method. - def CSV.parse_line(src, fs = ',', rs = nil) - if !fs.nil? and fs.is_a?(Fixnum) + def CSV.parse_line(src, fs = nil, rs = nil) + fs ||= ',' + if fs.is_a?(Fixnum) fs = fs.chr end if !rs.nil? and rs.is_a?(Fixnum) @@ -101,7 +102,7 @@ class CSV res_type = :DT_COLSEP row = [] begin - while (res_type.equal?(:DT_COLSEP)) + while res_type == :DT_COLSEP res_type, idx, cell = parse_body(src, idx, fs, rs) row << cell end @@ -112,11 +113,12 @@ class CSV end # Create a line from cells. each cell is stringified by to_s. - def CSV.generate_line(row, fs = ',', rs = nil) - if (row.size == 0) + def CSV.generate_line(row, fs = nil, rs = nil) + if row.size == 0 return '' end - if !fs.nil? and fs.is_a?(Fixnum) + fs ||= ',' + if fs.is_a?(Fixnum) fs = fs.chr end if !rs.nil? and rs.is_a?(Fixnum) @@ -165,8 +167,9 @@ class CSV # parsed_cells: num of parsed cells. # idx: index of next parsing location of 'src'. # - def CSV.parse_row(src, idx, out_dev, fs = ',', rs = nil) - if !fs.nil? and fs.is_a?(Fixnum) + def CSV.parse_row(src, idx, out_dev, fs = nil, rs = nil) + fs ||= ',' + if fs.is_a?(Fixnum) fs = fs.chr end if !rs.nil? and rs.is_a?(Fixnum) @@ -176,9 +179,9 @@ class CSV parsed_cells = 0 res_type = :DT_COLSEP begin - while (!res_type.equal?(:DT_ROWSEP)) + while res_type != :DT_ROWSEP res_type, idx, cell = parse_body(src, idx, fs, rs) - if res_type.equal?(:DT_EOS) + if res_type == :DT_EOS if idx == idx_backup #((parsed_cells == 0) and cell.nil?) return 0, 0 end @@ -225,8 +228,9 @@ class CSV # RETURNS # parsed_cells: num of converted cells. # - def CSV.generate_row(src, cells, out_dev, fs = ',', rs = nil) - if !fs.nil? and fs.is_a?(Fixnum) + def CSV.generate_row(src, cells, out_dev, fs = nil, rs = nil) + fs ||= ',' + if fs.is_a?(Fixnum) fs = fs.chr end if !rs.nil? and rs.is_a?(Fixnum) @@ -299,30 +303,46 @@ class CSV def parse_body(src, idx, fs, rs) fs_str = fs fs_size = fs_str.size - fs_idx = 0 rs_str = rs || "\n" rs_size = rs_str.size - rs_idx = 0 + fs_idx = rs_idx = 0 cell = '' state = :ST_START - quoted = false - cr = false + quoted = cr = false c = nil last_idx = idx - while (c = src[idx]) + while c = src[idx] + unless quoted + fschar = (c == fs_str[fs_idx]) + rschar = (c == rs_str[rs_idx]) + # simple 1 char backtrack + if !fschar and c == fs_str[0] + fs_idx = 0 + fschar = true + if state == :ST_START + state = :ST_DATA + elsif state == :ST_QUOTE + raise IllegalFormatError + end + end + if !rschar and c == rs_str[0] + rs_idx = 0 + rschar = true + if state == :ST_START + state = :ST_DATA + elsif state == :ST_QUOTE + raise IllegalFormatError + end + end + end if c == ?" - cell << src[last_idx, (idx - last_idx)] - last_idx = idx + fs_idx = rs_idx = 0 if cr raise IllegalFormatError end - if fs_idx != 0 - fs_idx = 0 - end - if rs_idx != 0 - rs_idx = 0 - end - if state.equal?(:ST_DATA) + cell << src[last_idx, (idx - last_idx)] + last_idx = idx + if state == :ST_DATA if quoted last_idx += 1 quoted = false @@ -330,7 +350,7 @@ class CSV else raise IllegalFormatError end - elsif state.equal?(:ST_QUOTE) + elsif state == :ST_QUOTE cell << c.chr last_idx += 1 quoted = true @@ -340,62 +360,48 @@ class CSV last_idx += 1 state = :ST_DATA end - elsif c == fs_str[fs_idx] - fs_idx += 1 - cell << src[last_idx, (idx - last_idx)] - last_idx = idx - if rs_idx != 0 - rs_idx = 0 + elsif fschar or rschar + if fschar + fs_idx += 1 end + if rschar + rs_idx += 1 + end + sep = nil if fs_idx == fs_size - fs_idx = 0 + if state == :ST_START and rs_idx > 0 and fs_idx < rs_idx + state = :ST_DATA + end + cell << src[last_idx, (idx - last_idx - (fs_size - 1))] + last_idx = idx + fs_idx = rs_idx = 0 if cr raise IllegalFormatError end - if state.equal?(:ST_DATA) - if rs_idx != 0 - cell << rs_str[0, rs_idx] - rs_idx = 0 - end - if quoted - true # ToDo: delete; dummy line for coverage - else - return :DT_COLSEP, idx + 1, cell; - end - elsif state.equal?(:ST_QUOTE) - if rs_idx != 0 - raise IllegalFormatError - end - return :DT_COLSEP, idx + 1, cell; - else # :ST_START - return :DT_COLSEP, idx + 1, nil + sep = :DT_COLSEP + elsif rs_idx == rs_size + if state == :ST_START and fs_idx > 0 and rs_idx < fs_idx + state = :ST_DATA end + if !(rs.nil? and cr) + cell << src[last_idx, (idx - last_idx - (rs_size - 1))] + last_idx = idx + end + fs_idx = rs_idx = 0 + sep = :DT_ROWSEP end - elsif c == rs_str[rs_idx] - rs_idx += 1 - unless (rs.nil? and cr) - cell << src[last_idx, (idx - last_idx)] - last_idx = idx - end - if fs_idx != 0 - fs_idx = 0 - end - if rs_idx == rs_size - rs_idx = 0 - if state.equal?(:ST_DATA) - if quoted - true # ToDo: delete; dummy line for coverage - else - return :DT_ROWSEP, idx + 1, cell - end - elsif state.equal?(:ST_QUOTE) - return :DT_ROWSEP, idx + 1, cell + if sep + if state == :ST_DATA + return sep, idx + 1, cell; + elsif state == :ST_QUOTE + return sep, idx + 1, cell; else # :ST_START - return :DT_ROWSEP, idx + 1, nil + return sep, idx + 1, nil end end elsif rs.nil? and c == ?\r # special \r treatment for backward compatibility + fs_idx = rs_idx = 0 if cr raise IllegalFormatError end @@ -407,13 +413,8 @@ class CSV cr = true end else - if fs_idx != 0 - fs_idx = 0 - end - if rs_idx != 0 - rs_idx = 0 - end - if state.equal?(:ST_DATA) or state.equal?(:ST_START) + fs_idx = rs_idx = 0 + if state == :ST_DATA or state == :ST_START if cr raise IllegalFormatError end @@ -424,8 +425,12 @@ class CSV end idx += 1 end - if state.equal?(:ST_START) - return :DT_EOS, idx, nil + if state == :ST_START + if fs_idx > 0 or rs_idx > 0 + state = :ST_DATA + else + return :DT_EOS, idx, nil + end elsif quoted raise IllegalFormatError elsif cr @@ -440,6 +445,7 @@ class CSV if cell.nil? # empty else + cell = cell.to_s row_data = cell.dup if (row_data.gsub!('"', '""') or row_data.index(fs) or diff --git a/test/csv/test_csv.rb b/test/csv/test_csv.rb index 6dc101edf1..eca88321f0 100644 --- a/test/csv/test_csv.rb +++ b/test/csv/test_csv.rb @@ -639,6 +639,12 @@ public buf = CSV.generate_line(col, ?\t) assert_equal(str + "\n", tsv2csv(buf)) end + + str = CSV.generate_line(['a', 'b'], nil, ?|) + assert_equal('a,b', str) + + str = CSV.generate_line(['a', 'b'], nil, "a") + assert_equal('"a",b', str) end def test_s_generate_row @@ -818,6 +824,15 @@ public assert_equal(col, row) end + row = CSV.parse_line("a,b,c", nil, nil) + assert_equal(['a', 'b', 'c'], row) + + row = CSV.parse_line("a,b,c", nil, ?b) + assert_equal(['a', nil], row) + + row = CSV.parse_line("a,b,c", nil, "c") + assert_equal(['a', 'b', nil], row) + # Illegal format. buf = [] row = CSV.parse_line("a,b,\"c\"\ra") @@ -923,6 +938,18 @@ public assert_equal(col, buf, str) end + buf = [] + CSV.parse_row("a,b,c", 0, buf, nil, nil) + assert_equal(['a', 'b', 'c'], buf) + + buf = [] + CSV.parse_row("a,b,c", 0, buf, nil, ?b) + assert_equal(['a', nil], buf) + + buf = [] + CSV.parse_row("a,b,c", 0, buf, nil, "c") + assert_equal(['a', 'b', nil], buf) + buf = Array.new cols, idx = CSV.parse_row("a,b,\"c\r\"", 0, buf) assert_equal(["a", "b", "c\r"], buf.to_a) @@ -1577,4 +1604,130 @@ public end assert_equal(csvStrTerminated, buf) end + + def test_writer_fs_rs_generate + buf = '' + CSV::Writer.generate(buf, ",,") do |writer| + writer << [] + end + assert_equal("\n", buf) + + buf = '' + CSV::Writer.generate(buf, ",,") do |writer| + writer << [] << [] + end + assert_equal("\n\n", buf) + + buf = '' + CSV::Writer.generate(buf, ",,") do |writer| + writer << [1] + end + assert_equal("1\n", buf) + + buf = '' + CSV::Writer.generate(buf, ",,") do |writer| + writer << [1, 2, 3] + writer << [4, ",,", 5] + end + assert_equal("1,,2,,3\n4,,\",,\",,5\n", buf) + + buf = '' + CSV::Writer.generate(buf, ",,:", ",,;") do |writer| + writer << [nil, nil, nil] + writer << [nil, ",,", nil] + end + assert_equal(",,:,,:,,;,,:,,,,:,,;", buf) + + buf = '' + CSV::Writer.generate(buf, "---") do |writer| + writer << [1, 2, 3] + writer << [4, "---\"---", 5] + end + assert_equal("1---2---3\n4---\"---\"\"---\"---5\n", buf) + + buf = '' + CSV::Writer.generate(buf, nil) do |writer| + writer << [1, 2, 3] + writer << [4, ",\",", 5] + end + assert_equal("1,2,3\n4,\",\"\",\",5\n", buf) + end + + def test_writer_fs_rs_parse + reader = CSV::Reader.create('a||b--c||d', '||', '--') + assert_equal(['a', 'b'], reader.shift) + assert_equal(['c', 'd'], reader.shift) + + reader = CSV::Reader.create("a@|b@-c@|d", "@|", "@-") + assert_equal(['a', 'b'], reader.shift) + assert_equal(['c', 'd'], reader.shift) + + reader = CSV::Reader.create("ababfsababrs", "abfs", "abrs") + assert_equal(['ab', 'ab'], reader.shift) + + reader = CSV::Reader.create('"ab"abfsababrs', "abfs", "abrs") + assert_equal(['ab', 'ab'], reader.shift) + + reader = CSV::Reader.create('"ab"aabfsababrs', "abfs", "abrs") + assert_raises(CSV::IllegalFormatError) do + reader.shift + end + + # fs match while matching rs progress + reader = CSV::Reader.create("ab,ababrs", nil, "abrs") + assert_equal(['ab', 'ab'], reader.shift) + + reader = CSV::Reader.create(',ababrs', nil, "abrs") + assert_equal([nil, 'ab'], reader.shift) + + reader = CSV::Reader.create('"",ababrs', nil, "abrs") + assert_equal(['', 'ab'], reader.shift) + + reader = CSV::Reader.create('ab,"ab"abrs', nil, "abrs") + assert_equal(['ab', 'ab'], reader.shift) + + reader = CSV::Reader.create('ab,"ab"aabrs', nil, "abrs") + assert_raises(CSV::IllegalFormatError) do + reader.shift + end + + # rs match while matching fs progress + reader = CSV::Reader.create("ab|abc", 'ab-', "ab|") + assert_equal([nil], reader.shift) + assert_equal(['abc'], reader.shift) + + # EOF while fs/rs matching + reader = CSV::Reader.create("ab", 'ab-', "xyz") + assert_equal(['ab'], reader.shift) + + reader = CSV::Reader.create("ab", 'xyz', "ab|") + assert_equal(['ab'], reader.shift) + + reader = CSV::Reader.create("ab", 'ab-', "ab|") + assert_equal(['ab'], reader.shift) + + reader = CSV::Reader.create(",,:,,:,,;,,:,,,,:,,;", ",,:", ",,;") + assert_equal([nil, nil, nil], reader.shift) + assert_equal([nil, ",,", nil], reader.shift) + end + + def test_foreach + File.open(@outfile, "w") do |f| + f << "1,2,3\n4,5,6" + end + row = [] + CSV.foreach(@outfile) { |line| + row << line + } + assert_equal([['1', '2', '3'], ['4', '5', '6']], row) + + File.open(@outfile, "w") do |f| + f << "1,2,3\r4,5,6" + end + row = [] + CSV.foreach(@outfile, "\r") { |line| + row << line + } + assert_equal([['1', '2', '3'], ['4', '5', '6']], row) + end end diff --git a/version.h b/version.h index 8c0ef35bde..ce02c28dd9 100644 --- a/version.h +++ b/version.h @@ -1,11 +1,11 @@ #define RUBY_VERSION "1.9.0" -#define RUBY_RELEASE_DATE "2004-05-20" +#define RUBY_RELEASE_DATE "2004-05-21" #define RUBY_VERSION_CODE 190 -#define RUBY_RELEASE_CODE 20040520 +#define RUBY_RELEASE_CODE 20040521 #define RUBY_VERSION_MAJOR 1 #define RUBY_VERSION_MINOR 9 #define RUBY_VERSION_TEENY 0 #define RUBY_RELEASE_YEAR 2004 #define RUBY_RELEASE_MONTH 5 -#define RUBY_RELEASE_DAY 20 +#define RUBY_RELEASE_DAY 21