mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
* lib/csv.rb: add extra pamameter to specify row(record) separater character.
To parse Mac's CR separated CSV, do like this.
CSV.open("mac.csv", "r", ?,,?\r) { |row| p row.to_a }
The 3rd parameter in this example ?, is for column separater and the 4th ?\r
is for row separater. Row separater is nil by default. Nil separater means
"\r\n" or "\n".
* test/csv/test_csv.rb: add tests for above feature.
* test/csv/mac.csv: added. Sample CR separated CSV file.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@4553 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
parent
bd96b4c8cc
commit
ffc136a024
4 changed files with 327 additions and 177 deletions
124
lib/csv.rb
124
lib/csv.rb
|
|
@ -189,32 +189,32 @@ public
|
|||
# writer << [nil, nil]
|
||||
# end
|
||||
#
|
||||
def CSV.open(filename, mode, col_sep = ?,, &block)
|
||||
def CSV.open(filename, mode, col_sep = ?,, row_sep = nil, &block)
|
||||
if mode == 'r' or mode == 'rb'
|
||||
open_reader(filename, col_sep, &block)
|
||||
open_reader(filename, col_sep, row_sep, &block)
|
||||
elsif mode == 'w' or mode == 'wb'
|
||||
open_writer(filename, col_sep, &block)
|
||||
open_writer(filename, col_sep, row_sep, &block)
|
||||
else
|
||||
raise ArgumentError.new("'mode' must be 'r', 'rb', 'w', or 'wb'")
|
||||
end
|
||||
end
|
||||
|
||||
def CSV.parse(filename, col_sep = ?,, &block)
|
||||
open_reader(filename, col_sep, &block)
|
||||
def CSV.parse(filename, col_sep = ?,, row_sep = nil, &block)
|
||||
open_reader(filename, col_sep, row_sep, &block)
|
||||
end
|
||||
|
||||
def CSV.generate(filename, col_sep = ?,, &block)
|
||||
open_writer(filename, col_sep, &block)
|
||||
def CSV.generate(filename, col_sep = ?,, row_sep = nil, &block)
|
||||
open_writer(filename, col_sep, row_sep, &block)
|
||||
end
|
||||
|
||||
# Private class methods.
|
||||
class << self
|
||||
private
|
||||
def open_reader(filename, col_sep, &block)
|
||||
def open_reader(filename, col_sep, row_sep, &block)
|
||||
file = File.open(filename, 'rb')
|
||||
if block
|
||||
begin
|
||||
CSV::Reader.parse(file, col_sep) do |row|
|
||||
CSV::Reader.parse(file, col_sep, row_sep) do |row|
|
||||
yield(row)
|
||||
end
|
||||
ensure
|
||||
|
|
@ -222,17 +222,17 @@ public
|
|||
end
|
||||
nil
|
||||
else
|
||||
reader = CSV::Reader.create(file, col_sep)
|
||||
reader = CSV::Reader.create(file, col_sep, row_sep)
|
||||
reader.close_on_terminate
|
||||
reader
|
||||
end
|
||||
end
|
||||
|
||||
def open_writer(filename, col_sep, &block)
|
||||
def open_writer(filename, col_sep, row_sep, &block)
|
||||
file = File.open(filename, 'wb')
|
||||
if block
|
||||
begin
|
||||
CSV::Writer.generate(file, col_sep) do |writer|
|
||||
CSV::Writer.generate(file, col_sep, row_sep) do |writer|
|
||||
yield(writer)
|
||||
end
|
||||
ensure
|
||||
|
|
@ -240,7 +240,7 @@ public
|
|||
end
|
||||
nil
|
||||
else
|
||||
writer = CSV::Writer.create(file, col_sep)
|
||||
writer = CSV::Writer.create(file, col_sep, row_sep)
|
||||
writer.close_on_terminate
|
||||
writer
|
||||
end
|
||||
|
|
@ -275,14 +275,14 @@ public
|
|||
# DESCRIPTION
|
||||
# Create instance. To get parse result, see CSV::Reader#each.
|
||||
#
|
||||
def Reader.create(str_or_readable, col_sep = ?,)
|
||||
def Reader.create(str_or_readable, col_sep = ?,, row_sep = nil)
|
||||
case str_or_readable
|
||||
when IO
|
||||
IOReader.new(str_or_readable, col_sep)
|
||||
IOReader.new(str_or_readable, col_sep, row_sep)
|
||||
when String
|
||||
StringReader.new(str_or_readable, col_sep)
|
||||
StringReader.new(str_or_readable, col_sep, row_sep)
|
||||
else
|
||||
IOReader.new(str_or_readable, col_sep)
|
||||
IOReader.new(str_or_readable, col_sep, row_sep)
|
||||
end
|
||||
end
|
||||
|
||||
|
|
@ -305,8 +305,8 @@ public
|
|||
# Block value is always nil. Rows are not cached for performance
|
||||
# reason.
|
||||
#
|
||||
def Reader.parse(str_or_readable, col_sep = ?,)
|
||||
reader = create(str_or_readable, col_sep)
|
||||
def Reader.parse(str_or_readable, col_sep = ?,, row_sep = nil)
|
||||
reader = create(str_or_readable, col_sep, row_sep)
|
||||
reader.each do |row|
|
||||
yield(row)
|
||||
end
|
||||
|
|
@ -413,8 +413,9 @@ public
|
|||
# DESCRIPTION
|
||||
# Create instance. To get parse result, see CSV::Reader#each.
|
||||
#
|
||||
def initialize(string, col_sep = ?,)
|
||||
def initialize(string, col_sep = ?,, row_sep = nil)
|
||||
@col_sep = col_sep
|
||||
@row_sep = row_sep
|
||||
@dev = string
|
||||
@idx = 0
|
||||
if @dev[0, 3] == "\xef\xbb\xbf"
|
||||
|
|
@ -424,7 +425,7 @@ public
|
|||
|
||||
private
|
||||
def get_row(row)
|
||||
parsed_cells, next_idx = CSV.parse_row(@dev, @idx, row, @col_sep)
|
||||
parsed_cells, next_idx = CSV.parse_row(@dev, @idx, row, @col_sep, @row_sep)
|
||||
if parsed_cells == 0 && next_idx == 0 && @idx != @dev.size
|
||||
raise IllegalFormatError.new
|
||||
end
|
||||
|
|
@ -460,9 +461,10 @@ public
|
|||
# DESCRIPTION
|
||||
# Create instance. To get parse result, see CSV::Reader#each.
|
||||
#
|
||||
def initialize(io, col_sep = ?,)
|
||||
def initialize(io, col_sep = ?,, row_sep = nil)
|
||||
@io = io
|
||||
@col_sep = col_sep
|
||||
@row_sep = row_sep
|
||||
@dev = CSV::IOBuf.new(@io)
|
||||
@idx = 0
|
||||
if @dev[0] == 0xef and @dev[1] == 0xbb and @dev[2] == 0xbf
|
||||
|
|
@ -487,7 +489,7 @@ public
|
|||
|
||||
private
|
||||
def get_row(row)
|
||||
parsed_cells, next_idx = CSV.parse_row(@dev, @idx, row, @col_sep)
|
||||
parsed_cells, next_idx = CSV.parse_row(@dev, @idx, row, @col_sep, @row_sep)
|
||||
if parsed_cells == 0 && next_idx == 0 && !@dev.is_eos?
|
||||
raise IllegalFormatError.new
|
||||
end
|
||||
|
|
@ -549,8 +551,8 @@ public
|
|||
# Create instance. To add CSV data to generate CSV string, see
|
||||
# CSV::Writer#<< or CSV::Writer#add_row.
|
||||
#
|
||||
def Writer.create(str_or_readable, col_sep = ?,)
|
||||
BasicWriter.new(str_or_readable, col_sep)
|
||||
def Writer.create(str_or_readable, col_sep = ?,, row_sep = nil)
|
||||
BasicWriter.new(str_or_readable, col_sep, row_sep)
|
||||
end
|
||||
|
||||
# SYNOPSIS
|
||||
|
|
@ -572,8 +574,8 @@ public
|
|||
# To add CSV data to generate CSV string, see CSV::Writer#<< or
|
||||
# CSV::Writer#add_row.
|
||||
#
|
||||
def Writer.generate(str_or_writable, col_sep = ?,)
|
||||
writer = Writer.create(str_or_writable, col_sep)
|
||||
def Writer.generate(str_or_writable, col_sep = ?,, row_sep = nil)
|
||||
writer = Writer.create(str_or_writable, col_sep, row_sep)
|
||||
yield(writer)
|
||||
writer.close
|
||||
nil
|
||||
|
|
@ -602,7 +604,7 @@ public
|
|||
Cell.new(item.to_s, false)
|
||||
end
|
||||
}
|
||||
CSV.generate_row(row, row.size, @dev, @col_sep)
|
||||
CSV.generate_row(row, row.size, @dev, @col_sep, @row_sep)
|
||||
self
|
||||
end
|
||||
|
||||
|
|
@ -621,7 +623,7 @@ public
|
|||
# (Formar is 'c1' and latter is Null.)
|
||||
#
|
||||
def add_row(row)
|
||||
CSV.generate_row(row, row.size, @dev, @col_sep)
|
||||
CSV.generate_row(row, row.size, @dev, @col_sep, @row_sep)
|
||||
self
|
||||
end
|
||||
|
||||
|
|
@ -669,8 +671,9 @@ public
|
|||
# Create instance. To add CSV data to generate CSV string, see
|
||||
# CSV::Writer#<< or CSV::Writer#add_row.
|
||||
#
|
||||
def initialize(str_or_writable, col_sep = ?,)
|
||||
def initialize(str_or_writable, col_sep = ?,, row_sep = nil)
|
||||
@col_sep = col_sep
|
||||
@row_sep = row_sep
|
||||
@dev = str_or_writable
|
||||
@close_on_terminate = false
|
||||
end
|
||||
|
|
@ -698,12 +701,14 @@ public
|
|||
end
|
||||
|
||||
# SYNOPSIS
|
||||
# cells = CSV.parse_line(src, col_sep = ?,)
|
||||
# cells = CSV.parse_line(src, col_sep = ?,, row_sep = nil)
|
||||
#
|
||||
# ARGS
|
||||
# src: a CSV String.
|
||||
# col_sep: Column separator. ?, by default. If you want to separate
|
||||
# fields with semicolon, give ?; here.
|
||||
# row_sep: Row separator. nil by default. nil means "\r\n or \n". If you
|
||||
# want to separate records with \r, give ?\r here.
|
||||
#
|
||||
# RETURNS
|
||||
# cells: an Array of parsed cells in first line. Each cell is a String.
|
||||
|
|
@ -716,14 +721,14 @@ public
|
|||
# If you don't know whether a target string to parse is exactly 1 line or
|
||||
# not, use CSV.parse_row instead of this method.
|
||||
#
|
||||
def CSV.parse_line(src, col_sep = ?,)
|
||||
def CSV.parse_line(src, col_sep = ?,, row_sep = nil)
|
||||
idx = 0
|
||||
res_type = :DT_COLSEP
|
||||
cells = Row.new
|
||||
begin
|
||||
while (res_type.equal?(:DT_COLSEP))
|
||||
cell = Cell.new
|
||||
res_type, idx = parse_body(src, idx, cell, col_sep)
|
||||
res_type, idx = parse_body(src, idx, cell, col_sep, row_sep)
|
||||
cells.push(cell.is_null ? nil : cell.data)
|
||||
end
|
||||
rescue IllegalFormatError
|
||||
|
|
@ -734,13 +739,15 @@ public
|
|||
|
||||
|
||||
# SYNOPSIS
|
||||
# str = CSV.generate_line(cells, col_sep = ?,)
|
||||
# str = CSV.generate_line(cells, col_sep = ?,, row_sep = nil)
|
||||
#
|
||||
# ARGS
|
||||
# cells: an Array of cell to be converted to CSV string. Each cell must
|
||||
# respond to 'to_s'.
|
||||
# col_sep: Column separator. ?, by default. If you want to separate
|
||||
# fields with semicolon, give ?; here.
|
||||
# row_sep: Row separator. nil by default. nil means "\r\n or \n". If you
|
||||
# want to separate records with \r, give ?\r here.
|
||||
#
|
||||
# RETURNS
|
||||
# str: a String of generated CSV string.
|
||||
|
|
@ -748,7 +755,7 @@ public
|
|||
# DESCRIPTION
|
||||
# Create a line from cells. Each cell is stringified by to_s.
|
||||
#
|
||||
def CSV.generate_line(cells, col_sep = ?,)
|
||||
def CSV.generate_line(cells, col_sep = ?,, row_sep = nil)
|
||||
if (cells.size == 0)
|
||||
return ''
|
||||
end
|
||||
|
|
@ -761,18 +768,18 @@ public
|
|||
else
|
||||
Cell.new(cells[idx].to_s, false)
|
||||
end
|
||||
generate_body(cell, result_str, col_sep)
|
||||
generate_body(cell, result_str, col_sep, row_sep)
|
||||
idx += 1
|
||||
if (idx == cells.size)
|
||||
break
|
||||
end
|
||||
generate_separator(:DT_COLSEP, result_str, col_sep)
|
||||
generate_separator(:DT_COLSEP, result_str, col_sep, row_sep)
|
||||
end
|
||||
result_str
|
||||
end
|
||||
|
||||
# SYNOPSIS
|
||||
# parsed_cells, idx = CSV.parse_row(src, idx, out_dev, col_sep = ?,)
|
||||
# parsed_cells, idx = CSV.parse_row(src, idx, out_dev, col_sep = ?,, row_sep = nil)
|
||||
#
|
||||
# ARGS
|
||||
# src: a CSV data to be parsed. Must respond '[](idx)'.
|
||||
|
|
@ -783,6 +790,8 @@ public
|
|||
# out_dev: buffer for parsed cells. Must respond '<<(CSV::Cell)'.
|
||||
# col_sep: Column separator. ?, by default. If you want to separate
|
||||
# fields with semicolon, give ?; here.
|
||||
# row_sep: Row separator. nil by default. nil means "\r\n or \n". If you
|
||||
# want to separate records with \r, give ?\r here.
|
||||
#
|
||||
# RETURNS
|
||||
# parsed_cells: num of parsed cells.
|
||||
|
|
@ -802,14 +811,14 @@ public
|
|||
# p parsed
|
||||
# end while parsed_cells > 0
|
||||
#
|
||||
def CSV.parse_row(src, idx, out_dev, col_sep = ?,)
|
||||
def CSV.parse_row(src, idx, out_dev, col_sep = ?,, row_sep = nil)
|
||||
idx_backup = idx
|
||||
parsed_cells = 0
|
||||
res_type = :DT_COLSEP
|
||||
begin
|
||||
while (!res_type.equal?(:DT_ROWSEP))
|
||||
cell = Cell.new
|
||||
res_type, idx = parse_body(src, idx, cell, col_sep)
|
||||
res_type, idx = parse_body(src, idx, cell, col_sep, row_sep)
|
||||
if res_type.equal?(:DT_EOS)
|
||||
if idx == idx_backup #((parsed_cells == 0) && (cell.is_null))
|
||||
return 0, 0
|
||||
|
|
@ -826,7 +835,7 @@ public
|
|||
end
|
||||
|
||||
# SYNOPSIS
|
||||
# parsed_cells = CSV.generate_row(src, cells, out_dev, col_sep = ?,)
|
||||
# parsed_cells = CSV.generate_row(src, cells, out_dev, col_sep = ?,, row_sep = nil)
|
||||
#
|
||||
# ARGS
|
||||
# src: an Array of CSV::Cell to be converted to CSV string. Must respond to
|
||||
|
|
@ -835,6 +844,8 @@ public
|
|||
# out_dev: buffer for generated CSV string. Must respond to '<<(string)'.
|
||||
# col_sep: Column separator. ?, by default. If you want to separate
|
||||
# fields with semicolon, give ?; here.
|
||||
# row_sep: Row separator. nil by default. nil means "\r\n or \n". If you
|
||||
# want to separate records with \r, give ?\r here.
|
||||
#
|
||||
# RETURNS
|
||||
# parsed_cells: num of converted cells.
|
||||
|
|
@ -859,27 +870,27 @@ public
|
|||
# end
|
||||
# p buf
|
||||
#
|
||||
def CSV.generate_row(src, cells, out_dev, col_sep = ?,)
|
||||
def CSV.generate_row(src, cells, out_dev, col_sep = ?,, row_sep = nil)
|
||||
src_size = src.size
|
||||
if (src_size == 0)
|
||||
if cells == 0
|
||||
generate_separator(:DT_ROWSEP, out_dev, col_sep)
|
||||
generate_separator(:DT_ROWSEP, out_dev, col_sep, row_sep)
|
||||
end
|
||||
return 0
|
||||
end
|
||||
res_type = :DT_COLSEP
|
||||
parsed_cells = 0
|
||||
generate_body(src[parsed_cells], out_dev, col_sep)
|
||||
generate_body(src[parsed_cells], out_dev, col_sep, row_sep)
|
||||
parsed_cells += 1
|
||||
while ((parsed_cells < cells) && (parsed_cells != src_size))
|
||||
generate_separator(:DT_COLSEP, out_dev, col_sep)
|
||||
generate_body(src[parsed_cells], out_dev, col_sep)
|
||||
generate_separator(:DT_COLSEP, out_dev, col_sep, row_sep)
|
||||
generate_body(src[parsed_cells], out_dev, col_sep, row_sep)
|
||||
parsed_cells += 1
|
||||
end
|
||||
if (parsed_cells == cells)
|
||||
generate_separator(:DT_ROWSEP, out_dev, col_sep)
|
||||
generate_separator(:DT_ROWSEP, out_dev, col_sep, row_sep)
|
||||
else
|
||||
generate_separator(:DT_COLSEP, out_dev, col_sep)
|
||||
generate_separator(:DT_COLSEP, out_dev, col_sep, row_sep)
|
||||
end
|
||||
parsed_cells
|
||||
end
|
||||
|
|
@ -891,7 +902,8 @@ private
|
|||
class << self
|
||||
private
|
||||
|
||||
def parse_body(src, idx, cell, col_sep)
|
||||
def parse_body(src, idx, cell, col_sep, row_sep)
|
||||
row_sep_end = row_sep || ?\n
|
||||
cell.is_null = false
|
||||
state = :ST_START
|
||||
quoted = false
|
||||
|
|
@ -941,7 +953,7 @@ private
|
|||
quoted = true
|
||||
state = :ST_DATA
|
||||
end
|
||||
elsif (c == ?\r)
|
||||
elsif row_sep.nil? and c == ?\r
|
||||
if cr
|
||||
raise IllegalFormatError.new
|
||||
end
|
||||
|
|
@ -951,7 +963,7 @@ private
|
|||
else
|
||||
cr = true
|
||||
end
|
||||
elsif (c == ?\n)
|
||||
elsif c == row_sep_end
|
||||
if state.equal?(:ST_DATA)
|
||||
if cr
|
||||
state = :ST_END
|
||||
|
|
@ -1004,12 +1016,14 @@ private
|
|||
return :DT_EOS, idx
|
||||
end
|
||||
|
||||
def generate_body(cells, out_dev, col_sep)
|
||||
def generate_body(cells, out_dev, col_sep, row_sep)
|
||||
row_data = cells.data.dup
|
||||
if (!cells.is_null)
|
||||
if (row_data.gsub!('"', '""') ||
|
||||
row_data.include?(col_sep) ||
|
||||
(/[\r\n]/ =~ row_data) || (cells.data.empty?))
|
||||
(row_sep && row_data.index(row_sep)) ||
|
||||
(/[\r\n]/ =~ row_data) ||
|
||||
(cells.data.empty?))
|
||||
out_dev << '"' << row_data << '"'
|
||||
else
|
||||
out_dev << row_data
|
||||
|
|
@ -1017,12 +1031,12 @@ private
|
|||
end
|
||||
end
|
||||
|
||||
def generate_separator(type, out_dev, col_sep)
|
||||
def generate_separator(type, out_dev, col_sep, row_sep)
|
||||
case type
|
||||
when :DT_COLSEP
|
||||
out_dev << col_sep.chr
|
||||
when :DT_ROWSEP
|
||||
out_dev << "\r\n"
|
||||
out_dev << (row_sep || "\r\n")
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue