mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
* lib/logger.rb: leading 0 padding of timestamp usec part.
* lib/csv.rb (CSV.parse): [CAUTION] behavior changed. in the past, CSV.parse accepts a filename to be read-opened (it was just a shortcut of CSV.open(filename, 'r')). now CSV.parse accepts a string or a stream to be parsed e.g. CSV.parse("1,2\n3,4") #=> [['1', '2'], ['3', '4']] * lib/csv.rb: CSV::Row and CSV::Cell are deprecated. these classes are removed in the future. in the new csv.rb, row is represented as just an Array. since CSV::Row was a subclass of Array, it won't hurt almost all programs except one which depended on CSV::Row#match. and a cell is represented as just a String or nil(NULL). this change will cause widespread destruction. CSV.open("foo.csv", "r") do |row| row.each do |cell| if cell.is_null # using Cell#is_null p "(NULL)" else p cell.data # using Cell#data end end end must be just; CSV.open("foo.csv", "r") do |row| row.each do |cell| if cell.nil? p "(NULL)" else p cell end end end * lib/csv.rb: [CAUTION] record separator(CR, LF, CR+LF) behavior change. CSV.open, CSV.parse, and CSV.generate now do not force opened file binmode. formerly it set binmode explicitly. with CSV.open, binmode of opened file depends on the given mode parameter "r", "w", "rb", and "wb". CSV.parse and CSV.generate open file with "r" and "w". setting mode properly is user's responsibility now. * lib/csv.rb: accepts String as a fs (field separator/column separator) and rs (record separator/row separator) * lib/csv.rb (CSV.read, CSV.readlines): added. works as IO.read and IO.readlines in CSV format. * lib/csv.rb: added CSV.foreach(path, rs = nil, &block). CSV.foreach now does not handle "| cmd" as a path different from IO.foreach. needed? * test/csv/test_csv.rb: updated. * test/ruby/test_float.rb: added test_strtod to test Float("0"). git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/branches/ruby_1_8@6424 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
parent
2546a366ed
commit
1c9d6dd646
5 changed files with 792 additions and 696 deletions
63
ChangeLog
63
ChangeLog
|
@ -1,3 +1,66 @@
|
|||
Thu May 27 23:15:18 2004 NAKAMURA, Hiroshi <nakahiro@sarion.co.jp>
|
||||
|
||||
* lib/logger.rb: leading 0 padding of timestamp usec part.
|
||||
|
||||
* lib/csv.rb (CSV.parse): [CAUTION] behavior changed. in the past,
|
||||
CSV.parse accepts a filename to be read-opened (it was just a
|
||||
shortcut of CSV.open(filename, 'r')). now CSV.parse accepts a
|
||||
string or a stream to be parsed e.g.
|
||||
CSV.parse("1,2\n3,4") #=> [['1', '2'], ['3', '4']]
|
||||
|
||||
* lib/csv.rb: CSV::Row and CSV::Cell are deprecated. these classes
|
||||
are removed in the future. in the new csv.rb, row is represented
|
||||
as just an Array. since CSV::Row was a subclass of Array, it won't
|
||||
hurt almost all programs except one which depended on CSV::Row#match.
|
||||
and a cell is represented as just a String or nil(NULL). this
|
||||
change will cause widespread destruction.
|
||||
|
||||
CSV.open("foo.csv", "r") do |row|
|
||||
row.each do |cell|
|
||||
if cell.is_null # using Cell#is_null
|
||||
p "(NULL)"
|
||||
else
|
||||
p cell.data # using Cell#data
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
must be just;
|
||||
|
||||
CSV.open("foo.csv", "r") do |row|
|
||||
row.each do |cell|
|
||||
if cell.nil?
|
||||
p "(NULL)"
|
||||
else
|
||||
p cell
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
* lib/csv.rb: [CAUTION] record separator(CR, LF, CR+LF) behavior
|
||||
change. CSV.open, CSV.parse, and CSV.generate now do not force
|
||||
opened file binmode. formerly it set binmode explicitly.
|
||||
|
||||
with CSV.open, binmode of opened file depends on the given mode
|
||||
parameter "r", "w", "rb", and "wb". CSV.parse and CSV.generate open
|
||||
file with "r" and "w".
|
||||
|
||||
setting mode properly is user's responsibility now.
|
||||
|
||||
* lib/csv.rb: accepts String as a fs (field separator/column separator)
|
||||
and rs (record separator/row separator)
|
||||
|
||||
* lib/csv.rb (CSV.read, CSV.readlines): added. works as IO.read and
|
||||
IO.readlines in CSV format.
|
||||
|
||||
* lib/csv.rb: added CSV.foreach(path, rs = nil, &block). CSV.foreach
|
||||
now does not handle "| cmd" as a path different from IO.foreach.
|
||||
needed?
|
||||
|
||||
* test/csv/test_csv.rb: updated.
|
||||
|
||||
* test/ruby/test_float.rb: added test_strtod to test Float("0").
|
||||
|
||||
Thu May 27 21:37:50 2004 Tanaka Akira <akr@m17n.org>
|
||||
|
||||
* lib/pathname.rb (Pathname#initialize): refine pathname initialization
|
||||
|
|
636
lib/csv.rb
636
lib/csv.rb
|
@ -1,4 +1,5 @@
|
|||
# CSV -- module for generating/parsing CSV data.
|
||||
# Copyright (C) 2000-2004 NAKAMURA, Hiroshi <nakahiro@sarion.co.jp>.
|
||||
|
||||
# $Id$
|
||||
|
||||
|
@ -8,103 +9,34 @@
|
|||
|
||||
|
||||
class CSV
|
||||
|
||||
# Describes a cell of CSV.
|
||||
class Cell
|
||||
# Datum as string.
|
||||
attr_accessor :data
|
||||
|
||||
# Is this datum NULL?
|
||||
attr_accessor :is_null
|
||||
|
||||
# If is_null is true, datum is stored in the instance created but it
|
||||
# should be treated as 'NULL'.
|
||||
def initialize(data = '', is_null = true)
|
||||
@data = data
|
||||
@is_null = is_null
|
||||
end
|
||||
|
||||
# Compares another cell with self. Bear in mind NULL matches with NULL.
|
||||
# Use CSV::Cell#== if you don't want NULL matches with NULL.
|
||||
# rhs: an instance of CSV::Cell to be compared.
|
||||
def match(rhs)
|
||||
if @is_null and rhs.is_null
|
||||
true
|
||||
elsif @is_null or rhs.is_null
|
||||
false
|
||||
else
|
||||
@data == rhs.data
|
||||
end
|
||||
end
|
||||
|
||||
# Compares another cell with self. Bear in mind NULL does not match with
|
||||
# NULL. Use CSV::Cell#match if you want NULL matches with NULL.
|
||||
# rhs: an instance of CSV::Cell to be compared.
|
||||
def ==(rhs)
|
||||
if @is_null or rhs.is_null
|
||||
false
|
||||
else
|
||||
@data == rhs.data
|
||||
end
|
||||
end
|
||||
|
||||
def to_str
|
||||
content.to_str
|
||||
end
|
||||
|
||||
def to_s
|
||||
content.to_s
|
||||
end
|
||||
|
||||
private
|
||||
|
||||
def content
|
||||
@is_null ? nil : data
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
# Describes a row of CSV. Each element must be a CSV::Cell.
|
||||
class Row < Array
|
||||
|
||||
# Returns the strings contained in the row's cells.
|
||||
def to_a
|
||||
self.collect { |cell| cell.is_null ? nil : cell.data }
|
||||
end
|
||||
|
||||
# Compares another row with self.
|
||||
# rhs: an Array of cells. Each cell should be a CSV::Cell.
|
||||
def match(rhs)
|
||||
if self.size != rhs.size
|
||||
return false
|
||||
end
|
||||
for idx in 0...(self.size)
|
||||
unless self[idx].match(rhs[idx])
|
||||
return false
|
||||
end
|
||||
end
|
||||
true
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
class IllegalFormatError < RuntimeError; end
|
||||
|
||||
# deprecated
|
||||
class Cell < String
|
||||
def initialize(data = "", is_null = false)
|
||||
super(is_null ? "" : data)
|
||||
end
|
||||
|
||||
def CSV.open(filename, mode, col_sep = ?,, row_sep = nil, &block)
|
||||
if mode == 'r' or mode == 'rb'
|
||||
open_reader(filename, col_sep, row_sep, &block)
|
||||
elsif mode == 'w' or mode == 'wb'
|
||||
open_writer(filename, col_sep, row_sep, &block)
|
||||
else
|
||||
raise ArgumentError.new("'mode' must be 'r', 'rb', 'w', or 'wb'")
|
||||
def data
|
||||
to_s
|
||||
end
|
||||
end
|
||||
|
||||
# Open a CSV formatted file for reading.
|
||||
# deprecated
|
||||
class Row < Array
|
||||
end
|
||||
|
||||
# Open a CSV formatted file for reading or writing.
|
||||
#
|
||||
# For reading.
|
||||
#
|
||||
# EXAMPLE 1
|
||||
# reader = CSV.parse('csvfile.csv')
|
||||
# CSV.open('csvfile.csv', 'r') do |row|
|
||||
# p row
|
||||
# end
|
||||
#
|
||||
# EXAMPLE 2
|
||||
# reader = CSV.open('csvfile.csv', 'r')
|
||||
# row1 = reader.shift
|
||||
# row2 = reader.shift
|
||||
# if row2.empty?
|
||||
|
@ -112,11 +44,6 @@ class CSV
|
|||
# end
|
||||
# reader.close
|
||||
#
|
||||
# EXAMPLE 2
|
||||
# CSV.parse('csvfile.csv') do |row|
|
||||
# p row
|
||||
# end
|
||||
#
|
||||
# ARGS
|
||||
# filename: filename to parse.
|
||||
# col_sep: Column separator. ?, by default. If you want to separate
|
||||
|
@ -127,24 +54,21 @@ class CSV
|
|||
# RETURNS
|
||||
# reader instance. To get parse result, see CSV::Reader#each.
|
||||
#
|
||||
def CSV.parse(filename, col_sep = ?,, row_sep = nil, &block)
|
||||
open_reader(filename, col_sep, row_sep, &block)
|
||||
end
|
||||
|
||||
# Open a CSV formatted file for writing.
|
||||
#
|
||||
# For writing.
|
||||
#
|
||||
# EXAMPLE 1
|
||||
# writer = CSV.generate('csvfile.csv')
|
||||
# writer << ['r1c1', 'r1c2'] << ['r2c1', 'r2c2'] << [nil, nil]
|
||||
# writer.close
|
||||
#
|
||||
# EXAMPLE 2
|
||||
# CSV.generate('csvfile.csv') do |writer|
|
||||
# CSV.open('csvfile.csv', 'w') do |writer|
|
||||
# writer << ['r1c1', 'r1c2']
|
||||
# writer << ['r2c1', 'r2c2']
|
||||
# writer << [nil, nil]
|
||||
# end
|
||||
#
|
||||
# EXAMPLE 2
|
||||
# writer = CSV.open('csvfile.csv', 'w')
|
||||
# writer << ['r1c1', 'r1c2'] << ['r2c1', 'r2c2'] << [nil, nil]
|
||||
# writer.close
|
||||
#
|
||||
# ARGS
|
||||
# filename: filename to generate.
|
||||
# col_sep: Column separator. ?, by default. If you want to separate
|
||||
|
@ -156,8 +80,52 @@ class CSV
|
|||
# writer instance. See CSV::Writer#<< and CSV::Writer#add_row to know how
|
||||
# to generate CSV string.
|
||||
#
|
||||
def CSV.generate(filename, col_sep = ?,, row_sep = nil, &block)
|
||||
open_writer(filename, col_sep, row_sep, &block)
|
||||
def CSV.open(path, mode, fs = nil, rs = nil, &block)
|
||||
if mode == 'r' or mode == 'rb'
|
||||
open_reader(path, mode, fs, rs, &block)
|
||||
elsif mode == 'w' or mode == 'wb'
|
||||
open_writer(path, mode, fs, rs, &block)
|
||||
else
|
||||
raise ArgumentError.new("'mode' must be 'r', 'rb', 'w', or 'wb'")
|
||||
end
|
||||
end
|
||||
|
||||
def CSV.foreach(path, rs = nil, &block)
|
||||
open_reader(path, 'r', ',', rs, &block)
|
||||
end
|
||||
|
||||
def CSV.read(path, length = nil, offset = nil)
|
||||
CSV.parse(IO.read(path, length, offset))
|
||||
end
|
||||
|
||||
def CSV.readlines(path, rs = nil)
|
||||
reader = open_reader(path, 'r', ',', rs)
|
||||
begin
|
||||
reader.collect { |row| row }
|
||||
ensure
|
||||
reader.close
|
||||
end
|
||||
end
|
||||
|
||||
def CSV.generate(path, fs = nil, rs = nil, &block)
|
||||
open_writer(path, 'w', fs, rs, &block)
|
||||
end
|
||||
|
||||
# Parse lines from given string or stream. Return rows as an Array of Arrays.
|
||||
def CSV.parse(str_or_readable, fs = nil, rs = nil, &block)
|
||||
if File.exist?(str_or_readable)
|
||||
STDERR.puts("CSV.parse(filename) is deprecated." +
|
||||
" Use CSV.open(filename, 'r') instead.")
|
||||
return open_reader(str_or_readable, 'r', fs, rs, &block)
|
||||
end
|
||||
if block
|
||||
CSV::Reader.parse(str_or_readable, fs, rs) do |row|
|
||||
yield(row)
|
||||
end
|
||||
nil
|
||||
else
|
||||
CSV::Reader.create(str_or_readable, fs, rs).collect { |row| row }
|
||||
end
|
||||
end
|
||||
|
||||
# Parse a line from given string. Bear in mind it parses ONE LINE. Rest of
|
||||
|
@ -166,47 +134,54 @@ class CSV
|
|||
#
|
||||
# If you don't know whether a target string to parse is exactly 1 line or
|
||||
# not, use CSV.parse_row instead of this method.
|
||||
def CSV.parse_line(src, col_sep = ?,, row_sep = nil)
|
||||
def CSV.parse_line(src, fs = nil, rs = nil)
|
||||
fs ||= ','
|
||||
if fs.is_a?(Fixnum)
|
||||
fs = fs.chr
|
||||
end
|
||||
if !rs.nil? and rs.is_a?(Fixnum)
|
||||
rs = rs.chr
|
||||
end
|
||||
idx = 0
|
||||
res_type = :DT_COLSEP
|
||||
cells = Row.new
|
||||
row = []
|
||||
begin
|
||||
while (res_type.equal?(:DT_COLSEP))
|
||||
cell = Cell.new
|
||||
res_type, idx = parse_body(src, idx, cell, col_sep, row_sep)
|
||||
cells.push(cell.is_null ? nil : cell.data)
|
||||
while res_type == :DT_COLSEP
|
||||
res_type, idx, cell = parse_body(src, idx, fs, rs)
|
||||
row << cell
|
||||
end
|
||||
rescue IllegalFormatError
|
||||
return Row.new
|
||||
return []
|
||||
end
|
||||
cells
|
||||
row
|
||||
end
|
||||
|
||||
# Create a line from cells. each cell is stringified by to_s.
|
||||
def CSV.generate_line(cells, col_sep = ?,, row_sep = nil)
|
||||
if (cells.size == 0)
|
||||
def CSV.generate_line(row, fs = nil, rs = nil)
|
||||
if row.size == 0
|
||||
return ''
|
||||
end
|
||||
fs ||= ','
|
||||
if fs.is_a?(Fixnum)
|
||||
fs = fs.chr
|
||||
end
|
||||
if !rs.nil? and rs.is_a?(Fixnum)
|
||||
rs = rs.chr
|
||||
end
|
||||
res_type = :DT_COLSEP
|
||||
result_str = ''
|
||||
idx = 0
|
||||
while true
|
||||
cell = if (cells[idx].nil?)
|
||||
Cell.new('', true)
|
||||
else
|
||||
Cell.new(cells[idx].to_s, false)
|
||||
end
|
||||
generate_body(cell, result_str, col_sep, row_sep)
|
||||
generate_body(row[idx], result_str, fs, rs)
|
||||
idx += 1
|
||||
if (idx == cells.size)
|
||||
if (idx == row.size)
|
||||
break
|
||||
end
|
||||
generate_separator(:DT_COLSEP, result_str, col_sep, row_sep)
|
||||
generate_separator(:DT_COLSEP, result_str, fs, rs)
|
||||
end
|
||||
result_str
|
||||
end
|
||||
|
||||
|
||||
# Parse a line from string. Consider using CSV.parse_line instead.
|
||||
# To parse lines in CSV string, see EXAMPLE below.
|
||||
#
|
||||
|
@ -226,7 +201,7 @@ class CSV
|
|||
# src[](idx_out_of_bounds) must return nil. A String satisfies this
|
||||
# requirement.
|
||||
# idx: index of parsing location of 'src'. 0 origin.
|
||||
# out_dev: buffer for parsed cells. Must respond '<<(CSV::Cell)'.
|
||||
# out_dev: buffer for parsed cells. Must respond '<<(aString)'.
|
||||
# col_sep: Column separator. ?, by default. If you want to separate
|
||||
# fields with semicolon, give ?; here.
|
||||
# row_sep: Row separator. nil by default. nil means "\r\n or \n". If you
|
||||
|
@ -236,16 +211,22 @@ class CSV
|
|||
# parsed_cells: num of parsed cells.
|
||||
# idx: index of next parsing location of 'src'.
|
||||
#
|
||||
def CSV.parse_row(src, idx, out_dev, col_sep = ?,, row_sep = nil)
|
||||
def CSV.parse_row(src, idx, out_dev, fs = nil, rs = nil)
|
||||
fs ||= ','
|
||||
if fs.is_a?(Fixnum)
|
||||
fs = fs.chr
|
||||
end
|
||||
if !rs.nil? and rs.is_a?(Fixnum)
|
||||
rs = rs.chr
|
||||
end
|
||||
idx_backup = idx
|
||||
parsed_cells = 0
|
||||
res_type = :DT_COLSEP
|
||||
begin
|
||||
while (!res_type.equal?(:DT_ROWSEP))
|
||||
cell = Cell.new
|
||||
res_type, idx = parse_body(src, idx, cell, col_sep, row_sep)
|
||||
if res_type.equal?(:DT_EOS)
|
||||
if idx == idx_backup #((parsed_cells == 0) && (cell.is_null))
|
||||
while res_type != :DT_ROWSEP
|
||||
res_type, idx, cell = parse_body(src, idx, fs, rs)
|
||||
if res_type == :DT_EOS
|
||||
if idx == idx_backup #((parsed_cells == 0) and cell.nil?)
|
||||
return 0, 0
|
||||
end
|
||||
res_type = :DT_ROWSEP
|
||||
|
@ -259,18 +240,13 @@ class CSV
|
|||
return parsed_cells, idx
|
||||
end
|
||||
|
||||
|
||||
# Convert a line from cells data to string. Consider using CSV.generate_line
|
||||
# instead. To generate multi-row CSV string, see EXAMPLE below.
|
||||
#
|
||||
# EXAMPLE
|
||||
# def d(str)
|
||||
# CSV::Cell.new(str, false)
|
||||
# end
|
||||
#
|
||||
# row1 = [d('a'), d('b')]
|
||||
# row2 = [d('c'), d('d')]
|
||||
# row3 = [d('e'), d('f')]
|
||||
# row1 = ['a', 'b']
|
||||
# row2 = ['c', 'd']
|
||||
# row3 = ['e', 'f']
|
||||
# src = [row1, row2, row3]
|
||||
# buf = ''
|
||||
# src.each do |row|
|
||||
|
@ -280,8 +256,8 @@ class CSV
|
|||
# p buf
|
||||
#
|
||||
# ARGS
|
||||
# src: an Array of CSV::Cell to be converted to CSV string. Must respond to
|
||||
# 'size' and '[](idx)'. src[idx] must return CSV::Cell.
|
||||
# src: an Array of String to be converted to CSV string. Must respond to
|
||||
# 'size' and '[](idx)'. src[idx] must return String.
|
||||
# cells: num of cells in a line.
|
||||
# out_dev: buffer for generated CSV string. Must respond to '<<(string)'.
|
||||
# col_sep: Column separator. ?, by default. If you want to separate
|
||||
|
@ -292,39 +268,47 @@ class CSV
|
|||
# RETURNS
|
||||
# parsed_cells: num of converted cells.
|
||||
#
|
||||
def CSV.generate_row(src, cells, out_dev, col_sep = ?,, row_sep = nil)
|
||||
def CSV.generate_row(src, cells, out_dev, fs = nil, rs = nil)
|
||||
fs ||= ','
|
||||
if fs.is_a?(Fixnum)
|
||||
fs = fs.chr
|
||||
end
|
||||
if !rs.nil? and rs.is_a?(Fixnum)
|
||||
rs = rs.chr
|
||||
end
|
||||
src_size = src.size
|
||||
if (src_size == 0)
|
||||
if cells == 0
|
||||
generate_separator(:DT_ROWSEP, out_dev, col_sep, row_sep)
|
||||
generate_separator(:DT_ROWSEP, out_dev, fs, rs)
|
||||
end
|
||||
return 0
|
||||
end
|
||||
res_type = :DT_COLSEP
|
||||
parsed_cells = 0
|
||||
generate_body(src[parsed_cells], out_dev, col_sep, row_sep)
|
||||
generate_body(src[parsed_cells], out_dev, fs, rs)
|
||||
parsed_cells += 1
|
||||
while ((parsed_cells < cells) && (parsed_cells != src_size))
|
||||
generate_separator(:DT_COLSEP, out_dev, col_sep, row_sep)
|
||||
generate_body(src[parsed_cells], out_dev, col_sep, row_sep)
|
||||
while ((parsed_cells < cells) and (parsed_cells != src_size))
|
||||
generate_separator(:DT_COLSEP, out_dev, fs, rs)
|
||||
generate_body(src[parsed_cells], out_dev, fs, rs)
|
||||
parsed_cells += 1
|
||||
end
|
||||
if (parsed_cells == cells)
|
||||
generate_separator(:DT_ROWSEP, out_dev, col_sep, row_sep)
|
||||
generate_separator(:DT_ROWSEP, out_dev, fs, rs)
|
||||
else
|
||||
generate_separator(:DT_COLSEP, out_dev, col_sep, row_sep)
|
||||
generate_separator(:DT_COLSEP, out_dev, fs, rs)
|
||||
end
|
||||
parsed_cells
|
||||
end
|
||||
|
||||
# Private class methods.
|
||||
class << self
|
||||
private
|
||||
|
||||
def open_reader(filename, col_sep, row_sep, &block)
|
||||
file = File.open(filename, 'rb')
|
||||
def open_reader(path, mode, fs, rs, &block)
|
||||
file = File.open(path, mode)
|
||||
if block
|
||||
begin
|
||||
CSV::Reader.parse(file, col_sep, row_sep) do |row|
|
||||
CSV::Reader.parse(file, fs, rs) do |row|
|
||||
yield(row)
|
||||
end
|
||||
ensure
|
||||
|
@ -332,17 +316,17 @@ class CSV
|
|||
end
|
||||
nil
|
||||
else
|
||||
reader = CSV::Reader.create(file, col_sep, row_sep)
|
||||
reader = CSV::Reader.create(file, fs, rs)
|
||||
reader.close_on_terminate
|
||||
reader
|
||||
end
|
||||
end
|
||||
|
||||
def open_writer(filename, col_sep, row_sep, &block)
|
||||
file = File.open(filename, 'wb')
|
||||
def open_writer(path, mode, fs, rs, &block)
|
||||
file = File.open(path, mode)
|
||||
if block
|
||||
begin
|
||||
CSV::Writer.generate(file, col_sep, row_sep) do |writer|
|
||||
CSV::Writer.generate(file, fs, rs) do |writer|
|
||||
yield(writer)
|
||||
end
|
||||
ensure
|
||||
|
@ -350,134 +334,164 @@ class CSV
|
|||
end
|
||||
nil
|
||||
else
|
||||
writer = CSV::Writer.create(file, col_sep, row_sep)
|
||||
writer = CSV::Writer.create(file, fs, rs)
|
||||
writer.close_on_terminate
|
||||
writer
|
||||
end
|
||||
end
|
||||
|
||||
def parse_body(src, idx, cell, col_sep, row_sep)
|
||||
row_sep_end = row_sep || ?\n
|
||||
cell.is_null = false
|
||||
def parse_body(src, idx, fs, rs)
|
||||
fs_str = fs
|
||||
fs_size = fs_str.size
|
||||
rs_str = rs || "\n"
|
||||
rs_size = rs_str.size
|
||||
fs_idx = rs_idx = 0
|
||||
cell = Cell.new
|
||||
state = :ST_START
|
||||
quoted = false
|
||||
cr = false
|
||||
quoted = cr = false
|
||||
c = nil
|
||||
while (c = src[idx])
|
||||
idx += 1
|
||||
result_state = :DT_UNKNOWN
|
||||
if (c == col_sep)
|
||||
if state.equal?(:ST_DATA)
|
||||
last_idx = idx
|
||||
while c = src[idx]
|
||||
unless quoted
|
||||
fschar = (c == fs_str[fs_idx])
|
||||
rschar = (c == rs_str[rs_idx])
|
||||
# simple 1 char backtrack
|
||||
if !fschar and c == fs_str[0]
|
||||
fs_idx = 0
|
||||
fschar = true
|
||||
if state == :ST_START
|
||||
state = :ST_DATA
|
||||
elsif state == :ST_QUOTE
|
||||
raise IllegalFormatError
|
||||
end
|
||||
end
|
||||
if !rschar and c == rs_str[0]
|
||||
rs_idx = 0
|
||||
rschar = true
|
||||
if state == :ST_START
|
||||
state = :ST_DATA
|
||||
elsif state == :ST_QUOTE
|
||||
raise IllegalFormatError
|
||||
end
|
||||
end
|
||||
end
|
||||
if c == ?"
|
||||
fs_idx = rs_idx = 0
|
||||
if cr
|
||||
raise IllegalFormatError.new
|
||||
end
|
||||
if (!quoted)
|
||||
state = :ST_END
|
||||
result_state = :DT_COLSEP
|
||||
else
|
||||
cell.data << c.chr
|
||||
end
|
||||
elsif state.equal?(:ST_QUOTE)
|
||||
if cr
|
||||
raise IllegalFormatError.new
|
||||
end
|
||||
state = :ST_END
|
||||
result_state = :DT_COLSEP
|
||||
else # :ST_START
|
||||
cell.is_null = true
|
||||
state = :ST_END
|
||||
result_state = :DT_COLSEP
|
||||
end
|
||||
elsif (c == ?") # " for vim syntax hilighting.
|
||||
if state.equal?(:ST_DATA)
|
||||
if cr
|
||||
raise IllegalFormatError.new
|
||||
raise IllegalFormatError
|
||||
end
|
||||
cell << src[last_idx, (idx - last_idx)]
|
||||
last_idx = idx
|
||||
if state == :ST_DATA
|
||||
if quoted
|
||||
last_idx += 1
|
||||
quoted = false
|
||||
state = :ST_QUOTE
|
||||
else
|
||||
raise IllegalFormatError.new
|
||||
raise IllegalFormatError
|
||||
end
|
||||
elsif state.equal?(:ST_QUOTE)
|
||||
cell.data << c.chr
|
||||
elsif state == :ST_QUOTE
|
||||
cell << c.chr
|
||||
last_idx += 1
|
||||
quoted = true
|
||||
state = :ST_DATA
|
||||
else # :ST_START
|
||||
quoted = true
|
||||
last_idx += 1
|
||||
state = :ST_DATA
|
||||
end
|
||||
elsif row_sep.nil? and c == ?\r
|
||||
if cr
|
||||
raise IllegalFormatError.new
|
||||
elsif fschar or rschar
|
||||
if fschar
|
||||
fs_idx += 1
|
||||
end
|
||||
if rschar
|
||||
rs_idx += 1
|
||||
end
|
||||
sep = nil
|
||||
if fs_idx == fs_size
|
||||
if state == :ST_START and rs_idx > 0 and fs_idx < rs_idx
|
||||
state = :ST_DATA
|
||||
end
|
||||
cell << src[last_idx, (idx - last_idx - (fs_size - 1))]
|
||||
last_idx = idx
|
||||
fs_idx = rs_idx = 0
|
||||
if cr
|
||||
raise IllegalFormatError
|
||||
end
|
||||
sep = :DT_COLSEP
|
||||
elsif rs_idx == rs_size
|
||||
if state == :ST_START and fs_idx > 0 and rs_idx < fs_idx
|
||||
state = :ST_DATA
|
||||
end
|
||||
if !(rs.nil? and cr)
|
||||
cell << src[last_idx, (idx - last_idx - (rs_size - 1))]
|
||||
last_idx = idx
|
||||
end
|
||||
fs_idx = rs_idx = 0
|
||||
sep = :DT_ROWSEP
|
||||
end
|
||||
if sep
|
||||
if state == :ST_DATA
|
||||
return sep, idx + 1, cell;
|
||||
elsif state == :ST_QUOTE
|
||||
return sep, idx + 1, cell;
|
||||
else # :ST_START
|
||||
return sep, idx + 1, nil
|
||||
end
|
||||
end
|
||||
elsif rs.nil? and c == ?\r
|
||||
# special \r treatment for backward compatibility
|
||||
fs_idx = rs_idx = 0
|
||||
if cr
|
||||
raise IllegalFormatError
|
||||
end
|
||||
cell << src[last_idx, (idx - last_idx)]
|
||||
last_idx = idx
|
||||
if quoted
|
||||
cell.data << c.chr
|
||||
state = :ST_DATA
|
||||
else
|
||||
cr = true
|
||||
end
|
||||
elsif c == row_sep_end
|
||||
if state.equal?(:ST_DATA)
|
||||
if cr
|
||||
state = :ST_END
|
||||
result_state = :DT_ROWSEP
|
||||
cr = false
|
||||
else
|
||||
if quoted
|
||||
cell.data << c.chr
|
||||
state = :ST_DATA
|
||||
else
|
||||
state = :ST_END
|
||||
result_state = :DT_ROWSEP
|
||||
end
|
||||
end
|
||||
elsif state.equal?(:ST_QUOTE)
|
||||
state = :ST_END
|
||||
result_state = :DT_ROWSEP
|
||||
fs_idx = rs_idx = 0
|
||||
if state == :ST_DATA or state == :ST_START
|
||||
if cr
|
||||
cr = false
|
||||
raise IllegalFormatError
|
||||
end
|
||||
else # :ST_START
|
||||
cell.is_null = true
|
||||
state = :ST_END
|
||||
result_state = :DT_ROWSEP
|
||||
end
|
||||
else
|
||||
if state.equal?(:ST_DATA) || state.equal?(:ST_START)
|
||||
if cr
|
||||
raise IllegalFormatError.new
|
||||
end
|
||||
cell.data << c.chr
|
||||
state = :ST_DATA
|
||||
else # :ST_QUOTE
|
||||
raise IllegalFormatError.new
|
||||
raise IllegalFormatError
|
||||
end
|
||||
end
|
||||
if state.equal?(:ST_END)
|
||||
return result_state, idx;
|
||||
idx += 1
|
||||
end
|
||||
if state == :ST_START
|
||||
if fs_idx > 0 or rs_idx > 0
|
||||
state = :ST_DATA
|
||||
else
|
||||
return :DT_EOS, idx, nil
|
||||
end
|
||||
if state.equal?(:ST_START)
|
||||
cell.is_null = true
|
||||
elsif state.equal?(:ST_QUOTE)
|
||||
true # dummy for coverate; only a data
|
||||
elsif quoted
|
||||
raise IllegalFormatError.new
|
||||
raise IllegalFormatError
|
||||
elsif cr
|
||||
raise IllegalFormatError.new
|
||||
raise IllegalFormatError
|
||||
end
|
||||
return :DT_EOS, idx
|
||||
cell << src[last_idx, (idx - last_idx)]
|
||||
last_idx = idx
|
||||
return :DT_EOS, idx, cell
|
||||
end
|
||||
|
||||
def generate_body(cells, out_dev, col_sep, row_sep)
|
||||
row_data = cells.data.dup
|
||||
if (!cells.is_null)
|
||||
if (row_data.gsub!('"', '""') ||
|
||||
row_data.include?(col_sep) ||
|
||||
(row_sep && row_data.index(row_sep)) ||
|
||||
(/[\r\n]/ =~ row_data) ||
|
||||
(cells.data.empty?))
|
||||
def generate_body(cell, out_dev, fs, rs)
|
||||
if cell.nil?
|
||||
# empty
|
||||
else
|
||||
cell = cell.to_s
|
||||
row_data = cell.dup
|
||||
if (row_data.gsub!('"', '""') or
|
||||
row_data.index(fs) or
|
||||
(rs and row_data.index(rs)) or
|
||||
(/[\r\n]/ =~ row_data) or
|
||||
(cell.empty?))
|
||||
out_dev << '"' << row_data << '"'
|
||||
else
|
||||
out_dev << row_data
|
||||
|
@ -485,12 +499,12 @@ class CSV
|
|||
end
|
||||
end
|
||||
|
||||
def generate_separator(type, out_dev, col_sep, row_sep)
|
||||
def generate_separator(type, out_dev, fs, rs)
|
||||
case type
|
||||
when :DT_COLSEP
|
||||
out_dev << col_sep.chr
|
||||
out_dev << fs
|
||||
when :DT_ROWSEP
|
||||
out_dev << (row_sep ? row_sep.chr : "\r\n")
|
||||
out_dev << (rs || "\n")
|
||||
end
|
||||
end
|
||||
end
|
||||
|
@ -499,7 +513,7 @@ class CSV
|
|||
# CSV formatted string/stream reader.
|
||||
#
|
||||
# EXAMPLE
|
||||
# read CSV lines until the first column is 'stop'.
|
||||
# read CSV lines until the first column is 'stop'.
|
||||
#
|
||||
# CSV::Reader.parse(File.open('bigdata', 'rb')) do |row|
|
||||
# p row
|
||||
|
@ -511,30 +525,34 @@ class CSV
|
|||
|
||||
# Parse CSV data and get lines. Given block is called for each parsed row.
|
||||
# Block value is always nil. Rows are not cached for performance reason.
|
||||
def Reader.parse(str_or_readable, col_sep = ?,, row_sep = nil)
|
||||
reader = create(str_or_readable, col_sep, row_sep)
|
||||
def Reader.parse(str_or_readable, fs = ',', rs = nil, &block)
|
||||
reader = Reader.create(str_or_readable, fs, rs)
|
||||
if block
|
||||
reader.each do |row|
|
||||
yield(row)
|
||||
end
|
||||
reader.close
|
||||
nil
|
||||
else
|
||||
reader
|
||||
end
|
||||
end
|
||||
|
||||
# Returns reader instance.
|
||||
def Reader.create(str_or_readable, col_sep = ?,, row_sep = nil)
|
||||
def Reader.create(str_or_readable, fs = ',', rs = nil)
|
||||
case str_or_readable
|
||||
when IO
|
||||
IOReader.new(str_or_readable, col_sep, row_sep)
|
||||
IOReader.new(str_or_readable, fs, rs)
|
||||
when String
|
||||
StringReader.new(str_or_readable, col_sep, row_sep)
|
||||
StringReader.new(str_or_readable, fs, rs)
|
||||
else
|
||||
IOReader.new(str_or_readable, col_sep, row_sep)
|
||||
IOReader.new(str_or_readable, fs, rs)
|
||||
end
|
||||
end
|
||||
|
||||
def each
|
||||
while true
|
||||
row = Row.new
|
||||
row = []
|
||||
parsed_cells = get_row(row)
|
||||
if parsed_cells == 0
|
||||
break
|
||||
|
@ -545,7 +563,7 @@ class CSV
|
|||
end
|
||||
|
||||
def shift
|
||||
row = Row.new
|
||||
row = []
|
||||
parsed_cells = get_row(row)
|
||||
row
|
||||
end
|
||||
|
@ -557,12 +575,11 @@ class CSV
|
|||
private
|
||||
|
||||
def initialize(dev)
|
||||
raise RuntimeError.new('do not instantiate this class directly')
|
||||
raise RuntimeError.new('Do not instanciate this class directly.')
|
||||
end
|
||||
|
||||
def get_row(row)
|
||||
raise NotImplementedError.new(
|
||||
'method get_row must be defined in a derived class')
|
||||
raise NotImplementedError.new('Method get_row must be defined in a derived class.')
|
||||
end
|
||||
|
||||
def terminate
|
||||
|
@ -572,10 +589,9 @@ class CSV
|
|||
|
||||
|
||||
class StringReader < Reader
|
||||
|
||||
def initialize(string, col_sep = ?,, row_sep = nil)
|
||||
@col_sep = col_sep
|
||||
@row_sep = row_sep
|
||||
def initialize(string, fs = ',', rs = nil)
|
||||
@fs = fs
|
||||
@rs = rs
|
||||
@dev = string
|
||||
@idx = 0
|
||||
if @dev[0, 3] == "\xef\xbb\xbf"
|
||||
|
@ -586,9 +602,8 @@ class CSV
|
|||
private
|
||||
|
||||
def get_row(row)
|
||||
parsed_cells, next_idx =
|
||||
CSV.parse_row(@dev, @idx, row, @col_sep, @row_sep)
|
||||
if parsed_cells == 0 && next_idx == 0 && @idx != @dev.size
|
||||
parsed_cells, next_idx = CSV.parse_row(@dev, @idx, row, @fs, @rs)
|
||||
if parsed_cells == 0 and next_idx == 0 and @idx != @dev.size
|
||||
raise IllegalFormatError.new
|
||||
end
|
||||
@idx = next_idx
|
||||
|
@ -598,12 +613,10 @@ class CSV
|
|||
|
||||
|
||||
class IOReader < Reader
|
||||
|
||||
def initialize(io, col_sep = ?,, row_sep = nil)
|
||||
def initialize(io, fs = ',', rs = nil)
|
||||
@io = io
|
||||
@io.binmode if @io.respond_to?(:binmode)
|
||||
@col_sep = col_sep
|
||||
@row_sep = row_sep
|
||||
@fs = fs
|
||||
@rs = rs
|
||||
@dev = CSV::IOBuf.new(@io)
|
||||
@idx = 0
|
||||
if @dev[0] == 0xef and @dev[1] == 0xbb and @dev[2] == 0xbf
|
||||
|
@ -621,9 +634,8 @@ class CSV
|
|||
private
|
||||
|
||||
def get_row(row)
|
||||
parsed_cells, next_idx =
|
||||
CSV.parse_row(@dev, @idx, row, @col_sep, @row_sep)
|
||||
if parsed_cells == 0 && next_idx == 0 && !@dev.is_eos?
|
||||
parsed_cells, next_idx = CSV.parse_row(@dev, @idx, row, @fs, @rs)
|
||||
if parsed_cells == 0 and next_idx == 0 and !@dev.is_eos?
|
||||
raise IllegalFormatError.new
|
||||
end
|
||||
dropped = @dev.drop(next_idx)
|
||||
|
@ -651,56 +663,36 @@ class CSV
|
|||
# outfile = File.open('csvout', 'wb')
|
||||
# CSV::Writer.generate(outfile) do |csv|
|
||||
# csv << ['c1', nil, '', '"', "\r\n", 'c2']
|
||||
# # or
|
||||
# csv.add_row [
|
||||
# CSV::Cell.new('c1', false),
|
||||
# CSV::Cell.new('dummy', true),
|
||||
# CSV::Cell.new('', false),
|
||||
# CSV::Cell.new('"', false),
|
||||
# CSV::Cell.new("\r\n", false)
|
||||
# CSV::Cell.new('c2', false)
|
||||
# ]
|
||||
# ...
|
||||
# ...
|
||||
# end
|
||||
#
|
||||
# outfile.close
|
||||
#
|
||||
class Writer
|
||||
|
||||
# Generate CSV. Given block is called with the writer instance.
|
||||
def Writer.generate(str_or_writable, col_sep = ?,, row_sep = nil)
|
||||
writer = Writer.create(str_or_writable, col_sep, row_sep)
|
||||
# Given block is called with the writer instance. str_or_writable must
|
||||
# handle '<<(string)'.
|
||||
def Writer.generate(str_or_writable, fs = ',', rs = nil, &block)
|
||||
writer = Writer.create(str_or_writable, fs, rs)
|
||||
if block
|
||||
yield(writer)
|
||||
writer.close
|
||||
nil
|
||||
else
|
||||
writer
|
||||
end
|
||||
end
|
||||
|
||||
# str_or_writable must handle '<<(string)'.
|
||||
def Writer.create(str_or_writable, col_sep = ?,, row_sep = nil)
|
||||
BasicWriter.new(str_or_writable, col_sep, row_sep)
|
||||
def Writer.create(str_or_writable, fs = ',', rs = nil)
|
||||
BasicWriter.new(str_or_writable, fs, rs)
|
||||
end
|
||||
|
||||
# dump CSV stream to the device. argument must be an Array of String.
|
||||
def <<(ary)
|
||||
row = ary.collect { |item|
|
||||
if item.is_a?(Cell)
|
||||
item
|
||||
elsif (item.nil?)
|
||||
Cell.new('', true)
|
||||
else
|
||||
Cell.new(item.to_s, false)
|
||||
end
|
||||
}
|
||||
CSV.generate_row(row, row.size, @dev, @col_sep, @row_sep)
|
||||
self
|
||||
end
|
||||
|
||||
# dump CSV stream to the device. argument must be an Array of CSV::Cell.
|
||||
def add_row(row)
|
||||
CSV.generate_row(row, row.size, @dev, @col_sep, @row_sep)
|
||||
def <<(row)
|
||||
CSV.generate_row(row, row.size, @dev, @fs, @rs)
|
||||
self
|
||||
end
|
||||
alias add_row <<
|
||||
|
||||
def close
|
||||
terminate
|
||||
|
@ -709,7 +701,7 @@ class CSV
|
|||
private
|
||||
|
||||
def initialize(dev)
|
||||
raise RuntimeError.new('do not instantiate this class directly')
|
||||
raise RuntimeError.new('Do not instanciate this class directly.')
|
||||
end
|
||||
|
||||
def terminate
|
||||
|
@ -719,12 +711,10 @@ class CSV
|
|||
|
||||
|
||||
class BasicWriter < Writer
|
||||
|
||||
def initialize(str_or_writable, col_sep = ?,, row_sep = nil)
|
||||
@col_sep = col_sep
|
||||
@row_sep = row_sep
|
||||
def initialize(str_or_writable, fs = ',', rs = nil)
|
||||
@fs = fs
|
||||
@rs = rs
|
||||
@dev = str_or_writable
|
||||
@dev.binmode if @dev.respond_to?(:binmode)
|
||||
@close_on_terminate = false
|
||||
end
|
||||
|
||||
|
@ -743,6 +733,7 @@ class CSV
|
|||
end
|
||||
end
|
||||
|
||||
private
|
||||
|
||||
# Buffered stream.
|
||||
#
|
||||
|
@ -756,7 +747,7 @@ class CSV
|
|||
# end
|
||||
#
|
||||
# # define my own 'read' method.
|
||||
# # CAUTION: Returning nil means EndOfStream.
|
||||
# # CAUTION: Returning nil means EndOfStream.
|
||||
# def read(size)
|
||||
# @s.read(size)
|
||||
# end
|
||||
|
@ -801,8 +792,7 @@ class CSV
|
|||
# end
|
||||
# end
|
||||
#
|
||||
class StreamBuf # pure virtual. (do not instantiate it directly)
|
||||
|
||||
class StreamBuf
|
||||
# get a char or a partial string from the stream.
|
||||
# idx: index of a string to specify a start point of a string to get.
|
||||
# unlike String instance, idx < 0 returns nil.
|
||||
|
@ -867,7 +857,7 @@ class CSV
|
|||
end
|
||||
size_dropped = 0
|
||||
while (n > 0)
|
||||
if (!@is_eos || (@cur_buf != @buf_tail_idx))
|
||||
if !@is_eos or (@cur_buf != @buf_tail_idx)
|
||||
if (@offset + n < buf_size(@cur_buf))
|
||||
size_dropped += n
|
||||
@offset += n
|
||||
|
@ -912,11 +902,10 @@ class CSV
|
|||
|
||||
# protected method 'read' must be defined in derived classes.
|
||||
# CAUTION: Returning a string which size is not equal to 'size' means
|
||||
# EndOfStream. When it is not at EOS, you must block the callee, try to
|
||||
# EndOfStream. When it is not at EOS, you must block the callee, try to
|
||||
# read and return the sized string.
|
||||
def read(size) # raise EOFError
|
||||
raise NotImplementedError.new(
|
||||
'method read must be defined in a derived class')
|
||||
raise NotImplementedError.new('Method read must be defined in a derived class.')
|
||||
end
|
||||
|
||||
private
|
||||
|
@ -964,13 +953,12 @@ class CSV
|
|||
end
|
||||
|
||||
def idx_is_eos?(idx)
|
||||
(@is_eos && ((@cur_buf < 0) || (@cur_buf == @buf_tail_idx)))
|
||||
(@is_eos and ((@cur_buf < 0) or (@cur_buf == @buf_tail_idx)))
|
||||
end
|
||||
|
||||
BufSize = 1024 * 8
|
||||
end
|
||||
|
||||
|
||||
# Buffered IO.
|
||||
#
|
||||
# EXAMPLE
|
||||
|
|
|
@ -427,7 +427,7 @@ private
|
|||
|
||||
def format_datetime(datetime)
|
||||
if @datetime_format.nil?
|
||||
datetime.strftime("%Y-%m-%dT%H:%M:%S.") << "%6d " % datetime.usec
|
||||
datetime.strftime("%Y-%m-%dT%H:%M:%S.") << "%06d " % datetime.usec
|
||||
else
|
||||
datetime.strftime(@datetime_format)
|
||||
end
|
||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -51,4 +51,22 @@ class TestFloat < Test::Unit::TestCase
|
|||
b = 100000000000000000000000.0
|
||||
assert_equal(a == b, b == a)
|
||||
end
|
||||
|
||||
def test_strtod
|
||||
a = Float("0")
|
||||
assert(a.abs < Float::EPSILON)
|
||||
a = Float("0.0")
|
||||
assert(a.abs < Float::EPSILON)
|
||||
a = Float("+0.0")
|
||||
assert(a.abs < Float::EPSILON)
|
||||
a = Float("-0.0")
|
||||
assert(a.abs < Float::EPSILON)
|
||||
a = Float("0." + "00" * Float::DIG + "1")
|
||||
assert(a != 0.0)
|
||||
a = Float("+0." + "00" * Float::DIG + "1")
|
||||
assert(a != 0.0)
|
||||
a = Float("-0." + "00" * Float::DIG + "1")
|
||||
assert(a != 0.0)
|
||||
# add expected behaviour here.
|
||||
end
|
||||
end
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue