1
0
Fork 0
mirror of https://github.com/ruby/ruby.git synced 2022-11-09 12:17:21 -05:00

Import CSV 3.0.8

This includes performance improvements and backward incompatibility
fixes.


git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@67560 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
kou 2019-04-14 21:01:51 +00:00
parent fb96811d15
commit e3b6c7c7eb
23 changed files with 1534 additions and 650 deletions

2
NEWS
View file

@ -62,7 +62,7 @@ Regexp/String::
CSV::
* Upgrade to 3.0.4.
* Upgrade to 3.0.8.
See https://github.com/ruby/csv/blob/master/NEWS.md.
Date::

View file

@ -504,9 +504,9 @@ class CSV
# <tt>encoding: "UTF-32BE:UTF-8"</tt> would read UTF-32BE data from the file
# but transcode it to UTF-8 before CSV parses it.
#
def self.foreach(path, **options, &block)
return to_enum(__method__, path, options) unless block_given?
open(path, options) do |csv|
def self.foreach(path, mode="r", **options, &block)
return to_enum(__method__, path, mode, options) unless block_given?
open(path, mode, options) do |csv|
csv.each(&block)
end
end
@ -885,6 +885,10 @@ class CSV
# blank string field is replaced by
# the set object.
# <b><tt>:quote_empty</tt></b>:: TODO
# <b><tt>:write_converters</tt></b>:: TODO
# <b><tt>:write_nil_value</tt></b>:: TODO
# <b><tt>:write_empty_value</tt></b>:: TODO
# <b><tt>:strip</tt></b>:: TODO
#
# See CSV::DEFAULT_OPTIONS for the default settings.
#
@ -911,7 +915,11 @@ class CSV
encoding: nil,
nil_value: nil,
empty_value: "",
quote_empty: true)
quote_empty: true,
write_converters: nil,
write_nil_value: nil,
write_empty_value: "",
strip: false)
raise ArgumentError.new("Cannot parse nil as CSV") if data.nil?
# create the IO object we will read from
@ -922,8 +930,13 @@ class CSV
nil_value: nil_value,
empty_value: empty_value,
}
@write_fields_converter_options = {
nil_value: write_nil_value,
empty_value: write_empty_value,
}
@initial_converters = converters
@initial_header_converters = header_converters
@initial_write_converters = write_converters
@parser_options = {
column_separator: col_sep,
@ -939,6 +952,7 @@ class CSV
encoding: @encoding,
nil_value: nil_value,
empty_value: empty_value,
strip: strip,
}
@parser = nil
@ -998,7 +1012,7 @@ class CSV
# as is.
#
def converters
fields_converter.map do |converter|
parser_fields_converter.map do |converter|
name = Converters.rassoc(converter)
name ? name.first : converter
end
@ -1098,12 +1112,58 @@ class CSV
### IO and StringIO Delegation ###
extend Forwardable
def_delegators :@io, :binmode, :binmode?, :close, :close_read, :close_write,
:closed?, :eof, :eof?, :external_encoding, :fcntl,
:fileno, :flock, :flush, :fsync, :internal_encoding,
:ioctl, :isatty, :path, :pid, :pos, :pos=, :reopen,
:seek, :stat, :string, :sync, :sync=, :tell, :to_i,
:to_io, :truncate, :tty?
def_delegators :@io, :binmode, :close, :close_read, :close_write,
:closed?, :external_encoding, :fcntl,
:fileno, :flush, :fsync, :internal_encoding,
:isatty, :pid, :pos, :pos=, :reopen,
:seek, :string, :sync, :sync=, :tell,
:truncate, :tty?
# Reports whether the wrapped IO is in binary mode.
# A wrapped object that does not implement +binmode?+ is reported as
# not being in binary mode.
def binmode?
  return false unless @io.respond_to?(:binmode?)

  @io.binmode?
end
# Forwards +flock+ and its arguments to the wrapped IO.
# Raises NotImplementedError when the wrapped IO has no +flock+.
def flock(*args)
  if @io.respond_to?(:flock)
    @io.flock(*args)
  else
    raise NotImplementedError
  end
end
# Forwards +ioctl+ and its arguments to the wrapped IO.
# Raises NotImplementedError when the wrapped IO has no +ioctl+.
def ioctl(*args)
  if @io.respond_to?(:ioctl)
    @io.ioctl(*args)
  else
    raise NotImplementedError
  end
end
# Returns the wrapped IO's path, or +nil+ when the wrapped IO has no
# +path+ method.
def path
  @io.respond_to?(:path) ? @io.path : nil
end
# Forwards +stat+ and its arguments to the wrapped IO.
# Raises NotImplementedError when the wrapped IO has no +stat+.
def stat(*args)
  if @io.respond_to?(:stat)
    @io.stat(*args)
  else
    raise NotImplementedError
  end
end
# Forwards +to_i+ to the wrapped IO.
# Raises NotImplementedError when the wrapped IO has no +to_i+.
def to_i
  if @io.respond_to?(:to_i)
    @io.to_i
  else
    raise NotImplementedError
  end
end
# Returns the wrapped IO's own +to_io+ result when it has one;
# otherwise returns the wrapped object itself.
def to_io
  return @io unless @io.respond_to?(:to_io)

  @io.to_io
end
# Returns +true+ when the parser enumerator has no more rows.
# Peeking at the enumerator raises StopIteration once it is exhausted,
# which is reported here as end of file.
def eof?
  parser_enumerator.peek
  false
rescue StopIteration
  true
end
alias_method :eof, :eof?
# Rewinds the underlying IO object and resets CSV's lineno() counter.
def rewind
@ -1145,7 +1205,7 @@ class CSV
# converted field or the field itself.
#
def convert(name = nil, &converter)
fields_converter.add_converter(name, &converter)
parser_fields_converter.add_converter(name, &converter)
end
#
@ -1173,7 +1233,7 @@ class CSV
# The data source must be open for reading.
#
def each(&block)
parser.parse(&block)
parser_enumerator.each(&block)
end
#
@ -1204,9 +1264,8 @@ class CSV
# The data source must be open for reading.
#
def shift
@parser_enumerator ||= parser.parse
begin
@parser_enumerator.next
parser_enumerator.next
rescue StopIteration
nil
end
@ -1299,7 +1358,7 @@ class CSV
if headers
header_fields_converter.convert(fields, nil, 0)
else
fields_converter.convert(fields, @headers, lineno)
parser_fields_converter.convert(fields, @headers, lineno)
end
end
@ -1316,20 +1375,16 @@ class CSV
end
end
def fields_converter
@fields_converter ||= build_fields_converter
# Returns the fields converter used for parsing, building it on first use
# and reusing the same object afterwards.
def parser_fields_converter
  return @parser_fields_converter if @parser_fields_converter

  @parser_fields_converter = build_parser_fields_converter
end
def build_fields_converter
def build_parser_fields_converter
specific_options = {
builtin_converters: Converters,
}
options = @base_fields_converter_options.merge(specific_options)
fields_converter = FieldsConverter.new(options)
normalize_converters(@initial_converters).each do |name, converter|
fields_converter.add_converter(name, &converter)
end
fields_converter
build_fields_converter(@initial_converters, options)
end
def header_fields_converter
@ -1342,8 +1397,21 @@ class CSV
accept_nil: true,
}
options = @base_fields_converter_options.merge(specific_options)
build_fields_converter(@initial_header_converters, options)
end
# Returns the fields converter used for writing, building it on first use
# and reusing the same object afterwards.
def writer_fields_converter
  return @writer_fields_converter if @writer_fields_converter

  @writer_fields_converter = build_writer_fields_converter
end
# Builds the fields converter for writing from the write converters and
# the write-specific converter options captured at initialization.
def build_writer_fields_converter
  converters = @initial_write_converters
  options = @write_fields_converter_options
  build_fields_converter(converters, options)
end
def build_fields_converter(initial_converters, options)
fields_converter = FieldsConverter.new(options)
normalize_converters(@initial_header_converters).each do |name, converter|
normalize_converters(initial_converters).each do |name, converter|
fields_converter.add_converter(name, &converter)
end
fields_converter
@ -1354,8 +1422,12 @@ class CSV
end
def parser_options
@parser_options.merge(fields_converter: fields_converter,
header_fields_converter: header_fields_converter)
@parser_options.merge(header_fields_converter: header_fields_converter,
fields_converter: parser_fields_converter)
end
# Returns the enumerator produced by the parser's +parse+, creating it on
# first use and reusing the same object afterwards.
def parser_enumerator
  return @parser_enumerator if @parser_enumerator

  @parser_enumerator = parser.parse
end
def writer
@ -1363,7 +1435,8 @@ class CSV
end
def writer_options
@writer_options.merge(header_fields_converter: header_fields_converter)
@writer_options.merge(header_fields_converter: header_fields_converter,
fields_converter: writer_fields_converter)
end
end

View file

@ -25,6 +25,7 @@ Gem::Specification.new do |spec|
"lib/csv.rb",
"lib/csv/core_ext/array.rb",
"lib/csv/core_ext/string.rb",
"lib/csv/delete_suffix.rb",
"lib/csv/fields_converter.rb",
"lib/csv/match_p.rb",
"lib/csv/parser.rb",

18
lib/csv/delete_suffix.rb Normal file
View file

@ -0,0 +1,18 @@
# frozen_string_literal: true
# Backport of String#delete_suffix, defined as a refinement only when
# String does not already provide the method.
unless String.method_defined?(:delete_suffix)
  class CSV
    module DeleteSuffix
      refine String do
        # Returns the receiver without +suffix+ when it ends with +suffix+;
        # otherwise returns the receiver itself.
        def delete_suffix(suffix)
          return self unless end_with?(suffix)

          self[0, size - suffix.size]
        end
      end
    end
  end
end

View file

@ -2,10 +2,12 @@
require "strscan"
require_relative "delete_suffix"
require_relative "match_p"
require_relative "row"
require_relative "table"
using CSV::DeleteSuffix if CSV.const_defined?(:DeleteSuffix)
using CSV::MatchP if CSV.const_defined?(:MatchP)
class CSV
@ -21,6 +23,15 @@ class CSV
@keeps = []
end
# Yields each +row_separator+-delimited line of the unscanned rest of the
# string. Before each line is yielded the scan position is moved to the
# byte just after that line.
def each_line(row_separator)
  remaining = rest
  consumed = pos
  remaining.each_line(row_separator) do |chunk|
    self.pos = (consumed += chunk.bytesize)
    yield(chunk)
  end
end
def keep_start
@keeps.push(pos)
end
@ -49,6 +60,50 @@ class CSV
read_chunk
end
# Yields each line delimited by +row_separator+, starting from the
# unscanned rest of the current scanner and reading further chunks with
# read_chunk until it returns a false value.
#
# A trailing piece that does not end with +row_separator+ is held in
# +buffer+ and combined with data from the next chunk. Whatever is still
# buffered after the last chunk is yielded as the final line.
def each_line(row_separator)
buffer = nil
input = @scanner.rest
position = @scanner.pos
offset = 0
n_row_separator_chars = row_separator.size
while true
input.each_line(row_separator) do |line|
@scanner.pos += line.bytesize
if buffer
# A 2-character row separator may be split over two chunks: the buffer
# then ends with its first character and this line starts with its
# second one. The second character is moved onto the buffer, which is
# yielded as a complete line.
if n_row_separator_chars == 2 and
buffer.end_with?(row_separator[0]) and
line.start_with?(row_separator[1])
buffer << line[0]
line = line[1..-1]
position += buffer.bytesize + offset
@scanner.pos = position
offset = 0
yield(buffer)
buffer = nil
next if line.empty?
else
# Otherwise the buffered piece is simply the beginning of this line.
buffer << line
line = buffer
buffer = nil
end
end
if line.end_with?(row_separator)
position += line.bytesize + offset
@scanner.pos = position
offset = 0
yield(line)
else
# Incomplete line: keep it until more data arrives.
buffer = line
end
end
break unless read_chunk
input = @scanner.rest
position = @scanner.pos
# NOTE(review): offset is the negated size of the carried-over buffer,
# presumably so those bytes are not counted against the position in the
# newly read chunk -- confirm against read_chunk.
offset = -buffer.bytesize if buffer
end
yield(buffer) if buffer
end
def scan(pattern)
value = @scanner.scan(pattern)
return value if @last_scanner
@ -94,7 +149,7 @@ class CSV
start, buffer = @keeps.pop
if buffer
string = @scanner.string
keep = string[start, string.size - start]
keep = string.byteslice(start, string.bytesize - start)
if keep and not keep.empty?
@inputs.unshift(StringIO.new(keep))
@last_scanner = false
@ -103,6 +158,7 @@ class CSV
else
@scanner.pos = start
end
read_chunk if @scanner.eos?
end
def keep_drop
@ -121,7 +177,7 @@ class CSV
keep = @keeps.last
keep_start = keep[0]
string = @scanner.string
keep_data = string[keep_start, @scanner.pos - keep_start]
keep_data = string.byteslice(keep_start, @scanner.pos - keep_start)
if keep_data
keep_buffer = keep[1]
if keep_buffer
@ -170,7 +226,6 @@ class CSV
@input = input
@options = options
@samples = []
@parsed = false
prepare
end
@ -230,9 +285,7 @@ class CSV
def parse(&block)
return to_enum(__method__) unless block_given?
return if @parsed
if @return_headers and @headers
if @return_headers and @headers and @raw_headers
headers = Row.new(@headers, @raw_headers, true)
if @unconverted_fields
headers = add_unconverted_fields(headers, [])
@ -240,58 +293,25 @@ class CSV
yield headers
end
row = []
begin
@scanner = build_scanner
skip_needless_lines
start_row
while true
@quoted_column_value = false
@unquoted_column_value = false
value = parse_column_value
if value and @field_size_limit and value.size >= @field_size_limit
raise MalformedCSVError.new("Field size exceeded", @lineno + 1)
end
if parse_column_end
row << value
elsif parse_row_end
if row.empty? and value.nil?
emit_row([], &block) unless @skip_blanks
else
row << value
emit_row(row, &block)
row = []
end
skip_needless_lines
start_row
elsif @scanner.eos?
break if row.empty? and value.nil?
row << value
emit_row(row, &block)
break
else
if @quoted_column_value
message = "Do not allow except col_sep_split_separator " +
"after quoted fields"
raise MalformedCSVError.new(message, @lineno + 1)
elsif @unquoted_column_value and @scanner.scan(@cr_or_lf)
message = "Unquoted fields do not allow \\r or \\n"
raise MalformedCSVError.new(message, @lineno + 1)
elsif @scanner.rest.start_with?(@quote_character)
message = "Illegal quoting"
raise MalformedCSVError.new(message, @lineno + 1)
else
raise MalformedCSVError.new("TODO: Meaningful message",
@lineno + 1)
end
end
@scanner ||= build_scanner
if quote_character.nil?
parse_no_quote(&block)
elsif @need_robust_parsing
parse_quotable_robust(&block)
else
parse_quotable_loose(&block)
end
rescue InvalidEncoding
if @scanner
ignore_broken_line
lineno = @lineno
else
lineno = @lineno + 1
end
message = "Invalid byte sequence in #{@encoding}"
raise MalformedCSVError.new(message, @lineno + 1)
raise MalformedCSVError.new(message, lineno)
end
@parsed = true
end
def use_headers?
@ -301,13 +321,20 @@ class CSV
private
def prepare
prepare_variable
prepare_regexp
prepare_quote_character
prepare_backslash
prepare_skip_lines
prepare_strip
prepare_separators
prepare_quoted
prepare_unquoted
prepare_line
prepare_header
prepare_parser
end
def prepare_variable
@need_robust_parsing = false
@encoding = @options[:encoding]
liberal_parsing = @options[:liberal_parsing]
if liberal_parsing
@ -315,11 +342,15 @@ class CSV
if liberal_parsing.is_a?(Hash)
@double_quote_outside_quote =
liberal_parsing[:double_quote_outside_quote]
@backslash_quote = liberal_parsing[:backslash_quote]
else
@double_quote_outside_quote = false
@backslash_quote = false
end
@need_robust_parsing = true
else
@liberal_parsing = false
@backslash_quote = false
end
@unconverted_fields = @options[:unconverted_fields]
@field_size_limit = @options[:field_size_limit]
@ -328,20 +359,39 @@ class CSV
@header_fields_converter = @options[:header_fields_converter]
end
def prepare_regexp
@column_separator = @options[:column_separator].to_s.encode(@encoding)
@row_separator =
resolve_row_separator(@options[:row_separator]).encode(@encoding)
@quote_character = @options[:quote_character].to_s.encode(@encoding)
if @quote_character.length != 1
raise ArgumentError, ":quote_char has to be a single character String"
# Reads the :quote_character option and derives the quote-related values.
#
# With a +nil+ quote character the escaped forms are set to +nil+.
# Otherwise the quote is converted to a String in @encoding, must be
# exactly one character long (ArgumentError if not), and the doubled
# quote, the regexp-escaped quote and its Regexp are stored.
def prepare_quote_character
  raw_quote = @options[:quote_character]
  @quote_character = raw_quote
  if raw_quote.nil?
    @escaped_quote_character = nil
    @escaped_quote = nil
    return
  end

  @quote_character = raw_quote.to_s.encode(@encoding)
  unless @quote_character.length == 1
    raise ArgumentError,
          ":quote_char has to be nil or a single character String"
  end
  @double_quote_character = @quote_character * 2
  @escaped_quote_character = Regexp.escape(@quote_character)
  @escaped_quote = Regexp.new(@escaped_quote_character)
end
escaped_column_separator = Regexp.escape(@column_separator)
escaped_first_column_separator = Regexp.escape(@column_separator[0])
escaped_row_separator = Regexp.escape(@row_separator)
escaped_quote_character = Regexp.escape(@quote_character)
# Prepares the values needed for backslash-escaped quotes.
# Does nothing unless @backslash_quote is enabled.
#
# Sets the backslash character in @encoding, its regexp-escaped form and
# Regexp, and @backslash_quote_character: the backslash followed by the
# quote character (+nil+ when there is no quote character).
def prepare_backslash
  return unless @backslash_quote

  @backslash_character = "\\".encode(@encoding)

  @escaped_backslash_character = Regexp.escape(@backslash_character)
  @escaped_backslash = Regexp.new(@escaped_backslash_character)
  if @quote_character.nil?
    @backslash_quote_character = nil
  else
    # This value is used as a String pattern for gsub!, which matches it
    # literally, so it must contain the raw quote character. Using the
    # regexp-escaped quote would add a stray backslash for quote characters
    # such as "|" or "*" and the pattern would never match.
    @backslash_quote_character = @backslash_character + @quote_character
  end
end
def prepare_skip_lines
skip_lines = @options[:skip_lines]
case skip_lines
when String
@ -356,18 +406,71 @@ class CSV
end
@skip_lines = skip_lines
end
end
@column_end = Regexp.new(escaped_column_separator)
# Reads the :strip option and prepares the values used for stripping.
#
# * A String must be exactly one character long (ArgumentError if not);
#   it is converted to @encoding and regexp-escaped.
# * Any other true value selects the whitespace characters
#   space, tab, CR, LF, form feed and vertical tab.
#
# In both cases @strip_value (a Regexp matching a run of strip characters)
# is set only when a quote character is configured, and robust parsing is
# requested. With a false or +nil+ option nothing else is changed.
def prepare_strip
  @strip = @options[:strip]
  @escaped_strip = nil
  @strip_value = nil
  return unless @strip

  if @strip.is_a?(String)
    if @strip.empty?
      raise ArgumentError, ":strip must not be an empty String"
    elsif @strip.length > 1
      raise ArgumentError, ":strip doesn't support 2 or more characters yet"
    end
    @strip = @strip.encode(@encoding)
    @escaped_strip = Regexp.escape(@strip)
    @strip_value = Regexp.new(@escaped_strip + "+".encode(@encoding)) if @quote_character
  else
    strip_values = " \t\r\n\f\v"
    @escaped_strip = strip_values.encode(@encoding)
    @strip_value = Regexp.new("[#{strip_values}]+".encode(@encoding)) if @quote_character
  end
  @need_robust_parsing = true
end
# Probes, when this code is loaded, whether StringScanner#scan accepts a
# String pattern. A TypeError from the probe call records +false+ in the
# class variable; a successful call records +true+.
begin
StringScanner.new("x").scan("x")
rescue TypeError
@@string_scanner_scan_accept_string = false
else
@@string_scanner_scan_accept_string = true
end
def prepare_separators
@column_separator = @options[:column_separator].to_s.encode(@encoding)
@row_separator =
resolve_row_separator(@options[:row_separator]).encode(@encoding)
@escaped_column_separator = Regexp.escape(@column_separator)
@escaped_first_column_separator = Regexp.escape(@column_separator[0])
if @column_separator.size > 1
@column_end = Regexp.new(@escaped_column_separator)
@column_ends = @column_separator.each_char.collect do |char|
Regexp.new(Regexp.escape(char))
end
@first_column_separators = Regexp.new(escaped_first_column_separator +
@first_column_separators = Regexp.new(@escaped_first_column_separator +
"+".encode(@encoding))
else
if @@string_scanner_scan_accept_string
@column_end = @column_separator
else
@column_end = Regexp.new(@escaped_column_separator)
end
@column_ends = nil
@first_column_separators = nil
end
escaped_row_separator = Regexp.escape(@row_separator)
@row_end = Regexp.new(escaped_row_separator)
if @row_separator.size > 1
@row_ends = @row_separator.each_char.collect do |char|
@ -376,25 +479,56 @@ class CSV
else
@row_ends = nil
end
@quotes = Regexp.new(escaped_quote_character +
"+".encode(@encoding))
@quoted_value = Regexp.new("[^".encode(@encoding) +
escaped_quote_character +
"]+".encode(@encoding))
if @liberal_parsing
@unquoted_value = Regexp.new("[^".encode(@encoding) +
escaped_first_column_separator +
"\r\n]+".encode(@encoding))
else
@unquoted_value = Regexp.new("[^".encode(@encoding) +
escaped_quote_character +
escaped_first_column_separator +
"\r\n]+".encode(@encoding))
end
@cr = "\r".encode(@encoding)
@lf = "\n".encode(@encoding)
@cr_or_lf = Regexp.new("[\r\n]".encode(@encoding))
@not_line_end = Regexp.new("[^\r\n]+".encode(@encoding))
end
# Prepares the patterns used for quoted values and for splitting a line
# into columns.
#
# With a quote character:
# * @quotes matches a run of quote characters.
# * @quoted_value matches a run of characters that are neither a quote nor,
#   when backslash quoting is enabled, a backslash.
#
# @split_column_separator is what a line is split on:
# * with stripping, a Regexp matching the column separator together with
#   any strip characters around it;
# * otherwise the column separator itself, except that a single space is
#   wrapped in a Regexp because String#split treats a " " String pattern
#   as awk-style whitespace splitting.
def prepare_quoted
  if @quote_character
    @quotes = Regexp.new(@escaped_quote_character +
                         "+".encode(@encoding))
    no_quoted_values = @escaped_quote_character.dup
    if @backslash_quote
      no_quoted_values << @escaped_backslash_character
    end
    @quoted_value = Regexp.new("[^".encode(@encoding) +
                               no_quoted_values +
                               "]+".encode(@encoding))
  end
  if @escaped_strip
    # @escaped_strip may hold several characters (the whitespace set used
    # for a non-String strip option). It must be a character class here:
    # concatenated as a plain sequence, "*" would only apply to its last
    # character and the pattern would not match ordinary padding.
    optional_strip = "[".encode(@encoding) +
                     @escaped_strip +
                     "]*".encode(@encoding)
    @split_column_separator = Regexp.new(optional_strip +
                                         @escaped_column_separator +
                                         optional_strip)
  else
    if @column_separator == " ".encode(@encoding)
      @split_column_separator = Regexp.new(@escaped_column_separator)
    else
      @split_column_separator = @column_separator
    end
  end
end
# Prepares @unquoted_value, a Regexp matching a run of characters allowed
# in an unquoted value. Does nothing when there is no quote character.
#
# Excluded characters are CR, LF, the first character of the column
# separator, the quote character (unless liberal parsing is on) and the
# strip characters (when stripping is on).
def prepare_unquoted
  return if @quote_character.nil?

  excluded = "\r\n".encode(@encoding)
  excluded << @escaped_first_column_separator
  excluded << @escaped_quote_character unless @liberal_parsing
  excluded << @escaped_strip if @escaped_strip
  pattern = "[^".encode(@encoding) + excluded + "]+".encode(@encoding)
  @unquoted_value = Regexp.new(pattern)
end
def resolve_row_separator(separator)
if separator == :auto
cr = "\r".encode(@encoding)
@ -514,6 +648,8 @@ class CSV
end
def may_quoted?
return false if @quote_character.nil?
if @input.is_a?(StringIO)
sample = @input.string
else
@ -534,6 +670,10 @@ class CSV
@io.gets(*args)
end
# Forwards +each_line+, with all arguments and the block, to the wrapped @io.
def each_line(*args, &block)
@io.each_line(*args, &block)
end
def eof?
@io.eof?
end
@ -548,7 +688,10 @@ class CSV
else
inputs << @input
end
InputsScanner.new(inputs, @encoding, chunk_size: 1)
chunk_size = ENV["CSV_PARSER_SCANNER_TEST_CHUNK_SIZE"] || "1"
InputsScanner.new(inputs,
@encoding,
chunk_size: Integer(chunk_size, 10))
end
else
def build_scanner
@ -560,8 +703,13 @@ class CSV
end
if string
unless string.valid_encoding?
message = "Invalid byte sequence in #{@encoding}"
raise MalformedCSVError.new(message, @lineno + 1)
index = string.lines(@row_separator).index do |line|
!line.valid_encoding?
end
if index
message = "Invalid byte sequence in #{@encoding}"
raise MalformedCSVError.new(message, @lineno + index + 1)
end
end
Scanner.new(string)
else
@ -582,6 +730,7 @@ class CSV
line = @scanner.scan_all(@not_line_end) || "".encode(@encoding)
line << @row_separator if parse_row_end
if skip_line?(line)
@lineno += 1
@scanner.keep_drop
else
@scanner.keep_back
@ -601,6 +750,147 @@ class CSV
end
end
# Parses input that has no quote character, one line at a time.
#
# Each line from the scanner is skipped when it matches the skip-lines
# setting. Otherwise its row separator is removed; an empty line becomes
# an empty row (or is skipped when blank rows are skipped), and any other
# line is stripped, split on the column separator and has its empty
# columns replaced with +nil+. The original line is remembered in
# @last_line before the row is emitted.
def parse_no_quote(&block)
  @scanner.each_line(@row_separator) do |raw_line|
    next if @skip_lines && skip_line?(raw_line)

    content = raw_line.delete_suffix(@row_separator)
    if content.empty?
      next if @skip_blanks
      row = []
    else
      content = strip_value(content)
      row = content.split(@split_column_separator, -1)
      row.map! { |column| column.empty? ? nil : column }
    end
    @last_line = raw_line
    emit_row(row, &block)
  end
end
# Fast path for input that may contain quotes: handles each line with
# String#split instead of scanning it column by column.
#
# A keep point is started before each line so that the scanner can be
# rewound. Whenever a line cannot be handled here, the scanner goes back
# to the keep point, @need_robust_parsing is set and the rest of the
# input is handed to parse_quotable_robust.
def parse_quotable_loose(&block)
@scanner.keep_start
@scanner.each_line(@row_separator) do |line|
if @skip_lines and skip_line?(line)
# Skipped line: discard the keep point and start a new one.
@scanner.keep_drop
@scanner.keep_start
next
end
original_line = line
line = line.delete_suffix(@row_separator)
if line.empty?
if @skip_blanks
@scanner.keep_drop
@scanner.keep_start
next
end
row = []
elsif line.include?(@cr) or line.include?(@lf)
# A CR or LF remains after removing the row separator: fall back.
@scanner.keep_back
@need_robust_parsing = true
return parse_quotable_robust(&block)
else
row = line.split(@split_column_separator, -1)
n_columns = row.size
i = 0
while i < n_columns
column = row[i]
if column.empty?
row[i] = nil
else
n_quotes = column.count(@quote_character)
if n_quotes.zero?
# no quote
elsif n_quotes == 2 and
column.start_with?(@quote_character) and
column.end_with?(@quote_character)
# Exactly one pair of quotes wrapping the column: drop them.
row[i] = column[1..-2]
else
# Any other use of quotes: fall back.
@scanner.keep_back
@need_robust_parsing = true
return parse_quotable_robust(&block)
end
end
i += 1
end
end
# The line was handled: move the keep point past it and emit the row.
@scanner.keep_drop
@scanner.keep_start
@last_line = original_line
emit_row(row, &block)
end
@scanner.keep_drop
end
# Parses the input column by column with the scanner and emits each
# completed row.
#
# After every column value the next token decides what happens: a column
# end appends the value to the current row, a row end emits the row, and
# the end of input emits any pending row and stops. Anything else is
# malformed: the rest of the broken line is consumed with
# ignore_broken_line and MalformedCSVError is raised.
def parse_quotable_robust(&block)
row = []
skip_needless_lines
start_row
while true
@quoted_column_value = false
@unquoted_column_value = false
# Strip characters before and after the value are consumed when
# stripping is configured.
@scanner.scan_all(@strip_value) if @strip_value
value = parse_column_value
if value
@scanner.scan_all(@strip_value) if @strip_value
if @field_size_limit and value.size >= @field_size_limit
ignore_broken_line
raise MalformedCSVError.new("Field size exceeded", @lineno)
end
end
if parse_column_end
row << value
elsif parse_row_end
if row.empty? and value.nil?
# Blank line: emitted as an empty row unless blank rows are skipped.
emit_row([], &block) unless @skip_blanks
else
row << value
emit_row(row, &block)
row = []
end
skip_needless_lines
start_row
elsif @scanner.eos?
break if row.empty? and value.nil?
row << value
emit_row(row, &block)
break
else
# Neither a column end, a row end nor the end of input follows.
if @quoted_column_value
ignore_broken_line
message = "Any value after quoted field isn't allowed"
raise MalformedCSVError.new(message, @lineno)
elsif @unquoted_column_value and
(new_line = @scanner.scan(@cr_or_lf))
ignore_broken_line
message = "Unquoted fields do not allow new line " +
"<#{new_line.inspect}>"
raise MalformedCSVError.new(message, @lineno)
elsif @scanner.rest.start_with?(@quote_character)
ignore_broken_line
message = "Illegal quoting"
raise MalformedCSVError.new(message, @lineno)
elsif (new_line = @scanner.scan(@cr_or_lf))
ignore_broken_line
message = "New line must be <#{@row_separator.inspect}> " +
"not <#{new_line.inspect}>"
raise MalformedCSVError.new(message, @lineno)
else
ignore_broken_line
raise MalformedCSVError.new("TODO: Meaningful message",
@lineno)
end
end
end
end
def parse_column_value
if @liberal_parsing
quoted_value = parse_quoted_column_value
@ -651,6 +941,7 @@ class CSV
value << sub_value
end
end
value.gsub!(@backslash_quote_character, @quote_character) if @backslash_quote
value
end
@ -667,10 +958,22 @@ class CSV
while true
quoted_value = @scanner.scan_all(@quoted_value)
value << quoted_value if quoted_value
if @backslash_quote
if @scanner.scan(@escaped_backslash)
if @scanner.scan(@escaped_quote)
value << @quote_character
else
value << @backslash_character
end
next
end
end
quotes = @scanner.scan_all(@quotes)
unless quotes
ignore_broken_line
message = "Unclosed quoted field"
raise MalformedCSVError.new(message, @lineno + 1)
raise MalformedCSVError.new(message, @lineno)
end
n_quotes = quotes.size
if n_quotes == 1
@ -713,6 +1016,33 @@ class CSV
end
end
# Removes strip characters from both ends of +value+.
#
# Returns +value+ untouched when stripping is off and +nil+ for a +nil+
# value. With a String strip setting, leading and trailing occurrences are
# removed one character at a time and a new String is returned. With any
# other true setting, whitespace is stripped from +value+ in place.
def strip_value(value)
  return value unless @strip
  return nil if value.nil?

  unless @strip.is_a?(String)
    value.strip!
    return value
  end

  remaining = value.size
  value = value[1, remaining -= 1] while value.start_with?(@strip)
  value = value[0, remaining -= 1] while value.end_with?(@strip)
  value
end
# Consumes what is left of the current line -- first everything matching
# @not_line_end, then one match of @cr_or_lf -- and advances the line
# counter by one.
def ignore_broken_line
  [@not_line_end, @cr_or_lf].each do |pattern|
    @scanner.scan_all(pattern)
  end
  @lineno += 1
end
def start_row
if @last_line
@last_line = nil

View file

@ -2,5 +2,5 @@
class CSV
# The version of the installed library.
VERSION = "3.0.4"
VERSION = "3.0.9"
end

View file

@ -18,6 +18,7 @@ class CSV
if @options[:write_headers] and @headers
self << @headers
end
@fields_converter = @options[:fields_converter]
end
def <<(row)
@ -31,6 +32,8 @@ class CSV
@headers ||= row if @use_headers
@lineno += 1
row = @fields_converter.convert(row, nil, lineno) if @fields_converter
converted_row = row.collect do |field|
quote(field)
end

View file

@ -1,5 +1,18 @@
require "tempfile"
require "test/unit"
require "csv"
require_relative "../lib/with_different_ofs.rb"
module Helper
  # Runs the given block with the CSV_PARSER_SCANNER_TEST_CHUNK_SIZE
  # environment variable set to +chunk_size+. The previous value is put
  # back afterwards, even when the block raises.
  def with_chunk_size(chunk_size)
    key = "CSV_PARSER_SCANNER_TEST_CHUNK_SIZE"
    previous = ENV[key]
    ENV[key] = chunk_size
    yield
  ensure
    ENV[key] = previous
  end
end

View file

@ -0,0 +1,47 @@
# frozen_string_literal: false
require_relative "../helper"
# Tests for the IO-like methods of a CSV object.
class TestCSVInterfaceDelegation < Test::Unit::TestCase
# Cases where the CSV object is created from a String.
class TestStringIO < self
def setup
@csv = CSV.new("h1,h2")
end
# flock, ioctl, stat and to_i are expected to raise NotImplementedError.
def test_flock
assert_raise(NotImplementedError) do
@csv.flock(File::LOCK_EX)
end
end
def test_ioctl
assert_raise(NotImplementedError) do
@csv.ioctl(0)
end
end
def test_stat
assert_raise(NotImplementedError) do
@csv.stat
end
end
def test_to_i
assert_raise(NotImplementedError) do
@csv.to_i
end
end
# binmode? and path are expected to return false and nil respectively.
def test_binmode?
assert_equal(false, @csv.binmode?)
end
def test_path
assert_equal(nil, @csv.path)
end
# to_io is expected to return a StringIO.
def test_to_io
assert_instance_of(StringIO, @csv.to_io)
end
end
end

View file

@ -0,0 +1,277 @@
# frozen_string_literal: false
require_relative "../helper"
# Tests for the reading interface of CSV: foreach, open, parse,
# parse_line, read, readlines, table, shift, each and eof?.
class TestCSVInterfaceRead < Test::Unit::TestCase
extend DifferentOFS
# Writes two tab-separated, CRLF-terminated rows to a Tempfile and keeps
# the rows that parsing them is expected to produce.
def setup
super
@data = ""
@data << "1\t2\t3\r\n"
@data << "4\t5\r\n"
@input = Tempfile.new(["interface-read", ".csv"], options: {binmode: true})
@input << @data
@input.rewind
@rows = [
["1", "2", "3"],
["4", "5"],
]
end
def teardown
@input.close(true)
super
end
def test_foreach
rows = []
CSV.foreach(@input.path, col_sep: "\t", row_sep: "\r\n").each do |row|
rows << row
end
assert_equal(@rows, rows)
end
# Same as test_foreach but with an explicit mode argument.
def test_foreach_mode
rows = []
CSV.foreach(@input.path, "r", col_sep: "\t", row_sep: "\r\n").each do |row|
rows << row
end
assert_equal(@rows, rows)
end
def test_foreach_enumurator
rows = CSV.foreach(@input.path, col_sep: "\t", row_sep: "\r\n").to_a
assert_equal(@rows, rows)
end
def test_closed?
csv = CSV.open(@input.path, "r+", col_sep: "\t", row_sep: "\r\n")
assert_not_predicate(csv, :closed?)
csv.close
assert_predicate(csv, :closed?)
end
# The CSV object is expected to be closed once the block has finished.
def test_open_auto_close
csv = nil
CSV.open(@input.path) do |_csv|
csv = _csv
end
assert_predicate(csv, :closed?)
end
# Closing inside the block must still leave the CSV object closed.
def test_open_closed
csv = nil
CSV.open(@input.path) do |_csv|
csv = _csv
csv.close
end
assert_predicate(csv, :closed?)
end
def test_open_block_return_value
return_value = CSV.open(@input.path) do
"Return value."
end
assert_equal("Return value.", return_value)
end
def test_open_encoding_valid
# U+1F600 GRINNING FACE
# U+1F601 GRINNING FACE WITH SMILING EYES
File.open(@input.path, "w") do |file|
file << "\u{1F600},\u{1F601}"
end
CSV.open(@input.path, encoding: "utf-8") do |csv|
assert_equal([["\u{1F600}", "\u{1F601}"]],
csv.to_a)
end
end
# Reading the same data as EUC-JP is expected to raise
# CSV::MalformedCSVError that reports line 1.
def test_open_encoding_invalid
# U+1F600 GRINNING FACE
# U+1F601 GRINNING FACE WITH SMILING EYES
File.open(@input.path, "w") do |file|
file << "\u{1F600},\u{1F601}"
end
CSV.open(@input.path, encoding: "EUC-JP") do |csv|
error = assert_raise(CSV::MalformedCSVError) do
csv.shift
end
assert_equal("Invalid byte sequence in EUC-JP in line 1.",
error.message)
end
end
# An unknown encoding name is expected to produce a warning; the
# location prefix of the warning is normalized before comparing.
def test_open_encoding_nonexistent
_output, error = capture_io do
CSV.open(@input.path, encoding: "nonexistent") do
end
end
assert_equal("path:0: warning: Unsupported encoding nonexistent ignored\n",
error.gsub(/\A.+:\d+: /, "path:0: "))
end
def test_open_encoding_utf_8_with_bom
# U+FEFF ZERO WIDTH NO-BREAK SPACE, BOM
# U+1F600 GRINNING FACE
# U+1F601 GRINNING FACE WITH SMILING EYES
File.open(@input.path, "w") do |file|
file << "\u{FEFF}\u{1F600},\u{1F601}"
end
CSV.open(@input.path, encoding: "bom|utf-8") do |csv|
assert_equal([["\u{1F600}", "\u{1F601}"]],
csv.to_a)
end
end
def test_parse
assert_equal(@rows,
CSV.parse(@data, col_sep: "\t", row_sep: "\r\n"))
end
def test_parse_block
rows = []
CSV.parse(@data, col_sep: "\t", row_sep: "\r\n") do |row|
rows << row
end
assert_equal(@rows, rows)
end
def test_parse_enumerator
rows = CSV.parse(@data, col_sep: "\t", row_sep: "\r\n").to_a
assert_equal(@rows, rows)
end
# Input consisting of a header line only: headers are available and
# there are no rows.
def test_parse_headers_only
table = CSV.parse("a,b,c", headers: true)
assert_equal([
["a", "b", "c"],
[],
],
[
table.headers,
table.each.to_a,
])
end
def test_parse_line
assert_equal(["1", "2", "3"],
CSV.parse_line("1;2;3", col_sep: ";"))
end
def test_parse_line_shortcut
assert_equal(["1", "2", "3"],
"1;2;3".parse_csv(col_sep: ";"))
end
def test_parse_line_empty
assert_equal(nil, CSV.parse_line("")) # to signal eof
end
def test_parse_line_empty_line
assert_equal([], CSV.parse_line("\n1,2,3"))
end
def test_read
assert_equal(@rows,
CSV.read(@input.path, col_sep: "\t", row_sep: "\r\n"))
end
def test_readlines
assert_equal(@rows,
CSV.readlines(@input.path, col_sep: "\t", row_sep: "\r\n"))
end
def test_open_read
rows = CSV.open(@input.path, col_sep: "\t", row_sep: "\r\n") do |csv|
csv.read
end
assert_equal(@rows, rows)
end
def test_open_readlines
rows = CSV.open(@input.path, col_sep: "\t", row_sep: "\r\n") do |csv|
csv.readlines
end
assert_equal(@rows, rows)
end
# The first line is used as headers and the second line becomes the only
# row; the expected values are symbols and integers.
def test_table
table = CSV.table(@input.path, col_sep: "\t", row_sep: "\r\n")
assert_equal(CSV::Table.new([
CSV::Row.new([:"1", :"2", :"3"], [4, 5, nil]),
]),
table)
end
# A shift past the last row is expected to return nil.
def test_shift # aliased as gets() and readline()
CSV.open(@input.path, "rb+", col_sep: "\t", row_sep: "\r\n") do |csv|
rows = [
csv.shift,
csv.shift,
csv.shift,
]
assert_equal(@rows + [nil],
rows)
end
end
def test_enumerator
CSV.open(@input.path, col_sep: "\t", row_sep: "\r\n") do |csv|
assert_equal(@rows, csv.each.to_a)
end
end
# each after shift is expected to continue from the next row.
def test_shift_and_each
CSV.open(@input.path, col_sep: "\t", row_sep: "\r\n") do |csv|
rows = []
rows << csv.shift
rows.concat(csv.each.to_a)
assert_equal(@rows, rows)
end
end
# A second each on the same CSV object is expected to yield nothing.
def test_each_twice
CSV.open(@input.path, col_sep: "\t", row_sep: "\r\n") do |csv|
assert_equal([
@rows,
[],
],
[
csv.each.to_a,
csv.each.to_a,
])
end
end
# eof? is expected to become true only after both rows have been read.
def test_eof?
eofs = []
CSV.open(@input.path, col_sep: "\t", row_sep: "\r\n") do |csv|
eofs << csv.eof?
csv.shift
eofs << csv.eof?
csv.shift
eofs << csv.eof?
end
assert_equal([false, false, true],
eofs)
end
def test_new_nil
assert_raise_with_message ArgumentError, "Cannot parse nil as CSV" do
CSV.new(nil)
end
end
# Passes a frozen options Hash to each reading entry point; any attempt
# to modify it would raise.
def test_options_not_modified
options = {}.freeze
CSV.foreach(@input.path, options)
CSV.open(@input.path, options) {}
CSV.parse("", options)
CSV.parse_line("", options)
CSV.read(@input.path, options)
CSV.readlines(@input.path, options)
CSV.table(@input.path, options)
end
end

View file

@ -0,0 +1,51 @@
# frozen_string_literal: false
require_relative "../helper"
# Tests for the CSV interfaces that both read and write: CSV.filter,
# CSV.instance and the CSV() shortcut.
class TestCSVInterfaceReadWrite < Test::Unit::TestCase
extend DifferentOFS
# Reads ";"-separated input, doubles each converted number, appends a
# field containing "\r" and expects ","-separated output in which that
# field is quoted.
def test_filter
rows = [[1, 2, 3], [4, 5]]
input = <<-CSV
1;2;3
4;5
CSV
output = ""
CSV.filter(input, output,
in_col_sep: ";",
out_col_sep: ",",
converters: :all) do |row|
row.map! {|n| n * 2}
row << "Added\r"
end
assert_equal(<<-CSV, output)
2,4,6,"Added\r"
8,10,"Added\r"
CSV
end
# The same data object and options are expected to give the same instance.
def test_instance_same
data = ""
assert_equal(CSV.instance(data, col_sep: ";").object_id,
CSV.instance(data, col_sep: ";").object_id)
end
# Rows appended through two CSV.instance calls end up in the same output.
def test_instance_append
output = ""
CSV.instance(output, col_sep: ";") << ["a", "b", "c"]
assert_equal(<<-CSV, output)
a;b;c
CSV
CSV.instance(output, col_sep: ";") << [1, 2, 3]
assert_equal(<<-CSV, output)
a;b;c
1;2;3
CSV
end
# The block form of CSV() is expected to yield the object that
# CSV.instance returns.
def test_instance_shortcut
assert_equal(CSV.instance,
CSV {|csv| csv})
end
end

View file

@ -0,0 +1,174 @@
# frozen_string_literal: false
require_relative "../helper"
# Tests for the writing interface of CSV: generate, generate_line,
# Array#to_csv and appending rows to a CSV opened for writing.
class TestCSVInterfaceWrite < Test::Unit::TestCase
extend DifferentOFS
def setup
super
@output = Tempfile.new(["interface-write", ".csv"])
end
def teardown
@output.close(true)
super
end
# A nil field is expected to be written as an empty field.
def test_generate_default
csv_text = CSV.generate do |csv|
csv << [1, 2, 3] << [4, nil, 5]
end
assert_equal(<<-CSV, csv_text)
1,2,3
4,,5
CSV
end
# Generating into an existing String appends to it; quotes inside a
# field are expected to be doubled and the field quoted.
def test_generate_append
csv_text = <<-CSV
1,2,3
4,,5
CSV
CSV.generate(csv_text) do |csv|
csv << ["last", %Q{"row"}]
end
assert_equal(<<-CSV, csv_text)
1,2,3
4,,5
last,"""row"""
CSV
end
# No row separator is inserted between the existing text and the new row.
def test_generate_no_new_line
csv_text = CSV.generate("test") do |csv|
csv << ["row"]
end
assert_equal(<<-CSV, csv_text)
testrow
CSV
end
def test_generate_line_col_sep
line = CSV.generate_line(["1", "2", "3"], col_sep: ";")
assert_equal(<<-LINE, line)
1;2;3
LINE
end
# With row_sep: nil the generated line is expected to have no line ending.
def test_generate_line_row_sep
line = CSV.generate_line(["1", "2"], row_sep: nil)
assert_equal(<<-LINE.chomp, line)
1,2
LINE
end
def test_generate_line_shortcut
line = ["1", "2", "3"].to_csv(col_sep: ";")
assert_equal(<<-LINE, line)
1;2;3
LINE
end
# With headers: true the first appended row is expected to become the headers.
def test_headers_detection
headers = ["a", "b", "c"]
CSV.open(@output.path, "w", headers: true) do |csv|
csv << headers
csv << ["1", "2", "3"]
assert_equal(headers, csv.headers)
end
end
# lineno is expected to equal the number of rows written.
def test_lineno
CSV.open(@output.path, "w") do |csv|
n_lines = 20
n_lines.times do
csv << ["a", "b", "c"]
end
assert_equal(n_lines, csv.lineno)
end
end
def test_append_row
CSV.open(@output.path, "wb") do |csv|
csv <<
CSV::Row.new([], ["1", "2", "3"]) <<
CSV::Row.new([], ["a", "b", "c"])
end
assert_equal(<<-CSV, File.read(@output.path, mode: "rb"))
1,2,3
a,b,c
CSV
end
# Hashes appended after the header row are written in header order.
def test_append_hash
CSV.open(@output.path, "wb", headers: true) do |csv|
csv << [:a, :b, :c]
csv << {a: 1, b: 2, c: 3}
csv << {a: 4, b: 5, c: 6}
end
assert_equal(<<-CSV, File.read(@output.path, mode: "rb"))
a,b,c
1,2,3
4,5,6
CSV
end
# Headers given as an Array decide the column order of appended Hashes.
def test_append_hash_headers_array
CSV.open(@output.path, "wb", headers: [:b, :a, :c]) do |csv|
csv << {a: 1, b: 2, c: 3}
csv << {a: 4, b: 5, c: 6}
end
assert_equal(<<-CSV, File.read(@output.path, mode: "rb"))
2,1,3
5,4,6
CSV
end
# Headers given as a String are split with the column separator.
def test_append_hash_headers_string
CSV.open(@output.path, "wb", headers: "b|a|c", col_sep: "|") do |csv|
csv << {"a" => 1, "b" => 2, "c" => 3}
csv << {"a" => 4, "b" => 5, "c" => 6}
end
assert_equal(<<-CSV, File.read(@output.path, mode: "rb"))
2|1|3
5|4|6
CSV
end
# With write_headers: true the header line is written before the rows.
def test_write_headers
CSV.open(@output.path,
"wb",
headers: "b|a|c",
write_headers: true,
col_sep: "|" ) do |csv|
csv << {"a" => 1, "b" => 2, "c" => 3}
csv << {"a" => 4, "b" => 5, "c" => 6}
end
assert_equal(<<-CSV, File.read(@output.path, mode: "rb"))
b|a|c
2|1|3
5|4|6
CSV
end
# The header line is expected to be written even when no row is appended.
def test_write_headers_empty
CSV.open(@output.path,
"wb",
headers: "b|a|c",
write_headers: true,
col_sep: "|" ) do |csv|
end
assert_equal(<<-CSV, File.read(@output.path, mode: "rb"))
b|a|c
CSV
end
# Passes a frozen options Hash to each writing entry point; any attempt
# to modify it would raise.
def test_options_not_modified
options = {}.freeze
CSV.generate(options) {}
CSV.generate_line([], options)
CSV.filter("", "", options)
CSV.instance("", options)
end
end

View file

@ -142,7 +142,7 @@ class TestCSVParseGeneral < Test::Unit::TestCase
error = assert_raise(CSV::MalformedCSVError) do
CSV.parse_line("1,2\r,3", row_sep: "\n")
end
assert_equal("Unquoted fields do not allow \\r or \\n in line 1.",
assert_equal("Unquoted fields do not allow new line <\"\\r\"> in line 1.",
error.message)
end
@ -158,7 +158,7 @@ line,5,jkl
error = assert_raise(CSV::MalformedCSVError) do
CSV.parse(csv)
end
assert_equal("Unquoted fields do not allow \\r or \\n in line 4.",
assert_equal("Unquoted fields do not allow new line <\"\\r\"> in line 4.",
error.message)
end

View file

@ -0,0 +1,36 @@
# -*- coding: utf-8 -*-
# frozen_string_literal: false
require_relative "../helper"
# Checks the errors CSV raises for malformed input.
class TestCSVParseInvalid < Test::Unit::TestCase
  # Two empty lines terminated by different new line characters.
  def test_no_column_mixed_new_lines
    mixed_new_lines = "\n" + "\r"
    error = assert_raise(CSV::MalformedCSVError) {CSV.parse(mixed_new_lines)}
    assert_equal("New line must be <\"\\n\"> not <\"\\r\"> in line 2.",
                 error.message)
  end

  # After a malformed line raised, the following line can still be read.
  def test_ignore_invalid_line
    data = <<~CSV
      head1,head2,head3
      aaa,bbb,ccc
      ddd,ee"e.fff
      ggg,hhh,iii
    CSV
    csv = CSV.new(data, headers: true, return_headers: true)
    headers = ["head1", "head2", "head3"]

    header_row = csv.shift
    assert_equal(CSV::Row.new(headers, headers), header_row)

    first_row = csv.shift
    assert_equal(CSV::Row.new(headers, ["aaa", "bbb", "ccc"]), first_row)

    error = assert_raise(CSV::MalformedCSVError) {csv.shift}
    assert_equal("Illegal quoting in line 3.",
                 error.message)

    last_row = csv.shift
    assert_equal(CSV::Row.new(headers, ["ggg", "hhh", "iii"]), last_row)
  end
end

View file

@ -22,8 +22,7 @@ class TestCSVParseLiberalParsing < Test::Unit::TestCase
error = assert_raise(CSV::MalformedCSVError) do
CSV.parse_line(input)
end
assert_equal("Do not allow except col_sep_split_separator " +
"after quoted fields in line 1.",
assert_equal("Any value after quoted field isn't allowed in line 1.",
error.message)
assert_equal(['"quoted" field'],
CSV.parse_line(input, liberal_parsing: true))
@ -75,8 +74,7 @@ class TestCSVParseLiberalParsing < Test::Unit::TestCase
error = assert_raise(CSV::MalformedCSVError) do
CSV.parse(data)
end
assert_equal("Do not allow except col_sep_split_separator " +
"after quoted fields in line 1.",
assert_equal("Any value after quoted field isn't allowed in line 1.",
error.message)
assert_equal([
[["a", %Q{""b""}]],
@ -90,4 +88,73 @@ class TestCSVParseLiberalParsing < Test::Unit::TestCase
}),
])
end
# Tests for liberal_parsing: {backslash_quote: true}, alone and combined
# with double_quote_outside_quote.
class TestBackslashQuote < Test::Unit::TestCase
  extend ::DifferentOFS

  def test_double_quote_outside_quote
    data = %Q{a,""b""}
    backslash_only =
      CSV.parse(data, liberal_parsing: {backslash_quote: true})
    with_double_quote =
      CSV.parse(data,
                liberal_parsing: {
                  backslash_quote: true,
                  double_quote_outside_quote: true,
                })
    expected = [
      [["a", %Q{""b""}]],
      [["a", %Q{"b"}]],
    ]
    assert_equal(expected, [backslash_only, with_double_quote])
  end

  def test_unquoted_value
    data = %q{\"\"a\"\"}
    liberal = CSV.parse(data, liberal_parsing: true)
    backslash =
      CSV.parse(data, liberal_parsing: {backslash_quote: true})
    expected = [
      [[%q{\"\"a\"\"}]],
      [[%q{""a""}]],
    ]
    assert_equal(expected, [liberal, backslash])
  end

  def test_unquoted_value_multiple_characters_col_sep
    data = %q{a<\\"b<=>x}
    parsed = CSV.parse(data,
                       col_sep: "<=>",
                       liberal_parsing: {backslash_quote: true})
    assert_equal([[%Q{a<"b}, "x"]], parsed)
  end

  def test_quoted_value
    data = %q{"\"\"a\"\""}
    liberal = CSV.parse(data, liberal_parsing: true)
    backslash =
      CSV.parse(data, liberal_parsing: {backslash_quote: true})
    backslash_and_double_quote =
      CSV.parse(data,
                liberal_parsing: {
                  backslash_quote: true,
                  double_quote_outside_quote: true,
                })
    expected = [
      [[%q{"\"\"a\"\""}]],
      [[%q{""a""}]],
      [[%q{""a""}]],
    ]
    assert_equal(expected,
                 [liberal, backslash, backslash_and_double_quote])
  end
end
end

View file

@ -0,0 +1,93 @@
# -*- coding: utf-8 -*-
# frozen_string_literal: false
require_relative "../helper"
# Tests for parsing with quote_char: nil, i.e. with quoting disabled.
class TestCSVParseQuoteCharNil < Test::Unit::TestCase
  extend DifferentOFS

  def test_full
    assert_equal(["a", "b"], CSV.parse_line(%Q{a,b}, quote_char: nil))
  end

  def test_end_with_nil
    assert_equal(["a", nil, nil, nil], CSV.parse_line(%Q{a,,,}, quote_char: nil))
  end

  def test_nil_nil
    assert_equal([nil, nil], CSV.parse_line(%Q{,}, quote_char: nil))
  end

  # A partial match of a multi-character col_sep stays part of the value.
  def test_unquoted_value_multiple_characters_col_sep
    data = %q{a<b<=>x}
    assert_equal([[%Q{a<b}, "x"]], CSV.parse(data, col_sep: "<=>", quote_char: nil))
  end

  # With headers given as a String, every line of the data is a row.
  def test_csv_header_string
    data = <<~DATA
      first,second,third
      A,B,C
      1,2,3
    DATA
    assert_equal(
      CSV::Table.new([
        CSV::Row.new(["my", "new", "headers"], ["first", "second", "third"]),
        CSV::Row.new(["my", "new", "headers"], ["A", "B", "C"]),
        CSV::Row.new(["my", "new", "headers"], ["1", "2", "3"])
      ]),
      CSV.parse(data, headers: "my,new,headers", quote_char: nil)
    )
  end

  def test_comma
    assert_equal([["a", "b", nil, "d"]],
                 CSV.parse("a,b,,d", col_sep: ",", quote_char: nil))
  end

  # Two consecutive separators are needed to produce the nil field.
  def test_space
    assert_equal([["a", "b", nil, "d"]],
                 CSV.parse("a b  d", col_sep: " ", quote_char: nil))
  end

  # Encodes every non-nil element of +array+ to +encoding+; nil is kept.
  def encode_array(array, encoding)
    array.map {|element| element&.encode(encoding)}
  end

  # Same as test_space but with data and separator in UTF-16LE.
  def test_space_no_ascii
    encoding = Encoding::UTF_16LE
    assert_equal([encode_array(["a", "b", nil, "d"], encoding)],
                 CSV.parse("a b  d".encode(encoding),
                           col_sep: " ".encode(encoding),
                           quote_char: nil))
  end

  # The separator is two spaces, so the single space in "a b" is data and
  # the four spaces after it are two separators around an empty field.
  def test_multiple_space
    assert_equal([["a b", nil, "d"]],
                 CSV.parse("a b    d", col_sep: "  ", quote_char: nil))
  end

  def test_multiple_characters_leading_empty_fields
    data = <<~CSV
      <=><=>A<=>B<=>C
      1<=>2<=>3
    CSV
    assert_equal([
                   [nil, nil, "A", "B", "C"],
                   ["1", "2", "3"],
                 ],
                 CSV.parse(data, col_sep: "<=>", quote_char: nil))
  end

  # CSV#line returns the raw text of the row read last.
  def test_line
    lines = [
      "abc,def\n",
    ]
    csv = CSV.new(lines.join(""), quote_char: nil)
    lines.each do |line|
      csv.shift
      assert_equal(line, csv.line)
    end
  end
end

View file

@ -0,0 +1,16 @@
# -*- coding: utf-8 -*-
# frozen_string_literal: false
require_relative "../helper"
# Tests for parsing with an explicit row_sep.
class TestCSVParseRowSeparator < Test::Unit::TestCase
  extend DifferentOFS
  include Helper

  # Parses "\r\n"-separated data while with_chunk_size("1") is in effect.
  def test_multiple_characters
    with_chunk_size("1") do
      parsed = CSV.parse("a\r\nb\r\n", row_sep: "\r\n")
      assert_equal([["a"], ["b"]], parsed)
    end
  end
end

View file

@ -0,0 +1,105 @@
# frozen_string_literal: false
require_relative "../helper"
# Tests for the :skip_lines parse option.
class TestCSVParseSkipLines < Test::Unit::TestCase
  extend DifferentOFS
  include Helper

  def test_default
    assert_nil(CSV.new("a,b,c\n").skip_lines)
  end

  # Lines matching the Regexp are dropped.
  def test_regexp
    data = "1\n#2\n#3\n4\n"
    parsed = CSV.parse(data, skip_lines: /\A\s*#/)
    assert_equal([["1"], ["4"]], parsed)
  end

  # The Regexp is matched against the raw line, so a quoted "#3" stays.
  def test_regexp_quoted
    data = %Q{1\n#2\n"#3"\n4\n}
    parsed = CSV.parse(data, skip_lines: /\A\s*#/)
    assert_equal([["1"], ["#3"], ["4"]], parsed)
  end

  # A String drops every line that contains it.
  def test_string
    data = "1\n.2\n3.\n4\n"
    parsed = CSV.parse(data, skip_lines: ".")
    assert_equal([["1"], ["4"]], parsed)
  end

  # An object that does not implement #match.
  class RegexStub
  end

  def test_not_matchable
    regex_stub = RegexStub.new
    csv = CSV.new("1\n", skip_lines: regex_stub)
    error = assert_raise(ArgumentError) {csv.shift}
    assert_equal(":skip_lines has to respond to #match: #{regex_stub.inspect}",
                 error.message)
  end

  # A non-Regexp object that implements #match by delegating to a pattern.
  class Matchable
    def initialize(pattern)
      @pattern = pattern
    end

    def match(line)
      @pattern.match(line)
    end
  end

  def test_matchable
    data = "1\n# 2\n3\n# 4\n"
    parsed = CSV.parse(data, skip_lines: Matchable.new(/\A#/))
    assert_equal([["1"], ["3"]], parsed)
  end

  def test_multibyte_data
    # U+3042 HIRAGANA LETTER A
    # U+3044 HIRAGANA LETTER I
    # U+3046 HIRAGANA LETTER U
    value = "\u3042\u3044\u3046"
    with_chunk_size("5") do
      parsed = CSV.parse("#{value}\n#{value}\n", skip_lines: /\A#/)
      assert_equal([[value], [value]], parsed)
    end
  end
end

View file

@ -0,0 +1,48 @@
# -*- coding: utf-8 -*-
# frozen_string_literal: false
require_relative "../helper"
# Tests for the :strip parse option.
class TestCSVParseStrip < Test::Unit::TestCase
  extend DifferentOFS

  def test_both
    fields = CSV.parse_line(%Q{ a , b }, strip: true)
    assert_equal(["a", "b"], fields)
  end

  def test_left
    fields = CSV.parse_line(%Q{ a, b}, strip: true)
    assert_equal(["a", "b"], fields)
  end

  def test_right
    fields = CSV.parse_line(%Q{a ,b }, strip: true)
    assert_equal(["a", "b"], fields)
  end

  # Whitespace inside quotes is kept.
  def test_quoted
    fields = CSV.parse_line(%Q{" a "," b "}, strip: true)
    assert_equal([" a ", " b "], fields)
  end

  def test_liberal_parsing
    fields = CSV.parse_line(%Q{" a ", b , " c "," d " },
                            strip: true,
                            liberal_parsing: true)
    assert_equal([" a ", "b", " c ", " d "], fields)
  end

  # strip may also be a String.
  def test_string
    fields = CSV.parse_line(%Q{ a , " b" }, strip: " ")
    assert_equal(["a", " b"], fields)
  end

  # Combines a String strip value with quoting disabled.
  def test_no_quote
    fields = CSV.parse_line(%Q{" a ", b },
                            strip: %Q{"},
                            quote_char: nil)
    assert_equal([" a ", " b "], fields)
  end
end

View file

@ -256,12 +256,13 @@ class TestCSVEncodings < Test::Unit::TestCase
end
def test_invalid_encoding_row_error
csv = CSV.new("invalid,\xF8\r\nvalid,x\r\n".force_encoding("UTF-8"),
encoding: "UTF-8")
csv = CSV.new("valid,x\rinvalid,\xF8\r".force_encoding("UTF-8"),
encoding: "UTF-8", row_sep: "\r")
error = assert_raise(CSV::MalformedCSVError) do
csv.shift
csv.shift
end
assert_equal("Invalid byte sequence in UTF-8 in line 1.",
assert_equal("Invalid byte sequence in UTF-8 in line 2.",
error.message)
end
@ -270,9 +271,9 @@ class TestCSVEncodings < Test::Unit::TestCase
def assert_parses(fields, encoding, options = { })
encoding = Encoding.find(encoding) unless encoding.is_a? Encoding
orig_fields = fields
fields = encode_ary(fields, encoding)
fields = encode_ary(fields, encoding)
data = ary_to_data(fields, options)
parsed = CSV.parse(data, options)
parsed = CSV.parse(data, options)
assert_equal(fields, parsed)
parsed.flatten.each_with_index do |field, i|
assert_equal(encoding, field.encoding, "Field[#{i + 1}] was transcoded.")

View file

@ -56,7 +56,7 @@ line,4,jkl
error = assert_raise(CSV::MalformedCSVError) do
CSV.parse_line("1,2,3\n,4,5\r\n", row_sep: "\r\n")
end
assert_equal("Unquoted fields do not allow \\r or \\n in line 1.",
assert_equal("Unquoted fields do not allow new line <\"\\n\"> in line 1.",
error.message)
assert_equal( ["1", "2", "3\n", "4", "5"],
CSV.parse_line(%Q{1,2,"3\n",4,5\r\n}, row_sep: "\r\n"))
@ -295,78 +295,6 @@ line,4,jkl
assert_match(/\A\d\.\d\.\d\z/, CSV::VERSION)
end
# CSV.new must accept a Regexp :skip_lines option without ArgumentError.
def test_accepts_comment_skip_lines_option
  comment_pattern = /\A\s*#/
  assert_nothing_raised(ArgumentError) do
    CSV.new(@sample_data, skip_lines: comment_pattern)
  end
end
# Without the option, CSV#skip_lines is nil.
def test_accepts_comment_defaults_to_nil
  assert_nil(CSV.new(@sample_data).skip_lines)
end
# Empty class: its instances define no #match method of their own.
class RegexStub
end
# Reading with a :skip_lines object that lacks #match raises an
# ArgumentError whose message mentions skip_lines.
def test_requires_skip_lines_to_call_match
  csv = CSV.new(@sample_data, skip_lines: RegexStub.new)
  assert_raise_with_message(ArgumentError, /skip_lines/) { csv.shift }
end
# Wraps a pattern and exposes only #match, forwarding to the pattern.
class Matchable
  def initialize(pattern)
    @pattern = pattern
  end

  # Returns whatever the wrapped pattern's #match returns for +line+.
  def match(line)
    pattern.match(line)
  end

  private

  attr_reader :pattern
end
# Any object responding to #match can be used as :skip_lines.
def test_skip_lines_match
  data = "1\n# 2\n3\n# 4"
  parsed = CSV.parse(data, skip_lines: Matchable.new(/\A#/))
  assert_equal([["1"], ["3"]], parsed)
end
# Lines matching the :skip_lines Regexp do not show up as rows.
def test_comment_rows_are_ignored
  data = "line,1,a\n#not,a,line\nline,2,b\n #also,no,line"
  csv = CSV.new(data, skip_lines: /\A\s*#/)
  expected_rows = [["line", "1", "a"], ["line", "2", "b"]]
  assert_equal(expected_rows, csv.each.to_a)
end
# A String :skip_lines drops the line that contains it.
def test_comment_rows_are_ignored_with_heredoc
  data = "1,foo\n.2,bar\n3,baz\n"
  csv = CSV.new(data, skip_lines: ".")
  expected_rows = [["1", "foo"], ["3", "baz"]]
  assert_equal(expected_rows, csv.each.to_a)
end
# A line starting with a quoted "#not" does not match the pattern and
# is parsed as a regular row.
def test_quoted_skip_line_markers_are_ignored
  data = "line,1,a\n\"#not\",a,line\nline,2,b"
  csv = CSV.new(data, skip_lines: /\A\s*#/)
  expected_rows = [
    ["line", "1", "a"],
    ["#not", "a", "line"],
    ["line", "2", "b"],
  ]
  assert_equal(expected_rows, csv.each.to_a)
end
# With a String :skip_lines, lines containing "#" anywhere are dropped.
def test_string_works_like_a_regexp
  data = "line,1,a\n#(not,a,line\nline,2,b\n also,#no,line"
  csv = CSV.new(data, skip_lines: "#")
  expected_rows = [["line", "1", "a"], ["line", "2", "b"]]
  assert_equal(expected_rows, csv.each.to_a)
end
# Comparing a parsed table with nil must not raise NoMethodError.
def test_table_nil_equality
  assert_nothing_raised(NoMethodError) do
    CSV.parse("test", headers: true) == nil
  end
end

View file

@ -1,450 +0,0 @@
# -*- coding: utf-8 -*-
# frozen_string_literal: false
require_relative "helper"
require "tempfile"
# Tests for the class-level and instance-level reading and writing
# interfaces of CSV, run against a small tab-separated temporary file.
class TestCSVInterface < Test::Unit::TestCase
  extend DifferentOFS

  # Creates a temporary file holding two tab-separated, CRLF-terminated
  # rows and remembers the rows expected when it is parsed.
  def setup
    super
    @tempfile = Tempfile.new(%w"temp .csv")
    @tempfile.close
    @path = @tempfile.path
    File.open(@path, "wb") do |file|
      file << "1\t2\t3\r\n"
      file << "4\t5\r\n"
    end
    @expected = [%w{1 2 3}, %w{4 5}]
  end

  # Removes the temporary file.
  def teardown
    @tempfile.close(true)
    super
  end

  ### Test Read Interface ###

  def test_foreach
    CSV.foreach(@path, col_sep: "\t", row_sep: "\r\n") do |row|
      assert_equal(@expected.shift, row)
    end
  end

  # Without a block CSV.foreach returns an enumerator over the rows.
  def test_foreach_enum
    CSV.foreach(@path, col_sep: "\t", row_sep: "\r\n").zip(@expected) do |row, exp|
      assert_equal(exp, row)
    end
  end

  # CSV.open returns an open CSV without a block; with a block it closes
  # the CSV afterwards and returns the block's value.
  def test_open_and_close
    csv = CSV.open(@path, "r+", col_sep: "\t", row_sep: "\r\n")
    assert_not_nil(csv)
    assert_instance_of(CSV, csv)
    assert_not_predicate(csv, :closed?)
    csv.close
    assert_predicate(csv, :closed?)

    ret = CSV.open(@path) do |new_csv|
      csv = new_csv
      assert_instance_of(CSV, new_csv)
      "Return value."
    end
    assert_predicate(csv, :closed?)
    assert_equal("Return value.", ret)
  end

  def test_open_encoding_valid
    # U+1F600 GRINNING FACE
    # U+1F601 GRINNING FACE WITH SMILING EYES
    File.open(@path, "w") do |file|
      file << "\u{1F600},\u{1F601}"
    end
    CSV.open(@path, encoding: "utf-8") do |csv|
      assert_equal([["\u{1F600}", "\u{1F601}"]],
                   csv.to_a)
    end
  end

  def test_open_encoding_invalid
    # U+1F600 GRINNING FACE
    # U+1F601 GRINNING FACE WITH SMILING EYES
    File.open(@path, "w") do |file|
      file << "\u{1F600},\u{1F601}"
    end
    CSV.open(@path, encoding: "EUC-JP") do |csv|
      error = assert_raise(CSV::MalformedCSVError) do
        csv.shift
      end
      assert_equal("Invalid byte sequence in EUC-JP in line 1.",
                   error.message)
    end
  end

  # An unknown encoding name only produces a warning; the location prefix
  # of the warning is normalized before comparing.
  def test_open_encoding_nonexistent
    _output, error = capture_io do
      CSV.open(@path, encoding: "nonexistent") do
      end
    end
    assert_equal("path:0: warning: Unsupported encoding nonexistent ignored\n",
                 error.gsub(/\A.+:\d+: /, "path:0: "))
  end

  def test_open_encoding_utf_8_with_bom
    # U+FEFF ZERO WIDTH NO-BREAK SPACE, BOM
    # U+1F600 GRINNING FACE
    # U+1F601 GRINNING FACE WITH SMILING EYES
    File.open(@path, "w") do |file|
      file << "\u{FEFF}\u{1F600},\u{1F601}"
    end
    CSV.open(@path, encoding: "bom|utf-8") do |csv|
      assert_equal([["\u{1F600}", "\u{1F601}"]],
                   csv.to_a)
    end
  end

  def test_parse
    data = File.binread(@path)
    assert_equal(@expected,
                 CSV.parse(data, col_sep: "\t", row_sep: "\r\n"))

    CSV.parse(data, col_sep: "\t", row_sep: "\r\n") do |row|
      assert_equal(@expected.shift, row)
    end
  end

  def test_parse_line
    row = CSV.parse_line("1;2;3", col_sep: ";")
    assert_not_nil(row)
    assert_instance_of(Array, row)
    assert_equal(%w{1 2 3}, row)

    # shortcut interface
    row = "1;2;3".parse_csv(col_sep: ";")
    assert_not_nil(row)
    assert_instance_of(Array, row)
    assert_equal(%w{1 2 3}, row)
  end

  def test_parse_line_with_empty_lines
    assert_nil(CSV.parse_line("")) # to signal eof
    assert_equal(Array.new, CSV.parse_line("\n1,2,3"))
  end

  def test_parse_header_only
    table = CSV.parse("a,b,c", headers: true)
    assert_equal([
                   ["a", "b", "c"],
                   [],
                 ],
                 [
                   table.headers,
                   table.each.to_a,
                 ])
  end

  def test_read_and_readlines
    assert_equal(@expected,
                 CSV.read(@path, col_sep: "\t", row_sep: "\r\n"))
    assert_equal(@expected,
                 CSV.readlines(@path, col_sep: "\t", row_sep: "\r\n"))

    data = CSV.open(@path, col_sep: "\t", row_sep: "\r\n") do |csv|
      csv.read
    end
    assert_equal(@expected, data)
    data = CSV.open(@path, col_sep: "\t", row_sep: "\r\n") do |csv|
      csv.readlines
    end
    assert_equal(@expected, data)
  end

  def test_table
    table = CSV.table(@path, col_sep: "\t", row_sep: "\r\n")
    assert_instance_of(CSV::Table, table)
    assert_equal([[:"1", :"2", :"3"], [4, 5, nil]], table.to_a)
  end

  # Note: consumes @expected while checking the rows.
  def test_shift # aliased as gets() and readline()
    CSV.open(@path, "rb+", col_sep: "\t", row_sep: "\r\n") do |csv|
      assert_equal(@expected.shift, csv.shift)
      assert_equal(@expected.shift, csv.shift)
      assert_nil(csv.shift)
    end
  end

  def test_enumerators_are_supported
    CSV.open(@path, col_sep: "\t", row_sep: "\r\n") do |csv|
      enum = csv.each
      assert_instance_of(Enumerator, enum)
      assert_equal(@expected.shift, enum.next)
    end
  end

  def test_nil_is_not_acceptable
    assert_raise_with_message ArgumentError, "Cannot parse nil as CSV" do
      CSV.new(nil)
    end
  end

  def test_open_handles_prematurely_closed_file_descriptor_gracefully
    assert_nothing_raised(Exception) do
      CSV.open(@path) do |csv|
        csv.close
      end
    end
  end

  ### Test Write Interface ###

  def test_generate
    str = CSV.generate do |csv| # default empty String
      assert_instance_of(CSV, csv)
      assert_equal(csv, csv << [1, 2, 3])
      assert_equal(csv, csv << [4, nil, 5])
    end
    assert_not_nil(str)
    assert_instance_of(String, str)
    assert_equal("1,2,3\n4,,5\n", str)

    CSV.generate(str) do |csv| # appending to a String
      assert_equal(csv, csv << ["last", %Q{"row"}])
    end
    assert_equal(%Q{1,2,3\n4,,5\nlast,"""row"""\n}, str)

    out = CSV.generate("test") { |csv| csv << ["row"] }
    assert_equal("testrow\n", out)
  end

  def test_generate_line
    line = CSV.generate_line(%w{1 2 3}, col_sep: ";")
    assert_not_nil(line)
    assert_instance_of(String, line)
    assert_equal("1;2;3\n", line)

    # shortcut interface
    line = %w{1 2 3}.to_csv(col_sep: ";")
    assert_not_nil(line)
    assert_instance_of(String, line)
    assert_equal("1;2;3\n", line)

    line = CSV.generate_line(%w"1 2", row_sep: nil)
    assert_equal("1,2", line)
  end

  def test_write_header_detection
    File.unlink(@path)

    headers = %w{a b c}
    CSV.open(@path, "w", headers: true) do |csv|
      csv << headers
      csv << %w{1 2 3}
      assert_equal(headers, csv.headers)
    end
  end

  def test_write_lineno
    File.unlink(@path)

    CSV.open(@path, "w") do |csv|
      lines = 20
      lines.times { csv << %w{a b c} }
      assert_equal(lines, csv.lineno)
    end
  end

  # Hashes written with symbol headers are read back as equal Hashes.
  def test_write_hash
    File.unlink(@path)

    lines = [{a: 1, b: 2, c: 3}, {a: 4, b: 5, c: 6}]
    CSV.open(@path, "wb", headers: true,
                          header_converters: :symbol) do |csv|
      csv << lines.first.keys
      lines.each { |line| csv << line }
    end
    CSV.open(@path, "rb", headers: true,
                          converters: :all,
                          header_converters: :symbol) do |csv|
      csv.each { |line| assert_equal(lines.shift, line.to_hash) }
    end
  end

  # Read back without header converters, the keys of every row are the
  # very String objects held in csv.headers.
  def test_write_hash_with_string_keys
    File.unlink(@path)

    lines = [{a: 1, b: 2, c: 3}, {a: 4, b: 5, c: 6}]
    CSV.open(@path, "wb", headers: true) do |csv|
      csv << lines.first.keys
      lines.each { |line| csv << line }
    end
    CSV.open(@path, "rb", headers: true) do |csv|
      csv.each do |line|
        csv.headers.each_with_index do |header, h|
          keys = line.to_hash.keys
          assert_instance_of(String, keys[h])
          assert_same(header, keys[h])
        end
      end
    end
  end

  def test_write_hash_with_headers_array
    File.unlink(@path)

    lines = [{a: 1, b: 2, c: 3}, {a: 4, b: 5, c: 6}]
    CSV.open(@path, "wb", headers: [:b, :a, :c]) do |csv|
      lines.each { |line| csv << line }
    end

    # test writing fields in the correct order
    File.open(@path, "rb") do |f|
      assert_equal("2,1,3", f.gets.strip)
      assert_equal("5,4,6", f.gets.strip)
    end

    # test reading CSV with headers
    CSV.open(@path, "rb", headers: [:b, :a, :c],
                          converters: :all) do |csv|
      csv.each { |line| assert_equal(lines.shift, line.to_hash) }
    end
  end

  def test_write_hash_with_headers_string
    File.unlink(@path)

    lines = [{"a" => 1, "b" => 2, "c" => 3}, {"a" => 4, "b" => 5, "c" => 6}]
    CSV.open(@path, "wb", headers: "b|a|c", col_sep: "|") do |csv|
      lines.each { |line| csv << line }
    end

    # test writing fields in the correct order
    File.open(@path, "rb") do |f|
      assert_equal("2|1|3", f.gets.strip)
      assert_equal("5|4|6", f.gets.strip)
    end

    # test reading CSV with headers
    CSV.open(@path, "rb", headers: "b|a|c",
                          col_sep: "|",
                          converters: :all) do |csv|
      csv.each { |line| assert_equal(lines.shift, line.to_hash) }
    end
  end

  def test_write_headers
    File.unlink(@path)

    lines = [{"a" => 1, "b" => 2, "c" => 3}, {"a" => 4, "b" => 5, "c" => 6}]
    CSV.open(@path, "wb", headers: "b|a|c",
                          write_headers: true,
                          col_sep: "|") do |csv|
      lines.each { |line| csv << line }
    end

    # test writing fields in the correct order
    File.open(@path, "rb") do |f|
      assert_equal("b|a|c", f.gets.strip)
      assert_equal("2|1|3", f.gets.strip)
      assert_equal("5|4|6", f.gets.strip)
    end

    # test reading CSV with headers
    CSV.open(@path, "rb", headers: true,
                          col_sep: "|",
                          converters: :all) do |csv|
      csv.each { |line| assert_equal(lines.shift, line.to_hash) }
    end
  end

  def test_write_headers_empty
    File.unlink(@path)

    CSV.open(@path, "wb", headers: "b|a|c",
                          write_headers: true,
                          col_sep: "|") do |csv|
    end

    File.open(@path, "rb") do |f|
      assert_equal("b|a|c", f.gets.strip)
    end
  end

  def test_append # aliased add_row() and puts()
    # test_shift empties @expected, so write from a copy and restore
    # @expected before the second verification; otherwise the CSV::Row
    # half would write nothing and compare nil with nil.
    rows = @expected.dup

    File.unlink(@path)

    CSV.open(@path, "wb", col_sep: "\t", row_sep: "\r\n") do |csv|
      rows.each { |row| csv << row }
    end

    test_shift

    # same thing using CSV::Row objects
    @expected = rows.dup
    File.unlink(@path)

    CSV.open(@path, "wb", col_sep: "\t", row_sep: "\r\n") do |csv|
      rows.each { |row| csv << CSV::Row.new(Array.new, row) }
    end

    test_shift
  end

  ### Test Read and Write Interface ###

  def test_filter
    assert_respond_to(CSV, :filter)

    expected = [[1, 2, 3], [4, 5]]
    CSV.filter("1;2;3\n4;5\n", (result = String.new),
               in_col_sep: ";", out_col_sep: ",",
               converters: :all) do |row|
      assert_equal(row, expected.shift)
      row.map! { |n| n * 2 }
      row << "Added\r"
    end
    assert_equal("2,4,6,\"Added\r\"\n8,10,\"Added\r\"\n", result)
  end

  def test_instance
    csv = String.new

    first = nil
    assert_nothing_raised(Exception) do
      first = CSV.instance(csv, col_sep: ";")
      first << %w{a b c}
    end

    assert_equal("a;b;c\n", csv)

    second = nil
    assert_nothing_raised(Exception) do
      second = CSV.instance(csv, col_sep: ";")
      second << [1, 2, 3]
    end

    assert_equal(first.object_id, second.object_id)
    assert_equal("a;b;c\n1;2;3\n", csv)

    # shortcuts
    assert_equal(STDOUT, CSV.instance.instance_eval { @io })
    assert_equal(STDOUT, CSV { |new_csv| new_csv.instance_eval { @io } })
  end

  # Every class-level entry point must accept a frozen options Hash.
  def test_options_are_not_modified
    opt = {}.freeze
    assert_nothing_raised { CSV.foreach(@path, opt) }
    assert_nothing_raised { CSV.open(@path, opt){} }
    assert_nothing_raised { CSV.parse("", opt) }
    assert_nothing_raised { CSV.parse_line("", opt) }
    assert_nothing_raised { CSV.read(@path, opt) }
    assert_nothing_raised { CSV.readlines(@path, opt) }
    assert_nothing_raised { CSV.table(@path, opt) }
    assert_nothing_raised { CSV.generate(opt){} }
    assert_nothing_raised { CSV.generate_line([], opt) }
    assert_nothing_raised { CSV.filter("", "", opt){} }
    assert_nothing_raised { CSV.instance("", opt) }
  end
end

View file

@ -0,0 +1,53 @@
# -*- coding: utf-8 -*-
# frozen_string_literal: false
require_relative "../helper"
# Shared tests for the write_converters, write_nil_value and
# write_empty_value options. The including class must provide
# generate_line(row, **kwargs) returning the generated text.
module TestCSVWriteConverters
  def test_one
    prefix = ->(value) {"=" + value}
    line = generate_line(["a", "b", "c"], write_converters: prefix)
    assert_equal(%Q[=a,=b,=c\n], line)
  end

  # Multiple converters are applied in the order given.
  def test_multiple
    prefix = ->(value) {"=" + value}
    suffix = ->(value) {value + "_"}
    line = generate_line(["a", "b", "c"],
                         write_converters: [prefix, suffix])
    assert_equal(%Q[=a_,=b_,=c_\n], line)
  end

  def test_nil_value
    line = generate_line(["a", nil, "c"], write_nil_value: "NaN")
    assert_equal(%Q[a,NaN,c\n], line)
  end

  def test_empty_value
    line = generate_line(["a", "", "c"], write_empty_value: nil)
    assert_equal(%Q[a,,c\n], line)
  end
end
# Runs the shared write converter tests through CSV.generate_line.
class TestCSVWriteConvertersGenerateLine < Test::Unit::TestCase
  include TestCSVWriteConverters
  extend DifferentOFS

  # Generates one line for +row+ with the given options.
  def generate_line(row, **options)
    CSV.generate_line(row, **options)
  end
end
# Runs the shared write converter tests through CSV.generate.
class TestCSVWriteConvertersGenerate < Test::Unit::TestCase
  include TestCSVWriteConverters
  extend DifferentOFS

  # Generates a document holding only +row+ with the given options.
  def generate_line(row, **options)
    CSV.generate(**options) {|csv| csv << row}
  end
end