mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
Import CSV 3.0.2
This includes performance improvement especially writing. Writing is about 2 times faster. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@66507 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
parent
c20a1946a6
commit
e5d634260e
14 changed files with 1466 additions and 687 deletions
6
NEWS
6
NEWS
|
@ -404,6 +404,12 @@ sufficient information, see the ChangeLog file or Redmine
|
|||
* Coverage.line_stub, which is a simple helper function that
|
||||
creates the "stub" of line coverage from a given source code.
|
||||
|
||||
[CSV]
|
||||
|
||||
* Upgrade to 3.0.2. This includes performance improvement especially
|
||||
writing. Writing is about 2 times faster.
|
||||
https://github.com/ruby/csv/blob/master/NEWS.md
|
||||
|
||||
[ERB]
|
||||
|
||||
[New options]
|
||||
|
|
843
lib/csv.rb
843
lib/csv.rb
File diff suppressed because it is too large
Load diff
|
@ -18,12 +18,26 @@ Gem::Specification.new do |spec|
|
|||
spec.homepage = "https://github.com/ruby/csv"
|
||||
spec.license = "BSD-2-Clause"
|
||||
|
||||
spec.files = ["lib/csv.rb", "lib/csv/table.rb", "lib/csv/core_ext/string.rb", "lib/csv/core_ext/array.rb", "lib/csv/row.rb", "lib/csv/version.rb"]
|
||||
spec.files += ["README.md", "LICENSE.txt", "news.md"]
|
||||
spec.files = [
|
||||
"LICENSE.txt",
|
||||
"NEWS.md",
|
||||
"README.md",
|
||||
"lib/csv.rb",
|
||||
"lib/csv/core_ext/array.rb",
|
||||
"lib/csv/core_ext/string.rb",
|
||||
"lib/csv/fields_converter.rb",
|
||||
"lib/csv/match_p.rb",
|
||||
"lib/csv/parser.rb",
|
||||
"lib/csv/row.rb",
|
||||
"lib/csv/table.rb",
|
||||
"lib/csv/version.rb",
|
||||
"lib/csv/writer.rb",
|
||||
]
|
||||
spec.require_paths = ["lib"]
|
||||
spec.required_ruby_version = ">= 2.3.0"
|
||||
|
||||
spec.add_development_dependency "bundler"
|
||||
spec.add_development_dependency "rake"
|
||||
spec.add_development_dependency "benchmark-ips"
|
||||
spec.add_development_dependency "simplecov"
|
||||
end
|
||||
|
|
78
lib/csv/fields_converter.rb
Normal file
78
lib/csv/fields_converter.rb
Normal file
|
@ -0,0 +1,78 @@
|
|||
# frozen_string_literal: true
|
||||
|
||||
class CSV
  # Holds an ordered list of field converters and applies them to the
  # fields of a row.
  #
  # Besides the converter pipeline, it performs two "static" substitutions
  # configured through +options+:
  #
  # * +:nil_value+ replaces +nil+ fields.
  # * +:empty_value+ replaces empty String fields (unless it is "" itself).
  #
  # Other recognised options are +:accept_nil+ (stop converting a field once
  # it is +nil+) and +:builtin_converters+ (a Hash of name => converter, or
  # name => Array of names, used to resolve named converters).
  class FieldsConverter
    include Enumerable

    def initialize(options={})
      @converters = []
      @nil_value = options[:nil_value]
      @empty_value = options[:empty_value]
      @empty_value_is_empty_string = (@empty_value == "")
      @accept_nil = options[:accept_nil]
      @builtin_converters = options[:builtin_converters]
      # Computed once: the options never change after construction.
      @need_static_convert = need_static_convert?
    end

    # Appends a converter to the pipeline.
    #
    # With no +name+, the given block is added as a custom converter.
    # With a +name+, the converter is looked up in the builtin converters;
    # an Array entry is treated as a combo and expanded recursively.
    def add_converter(name=nil, &converter)
      if name.nil?  # custom converter
        @converters << converter
      else          # named converter
        combo = @builtin_converters[name]
        case combo
        when Array  # combo converter
          combo.each do |sub_name|
            add_converter(sub_name)
          end
        else        # individual named converter
          @converters << combo
        end
      end
    end

    # Yields each registered converter in order (backs Enumerable).
    def each(&block)
      @converters.each(&block)
    end

    # Returns +true+ when no converter has been registered.
    def empty?
      @converters.empty?
    end

    # Returns +fields+ with nil/empty substitution and all converters applied.
    #
    # +headers+ (may be +nil+) and +lineno+ are only used to build the
    # FieldInfo passed to converters that take more than one argument.
    # When nothing needs converting, +fields+ itself is returned.
    def convert(fields, headers, lineno)
      return fields unless need_convert?

      fields.collect.with_index do |field, index|
        if field.nil?
          field = @nil_value
        elsif field.is_a?(String) and field.empty?
          # Only Strings can be "empty" fields. Callers may pass other
          # objects (for example Integer headers given as an Array), which
          # do not respond to #empty? and must be left untouched here.
          field = @empty_value unless @empty_value_is_empty_string
        end
        @converters.each do |converter|
          break if field.nil? and @accept_nil
          if converter.arity == 1  # straight field converter
            field = converter[field]
          else                     # FieldInfo converter
            if headers
              header = headers[index]
            else
              header = nil
            end
            field = converter[field, FieldInfo.new(index, lineno, header)]
          end
          break unless field.is_a?(String)  # short-circuit pipeline for speed
        end
        field  # final state of each field, converted or original
      end
    end

    private
    # True when nil or empty fields must be replaced even without converters.
    def need_static_convert?
      not (@nil_value.nil? and @empty_value_is_empty_string)
    end

    # True when #convert has any work to do.
    def need_convert?
      @need_static_convert or
        (not @converters.empty?)
    end
  end
end
|
20
lib/csv/match_p.rb
Normal file
20
lib/csv/match_p.rb
Normal file
|
@ -0,0 +1,20 @@
|
|||
# frozen_string_literal: true
|
||||
|
||||
# This provides String#match? and Regexp#match? for Ruby 2.3.
|
||||
unless String.method_defined?(:match?)
  class CSV
    # Refinements that give String and Regexp a +match?+ method on Rubies
    # that lack one. Each simply delegates to +=~+, so the result is the
    # match offset (truthy) or +nil+.
    module MatchP
      refine(String) do
        def match?(pattern)
          self =~ pattern
        end
      end

      refine(Regexp) do
        def match?(string)
          self =~ string
        end
      end
    end
  end
end
|
713
lib/csv/parser.rb
Normal file
713
lib/csv/parser.rb
Normal file
|
@ -0,0 +1,713 @@
|
|||
# frozen_string_literal: true
|
||||
|
||||
require "strscan"
|
||||
|
||||
require_relative "match_p"
|
||||
require_relative "row"
|
||||
require_relative "table"
|
||||
|
||||
using CSV::MatchP if CSV.const_defined?(:MatchP)
|
||||
|
||||
class CSV
  # Parses CSV data from +input+ row by row, driven by an options Hash
  # (separators, quote character, header handling, converters, ...).
  class Parser
    # Raised by the scanners when a chunk of input is not valid in its
    # encoding; #parse turns it into a MalformedCSVError.
    class InvalidEncoding < StandardError
    end

    # StringScanner over a single in-memory String, with a stack of "keep"
    # positions used to recover the raw text of a line or to backtrack.
    class Scanner < StringScanner
      alias_method :scan_all, :scan

      def initialize(*args)
        super
        @keeps = []
      end

      # Remembers the current position.
      def keep_start
        @keeps.push(pos)
      end

      # Returns the text scanned since the matching #keep_start.
      def keep_end
        start = @keeps.pop
        # StringScanner#pos is a byte offset, so slice by bytes; character
        # indexing returns the wrong text for multibyte data.
        string.byteslice(start, pos - start)
      end

      # Rewinds to the position of the matching #keep_start.
      def keep_back
        self.pos = @keeps.pop
      end

      # Forgets the most recent #keep_start.
      def keep_drop
        @keeps.pop
      end
    end

    # Scanner over a list of inputs (StringIO objects or objects responding
    # to #gets) that are read chunk by chunk. Offers the same keep_* API as
    # Scanner; kept text that spans chunks is accumulated in a buffer.
    class InputsScanner
      def initialize(inputs, encoding, chunk_size: 8192)
        @inputs = inputs.dup
        @encoding = encoding
        @chunk_size = chunk_size
        @last_scanner = @inputs.empty?
        @keeps = []
        read_chunk
      end

      # Scans +pattern+ in the current chunk; loads the next chunk when the
      # current one has been consumed.
      def scan(pattern)
        value = @scanner.scan(pattern)
        return value if @last_scanner

        if value
          read_chunk if @scanner.eos?
          return value
        else
          nil
        end
      end

      # Like #scan, but keeps matching across chunk boundaries and returns
      # the concatenated result.
      def scan_all(pattern)
        value = @scanner.scan(pattern)
        return value if @last_scanner

        return nil if value.nil?
        while @scanner.eos? and read_chunk and (sub_value = @scanner.scan(pattern))
          value << sub_value
        end
        value
      end

      def eos?
        @scanner.eos?
      end

      # Remembers the current position; the second slot holds text from
      # already discarded chunks (filled in by #read_chunk).
      def keep_start
        @keeps.push([@scanner.pos, nil])
      end

      # Returns the text scanned since the matching #keep_start.
      def keep_end
        start, buffer = @keeps.pop
        # Positions are byte offsets (StringScanner#pos), so slice by bytes.
        keep = @scanner.string.byteslice(start, @scanner.pos - start)
        if buffer
          buffer << keep
          keep = buffer
        end
        keep
      end

      # Rewinds to the matching #keep_start. When earlier chunks were
      # buffered, the current chunk is pushed back as a pending input and
      # scanning restarts on the buffered text.
      def keep_back
        start, buffer = @keeps.pop
        if buffer
          string = @scanner.string
          # Byte-based for the same reason as in #keep_end.
          keep = string.byteslice(start, string.bytesize - start)
          if keep and not keep.empty?
            @inputs.unshift(StringIO.new(keep))
            @last_scanner = false
          end
          @scanner = StringScanner.new(buffer)
        else
          @scanner.pos = start
        end
      end

      # Forgets the most recent #keep_start.
      def keep_drop
        @keeps.pop
      end

      def rest
        @scanner.rest
      end

      private
      # Loads the next chunk into a fresh StringScanner. Returns +true+ when
      # a chunk was loaded and +false+ when all inputs are exhausted.
      def read_chunk
        return false if @last_scanner

        unless @keeps.empty?
          # Save the kept part of the chunk we are about to drop.
          keep = @keeps.last
          keep_start = keep[0]
          string = @scanner.string
          keep_data = string.byteslice(keep_start, @scanner.pos - keep_start)
          if keep_data
            keep_buffer = keep[1]
            if keep_buffer
              keep_buffer << keep_data
            else
              keep[1] = keep_data.dup
            end
          end
          keep[0] = 0
        end

        input = @inputs.first
        case input
        when StringIO
          string = input.string
          raise InvalidEncoding unless string.valid_encoding?
          @scanner = StringScanner.new(string)
          @inputs.shift
          @last_scanner = @inputs.empty?
          true
        else
          chunk = input.gets(nil, @chunk_size)
          if chunk
            raise InvalidEncoding unless chunk.valid_encoding?
            @scanner = StringScanner.new(chunk)
            if input.respond_to?(:eof?) and input.eof?
              @inputs.shift
              @last_scanner = @inputs.empty?
            end
            true
          else
            @scanner = StringScanner.new("".encode(@encoding))
            @inputs.shift
            @last_scanner = @inputs.empty?
            if @last_scanner
              false
            else
              read_chunk
            end
          end
        end
      end
    end

    # Configuration resolved from the options and the current parse state.
    attr_reader :column_separator
    attr_reader :row_separator
    attr_reader :quote_character
    attr_reader :field_size_limit
    attr_reader :skip_lines
    attr_reader :headers
    attr_reader :lineno

    def initialize(input, options)
      @input = input
      @options = options
      @samples = []

      prepare
    end

    def unconverted_fields?
      @unconverted_fields
    end

    # True while headers are in use but have not been read yet, i.e. the
    # next emitted row will be taken as the header row.
    def header_row?
      @use_headers and @headers.nil?
    end

    def return_headers?
      @return_headers
    end

    def skip_blanks?
      @skip_blanks
    end

    def liberal_parsing?
      @liberal_parsing
    end

    # The raw text of the most recently parsed line.
    def line
      last_line
    end

    # Parses the input and yields each row to the block; returns an
    # Enumerator when no block is given. Raises MalformedCSVError on
    # invalid CSV or invalid byte sequences.
    def parse(&block)
      return to_enum(__method__) unless block_given?

      if @return_headers and @headers
        headers = Row.new(@headers, @raw_headers, true)
        if @unconverted_fields
          headers = add_unconverted_fields(headers, [])
        end
        yield headers
      end

      row = []
      begin
        @scanner = build_scanner
        skip_needless_lines
        start_row
        while true
          @quoted_column_value = false
          @unquoted_column_value = false
          value = parse_column_value
          if value and @field_size_limit and value.size >= @field_size_limit
            raise MalformedCSVError.new("Field size exceeded", @lineno + 1)
          end
          if parse_column_end
            row << value
          elsif parse_row_end
            if row.empty? and value.nil?
              emit_row([], &block) unless @skip_blanks
            else
              row << value
              emit_row(row, &block)
              row = []
            end
            skip_needless_lines
            start_row
          elsif @scanner.eos?
            return if row.empty? and value.nil?
            row << value
            emit_row(row, &block)
            return
          else
            if @quoted_column_value
              message = "Do not allow except col_sep_split_separator " +
                "after quoted fields"
              raise MalformedCSVError.new(message, @lineno + 1)
            elsif @unquoted_column_value and @scanner.scan(@cr_or_lf)
              message = "Unquoted fields do not allow \\r or \\n"
              raise MalformedCSVError.new(message, @lineno + 1)
            elsif @scanner.rest.start_with?(@quote_character)
              message = "Illegal quoting"
              raise MalformedCSVError.new(message, @lineno + 1)
            else
              raise MalformedCSVError.new("TODO: Meaningful message",
                                          @lineno + 1)
            end
          end
        end
      rescue InvalidEncoding
        message = "Invalid byte sequence in #{@encoding}"
        raise MalformedCSVError.new(message, @lineno + 1)
      end
    end

    private
    def prepare
      prepare_variable
      prepare_regexp
      prepare_line
      prepare_header
      prepare_parser
    end

    def prepare_variable
      @encoding = @options[:encoding]
      @liberal_parsing = @options[:liberal_parsing]
      @unconverted_fields = @options[:unconverted_fields]
      @field_size_limit = @options[:field_size_limit]
      @skip_blanks = @options[:skip_blanks]
      @fields_converter = @options[:fields_converter]
      @header_fields_converter = @options[:header_fields_converter]
    end

    # Resolves the separators, quote character and :skip_lines, and builds
    # every Regexp used while scanning.
    def prepare_regexp
      @column_separator = @options[:column_separator].to_s.encode(@encoding)
      @row_separator =
        resolve_row_separator(@options[:row_separator]).encode(@encoding)
      @quote_character = @options[:quote_character].to_s.encode(@encoding)
      if @quote_character.length != 1
        raise ArgumentError, ":quote_char has to be a single character String"
      end

      escaped_column_separator = Regexp.escape(@column_separator)
      escaped_row_separator = Regexp.escape(@row_separator)
      escaped_quote_character = Regexp.escape(@quote_character)

      skip_lines = @options[:skip_lines]
      case skip_lines
      when String
        @skip_lines = skip_lines.encode(@encoding)
      when Regexp, nil
        @skip_lines = skip_lines
      else
        unless skip_lines.respond_to?(:match)
          message =
            ":skip_lines has to respond to \#match: #{skip_lines.inspect}"
          raise ArgumentError, message
        end
        @skip_lines = skip_lines
      end

      @column_end = Regexp.new(escaped_column_separator)
      if @column_separator.size > 1
        # Per-character patterns let a multi-character separator be matched
        # even when it is split across input chunks.
        @column_ends = @column_separator.each_char.collect do |char|
          Regexp.new(Regexp.escape(char))
        end
      else
        @column_ends = nil
      end
      @row_end = Regexp.new(escaped_row_separator)
      if @row_separator.size > 1
        @row_ends = @row_separator.each_char.collect do |char|
          Regexp.new(Regexp.escape(char))
        end
      else
        @row_ends = nil
      end
      @quotes = Regexp.new(escaped_quote_character +
                           "+".encode(@encoding))
      @quoted_value = Regexp.new("[^".encode(@encoding) +
                                 escaped_quote_character +
                                 "]+".encode(@encoding))
      if @liberal_parsing
        @unquoted_value = Regexp.new("[^".encode(@encoding) +
                                     escaped_column_separator +
                                     "\r\n]+".encode(@encoding))
      else
        @unquoted_value = Regexp.new("[^".encode(@encoding) +
                                     escaped_quote_character +
                                     escaped_column_separator +
                                     "\r\n]+".encode(@encoding))
      end
      @cr_or_lf = Regexp.new("[\r\n]".encode(@encoding))
      @not_line_end = Regexp.new("[^\r\n]+".encode(@encoding))
    end

    # Returns the row separator to use. For +:auto+ the input is sampled to
    # detect "\r\n", "\n" or "\r"; every sample read from a non-StringIO
    # input is stored in @samples so it can be replayed when parsing.
    # Falls back to $INPUT_RECORD_SEPARATOR when nothing is detected.
    def resolve_row_separator(separator)
      if separator == :auto
        cr = "\r".encode(@encoding)
        lf = "\n".encode(@encoding)
        if @input.is_a?(StringIO)
          separator = detect_row_separator(@input.string, cr, lf)
        elsif @input.respond_to?(:gets)
          if @input.is_a?(File)
            chunk_size = 32 * 1024
          else
            chunk_size = 1024
          end
          begin
            while separator == :auto
              #
              # if we run out of data, it's probably a single line
              # (the fallback below will set the default value)
              #
              break unless sample = @input.gets(nil, chunk_size)

              # extend sample if we're unsure of the line ending
              if sample.end_with?(cr)
                sample << (@input.gets(nil, 1) || "")
              end

              @samples << sample

              separator = detect_row_separator(sample, cr, lf)
            end
          rescue IOError
            # do nothing: the fallback below will set the default
          end
        end
        separator = $INPUT_RECORD_SEPARATOR if separator == :auto
      end
      separator.to_s.encode(@encoding)
    end

    # Returns the line ending that terminates the first line of +sample+:
    # +cr+ + +lf+, +cr+ or +lf+, or +:auto+ when the sample contains none.
    def detect_row_separator(sample, cr, lf)
      lf_index = sample.index(lf)
      cr_index = sample.index(cr)
      # A CR found only after the first LF belongs to a later line, so it
      # says nothing about how the first line is terminated.
      cr_index = nil if lf_index and cr_index and cr_index > lf_index

      if cr_index and lf_index
        if cr_index + 1 == lf_index
          cr + lf
        else
          cr
        end
      elsif cr_index
        cr
      elsif lf_index
        lf
      else
        :auto
      end
    end

    def prepare_line
      @lineno = 0
      @last_line = nil
      @scanner = nil
    end

    # Lazily extracts the raw text of the current line from the scanner and
    # caches it until the next row starts.
    def last_line
      if @scanner
        @last_line ||= @scanner.keep_end
      else
        @last_line
      end
    end

    # Interprets the :headers option: an Array or String supplies the
    # headers up front, nil/false disables headers, and any other value
    # means the first parsed row provides them.
    def prepare_header
      @return_headers = @options[:return_headers]

      headers = @options[:headers]
      case headers
      when Array
        @raw_headers = headers
        @use_headers = true
      when String
        @raw_headers = parse_headers(headers)
        @use_headers = true
      when nil, false
        @raw_headers = nil
        @use_headers = false
      else
        @raw_headers = nil
        @use_headers = true
      end
      if @raw_headers
        @headers = adjust_headers(@raw_headers)
      else
        @headers = nil
      end
    end

    def parse_headers(row)
      CSV.parse_line(row,
                     col_sep: @column_separator,
                     row_sep: @row_separator,
                     quote_char: @quote_character)
    end

    # Runs the header converters and freezes the resulting String headers.
    def adjust_headers(headers)
      adjusted_headers = @header_fields_converter.convert(headers, nil, @lineno)
      adjusted_headers.each {|h| h.freeze if h.is_a? String}
      adjusted_headers
    end

    def prepare_parser
      @may_quoted = may_quoted?
    end

    # Heuristic: truthy when the quote character occurs within the first
    # 128 characters of the available sample. Only decides whether quoted
    # or unquoted parsing is tried first.
    def may_quoted?
      if @input.is_a?(StringIO)
        sample = @input.string
      else
        return false if @samples.empty?
        sample = @samples.first
      end
      sample[0, 128].index(@quote_character)
    end

    # When CSV_PARSER_SCANNER_TEST=yes, always use InputsScanner with
    # one-byte chunks so the chunk-boundary code paths get exercised.
    SCANNER_TEST = (ENV["CSV_PARSER_SCANNER_TEST"] == "yes")
    if SCANNER_TEST
      # Wraps a StringIO so that InputsScanner does not take its StringIO
      # fast path.
      class UnoptimizedStringIO
        def initialize(string)
          @io = StringIO.new(string)
        end

        def gets(*args)
          @io.gets(*args)
        end

        def eof?
          @io.eof?
        end
      end

      def build_scanner
        inputs = @samples.collect do |sample|
          UnoptimizedStringIO.new(sample)
        end
        if @input.is_a?(StringIO)
          inputs << UnoptimizedStringIO.new(@input.string)
        else
          inputs << @input
        end
        InputsScanner.new(inputs, @encoding, chunk_size: 1)
      end
    else
      # Uses the single-String Scanner when the whole input is already in
      # memory, and the chunked InputsScanner otherwise.
      def build_scanner
        string = nil
        if @samples.empty? and @input.is_a?(StringIO)
          string = @input.string
        elsif @samples.size == 1 and @input.respond_to?(:eof?) and @input.eof?
          string = @samples[0]
        end
        if string
          unless string.valid_encoding?
            message = "Invalid byte sequence in #{@encoding}"
            raise MalformedCSVError.new(message, @lineno + 1)
          end
          Scanner.new(string)
        else
          inputs = @samples.collect do |sample|
            StringIO.new(sample)
          end
          inputs << @input
          InputsScanner.new(inputs, @encoding)
        end
      end
    end

    # Consumes leading lines that match :skip_lines, leaving the scanner at
    # the start of the first line that does not.
    def skip_needless_lines
      return unless @skip_lines

      while true
        @scanner.keep_start
        line = @scanner.scan_all(@not_line_end) || "".encode(@encoding)
        line << @row_separator if parse_row_end
        if skip_line?(line)
          @scanner.keep_drop
        else
          @scanner.keep_back
          return
        end
      end
    end

    def skip_line?(line)
      case @skip_lines
      when String
        line.include?(@skip_lines)
      when Regexp
        @skip_lines.match?(line)
      else
        @skip_lines.match(line)
      end
    end

    # Returns the next column value, or +nil+ when there is none at the
    # current position.
    def parse_column_value
      if @liberal_parsing
        quoted_value = parse_quoted_column_value
        if quoted_value
          unquoted_value = parse_unquoted_column_value
          if unquoted_value
            # Quoted text followed by unquoted text is kept verbatim,
            # quotes included.
            @quote_character + quoted_value + @quote_character + unquoted_value
          else
            quoted_value
          end
        else
          parse_unquoted_column_value
        end
      elsif @may_quoted
        parse_quoted_column_value ||
          parse_unquoted_column_value
      else
        parse_unquoted_column_value ||
          parse_quoted_column_value
      end
    end

    def parse_unquoted_column_value
      value = @scanner.scan_all(@unquoted_value)
      @unquoted_column_value = true if value
      value
    end

    # Parses a quoted field and returns its unescaped content (doubled
    # quote characters collapse to one), or +nil+ when the scanner is not
    # at a quote. Raises MalformedCSVError when the field is never closed.
    def parse_quoted_column_value
      quotes = @scanner.scan_all(@quotes)
      return nil unless quotes

      @quoted_column_value = true
      n_quotes = quotes.size
      if (n_quotes % 2).zero?
        # Opening and closing quote are both in the run; the rest are
        # escaped quotes.
        quotes[0, (n_quotes - 2) / 2]
      else
        value = quotes[0, (n_quotes - 1) / 2]
        while true
          quoted_value = @scanner.scan_all(@quoted_value)
          value << quoted_value if quoted_value
          quotes = @scanner.scan_all(@quotes)
          unless quotes
            message = "Unclosed quoted field"
            raise MalformedCSVError.new(message, @lineno + 1)
          end
          n_quotes = quotes.size
          if n_quotes == 1
            break
          elsif (n_quotes % 2) == 1
            # An odd run ends with the closing quote.
            value << quotes[0, (n_quotes - 1) / 2]
            break
          else
            # An even run is only escaped quotes; the field continues.
            value << quotes[0, n_quotes / 2]
          end
        end
        value
      end
    end

    # Consumes a column separator if one is next; returns whether it did.
    def parse_column_end
      return true if @scanner.scan(@column_end)
      return false unless @column_ends

      @scanner.keep_start
      if @column_ends.all? {|column_end| @scanner.scan(column_end)}
        @scanner.keep_drop
        true
      else
        @scanner.keep_back
        false
      end
    end

    # Consumes a row separator if one is next; returns whether it did.
    def parse_row_end
      return true if @scanner.scan(@row_end)
      return false unless @row_ends
      @scanner.keep_start
      if @row_ends.all? {|row_end| @scanner.scan(row_end)}
        @scanner.keep_drop
        true
      else
        @scanner.keep_back
        false
      end
    end

    # Marks the start of a new line for #line. The previous line's keep is
    # dropped unless #last_line already consumed it.
    def start_row
      if @last_line
        @last_line = nil
      else
        @scanner.keep_drop
      end
      @scanner.keep_start
    end

    # Converts +row+ according to the header and converter settings and
    # yields it. The row that supplies the headers is only yielded when
    # :return_headers is set.
    def emit_row(row, &block)
      @lineno += 1

      raw_row = row
      if @use_headers
        if @headers.nil?
          @headers = adjust_headers(row)
          return unless @return_headers
          row = Row.new(@headers, row, true)
        else
          row = Row.new(@headers,
                        @fields_converter.convert(raw_row, @headers, @lineno))
        end
      else
        # convert fields, if needed...
        row = @fields_converter.convert(raw_row, nil, @lineno)
      end

      # inject unconverted fields and accessor, if requested...
      if @unconverted_fields and not row.respond_to?(:unconverted_fields)
        add_unconverted_fields(row, raw_row)
      end

      yield(row)
    end

    # This method injects an instance variable <tt>unconverted_fields</tt> into
    # +row+ and an accessor method for +row+ called unconverted_fields().  The
    # variable is set to the contents of +fields+.
    def add_unconverted_fields(row, fields)
      class << row
        attr_reader :unconverted_fields
      end
      row.instance_variable_set(:@unconverted_fields, fields)
      row
    end
  end
end
|
|
@ -16,6 +16,11 @@ class CSV
|
|||
# Construct a new CSV::Table from +array_of_rows+, which are expected
|
||||
# to be CSV::Row objects. All rows are assumed to have the same headers.
|
||||
#
|
||||
# The optional +headers+ parameter can be set to Array of headers.
|
||||
# If headers aren't set, headers are fetched from CSV::Row objects.
|
||||
# Otherwise, headers() method will return headers being set in
|
||||
# headers argument.
|
||||
#
|
||||
# A CSV::Table object supports the following Array methods through
|
||||
# delegation:
|
||||
#
|
||||
|
@ -23,8 +28,17 @@ class CSV
|
|||
# * length()
|
||||
# * size()
|
||||
#
|
||||
def initialize(array_of_rows)
|
||||
def initialize(array_of_rows, headers: nil)
|
||||
@table = array_of_rows
|
||||
@headers = headers
|
||||
unless @headers
|
||||
if @table.empty?
|
||||
@headers = []
|
||||
else
|
||||
@headers = @table.first.headers
|
||||
end
|
||||
end
|
||||
|
||||
@mode = :col_or_row
|
||||
end
|
||||
|
||||
|
@ -122,11 +136,7 @@ class CSV
|
|||
# other rows). An empty Array is returned for empty tables.
|
||||
#
|
||||
def headers
|
||||
if @table.empty?
|
||||
Array.new
|
||||
else
|
||||
@table.first.headers
|
||||
end
|
||||
@headers.dup
|
||||
end
|
||||
|
||||
#
|
||||
|
@ -171,6 +181,10 @@ class CSV
|
|||
@table[index_or_header] = value
|
||||
end
|
||||
else # set column
|
||||
unless index_or_header.is_a? Integer
|
||||
index = @headers.index(index_or_header) || @headers.size
|
||||
@headers[index] = index_or_header
|
||||
end
|
||||
if value.is_a? Array # multiple values
|
||||
@table.each_with_index do |row, i|
|
||||
if row.header_row?
|
||||
|
@ -258,6 +272,11 @@ class CSV
|
|||
(@mode == :col_or_row and index_or_header.is_a? Integer)
|
||||
@table.delete_at(index_or_header)
|
||||
else # by header
|
||||
if index_or_header.is_a? Integer
|
||||
@headers.delete_at(index_or_header)
|
||||
else
|
||||
@headers.delete(index_or_header)
|
||||
end
|
||||
@table.map { |row| row.delete(index_or_header).last }
|
||||
end
|
||||
end
|
||||
|
|
|
@ -2,5 +2,5 @@
|
|||
|
||||
class CSV
|
||||
# The version of the installed library.
|
||||
VERSION = "3.0.1"
|
||||
VERSION = "3.0.2"
|
||||
end
|
||||
|
|
144
lib/csv/writer.rb
Normal file
144
lib/csv/writer.rb
Normal file
|
@ -0,0 +1,144 @@
|
|||
# frozen_string_literal: true
|
||||
|
||||
require_relative "match_p"
|
||||
require_relative "row"
|
||||
|
||||
using CSV::MatchP if CSV.const_defined?(:MatchP)
|
||||
|
||||
class CSV
  # Serialises rows as CSV lines and appends them to +output+.
  #
  # +options+ supplies the encoding, separators, quote character, header
  # settings (:headers, :write_headers, :header_fields_converter) and the
  # :force_quotes / :force_encoding switches.
  class Writer
    attr_reader :lineno
    attr_reader :headers

    def initialize(output, options)
      @output = output
      @options = options
      @lineno = 0
      prepare
      self << @headers if @options[:write_headers] and @headers
    end

    # Writes one row and returns +self+ so calls can be chained.
    #
    # +row+ may be a CSV::Row (its fields are written), a Hash (values are
    # picked in header order) or anything responding to #collect. When
    # headers are enabled but not yet known, the first row becomes them.
    def <<(row)
      fields =
        case row
        when Row
          row.fields
        when Hash
          @headers.collect {|header| row[header]}
        else
          row
        end

      @headers ||= fields if @use_headers
      @lineno += 1

      line = fields.collect(&@quote).join(@column_separator) + @row_separator
      line = line.encode(@output_encoding) if @output_encoding
      @output << line

      self
    end

    # Resets the line counter; headers taken from the first written row
    # (i.e. not given through the :headers option) are forgotten as well.
    def rewind
      @lineno = 0
      @headers = nil if @options[:headers].nil?
    end

    private
    def prepare
      @encoding = @options[:encoding]

      prepare_header
      prepare_format
      prepare_output
    end

    # Interprets the :headers option (Array, String to be parsed, +true+,
    # or anything else meaning "no headers"), then converts and freezes the
    # headers when they are known up front.
    def prepare_header
      header_option = @options[:headers]
      case header_option
      when Array
        @headers = header_option
        @use_headers = true
      when String
        @headers = CSV.parse_line(header_option,
                                  col_sep: @options[:column_separator],
                                  row_sep: @options[:row_separator],
                                  quote_char: @options[:quote_character])
        @use_headers = true
      when true
        @headers = nil
        @use_headers = true
      else
        @headers = nil
        @use_headers = false
      end
      return unless @headers

      @headers = @options[:header_fields_converter].convert(@headers, nil, 0)
      @headers.each do |header|
        header.freeze if header.is_a?(String)
      end
    end

    # Resolves the separators and builds @quote, the callable that turns a
    # single field into its CSV text.
    def prepare_format
      @column_separator = @options[:column_separator].to_s.encode(@encoding)

      configured_row_separator = @options[:row_separator]
      @row_separator =
        if configured_row_separator == :auto
          $INPUT_RECORD_SEPARATOR.encode(@encoding)
        else
          configured_row_separator.to_s.encode(@encoding)
        end

      quote_character = @options[:quote_character]
      # Wraps the field in quote characters, doubling any it contains.
      always_quote = lambda do |field|
        text = String(field)
        quote_mark = quote_character.encode(text.encoding)
        quote_mark + text.gsub(quote_mark, quote_mark * 2) + quote_mark
      end

      if @options[:force_quotes]
        @quote = always_quote
        return
      end

      # A field needs quoting when it contains CR, LF, the column separator
      # or the quote character.
      needs_quoting =
        Regexp.new("[\r\n".encode(@encoding) +
                   Regexp.escape(@column_separator) +
                   Regexp.escape(quote_character.encode(@encoding)) +
                   "]".encode(@encoding))
      @quote = lambda do |field|
        # represent +nil+ fields as empty unquoted fields
        next "" if field.nil?

        text = String(field)  # Stringify fields
        # represent empty fields as empty quoted fields
        if text.empty? or needs_quoting.match?(text)
          always_quote.call(text)
        else
          text  # unquoted field
        end
      end
    end

    # For StringIO outputs whose encoding differs from the writer's: either
    # transcode each line (:force_encoding) or, when the two encodings are
    # compatible, switch the output to the compatible encoding and move to
    # its end.
    def prepare_output
      @output_encoding = nil
      return unless @output.is_a?(StringIO)

      io_encoding = @output.internal_encoding || @output.external_encoding
      return if @encoding == io_encoding

      if @options[:force_encoding]
        @output_encoding = io_encoding
        return
      end

      compatible_encoding = Encoding.compatible?(@encoding, io_encoding)
      return unless compatible_encoding

      @output.set_encoding(compatible_encoding)
      @output.seek(0, IO::SEEK_END)
    end
  end
end
|
|
@ -143,55 +143,52 @@ class TestCSV::Parsing < TestCSV
|
|||
end
|
||||
end
|
||||
|
||||
def test_malformed_csv
|
||||
assert_raise(CSV::MalformedCSVError) do
|
||||
def test_malformed_csv_cr_first_line
|
||||
error = assert_raise(CSV::MalformedCSVError) do
|
||||
CSV.parse_line("1,2\r,3", row_sep: "\n")
|
||||
end
|
||||
assert_equal("Unquoted fields do not allow \\r or \\n in line 1.",
|
||||
error.message)
|
||||
end
|
||||
|
||||
bad_data = <<-CSV
|
||||
def test_malformed_csv_cr_middle_line
|
||||
csv = <<-CSV
|
||||
line,1,abc
|
||||
line,2,"def\nghi"
|
||||
|
||||
line,4,some\rjunk
|
||||
line,5,jkl
|
||||
CSV
|
||||
lines = bad_data.lines.to_a
|
||||
assert_equal(6, lines.size)
|
||||
assert_match(/\Aline,4/, lines.find { |l| l =~ /some\rjunk/ })
|
||||
|
||||
csv = CSV.new(bad_data)
|
||||
begin
|
||||
loop do
|
||||
assert_not_nil(csv.shift)
|
||||
assert_send([csv.lineno, :<, 4])
|
||||
end
|
||||
rescue CSV::MalformedCSVError
|
||||
assert_equal( "Unquoted fields do not allow \\r or \\n in line 4.",
|
||||
$!.message )
|
||||
error = assert_raise(CSV::MalformedCSVError) do
|
||||
CSV.parse(csv)
|
||||
end
|
||||
assert_equal("Unquoted fields do not allow \\r or \\n in line 4.",
|
||||
error.message)
|
||||
end
|
||||
|
||||
assert_raise(CSV::MalformedCSVError) { CSV.parse_line('1,2,"3...') }
|
||||
def test_malformed_csv_unclosed_quote
|
||||
error = assert_raise(CSV::MalformedCSVError) do
|
||||
CSV.parse_line('1,2,"3...')
|
||||
end
|
||||
assert_equal("Unclosed quoted field in line 1.",
|
||||
error.message)
|
||||
end
|
||||
|
||||
bad_data = <<-CSV
|
||||
def test_malformed_csv_illegal_quote_middle_line
|
||||
csv = <<-CSV
|
||||
line,1,abc
|
||||
line,2,"def\nghi"
|
||||
|
||||
line,4,8'10"
|
||||
line,5,jkl
|
||||
CSV
|
||||
lines = bad_data.lines.to_a
|
||||
assert_equal(6, lines.size)
|
||||
assert_match(/\Aline,4/, lines.find { |l| l =~ /8'10"/ })
|
||||
|
||||
csv = CSV.new(bad_data)
|
||||
begin
|
||||
loop do
|
||||
assert_not_nil(csv.shift)
|
||||
assert_send([csv.lineno, :<, 4])
|
||||
end
|
||||
rescue CSV::MalformedCSVError
|
||||
assert_equal("Illegal quoting in line 4.", $!.message)
|
||||
error = assert_raise(CSV::MalformedCSVError) do
|
||||
CSV.parse(csv)
|
||||
end
|
||||
assert_equal("Illegal quoting in line 4.",
|
||||
error.message)
|
||||
end
|
||||
|
||||
def test_the_parse_fails_fast_when_it_can_for_unquoted_fields
|
||||
|
@ -239,6 +236,24 @@ line,5,jkl
|
|||
CSV.parse("a b d", col_sep: " "))
|
||||
end
|
||||
|
||||
def test_row_sep_auto_cr
|
||||
assert_equal([["a"]], CSV.parse("a\r"))
|
||||
end
|
||||
|
||||
def test_row_sep_auto_lf
|
||||
assert_equal([["a"]], CSV.parse("a\n"))
|
||||
end
|
||||
|
||||
def test_row_sep_auto_cr_lf
|
||||
assert_equal([["a"]], CSV.parse("a\r\n"))
|
||||
end
|
||||
|
||||
def test_headers_empty_line
|
||||
assert_equal(CSV::Table.new([CSV::Row.new(["header1"], [])],
|
||||
headers: ["header1"]),
|
||||
CSV.parse("\n", headers: "header1"))
|
||||
end
|
||||
|
||||
private
|
||||
|
||||
def assert_parse_errors_out(*args)
|
||||
|
|
|
@ -243,23 +243,35 @@ class TestCSV::DataConverters < TestCSV
|
|||
CSV.parse_line(@data, converters: [:numeric, @custom]) )
|
||||
end
|
||||
|
||||
def test_unconverted_fields
|
||||
[ [ @data,
|
||||
["Numbers", :integer, 1, :float, 3.015],
|
||||
%w{Numbers :integer 1 :float 3.015} ],
|
||||
["\n", Array.new, Array.new] ].each do |test, fields, unconverted|
|
||||
row = nil
|
||||
assert_nothing_raised(Exception) do
|
||||
row = CSV.parse_line( test,
|
||||
converters: [:numeric, @custom],
|
||||
unconverted_fields: true )
|
||||
end
|
||||
assert_not_nil(row)
|
||||
assert_equal(fields, row)
|
||||
assert_respond_to(row, :unconverted_fields)
|
||||
assert_equal(unconverted, row.unconverted_fields)
|
||||
end
|
||||
def test_unconverted_fields_number
|
||||
row = CSV.parse_line(@data,
|
||||
converters: [:numeric, @custom],
|
||||
unconverted_fields: true)
|
||||
assert_equal([
|
||||
["Numbers", :integer, 1, :float, 3.015],
|
||||
["Numbers", ":integer", "1", ":float", "3.015"],
|
||||
],
|
||||
[
|
||||
row,
|
||||
row.unconverted_fields,
|
||||
])
|
||||
end
|
||||
|
||||
def test_unconverted_fields_empty_line
|
||||
row = CSV.parse_line("\n",
|
||||
converters: [:numeric, @custom],
|
||||
unconverted_fields: true)
|
||||
assert_equal([
|
||||
[],
|
||||
[],
|
||||
],
|
||||
[
|
||||
row,
|
||||
row.unconverted_fields,
|
||||
])
|
||||
end
|
||||
|
||||
def test_unconverted_fields
|
||||
data = <<-CSV
|
||||
first,second,third
|
||||
1,2,3
|
||||
|
|
|
@ -58,26 +58,37 @@ line,4,jkl
|
|||
end
|
||||
|
||||
def test_row_sep
|
||||
assert_raise(CSV::MalformedCSVError) do
|
||||
CSV.parse_line("1,2,3\n,4,5\r\n", row_sep: "\r\n")
|
||||
error = assert_raise(CSV::MalformedCSVError) do
|
||||
CSV.parse_line("1,2,3\n,4,5\r\n", row_sep: "\r\n")
|
||||
end
|
||||
assert_equal("Unquoted fields do not allow \\r or \\n in line 1.",
|
||||
error.message)
|
||||
assert_equal( ["1", "2", "3\n", "4", "5"],
|
||||
CSV.parse_line(%Q{1,2,"3\n",4,5\r\n}, row_sep: "\r\n"))
|
||||
end
|
||||
|
||||
def test_quote_char
|
||||
TEST_CASES.each do |test_case|
|
||||
assert_equal( test_case.last.map { |t| t.tr('"', "'") unless t.nil? },
|
||||
CSV.parse_line( test_case.first.tr('"', "'"),
|
||||
quote_char: "'" ) )
|
||||
assert_equal(test_case.last.map {|t| t.tr('"', "'") unless t.nil?},
|
||||
CSV.parse_line(test_case.first.tr('"', "'"),
|
||||
quote_char: "'" ))
|
||||
end
|
||||
end
|
||||
|
||||
def test_bug_8405
|
||||
def test_quote_char_special_regexp_char
|
||||
TEST_CASES.each do |test_case|
|
||||
assert_equal( test_case.last.map { |t| t.tr('"', "|") unless t.nil? },
|
||||
CSV.parse_line( test_case.first.tr('"', "|"),
|
||||
quote_char: "|" ) )
|
||||
assert_equal(test_case.last.map {|t| t.tr('"', "|") unless t.nil?},
|
||||
CSV.parse_line(test_case.first.tr('"', "|"),
|
||||
quote_char: "|"))
|
||||
end
|
||||
end
|
||||
|
||||
def test_quote_char_special_regexp_char_liberal_parsing
|
||||
TEST_CASES.each do |test_case|
|
||||
assert_equal(test_case.last.map {|t| t.tr('"', "|") unless t.nil?},
|
||||
CSV.parse_line(test_case.first.tr('"', "|"),
|
||||
quote_char: "|",
|
||||
liberal_parsing: true))
|
||||
end
|
||||
end
|
||||
|
||||
|
@ -157,27 +168,68 @@ line,4,jkl
|
|||
assert_equal(3, count)
|
||||
end
|
||||
|
||||
def test_liberal_parsing
|
||||
def test_liberal_parsing_middle_quote_start
|
||||
input = '"Johnson, Dwayne",Dwayne "The Rock" Johnson'
|
||||
assert_raise(CSV::MalformedCSVError) do
|
||||
error = assert_raise(CSV::MalformedCSVError) do
|
||||
CSV.parse_line(input)
|
||||
end
|
||||
assert_equal("Illegal quoting in line 1.",
|
||||
error.message)
|
||||
assert_equal(["Johnson, Dwayne", 'Dwayne "The Rock" Johnson'],
|
||||
CSV.parse_line(input, liberal_parsing: true))
|
||||
end
|
||||
|
||||
def test_liberal_parsing_middle_quote_end
|
||||
input = '"quoted" field'
|
||||
assert_raise(CSV::MalformedCSVError) do
|
||||
error = assert_raise(CSV::MalformedCSVError) do
|
||||
CSV.parse_line(input)
|
||||
end
|
||||
assert_equal("Do not allow except col_sep_split_separator " +
|
||||
"after quoted fields in line 1.",
|
||||
error.message)
|
||||
assert_equal(['"quoted" field'],
|
||||
CSV.parse_line(input, liberal_parsing: true))
|
||||
end
|
||||
|
||||
assert_raise(CSV::MalformedCSVError) do
|
||||
def test_liberal_parsing_quote_after_column_separator
|
||||
error = assert_raise(CSV::MalformedCSVError) do
|
||||
CSV.parse_line('is,this "three," or four,fields', liberal_parsing: true)
|
||||
end
|
||||
assert_equal("Unclosed quoted field in line 1.",
|
||||
error.message)
|
||||
end
|
||||
|
||||
def test_liberal_parsing_quote_before_column_separator
|
||||
assert_equal(["is", 'this "three', ' or four"', "fields"],
|
||||
CSV.parse_line('is,this "three, or four",fields', liberal_parsing: true))
|
||||
CSV.parse_line('is,this "three, or four",fields',
|
||||
liberal_parsing: true))
|
||||
end
|
||||
|
||||
def test_liberal_parsing_backslash_quote
|
||||
assert_equal([
|
||||
"1",
|
||||
"\"Hamlet says, \\\"Seems",
|
||||
"\\\" madam! Nay it is; I know not \\\"seems.\\\"\"",
|
||||
],
|
||||
CSV.parse_line('1,' +
|
||||
'"Hamlet says, \"Seems,' +
|
||||
'\" madam! Nay it is; I know not \"seems.\""',
|
||||
liberal_parsing: true))
|
||||
end
|
||||
|
||||
def test_liberal_parsing_space_quote
|
||||
input = <<~CSV
|
||||
Los Angeles, 34°03'N, 118°15'W
|
||||
New York City, 40°42'46"N, 74°00'21"W
|
||||
Paris, 48°51'24"N, 2°21'03"E
|
||||
CSV
|
||||
assert_equal(
|
||||
[
|
||||
["Los Angeles", " 34°03'N", " 118°15'W"],
|
||||
["New York City", " 40°42'46\"N", " 74°00'21\"W"],
|
||||
["Paris", " 48°51'24\"N", " 2°21'03\"E"],
|
||||
],
|
||||
CSV.parse(input, liberal_parsing: true))
|
||||
end
|
||||
|
||||
def test_csv_behavior_readers
|
||||
|
@ -338,11 +390,33 @@ line,4,jkl
|
|||
|
||||
def test_requires_skip_lines_to_call_match
|
||||
regex_stub = RegexStub.new
|
||||
csv = CSV.new(@sample_data, :skip_lines => regex_stub)
|
||||
assert_raise_with_message(ArgumentError, /skip_lines/) do
|
||||
CSV.new(@sample_data, :skip_lines => regex_stub)
|
||||
csv.shift
|
||||
end
|
||||
end
|
||||
|
||||
class Matchable
|
||||
def initialize(pattern)
|
||||
@pattern = pattern
|
||||
end
|
||||
|
||||
def match(line)
|
||||
@pattern.match(line)
|
||||
end
|
||||
end
|
||||
|
||||
def test_skip_lines_match
|
||||
csv = <<-CSV.chomp
|
||||
1
|
||||
# 2
|
||||
3
|
||||
# 4
|
||||
CSV
|
||||
assert_equal([["1"], ["3"]],
|
||||
CSV.parse(csv, :skip_lines => Matchable.new(/\A#/)))
|
||||
end
|
||||
|
||||
def test_comment_rows_are_ignored
|
||||
sample_data = "line,1,a\n#not,a,line\nline,2,b\n #also,no,line"
|
||||
c = CSV.new sample_data, :skip_lines => /\A\s*#/
|
||||
|
@ -375,4 +449,48 @@ line,4,jkl
|
|||
def test_table_nil_equality
|
||||
assert_nothing_raised(NoMethodError) { CSV.parse("test", headers: true) == nil }
|
||||
end
|
||||
|
||||
# non-seekable input stream for testing https://github.com/ruby/csv/issues/44
|
||||
class DummyIO
|
||||
extend Forwardable
|
||||
def_delegators :@io, :gets, :read, :pos, :eof? # no seek or rewind!
|
||||
def initialize(data)
|
||||
@io = StringIO.new(data)
|
||||
end
|
||||
end
|
||||
|
||||
def test_line_separator_autodetection_for_non_seekable_input_lf
|
||||
c = CSV.new(DummyIO.new("one,two,three\nfoo,bar,baz\n"))
|
||||
assert_equal [["one", "two", "three"], ["foo", "bar", "baz"]], c.each.to_a
|
||||
end
|
||||
|
||||
def test_line_separator_autodetection_for_non_seekable_input_cr
|
||||
c = CSV.new(DummyIO.new("one,two,three\rfoo,bar,baz\r"))
|
||||
assert_equal [["one", "two", "three"], ["foo", "bar", "baz"]], c.each.to_a
|
||||
end
|
||||
|
||||
def test_line_separator_autodetection_for_non_seekable_input_cr_lf
|
||||
c = CSV.new(DummyIO.new("one,two,three\r\nfoo,bar,baz\r\n"))
|
||||
assert_equal [["one", "two", "three"], ["foo", "bar", "baz"]], c.each.to_a
|
||||
end
|
||||
|
||||
def test_line_separator_autodetection_for_non_seekable_input_1024_over_lf
|
||||
table = (1..10).map { |row| (1..200).map { |col| "row#{row}col#{col}" }.to_a }.to_a
|
||||
input = table.map { |line| line.join(",") }.join("\n")
|
||||
c = CSV.new(DummyIO.new(input))
|
||||
assert_equal table, c.each.to_a
|
||||
end
|
||||
|
||||
def test_line_separator_autodetection_for_non_seekable_input_1024_over_cr_lf
|
||||
table = (1..10).map { |row| (1..200).map { |col| "row#{row}col#{col}" }.to_a }.to_a
|
||||
input = table.map { |line| line.join(",") }.join("\r\n")
|
||||
c = CSV.new(DummyIO.new(input))
|
||||
assert_equal table, c.each.to_a
|
||||
end
|
||||
|
||||
def test_line_separator_autodetection_for_non_seekable_input_many_cr_only
|
||||
# input with lots of CRs (to make sure no bytes are lost due to look-ahead)
|
||||
c = CSV.new(DummyIO.new("foo\r" + "\r" * 9999 + "bar\r"))
|
||||
assert_equal [["foo"]] + [[]] * 9999 + [["bar"]], c.each.to_a
|
||||
end
|
||||
end
|
||||
|
|
|
@ -139,6 +139,18 @@ class TestCSV::Interface < TestCSV
|
|||
assert_equal(Array.new, CSV.parse_line("\n1,2,3"))
|
||||
end
|
||||
|
||||
def test_parse_header_only
|
||||
table = CSV.parse("a,b,c", headers: true)
|
||||
assert_equal([
|
||||
["a", "b", "c"],
|
||||
[],
|
||||
],
|
||||
[
|
||||
table.headers,
|
||||
table.each.to_a,
|
||||
])
|
||||
end
|
||||
|
||||
def test_read_and_readlines
|
||||
assert_equal( @expected,
|
||||
CSV.read(@path, col_sep: "\t", row_sep: "\r\n") )
|
||||
|
@ -236,7 +248,7 @@ class TestCSV::Interface < TestCSV
|
|||
CSV.open(@path, "w", headers: true) do |csv|
|
||||
csv << headers
|
||||
csv << %w{1 2 3}
|
||||
assert_equal(headers, csv.instance_variable_get(:@headers))
|
||||
assert_equal(headers, csv.headers)
|
||||
end
|
||||
end
|
||||
|
||||
|
|
|
@ -21,6 +21,8 @@ class TestCSV::Table < TestCSV
|
|||
@header_table = CSV::Table.new(
|
||||
[CSV::Row.new(%w{A B C}, %w{A B C}, true)] + @rows
|
||||
)
|
||||
|
||||
@header_only_table = CSV::Table.new([], headers: %w{A B C})
|
||||
end
|
||||
|
||||
def test_initialze
|
||||
|
@ -63,6 +65,10 @@ class TestCSV::Table < TestCSV
|
|||
assert_equal Array.new, t.headers
|
||||
end
|
||||
|
||||
def test_headers_only
|
||||
assert_equal(%w[A B C], @header_only_table.headers)
|
||||
end
|
||||
|
||||
def test_index
|
||||
##################
|
||||
### Mixed Mode ###
|
||||
|
@ -471,6 +477,21 @@ A
|
|||
CSV
|
||||
end
|
||||
|
||||
def test_delete_headers_only
|
||||
###################
|
||||
### Column Mode ###
|
||||
###################
|
||||
@header_only_table.by_col!
|
||||
|
||||
# delete by index
|
||||
assert_equal([], @header_only_table.delete(0))
|
||||
assert_equal(%w[B C], @header_only_table.headers)
|
||||
|
||||
# delete by header
|
||||
assert_equal([], @header_only_table.delete("C"))
|
||||
assert_equal(%w[B], @header_only_table.headers)
|
||||
end
|
||||
|
||||
def test_values_at
|
||||
##################
|
||||
### Mixed Mode ###
|
||||
|
|
Loading…
Reference in a new issue