1
0
Fork 0
mirror of https://github.com/ruby/ruby.git synced 2022-11-09 12:17:21 -05:00

Import CSV 3.1.2 (#2547)

This commit is contained in:
Sutou Kouhei 2019-10-12 14:03:21 +09:00 committed by GitHub
parent d6e68bb263
commit 92df7d98b6
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
Notes: git 2019-10-12 14:03:45 +09:00
Merged-By: kou <kou@clear-code.com>
14 changed files with 582 additions and 447 deletions

2
NEWS
View file

@ -343,7 +343,7 @@ CGI::
CSV::
* Upgrade to 3.0.9.
* Upgrade to 3.1.2.
See https://github.com/ruby/csv/blob/master/NEWS.md.
Date::

File diff suppressed because it is too large Load diff

View file

@ -1,8 +1,14 @@
# frozen_string_literal: true
class CSV
# Note: Don't use this class directly. This is an internal class.
class FieldsConverter
include Enumerable
#
# A CSV::FieldsConverter is a data structure for storing the
# fields converter properties to be passed as a parameter
# when parsing a new file (e.g. CSV::Parser.new(@io, parser_options))
#
def initialize(options={})
@converters = []

View file

@ -11,10 +11,31 @@ using CSV::DeleteSuffix if CSV.const_defined?(:DeleteSuffix)
using CSV::MatchP if CSV.const_defined?(:MatchP)
class CSV
# Note: Don't use this class directly. This is an internal class.
class Parser
#
# A CSV::Parser is m17n aware. The parser works in the Encoding of the IO
# or String object being read from or written to. Your data is never transcoded
# (unless you ask Ruby to transcode it for you) and will literally be parsed in
# the Encoding it is in. Thus CSV will return Arrays or Rows of Strings in the
# Encoding of your data. This is accomplished by transcoding the parser itself
# into your Encoding.
#
# Raised when input data is not valid in its encoding, e.g. when the
# content read from a StringIO input fails String#valid_encoding?.
class InvalidEncoding < StandardError
end
#
# CSV::Scanner receives a CSV output, scans it and returns the content.
# It also controls the life cycle of the object with its methods +keep_start+,
# +keep_end+, +keep_back+, +keep_drop+.
#
# Uses StringScanner (the official strscan gem). Strscan provides lexical
# scanning operations on a String. We inherit from it and take advantage
# of its methods. For more information, please visit:
# https://ruby-doc.org/stdlib-2.6.1/libdoc/strscan/rdoc/StringScanner.html
#
class Scanner < StringScanner
alias_method :scan_all, :scan
@ -38,7 +59,7 @@ class CSV
def keep_end
start = @keeps.pop
string[start, pos - start]
string.byteslice(start, pos - start)
end
def keep_back
@ -50,6 +71,18 @@ class CSV
end
end
#
# CSV::InputsScanner receives IO inputs, encoding and the chunk_size.
# It also controls the life cycle of the object with its methods +keep_start+,
# +keep_end+, +keep_back+, +keep_drop+.
#
# CSV::InputsScanner.scan() tries to match with pattern at the current position.
# If there's a match, the scanner advances the “scan pointer” and returns the matched string.
# Otherwise, the scanner returns nil.
#
# CSV::InputsScanner.rest() returns the “rest” of the string (i.e. everything after the scan pointer).
# If there is no more data (eos? = true), it returns "".
#
class InputsScanner
def initialize(inputs, encoding, chunk_size: 8192)
@inputs = inputs.dup
@ -137,7 +170,7 @@ class CSV
def keep_end
start, buffer = @keeps.pop
keep = @scanner.string[start, @scanner.pos - start]
keep = @scanner.string.byteslice(start, @scanner.pos - start)
if buffer
buffer << keep
keep = buffer
@ -192,7 +225,7 @@ class CSV
input = @inputs.first
case input
when StringIO
string = input.string
string = input.read
raise InvalidEncoding unless string.valid_encoding?
@scanner = StringScanner.new(string)
@inputs.shift
@ -319,6 +352,7 @@ class CSV
end
private
# A set of tasks to prepare the file in order to parse it
def prepare
prepare_variable
prepare_quote_character
@ -447,7 +481,13 @@ class CSV
end
def prepare_separators
@column_separator = @options[:column_separator].to_s.encode(@encoding)
column_separator = @options[:column_separator]
@column_separator = column_separator.to_s.encode(@encoding)
if @column_separator.size < 1
message = ":col_sep must be 1 or more characters: "
message += column_separator.inspect
raise ArgumentError, message
end
@row_separator =
resolve_row_separator(@options[:row_separator]).encode(@encoding)
@ -534,7 +574,9 @@ class CSV
cr = "\r".encode(@encoding)
lf = "\n".encode(@encoding)
if @input.is_a?(StringIO)
separator = detect_row_separator(@input.string, cr, lf)
pos = @input.pos
separator = detect_row_separator(@input.read, cr, lf)
@input.seek(pos)
elsif @input.respond_to?(:gets)
if @input.is_a?(File)
chunk_size = 32 * 1024
@ -651,7 +693,9 @@ class CSV
return false if @quote_character.nil?
if @input.is_a?(StringIO)
sample = @input.string
pos = @input.pos
sample = @input.read
@input.seek(pos)
else
return false if @samples.empty?
sample = @samples.first
@ -684,7 +728,7 @@ class CSV
UnoptimizedStringIO.new(sample)
end
if @input.is_a?(StringIO)
inputs << UnoptimizedStringIO.new(@input.string)
inputs << UnoptimizedStringIO.new(@input.read)
else
inputs << @input
end
@ -697,7 +741,7 @@ class CSV
def build_scanner
string = nil
if @samples.empty? and @input.is_a?(StringIO)
string = @input.string
string = @input.read
elsif @samples.size == 1 and @input.respond_to?(:eof?) and @input.eof?
string = @samples[0]
end

View file

@ -4,7 +4,7 @@ require "forwardable"
class CSV
#
# A CSV::Row is part Array and part Hash. It retains an order for the fields
# A CSV::Row is part Array and part Hash. It retains an order for the fields
# and allows duplicates just as an Array would, but also allows you to access
# fields by name just as you could if they were in a Hash.
#
@ -13,13 +13,13 @@ class CSV
#
class Row
#
# Construct a new CSV::Row from +headers+ and +fields+, which are expected
# to be Arrays. If one Array is shorter than the other, it will be padded
# Constructs a new CSV::Row from +headers+ and +fields+, which are expected
# to be Arrays. If one Array is shorter than the other, it will be padded
# with +nil+ objects.
#
# The optional +header_row+ parameter can be set to +true+ to indicate, via
# CSV::Row.header_row?() and CSV::Row.field_row?(), that this is a header
# row. Otherwise, the row is assumes to be a field row.
# row. Otherwise, the row is assumed to be a field row.
#
# A CSV::Row object supports the following Array methods through delegation:
#
@ -74,11 +74,11 @@ class CSV
# field( header, offset )
# field( index )
#
# This method will return the field value by +header+ or +index+. If a field
# This method will return the field value by +header+ or +index+. If a field
# is not found, +nil+ is returned.
#
# When provided, +offset+ ensures that a header match occurs on or later
# than the +offset+ index. You can use this to find duplicate headers,
# than the +offset+ index. You can use this to find duplicate headers,
# without resorting to hard-coding exact indices.
#
def field(header_or_index, minimum_index = 0)
@ -142,7 +142,7 @@ class CSV
# assigns the +value+.
#
# Assigning past the end of the row with an index will set all pairs between
# to <tt>[nil, nil]</tt>. Assigning to an unused header appends the new
# to <tt>[nil, nil]</tt>. Assigning to an unused header appends the new
# pair.
#
def []=(*args)
@ -172,8 +172,8 @@ class CSV
# <<( header_and_field_hash )
#
# If a two-element Array is provided, it is assumed to be a header and field
# and the pair is appended. A Hash works the same way with the key being
# the header and the value being the field. Anything else is assumed to be
# and the pair is appended. A Hash works the same way with the key being
# the header and the value being the field. Anything else is assumed to be
# a lone field which is appended with a +nil+ header.
#
# This method returns the row for chaining.
@ -191,7 +191,7 @@ class CSV
end
#
# A shortcut for appending multiple fields. Equivalent to:
# A shortcut for appending multiple fields. Equivalent to:
#
# args.each { |arg| csv_row << arg }
#
@ -209,8 +209,8 @@ class CSV
# delete( header, offset )
# delete( index )
#
# Used to remove a pair from the row by +header+ or +index+. The pair is
# located as described in CSV::Row.field(). The deleted pair is returned,
# Removes a pair from the row by +header+ or +index+. The pair is
# located as described in CSV::Row.field(). The deleted pair is returned,
# or +nil+ if a pair could not be found.
#
def delete(header_or_index, minimum_index = 0)
@ -325,7 +325,7 @@ class CSV
end
#
# Collapses the row into a simple Hash. Be warned that this discards field
# Collapses the row into a simple Hash. Be warned that this discards field
# order and clobbers duplicate fields.
#
def to_h
@ -340,7 +340,7 @@ class CSV
alias_method :to_ary, :to_a
#
# Returns the row as a CSV String. Headers are not used. Equivalent to:
# Returns the row as a CSV String. Headers are not used. Equivalent to:
#
# csv_row.fields.to_csv( options )
#
@ -367,7 +367,9 @@ class CSV
end
end
#
# A summary of fields, by header, in an ASCII compatible String.
#
def inspect
str = ["#<", self.class.to_s]
each do |header, field|

View file

@ -5,7 +5,7 @@ require "forwardable"
class CSV
#
# A CSV::Table is a two-dimensional data structure for representing CSV
# documents. Tables allow you to work with the data by row or column,
# documents. Tables allow you to work with the data by row or column,
# manipulate the data, and even convert the results back to CSV, if needed.
#
# All tables returned by CSV will be constructed from this class, if header
@ -13,8 +13,8 @@ class CSV
#
class Table
#
# Construct a new CSV::Table from +array_of_rows+, which are expected
# to be CSV::Row objects. All rows are assumed to have the same headers.
# Constructs a new CSV::Table from +array_of_rows+, which are expected
# to be CSV::Row objects. All rows are assumed to have the same headers.
#
# The optional +headers+ parameter can be set to an Array of headers.
# If headers aren't set, headers are fetched from CSV::Row objects.
@ -55,11 +55,11 @@ class CSV
def_delegators :@table, :empty?, :length, :size
#
# Returns a duplicate table object, in column mode. This is handy for
# Returns a duplicate table object, in column mode. This is handy for
# chaining in a single call without changing the table mode, but be aware
# that this method can consume a fair amount of memory for bigger data sets.
#
# This method returns the duplicate table for chaining. Don't chain
# This method returns the duplicate table for chaining. Don't chain
# destructive methods (like []=()) this way though, since you are working
# with a duplicate.
#
@ -68,7 +68,7 @@ class CSV
end
#
# Switches the mode of this table to column mode. All calls to indexing and
# Switches the mode of this table to column mode. All calls to indexing and
# iteration methods will work with columns until the mode is changed again.
#
# This method returns the table and is safe to chain.
@ -80,7 +80,7 @@ class CSV
end
#
# Returns a duplicate table object, in mixed mode. This is handy for
# Returns a duplicate table object, in mixed mode. This is handy for
# chaining in a single call without changing the table mode, but be aware
# that this method can consume a fair amount of memory for bigger data sets.
#
@ -93,9 +93,9 @@ class CSV
end
#
# Switches the mode of this table to mixed mode. All calls to indexing and
# Switches the mode of this table to mixed mode. All calls to indexing and
# iteration methods will use the default intelligent indexing system until
# the mode is changed again. In mixed mode an index is assumed to be a row
# the mode is changed again. In mixed mode an index is assumed to be a row
# reference while anything else is assumed to be column access by headers.
#
# This method returns the table and is safe to chain.
@ -120,7 +120,7 @@ class CSV
end
#
# Switches the mode of this table to row mode. All calls to indexing and
# Switches the mode of this table to row mode. All calls to indexing and
# iteration methods will work with rows until the mode is changed again.
#
# This method returns the table and is safe to chain.
@ -146,7 +146,7 @@ class CSV
#
# In the default mixed mode, this method returns rows for index access and
# columns for header access. You can force the index association by first
# columns for header access. You can force the index association by first
# calling by_col!() or by_row!().
#
# Columns are returned as an Array of values. Altering that Array has no
@ -163,18 +163,18 @@ class CSV
#
# In the default mixed mode, this method assigns rows for index access and
# columns for header access. You can force the index association by first
# columns for header access. You can force the index association by first
# calling by_col!() or by_row!().
#
# Rows may be set to an Array of values (which will inherit the table's
# headers()) or a CSV::Row.
#
# Columns may be set to a single value, which is copied to each row of the
# column, or an Array of values. Arrays of values are assigned to rows top
# to bottom in row major order. Excess values are ignored and if the Array
# column, or an Array of values. Arrays of values are assigned to rows top
# to bottom in row major order. Excess values are ignored and if the Array
# does not have a value for each row the extra rows will receive a +nil+.
#
# Assigning to an existing column or row clobbers the data. Assigning to
# Assigning to an existing column or row clobbers the data. Assigning to
# new columns creates them at the right end of the table.
#
def []=(index_or_header, value)
@ -212,9 +212,9 @@ class CSV
#
# The mixed mode default is to treat a list of indices as row access,
# returning the rows indicated. Anything else is considered columnar
# access. For columnar access, the return set has an Array for each row
# with the values indicated by the headers in each Array. You can force
# returning the rows indicated. Anything else is considered columnar
# access. For columnar access, the return set has an Array for each row
# with the values indicated by the headers in each Array. You can force
# column or row mode using by_col!() or by_row!().
#
# You cannot mix column and row access.
@ -234,7 +234,7 @@ class CSV
end
#
# Adds a new row to the bottom end of this table. You can provide an Array,
# Adds a new row to the bottom end of this table. You can provide an Array,
# which will be converted to a CSV::Row (inheriting the table's headers()),
# or a CSV::Row.
#
@ -251,7 +251,7 @@ class CSV
end
#
# A shortcut for appending multiple rows. Equivalent to:
# A shortcut for appending multiple rows. Equivalent to:
#
# rows.each { |row| self << row }
#
@ -264,9 +264,9 @@ class CSV
end
#
# Removes and returns the indicated columns or rows. In the default mixed
# Removes and returns the indicated columns or rows. In the default mixed
# mode indices refer to rows and everything else is assumed to be column
# headers. Use by_col!() or by_row!() to force the lookup.
# headers. Use by_col!() or by_row!() to force the lookup.
#
def delete(*indexes_or_headers)
if indexes_or_headers.empty?
@ -293,9 +293,9 @@ class CSV
end
#
# Removes any column or row for which the block returns +true+. In the
# Removes any column or row for which the block returns +true+. In the
# default mixed mode or row mode, iteration is the standard row major
# walking of rows. In column mode, iteration will +yield+ two element
# walking of rows. In column mode, iteration will +yield+ two element
# tuples containing the column name and an Array of values for that column.
#
# This method returns the table for chaining.
@ -321,7 +321,7 @@ class CSV
#
# In the default mixed mode or row mode, iteration is the standard row major
# walking of rows. In column mode, iteration will +yield+ two element
# walking of rows. In column mode, iteration will +yield+ two element
# tuples containing the column name and an Array of values for that column.
#
# This method returns the table for chaining.
@ -347,7 +347,7 @@ class CSV
end
#
# Returns the table as an Array of Arrays. Headers will be the first row,
# Returns the table as an Array of Arrays. Headers will be the first row,
# then all of the field rows will follow.
#
def to_a
@ -360,7 +360,7 @@ class CSV
end
#
# Returns the table as a complete CSV String. Headers will be listed first,
# Returns the table as a complete CSV String. Headers will be listed first,
# then all of the field rows.
#
# This method assumes you want the Table.headers(), unless you explicitly

View file

@ -2,5 +2,5 @@
class CSV
# The version of the installed library.
VERSION = "3.1.1"
VERSION = "3.1.2"
end

View file

@ -6,7 +6,12 @@ require_relative "row"
using CSV::MatchP if CSV.const_defined?(:MatchP)
class CSV
# Note: Don't use this class directly. This is an internal class.
class Writer
#
# A CSV::Writer receives an output, prepares the header, format and output.
# It allows us to write new rows in the object and rewind it.
#
attr_reader :lineno
attr_reader :headers
@ -22,6 +27,9 @@ class CSV
@fields_converter = @options[:fields_converter]
end
#
# Adds a new row
#
def <<(row)
case row
when Row
@ -47,6 +55,9 @@ class CSV
self
end
#
# Winds back to the beginning
#
def rewind
@lineno = 0
@headers = nil if @options[:headers].nil?

View file

@ -233,11 +233,21 @@ line,5,jkl
assert_equal([["a"]], CSV.parse("a\r\n"))
end
# Parsing must begin at the StringIO's current read position rather than
# at the start of its underlying String: the BOM is consumed here before
# the parser ever sees the input, so it must not appear in the first field.
def test_seeked_string_io
  input_with_bom = StringIO.new("\ufeffあ,い,う\r\na,b,c\r\n")
  input_with_bom.read(3) # skip the BOM ("\ufeff" is 3 bytes in UTF-8)
  assert_equal([
                 ["あ", "い", "う"],
                 ["a", "b", "c"],
               ],
               CSV.new(input_with_bom).each.to_a)
end
private
def assert_parse_errors_out(*args, **options)
def assert_parse_errors_out(data, **options)
assert_raise(CSV::MalformedCSVError) do
Timeout.timeout(0.2) do
CSV.parse(*args, **options)
CSV.parse(data, **options)
fail("Parse didn't error out")
end
end

View file

@ -312,12 +312,12 @@ A
end
def test_parse_empty
assert_equal(CSV::Table.new([], **{}),
assert_equal(CSV::Table.new([]),
CSV.parse("", headers: true))
end
def test_parse_empty_line
assert_equal(CSV::Table.new([], **{}),
assert_equal(CSV::Table.new([]),
CSV.parse("\n", headers: true))
end

View file

@ -6,7 +6,7 @@ require_relative "../helper"
class TestCSVParseRewind < Test::Unit::TestCase
extend DifferentOFS
def parse(data, options={})
def parse(data, **options)
csv = CSV.new(data, **options)
records = csv.to_a
csv.rewind

View file

@ -268,11 +268,11 @@ class TestCSVEncodings < Test::Unit::TestCase
private
def assert_parses(fields, encoding, options = { })
def assert_parses(fields, encoding, **options)
encoding = Encoding.find(encoding) unless encoding.is_a? Encoding
orig_fields = fields
fields = encode_ary(fields, encoding)
data = ary_to_data(fields, options)
data = ary_to_data(fields, **options)
parsed = CSV.parse(data, **options)
assert_equal(fields, parsed)
parsed.flatten.each_with_index do |field, i|
@ -285,7 +285,9 @@ class TestCSVEncodings < Test::Unit::TestCase
end
end
begin
CSV.open(@temp_csv_path, "rb:#{encoding}:#{__ENCODING__}", **options) do |csv|
CSV.open(@temp_csv_path,
"rb:#{encoding}:#{__ENCODING__}",
**options) do |csv|
csv.each_with_index do |row, i|
assert_equal(orig_fields[i], row)
end
@ -315,7 +317,7 @@ class TestCSVEncodings < Test::Unit::TestCase
ary.map { |row| row.map { |field| field.encode(encoding) } }
end
def ary_to_data(ary, options = { })
def ary_to_data(ary, **options)
encoding = ary.flatten.first.encoding
quote_char = (options[:quote_char] || '"').encode(encoding)
col_sep = (options[:col_sep] || ",").encode(encoding)
@ -327,9 +329,9 @@ class TestCSVEncodings < Test::Unit::TestCase
}.join('').encode(encoding)
end
def encode_for_tests(data, options = { })
yield ary_to_data(encode_ary(data, "UTF-8"), options)
yield ary_to_data(encode_ary(data, "UTF-16BE"), options)
def encode_for_tests(data, **options)
yield ary_to_data(encode_ary(data, "UTF-8"), **options)
yield ary_to_data(encode_ary(data, "UTF-16BE"), **options)
end
def each_encoding

View file

@ -52,6 +52,20 @@ line,4,jkl
assert_equal([",,,", nil], CSV.parse_line(",,,;", col_sep: ";"))
end
# A nil :col_sep is rejected up front with an ArgumentError whose message
# includes the inspected value.
def test_col_sep_nil
  expected_message = ":col_sep must be 1 or more characters: nil"
  assert_raise_with_message(ArgumentError, expected_message) do
    CSV.parse(@sample_data, col_sep: nil)
  end
end
# An empty :col_sep is rejected up front with an ArgumentError whose
# message includes the inspected value.
def test_col_sep_empty
  expected_message = ":col_sep must be 1 or more characters: \"\""
  assert_raise_with_message(ArgumentError, expected_message) do
    CSV.parse(@sample_data, col_sep: "")
  end
end
def test_row_sep
error = assert_raise(CSV::MalformedCSVError) do
CSV.parse_line("1,2,3\n,4,5\r\n", row_sep: "\r\n")
@ -110,10 +124,10 @@ line,4,jkl
def test_line
lines = [
%Q(abc,def\n),
%Q(abc,"d\nef"\n),
%Q(abc,"d\r\nef"\n),
%Q(abc,"d\ref")
%Q(\u{3000}abc,def\n),
%Q(\u{3000}abc,"d\nef"\n),
%Q(\u{3000}abc,"d\r\nef"\n),
%Q(\u{3000}abc,"d\ref")
]
csv = CSV.new(lines.join(''))
lines.each do |line|

View file

@ -205,6 +205,32 @@ module TestCSVWriteGeneral
assert_equal(%Q[あ,い,う#{$INPUT_RECORD_SEPARATOR}].encode("EUC-JP"),
generate_line(row))
end
# With Encoding.default_internal set to UTF-8, an explicit :encoding
# option must still produce output in the requested encoding (EUC-JP).
def test_encoding_with_default_internal
  with_default_internal(Encoding::UTF_8) do
    row = ["あ", "い", "う"].collect {|field| field.encode("EUC-JP")}
    assert_equal(%Q[あ,い,う#{$INPUT_RECORD_SEPARATOR}].encode("EUC-JP"),
                 generate_line(row, encoding: Encoding::EUC_JP))
  end
end
# With Encoding.default_internal set to UTF-8 and no :encoding option,
# the generated line is expected to keep the encoding of the fields
# (EUC-JP).
def test_with_default_internal
  with_default_internal(Encoding::UTF_8) do
    row = ["あ", "い", "う"].collect {|field| field.encode("EUC-JP")}
    assert_equal(%Q[あ,い,う#{$INPUT_RECORD_SEPARATOR}].encode("EUC-JP"),
                 generate_line(row))
  end
end
# Runs the given block with Encoding.default_internal temporarily set to
# +encoding+, restoring the previous value afterwards even if the block
# raises. Returns the block's value.
def with_default_internal(encoding)
  previous = Encoding.default_internal
  Encoding.default_internal = encoding
  yield
ensure
  Encoding.default_internal = previous
end
end
class TestCSVWriteGeneralGenerateLine < Test::Unit::TestCase