mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
Import CSV 3.1.2 (#2547)
This commit is contained in:
parent
d6e68bb263
commit
92df7d98b6
Notes:
git
2019-10-12 14:03:45 +09:00
Merged-By: kou <kou@clear-code.com>
14 changed files with 582 additions and 447 deletions
2
NEWS
2
NEWS
|
@ -343,7 +343,7 @@ CGI::
|
|||
|
||||
CSV::
|
||||
|
||||
* Upgrade to 3.0.9.
|
||||
* Upgrade to 3.1.2.
|
||||
See https://github.com/ruby/csv/blob/master/NEWS.md.
|
||||
|
||||
Date::
|
||||
|
|
108
lib/csv.rb
108
lib/csv.rb
|
@ -29,7 +29,7 @@
|
|||
# the original library as of Ruby 1.9. If you are migrating code from 1.8 or
|
||||
# earlier, you may have to change your code to comply with the new interface.
|
||||
#
|
||||
# == What's Different From the Old CSV?
|
||||
# == What's the Difference From the Old CSV?
|
||||
#
|
||||
# I'm sure I'll miss something, but I'll try to mention most of the major
|
||||
# differences I am aware of, to help others quickly get up to speed:
|
||||
|
@ -74,9 +74,9 @@
|
|||
# place and that is to make using this library easier. CSV will parse all valid
|
||||
# CSV.
|
||||
#
|
||||
# What you don't want to do is feed CSV invalid data. Because of the way the
|
||||
# What you don't want to do is to feed CSV invalid data. Because of the way the
|
||||
# CSV format works, it's common for a parser to need to read until the end of
|
||||
# the file to be sure a field is invalid. This eats a lot of time and memory.
|
||||
# the file to be sure a field is invalid. This consumes a lot of time and memory.
|
||||
#
|
||||
# Luckily, when working with invalid CSV, Ruby's built-in methods will almost
|
||||
# always be superior in every way. For example, parsing non-quoted fields is as
|
||||
|
@ -184,7 +184,7 @@ using CSV::MatchP if CSV.const_defined?(:MatchP)
|
|||
# === CSV with headers
|
||||
#
|
||||
# CSV allows specifying column names of a CSV file, whether they are in data, or
|
||||
# provided separately. If headers specified, reading methods return an instance
|
||||
# provided separately. If headers are specified, reading methods return an instance
|
||||
# of CSV::Table, consisting of CSV::Row.
|
||||
#
|
||||
# # Headers are part of data
|
||||
|
@ -416,6 +416,7 @@ class CSV
|
|||
quote_empty: true,
|
||||
}.freeze
|
||||
|
||||
class << self
|
||||
#
|
||||
# This method will return a CSV instance, just like CSV::new(), but the
|
||||
# instance will be cached and returned for all future calls to this method for
|
||||
|
@ -425,7 +426,7 @@ class CSV
|
|||
# If a block is given, the instance is passed to the block and the return
|
||||
# value becomes the return value of the block.
|
||||
#
|
||||
def self.instance(data = $stdout, **options)
|
||||
def instance(data = $stdout, **options)
|
||||
# create a _signature_ for this method call, data object and options
|
||||
sig = [data.object_id] +
|
||||
options.values_at(*DEFAULT_OPTIONS.keys.sort_by { |sym| sym.to_s })
|
||||
|
@ -465,7 +466,7 @@ class CSV
|
|||
# The <tt>:output_row_sep</tt> +option+ defaults to
|
||||
# <tt>$INPUT_RECORD_SEPARATOR</tt> (<tt>$/</tt>).
|
||||
#
|
||||
def self.filter(input=nil, output=nil, **options)
|
||||
def filter(input=nil, output=nil, **options)
|
||||
# parse options for input, output, or both
|
||||
in_options, out_options = Hash.new, {row_sep: $INPUT_RECORD_SEPARATOR}
|
||||
options.each do |key, value|
|
||||
|
@ -504,7 +505,7 @@ class CSV
|
|||
# <tt>encoding: "UTF-32BE:UTF-8"</tt> would read UTF-32BE data from the file
|
||||
# but transcode it to UTF-8 before CSV parses it.
|
||||
#
|
||||
def self.foreach(path, mode="r", **options, &block)
|
||||
def foreach(path, mode="r", **options, &block)
|
||||
return to_enum(__method__, path, mode, **options) unless block_given?
|
||||
open(path, mode, **options) do |csv|
|
||||
csv.each(&block)
|
||||
|
@ -529,7 +530,7 @@ class CSV
|
|||
# String to set the base Encoding for the output. CSV needs this hint if you
|
||||
# plan to output non-ASCII compatible data.
|
||||
#
|
||||
def self.generate(str=nil, **options)
|
||||
def generate(str=nil, **options)
|
||||
# add a default empty String, if none was given
|
||||
if str
|
||||
str = StringIO.new(str)
|
||||
|
@ -557,7 +558,7 @@ class CSV
|
|||
# The <tt>:row_sep</tt> +option+ defaults to <tt>$INPUT_RECORD_SEPARATOR</tt>
|
||||
# (<tt>$/</tt>) when calling this method.
|
||||
#
|
||||
def self.generate_line(row, **options)
|
||||
def generate_line(row, **options)
|
||||
options = {row_sep: $INPUT_RECORD_SEPARATOR}.merge(options)
|
||||
str = +""
|
||||
if options[:encoding]
|
||||
|
@ -631,7 +632,7 @@ class CSV
|
|||
# * truncate()
|
||||
# * tty?()
|
||||
#
|
||||
def self.open(filename, mode="r", **options)
|
||||
def open(filename, mode="r", **options)
|
||||
# wrap a File opened with the remaining +args+ with no newline
|
||||
# decorator
|
||||
file_opts = {universal_newline: false}.merge(options)
|
||||
|
@ -675,8 +676,8 @@ class CSV
|
|||
# You pass your +str+ to read from, and an optional +options+ containing
|
||||
# anything CSV::new() understands.
|
||||
#
|
||||
def self.parse(*args, **options, &block)
|
||||
csv = new(*args, **options)
|
||||
def parse(str, **options, &block)
|
||||
csv = new(str, **options)
|
||||
|
||||
return csv.each(&block) if block_given?
|
||||
|
||||
|
@ -695,7 +696,7 @@ class CSV
|
|||
#
|
||||
# The +options+ parameter can be anything CSV::new() understands.
|
||||
#
|
||||
def self.parse_line(line, **options)
|
||||
def parse_line(line, **options)
|
||||
new(line, **options).shift
|
||||
end
|
||||
|
||||
|
@ -710,13 +711,13 @@ class CSV
|
|||
# <tt>encoding: "UTF-32BE:UTF-8"</tt> would read UTF-32BE data from the file
|
||||
# but transcode it to UTF-8 before CSV parses it.
|
||||
#
|
||||
def self.read(*args, **options)
|
||||
open(*args, **options) { |csv| csv.read }
|
||||
def read(path, **options)
|
||||
open(path, **options) { |csv| csv.read }
|
||||
end
|
||||
|
||||
# Alias for CSV::read().
|
||||
def self.readlines(*args, **options)
|
||||
read(*args, **options)
|
||||
def readlines(path, **options)
|
||||
read(path, **options)
|
||||
end
|
||||
|
||||
#
|
||||
|
@ -726,10 +727,15 @@ class CSV
|
|||
# converters: :numeric,
|
||||
# header_converters: :symbol }.merge(options) )
|
||||
#
|
||||
def self.table(path, **options)
|
||||
read( path, **{ headers: true,
|
||||
def table(path, **options)
|
||||
default_options = {
|
||||
headers: true,
|
||||
converters: :numeric,
|
||||
header_converters: :symbol }.merge(options) )
|
||||
header_converters: :symbol,
|
||||
}
|
||||
options = default_options.merge(options)
|
||||
read(path, **options)
|
||||
end
|
||||
end
|
||||
|
||||
#
|
||||
|
@ -853,7 +859,7 @@ class CSV
|
|||
# converting. The conversion will fail
|
||||
# if the data cannot be transcoded,
|
||||
# leaving the header unchanged.
|
||||
# <b><tt>:skip_blanks</tt></b>:: When set to a +true+ value, CSV will
|
||||
# <b><tt>:skip_blanks</tt></b>:: When setting a +true+ value, CSV will
|
||||
# skip over any empty rows. Note that
|
||||
# this setting will not skip rows that
|
||||
# contain column separators, even if
|
||||
|
@ -863,9 +869,9 @@ class CSV
|
|||
# using <tt>:skip_lines</tt>, or
|
||||
# inspecting fields.compact.empty? on
|
||||
# each row.
|
||||
# <b><tt>:force_quotes</tt></b>:: When set to a +true+ value, CSV will
|
||||
# <b><tt>:force_quotes</tt></b>:: When setting a +true+ value, CSV will
|
||||
# quote all CSV fields it creates.
|
||||
# <b><tt>:skip_lines</tt></b>:: When set to an object responding to
|
||||
# <b><tt>:skip_lines</tt></b>:: When setting an object responding to
|
||||
# <tt>match</tt>, every line matching
|
||||
# it is considered a comment and ignored
|
||||
# during parsing. When set to a String,
|
||||
|
@ -874,17 +880,17 @@ class CSV
|
|||
# a comment. If the passed object does
|
||||
# not respond to <tt>match</tt>,
|
||||
# <tt>ArgumentError</tt> is thrown.
|
||||
# <b><tt>:liberal_parsing</tt></b>:: When set to a +true+ value, CSV will
|
||||
# <b><tt>:liberal_parsing</tt></b>:: When setting a +true+ value, CSV will
|
||||
# attempt to parse input not conformant
|
||||
# with RFC 4180, such as double quotes
|
||||
# in unquoted fields.
|
||||
# <b><tt>:nil_value</tt></b>:: When set an object, any values of an
|
||||
# empty field are replaced by the set
|
||||
# empty field is replaced by the set
|
||||
# object, not nil.
|
||||
# <b><tt>:empty_value</tt></b>:: When set an object, any values of a
|
||||
# <b><tt>:empty_value</tt></b>:: When setting an object, any values of a
|
||||
# blank string field is replaced by
|
||||
# the set object.
|
||||
# <b><tt>:quote_empty</tt></b>:: When set to a +true+ value, CSV will
|
||||
# <b><tt>:quote_empty</tt></b>:: When setting a +true+ value, CSV will
|
||||
# quote empty values with double quotes.
|
||||
# When +false+, CSV will emit an
|
||||
# empty string for an empty field value.
|
||||
|
@ -901,11 +907,11 @@ class CSV
|
|||
# <b><tt>:write_empty_value</tt></b>:: When a <tt>String</tt> or +nil+ value,
|
||||
# empty value(s) on each line will be
|
||||
# replaced with the specified value.
|
||||
# <b><tt>:strip</tt></b>:: When set to a +true+ value, CSV will
|
||||
# <b><tt>:strip</tt></b>:: When setting a +true+ value, CSV will
|
||||
# strip "\t\r\n\f\v" around the values.
|
||||
# If you specify a string instead of
|
||||
# +true+, CSV will strip string. The
|
||||
# length of string must be 1.
|
||||
# length of the string must be 1.
|
||||
#
|
||||
# See CSV::DEFAULT_OPTIONS for the default settings.
|
||||
#
|
||||
|
@ -939,8 +945,12 @@ class CSV
|
|||
strip: false)
|
||||
raise ArgumentError.new("Cannot parse nil as CSV") if data.nil?
|
||||
|
||||
# create the IO object we will read from
|
||||
@io = data.is_a?(String) ? StringIO.new(data) : data
|
||||
if data.is_a?(String)
|
||||
@io = StringIO.new(data)
|
||||
@io.set_encoding(encoding || data.encoding)
|
||||
else
|
||||
@io = data
|
||||
end
|
||||
@encoding = determine_encoding(encoding, internal_encoding)
|
||||
|
||||
@base_fields_converter_options = {
|
||||
|
@ -992,35 +1002,41 @@ class CSV
|
|||
end
|
||||
|
||||
#
|
||||
# The encoded <tt>:col_sep</tt> used in parsing and writing. See CSV::new
|
||||
# for details.
|
||||
# The encoded <tt>:col_sep</tt> used in parsing and writing.
|
||||
# See CSV::new for details.
|
||||
#
|
||||
def col_sep
|
||||
parser.column_separator
|
||||
end
|
||||
|
||||
#
|
||||
# The encoded <tt>:row_sep</tt> used in parsing and writing. See CSV::new
|
||||
# for details.
|
||||
# The encoded <tt>:row_sep</tt> used in parsing and writing.
|
||||
# See CSV::new for details.
|
||||
#
|
||||
def row_sep
|
||||
parser.row_separator
|
||||
end
|
||||
|
||||
#
|
||||
# The encoded <tt>:quote_char</tt> used in parsing and writing. See CSV::new
|
||||
# for details.
|
||||
# The encoded <tt>:quote_char</tt> used in parsing and writing.
|
||||
# See CSV::new for details.
|
||||
#
|
||||
def quote_char
|
||||
parser.quote_character
|
||||
end
|
||||
|
||||
# The limit for field size, if any. See CSV::new for details.
|
||||
#
|
||||
# The limit for field size, if any.
|
||||
# See CSV::new for details.
|
||||
#
|
||||
def field_size_limit
|
||||
parser.field_size_limit
|
||||
end
|
||||
|
||||
# The regex marking a line as a comment. See CSV::new for details
|
||||
#
|
||||
# The regex marking a line as a comment.
|
||||
# See CSV::new for details.
|
||||
#
|
||||
def skip_lines
|
||||
parser.skip_lines
|
||||
end
|
||||
|
@ -1036,9 +1052,10 @@ class CSV
|
|||
name ? name.first : converter
|
||||
end
|
||||
end
|
||||
|
||||
#
|
||||
# Returns +true+ if unconverted_fields() to parsed results. See CSV::new
|
||||
# for details.
|
||||
# Returns +true+ if unconverted_fields() is added to parsed results.
|
||||
# See CSV::new for details.
|
||||
#
|
||||
def unconverted_fields?
|
||||
parser.unconverted_fields?
|
||||
|
@ -1046,8 +1063,8 @@ class CSV
|
|||
|
||||
#
|
||||
# Returns +nil+ if headers will not be used, +true+ if they will but have not
|
||||
# yet been read, or the actual headers after they have been read. See
|
||||
# CSV::new for details.
|
||||
# yet been read, or the actual headers after they have been read.
|
||||
# See CSV::new for details.
|
||||
#
|
||||
def headers
|
||||
if @writer
|
||||
|
@ -1068,7 +1085,10 @@ class CSV
|
|||
parser.return_headers?
|
||||
end
|
||||
|
||||
# Returns +true+ if headers are written in output. See CSV::new for details.
|
||||
#
|
||||
# Returns +true+ if headers are written in output.
|
||||
# See CSV::new for details.
|
||||
#
|
||||
def write_headers?
|
||||
@writer_options[:write_headers]
|
||||
end
|
||||
|
|
|
@ -1,8 +1,14 @@
|
|||
# frozen_string_literal: true
|
||||
|
||||
class CSV
|
||||
# Note: Don't use this class directly. This is an internal class.
|
||||
class FieldsConverter
|
||||
include Enumerable
|
||||
#
|
||||
# A CSV::FieldsConverter is a data structure for storing the
|
||||
# fields converter properties to be passed as a parameter
|
||||
# when parsing a new file (e.g. CSV::Parser.new(@io, parser_options))
|
||||
#
|
||||
|
||||
def initialize(options={})
|
||||
@converters = []
|
||||
|
|
|
@ -11,10 +11,31 @@ using CSV::DeleteSuffix if CSV.const_defined?(:DeleteSuffix)
|
|||
using CSV::MatchP if CSV.const_defined?(:MatchP)
|
||||
|
||||
class CSV
|
||||
# Note: Don't use this class directly. This is an internal class.
|
||||
class Parser
|
||||
#
|
||||
# A CSV::Parser is m17n aware. The parser works in the Encoding of the IO
|
||||
# or String object being read from or written to. Your data is never transcoded
|
||||
# (unless you ask Ruby to transcode it for you) and will literally be parsed in
|
||||
# the Encoding it is in. Thus CSV will return Arrays or Rows of Strings in the
|
||||
# Encoding of your data. This is accomplished by transcoding the parser itself
|
||||
# into your Encoding.
|
||||
#
|
||||
|
||||
# Raised when encoding is invalid.
|
||||
class InvalidEncoding < StandardError
|
||||
end
|
||||
|
||||
#
|
||||
# CSV::Scanner receives a CSV output, scans it and returns the content.
|
||||
# It also controls the life cycle of the object with its methods +keep_start+,
|
||||
# +keep_end+, +keep_back+, +keep_drop+.
|
||||
#
|
||||
# Uses StringScanner (the official strscan gem). Strscan provides lexical
|
||||
# scanning operations on a String. We inherit its object and take advantage
|
||||
# of its methods. For more information, please visit:
|
||||
# https://ruby-doc.org/stdlib-2.6.1/libdoc/strscan/rdoc/StringScanner.html
|
||||
#
|
||||
class Scanner < StringScanner
|
||||
alias_method :scan_all, :scan
|
||||
|
||||
|
@ -38,7 +59,7 @@ class CSV
|
|||
|
||||
def keep_end
|
||||
start = @keeps.pop
|
||||
string[start, pos - start]
|
||||
string.byteslice(start, pos - start)
|
||||
end
|
||||
|
||||
def keep_back
|
||||
|
@ -50,6 +71,18 @@ class CSV
|
|||
end
|
||||
end
|
||||
|
||||
#
|
||||
# CSV::InputsScanner receives IO inputs, encoding and the chunk_size.
|
||||
# It also controls the life cycle of the object with its methods +keep_start+,
|
||||
# +keep_end+, +keep_back+, +keep_drop+.
|
||||
#
|
||||
# CSV::InputsScanner.scan() tries to match with pattern at the current position.
|
||||
# If there's a match, the scanner advances the “scan pointer” and returns the matched string.
|
||||
# Otherwise, the scanner returns nil.
|
||||
#
|
||||
# CSV::InputsScanner.rest() returns the “rest” of the string (i.e. everything after the scan pointer).
|
||||
# If there is no more data (eos? = true), it returns "".
|
||||
#
|
||||
class InputsScanner
|
||||
def initialize(inputs, encoding, chunk_size: 8192)
|
||||
@inputs = inputs.dup
|
||||
|
@ -137,7 +170,7 @@ class CSV
|
|||
|
||||
def keep_end
|
||||
start, buffer = @keeps.pop
|
||||
keep = @scanner.string[start, @scanner.pos - start]
|
||||
keep = @scanner.string.byteslice(start, @scanner.pos - start)
|
||||
if buffer
|
||||
buffer << keep
|
||||
keep = buffer
|
||||
|
@ -192,7 +225,7 @@ class CSV
|
|||
input = @inputs.first
|
||||
case input
|
||||
when StringIO
|
||||
string = input.string
|
||||
string = input.read
|
||||
raise InvalidEncoding unless string.valid_encoding?
|
||||
@scanner = StringScanner.new(string)
|
||||
@inputs.shift
|
||||
|
@ -319,6 +352,7 @@ class CSV
|
|||
end
|
||||
|
||||
private
|
||||
# A set of tasks to prepare the file in order to parse it
|
||||
def prepare
|
||||
prepare_variable
|
||||
prepare_quote_character
|
||||
|
@ -447,7 +481,13 @@ class CSV
|
|||
end
|
||||
|
||||
def prepare_separators
|
||||
@column_separator = @options[:column_separator].to_s.encode(@encoding)
|
||||
column_separator = @options[:column_separator]
|
||||
@column_separator = column_separator.to_s.encode(@encoding)
|
||||
if @column_separator.size < 1
|
||||
message = ":col_sep must be 1 or more characters: "
|
||||
message += column_separator.inspect
|
||||
raise ArgumentError, message
|
||||
end
|
||||
@row_separator =
|
||||
resolve_row_separator(@options[:row_separator]).encode(@encoding)
|
||||
|
||||
|
@ -534,7 +574,9 @@ class CSV
|
|||
cr = "\r".encode(@encoding)
|
||||
lf = "\n".encode(@encoding)
|
||||
if @input.is_a?(StringIO)
|
||||
separator = detect_row_separator(@input.string, cr, lf)
|
||||
pos = @input.pos
|
||||
separator = detect_row_separator(@input.read, cr, lf)
|
||||
@input.seek(pos)
|
||||
elsif @input.respond_to?(:gets)
|
||||
if @input.is_a?(File)
|
||||
chunk_size = 32 * 1024
|
||||
|
@ -651,7 +693,9 @@ class CSV
|
|||
return false if @quote_character.nil?
|
||||
|
||||
if @input.is_a?(StringIO)
|
||||
sample = @input.string
|
||||
pos = @input.pos
|
||||
sample = @input.read
|
||||
@input.seek(pos)
|
||||
else
|
||||
return false if @samples.empty?
|
||||
sample = @samples.first
|
||||
|
@ -684,7 +728,7 @@ class CSV
|
|||
UnoptimizedStringIO.new(sample)
|
||||
end
|
||||
if @input.is_a?(StringIO)
|
||||
inputs << UnoptimizedStringIO.new(@input.string)
|
||||
inputs << UnoptimizedStringIO.new(@input.read)
|
||||
else
|
||||
inputs << @input
|
||||
end
|
||||
|
@ -697,7 +741,7 @@ class CSV
|
|||
def build_scanner
|
||||
string = nil
|
||||
if @samples.empty? and @input.is_a?(StringIO)
|
||||
string = @input.string
|
||||
string = @input.read
|
||||
elsif @samples.size == 1 and @input.respond_to?(:eof?) and @input.eof?
|
||||
string = @samples[0]
|
||||
end
|
||||
|
|
|
@ -13,13 +13,13 @@ class CSV
|
|||
#
|
||||
class Row
|
||||
#
|
||||
# Construct a new CSV::Row from +headers+ and +fields+, which are expected
|
||||
# Constructs a new CSV::Row from +headers+ and +fields+, which are expected
|
||||
# to be Arrays. If one Array is shorter than the other, it will be padded
|
||||
# with +nil+ objects.
|
||||
#
|
||||
# The optional +header_row+ parameter can be set to +true+ to indicate, via
|
||||
# CSV::Row.header_row?() and CSV::Row.field_row?(), that this is a header
|
||||
# row. Otherwise, the row is assumes to be a field row.
|
||||
# row. Otherwise, the row is assumed to be a field row.
|
||||
#
|
||||
# A CSV::Row object supports the following Array methods through delegation:
|
||||
#
|
||||
|
@ -209,7 +209,7 @@ class CSV
|
|||
# delete( header, offset )
|
||||
# delete( index )
|
||||
#
|
||||
# Used to remove a pair from the row by +header+ or +index+. The pair is
|
||||
# Removes a pair from the row by +header+ or +index+. The pair is
|
||||
# located as described in CSV::Row.field(). The deleted pair is returned,
|
||||
# or +nil+ if a pair could not be found.
|
||||
#
|
||||
|
@ -367,7 +367,9 @@ class CSV
|
|||
end
|
||||
end
|
||||
|
||||
#
|
||||
# A summary of fields, by header, in an ASCII compatible String.
|
||||
#
|
||||
def inspect
|
||||
str = ["#<", self.class.to_s]
|
||||
each do |header, field|
|
||||
|
|
|
@ -13,7 +13,7 @@ class CSV
|
|||
#
|
||||
class Table
|
||||
#
|
||||
# Construct a new CSV::Table from +array_of_rows+, which are expected
|
||||
# Constructs a new CSV::Table from +array_of_rows+, which are expected
|
||||
# to be CSV::Row objects. All rows are assumed to have the same headers.
|
||||
#
|
||||
# The optional +headers+ parameter can be set to Array of headers.
|
||||
|
|
|
@ -2,5 +2,5 @@
|
|||
|
||||
class CSV
|
||||
# The version of the installed library.
|
||||
VERSION = "3.1.1"
|
||||
VERSION = "3.1.2"
|
||||
end
|
||||
|
|
|
@ -6,7 +6,12 @@ require_relative "row"
|
|||
using CSV::MatchP if CSV.const_defined?(:MatchP)
|
||||
|
||||
class CSV
|
||||
# Note: Don't use this class directly. This is an internal class.
|
||||
class Writer
|
||||
#
|
||||
# A CSV::Writer receives an output, prepares the header, format and output.
|
||||
# It allows us to write new rows in the object and rewind it.
|
||||
#
|
||||
attr_reader :lineno
|
||||
attr_reader :headers
|
||||
|
||||
|
@ -22,6 +27,9 @@ class CSV
|
|||
@fields_converter = @options[:fields_converter]
|
||||
end
|
||||
|
||||
#
|
||||
# Adds a new row
|
||||
#
|
||||
def <<(row)
|
||||
case row
|
||||
when Row
|
||||
|
@ -47,6 +55,9 @@ class CSV
|
|||
self
|
||||
end
|
||||
|
||||
#
|
||||
# Winds back to the beginning
|
||||
#
|
||||
def rewind
|
||||
@lineno = 0
|
||||
@headers = nil if @options[:headers].nil?
|
||||
|
|
|
@ -233,11 +233,21 @@ line,5,jkl
|
|||
assert_equal([["a"]], CSV.parse("a\r\n"))
|
||||
end
|
||||
|
||||
def test_seeked_string_io
|
||||
input_with_bom = StringIO.new("\ufeffあ,い,う\r\na,b,c\r\n")
|
||||
input_with_bom.read(3)
|
||||
assert_equal([
|
||||
["あ", "い", "う"],
|
||||
["a", "b", "c"],
|
||||
],
|
||||
CSV.new(input_with_bom).each.to_a)
|
||||
end
|
||||
|
||||
private
|
||||
def assert_parse_errors_out(*args, **options)
|
||||
def assert_parse_errors_out(data, **options)
|
||||
assert_raise(CSV::MalformedCSVError) do
|
||||
Timeout.timeout(0.2) do
|
||||
CSV.parse(*args, **options)
|
||||
CSV.parse(data, **options)
|
||||
fail("Parse didn't error out")
|
||||
end
|
||||
end
|
||||
|
|
|
@ -312,12 +312,12 @@ A
|
|||
end
|
||||
|
||||
def test_parse_empty
|
||||
assert_equal(CSV::Table.new([], **{}),
|
||||
assert_equal(CSV::Table.new([]),
|
||||
CSV.parse("", headers: true))
|
||||
end
|
||||
|
||||
def test_parse_empty_line
|
||||
assert_equal(CSV::Table.new([], **{}),
|
||||
assert_equal(CSV::Table.new([]),
|
||||
CSV.parse("\n", headers: true))
|
||||
end
|
||||
|
||||
|
|
|
@ -6,7 +6,7 @@ require_relative "../helper"
|
|||
class TestCSVParseRewind < Test::Unit::TestCase
|
||||
extend DifferentOFS
|
||||
|
||||
def parse(data, options={})
|
||||
def parse(data, **options)
|
||||
csv = CSV.new(data, **options)
|
||||
records = csv.to_a
|
||||
csv.rewind
|
||||
|
|
|
@ -268,11 +268,11 @@ class TestCSVEncodings < Test::Unit::TestCase
|
|||
|
||||
private
|
||||
|
||||
def assert_parses(fields, encoding, options = { })
|
||||
def assert_parses(fields, encoding, **options)
|
||||
encoding = Encoding.find(encoding) unless encoding.is_a? Encoding
|
||||
orig_fields = fields
|
||||
fields = encode_ary(fields, encoding)
|
||||
data = ary_to_data(fields, options)
|
||||
data = ary_to_data(fields, **options)
|
||||
parsed = CSV.parse(data, **options)
|
||||
assert_equal(fields, parsed)
|
||||
parsed.flatten.each_with_index do |field, i|
|
||||
|
@ -285,7 +285,9 @@ class TestCSVEncodings < Test::Unit::TestCase
|
|||
end
|
||||
end
|
||||
begin
|
||||
CSV.open(@temp_csv_path, "rb:#{encoding}:#{__ENCODING__}", **options) do |csv|
|
||||
CSV.open(@temp_csv_path,
|
||||
"rb:#{encoding}:#{__ENCODING__}",
|
||||
**options) do |csv|
|
||||
csv.each_with_index do |row, i|
|
||||
assert_equal(orig_fields[i], row)
|
||||
end
|
||||
|
@ -315,7 +317,7 @@ class TestCSVEncodings < Test::Unit::TestCase
|
|||
ary.map { |row| row.map { |field| field.encode(encoding) } }
|
||||
end
|
||||
|
||||
def ary_to_data(ary, options = { })
|
||||
def ary_to_data(ary, **options)
|
||||
encoding = ary.flatten.first.encoding
|
||||
quote_char = (options[:quote_char] || '"').encode(encoding)
|
||||
col_sep = (options[:col_sep] || ",").encode(encoding)
|
||||
|
@ -327,9 +329,9 @@ class TestCSVEncodings < Test::Unit::TestCase
|
|||
}.join('').encode(encoding)
|
||||
end
|
||||
|
||||
def encode_for_tests(data, options = { })
|
||||
yield ary_to_data(encode_ary(data, "UTF-8"), options)
|
||||
yield ary_to_data(encode_ary(data, "UTF-16BE"), options)
|
||||
def encode_for_tests(data, **options)
|
||||
yield ary_to_data(encode_ary(data, "UTF-8"), **options)
|
||||
yield ary_to_data(encode_ary(data, "UTF-16BE"), **options)
|
||||
end
|
||||
|
||||
def each_encoding
|
||||
|
|
|
@ -52,6 +52,20 @@ line,4,jkl
|
|||
assert_equal([",,,", nil], CSV.parse_line(",,,;", col_sep: ";"))
|
||||
end
|
||||
|
||||
def test_col_sep_nil
|
||||
assert_raise_with_message(ArgumentError,
|
||||
":col_sep must be 1 or more characters: nil") do
|
||||
CSV.parse(@sample_data, col_sep: nil)
|
||||
end
|
||||
end
|
||||
|
||||
def test_col_sep_empty
|
||||
assert_raise_with_message(ArgumentError,
|
||||
":col_sep must be 1 or more characters: \"\"") do
|
||||
CSV.parse(@sample_data, col_sep: "")
|
||||
end
|
||||
end
|
||||
|
||||
def test_row_sep
|
||||
error = assert_raise(CSV::MalformedCSVError) do
|
||||
CSV.parse_line("1,2,3\n,4,5\r\n", row_sep: "\r\n")
|
||||
|
@ -110,10 +124,10 @@ line,4,jkl
|
|||
|
||||
def test_line
|
||||
lines = [
|
||||
%Q(abc,def\n),
|
||||
%Q(abc,"d\nef"\n),
|
||||
%Q(abc,"d\r\nef"\n),
|
||||
%Q(abc,"d\ref")
|
||||
%Q(\u{3000}abc,def\n),
|
||||
%Q(\u{3000}abc,"d\nef"\n),
|
||||
%Q(\u{3000}abc,"d\r\nef"\n),
|
||||
%Q(\u{3000}abc,"d\ref")
|
||||
]
|
||||
csv = CSV.new(lines.join(''))
|
||||
lines.each do |line|
|
||||
|
|
|
@ -205,6 +205,32 @@ module TestCSVWriteGeneral
|
|||
assert_equal(%Q[あ,い,う#{$INPUT_RECORD_SEPARATOR}].encode("EUC-JP"),
|
||||
generate_line(row))
|
||||
end
|
||||
|
||||
def test_encoding_with_default_internal
|
||||
with_default_internal(Encoding::UTF_8) do
|
||||
row = ["あ", "い", "う"].collect {|field| field.encode("EUC-JP")}
|
||||
assert_equal(%Q[あ,い,う#{$INPUT_RECORD_SEPARATOR}].encode("EUC-JP"),
|
||||
generate_line(row, encoding: Encoding::EUC_JP))
|
||||
end
|
||||
end
|
||||
|
||||
def test_with_default_internal
|
||||
with_default_internal(Encoding::UTF_8) do
|
||||
row = ["あ", "い", "う"].collect {|field| field.encode("EUC-JP")}
|
||||
assert_equal(%Q[あ,い,う#{$INPUT_RECORD_SEPARATOR}].encode("EUC-JP"),
|
||||
generate_line(row))
|
||||
end
|
||||
end
|
||||
|
||||
def with_default_internal(encoding)
|
||||
original = Encoding.default_internal
|
||||
begin
|
||||
Encoding.default_internal = encoding
|
||||
yield
|
||||
ensure
|
||||
Encoding.default_internal = original
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
class TestCSVWriteGeneralGenerateLine < Test::Unit::TestCase
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue