mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
Import CSV 3.1.2 (#2547)
This commit is contained in:
parent
d6e68bb263
commit
92df7d98b6
Notes:
git
2019-10-12 14:03:45 +09:00
Merged-By: kou <kou@clear-code.com>
14 changed files with 582 additions and 447 deletions
|
@ -11,10 +11,31 @@ using CSV::DeleteSuffix if CSV.const_defined?(:DeleteSuffix)
|
|||
using CSV::MatchP if CSV.const_defined?(:MatchP)
|
||||
|
||||
class CSV
|
||||
# Note: Don't use this class directly. This is an internal class.
|
||||
class Parser
|
||||
#
|
||||
# A CSV::Parser is m17n aware. The parser works in the Encoding of the IO
|
||||
# or String object being read from or written to. Your data is never transcoded
|
||||
# (unless you ask Ruby to transcode it for you) and will literally be parsed in
|
||||
# the Encoding it is in. Thus CSV will return Arrays or Rows of Strings in the
|
||||
# Encoding of your data. This is accomplished by transcoding the parser itself
|
||||
# into your Encoding.
|
||||
#
|
||||
|
||||
# Raised when encoding is invalid.
|
||||
class InvalidEncoding < StandardError
|
||||
end
|
||||
|
||||
#
|
||||
# CSV::Scanner receives a CSV output, scans it and return the content.
|
||||
# It also controls the life cycle of the object with its methods +keep_start+,
|
||||
# +keep_end+, +keep_back+, +keep_drop+.
|
||||
#
|
||||
# Uses StringScanner (the official strscan gem). Strscan provides lexical
|
||||
# scanning operations on a String. We inherit its object and take advantage
|
||||
# on the methods. For more information, please visit:
|
||||
# https://ruby-doc.org/stdlib-2.6.1/libdoc/strscan/rdoc/StringScanner.html
|
||||
#
|
||||
class Scanner < StringScanner
|
||||
alias_method :scan_all, :scan
|
||||
|
||||
|
@ -38,7 +59,7 @@ class CSV
|
|||
|
||||
def keep_end
|
||||
start = @keeps.pop
|
||||
string[start, pos - start]
|
||||
string.byteslice(start, pos - start)
|
||||
end
|
||||
|
||||
def keep_back
|
||||
|
@ -50,6 +71,18 @@ class CSV
|
|||
end
|
||||
end
|
||||
|
||||
#
|
||||
# CSV::InputsScanner receives IO inputs, encoding and the chunk_size.
|
||||
# It also controls the life cycle of the object with its methods +keep_start+,
|
||||
# +keep_end+, +keep_back+, +keep_drop+.
|
||||
#
|
||||
# CSV::InputsScanner.scan() tries to match with pattern at the current position.
|
||||
# If there's a match, the scanner advances the “scan pointer” and returns the matched string.
|
||||
# Otherwise, the scanner returns nil.
|
||||
#
|
||||
# CSV::InputsScanner.rest() returns the “rest” of the string (i.e. everything after the scan pointer).
|
||||
# If there is no more data (eos? = true), it returns "".
|
||||
#
|
||||
class InputsScanner
|
||||
def initialize(inputs, encoding, chunk_size: 8192)
|
||||
@inputs = inputs.dup
|
||||
|
@ -137,7 +170,7 @@ class CSV
|
|||
|
||||
def keep_end
|
||||
start, buffer = @keeps.pop
|
||||
keep = @scanner.string[start, @scanner.pos - start]
|
||||
keep = @scanner.string.byteslice(start, @scanner.pos - start)
|
||||
if buffer
|
||||
buffer << keep
|
||||
keep = buffer
|
||||
|
@ -192,7 +225,7 @@ class CSV
|
|||
input = @inputs.first
|
||||
case input
|
||||
when StringIO
|
||||
string = input.string
|
||||
string = input.read
|
||||
raise InvalidEncoding unless string.valid_encoding?
|
||||
@scanner = StringScanner.new(string)
|
||||
@inputs.shift
|
||||
|
@ -319,6 +352,7 @@ class CSV
|
|||
end
|
||||
|
||||
private
|
||||
# A set of tasks to prepare the file in order to parse it
|
||||
def prepare
|
||||
prepare_variable
|
||||
prepare_quote_character
|
||||
|
@ -447,7 +481,13 @@ class CSV
|
|||
end
|
||||
|
||||
def prepare_separators
|
||||
@column_separator = @options[:column_separator].to_s.encode(@encoding)
|
||||
column_separator = @options[:column_separator]
|
||||
@column_separator = column_separator.to_s.encode(@encoding)
|
||||
if @column_separator.size < 1
|
||||
message = ":col_sep must be 1 or more characters: "
|
||||
message += column_separator.inspect
|
||||
raise ArgumentError, message
|
||||
end
|
||||
@row_separator =
|
||||
resolve_row_separator(@options[:row_separator]).encode(@encoding)
|
||||
|
||||
|
@ -534,7 +574,9 @@ class CSV
|
|||
cr = "\r".encode(@encoding)
|
||||
lf = "\n".encode(@encoding)
|
||||
if @input.is_a?(StringIO)
|
||||
separator = detect_row_separator(@input.string, cr, lf)
|
||||
pos = @input.pos
|
||||
separator = detect_row_separator(@input.read, cr, lf)
|
||||
@input.seek(pos)
|
||||
elsif @input.respond_to?(:gets)
|
||||
if @input.is_a?(File)
|
||||
chunk_size = 32 * 1024
|
||||
|
@ -651,7 +693,9 @@ class CSV
|
|||
return false if @quote_character.nil?
|
||||
|
||||
if @input.is_a?(StringIO)
|
||||
sample = @input.string
|
||||
pos = @input.pos
|
||||
sample = @input.read
|
||||
@input.seek(pos)
|
||||
else
|
||||
return false if @samples.empty?
|
||||
sample = @samples.first
|
||||
|
@ -684,7 +728,7 @@ class CSV
|
|||
UnoptimizedStringIO.new(sample)
|
||||
end
|
||||
if @input.is_a?(StringIO)
|
||||
inputs << UnoptimizedStringIO.new(@input.string)
|
||||
inputs << UnoptimizedStringIO.new(@input.read)
|
||||
else
|
||||
inputs << @input
|
||||
end
|
||||
|
@ -697,7 +741,7 @@ class CSV
|
|||
def build_scanner
|
||||
string = nil
|
||||
if @samples.empty? and @input.is_a?(StringIO)
|
||||
string = @input.string
|
||||
string = @input.read
|
||||
elsif @samples.size == 1 and @input.respond_to?(:eof?) and @input.eof?
|
||||
string = @samples[0]
|
||||
end
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue