# frozen_string_literal: true require "forwardable" class CSV # # A CSV::Table is a two-dimensional data structure for representing CSV # documents. Tables allow you to work with the data by row or column, # manipulate the data, and even convert the results back to CSV, if needed. # # All tables returned by CSV will be constructed from this class, if header # row processing is activated. # class Table # # Constructs a new CSV::Table from +array_of_rows+, which are expected # to be CSV::Row objects. All rows are assumed to have the same headers. # # The optional +headers+ parameter can be set to Array of headers. # If headers aren't set, headers are fetched from CSV::Row objects. # Otherwise, headers() method will return headers being set in # headers argument. # # A CSV::Table object supports the following Array methods through # delegation: # # * empty?() # * length() # * size() # def initialize(array_of_rows, headers: nil) @table = array_of_rows @headers = headers unless @headers if @table.empty? @headers = [] else @headers = @table.first.headers end end @mode = :col_or_row end # The current access mode for indexing and iteration. attr_reader :mode # Internal data format used to compare equality. attr_reader :table protected :table ### Array Delegation ### extend Forwardable def_delegators :@table, :empty?, :length, :size # # Returns a duplicate table object, in column mode. This is handy for # chaining in a single call without changing the table mode, but be aware # that this method can consume a fair amount of memory for bigger data sets. # # This method returns the duplicate table for chaining. Don't chain # destructive methods (like []=()) this way though, since you are working # with a duplicate. # def by_col self.class.new(@table.dup).by_col! end # # Switches the mode of this table to column mode. All calls to indexing and # iteration methods will work with columns until the mode is changed again. # # This method returns the table and is safe to chain. # def by_col! @mode = :col self end # # Returns a duplicate table object, in mixed mode. This is handy for # chaining in a single call without changing the table mode, but be aware # that this method can consume a fair amount of memory for bigger data sets. # # This method returns the duplicate table for chaining. Don't chain # destructive methods (like []=()) this way though, since you are working # with a duplicate. # def by_col_or_row self.class.new(@table.dup).by_col_or_row! end # # Switches the mode of this table to mixed mode. All calls to indexing and # iteration methods will use the default intelligent indexing system until # the mode is changed again. In mixed mode an index is assumed to be a row # reference while anything else is assumed to be column access by headers. # # This method returns the table and is safe to chain. # def by_col_or_row! @mode = :col_or_row self end # # Returns a duplicate table object, in row mode. This is handy for chaining # in a single call without changing the table mode, but be aware that this # method can consume a fair amount of memory for bigger data sets. # # This method returns the duplicate table for chaining. Don't chain # destructive methods (like []=()) this way though, since you are working # with a duplicate. # def by_row self.class.new(@table.dup).by_row! end # # Switches the mode of this table to row mode. All calls to indexing and # iteration methods will work with rows until the mode is changed again. # # This method returns the table and is safe to chain. # def by_row! @mode = :row self end # # Returns the headers for the first row of this table (assumed to match all # other rows). The headers Array passed to CSV::Table.new is returned for # empty tables. # def headers if @table.empty? @headers.dup else @table.first.headers end end # :call-seq: # table[n] -> row # table[range] -> array_of_rows # table[header] -> array_of_fields # # Returns data from the table; does not modify the table. # # --- # # The expression table[n], where +n+ is a non-negative \Integer, # returns the +n+th row of the table, if that row exists, # and if the access mode is :row or :col_or_row: # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" # table = CSV.parse(source, headers: true) # table.by_row! # => # # table[1] # => # # table.by_col_or_row! # => # # table[1] # => # # # Counts backward from the last row if +n+ is negative: # table[-1] # => # # # Returns +nil+ if +n+ is too large or too small: # table[4] # => nil # table[-4] => nil # # Raises an exception if the access mode is :row # and +n+ is not an # {Integer-convertible object}[rdoc-ref:implicit_conversion.rdoc@Integer-Convertible+Objects]. # table.by_row! # => # # # Raises TypeError (no implicit conversion of String into Integer): # table['Name'] # # --- # # The expression table[range], where +range+ is a Range object, # returns rows from the table, beginning at row range.first, # if those rows exist, and if the access mode is :row or :col_or_row: # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" # table = CSV.parse(source, headers: true) # table.by_row! # => # # rows = table[1..2] # => # # rows # => [#, #] # table.by_col_or_row! # => # # rows = table[1..2] # => # # rows # => [#, #] # # If there are too few rows, returns all from range.first to the end: # rows = table[1..50] # => # # rows # => [#, #] # # Special case: if range.start == table.size, returns an empty \Array: # table[table.size..50] # => [] # # If range.end is negative, calculates the ending index from the end: # rows = table[0..-1] # rows # => [#, #, #] # # If range.start is negative, calculates the starting index from the end: # rows = table[-1..2] # rows # => [#] # # If range.start is larger than table.size, returns +nil+: # table[4..4] # => nil # # --- # # The expression table[header], where +header+ is a \String, # returns column values (\Array of \Strings) if the column exists # and if the access mode is :col or :col_or_row: # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" # table = CSV.parse(source, headers: true) # table.by_col! # => # # table['Name'] # => ["foo", "bar", "baz"] # table.by_col_or_row! # => # # col = table['Name'] # col # => ["foo", "bar", "baz"] # # Modifying the returned column values does not modify the table: # col[0] = 'bat' # col # => ["bat", "bar", "baz"] # table['Name'] # => ["foo", "bar", "baz"] # # Returns an \Array of +nil+ values if there is no such column: # table['Nosuch'] # => [nil, nil, nil] def [](index_or_header) if @mode == :row or # by index (@mode == :col_or_row and (index_or_header.is_a?(Integer) or index_or_header.is_a?(Range))) @table[index_or_header] else # by header @table.map { |row| row[index_or_header] } end end # # In the default mixed mode, this method assigns rows for index access and # columns for header access. You can force the index association by first # calling by_col!() or by_row!(). # # Rows may be set to an Array of values (which will inherit the table's # headers()) or a CSV::Row. # # Columns may be set to a single value, which is copied to each row of the # column, or an Array of values. Arrays of values are assigned to rows top # to bottom in row major order. Excess values are ignored and if the Array # does not have a value for each row the extra rows will receive a +nil+. # # Assigning to an existing column or row clobbers the data. Assigning to # new columns creates them at the right end of the table. # def []=(index_or_header, value) if @mode == :row or # by index (@mode == :col_or_row and index_or_header.is_a? Integer) if value.is_a? Array @table[index_or_header] = Row.new(headers, value) else @table[index_or_header] = value end else # set column unless index_or_header.is_a? Integer index = @headers.index(index_or_header) || @headers.size @headers[index] = index_or_header end if value.is_a? Array # multiple values @table.each_with_index do |row, i| if row.header_row? row[index_or_header] = index_or_header else row[index_or_header] = value[i] end end else # repeated value @table.each do |row| if row.header_row? row[index_or_header] = index_or_header else row[index_or_header] = value end end end end end # :call-seq: # table.values_at(*indexes) -> array_of_rows # table.values_at(*headers) -> array_of_columns_data # # If the access mode is :row or :col_or_row, # and each argument is either an \Integer or a \Range, # returns rows. # Otherwise, returns columns data. # # In either case, the returned values are in the order # specified by the arguments. Arguments may be repeated. # # --- # # Returns rows as an \Array of \CSV::Row objects. # # No argument: # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" # table = CSV.parse(source, headers: true) # table.values_at # => [] # # One index: # values = table.values_at(0) # values # => [#] # # Two indexes: # values = table.values_at(2, 0) # values # => [#, #] # # One \Range: # values = table.values_at(1..2) # values # => [#, #] # # \Ranges and indexes: # values = table.values_at(0..1, 1..2, 0, 2) # pp values # Output: # [#, # #, # #, # #, # #, # #] # # --- # # Returns columns data as row Arrays, # each consisting of the specified columns data for that row: # values = table.values_at('Name') # values # => [["foo"], ["bar"], ["baz"]] # values = table.values_at('Value', 'Name') # values # => [["0", "foo"], ["1", "bar"], ["2", "baz"]] def values_at(*indices_or_headers) if @mode == :row or # by indices ( @mode == :col_or_row and indices_or_headers.all? do |index| index.is_a?(Integer) or ( index.is_a?(Range) and index.first.is_a?(Integer) and index.last.is_a?(Integer) ) end ) @table.values_at(*indices_or_headers) else # by headers @table.map { |row| row.values_at(*indices_or_headers) } end end # :call-seq: # table << row_or_array -> self # # If +row_or_array+ is a \CSV::Row object, # it is appended to the table: # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" # table = CSV.parse(source, headers: true) # table << CSV::Row.new(table.headers, ['bat', 3]) # table[3] # => # # # If +row_or_array+ is an \Array, it is used to create a new # \CSV::Row object which is then appended to the table: # table << ['bam', 4] # table[4] # => # def <<(row_or_array) if row_or_array.is_a? Array # append Array @table << Row.new(headers, row_or_array) else # append Row @table << row_or_array end self # for chaining end # # :call-seq: # table.push(*rows_or_arrays) -> self # # A shortcut for appending multiple rows. Equivalent to: # rows.each {|row| self << row } # # Each argument may be either a \CSV::Row object or an \Array: # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" # table = CSV.parse(source, headers: true) # rows = [ # CSV::Row.new(table.headers, ['bat', 3]), # ['bam', 4] # ] # table.push(*rows) # table[3..4] # => [#, #] def push(*rows) rows.each { |row| self << row } self # for chaining end # :call-seq: # table.delete(*indexes) -> deleted_values # table.delete(*headers) -> deleted_values # # If the access mode is :row or :col_or_row, # and each argument is either an \Integer or a \Range, # returns deleted rows. # Otherwise, returns deleted columns data. # # In either case, the returned values are in the order # specified by the arguments. Arguments may be repeated. # # --- # # Returns rows as an \Array of \CSV::Row objects. # # One index: # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" # table = CSV.parse(source, headers: true) # deleted_values = table.delete(0) # deleted_values # => [#] # # Two indexes: # table = CSV.parse(source, headers: true) # deleted_values = table.delete(2, 0) # deleted_values # => [#, #] # # --- # # Returns columns data as column Arrays. # # One header: # table = CSV.parse(source, headers: true) # deleted_values = table.delete('Name') # deleted_values # => ["foo", "bar", "baz"] # # Two headers: # table = CSV.parse(source, headers: true) # deleted_values = table.delete('Value', 'Name') # deleted_values # => [["0", "1", "2"], ["foo", "bar", "baz"]] def delete(*indexes_or_headers) if indexes_or_headers.empty? raise ArgumentError, "wrong number of arguments (given 0, expected 1+)" end deleted_values = indexes_or_headers.map do |index_or_header| if @mode == :row or # by index (@mode == :col_or_row and index_or_header.is_a? Integer) @table.delete_at(index_or_header) else # by header if index_or_header.is_a? Integer @headers.delete_at(index_or_header) else @headers.delete(index_or_header) end @table.map { |row| row.delete(index_or_header).last } end end if indexes_or_headers.size == 1 deleted_values[0] else deleted_values end end # Removes rows or columns for which the block returns a truthy value; # returns +self+. # # Removes rows when the access mode is :row or :col_or_row; # calls the block with each \CSV::Row object: # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" # table = CSV.parse(source, headers: true) # table.by_row! # => # # table.size # => 3 # table.delete_if {|row| row['Name'].start_with?('b') } # table.size # => 1 # # Removes columns when the access mode is :col; # calls the block with each column as a 2-element array # containing the header and an \Array of column fields: # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" # table = CSV.parse(source, headers: true) # table.by_col! # => # # table.headers.size # => 2 # table.delete_if {|column_data| column_data[1].include?('2') } # table.headers.size # => 1 # # Returns a new \Enumerator if no block is given: # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" # table = CSV.parse(source, headers: true) # table.delete_if # => #:delete_if> def delete_if(&block) return enum_for(__method__) { @mode == :row or @mode == :col_or_row ? size : headers.size } unless block_given? if @mode == :row or @mode == :col_or_row # by index @table.delete_if(&block) else # by header deleted = [] headers.each do |header| deleted << delete(header) if yield([header, self[header]]) end end self # for chaining end include Enumerable # Calls the block with each row or column; returns +self+. # # When the access mode is :row or :col_or_row, # calls the block with each \CSV::Row object: # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" # table = CSV.parse(source, headers: true) # table.by_row! # => # # table.each {|row| p row } # Output: # # # # # # # # When the access mode is :col, # calls the block with each column as a 2-element array # containing the header and an \Array of column fields: # table.by_col! # => # # table.each {|column_data| p column_data } # Output: # ["Name", ["foo", "bar", "baz"]] # ["Value", ["0", "1", "2"]] # # Returns a new \Enumerator if no block is given: # table.each # => #:each> def each(&block) return enum_for(__method__) { @mode == :col ? headers.size : size } unless block_given? if @mode == :col headers.each { |header| yield([header, self[header]]) } else @table.each(&block) end self # for chaining end # Returns +true+ if all each row of +self+ == # the corresponding row of +other_table+, otherwise, +false+. # # The access mode does no affect the result. # # Equal tables: # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" # table = CSV.parse(source, headers: true) # other_table = CSV.parse(source, headers: true) # table == other_table # => true # # Different row count: # other_table.delete(2) # table == other_table # => false # # Different last row: # other_table << ['bat', 3] # table == other_table # => false def ==(other) return @table == other.table if other.is_a? CSV::Table @table == other end # # Returns the table as an Array of Arrays. Headers will be the first row, # then all of the field rows will follow. # def to_a array = [headers] @table.each do |row| array.push(row.fields) unless row.header_row? end array end # # Returns the table as a complete CSV String. Headers will be listed first, # then all of the field rows. # # This method assumes you want the Table.headers(), unless you explicitly # pass :write_headers => false. # def to_csv(write_headers: true, **options) array = write_headers ? [headers.to_csv(**options)] : [] @table.each do |row| array.push(row.fields.to_csv(**options)) unless row.header_row? end array.join("") end alias_method :to_s, :to_csv # # Extracts the nested value specified by the sequence of +index+ or +header+ objects by calling dig at each step, # returning nil if any intermediate step is nil. # def dig(index_or_header, *index_or_headers) value = self[index_or_header] if value.nil? nil elsif index_or_headers.empty? value else unless value.respond_to?(:dig) raise TypeError, "#{value.class} does not have \#dig method" end value.dig(*index_or_headers) end end # Shows the mode and size of this table in a US-ASCII String. def inspect "#<#{self.class} mode:#{@mode} row_count:#{to_a.size}>".encode("US-ASCII") end end end