ruby--ruby/lib/csv/row.rb

# frozen_string_literal: true

require "forwardable"

class CSV
  #
  # A CSV::Row is part Array and part Hash. It retains an order for the fields
  # and allows duplicates just as an Array would, but also allows you to access
  # fields by name just as you could if they were in a Hash.
  #
  # All rows returned by CSV will be constructed from this class, if header row
  # processing is activated.
  #
  class Row
    #
    # Constructs a new CSV::Row from +headers+ and +fields+, which are expected
    # to be Arrays. If one Array is shorter than the other, it will be padded
    # with +nil+ objects.
    #
    # The optional +header_row+ parameter can be set to +true+ to indicate, via
    # CSV::Row.header_row?() and CSV::Row.field_row?(), that this is a header
    # row. Otherwise, the row assumes to be a field row.
    #
    # A CSV::Row object supports the following Array methods through delegation:
    #
    # * empty?()
    # * length()
    # * size()
    #
    def initialize(headers, fields, header_row = false)
      @header_row = header_row
      headers.each { |h| h.freeze if h.is_a? String }

      # handle extra headers or fields
      @row = if headers.size >= fields.size
        headers.zip(fields)
      else
        fields.zip(headers).each(&:reverse!)
      end
    end

    # Internal data format used to compare equality.
    attr_reader :row
    protected   :row

    ### Array Delegation ###

    extend Forwardable
    def_delegators :@row, :empty?, :length, :size

    def initialize_copy(other)
      super_return_value = super
      @row = @row.collect(&:dup)
      super_return_value
    end

    # :call-seq:
    #   row.header_row? -> true or false
    #
    # Returns +true+ if this is a header row, +false+ otherwise.
    def header_row?
      @header_row
    end

    # :call-seq:
    #   row.field_row? -> true or false
    #
    # Returns +true+ if this is a field row, +false+ otherwise.
    def field_row?
      not header_row?
    end

    # :call-seq:
    #    row.headers
    #
    # Returns the headers for this row:
    #   source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
    #   table = CSV.parse(source, headers: true)
    #   row = table.first
    #   row.headers # => ["Name", "Value"]
    def headers
      @row.map(&:first)
    end

    # :call-seq:
    #   field(index)
    #   field(header)
    #   field(header, offset)
    #
    # Returns the field value for the given +index+ or +header+.
    #
    # ---
    #
    # Fetch field value by \Integer index:
    #   source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
    #   table = CSV.parse(source, headers: true)
    #   row = table[0]
    #   row.field(0) # => "foo"
    #   row.field(1) # => "bar"
    #
    # Counts backward from the last column if +index+ is negative:
    #   row.field(-1) # => "0"
    #   row.field(-2) # => "foo"
    #
    # Returns +nil+ if +index+ is out of range:
    #   row.field(2) # => nil
    #   row.field(-3) # => nil
    #
    # ---
    #
    # Fetch field value by header (first found):
    #   source = "Name,Name,Name\nFoo,Bar,Baz\n"
    #   table = CSV.parse(source, headers: true)
    #   row = table[0]
    #   row.field('Name') # => "Foo"
    #
    # Fetch field value by header, ignoring +offset+ leading fields:
    #   source = "Name,Name,Name\nFoo,Bar,Baz\n"
    #   table = CSV.parse(source, headers: true)
    #   row = table[0]
    #   row.field('Name', 2) # => "Baz"
    #
    # Returns +nil+ if the header does not exist.
    def field(header_or_index, minimum_index = 0)
      # locate the pair
      finder = (header_or_index.is_a?(Integer) || header_or_index.is_a?(Range)) ? :[] : :assoc
      pair   = @row[minimum_index..-1].public_send(finder, header_or_index)

      # return the field if we have a pair
      if pair.nil?
        nil
      else
        header_or_index.is_a?(Range) ? pair.map(&:last) : pair.last
      end
    end
    alias_method :[], :field

    #
    # :call-seq:
    #   fetch(header)
    #   fetch(header, default)
    #   fetch(header) {|row| ... }
    #
    # Returns the field value as specified by +header+.
    #
    # ---
    #
    # With the single argument +header+, returns the field value
    # for that header (first found):
    #   source = "Name,Name,Name\nFoo,Bar,Baz\n"
    #   table = CSV.parse(source, headers: true)
    #   row = table[0]
    #   row.fetch('Name') # => "Foo"
    #
    # Raises exception +KeyError+ if the header does not exist.
    #
    # ---
    #
    # With arguments +header+ and +default+ given,
    # returns the field value for the header (first found)
    # if the header exists, otherwise returns +default+:
    #   source = "Name,Name,Name\nFoo,Bar,Baz\n"
    #   table = CSV.parse(source, headers: true)
    #   row = table[0]
    #   row.fetch('Name', '') # => "Foo"
    #   row.fetch(:nosuch, '') # => ""
    #
    # ---
    #
    # With argument +header+ and a block given,
    # returns the field value for the header (first found)
    # if the header exists; otherwise calls the block
    # and returns its return value:
    #   source = "Name,Name,Name\nFoo,Bar,Baz\n"
    #   table = CSV.parse(source, headers: true)
    #   row = table[0]
    #   row.fetch('Name') {|header| fail 'Cannot happen' } # => "Foo"
    #   row.fetch(:nosuch) {|header| "Header '#{header} not found'" } # => "Header 'nosuch not found'"
    def fetch(header, *varargs)
      raise ArgumentError, "Too many arguments" if varargs.length > 1
      pair = @row.assoc(header)
      if pair
        pair.last
      else
        if block_given?
          yield header
        elsif varargs.empty?
          raise KeyError, "key not found: #{header}"
        else
          varargs.first
        end
      end
    end

    # :call-seq:
    #   row.has_key?(header)
    #
    # Returns +true+ if there is a field with the given +header+,
    # +false+ otherwise.
    def has_key?(header)
      !!@row.assoc(header)
    end
    alias_method :include?, :has_key?
    alias_method :key?,     :has_key?
    alias_method :member?,  :has_key?
    alias_method :header?,  :has_key?

    #
    # :call-seq:
    #   row[index] = value -> value
    #   row[header, offset] = value -> value
    #   row[header] = value -> value
    #
    # Assigns the field value for the given +index+ or +header+;
    # returns +value+.
    #
    # ---
    #
    # Assign field value by \Integer index:
    #   source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
    #   table = CSV.parse(source, headers: true)
    #   row = table[0]
    #   row[0] = 'Bat'
    #   row[1] = 3
    #   row # => #<CSV::Row "Name":"Bat" "Value":3>
    #
    # Counts backward from the last column if +index+ is negative:
    #   row[-1] = 4
    #   row[-2] = 'Bam'
    #   row # => #<CSV::Row "Name":"Bam" "Value":4>
    #
    # Extends the row with <tt>nil:nil</tt> if positive +index+ is not in the row:
    #   row[4] = 5
    #   row # => #<CSV::Row "Name":"bad" "Value":4 nil:nil nil:nil nil:5>
    #
    # Raises IndexError if negative +index+ is too small (too far from zero).
    #
    # ---
    #
    # Assign field value by header (first found):
    #   source = "Name,Name,Name\nFoo,Bar,Baz\n"
    #   table = CSV.parse(source, headers: true)
    #   row = table[0]
    #   row['Name'] = 'Bat'
    #   row # => #<CSV::Row "Name":"Bat" "Name":"Bar" "Name":"Baz">
    #
    # Assign field value by header, ignoring +offset+ leading fields:
    #   source = "Name,Name,Name\nFoo,Bar,Baz\n"
    #   table = CSV.parse(source, headers: true)
    #   row = table[0]
    #   row['Name', 2] = 4
    #   row # => #<CSV::Row "Name":"Foo" "Name":"Bar" "Name":4>
    #
    # Append new field by (new) header:
    #   source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
    #   table = CSV.parse(source, headers: true)
    #   row = table[0]
    #   row['New'] = 6
    #   row# => #<CSV::Row "Name":"foo" "Value":"0" "New":6>
    def []=(*args)
      value = args.pop

      if args.first.is_a? Integer
        if @row[args.first].nil?  # extending past the end with index
          @row[args.first] = [nil, value]
          @row.map! { |pair| pair.nil? ? [nil, nil] : pair }
        else                      # normal index assignment
          @row[args.first][1] = value
        end
      else
        index = index(*args)
        if index.nil?             # appending a field
          self << [args.first, value]
        else                      # normal header assignment
          @row[index][1] = value
        end
      end
    end

    #
    # :call-seq:
    #   row << [header, value] -> self
    #   row << hash -> self
    #   row << value -> self
    #
    # Adds a field to +self+; returns +self+:
    #
    # If the argument is a 2-element \Array <tt>[header, value]</tt>,
    # a field is added with the given +header+ and +value+:
    #   source = "Name,Name,Name\nFoo,Bar,Baz\n"
    #   table = CSV.parse(source, headers: true)
    #   row = table[0]
    #   row << ['NAME', 'Bat']
    #   row # => #<CSV::Row "Name":"Foo" "Name":"Bar" "Name":"Baz" "NAME":"Bat">
    #
    # If the argument is a \Hash, each <tt>key-value</tt> pair is added
    # as a field with header +key+ and value +value+.
    #   source = "Name,Name,Name\nFoo,Bar,Baz\n"
    #   table = CSV.parse(source, headers: true)
    #   row = table[0]
    #   row << {NAME: 'Bat', name: 'Bam'}
    #   row # => #<CSV::Row "Name":"Foo" "Name":"Bar" "Name":"Baz" NAME:"Bat" name:"Bam">
    #
    # Otherwise, the given +value+ is added as a field with no header.
    #   source = "Name,Name,Name\nFoo,Bar,Baz\n"
    #   table = CSV.parse(source, headers: true)
    #   row = table[0]
    #   row << 'Bag'
    #   row # => #<CSV::Row "Name":"Foo" "Name":"Bar" "Name":"Baz" nil:"Bag">
    def <<(arg)
      if arg.is_a?(Array) and arg.size == 2  # appending a header and name
        @row << arg
      elsif arg.is_a?(Hash)                  # append header and name pairs
        arg.each { |pair| @row << pair }
      else                                   # append field value
        @row << [nil, arg]
      end

      self  # for chaining
    end

    # :call-seq:
    #   row.push(*values) ->self
    #
    # Appends each of the given +values+ to +self+ as a field; returns +self+:
    #   source = "Name,Name,Name\nFoo,Bar,Baz\n"
    #   table = CSV.parse(source, headers: true)
    #   row = table[0]
    #   row.push('Bat', 'Bam')
    #   row # => #<CSV::Row "Name":"Foo" "Name":"Bar" "Name":"Baz" nil:"Bat" nil:"Bam">
    def push(*args)
      args.each { |arg| self << arg }

      self  # for chaining
    end

    #
    # :call-seq:
    #   delete(index) -> [header, value] or nil
    #   delete(header) -> [header, value] or empty_array
    #   delete(header, offset) -> [header, value] or empty_array
    #
    # Removes a specified field from +self+; returns the 2-element \Array
    # <tt>[header, value]</tt> if the field exists.
    #
    # If an \Integer argument +index+ is given,
    # removes and returns the field at offset +index+,
    # or returns +nil+ if the field does not exist:
    #   source = "Name,Name,Name\nFoo,Bar,Baz\n"
    #   table = CSV.parse(source, headers: true)
    #   row = table[0]
    #   row.delete(1) # => ["Name", "Bar"]
    #   row.delete(50) # => nil
    #
    # Otherwise, if the single argument +header+ is given,
    # removes and returns the first-found field with the given header,
    # of returns a new empty \Array if the field does not exist:
    #   source = "Name,Name,Name\nFoo,Bar,Baz\n"
    #   table = CSV.parse(source, headers: true)
    #   row = table[0]
    #   row.delete('Name') # => ["Name", "Foo"]
    #   row.delete('NAME') # => []
    #
    # If argument +header+ and \Integer argument +offset+ are given,
    # removes and returns the first-found field with the given header
    # whose +index+ is at least as large as +offset+:
    #   source = "Name,Name,Name\nFoo,Bar,Baz\n"
    #   table = CSV.parse(source, headers: true)
    #   row = table[0]
    #   row.delete('Name', 1) # => ["Name", "Bar"]
    #   row.delete('NAME', 1) # => []
    def delete(header_or_index, minimum_index = 0)
      if header_or_index.is_a? Integer                 # by index
        @row.delete_at(header_or_index)
      elsif i = index(header_or_index, minimum_index)  # by header
        @row.delete_at(i)
      else
        [ ]
      end
    end

    # :call-seq:
    #   row.delete_if {|header, value| ... } -> self
    #
    # Removes fields from +self+ as selected by the block; returns +self+.
    #
    # Removes each field for which the block returns a truthy value:
    #   source = "Name,Name,Name\nFoo,Bar,Baz\n"
    #   table = CSV.parse(source, headers: true)
    #   row = table[0]
    #   row.delete_if {|header, value| value.start_with?('B') } # => true
    #   row # => #<CSV::Row "Name":"Foo">
    #   row.delete_if {|header, value| header.start_with?('B') } # => false
    #
    # If no block is given, returns a new Enumerator:
    #   row.delete_if # => #<Enumerator: #<CSV::Row "Name":"Foo">:delete_if>
    def delete_if(&block)
      return enum_for(__method__) { size } unless block_given?

      @row.delete_if(&block)

      self  # for chaining
    end

    # :call-seq:
    #   self.fields(*specifiers)
    #
    # Returns field values per the given +specifiers+, which may be any mixture of:
    # - \Integer index.
    # - \Range of \Integer indexes.
    # - 2-element \Array containing a header and offset.
    # - Header.
    # - \Range of headers.
    #
    # For +specifier+ in one of the first four cases above,
    # returns the result of <tt>self.field(specifier)</tt>;  see #field.
    #
    # Although there may be any number of +specifiers+,
    # the examples here will illustrate one at a time.
    #
    # When the specifier is an \Integer +index+,
    # returns <tt>self.field(index)</tt>L
    #   source = "Name,Name,Name\nFoo,Bar,Baz\n"
    #   table = CSV.parse(source, headers: true)
    #   row = table[0]
    #   row.fields(1) # => ["Bar"]
    #
    # When the specifier is a \Range of \Integers +range+,
    # returns <tt>self.field(range)</tt>:
    #   row.fields(1..2) # => ["Bar", "Baz"]
    #
    # When the specifier is a 2-element \Array +array+,
    # returns <tt>self.field(array)</tt>L
    #   row.fields('Name', 1) # => ["Foo", "Bar"]
    #
    # When the specifier is a header +header+,
    # returns <tt>self.field(header)</tt>L
    #   row.fields('Name') # => ["Foo"]
    #
    # When the specifier is a \Range of headers +range+,
    # forms a new \Range +new_range+ from the indexes of
    # <tt>range.start</tt> and <tt>range.end</tt>,
    # and returns <tt>self.field(new_range)</tt>:
    #   source = "Name,NAME,name\nFoo,Bar,Baz\n"
    #   table = CSV.parse(source, headers: true)
    #   row = table[0]
    #   row.fields('Name'..'NAME') # => ["Foo", "Bar"]
    #
    # Returns all fields if no argument given:
    #   row.fields # => ["Foo", "Bar", "Baz"]
    def fields(*headers_and_or_indices)
      if headers_and_or_indices.empty?  # return all fields--no arguments
        @row.map(&:last)
      else                              # or work like values_at()
        all = []
        headers_and_or_indices.each do |h_or_i|
          if h_or_i.is_a? Range
            index_begin = h_or_i.begin.is_a?(Integer) ? h_or_i.begin :
                                                        index(h_or_i.begin)
            index_end   = h_or_i.end.is_a?(Integer)   ? h_or_i.end :
                                                        index(h_or_i.end)
            new_range   = h_or_i.exclude_end? ? (index_begin...index_end) :
                                                (index_begin..index_end)
            all.concat(fields.values_at(new_range))
          else
            all << field(*Array(h_or_i))
          end
        end
        return all
      end
    end
    alias_method :values_at, :fields

    #
    # :call-seq:
    #   index( header )
    #   index( header, offset )
    #
    # This method will return the index of a field with the provided +header+.
    # The +offset+ can be used to locate duplicate header names, as described in
    # CSV::Row.field().
    #
    def index(header, minimum_index = 0)
      # find the pair
      index = headers[minimum_index..-1].index(header)
      # return the index at the right offset, if we found one
      index.nil? ? nil : index + minimum_index
    end

    #
    # Returns +true+ if +data+ matches a field in this row, and +false+
    # otherwise.
    #
    def field?(data)
      fields.include? data
    end

    include Enumerable

    #
    # Yields each pair of the row as header and field tuples (much like
    # iterating over a Hash). This method returns the row for chaining.
    #
    # If no block is given, an Enumerator is returned.
    #
    # Support for Enumerable.
    #
    def each(&block)
      return enum_for(__method__) { size } unless block_given?

      @row.each(&block)

      self  # for chaining
    end

    alias_method :each_pair, :each

    #
    # Returns +true+ if this row contains the same headers and fields in the
    # same order as +other+.
    #
    def ==(other)
      return @row == other.row if other.is_a? CSV::Row
      @row == other
    end

    # :call-seq:
    #   row.to_h -> hash
    #
    # Returns the new \Hash formed by adding each header-value pair in +self+
    # as a key-value pair in the \Hash.
    #   source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
    #   table = CSV.parse(source, headers: true)
    #   row = table[0]
    #   row.to_h # => {"Name"=>"foo", "Value"=>"0"}
    #
    # Header order is preserved, but repeated headers are ignored:
    #   source = "Name,Name,Name\nFoo,Bar,Baz\n"
    #   table = CSV.parse(source, headers: true)
    #   row = table[0]
    #   row.to_h # => {"Name"=>"Foo"}
    def to_h
      hash = {}
      each do |key, _value|
        hash[key] = self[key] unless hash.key?(key)
      end
      hash
    end
    alias_method :to_hash, :to_h

    alias_method :to_ary, :to_a

    # :call-seq:
    #   row.to_csv -> csv_string
    #
    # Returns the row as a \CSV String. Headers are not included:
    #   source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
    #   table = CSV.parse(source, headers: true)
    #   row = table[0]
    #   row.to_csv # => "foo,0\n"
    def to_csv(**options)
      fields.to_csv(**options)
    end
    alias_method :to_s, :to_csv

    # :call-seq:
    #   row.dig(index_or_header, *identifiers) -> object
    #
    # Finds and returns the object in nested object that is specified
    # by +index_or_header+ and +specifiers+.
    #
    # The nested objects may be instances of various classes.
    # See {Dig Methods}[https://docs.ruby-lang.org/en/master/doc/dig_methods_rdoc.html].
    #
    # Examples:
    #   source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
    #   table = CSV.parse(source, headers: true)
    #   row = table[0]
    #   row.dig(1) # => "0"
    #   row.dig('Value') # => "0"
    #   row.dig(5) # => nil
    def dig(index_or_header, *indexes)
      value = field(index_or_header)
      if value.nil?
        nil
      elsif indexes.empty?
        value
      else
        unless value.respond_to?(:dig)
          raise TypeError, "#{value.class} does not have \#dig method"
        end
        value.dig(*indexes)
      end
    end

    # :call-seq:
    #   row.inspect -> string
    #
    # Returns an ASCII-compatible \String showing:
    # - Class \CSV::Row.
    # - Header-value pairs.
    # Example:
    #   source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
    #   table = CSV.parse(source, headers: true)
    #   row = table[0]
    #   row.inspect # => "#<CSV::Row \"Name\":\"foo\" \"Value\":\"0\">"
    def inspect
      str = ["#<", self.class.to_s]
      each do |header, field|
        str << " " << (header.is_a?(Symbol) ? header.to_s : header.inspect) <<
               ":" << field.inspect
      end
      str << ">"
      begin
        str.join('')
      rescue  # any encoding error
        str.map do |s|
          e = Encoding::Converter.asciicompat_encoding(s.encoding)
          e ? s.encode(e) : s.force_encoding("ASCII-8BIT")
        end.join('')
      end
    end
  end
end