# frozen_string_literal: true

##
# A mixin for ActiveRecord models that enables callers to insert instances of the
# target class into the database en-bloc via the [bulk_insert!] and [bulk_upsert!]
# methods.
#
# Upon inclusion in the target class, the mixin will perform a number of checks to
# ensure that the target is eligible for bulk insertions. For instance, it must not
# use ActiveRecord callbacks that fire between [save]s, since these would not run
# properly when instances are inserted in bulk.
#
# The mixin uses ActiveRecord 6's [InsertAll] type internally for bulk insertions.
# Unlike [InsertAll], however, it requires you to pass instances of the target type
# rather than row hashes, since it will run validations prior to insertion.
#
# @example
#
#   class MyRecord < ApplicationRecord
#     include BulkInsertSafe # must be included _last_ i.e. after any other concerns
#   end
#
#   # simple
#   MyRecord.bulk_insert!(items)
#
#   # with custom batch size
#   MyRecord.bulk_insert!(items, batch_size: 100)
#
#   # without validations
#   MyRecord.bulk_insert!(items, validate: false)
#
#   # with attribute hash modification
#   MyRecord.bulk_insert!(items) { |item_attrs| item_attrs['col'] = 42 }
#
#
module BulkInsertSafe
  extend ActiveSupport::Concern

  # These are the callbacks we think safe when used on models that are
  # written to the database in bulk
  CALLBACK_NAME_WHITELIST = Set[
    :initialize,
    :validate,
    :validation,
    :find,
    :destroy
  ].freeze

  # Batch size used by [bulk_insert!] / [bulk_upsert!] when the caller passes none.
  DEFAULT_BATCH_SIZE = 500

  # Raised by [set_callback] when a callback that is not bulk-insert safe is registered.
  MethodNotAllowedError = Class.new(StandardError)

  # Raised when an item handed to a bulk operation already carries a primary key value.
  PrimaryKeySetError = Class.new(StandardError)

  class_methods do
    # Guards callback registration: only callbacks accepted by
    # [_bulk_insert_callback_allowed?] are forwarded to the original
    # implementation via `super`; anything else is rejected.
    #
    # @param [Symbol] name Name of the callback chain (e.g. :save, :validate)
    # @param [Array] args  Remaining arguments as passed to the original `set_callback`
    #
    # Throws [MethodNotAllowedError] when the callback is not allowed.
    def set_callback(name, *args)
      unless _bulk_insert_callback_allowed?(name, args)
        # The trailing space on the first literal keeps the two sentences apart
        # once the adjacent literals are joined.
        raise MethodNotAllowedError,
          "Not allowed to call `set_callback(#{name}, #{args})` when model extends `BulkInsertSafe`. " \
          "Callbacks that fire per each record being inserted do not work with bulk-inserts."
      end

      super
    end

    # Inserts the given ActiveRecord [items] to the table mapped to this class.
    # Items will be inserted in batches of a given size, where insertion semantics are
    # "atomic across all batches".
    #
    # @param [Boolean] validate          Whether validations should run on [items]
    # @param [Integer] batch_size        How many items should at most be inserted at once
    # @param [Boolean] skip_duplicates   Marks duplicates as allowed, and skips inserting them
    # @param [Symbol]  returns           Pass :ids to return an array with the primary key values
    #                                    for all inserted records or nil to omit the underlying
    #                                    RETURNING SQL clause entirely.
    # @param [Proc]    handle_attributes Block that will receive each item attribute hash
    #                                    prior to insertion for further processing
    #
    # Note that this method will throw on the following occasions:
    # - [PrimaryKeySetError]            when primary keys are set on entities prior to insertion
    # - [ActiveRecord::RecordInvalid]   on entity validation failures
    # - [ActiveRecord::RecordNotUnique] on duplicate key errors
    # - [ArgumentError]                 when [returns] is neither :ids nor nil
    #                                   (only checked when [items] is not empty)
    #
    # @return [Array] the primary key values plucked from each batch's insert result;
    #                 an empty array when [items] is empty. Presumably also empty when
    #                 [returns] is nil, since no RETURNING clause is requested then --
    #                 verify against ActiveRecord::InsertAll. Throws on failure.
    #
    def bulk_insert!(items, validate: true, skip_duplicates: false, returns: nil, batch_size: DEFAULT_BATCH_SIZE, &handle_attributes)
      _bulk_insert_all!(
        items,
        validate: validate,
        on_duplicate: skip_duplicates ? :skip : :raise,
        returns: returns,
        unique_by: nil,
        batch_size: batch_size,
        &handle_attributes
      )
    end

    # Upserts the given ActiveRecord [items] to the table mapped to this class.
    # Items will be inserted or updated in batches of a given size,
    # where insertion semantics are "atomic across all batches".
    #
    # @param [Boolean]      validate          Whether validations should run on [items]
    # @param [Integer]      batch_size        How many items should at most be inserted at once
    # @param [Symbol/Array] unique_by         Defines index or columns to use to consider item duplicate
    # @param [Symbol]       returns           Pass :ids to return an array with the primary key values
    #                                         for all inserted or updated records or nil to omit the
    #                                         underlying RETURNING SQL clause entirely.
    # @param [Proc]         handle_attributes Block that will receive each item attribute hash
    #                                         prior to insertion for further processing
    #
    # Unique indexes can be identified by columns or name:
    #  - unique_by: :isbn
    #  - unique_by: %i[ author_id name ]
    #  - unique_by: :index_books_on_isbn
    #
    # Note that this method will throw on the following occasions:
    # - [PrimaryKeySetError]            when primary keys are set on entities prior to insertion
    # - [ActiveRecord::RecordInvalid]   on entity validation failures
    # - [ActiveRecord::RecordNotUnique] on duplicate key errors
    # - [ArgumentError]                 when [returns] is neither :ids nor nil
    #                                   (only checked when [items] is not empty)
    #
    # @return [Array] the primary key values plucked from each batch's result;
    #                 an empty array when [items] is empty. Presumably also empty when
    #                 [returns] is nil, since no RETURNING clause is requested then --
    #                 verify against ActiveRecord::InsertAll. Throws on failure.
    #
    def bulk_upsert!(items, unique_by:, returns: nil, validate: true, batch_size: DEFAULT_BATCH_SIZE, &handle_attributes)
      _bulk_insert_all!(
        items,
        validate: validate,
        on_duplicate: :update,
        returns: returns,
        unique_by: unique_by,
        batch_size: batch_size,
        &handle_attributes
      )
    end

    private

    # Shared implementation behind [bulk_insert!] and [bulk_upsert!].
    #
    # Slices [items] into batches of [batch_size], converts every batch into
    # attribute hashes and hands them to ActiveRecord::InsertAll. All batches
    # run inside a single `transaction` block.
    #
    # @return [Array] concatenation of `pluck(primary_key)` over every batch result
    def _bulk_insert_all!(items, on_duplicate:, returns:, unique_by:, validate:, batch_size:, &handle_attributes)
      # NOTE: the empty check deliberately precedes the validation of [returns],
      # so an invalid [returns] goes unnoticed for empty input.
      return [] if items.empty?

      # Translate the public [returns] option into InsertAll's `returning:` option.
      returning =
        case returns
        when :ids
          [primary_key]
        when nil
          false
        else
          raise ArgumentError, "returns needs to be :ids or nil"
        end

      transaction do
        items.each_slice(batch_size).flat_map do |item_batch|
          attributes = _bulk_insert_item_attributes(item_batch, validate, &handle_attributes)

          ActiveRecord::InsertAll
            .new(self, attributes, on_duplicate: on_duplicate, returning: returning, unique_by: unique_by)
            .execute
            .pluck(primary_key)
        end
      end
    end

    # Builds one attribute hash per item, keyed by this class's column names.
    #
    # Each item is validated first (when [validate_items] is truthy), must not
    # carry a primary key value, and its hash is yielded to the optional block
    # so callers can modify it before insertion.
    #
    # @return [Array<Hash>] attribute hashes in the same order as [items]
    def _bulk_insert_item_attributes(items, validate_items)
      items.map do |item|
        item.validate! if validate_items

        attributes = column_names.each_with_object({}) do |column, row|
          row[column] = item.read_attribute(column)
        end

        # Removes the primary key entry from the hash, raising if it held a value.
        _bulk_insert_reject_primary_key!(attributes, item.class.primary_key)

        # `yield` inside this block targets the block given to the enclosing method.
        yield attributes if block_given?

        attributes
      end
    end

    # Deletes [primary_key] from [attributes] and raises [PrimaryKeySetError]
    # if the deleted value was truthy, i.e. the item already had a PK assigned.
    def _bulk_insert_reject_primary_key!(attributes, primary_key)
      existing_pk = attributes.delete(primary_key)
      return unless existing_pk

      raise PrimaryKeySetError, "Primary key set: #{primary_key}:#{existing_pk}\n" \
        "Bulk-inserts are only supported for rows that don't already have PK set"
    end

    # A callback is allowed when its name is whitelisted or when it looks like
    # the autosave hook described on [_bulk_insert_saved_from_belongs_to?].
    def _bulk_insert_callback_allowed?(name, args)
      _bulk_insert_whitelisted?(name) || _bulk_insert_saved_from_belongs_to?(name, args)
    end

    # belongs_to associations will install a before_save hook during class loading
    #
    # Only the callback kind (:before) and the method name prefix are inspected;
    # the callback chain name is not taken into account.
    def _bulk_insert_saved_from_belongs_to?(_name, args)
      args.first == :before && args.second.to_s.start_with?('autosave_associated_records_for_')
    end

    # Whether [name] is one of the callback names in CALLBACK_NAME_WHITELIST.
    def _bulk_insert_whitelisted?(name)
      CALLBACK_NAME_WHITELIST.include?(name)
    end
  end
end