
Bulk Insert: Reuse indexes for unique_by

I found `:unique_by` with `:columns` and `:where` inside it tough to
grasp. The documentation only mentioned indexes and partial indexes,
so why duplicate a model's indexes in an insert_all/upsert_all call
when we can just look them up?

This has the added benefit of raising if no unique index is found, so
people can't insert thousands of records without relying on an index
of some form.
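
Concretely, the call-site change this enables (adapted from the docs and tests in the diff below; `rows` stands in for an array of attribute hashes):

    # Before: spell out the index's definition inline
    Book.upsert_all rows, unique_by: { columns: %w[ isbn ], where: "published_on IS NOT NULL" }

    # After: point at one of the model's own indexes, by column(s) or by name
    Book.upsert_all rows, unique_by: :isbn
    Book.upsert_all rows, unique_by: %i( author_id name )
    Book.upsert_all rows, unique_by: :index_books_on_isbn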
Kasper Timm Hansen 2019-03-09 01:15:17 +01:00
parent c11d115fa3
commit 2ea5c24085
3 changed files with 87 additions and 63 deletions

View file

@@ -14,6 +14,7 @@ module ActiveRecord
       @returning = (connection.supports_insert_returning? ? primary_keys : false) if @returning.nil?
       @returning = false if @returning == []
+      @unique_by = find_unique_index_for(unique_by) if unique_by
       @on_duplicate = :skip if @on_duplicate == :update && updatable_columns.empty?
 
       ensure_valid_options_for_connection!
@@ -27,6 +28,11 @@ module ActiveRecord
       keys - readonly_columns - unique_by_columns
     end
 
+    def primary_keys
+      Array(model.primary_key)
+    end
+
     def skip_duplicates?
       on_duplicate == :skip
     end
@@ -47,6 +53,21 @@ module ActiveRecord
     end
 
     private
+      def find_unique_index_for(unique_by)
+        match = Array(unique_by).map(&:to_s)
+
+        if index = unique_indexes.find { |i| match.include?(i.name) || i.columns == match }
+          index
+        else
+          raise ArgumentError, "No unique index found for #{unique_by}"
+        end
+      end
+
+      def unique_indexes
+        connection.schema_cache.indexes(model.table_name).select(&:unique)
+      end
+
       def ensure_valid_options_for_connection!
         if returning && !connection.supports_insert_returning?
           raise ArgumentError, "#{connection.class} does not support :returning"
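
A standalone paraphrase of the lookup above may help. This is a sketch, not Rails API: the Index struct merely stands in for the index objects the schema cache returns, and the behaviour mirrors find_unique_index_for — match by index name or by exact column list, consider unique indexes only, raise otherwise.

    Index = Struct.new(:name, :columns, :unique, :where)

    def find_unique_index_for(unique_by, indexes)
      match = Array(unique_by).map(&:to_s)

      indexes.select(&:unique).find { |i| match.include?(i.name) || i.columns == match } ||
        raise(ArgumentError, "No unique index found for #{unique_by}")
    end

    indexes = [
      Index.new("index_books_on_isbn", ["isbn"], true, "published_on IS NOT NULL"),
      Index.new("index_books_on_author_id", ["author_id"], false, nil)
    ]

    find_unique_index_for(:isbn, indexes).name                # => "index_books_on_isbn" (column match)
    find_unique_index_for(:index_books_on_isbn, indexes).name # => "index_books_on_isbn" (name match)
    # find_unique_index_for(:author_id, indexes)              # raises ArgumentError: not a unique index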
@@ -69,21 +90,20 @@ module ActiveRecord
         end
       end
 
       def to_sql
         connection.build_insert_sql(ActiveRecord::InsertAll::Builder.new(self))
       end
 
       def readonly_columns
         primary_keys + model.readonly_attributes.to_a
       end
 
       def unique_by_columns
-        unique_by ? unique_by.fetch(:columns).map(&:to_s) : []
+        Array(unique_by&.columns)
       end
 
-      def primary_keys
-        Array.wrap(model.primary_key)
-      end
-
       def verify_attributes(attributes)
         if keys != attributes.keys.to_set
@@ -121,10 +141,13 @@ module ActiveRecord
        end
 
         def conflict_target
-          return unless conflict_columns
-          sql = +"(#{quote_columns(conflict_columns).join(',')})"
-          sql << " WHERE #{where}" if where
-          sql
+          if index = insert_all.unique_by
+            sql = +"(#{quote_columns(index.columns).join(',')})"
+            sql << " WHERE #{index.where}" if index.where
+            sql
+          elsif update_duplicates?
+            "(#{quote_columns(insert_all.primary_keys).join(',')})"
+          end
         end
 
         def updatable_columns
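
For a partial index, the resulting conflict target carries the index's filter along. The snippet below is illustrative only: it hand-builds the two shapes conflict_target can return, using PostgreSQL-style double quotes in place of the adapter's quote_column_name.

    index_columns = ["isbn"]                    # columns of the resolved unique index
    index_where   = "published_on IS NOT NULL"  # its partial-index condition, if any

    sql = +"(#{index_columns.map { |c| %("#{c}") }.join(',')})"
    sql << " WHERE #{index_where}" if index_where
    sql                                         # => "(\"isbn\") WHERE published_on IS NOT NULL"

    primary_keys = ["id"]                       # fallback when only update_duplicates? applies
    "(#{primary_keys.map { |c| %("#{c}") }.join(',')})"  # => "(\"id\")"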
@@ -150,18 +173,6 @@ module ActiveRecord
         def quote_columns(columns)
           columns.map(&connection.method(:quote_column_name))
         end
-
-        def conflict_columns
-          @conflict_columns ||= begin
-            conflict_columns = insert_all.unique_by.fetch(:columns) if insert_all.unique_by
-            conflict_columns ||= Array.wrap(model.primary_key) if update_duplicates?
-            conflict_columns
-          end
-        end
-
-        def where
-          insert_all.unique_by && insert_all.unique_by[:where]
-        end
     end
   end
 end

View file

@@ -92,25 +92,21 @@ module ActiveRecord
     #   <tt>returning: %w[ id name ]</tt> to return the id and name of every successfully inserted
     #   record or pass <tt>returning: false</tt> to omit the clause.
     #
-    # [:unique_by]
-    #   (Postgres and SQLite only) In a table with more than one unique constraint or index,
+    # [:unique_index]
+    #   (Postgres and SQLite only) In a table with more than one unique constaint or index,
     #   new records may be considered duplicates according to different criteria. By default,
-    #   new rows will be skipped if they violate _any_ unique constraint or index. By defining
-    #   <tt>:unique_by</tt>, you can skip rows that would create duplicates according to the given
+    #   new rows will be skipped if they violate _any_ unique constraint/index. By passing
+    #   <tt>:unique_index</tt>, you can skip rows that would create duplicates according to the given
     #   constraint but raise <tt>ActiveRecord::RecordNotUnique</tt> if rows violate other constraints.
     #
-    #   (For example, maybe you assume a client will try to import the same ISBNs more than
+    #   For example, maybe you assume a client will try to import the same ISBNs more than
     #   once and want to silently ignore the duplicate records, but you don't except any of
     #   your code to attempt to create two rows with the same primary key and would appreciate
-    #   an exception report in that scenario.)
+    #   an exception instead.
     #
-    #   Indexes can be identified by an array of columns:
+    #   Indexes can be identified by name:
     #
-    #     unique_by: { columns: %w[ isbn ] }
-    #
-    #   Partial indexes can be identified by an array of columns and a <tt>:where</tt> condition:
-    #
-    #     unique_by: { columns: %w[ isbn ], where: "published_on IS NOT NULL" }
+    #     unique_index: :isbn
     #
     # ==== Example
     #
@@ -120,7 +116,6 @@ module ActiveRecord
     #     { id: 1, title: 'Rework', author: 'David' },
     #     { id: 1, title: 'Eloquent Ruby', author: 'Russ' }
     #   ])
-    #
     def insert_all(attributes, returning: nil, unique_by: nil)
       InsertAll.new(self, attributes, on_duplicate: :skip, returning: returning, unique_by: unique_by).execute
     end
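
Tying the rewritten comments to the code: the keyword argument in the signature is still unique_by: (the :unique_index name appears only in the doc text), and the tests further down exercise it with a column, a column list, and an index name. A usage sketch, assuming books.isbn is backed by a unique index:

    # The duplicate-ISBN row is skipped instead of raising; other unique
    # violations (for example a duplicate primary key) still raise.
    Book.insert_all(
      [
        { title: "Rework",        author: "David", isbn: "1" },
        { title: "Eloquent Ruby", author: "Russ",  isbn: "1" }
      ],
      unique_by: :isbn
    )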
@@ -174,12 +169,11 @@ module ActiveRecord
     #   ])
     #
     #   # Raises ActiveRecord::RecordNotUnique because 'Eloquent Ruby'
-    #   # does not have a unique ID
+    #   # does not have a unique id.
     #   Book.insert_all!([
     #     { id: 1, title: 'Rework', author: 'David' },
     #     { id: 1, title: 'Eloquent Ruby', author: 'Russ' }
     #   ])
-    #
     def insert_all!(attributes, returning: nil)
       InsertAll.new(self, attributes, on_duplicate: :raise, returning: returning).execute
     end
@@ -217,43 +211,32 @@ module ActiveRecord
     #   <tt>returning: %w[ id name ]</tt> to return the id and name of every successfully inserted
     #   record or pass <tt>returning: false</tt> to omit the clause.
     #
-    # [:unique_by]
-    #   (Postgres and SQLite only) In a table with more than one unique constraint or index,
+    # [:unique_index]
+    #   (Postgres and SQLite only) In a table with more than one unique constaint or index,
     #   new records may be considered duplicates according to different criteria. For MySQL,
     #   an upsert will take place if a new record violates _any_ unique constraint. For
     #   Postgres and SQLite, new rows will replace existing rows when the new row has the
-    #   same primary key as the existing row. In case of SQLite, an upsert operation causes
-    #   an insert to behave as an update or a no-op if the insert would violate
-    #   a uniqueness constraint. By defining <tt>:unique_by</tt>, you can supply
-    #   a different unique constraint for matching new records to existing ones than the
-    #   primary key.
+    #   same primary key as the existing row. By passing <tt>:unique_index</tt>, you can supply
+    #   a different index than the primary key one to match new records to existing ones.
     #
-    #   (For example, if you have a unique index on the ISBN column and use that as
-    #   the <tt>:unique_by</tt>, a new record with the same ISBN as an existing record
+    #   For example, if you have a unique index on the ISBN column and pass that as
+    #   <tt>:unique_index</tt>, a new record with the same ISBN as an existing record
     #   will replace the existing record but a new record with the same primary key
-    #   as an existing record will raise <tt>ActiveRecord::RecordNotUnique</tt>.)
+    #   as an existing record will raise <tt>ActiveRecord::RecordNotUnique</tt>.
     #
-    #   Indexes can be identified by an array of columns:
+    #   Indexes can be identified by name:
     #
-    #     unique_by: { columns: %w[ isbn ] }
-    #
-    #   Partial indexes can be identified by an array of columns and a <tt>:where</tt> condition:
-    #
-    #     unique_by: { columns: %w[ isbn ], where: "published_on IS NOT NULL" }
+    #     unique_index: :isbn
     #
     # ==== Examples
     #
-    #   # Given a unique index on <tt>books.isbn</tt> and the following record:
-    #   Book.create!(title: 'Rework', author: 'David', isbn: '1')
+    #   # Insert multiple records, performing an upsert when records have duplicate ISBNs.
+    #   # Here 'Eloquent Ruby' will overwrite 'Rework' because its ISBN is duplicate.
     #
-    #   # Insert multiple records, allowing new records with the same ISBN
-    #   # as an existing record to overwrite the existing record.
-    #   # ('Eloquent Ruby' will overwrite 'Rework' because its ISBN is duplicate)
     #   Book.upsert_all([
-    #     { title: 'Eloquent Ruby', author: 'Russ', isbn: '1' },
-    #     { title: 'Clean Code', author: 'Robert', isbn: '2' }
-    #   ], unique_by: { columns: %w[ isbn ] })
-    #
+    #     { title: 'Rework', author: 'David', isbn: '1' },
+    #     { title: 'Eloquent Ruby', author: 'Russ', isbn: '1' }
+    #   ], unique_index: :isbn)
     def upsert_all(attributes, returning: nil, unique_by: nil)
       InsertAll.new(self, attributes, on_duplicate: :update, returning: returning, unique_by: unique_by).execute
     end
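
And the flip side that comment describes, sketched with the same assumed unique index on books.isbn: matching happens on the ISBN, so a clashing primary key is not upserted over but raised on.

    Book.create!(id: 1, title: "Rework", author: "David", isbn: "1")

    # Matched by ISBN: replaces the existing row.
    Book.upsert_all([{ title: "Eloquent Ruby", author: "Russ", isbn: "1" }], unique_by: :isbn)

    # Not matched by ISBN, but the id collides: raises ActiveRecord::RecordNotUnique.
    Book.upsert_all([{ id: 1, title: "Clean Code", author: "Robert", isbn: "2" }], unique_by: :isbn)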

View file

@@ -95,7 +95,37 @@ class InsertAllTest < ActiveRecord::TestCase
     assert_raise ActiveRecord::RecordNotUnique do
       Book.insert_all [{ id: 1, name: "Agile Web Development with Rails" }],
-        unique_by: { columns: %i{author_id name} }
+        unique_by: :index_books_on_author_id_and_name
     end
   end
 
+  def test_insert_all_and_upsert_all_with_index_finding_options
+    skip unless supports_insert_conflict_target?
+
+    assert_difference "Book.count", +3 do
+      Book.insert_all [{ name: "Rework", author_id: 1 }], unique_by: :isbn
+      Book.insert_all [{ name: "Remote", author_id: 1 }], unique_by: %i( author_id name )
+      Book.insert_all [{ name: "Renote", author_id: 1 }], unique_by: :index_books_on_isbn
+    end
+
+    assert_raise ActiveRecord::RecordNotUnique do
+      Book.upsert_all [{ name: "Rework", author_id: 1 }], unique_by: :isbn
+    end
+  end
+
+  def test_insert_all_and_upsert_all_raises_when_index_is_missing
+    skip unless supports_insert_conflict_target?
+
+    [ :cats, %i( author_id isbn ), :author_id ].each do |missing_or_non_unique_by|
+      error = assert_raises ArgumentError do
+        Book.insert_all [{ name: "Rework", author_id: 1 }], unique_by: missing_or_non_unique_by
+      end
+      assert_match "No unique index", error.message
+
+      error = assert_raises ArgumentError do
+        Book.upsert_all [{ name: "Rework", author_id: 1 }], unique_by: missing_or_non_unique_by
+      end
+      assert_match "No unique index", error.message
+    end
+  end
@@ -120,7 +150,7 @@ class InsertAllTest < ActiveRecord::TestCase
     Book.upsert_all [{ id: 101, name: "Perelandra", author_id: 7 }]
     Book.upsert_all [{ id: 103, name: "Perelandra", author_id: 7, isbn: "1974522598" }],
-      unique_by: { columns: %i{author_id name} }
+      unique_by: :index_books_on_author_id_and_name
 
     book = Book.find_by(name: "Perelandra")
     assert_equal 101, book.id, "Should not have updated the ID"
@@ -132,7 +162,7 @@ class InsertAllTest < ActiveRecord::TestCase
     Book.upsert_all [{ name: "Out of the Silent Planet", author_id: 7, isbn: "1974522598", published_on: Date.new(1938, 4, 1) }]
     Book.upsert_all [{ name: "Perelandra", author_id: 7, isbn: "1974522598" }],
-      unique_by: { columns: %w[ isbn ], where: "published_on IS NOT NULL" }
+      unique_by: :index_books_on_isbn
 
     assert_equal ["Out of the Silent Planet", "Perelandra"], Book.where(isbn: "1974522598").order(:name).pluck(:name)
   end
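
For reference, these tests appear to rely on a books table with a unique index on [author_id, name] (index_books_on_author_id_and_name) and a unique partial index on isbn (index_books_on_isbn), while author_id alone is not uniquely indexed. A schema sketch along those lines — the actual test fixtures may declare more columns and indexes:

    create_table :books do |t|
      t.references :author
      t.string     :name
      t.string     :isbn
      t.date       :published_on

      t.index [:author_id, :name], unique: true
      t.index :isbn, unique: true, where: "published_on IS NOT NULL"
    end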