Reduce UPDATEs for background column type changes

Prior to this commit we would essentially update all rows in a table,
even those where the source column (e.g. `issues.closed_at`) was NULL.
This in turn could lead to statement timeouts when using the default
batch size of 10 000 rows per job.

To work around this we no longer schedule jobs for rows where the source
value is NULL. As an extra precaution we also skip updating rows where the
source column is NULL or where the target column already has a non-NULL
value. Using this approach it should be possible to update 10 000 rows in
the "issues" table in about 7.5-8 seconds.

Fixes https://gitlab.com/gitlab-org/gitlab-ce/issues/42158
Yorick Peterse 2018-01-18 14:22:41 +01:00
parent 50a649530a
commit 0054d3838c
3 changed files with 6 additions and 3 deletions

lib/gitlab/background_migration/copy_column.rb

@@ -28,6 +28,8 @@ module Gitlab
         UPDATE #{quoted_table}
         SET #{quoted_copy_to} = #{quoted_copy_from}
         WHERE id BETWEEN #{start_id} AND #{end_id}
+        AND #{quoted_copy_from} IS NOT NULL
+        AND #{quoted_copy_to} IS NULL
         SQL
       end
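
For context, here is a minimal sketch of the background migration class this
hunk lives in. The heredoc body matches the diff above; the class, method,
and helper names around it are inferred from the surrounding code rather
than shown in this commit:

    module Gitlab
      module BackgroundMigration
        # Copies the value of one column to another for a range of rows.
        class CopyColumn
          def perform(table, copy_from, copy_to, start_id, end_id)
            quoted_table = connection.quote_table_name(table)
            quoted_copy_from = connection.quote_column_name(copy_from)
            quoted_copy_to = connection.quote_column_name(copy_to)

            # The two AND clauses added in this commit make the statement a
            # no-op for rows with nothing to copy or rows that were already
            # copied, which also makes retried jobs cheap and idempotent.
            connection.update(<<-SQL.strip_heredoc)
              UPDATE #{quoted_table}
              SET #{quoted_copy_to} = #{quoted_copy_from}
              WHERE id BETWEEN #{start_id} AND #{end_id}
              AND #{quoted_copy_from} IS NOT NULL
              AND #{quoted_copy_to} IS NULL
            SQL
          end

          def connection
            ActiveRecord::Base.connection
          end
        end
      end
    end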

lib/gitlab/database/migration_helpers.rb

@@ -525,8 +525,9 @@ module Gitlab
       install_rename_triggers(table, column, temp_column)

       # Schedule the jobs that will copy the data from the old column to the
-      # new one.
-      relation.each_batch(of: batch_size) do |batch, index|
+      # new one. Rows with NULL values in our source column are skipped since
+      # the target column is already NULL at this point.
+      relation.where.not(column => nil).each_batch(of: batch_size) do |batch, index|
         start_id, end_id = batch.pluck('MIN(id), MAX(id)').first
         max_index = index
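
For reference, a hedged usage sketch of the helper modified here,
change_column_type_using_background_migration, as a post-deployment
migration might call it. The migration class, the inline model, and the
target type are illustrative; the (relation, column, new_type) signature
is inferred from the code above and the spec below:

    class ScheduleIssuesClosedAtTypeChange < ActiveRecord::Migration
      include Gitlab::Database::MigrationHelpers

      disable_ddl_transaction!

      # Minimal inline model so the migration does not depend on app code.
      class Issue < ActiveRecord::Base
        include EachBatch

        self.table_name = 'issues'
      end

      def up
        # With this commit the helper itself skips rows where the source
        # column is NULL, so the relation no longer needs to filter them.
        change_column_type_using_background_migration(
          Issue.all,
          :closed_at,
          :datetime_with_timezone
        )
      end
    end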

spec/lib/gitlab/database/migration_helpers_spec.rb

@@ -1038,7 +1038,7 @@ describe Gitlab::Database::MigrationHelpers do
   end

   describe '#change_column_type_using_background_migration' do
-    let!(:issue) { create(:issue) }
+    let!(:issue) { create(:issue, :closed, closed_at: Time.zone.now) }

     let(:issue_model) do
       Class.new(ActiveRecord::Base) do
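
The spec change follows directly from the scheduling change above: jobs are
now only scheduled for rows whose source column is non-NULL, so the fixture
has to provide a non-NULL `closed_at` (here via the `:closed` trait plus an
explicit timestamp); otherwise the filtered relation would be empty and the
examples would never schedule a job.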