2021-01-05 18:10:25 +00:00
|
|
|
# frozen_string_literal: true
|
|
|
|
|
|
|
|
module Gitlab
|
|
|
|
module BackgroundMigration
|
2021-04-21 21:09:40 +00:00
|
|
|
# Background migration that updates the value of one or more
|
|
|
|
# columns using the value of other columns in the same table.
|
2021-01-05 18:10:25 +00:00
|
|
|
#
|
|
|
|
# - The {start_id, end_id} arguments are at the start so that it can be used
|
2021-03-09 12:08:52 +00:00
|
|
|
# with `queue_batched_background_migration`
|
2021-01-05 18:10:25 +00:00
|
|
|
# - Uses sub-batching so that we can keep each update's execution time at
|
|
|
|
# low 100s ms, while being able to update more records per 2 minutes
|
|
|
|
# that we allow background migration jobs to be scheduled one after the other
|
|
|
|
# - We skip the NULL checks as they may result in not using an index scan
|
|
|
|
# - The table that is migrated does _not_ need `id` as the primary key
|
|
|
|
# We use the provided primary_key column to perform the update.
|
|
|
|
class CopyColumnUsingBackgroundMigrationJob
|
|
|
|
include Gitlab::Database::DynamicModelHelpers
|
|
|
|
|
|
|
|
# start_id - The start ID of the range of rows to update.
|
|
|
|
# end_id - The end ID of the range of rows to update.
|
2021-03-09 12:08:52 +00:00
|
|
|
# batch_table - The name of the table that contains the columns.
|
|
|
|
# batch_column - The name of the column we use to batch over the table.
|
2021-01-05 18:10:25 +00:00
|
|
|
# sub_batch_size - We don't want updates to take more than ~100ms
|
|
|
|
# This allows us to run multiple smaller batches during
|
|
|
|
# the minimum 2.minute interval that we can schedule jobs
|
2021-04-23 00:09:47 +00:00
|
|
|
# pause_ms - The number of milliseconds to sleep between each subbatch execution.
|
2021-04-21 21:09:40 +00:00
|
|
|
# copy_from - List of columns containing the data to copy.
|
|
|
|
# copy_to - List of columns to copy the data to. Order must match the order in `copy_from`.
|
2021-04-23 00:09:47 +00:00
|
|
|
def perform(start_id, end_id, batch_table, batch_column, sub_batch_size, pause_ms, copy_from, copy_to)
|
2021-04-21 21:09:40 +00:00
|
|
|
copy_from = Array.wrap(copy_from)
|
|
|
|
copy_to = Array.wrap(copy_to)
|
|
|
|
|
|
|
|
raise ArgumentError, 'number of source and destination columns must match' unless copy_from.count == copy_to.count
|
|
|
|
|
|
|
|
assignment_clauses = column_assignment_clauses(copy_from, copy_to)
|
2021-01-05 18:10:25 +00:00
|
|
|
|
2021-03-09 12:08:52 +00:00
|
|
|
parent_batch_relation = relation_scoped_to_range(batch_table, batch_column, start_id, end_id)
|
2021-01-05 18:10:25 +00:00
|
|
|
|
2021-03-09 12:08:52 +00:00
|
|
|
parent_batch_relation.each_batch(column: batch_column, of: sub_batch_size) do |sub_batch|
|
2021-03-31 00:09:32 +00:00
|
|
|
batch_metrics.time_operation(:update_all) do
|
2021-04-21 21:09:40 +00:00
|
|
|
sub_batch.update_all(assignment_clauses)
|
2021-03-31 00:09:32 +00:00
|
|
|
end
|
2021-01-05 18:10:25 +00:00
|
|
|
|
2021-04-23 00:09:47 +00:00
|
|
|
pause_ms = 0 if pause_ms < 0
|
|
|
|
sleep(pause_ms * 0.001)
|
2021-01-05 18:10:25 +00:00
|
|
|
end
|
|
|
|
end
|
|
|
|
|
2021-03-31 00:09:32 +00:00
|
|
|
def batch_metrics
|
|
|
|
@batch_metrics ||= Gitlab::Database::BackgroundMigration::BatchMetrics.new
|
|
|
|
end
|
|
|
|
|
2021-01-05 18:10:25 +00:00
|
|
|
private
|
|
|
|
|
|
|
|
def connection
|
|
|
|
ActiveRecord::Base.connection
|
|
|
|
end
|
|
|
|
|
|
|
|
def relation_scoped_to_range(source_table, source_key_column, start_id, stop_id)
|
|
|
|
define_batchable_model(source_table).where(source_key_column => start_id..stop_id)
|
|
|
|
end
|
2021-04-21 21:09:40 +00:00
|
|
|
|
|
|
|
def column_assignment_clauses(copy_from, copy_to)
|
|
|
|
assignments = copy_from.zip(copy_to).map do |from_column, to_column|
|
|
|
|
from_column = connection.quote_column_name(from_column)
|
|
|
|
to_column = connection.quote_column_name(to_column)
|
|
|
|
|
|
|
|
"#{to_column} = #{from_column}"
|
|
|
|
end
|
|
|
|
|
|
|
|
assignments.join(', ')
|
|
|
|
end
|
2021-01-05 18:10:25 +00:00
|
|
|
end
|
|
|
|
end
|
|
|
|
end
|