Merge branch 'add-rename-column-background-helper' into 'master'
Add a helper to rename a column using a background migration Closes #47591 See merge request gitlab-org/gitlab-ce!20180
This commit is contained in:
commit
2440b7b6f1
|
@ -195,22 +195,22 @@ end
|
|||
|
||||
And that's it, we're done!
|
||||
|
||||
## Changing Column Types For Large Tables
|
||||
## Changing The Schema For Large Tables
|
||||
|
||||
While `change_column_type_concurrently` can be used for changing the type of a
|
||||
column without downtime it doesn't work very well for large tables. Because all
|
||||
of the work happens in sequence the migration can take a very long time to
|
||||
complete, preventing a deployment from proceeding.
|
||||
`change_column_type_concurrently` can also produce a lot of pressure on the
|
||||
database due to it rapidly updating many rows in sequence.
|
||||
While `change_column_type_concurrently` and `rename_column_concurrently` can be
|
||||
used for changing the schema of a table without downtime it doesn't work very
|
||||
well for large tables. Because all of the work happens in sequence the migration
|
||||
can take a very long time to complete, preventing a deployment from proceeding.
|
||||
They can also produce a lot of pressure on the database due to it rapidly
|
||||
updating many rows in sequence.
|
||||
|
||||
To reduce database pressure you should instead use
|
||||
`change_column_type_using_background_migration` when migrating a column in a
|
||||
large table (e.g. `issues`). This method works similar to
|
||||
`change_column_type_concurrently` but uses background migration to spread the
|
||||
work / load over a longer time period, without slowing down deployments.
|
||||
`change_column_type_using_background_migration` or `rename_column_concurrently`
|
||||
when migrating a column in a large table (e.g. `issues`). These methods work
|
||||
similarly to the concurrent counterparts but uses background migration to spread
|
||||
the work / load over a longer time period, without slowing down deployments.
|
||||
|
||||
Usage of this method is fairly simple:
|
||||
For example, to change the column type using a background migration:
|
||||
|
||||
```ruby
|
||||
class ExampleMigration < ActiveRecord::Migration
|
||||
|
@ -296,6 +296,15 @@ class MigrateRemainingIssuesClosedAt < ActiveRecord::Migration
|
|||
end
|
||||
```
|
||||
|
||||
The same applies to `rename_column_using_background_migration`:
|
||||
|
||||
1. Create a migration using the helper, which will schedule background
|
||||
migrations to spread the writes over a longer period of time.
|
||||
2. In the next monthly release, create a clean-up migration to steal from the
|
||||
Sidekiq queues, migrate any missing rows, and cleanup the rename. This
|
||||
migration should skip the steps after stealing from the Sidekiq queues if the
|
||||
column has already been renamed.
|
||||
|
||||
For more information, see [the documentation on cleaning up background
|
||||
migrations](background_migrations.md#cleaning-up).
|
||||
|
||||
|
|
|
@ -0,0 +1,14 @@
|
|||
# frozen_string_literal: true
|
||||
|
||||
module Gitlab
|
||||
module BackgroundMigration
|
||||
# Background migration for cleaning up a concurrent column rename.
|
||||
class CleanupConcurrentRename < CleanupConcurrentSchemaChange
|
||||
RESCHEDULE_DELAY = 10.minutes
|
||||
|
||||
def cleanup_concurrent_schema_change(table, old_column, new_column)
|
||||
cleanup_concurrent_column_rename(table, old_column, new_column)
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
|
@ -0,0 +1,52 @@
|
|||
# frozen_string_literal: true
|
||||
|
||||
module Gitlab
|
||||
module BackgroundMigration
|
||||
# Base class for cleaning up concurrent schema changes.
|
||||
class CleanupConcurrentSchemaChange
|
||||
include Database::MigrationHelpers
|
||||
|
||||
# table - The name of the table the migration is performed for.
|
||||
# old_column - The name of the old (to drop) column.
|
||||
# new_column - The name of the new column.
|
||||
def perform(table, old_column, new_column)
|
||||
return unless column_exists?(table, new_column)
|
||||
|
||||
rows_to_migrate = define_model_for(table)
|
||||
.where(new_column => nil)
|
||||
.where
|
||||
.not(old_column => nil)
|
||||
|
||||
if rows_to_migrate.any?
|
||||
BackgroundMigrationWorker.perform_in(
|
||||
RESCHEDULE_DELAY,
|
||||
self.class.name,
|
||||
[table, old_column, new_column]
|
||||
)
|
||||
else
|
||||
cleanup_concurrent_schema_change(table, old_column, new_column)
|
||||
end
|
||||
end
|
||||
|
||||
# These methods are necessary so we can re-use the migration helpers in
|
||||
# this class.
|
||||
def connection
|
||||
ActiveRecord::Base.connection
|
||||
end
|
||||
|
||||
def method_missing(name, *args, &block)
|
||||
connection.__send__(name, *args, &block) # rubocop: disable GitlabSecurity/PublicSend
|
||||
end
|
||||
|
||||
def respond_to_missing?(*args)
|
||||
connection.respond_to?(*args) || super
|
||||
end
|
||||
|
||||
def define_model_for(table)
|
||||
Class.new(ActiveRecord::Base) do
|
||||
self.table_name = table
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
|
@ -2,52 +2,12 @@
|
|||
|
||||
module Gitlab
|
||||
module BackgroundMigration
|
||||
# Background migration for cleaning up a concurrent column rename.
|
||||
class CleanupConcurrentTypeChange
|
||||
include Database::MigrationHelpers
|
||||
|
||||
# Background migration for cleaning up a concurrent column type changeb.
|
||||
class CleanupConcurrentTypeChange < CleanupConcurrentSchemaChange
|
||||
RESCHEDULE_DELAY = 10.minutes
|
||||
|
||||
# table - The name of the table the migration is performed for.
|
||||
# old_column - The name of the old (to drop) column.
|
||||
# new_column - The name of the new column.
|
||||
def perform(table, old_column, new_column)
|
||||
return unless column_exists?(:issues, new_column)
|
||||
|
||||
rows_to_migrate = define_model_for(table)
|
||||
.where(new_column => nil)
|
||||
.where
|
||||
.not(old_column => nil)
|
||||
|
||||
if rows_to_migrate.any?
|
||||
BackgroundMigrationWorker.perform_in(
|
||||
RESCHEDULE_DELAY,
|
||||
'CleanupConcurrentTypeChange',
|
||||
[table, old_column, new_column]
|
||||
)
|
||||
else
|
||||
cleanup_concurrent_column_type_change(table, old_column)
|
||||
end
|
||||
end
|
||||
|
||||
# These methods are necessary so we can re-use the migration helpers in
|
||||
# this class.
|
||||
def connection
|
||||
ActiveRecord::Base.connection
|
||||
end
|
||||
|
||||
def method_missing(name, *args, &block)
|
||||
connection.__send__(name, *args, &block) # rubocop: disable GitlabSecurity/PublicSend
|
||||
end
|
||||
|
||||
def respond_to_missing?(*args)
|
||||
connection.respond_to?(*args) || super
|
||||
end
|
||||
|
||||
def define_model_for(table)
|
||||
Class.new(ActiveRecord::Base) do
|
||||
self.table_name = table
|
||||
end
|
||||
def cleanup_concurrent_schema_change(table, old_column, new_column)
|
||||
cleanup_concurrent_column_type_change(table, old_column)
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
@ -596,6 +596,97 @@ module Gitlab
|
|||
end
|
||||
end
|
||||
|
||||
# Renames a column using a background migration.
|
||||
#
|
||||
# Because this method uses a background migration it's more suitable for
|
||||
# large tables. For small tables it's better to use
|
||||
# `rename_column_concurrently` since it can complete its work in a much
|
||||
# shorter amount of time and doesn't rely on Sidekiq.
|
||||
#
|
||||
# Example usage:
|
||||
#
|
||||
# rename_column_using_background_migration(
|
||||
# :users,
|
||||
# :feed_token,
|
||||
# :rss_token
|
||||
# )
|
||||
#
|
||||
# table - The name of the database table containing the column.
|
||||
#
|
||||
# old - The old column name.
|
||||
#
|
||||
# new - The new column name.
|
||||
#
|
||||
# type - The type of the new column. If no type is given the old column's
|
||||
# type is used.
|
||||
#
|
||||
# batch_size - The number of rows to schedule in a single background
|
||||
# migration.
|
||||
#
|
||||
# interval - The time interval between every background migration.
|
||||
def rename_column_using_background_migration(
|
||||
table,
|
||||
old_column,
|
||||
new_column,
|
||||
type: nil,
|
||||
batch_size: 10_000,
|
||||
interval: 10.minutes
|
||||
)
|
||||
|
||||
check_trigger_permissions!(table)
|
||||
|
||||
old_col = column_for(table, old_column)
|
||||
new_type = type || old_col.type
|
||||
max_index = 0
|
||||
|
||||
add_column(table, new_column, new_type,
|
||||
limit: old_col.limit,
|
||||
precision: old_col.precision,
|
||||
scale: old_col.scale)
|
||||
|
||||
# We set the default value _after_ adding the column so we don't end up
|
||||
# updating any existing data with the default value. This isn't
|
||||
# necessary since we copy over old values further down.
|
||||
change_column_default(table, new_column, old_col.default) if old_col.default
|
||||
|
||||
install_rename_triggers(table, old_column, new_column)
|
||||
|
||||
model = Class.new(ActiveRecord::Base) do
|
||||
self.table_name = table
|
||||
|
||||
include ::EachBatch
|
||||
end
|
||||
|
||||
# Schedule the jobs that will copy the data from the old column to the
|
||||
# new one. Rows with NULL values in our source column are skipped since
|
||||
# the target column is already NULL at this point.
|
||||
model.where.not(old_column => nil).each_batch(of: batch_size) do |batch, index|
|
||||
start_id, end_id = batch.pluck('MIN(id), MAX(id)').first
|
||||
max_index = index
|
||||
|
||||
BackgroundMigrationWorker.perform_in(
|
||||
index * interval,
|
||||
'CopyColumn',
|
||||
[table, old_column, new_column, start_id, end_id]
|
||||
)
|
||||
end
|
||||
|
||||
# Schedule the renaming of the column to happen (initially) 1 hour after
|
||||
# the last batch finished.
|
||||
BackgroundMigrationWorker.perform_in(
|
||||
(max_index * interval) + 1.hour,
|
||||
'CleanupConcurrentRename',
|
||||
[table, old_column, new_column]
|
||||
)
|
||||
|
||||
if perform_background_migration_inline?
|
||||
# To ensure the schema is up to date immediately we perform the
|
||||
# migration inline in dev / test environments.
|
||||
Gitlab::BackgroundMigration.steal('CopyColumn')
|
||||
Gitlab::BackgroundMigration.steal('CleanupConcurrentRename')
|
||||
end
|
||||
end
|
||||
|
||||
def perform_background_migration_inline?
|
||||
Rails.env.test? || Rails.env.development?
|
||||
end
|
||||
|
|
|
@ -1178,6 +1178,61 @@ describe Gitlab::Database::MigrationHelpers do
|
|||
end
|
||||
end
|
||||
|
||||
describe '#rename_column_using_background_migration' do
|
||||
let!(:issue) { create(:issue, :closed, closed_at: Time.zone.now) }
|
||||
|
||||
it 'renames a column using a background migration' do
|
||||
expect(model)
|
||||
.to receive(:add_column)
|
||||
.with(
|
||||
'issues',
|
||||
:closed_at_timestamp,
|
||||
:datetime_with_timezone,
|
||||
limit: anything,
|
||||
precision: anything,
|
||||
scale: anything
|
||||
)
|
||||
|
||||
expect(model)
|
||||
.to receive(:install_rename_triggers)
|
||||
.with('issues', :closed_at, :closed_at_timestamp)
|
||||
|
||||
expect(BackgroundMigrationWorker)
|
||||
.to receive(:perform_in)
|
||||
.ordered
|
||||
.with(
|
||||
10.minutes,
|
||||
'CopyColumn',
|
||||
['issues', :closed_at, :closed_at_timestamp, issue.id, issue.id]
|
||||
)
|
||||
|
||||
expect(BackgroundMigrationWorker)
|
||||
.to receive(:perform_in)
|
||||
.ordered
|
||||
.with(
|
||||
1.hour + 10.minutes,
|
||||
'CleanupConcurrentRename',
|
||||
['issues', :closed_at, :closed_at_timestamp]
|
||||
)
|
||||
|
||||
expect(Gitlab::BackgroundMigration)
|
||||
.to receive(:steal)
|
||||
.ordered
|
||||
.with('CopyColumn')
|
||||
|
||||
expect(Gitlab::BackgroundMigration)
|
||||
.to receive(:steal)
|
||||
.ordered
|
||||
.with('CleanupConcurrentRename')
|
||||
|
||||
model.rename_column_using_background_migration(
|
||||
'issues',
|
||||
:closed_at,
|
||||
:closed_at_timestamp
|
||||
)
|
||||
end
|
||||
end
|
||||
|
||||
describe '#perform_background_migration_inline?' do
|
||||
it 'returns true in a test environment' do
|
||||
allow(Rails.env)
|
||||
|
|
Loading…
Reference in New Issue