Use a background migration for issues.closed_at
In a previous attempt (rolled back in https://gitlab.com/gitlab-org/gitlab-ce/merge_requests/16021) we tried to migrate `issues.closed_at` from timestamp to timestamptz using a regular migration. This has a bad impact on GitLab.com and as such was rolled back. This commit re-implements the original migrations using generic background migrations, allowing us to still migrate the data in a single release but without a negative impact on availability. To ensure the database schema is up to date the background migrations are performed inline in development and test environments. We also make sure to not migrate that that doesn't need migrating in the first place or has already been migrated.
This commit is contained in:
parent
1dac427179
commit
78d22fb20d
8 changed files with 402 additions and 12 deletions
|
@ -0,0 +1,5 @@
|
|||
---
|
||||
title: Use a background migration for issues.closed_at
|
||||
merge_request:
|
||||
author:
|
||||
type: other
|
|
@ -0,0 +1,45 @@
|
|||
# See http://doc.gitlab.com/ce/development/migration_style_guide.html
|
||||
# for more information on how to write migrations for GitLab.
|
||||
|
||||
class ScheduleIssuesClosedAtTypeChange < ActiveRecord::Migration
|
||||
include Gitlab::Database::MigrationHelpers
|
||||
|
||||
DOWNTIME = false
|
||||
|
||||
disable_ddl_transaction!
|
||||
|
||||
class Issue < ActiveRecord::Base
|
||||
self.table_name = 'issues'
|
||||
include EachBatch
|
||||
|
||||
def self.to_migrate
|
||||
where('closed_at IS NOT NULL')
|
||||
end
|
||||
end
|
||||
|
||||
def up
|
||||
return unless migrate_column_type?
|
||||
|
||||
change_column_type_using_background_migration(
|
||||
Issue.to_migrate,
|
||||
:closed_at,
|
||||
:datetime_with_timezone
|
||||
)
|
||||
end
|
||||
|
||||
def down
|
||||
return if migrate_column_type?
|
||||
|
||||
change_column_type_using_background_migration(
|
||||
Issue.to_migrate,
|
||||
:closed_at,
|
||||
:datetime
|
||||
)
|
||||
end
|
||||
|
||||
def migrate_column_type?
|
||||
# Some environments may have already executed the previous version of this
|
||||
# migration, thus we don't need to migrate those environments again.
|
||||
column_for('issues', 'closed_at').type == :datetime
|
||||
end
|
||||
end
|
|
@ -11,7 +11,7 @@
|
|||
#
|
||||
# It's strongly recommended that you check this file into your version control system.
|
||||
|
||||
ActiveRecord::Schema.define(version: 20171220191323) do
|
||||
ActiveRecord::Schema.define(version: 20171221140220) do
|
||||
|
||||
# These are extensions that must be enabled in order to support this database
|
||||
enable_extension "plpgsql"
|
||||
|
|
|
@ -195,6 +195,63 @@ end
|
|||
|
||||
And that's it, we're done!
|
||||
|
||||
## Changing Column Types For Large Tables
|
||||
|
||||
While `change_column_type_concurrently` can be used for changing the type of a
|
||||
column without downtime it doesn't work very well for large tables. Because all
|
||||
of the work happens in sequence the migration can take a very long time to
|
||||
complete, preventing a deployment from proceeding.
|
||||
`change_column_type_concurrently` can also produce a lot of pressure on the
|
||||
database due to it rapidly updating many rows in sequence.
|
||||
|
||||
To reduce database pressure you should instead use
|
||||
`change_column_type_using_background_migration` when migrating a column in a
|
||||
large table (e.g. `issues`). This method works similar to
|
||||
`change_column_type_concurrently` but uses background migration to spread the
|
||||
work / load over a longer time period, without slowing down deployments.
|
||||
|
||||
Usage of this method is fairly simple:
|
||||
|
||||
```ruby
|
||||
class ExampleMigration < ActiveRecord::Migration
|
||||
include Gitlab::Database::MigrationHelpers
|
||||
|
||||
disable_ddl_transaction!
|
||||
|
||||
class Issue < ActiveRecord::Base
|
||||
self.table_name = 'issues'
|
||||
|
||||
include EachBatch
|
||||
|
||||
def self.to_migrate
|
||||
where('closed_at IS NOT NULL')
|
||||
end
|
||||
end
|
||||
|
||||
def up
|
||||
change_column_type_using_background_migration(
|
||||
Issue.to_migrate,
|
||||
:closed_at,
|
||||
:datetime_with_timezone
|
||||
)
|
||||
end
|
||||
|
||||
def down
|
||||
change_column_type_using_background_migration(
|
||||
Issue.to_migrate,
|
||||
:closed_at,
|
||||
:datetime
|
||||
)
|
||||
end
|
||||
end
|
||||
```
|
||||
|
||||
This would change the type of `issues.closed_at` to `timestamp with time zone`.
|
||||
|
||||
Keep in mind that the relation passed to
|
||||
`change_column_type_using_background_migration` _must_ include `EachBatch`,
|
||||
otherwise it will raise a `TypeError`.
|
||||
|
||||
## Adding Indexes
|
||||
|
||||
Adding indexes is an expensive process that blocks INSERT and UPDATE queries for
|
||||
|
|
|
@ -0,0 +1,54 @@
|
|||
# frozen_string_literal: true
|
||||
|
||||
module Gitlab
|
||||
module BackgroundMigration
|
||||
# Background migration for cleaning up a concurrent column rename.
|
||||
class CleanupConcurrentTypeChange
|
||||
include Database::MigrationHelpers
|
||||
|
||||
RESCHEDULE_DELAY = 10.minutes
|
||||
|
||||
# table - The name of the table the migration is performed for.
|
||||
# old_column - The name of the old (to drop) column.
|
||||
# new_column - The name of the new column.
|
||||
def perform(table, old_column, new_column)
|
||||
return unless column_exists?(:issues, new_column)
|
||||
|
||||
rows_to_migrate = define_model_for(table)
|
||||
.where(new_column => nil)
|
||||
.where
|
||||
.not(old_column => nil)
|
||||
|
||||
if rows_to_migrate.any?
|
||||
BackgroundMigrationWorker.perform_in(
|
||||
RESCHEDULE_DELAY,
|
||||
'CleanupConcurrentTypeChange',
|
||||
[table, old_column, new_column]
|
||||
)
|
||||
else
|
||||
cleanup_concurrent_column_type_change(table, old_column)
|
||||
end
|
||||
end
|
||||
|
||||
# These methods are necessary so we can re-use the migration helpers in
|
||||
# this class.
|
||||
def connection
|
||||
ActiveRecord::Base.connection
|
||||
end
|
||||
|
||||
def method_missing(name, *args, &block)
|
||||
connection.__send__(name, *args, &block) # rubocop: disable GitlabSecurity/PublicSend
|
||||
end
|
||||
|
||||
def respond_to_missing?(*args)
|
||||
connection.respond_to?(*args) || super
|
||||
end
|
||||
|
||||
def define_model_for(table)
|
||||
Class.new(ActiveRecord::Base) do
|
||||
self.table_name = table
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
39
lib/gitlab/background_migration/copy_column.rb
Normal file
39
lib/gitlab/background_migration/copy_column.rb
Normal file
|
@ -0,0 +1,39 @@
|
|||
# frozen_string_literal: true
|
||||
|
||||
module Gitlab
|
||||
module BackgroundMigration
|
||||
# CopyColumn is a simple (reusable) background migration that can be used to
|
||||
# update the value of a column based on the value of another column in the
|
||||
# same table.
|
||||
#
|
||||
# For this background migration to work the table that is migrated _has_ to
|
||||
# have an `id` column as the primary key.
|
||||
class CopyColumn
|
||||
# table - The name of the table that contains the columns.
|
||||
# copy_from - The column containing the data to copy.
|
||||
# copy_to - The column to copy the data to.
|
||||
# start_id - The start ID of the range of rows to update.
|
||||
# end_id - The end ID of the range of rows to update.
|
||||
def perform(table, copy_from, copy_to, start_id, end_id)
|
||||
return unless connection.column_exists?(table, copy_to)
|
||||
|
||||
quoted_table = connection.quote_table_name(table)
|
||||
quoted_copy_from = connection.quote_column_name(copy_from)
|
||||
quoted_copy_to = connection.quote_column_name(copy_to)
|
||||
|
||||
# We're using raw SQL here since this job may be frequently executed. As
|
||||
# a result dynamically defining models would lead to many unnecessary
|
||||
# schema information queries.
|
||||
connection.execute <<-SQL.strip_heredoc
|
||||
UPDATE #{quoted_table}
|
||||
SET #{quoted_copy_to} = #{quoted_copy_from}
|
||||
WHERE id BETWEEN #{start_id} AND #{end_id}
|
||||
SQL
|
||||
end
|
||||
|
||||
def connection
|
||||
ActiveRecord::Base.connection
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
|
@ -385,10 +385,27 @@ module Gitlab
|
|||
# necessary since we copy over old values further down.
|
||||
change_column_default(table, new, old_col.default) if old_col.default
|
||||
|
||||
trigger_name = rename_trigger_name(table, old, new)
|
||||
install_rename_triggers(table, old, new)
|
||||
|
||||
update_column_in_batches(table, new, Arel::Table.new(table)[old])
|
||||
|
||||
change_column_null(table, new, false) unless old_col.null
|
||||
|
||||
copy_indexes(table, old, new)
|
||||
copy_foreign_keys(table, old, new)
|
||||
end
|
||||
|
||||
# Installs triggers in a table that keep a new column in sync with an old
|
||||
# one.
|
||||
#
|
||||
# table - The name of the table to install the trigger in.
|
||||
# old_column - The name of the old column.
|
||||
# new_column - The name of the new column.
|
||||
def install_rename_triggers(table, old_column, new_column)
|
||||
trigger_name = rename_trigger_name(table, old_column, new_column)
|
||||
quoted_table = quote_table_name(table)
|
||||
quoted_old = quote_column_name(old)
|
||||
quoted_new = quote_column_name(new)
|
||||
quoted_old = quote_column_name(old_column)
|
||||
quoted_new = quote_column_name(new_column)
|
||||
|
||||
if Database.postgresql?
|
||||
install_rename_triggers_for_postgresql(trigger_name, quoted_table,
|
||||
|
@ -397,13 +414,6 @@ module Gitlab
|
|||
install_rename_triggers_for_mysql(trigger_name, quoted_table,
|
||||
quoted_old, quoted_new)
|
||||
end
|
||||
|
||||
update_column_in_batches(table, new, Arel::Table.new(table)[old])
|
||||
|
||||
change_column_null(table, new, false) unless old_col.null
|
||||
|
||||
copy_indexes(table, old, new)
|
||||
copy_foreign_keys(table, old, new)
|
||||
end
|
||||
|
||||
# Changes the type of a column concurrently.
|
||||
|
@ -455,6 +465,97 @@ module Gitlab
|
|||
remove_column(table, old)
|
||||
end
|
||||
|
||||
# Changes the column type of a table using a background migration.
|
||||
#
|
||||
# Because this method uses a background migration it's more suitable for
|
||||
# large tables. For small tables it's better to use
|
||||
# `change_column_type_concurrently` since it can complete its work in a
|
||||
# much shorter amount of time and doesn't rely on Sidekiq.
|
||||
#
|
||||
# Example usage:
|
||||
#
|
||||
# class Issue < ActiveRecord::Base
|
||||
# self.table_name = 'issues'
|
||||
#
|
||||
# include EachBatch
|
||||
#
|
||||
# def self.to_migrate
|
||||
# where('closed_at IS NOT NULL')
|
||||
# end
|
||||
# end
|
||||
#
|
||||
# change_column_type_using_background_migration(
|
||||
# Issue.to_migrate,
|
||||
# :closed_at,
|
||||
# :datetime_with_timezone
|
||||
# )
|
||||
#
|
||||
# Reverting a migration like this is done exactly the same way, just with
|
||||
# a different type to migrate to (e.g. `:datetime` in the above example).
|
||||
#
|
||||
# relation - An ActiveRecord relation to use for scheduling jobs and
|
||||
# figuring out what table we're modifying. This relation _must_
|
||||
# have the EachBatch module included.
|
||||
#
|
||||
# column - The name of the column for which the type will be changed.
|
||||
#
|
||||
# new_type - The new type of the column.
|
||||
#
|
||||
# batch_size - The number of rows to schedule in a single background
|
||||
# migration.
|
||||
#
|
||||
# interval - The time interval between every background migration.
|
||||
def change_column_type_using_background_migration(
|
||||
relation,
|
||||
column,
|
||||
new_type,
|
||||
batch_size: 10_000,
|
||||
interval: 10.minutes
|
||||
)
|
||||
unless relation.model < EachBatch
|
||||
raise TypeError, 'The relation must include the EachBatch module'
|
||||
end
|
||||
|
||||
temp_column = "#{column}_for_type_change"
|
||||
table = relation.table_name
|
||||
max_index = 0
|
||||
|
||||
add_column(table, temp_column, new_type)
|
||||
install_rename_triggers(table, column, temp_column)
|
||||
|
||||
# Schedule the jobs that will copy the data from the old column to the
|
||||
# new one.
|
||||
relation.each_batch(of: batch_size) do |batch, index|
|
||||
start_id, end_id = batch.pluck('MIN(id), MAX(id)').first
|
||||
max_index = index
|
||||
|
||||
BackgroundMigrationWorker.perform_in(
|
||||
index * interval,
|
||||
'CopyColumn',
|
||||
[table, column, temp_column, start_id, end_id]
|
||||
)
|
||||
end
|
||||
|
||||
# Schedule the renaming of the column to happen (initially) 1 hour after
|
||||
# the last batch finished.
|
||||
BackgroundMigrationWorker.perform_in(
|
||||
(max_index * interval) + 1.hour,
|
||||
'CleanupConcurrentTypeChange',
|
||||
[table, column, temp_column]
|
||||
)
|
||||
|
||||
if perform_background_migration_inline?
|
||||
# To ensure the schema is up to date immediately we perform the
|
||||
# migration inline in dev / test environments.
|
||||
Gitlab::BackgroundMigration.steal('CopyColumn')
|
||||
Gitlab::BackgroundMigration.steal('CleanupConcurrentTypeChange')
|
||||
end
|
||||
end
|
||||
|
||||
def perform_background_migration_inline?
|
||||
Rails.env.test? || Rails.env.development?
|
||||
end
|
||||
|
||||
# Performs a concurrent column rename when using PostgreSQL.
|
||||
def install_rename_triggers_for_postgresql(trigger, table, old, new)
|
||||
execute <<-EOF.strip_heredoc
|
||||
|
|
|
@ -902,7 +902,7 @@ describe Gitlab::Database::MigrationHelpers do
|
|||
describe '#check_trigger_permissions!' do
|
||||
it 'does nothing when the user has the correct permissions' do
|
||||
expect { model.check_trigger_permissions!('users') }
|
||||
.not_to raise_error(RuntimeError)
|
||||
.not_to raise_error
|
||||
end
|
||||
|
||||
it 'raises RuntimeError when the user does not have the correct permissions' do
|
||||
|
@ -1036,4 +1036,93 @@ describe Gitlab::Database::MigrationHelpers do
|
|||
end
|
||||
end
|
||||
end
|
||||
|
||||
describe '#change_column_type_using_background_migration' do
|
||||
let!(:issue) { create(:issue) }
|
||||
|
||||
let(:issue_model) do
|
||||
Class.new(ActiveRecord::Base) do
|
||||
self.table_name = 'issues'
|
||||
include EachBatch
|
||||
end
|
||||
end
|
||||
|
||||
it 'changes the type of a column using a background migration' do
|
||||
expect(model)
|
||||
.to receive(:add_column)
|
||||
.with('issues', 'closed_at_for_type_change', :datetime_with_timezone)
|
||||
|
||||
expect(model)
|
||||
.to receive(:install_rename_triggers)
|
||||
.with('issues', :closed_at, 'closed_at_for_type_change')
|
||||
|
||||
expect(BackgroundMigrationWorker)
|
||||
.to receive(:perform_in)
|
||||
.ordered
|
||||
.with(
|
||||
10.minutes,
|
||||
'CopyColumn',
|
||||
['issues', :closed_at, 'closed_at_for_type_change', issue.id, issue.id]
|
||||
)
|
||||
|
||||
expect(BackgroundMigrationWorker)
|
||||
.to receive(:perform_in)
|
||||
.ordered
|
||||
.with(
|
||||
1.hour + 10.minutes,
|
||||
'CleanupConcurrentTypeChange',
|
||||
['issues', :closed_at, 'closed_at_for_type_change']
|
||||
)
|
||||
|
||||
expect(Gitlab::BackgroundMigration)
|
||||
.to receive(:steal)
|
||||
.ordered
|
||||
.with('CopyColumn')
|
||||
|
||||
expect(Gitlab::BackgroundMigration)
|
||||
.to receive(:steal)
|
||||
.ordered
|
||||
.with('CleanupConcurrentTypeChange')
|
||||
|
||||
model.change_column_type_using_background_migration(
|
||||
issue_model.all,
|
||||
:closed_at,
|
||||
:datetime_with_timezone
|
||||
)
|
||||
end
|
||||
end
|
||||
|
||||
describe '#perform_background_migration_inline?' do
|
||||
it 'returns true in a test environment' do
|
||||
allow(Rails.env)
|
||||
.to receive(:test?)
|
||||
.and_return(true)
|
||||
|
||||
expect(model.perform_background_migration_inline?).to eq(true)
|
||||
end
|
||||
|
||||
it 'returns true in a development environment' do
|
||||
allow(Rails.env)
|
||||
.to receive(:test?)
|
||||
.and_return(false)
|
||||
|
||||
allow(Rails.env)
|
||||
.to receive(:development?)
|
||||
.and_return(true)
|
||||
|
||||
expect(model.perform_background_migration_inline?).to eq(true)
|
||||
end
|
||||
|
||||
it 'returns false in a production environment' do
|
||||
allow(Rails.env)
|
||||
.to receive(:test?)
|
||||
.and_return(false)
|
||||
|
||||
allow(Rails.env)
|
||||
.to receive(:development?)
|
||||
.and_return(false)
|
||||
|
||||
expect(model.perform_background_migration_inline?).to eq(false)
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
Loading…
Reference in a new issue