Merge branch 'change-issues-closed-at-background-migration' into 'master'

Use a background migration for migrating issues.closed_at

See merge request gitlab-org/gitlab-ce!16083
This commit is contained in:
Sean McGivern 2018-01-04 10:14:06 +00:00
commit ac409fb444
8 changed files with 402 additions and 12 deletions

View File

@ -0,0 +1,5 @@
---
title: Use a background migration for issues.closed_at
merge_request:
author:
type: other

View File

@ -0,0 +1,45 @@
# See http://doc.gitlab.com/ce/development/migration_style_guide.html
# for more information on how to write migrations for GitLab.
class ScheduleIssuesClosedAtTypeChange < ActiveRecord::Migration
include Gitlab::Database::MigrationHelpers
DOWNTIME = false
disable_ddl_transaction!
class Issue < ActiveRecord::Base
self.table_name = 'issues'
include EachBatch
def self.to_migrate
where('closed_at IS NOT NULL')
end
end
def up
return unless migrate_column_type?
change_column_type_using_background_migration(
Issue.to_migrate,
:closed_at,
:datetime_with_timezone
)
end
def down
return if migrate_column_type?
change_column_type_using_background_migration(
Issue.to_migrate,
:closed_at,
:datetime
)
end
def migrate_column_type?
# Some environments may have already executed the previous version of this
# migration, thus we don't need to migrate those environments again.
column_for('issues', 'closed_at').type == :datetime
end
end

View File

@ -11,7 +11,7 @@
#
# It's strongly recommended that you check this file into your version control system.
ActiveRecord::Schema.define(version: 20171220191323) do
ActiveRecord::Schema.define(version: 20171221140220) do
# These are extensions that must be enabled in order to support this database
enable_extension "plpgsql"

View File

@ -195,6 +195,63 @@ end
And that's it, we're done!
## Changing Column Types For Large Tables
While `change_column_type_concurrently` can be used for changing the type of a
column without downtime it doesn't work very well for large tables. Because all
of the work happens in sequence the migration can take a very long time to
complete, preventing a deployment from proceeding.
`change_column_type_concurrently` can also produce a lot of pressure on the
database due to it rapidly updating many rows in sequence.
To reduce database pressure you should instead use
`change_column_type_using_background_migration` when migrating a column in a
large table (e.g. `issues`). This method works similar to
`change_column_type_concurrently` but uses background migration to spread the
work / load over a longer time period, without slowing down deployments.
Usage of this method is fairly simple:
```ruby
class ExampleMigration < ActiveRecord::Migration
include Gitlab::Database::MigrationHelpers
disable_ddl_transaction!
class Issue < ActiveRecord::Base
self.table_name = 'issues'
include EachBatch
def self.to_migrate
where('closed_at IS NOT NULL')
end
end
def up
change_column_type_using_background_migration(
Issue.to_migrate,
:closed_at,
:datetime_with_timezone
)
end
def down
change_column_type_using_background_migration(
Issue.to_migrate,
:closed_at,
:datetime
)
end
end
```
This would change the type of `issues.closed_at` to `timestamp with time zone`.
Keep in mind that the relation passed to
`change_column_type_using_background_migration` _must_ include `EachBatch`,
otherwise it will raise a `TypeError`.
## Adding Indexes
Adding indexes is an expensive process that blocks INSERT and UPDATE queries for

View File

@ -0,0 +1,54 @@
# frozen_string_literal: true
module Gitlab
module BackgroundMigration
# Background migration for cleaning up a concurrent column rename.
class CleanupConcurrentTypeChange
include Database::MigrationHelpers
RESCHEDULE_DELAY = 10.minutes
# table - The name of the table the migration is performed for.
# old_column - The name of the old (to drop) column.
# new_column - The name of the new column.
def perform(table, old_column, new_column)
return unless column_exists?(:issues, new_column)
rows_to_migrate = define_model_for(table)
.where(new_column => nil)
.where
.not(old_column => nil)
if rows_to_migrate.any?
BackgroundMigrationWorker.perform_in(
RESCHEDULE_DELAY,
'CleanupConcurrentTypeChange',
[table, old_column, new_column]
)
else
cleanup_concurrent_column_type_change(table, old_column)
end
end
# These methods are necessary so we can re-use the migration helpers in
# this class.
def connection
ActiveRecord::Base.connection
end
def method_missing(name, *args, &block)
connection.__send__(name, *args, &block) # rubocop: disable GitlabSecurity/PublicSend
end
def respond_to_missing?(*args)
connection.respond_to?(*args) || super
end
def define_model_for(table)
Class.new(ActiveRecord::Base) do
self.table_name = table
end
end
end
end
end

View File

@ -0,0 +1,39 @@
# frozen_string_literal: true
module Gitlab
module BackgroundMigration
# CopyColumn is a simple (reusable) background migration that can be used to
# update the value of a column based on the value of another column in the
# same table.
#
# For this background migration to work the table that is migrated _has_ to
# have an `id` column as the primary key.
class CopyColumn
# table - The name of the table that contains the columns.
# copy_from - The column containing the data to copy.
# copy_to - The column to copy the data to.
# start_id - The start ID of the range of rows to update.
# end_id - The end ID of the range of rows to update.
def perform(table, copy_from, copy_to, start_id, end_id)
return unless connection.column_exists?(table, copy_to)
quoted_table = connection.quote_table_name(table)
quoted_copy_from = connection.quote_column_name(copy_from)
quoted_copy_to = connection.quote_column_name(copy_to)
# We're using raw SQL here since this job may be frequently executed. As
# a result dynamically defining models would lead to many unnecessary
# schema information queries.
connection.execute <<-SQL.strip_heredoc
UPDATE #{quoted_table}
SET #{quoted_copy_to} = #{quoted_copy_from}
WHERE id BETWEEN #{start_id} AND #{end_id}
SQL
end
def connection
ActiveRecord::Base.connection
end
end
end
end

View File

@ -385,10 +385,27 @@ module Gitlab
# necessary since we copy over old values further down.
change_column_default(table, new, old_col.default) if old_col.default
trigger_name = rename_trigger_name(table, old, new)
install_rename_triggers(table, old, new)
update_column_in_batches(table, new, Arel::Table.new(table)[old])
change_column_null(table, new, false) unless old_col.null
copy_indexes(table, old, new)
copy_foreign_keys(table, old, new)
end
# Installs triggers in a table that keep a new column in sync with an old
# one.
#
# table - The name of the table to install the trigger in.
# old_column - The name of the old column.
# new_column - The name of the new column.
def install_rename_triggers(table, old_column, new_column)
trigger_name = rename_trigger_name(table, old_column, new_column)
quoted_table = quote_table_name(table)
quoted_old = quote_column_name(old)
quoted_new = quote_column_name(new)
quoted_old = quote_column_name(old_column)
quoted_new = quote_column_name(new_column)
if Database.postgresql?
install_rename_triggers_for_postgresql(trigger_name, quoted_table,
@ -397,13 +414,6 @@ module Gitlab
install_rename_triggers_for_mysql(trigger_name, quoted_table,
quoted_old, quoted_new)
end
update_column_in_batches(table, new, Arel::Table.new(table)[old])
change_column_null(table, new, false) unless old_col.null
copy_indexes(table, old, new)
copy_foreign_keys(table, old, new)
end
# Changes the type of a column concurrently.
@ -455,6 +465,97 @@ module Gitlab
remove_column(table, old)
end
# Changes the column type of a table using a background migration.
#
# Because this method uses a background migration it's more suitable for
# large tables. For small tables it's better to use
# `change_column_type_concurrently` since it can complete its work in a
# much shorter amount of time and doesn't rely on Sidekiq.
#
# Example usage:
#
# class Issue < ActiveRecord::Base
# self.table_name = 'issues'
#
# include EachBatch
#
# def self.to_migrate
# where('closed_at IS NOT NULL')
# end
# end
#
# change_column_type_using_background_migration(
# Issue.to_migrate,
# :closed_at,
# :datetime_with_timezone
# )
#
# Reverting a migration like this is done exactly the same way, just with
# a different type to migrate to (e.g. `:datetime` in the above example).
#
# relation - An ActiveRecord relation to use for scheduling jobs and
# figuring out what table we're modifying. This relation _must_
# have the EachBatch module included.
#
# column - The name of the column for which the type will be changed.
#
# new_type - The new type of the column.
#
# batch_size - The number of rows to schedule in a single background
# migration.
#
# interval - The time interval between every background migration.
def change_column_type_using_background_migration(
relation,
column,
new_type,
batch_size: 10_000,
interval: 10.minutes
)
unless relation.model < EachBatch
raise TypeError, 'The relation must include the EachBatch module'
end
temp_column = "#{column}_for_type_change"
table = relation.table_name
max_index = 0
add_column(table, temp_column, new_type)
install_rename_triggers(table, column, temp_column)
# Schedule the jobs that will copy the data from the old column to the
# new one.
relation.each_batch(of: batch_size) do |batch, index|
start_id, end_id = batch.pluck('MIN(id), MAX(id)').first
max_index = index
BackgroundMigrationWorker.perform_in(
index * interval,
'CopyColumn',
[table, column, temp_column, start_id, end_id]
)
end
# Schedule the renaming of the column to happen (initially) 1 hour after
# the last batch finished.
BackgroundMigrationWorker.perform_in(
(max_index * interval) + 1.hour,
'CleanupConcurrentTypeChange',
[table, column, temp_column]
)
if perform_background_migration_inline?
# To ensure the schema is up to date immediately we perform the
# migration inline in dev / test environments.
Gitlab::BackgroundMigration.steal('CopyColumn')
Gitlab::BackgroundMigration.steal('CleanupConcurrentTypeChange')
end
end
def perform_background_migration_inline?
Rails.env.test? || Rails.env.development?
end
# Performs a concurrent column rename when using PostgreSQL.
def install_rename_triggers_for_postgresql(trigger, table, old, new)
execute <<-EOF.strip_heredoc

View File

@ -902,7 +902,7 @@ describe Gitlab::Database::MigrationHelpers do
describe '#check_trigger_permissions!' do
it 'does nothing when the user has the correct permissions' do
expect { model.check_trigger_permissions!('users') }
.not_to raise_error(RuntimeError)
.not_to raise_error
end
it 'raises RuntimeError when the user does not have the correct permissions' do
@ -1036,4 +1036,93 @@ describe Gitlab::Database::MigrationHelpers do
end
end
end
describe '#change_column_type_using_background_migration' do
let!(:issue) { create(:issue) }
let(:issue_model) do
Class.new(ActiveRecord::Base) do
self.table_name = 'issues'
include EachBatch
end
end
it 'changes the type of a column using a background migration' do
expect(model)
.to receive(:add_column)
.with('issues', 'closed_at_for_type_change', :datetime_with_timezone)
expect(model)
.to receive(:install_rename_triggers)
.with('issues', :closed_at, 'closed_at_for_type_change')
expect(BackgroundMigrationWorker)
.to receive(:perform_in)
.ordered
.with(
10.minutes,
'CopyColumn',
['issues', :closed_at, 'closed_at_for_type_change', issue.id, issue.id]
)
expect(BackgroundMigrationWorker)
.to receive(:perform_in)
.ordered
.with(
1.hour + 10.minutes,
'CleanupConcurrentTypeChange',
['issues', :closed_at, 'closed_at_for_type_change']
)
expect(Gitlab::BackgroundMigration)
.to receive(:steal)
.ordered
.with('CopyColumn')
expect(Gitlab::BackgroundMigration)
.to receive(:steal)
.ordered
.with('CleanupConcurrentTypeChange')
model.change_column_type_using_background_migration(
issue_model.all,
:closed_at,
:datetime_with_timezone
)
end
end
describe '#perform_background_migration_inline?' do
it 'returns true in a test environment' do
allow(Rails.env)
.to receive(:test?)
.and_return(true)
expect(model.perform_background_migration_inline?).to eq(true)
end
it 'returns true in a development environment' do
allow(Rails.env)
.to receive(:test?)
.and_return(false)
allow(Rails.env)
.to receive(:development?)
.and_return(true)
expect(model.perform_background_migration_inline?).to eq(true)
end
it 'returns false in a production environment' do
allow(Rails.env)
.to receive(:test?)
.and_return(false)
allow(Rails.env)
.to receive(:development?)
.and_return(false)
expect(model.perform_background_migration_inline?).to eq(false)
end
end
end