diff --git a/changelogs/unreleased/change-issues-closed-at-background-migration.yml b/changelogs/unreleased/change-issues-closed-at-background-migration.yml new file mode 100644 index 00000000000..1c81c6a889e --- /dev/null +++ b/changelogs/unreleased/change-issues-closed-at-background-migration.yml @@ -0,0 +1,5 @@ +--- +title: Use a background migration for issues.closed_at +merge_request: +author: +type: other diff --git a/db/post_migrate/20171221140220_schedule_issues_closed_at_type_change.rb b/db/post_migrate/20171221140220_schedule_issues_closed_at_type_change.rb new file mode 100644 index 00000000000..be18c5866ae --- /dev/null +++ b/db/post_migrate/20171221140220_schedule_issues_closed_at_type_change.rb @@ -0,0 +1,45 @@ +# See http://doc.gitlab.com/ce/development/migration_style_guide.html +# for more information on how to write migrations for GitLab. + +class ScheduleIssuesClosedAtTypeChange < ActiveRecord::Migration + include Gitlab::Database::MigrationHelpers + + DOWNTIME = false + + disable_ddl_transaction! + + class Issue < ActiveRecord::Base + self.table_name = 'issues' + include EachBatch + + def self.to_migrate + where('closed_at IS NOT NULL') + end + end + + def up + return unless migrate_column_type? + + change_column_type_using_background_migration( + Issue.to_migrate, + :closed_at, + :datetime_with_timezone + ) + end + + def down + return if migrate_column_type? + + change_column_type_using_background_migration( + Issue.to_migrate, + :closed_at, + :datetime + ) + end + + def migrate_column_type? + # Some environments may have already executed the previous version of this + # migration, thus we don't need to migrate those environments again. + column_for('issues', 'closed_at').type == :datetime + end +end diff --git a/db/schema.rb b/db/schema.rb index 9d3b5db117e..925629f28fb 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -11,7 +11,7 @@ # # It's strongly recommended that you check this file into your version control system. -ActiveRecord::Schema.define(version: 20171220191323) do +ActiveRecord::Schema.define(version: 20171221140220) do # These are extensions that must be enabled in order to support this database enable_extension "plpgsql" diff --git a/doc/development/what_requires_downtime.md b/doc/development/what_requires_downtime.md index 05e0a64af18..9d0c62ecc35 100644 --- a/doc/development/what_requires_downtime.md +++ b/doc/development/what_requires_downtime.md @@ -195,6 +195,63 @@ end And that's it, we're done! +## Changing Column Types For Large Tables + +While `change_column_type_concurrently` can be used for changing the type of a +column without downtime it doesn't work very well for large tables. Because all +of the work happens in sequence the migration can take a very long time to +complete, preventing a deployment from proceeding. +`change_column_type_concurrently` can also produce a lot of pressure on the +database due to it rapidly updating many rows in sequence. + +To reduce database pressure you should instead use +`change_column_type_using_background_migration` when migrating a column in a +large table (e.g. `issues`). This method works similar to +`change_column_type_concurrently` but uses background migration to spread the +work / load over a longer time period, without slowing down deployments. + +Usage of this method is fairly simple: + +```ruby +class ExampleMigration < ActiveRecord::Migration + include Gitlab::Database::MigrationHelpers + + disable_ddl_transaction! + + class Issue < ActiveRecord::Base + self.table_name = 'issues' + + include EachBatch + + def self.to_migrate + where('closed_at IS NOT NULL') + end + end + + def up + change_column_type_using_background_migration( + Issue.to_migrate, + :closed_at, + :datetime_with_timezone + ) + end + + def down + change_column_type_using_background_migration( + Issue.to_migrate, + :closed_at, + :datetime + ) + end +end +``` + +This would change the type of `issues.closed_at` to `timestamp with time zone`. + +Keep in mind that the relation passed to +`change_column_type_using_background_migration` _must_ include `EachBatch`, +otherwise it will raise a `TypeError`. + ## Adding Indexes Adding indexes is an expensive process that blocks INSERT and UPDATE queries for diff --git a/lib/gitlab/background_migration/cleanup_concurrent_type_change.rb b/lib/gitlab/background_migration/cleanup_concurrent_type_change.rb new file mode 100644 index 00000000000..de622f657b2 --- /dev/null +++ b/lib/gitlab/background_migration/cleanup_concurrent_type_change.rb @@ -0,0 +1,54 @@ +# frozen_string_literal: true + +module Gitlab + module BackgroundMigration + # Background migration for cleaning up a concurrent column rename. + class CleanupConcurrentTypeChange + include Database::MigrationHelpers + + RESCHEDULE_DELAY = 10.minutes + + # table - The name of the table the migration is performed for. + # old_column - The name of the old (to drop) column. + # new_column - The name of the new column. + def perform(table, old_column, new_column) + return unless column_exists?(:issues, new_column) + + rows_to_migrate = define_model_for(table) + .where(new_column => nil) + .where + .not(old_column => nil) + + if rows_to_migrate.any? + BackgroundMigrationWorker.perform_in( + RESCHEDULE_DELAY, + 'CleanupConcurrentTypeChange', + [table, old_column, new_column] + ) + else + cleanup_concurrent_column_type_change(table, old_column) + end + end + + # These methods are necessary so we can re-use the migration helpers in + # this class. + def connection + ActiveRecord::Base.connection + end + + def method_missing(name, *args, &block) + connection.__send__(name, *args, &block) # rubocop: disable GitlabSecurity/PublicSend + end + + def respond_to_missing?(*args) + connection.respond_to?(*args) || super + end + + def define_model_for(table) + Class.new(ActiveRecord::Base) do + self.table_name = table + end + end + end + end +end diff --git a/lib/gitlab/background_migration/copy_column.rb b/lib/gitlab/background_migration/copy_column.rb new file mode 100644 index 00000000000..a2cb215c230 --- /dev/null +++ b/lib/gitlab/background_migration/copy_column.rb @@ -0,0 +1,39 @@ +# frozen_string_literal: true + +module Gitlab + module BackgroundMigration + # CopyColumn is a simple (reusable) background migration that can be used to + # update the value of a column based on the value of another column in the + # same table. + # + # For this background migration to work the table that is migrated _has_ to + # have an `id` column as the primary key. + class CopyColumn + # table - The name of the table that contains the columns. + # copy_from - The column containing the data to copy. + # copy_to - The column to copy the data to. + # start_id - The start ID of the range of rows to update. + # end_id - The end ID of the range of rows to update. + def perform(table, copy_from, copy_to, start_id, end_id) + return unless connection.column_exists?(table, copy_to) + + quoted_table = connection.quote_table_name(table) + quoted_copy_from = connection.quote_column_name(copy_from) + quoted_copy_to = connection.quote_column_name(copy_to) + + # We're using raw SQL here since this job may be frequently executed. As + # a result dynamically defining models would lead to many unnecessary + # schema information queries. + connection.execute <<-SQL.strip_heredoc + UPDATE #{quoted_table} + SET #{quoted_copy_to} = #{quoted_copy_from} + WHERE id BETWEEN #{start_id} AND #{end_id} + SQL + end + + def connection + ActiveRecord::Base.connection + end + end + end +end diff --git a/lib/gitlab/database/migration_helpers.rb b/lib/gitlab/database/migration_helpers.rb index 3f65bc912de..33171f83692 100644 --- a/lib/gitlab/database/migration_helpers.rb +++ b/lib/gitlab/database/migration_helpers.rb @@ -385,10 +385,27 @@ module Gitlab # necessary since we copy over old values further down. change_column_default(table, new, old_col.default) if old_col.default - trigger_name = rename_trigger_name(table, old, new) + install_rename_triggers(table, old, new) + + update_column_in_batches(table, new, Arel::Table.new(table)[old]) + + change_column_null(table, new, false) unless old_col.null + + copy_indexes(table, old, new) + copy_foreign_keys(table, old, new) + end + + # Installs triggers in a table that keep a new column in sync with an old + # one. + # + # table - The name of the table to install the trigger in. + # old_column - The name of the old column. + # new_column - The name of the new column. + def install_rename_triggers(table, old_column, new_column) + trigger_name = rename_trigger_name(table, old_column, new_column) quoted_table = quote_table_name(table) - quoted_old = quote_column_name(old) - quoted_new = quote_column_name(new) + quoted_old = quote_column_name(old_column) + quoted_new = quote_column_name(new_column) if Database.postgresql? install_rename_triggers_for_postgresql(trigger_name, quoted_table, @@ -397,13 +414,6 @@ module Gitlab install_rename_triggers_for_mysql(trigger_name, quoted_table, quoted_old, quoted_new) end - - update_column_in_batches(table, new, Arel::Table.new(table)[old]) - - change_column_null(table, new, false) unless old_col.null - - copy_indexes(table, old, new) - copy_foreign_keys(table, old, new) end # Changes the type of a column concurrently. @@ -455,6 +465,97 @@ module Gitlab remove_column(table, old) end + # Changes the column type of a table using a background migration. + # + # Because this method uses a background migration it's more suitable for + # large tables. For small tables it's better to use + # `change_column_type_concurrently` since it can complete its work in a + # much shorter amount of time and doesn't rely on Sidekiq. + # + # Example usage: + # + # class Issue < ActiveRecord::Base + # self.table_name = 'issues' + # + # include EachBatch + # + # def self.to_migrate + # where('closed_at IS NOT NULL') + # end + # end + # + # change_column_type_using_background_migration( + # Issue.to_migrate, + # :closed_at, + # :datetime_with_timezone + # ) + # + # Reverting a migration like this is done exactly the same way, just with + # a different type to migrate to (e.g. `:datetime` in the above example). + # + # relation - An ActiveRecord relation to use for scheduling jobs and + # figuring out what table we're modifying. This relation _must_ + # have the EachBatch module included. + # + # column - The name of the column for which the type will be changed. + # + # new_type - The new type of the column. + # + # batch_size - The number of rows to schedule in a single background + # migration. + # + # interval - The time interval between every background migration. + def change_column_type_using_background_migration( + relation, + column, + new_type, + batch_size: 10_000, + interval: 10.minutes + ) + unless relation.model < EachBatch + raise TypeError, 'The relation must include the EachBatch module' + end + + temp_column = "#{column}_for_type_change" + table = relation.table_name + max_index = 0 + + add_column(table, temp_column, new_type) + install_rename_triggers(table, column, temp_column) + + # Schedule the jobs that will copy the data from the old column to the + # new one. + relation.each_batch(of: batch_size) do |batch, index| + start_id, end_id = batch.pluck('MIN(id), MAX(id)').first + max_index = index + + BackgroundMigrationWorker.perform_in( + index * interval, + 'CopyColumn', + [table, column, temp_column, start_id, end_id] + ) + end + + # Schedule the renaming of the column to happen (initially) 1 hour after + # the last batch finished. + BackgroundMigrationWorker.perform_in( + (max_index * interval) + 1.hour, + 'CleanupConcurrentTypeChange', + [table, column, temp_column] + ) + + if perform_background_migration_inline? + # To ensure the schema is up to date immediately we perform the + # migration inline in dev / test environments. + Gitlab::BackgroundMigration.steal('CopyColumn') + Gitlab::BackgroundMigration.steal('CleanupConcurrentTypeChange') + end + end + + def perform_background_migration_inline? + Rails.env.test? || Rails.env.development? + end + # Performs a concurrent column rename when using PostgreSQL. def install_rename_triggers_for_postgresql(trigger, table, old, new) execute <<-EOF.strip_heredoc diff --git a/spec/lib/gitlab/database/migration_helpers_spec.rb b/spec/lib/gitlab/database/migration_helpers_spec.rb index 664ba0f7234..7727a1d81b1 100644 --- a/spec/lib/gitlab/database/migration_helpers_spec.rb +++ b/spec/lib/gitlab/database/migration_helpers_spec.rb @@ -902,7 +902,7 @@ describe Gitlab::Database::MigrationHelpers do describe '#check_trigger_permissions!' do it 'does nothing when the user has the correct permissions' do expect { model.check_trigger_permissions!('users') } - .not_to raise_error(RuntimeError) + .not_to raise_error end it 'raises RuntimeError when the user does not have the correct permissions' do @@ -1036,4 +1036,93 @@ describe Gitlab::Database::MigrationHelpers do end end end + + describe '#change_column_type_using_background_migration' do + let!(:issue) { create(:issue) } + + let(:issue_model) do + Class.new(ActiveRecord::Base) do + self.table_name = 'issues' + include EachBatch + end + end + + it 'changes the type of a column using a background migration' do + expect(model) + .to receive(:add_column) + .with('issues', 'closed_at_for_type_change', :datetime_with_timezone) + + expect(model) + .to receive(:install_rename_triggers) + .with('issues', :closed_at, 'closed_at_for_type_change') + + expect(BackgroundMigrationWorker) + .to receive(:perform_in) + .ordered + .with( + 10.minutes, + 'CopyColumn', + ['issues', :closed_at, 'closed_at_for_type_change', issue.id, issue.id] + ) + + expect(BackgroundMigrationWorker) + .to receive(:perform_in) + .ordered + .with( + 1.hour + 10.minutes, + 'CleanupConcurrentTypeChange', + ['issues', :closed_at, 'closed_at_for_type_change'] + ) + + expect(Gitlab::BackgroundMigration) + .to receive(:steal) + .ordered + .with('CopyColumn') + + expect(Gitlab::BackgroundMigration) + .to receive(:steal) + .ordered + .with('CleanupConcurrentTypeChange') + + model.change_column_type_using_background_migration( + issue_model.all, + :closed_at, + :datetime_with_timezone + ) + end + end + + describe '#perform_background_migration_inline?' do + it 'returns true in a test environment' do + allow(Rails.env) + .to receive(:test?) + .and_return(true) + + expect(model.perform_background_migration_inline?).to eq(true) + end + + it 'returns true in a development environment' do + allow(Rails.env) + .to receive(:test?) + .and_return(false) + + allow(Rails.env) + .to receive(:development?) + .and_return(true) + + expect(model.perform_background_migration_inline?).to eq(true) + end + + it 'returns false in a production environment' do + allow(Rails.env) + .to receive(:test?) + .and_return(false) + + allow(Rails.env) + .to receive(:development?) + .and_return(false) + + expect(model.perform_background_migration_inline?).to eq(false) + end + end end