From 7b146ab6c3a08f40acff5301ecaa60e8f83010a4 Mon Sep 17 00:00:00 2001 From: Grzegorz Bizon Date: Mon, 17 Jul 2017 10:16:42 +0200 Subject: [PATCH] Recover from all exceptions when stealing bg migration It also makes it possible to gracefully retry a migration in order to avoid problems like deadlocks. --- lib/gitlab/background_migration.rb | 26 +++++- spec/lib/gitlab/background_migration_spec.rb | 83 +++++++++++++++----- 2 files changed, 86 insertions(+), 23 deletions(-) diff --git a/lib/gitlab/background_migration.rb b/lib/gitlab/background_migration.rb index 64ddae12f89..cb0ac9221a0 100644 --- a/lib/gitlab/background_migration.rb +++ b/lib/gitlab/background_migration.rb @@ -7,6 +7,12 @@ module Gitlab # Begins stealing jobs from the background migrations queue, blocking the # caller until all jobs have been completed. # + # When a migration raises a StandardError is is going to be retries up to + # three times, for example, to recover from a deadlock. + # + # When Exception is being raised, it enqueues the migration again, and + # re-raises the exception. + # # steal_class - The name of the class for which to steal jobs. def self.steal(steal_class) enqueued = Sidekiq::Queue.new(self.queue) @@ -20,22 +26,34 @@ module Gitlab next unless migration_class == steal_class begin - perform(migration_class, migration_args) if job.delete - rescue => e - Logger.new($stdout).warn(e.message) + perform(migration_class, migration_args, retries: 3) if job.delete + rescue StandardError next + rescue Exception + BackgroundMigrationWorker # enqueue this migration again + .perform_async(migration_class, migration_args) + + raise end end end end + ## + # Performs a background migration. In case of `StandardError` being caught + # this will retry a migration up to three times. + # # class_name - The name of the background migration class as defined in the # Gitlab::BackgroundMigration namespace. # # arguments - The arguments to pass to the background migration's "perform" # method. - def self.perform(class_name, arguments) + def self.perform(class_name, arguments, retries: 1) const_get(class_name).new.perform(*arguments) + rescue => e + Rails.logger.warn("Retrying background migration #{class_name} " \ + "with #{arguments}") + (retries -= 1) > 0 ? retry : raise end end end diff --git a/spec/lib/gitlab/background_migration_spec.rb b/spec/lib/gitlab/background_migration_spec.rb index b28329792eb..9c6b07f02cc 100644 --- a/spec/lib/gitlab/background_migration_spec.rb +++ b/spec/lib/gitlab/background_migration_spec.rb @@ -24,7 +24,8 @@ describe Gitlab::BackgroundMigration do it 'steals jobs from a queue' do expect(queue[0]).to receive(:delete).and_return(true) - expect(described_class).to receive(:perform).with('Foo', [10, 20]) + expect(described_class).to receive(:perform) + .with('Foo', [10, 20], anything) described_class.steal('Foo') end @@ -32,7 +33,7 @@ describe Gitlab::BackgroundMigration do it 'does not steal job that has already been taken' do expect(queue[0]).to receive(:delete).and_return(false) - expect(described_class).not_to receive(:perform).with('Foo', [10, 20]) + expect(described_class).not_to receive(:perform) described_class.steal('Foo') end @@ -57,17 +58,40 @@ describe Gitlab::BackgroundMigration do before do stub_const("#{described_class}::Foo", migration) - allow(migration).to receive(:perform).with(10, 20) - .and_raise(StandardError, 'Migration error') - allow(queue[0]).to receive(:delete).and_return(true) allow(queue[1]).to receive(:delete).and_return(true) end - it 'recovers from an exceptions and continues' do - expect(migration).to receive(:perform).twice - expect { described_class.steal('Foo') } - .to output(/Migration error/).to_stdout + context 'when standard error is being raised' do + before do + allow(migration).to receive(:perform).with(10, 20) + .and_raise(StandardError, 'Migration error') + end + + it 'recovers from an exception and retries the migration' do + expect(migration).to receive(:perform).with(10, 20) + .exactly(3).times.ordered + expect(migration).to receive(:perform).with(20, 30) + .once.ordered + expect(Rails.logger).to receive(:warn) + .with(/Retrying background migration/).exactly(3).times + + described_class.steal('Foo') + end + end + + context 'when top level exception is being raised' do + it 'enqueues the migration again and reraises the error' do + allow(migration).to receive(:perform).with(10, 20) + .and_raise(Exception, 'Migration error').once + + expect(BackgroundMigrationWorker).to receive(:perform_async) + .with('Foo', [10, 20]).once + + expect(Rails.logger).not_to receive(:warn) + expect { described_class.steal('Foo') } + .to raise_error(Exception) + end end end end @@ -91,9 +115,9 @@ describe Gitlab::BackgroundMigration do it 'steals from the scheduled sets queue first' do Sidekiq::Testing.disable! do expect(described_class).to receive(:perform) - .with('Object', [1]).ordered + .with('Object', [1], anything).ordered expect(described_class).to receive(:perform) - .with('Object', [2]).ordered + .with('Object', [2], anything).ordered BackgroundMigrationWorker.perform_async('Object', [2]) BackgroundMigrationWorker.perform_in(10.minutes, 'Object', [1]) @@ -105,17 +129,38 @@ describe Gitlab::BackgroundMigration do end describe '.perform' do - it 'performs a background migration' do - instance = double(:instance) - klass = double(:klass, new: instance) + let(:migration) { spy(:migration) } - expect(described_class).to receive(:const_get) - .with('Foo') - .and_return(klass) + before do + stub_const("#{described_class.name}::Foo", migration) + end - expect(instance).to receive(:perform).with(10, 20) + context 'when retries count is not specified' do + it 'performs a background migration' do + expect(migration).to receive(:perform).with(10, 20).once - described_class.perform('Foo', [10, 20]) + described_class.perform('Foo', [10, 20]) + end + end + + context 'when retries count is zero' do + it 'perform a background migration only once' do + expect(migration).to receive(:perform).with(10, 20) + .and_raise(StandardError).once + + expect { described_class.perform('Foo', [10, 20], retries: 0) } + .to raise_error(StandardError) + end + end + + context 'when retries count is larger than zero' do + it 'retries a background migration when needed' do + expect(migration).to receive(:perform).with(10, 20) + .and_raise(StandardError).exactly(3).times + + expect { described_class.perform('Foo', [10, 20], retries: 3) } + .to raise_error(StandardError) + end end end end