Recover from all exceptions when stealing bg migration

It also makes it possible to gracefully retry a migration in order to
avoid problems like deadlocks.
This commit is contained in:
Grzegorz Bizon 2017-07-17 10:16:42 +02:00
parent 01c55ffca8
commit 7b146ab6c3
2 changed files with 86 additions and 23 deletions

View file

@ -7,6 +7,12 @@ module Gitlab
# Begins stealing jobs from the background migrations queue, blocking the # Begins stealing jobs from the background migrations queue, blocking the
# caller until all jobs have been completed. # caller until all jobs have been completed.
# #
# When a migration raises a StandardError, it is attempted up to three
# times in total, for example, to recover from a deadlock.
#
# When Exception is being raised, it enqueues the migration again, and
# re-raises the exception.
#
# steal_class - The name of the class for which to steal jobs. # steal_class - The name of the class for which to steal jobs.
def self.steal(steal_class) def self.steal(steal_class)
enqueued = Sidekiq::Queue.new(self.queue) enqueued = Sidekiq::Queue.new(self.queue)
@ -20,22 +26,34 @@ module Gitlab
next unless migration_class == steal_class next unless migration_class == steal_class
begin begin
perform(migration_class, migration_args) if job.delete perform(migration_class, migration_args, retries: 3) if job.delete
rescue => e rescue StandardError
Logger.new($stdout).warn(e.message)
next next
rescue Exception
BackgroundMigrationWorker # enqueue this migration again
.perform_async(migration_class, migration_args)
raise
end end
end end
end end
end end
##
# Performs a background migration. When a `StandardError` is caught, the
# migration is retried until `retries` attempts have been made in total
# (`steal` passes `retries: 3`); after that the error is re-raised.
#
# class_name - The name of the background migration class as defined in the # class_name - The name of the background migration class as defined in the
# Gitlab::BackgroundMigration namespace. # Gitlab::BackgroundMigration namespace.
# #
# arguments - The arguments to pass to the background migration's "perform" # arguments - The arguments to pass to the background migration's "perform"
# method. # method.
def self.perform(class_name, arguments) def self.perform(class_name, arguments, retries: 1)
const_get(class_name).new.perform(*arguments) const_get(class_name).new.perform(*arguments)
rescue => e
Rails.logger.warn("Retrying background migration #{class_name} " \
"with #{arguments}")
(retries -= 1) > 0 ? retry : raise
end end
end end
end end

View file

@ -24,7 +24,8 @@ describe Gitlab::BackgroundMigration do
it 'steals jobs from a queue' do it 'steals jobs from a queue' do
expect(queue[0]).to receive(:delete).and_return(true) expect(queue[0]).to receive(:delete).and_return(true)
expect(described_class).to receive(:perform).with('Foo', [10, 20]) expect(described_class).to receive(:perform)
.with('Foo', [10, 20], anything)
described_class.steal('Foo') described_class.steal('Foo')
end end
@ -32,7 +33,7 @@ describe Gitlab::BackgroundMigration do
it 'does not steal job that has already been taken' do it 'does not steal job that has already been taken' do
expect(queue[0]).to receive(:delete).and_return(false) expect(queue[0]).to receive(:delete).and_return(false)
expect(described_class).not_to receive(:perform).with('Foo', [10, 20]) expect(described_class).not_to receive(:perform)
described_class.steal('Foo') described_class.steal('Foo')
end end
@ -57,17 +58,40 @@ describe Gitlab::BackgroundMigration do
before do before do
stub_const("#{described_class}::Foo", migration) stub_const("#{described_class}::Foo", migration)
allow(migration).to receive(:perform).with(10, 20)
.and_raise(StandardError, 'Migration error')
allow(queue[0]).to receive(:delete).and_return(true) allow(queue[0]).to receive(:delete).and_return(true)
allow(queue[1]).to receive(:delete).and_return(true) allow(queue[1]).to receive(:delete).and_return(true)
end end
it 'recovers from an exceptions and continues' do context 'when standard error is being raised' do
expect(migration).to receive(:perform).twice before do
allow(migration).to receive(:perform).with(10, 20)
.and_raise(StandardError, 'Migration error')
end
it 'recovers from an exception and retries the migration' do
expect(migration).to receive(:perform).with(10, 20)
.exactly(3).times.ordered
expect(migration).to receive(:perform).with(20, 30)
.once.ordered
expect(Rails.logger).to receive(:warn)
.with(/Retrying background migration/).exactly(3).times
described_class.steal('Foo')
end
end
context 'when top level exception is being raised' do
it 'enqueues the migration again and reraises the error' do
allow(migration).to receive(:perform).with(10, 20)
.and_raise(Exception, 'Migration error').once
expect(BackgroundMigrationWorker).to receive(:perform_async)
.with('Foo', [10, 20]).once
expect(Rails.logger).not_to receive(:warn)
expect { described_class.steal('Foo') } expect { described_class.steal('Foo') }
.to output(/Migration error/).to_stdout .to raise_error(Exception)
end
end end
end end
end end
@ -91,9 +115,9 @@ describe Gitlab::BackgroundMigration do
it 'steals from the scheduled sets queue first' do it 'steals from the scheduled sets queue first' do
Sidekiq::Testing.disable! do Sidekiq::Testing.disable! do
expect(described_class).to receive(:perform) expect(described_class).to receive(:perform)
.with('Object', [1]).ordered .with('Object', [1], anything).ordered
expect(described_class).to receive(:perform) expect(described_class).to receive(:perform)
.with('Object', [2]).ordered .with('Object', [2], anything).ordered
BackgroundMigrationWorker.perform_async('Object', [2]) BackgroundMigrationWorker.perform_async('Object', [2])
BackgroundMigrationWorker.perform_in(10.minutes, 'Object', [1]) BackgroundMigrationWorker.perform_in(10.minutes, 'Object', [1])
@ -105,17 +129,38 @@ describe Gitlab::BackgroundMigration do
end end
describe '.perform' do describe '.perform' do
let(:migration) { spy(:migration) }
before do
stub_const("#{described_class.name}::Foo", migration)
end
context 'when retries count is not specified' do
it 'performs a background migration' do it 'performs a background migration' do
instance = double(:instance) expect(migration).to receive(:perform).with(10, 20).once
klass = double(:klass, new: instance)
expect(described_class).to receive(:const_get)
.with('Foo')
.and_return(klass)
expect(instance).to receive(:perform).with(10, 20)
described_class.perform('Foo', [10, 20]) described_class.perform('Foo', [10, 20])
end end
end end
context 'when retries count is zero' do
it 'perform a background migration only once' do
expect(migration).to receive(:perform).with(10, 20)
.and_raise(StandardError).once
expect { described_class.perform('Foo', [10, 20], retries: 0) }
.to raise_error(StandardError)
end
end
context 'when retries count is larger than zero' do
it 'retries a background migration when needed' do
expect(migration).to receive(:perform).with(10, 20)
.and_raise(StandardError).exactly(3).times
expect { described_class.perform('Foo', [10, 20], retries: 3) }
.to raise_error(StandardError)
end
end
end
end end