Recover from all exceptions when stealing bg migration
It also makes it possible to gracefully retry a migration in order to avoid problems like deadlocks.
This commit is contained in:
parent
01c55ffca8
commit
7b146ab6c3
2 changed files with 86 additions and 23 deletions
|
@ -7,6 +7,12 @@ module Gitlab
|
|||
# Begins stealing jobs from the background migrations queue, blocking the
|
||||
# caller until all jobs have been completed.
|
||||
#
|
||||
# When a migration raises a StandardError is is going to be retries up to
|
||||
# three times, for example, to recover from a deadlock.
|
||||
#
|
||||
# When Exception is being raised, it enqueues the migration again, and
|
||||
# re-raises the exception.
|
||||
#
|
||||
# steal_class - The name of the class for which to steal jobs.
|
||||
def self.steal(steal_class)
|
||||
enqueued = Sidekiq::Queue.new(self.queue)
|
||||
|
@ -20,22 +26,34 @@ module Gitlab
|
|||
next unless migration_class == steal_class
|
||||
|
||||
begin
|
||||
perform(migration_class, migration_args) if job.delete
|
||||
rescue => e
|
||||
Logger.new($stdout).warn(e.message)
|
||||
perform(migration_class, migration_args, retries: 3) if job.delete
|
||||
rescue StandardError
|
||||
next
|
||||
rescue Exception
|
||||
BackgroundMigrationWorker # enqueue this migration again
|
||||
.perform_async(migration_class, migration_args)
|
||||
|
||||
raise
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
##
|
||||
# Performs a background migration. In case of `StandardError` being caught
|
||||
# this will retry a migration up to three times.
|
||||
#
|
||||
# class_name - The name of the background migration class as defined in the
|
||||
# Gitlab::BackgroundMigration namespace.
|
||||
#
|
||||
# arguments - The arguments to pass to the background migration's "perform"
|
||||
# method.
|
||||
def self.perform(class_name, arguments)
|
||||
def self.perform(class_name, arguments, retries: 1)
|
||||
const_get(class_name).new.perform(*arguments)
|
||||
rescue => e
|
||||
Rails.logger.warn("Retrying background migration #{class_name} " \
|
||||
"with #{arguments}")
|
||||
(retries -= 1) > 0 ? retry : raise
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
@ -24,7 +24,8 @@ describe Gitlab::BackgroundMigration do
|
|||
it 'steals jobs from a queue' do
|
||||
expect(queue[0]).to receive(:delete).and_return(true)
|
||||
|
||||
expect(described_class).to receive(:perform).with('Foo', [10, 20])
|
||||
expect(described_class).to receive(:perform)
|
||||
.with('Foo', [10, 20], anything)
|
||||
|
||||
described_class.steal('Foo')
|
||||
end
|
||||
|
@ -32,7 +33,7 @@ describe Gitlab::BackgroundMigration do
|
|||
it 'does not steal job that has already been taken' do
|
||||
expect(queue[0]).to receive(:delete).and_return(false)
|
||||
|
||||
expect(described_class).not_to receive(:perform).with('Foo', [10, 20])
|
||||
expect(described_class).not_to receive(:perform)
|
||||
|
||||
described_class.steal('Foo')
|
||||
end
|
||||
|
@ -57,17 +58,40 @@ describe Gitlab::BackgroundMigration do
|
|||
before do
|
||||
stub_const("#{described_class}::Foo", migration)
|
||||
|
||||
allow(migration).to receive(:perform).with(10, 20)
|
||||
.and_raise(StandardError, 'Migration error')
|
||||
|
||||
allow(queue[0]).to receive(:delete).and_return(true)
|
||||
allow(queue[1]).to receive(:delete).and_return(true)
|
||||
end
|
||||
|
||||
it 'recovers from an exceptions and continues' do
|
||||
expect(migration).to receive(:perform).twice
|
||||
expect { described_class.steal('Foo') }
|
||||
.to output(/Migration error/).to_stdout
|
||||
context 'when standard error is being raised' do
|
||||
before do
|
||||
allow(migration).to receive(:perform).with(10, 20)
|
||||
.and_raise(StandardError, 'Migration error')
|
||||
end
|
||||
|
||||
it 'recovers from an exception and retries the migration' do
|
||||
expect(migration).to receive(:perform).with(10, 20)
|
||||
.exactly(3).times.ordered
|
||||
expect(migration).to receive(:perform).with(20, 30)
|
||||
.once.ordered
|
||||
expect(Rails.logger).to receive(:warn)
|
||||
.with(/Retrying background migration/).exactly(3).times
|
||||
|
||||
described_class.steal('Foo')
|
||||
end
|
||||
end
|
||||
|
||||
context 'when top level exception is being raised' do
|
||||
it 'enqueues the migration again and reraises the error' do
|
||||
allow(migration).to receive(:perform).with(10, 20)
|
||||
.and_raise(Exception, 'Migration error').once
|
||||
|
||||
expect(BackgroundMigrationWorker).to receive(:perform_async)
|
||||
.with('Foo', [10, 20]).once
|
||||
|
||||
expect(Rails.logger).not_to receive(:warn)
|
||||
expect { described_class.steal('Foo') }
|
||||
.to raise_error(Exception)
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
@ -91,9 +115,9 @@ describe Gitlab::BackgroundMigration do
|
|||
it 'steals from the scheduled sets queue first' do
|
||||
Sidekiq::Testing.disable! do
|
||||
expect(described_class).to receive(:perform)
|
||||
.with('Object', [1]).ordered
|
||||
.with('Object', [1], anything).ordered
|
||||
expect(described_class).to receive(:perform)
|
||||
.with('Object', [2]).ordered
|
||||
.with('Object', [2], anything).ordered
|
||||
|
||||
BackgroundMigrationWorker.perform_async('Object', [2])
|
||||
BackgroundMigrationWorker.perform_in(10.minutes, 'Object', [1])
|
||||
|
@ -105,17 +129,38 @@ describe Gitlab::BackgroundMigration do
|
|||
end
|
||||
|
||||
describe '.perform' do
|
||||
it 'performs a background migration' do
|
||||
instance = double(:instance)
|
||||
klass = double(:klass, new: instance)
|
||||
let(:migration) { spy(:migration) }
|
||||
|
||||
expect(described_class).to receive(:const_get)
|
||||
.with('Foo')
|
||||
.and_return(klass)
|
||||
before do
|
||||
stub_const("#{described_class.name}::Foo", migration)
|
||||
end
|
||||
|
||||
expect(instance).to receive(:perform).with(10, 20)
|
||||
context 'when retries count is not specified' do
|
||||
it 'performs a background migration' do
|
||||
expect(migration).to receive(:perform).with(10, 20).once
|
||||
|
||||
described_class.perform('Foo', [10, 20])
|
||||
described_class.perform('Foo', [10, 20])
|
||||
end
|
||||
end
|
||||
|
||||
context 'when retries count is zero' do
|
||||
it 'perform a background migration only once' do
|
||||
expect(migration).to receive(:perform).with(10, 20)
|
||||
.and_raise(StandardError).once
|
||||
|
||||
expect { described_class.perform('Foo', [10, 20], retries: 0) }
|
||||
.to raise_error(StandardError)
|
||||
end
|
||||
end
|
||||
|
||||
context 'when retries count is larger than zero' do
|
||||
it 'retries a background migration when needed' do
|
||||
expect(migration).to receive(:perform).with(10, 20)
|
||||
.and_raise(StandardError).exactly(3).times
|
||||
|
||||
expect { described_class.perform('Foo', [10, 20], retries: 3) }
|
||||
.to raise_error(StandardError)
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
Loading…
Reference in a new issue