Recover from all exceptions when stealing bg migration
It also makes it possible to gracefully retry a migration in order to avoid problems like deadlocks.
This commit is contained in:
parent
01c55ffca8
commit
7b146ab6c3
2 changed files with 86 additions and 23 deletions
|
@ -7,6 +7,12 @@ module Gitlab
|
||||||
# Begins stealing jobs from the background migrations queue, blocking the
|
# Begins stealing jobs from the background migrations queue, blocking the
|
||||||
# caller until all jobs have been completed.
|
# caller until all jobs have been completed.
|
||||||
#
|
#
|
||||||
|
# When a migration raises a StandardError is is going to be retries up to
|
||||||
|
# three times, for example, to recover from a deadlock.
|
||||||
|
#
|
||||||
|
# When Exception is being raised, it enqueues the migration again, and
|
||||||
|
# re-raises the exception.
|
||||||
|
#
|
||||||
# steal_class - The name of the class for which to steal jobs.
|
# steal_class - The name of the class for which to steal jobs.
|
||||||
def self.steal(steal_class)
|
def self.steal(steal_class)
|
||||||
enqueued = Sidekiq::Queue.new(self.queue)
|
enqueued = Sidekiq::Queue.new(self.queue)
|
||||||
|
@ -20,22 +26,34 @@ module Gitlab
|
||||||
next unless migration_class == steal_class
|
next unless migration_class == steal_class
|
||||||
|
|
||||||
begin
|
begin
|
||||||
perform(migration_class, migration_args) if job.delete
|
perform(migration_class, migration_args, retries: 3) if job.delete
|
||||||
rescue => e
|
rescue StandardError
|
||||||
Logger.new($stdout).warn(e.message)
|
|
||||||
next
|
next
|
||||||
|
rescue Exception
|
||||||
|
BackgroundMigrationWorker # enqueue this migration again
|
||||||
|
.perform_async(migration_class, migration_args)
|
||||||
|
|
||||||
|
raise
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
##
|
||||||
|
# Performs a background migration. In case of `StandardError` being caught
|
||||||
|
# this will retry a migration up to three times.
|
||||||
|
#
|
||||||
# class_name - The name of the background migration class as defined in the
|
# class_name - The name of the background migration class as defined in the
|
||||||
# Gitlab::BackgroundMigration namespace.
|
# Gitlab::BackgroundMigration namespace.
|
||||||
#
|
#
|
||||||
# arguments - The arguments to pass to the background migration's "perform"
|
# arguments - The arguments to pass to the background migration's "perform"
|
||||||
# method.
|
# method.
|
||||||
def self.perform(class_name, arguments)
|
def self.perform(class_name, arguments, retries: 1)
|
||||||
const_get(class_name).new.perform(*arguments)
|
const_get(class_name).new.perform(*arguments)
|
||||||
|
rescue => e
|
||||||
|
Rails.logger.warn("Retrying background migration #{class_name} " \
|
||||||
|
"with #{arguments}")
|
||||||
|
(retries -= 1) > 0 ? retry : raise
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
|
@ -24,7 +24,8 @@ describe Gitlab::BackgroundMigration do
|
||||||
it 'steals jobs from a queue' do
|
it 'steals jobs from a queue' do
|
||||||
expect(queue[0]).to receive(:delete).and_return(true)
|
expect(queue[0]).to receive(:delete).and_return(true)
|
||||||
|
|
||||||
expect(described_class).to receive(:perform).with('Foo', [10, 20])
|
expect(described_class).to receive(:perform)
|
||||||
|
.with('Foo', [10, 20], anything)
|
||||||
|
|
||||||
described_class.steal('Foo')
|
described_class.steal('Foo')
|
||||||
end
|
end
|
||||||
|
@ -32,7 +33,7 @@ describe Gitlab::BackgroundMigration do
|
||||||
it 'does not steal job that has already been taken' do
|
it 'does not steal job that has already been taken' do
|
||||||
expect(queue[0]).to receive(:delete).and_return(false)
|
expect(queue[0]).to receive(:delete).and_return(false)
|
||||||
|
|
||||||
expect(described_class).not_to receive(:perform).with('Foo', [10, 20])
|
expect(described_class).not_to receive(:perform)
|
||||||
|
|
||||||
described_class.steal('Foo')
|
described_class.steal('Foo')
|
||||||
end
|
end
|
||||||
|
@ -57,17 +58,40 @@ describe Gitlab::BackgroundMigration do
|
||||||
before do
|
before do
|
||||||
stub_const("#{described_class}::Foo", migration)
|
stub_const("#{described_class}::Foo", migration)
|
||||||
|
|
||||||
allow(migration).to receive(:perform).with(10, 20)
|
|
||||||
.and_raise(StandardError, 'Migration error')
|
|
||||||
|
|
||||||
allow(queue[0]).to receive(:delete).and_return(true)
|
allow(queue[0]).to receive(:delete).and_return(true)
|
||||||
allow(queue[1]).to receive(:delete).and_return(true)
|
allow(queue[1]).to receive(:delete).and_return(true)
|
||||||
end
|
end
|
||||||
|
|
||||||
it 'recovers from an exceptions and continues' do
|
context 'when standard error is being raised' do
|
||||||
expect(migration).to receive(:perform).twice
|
before do
|
||||||
|
allow(migration).to receive(:perform).with(10, 20)
|
||||||
|
.and_raise(StandardError, 'Migration error')
|
||||||
|
end
|
||||||
|
|
||||||
|
it 'recovers from an exception and retries the migration' do
|
||||||
|
expect(migration).to receive(:perform).with(10, 20)
|
||||||
|
.exactly(3).times.ordered
|
||||||
|
expect(migration).to receive(:perform).with(20, 30)
|
||||||
|
.once.ordered
|
||||||
|
expect(Rails.logger).to receive(:warn)
|
||||||
|
.with(/Retrying background migration/).exactly(3).times
|
||||||
|
|
||||||
|
described_class.steal('Foo')
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
context 'when top level exception is being raised' do
|
||||||
|
it 'enqueues the migration again and reraises the error' do
|
||||||
|
allow(migration).to receive(:perform).with(10, 20)
|
||||||
|
.and_raise(Exception, 'Migration error').once
|
||||||
|
|
||||||
|
expect(BackgroundMigrationWorker).to receive(:perform_async)
|
||||||
|
.with('Foo', [10, 20]).once
|
||||||
|
|
||||||
|
expect(Rails.logger).not_to receive(:warn)
|
||||||
expect { described_class.steal('Foo') }
|
expect { described_class.steal('Foo') }
|
||||||
.to output(/Migration error/).to_stdout
|
.to raise_error(Exception)
|
||||||
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
@ -91,9 +115,9 @@ describe Gitlab::BackgroundMigration do
|
||||||
it 'steals from the scheduled sets queue first' do
|
it 'steals from the scheduled sets queue first' do
|
||||||
Sidekiq::Testing.disable! do
|
Sidekiq::Testing.disable! do
|
||||||
expect(described_class).to receive(:perform)
|
expect(described_class).to receive(:perform)
|
||||||
.with('Object', [1]).ordered
|
.with('Object', [1], anything).ordered
|
||||||
expect(described_class).to receive(:perform)
|
expect(described_class).to receive(:perform)
|
||||||
.with('Object', [2]).ordered
|
.with('Object', [2], anything).ordered
|
||||||
|
|
||||||
BackgroundMigrationWorker.perform_async('Object', [2])
|
BackgroundMigrationWorker.perform_async('Object', [2])
|
||||||
BackgroundMigrationWorker.perform_in(10.minutes, 'Object', [1])
|
BackgroundMigrationWorker.perform_in(10.minutes, 'Object', [1])
|
||||||
|
@ -105,17 +129,38 @@ describe Gitlab::BackgroundMigration do
|
||||||
end
|
end
|
||||||
|
|
||||||
describe '.perform' do
|
describe '.perform' do
|
||||||
|
let(:migration) { spy(:migration) }
|
||||||
|
|
||||||
|
before do
|
||||||
|
stub_const("#{described_class.name}::Foo", migration)
|
||||||
|
end
|
||||||
|
|
||||||
|
context 'when retries count is not specified' do
|
||||||
it 'performs a background migration' do
|
it 'performs a background migration' do
|
||||||
instance = double(:instance)
|
expect(migration).to receive(:perform).with(10, 20).once
|
||||||
klass = double(:klass, new: instance)
|
|
||||||
|
|
||||||
expect(described_class).to receive(:const_get)
|
|
||||||
.with('Foo')
|
|
||||||
.and_return(klass)
|
|
||||||
|
|
||||||
expect(instance).to receive(:perform).with(10, 20)
|
|
||||||
|
|
||||||
described_class.perform('Foo', [10, 20])
|
described_class.perform('Foo', [10, 20])
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
context 'when retries count is zero' do
|
||||||
|
it 'perform a background migration only once' do
|
||||||
|
expect(migration).to receive(:perform).with(10, 20)
|
||||||
|
.and_raise(StandardError).once
|
||||||
|
|
||||||
|
expect { described_class.perform('Foo', [10, 20], retries: 0) }
|
||||||
|
.to raise_error(StandardError)
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
context 'when retries count is larger than zero' do
|
||||||
|
it 'retries a background migration when needed' do
|
||||||
|
expect(migration).to receive(:perform).with(10, 20)
|
||||||
|
.and_raise(StandardError).exactly(3).times
|
||||||
|
|
||||||
|
expect { described_class.perform('Foo', [10, 20], retries: 3) }
|
||||||
|
.to raise_error(StandardError)
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
|
Loading…
Reference in a new issue