diff --git a/app/workers/repository_check/batch_worker.rb b/app/workers/repository_check/batch_worker.rb index 051382a08a9..07559ea479b 100644 --- a/app/workers/repository_check/batch_worker.rb +++ b/app/workers/repository_check/batch_worker.rb @@ -4,9 +4,11 @@ module RepositoryCheck class BatchWorker include ApplicationWorker include RepositoryCheckQueue + include ExclusiveLeaseGuard RUN_TIME = 3600 BATCH_SIZE = 10_000 + LEASE_TIMEOUT = 1.hour attr_reader :shard_name @@ -16,6 +18,20 @@ module RepositoryCheck return unless Gitlab::CurrentSettings.repository_checks_enabled return unless Gitlab::ShardHealthCache.healthy_shard?(shard_name) + try_obtain_lease do + perform_repository_checks + end + end + + def lease_timeout + LEASE_TIMEOUT + end + + def lease_key + "repository_check_batch_worker:#{shard_name}" + end + + def perform_repository_checks start = Time.now # This loop will break after a little more than one hour ('a little @@ -26,7 +42,7 @@ module RepositoryCheck project_ids.each do |project_id| break if Time.now - start >= RUN_TIME - next unless try_obtain_lease(project_id) + next unless try_obtain_lease_for_project(project_id) SingleRepositoryWorker.new.perform(project_id) end @@ -60,7 +76,7 @@ module RepositoryCheck Project.where(repository_storage: shard_name) end - def try_obtain_lease(id) + def try_obtain_lease_for_project(id) # Use a 24-hour timeout because on servers/projects where 'git fsck' is # super slow we definitely do not want to run it twice in parallel. Gitlab::ExclusiveLease.new( diff --git a/app/workers/repository_check/dispatch_worker.rb b/app/workers/repository_check/dispatch_worker.rb index 891a273afd7..96634f09a15 100644 --- a/app/workers/repository_check/dispatch_worker.rb +++ b/app/workers/repository_check/dispatch_worker.rb @@ -3,13 +3,22 @@ module RepositoryCheck include ApplicationWorker include CronjobQueue include ::EachShardWorker + include ExclusiveLeaseGuard + + LEASE_TIMEOUT = 1.hour def perform return unless Gitlab::CurrentSettings.repository_checks_enabled - each_eligible_shard do |shard_name| - RepositoryCheck::BatchWorker.perform_async(shard_name) + try_obtain_lease do + each_eligible_shard do |shard_name| + RepositoryCheck::BatchWorker.perform_async(shard_name) + end end end + + def lease_timeout + LEASE_TIMEOUT + end end end diff --git a/spec/workers/repository_check/batch_worker_spec.rb b/spec/workers/repository_check/batch_worker_spec.rb index 6bc551be9ad..ede271b2cdd 100644 --- a/spec/workers/repository_check/batch_worker_spec.rb +++ b/spec/workers/repository_check/batch_worker_spec.rb @@ -62,4 +62,12 @@ describe RepositoryCheck::BatchWorker do expect(subject.perform(shard_name)).to eq([]) end + + it 'does not run if the exclusive lease is taken' do + allow(subject).to receive(:try_obtain_lease).and_return(false) + + expect(subject).not_to receive(:perform_repository_checks) + + subject.perform(shard_name) + end end diff --git a/spec/workers/repository_check/dispatch_worker_spec.rb b/spec/workers/repository_check/dispatch_worker_spec.rb index 20a4f1f5344..7877429aa8f 100644 --- a/spec/workers/repository_check/dispatch_worker_spec.rb +++ b/spec/workers/repository_check/dispatch_worker_spec.rb @@ -11,6 +11,14 @@ describe RepositoryCheck::DispatchWorker do subject.perform end + it 'does nothing if the exclusive lease is taken' do + allow(subject).to receive(:try_obtain_lease).and_return(false) + + expect(RepositoryCheck::BatchWorker).not_to receive(:perform_async) + + subject.perform + end + it 'dispatches work to RepositoryCheck::BatchWorker' do expect(RepositoryCheck::BatchWorker).to receive(:perform_async).at_least(:once)