From 6c35fb59b79e7abc321cee65fc1730ce67908b6b Mon Sep 17 00:00:00 2001
From: John Cai
Date: Tue, 23 Apr 2019 10:32:06 -0700
Subject: [PATCH] Add GitDeduplicationService for deduplication housekeeping

GitDeduplicationService performs idempotent operations on deduplicated
projects.
---
Notes (review commentary; this region is ignored when the patch is applied):

    What the patch does
    * Adds Projects::GitDeduplicationService. #execute runs inside
      try_obtain_lease with the key "git_deduplication:<project id>" and
      LEASE_TIMEOUT = 86400 (presumably seconds, i.e. 24 hours; confirm
      against ExclusiveLeaseGuard).
        1. If the project has no pool repository, it calls
           project.repository.disconnect_alternates and leaves the block.
        2. If the project is the pool's source project, its git objects are
           poolable, and the source repository is on the same storage as the
           pool, it calls pool_repository.object_pool.fetch.
        3. It calls project.link_pool_repository only when the project's
           repository is on the same storage as the pool.
    * GitGarbageCollectWorker now calls the service instead of calling
      project.link_pool_repository directly.
    * Adds a spec covering the branches above and the lease-taken case.

    NOTE(review)
    * This copy of the patch was received whitespace-collapsed. Line breaks
      and indentation below are reconstructed; indentation inside hunks is
      conventional Ruby style and may not match the blobs named on the
      `index` lines byte for byte.
    * Worker hunk: the header says -23,7 +23,9 and the diffstat counts 3
      insertions for that file, but the hunk as received carries four `+`
      lines (the gitaly_call line is marked as added). Most likely that line
      is context; confirm against the original commit before applying.
    * Spec: counting do/end pairs, `context 'when a lease is already out'`
      closes inside `context 'when there is not already a lease'`. That looks
      mis-nested; confirm the intended grouping.
    * Spec: 'links the repository to the object pool' and 'does not link ...'
      do not call stub_exclusive_lease, unlike the sibling examples;
      presumably they rely on a real lease. Verify.
    * Service: source_project? re-checks has_pool_repository?, although
      #execute only reaches it after that check has passed.
    * Service: fetch_from_source and source_project? call
      project.pool_repository while the other helpers use the delegated
      pool_repository.

 .../projects/git_deduplication_service.rb     | 64 +++++++++++++
 app/workers/git_garbage_collect_worker.rb     |  4 +-
 .../git_deduplication_service_spec.rb         | 90 +++++++++++++++++++
 3 files changed, 157 insertions(+), 1 deletion(-)
 create mode 100644 app/services/projects/git_deduplication_service.rb
 create mode 100644 spec/services/projects/git_deduplication_service_spec.rb

diff --git a/app/services/projects/git_deduplication_service.rb b/app/services/projects/git_deduplication_service.rb
new file mode 100644
index 00000000000..74d469ecf37
--- /dev/null
+++ b/app/services/projects/git_deduplication_service.rb
@@ -0,0 +1,64 @@
+# frozen_string_literal: true
+
+module Projects
+  class GitDeduplicationService < BaseService
+    include ExclusiveLeaseGuard
+
+    LEASE_TIMEOUT = 86400
+
+    delegate :pool_repository, to: :project
+    attr_reader :project
+
+    def initialize(project)
+      @project = project
+    end
+
+    def execute
+      try_obtain_lease do
+        unless project.has_pool_repository?
+          disconnect_git_alternates
+          break
+        end
+
+        if source_project? && pool_can_fetch_from_source?
+          fetch_from_source
+        end
+
+        project.link_pool_repository if same_storage_as_pool?(project.repository)
+      end
+    end
+
+    private
+
+    def disconnect_git_alternates
+      project.repository.disconnect_alternates
+    end
+
+    def pool_can_fetch_from_source?
+      project.git_objects_poolable? &&
+        same_storage_as_pool?(pool_repository.source_project.repository)
+    end
+
+    def same_storage_as_pool?(repository)
+      pool_repository.object_pool.repository.storage == repository.storage
+    end
+
+    def fetch_from_source
+      project.pool_repository.object_pool.fetch
+    end
+
+    def source_project?
+      return unless project.has_pool_repository?
+
+      project.pool_repository.source_project == project
+    end
+
+    def lease_timeout
+      LEASE_TIMEOUT
+    end
+
+    def lease_key
+      "git_deduplication:#{project.id}"
+    end
+  end
+end
diff --git a/app/workers/git_garbage_collect_worker.rb b/app/workers/git_garbage_collect_worker.rb
index d4a6f53dae5..489d6215774 100644
--- a/app/workers/git_garbage_collect_worker.rb
+++ b/app/workers/git_garbage_collect_worker.rb
@@ -23,7 +23,9 @@ class GitGarbageCollectWorker
     end

     task = task.to_sym
-    project.link_pool_repository
+
+    ::Projects::GitDeduplicationService.new(project).execute
+
+    gitaly_call(task, project.repository.raw_repository)

     # Refresh the branch cache in case garbage collection caused a ref lookup to fail
diff --git a/spec/services/projects/git_deduplication_service_spec.rb b/spec/services/projects/git_deduplication_service_spec.rb
new file mode 100644
index 00000000000..3acbc46b473
--- /dev/null
+++ b/spec/services/projects/git_deduplication_service_spec.rb
@@ -0,0 +1,90 @@
+# frozen_string_literal: true
+
+require 'spec_helper'
+
+describe Projects::GitDeduplicationService do
+  include ExclusiveLeaseHelpers
+
+  let(:pool) { create(:pool_repository, :ready) }
+  let(:project) { create(:project, :repository) }
+  let(:lease_key) { "git_deduplication:#{project.id}" }
+  let(:lease_timeout) { Projects::GitDeduplicationService::LEASE_TIMEOUT }
+
+  subject(:service) { described_class.new(project) }
+
+  describe '#execute' do
+    context 'when there is not already a lease' do
+      context 'when the project does not have a pool repository' do
+        it 'calls disconnect_git_alternates' do
+          stub_exclusive_lease(lease_key, timeout: lease_timeout)
+
+          expect(project.repository).to receive(:disconnect_alternates)
+
+          service.execute
+        end
+      end
+
+      context 'when the project has a pool repository' do
+        let(:project) { create(:project, :repository, pool_repository: pool) }
+
+        context 'when the project is a source project' do
+          let(:lease_key) { "git_deduplication:#{pool.source_project.id}" }
+
+          subject(:service) { described_class.new(pool.source_project) }
+
+          it 'calls fetch' do
+            stub_exclusive_lease(lease_key, timeout: lease_timeout)
+            allow(pool.source_project).to receive(:git_objects_poolable?).and_return(true)
+
+            expect(pool.object_pool).to receive(:fetch)
+
+            service.execute
+          end
+
+          it 'does not call fetch if git objects are not poolable' do
+            stub_exclusive_lease(lease_key, timeout: lease_timeout)
+            allow(pool.source_project).to receive(:git_objects_poolable?).and_return(false)
+
+            expect(pool.object_pool).not_to receive(:fetch)
+
+            service.execute
+          end
+
+          it 'does not call fetch if pool and project are not on the same storage' do
+            stub_exclusive_lease(lease_key, timeout: lease_timeout)
+            allow(pool.source_project.repository).to receive(:storage).and_return('special_storage_001')
+
+            expect(pool.object_pool).not_to receive(:fetch)
+
+            service.execute
+          end
+        end
+
+        it 'links the repository to the object pool' do
+          expect(project).to receive(:link_pool_repository)
+
+          service.execute
+        end
+
+        it 'does not link the repository to the object pool if they are not on the same storage' do
+          allow(project.repository).to receive(:storage).and_return('special_storage_001')
+          expect(project).not_to receive(:link_pool_repository)
+
+          service.execute
+        end
+      end
+
+      context 'when a lease is already out' do
+        before do
+          stub_exclusive_lease_taken(lease_key, timeout: lease_timeout)
+        end
+
+        it 'fails when a lease is already out' do
+          expect(service).to receive(:log_error).with('Cannot obtain an exclusive lease. There must be another instance already in execution.')
+
+          service.execute
+        end
+      end
+    end
+  end
+end