Add GitDeduplicationService for deduplication housekeeping

GitDeduplicationService performs idempotent operations on deduplicated
projects.
This commit is contained in:
John Cai 2019-04-23 10:32:06 -07:00
parent bb5bbbaa1a
commit 6c35fb59b7
3 changed files with 157 additions and 1 deletion

View file

@ -0,0 +1,64 @@
# frozen_string_literal: true
module Projects
  # Housekeeping for a project's relationship with its object pool.
  #
  # Every run happens inside an exclusive lease keyed on the project id (see
  # #lease_key), so the block in #execute only runs when the lease is obtained.
  class GitDeduplicationService < BaseService
    include ExclusiveLeaseGuard

    # Value returned by #lease_timeout for ExclusiveLeaseGuard.
    # NOTE(review): presumably seconds (86_400 s = 24 h) — confirm against
    # ExclusiveLeaseGuard.
    LEASE_TIMEOUT = 86_400

    attr_reader :project

    delegate :pool_repository, to: :project

    # Stores only the project; the parent initializer is not called.
    #
    # @param project the project whose repository is reconciled with its pool
    def initialize(project)
      @project = project
    end

    # Runs the housekeeping steps under the exclusive lease:
    #
    # * no pool repository: disconnect the repository's alternates and stop;
    # * project is the pool's source and the pool can fetch from it: fetch;
    # * project repository shares the pool's storage: link it to the pool.
    def execute
      try_obtain_lease do
        unless project.has_pool_repository?
          disconnect_git_alternates
          # Leave the lease block right away; the remaining steps all need a pool.
          break
        end

        fetch_from_source if source_project? && pool_can_fetch_from_source?
        project.link_pool_repository if same_storage_as_pool?(project.repository)
      end
    end

    private

    # Hook read by ExclusiveLeaseGuard.
    def lease_timeout
      LEASE_TIMEOUT
    end

    # Hook read by ExclusiveLeaseGuard; one lease per project id.
    def lease_key
      "git_deduplication:#{project.id}"
    end

    # Detaches the project's repository from its alternates.
    def disconnect_git_alternates
      project.repository.disconnect_alternates
    end

    # Asks the pool's object pool to fetch.
    def fetch_from_source
      pool_repository.object_pool.fetch
    end

    # True when the project is the one its pool repository was created from.
    # Returns nil when the project has no pool repository at all.
    def source_project?
      pool_repository.source_project == project if project.has_pool_repository?
    end

    # The pool may fetch only if the project is poolable and the source
    # project's repository lives on the same storage as the pool.
    def pool_can_fetch_from_source?
      return false unless project.git_objects_poolable?

      same_storage_as_pool?(pool_repository.source_project.repository)
    end

    # Compares the storage of the pool's repository with that of +repository+.
    def same_storage_as_pool?(repository)
      pool_storage = pool_repository.object_pool.repository.storage

      pool_storage == repository.storage
    end
  end
end

View file

@ -23,7 +23,9 @@ class GitGarbageCollectWorker
end
task = task.to_sym
project.link_pool_repository
::Projects::GitDeduplicationService.new(project).execute
gitaly_call(task, project.repository.raw_repository)
# Refresh the branch cache in case garbage collection caused a ref lookup to fail

View file

@ -0,0 +1,90 @@
# frozen_string_literal: true
require 'spec_helper'
# Specs for Projects::GitDeduplicationService#execute, covering both the path
# where the exclusive lease is obtained and the path where it is already taken.
describe Projects::GitDeduplicationService do
  include ExclusiveLeaseHelpers

  let(:pool) { create(:pool_repository, :ready) }
  let(:project) { create(:project, :repository) }
  let(:lease_key) { "git_deduplication:#{project.id}" }
  let(:lease_timeout) { Projects::GitDeduplicationService::LEASE_TIMEOUT }

  subject(:service) { described_class.new(project) }

  describe '#execute' do
    context 'when there is not already a lease' do
      context 'when the project does not have a pool repository' do
        it 'calls disconnect_git_alternates' do
          stub_exclusive_lease(lease_key, timeout: lease_timeout)

          expect(project.repository).to receive(:disconnect_alternates)

          service.execute
        end
      end

      context 'when the project has a pool repository' do
        let(:project) { create(:project, :repository, pool_repository: pool) }

        context 'when the project is a source project' do
          # The service runs against the pool's source project here, so the
          # lease key has to be derived from that project's id.
          let(:lease_key) { "git_deduplication:#{pool.source_project.id}" }

          subject(:service) { described_class.new(pool.source_project) }

          before do
            stub_exclusive_lease(lease_key, timeout: lease_timeout)
          end

          it 'calls fetch' do
            allow(pool.source_project).to receive(:git_objects_poolable?).and_return(true)

            expect(pool.object_pool).to receive(:fetch)

            service.execute
          end

          it 'does not call fetch if git objects are not poolable' do
            allow(pool.source_project).to receive(:git_objects_poolable?).and_return(false)

            expect(pool.object_pool).not_to receive(:fetch)

            service.execute
          end

          it 'does not call fetch if pool and project are not on the same storage' do
            allow(pool.source_project.repository).to receive(:storage).and_return('special_storage_001')

            expect(pool.object_pool).not_to receive(:fetch)

            service.execute
          end
        end

        # NOTE(review): the two examples below do not stub the exclusive lease;
        # presumably they rely on a real lease being obtainable in the test
        # environment — confirm.
        it 'links the repository to the object pool' do
          expect(project).to receive(:link_pool_repository)

          service.execute
        end

        it 'does not link the repository to the object pool if they are not on the same storage' do
          allow(project.repository).to receive(:storage).and_return('special_storage_001')

          expect(project).not_to receive(:link_pool_repository)

          service.execute
        end
      end
    end

    # Sibling of the "not already a lease" context rather than a child of it,
    # so the generated example description no longer contradicts itself.
    context 'when a lease is already out' do
      before do
        stub_exclusive_lease_taken(lease_key, timeout: lease_timeout)
      end

      it 'fails when a lease is already out' do
        expect(service).to receive(:log_error).with('Cannot obtain an exclusive lease. There must be another instance already in execution.')

        service.execute
      end
    end
  end
end