Makes LFS object linker process OIDs in batches
During a project import, `LfsLinkService` attempts to link `LfsObject` records that have not already been associated with the project. A large repository can have thousands of OIDs, which can cause long database query and parsing times. By processing them in batches of 1,000, we can reduce that time at the expense of a few more SQL queries. Closes https://gitlab.com/gitlab-org/gitlab-ce/issues/66274
This commit is contained in:
parent
a58f4f00cf
commit
2022e6799b
|
@ -2,6 +2,7 @@
|
|||
|
||||
class LfsObject < ApplicationRecord
|
||||
include AfterCommitQueue
|
||||
include EachBatch
|
||||
include ObjectStorage::BackgroundMove
|
||||
|
||||
has_many :lfs_objects_projects, dependent: :destroy # rubocop:disable Cop/ActiveRecordDependent
|
||||
|
|
|
@ -4,6 +4,8 @@
|
|||
module Projects
|
||||
module LfsPointers
|
||||
class LfsLinkService < BaseService
|
||||
BATCH_SIZE = 1000
|
||||
|
||||
# Accept an array of oids to link
|
||||
#
|
||||
# Returns an array with the OIDs of the LFS objects that already exist
|
||||
|
@ -18,16 +20,33 @@ module Projects
|
|||
|
||||
# rubocop: disable CodeReuse/ActiveRecord
|
||||
# Links to the project every LfsObject whose oid is in `oids` and whose id is
# not already in project.all_lfs_objects, walking the matches with each_batch
# in groups of BATCH_SIZE, then reports the totals via log_lfs_link_results.
#
# NOTE(review): this listing comes from a diff view that shows removed and
# added lines together, so the body below appears to mix the earlier unbatched
# statements (the standalone query, `return []`, the bare `pluck`, and one copy
# of the two linking lines) with their batched replacements. Confirm against
# the actual file before reading it as a single executable body.
#
# oids - Array of oids to look up.
#
# Returns an Array of the oids plucked from each batch.
def link_existing_lfs_objects(oids)
|
||||
existent_lfs_objects = LfsObject.where(oid: oids)
|
||||
all_existing_objects = []
|
||||
iterations = 0
|
||||
|
||||
return [] unless existent_lfs_objects.any?
|
||||
LfsObject.where(oid: oids).each_batch(of: BATCH_SIZE) do |existent_lfs_objects|
|
||||
next unless existent_lfs_objects.any?
|
||||
|
||||
not_linked_lfs_objects = existent_lfs_objects.where.not(id: project.all_lfs_objects)
|
||||
project.all_lfs_objects << not_linked_lfs_objects
|
||||
iterations += 1
|
||||
not_linked_lfs_objects = existent_lfs_objects.where.not(id: project.all_lfs_objects)
|
||||
project.all_lfs_objects << not_linked_lfs_objects
|
||||
|
||||
existent_lfs_objects.pluck(:oid)
|
||||
all_existing_objects += existent_lfs_objects.pluck(:oid)
|
||||
end
|
||||
|
||||
log_lfs_link_results(all_existing_objects.count, iterations)
|
||||
|
||||
all_existing_objects
|
||||
end
|
||||
# rubocop: enable CodeReuse/ActiveRecord
|
||||
|
||||
# Calls Gitlab::Import::Logger.info with this class's name, the project's id
# and full path, and the two values supplied by the caller.
#
# lfs_objects_linked_count - value logged under :lfs_objects_linked_count.
# iterations               - value logged under :iterations.
def log_lfs_link_results(lfs_objects_linked_count, iterations)
|
||||
Gitlab::Import::Logger.info(
|
||||
class: self.class.name,
|
||||
project_id: project.id,
|
||||
project_path: project.full_path,
|
||||
lfs_objects_linked_count: lfs_objects_linked_count,
|
||||
iterations: iterations)
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
@ -0,0 +1,5 @@
|
|||
---
|
||||
title: Makes LFS object linker process OIDs in batches
|
||||
merge_request: 32268
|
||||
author:
|
||||
type: performance
|
|
@ -30,5 +30,23 @@ describe Projects::LfsPointers::LfsLinkService do
|
|||
|
||||
expect(subject.execute(new_oid_list.keys)).to eq linked
|
||||
end
|
||||
|
||||
# Stubs BATCH_SIZE down to 3, creates 7 LFS objects and passes their oids to
# the service. The logger expectation pins 7 linked objects over 3 iterations,
# and the service is expected to return 7 entries.
#
# NOTE(review): the expected project total of 9 presumably includes objects
# linked by setup outside this excerpt; confirm against the full spec.
it 'links in batches' do
|
||||
stub_const("#{described_class}::BATCH_SIZE", 3)
|
||||
|
||||
expect(Gitlab::Import::Logger)
|
||||
.to receive(:info)
|
||||
.with(class: described_class.name,
|
||||
project_id: project.id,
|
||||
project_path: project.full_path,
|
||||
lfs_objects_linked_count: 7,
|
||||
iterations: 3)
|
||||
|
||||
lfs_objects = create_list(:lfs_object, 7)
|
||||
linked = subject.execute(lfs_objects.pluck(:oid))
|
||||
|
||||
expect(project.all_lfs_objects.count).to eq 9
|
||||
expect(linked.size).to eq 7
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
Loading…
Reference in New Issue