diff --git a/app/models/lfs_object.rb b/app/models/lfs_object.rb index 79a376ff0fd..40695a97d97 100644 --- a/app/models/lfs_object.rb +++ b/app/models/lfs_object.rb @@ -2,6 +2,7 @@ class LfsObject < ApplicationRecord include AfterCommitQueue + include EachBatch include ObjectStorage::BackgroundMove has_many :lfs_objects_projects, dependent: :destroy # rubocop:disable Cop/ActiveRecordDependent diff --git a/app/services/projects/lfs_pointers/lfs_link_service.rb b/app/services/projects/lfs_pointers/lfs_link_service.rb index e3c956250f0..38de2af9c1e 100644 --- a/app/services/projects/lfs_pointers/lfs_link_service.rb +++ b/app/services/projects/lfs_pointers/lfs_link_service.rb @@ -4,6 +4,8 @@ module Projects module LfsPointers class LfsLinkService < BaseService + BATCH_SIZE = 1000 + # Accept an array of oids to link # # Returns an array with the oid of the existent lfs objects @@ -18,16 +20,33 @@ module Projects # rubocop: disable CodeReuse/ActiveRecord def link_existing_lfs_objects(oids) - existent_lfs_objects = LfsObject.where(oid: oids) + all_existing_objects = [] + iterations = 0 - return [] unless existent_lfs_objects.any? + LfsObject.where(oid: oids).each_batch(of: BATCH_SIZE) do |existent_lfs_objects| + next unless existent_lfs_objects.any? - not_linked_lfs_objects = existent_lfs_objects.where.not(id: project.all_lfs_objects) - project.all_lfs_objects << not_linked_lfs_objects + iterations += 1 + not_linked_lfs_objects = existent_lfs_objects.where.not(id: project.all_lfs_objects) + project.all_lfs_objects << not_linked_lfs_objects - existent_lfs_objects.pluck(:oid) + all_existing_objects += existent_lfs_objects.pluck(:oid) + end + + log_lfs_link_results(all_existing_objects.count, iterations) + + all_existing_objects end # rubocop: enable CodeReuse/ActiveRecord + + def log_lfs_link_results(lfs_objects_linked_count, iterations) + Gitlab::Import::Logger.info( + class: self.class.name, + project_id: project.id, + project_path: project.full_path, + lfs_objects_linked_count: lfs_objects_linked_count, + iterations: iterations) + end end end end diff --git a/changelogs/unreleased/sh-lfs-object-batches.yml b/changelogs/unreleased/sh-lfs-object-batches.yml new file mode 100644 index 00000000000..09043e286be --- /dev/null +++ b/changelogs/unreleased/sh-lfs-object-batches.yml @@ -0,0 +1,5 @@ +--- +title: Makes LFS object linker process OIDs in batches +merge_request: 32268 +author: +type: performance diff --git a/spec/services/projects/lfs_pointers/lfs_link_service_spec.rb b/spec/services/projects/lfs_pointers/lfs_link_service_spec.rb index 849601c4a63..66233787d3a 100644 --- a/spec/services/projects/lfs_pointers/lfs_link_service_spec.rb +++ b/spec/services/projects/lfs_pointers/lfs_link_service_spec.rb @@ -30,5 +30,23 @@ describe Projects::LfsPointers::LfsLinkService do expect(subject.execute(new_oid_list.keys)).to eq linked end + + it 'links in batches' do + stub_const("#{described_class}::BATCH_SIZE", 3) + + expect(Gitlab::Import::Logger) + .to receive(:info) + .with(class: described_class.name, + project_id: project.id, + project_path: project.full_path, + lfs_objects_linked_count: 7, + iterations: 3) + + lfs_objects = create_list(:lfs_object, 7) + linked = subject.execute(lfs_objects.pluck(:oid)) + + expect(project.all_lfs_objects.count).to eq 9 + expect(linked.size).to eq 7 + end end end