From 5f0e4040ce7d4d0019b3340dd603cc6f67dd036d Mon Sep 17 00:00:00 2001 From: Douwe Maan Date: Fri, 23 Nov 2018 12:26:17 +0100 Subject: [PATCH] Batch load only data from same repository when lazy object is accessed By specifying `key`, we get a different lazy batch loader for each repository, which means that accessing a lazy object from one repository will only result in that repository's objects being fetched, not those of other repositories, saving us some unnecessary Gitaly lookups. --- app/graphql/resolvers/full_path_resolver.rb | 7 +++-- .../resolvers/merge_request_resolver.rb | 7 +++-- app/models/blob.rb | 12 ++------ changelogs/unreleased/dm-batch-loader-key.yml | 5 ++++ lib/gitlab/git/commit.rb | 28 ++++--------------- lib/gitlab/git/tag.rb | 14 ++-------- spec/lib/gitlab/git/commit_spec.rb | 6 ++++ spec/lib/gitlab/git/tag_spec.rb | 3 ++ spec/models/blob_spec.rb | 21 ++++++++++---- 9 files changed, 49 insertions(+), 54 deletions(-) create mode 100644 changelogs/unreleased/dm-batch-loader-key.yml diff --git a/app/graphql/resolvers/full_path_resolver.rb b/app/graphql/resolvers/full_path_resolver.rb index 8d3da33e8d2..0f1a64b6c58 100644 --- a/app/graphql/resolvers/full_path_resolver.rb +++ b/app/graphql/resolvers/full_path_resolver.rb @@ -11,10 +11,11 @@ module Resolvers end def model_by_full_path(model, full_path) - BatchLoader.for(full_path).batch(key: "#{model.model_name.param_key}:full_path") do |full_paths, loader| + BatchLoader.for(full_path).batch(key: model) do |full_paths, loader, args| # `with_route` avoids an N+1 calculating full_path - results = model.where_full_path_in(full_paths).with_route - results.each { |project| loader.call(project.full_path, project) } + args[:key].where_full_path_in(full_paths).with_route.each do |project| + loader.call(project.full_path, project) + end end end end diff --git a/app/graphql/resolvers/merge_request_resolver.rb b/app/graphql/resolvers/merge_request_resolver.rb index b87c95217f7..d047ce9e3a1 100644 --- a/app/graphql/resolvers/merge_request_resolver.rb +++ b/app/graphql/resolvers/merge_request_resolver.rb @@ -14,9 +14,10 @@ module Resolvers def resolve(iid:) return unless project.present? - BatchLoader.for(iid.to_s).batch(key: project.id) do |iids, loader| - results = project.merge_requests.where(iid: iids) - results.each { |mr| loader.call(mr.iid.to_s, mr) } + BatchLoader.for(iid.to_s).batch(key: project) do |iids, loader, args| + args[:key].merge_requests.where(iid: iids).each do |mr| + loader.call(mr.iid.to_s, mr) + end end end # rubocop: enable CodeReuse/ActiveRecord diff --git a/app/models/blob.rb b/app/models/blob.rb index 4f310e70f4f..66a0925c495 100644 --- a/app/models/blob.rb +++ b/app/models/blob.rb @@ -80,15 +80,9 @@ class Blob < SimpleDelegator end def self.lazy(project, commit_id, path) - BatchLoader.for({ project: project, commit_id: commit_id, path: path }).batch do |items, loader| - items_by_project = items.group_by { |i| i[:project] } - - items_by_project.each do |project, items| - items = items.map { |i| i.values_at(:commit_id, :path) } - - project.repository.blobs_at(items).each do |blob| - loader.call({ project: blob.project, commit_id: blob.commit_id, path: blob.path }, blob) if blob - end + BatchLoader.for([commit_id, path]).batch(key: project.repository) do |items, loader, args| + args[:key].blobs_at(items).each do |blob| + loader.call([blob.commit_id, blob.path], blob) if blob end end end diff --git a/changelogs/unreleased/dm-batch-loader-key.yml b/changelogs/unreleased/dm-batch-loader-key.yml new file mode 100644 index 00000000000..047fdbc4b3f --- /dev/null +++ b/changelogs/unreleased/dm-batch-loader-key.yml @@ -0,0 +1,5 @@ +--- +title: Batch load only data from same repository when lazy object is accessed +merge_request: 23309 +author: +type: performance diff --git a/lib/gitlab/git/commit.rb b/lib/gitlab/git/commit.rb index 4f05c4b73a1..5863815ca85 100644 --- a/lib/gitlab/git/commit.rb +++ b/lib/gitlab/git/commit.rb @@ -155,17 +155,9 @@ module Gitlab end def extract_signature_lazily(repository, commit_id) - BatchLoader.for({ repository: repository, commit_id: commit_id }).batch do |items, loader| - items_by_repo = items.group_by { |i| i[:repository] } - - items_by_repo.each do |repo, items| - commit_ids = items.map { |i| i[:commit_id] } - - signatures = batch_signature_extraction(repository, commit_ids) - - signatures.each do |commit_sha, signature_data| - loader.call({ repository: repository, commit_id: commit_sha }, signature_data) - end + BatchLoader.for(commit_id).batch(key: repository) do |commit_ids, loader, args| + batch_signature_extraction(args[:key], commit_ids).each do |commit_id, signature_data| + loader.call(commit_id, signature_data) end end end @@ -175,17 +167,9 @@ module Gitlab end def get_message(repository, commit_id) - BatchLoader.for({ repository: repository, commit_id: commit_id }).batch do |items, loader| - items_by_repo = items.group_by { |i| i[:repository] } - - items_by_repo.each do |repo, items| - commit_ids = items.map { |i| i[:commit_id] } - - messages = get_messages(repository, commit_ids) - - messages.each do |commit_sha, message| - loader.call({ repository: repository, commit_id: commit_sha }, message) - end + BatchLoader.for(commit_id).batch(key: repository) do |commit_ids, loader, args| + get_messages(args[:key], commit_ids).each do |commit_id, message| + loader.call(commit_id, message) end end end diff --git a/lib/gitlab/git/tag.rb b/lib/gitlab/git/tag.rb index ade708d0541..23d989ff258 100644 --- a/lib/gitlab/git/tag.rb +++ b/lib/gitlab/git/tag.rb @@ -14,17 +14,9 @@ module Gitlab class << self def get_message(repository, tag_id) - BatchLoader.for({ repository: repository, tag_id: tag_id }).batch do |items, loader| - items_by_repo = items.group_by { |i| i[:repository] } - - items_by_repo.each do |repo, items| - tag_ids = items.map { |i| i[:tag_id] } - - messages = get_messages(repository, tag_ids) - - messages.each do |id, message| - loader.call({ repository: repository, tag_id: id }, message) - end + BatchLoader.for(tag_id).batch(key: repository) do |tag_ids, loader, args| + get_messages(args[:key], tag_ids).each do |tag_id, message| + loader.call(tag_id, message) end end end diff --git a/spec/lib/gitlab/git/commit_spec.rb b/spec/lib/gitlab/git/commit_spec.rb index 74d542060d5..db68062e433 100644 --- a/spec/lib/gitlab/git/commit_spec.rb +++ b/spec/lib/gitlab/git/commit_spec.rb @@ -450,11 +450,17 @@ describe Gitlab::Git::Commit, :seed_helper do described_class.extract_signature_lazily(repository, commit_id) end + other_repository = double(:repository) + described_class.extract_signature_lazily(other_repository, commit_ids.first) + expect(described_class).to receive(:batch_signature_extraction) .with(repository, commit_ids) .once .and_return({}) + expect(described_class).not_to receive(:batch_signature_extraction) + .with(other_repository, commit_ids.first) + 2.times { signatures.each(&:itself) } end end diff --git a/spec/lib/gitlab/git/tag_spec.rb b/spec/lib/gitlab/git/tag_spec.rb index abee2822d77..b51e3879f49 100644 --- a/spec/lib/gitlab/git/tag_spec.rb +++ b/spec/lib/gitlab/git/tag_spec.rb @@ -38,6 +38,9 @@ describe Gitlab::Git::Tag, :seed_helper do end it 'gets messages in one batch', :request_store do + other_repository = double(:repository) + described_class.get_message(other_repository, tag_ids.first) + expect { subject.map(&:itself) }.to change { Gitlab::GitalyClient.get_request_count }.by(1) end end diff --git a/spec/models/blob_spec.rb b/spec/models/blob_spec.rb index 81e35e6c931..ed93f94d893 100644 --- a/spec/models/blob_spec.rb +++ b/spec/models/blob_spec.rb @@ -18,14 +18,23 @@ describe Blob do describe '.lazy' do let(:project) { create(:project, :repository) } - let(:commit) { project.commit_by(oid: 'e63f41fe459e62e1228fcef60d7189127aeba95a') } + let(:other_project) { create(:project, :repository) } + let(:commit_id) { 'e63f41fe459e62e1228fcef60d7189127aeba95a' } - it 'fetches all blobs when the first is accessed' do - changelog = described_class.lazy(project, commit.id, 'CHANGELOG') - contributing = described_class.lazy(project, commit.id, 'CONTRIBUTING.md') + it 'does not fetch blobs when none are accessed' do + expect(project.repository).not_to receive(:blobs_at) - expect(Gitlab::Git::Blob).to receive(:batch).once.and_call_original - expect(Gitlab::Git::Blob).not_to receive(:find) + described_class.lazy(project, commit_id, 'CHANGELOG') + end + + it 'fetches all blobs for the same repository when one is accessed' do + expect(project.repository).to receive(:blobs_at).with([[commit_id, 'CHANGELOG'], [commit_id, 'CONTRIBUTING.md']]).once.and_call_original + expect(other_project.repository).not_to receive(:blobs_at) + + changelog = described_class.lazy(project, commit_id, 'CHANGELOG') + contributing = described_class.lazy(project, commit_id, 'CONTRIBUTING.md') + + described_class.lazy(other_project, commit_id, 'CHANGELOG') # Access property so the values are loaded changelog.id