Batch load only data from same repository when lazy object is accessed

By specifying `key`, we get a different lazy batch loader for each
repository, which means that accessing a lazy object from one repository
will only result in that repository's objects being fetched, not those
of other repositories, saving us some unnecessary Gitaly lookups.
This commit is contained in:
Douwe Maan 2018-11-23 12:26:17 +01:00
parent ba9eeea4ee
commit 5f0e4040ce
No known key found for this signature in database
GPG Key ID: 5976703F65143D36
9 changed files with 49 additions and 54 deletions

View File

@ -11,10 +11,11 @@ module Resolvers
end
def model_by_full_path(model, full_path)
BatchLoader.for(full_path).batch(key: "#{model.model_name.param_key}:full_path") do |full_paths, loader|
BatchLoader.for(full_path).batch(key: model) do |full_paths, loader, args|
# `with_route` avoids an N+1 calculating full_path
results = model.where_full_path_in(full_paths).with_route
results.each { |project| loader.call(project.full_path, project) }
args[:key].where_full_path_in(full_paths).with_route.each do |project|
loader.call(project.full_path, project)
end
end
end
end

View File

@ -14,9 +14,10 @@ module Resolvers
def resolve(iid:)
return unless project.present?
BatchLoader.for(iid.to_s).batch(key: project.id) do |iids, loader|
results = project.merge_requests.where(iid: iids)
results.each { |mr| loader.call(mr.iid.to_s, mr) }
BatchLoader.for(iid.to_s).batch(key: project) do |iids, loader, args|
args[:key].merge_requests.where(iid: iids).each do |mr|
loader.call(mr.iid.to_s, mr)
end
end
end
# rubocop: enable CodeReuse/ActiveRecord

View File

@ -80,15 +80,9 @@ class Blob < SimpleDelegator
end
def self.lazy(project, commit_id, path)
BatchLoader.for({ project: project, commit_id: commit_id, path: path }).batch do |items, loader|
items_by_project = items.group_by { |i| i[:project] }
items_by_project.each do |project, items|
items = items.map { |i| i.values_at(:commit_id, :path) }
project.repository.blobs_at(items).each do |blob|
loader.call({ project: blob.project, commit_id: blob.commit_id, path: blob.path }, blob) if blob
end
BatchLoader.for([commit_id, path]).batch(key: project.repository) do |items, loader, args|
args[:key].blobs_at(items).each do |blob|
loader.call([blob.commit_id, blob.path], blob) if blob
end
end
end

View File

@ -0,0 +1,5 @@
---
title: Batch load only data from same repository when lazy object is accessed
merge_request: 23309
author:
type: performance

View File

@ -155,17 +155,9 @@ module Gitlab
end
def extract_signature_lazily(repository, commit_id)
BatchLoader.for({ repository: repository, commit_id: commit_id }).batch do |items, loader|
items_by_repo = items.group_by { |i| i[:repository] }
items_by_repo.each do |repo, items|
commit_ids = items.map { |i| i[:commit_id] }
signatures = batch_signature_extraction(repository, commit_ids)
signatures.each do |commit_sha, signature_data|
loader.call({ repository: repository, commit_id: commit_sha }, signature_data)
end
BatchLoader.for(commit_id).batch(key: repository) do |commit_ids, loader, args|
batch_signature_extraction(args[:key], commit_ids).each do |commit_id, signature_data|
loader.call(commit_id, signature_data)
end
end
end
@ -175,17 +167,9 @@ module Gitlab
end
def get_message(repository, commit_id)
BatchLoader.for({ repository: repository, commit_id: commit_id }).batch do |items, loader|
items_by_repo = items.group_by { |i| i[:repository] }
items_by_repo.each do |repo, items|
commit_ids = items.map { |i| i[:commit_id] }
messages = get_messages(repository, commit_ids)
messages.each do |commit_sha, message|
loader.call({ repository: repository, commit_id: commit_sha }, message)
end
BatchLoader.for(commit_id).batch(key: repository) do |commit_ids, loader, args|
get_messages(args[:key], commit_ids).each do |commit_id, message|
loader.call(commit_id, message)
end
end
end

View File

@ -14,17 +14,9 @@ module Gitlab
class << self
def get_message(repository, tag_id)
BatchLoader.for({ repository: repository, tag_id: tag_id }).batch do |items, loader|
items_by_repo = items.group_by { |i| i[:repository] }
items_by_repo.each do |repo, items|
tag_ids = items.map { |i| i[:tag_id] }
messages = get_messages(repository, tag_ids)
messages.each do |id, message|
loader.call({ repository: repository, tag_id: id }, message)
end
BatchLoader.for(tag_id).batch(key: repository) do |tag_ids, loader, args|
get_messages(args[:key], tag_ids).each do |tag_id, message|
loader.call(tag_id, message)
end
end
end

View File

@ -450,11 +450,17 @@ describe Gitlab::Git::Commit, :seed_helper do
described_class.extract_signature_lazily(repository, commit_id)
end
other_repository = double(:repository)
described_class.extract_signature_lazily(other_repository, commit_ids.first)
expect(described_class).to receive(:batch_signature_extraction)
.with(repository, commit_ids)
.once
.and_return({})
expect(described_class).not_to receive(:batch_signature_extraction)
.with(other_repository, commit_ids.first)
2.times { signatures.each(&:itself) }
end
end

View File

@ -38,6 +38,9 @@ describe Gitlab::Git::Tag, :seed_helper do
end
it 'gets messages in one batch', :request_store do
other_repository = double(:repository)
described_class.get_message(other_repository, tag_ids.first)
expect { subject.map(&:itself) }.to change { Gitlab::GitalyClient.get_request_count }.by(1)
end
end

View File

@ -18,14 +18,23 @@ describe Blob do
describe '.lazy' do
let(:project) { create(:project, :repository) }
let(:commit) { project.commit_by(oid: 'e63f41fe459e62e1228fcef60d7189127aeba95a') }
let(:other_project) { create(:project, :repository) }
let(:commit_id) { 'e63f41fe459e62e1228fcef60d7189127aeba95a' }
it 'fetches all blobs when the first is accessed' do
changelog = described_class.lazy(project, commit.id, 'CHANGELOG')
contributing = described_class.lazy(project, commit.id, 'CONTRIBUTING.md')
it 'does not fetch blobs when none are accessed' do
expect(project.repository).not_to receive(:blobs_at)
expect(Gitlab::Git::Blob).to receive(:batch).once.and_call_original
expect(Gitlab::Git::Blob).not_to receive(:find)
described_class.lazy(project, commit_id, 'CHANGELOG')
end
it 'fetches all blobs for the same repository when one is accessed' do
expect(project.repository).to receive(:blobs_at).with([[commit_id, 'CHANGELOG'], [commit_id, 'CONTRIBUTING.md']]).once.and_call_original
expect(other_project.repository).not_to receive(:blobs_at)
changelog = described_class.lazy(project, commit_id, 'CHANGELOG')
contributing = described_class.lazy(project, commit_id, 'CONTRIBUTING.md')
described_class.lazy(other_project, commit_id, 'CHANGELOG')
# Access property so the values are loaded
changelog.id