Batchload blobs for diff generation

After installing a new gem, batch-loader, a construct can be used to
queue data to be fetched in bulk. The gem was also introduced in both
gitlab-org/gitlab-ce!14680 and gitlab-org/gitlab-ce!14846, but those mrs
are not merged yet.

For the generation of diffs, both the old blob and the new blob need to
be loaded. This for every file in the diff, too. Now we collect all
these so we do 1 fetch. Three `.allow_n_plus_1_calls` have been removed,
which I expect to be valid, but this needs to be confirmed by a full CI
run.

Possibly closes:
- https://gitlab.com/gitlab-org/gitlab-ce/issues/37445
- https://gitlab.com/gitlab-org/gitlab-ce/issues/37599
- https://gitlab.com/gitlab-org/gitlab-ce/issues/37431
This commit is contained in:
Zeger-Jan van de Weg 2017-11-03 14:16:43 +01:00
parent 6dd89059b8
commit f9565e3039
No known key found for this signature in database
GPG Key ID: 65F6A8D64A88ABAC
18 changed files with 86 additions and 56 deletions

View File

@ -263,6 +263,8 @@ gem 'gettext_i18n_rails', '~> 1.8.0'
gem 'gettext_i18n_rails_js', '~> 1.2.0'
gem 'gettext', '~> 3.2.2', require: false, group: :development
gem 'batch-loader'
# Perf bar
gem 'peek', '~> 1.0.1'
gem 'peek-gc', '~> 0.0.2'

View File

@ -73,6 +73,7 @@ GEM
thread_safe (~> 0.3, >= 0.3.1)
babosa (1.0.2)
base32 (0.3.2)
batch-loader (1.1.1)
bcrypt (3.1.11)
bcrypt_pbkdf (1.0.0)
benchmark-ips (2.3.0)
@ -982,6 +983,7 @@ DEPENDENCIES
awesome_print (~> 1.2.0)
babosa (~> 1.0.2)
base32 (~> 0.3.0)
batch-loader
bcrypt_pbkdf (~> 1.0)
benchmark-ips (~> 2.3.0)
better_errors (~> 2.1.0)

View File

@ -22,12 +22,7 @@ class Projects::CommitController < Projects::ApplicationController
apply_diff_view_cookie!
respond_to do |format|
format.html do
# n+1: https://gitlab.com/gitlab-org/gitlab-ce/issues/37599
Gitlab::GitalyClient.allow_n_plus_1_calls do
render
end
end
format.html { render }
format.diff { render text: @commit.to_diff }
format.patch { render text: @commit.to_patch }
end
@ -112,7 +107,7 @@ class Projects::CommitController < Projects::ApplicationController
end
def commit
@noteable = @commit ||= @project.commit(params[:id])
@noteable = @commit ||= @project.commit_by(oid: params[:id])
end
def define_commit_vars

View File

@ -10,10 +10,7 @@ class Projects::MergeRequests::DiffsController < Projects::MergeRequests::Applic
def show
@environment = @merge_request.environments_for(current_user).last
# n+1: https://gitlab.com/gitlab-org/gitlab-ce/issues/37431
Gitlab::GitalyClient.allow_n_plus_1_calls do
render json: { html: view_to_html_string("projects/merge_requests/diffs/_diffs") }
end
render json: { html: view_to_html_string("projects/merge_requests/diffs/_diffs") }
end
def diff_for_path

View File

@ -76,12 +76,24 @@ class Blob < SimpleDelegator
new(blob, project)
end
def self.lazy(project, commit_id, path)
BatchLoader.for(commit_id: commit_id, path: path).batch do |items, loader|
project.repository.blobs_at(items.map(&:values)).each do |blob|
loader.call({ commit_id: blob.commit_id, path: blob.path }, blob) if blob
end
end
end
def initialize(blob, project = nil)
@project = project
super(blob)
end
def inspect
"#<#{self.class.name} oid:#{id[0..8]} commit:#{commit_id[0..8]} path:#{path}>"
end
# Returns the data of the blob.
#
# If the blob is a text based blob the content is converted to UTF-8 and any
@ -95,7 +107,10 @@ class Blob < SimpleDelegator
end
def load_all_data!
super(project.repository) if project
# Endpoint needed: gitlab-org/gitaly#756
Gitlab::GitalyClient.allow_n_plus_1_calls do
super(project.repository) if project
end
end
def no_highlighting?

View File

@ -84,7 +84,7 @@ class Commit
end
def id
@raw.id
raw.id
end
def ==(other)
@ -361,7 +361,7 @@ class Commit
@deltas ||= raw.deltas
end
def diffs(diff_options = nil)
def diffs(diff_options = {})
Gitlab::Diff::FileCollection::Commit.new(self, diff_options: diff_options)
end

View File

@ -478,6 +478,11 @@ class Repository
nil
end
# items is an Array like: [[oid, path], [oid1, path1]]
def blobs_at(items)
raw_repository.batch_blobs(items).map { |blob| Blob.decorate(blob, project) }
end
def root_ref
if raw_repository
raw_repository.root_ref

View File

@ -0,0 +1,5 @@
---
title: Fetch blobs in bulk when generating diffs
merge_request:
author:
type: performance

View File

@ -0,0 +1 @@
Rails.application.config.middleware.use(BatchLoader::Middleware)

View File

@ -25,6 +25,10 @@ module Gitlab
@repository = repository
@diff_refs = diff_refs
@fallback_diff_refs = fallback_diff_refs
# Ensure items are collected in the the batch
new_blob
old_blob
end
def position(position_marker, position_type: :text)
@ -95,21 +99,15 @@ module Gitlab
end
def new_blob
return @new_blob if defined?(@new_blob)
return unless new_content_sha
sha = new_content_sha
return @new_blob = nil unless sha
@new_blob = repository.blob_at(sha, file_path)
Blob.lazy(repository.project, new_content_sha, file_path)
end
def old_blob
return @old_blob if defined?(@old_blob)
return unless old_content_sha
sha = old_content_sha
return @old_blob = nil unless sha
@old_blob = repository.blob_at(sha, old_path)
Blob.lazy(repository.project, old_content_sha, old_path)
end
def content_sha

View File

@ -22,10 +22,7 @@ module Gitlab
end
def diff_files
# n+1: https://gitlab.com/gitlab-org/gitlab-ce/issues/37445
Gitlab::GitalyClient.allow_n_plus_1_calls do
@diff_files ||= @diffs.decorate! { |diff| decorate_diff!(diff) }
end
@diff_files ||= @diffs.decorate! { |diff| decorate_diff!(diff) }
end
def diff_file_with_old_path(old_path)

View File

@ -179,6 +179,8 @@ module Gitlab
)
end
end
rescue Rugged::ReferenceError
nil
end
def rugged_raw(repository, sha, limit:)

View File

@ -1161,6 +1161,11 @@ module Gitlab
Gitlab::Git::Blob.find(self, sha, path) unless Gitlab::Git.blank_ref?(sha)
end
# Items should be of format [[commit_id, path], [commit_id1, path1]]
def batch_blobs(items, blob_size_limit: nil)
Gitlab::Git::Blob.batch(self, items, blob_size_limit: blob_size_limit)
end
def commit_index(user, branch_name, index, options)
committer = user_to_committer(user)

View File

@ -1,15 +1,15 @@
require 'spec_helper'
describe Projects::CommitController do
let(:project) { create(:project, :repository) }
let(:user) { create(:user) }
set(:project) { create(:project, :repository) }
set(:user) { create(:user) }
let(:commit) { project.commit("master") }
let(:master_pickable_sha) { '7d3b0f7cff5f37573aea97cebfd5692ea1689924' }
let(:master_pickable_commit) { project.commit(master_pickable_sha) }
before do
sign_in(user)
project.team << [user, :master]
project.add_master(user)
end
describe 'GET show' do

View File

@ -116,12 +116,8 @@ describe Gitlab::Diff::File do
end
context 'when renamed' do
let(:commit) { project.commit('6907208d755b60ebeacb2e9dfea74c92c3449a1f') }
let(:diff_file) { commit.diffs.diff_file_with_new_path('files/js/commit.coffee') }
before do
allow(diff_file.new_blob).to receive(:id).and_return(diff_file.old_blob.id)
end
let(:commit) { project.commit('94bb47ca1297b7b3731ff2a36923640991e9236f') }
let(:diff_file) { commit.diffs.diff_file_with_new_path('CHANGELOG.md') }
it 'returns false' do
expect(diff_file.content_changed?).to be_falsey

View File

@ -16,6 +16,23 @@ describe Blob do
end
end
describe '.lazy' do
let(:project) { create(:project, :repository) }
let(:commit) { project.commit_by(oid: 'e63f41fe459e62e1228fcef60d7189127aeba95a') }
it 'fetches all blobs when the first is accessed' do
changelog = described_class.lazy(project, commit.id, 'CHANGELOG')
contributing = described_class.lazy(project, commit.id, 'CONTRIBUTING.md')
expect(Gitlab::Git::Blob).to receive(:batch).once.and_call_original
expect(Gitlab::Git::Blob).not_to receive(:find)
# Access property so the values are loaded
changelog.id
contributing.id
end
end
describe '#data' do
context 'using a binary blob' do
it 'returns the data as-is' do

View File

@ -32,10 +32,8 @@ describe DiffViewer::Base do
end
context 'when the binaryness does not match' do
before do
allow(diff_file.old_blob).to receive(:binary?).and_return(false)
allow(diff_file.new_blob).to receive(:binary?).and_return(false)
end
let(:commit) { project.commit_by(oid: 'ae73cb07c9eeaf35924a10f713b364d32b2dd34f') }
let(:diff_file) { commit.diffs.diff_file_with_new_path('Gemfile.zip') }
it 'returns false' do
expect(viewer_class.can_render?(diff_file)).to be_falsey
@ -60,8 +58,7 @@ describe DiffViewer::Base do
context 'when the binaryness does not match' do
before do
allow(diff_file.old_blob).to receive(:binary?).and_return(true)
allow(diff_file.new_blob).to receive(:binary?).and_return(true)
allow_any_instance_of(Blob).to receive(:binary?).and_return(true)
end
it 'returns false' do
@ -77,12 +74,12 @@ describe DiffViewer::Base do
end
context 'when the file was renamed and only the old blob is supported' do
let(:commit) { project.commit('2f63565e7aac07bcdadb654e253078b727143ec4') }
let(:commit) { project.commit_by(oid: '2f63565e7aac07bcdadb654e253078b727143ec4') }
let(:diff_file) { commit.diffs.diff_file_with_new_path('files/images/6049019_460s.jpg') }
before do
allow(diff_file).to receive(:renamed_file?).and_return(true)
allow(diff_file.new_blob).to receive(:extension).and_return('jpeg')
viewer_class.extensions = %w(notjpg)
end
it 'returns false' do
@ -94,8 +91,7 @@ describe DiffViewer::Base do
describe '#collapsed?' do
context 'when the combined blob size is larger than the collapse limit' do
before do
allow(diff_file.old_blob).to receive(:raw_size).and_return(512.kilobytes)
allow(diff_file.new_blob).to receive(:raw_size).and_return(513.kilobytes)
allow(diff_file).to receive(:raw_size).and_return(1025.kilobytes)
end
it 'returns true' do
@ -113,8 +109,7 @@ describe DiffViewer::Base do
describe '#too_large?' do
context 'when the combined blob size is larger than the size limit' do
before do
allow(diff_file.old_blob).to receive(:raw_size).and_return(2.megabytes)
allow(diff_file.new_blob).to receive(:raw_size).and_return(4.megabytes)
allow(diff_file).to receive(:raw_size).and_return(6.megabytes)
end
it 'returns true' do
@ -132,8 +127,7 @@ describe DiffViewer::Base do
describe '#render_error' do
context 'when the combined blob size is larger than the size limit' do
before do
allow(diff_file.old_blob).to receive(:raw_size).and_return(2.megabytes)
allow(diff_file.new_blob).to receive(:raw_size).and_return(4.megabytes)
allow(diff_file).to receive(:raw_size).and_return(6.megabytes)
end
it 'returns :too_large' do

View File

@ -1,9 +1,9 @@
require 'spec_helper'
describe DiffViewer::ServerSide do
let(:project) { create(:project, :repository) }
let(:commit) { project.commit('570e7b2abdd848b95f2f578043fc23bd6f6fd24d') }
let(:diff_file) { commit.diffs.diff_file_with_new_path('files/ruby/popen.rb') }
set(:project) { create(:project, :repository) }
let(:commit) { project.commit_by(oid: '570e7b2abdd848b95f2f578043fc23bd6f6fd24d') }
let!(:diff_file) { commit.diffs.diff_file_with_new_path('files/ruby/popen.rb') }
let(:viewer_class) do
Class.new(DiffViewer::Base) do
@ -15,8 +15,7 @@ describe DiffViewer::ServerSide do
describe '#prepare!' do
it 'loads all diff file data' do
expect(diff_file.old_blob).to receive(:load_all_data!)
expect(diff_file.new_blob).to receive(:load_all_data!)
expect(Blob).to receive(:lazy).at_least(:twice)
subject.prepare!
end