Reduce Gitaly calls in PostReceive

This commit reduces I/O load and memory utilization during PostReceive
for the common case when no project hooks or services are set up.

We saw a Gitaly N+1 issue in `CommitDelta` when many tags or branches
are pushed. We can reduce this overhead in the common case because we
observe that most new projects do not have any Web hooks or services,
especially when they are first created. Previously, `BaseHooksService`
unconditionally iterated through the last 20 commits of each ref to
build the `push_data` structure. The `push_data` structured was used in
numerous places:

1. Building the push payload in `EventCreateService`
2. Creating a CI pipeline
3. Executing project Web or system hooks
4. Executing project services
5. As the return value of `BaseHooksService#execute`
6. `BranchHooksService#invalidated_file_types`

We only need to generate the full `push_data` for items 3, 4, and 6.

Item 1: `EventCreateService` only needs the last commit and doesn't
actually need the commit deltas.

Item 2: In addition, `Ci::CreatePipelineService` only needed a subset of
the parameters.

Item 5: The return value of `BaseHooksService#execute` also wasn't being
used anywhere.

Item 6: This is only used when pushing to the default branch, so if
many tags are pushed we can save significant I/O here.

Closes https://gitlab.com/gitlab-org/gitlab-ce/issues/65878

Fic
This commit is contained in:
Stan Hu 2019-08-12 15:31:58 -07:00
parent 6480bd6dc9
commit 4e2bb4e5e7
11 changed files with 205 additions and 18 deletions

View file

@ -1230,6 +1230,14 @@ class Project < ApplicationRecord
end
end
def has_active_hooks?(hooks_scope = :push_hooks)
hooks.hooks_for(hooks_scope).any? || SystemHook.hooks_for(hooks_scope).any?
end
def has_active_services?(hooks_scope = :push_hooks)
services.public_send(hooks_scope).any? # rubocop:disable GitlabSecurity/PublicSend
end
def valid_repo?
repository.exists?
rescue

View file

@ -19,7 +19,7 @@ module Git
update_remote_mirrors
push_data
success
end
private
@ -33,7 +33,7 @@ module Git
end
def limited_commits
commits.last(PROCESS_COMMIT_LIMIT)
@limited_commits ||= commits.last(PROCESS_COMMIT_LIMIT)
end
def commits_count
@ -48,21 +48,25 @@ module Git
[]
end
# Push events in the activity feed only show information for the
# last commit.
def create_events
EventCreateService.new.push(project, current_user, push_data)
EventCreateService.new.push(project, current_user, event_push_data)
end
def create_pipelines
return unless params.fetch(:create_pipelines, true)
Ci::CreatePipelineService
.new(project, current_user, push_data)
.new(project, current_user, base_params)
.execute(:push, pipeline_options)
end
def execute_project_hooks
project.execute_hooks(push_data, hook_name)
project.execute_services(push_data, hook_name)
# Creating push_data invokes one CommitDelta RPC per commit. Only
# build this data if we actually need it.
project.execute_hooks(push_data, hook_name) if project.has_active_hooks?(hook_name)
project.execute_services(push_data, hook_name) if project.has_active_services?(hook_name)
end
def enqueue_invalidate_cache
@ -73,18 +77,35 @@ module Git
)
end
def push_data
@push_data ||= Gitlab::DataBuilder::Push.build(
project: project,
user: current_user,
def base_params
{
oldrev: params[:oldrev],
newrev: params[:newrev],
ref: params[:ref],
commits: limited_commits,
push_options: params[:push_options] || {}
}
end
def push_data_params(commits:, with_changed_files: true)
base_params.merge(
project: project,
user: current_user,
commits: commits,
message: event_message,
commits_count: commits_count,
push_options: params[:push_options] || {}
with_changed_files: with_changed_files
)
end
def event_push_data
# We only need the last commit for the event push, and we don't
# need the full deltas either.
@event_push_data ||= Gitlab::DataBuilder::Push.build(
push_data_params(commits: commits.last, with_changed_files: false))
end
def push_data
@push_data ||= Gitlab::DataBuilder::Push.build(push_data_params(commits: limited_commits))
# Dependent code may modify the push data, so return a duplicate each time
@push_data.dup

View file

@ -0,0 +1,5 @@
---
title: Reduce Gitaly calls in PostReceive
merge_request: 31741
author:
type: performance

View file

@ -60,7 +60,8 @@ module Gitlab
# rubocop:disable Metrics/ParameterLists
def build(
project:, user:, ref:, oldrev: nil, newrev: nil,
commits: [], commits_count: nil, message: nil, push_options: {})
commits: [], commits_count: nil, message: nil, push_options: {},
with_changed_files: true)
commits = Array(commits)
@ -75,7 +76,7 @@ module Gitlab
# n+1: https://gitlab.com/gitlab-org/gitlab-ce/issues/38259
commit_attrs = Gitlab::GitalyClient.allow_n_plus_1_calls do
commits_limited.map do |commit|
commit.hook_attrs(with_changed_files: true)
commit.hook_attrs(with_changed_files: with_changed_files)
end
end

View file

@ -3,9 +3,43 @@
require 'spec_helper'
describe Gitlab::DataBuilder::Push do
include RepoHelpers
let(:project) { create(:project, :repository) }
let(:user) { build(:user, public_email: 'public-email@example.com') }
describe '.build' do
let(:sample) { RepoHelpers.sample_compare }
let(:commits) { project.repository.commits_between(sample.commits.first, sample.commits.last) }
let(:subject) do
described_class.build(project: project,
user: user,
ref: sample.target_branch,
commits: commits,
commits_count: commits.length,
message: 'test message',
with_changed_files: with_changed_files)
end
context 'with changed files' do
let(:with_changed_files) { true }
it 'returns commit hook data' do
expect(subject[:project]).to eq(project.hook_attrs)
expect(subject[:commits].first.keys).to include(*%i(added removed modified))
end
end
context 'without changed files' do
let(:with_changed_files) { false }
it 'returns commit hook data without include deltas' do
expect(subject[:project]).to eq(project.hook_attrs)
expect(subject[:commits].first.keys).not_to include(*%i(added removed modified))
end
end
end
describe '.build_sample' do
let(:data) { described_class.build_sample(project, user) }

View file

@ -4297,6 +4297,39 @@ describe Project do
end
end
describe '#has_active_hooks?' do
set(:project) { create(:project) }
it { expect(project.has_active_hooks?).to be_falsey }
it 'returns true when a matching push hook exists' do
create(:project_hook, push_events: true, project: project)
expect(project.has_active_hooks?(:merge_request_events)).to be_falsey
expect(project.has_active_hooks?).to be_truthy
end
it 'returns true when a matching system hook exists' do
create(:system_hook, push_events: true)
expect(project.has_active_hooks?(:merge_request_events)).to be_falsey
expect(project.has_active_hooks?).to be_truthy
end
end
describe '#has_active_services?' do
set(:project) { create(:project) }
it { expect(project.has_active_services?).to be_falsey }
it 'returns true when a matching service exists' do
create(:custom_issue_tracker_service, push_events: true, merge_requests_events: false, project: project)
expect(project.has_active_services?(:merge_request_hooks)).to be_falsey
expect(project.has_active_services?).to be_truthy
end
end
describe '#badges' do
let(:project_group) { create(:group) }
let(:project) { create(:project, path: 'avatar', namespace: project_group) }

View file

@ -14,6 +14,78 @@ describe Git::BaseHooksService do
let(:newrev) { "8a2a6eb295bb170b34c24c76c49ed0e9b2eaf34b" } # gitlab-test: git rev-parse refs/tags/v1.1.0
let(:ref) { 'refs/tags/v1.1.0' }
describe '#execute_project_hooks' do
class TestService < described_class
def hook_name
:push_hooks
end
def commits
[]
end
end
let(:project) { create(:project, :repository) }
subject { TestService.new(project, user, oldrev: oldrev, newrev: newrev, ref: ref) }
context '#execute_hooks' do
before do
expect(project).to receive(:has_active_hooks?).and_return(active)
end
context 'active hooks' do
let(:active) { true }
it 'executes the hooks' do
expect(subject).to receive(:push_data).at_least(:once).and_call_original
expect(project).to receive(:execute_hooks)
subject.execute
end
end
context 'inactive hooks' do
let(:active) { false }
it 'does not execute the hooks' do
expect(subject).not_to receive(:push_data)
expect(project).not_to receive(:execute_hooks)
subject.execute
end
end
end
context '#execute_services' do
before do
expect(project).to receive(:has_active_services?).and_return(active)
end
context 'active services' do
let(:active) { true }
it 'executes the services' do
expect(subject).to receive(:push_data).at_least(:once).and_call_original
expect(project).to receive(:execute_services)
subject.execute
end
end
context 'inactive services' do
let(:active) { false }
it 'does not execute the services' do
expect(subject).not_to receive(:push_data)
expect(project).not_to receive(:execute_services)
subject.execute
end
end
end
end
describe 'with remote mirrors' do
class TestService < described_class
def commits

View file

@ -25,7 +25,7 @@ describe Git::BranchHooksService do
end
describe "Git Push Data" do
subject(:push_data) { service.execute }
subject(:push_data) { service.send(:push_data) }
it 'has expected push data attributes' do
is_expected.to match a_hash_including(
@ -109,6 +109,7 @@ describe Git::BranchHooksService do
expect(event.push_event_payload).to be_an_instance_of(PushEventPayload)
expect(event.push_event_payload.commit_from).to eq(oldrev)
expect(event.push_event_payload.commit_to).to eq(newrev)
expect(event.push_event_payload.commit_title).to eq('Change some files')
expect(event.push_event_payload.ref).to eq('master')
expect(event.push_event_payload.commit_count).to eq(1)
end
@ -124,6 +125,7 @@ describe Git::BranchHooksService do
expect(event.push_event_payload).to be_an_instance_of(PushEventPayload)
expect(event.push_event_payload.commit_from).to be_nil
expect(event.push_event_payload.commit_to).to eq(newrev)
expect(event.push_event_payload.commit_title).to eq('Initial commit')
expect(event.push_event_payload.ref).to eq('master')
expect(event.push_event_payload.commit_count).to be > 1
end

View file

@ -78,7 +78,10 @@ describe Git::BranchPushService, services: true do
it "creates a new pipeline" do
expect { subject }.to change { Ci::Pipeline.count }
expect(Ci::Pipeline.last).to be_push
pipeline = Ci::Pipeline.last
expect(pipeline).to be_push
expect(Gitlab::Git::BRANCH_REF_PREFIX + pipeline.ref).to eq(ref)
end
end
@ -123,6 +126,10 @@ describe Git::BranchPushService, services: true do
describe "Webhooks" do
context "execute webhooks" do
before do
create(:project_hook, push_events: true, project: project)
end
it "when pushing a branch for the first time" do
expect(project).to receive(:execute_hooks)
expect(project.default_branch).to eq("master")

View file

@ -26,7 +26,8 @@ describe Git::TagHooksService, :service do
describe 'System hooks' do
it 'Executes system hooks' do
push_data = service.execute
push_data = service.send(:push_data)
expect(project).to receive(:has_active_hooks?).and_return(true)
expect_next_instance_of(SystemHooksService) do |system_hooks_service|
expect(system_hooks_service)
@ -40,6 +41,7 @@ describe Git::TagHooksService, :service do
describe "Webhooks" do
it "executes hooks on the project" do
expect(project).to receive(:has_active_hooks?).and_return(true)
expect(project).to receive(:execute_hooks)
service.execute
@ -61,7 +63,7 @@ describe Git::TagHooksService, :service do
describe 'Push data' do
shared_examples_for 'tag push data expectations' do
subject(:push_data) { service.execute }
subject(:push_data) { service.send(:push_data) }
it 'has expected push data attributes' do
is_expected.to match a_hash_including(
object_kind: 'tag_push',

View file

@ -222,6 +222,8 @@ describe PostReceive do
end
it "asks the project to trigger all hooks" do
create(:project_hook, push_events: true, tag_push_events: true, project: project)
create(:custom_issue_tracker_service, push_events: true, merge_requests_events: false, project: project)
allow(Project).to receive(:find_by).and_return(project)
expect(project).to receive(:execute_hooks).twice