Expire project caches once per push instead of once per ref

Previously `ProjectCacheWorker` would be scheduled once per ref, which
would generate unnecessary I/O and load on Sidekiq, especially if many
tags or branches were pushed at once. `ProjectCacheWorker` would expire
three items:

1. Repository size: This only needs to be updated once per push.
2. Commit count: This only needs to be updated if the default branch
   is updated.
3. Project method caches: This only needs to be updated if the default
   branch changes, and even then only if certain files change (e.g.
   README, CHANGELOG).

Because the third item requires looking at the actual changes in the
commit deltas, we schedule one `ProjectCacheWorker` per push to handle
the first two cases, and schedule a separate `ProjectCacheWorker` for the
third case only if it is needed. As a result, this brings down the number
of `ProjectCacheWorker` jobs per push from N to at most 2.

Closes https://gitlab.com/gitlab-org/gitlab-ce/issues/52046
This commit is contained in:
Stan Hu 2019-08-16 19:53:56 +00:00 committed by Douwe Maan
parent b3145bc976
commit f14647fdae
11 changed files with 201 additions and 27 deletions

View file

@ -389,11 +389,15 @@ class Repository
expire_statistics_caches
end
# Runs code after a repository has been created.
def after_create
def expire_status_cache
expire_exists_cache
expire_root_ref_cache
expire_emptiness_caches
end
# Runs code after a repository has been created.
def after_create
expire_status_cache
repository_event(:create_repository)
end

View file

@ -8,8 +8,6 @@ module Git
PROCESS_COMMIT_LIMIT = 100
def execute
project.repository.after_create if project.empty_repo?
create_events
create_pipelines
execute_project_hooks
@ -70,11 +68,11 @@ module Git
end
def enqueue_invalidate_cache
ProjectCacheWorker.perform_async(
project.id,
invalidated_file_types,
[:commit_count, :repository_size]
)
file_types = invalidated_file_types
return unless file_types.present?
ProjectCacheWorker.perform_async(project.id, file_types, [], false)
end
def base_params

View file

@ -42,10 +42,8 @@ class PostReceive
user = identify_user(post_received)
return false unless user
# Expire the branches cache so we have updated data for this push
post_received.project.repository.expire_branches_cache if post_received.includes_branches?
# We only need to expire tags once per push
post_received.project.repository.expire_caches_for_tags if post_received.includes_tags?
# We only need to expire certain caches once per push
expire_caches(post_received)
post_received.enum_for(:changes_refs).with_index do |(oldrev, newrev, ref), index|
service_klass =
@ -74,6 +72,30 @@ class PostReceive
after_project_changes_hooks(post_received, user, refs.to_a, changes)
end
# Flushes the push-wide caches a single time per push, then queues the
# statistics refresh job via enqueue_repository_cache_update.
#
# post_received - The push being processed. It supplies the project and the
#                 includes_branches? / includes_tags? predicates.
#
# Each cache is only touched when its guard holds: the status cache while the
# project reports an empty repository, the branches cache when the push
# includes branches, and the tag caches when it includes tags.
def expire_caches(post_received)
  target = post_received.project

  if target.empty_repo?
    target.repository.expire_status_cache
  end

  if post_received.includes_branches?
    target.repository.expire_branches_cache
  end

  if post_received.includes_tags?
    target.repository.expire_caches_for_tags
  end

  enqueue_repository_cache_update(post_received)
end
# Schedules one ProjectCacheWorker job for the push with statistics refresh
# turned on and no file types to invalidate.
#
# post_received - The push being processed. Supplies the project id and the
#                 includes_default_branch? predicate.
#
# :repository_size is always requested; :commit_count is added only when the
# push includes the default branch.
def enqueue_repository_cache_update(post_received)
  stats =
    if post_received.includes_default_branch?
      %i[repository_size commit_count]
    else
      %i[repository_size]
    end

  ProjectCacheWorker.perform_async(post_received.project.id, [], stats, true)
end
def after_project_changes_hooks(post_received, user, refs, changes)
hook_data = Gitlab::DataBuilder::Repository.update(post_received.project, user, changes, refs)
SystemHooksService.new.execute_hooks(hook_data, :repository_update_hooks)

View file

@ -12,13 +12,15 @@ class ProjectCacheWorker
# CHANGELOG.
# statistics - An Array containing columns from ProjectStatistics to
# refresh, if empty all columns will be refreshed
# refresh_statistics - A boolean that determines whether project statistics should
# be updated.
# rubocop: disable CodeReuse/ActiveRecord
def perform(project_id, files = [], statistics = [])
def perform(project_id, files = [], statistics = [], refresh_statistics = true)
project = Project.find_by(id: project_id)
return unless project
update_statistics(project, statistics)
update_statistics(project, statistics) if refresh_statistics
return unless project.repository.exists?

View file

@ -0,0 +1,5 @@
---
title: Expire project caches once per push instead of once per ref
merge_request: 31876
author:
type: performance

View file

@ -39,6 +39,17 @@ module Gitlab
end
end
# Reports whether this push touches the project's default branch.
#
# Returns true when the project has no default branch yet, or when any
# changed ref passes Gitlab::Git.branch_ref? and its branch name equals the
# project's default branch. Returns false otherwise.
def includes_default_branch?
  # Read the default branch once instead of once per changed ref.
  default_branch = project.default_branch

  # If the project doesn't have a default branch yet, we presume the
  # first branch pushed will be the default.
  return true unless default_branch.present?

  enum_for(:changes_refs).any? do |_oldrev, _newrev, ref|
    Gitlab::Git.branch_ref?(ref) &&
      Gitlab::Git.branch_name(ref) == default_branch
  end
end
private
def deserialize_changes(changes)

View file

@ -3,7 +3,7 @@
require 'spec_helper'
describe ::Gitlab::GitPostReceive do
let(:project) { create(:project) }
set(:project) { create(:project, :repository) }
subject { described_class.new(project, "project-#{project.id}", changes.dup, {}) }
@ -92,4 +92,47 @@ describe ::Gitlab::GitPostReceive do
end
end
end
# Exercises includes_default_branch? against three kinds of pushes.
describe '#includes_default_branch?' do
# The pushed refs are two other branches plus a tag that reuses the default
# branch's name; none of them is the default branch ref, so the result is
# expected to be falsey.
context 'with no default branch' do
let(:changes) do
<<~EOF
654321 210987 refs/heads/test1
654322 210986 refs/tags/#{project.default_branch}
654323 210985 refs/heads/test3
EOF
end
it 'returns false' do
expect(subject.includes_default_branch?).to be_falsey
end
end
# The project's default_branch is stubbed to nil, so any push is expected to
# count as including the default branch.
context 'with a project with no default branch' do
let(:changes) do
<<~EOF
654321 210987 refs/heads/test1
EOF
end
it 'returns true' do
expect(project).to receive(:default_branch).and_return(nil)
expect(subject.includes_default_branch?).to be_truthy
end
end
# The default branch ref is the last of several refs in the push.
context 'with default branch' do
let(:changes) do
<<~EOF
654322 210986 refs/heads/test1
654321 210987 refs/tags/test2
654323 210985 refs/heads/#{project.default_branch}
EOF
end
it 'returns true' do
expect(subject.includes_default_branch?).to be_truthy
end
end
end
end

View file

@ -1815,22 +1815,36 @@ describe Repository do
end
# Checks the two calls expected from Repository#after_create: it invokes
# expire_status_cache and it reports a :create_repository event.
describe '#after_create' do
it 'calls expire_status_cache' do
expect(repository).to receive(:expire_status_cache)
repository.after_create
end
it 'logs an event' do
expect(repository).to receive(:repository_event).with(:create_repository)
repository.after_create
end
end
describe '#expire_status_cache' do
it 'flushes the exists cache' do
expect(repository).to receive(:expire_exists_cache)
repository.after_create
repository.expire_status_cache
end
it 'flushes the root ref cache' do
expect(repository).to receive(:expire_root_ref_cache)
repository.after_create
repository.expire_status_cache
end
it 'flushes the emptiness caches' do
expect(repository).to receive(:expire_emptiness_caches)
repository.after_create
repository.expire_status_cache
end
end

View file

@ -158,9 +158,13 @@ describe Git::BranchHooksService do
let(:blank_sha) { Gitlab::Git::BLANK_SHA }
def clears_cache(extended: [])
expect(ProjectCacheWorker)
.to receive(:perform_async)
.with(project.id, extended, %i[commit_count repository_size])
expect(service).to receive(:invalidated_file_types).and_return(extended)
if extended.present?
expect(ProjectCacheWorker)
.to receive(:perform_async)
.with(project.id, extended, [], false)
end
service.execute
end

View file

@ -37,6 +37,29 @@ describe PostReceive do
end
describe "#process_project_changes" do
# Pushing a single branch to a project created with the :empty_repo trait.
context 'with an empty project' do
let(:empty_project) { create(:project, :empty_repo) }
let(:changes) { "123456 789012 refs/heads/tést1\n" }
before do
# Stub user identification and gl_repository parsing so that the worker
# operates on empty_project as its owner.
allow_any_instance_of(Gitlab::GitPostReceive).to receive(:identify).and_return(empty_project.owner)
allow(Gitlab::GlRepository).to receive(:parse).and_return([empty_project, Gitlab::GlRepository::PROJECT])
end
it 'expire the status cache' do
expect(empty_project.repository).to receive(:expire_status_cache)
perform
end
# The expected job carries both :repository_size and :commit_count.
it 'schedules a cache update for commit count and size' do
expect(ProjectCacheWorker).to receive(:perform_async)
.with(empty_project.id, [], [:repository_size, :commit_count], true)
perform
end
end
context 'empty changes' do
it "does not call any PushService but runs after project hooks" do
expect(Git::BranchPushService).not_to receive(:new)
@ -67,15 +90,22 @@ describe PostReceive do
context "branches" do
let(:changes) do
<<~EOF
'123456 789012 refs/heads/tést1'
'123456 789012 refs/heads/tést2'
123456 789012 refs/heads/tést1
123456 789012 refs/heads/tést2
EOF
end
it 'expires the branches cache' do
expect(project.repository).to receive(:expire_branches_cache).once
described_class.new.perform(gl_repository, key_id, base64_changes)
perform
end
it 'expires the status cache' do
expect(project).to receive(:empty_repo?).and_return(true)
expect(project.repository).to receive(:expire_status_cache)
perform
end
it 'calls Git::BranchPushService' do
@ -87,6 +117,30 @@ describe PostReceive do
perform
end
it 'schedules a cache update for repository size only' do
expect(ProjectCacheWorker).to receive(:perform_async)
.with(project.id, [], [:repository_size], true)
perform
end
context 'with a default branch' do
let(:changes) do
<<~EOF
123456 789012 refs/heads/tést1
123456 789012 refs/heads/tést2
678912 123455 refs/heads/#{project.default_branch}
EOF
end
it 'schedules a cache update for commit count and size' do
expect(ProjectCacheWorker).to receive(:perform_async)
.with(project.id, [], [:repository_size, :commit_count], true)
perform
end
end
end
context "tags" do
@ -107,7 +161,7 @@ describe PostReceive do
it 'does not expire branches cache' do
expect(project.repository).not_to receive(:expire_branches_cache)
described_class.new.perform(gl_repository, key_id, base64_changes)
perform
end
it "only invalidates tags once" do
@ -115,7 +169,7 @@ describe PostReceive do
expect(project.repository).to receive(:expire_caches_for_tags).once.and_call_original
expect(project.repository).to receive(:expire_tags_cache).once.and_call_original
described_class.new.perform(gl_repository, key_id, base64_changes)
perform
end
it "calls Git::TagPushService" do
@ -129,6 +183,13 @@ describe PostReceive do
perform
end
it 'schedules a single ProjectCacheWorker update' do
expect(ProjectCacheWorker).to receive(:perform_async)
.with(project.id, [], [:repository_size], true)
perform
end
end
context "merge-requests" do

View file

@ -49,6 +49,16 @@ describe ProjectCacheWorker do
worker.perform(project.id, %w(readme))
end
# Passing false as the fourth argument (refresh_statistics) must skip
# update_statistics entirely.
context 'with statistics disabled' do
# NOTE(review): presumably consumed by shared setup outside this context — confirm.
let(:statistics) { [] }
it 'does not update the project statistics' do
expect(worker).not_to receive(:update_statistics)
worker.perform(project.id, [], [], false)
end
end
context 'with statistics' do
let(:statistics) { %w(repository_size) }