Expire project caches once per push instead of once per ref

Previously `ProjectCacheWorker` would be scheduled once per ref, which
would generate unnecessary I/O and load on Sidekiq, especially if many
tags or branches were pushed at once. `ProjectCacheWorker` would expire
three items:

1. Repository size: This only needs to be updated once per push.
2. Commit count: This only needs to be updated if the default branch
   is updated.
3. Project method caches: This only needs to be updated if the default
   branch changes, but only if certain files change (e.g. README,
   CHANGELOG, etc.).

Because the third item requires looking at the actual changes in the
commit deltas, we schedule one `ProjectCacheWorker` to handle the first
two cases, and schedule a separate `ProjectCacheWorker` for the third
case if it is needed. As a result, this brings down the number of
`ProjectCacheWorker` jobs from N to 2.

Closes https://gitlab.com/gitlab-org/gitlab-ce/issues/52046
This commit is contained in:
Stan Hu 2019-08-16 19:53:56 +00:00 committed by Douwe Maan
parent b3145bc976
commit f14647fdae
11 changed files with 201 additions and 27 deletions

View file

@ -389,11 +389,15 @@ class Repository
expire_statistics_caches expire_statistics_caches
end end
# Runs code after a repository has been created. def expire_status_cache
def after_create
expire_exists_cache expire_exists_cache
expire_root_ref_cache expire_root_ref_cache
expire_emptiness_caches expire_emptiness_caches
end
# Runs code after a repository has been created.
def after_create
expire_status_cache
repository_event(:create_repository) repository_event(:create_repository)
end end

View file

@ -8,8 +8,6 @@ module Git
PROCESS_COMMIT_LIMIT = 100 PROCESS_COMMIT_LIMIT = 100
def execute def execute
project.repository.after_create if project.empty_repo?
create_events create_events
create_pipelines create_pipelines
execute_project_hooks execute_project_hooks
@ -70,11 +68,11 @@ module Git
end end
def enqueue_invalidate_cache def enqueue_invalidate_cache
ProjectCacheWorker.perform_async( file_types = invalidated_file_types
project.id,
invalidated_file_types, return unless file_types.present?
[:commit_count, :repository_size]
) ProjectCacheWorker.perform_async(project.id, file_types, [], false)
end end
def base_params def base_params

View file

@ -42,10 +42,8 @@ class PostReceive
user = identify_user(post_received) user = identify_user(post_received)
return false unless user return false unless user
# Expire the branches cache so we have updated data for this push # We only need to expire certain caches once per push
post_received.project.repository.expire_branches_cache if post_received.includes_branches? expire_caches(post_received)
# We only need to expire tags once per push
post_received.project.repository.expire_caches_for_tags if post_received.includes_tags?
post_received.enum_for(:changes_refs).with_index do |(oldrev, newrev, ref), index| post_received.enum_for(:changes_refs).with_index do |(oldrev, newrev, ref), index|
service_klass = service_klass =
@ -74,6 +72,30 @@ class PostReceive
after_project_changes_hooks(post_received, user, refs.to_a, changes) after_project_changes_hooks(post_received, user, refs.to_a, changes)
end end
# Expire the project, branch, and tag cache once per push. Schedule an
# update for the repository size and commit count if necessary.
def expire_caches(post_received)
project = post_received.project
project.repository.expire_status_cache if project.empty_repo?
project.repository.expire_branches_cache if post_received.includes_branches?
project.repository.expire_caches_for_tags if post_received.includes_tags?
enqueue_repository_cache_update(post_received)
end
def enqueue_repository_cache_update(post_received)
stats_to_invalidate = [:repository_size]
stats_to_invalidate << :commit_count if post_received.includes_default_branch?
ProjectCacheWorker.perform_async(
post_received.project.id,
[],
stats_to_invalidate,
true
)
end
def after_project_changes_hooks(post_received, user, refs, changes) def after_project_changes_hooks(post_received, user, refs, changes)
hook_data = Gitlab::DataBuilder::Repository.update(post_received.project, user, changes, refs) hook_data = Gitlab::DataBuilder::Repository.update(post_received.project, user, changes, refs)
SystemHooksService.new.execute_hooks(hook_data, :repository_update_hooks) SystemHooksService.new.execute_hooks(hook_data, :repository_update_hooks)

View file

@ -12,13 +12,15 @@ class ProjectCacheWorker
# CHANGELOG. # CHANGELOG.
# statistics - An Array containing columns from ProjectStatistics to # statistics - An Array containing columns from ProjectStatistics to
# refresh, if empty all columns will be refreshed # refresh, if empty all columns will be refreshed
# refresh_statistics - A boolean that determines whether project statistics should
# be updated.
# rubocop: disable CodeReuse/ActiveRecord # rubocop: disable CodeReuse/ActiveRecord
def perform(project_id, files = [], statistics = []) def perform(project_id, files = [], statistics = [], refresh_statistics = true)
project = Project.find_by(id: project_id) project = Project.find_by(id: project_id)
return unless project return unless project
update_statistics(project, statistics) update_statistics(project, statistics) if refresh_statistics
return unless project.repository.exists? return unless project.repository.exists?

View file

@ -0,0 +1,5 @@
---
title: Expire project caches once per push instead of once per ref
merge_request: 31876
author:
type: performance

View file

@ -39,6 +39,17 @@ module Gitlab
end end
end end
def includes_default_branch?
# If the branch doesn't have a default branch yet, we presume the
# first branch pushed will be the default.
return true unless project.default_branch.present?
enum_for(:changes_refs).any? do |_oldrev, _newrev, ref|
Gitlab::Git.branch_ref?(ref) &&
Gitlab::Git.branch_name(ref) == project.default_branch
end
end
private private
def deserialize_changes(changes) def deserialize_changes(changes)

View file

@ -3,7 +3,7 @@
require 'spec_helper' require 'spec_helper'
describe ::Gitlab::GitPostReceive do describe ::Gitlab::GitPostReceive do
let(:project) { create(:project) } set(:project) { create(:project, :repository) }
subject { described_class.new(project, "project-#{project.id}", changes.dup, {}) } subject { described_class.new(project, "project-#{project.id}", changes.dup, {}) }
@ -92,4 +92,47 @@ describe ::Gitlab::GitPostReceive do
end end
end end
end end
describe '#includes_default_branch?' do
context 'with no default branch' do
let(:changes) do
<<~EOF
654321 210987 refs/heads/test1
654322 210986 refs/tags/#{project.default_branch}
654323 210985 refs/heads/test3
EOF
end
it 'returns false' do
expect(subject.includes_default_branch?).to be_falsey
end
end
context 'with a project with no default branch' do
let(:changes) do
<<~EOF
654321 210987 refs/heads/test1
EOF
end
it 'returns true' do
expect(project).to receive(:default_branch).and_return(nil)
expect(subject.includes_default_branch?).to be_truthy
end
end
context 'with default branch' do
let(:changes) do
<<~EOF
654322 210986 refs/heads/test1
654321 210987 refs/tags/test2
654323 210985 refs/heads/#{project.default_branch}
EOF
end
it 'returns true' do
expect(subject.includes_default_branch?).to be_truthy
end
end
end
end end

View file

@ -1815,22 +1815,36 @@ describe Repository do
end end
describe '#after_create' do describe '#after_create' do
it 'calls expire_status_cache' do
expect(repository).to receive(:expire_status_cache)
repository.after_create
end
it 'logs an event' do
expect(repository).to receive(:repository_event).with(:create_repository)
repository.after_create
end
end
describe '#expire_status_cache' do
it 'flushes the exists cache' do it 'flushes the exists cache' do
expect(repository).to receive(:expire_exists_cache) expect(repository).to receive(:expire_exists_cache)
repository.after_create repository.expire_status_cache
end end
it 'flushes the root ref cache' do it 'flushes the root ref cache' do
expect(repository).to receive(:expire_root_ref_cache) expect(repository).to receive(:expire_root_ref_cache)
repository.after_create repository.expire_status_cache
end end
it 'flushes the emptiness caches' do it 'flushes the emptiness caches' do
expect(repository).to receive(:expire_emptiness_caches) expect(repository).to receive(:expire_emptiness_caches)
repository.after_create repository.expire_status_cache
end end
end end

View file

@ -158,9 +158,13 @@ describe Git::BranchHooksService do
let(:blank_sha) { Gitlab::Git::BLANK_SHA } let(:blank_sha) { Gitlab::Git::BLANK_SHA }
def clears_cache(extended: []) def clears_cache(extended: [])
expect(service).to receive(:invalidated_file_types).and_return(extended)
if extended.present?
expect(ProjectCacheWorker) expect(ProjectCacheWorker)
.to receive(:perform_async) .to receive(:perform_async)
.with(project.id, extended, %i[commit_count repository_size]) .with(project.id, extended, [], false)
end
service.execute service.execute
end end

View file

@ -37,6 +37,29 @@ describe PostReceive do
end end
describe "#process_project_changes" do describe "#process_project_changes" do
context 'with an empty project' do
let(:empty_project) { create(:project, :empty_repo) }
let(:changes) { "123456 789012 refs/heads/tést1\n" }
before do
allow_any_instance_of(Gitlab::GitPostReceive).to receive(:identify).and_return(empty_project.owner)
allow(Gitlab::GlRepository).to receive(:parse).and_return([empty_project, Gitlab::GlRepository::PROJECT])
end
it 'expire the status cache' do
expect(empty_project.repository).to receive(:expire_status_cache)
perform
end
it 'schedules a cache update for commit count and size' do
expect(ProjectCacheWorker).to receive(:perform_async)
.with(empty_project.id, [], [:repository_size, :commit_count], true)
perform
end
end
context 'empty changes' do context 'empty changes' do
it "does not call any PushService but runs after project hooks" do it "does not call any PushService but runs after project hooks" do
expect(Git::BranchPushService).not_to receive(:new) expect(Git::BranchPushService).not_to receive(:new)
@ -67,15 +90,22 @@ describe PostReceive do
context "branches" do context "branches" do
let(:changes) do let(:changes) do
<<~EOF <<~EOF
'123456 789012 refs/heads/tést1' 123456 789012 refs/heads/tést1
'123456 789012 refs/heads/tést2' 123456 789012 refs/heads/tést2
EOF EOF
end end
it 'expires the branches cache' do it 'expires the branches cache' do
expect(project.repository).to receive(:expire_branches_cache).once expect(project.repository).to receive(:expire_branches_cache).once
described_class.new.perform(gl_repository, key_id, base64_changes) perform
end
it 'expires the status cache' do
expect(project).to receive(:empty_repo?).and_return(true)
expect(project.repository).to receive(:expire_status_cache)
perform
end end
it 'calls Git::BranchPushService' do it 'calls Git::BranchPushService' do
@ -87,6 +117,30 @@ describe PostReceive do
perform perform
end end
it 'schedules a cache update for repository size only' do
expect(ProjectCacheWorker).to receive(:perform_async)
.with(project.id, [], [:repository_size], true)
perform
end
context 'with a default branch' do
let(:changes) do
<<~EOF
123456 789012 refs/heads/tést1
123456 789012 refs/heads/tést2
678912 123455 refs/heads/#{project.default_branch}
EOF
end
it 'schedules a cache update for commit count and size' do
expect(ProjectCacheWorker).to receive(:perform_async)
.with(project.id, [], [:repository_size, :commit_count], true)
perform
end
end
end end
context "tags" do context "tags" do
@ -107,7 +161,7 @@ describe PostReceive do
it 'does not expire branches cache' do it 'does not expire branches cache' do
expect(project.repository).not_to receive(:expire_branches_cache) expect(project.repository).not_to receive(:expire_branches_cache)
described_class.new.perform(gl_repository, key_id, base64_changes) perform
end end
it "only invalidates tags once" do it "only invalidates tags once" do
@ -115,7 +169,7 @@ describe PostReceive do
expect(project.repository).to receive(:expire_caches_for_tags).once.and_call_original expect(project.repository).to receive(:expire_caches_for_tags).once.and_call_original
expect(project.repository).to receive(:expire_tags_cache).once.and_call_original expect(project.repository).to receive(:expire_tags_cache).once.and_call_original
described_class.new.perform(gl_repository, key_id, base64_changes) perform
end end
it "calls Git::TagPushService" do it "calls Git::TagPushService" do
@ -129,6 +183,13 @@ describe PostReceive do
perform perform
end end
it 'schedules a single ProjectCacheWorker update' do
expect(ProjectCacheWorker).to receive(:perform_async)
.with(project.id, [], [:repository_size], true)
perform
end
end end
context "merge-requests" do context "merge-requests" do

View file

@ -49,6 +49,16 @@ describe ProjectCacheWorker do
worker.perform(project.id, %w(readme)) worker.perform(project.id, %w(readme))
end end
context 'with statistics disabled' do
let(:statistics) { [] }
it 'does not update the project statistics' do
expect(worker).not_to receive(:update_statistics)
worker.perform(project.id, [], [], false)
end
end
context 'with statistics' do context 'with statistics' do
let(:statistics) { %w(repository_size) } let(:statistics) { %w(repository_size) }