Expire project caches once per push instead of once per ref
Previously `ProjectCacheWorker` would be scheduled once per ref, which would generate unnecessary I/O and load on Sidekiq, especially if many tags or branches were pushed at once. `ProjectCacheWorker` would expire three items: 1. Repository size: This only needs to be updated once per push. 2. Commit count: This only needs to be updated if the default branch is updated. 3. Project method caches: This only needs to be updated if the default branch changes, but only if certain files change (e.g. README, CHANGELOG, etc.). Because the third item requires looking at the actual changes in the commit deltas, we schedule one `ProjectCacheWorker` to handle the first two cases, and schedule a separate `ProjectCacheWorker` for the third case if it is needed. As a result, this brings down the number of `ProjectCacheWorker` jobs from N to 2. Closes https://gitlab.com/gitlab-org/gitlab-ce/issues/52046
This commit is contained in:
parent
b3145bc976
commit
f14647fdae
11 changed files with 201 additions and 27 deletions
|
@ -389,11 +389,15 @@ class Repository
|
|||
expire_statistics_caches
|
||||
end
|
||||
|
||||
# Runs code after a repository has been created.
|
||||
def after_create
|
||||
def expire_status_cache
|
||||
expire_exists_cache
|
||||
expire_root_ref_cache
|
||||
expire_emptiness_caches
|
||||
end
|
||||
|
||||
# Runs code after a repository has been created.
|
||||
def after_create
|
||||
expire_status_cache
|
||||
|
||||
repository_event(:create_repository)
|
||||
end
|
||||
|
|
|
@ -8,8 +8,6 @@ module Git
|
|||
PROCESS_COMMIT_LIMIT = 100
|
||||
|
||||
def execute
|
||||
project.repository.after_create if project.empty_repo?
|
||||
|
||||
create_events
|
||||
create_pipelines
|
||||
execute_project_hooks
|
||||
|
@ -70,11 +68,11 @@ module Git
|
|||
end
|
||||
|
||||
def enqueue_invalidate_cache
|
||||
ProjectCacheWorker.perform_async(
|
||||
project.id,
|
||||
invalidated_file_types,
|
||||
[:commit_count, :repository_size]
|
||||
)
|
||||
file_types = invalidated_file_types
|
||||
|
||||
return unless file_types.present?
|
||||
|
||||
ProjectCacheWorker.perform_async(project.id, file_types, [], false)
|
||||
end
|
||||
|
||||
def base_params
|
||||
|
|
|
@ -42,10 +42,8 @@ class PostReceive
|
|||
user = identify_user(post_received)
|
||||
return false unless user
|
||||
|
||||
# Expire the branches cache so we have updated data for this push
|
||||
post_received.project.repository.expire_branches_cache if post_received.includes_branches?
|
||||
# We only need to expire tags once per push
|
||||
post_received.project.repository.expire_caches_for_tags if post_received.includes_tags?
|
||||
# We only need to expire certain caches once per push
|
||||
expire_caches(post_received)
|
||||
|
||||
post_received.enum_for(:changes_refs).with_index do |(oldrev, newrev, ref), index|
|
||||
service_klass =
|
||||
|
@ -74,6 +72,30 @@ class PostReceive
|
|||
after_project_changes_hooks(post_received, user, refs.to_a, changes)
|
||||
end
|
||||
|
||||
# Expire the project, branch, and tag cache once per push. Schedule an
|
||||
# update for the repository size and commit count if necessary.
|
||||
def expire_caches(post_received)
|
||||
project = post_received.project
|
||||
|
||||
project.repository.expire_status_cache if project.empty_repo?
|
||||
project.repository.expire_branches_cache if post_received.includes_branches?
|
||||
project.repository.expire_caches_for_tags if post_received.includes_tags?
|
||||
|
||||
enqueue_repository_cache_update(post_received)
|
||||
end
|
||||
|
||||
def enqueue_repository_cache_update(post_received)
|
||||
stats_to_invalidate = [:repository_size]
|
||||
stats_to_invalidate << :commit_count if post_received.includes_default_branch?
|
||||
|
||||
ProjectCacheWorker.perform_async(
|
||||
post_received.project.id,
|
||||
[],
|
||||
stats_to_invalidate,
|
||||
true
|
||||
)
|
||||
end
|
||||
|
||||
def after_project_changes_hooks(post_received, user, refs, changes)
|
||||
hook_data = Gitlab::DataBuilder::Repository.update(post_received.project, user, changes, refs)
|
||||
SystemHooksService.new.execute_hooks(hook_data, :repository_update_hooks)
|
||||
|
|
|
@ -12,13 +12,15 @@ class ProjectCacheWorker
|
|||
# CHANGELOG.
|
||||
# statistics - An Array containing columns from ProjectStatistics to
|
||||
# refresh, if empty all columns will be refreshed
|
||||
# refresh_statistics - A boolean that determines whether project statistics should
|
||||
# be updated.
|
||||
# rubocop: disable CodeReuse/ActiveRecord
|
||||
def perform(project_id, files = [], statistics = [])
|
||||
def perform(project_id, files = [], statistics = [], refresh_statistics = true)
|
||||
project = Project.find_by(id: project_id)
|
||||
|
||||
return unless project
|
||||
|
||||
update_statistics(project, statistics)
|
||||
update_statistics(project, statistics) if refresh_statistics
|
||||
|
||||
return unless project.repository.exists?
|
||||
|
||||
|
|
|
@ -0,0 +1,5 @@
|
|||
---
|
||||
title: Expire project caches once per push instead of once per ref
|
||||
merge_request: 31876
|
||||
author:
|
||||
type: performance
|
|
@ -39,6 +39,17 @@ module Gitlab
|
|||
end
|
||||
end
|
||||
|
||||
def includes_default_branch?
|
||||
# If the branch doesn't have a default branch yet, we presume the
|
||||
# first branch pushed will be the default.
|
||||
return true unless project.default_branch.present?
|
||||
|
||||
enum_for(:changes_refs).any? do |_oldrev, _newrev, ref|
|
||||
Gitlab::Git.branch_ref?(ref) &&
|
||||
Gitlab::Git.branch_name(ref) == project.default_branch
|
||||
end
|
||||
end
|
||||
|
||||
private
|
||||
|
||||
def deserialize_changes(changes)
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
require 'spec_helper'
|
||||
|
||||
describe ::Gitlab::GitPostReceive do
|
||||
let(:project) { create(:project) }
|
||||
set(:project) { create(:project, :repository) }
|
||||
|
||||
subject { described_class.new(project, "project-#{project.id}", changes.dup, {}) }
|
||||
|
||||
|
@ -92,4 +92,47 @@ describe ::Gitlab::GitPostReceive do
|
|||
end
|
||||
end
|
||||
end
|
||||
|
||||
describe '#includes_default_branch?' do
|
||||
context 'with no default branch' do
|
||||
let(:changes) do
|
||||
<<~EOF
|
||||
654321 210987 refs/heads/test1
|
||||
654322 210986 refs/tags/#{project.default_branch}
|
||||
654323 210985 refs/heads/test3
|
||||
EOF
|
||||
end
|
||||
|
||||
it 'returns false' do
|
||||
expect(subject.includes_default_branch?).to be_falsey
|
||||
end
|
||||
end
|
||||
|
||||
context 'with a project with no default branch' do
|
||||
let(:changes) do
|
||||
<<~EOF
|
||||
654321 210987 refs/heads/test1
|
||||
EOF
|
||||
end
|
||||
|
||||
it 'returns true' do
|
||||
expect(project).to receive(:default_branch).and_return(nil)
|
||||
expect(subject.includes_default_branch?).to be_truthy
|
||||
end
|
||||
end
|
||||
|
||||
context 'with default branch' do
|
||||
let(:changes) do
|
||||
<<~EOF
|
||||
654322 210986 refs/heads/test1
|
||||
654321 210987 refs/tags/test2
|
||||
654323 210985 refs/heads/#{project.default_branch}
|
||||
EOF
|
||||
end
|
||||
|
||||
it 'returns true' do
|
||||
expect(subject.includes_default_branch?).to be_truthy
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
@ -1815,22 +1815,36 @@ describe Repository do
|
|||
end
|
||||
|
||||
describe '#after_create' do
|
||||
it 'calls expire_status_cache' do
|
||||
expect(repository).to receive(:expire_status_cache)
|
||||
|
||||
repository.after_create
|
||||
end
|
||||
|
||||
it 'logs an event' do
|
||||
expect(repository).to receive(:repository_event).with(:create_repository)
|
||||
|
||||
repository.after_create
|
||||
end
|
||||
end
|
||||
|
||||
describe '#expire_status_cache' do
|
||||
it 'flushes the exists cache' do
|
||||
expect(repository).to receive(:expire_exists_cache)
|
||||
|
||||
repository.after_create
|
||||
repository.expire_status_cache
|
||||
end
|
||||
|
||||
it 'flushes the root ref cache' do
|
||||
expect(repository).to receive(:expire_root_ref_cache)
|
||||
|
||||
repository.after_create
|
||||
repository.expire_status_cache
|
||||
end
|
||||
|
||||
it 'flushes the emptiness caches' do
|
||||
expect(repository).to receive(:expire_emptiness_caches)
|
||||
|
||||
repository.after_create
|
||||
repository.expire_status_cache
|
||||
end
|
||||
end
|
||||
|
||||
|
|
|
@ -158,9 +158,13 @@ describe Git::BranchHooksService do
|
|||
let(:blank_sha) { Gitlab::Git::BLANK_SHA }
|
||||
|
||||
def clears_cache(extended: [])
|
||||
expect(ProjectCacheWorker)
|
||||
.to receive(:perform_async)
|
||||
.with(project.id, extended, %i[commit_count repository_size])
|
||||
expect(service).to receive(:invalidated_file_types).and_return(extended)
|
||||
|
||||
if extended.present?
|
||||
expect(ProjectCacheWorker)
|
||||
.to receive(:perform_async)
|
||||
.with(project.id, extended, [], false)
|
||||
end
|
||||
|
||||
service.execute
|
||||
end
|
||||
|
|
|
@ -37,6 +37,29 @@ describe PostReceive do
|
|||
end
|
||||
|
||||
describe "#process_project_changes" do
|
||||
context 'with an empty project' do
|
||||
let(:empty_project) { create(:project, :empty_repo) }
|
||||
let(:changes) { "123456 789012 refs/heads/tést1\n" }
|
||||
|
||||
before do
|
||||
allow_any_instance_of(Gitlab::GitPostReceive).to receive(:identify).and_return(empty_project.owner)
|
||||
allow(Gitlab::GlRepository).to receive(:parse).and_return([empty_project, Gitlab::GlRepository::PROJECT])
|
||||
end
|
||||
|
||||
it 'expire the status cache' do
|
||||
expect(empty_project.repository).to receive(:expire_status_cache)
|
||||
|
||||
perform
|
||||
end
|
||||
|
||||
it 'schedules a cache update for commit count and size' do
|
||||
expect(ProjectCacheWorker).to receive(:perform_async)
|
||||
.with(empty_project.id, [], [:repository_size, :commit_count], true)
|
||||
|
||||
perform
|
||||
end
|
||||
end
|
||||
|
||||
context 'empty changes' do
|
||||
it "does not call any PushService but runs after project hooks" do
|
||||
expect(Git::BranchPushService).not_to receive(:new)
|
||||
|
@ -67,15 +90,22 @@ describe PostReceive do
|
|||
context "branches" do
|
||||
let(:changes) do
|
||||
<<~EOF
|
||||
'123456 789012 refs/heads/tést1'
|
||||
'123456 789012 refs/heads/tést2'
|
||||
123456 789012 refs/heads/tést1
|
||||
123456 789012 refs/heads/tést2
|
||||
EOF
|
||||
end
|
||||
|
||||
it 'expires the branches cache' do
|
||||
expect(project.repository).to receive(:expire_branches_cache).once
|
||||
|
||||
described_class.new.perform(gl_repository, key_id, base64_changes)
|
||||
perform
|
||||
end
|
||||
|
||||
it 'expires the status cache' do
|
||||
expect(project).to receive(:empty_repo?).and_return(true)
|
||||
expect(project.repository).to receive(:expire_status_cache)
|
||||
|
||||
perform
|
||||
end
|
||||
|
||||
it 'calls Git::BranchPushService' do
|
||||
|
@ -87,6 +117,30 @@ describe PostReceive do
|
|||
|
||||
perform
|
||||
end
|
||||
|
||||
it 'schedules a cache update for repository size only' do
|
||||
expect(ProjectCacheWorker).to receive(:perform_async)
|
||||
.with(project.id, [], [:repository_size], true)
|
||||
|
||||
perform
|
||||
end
|
||||
|
||||
context 'with a default branch' do
|
||||
let(:changes) do
|
||||
<<~EOF
|
||||
123456 789012 refs/heads/tést1
|
||||
123456 789012 refs/heads/tést2
|
||||
678912 123455 refs/heads/#{project.default_branch}
|
||||
EOF
|
||||
end
|
||||
|
||||
it 'schedules a cache update for commit count and size' do
|
||||
expect(ProjectCacheWorker).to receive(:perform_async)
|
||||
.with(project.id, [], [:repository_size, :commit_count], true)
|
||||
|
||||
perform
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
context "tags" do
|
||||
|
@ -107,7 +161,7 @@ describe PostReceive do
|
|||
it 'does not expire branches cache' do
|
||||
expect(project.repository).not_to receive(:expire_branches_cache)
|
||||
|
||||
described_class.new.perform(gl_repository, key_id, base64_changes)
|
||||
perform
|
||||
end
|
||||
|
||||
it "only invalidates tags once" do
|
||||
|
@ -115,7 +169,7 @@ describe PostReceive do
|
|||
expect(project.repository).to receive(:expire_caches_for_tags).once.and_call_original
|
||||
expect(project.repository).to receive(:expire_tags_cache).once.and_call_original
|
||||
|
||||
described_class.new.perform(gl_repository, key_id, base64_changes)
|
||||
perform
|
||||
end
|
||||
|
||||
it "calls Git::TagPushService" do
|
||||
|
@ -129,6 +183,13 @@ describe PostReceive do
|
|||
|
||||
perform
|
||||
end
|
||||
|
||||
it 'schedules a single ProjectCacheWorker update' do
|
||||
expect(ProjectCacheWorker).to receive(:perform_async)
|
||||
.with(project.id, [], [:repository_size], true)
|
||||
|
||||
perform
|
||||
end
|
||||
end
|
||||
|
||||
context "merge-requests" do
|
||||
|
|
|
@ -49,6 +49,16 @@ describe ProjectCacheWorker do
|
|||
worker.perform(project.id, %w(readme))
|
||||
end
|
||||
|
||||
context 'with statistics disabled' do
|
||||
let(:statistics) { [] }
|
||||
|
||||
it 'does not update the project statistics' do
|
||||
expect(worker).not_to receive(:update_statistics)
|
||||
|
||||
worker.perform(project.id, [], [], false)
|
||||
end
|
||||
end
|
||||
|
||||
context 'with statistics' do
|
||||
let(:statistics) { %w(repository_size) }
|
||||
|
||||
|
|
Loading…
Reference in a new issue