Update ProjectStatistics#build_artifacts_size synchronously without summing (#41059)

Previously we scheduled a worker to just some this but we were running
into performance issues when the build table was getting too large. So
now we've updated the code such that this column is updated immediately
and incremented/decremented by the correct amount whenever artifacts are
created or deleted.

We've also added the performance optimization that we do not update this
statistic if a project is deleted because it could result in many
updates for a project with many builds.
This commit is contained in:
Dylan Griffith 2018-03-21 10:03:50 +11:00
parent 947ed8b882
commit 03b020f2e4
8 changed files with 175 additions and 105 deletions

View File

@ -20,7 +20,7 @@ module Ci
has_one :last_deployment, -> { order('deployments.id DESC') }, as: :deployable, class_name: 'Deployment'
has_many :trace_sections, class_name: 'Ci::BuildTraceSection'
has_many :job_artifacts, class_name: 'Ci::JobArtifact', foreign_key: :job_id, dependent: :destroy # rubocop:disable Cop/ActiveRecordDependent
has_many :job_artifacts, class_name: 'Ci::JobArtifact', foreign_key: :job_id, dependent: :destroy, inverse_of: :job # rubocop:disable Cop/ActiveRecordDependent
has_one :job_artifacts_archive, -> { where(file_type: Ci::JobArtifact.file_types[:archive]) }, class_name: 'Ci::JobArtifact', inverse_of: :job, foreign_key: :job_id
has_one :job_artifacts_metadata, -> { where(file_type: Ci::JobArtifact.file_types[:metadata]) }, class_name: 'Ci::JobArtifact', inverse_of: :job, foreign_key: :job_id
has_one :job_artifacts_trace, -> { where(file_type: Ci::JobArtifact.file_types[:trace]) }, class_name: 'Ci::JobArtifact', inverse_of: :job, foreign_key: :job_id
@ -95,8 +95,8 @@ module Ci
run_after_commit { BuildHooksWorker.perform_async(build.id) }
end
after_commit :update_project_statistics_after_save, on: [:create, :update]
after_commit :update_project_statistics, on: :destroy
after_save :update_project_statistics_after_save, if: :artifacts_size_changed?
after_destroy :update_project_statistics_after_destroy, unless: :project_destroyed?
class << self
# This is needed for url_for to work,
@ -664,16 +664,20 @@ module Ci
pipeline.config_processor.build_attributes(name)
end
def update_project_statistics
return unless project
ProjectCacheWorker.perform_async(project_id, [], [:build_artifacts_size])
def update_project_statistics_after_save
update_project_statistics(read_attribute(:artifacts_size).to_i - artifacts_size_was.to_i)
end
def update_project_statistics_after_save
if previous_changes.include?('artifacts_size')
update_project_statistics
end
def update_project_statistics_after_destroy
update_project_statistics(-artifacts_size)
end
def update_project_statistics(difference)
ProjectStatistics.increment_statistic(project_id, :build_artifacts_size, difference)
end
def project_destroyed?
project.pending_delete?
end
end
end

View File

@ -9,6 +9,8 @@ module Ci
before_save :update_file_store
before_save :set_size, if: :file_changed?
after_save :update_project_statistics_after_save, if: :size_changed?
after_destroy :update_project_statistics_after_destroy, unless: :project_destroyed?
scope :with_files_stored_locally, -> { where(file_store: [nil, ::JobArtifactUploader::Store::LOCAL]) }
@ -34,10 +36,6 @@ module Ci
[nil, ::JobArtifactUploader::Store::LOCAL].include?(self.file_store)
end
def set_size
self.size = file.size
end
def expire_in
expire_at - Time.now if expire_at
end
@ -48,5 +46,28 @@ module Ci
ChronicDuration.parse(value)&.seconds&.from_now
end
end
private
def set_size
self.size = file.size
end
def update_project_statistics_after_save
update_project_statistics(size.to_i - size_was.to_i)
end
def update_project_statistics_after_destroy
update_project_statistics(-self.size)
end
def update_project_statistics(difference)
ProjectStatistics.increment_statistic(project_id, :build_artifacts_size, difference)
end
def project_destroyed?
# Use job.project to avoid extra DB query for project
job.project.pending_delete?
end
end
end

View File

@ -4,15 +4,15 @@ class ProjectStatistics < ActiveRecord::Base
before_save :update_storage_size
STORAGE_COLUMNS = [:repository_size, :lfs_objects_size, :build_artifacts_size].freeze
STATISTICS_COLUMNS = [:commit_count] + STORAGE_COLUMNS
COLUMNS_TO_REFRESH = [:repository_size, :lfs_objects_size, :commit_count].freeze
INCREMENTABLE_COLUMNS = [:build_artifacts_size].freeze
def total_repository_size
repository_size + lfs_objects_size
end
def refresh!(only: nil)
STATISTICS_COLUMNS.each do |column, generator|
COLUMNS_TO_REFRESH.each do |column, generator|
if only.blank? || only.include?(column)
public_send("update_#{column}") # rubocop:disable GitlabSecurity/PublicSend
end
@ -34,13 +34,15 @@ class ProjectStatistics < ActiveRecord::Base
self.lfs_objects_size = project.lfs_objects.sum(:size)
end
def update_build_artifacts_size
self.build_artifacts_size =
project.builds.sum(:artifacts_size) +
Ci::JobArtifact.artifacts_size_for(self.project)
def update_storage_size
self.storage_size = repository_size + lfs_objects_size + build_artifacts_size
end
def update_storage_size
self.storage_size = STORAGE_COLUMNS.sum(&method(:read_attribute))
def self.increment_statistic(project_id, key, amount)
raise ArgumentError, "Cannot increment attribute: #{key}" unless key.in?(INCREMENTABLE_COLUMNS)
return if amount == 0
where(project_id: project_id)
.update_all(["#{key} = COALESCE(#{key}, 0) + (?)", amount])
end
end

View File

@ -6,10 +6,10 @@ class JobArtifactUploader < GitlabUploader
storage_options Gitlab.config.artifacts
def size
return super if model.size.nil?
def cached_size
return model.size if model.size.present? && !model.file_changed?
model.size
size
end
def store_dir
@ -20,7 +20,7 @@ class JobArtifactUploader < GitlabUploader
if file_storage?
File.open(path, "rb") if path
else
::Gitlab::Ci::Trace::HttpIO.new(url, size) if url
::Gitlab::Ci::Trace::HttpIO.new(url, cached_size) if url
end
end

View File

@ -0,0 +1,5 @@
---
title: Improve DB performance of calculating total artifacts size
merge_request: 17839
author:
type: performance

View File

@ -1384,29 +1384,51 @@ describe Ci::Build do
end
end
describe '#update_project_statistics' do
context 'when updating the build' do
let(:build) { create(:ci_build, artifacts_size: 23) }
it 'updates project statistics' do
build.artifacts_size = 42
expect(build).to receive(:update_project_statistics_after_save).and_call_original
expect { build.save! }
.to change { build.project.statistics.reload.build_artifacts_size }
.by(19)
end
context 'when the artifact size stays the same' do
it 'does not update project statistics' do
build.name = 'changed'
expect(build).not_to receive(:update_project_statistics_after_save)
build.save!
end
end
end
context 'when destroying the build' do
let!(:build) { create(:ci_build, artifacts_size: 23) }
it 'updates project statistics when the artifact size changes' do
expect(ProjectCacheWorker).to receive(:perform_async)
.with(build.project_id, [], [:build_artifacts_size])
it 'updates project statistics' do
expect(ProjectStatistics)
.to receive(:increment_statistic)
.and_call_original
build.artifacts_size = 42
build.save!
expect { build.destroy! }
.to change { build.project.statistics.reload.build_artifacts_size }
.by(-23)
end
it 'does not update project statistics when the artifact size stays the same' do
expect(ProjectCacheWorker).not_to receive(:perform_async)
context 'when the build is destroyed due to the project being destroyed' do
it 'does not update the project statistics' do
expect(ProjectStatistics)
.not_to receive(:increment_statistic)
build.name = 'changed'
build.save!
end
it 'updates project statistics when the build is destroyed' do
expect(ProjectCacheWorker).to receive(:perform_async)
.with(build.project_id, [], [:build_artifacts_size])
build.destroy
build.project.update_attributes(pending_delete: true)
build.project.destroy!
end
end
end

View File

@ -1,7 +1,7 @@
require 'spec_helper'
describe Ci::JobArtifact do
set(:artifact) { create(:ci_job_artifact, :archive) }
let(:artifact) { create(:ci_job_artifact, :archive) }
describe "Associations" do
it { is_expected.to belong_to(:project) }
@ -59,10 +59,32 @@ describe Ci::JobArtifact do
end
end
describe '#set_size' do
it 'sets the size' do
context 'creating the artifact' do
let(:project) { create(:project) }
let(:artifact) { create(:ci_job_artifact, :archive, project: project) }
it 'sets the size from the file size' do
expect(artifact.size).to eq(106365)
end
it 'updates the project statistics' do
expect { artifact }
.to change { project.statistics.reload.build_artifacts_size }
.by(106365)
end
end
context 'updating the artifact file' do
it 'updates the artifact size' do
artifact.update!(file: fixture_file_upload(File.join(Rails.root, 'spec/fixtures/dk.png')))
expect(artifact.size).to eq(1062)
end
it 'updates the project statistics' do
expect { artifact.update!(file: fixture_file_upload(File.join(Rails.root, 'spec/fixtures/dk.png'))) }
.to change { artifact.project.statistics.reload.build_artifacts_size }
.by(1062 - 106365)
end
end
describe '#file' do
@ -118,4 +140,32 @@ describe Ci::JobArtifact do
is_expected.to be_nil
end
end
context 'when destroying the artifact' do
let(:project) { create(:project, :repository) }
let(:pipeline) { create(:ci_pipeline, project: project) }
let!(:build) { create(:ci_build, :artifacts, pipeline: pipeline) }
it 'updates the project statistics' do
artifact = build.job_artifacts.first
expect(ProjectStatistics)
.to receive(:increment_statistic)
.and_call_original
expect { artifact.destroy }
.to change { project.statistics.reload.build_artifacts_size }
.by(-106365)
end
context 'when it is destroyed from the project level' do
it 'does not update the project statistics' do
expect(ProjectStatistics)
.not_to receive(:increment_statistic)
project.update_attributes(pending_delete: true)
project.destroy!
end
end
end
end

View File

@ -4,26 +4,6 @@ describe ProjectStatistics do
let(:project) { create :project }
let(:statistics) { project.statistics }
describe 'constants' do
describe 'STORAGE_COLUMNS' do
it 'is an array of symbols' do
expect(described_class::STORAGE_COLUMNS).to be_kind_of Array
expect(described_class::STORAGE_COLUMNS.map(&:class).uniq).to eq [Symbol]
end
end
describe 'STATISTICS_COLUMNS' do
it 'is an array of symbols' do
expect(described_class::STATISTICS_COLUMNS).to be_kind_of Array
expect(described_class::STATISTICS_COLUMNS.map(&:class).uniq).to eq [Symbol]
end
it 'includes all storage columns' do
expect(described_class::STATISTICS_COLUMNS & described_class::STORAGE_COLUMNS).to eq described_class::STORAGE_COLUMNS
end
end
end
describe 'associations' do
it { is_expected.to belong_to(:project) }
it { is_expected.to belong_to(:namespace) }
@ -63,7 +43,6 @@ describe ProjectStatistics do
allow(statistics).to receive(:update_commit_count)
allow(statistics).to receive(:update_repository_size)
allow(statistics).to receive(:update_lfs_objects_size)
allow(statistics).to receive(:update_build_artifacts_size)
allow(statistics).to receive(:update_storage_size)
end
@ -76,7 +55,6 @@ describe ProjectStatistics do
expect(statistics).to have_received(:update_commit_count)
expect(statistics).to have_received(:update_repository_size)
expect(statistics).to have_received(:update_lfs_objects_size)
expect(statistics).to have_received(:update_build_artifacts_size)
end
end
@ -89,7 +67,6 @@ describe ProjectStatistics do
expect(statistics).to have_received(:update_lfs_objects_size)
expect(statistics).not_to have_received(:update_commit_count)
expect(statistics).not_to have_received(:update_repository_size)
expect(statistics).not_to have_received(:update_build_artifacts_size)
end
end
end
@ -131,40 +108,6 @@ describe ProjectStatistics do
end
end
describe '#update_build_artifacts_size' do
let!(:pipeline) { create(:ci_pipeline, project: project) }
context 'when new job artifacts are calculated' do
let(:ci_build) { create(:ci_build, pipeline: pipeline) }
before do
create(:ci_job_artifact, :archive, project: pipeline.project, job: ci_build)
end
it "stores the size of related build artifacts" do
statistics.update_build_artifacts_size
expect(statistics.build_artifacts_size).to be(106365)
end
it 'calculates related build artifacts by project' do
expect(Ci::JobArtifact).to receive(:artifacts_size_for).with(project) { 0 }
statistics.update_build_artifacts_size
end
end
context 'when legacy artifacts are used' do
let!(:ci_build) { create(:ci_build, pipeline: pipeline, artifacts_size: 10.megabytes) }
it "stores the size of related build artifacts" do
statistics.update_build_artifacts_size
expect(statistics.build_artifacts_size).to eq(10.megabytes)
end
end
end
describe '#update_storage_size' do
it "sums all storage counters" do
statistics.update!(
@ -177,4 +120,27 @@ describe ProjectStatistics do
expect(statistics.storage_size).to eq 5
end
end
describe '.increment_statistic' do
it 'increases the statistic by that amount' do
expect { described_class.increment_statistic(project.id, :build_artifacts_size, 13) }
.to change { statistics.reload.build_artifacts_size }
.by(13)
end
context 'when the amount is 0' do
it 'does not execute a query' do
project
expect { described_class.increment_statistic(project.id, :build_artifacts_size, 0) }
.not_to exceed_query_limit(0)
end
end
context 'when using an invalid column' do
it 'raises an error' do
expect { described_class.increment_statistic(project.id, :id, 13) }
.to raise_error(ArgumentError, "Cannot increment attribute: id")
end
end
end
end