Update ProjectStatistics#build_artifacts_size synchronously without summing (#41059)

Previously we scheduled a worker to just some this but we were running
into performance issues when the build table was getting too large. So
now we've updated the code such that this column is updated immediately
and incremented/decremented by the correct amount whenever artifacts are
created or deleted.

We've also added the performance optimization that we do not update this
statistic if a project is deleted because it could result in many
updates for a project with many builds.
This commit is contained in:
Dylan Griffith 2018-03-21 10:03:50 +11:00
parent 947ed8b882
commit 03b020f2e4
8 changed files with 175 additions and 105 deletions

View File

@ -20,7 +20,7 @@ module Ci
has_one :last_deployment, -> { order('deployments.id DESC') }, as: :deployable, class_name: 'Deployment' has_one :last_deployment, -> { order('deployments.id DESC') }, as: :deployable, class_name: 'Deployment'
has_many :trace_sections, class_name: 'Ci::BuildTraceSection' has_many :trace_sections, class_name: 'Ci::BuildTraceSection'
has_many :job_artifacts, class_name: 'Ci::JobArtifact', foreign_key: :job_id, dependent: :destroy # rubocop:disable Cop/ActiveRecordDependent has_many :job_artifacts, class_name: 'Ci::JobArtifact', foreign_key: :job_id, dependent: :destroy, inverse_of: :job # rubocop:disable Cop/ActiveRecordDependent
has_one :job_artifacts_archive, -> { where(file_type: Ci::JobArtifact.file_types[:archive]) }, class_name: 'Ci::JobArtifact', inverse_of: :job, foreign_key: :job_id has_one :job_artifacts_archive, -> { where(file_type: Ci::JobArtifact.file_types[:archive]) }, class_name: 'Ci::JobArtifact', inverse_of: :job, foreign_key: :job_id
has_one :job_artifacts_metadata, -> { where(file_type: Ci::JobArtifact.file_types[:metadata]) }, class_name: 'Ci::JobArtifact', inverse_of: :job, foreign_key: :job_id has_one :job_artifacts_metadata, -> { where(file_type: Ci::JobArtifact.file_types[:metadata]) }, class_name: 'Ci::JobArtifact', inverse_of: :job, foreign_key: :job_id
has_one :job_artifacts_trace, -> { where(file_type: Ci::JobArtifact.file_types[:trace]) }, class_name: 'Ci::JobArtifact', inverse_of: :job, foreign_key: :job_id has_one :job_artifacts_trace, -> { where(file_type: Ci::JobArtifact.file_types[:trace]) }, class_name: 'Ci::JobArtifact', inverse_of: :job, foreign_key: :job_id
@ -95,8 +95,8 @@ module Ci
run_after_commit { BuildHooksWorker.perform_async(build.id) } run_after_commit { BuildHooksWorker.perform_async(build.id) }
end end
after_commit :update_project_statistics_after_save, on: [:create, :update] after_save :update_project_statistics_after_save, if: :artifacts_size_changed?
after_commit :update_project_statistics, on: :destroy after_destroy :update_project_statistics_after_destroy, unless: :project_destroyed?
class << self class << self
# This is needed for url_for to work, # This is needed for url_for to work,
@ -664,16 +664,20 @@ module Ci
pipeline.config_processor.build_attributes(name) pipeline.config_processor.build_attributes(name)
end end
def update_project_statistics def update_project_statistics_after_save
return unless project update_project_statistics(read_attribute(:artifacts_size).to_i - artifacts_size_was.to_i)
ProjectCacheWorker.perform_async(project_id, [], [:build_artifacts_size])
end end
def update_project_statistics_after_save def update_project_statistics_after_destroy
if previous_changes.include?('artifacts_size') update_project_statistics(-artifacts_size)
update_project_statistics end
end
def update_project_statistics(difference)
ProjectStatistics.increment_statistic(project_id, :build_artifacts_size, difference)
end
def project_destroyed?
project.pending_delete?
end end
end end
end end

View File

@ -9,6 +9,8 @@ module Ci
before_save :update_file_store before_save :update_file_store
before_save :set_size, if: :file_changed? before_save :set_size, if: :file_changed?
after_save :update_project_statistics_after_save, if: :size_changed?
after_destroy :update_project_statistics_after_destroy, unless: :project_destroyed?
scope :with_files_stored_locally, -> { where(file_store: [nil, ::JobArtifactUploader::Store::LOCAL]) } scope :with_files_stored_locally, -> { where(file_store: [nil, ::JobArtifactUploader::Store::LOCAL]) }
@ -34,10 +36,6 @@ module Ci
[nil, ::JobArtifactUploader::Store::LOCAL].include?(self.file_store) [nil, ::JobArtifactUploader::Store::LOCAL].include?(self.file_store)
end end
def set_size
self.size = file.size
end
def expire_in def expire_in
expire_at - Time.now if expire_at expire_at - Time.now if expire_at
end end
@ -48,5 +46,28 @@ module Ci
ChronicDuration.parse(value)&.seconds&.from_now ChronicDuration.parse(value)&.seconds&.from_now
end end
end end
private
def set_size
self.size = file.size
end
def update_project_statistics_after_save
update_project_statistics(size.to_i - size_was.to_i)
end
def update_project_statistics_after_destroy
update_project_statistics(-self.size)
end
def update_project_statistics(difference)
ProjectStatistics.increment_statistic(project_id, :build_artifacts_size, difference)
end
def project_destroyed?
# Use job.project to avoid extra DB query for project
job.project.pending_delete?
end
end end
end end

View File

@ -4,15 +4,15 @@ class ProjectStatistics < ActiveRecord::Base
before_save :update_storage_size before_save :update_storage_size
STORAGE_COLUMNS = [:repository_size, :lfs_objects_size, :build_artifacts_size].freeze COLUMNS_TO_REFRESH = [:repository_size, :lfs_objects_size, :commit_count].freeze
STATISTICS_COLUMNS = [:commit_count] + STORAGE_COLUMNS INCREMENTABLE_COLUMNS = [:build_artifacts_size].freeze
def total_repository_size def total_repository_size
repository_size + lfs_objects_size repository_size + lfs_objects_size
end end
def refresh!(only: nil) def refresh!(only: nil)
STATISTICS_COLUMNS.each do |column, generator| COLUMNS_TO_REFRESH.each do |column, generator|
if only.blank? || only.include?(column) if only.blank? || only.include?(column)
public_send("update_#{column}") # rubocop:disable GitlabSecurity/PublicSend public_send("update_#{column}") # rubocop:disable GitlabSecurity/PublicSend
end end
@ -34,13 +34,15 @@ class ProjectStatistics < ActiveRecord::Base
self.lfs_objects_size = project.lfs_objects.sum(:size) self.lfs_objects_size = project.lfs_objects.sum(:size)
end end
def update_build_artifacts_size def update_storage_size
self.build_artifacts_size = self.storage_size = repository_size + lfs_objects_size + build_artifacts_size
project.builds.sum(:artifacts_size) +
Ci::JobArtifact.artifacts_size_for(self.project)
end end
def update_storage_size def self.increment_statistic(project_id, key, amount)
self.storage_size = STORAGE_COLUMNS.sum(&method(:read_attribute)) raise ArgumentError, "Cannot increment attribute: #{key}" unless key.in?(INCREMENTABLE_COLUMNS)
return if amount == 0
where(project_id: project_id)
.update_all(["#{key} = COALESCE(#{key}, 0) + (?)", amount])
end end
end end

View File

@ -6,10 +6,10 @@ class JobArtifactUploader < GitlabUploader
storage_options Gitlab.config.artifacts storage_options Gitlab.config.artifacts
def size def cached_size
return super if model.size.nil? return model.size if model.size.present? && !model.file_changed?
model.size size
end end
def store_dir def store_dir
@ -20,7 +20,7 @@ class JobArtifactUploader < GitlabUploader
if file_storage? if file_storage?
File.open(path, "rb") if path File.open(path, "rb") if path
else else
::Gitlab::Ci::Trace::HttpIO.new(url, size) if url ::Gitlab::Ci::Trace::HttpIO.new(url, cached_size) if url
end end
end end

View File

@ -0,0 +1,5 @@
---
title: Improve DB performance of calculating total artifacts size
merge_request: 17839
author:
type: performance

View File

@ -1384,29 +1384,51 @@ describe Ci::Build do
end end
end end
describe '#update_project_statistics' do context 'when updating the build' do
let(:build) { create(:ci_build, artifacts_size: 23) }
it 'updates project statistics' do
build.artifacts_size = 42
expect(build).to receive(:update_project_statistics_after_save).and_call_original
expect { build.save! }
.to change { build.project.statistics.reload.build_artifacts_size }
.by(19)
end
context 'when the artifact size stays the same' do
it 'does not update project statistics' do
build.name = 'changed'
expect(build).not_to receive(:update_project_statistics_after_save)
build.save!
end
end
end
context 'when destroying the build' do
let!(:build) { create(:ci_build, artifacts_size: 23) } let!(:build) { create(:ci_build, artifacts_size: 23) }
it 'updates project statistics when the artifact size changes' do it 'updates project statistics' do
expect(ProjectCacheWorker).to receive(:perform_async) expect(ProjectStatistics)
.with(build.project_id, [], [:build_artifacts_size]) .to receive(:increment_statistic)
.and_call_original
build.artifacts_size = 42 expect { build.destroy! }
build.save! .to change { build.project.statistics.reload.build_artifacts_size }
.by(-23)
end end
it 'does not update project statistics when the artifact size stays the same' do context 'when the build is destroyed due to the project being destroyed' do
expect(ProjectCacheWorker).not_to receive(:perform_async) it 'does not update the project statistics' do
expect(ProjectStatistics)
.not_to receive(:increment_statistic)
build.name = 'changed' build.project.update_attributes(pending_delete: true)
build.save! build.project.destroy!
end end
it 'updates project statistics when the build is destroyed' do
expect(ProjectCacheWorker).to receive(:perform_async)
.with(build.project_id, [], [:build_artifacts_size])
build.destroy
end end
end end

View File

@ -1,7 +1,7 @@
require 'spec_helper' require 'spec_helper'
describe Ci::JobArtifact do describe Ci::JobArtifact do
set(:artifact) { create(:ci_job_artifact, :archive) } let(:artifact) { create(:ci_job_artifact, :archive) }
describe "Associations" do describe "Associations" do
it { is_expected.to belong_to(:project) } it { is_expected.to belong_to(:project) }
@ -59,10 +59,32 @@ describe Ci::JobArtifact do
end end
end end
describe '#set_size' do context 'creating the artifact' do
it 'sets the size' do let(:project) { create(:project) }
let(:artifact) { create(:ci_job_artifact, :archive, project: project) }
it 'sets the size from the file size' do
expect(artifact.size).to eq(106365) expect(artifact.size).to eq(106365)
end end
it 'updates the project statistics' do
expect { artifact }
.to change { project.statistics.reload.build_artifacts_size }
.by(106365)
end
end
context 'updating the artifact file' do
it 'updates the artifact size' do
artifact.update!(file: fixture_file_upload(File.join(Rails.root, 'spec/fixtures/dk.png')))
expect(artifact.size).to eq(1062)
end
it 'updates the project statistics' do
expect { artifact.update!(file: fixture_file_upload(File.join(Rails.root, 'spec/fixtures/dk.png'))) }
.to change { artifact.project.statistics.reload.build_artifacts_size }
.by(1062 - 106365)
end
end end
describe '#file' do describe '#file' do
@ -118,4 +140,32 @@ describe Ci::JobArtifact do
is_expected.to be_nil is_expected.to be_nil
end end
end end
context 'when destroying the artifact' do
let(:project) { create(:project, :repository) }
let(:pipeline) { create(:ci_pipeline, project: project) }
let!(:build) { create(:ci_build, :artifacts, pipeline: pipeline) }
it 'updates the project statistics' do
artifact = build.job_artifacts.first
expect(ProjectStatistics)
.to receive(:increment_statistic)
.and_call_original
expect { artifact.destroy }
.to change { project.statistics.reload.build_artifacts_size }
.by(-106365)
end
context 'when it is destroyed from the project level' do
it 'does not update the project statistics' do
expect(ProjectStatistics)
.not_to receive(:increment_statistic)
project.update_attributes(pending_delete: true)
project.destroy!
end
end
end
end end

View File

@ -4,26 +4,6 @@ describe ProjectStatistics do
let(:project) { create :project } let(:project) { create :project }
let(:statistics) { project.statistics } let(:statistics) { project.statistics }
describe 'constants' do
describe 'STORAGE_COLUMNS' do
it 'is an array of symbols' do
expect(described_class::STORAGE_COLUMNS).to be_kind_of Array
expect(described_class::STORAGE_COLUMNS.map(&:class).uniq).to eq [Symbol]
end
end
describe 'STATISTICS_COLUMNS' do
it 'is an array of symbols' do
expect(described_class::STATISTICS_COLUMNS).to be_kind_of Array
expect(described_class::STATISTICS_COLUMNS.map(&:class).uniq).to eq [Symbol]
end
it 'includes all storage columns' do
expect(described_class::STATISTICS_COLUMNS & described_class::STORAGE_COLUMNS).to eq described_class::STORAGE_COLUMNS
end
end
end
describe 'associations' do describe 'associations' do
it { is_expected.to belong_to(:project) } it { is_expected.to belong_to(:project) }
it { is_expected.to belong_to(:namespace) } it { is_expected.to belong_to(:namespace) }
@ -63,7 +43,6 @@ describe ProjectStatistics do
allow(statistics).to receive(:update_commit_count) allow(statistics).to receive(:update_commit_count)
allow(statistics).to receive(:update_repository_size) allow(statistics).to receive(:update_repository_size)
allow(statistics).to receive(:update_lfs_objects_size) allow(statistics).to receive(:update_lfs_objects_size)
allow(statistics).to receive(:update_build_artifacts_size)
allow(statistics).to receive(:update_storage_size) allow(statistics).to receive(:update_storage_size)
end end
@ -76,7 +55,6 @@ describe ProjectStatistics do
expect(statistics).to have_received(:update_commit_count) expect(statistics).to have_received(:update_commit_count)
expect(statistics).to have_received(:update_repository_size) expect(statistics).to have_received(:update_repository_size)
expect(statistics).to have_received(:update_lfs_objects_size) expect(statistics).to have_received(:update_lfs_objects_size)
expect(statistics).to have_received(:update_build_artifacts_size)
end end
end end
@ -89,7 +67,6 @@ describe ProjectStatistics do
expect(statistics).to have_received(:update_lfs_objects_size) expect(statistics).to have_received(:update_lfs_objects_size)
expect(statistics).not_to have_received(:update_commit_count) expect(statistics).not_to have_received(:update_commit_count)
expect(statistics).not_to have_received(:update_repository_size) expect(statistics).not_to have_received(:update_repository_size)
expect(statistics).not_to have_received(:update_build_artifacts_size)
end end
end end
end end
@ -131,40 +108,6 @@ describe ProjectStatistics do
end end
end end
describe '#update_build_artifacts_size' do
let!(:pipeline) { create(:ci_pipeline, project: project) }
context 'when new job artifacts are calculated' do
let(:ci_build) { create(:ci_build, pipeline: pipeline) }
before do
create(:ci_job_artifact, :archive, project: pipeline.project, job: ci_build)
end
it "stores the size of related build artifacts" do
statistics.update_build_artifacts_size
expect(statistics.build_artifacts_size).to be(106365)
end
it 'calculates related build artifacts by project' do
expect(Ci::JobArtifact).to receive(:artifacts_size_for).with(project) { 0 }
statistics.update_build_artifacts_size
end
end
context 'when legacy artifacts are used' do
let!(:ci_build) { create(:ci_build, pipeline: pipeline, artifacts_size: 10.megabytes) }
it "stores the size of related build artifacts" do
statistics.update_build_artifacts_size
expect(statistics.build_artifacts_size).to eq(10.megabytes)
end
end
end
describe '#update_storage_size' do describe '#update_storage_size' do
it "sums all storage counters" do it "sums all storage counters" do
statistics.update!( statistics.update!(
@ -177,4 +120,27 @@ describe ProjectStatistics do
expect(statistics.storage_size).to eq 5 expect(statistics.storage_size).to eq 5
end end
end end
describe '.increment_statistic' do
it 'increases the statistic by that amount' do
expect { described_class.increment_statistic(project.id, :build_artifacts_size, 13) }
.to change { statistics.reload.build_artifacts_size }
.by(13)
end
context 'when the amount is 0' do
it 'does not execute a query' do
project
expect { described_class.increment_statistic(project.id, :build_artifacts_size, 0) }
.not_to exceed_query_limit(0)
end
end
context 'when using an invalid column' do
it 'raises an error' do
expect { described_class.increment_statistic(project.id, :id, 13) }
.to raise_error(ArgumentError, "Cannot increment attribute: id")
end
end
end
end end