From 03b020f2e4a4e48dc4ddceb1656b84aaa7938149 Mon Sep 17 00:00:00 2001 From: Dylan Griffith Date: Wed, 21 Mar 2018 10:03:50 +1100 Subject: [PATCH] Update ProjectStatistics#build_artifacts_size synchronously without summing (#41059) Previously we scheduled a worker to just some this but we were running into performance issues when the build table was getting too large. So now we've updated the code such that this column is updated immediately and incremented/decremented by the correct amount whenever artifacts are created or deleted. We've also added the performance optimization that we do not update this statistic if a project is deleted because it could result in many updates for a project with many builds. --- app/models/ci/build.rb | 26 +++--- app/models/ci/job_artifact.rb | 29 ++++++- app/models/project_statistics.rb | 20 ++--- app/uploaders/job_artifact_uploader.rb | 8 +- ...lculate-artifact-size-more-efficiently.yml | 5 ++ spec/models/ci/build_spec.rb | 56 +++++++++---- spec/models/ci/job_artifact_spec.rb | 56 ++++++++++++- spec/models/project_statistics_spec.rb | 80 ++++++------------- 8 files changed, 175 insertions(+), 105 deletions(-) create mode 100644 changelogs/unreleased/41059-calculate-artifact-size-more-efficiently.yml diff --git a/app/models/ci/build.rb b/app/models/ci/build.rb index 4aa65bf4273..79408f69083 100644 --- a/app/models/ci/build.rb +++ b/app/models/ci/build.rb @@ -20,7 +20,7 @@ module Ci has_one :last_deployment, -> { order('deployments.id DESC') }, as: :deployable, class_name: 'Deployment' has_many :trace_sections, class_name: 'Ci::BuildTraceSection' - has_many :job_artifacts, class_name: 'Ci::JobArtifact', foreign_key: :job_id, dependent: :destroy # rubocop:disable Cop/ActiveRecordDependent + has_many :job_artifacts, class_name: 'Ci::JobArtifact', foreign_key: :job_id, dependent: :destroy, inverse_of: :job # rubocop:disable Cop/ActiveRecordDependent has_one :job_artifacts_archive, -> { where(file_type: Ci::JobArtifact.file_types[:archive]) }, class_name: 'Ci::JobArtifact', inverse_of: :job, foreign_key: :job_id has_one :job_artifacts_metadata, -> { where(file_type: Ci::JobArtifact.file_types[:metadata]) }, class_name: 'Ci::JobArtifact', inverse_of: :job, foreign_key: :job_id has_one :job_artifacts_trace, -> { where(file_type: Ci::JobArtifact.file_types[:trace]) }, class_name: 'Ci::JobArtifact', inverse_of: :job, foreign_key: :job_id @@ -95,8 +95,8 @@ module Ci run_after_commit { BuildHooksWorker.perform_async(build.id) } end - after_commit :update_project_statistics_after_save, on: [:create, :update] - after_commit :update_project_statistics, on: :destroy + after_save :update_project_statistics_after_save, if: :artifacts_size_changed? + after_destroy :update_project_statistics_after_destroy, unless: :project_destroyed? class << self # This is needed for url_for to work, @@ -664,16 +664,20 @@ module Ci pipeline.config_processor.build_attributes(name) end - def update_project_statistics - return unless project - - ProjectCacheWorker.perform_async(project_id, [], [:build_artifacts_size]) + def update_project_statistics_after_save + update_project_statistics(read_attribute(:artifacts_size).to_i - artifacts_size_was.to_i) end - def update_project_statistics_after_save - if previous_changes.include?('artifacts_size') - update_project_statistics - end + def update_project_statistics_after_destroy + update_project_statistics(-artifacts_size) + end + + def update_project_statistics(difference) + ProjectStatistics.increment_statistic(project_id, :build_artifacts_size, difference) + end + + def project_destroyed? + project.pending_delete? end end end diff --git a/app/models/ci/job_artifact.rb b/app/models/ci/job_artifact.rb index fbb95fe16df..f846482cdeb 100644 --- a/app/models/ci/job_artifact.rb +++ b/app/models/ci/job_artifact.rb @@ -9,6 +9,8 @@ module Ci before_save :update_file_store before_save :set_size, if: :file_changed? + after_save :update_project_statistics_after_save, if: :size_changed? + after_destroy :update_project_statistics_after_destroy, unless: :project_destroyed? scope :with_files_stored_locally, -> { where(file_store: [nil, ::JobArtifactUploader::Store::LOCAL]) } @@ -34,10 +36,6 @@ module Ci [nil, ::JobArtifactUploader::Store::LOCAL].include?(self.file_store) end - def set_size - self.size = file.size - end - def expire_in expire_at - Time.now if expire_at end @@ -48,5 +46,28 @@ module Ci ChronicDuration.parse(value)&.seconds&.from_now end end + + private + + def set_size + self.size = file.size + end + + def update_project_statistics_after_save + update_project_statistics(size.to_i - size_was.to_i) + end + + def update_project_statistics_after_destroy + update_project_statistics(-self.size) + end + + def update_project_statistics(difference) + ProjectStatistics.increment_statistic(project_id, :build_artifacts_size, difference) + end + + def project_destroyed? + # Use job.project to avoid extra DB query for project + job.project.pending_delete? + end end end diff --git a/app/models/project_statistics.rb b/app/models/project_statistics.rb index 87a4350f022..5d4e3c34b39 100644 --- a/app/models/project_statistics.rb +++ b/app/models/project_statistics.rb @@ -4,15 +4,15 @@ class ProjectStatistics < ActiveRecord::Base before_save :update_storage_size - STORAGE_COLUMNS = [:repository_size, :lfs_objects_size, :build_artifacts_size].freeze - STATISTICS_COLUMNS = [:commit_count] + STORAGE_COLUMNS + COLUMNS_TO_REFRESH = [:repository_size, :lfs_objects_size, :commit_count].freeze + INCREMENTABLE_COLUMNS = [:build_artifacts_size].freeze def total_repository_size repository_size + lfs_objects_size end def refresh!(only: nil) - STATISTICS_COLUMNS.each do |column, generator| + COLUMNS_TO_REFRESH.each do |column, generator| if only.blank? || only.include?(column) public_send("update_#{column}") # rubocop:disable GitlabSecurity/PublicSend end @@ -34,13 +34,15 @@ class ProjectStatistics < ActiveRecord::Base self.lfs_objects_size = project.lfs_objects.sum(:size) end - def update_build_artifacts_size - self.build_artifacts_size = - project.builds.sum(:artifacts_size) + - Ci::JobArtifact.artifacts_size_for(self.project) + def update_storage_size + self.storage_size = repository_size + lfs_objects_size + build_artifacts_size end - def update_storage_size - self.storage_size = STORAGE_COLUMNS.sum(&method(:read_attribute)) + def self.increment_statistic(project_id, key, amount) + raise ArgumentError, "Cannot increment attribute: #{key}" unless key.in?(INCREMENTABLE_COLUMNS) + return if amount == 0 + + where(project_id: project_id) + .update_all(["#{key} = COALESCE(#{key}, 0) + (?)", amount]) end end diff --git a/app/uploaders/job_artifact_uploader.rb b/app/uploaders/job_artifact_uploader.rb index dd86753479d..2a5a830ce4f 100644 --- a/app/uploaders/job_artifact_uploader.rb +++ b/app/uploaders/job_artifact_uploader.rb @@ -6,10 +6,10 @@ class JobArtifactUploader < GitlabUploader storage_options Gitlab.config.artifacts - def size - return super if model.size.nil? + def cached_size + return model.size if model.size.present? && !model.file_changed? - model.size + size end def store_dir @@ -20,7 +20,7 @@ class JobArtifactUploader < GitlabUploader if file_storage? File.open(path, "rb") if path else - ::Gitlab::Ci::Trace::HttpIO.new(url, size) if url + ::Gitlab::Ci::Trace::HttpIO.new(url, cached_size) if url end end diff --git a/changelogs/unreleased/41059-calculate-artifact-size-more-efficiently.yml b/changelogs/unreleased/41059-calculate-artifact-size-more-efficiently.yml new file mode 100644 index 00000000000..e3f94bbf081 --- /dev/null +++ b/changelogs/unreleased/41059-calculate-artifact-size-more-efficiently.yml @@ -0,0 +1,5 @@ +--- +title: Improve DB performance of calculating total artifacts size +merge_request: 17839 +author: +type: performance diff --git a/spec/models/ci/build_spec.rb b/spec/models/ci/build_spec.rb index a12717835b0..c5e4e9c464f 100644 --- a/spec/models/ci/build_spec.rb +++ b/spec/models/ci/build_spec.rb @@ -1384,29 +1384,51 @@ describe Ci::Build do end end - describe '#update_project_statistics' do + context 'when updating the build' do + let(:build) { create(:ci_build, artifacts_size: 23) } + + it 'updates project statistics' do + build.artifacts_size = 42 + + expect(build).to receive(:update_project_statistics_after_save).and_call_original + + expect { build.save! } + .to change { build.project.statistics.reload.build_artifacts_size } + .by(19) + end + + context 'when the artifact size stays the same' do + it 'does not update project statistics' do + build.name = 'changed' + + expect(build).not_to receive(:update_project_statistics_after_save) + + build.save! + end + end + end + + context 'when destroying the build' do let!(:build) { create(:ci_build, artifacts_size: 23) } - it 'updates project statistics when the artifact size changes' do - expect(ProjectCacheWorker).to receive(:perform_async) - .with(build.project_id, [], [:build_artifacts_size]) + it 'updates project statistics' do + expect(ProjectStatistics) + .to receive(:increment_statistic) + .and_call_original - build.artifacts_size = 42 - build.save! + expect { build.destroy! } + .to change { build.project.statistics.reload.build_artifacts_size } + .by(-23) end - it 'does not update project statistics when the artifact size stays the same' do - expect(ProjectCacheWorker).not_to receive(:perform_async) + context 'when the build is destroyed due to the project being destroyed' do + it 'does not update the project statistics' do + expect(ProjectStatistics) + .not_to receive(:increment_statistic) - build.name = 'changed' - build.save! - end - - it 'updates project statistics when the build is destroyed' do - expect(ProjectCacheWorker).to receive(:perform_async) - .with(build.project_id, [], [:build_artifacts_size]) - - build.destroy + build.project.update_attributes(pending_delete: true) + build.project.destroy! + end end end diff --git a/spec/models/ci/job_artifact_spec.rb b/spec/models/ci/job_artifact_spec.rb index 1aa28434879..aad0026aa29 100644 --- a/spec/models/ci/job_artifact_spec.rb +++ b/spec/models/ci/job_artifact_spec.rb @@ -1,7 +1,7 @@ require 'spec_helper' describe Ci::JobArtifact do - set(:artifact) { create(:ci_job_artifact, :archive) } + let(:artifact) { create(:ci_job_artifact, :archive) } describe "Associations" do it { is_expected.to belong_to(:project) } @@ -59,10 +59,32 @@ describe Ci::JobArtifact do end end - describe '#set_size' do - it 'sets the size' do + context 'creating the artifact' do + let(:project) { create(:project) } + let(:artifact) { create(:ci_job_artifact, :archive, project: project) } + + it 'sets the size from the file size' do expect(artifact.size).to eq(106365) end + + it 'updates the project statistics' do + expect { artifact } + .to change { project.statistics.reload.build_artifacts_size } + .by(106365) + end + end + + context 'updating the artifact file' do + it 'updates the artifact size' do + artifact.update!(file: fixture_file_upload(File.join(Rails.root, 'spec/fixtures/dk.png'))) + expect(artifact.size).to eq(1062) + end + + it 'updates the project statistics' do + expect { artifact.update!(file: fixture_file_upload(File.join(Rails.root, 'spec/fixtures/dk.png'))) } + .to change { artifact.project.statistics.reload.build_artifacts_size } + .by(1062 - 106365) + end end describe '#file' do @@ -118,4 +140,32 @@ describe Ci::JobArtifact do is_expected.to be_nil end end + + context 'when destroying the artifact' do + let(:project) { create(:project, :repository) } + let(:pipeline) { create(:ci_pipeline, project: project) } + let!(:build) { create(:ci_build, :artifacts, pipeline: pipeline) } + + it 'updates the project statistics' do + artifact = build.job_artifacts.first + + expect(ProjectStatistics) + .to receive(:increment_statistic) + .and_call_original + + expect { artifact.destroy } + .to change { project.statistics.reload.build_artifacts_size } + .by(-106365) + end + + context 'when it is destroyed from the project level' do + it 'does not update the project statistics' do + expect(ProjectStatistics) + .not_to receive(:increment_statistic) + + project.update_attributes(pending_delete: true) + project.destroy! + end + end + end end diff --git a/spec/models/project_statistics_spec.rb b/spec/models/project_statistics_spec.rb index 5cff2af4aca..38a3590ad12 100644 --- a/spec/models/project_statistics_spec.rb +++ b/spec/models/project_statistics_spec.rb @@ -4,26 +4,6 @@ describe ProjectStatistics do let(:project) { create :project } let(:statistics) { project.statistics } - describe 'constants' do - describe 'STORAGE_COLUMNS' do - it 'is an array of symbols' do - expect(described_class::STORAGE_COLUMNS).to be_kind_of Array - expect(described_class::STORAGE_COLUMNS.map(&:class).uniq).to eq [Symbol] - end - end - - describe 'STATISTICS_COLUMNS' do - it 'is an array of symbols' do - expect(described_class::STATISTICS_COLUMNS).to be_kind_of Array - expect(described_class::STATISTICS_COLUMNS.map(&:class).uniq).to eq [Symbol] - end - - it 'includes all storage columns' do - expect(described_class::STATISTICS_COLUMNS & described_class::STORAGE_COLUMNS).to eq described_class::STORAGE_COLUMNS - end - end - end - describe 'associations' do it { is_expected.to belong_to(:project) } it { is_expected.to belong_to(:namespace) } @@ -63,7 +43,6 @@ describe ProjectStatistics do allow(statistics).to receive(:update_commit_count) allow(statistics).to receive(:update_repository_size) allow(statistics).to receive(:update_lfs_objects_size) - allow(statistics).to receive(:update_build_artifacts_size) allow(statistics).to receive(:update_storage_size) end @@ -76,7 +55,6 @@ describe ProjectStatistics do expect(statistics).to have_received(:update_commit_count) expect(statistics).to have_received(:update_repository_size) expect(statistics).to have_received(:update_lfs_objects_size) - expect(statistics).to have_received(:update_build_artifacts_size) end end @@ -89,7 +67,6 @@ describe ProjectStatistics do expect(statistics).to have_received(:update_lfs_objects_size) expect(statistics).not_to have_received(:update_commit_count) expect(statistics).not_to have_received(:update_repository_size) - expect(statistics).not_to have_received(:update_build_artifacts_size) end end end @@ -131,40 +108,6 @@ describe ProjectStatistics do end end - describe '#update_build_artifacts_size' do - let!(:pipeline) { create(:ci_pipeline, project: project) } - - context 'when new job artifacts are calculated' do - let(:ci_build) { create(:ci_build, pipeline: pipeline) } - - before do - create(:ci_job_artifact, :archive, project: pipeline.project, job: ci_build) - end - - it "stores the size of related build artifacts" do - statistics.update_build_artifacts_size - - expect(statistics.build_artifacts_size).to be(106365) - end - - it 'calculates related build artifacts by project' do - expect(Ci::JobArtifact).to receive(:artifacts_size_for).with(project) { 0 } - - statistics.update_build_artifacts_size - end - end - - context 'when legacy artifacts are used' do - let!(:ci_build) { create(:ci_build, pipeline: pipeline, artifacts_size: 10.megabytes) } - - it "stores the size of related build artifacts" do - statistics.update_build_artifacts_size - - expect(statistics.build_artifacts_size).to eq(10.megabytes) - end - end - end - describe '#update_storage_size' do it "sums all storage counters" do statistics.update!( @@ -177,4 +120,27 @@ describe ProjectStatistics do expect(statistics.storage_size).to eq 5 end end + + describe '.increment_statistic' do + it 'increases the statistic by that amount' do + expect { described_class.increment_statistic(project.id, :build_artifacts_size, 13) } + .to change { statistics.reload.build_artifacts_size } + .by(13) + end + + context 'when the amount is 0' do + it 'does not execute a query' do + project + expect { described_class.increment_statistic(project.id, :build_artifacts_size, 0) } + .not_to exceed_query_limit(0) + end + end + + context 'when using an invalid column' do + it 'raises an error' do + expect { described_class.increment_statistic(project.id, :id, 13) } + .to raise_error(ArgumentError, "Cannot increment attribute: id") + end + end + end end