diff --git a/db/post_migrate/20180619121030_enqueue_delete_diff_files_workers.rb b/db/post_migrate/20180619121030_enqueue_delete_diff_files_workers.rb new file mode 100644 index 00000000000..c4d2f5f61a0 --- /dev/null +++ b/db/post_migrate/20180619121030_enqueue_delete_diff_files_workers.rb @@ -0,0 +1,26 @@ +class EnqueueDeleteDiffFilesWorkers < ActiveRecord::Migration + include Gitlab::Database::MigrationHelpers + + DOWNTIME = false + SCHEDULER = 'ScheduleDiffFilesDeletion'.freeze + TMP_INDEX = 'tmp_partial_diff_id_with_files_index'.freeze + + disable_ddl_transaction! + + def up + unless index_exists_by_name?(:merge_request_diffs, TMP_INDEX) + add_concurrent_index(:merge_request_diffs, :id, where: "(state NOT IN ('without_files', 'empty'))", name: TMP_INDEX) + end + + BackgroundMigrationWorker.perform_async(SCHEDULER) + + # We don't remove the index since it's going to be used on DeleteDiffFiles + # worker. We should remove it in an upcoming release. + end + + def down + if index_exists_by_name?(:merge_request_diffs, TMP_INDEX) + remove_concurrent_index_by_name(:merge_request_diffs, TMP_INDEX) + end + end +end diff --git a/lib/gitlab/background_migration/delete_diff_files.rb b/lib/gitlab/background_migration/delete_diff_files.rb index 8fb2c334048..664ead1af44 100644 --- a/lib/gitlab/background_migration/delete_diff_files.rb +++ b/lib/gitlab/background_migration/delete_diff_files.rb @@ -4,41 +4,77 @@ module Gitlab module BackgroundMigration class DeleteDiffFiles - def perform(merge_request_diff_id) - merge_request_diff = MergeRequestDiff.find_by(id: merge_request_diff_id) + class MergeRequestDiff < ActiveRecord::Base + self.table_name = 'merge_request_diffs' - return unless merge_request_diff - return unless should_delete_diff_files?(merge_request_diff) + belongs_to :merge_request + has_many :merge_request_diff_files + end - MergeRequestDiff.transaction do - merge_request_diff.update_column(:state, 'without_files') + class MergeRequestDiffFile < ActiveRecord::Base + self.table_name = 'merge_request_diff_files' + end - # explain (analyze, buffers) when deleting 453 diff files: - # - # Delete on merge_request_diff_files (cost=0.57..8487.35 rows=4846 width=6) (actual time=43.265..43.265 rows=0 loops=1) - # Buffers: shared hit=2043 read=259 dirtied=254 - # -> Index Scan using index_merge_request_diff_files_on_mr_diff_id_and_order on merge_request_diff_files (cost=0.57..8487.35 rows=4846 width=6) (actu - # al time=0.466..26.317 rows=453 loops=1) - # Index Cond: (merge_request_diff_id = 463448) - # Buffers: shared hit=17 read=84 - # Planning time: 0.107 ms - # Execution time: 43.287 ms - # - MergeRequestDiffFile.where(merge_request_diff_id: merge_request_diff.id).delete_all + DEAD_TUPLES_THRESHOLD = 50_000 + VACUUM_WAIT_TIME = 5.minutes + + def perform(ids) + @ids = ids + + # We should reschedule until deadtuples get in a desirable + # state (e.g. < 50_000). That may take more than one reschedule. + # + if should_wait_deadtuple_vacuum? + reschedule + return end + + prune_diff_files + end + + def should_wait_deadtuple_vacuum? + return false unless Gitlab::Database.postgresql? + + diff_files_dead_tuples_count >= DEAD_TUPLES_THRESHOLD end private - def should_delete_diff_files?(merge_request_diff) - return false if merge_request_diff.state == 'without_files' + def reschedule + BackgroundMigrationWorker.perform_in(VACUUM_WAIT_TIME, self.class.name.demodulize, [@ids]) + end - merge_request = merge_request_diff.merge_request + def diffs_collection + MergeRequestDiff.where(id: @ids) + end - return false unless merge_request.state == 'merged' - return false if merge_request_diff.id == merge_request.latest_merge_request_diff_id + def diff_files_dead_tuples_count + dead_tuple = + execute_statement("SELECT n_dead_tup FROM pg_stat_all_tables "\ + "WHERE relname = 'merge_request_diff_files'")[0] - true + dead_tuple&.fetch('n_dead_tup', 0).to_i + end + + def prune_diff_files + removed = 0 + updated = 0 + + MergeRequestDiff.transaction do + updated = diffs_collection.update_all(state: 'without_files') + removed = MergeRequestDiffFile.where(merge_request_diff_id: @ids).delete_all + end + + log_info("Removed #{removed} merge_request_diff_files rows, "\ + "updated #{updated} merge_request_diffs rows") + end + + def execute_statement(sql) + ActiveRecord::Base.connection.execute(sql) + end + + def log_info(message) + Rails.logger.info("BackgroundMigration::DeleteDiffFiles - #{message}") end end end diff --git a/lib/gitlab/background_migration/schedule_diff_files_deletion.rb b/lib/gitlab/background_migration/schedule_diff_files_deletion.rb new file mode 100644 index 00000000000..609cf19187c --- /dev/null +++ b/lib/gitlab/background_migration/schedule_diff_files_deletion.rb @@ -0,0 +1,44 @@ +# frozen_string_literal: true +# rubocop:disable Style/Documentation + +module Gitlab + module BackgroundMigration + class ScheduleDiffFilesDeletion + class MergeRequestDiff < ActiveRecord::Base + self.table_name = 'merge_request_diffs' + + belongs_to :merge_request + + include EachBatch + end + + DIFF_BATCH_SIZE = 5_000 + INTERVAL = 5.minutes + MIGRATION = 'DeleteDiffFiles' + + def perform + diffs = MergeRequestDiff + .from("(#{diffs_collection.to_sql}) merge_request_diffs") + .where('merge_request_diffs.id != merge_request_diffs.latest_merge_request_diff_id') + .select(:id) + + diffs.each_batch(of: DIFF_BATCH_SIZE) do |relation, index| + ids = relation.pluck(:id) + + BackgroundMigrationWorker.perform_in(index * INTERVAL, MIGRATION, [ids]) + end + end + + private + + def diffs_collection + MergeRequestDiff + .joins(:merge_request) + .where("merge_requests.state = 'merged'") + .where('merge_requests.latest_merge_request_diff_id IS NOT NULL') + .where("merge_request_diffs.state NOT IN ('without_files', 'empty')") + .select('merge_requests.latest_merge_request_diff_id, merge_request_diffs.id') + end + end + end +end diff --git a/spec/lib/gitlab/background_migration/delete_diff_files_spec.rb b/spec/lib/gitlab/background_migration/delete_diff_files_spec.rb index 1b3df7b20d4..64c994a268f 100644 --- a/spec/lib/gitlab/background_migration/delete_diff_files_spec.rb +++ b/spec/lib/gitlab/background_migration/delete_diff_files_spec.rb @@ -1,31 +1,35 @@ require 'spec_helper' -describe Gitlab::BackgroundMigration::DeleteDiffFiles, :migration, schema: 20180626125654 do +describe Gitlab::BackgroundMigration::DeleteDiffFiles, :migration, schema: 20180619121030 do describe '#perform' do context 'when diff files can be deleted' do let(:merge_request) { create(:merge_request, :merged) } - let(:merge_request_diff) do + let!(:merge_request_diff) do merge_request.create_merge_request_diff merge_request.merge_request_diffs.first end + let(:perform) do + described_class.new.perform(MergeRequestDiff.pluck(:id)) + end + it 'deletes all merge request diff files' do - expect { described_class.new.perform(merge_request_diff.id) } + expect { perform } .to change { merge_request_diff.merge_request_diff_files.count } .from(20).to(0) end it 'updates state to without_files' do - expect { described_class.new.perform(merge_request_diff.id) } + expect { perform } .to change { merge_request_diff.reload.state } .from('collected').to('without_files') end it 'rollsback if something goes wrong' do - expect(MergeRequestDiffFile).to receive_message_chain(:where, :delete_all) + expect(described_class::MergeRequestDiffFile).to receive_message_chain(:where, :delete_all) .and_raise - expect { described_class.new.perform(merge_request_diff.id) } + expect { perform } .to raise_error merge_request_diff.reload @@ -35,35 +39,35 @@ describe Gitlab::BackgroundMigration::DeleteDiffFiles, :migration, schema: 20180 end end - it 'deletes no merge request diff files when MR is not merged' do - merge_request = create(:merge_request, :opened) - merge_request.create_merge_request_diff - merge_request_diff = merge_request.merge_request_diffs.first - - expect { described_class.new.perform(merge_request_diff.id) } - .not_to change { merge_request_diff.merge_request_diff_files.count } - .from(20) - end - - it 'deletes no merge request diff files when diff is marked as "without_files"' do + it 'reschedules itself when should_wait_deadtuple_vacuum' do merge_request = create(:merge_request, :merged) - merge_request.create_merge_request_diff - merge_request_diff = merge_request.merge_request_diffs.first + first_diff = merge_request.merge_request_diff + second_diff = merge_request.create_merge_request_diff - merge_request_diff.clean! + Sidekiq::Testing.fake! do + worker = described_class.new + allow(worker).to receive(:should_wait_deadtuple_vacuum?) { true } - expect { described_class.new.perform(merge_request_diff.id) } - .not_to change { merge_request_diff.merge_request_diff_files.count } - .from(20) + worker.perform([first_diff.id, second_diff.id]) + + expect(described_class.name.demodulize).to be_scheduled_delayed_migration(5.minutes, [first_diff.id, second_diff.id]) + expect(BackgroundMigrationWorker.jobs.size).to eq(1) + end end + end - it 'deletes no merge request diff files when diff is the latest' do - merge_request = create(:merge_request, :merged) - merge_request_diff = merge_request.merge_request_diff + describe '#should_wait_deadtuple_vacuum?' do + it 'returns true when hitting merge_request_diff_files hits DEAD_TUPLES_THRESHOLD', :postgresql do + worker = described_class.new + threshold_query_result = [{ "n_dead_tup" => described_class::DEAD_TUPLES_THRESHOLD.to_s }] + normal_query_result = [{ "n_dead_tup" => '3' }] - expect { described_class.new.perform(merge_request_diff.id) } - .not_to change { merge_request_diff.merge_request_diff_files.count } - .from(20) + allow(worker) + .to receive(:execute_statement) + .with(/SELECT n_dead_tup */) + .and_return(threshold_query_result, normal_query_result) + + expect(worker.should_wait_deadtuple_vacuum?).to be(true) end end end diff --git a/spec/lib/gitlab/background_migration/schedule_diff_files_deletion_spec.rb b/spec/lib/gitlab/background_migration/schedule_diff_files_deletion_spec.rb new file mode 100644 index 00000000000..fb5093b0bd1 --- /dev/null +++ b/spec/lib/gitlab/background_migration/schedule_diff_files_deletion_spec.rb @@ -0,0 +1,43 @@ +require 'spec_helper' + +describe Gitlab::BackgroundMigration::ScheduleDiffFilesDeletion, :migration, schema: 20180619121030 do + describe '#perform' do + let(:merge_request_diffs) { table(:merge_request_diffs) } + let(:merge_requests) { table(:merge_requests) } + let(:namespaces) { table(:namespaces) } + let(:projects) { table(:projects) } + + before do + stub_const("#{described_class.name}::DIFF_BATCH_SIZE", 3) + + namespaces.create!(id: 1, name: 'gitlab', path: 'gitlab') + projects.create!(id: 1, namespace_id: 1, name: 'gitlab', path: 'gitlab') + + merge_requests.create!(id: 1, target_project_id: 1, source_project_id: 1, target_branch: 'feature', source_branch: 'master', state: 'merged') + + merge_request_diffs.create!(id: 1, merge_request_id: 1, state: 'collected') + merge_request_diffs.create!(id: 2, merge_request_id: 1, state: 'empty') + merge_request_diffs.create!(id: 3, merge_request_id: 1, state: 'without_files') + merge_request_diffs.create!(id: 4, merge_request_id: 1, state: 'collected') + merge_request_diffs.create!(id: 5, merge_request_id: 1, state: 'collected') + merge_request_diffs.create!(id: 6, merge_request_id: 1, state: 'collected') + merge_request_diffs.create!(id: 7, merge_request_id: 1, state: 'collected') + + merge_requests.update(1, latest_merge_request_diff_id: 7) + end + + it 'correctly schedules diff file deletion workers' do + Sidekiq::Testing.fake! do + Timecop.freeze do + described_class.new.perform + + expect(described_class::MIGRATION).to be_scheduled_delayed_migration(5.minutes, [1, 4, 5]) + + expect(described_class::MIGRATION).to be_scheduled_delayed_migration(10.minutes, [6]) + + expect(BackgroundMigrationWorker.jobs.size).to eq(2) + end + end + end + end +end diff --git a/spec/migrations/enqueue_delete_diff_files_workers_spec.rb b/spec/migrations/enqueue_delete_diff_files_workers_spec.rb new file mode 100644 index 00000000000..6bae870920c --- /dev/null +++ b/spec/migrations/enqueue_delete_diff_files_workers_spec.rb @@ -0,0 +1,17 @@ +require 'spec_helper' +require Rails.root.join('db', 'post_migrate', '20180619121030_enqueue_delete_diff_files_workers.rb') + +describe EnqueueDeleteDiffFilesWorkers, :migration, :sidekiq do + it 'correctly schedules diff files deletion schedulers' do + Sidekiq::Testing.fake! do + expect(BackgroundMigrationWorker) + .to receive(:perform_async) + .with(described_class::SCHEDULER) + .and_call_original + + migrate! + + expect(BackgroundMigrationWorker.jobs.size).to eq(1) + end + end +end