From 31d8464f67f498fdba3dd1055463b647ad9ef767 Mon Sep 17 00:00:00 2001 From: Oswaldo Ferreira Date: Mon, 25 Jun 2018 17:49:53 -0300 Subject: [PATCH] Schedule workers to delete non-latest diffs in post-migration --- ...ete-non-latest-mr-diff-files-migration.yml | 5 ++ ...21030_enqueue_delete_diff_files_workers.rb | 70 +++++++++++++++++++ .../background_migration/delete_diff_files.rb | 46 ++++++++++++ .../delete_diff_files_spec.rb | 69 ++++++++++++++++++ .../enqueue_delete_diff_files_workers_spec.rb | 48 +++++++++++++ 5 files changed, 238 insertions(+) create mode 100644 changelogs/unreleased/osw-delete-non-latest-mr-diff-files-migration.yml create mode 100644 db/post_migrate/20180619121030_enqueue_delete_diff_files_workers.rb create mode 100644 lib/gitlab/background_migration/delete_diff_files.rb create mode 100644 spec/lib/gitlab/background_migration/delete_diff_files_spec.rb create mode 100644 spec/migrations/enqueue_delete_diff_files_workers_spec.rb diff --git a/changelogs/unreleased/osw-delete-non-latest-mr-diff-files-migration.yml b/changelogs/unreleased/osw-delete-non-latest-mr-diff-files-migration.yml new file mode 100644 index 00000000000..e4cbae1a109 --- /dev/null +++ b/changelogs/unreleased/osw-delete-non-latest-mr-diff-files-migration.yml @@ -0,0 +1,5 @@ +--- +title: Schedule workers to delete non-latest diffs in post-migration +merge_request: +author: +type: other diff --git a/db/post_migrate/20180619121030_enqueue_delete_diff_files_workers.rb b/db/post_migrate/20180619121030_enqueue_delete_diff_files_workers.rb new file mode 100644 index 00000000000..5fb3d545624 --- /dev/null +++ b/db/post_migrate/20180619121030_enqueue_delete_diff_files_workers.rb @@ -0,0 +1,70 @@ +class EnqueueDeleteDiffFilesWorkers < ActiveRecord::Migration + include Gitlab::Database::MigrationHelpers + + class MergeRequestDiff < ActiveRecord::Base + self.table_name = 'merge_request_diffs' + + belongs_to :merge_request + + include EachBatch + end + + DOWNTIME = false + BATCH_SIZE = 1000 + MIGRATION = 'DeleteDiffFiles' + DELAY_INTERVAL = 8.minutes + TMP_INDEX = 'tmp_partial_diff_id_with_files_index'.freeze + + disable_ddl_transaction! + + def up + # We add temporary index, to make iteration over batches more performant. + # Conditional here is to avoid the need of doing that in a separate + # migration file to make this operation idempotent. + # + unless index_exists_by_name?(:merge_request_diffs, TMP_INDEX) + add_concurrent_index(:merge_request_diffs, :id, where: "(state NOT IN ('without_files', 'empty'))", name: TMP_INDEX) + end + + + diffs_with_files = MergeRequestDiff.where.not(state: ['without_files', 'empty']) + + # explain (analyze, buffers) example for the iteration: + # + # Index Only Scan using tmp_index_20013 on merge_request_diffs (cost=0.43..1630.19 rows=60567 width=4) (actual time=0.047..9.572 rows=56976 loops=1) + # Index Cond: ((id >= 764586) AND (id < 835298)) + # Heap Fetches: 8 + # Buffers: shared hit=18188 + # Planning time: 0.752 ms + # Execution time: 12.430 ms + # + diffs_with_files.each_batch(of: BATCH_SIZE) do |relation, outer_index| + ids = relation.pluck(:id) + + ids.each_with_index do |diff_id, inner_index| + # This will give some space between batches of workers. + interval = DELAY_INTERVAL * outer_index + inner_index.minutes + + # A single `merge_request_diff` can be associated with way too many + # `merge_request_diff_files`. It's better to avoid batching these and + # schedule one at a time. + # + # Considering roughly 6M jobs, this should take ~30 days to process all + # of them. + # + BackgroundMigrationWorker.perform_in(interval, MIGRATION, [diff_id]) + end + end + + # We remove temporary index, because it is not required during standard + # operations and runtime. + # + remove_concurrent_index_by_name(:merge_request_diffs, TMP_INDEX) + end + + def down + if index_exists_by_name?(:merge_request_diffs, TMP_INDEX) + remove_concurrent_index_by_name(:merge_request_diffs, TMP_INDEX) + end + end +end diff --git a/lib/gitlab/background_migration/delete_diff_files.rb b/lib/gitlab/background_migration/delete_diff_files.rb new file mode 100644 index 00000000000..0b785e1b056 --- /dev/null +++ b/lib/gitlab/background_migration/delete_diff_files.rb @@ -0,0 +1,46 @@ +# frozen_string_literal: true +# rubocop:disable Metrics/AbcSize +# rubocop:disable Style/Documentation + +module Gitlab + module BackgroundMigration + class DeleteDiffFiles + def perform(merge_request_diff_id) + merge_request_diff = MergeRequestDiff.find_by(id: merge_request_diff_id) + + return unless merge_request_diff + return unless should_delete_diff_files?(merge_request_diff) + + MergeRequestDiff.transaction do + merge_request_diff.update_column(:state, 'without_files') + + # explain (analyze, buffers) when deleting 453 diff files: + # + # Delete on merge_request_diff_files (cost=0.57..8487.35 rows=4846 width=6) (actual time=43.265..43.265 rows=0 loops=1) + # Buffers: shared hit=2043 read=259 dirtied=254 + # -> Index Scan using index_merge_request_diff_files_on_mr_diff_id_and_order on merge_request_diff_files (cost=0.57..8487.35 rows=4846 width=6) (actu + # al time=0.466..26.317 rows=453 loops=1) + # Index Cond: (merge_request_diff_id = 463448) + # Buffers: shared hit=17 read=84 + # Planning time: 0.107 ms + # Execution time: 43.287 ms + # + MergeRequestDiffFile.where(merge_request_diff_id: merge_request_diff.id).delete_all + end + end + + private + + def should_delete_diff_files?(merge_request_diff) + return false if merge_request_diff.state == 'without_files' + + merge_request = merge_request_diff.merge_request + + return false unless merge_request.state == 'merged' + return false if merge_request_diff.id == merge_request.latest_merge_request_diff_id + + true + end + end + end +end diff --git a/spec/lib/gitlab/background_migration/delete_diff_files_spec.rb b/spec/lib/gitlab/background_migration/delete_diff_files_spec.rb new file mode 100644 index 00000000000..a251ab323d8 --- /dev/null +++ b/spec/lib/gitlab/background_migration/delete_diff_files_spec.rb @@ -0,0 +1,69 @@ +require 'spec_helper' + +describe Gitlab::BackgroundMigration::DeleteDiffFiles, :migration, schema: 20180619121030 do + describe '#perform' do + context 'when diff files can be deleted' do + let(:merge_request) { create(:merge_request, :merged) } + let(:merge_request_diff) do + merge_request.create_merge_request_diff + merge_request.merge_request_diffs.first + end + + it 'deletes all merge request diff files' do + expect { described_class.new.perform(merge_request_diff.id) } + .to change { merge_request_diff.merge_request_diff_files.count } + .from(20).to(0) + end + + it 'updates state to without_files' do + expect { described_class.new.perform(merge_request_diff.id) } + .to change { merge_request_diff.reload.state } + .from('collected').to('without_files') + end + + it 'rollsback if something goes wrong' do + expect(MergeRequestDiffFile).to receive_message_chain(:where, :delete_all) + .and_raise + + expect { described_class.new.perform(merge_request_diff.id) } + .to raise_error + + merge_request_diff.reload + + expect(merge_request_diff.state).to eq('collected') + expect(merge_request_diff.merge_request_diff_files.count).to eq(20) + end + end + + it 'deletes no merge request diff files when MR is not merged' do + merge_request = create(:merge_request, :opened) + merge_request.create_merge_request_diff + merge_request_diff = merge_request.merge_request_diffs.first + + expect { described_class.new.perform(merge_request_diff.id) } + .not_to change { merge_request_diff.merge_request_diff_files.count } + .from(20) + end + + it 'deletes no merge request diff files when diff is marked as "without_files"' do + merge_request = create(:merge_request, :merged) + merge_request.create_merge_request_diff + merge_request_diff = merge_request.merge_request_diffs.first + + merge_request_diff.clean! + + expect { described_class.new.perform(merge_request_diff.id) } + .not_to change { merge_request_diff.merge_request_diff_files.count } + .from(20) + end + + it 'deletes no merge request diff files when diff is the latest' do + merge_request = create(:merge_request, :merged) + merge_request_diff = merge_request.merge_request_diff + + expect { described_class.new.perform(merge_request_diff.id) } + .not_to change { merge_request_diff.merge_request_diff_files.count } + .from(20) + end + end +end diff --git a/spec/migrations/enqueue_delete_diff_files_workers_spec.rb b/spec/migrations/enqueue_delete_diff_files_workers_spec.rb new file mode 100644 index 00000000000..686027822b8 --- /dev/null +++ b/spec/migrations/enqueue_delete_diff_files_workers_spec.rb @@ -0,0 +1,48 @@ +require 'spec_helper' +require Rails.root.join('db', 'post_migrate', '20180619121030_enqueue_delete_diff_files_workers.rb') + +describe EnqueueDeleteDiffFilesWorkers, :migration, :sidekiq do + let(:merge_request_diffs) { table(:merge_request_diffs) } + let(:merge_requests) { table(:merge_requests) } + let(:namespaces) { table(:namespaces) } + let(:projects) { table(:projects) } + + before do + stub_const("#{described_class.name}::BATCH_SIZE", 2) + + namespaces.create!(id: 1, name: 'gitlab', path: 'gitlab') + projects.create!(id: 1, namespace_id: 1, name: 'gitlab', path: 'gitlab') + + merge_requests.create!(id: 1, target_project_id: 1, source_project_id: 1, target_branch: 'feature', source_branch: 'master', state: 'merged') + + merge_request_diffs.create!(id: 1, merge_request_id: 1, state: 'collected') + merge_request_diffs.create!(id: 2, merge_request_id: 1, state: 'without_files') + merge_request_diffs.create!(id: 3, merge_request_id: 1, state: 'collected') + merge_request_diffs.create!(id: 4, merge_request_id: 1, state: 'collected') + merge_request_diffs.create!(id: 5, merge_request_id: 1, state: 'empty') + merge_request_diffs.create!(id: 6, merge_request_id: 1, state: 'collected') + + merge_requests.update(1, latest_merge_request_diff_id: 6) + end + + it 'correctly schedules diff file deletion workers' do + Sidekiq::Testing.fake! do + Timecop.freeze do + migrate! + + # 1st batch + expect(described_class::MIGRATION).to be_scheduled_delayed_migration(8.minutes, 1) + expect(described_class::MIGRATION).to be_scheduled_delayed_migration(9.minutes, 3) + # 2nd batch + expect(described_class::MIGRATION).to be_scheduled_delayed_migration(16.minutes, 4) + expect(described_class::MIGRATION).to be_scheduled_delayed_migration(17.minutes, 6) + expect(BackgroundMigrationWorker.jobs.size).to eq(4) + end + end + end + + it 'migrates the data' do + expect { migrate! }.to change { merge_request_diffs.where(state: 'without_files').count } + .from(1).to(4) + end +end