Schedule workers to delete non-latest diffs in post-migration

This commit is contained in:
Oswaldo Ferreira 2018-06-25 17:49:53 -03:00
parent 292cf66890
commit 31d8464f67
5 changed files with 238 additions and 0 deletions

View file

@ -0,0 +1,5 @@
---
title: Schedule workers to delete non-latest diffs in post-migration
merge_request:
author:
type: other

View file

@ -0,0 +1,70 @@
class EnqueueDeleteDiffFilesWorkers < ActiveRecord::Migration
include Gitlab::Database::MigrationHelpers
class MergeRequestDiff < ActiveRecord::Base
self.table_name = 'merge_request_diffs'
belongs_to :merge_request
include EachBatch
end
DOWNTIME = false
BATCH_SIZE = 1000
MIGRATION = 'DeleteDiffFiles'
DELAY_INTERVAL = 8.minutes
TMP_INDEX = 'tmp_partial_diff_id_with_files_index'.freeze
disable_ddl_transaction!
def up
# We add temporary index, to make iteration over batches more performant.
# Conditional here is to avoid the need of doing that in a separate
# migration file to make this operation idempotent.
#
unless index_exists_by_name?(:merge_request_diffs, TMP_INDEX)
add_concurrent_index(:merge_request_diffs, :id, where: "(state NOT IN ('without_files', 'empty'))", name: TMP_INDEX)
end
diffs_with_files = MergeRequestDiff.where.not(state: ['without_files', 'empty'])
# explain (analyze, buffers) example for the iteration:
#
# Index Only Scan using tmp_index_20013 on merge_request_diffs (cost=0.43..1630.19 rows=60567 width=4) (actual time=0.047..9.572 rows=56976 loops=1)
# Index Cond: ((id >= 764586) AND (id < 835298))
# Heap Fetches: 8
# Buffers: shared hit=18188
# Planning time: 0.752 ms
# Execution time: 12.430 ms
#
diffs_with_files.each_batch(of: BATCH_SIZE) do |relation, outer_index|
ids = relation.pluck(:id)
ids.each_with_index do |diff_id, inner_index|
# This will give some space between batches of workers.
interval = DELAY_INTERVAL * outer_index + inner_index.minutes
# A single `merge_request_diff` can be associated with way too many
# `merge_request_diff_files`. It's better to avoid batching these and
# schedule one at a time.
#
# Considering roughly 6M jobs, this should take ~30 days to process all
# of them.
#
BackgroundMigrationWorker.perform_in(interval, MIGRATION, [diff_id])
end
end
# We remove temporary index, because it is not required during standard
# operations and runtime.
#
remove_concurrent_index_by_name(:merge_request_diffs, TMP_INDEX)
end
def down
if index_exists_by_name?(:merge_request_diffs, TMP_INDEX)
remove_concurrent_index_by_name(:merge_request_diffs, TMP_INDEX)
end
end
end

View file

@ -0,0 +1,46 @@
# frozen_string_literal: true
# rubocop:disable Metrics/AbcSize
# rubocop:disable Style/Documentation
module Gitlab
module BackgroundMigration
class DeleteDiffFiles
def perform(merge_request_diff_id)
merge_request_diff = MergeRequestDiff.find_by(id: merge_request_diff_id)
return unless merge_request_diff
return unless should_delete_diff_files?(merge_request_diff)
MergeRequestDiff.transaction do
merge_request_diff.update_column(:state, 'without_files')
# explain (analyze, buffers) when deleting 453 diff files:
#
# Delete on merge_request_diff_files (cost=0.57..8487.35 rows=4846 width=6) (actual time=43.265..43.265 rows=0 loops=1)
# Buffers: shared hit=2043 read=259 dirtied=254
# -> Index Scan using index_merge_request_diff_files_on_mr_diff_id_and_order on merge_request_diff_files (cost=0.57..8487.35 rows=4846 width=6) (actu
# al time=0.466..26.317 rows=453 loops=1)
# Index Cond: (merge_request_diff_id = 463448)
# Buffers: shared hit=17 read=84
# Planning time: 0.107 ms
# Execution time: 43.287 ms
#
MergeRequestDiffFile.where(merge_request_diff_id: merge_request_diff.id).delete_all
end
end
private
def should_delete_diff_files?(merge_request_diff)
return false if merge_request_diff.state == 'without_files'
merge_request = merge_request_diff.merge_request
return false unless merge_request.state == 'merged'
return false if merge_request_diff.id == merge_request.latest_merge_request_diff_id
true
end
end
end
end

View file

@ -0,0 +1,69 @@
require 'spec_helper'
describe Gitlab::BackgroundMigration::DeleteDiffFiles, :migration, schema: 20180619121030 do
describe '#perform' do
context 'when diff files can be deleted' do
let(:merge_request) { create(:merge_request, :merged) }
let(:merge_request_diff) do
merge_request.create_merge_request_diff
merge_request.merge_request_diffs.first
end
it 'deletes all merge request diff files' do
expect { described_class.new.perform(merge_request_diff.id) }
.to change { merge_request_diff.merge_request_diff_files.count }
.from(20).to(0)
end
it 'updates state to without_files' do
expect { described_class.new.perform(merge_request_diff.id) }
.to change { merge_request_diff.reload.state }
.from('collected').to('without_files')
end
it 'rollsback if something goes wrong' do
expect(MergeRequestDiffFile).to receive_message_chain(:where, :delete_all)
.and_raise
expect { described_class.new.perform(merge_request_diff.id) }
.to raise_error
merge_request_diff.reload
expect(merge_request_diff.state).to eq('collected')
expect(merge_request_diff.merge_request_diff_files.count).to eq(20)
end
end
it 'deletes no merge request diff files when MR is not merged' do
merge_request = create(:merge_request, :opened)
merge_request.create_merge_request_diff
merge_request_diff = merge_request.merge_request_diffs.first
expect { described_class.new.perform(merge_request_diff.id) }
.not_to change { merge_request_diff.merge_request_diff_files.count }
.from(20)
end
it 'deletes no merge request diff files when diff is marked as "without_files"' do
merge_request = create(:merge_request, :merged)
merge_request.create_merge_request_diff
merge_request_diff = merge_request.merge_request_diffs.first
merge_request_diff.clean!
expect { described_class.new.perform(merge_request_diff.id) }
.not_to change { merge_request_diff.merge_request_diff_files.count }
.from(20)
end
it 'deletes no merge request diff files when diff is the latest' do
merge_request = create(:merge_request, :merged)
merge_request_diff = merge_request.merge_request_diff
expect { described_class.new.perform(merge_request_diff.id) }
.not_to change { merge_request_diff.merge_request_diff_files.count }
.from(20)
end
end
end

View file

@ -0,0 +1,48 @@
require 'spec_helper'
require Rails.root.join('db', 'post_migrate', '20180619121030_enqueue_delete_diff_files_workers.rb')
describe EnqueueDeleteDiffFilesWorkers, :migration, :sidekiq do
let(:merge_request_diffs) { table(:merge_request_diffs) }
let(:merge_requests) { table(:merge_requests) }
let(:namespaces) { table(:namespaces) }
let(:projects) { table(:projects) }
before do
stub_const("#{described_class.name}::BATCH_SIZE", 2)
namespaces.create!(id: 1, name: 'gitlab', path: 'gitlab')
projects.create!(id: 1, namespace_id: 1, name: 'gitlab', path: 'gitlab')
merge_requests.create!(id: 1, target_project_id: 1, source_project_id: 1, target_branch: 'feature', source_branch: 'master', state: 'merged')
merge_request_diffs.create!(id: 1, merge_request_id: 1, state: 'collected')
merge_request_diffs.create!(id: 2, merge_request_id: 1, state: 'without_files')
merge_request_diffs.create!(id: 3, merge_request_id: 1, state: 'collected')
merge_request_diffs.create!(id: 4, merge_request_id: 1, state: 'collected')
merge_request_diffs.create!(id: 5, merge_request_id: 1, state: 'empty')
merge_request_diffs.create!(id: 6, merge_request_id: 1, state: 'collected')
merge_requests.update(1, latest_merge_request_diff_id: 6)
end
it 'correctly schedules diff file deletion workers' do
Sidekiq::Testing.fake! do
Timecop.freeze do
migrate!
# 1st batch
expect(described_class::MIGRATION).to be_scheduled_delayed_migration(8.minutes, 1)
expect(described_class::MIGRATION).to be_scheduled_delayed_migration(9.minutes, 3)
# 2nd batch
expect(described_class::MIGRATION).to be_scheduled_delayed_migration(16.minutes, 4)
expect(described_class::MIGRATION).to be_scheduled_delayed_migration(17.minutes, 6)
expect(BackgroundMigrationWorker.jobs.size).to eq(4)
end
end
end
it 'migrates the data' do
expect { migrate! }.to change { merge_request_diffs.where(state: 'without_files').count }
.from(1).to(4)
end
end