Merge branch 'add-back-mr-diff-deletion-migration' into 'master'
Add back MR diff deletion migration See merge request gitlab-org/gitlab-ce!20331
This commit is contained in:
commit
d975a29be9
6 changed files with 223 additions and 53 deletions
|
@ -0,0 +1,26 @@
|
|||
class EnqueueDeleteDiffFilesWorkers < ActiveRecord::Migration
|
||||
include Gitlab::Database::MigrationHelpers
|
||||
|
||||
DOWNTIME = false
|
||||
SCHEDULER = 'ScheduleDiffFilesDeletion'.freeze
|
||||
TMP_INDEX = 'tmp_partial_diff_id_with_files_index'.freeze
|
||||
|
||||
disable_ddl_transaction!
|
||||
|
||||
def up
|
||||
unless index_exists_by_name?(:merge_request_diffs, TMP_INDEX)
|
||||
add_concurrent_index(:merge_request_diffs, :id, where: "(state NOT IN ('without_files', 'empty'))", name: TMP_INDEX)
|
||||
end
|
||||
|
||||
BackgroundMigrationWorker.perform_async(SCHEDULER)
|
||||
|
||||
# We don't remove the index since it's going to be used on DeleteDiffFiles
|
||||
# worker. We should remove it in an upcoming release.
|
||||
end
|
||||
|
||||
def down
|
||||
if index_exists_by_name?(:merge_request_diffs, TMP_INDEX)
|
||||
remove_concurrent_index_by_name(:merge_request_diffs, TMP_INDEX)
|
||||
end
|
||||
end
|
||||
end
|
|
@ -4,41 +4,77 @@
|
|||
module Gitlab
|
||||
module BackgroundMigration
|
||||
class DeleteDiffFiles
|
||||
def perform(merge_request_diff_id)
|
||||
merge_request_diff = MergeRequestDiff.find_by(id: merge_request_diff_id)
|
||||
class MergeRequestDiff < ActiveRecord::Base
|
||||
self.table_name = 'merge_request_diffs'
|
||||
|
||||
return unless merge_request_diff
|
||||
return unless should_delete_diff_files?(merge_request_diff)
|
||||
|
||||
MergeRequestDiff.transaction do
|
||||
merge_request_diff.update_column(:state, 'without_files')
|
||||
|
||||
# explain (analyze, buffers) when deleting 453 diff files:
|
||||
#
|
||||
# Delete on merge_request_diff_files (cost=0.57..8487.35 rows=4846 width=6) (actual time=43.265..43.265 rows=0 loops=1)
|
||||
# Buffers: shared hit=2043 read=259 dirtied=254
|
||||
# -> Index Scan using index_merge_request_diff_files_on_mr_diff_id_and_order on merge_request_diff_files (cost=0.57..8487.35 rows=4846 width=6) (actu
|
||||
# al time=0.466..26.317 rows=453 loops=1)
|
||||
# Index Cond: (merge_request_diff_id = 463448)
|
||||
# Buffers: shared hit=17 read=84
|
||||
# Planning time: 0.107 ms
|
||||
# Execution time: 43.287 ms
|
||||
#
|
||||
MergeRequestDiffFile.where(merge_request_diff_id: merge_request_diff.id).delete_all
|
||||
belongs_to :merge_request
|
||||
has_many :merge_request_diff_files
|
||||
end
|
||||
|
||||
class MergeRequestDiffFile < ActiveRecord::Base
|
||||
self.table_name = 'merge_request_diff_files'
|
||||
end
|
||||
|
||||
DEAD_TUPLES_THRESHOLD = 50_000
|
||||
VACUUM_WAIT_TIME = 5.minutes
|
||||
|
||||
def perform(ids)
|
||||
@ids = ids
|
||||
|
||||
# We should reschedule until deadtuples get in a desirable
|
||||
# state (e.g. < 50_000). That may take more than one reschedule.
|
||||
#
|
||||
if should_wait_deadtuple_vacuum?
|
||||
reschedule
|
||||
return
|
||||
end
|
||||
|
||||
prune_diff_files
|
||||
end
|
||||
|
||||
def should_wait_deadtuple_vacuum?
|
||||
return false unless Gitlab::Database.postgresql?
|
||||
|
||||
diff_files_dead_tuples_count >= DEAD_TUPLES_THRESHOLD
|
||||
end
|
||||
|
||||
private
|
||||
|
||||
def should_delete_diff_files?(merge_request_diff)
|
||||
return false if merge_request_diff.state == 'without_files'
|
||||
def reschedule
|
||||
BackgroundMigrationWorker.perform_in(VACUUM_WAIT_TIME, self.class.name.demodulize, [@ids])
|
||||
end
|
||||
|
||||
merge_request = merge_request_diff.merge_request
|
||||
def diffs_collection
|
||||
MergeRequestDiff.where(id: @ids)
|
||||
end
|
||||
|
||||
return false unless merge_request.state == 'merged'
|
||||
return false if merge_request_diff.id == merge_request.latest_merge_request_diff_id
|
||||
def diff_files_dead_tuples_count
|
||||
dead_tuple =
|
||||
execute_statement("SELECT n_dead_tup FROM pg_stat_all_tables "\
|
||||
"WHERE relname = 'merge_request_diff_files'")[0]
|
||||
|
||||
true
|
||||
dead_tuple&.fetch('n_dead_tup', 0).to_i
|
||||
end
|
||||
|
||||
def prune_diff_files
|
||||
removed = 0
|
||||
updated = 0
|
||||
|
||||
MergeRequestDiff.transaction do
|
||||
updated = diffs_collection.update_all(state: 'without_files')
|
||||
removed = MergeRequestDiffFile.where(merge_request_diff_id: @ids).delete_all
|
||||
end
|
||||
|
||||
log_info("Removed #{removed} merge_request_diff_files rows, "\
|
||||
"updated #{updated} merge_request_diffs rows")
|
||||
end
|
||||
|
||||
def execute_statement(sql)
|
||||
ActiveRecord::Base.connection.execute(sql)
|
||||
end
|
||||
|
||||
def log_info(message)
|
||||
Rails.logger.info("BackgroundMigration::DeleteDiffFiles - #{message}")
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
@ -0,0 +1,44 @@
|
|||
# frozen_string_literal: true
|
||||
# rubocop:disable Style/Documentation
|
||||
|
||||
module Gitlab
|
||||
module BackgroundMigration
|
||||
class ScheduleDiffFilesDeletion
|
||||
class MergeRequestDiff < ActiveRecord::Base
|
||||
self.table_name = 'merge_request_diffs'
|
||||
|
||||
belongs_to :merge_request
|
||||
|
||||
include EachBatch
|
||||
end
|
||||
|
||||
DIFF_BATCH_SIZE = 5_000
|
||||
INTERVAL = 5.minutes
|
||||
MIGRATION = 'DeleteDiffFiles'
|
||||
|
||||
def perform
|
||||
diffs = MergeRequestDiff
|
||||
.from("(#{diffs_collection.to_sql}) merge_request_diffs")
|
||||
.where('merge_request_diffs.id != merge_request_diffs.latest_merge_request_diff_id')
|
||||
.select(:id)
|
||||
|
||||
diffs.each_batch(of: DIFF_BATCH_SIZE) do |relation, index|
|
||||
ids = relation.pluck(:id)
|
||||
|
||||
BackgroundMigrationWorker.perform_in(index * INTERVAL, MIGRATION, [ids])
|
||||
end
|
||||
end
|
||||
|
||||
private
|
||||
|
||||
def diffs_collection
|
||||
MergeRequestDiff
|
||||
.joins(:merge_request)
|
||||
.where("merge_requests.state = 'merged'")
|
||||
.where('merge_requests.latest_merge_request_diff_id IS NOT NULL')
|
||||
.where("merge_request_diffs.state NOT IN ('without_files', 'empty')")
|
||||
.select('merge_requests.latest_merge_request_diff_id, merge_request_diffs.id')
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
|
@ -1,31 +1,35 @@
|
|||
require 'spec_helper'
|
||||
|
||||
describe Gitlab::BackgroundMigration::DeleteDiffFiles, :migration, schema: 20180626125654 do
|
||||
describe Gitlab::BackgroundMigration::DeleteDiffFiles, :migration, schema: 20180619121030 do
|
||||
describe '#perform' do
|
||||
context 'when diff files can be deleted' do
|
||||
let(:merge_request) { create(:merge_request, :merged) }
|
||||
let(:merge_request_diff) do
|
||||
let!(:merge_request_diff) do
|
||||
merge_request.create_merge_request_diff
|
||||
merge_request.merge_request_diffs.first
|
||||
end
|
||||
|
||||
let(:perform) do
|
||||
described_class.new.perform(MergeRequestDiff.pluck(:id))
|
||||
end
|
||||
|
||||
it 'deletes all merge request diff files' do
|
||||
expect { described_class.new.perform(merge_request_diff.id) }
|
||||
expect { perform }
|
||||
.to change { merge_request_diff.merge_request_diff_files.count }
|
||||
.from(20).to(0)
|
||||
end
|
||||
|
||||
it 'updates state to without_files' do
|
||||
expect { described_class.new.perform(merge_request_diff.id) }
|
||||
expect { perform }
|
||||
.to change { merge_request_diff.reload.state }
|
||||
.from('collected').to('without_files')
|
||||
end
|
||||
|
||||
it 'rollsback if something goes wrong' do
|
||||
expect(MergeRequestDiffFile).to receive_message_chain(:where, :delete_all)
|
||||
expect(described_class::MergeRequestDiffFile).to receive_message_chain(:where, :delete_all)
|
||||
.and_raise
|
||||
|
||||
expect { described_class.new.perform(merge_request_diff.id) }
|
||||
expect { perform }
|
||||
.to raise_error
|
||||
|
||||
merge_request_diff.reload
|
||||
|
@ -35,35 +39,35 @@ describe Gitlab::BackgroundMigration::DeleteDiffFiles, :migration, schema: 20180
|
|||
end
|
||||
end
|
||||
|
||||
it 'deletes no merge request diff files when MR is not merged' do
|
||||
merge_request = create(:merge_request, :opened)
|
||||
merge_request.create_merge_request_diff
|
||||
merge_request_diff = merge_request.merge_request_diffs.first
|
||||
|
||||
expect { described_class.new.perform(merge_request_diff.id) }
|
||||
.not_to change { merge_request_diff.merge_request_diff_files.count }
|
||||
.from(20)
|
||||
end
|
||||
|
||||
it 'deletes no merge request diff files when diff is marked as "without_files"' do
|
||||
it 'reschedules itself when should_wait_deadtuple_vacuum' do
|
||||
merge_request = create(:merge_request, :merged)
|
||||
merge_request.create_merge_request_diff
|
||||
merge_request_diff = merge_request.merge_request_diffs.first
|
||||
first_diff = merge_request.merge_request_diff
|
||||
second_diff = merge_request.create_merge_request_diff
|
||||
|
||||
merge_request_diff.clean!
|
||||
Sidekiq::Testing.fake! do
|
||||
worker = described_class.new
|
||||
allow(worker).to receive(:should_wait_deadtuple_vacuum?) { true }
|
||||
|
||||
expect { described_class.new.perform(merge_request_diff.id) }
|
||||
.not_to change { merge_request_diff.merge_request_diff_files.count }
|
||||
.from(20)
|
||||
end
|
||||
worker.perform([first_diff.id, second_diff.id])
|
||||
|
||||
it 'deletes no merge request diff files when diff is the latest' do
|
||||
merge_request = create(:merge_request, :merged)
|
||||
merge_request_diff = merge_request.merge_request_diff
|
||||
|
||||
expect { described_class.new.perform(merge_request_diff.id) }
|
||||
.not_to change { merge_request_diff.merge_request_diff_files.count }
|
||||
.from(20)
|
||||
expect(described_class.name.demodulize).to be_scheduled_delayed_migration(5.minutes, [first_diff.id, second_diff.id])
|
||||
expect(BackgroundMigrationWorker.jobs.size).to eq(1)
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
describe '#should_wait_deadtuple_vacuum?' do
|
||||
it 'returns true when hitting merge_request_diff_files hits DEAD_TUPLES_THRESHOLD', :postgresql do
|
||||
worker = described_class.new
|
||||
threshold_query_result = [{ "n_dead_tup" => described_class::DEAD_TUPLES_THRESHOLD.to_s }]
|
||||
normal_query_result = [{ "n_dead_tup" => '3' }]
|
||||
|
||||
allow(worker)
|
||||
.to receive(:execute_statement)
|
||||
.with(/SELECT n_dead_tup */)
|
||||
.and_return(threshold_query_result, normal_query_result)
|
||||
|
||||
expect(worker.should_wait_deadtuple_vacuum?).to be(true)
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
@ -0,0 +1,43 @@
|
|||
require 'spec_helper'
|
||||
|
||||
describe Gitlab::BackgroundMigration::ScheduleDiffFilesDeletion, :migration, schema: 20180619121030 do
|
||||
describe '#perform' do
|
||||
let(:merge_request_diffs) { table(:merge_request_diffs) }
|
||||
let(:merge_requests) { table(:merge_requests) }
|
||||
let(:namespaces) { table(:namespaces) }
|
||||
let(:projects) { table(:projects) }
|
||||
|
||||
before do
|
||||
stub_const("#{described_class.name}::DIFF_BATCH_SIZE", 3)
|
||||
|
||||
namespaces.create!(id: 1, name: 'gitlab', path: 'gitlab')
|
||||
projects.create!(id: 1, namespace_id: 1, name: 'gitlab', path: 'gitlab')
|
||||
|
||||
merge_requests.create!(id: 1, target_project_id: 1, source_project_id: 1, target_branch: 'feature', source_branch: 'master', state: 'merged')
|
||||
|
||||
merge_request_diffs.create!(id: 1, merge_request_id: 1, state: 'collected')
|
||||
merge_request_diffs.create!(id: 2, merge_request_id: 1, state: 'empty')
|
||||
merge_request_diffs.create!(id: 3, merge_request_id: 1, state: 'without_files')
|
||||
merge_request_diffs.create!(id: 4, merge_request_id: 1, state: 'collected')
|
||||
merge_request_diffs.create!(id: 5, merge_request_id: 1, state: 'collected')
|
||||
merge_request_diffs.create!(id: 6, merge_request_id: 1, state: 'collected')
|
||||
merge_request_diffs.create!(id: 7, merge_request_id: 1, state: 'collected')
|
||||
|
||||
merge_requests.update(1, latest_merge_request_diff_id: 7)
|
||||
end
|
||||
|
||||
it 'correctly schedules diff file deletion workers' do
|
||||
Sidekiq::Testing.fake! do
|
||||
Timecop.freeze do
|
||||
described_class.new.perform
|
||||
|
||||
expect(described_class::MIGRATION).to be_scheduled_delayed_migration(5.minutes, [1, 4, 5])
|
||||
|
||||
expect(described_class::MIGRATION).to be_scheduled_delayed_migration(10.minutes, [6])
|
||||
|
||||
expect(BackgroundMigrationWorker.jobs.size).to eq(2)
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
17
spec/migrations/enqueue_delete_diff_files_workers_spec.rb
Normal file
17
spec/migrations/enqueue_delete_diff_files_workers_spec.rb
Normal file
|
@ -0,0 +1,17 @@
|
|||
require 'spec_helper'
|
||||
require Rails.root.join('db', 'post_migrate', '20180619121030_enqueue_delete_diff_files_workers.rb')
|
||||
|
||||
describe EnqueueDeleteDiffFilesWorkers, :migration, :sidekiq do
|
||||
it 'correctly schedules diff files deletion schedulers' do
|
||||
Sidekiq::Testing.fake! do
|
||||
expect(BackgroundMigrationWorker)
|
||||
.to receive(:perform_async)
|
||||
.with(described_class::SCHEDULER)
|
||||
.and_call_original
|
||||
|
||||
migrate!
|
||||
|
||||
expect(BackgroundMigrationWorker.jobs.size).to eq(1)
|
||||
end
|
||||
end
|
||||
end
|
Loading…
Reference in a new issue