Make MR diff background migration less likely to time out
This version does not use transactions, but individual statements. As we have unique constraints on the target tables for the inserts, we can just ignore uniqueness violations there (as long as we always insert the same batch size, in the same order). This means the spec now must use truncation, not a transaction, as the uniqueness violation means that the whole transaction for that spec would be invalid, which isn't what we'd want. In real-world use, this isn't run in a transaction anyway. This commit also wraps unhandled exceptions, for easier finding in Sentry, and logs with a consistent format, for easier searching.
This commit is contained in:
parent
917194153f
commit
1507ff8ab7
2 changed files with 102 additions and 32 deletions
|
@ -3,6 +3,12 @@ module Gitlab
|
|||
class DeserializeMergeRequestDiffsAndCommits
|
||||
attr_reader :diff_ids, :commit_rows, :file_rows
|
||||
|
||||
class Error < StandardError
|
||||
def backtrace
|
||||
cause.backtrace
|
||||
end
|
||||
end
|
||||
|
||||
class MergeRequestDiff < ActiveRecord::Base
|
||||
self.table_name = 'merge_request_diffs'
|
||||
end
|
||||
|
@ -34,6 +40,10 @@ module Gitlab
|
|||
end
|
||||
|
||||
flush_buffers!
|
||||
rescue => e
|
||||
Rails.logger.info("#{self.class.name}: failed for IDs #{merge_request_diffs.map(&:id)} with #{e.class.name}")
|
||||
|
||||
raise Error.new(e.inspect)
|
||||
end
|
||||
|
||||
private
|
||||
|
@ -46,22 +56,28 @@ module Gitlab
|
|||
|
||||
def flush_buffers!
|
||||
if diff_ids.any?
|
||||
MergeRequestDiff.transaction do
|
||||
commit_rows.each_slice(BUFFER_ROWS).each do |commit_rows_slice|
|
||||
Gitlab::Database.bulk_insert('merge_request_diff_commits', commit_rows_slice)
|
||||
end
|
||||
|
||||
file_rows.each_slice(DIFF_FILE_BUFFER_ROWS).each do |file_rows_slice|
|
||||
Gitlab::Database.bulk_insert('merge_request_diff_files', file_rows_slice)
|
||||
end
|
||||
|
||||
MergeRequestDiff.where(id: diff_ids).update_all(st_commits: nil, st_diffs: nil)
|
||||
commit_rows.each_slice(BUFFER_ROWS).each do |commit_rows_slice|
|
||||
bulk_insert('merge_request_diff_commits', commit_rows_slice)
|
||||
end
|
||||
|
||||
file_rows.each_slice(DIFF_FILE_BUFFER_ROWS).each do |file_rows_slice|
|
||||
bulk_insert('merge_request_diff_files', file_rows_slice)
|
||||
end
|
||||
|
||||
MergeRequestDiff.where(id: diff_ids).update_all(st_commits: nil, st_diffs: nil)
|
||||
end
|
||||
|
||||
reset_buffers!
|
||||
end
|
||||
|
||||
def bulk_insert(table, rows)
|
||||
Gitlab::Database.bulk_insert(table, rows)
|
||||
rescue ActiveRecord::RecordNotUnique
|
||||
ids = rows.map { |row| row[:merge_request_diff_id] }.uniq.sort
|
||||
|
||||
Rails.logger.info("#{self.class.name}: rows inserted twice for IDs #{ids}")
|
||||
end
|
||||
|
||||
def single_diff_rows(merge_request_diff)
|
||||
sha_attribute = Gitlab::Database::ShaAttribute.new
|
||||
commits = YAML.load(merge_request_diff.st_commits) rescue []
|
||||
|
|
|
@ -1,9 +1,9 @@
|
|||
require 'spec_helper'
|
||||
|
||||
describe Gitlab::BackgroundMigration::DeserializeMergeRequestDiffsAndCommits do
|
||||
describe Gitlab::BackgroundMigration::DeserializeMergeRequestDiffsAndCommits, :truncate do
|
||||
describe '#perform' do
|
||||
set(:merge_request) { create(:merge_request) }
|
||||
set(:merge_request_diff) { merge_request.merge_request_diff }
|
||||
let(:merge_request) { create(:merge_request) }
|
||||
let(:merge_request_diff) { merge_request.merge_request_diff }
|
||||
let(:updated_merge_request_diff) { MergeRequestDiff.find(merge_request_diff.id) }
|
||||
|
||||
def diffs_to_hashes(diffs)
|
||||
|
@ -84,12 +84,6 @@ describe Gitlab::BackgroundMigration::DeserializeMergeRequestDiffsAndCommits do
|
|||
allow(Gitlab::Database).to receive(:bulk_insert).and_call_original
|
||||
end
|
||||
|
||||
it 'updates and continues' do
|
||||
expect(described_class::MergeRequestDiff).to receive(:transaction).twice
|
||||
|
||||
subject.perform(start_id, stop_id)
|
||||
end
|
||||
|
||||
it 'inserts commit rows in chunks of BUFFER_ROWS' do
|
||||
# There are 29 commits in each diff, so we should have slices of 20 + 9 + 20 + 9.
|
||||
stub_const("#{described_class}::BUFFER_ROWS", 20)
|
||||
|
@ -119,27 +113,87 @@ describe Gitlab::BackgroundMigration::DeserializeMergeRequestDiffsAndCommits do
|
|||
|
||||
context 'when BUFFER_ROWS is not exceeded' do
|
||||
it 'only updates once' do
|
||||
expect(described_class::MergeRequestDiff).to receive(:transaction).once
|
||||
expect(Gitlab::Database).to receive(:bulk_insert)
|
||||
.with('merge_request_diff_commits', anything)
|
||||
.once
|
||||
.and_call_original
|
||||
|
||||
expect(Gitlab::Database).to receive(:bulk_insert)
|
||||
.with('merge_request_diff_files', anything)
|
||||
.once
|
||||
.and_call_original
|
||||
|
||||
subject.perform(start_id, stop_id)
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
context 'when the merge request diff update fails' do
|
||||
before do
|
||||
allow(described_class::MergeRequestDiff)
|
||||
.to receive(:update_all).and_raise(ActiveRecord::Rollback)
|
||||
context 'when some rows were already inserted due to a previous failure' do
|
||||
before do
|
||||
subject.perform(start_id, stop_id)
|
||||
|
||||
convert_to_yaml(start_id, merge_request_diff.commits, diffs_to_hashes(merge_request_diff.merge_request_diff_files))
|
||||
convert_to_yaml(stop_id, updated_merge_request_diff.commits, diffs_to_hashes(updated_merge_request_diff.merge_request_diff_files))
|
||||
end
|
||||
|
||||
it 'does not raise' do
|
||||
expect { subject.perform(start_id, stop_id) }.not_to raise_exception
|
||||
end
|
||||
|
||||
it 'logs a message' do
|
||||
expect(Rails.logger).to receive(:info)
|
||||
.with(
|
||||
a_string_matching(described_class.name).and(matching([start_id, stop_id].inspect))
|
||||
)
|
||||
.twice
|
||||
|
||||
subject.perform(start_id, stop_id)
|
||||
end
|
||||
|
||||
it 'ends up with the correct rows' do
|
||||
expect(updated_merge_request_diff.commits.count).to eq(29)
|
||||
expect(updated_merge_request_diff.raw_diffs.count).to eq(20)
|
||||
end
|
||||
end
|
||||
|
||||
it 'does not add any diff commits' do
|
||||
expect { subject.perform(merge_request_diff.id, merge_request_diff.id) }
|
||||
.not_to change { MergeRequestDiffCommit.count }
|
||||
end
|
||||
context 'when the merge request diff update fails' do
|
||||
let(:exception) { ActiveRecord::RecordNotFound }
|
||||
|
||||
it 'does not add any diff files' do
|
||||
expect { subject.perform(merge_request_diff.id, merge_request_diff.id) }
|
||||
.not_to change { MergeRequestDiffFile.count }
|
||||
let(:perform_ignoring_exceptions) do
|
||||
begin
|
||||
subject.perform(start_id, stop_id)
|
||||
rescue described_class::Error
|
||||
end
|
||||
end
|
||||
|
||||
before do
|
||||
allow_any_instance_of(described_class::MergeRequestDiff::ActiveRecord_Relation)
|
||||
.to receive(:update_all).and_raise(exception)
|
||||
end
|
||||
|
||||
it 'raises an error' do
|
||||
expect { subject.perform(start_id, stop_id) }
|
||||
.to raise_exception(described_class::Error)
|
||||
end
|
||||
|
||||
it 'logs the error' do
|
||||
expect(Rails.logger).to receive(:info).with(
|
||||
a_string_matching(described_class.name)
|
||||
.and(matching([start_id, stop_id].inspect))
|
||||
.and(matching(exception.name))
|
||||
)
|
||||
|
||||
perform_ignoring_exceptions
|
||||
end
|
||||
|
||||
it 'still adds diff commits' do
|
||||
expect { perform_ignoring_exceptions }
|
||||
.to change { MergeRequestDiffCommit.count }
|
||||
end
|
||||
|
||||
it 'still adds diff files' do
|
||||
expect { perform_ignoring_exceptions }
|
||||
.to change { MergeRequestDiffFile.count }
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
|
|
Loading…
Reference in a new issue