Make MR diff background migration less likely to time out

This version does not use transactions, but individual statements. As we have
unique constraints on the target tables for the inserts, we can just ignore
uniqueness violations there (as long as we always insert the same batch size, in
the same order).

This means the spec now must use truncation, not a transaction, as the
uniqueness violation means that the whole transaction for that spec would be
invalid, which isn't what we'd want. In real-world use, this isn't run in a
transaction anyway.

This commit also wraps unhandled exceptions, for easier finding in Sentry, and
logs with a consistent format, for easier searching.
This commit is contained in:
Sean McGivern 2017-09-26 15:09:37 +01:00
parent 917194153f
commit 1507ff8ab7
2 changed files with 102 additions and 32 deletions

View file

@ -3,6 +3,12 @@ module Gitlab
class DeserializeMergeRequestDiffsAndCommits
attr_reader :diff_ids, :commit_rows, :file_rows
class Error < StandardError
def backtrace
cause.backtrace
end
end
class MergeRequestDiff < ActiveRecord::Base
self.table_name = 'merge_request_diffs'
end
@ -34,6 +40,10 @@ module Gitlab
end
flush_buffers!
rescue => e
Rails.logger.info("#{self.class.name}: failed for IDs #{merge_request_diffs.map(&:id)} with #{e.class.name}")
raise Error.new(e.inspect)
end
private
@ -46,22 +56,28 @@ module Gitlab
def flush_buffers!
if diff_ids.any?
MergeRequestDiff.transaction do
commit_rows.each_slice(BUFFER_ROWS).each do |commit_rows_slice|
Gitlab::Database.bulk_insert('merge_request_diff_commits', commit_rows_slice)
end
file_rows.each_slice(DIFF_FILE_BUFFER_ROWS).each do |file_rows_slice|
Gitlab::Database.bulk_insert('merge_request_diff_files', file_rows_slice)
end
MergeRequestDiff.where(id: diff_ids).update_all(st_commits: nil, st_diffs: nil)
commit_rows.each_slice(BUFFER_ROWS).each do |commit_rows_slice|
bulk_insert('merge_request_diff_commits', commit_rows_slice)
end
file_rows.each_slice(DIFF_FILE_BUFFER_ROWS).each do |file_rows_slice|
bulk_insert('merge_request_diff_files', file_rows_slice)
end
MergeRequestDiff.where(id: diff_ids).update_all(st_commits: nil, st_diffs: nil)
end
reset_buffers!
end
def bulk_insert(table, rows)
Gitlab::Database.bulk_insert(table, rows)
rescue ActiveRecord::RecordNotUnique
ids = rows.map { |row| row[:merge_request_diff_id] }.uniq.sort
Rails.logger.info("#{self.class.name}: rows inserted twice for IDs #{ids}")
end
def single_diff_rows(merge_request_diff)
sha_attribute = Gitlab::Database::ShaAttribute.new
commits = YAML.load(merge_request_diff.st_commits) rescue []

View file

@ -1,9 +1,9 @@
require 'spec_helper'
describe Gitlab::BackgroundMigration::DeserializeMergeRequestDiffsAndCommits do
describe Gitlab::BackgroundMigration::DeserializeMergeRequestDiffsAndCommits, :truncate do
describe '#perform' do
set(:merge_request) { create(:merge_request) }
set(:merge_request_diff) { merge_request.merge_request_diff }
let(:merge_request) { create(:merge_request) }
let(:merge_request_diff) { merge_request.merge_request_diff }
let(:updated_merge_request_diff) { MergeRequestDiff.find(merge_request_diff.id) }
def diffs_to_hashes(diffs)
@ -84,12 +84,6 @@ describe Gitlab::BackgroundMigration::DeserializeMergeRequestDiffsAndCommits do
allow(Gitlab::Database).to receive(:bulk_insert).and_call_original
end
it 'updates and continues' do
expect(described_class::MergeRequestDiff).to receive(:transaction).twice
subject.perform(start_id, stop_id)
end
it 'inserts commit rows in chunks of BUFFER_ROWS' do
# There are 29 commits in each diff, so we should have slices of 20 + 9 + 20 + 9.
stub_const("#{described_class}::BUFFER_ROWS", 20)
@ -119,27 +113,87 @@ describe Gitlab::BackgroundMigration::DeserializeMergeRequestDiffsAndCommits do
context 'when BUFFER_ROWS is not exceeded' do
it 'only updates once' do
expect(described_class::MergeRequestDiff).to receive(:transaction).once
expect(Gitlab::Database).to receive(:bulk_insert)
.with('merge_request_diff_commits', anything)
.once
.and_call_original
expect(Gitlab::Database).to receive(:bulk_insert)
.with('merge_request_diff_files', anything)
.once
.and_call_original
subject.perform(start_id, stop_id)
end
end
end
context 'when the merge request diff update fails' do
before do
allow(described_class::MergeRequestDiff)
.to receive(:update_all).and_raise(ActiveRecord::Rollback)
context 'when some rows were already inserted due to a previous failure' do
before do
subject.perform(start_id, stop_id)
convert_to_yaml(start_id, merge_request_diff.commits, diffs_to_hashes(merge_request_diff.merge_request_diff_files))
convert_to_yaml(stop_id, updated_merge_request_diff.commits, diffs_to_hashes(updated_merge_request_diff.merge_request_diff_files))
end
it 'does not raise' do
expect { subject.perform(start_id, stop_id) }.not_to raise_exception
end
it 'logs a message' do
expect(Rails.logger).to receive(:info)
.with(
a_string_matching(described_class.name).and(matching([start_id, stop_id].inspect))
)
.twice
subject.perform(start_id, stop_id)
end
it 'ends up with the correct rows' do
expect(updated_merge_request_diff.commits.count).to eq(29)
expect(updated_merge_request_diff.raw_diffs.count).to eq(20)
end
end
it 'does not add any diff commits' do
expect { subject.perform(merge_request_diff.id, merge_request_diff.id) }
.not_to change { MergeRequestDiffCommit.count }
end
context 'when the merge request diff update fails' do
let(:exception) { ActiveRecord::RecordNotFound }
it 'does not add any diff files' do
expect { subject.perform(merge_request_diff.id, merge_request_diff.id) }
.not_to change { MergeRequestDiffFile.count }
let(:perform_ignoring_exceptions) do
begin
subject.perform(start_id, stop_id)
rescue described_class::Error
end
end
before do
allow_any_instance_of(described_class::MergeRequestDiff::ActiveRecord_Relation)
.to receive(:update_all).and_raise(exception)
end
it 'raises an error' do
expect { subject.perform(start_id, stop_id) }
.to raise_exception(described_class::Error)
end
it 'logs the error' do
expect(Rails.logger).to receive(:info).with(
a_string_matching(described_class.name)
.and(matching([start_id, stop_id].inspect))
.and(matching(exception.name))
)
perform_ignoring_exceptions
end
it 'still adds diff commits' do
expect { perform_ignoring_exceptions }
.to change { MergeRequestDiffCommit.count }
end
it 'still adds diff files' do
expect { perform_ignoring_exceptions }
.to change { MergeRequestDiffFile.count }
end
end
end