Merge branch '36631-activerecord-statementinvalid-pg-querycanceled-error-canceling-statement-due-to-statement-timeout' into 'master'
Insert at most 1,000 rows at once in MR diff background migration Closes #36631 et #37505 See merge request gitlab-org/gitlab-ce!13661
This commit is contained in:
commit
05e9c9f77e
5 changed files with 227 additions and 26 deletions
|
@ -0,0 +1,6 @@
|
|||
---
|
||||
title: Reschedule merge request diff background migrations to catch failures from
|
||||
9.5 run
|
||||
merge_request:
|
||||
author:
|
||||
type: fixed
|
|
@ -0,0 +1,32 @@
|
|||
class ScheduleMergeRequestDiffMigrationsTakeTwo < ActiveRecord::Migration
|
||||
include Gitlab::Database::MigrationHelpers
|
||||
|
||||
DOWNTIME = false
|
||||
BATCH_SIZE = 500
|
||||
MIGRATION = 'DeserializeMergeRequestDiffsAndCommits'
|
||||
DELAY_INTERVAL = 10.minutes
|
||||
|
||||
disable_ddl_transaction!
|
||||
|
||||
class MergeRequestDiff < ActiveRecord::Base
|
||||
self.table_name = 'merge_request_diffs'
|
||||
|
||||
include ::EachBatch
|
||||
|
||||
default_scope { where('st_commits IS NOT NULL OR st_diffs IS NOT NULL') }
|
||||
end
|
||||
|
||||
# By this point, we assume ScheduleMergeRequestDiffMigrations - the first
|
||||
# version of this - has already run. On GitLab.com, we have ~220k un-migrated
|
||||
# rows, but these rows will, in general, take a long time.
|
||||
#
|
||||
# With a gap of 10 minutes per batch, and 500 rows per batch, these migrations
|
||||
# are scheduled over 220_000 / 500 / 6 ~= 74 hours, which is a little over
|
||||
# three days.
|
||||
def up
|
||||
queue_background_migration_jobs_by_range_at_intervals(MergeRequestDiff, MIGRATION, DELAY_INTERVAL, batch_size: BATCH_SIZE)
|
||||
end
|
||||
|
||||
def down
|
||||
end
|
||||
end
|
|
@ -3,11 +3,18 @@ module Gitlab
|
|||
class DeserializeMergeRequestDiffsAndCommits
|
||||
attr_reader :diff_ids, :commit_rows, :file_rows
|
||||
|
||||
class Error < StandardError
|
||||
def backtrace
|
||||
cause.backtrace
|
||||
end
|
||||
end
|
||||
|
||||
class MergeRequestDiff < ActiveRecord::Base
|
||||
self.table_name = 'merge_request_diffs'
|
||||
end
|
||||
|
||||
BUFFER_ROWS = 1000
|
||||
DIFF_FILE_BUFFER_ROWS = 100
|
||||
|
||||
def perform(start_id, stop_id)
|
||||
merge_request_diffs = MergeRequestDiff
|
||||
|
@ -26,13 +33,17 @@ module Gitlab
|
|||
|
||||
if diff_ids.length > BUFFER_ROWS ||
|
||||
commit_rows.length > BUFFER_ROWS ||
|
||||
file_rows.length > BUFFER_ROWS
|
||||
file_rows.length > DIFF_FILE_BUFFER_ROWS
|
||||
|
||||
flush_buffers!
|
||||
end
|
||||
end
|
||||
|
||||
flush_buffers!
|
||||
rescue => e
|
||||
Rails.logger.info("#{self.class.name}: failed for IDs #{merge_request_diffs.map(&:id)} with #{e.class.name}")
|
||||
|
||||
raise Error.new(e.inspect)
|
||||
end
|
||||
|
||||
private
|
||||
|
@ -45,17 +56,28 @@ module Gitlab
|
|||
|
||||
def flush_buffers!
|
||||
if diff_ids.any?
|
||||
MergeRequestDiff.transaction do
|
||||
Gitlab::Database.bulk_insert('merge_request_diff_commits', commit_rows)
|
||||
Gitlab::Database.bulk_insert('merge_request_diff_files', file_rows)
|
||||
|
||||
MergeRequestDiff.where(id: diff_ids).update_all(st_commits: nil, st_diffs: nil)
|
||||
commit_rows.each_slice(BUFFER_ROWS).each do |commit_rows_slice|
|
||||
bulk_insert('merge_request_diff_commits', commit_rows_slice)
|
||||
end
|
||||
|
||||
file_rows.each_slice(DIFF_FILE_BUFFER_ROWS).each do |file_rows_slice|
|
||||
bulk_insert('merge_request_diff_files', file_rows_slice)
|
||||
end
|
||||
|
||||
MergeRequestDiff.where(id: diff_ids).update_all(st_commits: nil, st_diffs: nil)
|
||||
end
|
||||
|
||||
reset_buffers!
|
||||
end
|
||||
|
||||
def bulk_insert(table, rows)
|
||||
Gitlab::Database.bulk_insert(table, rows)
|
||||
rescue ActiveRecord::RecordNotUnique
|
||||
ids = rows.map { |row| row[:merge_request_diff_id] }.uniq.sort
|
||||
|
||||
Rails.logger.info("#{self.class.name}: rows inserted twice for IDs #{ids}")
|
||||
end
|
||||
|
||||
def single_diff_rows(merge_request_diff)
|
||||
sha_attribute = Gitlab::Database::ShaAttribute.new
|
||||
commits = YAML.load(merge_request_diff.st_commits) rescue []
|
||||
|
|
|
@ -1,9 +1,9 @@
|
|||
require 'spec_helper'
|
||||
|
||||
describe Gitlab::BackgroundMigration::DeserializeMergeRequestDiffsAndCommits do
|
||||
describe Gitlab::BackgroundMigration::DeserializeMergeRequestDiffsAndCommits, :truncate do
|
||||
describe '#perform' do
|
||||
set(:merge_request) { create(:merge_request) }
|
||||
set(:merge_request_diff) { merge_request.merge_request_diff }
|
||||
let(:merge_request) { create(:merge_request) }
|
||||
let(:merge_request_diff) { merge_request.merge_request_diff }
|
||||
let(:updated_merge_request_diff) { MergeRequestDiff.find(merge_request_diff.id) }
|
||||
|
||||
def diffs_to_hashes(diffs)
|
||||
|
@ -70,8 +70,8 @@ describe Gitlab::BackgroundMigration::DeserializeMergeRequestDiffsAndCommits do
|
|||
before do
|
||||
merge_request.reload_diff(true)
|
||||
|
||||
convert_to_yaml(start_id, merge_request_diff.commits, merge_request_diff.diffs)
|
||||
convert_to_yaml(stop_id, updated_merge_request_diff.commits, updated_merge_request_diff.diffs)
|
||||
convert_to_yaml(start_id, merge_request_diff.commits, diffs_to_hashes(merge_request_diff.merge_request_diff_files))
|
||||
convert_to_yaml(stop_id, updated_merge_request_diff.commits, diffs_to_hashes(updated_merge_request_diff.merge_request_diff_files))
|
||||
|
||||
MergeRequestDiffCommit.delete_all
|
||||
MergeRequestDiffFile.delete_all
|
||||
|
@ -80,10 +80,32 @@ describe Gitlab::BackgroundMigration::DeserializeMergeRequestDiffsAndCommits do
|
|||
context 'when BUFFER_ROWS is exceeded' do
|
||||
before do
|
||||
stub_const("#{described_class}::BUFFER_ROWS", 1)
|
||||
|
||||
allow(Gitlab::Database).to receive(:bulk_insert).and_call_original
|
||||
end
|
||||
|
||||
it 'updates and continues' do
|
||||
expect(described_class::MergeRequestDiff).to receive(:transaction).twice
|
||||
it 'inserts commit rows in chunks of BUFFER_ROWS' do
|
||||
# There are 29 commits in each diff, so we should have slices of 20 + 9 + 20 + 9.
|
||||
stub_const("#{described_class}::BUFFER_ROWS", 20)
|
||||
|
||||
expect(Gitlab::Database).to receive(:bulk_insert)
|
||||
.with('merge_request_diff_commits', anything)
|
||||
.exactly(4)
|
||||
.times
|
||||
.and_call_original
|
||||
|
||||
subject.perform(start_id, stop_id)
|
||||
end
|
||||
|
||||
it 'inserts diff rows in chunks of DIFF_FILE_BUFFER_ROWS' do
|
||||
# There are 20 files in each diff, so we should have slices of 20 + 20.
|
||||
stub_const("#{described_class}::DIFF_FILE_BUFFER_ROWS", 20)
|
||||
|
||||
expect(Gitlab::Database).to receive(:bulk_insert)
|
||||
.with('merge_request_diff_files', anything)
|
||||
.exactly(2)
|
||||
.times
|
||||
.and_call_original
|
||||
|
||||
subject.perform(start_id, stop_id)
|
||||
end
|
||||
|
@ -91,27 +113,87 @@ describe Gitlab::BackgroundMigration::DeserializeMergeRequestDiffsAndCommits do
|
|||
|
||||
context 'when BUFFER_ROWS is not exceeded' do
|
||||
it 'only updates once' do
|
||||
expect(described_class::MergeRequestDiff).to receive(:transaction).once
|
||||
expect(Gitlab::Database).to receive(:bulk_insert)
|
||||
.with('merge_request_diff_commits', anything)
|
||||
.once
|
||||
.and_call_original
|
||||
|
||||
expect(Gitlab::Database).to receive(:bulk_insert)
|
||||
.with('merge_request_diff_files', anything)
|
||||
.once
|
||||
.and_call_original
|
||||
|
||||
subject.perform(start_id, stop_id)
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
context 'when the merge request diff update fails' do
|
||||
before do
|
||||
allow(described_class::MergeRequestDiff)
|
||||
.to receive(:update_all).and_raise(ActiveRecord::Rollback)
|
||||
context 'when some rows were already inserted due to a previous failure' do
|
||||
before do
|
||||
subject.perform(start_id, stop_id)
|
||||
|
||||
convert_to_yaml(start_id, merge_request_diff.commits, diffs_to_hashes(merge_request_diff.merge_request_diff_files))
|
||||
convert_to_yaml(stop_id, updated_merge_request_diff.commits, diffs_to_hashes(updated_merge_request_diff.merge_request_diff_files))
|
||||
end
|
||||
|
||||
it 'does not raise' do
|
||||
expect { subject.perform(start_id, stop_id) }.not_to raise_exception
|
||||
end
|
||||
|
||||
it 'logs a message' do
|
||||
expect(Rails.logger).to receive(:info)
|
||||
.with(
|
||||
a_string_matching(described_class.name).and(matching([start_id, stop_id].inspect))
|
||||
)
|
||||
.twice
|
||||
|
||||
subject.perform(start_id, stop_id)
|
||||
end
|
||||
|
||||
it 'ends up with the correct rows' do
|
||||
expect(updated_merge_request_diff.commits.count).to eq(29)
|
||||
expect(updated_merge_request_diff.raw_diffs.count).to eq(20)
|
||||
end
|
||||
end
|
||||
|
||||
it 'does not add any diff commits' do
|
||||
expect { subject.perform(merge_request_diff.id, merge_request_diff.id) }
|
||||
.not_to change { MergeRequestDiffCommit.count }
|
||||
end
|
||||
context 'when the merge request diff update fails' do
|
||||
let(:exception) { ActiveRecord::RecordNotFound }
|
||||
|
||||
it 'does not add any diff files' do
|
||||
expect { subject.perform(merge_request_diff.id, merge_request_diff.id) }
|
||||
.not_to change { MergeRequestDiffFile.count }
|
||||
let(:perform_ignoring_exceptions) do
|
||||
begin
|
||||
subject.perform(start_id, stop_id)
|
||||
rescue described_class::Error
|
||||
end
|
||||
end
|
||||
|
||||
before do
|
||||
allow_any_instance_of(described_class::MergeRequestDiff::ActiveRecord_Relation)
|
||||
.to receive(:update_all).and_raise(exception)
|
||||
end
|
||||
|
||||
it 'raises an error' do
|
||||
expect { subject.perform(start_id, stop_id) }
|
||||
.to raise_exception(described_class::Error)
|
||||
end
|
||||
|
||||
it 'logs the error' do
|
||||
expect(Rails.logger).to receive(:info).with(
|
||||
a_string_matching(described_class.name)
|
||||
.and(matching([start_id, stop_id].inspect))
|
||||
.and(matching(exception.name))
|
||||
)
|
||||
|
||||
perform_ignoring_exceptions
|
||||
end
|
||||
|
||||
it 'still adds diff commits' do
|
||||
expect { perform_ignoring_exceptions }
|
||||
.to change { MergeRequestDiffCommit.count }
|
||||
end
|
||||
|
||||
it 'still adds diff files' do
|
||||
expect { perform_ignoring_exceptions }
|
||||
.to change { MergeRequestDiffFile.count }
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
|
|
|
@ -0,0 +1,59 @@
|
|||
require 'spec_helper'
|
||||
require Rails.root.join('db', 'post_migrate', '20170926150348_schedule_merge_request_diff_migrations_take_two')
|
||||
|
||||
describe ScheduleMergeRequestDiffMigrationsTakeTwo, :migration, :sidekiq do
|
||||
matcher :be_scheduled_migration do |time, *expected|
|
||||
match do |migration|
|
||||
BackgroundMigrationWorker.jobs.any? do |job|
|
||||
job['args'] == [migration, expected] &&
|
||||
job['at'].to_i == time.to_i
|
||||
end
|
||||
end
|
||||
|
||||
failure_message do |migration|
|
||||
"Migration `#{migration}` with args `#{expected.inspect}` not scheduled!"
|
||||
end
|
||||
end
|
||||
|
||||
let(:merge_request_diffs) { table(:merge_request_diffs) }
|
||||
let(:merge_requests) { table(:merge_requests) }
|
||||
let(:projects) { table(:projects) }
|
||||
|
||||
before do
|
||||
stub_const("#{described_class.name}::BATCH_SIZE", 1)
|
||||
|
||||
projects.create!(id: 1, name: 'gitlab', path: 'gitlab')
|
||||
|
||||
merge_requests.create!(id: 1, target_project_id: 1, source_project_id: 1, target_branch: 'feature', source_branch: 'master')
|
||||
|
||||
merge_request_diffs.create!(id: 1, merge_request_id: 1, st_commits: YAML.dump([]), st_diffs: nil)
|
||||
merge_request_diffs.create!(id: 2, merge_request_id: 1, st_commits: nil, st_diffs: YAML.dump([]))
|
||||
merge_request_diffs.create!(id: 3, merge_request_id: 1, st_commits: nil, st_diffs: nil)
|
||||
merge_request_diffs.create!(id: 4, merge_request_id: 1, st_commits: YAML.dump([]), st_diffs: YAML.dump([]))
|
||||
end
|
||||
|
||||
it 'correctly schedules background migrations' do
|
||||
Sidekiq::Testing.fake! do
|
||||
Timecop.freeze do
|
||||
migrate!
|
||||
|
||||
expect(described_class::MIGRATION).to be_scheduled_migration(10.minutes.from_now, 1, 1)
|
||||
expect(described_class::MIGRATION).to be_scheduled_migration(20.minutes.from_now, 2, 2)
|
||||
expect(described_class::MIGRATION).to be_scheduled_migration(30.minutes.from_now, 4, 4)
|
||||
expect(BackgroundMigrationWorker.jobs.size).to eq 3
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
it 'migrates the data' do
|
||||
Sidekiq::Testing.inline! do
|
||||
non_empty = 'st_commits IS NOT NULL OR st_diffs IS NOT NULL'
|
||||
|
||||
expect(merge_request_diffs.where(non_empty).count).to eq 3
|
||||
|
||||
migrate!
|
||||
|
||||
expect(merge_request_diffs.where(non_empty).count).to eq 0
|
||||
end
|
||||
end
|
||||
end
|
Loading…
Reference in a new issue