Reschedule merge request diff background migration

The first attempt didn't migrate all rows on GitLab.com, due to a couple of
issues:

1. Some rows in merge_request_diffs had truly huge numbers of commits and diffs
   serialised - one in particular had 26,000 commits!
2. The jobs were sometimes on Sidekiq hosts with frequent OOM errors, leading to
   the job being lost.

The previous commit adds more logging, and a more robust insertion method. This
commit reschedules the jobs, with a generous pause between each.
This commit is contained in:
Sean McGivern 2017-09-26 15:26:26 +01:00
parent 1507ff8ab7
commit 472be7fe61
3 changed files with 97 additions and 0 deletions

View file

@ -0,0 +1,6 @@
---
title: Reschedule merge request diff background migrations to catch failures from
9.5 run
merge_request:
author:
type: fixed

View file

@ -0,0 +1,32 @@
class ScheduleMergeRequestDiffMigrationsTakeTwo < ActiveRecord::Migration
include Gitlab::Database::MigrationHelpers
DOWNTIME = false
BATCH_SIZE = 500
MIGRATION = 'DeserializeMergeRequestDiffsAndCommits'
DELAY_INTERVAL = 10.minutes
disable_ddl_transaction!
class MergeRequestDiff < ActiveRecord::Base
self.table_name = 'merge_request_diffs'
include ::EachBatch
default_scope { where('st_commits IS NOT NULL OR st_diffs IS NOT NULL') }
end
# By this point, we assume ScheduleMergeRequestDiffMigrations - the first
# version of this - has already run. On GitLab.com, we have ~220k un-migrated
# rows, but these rows will, in general, take a long time.
#
# With a gap of 10 minutes per batch, and 500 rows per batch, these migrations
# are scheduled over 220_000 / 500 / 6 ~= 74 hours, which is a little over
# three days.
def up
queue_background_migration_jobs_by_range_at_intervals(MergeRequestDiff, MIGRATION, DELAY_INTERVAL, batch_size: BATCH_SIZE)
end
def down
end
end

View file

@ -0,0 +1,59 @@
require 'spec_helper'
require Rails.root.join('db', 'post_migrate', '20170926150348_schedule_merge_request_diff_migrations_take_two')
describe ScheduleMergeRequestDiffMigrationsTakeTwo, :migration, :sidekiq do
matcher :be_scheduled_migration do |time, *expected|
match do |migration|
BackgroundMigrationWorker.jobs.any? do |job|
job['args'] == [migration, expected] &&
job['at'].to_i == time.to_i
end
end
failure_message do |migration|
"Migration `#{migration}` with args `#{expected.inspect}` not scheduled!"
end
end
let(:merge_request_diffs) { table(:merge_request_diffs) }
let(:merge_requests) { table(:merge_requests) }
let(:projects) { table(:projects) }
before do
stub_const("#{described_class.name}::BATCH_SIZE", 1)
projects.create!(id: 1, name: 'gitlab', path: 'gitlab')
merge_requests.create!(id: 1, target_project_id: 1, source_project_id: 1, target_branch: 'feature', source_branch: 'master')
merge_request_diffs.create!(id: 1, merge_request_id: 1, st_commits: YAML.dump([]), st_diffs: nil)
merge_request_diffs.create!(id: 2, merge_request_id: 1, st_commits: nil, st_diffs: YAML.dump([]))
merge_request_diffs.create!(id: 3, merge_request_id: 1, st_commits: nil, st_diffs: nil)
merge_request_diffs.create!(id: 4, merge_request_id: 1, st_commits: YAML.dump([]), st_diffs: YAML.dump([]))
end
it 'correctly schedules background migrations' do
Sidekiq::Testing.fake! do
Timecop.freeze do
migrate!
expect(described_class::MIGRATION).to be_scheduled_migration(10.minutes.from_now, 1, 1)
expect(described_class::MIGRATION).to be_scheduled_migration(20.minutes.from_now, 2, 2)
expect(described_class::MIGRATION).to be_scheduled_migration(30.minutes.from_now, 4, 4)
expect(BackgroundMigrationWorker.jobs.size).to eq 3
end
end
end
it 'migrates the data' do
Sidekiq::Testing.inline! do
non_empty = 'st_commits IS NOT NULL OR st_diffs IS NOT NULL'
expect(merge_request_diffs.where(non_empty).count).to eq 3
migrate!
expect(merge_request_diffs.where(non_empty).count).to eq 0
end
end
end