diff --git a/changelogs/unreleased/36631-activerecord-statementinvalid-pg-querycanceled-error-canceling-statement-due-to-statement-timeout.yml b/changelogs/unreleased/36631-activerecord-statementinvalid-pg-querycanceled-error-canceling-statement-due-to-statement-timeout.yml new file mode 100644 index 00000000000..a2e1d07158b --- /dev/null +++ b/changelogs/unreleased/36631-activerecord-statementinvalid-pg-querycanceled-error-canceling-statement-due-to-statement-timeout.yml @@ -0,0 +1,6 @@ +--- +title: Reschedule merge request diff background migrations to catch failures from + 9.5 run +merge_request: +author: +type: fixed diff --git a/db/post_migrate/20170926150348_schedule_merge_request_diff_migrations_take_two.rb b/db/post_migrate/20170926150348_schedule_merge_request_diff_migrations_take_two.rb new file mode 100644 index 00000000000..5732cb85ea5 --- /dev/null +++ b/db/post_migrate/20170926150348_schedule_merge_request_diff_migrations_take_two.rb @@ -0,0 +1,32 @@ +class ScheduleMergeRequestDiffMigrationsTakeTwo < ActiveRecord::Migration + include Gitlab::Database::MigrationHelpers + + DOWNTIME = false + BATCH_SIZE = 500 + MIGRATION = 'DeserializeMergeRequestDiffsAndCommits' + DELAY_INTERVAL = 10.minutes + + disable_ddl_transaction! + + class MergeRequestDiff < ActiveRecord::Base + self.table_name = 'merge_request_diffs' + + include ::EachBatch + + default_scope { where('st_commits IS NOT NULL OR st_diffs IS NOT NULL') } + end + + # By this point, we assume ScheduleMergeRequestDiffMigrations - the first + # version of this - has already run. On GitLab.com, we have ~220k un-migrated + # rows, but these rows will, in general, take a long time. + # + # With a gap of 10 minutes per batch, and 500 rows per batch, these migrations + # are scheduled over 220_000 / 500 / 6 ~= 74 hours, which is a little over + # three days. + def up + queue_background_migration_jobs_by_range_at_intervals(MergeRequestDiff, MIGRATION, DELAY_INTERVAL, batch_size: BATCH_SIZE) + end + + def down + end +end diff --git a/lib/gitlab/background_migration/deserialize_merge_request_diffs_and_commits.rb b/lib/gitlab/background_migration/deserialize_merge_request_diffs_and_commits.rb index 3fde1b09efb..8e5c95f2287 100644 --- a/lib/gitlab/background_migration/deserialize_merge_request_diffs_and_commits.rb +++ b/lib/gitlab/background_migration/deserialize_merge_request_diffs_and_commits.rb @@ -3,11 +3,18 @@ module Gitlab class DeserializeMergeRequestDiffsAndCommits attr_reader :diff_ids, :commit_rows, :file_rows + class Error < StandardError + def backtrace + cause.backtrace + end + end + class MergeRequestDiff < ActiveRecord::Base self.table_name = 'merge_request_diffs' end BUFFER_ROWS = 1000 + DIFF_FILE_BUFFER_ROWS = 100 def perform(start_id, stop_id) merge_request_diffs = MergeRequestDiff @@ -26,13 +33,17 @@ module Gitlab if diff_ids.length > BUFFER_ROWS || commit_rows.length > BUFFER_ROWS || - file_rows.length > BUFFER_ROWS + file_rows.length > DIFF_FILE_BUFFER_ROWS flush_buffers! end end flush_buffers! + rescue => e + Rails.logger.info("#{self.class.name}: failed for IDs #{merge_request_diffs.map(&:id)} with #{e.class.name}") + + raise Error.new(e.inspect) end private @@ -45,17 +56,28 @@ module Gitlab def flush_buffers! if diff_ids.any? - MergeRequestDiff.transaction do - Gitlab::Database.bulk_insert('merge_request_diff_commits', commit_rows) - Gitlab::Database.bulk_insert('merge_request_diff_files', file_rows) - - MergeRequestDiff.where(id: diff_ids).update_all(st_commits: nil, st_diffs: nil) + commit_rows.each_slice(BUFFER_ROWS).each do |commit_rows_slice| + bulk_insert('merge_request_diff_commits', commit_rows_slice) end + + file_rows.each_slice(DIFF_FILE_BUFFER_ROWS).each do |file_rows_slice| + bulk_insert('merge_request_diff_files', file_rows_slice) + end + + MergeRequestDiff.where(id: diff_ids).update_all(st_commits: nil, st_diffs: nil) end reset_buffers! end + def bulk_insert(table, rows) + Gitlab::Database.bulk_insert(table, rows) + rescue ActiveRecord::RecordNotUnique + ids = rows.map { |row| row[:merge_request_diff_id] }.uniq.sort + + Rails.logger.info("#{self.class.name}: rows inserted twice for IDs #{ids}") + end + def single_diff_rows(merge_request_diff) sha_attribute = Gitlab::Database::ShaAttribute.new commits = YAML.load(merge_request_diff.st_commits) rescue [] diff --git a/spec/lib/gitlab/background_migration/deserialize_merge_request_diffs_and_commits_spec.rb b/spec/lib/gitlab/background_migration/deserialize_merge_request_diffs_and_commits_spec.rb index c0427639746..d2e7243ee05 100644 --- a/spec/lib/gitlab/background_migration/deserialize_merge_request_diffs_and_commits_spec.rb +++ b/spec/lib/gitlab/background_migration/deserialize_merge_request_diffs_and_commits_spec.rb @@ -1,9 +1,9 @@ require 'spec_helper' -describe Gitlab::BackgroundMigration::DeserializeMergeRequestDiffsAndCommits do +describe Gitlab::BackgroundMigration::DeserializeMergeRequestDiffsAndCommits, :truncate do describe '#perform' do - set(:merge_request) { create(:merge_request) } - set(:merge_request_diff) { merge_request.merge_request_diff } + let(:merge_request) { create(:merge_request) } + let(:merge_request_diff) { merge_request.merge_request_diff } let(:updated_merge_request_diff) { MergeRequestDiff.find(merge_request_diff.id) } def diffs_to_hashes(diffs) @@ -70,8 +70,8 @@ describe Gitlab::BackgroundMigration::DeserializeMergeRequestDiffsAndCommits do before do merge_request.reload_diff(true) - convert_to_yaml(start_id, merge_request_diff.commits, merge_request_diff.diffs) - convert_to_yaml(stop_id, updated_merge_request_diff.commits, updated_merge_request_diff.diffs) + convert_to_yaml(start_id, merge_request_diff.commits, diffs_to_hashes(merge_request_diff.merge_request_diff_files)) + convert_to_yaml(stop_id, updated_merge_request_diff.commits, diffs_to_hashes(updated_merge_request_diff.merge_request_diff_files)) MergeRequestDiffCommit.delete_all MergeRequestDiffFile.delete_all @@ -80,10 +80,32 @@ describe Gitlab::BackgroundMigration::DeserializeMergeRequestDiffsAndCommits do context 'when BUFFER_ROWS is exceeded' do before do stub_const("#{described_class}::BUFFER_ROWS", 1) + + allow(Gitlab::Database).to receive(:bulk_insert).and_call_original end - it 'updates and continues' do - expect(described_class::MergeRequestDiff).to receive(:transaction).twice + it 'inserts commit rows in chunks of BUFFER_ROWS' do + # There are 29 commits in each diff, so we should have slices of 20 + 9 + 20 + 9. + stub_const("#{described_class}::BUFFER_ROWS", 20) + + expect(Gitlab::Database).to receive(:bulk_insert) + .with('merge_request_diff_commits', anything) + .exactly(4) + .times + .and_call_original + + subject.perform(start_id, stop_id) + end + + it 'inserts diff rows in chunks of DIFF_FILE_BUFFER_ROWS' do + # There are 20 files in each diff, so we should have slices of 20 + 20. + stub_const("#{described_class}::DIFF_FILE_BUFFER_ROWS", 20) + + expect(Gitlab::Database).to receive(:bulk_insert) + .with('merge_request_diff_files', anything) + .exactly(2) + .times + .and_call_original subject.perform(start_id, stop_id) end @@ -91,27 +113,87 @@ describe Gitlab::BackgroundMigration::DeserializeMergeRequestDiffsAndCommits do context 'when BUFFER_ROWS is not exceeded' do it 'only updates once' do - expect(described_class::MergeRequestDiff).to receive(:transaction).once + expect(Gitlab::Database).to receive(:bulk_insert) + .with('merge_request_diff_commits', anything) + .once + .and_call_original + + expect(Gitlab::Database).to receive(:bulk_insert) + .with('merge_request_diff_files', anything) + .once + .and_call_original subject.perform(start_id, stop_id) end end - end - context 'when the merge request diff update fails' do - before do - allow(described_class::MergeRequestDiff) - .to receive(:update_all).and_raise(ActiveRecord::Rollback) + context 'when some rows were already inserted due to a previous failure' do + before do + subject.perform(start_id, stop_id) + + convert_to_yaml(start_id, merge_request_diff.commits, diffs_to_hashes(merge_request_diff.merge_request_diff_files)) + convert_to_yaml(stop_id, updated_merge_request_diff.commits, diffs_to_hashes(updated_merge_request_diff.merge_request_diff_files)) + end + + it 'does not raise' do + expect { subject.perform(start_id, stop_id) }.not_to raise_exception + end + + it 'logs a message' do + expect(Rails.logger).to receive(:info) + .with( + a_string_matching(described_class.name).and(matching([start_id, stop_id].inspect)) + ) + .twice + + subject.perform(start_id, stop_id) + end + + it 'ends up with the correct rows' do + expect(updated_merge_request_diff.commits.count).to eq(29) + expect(updated_merge_request_diff.raw_diffs.count).to eq(20) + end end - it 'does not add any diff commits' do - expect { subject.perform(merge_request_diff.id, merge_request_diff.id) } - .not_to change { MergeRequestDiffCommit.count } - end + context 'when the merge request diff update fails' do + let(:exception) { ActiveRecord::RecordNotFound } - it 'does not add any diff files' do - expect { subject.perform(merge_request_diff.id, merge_request_diff.id) } - .not_to change { MergeRequestDiffFile.count } + let(:perform_ignoring_exceptions) do + begin + subject.perform(start_id, stop_id) + rescue described_class::Error + end + end + + before do + allow_any_instance_of(described_class::MergeRequestDiff::ActiveRecord_Relation) + .to receive(:update_all).and_raise(exception) + end + + it 'raises an error' do + expect { subject.perform(start_id, stop_id) } + .to raise_exception(described_class::Error) + end + + it 'logs the error' do + expect(Rails.logger).to receive(:info).with( + a_string_matching(described_class.name) + .and(matching([start_id, stop_id].inspect)) + .and(matching(exception.name)) + ) + + perform_ignoring_exceptions + end + + it 'still adds diff commits' do + expect { perform_ignoring_exceptions } + .to change { MergeRequestDiffCommit.count } + end + + it 'still adds diff files' do + expect { perform_ignoring_exceptions } + .to change { MergeRequestDiffFile.count } + end end end diff --git a/spec/migrations/schedule_merge_request_diff_migrations_take_two_spec.rb b/spec/migrations/schedule_merge_request_diff_migrations_take_two_spec.rb new file mode 100644 index 00000000000..4ab1bb67058 --- /dev/null +++ b/spec/migrations/schedule_merge_request_diff_migrations_take_two_spec.rb @@ -0,0 +1,59 @@ +require 'spec_helper' +require Rails.root.join('db', 'post_migrate', '20170926150348_schedule_merge_request_diff_migrations_take_two') + +describe ScheduleMergeRequestDiffMigrationsTakeTwo, :migration, :sidekiq do + matcher :be_scheduled_migration do |time, *expected| + match do |migration| + BackgroundMigrationWorker.jobs.any? do |job| + job['args'] == [migration, expected] && + job['at'].to_i == time.to_i + end + end + + failure_message do |migration| + "Migration `#{migration}` with args `#{expected.inspect}` not scheduled!" + end + end + + let(:merge_request_diffs) { table(:merge_request_diffs) } + let(:merge_requests) { table(:merge_requests) } + let(:projects) { table(:projects) } + + before do + stub_const("#{described_class.name}::BATCH_SIZE", 1) + + projects.create!(id: 1, name: 'gitlab', path: 'gitlab') + + merge_requests.create!(id: 1, target_project_id: 1, source_project_id: 1, target_branch: 'feature', source_branch: 'master') + + merge_request_diffs.create!(id: 1, merge_request_id: 1, st_commits: YAML.dump([]), st_diffs: nil) + merge_request_diffs.create!(id: 2, merge_request_id: 1, st_commits: nil, st_diffs: YAML.dump([])) + merge_request_diffs.create!(id: 3, merge_request_id: 1, st_commits: nil, st_diffs: nil) + merge_request_diffs.create!(id: 4, merge_request_id: 1, st_commits: YAML.dump([]), st_diffs: YAML.dump([])) + end + + it 'correctly schedules background migrations' do + Sidekiq::Testing.fake! do + Timecop.freeze do + migrate! + + expect(described_class::MIGRATION).to be_scheduled_migration(10.minutes.from_now, 1, 1) + expect(described_class::MIGRATION).to be_scheduled_migration(20.minutes.from_now, 2, 2) + expect(described_class::MIGRATION).to be_scheduled_migration(30.minutes.from_now, 4, 4) + expect(BackgroundMigrationWorker.jobs.size).to eq 3 + end + end + end + + it 'migrates the data' do + Sidekiq::Testing.inline! do + non_empty = 'st_commits IS NOT NULL OR st_diffs IS NOT NULL' + + expect(merge_request_diffs.where(non_empty).count).to eq 3 + + migrate! + + expect(merge_request_diffs.where(non_empty).count).to eq 0 + end + end +end