Take two for MR metrics population background migration

This commit is contained in:
Oswaldo Ferreira 2018-05-28 17:18:43 -03:00
parent a8c97187f0
commit 54ad5fb8a2
9 changed files with 136 additions and 3 deletions

View file

@ -0,0 +1,5 @@
---
title: Take two for MR metrics population background migration
merge_request: 19097
author:
type: other

View file

@ -0,0 +1,44 @@
class MigrateRemainingMrMetricsPopulatingBackgroundMigration < ActiveRecord::Migration
include Gitlab::Database::MigrationHelpers
DOWNTIME = false
BATCH_SIZE = 5_000
MIGRATION = 'PopulateMergeRequestMetricsWithEventsData'
DELAY_INTERVAL = 10.minutes
disable_ddl_transaction!
class MergeRequest < ActiveRecord::Base
self.table_name = 'merge_requests'
include ::EachBatch
end
def up
# Perform any ongoing background migration that might still be running. This
# avoids scheduling way too many of the same jobs on self-hosted instances
# if they're updating GitLab across multiple versions. The "Take one"
# migration was executed on 10.4 on
# SchedulePopulateMergeRequestMetricsWithEventsData.
Gitlab::BackgroundMigration.steal(MIGRATION)
metrics_not_exists_clause = <<~SQL
NOT EXISTS (SELECT 1 FROM merge_request_metrics
WHERE merge_request_metrics.merge_request_id = merge_requests.id)
SQL
relation = MergeRequest.where(metrics_not_exists_clause)
# We currently have ~400_000 MR records without metrics on GitLab.com.
# This means it'll schedule ~80 jobs (5000 MRs each) with a 10 minutes gap,
# so this should take ~14 hours for all background migrations to complete.
#
queue_background_migration_jobs_by_range_at_intervals(relation,
MIGRATION,
DELAY_INTERVAL,
batch_size: BATCH_SIZE)
end
def down
end
end

View file

@ -28,6 +28,7 @@ project_tree:
- project_members:
- :user
- merge_requests:
- :metrics
- notes:
- :author
- events:

View file

@ -18,9 +18,10 @@ module Gitlab
label: :project_label,
custom_attributes: 'ProjectCustomAttribute',
project_badges: 'Badge',
metrics: 'MergeRequest::Metrics',
ci_cd_settings: 'ProjectCiCdSetting' }.freeze
USER_REFERENCES = %w[author_id assignee_id updated_by_id user_id created_by_id last_edited_by_id merge_user_id resolved_by_id closed_by_id].freeze
USER_REFERENCES = %w[author_id assignee_id updated_by_id merged_by_id latest_closed_by_id user_id created_by_id last_edited_by_id merge_user_id resolved_by_id closed_by_id].freeze
PROJECT_REFERENCES = %w[project_id source_project_id target_project_id].freeze
@ -36,6 +37,15 @@ module Gitlab
new(*args).create
end
def self.relation_class(relation_name)
# There are scenarios where the model is pluralized (e.g.
# MergeRequest::Metrics), and we don't want to force it to singular
# with #classify.
relation_name.to_s.classify.constantize
rescue NameError
relation_name.to_s.constantize
end
def initialize(relation_sym:, relation_hash:, members_mapper:, user:, project:, excluded_keys: [])
@relation_name = OVERRIDES[relation_sym] || relation_sym
@relation_hash = relation_hash.except('noteable_id')
@ -195,7 +205,7 @@ module Gitlab
end
def relation_class
@relation_class ||= @relation_name.to_s.classify.constantize
@relation_class ||= self.class.relation_class(@relation_name)
end
def imported_object

View file

@ -310,3 +310,8 @@ lfs_file_locks:
- user
project_badges:
- project
metrics:
- merge_request
- latest_closed_by
- merged_by
- pipeline

View file

@ -119,6 +119,25 @@ describe Gitlab::ImportExport::RelationFactory do
end
end
context 'overrided model with pluralized name' do
let(:relation_sym) { :metrics }
let(:relation_hash) do
{
'id' => 99,
'merge_request_id' => 99,
'merged_at' => Time.now,
'merged_by_id' => 99,
'latest_closed_at' => nil,
'latest_closed_by_id' => nil
}
end
it 'does not raise errors' do
expect { created_object }.not_to raise_error
end
end
context 'Project references' do
let(:relation_sym) { :project_foo_model }
let(:relation_hash) do

View file

@ -205,6 +205,19 @@ MergeRequestDiffFile:
- b_mode
- too_large
- binary
MergeRequest::Metrics:
- id
- created_at
- updated_at
- merge_request_id
- pipeline_id
- latest_closed_by_id
- latest_closed_at
- merged_by_id
- merged_at
- latest_build_started_at
- latest_build_finished_at
- first_deployed_to_production_at
Ci::Pipeline:
- id
- project_id

View file

@ -0,0 +1,36 @@
require 'spec_helper'
require Rails.root.join('db', 'post_migrate', '20180521162137_migrate_remaining_mr_metrics_populating_background_migration.rb')
describe MigrateRemainingMrMetricsPopulatingBackgroundMigration, :migration, :sidekiq do
let(:namespaces) { table(:namespaces) }
let(:projects) { table(:projects) }
let(:mrs) { table(:merge_requests) }
before do
namespaces.create!(id: 1, name: 'foo', path: 'foo')
projects.create!(id: 123, name: 'gitlab1', path: 'gitlab1', namespace_id: 1)
projects.create!(id: 456, name: 'gitlab2', path: 'gitlab2', namespace_id: 1)
projects.create!(id: 789, name: 'gitlab3', path: 'gitlab3', namespace_id: 1)
mrs.create!(title: 'foo', target_branch: 'target', source_branch: 'source', target_project_id: 123)
mrs.create!(title: 'bar', target_branch: 'target', source_branch: 'source', target_project_id: 456)
mrs.create!(title: 'kux', target_branch: 'target', source_branch: 'source', target_project_id: 789)
end
it 'correctly schedules background migrations' do
stub_const("#{described_class.name}::BATCH_SIZE", 2)
Sidekiq::Testing.fake! do
Timecop.freeze do
migrate!
expect(described_class::MIGRATION)
.to be_scheduled_delayed_migration(10.minutes, mrs.first.id, mrs.second.id)
expect(described_class::MIGRATION)
.to be_scheduled_delayed_migration(20.minutes, mrs.third.id, mrs.third.id)
expect(BackgroundMigrationWorker.jobs.size).to eq(2)
end
end
end
end

View file

@ -10,7 +10,7 @@ module ConfigurationHelper
def relation_class_for_name(relation_name)
relation_name = Gitlab::ImportExport::RelationFactory::OVERRIDES[relation_name.to_sym] || relation_name
relation_name.to_s.classify.constantize
Gitlab::ImportExport::RelationFactory.relation_class(relation_name)
end
def parsed_attributes(relation_name, attributes)