Remove unnecessary queries on Merge Request Metrics population scheduler

This commit is contained in:
Oswaldo Ferreira 2018-01-08 16:53:58 -02:00
parent 2c66b942bd
commit 259d452dfd

View file

@ -15,8 +15,6 @@ class SchedulePopulateMergeRequestMetricsWithEventsData < ActiveRecord::Migratio
end end
def up def up
merge_requests = MergeRequest.where("id IN (#{updatable_merge_requests_union_sql})").reorder(:id)
say 'Scheduling `PopulateMergeRequestMetricsWithEventsData` jobs' say 'Scheduling `PopulateMergeRequestMetricsWithEventsData` jobs'
# It will update around 4_000_000 records in batches of 10_000 merge # It will update around 4_000_000 records in batches of 10_000 merge
# requests (with one batch scheduled every 10 minutes) and should take around 66 hours to complete. # requests (with one batch scheduled every 10 minutes) and should take around 66 hours to complete.
@ -25,7 +23,7 @@ class SchedulePopulateMergeRequestMetricsWithEventsData < ActiveRecord::Migratio
# #
# More information about the updates in `PopulateMergeRequestMetricsWithEventsData` class. # More information about the updates in `PopulateMergeRequestMetricsWithEventsData` class.
# #
merge_requests.each_batch(of: BATCH_SIZE) do |relation, index| MergeRequest.all.each_batch(of: BATCH_SIZE) do |relation, index|
range = relation.pluck('MIN(id)', 'MAX(id)').first range = relation.pluck('MIN(id)', 'MAX(id)').first
BackgroundMigrationWorker.perform_in(index * 10.minutes, MIGRATION, range) BackgroundMigrationWorker.perform_in(index * 10.minutes, MIGRATION, range)
@ -37,32 +35,4 @@ class SchedulePopulateMergeRequestMetricsWithEventsData < ActiveRecord::Migratio
execute "update merge_request_metrics set latest_closed_by_id = null" execute "update merge_request_metrics set latest_closed_by_id = null"
execute "update merge_request_metrics set merged_by_id = null" execute "update merge_request_metrics set merged_by_id = null"
end end
private
# On staging:
# Planning time: 0.682 ms
# Execution time: 22033.158 ms
#
# Builds the SQL text of a UNION selecting the ids of merge requests that
# either have no `merge_request_metrics` row, or have one in which
# `merged_by_id`, `latest_closed_by_id` or `latest_closed_at` is still NULL.
#
# The result of `Gitlab::SQL::Union#to_sql` is returned as-is, so it can be
# interpolated into an `id IN (...)` condition.
def updatable_merge_requests_union_sql
  # Branch 1: a metrics row exists but at least one events column is empty.
  missing_events_data_condition = <<-SQL.strip_heredoc
    merge_request_metrics.merged_by_id IS NULL OR
    merge_request_metrics.latest_closed_by_id IS NULL OR
    merge_request_metrics.latest_closed_at IS NULL
  SQL
  metrics_join =
    'INNER JOIN merge_request_metrics ON merge_requests.id = merge_request_metrics.merge_request_id'
  ids_missing_events_data =
    MergeRequest.joins(metrics_join).where(missing_events_data_condition).select(:id)

  # Branch 2: no metrics row exists for the merge request at all.
  missing_metrics_row_condition =
    'NOT EXISTS (SELECT 1 FROM merge_request_metrics WHERE merge_request_metrics.merge_request_id = merge_requests.id)'
  ids_missing_metrics_row = MergeRequest.where(missing_metrics_row_condition).select(:id)

  # Operand order is kept (no-row branch first) so the generated SQL is unchanged.
  Gitlab::SQL::Union.new([ids_missing_metrics_row, ids_missing_events_data]).to_sql
end
end end