2022-04-04 23:08:30 -04:00
|
|
|
# frozen_string_literal: true
|
|
|
|
|
|
|
|
module Gitlab
|
|
|
|
module Database
|
|
|
|
class ConsistencyChecker
|
2022-05-24 05:09:17 -04:00
|
|
|
# Width of the id range compared in a single batch
BATCH_SIZE = 500
|
|
|
|
# Upper bound on the number of batches processed by one #execute call
MAX_BATCHES = 20
|
|
|
|
# Time budget for one #execute call; checked before each batch starts
MAX_RUNTIME = 5.seconds # must be less than the scheduling frequency of the ConsistencyCheck jobs
|
2022-04-04 23:08:30 -04:00
|
|
|
|
|
|
|
# Monotonic clock readings are used to measure elapsed runtime
delegate :monotonic_time, to: :'Gitlab::Metrics::System'
|
|
|
|
|
|
|
|
# Sets up a checker that compares rows of two models column by column.
#
# @param source_model model queried for the reference rows
# @param target_model model queried for the rows to compare against
# @param source_columns columns plucked from the source; the first one is
#   used as the sort / batching column
# @param target_columns columns plucked from the target; the first one is
#   used as the sort / batching column
def initialize(source_model:, target_model:, source_columns:, target_columns:)
  # Source side
  @source_model = source_model
  @source_columns = source_columns
  @source_sort_column = source_columns.first

  # Target side
  @target_model = target_model
  @target_columns = target_columns
  @target_sort_column = target_columns.first

  # Counters and details accumulated while batches are processed
  @result = {
    matches: 0,
    mismatches: 0,
    batches: 0,
    mismatches_details: []
  }
end
|
|
|
|
|
|
|
|
# rubocop:disable Metrics/AbcSize
|
|
|
|
# Compares consecutive id ranges of BATCH_SIZE, starting at start_id, for at
# most MAX_BATCHES batches. Stops early once the source's maximum id has been
# passed or MAX_RUNTIME has elapsed, then reports match / mismatch counts to
# the metrics counter.
#
# @param start_id value of the sort column at which to start checking
# @return [Hash] the accumulated result plus :next_start_id, which is nil when
#   the source has no maximum id, the source's minimum id when the scan is
#   past the maximum id, and otherwise the id where the next run should resume
def execute(start_id:)
  return build_result(next_start_id: nil) if max_id.nil?
  return build_result(next_start_id: min_id) if start_id > max_id

  @start_time = monotonic_time
  cursor = start_id

  MAX_BATCHES.times do
    break if cursor > max_id || over_time_limit?

    # Exclusive range: [cursor, cursor + BATCH_SIZE)
    batch_ids = cursor...(cursor + BATCH_SIZE)

    # rubocop: disable CodeReuse/ActiveRecord
    source_rows = source_model
      .where(source_sort_column => batch_ids)
      .order(source_sort_column => :asc)
      .pluck(*source_columns)
    target_rows = target_model
      .where(target_sort_column => batch_ids)
      .order(target_sort_column => :asc)
      .pluck(*target_columns)
    # rubocop: enable CodeReuse/ActiveRecord

    cursor += BATCH_SIZE
    result[:matches] += append_mismatches_details(source_rows, target_rows)
    result[:batches] += 1
  end

  result[:mismatches] = result[:mismatches_details].length

  # One counter increment per outcome label, "match" first
  { "match" => :matches, "mismatch" => :mismatches }.each do |label, key|
    metrics_counter.increment({ source_table: source_model.table_name, result: label }, result[key])
  end

  # Wrap around to the beginning once the whole id space has been covered
  next_id = cursor > max_id ? min_id : cursor
  build_result(next_start_id: next_id)
end
|
|
|
|
# rubocop:enable Metrics/AbcSize
|
|
|
|
|
|
|
|
private
|
|
|
|
|
|
|
|
# Readers for the constructor arguments and internal state; start_time is
# assigned in #execute and result is the hash accumulated across batches.
attr_reader :source_model, :target_model, :source_columns, :target_columns,
|
|
|
|
:source_sort_column, :target_sort_column, :start_time, :result
|
|
|
|
|
|
|
|
# Builds the hash returned by #execute: :next_start_id first, followed by
# every entry of the accumulated result (whose keys win on collision).
def build_result(next_start_id:)
  { next_start_id: next_start_id, **result }
end
|
|
|
|
|
|
|
|
# True once at least MAX_RUNTIME has passed since start_time was recorded.
def over_time_limit?
  elapsed = monotonic_time - start_time
  elapsed >= MAX_RUNTIME
end
|
|
|
|
|
|
|
|
# This is where the items are compared and the diff log is built
|
|
|
|
# It returns the number of matching elements
|
|
|
|
# Compares one batch of rows and appends an entry to
# result[:mismatches_details] for every id whose row differs between the two
# sides or exists on only one side.
#
# Each row is expected to be an array whose first element is the sort key
# (id) and whose remaining elements are the compared values.
#
# The input arrays and their rows are not modified.
#
# @param source_data [Array<Array>] rows read from the source
# @param target_data [Array<Array>] rows read from the target
# @return [Integer] number of source rows that have an identical target row
def append_mismatches_details(source_data, target_data)
  # Map the sort key of each differing row to its remaining values.
  # Destructuring builds new value arrays rather than `shift`-ing the id off
  # the rows, which would mutate the caller's data (Array#- returns the very
  # same row objects) and raise on frozen rows.
  split_row = ->((id, *values)) { [id, values] }

  # source - target
  source_diff_hash = (source_data - target_data).to_h(&split_row)
  # target - source
  target_diff_hash = (target_data - source_data).to_h(&split_row)

  matches = source_data.length - source_diff_hash.length

  # Rows missing from the target, plus rows whose values differ
  source_diff_hash.each do |id, values|
    result[:mismatches_details] << {
      id: id,
      source_table: values,
      target_table: target_diff_hash[id]
    }
  end

  # Rows that exist only in the target
  target_diff_hash.each do |id, values|
    next if source_diff_hash.key?(id) # already recorded by the loop above

    result[:mismatches_details] << {
      id: id,
      source_table: nil,
      target_table: values
    }
  end

  matches
end
|
|
|
|
|
|
|
|
# rubocop: disable CodeReuse/ActiveRecord
|
|
|
|
# Smallest value of the sort column in the source model.
# A truthy value is cached; a nil result is looked up again on the next call.
def min_id
  return @min_id if @min_id

  @min_id = source_model.minimum(source_sort_column)
end
|
|
|
|
|
|
|
|
# Largest value of the sort column in the source model.
# A truthy value is cached; a nil result is looked up again on the next call.
def max_id
  return @max_id if @max_id

  @max_id = source_model.maximum(source_sort_column)
end
|
|
|
|
# rubocop: enable CodeReuse/ActiveRecord
|
|
|
|
|
|
|
|
# Lazily created counter used by #execute to report match / mismatch totals.
def metrics_counter
  return @metrics_counter if @metrics_counter

  @metrics_counter = Gitlab::Metrics.counter(:consistency_checks, "Consistency Check Results")
end
|
|
|
|
end
|
|
|
|
end
|
|
|
|
end
|