gitlab-org--gitlab-foss/lib/gitlab/database/count/tablesample_count_strategy.rb

72 lines
2.3 KiB
Ruby

# frozen_string_literal: true
module Gitlab
module Database
module Count
# A tablesample count executes in two phases:
# * Estimate table sizes based on reltuples.
# * Based on the estimate:
# * If the table is considered 'small', execute an exact relation count.
# * Otherwise, count on a sample of the table using TABLESAMPLE.
#
# The size of the sample is chosen in a way that we always roughly scan
# the same amount of rows (see TABLESAMPLE_ROW_TARGET).
#
# There are no guarantees with respect to the accuracy of the result or runtime.
class TablesampleCountStrategy < ReltuplesCountStrategy
EXACT_COUNT_THRESHOLD = 10_000
TABLESAMPLE_ROW_TARGET = 10_000
def count
estimates = size_estimates(check_statistics: false)
models.each_with_object({}) do |model, count_by_model|
count = perform_count(model, estimates[model])
count_by_model[model] = count if count
end
rescue *CONNECTION_ERRORS
{}
end
private
def perform_count(model, estimate)
# If we estimate 0, we may not have statistics at all. Don't use them.
return unless estimate && estimate > 0
if estimate < EXACT_COUNT_THRESHOLD
# The table is considered small, the assumption here is that
# the exact count will be fast anyways.
model.count
else
# The table is considered large, let's only count on a sample.
tablesample_count(model, estimate)
end
end
def where_clause(model)
return unless sti_model?(model)
"WHERE #{model.inheritance_column} = '#{model.name}'"
end
def tablesample_count(model, estimate)
portion = (TABLESAMPLE_ROW_TARGET.to_f / estimate).round(4)
inverse = 1 / portion
query = <<~SQL
SELECT (COUNT(*)*#{inverse})::integer AS count
FROM #{model.table_name}
TABLESAMPLE SYSTEM (#{portion * 100})
REPEATABLE (0)
#{where_clause(model)}
SQL
rows = model.connection.select_all(query)
Integer(rows.first['count'])
end
end
end
end
end