gitlab-org--gitlab-foss/lib/gitlab/database/count/reltuples_count_strategy.rb

102 lines
3.5 KiB
Ruby

# frozen_string_literal: true
module Gitlab
module Database
module Count
# This strategy counts based on PostgreSQL's statistics in pg_class and
# pg_stat_user_tables.
#
# Specifically, it relies on the column reltuples in pg_class. An additional
# check against pg_stat_user_tables is performed to make sure statistics were
# updated (vacuumed or analyzed) within the last 6 hours.
#
# Otherwise, this strategy skips tables with outdated statistics.
#
# There are no guarantees with respect to the accuracy of this strategy. Runtime
# however is guaranteed to be "fast", because it only looks up statistics.
class ReltuplesCountStrategy
  attr_reader :models

  # @param models [Enumerable] model classes whose tables should be estimated.
  #   Each model is asked for table_name, column_names, inheritance_column,
  #   base_class and connection_db_config.
  def initialize(models)
    @models = models
  end

  # Returns the row estimates of the tables that have recently updated tuples.
  # Models without a usable estimate are simply absent from the result.
  #
  # @return [Hash] Model to estimate mapping (e.g. { Project => 5, User => 100 }),
  #   or an empty hash when one of CONNECTION_ERRORS is raised while querying
  def count
    size_estimates
  rescue *CONNECTION_ERRORS
    {}
  end

  private

  # Models using single-type inheritance (STI) don't work with
  # reltuple count estimates. We just have to ignore them and
  # use another strategy to compute them.
  #
  # @param models [Enumerable] model classes to filter
  # @return [Array] the models for which sti_model? is false
  def non_sti_models(models)
    models.reject { |model| sti_model?(model) }
  end

  # @param models [Enumerable] model classes to filter
  # @return [Array<String>] table names of the non-STI models
  def non_sti_table_names(models)
    non_sti_models(models).map(&:table_name)
  end

  # A model counts as STI when its table has the inheritance column and the
  # model is not its own base class.
  #
  # @return [Boolean]
  def sti_model?(model)
    model.column_names.include?(model.inheritance_column) &&
      model.base_class != model
  end

  # Memoized lookup from table name to model, built from all models
  # (STI models are not filtered out here).
  # NOTE(review): when several models report the same table name only one of
  # them is kept - confirm an STI subclass cannot shadow its base class here.
  def table_to_model_mapping
    @table_to_model_mapping ||= models.index_by(&:table_name)
  end

  # @param table_name [String]
  # @return the model registered for the table, or nil if there is none
  def table_to_model(table_name)
    table_to_model_mapping[table_name]
  end

  # Collects the estimates for all non-STI models, issuing one statistics
  # query per database the models are connected to.
  #
  # @param check_statistics [Boolean] forwarded to get_statistics
  # @return [Hash] Model to estimate mapping
  def size_estimates(check_statistics: true)
    results = {}

    models.group_by { |model| model.connection_db_config.name }.each do |db_name, models_for_db|
      base_model = Gitlab::Database.database_base_models[db_name]
      tables = non_sti_table_names(models_for_db)

      # Querying tuple stats only works on the primary. Due to load balancing, the
      # easiest way to do this is to start a transaction.
      base_model.transaction do
        Gitlab::Database::SharedModel.using_connection(base_model.connection) do
          get_statistics(tables, check_statistics: check_statistics).each do |row|
            results[table_to_model(row.table_name)] = row.estimate
          end
        end
      end
    end

    results
  end

  # Generates the PostgreSQL query to return the tuples for the given tables
  # in the current schema. With check_statistics enabled, only tables that
  # have been vacuumed or analyzed in the last 6 hours are returned.
  #
  # @param table_names [Array<String>] table names to look up
  # @param check_statistics [Boolean] whether to skip tables with outdated statistics
  # @return the query object; every row exposes table_name and estimate
  def get_statistics(table_names, check_statistics: true)
    query = ::Gitlab::Database::PgClass
      .joins("LEFT JOIN pg_stat_user_tables ON pg_stat_user_tables.relid = pg_class.oid")
      .where(relname: table_names)
      .where('schemaname = current_schema()')
      .select('pg_class.relname AS table_name, reltuples::bigint AS estimate')

    return query unless check_statistics

    # The cutoff is only needed (and only computed) when statistics are checked.
    time = 6.hours.ago
    query.where('last_vacuum > ? OR last_autovacuum > ? OR last_analyze > ? OR last_autoanalyze > ?',
                time, time, time, time)
  end
end
end
end
end