gitlab-org--gitlab-foss/lib/gitlab/database/count.rb

119 lines
3.9 KiB
Ruby
Raw Normal View History

# frozen_string_literal: true
# For large tables, PostgreSQL can take a long time to count rows due to MVCC.
# We can optimize this by using the reltuples count as described in https://wiki.postgresql.org/wiki/Slow_Counting.
module Gitlab
module Database
module Count
# Exception classes that are rescued when the estimate query cannot be run
# (see ReltuplesCountStrategy#count). PG::Error is only part of the list
# when the PG constant is defined.
CONNECTION_ERRORS = [
  ActionView::Template::Error,
  ActiveRecord::StatementInvalid,
  *(defined?(PG) ? [PG::Error] : [])
].freeze
# Produces row counts for the given models, preferring cheap estimates.
# On PostgreSQL, models whose tables have been vacuumed or analyzed
# recently get an estimated count; every model left without an estimate
# is counted exactly through ExactCountStrategy (i.e. Model.count).
#
# @param models [Array] models to count
# @return [Hash] of Model -> count mapping
def self.approximate_counts(models)
  estimates = Gitlab::Database.postgresql? ? reltuples_from_recently_updated(models) : {}
  uncounted = models - estimates.keys

  ExactCountStrategy.new(uncounted).count.each { |model, total| estimates[model] = total }

  estimates
end
# Returns estimated row counts for the given models, as computed by
# ReltuplesCountStrategy (only tables whose statistics were refreshed
# recently get an estimate).
#
# @param models [Array] models to count
# @return [Hash] Model -> estimated count mapping; models without a
#   recent estimate are absent from the result
def self.reltuples_from_recently_updated(models)
  ReltuplesCountStrategy.new(models).count
end
# Counts rows exactly by calling `count` on every model.
class ExactCountStrategy
  attr_reader :models

  # @param models [Array] models to count
  def initialize(models)
    @models = models
  end

  # @return [Hash] of Model -> count mapping
  def count
    models.map { |klass| [klass, klass.count] }.to_h
  end
end
# Estimates row counts from the reltuples statistic in pg_class instead of
# counting the rows of each table.
class ReltuplesCountStrategy
  attr_reader :models

  # @param models [Array] models to count
  def initialize(models)
    @models = models
  end

  # Returns the estimated row count for every model whose table was
  # vacuumed or analyzed within the last hour.
  #
  # @return [Hash] Model -> estimate mapping (e.g. { Project => 5, User => 100 });
  #   empty when there are no models or when the query raises one of
  #   CONNECTION_ERRORS
  def count
    # An empty list would render `IN ()`, which is invalid SQL; the answer
    # is known without asking the database.
    return {} if models.empty?

    query = postgresql_estimate_query(table_names)

    # Querying tuple stats only works on the primary. Due to load
    # balancing, the easiest way to do this is to start a transaction.
    rows = ActiveRecord::Base.transaction do
      ActiveRecord::Base.connection.select_all(query)
    end

    table_to_model = models.each_with_object({}) { |model, h| h[model.table_name] = model }

    rows.each_with_object({}) do |row, data|
      model = table_to_model[row['table_name']]
      data[model] = row['estimate'].to_i
    end
  rescue *CONNECTION_ERRORS
    # Best effort: callers fall back to an exact count for missing models.
    {}
  end

  private

  # @return [Array] table name of every model, in the order given
  def table_names
    models.map(&:table_name)
  end

  # Generates the PostgreSQL query to return the tuples for tables
  # that have been vacuumed or analyzed in the last hour.
  #
  # @param table_names [Array] table names to look up
  # @return [String] SQL selecting `table_name` and `estimate` columns
  def postgresql_estimate_query(table_names)
    time = "to_timestamp(#{1.hour.ago.to_i})"

    <<~SQL
      SELECT pg_class.relname AS table_name, reltuples::bigint AS estimate FROM pg_class
      LEFT JOIN pg_stat_user_tables ON pg_class.relname = pg_stat_user_tables.relname
      WHERE pg_class.relname IN (#{table_names.map { |table| "'#{table}'" }.join(',')})
      AND (last_vacuum > #{time} OR last_autovacuum > #{time} OR last_analyze > #{time} OR last_autoanalyze > #{time})
    SQL
  end
end
end
end
end