# frozen_string_literal: true

# For large tables, PostgreSQL can take a long time to count rows due to MVCC.
# We can optimize this by using various strategies for approximate counting.
#
# For example, we can use the reltuples count as described in https://wiki.postgresql.org/wiki/Slow_Counting.
#
# However, since statistics are not always up to date, we also implement a table sampling strategy
# that performs an exact count but only on a sample of the table. See TablesampleCountStrategy.
module Gitlab
|
|
|
|
module Database
|
|
|
|
module Count
|
|
|
|
# Frozen list of exception classes grouped under the name CONNECTION_ERRORS.
# NOTE(review): presumably rescued by callers when the database cannot be
# reached; the call sites are not visible here, so confirm before relying on it.
#
# PG::Error is included only when the PG constant is defined. The reference
# sits behind the `defined?` guard so it is never evaluated otherwise.
CONNECTION_ERRORS = [
  ActionView::Template::Error,
  ActiveRecord::StatementInvalid,
  (PG::Error if defined?(PG)) # nil without PG; dropped by #compact below
].compact.freeze
|
|
|
|
|
2018-05-25 17:28:16 -04:00
|
|
|
# Takes in an array of models and returns a Hash of the approximate
# counts for them.
#
# The count strategies are executed in sequence until all models have an
# approximate count attached or we run out of strategies. Each strategy is
# only handed the models that do not have a count yet, so a count obtained
# from an earlier strategy is never overwritten by a later one.
#
# Note that not all strategies are available on all supported RDBMS.
#
# @param models [Array] the models to count
# @param strategies [Array<Class>] strategy classes, tried in the given
#   order. Each one is instantiated with the models still missing a count
#   and its #count result is iterated as (model, count) pairs.
# @return [Hash] of Model -> count mapping; models that no strategy could
#   count are absent from the result
def self.approximate_counts(models, strategies: [TablesampleCountStrategy, ReltuplesCountStrategy, ExactCountStrategy])
  strategies.each_with_object({}) do |strategy, counts_by_model|
    pending_models = models - counts_by_model.keys

    # Everything is counted already: stop here so the remaining strategies
    # are never instantiated.
    break counts_by_model if pending_models.empty?

    strategy.new(pending_models).count.each do |model, count|
      counts_by_model[model] = count
    end
  end
end
|
|
|
|
end
|
|
|
|
end
|
|
|
|
end
|