9c059a026d
The numbers in https://gitlab.com/gitlab-org/gitlab-ce/merge_requests/22650#note_117645395 are quite promising with that target.
66 lines
2.2 KiB
Ruby
66 lines
2.2 KiB
Ruby
# frozen_string_literal: true
|
|
|
|
module Gitlab
|
|
module Database
|
|
module Count
|
|
# A tablesample count executes in two phases:
|
|
# * Estimate table sizes based on reltuples.
|
|
# * Based on the estimate:
|
|
# * If the table is considered 'small', execute an exact relation count.
|
|
# * Otherwise, count on a sample of the table using TABLESAMPLE.
|
|
#
|
|
# The size of the sample is chosen in a way that we always roughly scan
|
|
# the same amount of rows (see TABLESAMPLE_ROW_TARGET).
|
|
#
|
|
# There are no guarantees with respect to the accuracy of the result or runtime.
|
|
class TablesampleCountStrategy < ReltuplesCountStrategy
|
|
EXACT_COUNT_THRESHOLD = 10_000
|
|
TABLESAMPLE_ROW_TARGET = 10_000
|
|
|
|
def count
|
|
estimates = size_estimates(check_statistics: false)
|
|
|
|
models.each_with_object({}) do |model, count_by_model|
|
|
count = perform_count(model, estimates[model])
|
|
count_by_model[model] = count if count
|
|
end
|
|
rescue *CONNECTION_ERRORS
|
|
{}
|
|
end
|
|
|
|
def self.enabled?
|
|
Gitlab::Database.postgresql? && Feature.enabled?(:tablesample_counts)
|
|
end
|
|
|
|
private
|
|
|
|
def perform_count(model, estimate)
|
|
# If we estimate 0, we may not have statistics at all. Don't use them.
|
|
return nil unless estimate && estimate > 0
|
|
|
|
if estimate < EXACT_COUNT_THRESHOLD
|
|
# The table is considered small, the assumption here is that
|
|
# the exact count will be fast anyways.
|
|
model.count
|
|
else
|
|
# The table is considered large, let's only count on a sample.
|
|
tablesample_count(model, estimate)
|
|
end
|
|
end
|
|
|
|
def tablesample_count(model, estimate)
|
|
portion = (TABLESAMPLE_ROW_TARGET.to_f / estimate).round(4)
|
|
inverse = 1 / portion
|
|
query = <<~SQL
|
|
SELECT (COUNT(*)*#{inverse})::integer AS count
|
|
FROM #{model.table_name} TABLESAMPLE SYSTEM (#{portion * 100})
|
|
SQL
|
|
|
|
rows = ActiveRecord::Base.connection.select_all(query)
|
|
|
|
Integer(rows.first['count'])
|
|
end
|
|
end
|
|
end
|
|
end
|
|
end
|