2020-02-17 13:09:00 -05:00
|
|
|
# frozen_string_literal: true
|
|
|
|
|
|
|
|
# For large tables, PostgreSQL can take a long time to count rows due to MVCC.
|
2022-06-15 14:08:44 -04:00
|
|
|
# Implements:
|
|
|
|
# - distinct batch counter
|
|
|
|
# - ordinary batch counter
|
|
|
|
# - sum batch counter
|
|
|
|
# - average batch counter
|
2020-02-17 13:09:00 -05:00
|
|
|
# Needs indexes on the column below to calculate max, min and range queries
|
|
|
|
# For larger tables, just use a higher batch_size with index optimization
|
2020-03-31 05:08:16 -04:00
|
|
|
#
|
|
|
|
# To avoid a potentially complex, time-consuming query when calculating min and max for batch_distinct_count,
|
|
|
|
# the start and finish values can be passed in explicitly
|
|
|
|
#
|
2020-09-25 08:10:00 -04:00
|
|
|
# Grouped relations can be used as well. However, the preferred batch count should be around 10K because group by count is more expensive.
|
|
|
|
#
|
2020-02-17 13:09:00 -05:00
|
|
|
# See https://gitlab.com/gitlab-org/gitlab/-/merge_requests/22705
|
2020-03-31 05:08:16 -04:00
|
|
|
#
|
2020-02-17 13:09:00 -05:00
|
|
|
# Examples:
|
|
|
|
# extend ::Gitlab::Database::BatchCount
|
|
|
|
# batch_count(User.active)
|
|
|
|
# batch_count(::Clusters::Cluster.aws_installed.enabled, :cluster_id)
|
2020-09-25 08:10:00 -04:00
|
|
|
# batch_count(Namespace.group(:type))
|
2020-02-17 13:09:00 -05:00
|
|
|
# batch_distinct_count(::Project, :creator_id)
|
2021-07-01 02:07:35 -04:00
|
|
|
# batch_distinct_count(::Project.aimed_for_deletion.service_desk_enabled.where(time_period), start: ::User.minimum(:id), finish: ::User.maximum(:id))
|
2020-09-25 08:10:00 -04:00
|
|
|
# batch_distinct_count(Project.group(:visibility_level), :creator_id)
|
2020-08-05 20:09:53 -04:00
|
|
|
# batch_sum(User, :sign_in_count)
|
2020-09-25 08:10:00 -04:00
|
|
|
# batch_sum(Issue.group(:state_id), :weight)
|
2022-06-15 14:08:44 -04:00
|
|
|
# batch_average(Ci::Pipeline, :duration)
|
|
|
|
# batch_average(MergeTrain.group(:status), :duration)
|
2020-02-17 13:09:00 -05:00
|
|
|
module Gitlab
  module Database
    # Thin entry points for the batched aggregate counters.
    #
    # Every helper only builds a counter object and delegates to it; the
    # batching itself lives in BatchCounter / BatchAverageCounter, which are
    # defined outside this file. The helpers can be mixed in with `extend` /
    # `include`, or called directly on the module
    # (e.g. `Gitlab::Database::BatchCount.batch_count(...)`).
    module BatchCount
      # Counts the rows of +relation+ in batches.
      #
      # relation   - relation to count (may be grouped, see the file header).
      # column     - optional column handed to BatchCounter as `column:`.
      # batch_size - forwarded unchanged to BatchCounter#count.
      # start      - forwarded unchanged to BatchCounter#count.
      # finish     - forwarded unchanged to BatchCounter#count.
      #
      # Returns whatever BatchCounter#count returns.
      def batch_count(relation, column = nil, batch_size: nil, start: nil, finish: nil)
        BatchCounter
          .new(relation, column: column)
          .count(batch_size: batch_size, start: start, finish: finish)
      end

      # Same as #batch_count, but delegates to BatchCounter#count_with_timeout
      # and additionally forwards +timeout+ and +partial_results+.
      #
      # NOTE(review): the meaning of +timeout+ (units) and the expected shape
      # of +partial_results+ are defined by BatchCounter — confirm there.
      #
      # Returns whatever BatchCounter#count_with_timeout returns.
      def batch_count_with_timeout(relation, column = nil, batch_size: nil, start: nil, finish: nil, timeout: nil, partial_results: nil)
        BatchCounter
          .new(relation, column: column)
          .count_with_timeout(
            batch_size: batch_size,
            start: start,
            finish: finish,
            timeout: timeout,
            partial_results: partial_results
          )
      end

      # Distinct variant of #batch_count: identical delegation, except that
      # `mode: :distinct` is passed to BatchCounter#count.
      #
      # Per the file header, +start+ and +finish+ can be supplied by the
      # caller to skip a potentially expensive min/max lookup.
      def batch_distinct_count(relation, column = nil, batch_size: nil, start: nil, finish: nil)
        BatchCounter
          .new(relation, column: column)
          .count(mode: :distinct, batch_size: batch_size, start: start, finish: finish)
      end

      # Sums +column+ over +relation+ in batches.
      #
      # The counter is built with `column: nil`; the column to sum is passed
      # separately as `operation_args: [column]` together with
      # `operation: :sum`.
      def batch_sum(relation, column, batch_size: nil, start: nil, finish: nil)
        BatchCounter
          .new(relation, column: nil, operation: :sum, operation_args: [column])
          .count(batch_size: batch_size, start: start, finish: finish)
      end

      # Averages +column+ over +relation+ in batches via BatchAverageCounter.
      #
      # NOTE: +start+ and +finish+ are accepted but NOT forwarded; only
      # +batch_size+ reaches BatchAverageCounter#count. Presumably the
      # keywords are kept for signature parity with the other helpers —
      # confirm before relying on them.
      def batch_average(relation, column, batch_size: nil, start: nil, finish: nil)
        BatchAverageCounter.new(relation, column).count(batch_size: batch_size)
      end

      # Makes every helper above callable on the module itself.
      class << self
        include BatchCount
      end
    end
  end
end
|