gitlab-org--gitlab-foss/app/models/concerns/loose_index_scan.rb

67 lines
2.2 KiB
Ruby

# frozen_string_literal: true
module LooseIndexScan
extend ActiveSupport::Concern
class_methods do
# Builds a recursive query to read distinct values from a column.
#
# Example 1: collect all distinct author ids for the `issues` table
#
# Bad: The DB reads all issues, sorts and dedups them in memory
#
# > Issue.select(:author_id).distinct.map(&:author_id)
#
# Good: Use loose index scan (skip index scan)
#
# > Issue.loose_index_scan(column: :author_id).map(&:author_id)
#
# Example 2: List of users for the DONE todos selector. Select all users who created a todo.
#
# Bad: Loads all DONE todos for the given user and extracts the author_ids
#
# > User.where(id: Todo.where(user_id: 4156052).done.select(:author_id))
#
# Good: Loads distinct author_ids from todos and then loads users
#
# > distinct_authors = Todo.where(user_id: 4156052).done.loose_index_scan(column: :author_id).select(:author_id)
# > User.where(id: distinct_authors)
def loose_index_scan(column:, order: :asc)
arel_table = self.arel_table
arel_column = arel_table[column.to_s]
cte = Gitlab::SQL::RecursiveCTE.new(:loose_index_scan_cte, union_args: { remove_order: false })
cte_query = except(:select)
.select(column)
.order(column => order)
.limit(1)
inner_query = except(:select)
cte_query, inner_query = yield([cte_query, inner_query]) if block_given?
cte << cte_query
inner_query = if order == :asc
inner_query.where(arel_column.gt(cte.table[column.to_s]))
else
inner_query.where(arel_column.lt(cte.table[column.to_s]))
end
inner_query = inner_query.order(column => order)
.select(column)
.limit(1)
cte << cte.table
.project(Arel::Nodes::Grouping.new(Arel.sql(inner_query.to_sql)).as(column.to_s))
unscoped do
select(column)
.with
.recursive(cte.to_arel)
.from(cte.alias_to(arel_table))
.where(arel_column.not_eq(nil)) # filtering out the last NULL value
end
end
end
end