gitlab-org--gitlab-foss/lib/gitlab/pagination/keyset/in_operator_optimization/query_builder.rb

264 lines
11 KiB
Ruby

# frozen_string_literal: true
module Gitlab
module Pagination
module Keyset
module InOperatorOptimization
# rubocop: disable CodeReuse/ActiveRecord
class QueryBuilder
RECURSIVE_CTE_NAME = 'recursive_keyset_cte'
# This class optimizes slow database queries (PostgreSQL specific) where the
# IN SQL operator is used with sorting.
#
# Arguments:
# scope - ActiveRecord::Relation supporting keyset pagination
# array_scope - ActiveRecord::Relation for the `IN` subselect
# array_mapping_scope - Lambda for connecting scope with array_scope
# finder_query - ActiveRecord::Relation for finding one row by the passed in cursor values
# values - keyset cursor values (optional)
#
# Example ActiveRecord query: Issues in the namespace hierarchy
# > scope = Issue
# > .where(project_id: Group.find(9970).all_projects.select(:id))
# > .order(:created_at, :id)
# > .limit(20);
#
# Optimized version:
#
# > scope = Issue.where({}).order(:created_at, :id) # base scope
# > array_scope = Group.find(9970).all_projects.select(:id)
# > array_mapping_scope = -> (id_expression) { Issue.where(Issue.arel_table[:project_id].eq(id_expression)) }
#
# # finding the record by id is good enough, we can ignore the created_at_expression
# > finder_query = -> (created_at_expression, id_expression) { Issue.where(Issue.arel_table[:id].eq(id_expression)) }
#
# > Gitlab::Pagination::Keyset::InOperatorOptimization::QueryBuilder.new(
# > scope: scope,
# > array_scope: array_scope,
# > array_mapping_scope: array_mapping_scope,
# > finder_query: finder_query
# > ).execute.limit(20)
def initialize(scope:, array_scope:, array_mapping_scope:, finder_query: nil, values: {})
@scope, success = Gitlab::Pagination::Keyset::SimpleOrderBuilder.build(scope)
raise(UnsupportedScopeOrder) unless success
@order = Gitlab::Pagination::Keyset::Order.extract_keyset_order_object(scope)
@array_scope = array_scope
@array_mapping_scope = array_mapping_scope
@values = values
@model = @scope.model
@table_name = @model.table_name
@arel_table = @model.arel_table
@finder_strategy = finder_query.present? ? Strategies::RecordLoaderStrategy.new(finder_query, model, order_by_columns) : Strategies::OrderValuesLoaderStrategy.new(model, order_by_columns)
end
def execute
selector_cte = Gitlab::SQL::CTE.new(:array_cte, array_scope)
cte = Gitlab::SQL::RecursiveCTE.new(RECURSIVE_CTE_NAME, union_args: { remove_duplicates: false, remove_order: false })
cte << initializer_query
cte << data_collector_query
q = cte
.apply_to(model.where({})
.with(selector_cte.to_arel))
.select(finder_strategy.final_projections)
.where("count <> 0") # filter out the initializer row
model.from(q.arel.as(table_name))
end
private
attr_reader :array_scope, :scope, :order, :array_mapping_scope, :finder_strategy, :values, :model, :table_name, :arel_table
def initializer_query
array_column_names = array_scope_columns.array_aggregated_column_names + order_by_columns.array_aggregated_column_names
projections = [
*finder_strategy.initializer_columns,
*array_column_names,
'0::bigint AS count'
]
model.select(projections).from(build_column_arrays_query).limit(1)
end
# This query finds the first cursor values for each item in the array CTE.
#
# array_cte:
#
# |project_id|
# |----------|
# | 1|
# | 2|
# | 3|
# | 4|
#
# For each project_id, find the first issues row by respecting the created_at, id order.
#
# The `array_mapping_scope` parameter defines how the `array_scope` and the `scope` can be combined.
#
# scope = Issue.where({}) # empty scope
# array_mapping_scope = Issue.where(project_id: X)
#
# scope.merge(array_mapping_scope) # Issue.where(project_id: X)
#
# X will be replaced with a value from the `array_cte` temporary table.
#
# |created_at|id|
# |----------|--|
# |2020-01-15| 2|
# |2020-01-07| 3|
# |2020-01-07| 4|
# |2020-01-10| 5|
def build_column_arrays_query
q = Arel::SelectManager.new
.project(array_scope_columns.array_aggregated_columns + order_by_columns.array_aggregated_columns)
.from(array_cte)
.join(Arel.sql("LEFT JOIN LATERAL (#{initial_keyset_query.to_sql}) #{table_name} ON TRUE"))
order_by_columns.each { |c| q.where(c.column_expression.not_eq(nil)) unless c.column.nullable? }
q.as('array_scope_lateral_query')
end
def array_cte
Arel::SelectManager.new
.project(array_scope_columns.arel_columns)
.from(Arel.sql(array_scope_columns.array_scope_cte_name))
.as(array_scope_columns.array_scope_cte_name)
end
def initial_keyset_query
keyset_scope = scope.merge(array_mapping_scope.call(*array_scope_columns.arel_columns))
order
.apply_cursor_conditions(keyset_scope, values, use_union_optimization: true)
.reselect(*order_by_columns.arel_columns)
.limit(1)
end
def data_collector_query
array_column_list = array_scope_columns.array_aggregated_column_names
order_column_value_arrays = order_by_columns.replace_value_in_array_by_position_expressions
select = [
*finder_strategy.columns,
*array_column_list,
*order_column_value_arrays,
"#{RECURSIVE_CTE_NAME}.count + 1"
]
from = <<~SQL
#{RECURSIVE_CTE_NAME},
#{array_order_query.lateral.as('position_query').to_sql},
#{ensure_one_row(next_cursor_values_query).lateral.as('next_cursor_values').to_sql}
SQL
model.select(select).from(from)
end
# NULL guard. This method ensures that NULL values are returned when the passed in scope returns 0 rows.
# Example query: returns issues.id or NULL
#
# SELECT issues.id FROM (VALUES (NULL)) nulls (id)
# LEFT JOIN (SELECT id FROM issues WHERE id = 1 LIMIT 1) issues ON TRUE
# LIMIT 1
def ensure_one_row(query)
q = Arel::SelectManager.new
q.projections = order_by_columns.original_column_names_as_tmp_tamble
null_values = [nil] * order_by_columns.count
from = Arel::Nodes::Grouping.new(Arel::Nodes::ValuesList.new([null_values])).as('nulls')
q.from(from)
q.join(Arel.sql("LEFT JOIN (#{query.to_sql}) record ON TRUE"))
q.limit = 1
q
end
# This subquery finds the cursor values for the next record by sorting the generated cursor arrays in memory and taking the first element.
# It combines the cursor arrays (UNNEST) together and sorts them according to the originally defined ORDER BY clause.
#
# Example: issues in the group hierarchy with ORDER BY created_at, id
#
# |project_id| |created_at|id| # 2 arrays combined: issues_created_at_array, issues_id_array
# |----------| |----------|--|
# | 1| |2020-01-15| 2|
# | 2| |2020-01-07| 3|
# | 3| |2020-01-07| 4|
# | 4| |2020-01-10| 5|
#
# The query will return the cursor values: (2020-01-07, 3) and the array position: 1
# From the position, we can tell that the record belongs to the project with id 2.
def array_order_query
q = Arel::SelectManager.new
.project([*order_by_columns.original_column_names_as_arel_string, Arel.sql('position')])
.from("UNNEST(#{list(order_by_columns.array_aggregated_column_names)}) WITH ORDINALITY AS u(#{list(order_by_columns.original_column_names)}, position)")
order_by_columns.each { |c| q.where(Arel.sql(c.original_column_name).not_eq(nil)) unless c.column.nullable? } # ignore rows where all columns are NULL
q.order(Arel.sql(order_by_without_table_references)).take(1)
end
# This subquery finds the next cursor values after the previously determined position (from array_order_query).
# The current cursor values are passed in as SQL literals since the actual values are encoded into SQL arrays.
#
# Example: issues in the group hierarchy with ORDER BY created_at, id
#
# |project_id| |created_at|id| # 2 arrays combined: issues_created_at_array, issues_id_array
# |----------| |----------|--|
# | 1| |2020-01-15| 2|
# | 2| |2020-01-07| 3|
# | 3| |2020-01-07| 4|
# | 4| |2020-01-10| 5|
#
# Assuming that the determined position is 1, the cursor values will be the following:
# - Filter: project_id = 2
# - created_at = 2020-01-07
# - id = 3
def next_cursor_values_query
cursor_values = order_by_columns.cursor_values(RECURSIVE_CTE_NAME)
array_mapping_scope_columns = array_scope_columns.array_lookup_expressions_by_position(RECURSIVE_CTE_NAME)
keyset_scope = scope
.reselect(*order_by_columns.arel_columns)
.merge(array_mapping_scope.call(*array_mapping_scope_columns))
order
.apply_cursor_conditions(keyset_scope, cursor_values, use_union_optimization: true)
.reselect(*order_by_columns.map(&:column_for_projection))
.limit(1)
end
# Generates an ORDER BY clause by using the column position index and the original order clauses.
# This method is used to sort the collected arrays in SQL.
# Example: "issues".created_at DESC , "issues".id ASC => 1 DESC, 2 ASC
def order_by_without_table_references
order.column_definitions.each_with_index.map do |column_definition, i|
"#{i + 1} #{column_definition.order_direction_as_sql_string}"
end.join(", ")
end
def array_scope_columns
@array_scope_columns ||= ArrayScopeColumns.new(array_scope.select_values)
end
def order_by_columns
@order_by_columns ||= OrderByColumns.new(order.column_definitions, arel_table)
end
def list(array)
array.join(', ')
end
end
# rubocop: enable CodeReuse/ActiveRecord
end
end
end
end