Use a specialized class for querying events

This changes various controllers to use the new EventCollection class
for retrieving events. This class uses a JOIN LATERAL query on
PostgreSQL to retrieve queries in a more efficient way, while falling
back to a simpler / less efficient query for MySQL.

The EventCollection class also includes a limit on the number of events
to display to prevent malicious users from cycling through all events,
as doing so could put a lot of pressure on the database.

JOIN LATERAL is only supported on PostgreSQL starting with version 9.3.0
and as such this optimisation is only used when using PostgreSQL 9.3 or
newer.
This commit is contained in:
Yorick Peterse 2017-07-27 19:42:15 +02:00
parent 0395c47193
commit aac1de46c9
No known key found for this signature in database
GPG key ID: EDD30D2BEB691AC9
12 changed files with 241 additions and 17 deletions

View file

@ -52,8 +52,10 @@ class Dashboard::ProjectsController < Dashboard::ApplicationController
end
def load_events
@events = Event.in_projects(load_projects(params.merge(non_public: true)))
@events = event_filter.apply_filter(@events).with_associations
@events = @events.limit(20).offset(params[:offset] || 0)
projects = load_projects(params.merge(non_public: true))
@events = EventCollection
.new(projects, offset: params[:offset].to_i, filter: event_filter)
.to_a
end
end

View file

@ -29,9 +29,9 @@ class DashboardController < Dashboard::ApplicationController
current_user.authorized_projects
end
@events = Event.in_projects(projects)
@events = @event_filter.apply_filter(@events).with_associations
@events = @events.limit(20).offset(params[:offset] || 0)
@events = EventCollection
.new(projects, offset: params[:offset].to_i, filter: @event_filter)
.to_a
end
def set_show_full_reference

View file

@ -160,9 +160,9 @@ class GroupsController < Groups::ApplicationController
end
def load_events
@events = Event.in_projects(@projects)
@events = event_filter.apply_filter(@events).with_associations
@events = @events.limit(20).offset(params[:offset] || 0)
@events = EventCollection
.new(@projects, offset: params[:offset].to_i, filter: event_filter)
.to_a
end
def user_actions

View file

@ -301,10 +301,11 @@ class ProjectsController < Projects::ApplicationController
end
def load_events
@events = @project.events.recent
@events = event_filter.apply_filter(@events).with_associations
limit = (params[:limit] || 20).to_i
@events = @events.limit(limit).offset(params[:offset] || 0)
projects = Project.where(id: @project.id)
@events = EventCollection
.new(projects, offset: params[:offset].to_i, filter: event_filter)
.to_a
end
def project_params

View file

@ -62,8 +62,13 @@ class Event < ActiveRecord::Base
scope :recent, -> { reorder(id: :desc) }
scope :code_push, -> { where(action: PUSHED) }
scope :in_projects, ->(projects) do
where(project_id: projects.pluck(:id)).recent
scope :in_projects, -> (projects) do
sub_query = projects
.except(:order)
.select(1)
.where('projects.id = events.project_id')
where('EXISTS (?)', sub_query).recent
end
scope :with_associations, -> do

View file

@ -0,0 +1,98 @@
# A collection of events to display in an event list.
#
# An EventCollection is meant to be used for displaying events to a user (e.g.
# in a controller), it's not suitable for building queries that are used for
# building other queries.
class EventCollection
# To prevent users from putting too much pressure on the database by cycling
# through thousands of events we put a limit on the number of pages.
MAX_PAGE = 10
# projects - An ActiveRecord::Relation object that returns the projects for
# which to retrieve events.
# filter - An EventFilter instance to use for filtering events.
def initialize(projects, limit: 20, offset: 0, filter: nil)
@projects = projects
@limit = limit
@offset = offset
@filter = filter
end
# Returns an Array containing the events.
def to_a
return [] if current_page > MAX_PAGE
relation = if Gitlab::Database.join_lateral_supported?
relation_with_join_lateral
else
relation_without_join_lateral
end
relation.with_associations.to_a
end
private
# Returns the events relation to use when JOIN LATERAL is not supported.
#
# This relation simply gets all the events for all authorized projects, then
# limits that set.
def relation_without_join_lateral
events = filtered_events.in_projects(projects)
paginate_events(events)
end
# Returns the events relation to use when JOIN LATERAL is supported.
#
# This relation is built using JOIN LATERAL, producing faster queries than a
# regular LIMIT + OFFSET approach.
def relation_with_join_lateral
projects_for_lateral = projects.select(:id).to_sql
lateral = filtered_events
.limit(limit_for_join_lateral)
.where('events.project_id = projects_for_lateral.id')
.to_sql
# The outer query does not need to re-apply the filters since the JOIN
# LATERAL body already takes care of this.
outer = base_relation
.from("(#{projects_for_lateral}) projects_for_lateral")
.joins("JOIN LATERAL (#{lateral}) AS #{Event.table_name} ON true")
paginate_events(outer)
end
def filtered_events
@filter ? @filter.apply_filter(base_relation) : base_relation
end
def paginate_events(events)
events.limit(@limit).offset(@offset)
end
def base_relation
# We want to have absolute control over the event queries being built, thus
# we're explicitly opting out of any default scopes that may be set.
Event.unscoped.recent
end
def limit_for_join_lateral
# Applying the OFFSET on the inside of a JOIN LATERAL leads to incorrect
# results. To work around this we need to increase the inner limit for every
# page.
#
# This means that on page 1 we use LIMIT 20, and an outer OFFSET of 0. On
# page 2 we use LIMIT 40 and an outer OFFSET of 20.
@limit + @offset
end
def current_page
(@offset / @limit) + 1
end
def projects
@projects.except(:order)
end
end

View file

@ -0,0 +1,4 @@
---
title: Use a specialized class for querying events to improve performance
merge_request:
author:

View file

@ -0,0 +1,37 @@
# See http://doc.gitlab.com/ce/development/migration_style_guide.html
# for more information on how to write migrations for GitLab.
class AddIndexOnEventsProjectIdId < ActiveRecord::Migration
include Gitlab::Database::MigrationHelpers
DOWNTIME = false
COLUMNS = %i[project_id id].freeze
TABLES = %i[events events_for_migration].freeze
disable_ddl_transaction!
def up
TABLES.each do |table|
add_concurrent_index(table, COLUMNS) unless index_exists?(table, COLUMNS)
# We remove the index _after_ adding the new one since MySQL doesn't let
# you remove an index when a foreign key exists for the same column.
if index_exists?(table, :project_id)
remove_concurrent_index(table, :project_id)
end
end
end
def down
TABLES.each do |table|
unless index_exists?(table, :project_id)
add_concurrent_index(table, :project_id)
end
unless index_exists?(table, COLUMNS)
remove_concurrent_index(table, COLUMNS)
end
end
end
end

View file

@ -530,7 +530,7 @@ ActiveRecord::Schema.define(version: 20170807160457) do
add_index "events", ["action"], name: "index_events_on_action", using: :btree
add_index "events", ["author_id"], name: "index_events_on_author_id", using: :btree
add_index "events", ["created_at"], name: "index_events_on_created_at", using: :btree
add_index "events", ["project_id"], name: "index_events_on_project_id", using: :btree
add_index "events", ["project_id", "id"], name: "index_events_on_project_id_and_id", using: :btree
add_index "events", ["target_id"], name: "index_events_on_target_id", using: :btree
add_index "events", ["target_type"], name: "index_events_on_target_type", using: :btree
@ -546,7 +546,7 @@ ActiveRecord::Schema.define(version: 20170807160457) do
add_index "events_for_migration", ["action"], name: "index_events_for_migration_on_action", using: :btree
add_index "events_for_migration", ["author_id"], name: "index_events_for_migration_on_author_id", using: :btree
add_index "events_for_migration", ["project_id"], name: "index_events_for_migration_on_project_id", using: :btree
add_index "events_for_migration", ["project_id", "id"], name: "index_events_for_migration_on_project_id_and_id", using: :btree
add_index "events_for_migration", ["target_type", "target_id"], name: "index_events_for_migration_on_target_type_and_target_id", using: :btree
create_table "feature_gates", force: :cascade do |t|

View file

@ -25,6 +25,10 @@ module Gitlab
database_version.match(/\A(?:PostgreSQL |)([^\s]+).*\z/)[1]
end
def self.join_lateral_supported?
postgresql? && version.to_f >= 9.3
end
def self.nulls_last_order(field, direction = 'ASC')
order = "#{field} #{direction}"

View file

@ -51,6 +51,28 @@ describe Gitlab::Database do
end
end
describe '.join_lateral_supported?' do
it 'returns false when using MySQL' do
allow(described_class).to receive(:postgresql?).and_return(false)
expect(described_class.join_lateral_supported?).to eq(false)
end
it 'returns false when using PostgreSQL 9.2' do
allow(described_class).to receive(:postgresql?).and_return(true)
allow(described_class).to receive(:version).and_return('9.2.1')
expect(described_class.join_lateral_supported?).to eq(false)
end
it 'returns true when using PostgreSQL 9.3.0 or newer' do
allow(described_class).to receive(:postgresql?).and_return(true)
allow(described_class).to receive(:version).and_return('9.3.0')
expect(described_class.join_lateral_supported?).to eq(true)
end
end
describe '.nulls_last_order' do
context 'when using PostgreSQL' do
before do

View file

@ -0,0 +1,51 @@
require 'spec_helper'
describe EventCollection do
describe '#to_a' do
let(:project) { create(:project_empty_repo) }
let(:projects) { Project.where(id: project.id) }
let(:user) { create(:user) }
before do
20.times do
event = create(:push_event, project: project, author: user)
create(:push_event_payload, event: event)
end
create(:closed_issue_event, project: project, author: user)
end
it 'returns an Array of events' do
events = described_class.new(projects).to_a
expect(events).to be_an_instance_of(Array)
end
it 'applies a limit to the number of events' do
events = described_class.new(projects).to_a
expect(events.length).to eq(20)
end
it 'can paginate through events' do
events = described_class.new(projects, offset: 20).to_a
expect(events.length).to eq(1)
end
it 'returns an empty Array when crossing the maximum page number' do
events = described_class.new(projects, limit: 1, offset: 15).to_a
expect(events).to be_empty
end
it 'allows filtering of events using an EventFilter' do
filter = EventFilter.new(EventFilter.issue)
events = described_class.new(projects, filter: filter).to_a
expect(events.length).to eq(1)
expect(events[0].action).to eq(Event::CLOSED)
end
end
end