From aac1de46c9be659b74da12f704412f38292974db Mon Sep 17 00:00:00 2001 From: Yorick Peterse Date: Thu, 27 Jul 2017 19:42:15 +0200 Subject: [PATCH] Use a specialized class for querying events This changes various controllers to use the new EventCollection class for retrieving events. This class uses a JOIN LATERAL query on PostgreSQL to retrieve queries in a more efficient way, while falling back to a simpler / less efficient query for MySQL. The EventCollection class also includes a limit on the number of events to display to prevent malicious users from cycling through all events, as doing so could put a lot of pressure on the database. JOIN LATERAL is only supported on PostgreSQL starting with version 9.3.0 and as such this optimisation is only used when using PostgreSQL 9.3 or newer. --- .../dashboard/projects_controller.rb | 8 +- app/controllers/dashboard_controller.rb | 6 +- app/controllers/groups_controller.rb | 6 +- app/controllers/projects_controller.rb | 9 +- app/models/event.rb | 9 +- app/models/event_collection.rb | 98 +++++++++++++++++++ ...-specialized-class-for-querying-events.yml | 4 + ...23534_add_index_on_events_project_id_id.rb | 37 +++++++ db/schema.rb | 4 +- lib/gitlab/database.rb | 4 + spec/lib/gitlab/database_spec.rb | 22 +++++ spec/models/event_collection_spec.rb | 51 ++++++++++ 12 files changed, 241 insertions(+), 17 deletions(-) create mode 100644 app/models/event_collection.rb create mode 100644 changelogs/unreleased/use-a-specialized-class-for-querying-events.yml create mode 100644 db/migrate/20170727123534_add_index_on_events_project_id_id.rb create mode 100644 spec/models/event_collection_spec.rb diff --git a/app/controllers/dashboard/projects_controller.rb b/app/controllers/dashboard/projects_controller.rb index 74fe45e1ff6..f71ab702e71 100644 --- a/app/controllers/dashboard/projects_controller.rb +++ b/app/controllers/dashboard/projects_controller.rb @@ -52,8 +52,10 @@ class Dashboard::ProjectsController < Dashboard::ApplicationController end def load_events - @events = Event.in_projects(load_projects(params.merge(non_public: true))) - @events = event_filter.apply_filter(@events).with_associations - @events = @events.limit(20).offset(params[:offset] || 0) + projects = load_projects(params.merge(non_public: true)) + + @events = EventCollection + .new(projects, offset: params[:offset].to_i, filter: event_filter) + .to_a end end diff --git a/app/controllers/dashboard_controller.rb b/app/controllers/dashboard_controller.rb index f9c31920302..19a5db6fd17 100644 --- a/app/controllers/dashboard_controller.rb +++ b/app/controllers/dashboard_controller.rb @@ -29,9 +29,9 @@ class DashboardController < Dashboard::ApplicationController current_user.authorized_projects end - @events = Event.in_projects(projects) - @events = @event_filter.apply_filter(@events).with_associations - @events = @events.limit(20).offset(params[:offset] || 0) + @events = EventCollection + .new(projects, offset: params[:offset].to_i, filter: @event_filter) + .to_a end def set_show_full_reference diff --git a/app/controllers/groups_controller.rb b/app/controllers/groups_controller.rb index 27137ffde54..f76b3f69e9e 100644 --- a/app/controllers/groups_controller.rb +++ b/app/controllers/groups_controller.rb @@ -160,9 +160,9 @@ class GroupsController < Groups::ApplicationController end def load_events - @events = Event.in_projects(@projects) - @events = event_filter.apply_filter(@events).with_associations - @events = @events.limit(20).offset(params[:offset] || 0) + @events = EventCollection + .new(@projects, offset: params[:offset].to_i, filter: event_filter) + .to_a end def user_actions diff --git a/app/controllers/projects_controller.rb b/app/controllers/projects_controller.rb index 8dfe0f51709..e93f34498d6 100644 --- a/app/controllers/projects_controller.rb +++ b/app/controllers/projects_controller.rb @@ -301,10 +301,11 @@ class ProjectsController < Projects::ApplicationController end def load_events - @events = @project.events.recent - @events = event_filter.apply_filter(@events).with_associations - limit = (params[:limit] || 20).to_i - @events = @events.limit(limit).offset(params[:offset] || 0) + projects = Project.where(id: @project.id) + + @events = EventCollection + .new(projects, offset: params[:offset].to_i, filter: event_filter) + .to_a end def project_params diff --git a/app/models/event.rb b/app/models/event.rb index a598eb08e82..f2a560a6b56 100644 --- a/app/models/event.rb +++ b/app/models/event.rb @@ -62,8 +62,13 @@ class Event < ActiveRecord::Base scope :recent, -> { reorder(id: :desc) } scope :code_push, -> { where(action: PUSHED) } - scope :in_projects, ->(projects) do - where(project_id: projects.pluck(:id)).recent + scope :in_projects, -> (projects) do + sub_query = projects + .except(:order) + .select(1) + .where('projects.id = events.project_id') + + where('EXISTS (?)', sub_query).recent end scope :with_associations, -> do diff --git a/app/models/event_collection.rb b/app/models/event_collection.rb new file mode 100644 index 00000000000..8b8244314af --- /dev/null +++ b/app/models/event_collection.rb @@ -0,0 +1,98 @@ +# A collection of events to display in an event list. +# +# An EventCollection is meant to be used for displaying events to a user (e.g. +# in a controller), it's not suitable for building queries that are used for +# building other queries. +class EventCollection + # To prevent users from putting too much pressure on the database by cycling + # through thousands of events we put a limit on the number of pages. + MAX_PAGE = 10 + + # projects - An ActiveRecord::Relation object that returns the projects for + # which to retrieve events. + # filter - An EventFilter instance to use for filtering events. + def initialize(projects, limit: 20, offset: 0, filter: nil) + @projects = projects + @limit = limit + @offset = offset + @filter = filter + end + + # Returns an Array containing the events. + def to_a + return [] if current_page > MAX_PAGE + + relation = if Gitlab::Database.join_lateral_supported? + relation_with_join_lateral + else + relation_without_join_lateral + end + + relation.with_associations.to_a + end + + private + + # Returns the events relation to use when JOIN LATERAL is not supported. + # + # This relation simply gets all the events for all authorized projects, then + # limits that set. + def relation_without_join_lateral + events = filtered_events.in_projects(projects) + + paginate_events(events) + end + + # Returns the events relation to use when JOIN LATERAL is supported. + # + # This relation is built using JOIN LATERAL, producing faster queries than a + # regular LIMIT + OFFSET approach. + def relation_with_join_lateral + projects_for_lateral = projects.select(:id).to_sql + + lateral = filtered_events + .limit(limit_for_join_lateral) + .where('events.project_id = projects_for_lateral.id') + .to_sql + + # The outer query does not need to re-apply the filters since the JOIN + # LATERAL body already takes care of this. + outer = base_relation + .from("(#{projects_for_lateral}) projects_for_lateral") + .joins("JOIN LATERAL (#{lateral}) AS #{Event.table_name} ON true") + + paginate_events(outer) + end + + def filtered_events + @filter ? @filter.apply_filter(base_relation) : base_relation + end + + def paginate_events(events) + events.limit(@limit).offset(@offset) + end + + def base_relation + # We want to have absolute control over the event queries being built, thus + # we're explicitly opting out of any default scopes that may be set. + Event.unscoped.recent + end + + def limit_for_join_lateral + # Applying the OFFSET on the inside of a JOIN LATERAL leads to incorrect + # results. To work around this we need to increase the inner limit for every + # page. + # + # This means that on page 1 we use LIMIT 20, and an outer OFFSET of 0. On + # page 2 we use LIMIT 40 and an outer OFFSET of 20. + @limit + @offset + end + + def current_page + (@offset / @limit) + 1 + end + + def projects + @projects.except(:order) + end +end diff --git a/changelogs/unreleased/use-a-specialized-class-for-querying-events.yml b/changelogs/unreleased/use-a-specialized-class-for-querying-events.yml new file mode 100644 index 00000000000..6c1ec10aa12 --- /dev/null +++ b/changelogs/unreleased/use-a-specialized-class-for-querying-events.yml @@ -0,0 +1,4 @@ +--- +title: Use a specialized class for querying events to improve performance +merge_request: +author: diff --git a/db/migrate/20170727123534_add_index_on_events_project_id_id.rb b/db/migrate/20170727123534_add_index_on_events_project_id_id.rb new file mode 100644 index 00000000000..1c4aaaf9dd6 --- /dev/null +++ b/db/migrate/20170727123534_add_index_on_events_project_id_id.rb @@ -0,0 +1,37 @@ +# See http://doc.gitlab.com/ce/development/migration_style_guide.html +# for more information on how to write migrations for GitLab. + +class AddIndexOnEventsProjectIdId < ActiveRecord::Migration + include Gitlab::Database::MigrationHelpers + + DOWNTIME = false + + COLUMNS = %i[project_id id].freeze + TABLES = %i[events events_for_migration].freeze + + disable_ddl_transaction! + + def up + TABLES.each do |table| + add_concurrent_index(table, COLUMNS) unless index_exists?(table, COLUMNS) + + # We remove the index _after_ adding the new one since MySQL doesn't let + # you remove an index when a foreign key exists for the same column. + if index_exists?(table, :project_id) + remove_concurrent_index(table, :project_id) + end + end + end + + def down + TABLES.each do |table| + unless index_exists?(table, :project_id) + add_concurrent_index(table, :project_id) + end + + unless index_exists?(table, COLUMNS) + remove_concurrent_index(table, COLUMNS) + end + end + end +end diff --git a/db/schema.rb b/db/schema.rb index 8a2df991f0d..d8e8ef41758 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -530,7 +530,7 @@ ActiveRecord::Schema.define(version: 20170807160457) do add_index "events", ["action"], name: "index_events_on_action", using: :btree add_index "events", ["author_id"], name: "index_events_on_author_id", using: :btree add_index "events", ["created_at"], name: "index_events_on_created_at", using: :btree - add_index "events", ["project_id"], name: "index_events_on_project_id", using: :btree + add_index "events", ["project_id", "id"], name: "index_events_on_project_id_and_id", using: :btree add_index "events", ["target_id"], name: "index_events_on_target_id", using: :btree add_index "events", ["target_type"], name: "index_events_on_target_type", using: :btree @@ -546,7 +546,7 @@ ActiveRecord::Schema.define(version: 20170807160457) do add_index "events_for_migration", ["action"], name: "index_events_for_migration_on_action", using: :btree add_index "events_for_migration", ["author_id"], name: "index_events_for_migration_on_author_id", using: :btree - add_index "events_for_migration", ["project_id"], name: "index_events_for_migration_on_project_id", using: :btree + add_index "events_for_migration", ["project_id", "id"], name: "index_events_for_migration_on_project_id_and_id", using: :btree add_index "events_for_migration", ["target_type", "target_id"], name: "index_events_for_migration_on_target_type_and_target_id", using: :btree create_table "feature_gates", force: :cascade do |t| diff --git a/lib/gitlab/database.rb b/lib/gitlab/database.rb index d7dab584a44..e001d25e7b7 100644 --- a/lib/gitlab/database.rb +++ b/lib/gitlab/database.rb @@ -25,6 +25,10 @@ module Gitlab database_version.match(/\A(?:PostgreSQL |)([^\s]+).*\z/)[1] end + def self.join_lateral_supported? + postgresql? && version.to_f >= 9.3 + end + def self.nulls_last_order(field, direction = 'ASC') order = "#{field} #{direction}" diff --git a/spec/lib/gitlab/database_spec.rb b/spec/lib/gitlab/database_spec.rb index c5f9aecd867..5fa94999d25 100644 --- a/spec/lib/gitlab/database_spec.rb +++ b/spec/lib/gitlab/database_spec.rb @@ -51,6 +51,28 @@ describe Gitlab::Database do end end + describe '.join_lateral_supported?' do + it 'returns false when using MySQL' do + allow(described_class).to receive(:postgresql?).and_return(false) + + expect(described_class.join_lateral_supported?).to eq(false) + end + + it 'returns false when using PostgreSQL 9.2' do + allow(described_class).to receive(:postgresql?).and_return(true) + allow(described_class).to receive(:version).and_return('9.2.1') + + expect(described_class.join_lateral_supported?).to eq(false) + end + + it 'returns true when using PostgreSQL 9.3.0 or newer' do + allow(described_class).to receive(:postgresql?).and_return(true) + allow(described_class).to receive(:version).and_return('9.3.0') + + expect(described_class.join_lateral_supported?).to eq(true) + end + end + describe '.nulls_last_order' do context 'when using PostgreSQL' do before do diff --git a/spec/models/event_collection_spec.rb b/spec/models/event_collection_spec.rb new file mode 100644 index 00000000000..e0a87c18cc7 --- /dev/null +++ b/spec/models/event_collection_spec.rb @@ -0,0 +1,51 @@ +require 'spec_helper' + +describe EventCollection do + describe '#to_a' do + let(:project) { create(:project_empty_repo) } + let(:projects) { Project.where(id: project.id) } + let(:user) { create(:user) } + + before do + 20.times do + event = create(:push_event, project: project, author: user) + + create(:push_event_payload, event: event) + end + + create(:closed_issue_event, project: project, author: user) + end + + it 'returns an Array of events' do + events = described_class.new(projects).to_a + + expect(events).to be_an_instance_of(Array) + end + + it 'applies a limit to the number of events' do + events = described_class.new(projects).to_a + + expect(events.length).to eq(20) + end + + it 'can paginate through events' do + events = described_class.new(projects, offset: 20).to_a + + expect(events.length).to eq(1) + end + + it 'returns an empty Array when crossing the maximum page number' do + events = described_class.new(projects, limit: 1, offset: 15).to_a + + expect(events).to be_empty + end + + it 'allows filtering of events using an EventFilter' do + filter = EventFilter.new(EventFilter.issue) + events = described_class.new(projects, filter: filter).to_a + + expect(events.length).to eq(1) + expect(events[0].action).to eq(Event::CLOSED) + end + end +end