Use latest_merge_request_diff association

Compared to the merge_request_diff association:

1. It's simpler to query. The query uses a foreign key to the
   merge_request_diffs table, so no ordering is necessary.
2. It's faster for preloading. The merge_request_diff association has to load
   every diff for the MRs in the set, then discard all but the most recent for
   each. This association means that Rails can just query for N diffs from N
   MRs.
3. It's more complicated to update. This is a bidirectional foreign key, so we
   need to update two tables when adding a diff record. This also means we need
   to handle this as a special case when importing a GitLab project.

There is some juggling with this association in the merge request model:

* `MergeRequest#latest_merge_request_diff` is _always_ the latest diff.
* `MergeRequest#merge_request_diff` reuses
  `MergeRequest#latest_merge_request_diff` unless:
    * Arguments are passed. These are typically to force-reload the association.
    * It doesn't exist. That means we might be trying to implicitly create a
      diff. This only seems to happen in specs.
    * The association is already loaded. This is important for the reasons
      explained in the comment, which I'll reiterate here: if we a) load a
      non-latest diff, then b) get its `merge_request`, then c) get that MR's
      `merge_request_diff`, we should get the diff we loaded in c), even though
      that's not the latest diff.

Basically, `MergeRequest#merge_request_diff` is the latest diff in most cases,
but not quite all.
This commit is contained in:
Sean McGivern 2017-11-15 17:22:18 +00:00
parent e548c61334
commit 991bf24ec8
15 changed files with 197 additions and 8 deletions

View file

@ -150,7 +150,7 @@ module IssuableCollections
when 'MergeRequest' when 'MergeRequest'
[ [
:source_project, :target_project, :author, :assignee, :labels, :milestone, :source_project, :target_project, :author, :assignee, :labels, :milestone,
head_pipeline: :project, target_project: :namespace, merge_request_diff: :merge_request_diff_commits head_pipeline: :project, target_project: :namespace, latest_merge_request_diff: :merge_request_diff_commits
] ]
end end
end end

View file

@ -243,7 +243,7 @@ module Ci
@merge_request ||= @merge_request ||=
begin begin
merge_requests = MergeRequest.includes(:merge_request_diff) merge_requests = MergeRequest.includes(:latest_merge_request_diff)
.where(source_branch: ref, .where(source_branch: ref,
source_project: pipeline.project) source_project: pipeline.project)
.reorder(iid: :desc) .reorder(iid: :desc)

View file

@ -0,0 +1,17 @@
module ManualInverseAssociation
extend ActiveSupport::Concern
module ClassMethods
def manual_inverse_association(association, inverse)
define_method(association) do |*args|
super(*args).tap do |value|
next unless value
child_association = value.association(inverse)
child_association.set_inverse_instance(self)
child_association.target = self
end
end
end
end
end

View file

@ -5,6 +5,8 @@ class MergeRequest < ActiveRecord::Base
include Referable include Referable
include IgnorableColumn include IgnorableColumn
include TimeTrackable include TimeTrackable
include ManualInverseAssociation
include EachBatch
ignore_column :locked_at, ignore_column :locked_at,
:ref_fetched :ref_fetched
@ -14,9 +16,28 @@ class MergeRequest < ActiveRecord::Base
belongs_to :merge_user, class_name: "User" belongs_to :merge_user, class_name: "User"
has_many :merge_request_diffs has_many :merge_request_diffs
has_one :merge_request_diff, has_one :merge_request_diff,
-> { order('merge_request_diffs.id DESC') }, inverse_of: :merge_request -> { order('merge_request_diffs.id DESC') }, inverse_of: :merge_request
belongs_to :latest_merge_request_diff, class_name: 'MergeRequestDiff'
manual_inverse_association :latest_merge_request_diff, :merge_request
# This is the same as latest_merge_request_diff unless:
# 1. There are arguments - in which case we might be trying to force-reload.
# 2. This association is already loaded.
# 3. The latest diff does not exist.
#
# The second one in particular is important - MergeRequestDiff#merge_request
# is the inverse of MergeRequest#merge_request_diff, which means it may not be
# the latest diff, because we could have loaded any diff from this particular
# MR. If we haven't already loaded a diff, then it's fine to load the latest.
def merge_request_diff(*args)
fallback = latest_merge_request_diff if args.empty? && !association(:merge_request_diff).loaded?
fallback || super
end
belongs_to :head_pipeline, foreign_key: "head_pipeline_id", class_name: "Ci::Pipeline" belongs_to :head_pipeline, foreign_key: "head_pipeline_id", class_name: "Ci::Pipeline"
has_many :events, as: :target, dependent: :destroy # rubocop:disable Cop/ActiveRecordDependent has_many :events, as: :target, dependent: :destroy # rubocop:disable Cop/ActiveRecordDependent
@ -167,6 +188,22 @@ class MergeRequest < ActiveRecord::Base
where("merge_requests.id IN (#{union.to_sql})") # rubocop:disable GitlabSecurity/SqlInjection where("merge_requests.id IN (#{union.to_sql})") # rubocop:disable GitlabSecurity/SqlInjection
end end
# This is used after project import, to reset the IDs to the correct
# values. It is not intended to be called without having already scoped the
# relation.
def self.set_latest_merge_request_diff_ids!
update = '
latest_merge_request_diff_id = (
SELECT MAX(id)
FROM merge_request_diffs
WHERE merge_requests.id = merge_request_diffs.merge_request_id
)'.squish
self.each_batch do |batch|
batch.update_all(update)
end
end
WIP_REGEX = /\A\s*(\[WIP\]\s*|WIP:\s*|WIP\s+)+\s*/i.freeze WIP_REGEX = /\A\s*(\[WIP\]\s*|WIP:\s*|WIP\s+)+\s*/i.freeze
def self.work_in_progress?(title) def self.work_in_progress?(title)

View file

@ -2,6 +2,7 @@ class MergeRequestDiff < ActiveRecord::Base
include Sortable include Sortable
include Importable include Importable
include Gitlab::EncodingHelper include Gitlab::EncodingHelper
include ManualInverseAssociation
# Prevent store of diff if commits amount more then 500 # Prevent store of diff if commits amount more then 500
COMMITS_SAFE_SIZE = 100 COMMITS_SAFE_SIZE = 100
@ -10,6 +11,8 @@ class MergeRequestDiff < ActiveRecord::Base
VALID_CLASSES = [Hash, Rugged::Patch, Rugged::Diff::Delta].freeze VALID_CLASSES = [Hash, Rugged::Patch, Rugged::Diff::Delta].freeze
belongs_to :merge_request belongs_to :merge_request
manual_inverse_association :merge_request, :merge_request_diff
has_many :merge_request_diff_files, -> { order(:merge_request_diff_id, :relative_order) } has_many :merge_request_diff_files, -> { order(:merge_request_diff_id, :relative_order) }
has_many :merge_request_diff_commits, -> { order(:merge_request_diff_id, :relative_order) } has_many :merge_request_diff_commits, -> { order(:merge_request_diff_id, :relative_order) }
@ -194,7 +197,7 @@ class MergeRequestDiff < ActiveRecord::Base
end end
def latest? def latest?
self == merge_request.merge_request_diff self.id == merge_request.latest_merge_request_diff_id
end end
def compare_with(sha) def compare_with(sha)

View file

@ -35,7 +35,7 @@ module MergeRequests
# target branch manually # target branch manually
def close_merge_requests def close_merge_requests
commit_ids = @commits.map(&:id) commit_ids = @commits.map(&:id)
merge_requests = @project.merge_requests.preload(:merge_request_diff).opened.where(target_branch: @branch_name).to_a merge_requests = @project.merge_requests.preload(:latest_merge_request_diff).opened.where(target_branch: @branch_name).to_a
merge_requests = merge_requests.select(&:diff_head_commit) merge_requests = merge_requests.select(&:diff_head_commit)
merge_requests = merge_requests.select do |merge_request| merge_requests = merge_requests.select do |merge_request|

View file

@ -0,0 +1,5 @@
---
title: Make finding most recent merge request diffs more efficient
merge_request:
author:
type: performance

View file

@ -0,0 +1,30 @@
# This is identical to the stolen background migration, which already has specs.
class PopulateMergeRequestsLatestMergeRequestDiffIdTakeTwo < ActiveRecord::Migration
include Gitlab::Database::MigrationHelpers
DOWNTIME = false
BATCH_SIZE = 1_000
class MergeRequest < ActiveRecord::Base
self.table_name = 'merge_requests'
include ::EachBatch
end
disable_ddl_transaction!
def up
Gitlab::BackgroundMigration.steal('PopulateMergeRequestsLatestMergeRequestDiffId')
update = '
latest_merge_request_diff_id = (
SELECT MAX(id)
FROM merge_request_diffs
WHERE merge_requests.id = merge_request_diffs.merge_request_id
)'.squish
MergeRequest.where(latest_merge_request_diff_id: nil).each_batch(of: BATCH_SIZE) do |relation|
relation.update_all(update)
end
end
end

View file

@ -21,7 +21,7 @@ module API
return merge_requests if args[:view] == 'simple' return merge_requests if args[:view] == 'simple'
merge_requests merge_requests
.preload(:notes, :author, :assignee, :milestone, :merge_request_diff, :labels, :timelogs) .preload(:notes, :author, :assignee, :milestone, :latest_merge_request_diff, :labels, :timelogs)
end end
params :merge_requests_params do params :merge_requests_params do

View file

@ -60,6 +60,8 @@ module Gitlab
end end
end end
@project.merge_requests.set_latest_merge_request_diff_ids!
@saved @saved
end end

View file

@ -88,6 +88,7 @@ merge_requests:
- metrics - metrics
- timelogs - timelogs
- head_pipeline - head_pipeline
- latest_merge_request_diff
merge_request_diff: merge_request_diff:
- merge_request - merge_request
- merge_request_diff_commits - merge_request_diff_commits

View file

@ -117,6 +117,12 @@ describe Gitlab::ImportExport::ProjectTreeRestorer do
expect(st_commits.first[:committed_date]).to be_kind_of(Time) expect(st_commits.first[:committed_date]).to be_kind_of(Time)
end end
it 'has the correct data for merge request latest_merge_request_diff' do
MergeRequest.find_each do |merge_request|
expect(merge_request.latest_merge_request_diff_id).to eq(merge_request.merge_request_diffs.maximum(:id))
end
end
it 'has labels associated to label links, associated to issues' do it 'has labels associated to label links, associated to issues' do
expect(Label.first.label_links.first.target).not_to be_nil expect(Label.first.label_links.first.target).not_to be_nil
end end

View file

@ -0,0 +1,51 @@
require 'spec_helper'
describe ManualInverseAssociation do
let(:model) do
Class.new(MergeRequest) do
belongs_to :manual_association, class_name: 'MergeRequestDiff', foreign_key: :latest_merge_request_diff_id
manual_inverse_association :manual_association, :merge_request
end
end
before do
stub_const("#{described_class}::Model", model)
end
let(:instance) { create(:merge_request).becomes(model) }
describe '.manual_inverse_association' do
context 'when the relation exists' do
before do
instance.create_merge_request_diff
instance.reload
end
it 'loads the relation' do
expect(instance.manual_association).to be_an_instance_of(MergeRequestDiff)
end
it 'does not perform extra queries after loading' do
instance.manual_association
expect { instance.manual_association.merge_request }
.not_to exceed_query_limit(0)
end
it 'passes arguments to the default association method, to allow reloading' do
query_count = ActiveRecord::QueryRecorder.new do
instance.manual_association
instance.manual_association(true)
end.count
expect(query_count).to eq(2)
end
end
context 'when the relation does not return a value' do
it 'does not try to set an inverse' do
expect(instance.manual_association).to be_nil
end
end
end
end

View file

@ -18,8 +18,8 @@ describe MergeRequestDiff do
let!(:first_diff) { mr.merge_request_diff } let!(:first_diff) { mr.merge_request_diff }
let!(:last_diff) { mr.create_merge_request_diff } let!(:last_diff) { mr.create_merge_request_diff }
it { expect(last_diff.latest?).to be_truthy } it { expect(last_diff.reload).to be_latest }
it { expect(first_diff.latest?).to be_falsey } it { expect(first_diff.reload).not_to be_latest }
end end
describe '#diffs' do describe '#diffs' do
@ -29,7 +29,7 @@ describe MergeRequestDiff do
context 'when the :ignore_whitespace_change option is set' do context 'when the :ignore_whitespace_change option is set' do
it 'creates a new compare object instead of loading from the DB' do it 'creates a new compare object instead of loading from the DB' do
expect(mr_diff).not_to receive(:load_diffs) expect(mr_diff).not_to receive(:load_diffs)
expect(Gitlab::Git::Compare).to receive(:new).and_call_original expect(mr_diff.compare).to receive(:diffs).and_call_original
mr_diff.raw_diffs(ignore_whitespace_change: true) mr_diff.raw_diffs(ignore_whitespace_change: true)
end end

View file

@ -79,6 +79,43 @@ describe MergeRequest do
end end
end end
describe '.set_latest_merge_request_diff_ids!' do
def create_merge_request_with_diffs(source_branch, diffs: 2)
params = {
target_project: project,
target_branch: 'master',
source_project: project,
source_branch: source_branch
}
create(:merge_request, params).tap do |mr|
diffs.times { mr.merge_request_diffs.create }
end
end
let(:project) { create(:project) }
it 'sets IDs for merge requests, whether they are already set or not' do
merge_requests = [
create_merge_request_with_diffs('feature'),
create_merge_request_with_diffs('feature-conflict'),
create_merge_request_with_diffs('wip', diffs: 0),
create_merge_request_with_diffs('csv')
]
merge_requests.take(2).each do |merge_request|
merge_request.update_column(:latest_merge_request_diff_id, nil)
end
expected = merge_requests.map do |merge_request|
merge_request.merge_request_diffs.maximum(:id)
end
expect { project.merge_requests.set_latest_merge_request_diff_ids! }
.to change { merge_requests.map { |mr| mr.reload.latest_merge_request_diff_id } }.to(expected)
end
end
describe '#target_branch_sha' do describe '#target_branch_sha' do
let(:project) { create(:project, :repository) } let(:project) { create(:project, :repository) }