Enrich commits with full data in CommitCollection

Allow incomplete commit records to load their full data from gitaly.

Commits can be based on a Hash of data retrieved from PostgreSQL, and
this data can be intentionally incomplete in order to save space.

A new method #gitaly? has been added to Gitlab::Git::Commit, which
returns true if the underlying data source of the Commit is a
Gitaly::GitCommit.

CommitCollection now has a method #enrich which replaces non-gitaly
commits in place with commits from gitaly.

CommitCollection#without_merge_commits has been updated to call this
method, as in order to determine a merge commit we need to have parent
data.

Fixes https://gitlab.com/gitlab-org/gitlab-ce/issues/58805
This commit is contained in:
Luke Duncalfe 2019-03-14 17:20:40 +13:00
parent dd43abecf9
commit 38bf176c3c
6 changed files with 155 additions and 30 deletions

View file

@ -28,10 +28,39 @@ class CommitCollection
def without_merge_commits def without_merge_commits
strong_memoize(:without_merge_commits) do strong_memoize(:without_merge_commits) do
commits.reject(&:merge_commit?) # `#enrich!` the collection to ensure all commits contain
# the necessary parent data
enrich!.commits.reject(&:merge_commit?)
end end
end end
def unenriched
commits.reject(&:gitaly_commit?)
end
def fully_enriched?
unenriched.empty?
end
# Batch load any commits that are not backed by full gitaly data, and
# replace them in the collection.
def enrich!
return self if fully_enriched?
# Batch load full Commits from the repository
# and map to a Hash of id => Commit
replacements = Hash[unenriched.map do |c|
[c.id, Commit.lazy(project, c.id)]
end.compact]
# Replace the commits, keeping the same order
@commits = @commits.map do |c|
replacements.fetch(c.id, c)
end
self
end
# Sets the pipeline status for every commit. # Sets the pipeline status for every commit.
# #
# Setting this status ahead of time removes the need for running a query for # Setting this status ahead of time removes the need for running a query for

View file

@ -311,6 +311,10 @@ module Gitlab
parent_ids.size > 1 parent_ids.size > 1
end end
def gitaly_commit?
raw_commit.is_a?(Gitaly::GitCommit)
end
def tree_entry(path) def tree_entry(path)
return unless path.present? return unless path.present?
@ -333,7 +337,7 @@ module Gitlab
end end
def to_gitaly_commit def to_gitaly_commit
return raw_commit if raw_commit.is_a?(Gitaly::GitCommit) return raw_commit if gitaly_commit?
message_split = raw_commit.message.split("\n", 2) message_split = raw_commit.message.split("\n", 2)
Gitaly::GitCommit.new( Gitaly::GitCommit.new(

View file

@ -537,6 +537,18 @@ describe Gitlab::Git::Commit, :seed_helper do
end end
end end
describe '#gitaly_commit?' do
context 'when the commit data comes from gitaly' do
it { expect(commit.gitaly_commit?).to eq(true) }
end
context 'when the commit data comes from a Hash' do
let(:commit) { described_class.new(repository, sample_commit_hash) }
it { expect(commit.gitaly_commit?).to eq(false) }
end
end
describe '#has_zero_stats?' do describe '#has_zero_stats?' do
it { expect(commit.has_zero_stats?).to eq(false) } it { expect(commit.has_zero_stats?).to eq(false) }
end end

View file

@ -37,12 +37,92 @@ describe CommitCollection do
describe '#without_merge_commits' do describe '#without_merge_commits' do
it 'returns all commits except merge commits' do it 'returns all commits except merge commits' do
merge_commit = project.commit("60ecb67744cb56576c30214ff52294f8ce2def98")
expect(merge_commit).to receive(:merge_commit?).and_return(true)
collection = described_class.new(project, [ collection = described_class.new(project, [
build(:commit), commit,
build(:commit, :merge_commit) merge_commit
]) ])
expect(collection.without_merge_commits.size).to eq(1) expect(collection.without_merge_commits).to contain_exactly(commit)
end
end
describe 'enrichment methods' do
let(:gitaly_commit) { commit }
let(:hash_commit) { Commit.from_hash(gitaly_commit.to_hash, project) }
describe '#unenriched' do
it 'returns all commits that are not backed by gitaly data' do
collection = described_class.new(project, [gitaly_commit, hash_commit])
expect(collection.unenriched).to contain_exactly(hash_commit)
end
end
describe '#fully_enriched?' do
it 'returns true when all commits are backed by gitaly data' do
collection = described_class.new(project, [gitaly_commit, gitaly_commit])
expect(collection.fully_enriched?).to eq(true)
end
it 'returns false when any commits are not backed by gitaly data' do
collection = described_class.new(project, [gitaly_commit, hash_commit])
expect(collection.fully_enriched?).to eq(false)
end
it 'returns true when the collection is empty' do
collection = described_class.new(project, [])
expect(collection.fully_enriched?).to eq(true)
end
end
describe '#enrich!' do
it 'replaces commits in the collection with those backed by gitaly data' do
collection = described_class.new(project, [hash_commit])
collection.enrich!
new_commit = collection.commits.first
expect(new_commit.id).to eq(hash_commit.id)
expect(hash_commit.gitaly_commit?).to eq(false)
expect(new_commit.gitaly_commit?).to eq(true)
end
it 'maintains the original order of the commits' do
gitaly_commits = [gitaly_commit] * 3
hash_commits = [hash_commit] * 3
# Interleave the gitaly and hash commits together
original_commits = gitaly_commits.zip(hash_commits).flatten
collection = described_class.new(project, original_commits)
collection.enrich!
original_commits.each_with_index do |original_commit, i|
new_commit = collection.commits[i]
expect(original_commit.id).to eq(new_commit.id)
end
end
it 'fetches data if there are unenriched commits' do
collection = described_class.new(project, [hash_commit])
expect(Commit).to receive(:lazy).exactly(:once)
collection.enrich!
end
it 'does not fetch data if all commits are enriched' do
collection = described_class.new(project, [gitaly_commit])
expect(Commit).not_to receive(:lazy)
collection.enrich!
end
end end
end end

View file

@ -84,32 +84,27 @@ describe MergeRequest do
describe '#default_squash_commit_message' do describe '#default_squash_commit_message' do
let(:project) { subject.project } let(:project) { subject.project }
let(:is_multiline) { -> (c) { c.description.present? } }
def commit_collection(commit_hashes) let(:multiline_commits) { subject.commits.select(&is_multiline) }
raw_commits = commit_hashes.map { |raw| Commit.from_hash(raw, project) } let(:singleline_commits) { subject.commits.reject(&is_multiline) }
CommitCollection.new(project, raw_commits)
end
it 'returns the oldest multiline commit message' do it 'returns the oldest multiline commit message' do
commits = commit_collection([ expect(subject.default_squash_commit_message).to eq(multiline_commits.last.message)
{ message: 'Singleline', parent_ids: [] },
{ message: "Second multiline\nCommit message", parent_ids: [] },
{ message: "First multiline\nCommit message", parent_ids: [] }
])
expect(subject).to receive(:commits).and_return(commits)
expect(subject.default_squash_commit_message).to eq("First multiline\nCommit message")
end end
it 'returns the merge request title if there are no multiline commits' do it 'returns the merge request title if there are no multiline commits' do
commits = commit_collection([ expect(subject).to receive(:commits).and_return(
{ message: 'Singleline', parent_ids: [] } CommitCollection.new(project, singleline_commits)
]) )
expect(subject).to receive(:commits).and_return(commits) expect(subject.default_squash_commit_message).to eq(subject.title)
end
it 'does not return commit messages from multiline merge commits' do
collection = CommitCollection.new(project, multiline_commits).enrich!
expect(collection.commits).to all( receive(:merge_commit?).and_return(true) )
expect(subject).to receive(:commits).and_return(collection)
expect(subject.default_squash_commit_message).to eq(subject.title) expect(subject.default_squash_commit_message).to eq(subject.title)
end end
end end

View file

@ -279,13 +279,18 @@ describe MergeRequestWidgetEntity do
end end
describe 'commits_without_merge_commits' do describe 'commits_without_merge_commits' do
it 'should not include merge commits' do def find_matching_commit(short_id)
# Mock all but the first 5 commits to be merge commits resource.commits.find { |c| c.short_id == short_id }
resource.commits.each_with_index do |commit, i| end
expect(commit).to receive(:merge_commit?).at_least(:once).and_return(i > 4)
end
expect(subject[:commits_without_merge_commits].size).to eq(5) it 'should not include merge commits' do
commits_in_widget = subject[:commits_without_merge_commits]
expect(commits_in_widget.length).to be < resource.commits.length
expect(commits_in_widget.length).to eq(resource.commits.without_merge_commits.length)
commits_in_widget.each do |c|
expect(find_matching_commit(c[:short_id]).merge_commit?).to eq(false)
end
end end
end end
end end