Merge branch 'fix/handle-large-commit-tag-message' into 'master'

Add handling for commit/tags with big messages

See merge request gitlab-org/gitlab-ce!17892
This commit is contained in:
Robert Speicher 2018-05-18 19:28:25 +00:00
commit 076ab2e8e0
13 changed files with 310 additions and 32 deletions

View file

@ -10,3 +10,7 @@ lib/gitlab/background_migration/*
app/models/project_services/kubernetes_service.rb
lib/gitlab/workhorse.rb
lib/gitlab/ci/trace/chunked_io.rb
lib/gitlab/gitaly_client/ref_service.rb
lib/gitlab/gitaly_client/commit_service.rb
lib/gitlab/git/commit.rb
lib/gitlab/git/tag.rb

View file

@ -6,6 +6,7 @@ module Gitlab
attr_accessor :raw_commit, :head
MAX_COMMIT_MESSAGE_DISPLAY_SIZE = 10.megabytes
MIN_SHA_LENGTH = 7
SERIALIZE_KEYS = [
:id, :message, :parent_ids,
@ -63,9 +64,7 @@ module Gitlab
if is_enabled
repo.gitaly_commit_client.find_commit(commit_id)
else
obj = repo.rev_parse_target(commit_id)
obj.is_a?(Rugged::Commit) ? obj : nil
rugged_find(repo, commit_id)
end
end
@ -76,6 +75,12 @@ module Gitlab
nil
end
def rugged_find(repo, commit_id)
obj = repo.rev_parse_target(commit_id)
obj.is_a?(Rugged::Commit) ? obj : nil
end
# Get last commit for HEAD
#
# Ex.
@ -297,11 +302,40 @@ module Gitlab
nil
end
end
def get_message(repository, commit_id)
BatchLoader.for({ repository: repository, commit_id: commit_id }).batch do |items, loader|
items_by_repo = items.group_by { |i| i[:repository] }
items_by_repo.each do |repo, items|
commit_ids = items.map { |i| i[:commit_id] }
messages = get_messages(repository, commit_ids)
messages.each do |commit_sha, message|
loader.call({ repository: repository, commit_id: commit_sha }, message)
end
end
end
end
def get_messages(repository, commit_ids)
repository.gitaly_migrate(:commit_messages) do |is_enabled|
if is_enabled
repository.gitaly_commit_client.get_commit_messages(commit_ids)
else
commit_ids.map { |id| [id, rugged_find(repository, id).message] }.to_h
end
end
end
end
def initialize(repository, raw_commit, head = nil)
raise "Nil as raw commit passed" unless raw_commit
@repository = repository
@head = head
case raw_commit
when Hash
init_from_hash(raw_commit)
@ -312,9 +346,6 @@ module Gitlab
else
raise "Invalid raw commit type: #{raw_commit.class}"
end
@repository = repository
@head = head
end
def sha
@ -518,7 +549,7 @@ module Gitlab
# TODO: Once gitaly "takes over" Rugged consider separating the
# subject from the message to make it clearer when there's one
# available but not the other.
@message = (commit.body.presence || commit.subject).dup
@message = message_from_gitaly_body
@authored_date = Time.at(commit.author.date.seconds).utc
@author_name = commit.author.name.dup
@author_email = commit.author.email.dup
@ -570,6 +601,25 @@ module Gitlab
def refs(repo)
repo.refs_hash[id]
end
def message_from_gitaly_body
return @raw_commit.subject.dup if @raw_commit.body_size.zero?
return @raw_commit.body.dup if full_body_fetched_from_gitaly?
if @raw_commit.body_size > MAX_COMMIT_MESSAGE_DISPLAY_SIZE
"#{@raw_commit.subject}\n\n--commit message is too big".strip
else
fetch_body_from_gitaly
end
end
def full_body_fetched_from_gitaly?
@raw_commit.body.bytesize == @raw_commit.body_size
end
def fetch_body_from_gitaly
self.class.get_message(@repository, id)
end
end
end
end

View file

@ -1962,7 +1962,12 @@ module Gitlab
end
target_commit = Gitlab::Git::Commit.find(self, ref.target)
Gitlab::Git::Tag.new(self, ref.name, ref.target, target_commit, message)
Gitlab::Git::Tag.new(self, {
name: ref.name,
target: ref.target,
target_commit: target_commit,
message: message
})
end.sort_by(&:name)
end

View file

@ -35,7 +35,11 @@ module Gitlab
next if name =~ /\^\{\}\Z/
target_commit = Gitlab::Git::Commit.find(self, target)
Gitlab::Git::Tag.new(self, name, target, target_commit)
Gitlab::Git::Tag.new(self, {
name: name,
target: target,
target_commit: target_commit
})
end.compact
end

View file

@ -1,17 +1,99 @@
module Gitlab
module Git
class Tag < Ref
attr_reader :object_sha
extend Gitlab::EncodingHelper
attr_reader :object_sha, :repository
MAX_TAG_MESSAGE_DISPLAY_SIZE = 10.megabytes
SERIALIZE_KEYS = %i[name target target_commit message].freeze
attr_accessor *SERIALIZE_KEYS # rubocop:disable Lint/AmbiguousOperator
class << self
def get_message(repository, tag_id)
BatchLoader.for({ repository: repository, tag_id: tag_id }).batch do |items, loader|
items_by_repo = items.group_by { |i| i[:repository] }
items_by_repo.each do |repo, items|
tag_ids = items.map { |i| i[:tag_id] }
messages = get_messages(repository, tag_ids)
messages.each do |id, message|
loader.call({ repository: repository, tag_id: id }, message)
end
end
end
end
def get_messages(repository, tag_ids)
repository.gitaly_migrate(:tag_messages) do |is_enabled|
if is_enabled
repository.gitaly_ref_client.get_tag_messages(tag_ids)
else
tag_ids.map do |id|
tag = repository.rugged.lookup(id)
message = tag.is_a?(Rugged::Commit) ? "" : tag.message
[id, message]
end.to_h
end
end
end
end
def initialize(repository, raw_tag)
@repository = repository
@raw_tag = raw_tag
case raw_tag
when Hash
init_from_hash
when Gitaly::Tag
init_from_gitaly
end
def initialize(repository, name, target, target_commit, message = nil)
super(repository, name, target, target_commit)
end
@message = message
def init_from_hash
raw_tag = @raw_tag.symbolize_keys
SERIALIZE_KEYS.each do |key|
send("#{key}=", raw_tag[key]) # rubocop:disable GitlabSecurity/PublicSend
end
end
def init_from_gitaly
@name = encode!(@raw_tag.name.dup)
@target = @raw_tag.id
@message = message_from_gitaly_tag
if @raw_tag.target_commit.present?
@target_commit = Gitlab::Git::Commit.decorate(repository, @raw_tag.target_commit)
end
end
def message
encode! @message
end
private
def message_from_gitaly_tag
return @raw_tag.message.dup if full_message_fetched_from_gitaly?
if @raw_tag.message_size > MAX_TAG_MESSAGE_DISPLAY_SIZE
'--tag message is too big'
else
self.class.get_message(@repository, target)
end
end
def full_message_fetched_from_gitaly?
@raw_tag.message.bytesize == @raw_tag.message_size
end
end
end
end

View file

@ -334,6 +334,22 @@ module Gitlab
signatures
end
def get_commit_messages(commit_ids)
request = Gitaly::GetCommitMessagesRequest.new(repository: @gitaly_repo, commit_ids: commit_ids)
response = GitalyClient.call(@repository.storage, :commit_service, :get_commit_messages, request)
messages = Hash.new { |h, k| h[k] = ''.b }
current_commit_id = nil
response.each do |rpc_message|
current_commit_id = rpc_message.commit_id if rpc_message.commit_id.present?
messages[current_commit_id] << rpc_message.message
end
messages
end
private
def call_commit_diff(request_params, options = {})

View file

@ -40,7 +40,7 @@ module Gitlab
raise Gitlab::Git::Repository::TagExistsError
end
Util.gitlab_tag_from_gitaly_tag(@repository, response.tag)
Gitlab::Git::Tag.new(@repository, response.tag)
rescue GRPC::FailedPrecondition => e
raise Gitlab::Git::Repository::InvalidRef, e
end

View file

@ -171,6 +171,22 @@ module Gitlab
consume_ref_contains_sha_response(stream, :branch_names)
end
def get_tag_messages(tag_ids)
request = Gitaly::GetTagMessagesRequest.new(repository: @gitaly_repo, tag_ids: tag_ids)
response = GitalyClient.call(@repository.storage, :ref_service, :get_tag_messages, request)
messages = Hash.new { |h, k| h[k] = ''.b }
current_tag_id = nil
response.each do |rpc_message|
current_tag_id = rpc_message.tag_id if rpc_message.tag_id.present?
messages[current_tag_id] << rpc_message.message
end
messages
end
private
def consume_refs_response(response)
@ -210,7 +226,7 @@ module Gitlab
def consume_tags_response(response)
response.flat_map do |message|
message.tags.map { |gitaly_tag| Util.gitlab_tag_from_gitaly_tag(@repository, gitaly_tag) }
message.tags.map { |gitaly_tag| Gitlab::Git::Tag.new(@repository, gitaly_tag) }
end
end

View file

@ -21,20 +21,6 @@ module Gitlab
gitaly_repository.relative_path,
gitaly_repository.gl_repository)
end
def gitlab_tag_from_gitaly_tag(repository, gitaly_tag)
if gitaly_tag.target_commit.present?
commit = Gitlab::Git::Commit.decorate(repository, gitaly_tag.target_commit)
end
Gitlab::Git::Tag.new(
repository,
Gitlab::EncodingHelper.encode!(gitaly_tag.name.dup),
gitaly_tag.id,
commit,
Gitlab::EncodingHelper.encode!(gitaly_tag.message.chomp)
)
end
end
end
end

View file

@ -0,0 +1,9 @@
FactoryBot.define do
factory :gitaly_tag, class: Gitaly::Tag do
skip_create
name { 'v3.1.4' }
message { 'Pie release' }
target_commit factory: :gitaly_commit
end
end

View file

@ -66,7 +66,8 @@ describe Gitlab::Git::Commit, seed_helper: true do
describe "Commit info from gitaly commit" do
let(:subject) { "My commit".force_encoding('ASCII-8BIT') }
let(:body) { subject + "My body".force_encoding('ASCII-8BIT') }
let(:gitaly_commit) { build(:gitaly_commit, subject: subject, body: body) }
let(:body_size) { body.length }
let(:gitaly_commit) { build(:gitaly_commit, subject: subject, body: body, body_size: body_size) }
let(:id) { gitaly_commit.id }
let(:committer) { gitaly_commit.committer }
let(:author) { gitaly_commit.author }
@ -83,10 +84,30 @@ describe Gitlab::Git::Commit, seed_helper: true do
it { expect(commit.committer_email).to eq(committer.email) }
it { expect(commit.parent_ids).to eq(gitaly_commit.parent_ids) }
context 'no body' do
context 'body_size != body.size' do
let(:body) { "".force_encoding('ASCII-8BIT') }
it { expect(commit.safe_message).to eq(subject) }
context 'zero body_size' do
it { expect(commit.safe_message).to eq(subject) }
end
context 'body_size less than threshold' do
let(:body_size) { 123 }
it 'fetches commit message seperately' do
expect(described_class).to receive(:get_message).with(repository, id)
commit.safe_message
end
end
context 'body_size greater than threshold' do
let(:body_size) { described_class::MAX_COMMIT_MESSAGE_DISPLAY_SIZE + 1 }
it 'returns the suject plus a notice about message size' do
expect(commit.safe_message).to eq("My commit\n\n--commit message is too big")
end
end
end
end
@ -589,6 +610,35 @@ describe Gitlab::Git::Commit, seed_helper: true do
it { is_expected.not_to include("feature") }
end
describe '.get_message' do
let(:commit_ids) { %w[6d394385cf567f80a8fd85055db1ab4c5295806f cfe32cf61b73a0d5e9f13e774abde7ff789b1660] }
subject do
commit_ids.map { |id| described_class.get_message(repository, id) }
end
shared_examples 'getting commit messages' do
it 'gets commit messages' do
expect(subject).to contain_exactly(
"Added contributing guide\n\nSigned-off-by: Dmitriy Zaporozhets <dmitriy.zaporozhets@gmail.com>\n",
"Add submodule\n\nSigned-off-by: Dmitriy Zaporozhets <dmitriy.zaporozhets@gmail.com>\n"
)
end
end
context 'when Gitaly commit_messages feature is enabled' do
it_behaves_like 'getting commit messages'
it 'gets messages in one batch', :request_store do
expect { subject.map(&:itself) }.to change { Gitlab::GitalyClient.get_request_count }.by(1)
end
end
context 'when Gitaly commit_messages feature is disabled', :disable_gitaly do
it_behaves_like 'getting commit messages'
end
end
def sample_commit_hash
{
author_email: "dmitriy.zaporozhets@gmail.com",

View file

@ -32,4 +32,56 @@ describe Gitlab::Git::Tag, seed_helper: true do
context 'when Gitaly tags feature is disabled', :skip_gitaly_mock do
it_behaves_like 'Gitlab::Git::Repository#tags'
end
describe '.get_message' do
let(:tag_ids) { %w[f4e6814c3e4e7a0de82a9e7cd20c626cc963a2f8 8a2a6eb295bb170b34c24c76c49ed0e9b2eaf34b] }
subject do
tag_ids.map { |id| described_class.get_message(repository, id) }
end
shared_examples 'getting tag messages' do
it 'gets tag messages' do
expect(subject[0]).to eq("Release\n")
expect(subject[1]).to eq("Version 1.1.0\n")
end
end
context 'when Gitaly tag_messages feature is enabled' do
it_behaves_like 'getting tag messages'
it 'gets messages in one batch', :request_store do
expect { subject.map(&:itself) }.to change { Gitlab::GitalyClient.get_request_count }.by(1)
end
end
context 'when Gitaly tag_messages feature is disabled', :disable_gitaly do
it_behaves_like 'getting tag messages'
end
end
describe 'tag into from Gitaly tag' do
context 'message_size != message.size' do
let(:gitaly_tag) { build(:gitaly_tag, message: ''.b, message_size: message_size) }
let(:tag) { described_class.new(repository, gitaly_tag) }
context 'message_size less than threshold' do
let(:message_size) { 123 }
it 'fetches tag message seperately' do
expect(described_class).to receive(:get_message).with(repository, gitaly_tag.id)
tag.message
end
end
context 'message_size greater than threshold' do
let(:message_size) { described_class::MAX_TAG_MESSAGE_DISPLAY_SIZE + 1 }
it 'returns a notice about message size' do
expect(tag.message).to eq("--tag message is too big")
end
end
end
end
end

View file

@ -343,7 +343,11 @@ describe Projects::UpdateRemoteMirrorService do
tag = repository.find_tag(name)
target = tag.try(:target)
target_commit = tag.try(:dereferenced_target)
tags << Gitlab::Git::Tag.new(repository.raw_repository, name, target, target_commit)
tags << Gitlab::Git::Tag.new(repository.raw_repository, {
name: name,
target: target,
target_commit: target_commit
})
end
end