Move repository checksum calculation into Gitlab::Git::Repository#checksum

What this patch does:
  * Deletes Gitlab::Git::Checksum and its spec.
  * Adds Gitlab::Git::Repository#checksum, which asks Gitaly for the checksum
    when the :calculate_checksum feature is enabled and otherwise shells out
    to `git show-ref --heads --tags`.
  * Moves EMPTY_REPOSITORY_CHECKSUM into Gitlab::Git::Repository and replaces
    Gitlab::Git::Checksum::Failure with Gitlab::Git::Repository::ChecksumError.
  * Adds Gitlab::GitalyClient::RepositoryService#calculate_checksum.
  * Adds specs for both code paths and a changelog entry.

NOTE(review): line breaks and indentation in this patch were reconstructed
from a whitespace-collapsed copy. Verify context and removed lines against the
target tree before applying; the blob ids on the `index` lines are unverified.

diff --git a/changelogs/unreleased/da-gitaly-calculate-repository-checksum.yml b/changelogs/unreleased/da-gitaly-calculate-repository-checksum.yml
new file mode 100644
index 00000000000..de09f87a7c9
--- /dev/null
+++ b/changelogs/unreleased/da-gitaly-calculate-repository-checksum.yml
@@ -0,0 +1,5 @@
+---
+title: Repository checksum calculation is handled by Gitaly when feature is enabled
+merge_request:
+author:
+type: changed
diff --git a/lib/gitlab/git/checksum.rb b/lib/gitlab/git/checksum.rb
deleted file mode 100644
index 3ef0f0a8854..00000000000
--- a/lib/gitlab/git/checksum.rb
+++ /dev/null
@@ -1,82 +0,0 @@
-module Gitlab
-  module Git
-    class Checksum
-      include Gitlab::Git::Popen
-
-      EMPTY_REPOSITORY_CHECKSUM = '0000000000000000000000000000000000000000'.freeze
-
-      Failure = Class.new(StandardError)
-
-      attr_reader :path, :relative_path, :storage, :storage_path
-
-      def initialize(storage, relative_path)
-        @storage = storage
-        @storage_path = Gitlab.config.repositories.storages[storage].legacy_disk_path
-        @relative_path = "#{relative_path}.git"
-        @path = File.join(storage_path, @relative_path)
-      end
-
-      def calculate
-        unless repository_exists?
-          failure!(Gitlab::Git::Repository::NoRepository, 'No repository for such path')
-        end
-
-        calculate_checksum_by_shelling_out
-      end
-
-      private
-
-      def repository_exists?
-        raw_repository.exists?
-      end
-
-      def calculate_checksum_by_shelling_out
-        args = %W(--git-dir=#{path} show-ref --heads --tags)
-        output, status = run_git(args)
-
-        if status&.zero?
-          refs = output.split("\n")
-
-          result = refs.inject(nil) do |checksum, ref|
-            value = Digest::SHA1.hexdigest(ref).hex
-
-            if checksum.nil?
-              value
-            else
-              checksum ^ value
-            end
-          end
-
-          result.to_s(16)
-        else
-          # Empty repositories return with a non-zero status and an empty output.
-          if output&.empty?
-            EMPTY_REPOSITORY_CHECKSUM
-          else
-            failure!(Gitlab::Git::Checksum::Failure, output)
-          end
-        end
-      end
-
-      def failure!(klass, message)
-        Gitlab::GitLogger.error("'git show-ref --heads --tags' in #{path}: #{message}")
-
-        raise klass.new("Could not calculate the checksum for #{path}: #{message}")
-      end
-
-      def circuit_breaker
-        @circuit_breaker ||= Gitlab::Git::Storage::CircuitBreaker.for_storage(storage)
-      end
-
-      def raw_repository
-        Gitlab::Git::Repository.new(storage, relative_path, nil)
-      end
-
-      def run_git(args)
-        circuit_breaker.perform do
-          popen([Gitlab.config.git.bin_path, *args], path)
-        end
-      end
-    end
-  end
-end
diff --git a/lib/gitlab/git/repository.rb b/lib/gitlab/git/repository.rb
index 5678c28cf3a..79cacd9f6f5 100644
--- a/lib/gitlab/git/repository.rb
+++ b/lib/gitlab/git/repository.rb
@@ -23,6 +23,7 @@ module Gitlab
       SQUASH_WORKTREE_PREFIX = 'squash'.freeze
       GITALY_INTERNAL_URL = 'ssh://gitaly/internal.git'.freeze
       GITLAB_PROJECTS_TIMEOUT = Gitlab.config.gitlab_shell.git_timeout
+      EMPTY_REPOSITORY_CHECKSUM = '0000000000000000000000000000000000000000'.freeze
 
       NoRepository = Class.new(StandardError)
       InvalidBlobName = Class.new(StandardError)
@@ -31,6 +32,7 @@ module Gitlab
       DeleteBranchError = Class.new(StandardError)
       CreateTreeError = Class.new(StandardError)
       TagExistsError = Class.new(StandardError)
+      ChecksumError = Class.new(StandardError)
 
       class << self
         # Unlike `new`, `create` takes the repository path
@@ -1502,6 +1504,18 @@ module Gitlab
         FileUtils.rm_rf(worktree_git_path) if worktree_git_path && File.exist?(worktree_git_path)
       end
 
+      # Returns the repository checksum. Gitaly computes it when the
+      # :calculate_checksum feature is enabled; otherwise we shell out to git.
+      def checksum
+        gitaly_migrate(:calculate_checksum) do |is_enabled|
+          if is_enabled
+            gitaly_repository_client.calculate_checksum
+          else
+            calculate_checksum_by_shelling_out
+          end
+        end
+      end
+
       private
 
       def local_write_ref(ref_path, ref, old_ref: nil, shell: true)
@@ -2420,6 +2434,39 @@ module Gitlab
       def sha_from_ref(ref)
         rev_parse_target(ref).oid
       end
+
+      # XORs the SHA1 of every `git show-ref --heads --tags` output line and
+      # returns the result as a hex string.
+      #
+      # Raises NoRepository when the repository does not exist, and
+      # ChecksumError when git exits unsuccessfully with anything but empty output.
+      def calculate_checksum_by_shelling_out
+        raise NoRepository unless exists?
+
+        args = %W(--git-dir=#{path} show-ref --heads --tags)
+        output, status = run_git(args)
+
+        if status.nil? || !status.zero?
+          # Empty repositories return with a non-zero status and an empty output.
+          return EMPTY_REPOSITORY_CHECKSUM if output&.empty?
+
+          raise ChecksumError, output
+        end
+
+        refs = output.split("\n")
+
+        result = refs.inject(nil) do |checksum, ref|
+          value = Digest::SHA1.hexdigest(ref).hex
+
+          if checksum.nil?
+            value
+          else
+            checksum ^ value
+          end
+        end
+
+        result.to_s(16)
+      end
     end
   end
 end
diff --git a/lib/gitlab/gitaly_client/repository_service.rb b/lib/gitlab/gitaly_client/repository_service.rb
index b5a734aaef6..6441065f5fe 100644
--- a/lib/gitlab/gitaly_client/repository_service.rb
+++ b/lib/gitlab/gitaly_client/repository_service.rb
@@ -262,6 +262,14 @@ module Gitlab
 
         response.license_short_name.presence
       end
+
+      # Sends a CalculateChecksumRequest for this repository to Gitaly and
+      # returns the checksum from the response (nil when it is blank).
+      def calculate_checksum
+        request = Gitaly::CalculateChecksumRequest.new(repository: @gitaly_repo)
+        response = GitalyClient.call(@storage, :repository_service, :calculate_checksum, request)
+        response.checksum.presence
+      end
     end
   end
 end
diff --git a/spec/lib/gitlab/git/checksum_spec.rb b/spec/lib/gitlab/git/checksum_spec.rb
deleted file mode 100644
index 8ff310905bf..00000000000
--- a/spec/lib/gitlab/git/checksum_spec.rb
+++ /dev/null
@@ -1,38 +0,0 @@
-require 'spec_helper'
-
-describe Gitlab::Git::Checksum, seed_helper: true do
-  let(:storage) { 'default' }
-
-  it 'raises Gitlab::Git::Repository::NoRepository when there is no repo' do
-    checksum = described_class.new(storage, 'nonexistent-repo')
-
-    expect { checksum.calculate }.to raise_error Gitlab::Git::Repository::NoRepository
-  end
-
-  it 'pretends that checksum is 000000... when the repo is empty' do
-    FileUtils.rm_rf(File.join(SEED_STORAGE_PATH, 'empty-repo.git'))
-
-    system(git_env, *%W(#{Gitlab.config.git.bin_path} init --bare empty-repo.git),
-           chdir: SEED_STORAGE_PATH,
-           out: '/dev/null',
-           err: '/dev/null')
-
-    checksum = described_class.new(storage, 'empty-repo')
-
-    expect(checksum.calculate).to eq '0000000000000000000000000000000000000000'
-  end
-
-  it 'raises Gitlab::Git::Repository::Failure when shelling out to git return non-zero status' do
-    checksum = described_class.new(storage, 'gitlab-git-test')
-
-    allow(checksum).to receive(:popen).and_return(['output', nil])
-
-    expect { checksum.calculate }.to raise_error Gitlab::Git::Checksum::Failure
-  end
-
-  it 'calculates the checksum when there is a repo' do
-    checksum = described_class.new(storage, 'gitlab-git-test')
-
-    expect(checksum.calculate).to eq '54f21be4c32c02f6788d72207fa03ad3bce725e4'
-  end
-end
diff --git a/spec/lib/gitlab/git/repository_spec.rb b/spec/lib/gitlab/git/repository_spec.rb
index f934bf9a6b0..d3ab61746f4 100644
--- a/spec/lib/gitlab/git/repository_spec.rb
+++ b/spec/lib/gitlab/git/repository_spec.rb
@@ -2184,6 +2184,56 @@ describe Gitlab::Git::Repository, seed_helper: true do
     end
   end
 
+  describe '#checksum' do
+    shared_examples 'calculating checksum' do
+      it 'calculates the checksum for non-empty repo' do
+        expect(repository.checksum).to eq '54f21be4c32c02f6788d72207fa03ad3bce725e4'
+      end
+
+      it 'returns 0000000000000000000000000000000000000000 for an empty repo' do
+        FileUtils.rm_rf(File.join(storage_path, 'empty-repo.git'))
+
+        system(git_env, *%W(#{Gitlab.config.git.bin_path} init --bare empty-repo.git),
+               chdir: storage_path,
+               out: '/dev/null',
+               err: '/dev/null')
+
+        empty_repo = described_class.new('default', 'empty-repo.git', '')
+
+        expect(empty_repo.checksum).to eq '0000000000000000000000000000000000000000'
+      end
+
+      it 'raises a no repository exception when there is no repo' do
+        broken_repo = described_class.new('default', 'a/path.git', '')
+
+        expect { broken_repo.checksum }.to raise_error(Gitlab::Git::Repository::NoRepository)
+      end
+    end
+
+    context 'when calculate_checksum Gitaly feature is enabled' do
+      it_behaves_like 'calculating checksum'
+    end
+
+    context 'when calculate_checksum Gitaly feature is disabled', :disable_gitaly do
+      it_behaves_like 'calculating checksum'
+
+      describe 'when storage is broken', :broken_storage do
+        it 'raises a storage exception when storage is not available' do
+          broken_repo = described_class.new('broken', 'a/path.git', '')
+
+          # NOTE(review): this exercises `rugged`, not `checksum` - confirm that is intended.
+          expect { broken_repo.rugged }.to raise_error(Gitlab::Git::Storage::Inaccessible)
+        end
+      end
+
+      it "raises a Gitlab::Git::Repository::ChecksumError error if the `popen` call to git returns a non-zero exit code" do
+        allow(repository).to receive(:popen).and_return(['output', nil])
+
+        expect { repository.checksum }.to raise_error Gitlab::Git::Repository::ChecksumError
+      end
+    end
+  end
+
   context 'gitlab_projects commands' do
     let(:gitlab_projects) { repository.gitlab_projects }
     let(:timeout) { Gitlab.config.gitlab_shell.git_timeout }
diff --git a/spec/lib/gitlab/gitaly_client/repository_service_spec.rb b/spec/lib/gitlab/gitaly_client/repository_service_spec.rb
index 111b1b35fa0..21592688bf0 100644
--- a/spec/lib/gitlab/gitaly_client/repository_service_spec.rb
+++ b/spec/lib/gitlab/gitaly_client/repository_service_spec.rb
@@ -134,4 +134,15 @@ describe Gitlab::GitalyClient::RepositoryService do
       client.squash_in_progress?(squash_id)
     end
   end
+
+  describe '#calculate_checksum' do
+    it 'sends a calculate_checksum message' do
+      expect_any_instance_of(Gitaly::RepositoryService::Stub)
+        .to receive(:calculate_checksum)
+        .with(gitaly_request_with_path(storage_name, relative_path), kind_of(Hash))
+        .and_return(double(checksum: 0))
+
+      client.calculate_checksum
+    end
+  end
 end