Add latest changes from gitlab-org/gitlab@master
This commit is contained in:
parent
8423ed74e6
commit
d4aaea5efa
3 changed files with 141 additions and 3 deletions
5
changelogs/unreleased/bvl-robust-gpg-homedir-cleanup.yml
Normal file
5
changelogs/unreleased/bvl-robust-gpg-homedir-cleanup.yml
Normal file
|
@ -0,0 +1,5 @@
|
|||
---
|
||||
title: Improve handling of gpg-agent processes
|
||||
merge_request: 19311
|
||||
author:
|
||||
type: changed
|
|
@ -4,6 +4,10 @@ module Gitlab
|
|||
module Gpg
|
||||
extend self
|
||||
|
||||
CleanupError = Class.new(StandardError)
|
||||
BG_CLEANUP_RUNTIME_S = 2
|
||||
FG_CLEANUP_RUNTIME_S = 0.5
|
||||
|
||||
MUTEX = Mutex.new
|
||||
|
||||
module CurrentKeyChain
|
||||
|
@ -94,16 +98,55 @@ module Gitlab
|
|||
previous_dir = current_home_dir
|
||||
tmp_dir = Dir.mktmpdir
|
||||
GPGME::Engine.home_dir = tmp_dir
|
||||
tmp_keychains_created.increment
|
||||
|
||||
yield
|
||||
ensure
|
||||
# Ignore any errors when removing the tmp directory, as we may run into a
|
||||
GPGME::Engine.home_dir = previous_dir
|
||||
|
||||
begin
|
||||
cleanup_tmp_dir(tmp_dir)
|
||||
rescue CleanupError => e
|
||||
# This means we left a GPG-agent process hanging. Logging the problem in
|
||||
# sentry will make this more visible.
|
||||
Gitlab::Sentry.track_exception(e,
|
||||
issue_url: 'https://gitlab.com/gitlab-org/gitlab/issues/20918',
|
||||
extra: { tmp_dir: tmp_dir })
|
||||
end
|
||||
|
||||
tmp_keychains_removed.increment unless File.exist?(tmp_dir)
|
||||
end
|
||||
|
||||
def cleanup_tmp_dir(tmp_dir)
|
||||
return FileUtils.remove_entry(tmp_dir, true) if Feature.disabled?(:gpg_cleanup_retries)
|
||||
|
||||
# Retry when removing the tmp directory failed, as we may run into a
|
||||
# race condition:
|
||||
# The `gpg-agent` agent process may clean up some files as well while
|
||||
# `FileUtils.remove_entry` is iterating the directory and removing all
|
||||
# its contained files and directories recursively, which could raise an
|
||||
# error.
|
||||
FileUtils.remove_entry(tmp_dir, true)
|
||||
GPGME::Engine.home_dir = previous_dir
|
||||
# Failing to remove the tmp directory could leave the `gpg-agent` process
|
||||
# running forever.
|
||||
Retriable.retriable(max_elapsed_time: cleanup_time, base_interval: 0.1) do
|
||||
FileUtils.remove_entry(tmp_dir) if File.exist?(tmp_dir)
|
||||
end
|
||||
rescue => e
|
||||
raise CleanupError, e
|
||||
end
|
||||
|
||||
def cleanup_time
|
||||
Sidekiq.server? ? BG_CLEANUP_RUNTIME_S : FG_CLEANUP_RUNTIME_S
|
||||
end
|
||||
|
||||
def tmp_keychains_created
|
||||
@tmp_keychains_created ||= Gitlab::Metrics.counter(:gpg_tmp_keychains_created_total,
|
||||
'The number of temporary GPG keychains created')
|
||||
end
|
||||
|
||||
def tmp_keychains_removed
|
||||
@tmp_keychains_removed ||= Gitlab::Metrics.counter(:gpg_tmp_keychains_removed_total,
|
||||
'The number of temporary GPG keychains removed')
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
@ -139,6 +139,96 @@ describe Gitlab::Gpg do
|
|||
end
|
||||
end.not_to raise_error
|
||||
end
|
||||
|
||||
it 'keeps track of created and removed keychains in counters' do
|
||||
created = Gitlab::Metrics.counter(:gpg_tmp_keychains_created_total, 'The number of temporary GPG keychains')
|
||||
removed = Gitlab::Metrics.counter(:gpg_tmp_keychains_removed_total, 'The number of temporary GPG keychains')
|
||||
|
||||
initial_created = created.get
|
||||
initial_removed = removed.get
|
||||
|
||||
described_class.using_tmp_keychain do
|
||||
expect(created.get).to eq(initial_created + 1)
|
||||
expect(removed.get).to eq(initial_removed)
|
||||
end
|
||||
|
||||
expect(removed.get).to eq(initial_removed + 1)
|
||||
end
|
||||
|
||||
it 'cleans up the tmp directory after finishing' do
|
||||
tmp_directory = nil
|
||||
|
||||
described_class.using_tmp_keychain do
|
||||
tmp_directory = described_class.current_home_dir
|
||||
expect(File.exist?(tmp_directory)).to be true
|
||||
end
|
||||
|
||||
expect(tmp_directory).not_to be_nil
|
||||
expect(File.exist?(tmp_directory)).to be false
|
||||
end
|
||||
|
||||
it 'does not fail if the homedir was deleted while running' do
|
||||
expect do
|
||||
described_class.using_tmp_keychain do
|
||||
FileUtils.remove_entry(described_class.current_home_dir)
|
||||
end
|
||||
end.not_to raise_error
|
||||
end
|
||||
|
||||
shared_examples 'multiple deletion attempts of the tmp-dir' do |seconds|
|
||||
let(:tmp_dir) do
|
||||
tmp_dir = Dir.mktmpdir
|
||||
allow(Dir).to receive(:mktmpdir).and_return(tmp_dir)
|
||||
tmp_dir
|
||||
end
|
||||
|
||||
before do
|
||||
# Stub all the other calls for `remove_entry`
|
||||
allow(FileUtils).to receive(:remove_entry).with(any_args).and_call_original
|
||||
end
|
||||
|
||||
it "tries for #{seconds}" do
|
||||
expect(Retriable).to receive(:retriable).with(a_hash_including(max_elapsed_time: seconds))
|
||||
|
||||
described_class.using_tmp_keychain {}
|
||||
end
|
||||
|
||||
it 'tries at least 2 times to remove the tmp dir before raising', :aggregate_failures do
|
||||
expect(Retriable).to receive(:sleep).at_least(2).times
|
||||
expect(FileUtils).to receive(:remove_entry).with(tmp_dir).at_least(2).times.and_raise('Deletion failed')
|
||||
|
||||
expect { described_class.using_tmp_keychain { } }.to raise_error(described_class::CleanupError)
|
||||
end
|
||||
|
||||
it 'does not attempt multiple times when the deletion succeeds' do
|
||||
expect(Retriable).to receive(:sleep).once
|
||||
expect(FileUtils).to receive(:remove_entry).with(tmp_dir).once.and_raise('Deletion failed')
|
||||
expect(FileUtils).to receive(:remove_entry).with(tmp_dir).and_call_original
|
||||
|
||||
expect { described_class.using_tmp_keychain { } }.not_to raise_error
|
||||
|
||||
expect(File.exist?(tmp_dir)).to be false
|
||||
end
|
||||
|
||||
it 'does not retry when the feature flag is disabled' do
|
||||
stub_feature_flags(gpg_cleanup_retries: false)
|
||||
|
||||
expect(FileUtils).to receive(:remove_entry).with(tmp_dir, true).and_call_original
|
||||
expect(Retriable).not_to receive(:retriable)
|
||||
|
||||
described_class.using_tmp_keychain {}
|
||||
end
|
||||
end
|
||||
|
||||
it_behaves_like 'multiple deletion attempts of the tmp-dir', described_class::FG_CLEANUP_RUNTIME_S
|
||||
|
||||
context 'when running in Sidekiq' do
|
||||
before do
|
||||
allow(Sidekiq).to receive(:server?).and_return(true)
|
||||
end
|
||||
|
||||
it_behaves_like 'multiple deletion attempts of the tmp-dir', described_class::BG_CLEANUP_RUNTIME_S
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
|
|
Loading…
Reference in a new issue