Don't remove failed install pods

We want to keep failed install pods around so that it is easier to debug
why a failure occured. With this change we also need to ensure that we
remove a previous pod with the same name before installing so that
re-install does not fail.

Another change here is that we no longer need to catch errors from
delete_pod! in CheckInstallationProgressService as we now catch the
ResourceNotFoundError in Helm::Api. The catch statement in
CheckInstallationProgressService was also probably too broad before and
should have been narrowed down simply to ResourceNotFoundError.
This commit is contained in:
Dylan Griffith 2018-11-26 14:54:44 +01:00
parent 242baf1287
commit 6a66e4a1f1
5 changed files with 47 additions and 26 deletions

View File

@ -29,17 +29,13 @@ module Clusters
end
def on_failed
app.make_errored!('Installation failed')
ensure
remove_installation_pod
app.make_errored!("Installation failed. Check pod logs for #{install_command.pod_name} for more details.")
end
def check_timeout
if timeouted?
begin
app.make_errored!('Installation timed out')
ensure
remove_installation_pod
app.make_errored!("Installation timed out. Check pod logs for #{install_command.pod_name} for more details.")
end
else
ClusterWaitForAppInstallationWorker.perform_in(
@ -53,9 +49,6 @@ module Clusters
def remove_installation_pod
helm_api.delete_pod!(install_command.pod_name)
rescue => e
Rails.logger.error("Kubernetes error: #{e.class.name} #{e.message}")
# no-op
end
def installation_phase

View File

@ -0,0 +1,5 @@
---
title: Don't remove failed install pods after installing GitLab managed applications
merge_request: 23350
author:
type: changed

View File

@ -16,12 +16,16 @@ module Gitlab
create_cluster_role_binding(command)
create_config_map(command)
delete_pod!(command.pod_name)
kubeclient.create_pod(command.pod_resource)
end
def update(command)
namespace.ensure_exists!
update_config_map(command)
delete_pod!(command.pod_name)
kubeclient.create_pod(command.pod_resource)
end
@ -42,6 +46,8 @@ module Gitlab
def delete_pod!(pod_name)
kubeclient.delete_pod(pod_name, namespace.name)
rescue ::Kubeclient::ResourceNotFoundError
# no-op
end
def get_config_map(config_map_name)

View File

@ -40,6 +40,7 @@ describe Gitlab::Kubernetes::Helm::Api do
allow(client).to receive(:create_config_map).and_return(nil)
allow(client).to receive(:create_service_account).and_return(nil)
allow(client).to receive(:create_cluster_role_binding).and_return(nil)
allow(client).to receive(:delete_pod).and_return(nil)
allow(namespace).to receive(:ensure_exists!).once
end
@ -50,6 +51,13 @@ describe Gitlab::Kubernetes::Helm::Api do
subject.install(command)
end
it 'removes an existing pod before installing' do
expect(client).to receive(:delete_pod).with('install-app-name', 'gitlab-managed-apps').once.ordered
expect(client).to receive(:create_pod).once.ordered
subject.install(command)
end
context 'with a ConfigMap' do
let(:resource) { Gitlab::Kubernetes::ConfigMap.new(application_name, files).generate }
@ -180,6 +188,7 @@ describe Gitlab::Kubernetes::Helm::Api do
allow(client).to receive(:update_config_map).and_return(nil)
allow(client).to receive(:create_pod).and_return(nil)
allow(client).to receive(:delete_pod).and_return(nil)
end
it 'ensures the namespace exists before creating the pod' do
@ -189,6 +198,13 @@ describe Gitlab::Kubernetes::Helm::Api do
subject.update(command)
end
it 'removes an existing pod before updating' do
expect(client).to receive(:delete_pod).with('upgrade-app-name', 'gitlab-managed-apps').once.ordered
expect(client).to receive(:create_pod).once.ordered
subject.update(command)
end
it 'updates the config map on kubeclient when one exists' do
resource = Gitlab::Kubernetes::ConfigMap.new(
application_name, files
@ -224,9 +240,18 @@ describe Gitlab::Kubernetes::Helm::Api do
describe '#delete_pod!' do
it 'deletes the POD from kubernetes cluster' do
expect(client).to receive(:delete_pod).with(command.pod_name, gitlab_namespace).once
expect(client).to receive(:delete_pod).with('install-app-name', 'gitlab-managed-apps').once
subject.delete_pod!(command.pod_name)
subject.delete_pod!('install-app-name')
end
context 'when the resource being deleted does not exist' do
it 'catches the error' do
expect(client).to receive(:delete_pod).with('install-app-name', 'gitlab-managed-apps')
.and_raise(Kubeclient::ResourceNotFoundError.new(404, 'Not found', nil))
subject.delete_pod!('install-app-name')
end
end
end

View File

@ -8,14 +8,6 @@ describe Clusters::Applications::CheckInstallationProgressService do
let(:phase) { Gitlab::Kubernetes::Pod::UNKNOWN }
let(:errors) { nil }
shared_examples 'a terminated installation' do
it 'removes the installation POD' do
expect(service).to receive(:remove_installation_pod).once
service.execute
end
end
shared_examples 'a not yet terminated installation' do |a_phase|
let(:phase) { a_phase }
@ -39,15 +31,13 @@ describe Clusters::Applications::CheckInstallationProgressService do
context 'when timeouted' do
let(:application) { create(:clusters_applications_helm, :timeouted) }
it_behaves_like 'a terminated installation'
it 'make the application errored' do
expect(ClusterWaitForAppInstallationWorker).not_to receive(:perform_in)
service.execute
expect(application).to be_errored
expect(application.status_reason).to match(/\btimed out\b/)
expect(application.status_reason).to eq("Installation timed out. Check pod logs for install-helm for more details.")
end
end
end
@ -66,7 +56,11 @@ describe Clusters::Applications::CheckInstallationProgressService do
expect(service).to receive(:installation_phase).once.and_return(phase)
end
it_behaves_like 'a terminated installation'
it 'removes the installation POD' do
expect(service).to receive(:remove_installation_pod).once
service.execute
end
it 'make the application installed' do
expect(ClusterWaitForAppInstallationWorker).not_to receive(:perform_in)
@ -86,13 +80,11 @@ describe Clusters::Applications::CheckInstallationProgressService do
expect(service).to receive(:installation_phase).once.and_return(phase)
end
it_behaves_like 'a terminated installation'
it 'make the application errored' do
service.execute
expect(application).to be_errored
expect(application.status_reason).to eq("Installation failed")
expect(application.status_reason).to eq("Installation failed. Check pod logs for install-helm for more details.")
end
end