Improve automated Review Apps cleanup

Signed-off-by: Rémy Coutable <remy@rymai.me>
This commit is contained in:
Rémy Coutable 2018-10-15 18:06:44 +02:00
parent fbb0f71237
commit 25d8c8d1f0
No known key found for this signature in database
GPG key ID: 98DFFD1C0C62B70B
5 changed files with 270 additions and 85 deletions

View file

@ -5,9 +5,13 @@ require_relative '../gitlab/popen' unless defined?(Gitlab::Popen)
module Quality
class HelmClient
CommandFailedError = Class.new(StandardError)
attr_reader :namespace
Release = Struct.new(:name, :revision, :last_update, :status, :chart, :namespace) do
RELEASE_JSON_ATTRIBUTES = %w[Name Revision Updated Status Chart AppVersion Namespace].freeze
Release = Struct.new(:name, :revision, :last_update, :status, :chart, :app_version, :namespace) do
def revision
@revision ||= self[:revision].to_i
end
@ -17,22 +21,24 @@ module Quality
end
end
def initialize(namespace: ENV['KUBE_NAMESPACE'])
# A single page of data and the corresponding page number.
Page = Struct.new(:releases, :number)
def initialize(namespace:)
@namespace = namespace
end
def releases(args: [])
command = ['list', %(--namespace "#{namespace}"), *args]
run_command(command)
.stdout
.lines
.select { |line| line.include?(namespace) }
.map { |line| Release.new(*line.split(/\t/).map(&:strip)) }
each_release(args)
end
def delete(release_name:)
run_command(['delete', '--purge', release_name])
run_command([
'delete',
%(--tiller-namespace "#{namespace}"),
'--purge',
release_name
])
end
private
@ -41,7 +47,67 @@ module Quality
final_command = ['helm', *command].join(' ')
puts "Running command: `#{final_command}`" # rubocop:disable Rails/Output
Gitlab::Popen.popen_with_detail([final_command])
result = Gitlab::Popen.popen_with_detail([final_command])
if result.status.success?
result.stdout.chomp.freeze
else
raise CommandFailedError, "The `#{final_command}` command failed (status: #{result.status}) with the following error:\n#{result.stderr}"
end
end
def raw_releases(args = [])
command = [
'list',
%(--namespace "#{namespace}"),
%(--tiller-namespace "#{namespace}" --output json),
*args
]
json = JSON.parse(run_command(command))
releases = json['Releases'].map do |json_release|
Release.new(*json_release.values_at(*RELEASE_JSON_ATTRIBUTES))
end
[releases, json['Next']]
rescue JSON::ParserError => ex
puts "Ignoring this JSON parsing error: #{ex}" # rubocop:disable Rails/Output
[[], nil]
end
# Fetches data from Helm and yields a Page object for every page
# of data, without loading all of them into memory.
#
# method - The Octokit method to use for getting the data.
# args - Arguments to pass to the `helm list` command.
def each_releases_page(args, &block)
return to_enum(__method__, args) unless block_given?
page = 1
offset = ''
loop do
final_args = args.dup
final_args << "--offset #{offset}" unless offset.to_s.empty?
collection, offset = raw_releases(final_args)
yield Page.new(collection, page += 1)
break if offset.to_s.empty?
end
end
# Iterates over all of the releases.
#
# args - Any arguments to pass to the `helm list` command.
def each_release(args, &block)
return to_enum(__method__, args) unless block_given?
each_releases_page(args) do |page|
page.releases.each do |release|
yield release
end
end
end
end
end

View file

@ -4,19 +4,22 @@ require_relative '../gitlab/popen' unless defined?(Gitlab::Popen)
module Quality
class KubernetesClient
CommandFailedError = Class.new(StandardError)
attr_reader :namespace
def initialize(namespace: ENV['KUBE_NAMESPACE'])
def initialize(namespace:)
@namespace = namespace
end
def cleanup(release_name:)
command = ['kubectl']
command << %(-n "#{namespace}" get ingress,svc,pdb,hpa,deploy,statefulset,job,pod,secret,configmap,pvc,secret,clusterrole,clusterrolebinding,role,rolebinding,sa 2>&1)
command << '|' << %(grep "#{release_name}")
command << '|' << "awk '{print $1}'"
command << '|' << %(xargs kubectl -n "#{namespace}" delete)
command << '||' << 'true'
command = [
%(--namespace "#{namespace}"),
'delete',
'ingress,svc,pdb,hpa,deploy,statefulset,job,pod,secret,configmap,pvc,secret,clusterrole,clusterrolebinding,role,rolebinding,sa',
'--now',
%(-l release="#{release_name}")
]
run_command(command)
end
@ -24,9 +27,16 @@ module Quality
private
def run_command(command)
puts "Running command: `#{command.join(' ')}`" # rubocop:disable Rails/Output
final_command = ['kubectl', *command].join(' ')
puts "Running command: `#{final_command}`" # rubocop:disable Rails/Output
Gitlab::Popen.popen_with_detail(command)
result = Gitlab::Popen.popen_with_detail([final_command])
if result.status.success?
result.stdout.chomp.freeze
else
raise CommandFailedError, "The `#{final_command}` command failed (status: #{result.status}) with the following error:\n#{result.stderr}"
end
end
end
end

View file

@ -5,12 +5,26 @@ require_relative File.expand_path('../../lib/quality/helm_client.rb', __dir__)
require_relative File.expand_path('../../lib/quality/kubernetes_client.rb', __dir__)
class AutomatedCleanup
attr_reader :project_path, :gitlab_token, :cleaned_up_releases
attr_reader :project_path, :gitlab_token
DEPLOYMENTS_PER_PAGE = 100
HELM_RELEASES_BATCH_SIZE = 5
IGNORED_HELM_ERRORS = [
'transport is closing',
'error upgrading connection'
].freeze
IGNORED_KUBERNETES_ERRORS = [
'NotFound'
].freeze
def self.ee?
ENV['CI_PROJECT_NAME'] == 'gitlab-ee' || File.exist?('CHANGELOG-EE.md')
end
def initialize(project_path: ENV['CI_PROJECT_PATH'], gitlab_token: ENV['GITLAB_BOT_REVIEW_APPS_CLEANUP_TOKEN'])
@project_path = project_path
@gitlab_token = gitlab_token
@cleaned_up_releases = []
ENV['TILLER_NAMESPACE'] ||= review_apps_namespace
end
def gitlab
@ -25,12 +39,16 @@ class AutomatedCleanup
end
end
def review_apps_namespace
self.class.ee? ? 'review-apps-ee' : 'review-apps-ce'
end
def helm
@helm ||= Quality::HelmClient.new
@helm ||= Quality::HelmClient.new(namespace: review_apps_namespace)
end
def kubernetes
@kubernetes ||= Quality::KubernetesClient.new
@kubernetes ||= Quality::KubernetesClient.new(namespace: review_apps_namespace)
end
def perform_gitlab_environment_cleanup!(days_for_stop:, days_for_delete:)
@ -39,26 +57,27 @@ class AutomatedCleanup
checked_environments = []
delete_threshold = threshold_time(days: days_for_delete)
stop_threshold = threshold_time(days: days_for_stop)
gitlab.deployments(project_path, per_page: 50).auto_paginate do |deployment|
next unless deployment.environment.name.start_with?('review/')
next if checked_environments.include?(deployment.environment.slug)
puts
gitlab.deployments(project_path, per_page: DEPLOYMENTS_PER_PAGE).auto_paginate do |deployment|
environment = deployment.environment
checked_environments << deployment.environment.slug
deployed_at = Time.parse(deployment.created_at)
next unless environment.name.start_with?('review/')
next if checked_environments.include?(environment.slug)
last_deploy = deployment.created_at
deployed_at = Time.parse(last_deploy)
if deployed_at < delete_threshold
print_release_state(subject: 'Review app', release_name: deployment.environment.slug, release_date: deployment.created_at, action: 'deleting')
gitlab.delete_environment(project_path, deployment.environment.id)
cleaned_up_releases << deployment.environment.slug
delete_environment(environment, deployment)
release = Quality::HelmClient::Release.new(environment.slug, 1, deployed_at.to_s, nil, nil, review_apps_namespace)
delete_helm_release(release)
elsif deployed_at < stop_threshold
print_release_state(subject: 'Review app', release_name: deployment.environment.slug, release_date: deployment.created_at, action: 'stopping')
gitlab.stop_environment(project_path, deployment.environment.id)
cleaned_up_releases << deployment.environment.slug
stop_environment(environment, deployment)
else
print_release_state(subject: 'Review app', release_name: deployment.environment.slug, release_date: deployment.created_at, action: 'leaving')
print_release_state(subject: 'Review app', release_name: environment.slug, release_date: last_deploy, action: 'leaving')
end
checked_environments << environment.slug
end
end
@ -66,25 +85,58 @@ class AutomatedCleanup
puts "Checking for Helm releases not updated in the last #{days} days..."
threshold_day = threshold_time(days: days)
helm.releases(args: ['--deployed', '--failed', '--date', '--reverse', '--max 25']).each do |release|
next if cleaned_up_releases.include?(release.name)
if release.last_update < threshold_day
print_release_state(subject: 'Release', release_name: release.name, release_date: release.last_update, action: 'cleaning')
helm.delete(release_name: release.name)
kubernetes.cleanup(release_name: release.name)
helm_releases.each do |release|
if release.status == 'FAILED' || release.last_update < threshold_day
delete_helm_release(release)
else
print_release_state(subject: 'Release', release_name: release.name, release_date: release.last_update, action: 'leaving')
end
end
end
private
def delete_environment(environment, deployment)
print_release_state(subject: 'Review app', release_name: environment.slug, release_date: deployment.created_at, action: 'deleting')
gitlab.delete_environment(project_path, environment.id)
end
def stop_environment(environment, deployment)
print_release_state(subject: 'Review app', release_name: environment.slug, release_date: deployment.created_at, action: 'stopping')
gitlab.stop_environment(project_path, environment.id)
end
def helm_releases
args = ['--all', '--date', "--max #{HELM_RELEASES_BATCH_SIZE}"]
helm.releases(args: args)
end
def delete_helm_release(release)
print_release_state(subject: 'Release', release_name: release.name, release_status: release.status, release_date: release.last_update, action: 'cleaning')
helm.delete(release_name: release.name)
kubernetes.cleanup(release_name: release.name)
rescue Quality::HelmClient::CommandFailedError => ex
raise ex unless ignore_exception?(ex.message, IGNORED_HELM_ERRORS)
puts "Ignoring the following Helm error:\n#{ex}\n"
rescue Quality::KubernetesClient::CommandFailedError => ex
raise ex unless ignore_exception?(ex.message, IGNORED_KUBERNETES_ERRORS)
puts "Ignoring the following Kubernetes error:\n#{ex}\n"
end
def threshold_time(days:)
Time.now - days * 24 * 3600
end
def print_release_state(subject:, release_name:, release_date:, action:)
puts "\n#{subject} '#{release_name}' was last deployed on #{release_date}: #{action} it."
def ignore_exception?(exception_message, exceptions_ignored)
exception_message.match?(/(#{exceptions_ignored})/)
end
def print_release_state(subject:, release_name:, release_date:, action:, release_status: nil)
puts "\n#{subject} '#{release_name}' #{"(#{release_status}) " if release_status}was last deployed on #{release_date}: #{action} it.\n"
end
end

View file

@ -1,62 +1,111 @@
# frozen_string_literal: true
require 'spec_helper'
require 'fast_spec_helper'
RSpec.describe Quality::HelmClient do
let(:namespace) { 'review-apps-ee' }
let(:release_name) { 'my-release' }
let(:raw_helm_list_result) do
let(:raw_helm_list_page1) do
<<~OUTPUT
NAME REVISION UPDATED STATUS CHART NAMESPACE
review-improve-re-2dsd9d 1 Tue Jul 31 15:53:17 2018 FAILED gitlab-0.3.4 #{namespace}
review-11-1-stabl-3r2fso 1 Mon Jul 30 22:44:14 2018 FAILED gitlab-0.3.3 #{namespace}
review-49375-css-fk664j 1 Thu Jul 19 11:01:30 2018 FAILED gitlab-0.2.4 #{namespace}
{"Next":"review-6709-group-t40qbv",
"Releases":[
{"Name":"review-qa-60-reor-1mugd1", "Revision":1,"Updated":"Thu Oct 4 17:52:31 2018","Status":"FAILED", "Chart":"gitlab-1.1.3","AppVersion":"master","Namespace":"#{namespace}"},
{"Name":"review-7846-fix-s-261vd6","Revision":1,"Updated":"Thu Oct 4 17:33:29 2018","Status":"FAILED","Chart":"gitlab-1.1.3","AppVersion":"master","Namespace":"#{namespace}"},
{"Name":"review-7867-snowp-lzo3iy","Revision":1,"Updated":"Thu Oct 4 17:22:14 2018","Status":"DEPLOYED","Chart":"gitlab-1.1.3","AppVersion":"master","Namespace":"#{namespace}"},
{"Name":"review-rename-geo-o4a780","Revision":1,"Updated":"Thu Oct 4 17:14:57 2018","Status":"DEPLOYED","Chart":"gitlab-1.1.3","AppVersion":"master","Namespace":"#{namespace}"},
{"Name":"review-5781-opera-0k93fx","Revision":1,"Updated":"Thu Oct 4 17:06:15 2018","Status":"FAILED","Chart":"gitlab-1.1.3","AppVersion":"master","Namespace":"#{namespace}"},
{"Name":"review-6709-group-2pzeec","Revision":1,"Updated":"Thu Oct 4 16:36:59 2018","Status":"FAILED","Chart":"gitlab-1.1.3","AppVersion":"master","Namespace":"#{namespace}"},
{"Name":"review-ce-to-ee-2-l554mn","Revision":1,"Updated":"Thu Oct 4 16:27:02 2018","Status":"FAILED","Chart":"gitlab-1.1.3","AppVersion":"master","Namespace":"#{namespace}"},
{"Name":"review-epics-e2e-m690eb","Revision":1,"Updated":"Thu Oct 4 16:08:26 2018","Status":"DEPLOYED","Chart":"gitlab-1.1.3","AppVersion":"master","Namespace":"#{namespace}"},
{"Name":"review-7126-admin-06fae2","Revision":1,"Updated":"Thu Oct 4 15:56:35 2018","Status":"FAILED","Chart":"gitlab-1.1.3","AppVersion":"master","Namespace":"#{namespace}"},
{"Name":"review-6983-promo-xyou11","Revision":1,"Updated":"Thu Oct 4 15:15:34 2018","Status":"FAILED","Chart":"gitlab-1.1.3","AppVersion":"master","Namespace":"#{namespace}"}
]}
OUTPUT
end
let(:raw_helm_list_page2) do
<<~OUTPUT
{"Releases":[
{"Name":"review-6709-group-t40qbv","Revision":1,"Updated":"Thu Oct 4 17:52:31 2018","Status":"FAILED","Chart":"gitlab-1.1.3","AppVersion":"master","Namespace":"#{namespace}"}
]}
OUTPUT
end
subject { described_class.new(namespace: namespace) }
describe '#releases' do
it 'calls helm list with default arguments' do
it 'raises an error if the Helm command fails' do
expect(Gitlab::Popen).to receive(:popen_with_detail)
.with([%(helm list --namespace "#{namespace}")])
.and_return(Gitlab::Popen::Result.new([], ''))
.with([%(helm list --namespace "#{namespace}" --tiller-namespace "#{namespace}" --output json)])
.and_return(Gitlab::Popen::Result.new([], '', '', double(success?: false)))
subject.releases
expect { subject.releases.to_a }.to raise_error(described_class::CommandFailedError)
end
it 'calls helm list with given arguments' do
it 'calls helm list with default arguments' do
expect(Gitlab::Popen).to receive(:popen_with_detail)
.with([%(helm list --namespace "#{namespace}" --deployed)])
.and_return(Gitlab::Popen::Result.new([], ''))
.with([%(helm list --namespace "#{namespace}" --tiller-namespace "#{namespace}" --output json)])
.and_return(Gitlab::Popen::Result.new([], '', '', double(success?: true)))
subject.releases(args: ['--deployed'])
subject.releases.to_a
end
it 'calls helm list with extra arguments' do
expect(Gitlab::Popen).to receive(:popen_with_detail)
.with([%(helm list --namespace "#{namespace}" --tiller-namespace "#{namespace}" --output json --deployed)])
.and_return(Gitlab::Popen::Result.new([], '', '', double(success?: true)))
subject.releases(args: ['--deployed']).to_a
end
it 'returns a list of Release objects' do
expect(Gitlab::Popen).to receive(:popen_with_detail)
.with([%(helm list --namespace "#{namespace}" --deployed)])
.and_return(Gitlab::Popen::Result.new([], raw_helm_list_result))
.with([%(helm list --namespace "#{namespace}" --tiller-namespace "#{namespace}" --output json --deployed)])
.and_return(Gitlab::Popen::Result.new([], raw_helm_list_page2, '', double(success?: true)))
releases = subject.releases(args: ['--deployed'])
releases = subject.releases(args: ['--deployed']).to_a
expect(releases.size).to eq(3)
expect(releases[0].name).to eq('review-improve-re-2dsd9d')
expect(releases[0].revision).to eq(1)
expect(releases[0].last_update).to eq(Time.parse('Tue Jul 31 15:53:17 2018'))
expect(releases[0].status).to eq('FAILED')
expect(releases[0].chart).to eq('gitlab-0.3.4')
expect(releases[0].namespace).to eq(namespace)
expect(releases.size).to eq(1)
expect(releases[0]).to have_attributes(
name: 'review-6709-group-t40qbv',
revision: 1,
last_update: Time.parse('Thu Oct 4 17:52:31 2018'),
status: 'FAILED',
chart: 'gitlab-1.1.3',
app_version: 'master',
namespace: namespace
)
end
it 'automatically paginates releases' do
expect(Gitlab::Popen).to receive(:popen_with_detail).ordered
.with([%(helm list --namespace "#{namespace}" --tiller-namespace "#{namespace}" --output json)])
.and_return(Gitlab::Popen::Result.new([], raw_helm_list_page1, '', double(success?: true)))
expect(Gitlab::Popen).to receive(:popen_with_detail).ordered
.with([%(helm list --namespace "#{namespace}" --tiller-namespace "#{namespace}" --output json --offset review-6709-group-t40qbv)])
.and_return(Gitlab::Popen::Result.new([], raw_helm_list_page2, '', double(success?: true)))
releases = subject.releases.to_a
expect(releases.size).to eq(11)
expect(releases.last.name).to eq('review-6709-group-t40qbv')
end
end
describe '#delete' do
it 'raises an error if the Helm command fails' do
expect(Gitlab::Popen).to receive(:popen_with_detail)
.with([%(helm delete --tiller-namespace "#{namespace}" --purge #{release_name})])
.and_return(Gitlab::Popen::Result.new([], '', '', double(success?: false)))
expect { subject.delete(release_name: release_name) }.to raise_error(described_class::CommandFailedError)
end
it 'calls helm delete with default arguments' do
expect(Gitlab::Popen).to receive(:popen_with_detail)
.with(["helm delete --purge #{release_name}"])
.and_return(Gitlab::Popen::Result.new([], '', '', 0))
.with([%(helm delete --tiller-namespace "#{namespace}" --purge #{release_name})])
.and_return(Gitlab::Popen::Result.new([], '', '', double(success?: true)))
expect(subject.delete(release_name: release_name).status).to eq(0)
expect(subject.delete(release_name: release_name)).to eq('')
end
end
end

View file

@ -1,25 +1,33 @@
# frozen_string_literal: true
require 'spec_helper'
require 'fast_spec_helper'
RSpec.describe Quality::KubernetesClient do
subject { described_class.new(namespace: 'review-apps-ee') }
let(:namespace) { 'review-apps-ee' }
let(:release_name) { 'my-release' }
subject { described_class.new(namespace: namespace) }
describe '#cleanup' do
it 'calls kubectl with the correct arguments' do
# popen_with_detail will receive an array with a bunch of arguments; we're
# only concerned with it having the correct namespace and release name
expect(Gitlab::Popen).to receive(:popen_with_detail) do |args|
expect(args)
.to satisfy_one { |arg| arg.start_with?('-n "review-apps-ee" get') }
expect(args)
.to satisfy_one { |arg| arg == 'grep "my-release"' }
expect(args)
.to satisfy_one { |arg| arg.end_with?('-n "review-apps-ee" delete') }
it 'raises an error if the Kubernetes command fails' do
expect(Gitlab::Popen).to receive(:popen_with_detail)
.with([%(kubectl --namespace "#{namespace}" delete ) \
'ingress,svc,pdb,hpa,deploy,statefulset,job,pod,secret,configmap,pvc,secret,clusterrole,clusterrolebinding,role,rolebinding,sa ' \
"--now -l release=\"#{release_name}\""])
.and_return(Gitlab::Popen::Result.new([], '', '', double(success?: false)))
expect { subject.cleanup(release_name: release_name) }.to raise_error(described_class::CommandFailedError)
end
it 'calls kubectl with the correct arguments' do
expect(Gitlab::Popen).to receive(:popen_with_detail)
.with([%(kubectl --namespace "#{namespace}" delete ) \
'ingress,svc,pdb,hpa,deploy,statefulset,job,pod,secret,configmap,pvc,secret,clusterrole,clusterrolebinding,role,rolebinding,sa ' \
"--now -l release=\"#{release_name}\""])
.and_return(Gitlab::Popen::Result.new([], '', '', double(success?: true)))
# We're not verifying the output here, just silencing it
expect { subject.cleanup(release_name: 'my-release') }.to output.to_stdout
expect { subject.cleanup(release_name: release_name) }.to output.to_stdout
end
end
end