From 25d8c8d1f0846f563745da99e4e16fba8c268b36 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9my=20Coutable?= Date: Mon, 15 Oct 2018 18:06:44 +0200 Subject: [PATCH] Improve automated Review Apps cleanup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Rémy Coutable --- lib/quality/helm_client.rb | 88 +++++++++++++++--- lib/quality/kubernetes_client.rb | 28 ++++-- scripts/review_apps/automated_cleanup.rb | 102 +++++++++++++++----- spec/lib/quality/helm_client_spec.rb | 103 +++++++++++++++------ spec/lib/quality/kubernetes_client_spec.rb | 34 ++++--- 5 files changed, 270 insertions(+), 85 deletions(-) diff --git a/lib/quality/helm_client.rb b/lib/quality/helm_client.rb index 49d953da681..cf1f03b35b5 100644 --- a/lib/quality/helm_client.rb +++ b/lib/quality/helm_client.rb @@ -5,9 +5,13 @@ require_relative '../gitlab/popen' unless defined?(Gitlab::Popen) module Quality class HelmClient + CommandFailedError = Class.new(StandardError) + attr_reader :namespace - Release = Struct.new(:name, :revision, :last_update, :status, :chart, :namespace) do + RELEASE_JSON_ATTRIBUTES = %w[Name Revision Updated Status Chart AppVersion Namespace].freeze + + Release = Struct.new(:name, :revision, :last_update, :status, :chart, :app_version, :namespace) do def revision @revision ||= self[:revision].to_i end @@ -17,22 +21,24 @@ module Quality end end - def initialize(namespace: ENV['KUBE_NAMESPACE']) + # A single page of data and the corresponding page number. 
+ Page = Struct.new(:releases, :number) + + def initialize(namespace:) @namespace = namespace end def releases(args: []) - command = ['list', %(--namespace "#{namespace}"), *args] - - run_command(command) - .stdout - .lines - .select { |line| line.include?(namespace) } - .map { |line| Release.new(*line.split(/\t/).map(&:strip)) } + each_release(args) end def delete(release_name:) - run_command(['delete', '--purge', release_name]) + run_command([ + 'delete', + %(--tiller-namespace "#{namespace}"), + '--purge', + release_name + ]) end private @@ -41,7 +47,67 @@ module Quality final_command = ['helm', *command].join(' ') puts "Running command: `#{final_command}`" # rubocop:disable Rails/Output - Gitlab::Popen.popen_with_detail([final_command]) + result = Gitlab::Popen.popen_with_detail([final_command]) + + if result.status.success? + result.stdout.chomp.freeze + else + raise CommandFailedError, "The `#{final_command}` command failed (status: #{result.status}) with the following error:\n#{result.stderr}" + end + end + + def raw_releases(args = []) + command = [ + 'list', + %(--namespace "#{namespace}"), + %(--tiller-namespace "#{namespace}" --output json), + *args + ] + json = JSON.parse(run_command(command)) + + releases = json['Releases'].map do |json_release| + Release.new(*json_release.values_at(*RELEASE_JSON_ATTRIBUTES)) + end + + [releases, json['Next']] + rescue JSON::ParserError => ex + puts "Ignoring this JSON parsing error: #{ex}" # rubocop:disable Rails/Output + [[], nil] + end + + # Fetches data from Helm and yields a Page object for every page + # of data, without loading all of them into memory. + # + # Paging follows the `Next` offset returned by `helm list` (sent back as `--offset`). + # args - Arguments to pass to the `helm list` command. + def each_releases_page(args, &block) + return to_enum(__method__, args) unless block_given? + + page = 1 + offset = '' + + loop do + final_args = args.dup + final_args << "--offset #{offset}" unless offset.to_s.empty? 
+ collection, offset = raw_releases(final_args) + + yield Page.new(collection, page += 1) + + break if offset.to_s.empty? + end + end + + # Iterates over all of the releases. + # + # args - Any arguments to pass to the `helm list` command. + def each_release(args, &block) + return to_enum(__method__, args) unless block_given? + + each_releases_page(args) do |page| + page.releases.each do |release| + yield release + end + end end end end diff --git a/lib/quality/kubernetes_client.rb b/lib/quality/kubernetes_client.rb index e366a688e3e..2ff9e811425 100644 --- a/lib/quality/kubernetes_client.rb +++ b/lib/quality/kubernetes_client.rb @@ -4,19 +4,22 @@ require_relative '../gitlab/popen' unless defined?(Gitlab::Popen) module Quality class KubernetesClient + CommandFailedError = Class.new(StandardError) + attr_reader :namespace - def initialize(namespace: ENV['KUBE_NAMESPACE']) + def initialize(namespace:) @namespace = namespace end def cleanup(release_name:) - command = ['kubectl'] - command << %(-n "#{namespace}" get ingress,svc,pdb,hpa,deploy,statefulset,job,pod,secret,configmap,pvc,secret,clusterrole,clusterrolebinding,role,rolebinding,sa 2>&1) - command << '|' << %(grep "#{release_name}") - command << '|' << "awk '{print $1}'" - command << '|' << %(xargs kubectl -n "#{namespace}" delete) - command << '||' << 'true' + command = [ + %(--namespace "#{namespace}"), + 'delete', + 'ingress,svc,pdb,hpa,deploy,statefulset,job,pod,secret,configmap,pvc,secret,clusterrole,clusterrolebinding,role,rolebinding,sa', + '--now', + %(-l release="#{release_name}") + ] run_command(command) end @@ -24,9 +27,16 @@ module Quality private def run_command(command) - puts "Running command: `#{command.join(' ')}`" # rubocop:disable Rails/Output + final_command = ['kubectl', *command].join(' ') + puts "Running command: `#{final_command}`" # rubocop:disable Rails/Output - Gitlab::Popen.popen_with_detail(command) + result = Gitlab::Popen.popen_with_detail([final_command]) + + if 
result.status.success? + result.stdout.chomp.freeze + else + raise CommandFailedError, "The `#{final_command}` command failed (status: #{result.status}) with the following error:\n#{result.stderr}" + end end end end diff --git a/scripts/review_apps/automated_cleanup.rb b/scripts/review_apps/automated_cleanup.rb index a5f0ec372d8..4166070f7cd 100755 --- a/scripts/review_apps/automated_cleanup.rb +++ b/scripts/review_apps/automated_cleanup.rb @@ -5,12 +5,26 @@ require_relative File.expand_path('../../lib/quality/helm_client.rb', __dir__) require_relative File.expand_path('../../lib/quality/kubernetes_client.rb', __dir__) class AutomatedCleanup - attr_reader :project_path, :gitlab_token, :cleaned_up_releases + attr_reader :project_path, :gitlab_token + + DEPLOYMENTS_PER_PAGE = 100 + HELM_RELEASES_BATCH_SIZE = 5 + IGNORED_HELM_ERRORS = [ + 'transport is closing', + 'error upgrading connection' + ].freeze + IGNORED_KUBERNETES_ERRORS = [ + 'NotFound' + ].freeze + + def self.ee? + ENV['CI_PROJECT_NAME'] == 'gitlab-ee' || File.exist?('CHANGELOG-EE.md') + end def initialize(project_path: ENV['CI_PROJECT_PATH'], gitlab_token: ENV['GITLAB_BOT_REVIEW_APPS_CLEANUP_TOKEN']) @project_path = project_path @gitlab_token = gitlab_token - @cleaned_up_releases = [] + ENV['TILLER_NAMESPACE'] ||= review_apps_namespace end def gitlab @@ -25,12 +39,16 @@ class AutomatedCleanup end end + def review_apps_namespace + self.class.ee? ? 
'review-apps-ee' : 'review-apps-ce' + end + def helm - @helm ||= Quality::HelmClient.new + @helm ||= Quality::HelmClient.new(namespace: review_apps_namespace) end def kubernetes - @kubernetes ||= Quality::KubernetesClient.new + @kubernetes ||= Quality::KubernetesClient.new(namespace: review_apps_namespace) end def perform_gitlab_environment_cleanup!(days_for_stop:, days_for_delete:) @@ -39,26 +57,27 @@ class AutomatedCleanup checked_environments = [] delete_threshold = threshold_time(days: days_for_delete) stop_threshold = threshold_time(days: days_for_stop) - gitlab.deployments(project_path, per_page: 50).auto_paginate do |deployment| - next unless deployment.environment.name.start_with?('review/') - next if checked_environments.include?(deployment.environment.slug) - puts + gitlab.deployments(project_path, per_page: DEPLOYMENTS_PER_PAGE).auto_paginate do |deployment| + environment = deployment.environment - checked_environments << deployment.environment.slug - deployed_at = Time.parse(deployment.created_at) + next unless environment.name.start_with?('review/') + next if checked_environments.include?(environment.slug) + + last_deploy = deployment.created_at + deployed_at = Time.parse(last_deploy) if deployed_at < delete_threshold - print_release_state(subject: 'Review app', release_name: deployment.environment.slug, release_date: deployment.created_at, action: 'deleting') - gitlab.delete_environment(project_path, deployment.environment.id) - cleaned_up_releases << deployment.environment.slug + delete_environment(environment, deployment) + release = Quality::HelmClient::Release.new(environment.slug, 1, deployed_at.to_s, nil, nil, review_apps_namespace) + delete_helm_release(release) elsif deployed_at < stop_threshold - print_release_state(subject: 'Review app', release_name: deployment.environment.slug, release_date: deployment.created_at, action: 'stopping') - gitlab.stop_environment(project_path, deployment.environment.id) - cleaned_up_releases << 
deployment.environment.slug + stop_environment(environment, deployment) else - print_release_state(subject: 'Review app', release_name: deployment.environment.slug, release_date: deployment.created_at, action: 'leaving') + print_release_state(subject: 'Review app', release_name: environment.slug, release_date: last_deploy, action: 'leaving') end + + checked_environments << environment.slug end end @@ -66,25 +85,58 @@ class AutomatedCleanup puts "Checking for Helm releases not updated in the last #{days} days..." threshold_day = threshold_time(days: days) - helm.releases(args: ['--deployed', '--failed', '--date', '--reverse', '--max 25']).each do |release| - next if cleaned_up_releases.include?(release.name) - if release.last_update < threshold_day - print_release_state(subject: 'Release', release_name: release.name, release_date: release.last_update, action: 'cleaning') - helm.delete(release_name: release.name) - kubernetes.cleanup(release_name: release.name) + helm_releases.each do |release| + if release.status == 'FAILED' || release.last_update < threshold_day + delete_helm_release(release) else print_release_state(subject: 'Release', release_name: release.name, release_date: release.last_update, action: 'leaving') end end end + private + + def delete_environment(environment, deployment) + print_release_state(subject: 'Review app', release_name: environment.slug, release_date: deployment.created_at, action: 'deleting') + gitlab.delete_environment(project_path, environment.id) + end + + def stop_environment(environment, deployment) + print_release_state(subject: 'Review app', release_name: environment.slug, release_date: deployment.created_at, action: 'stopping') + gitlab.stop_environment(project_path, environment.id) + end + + def helm_releases + args = ['--all', '--date', "--max #{HELM_RELEASES_BATCH_SIZE}"] + + helm.releases(args: args) + end + + def delete_helm_release(release) + print_release_state(subject: 'Release', release_name: release.name, 
release_status: release.status, release_date: release.last_update, action: 'cleaning') + helm.delete(release_name: release.name) + kubernetes.cleanup(release_name: release.name) + rescue Quality::HelmClient::CommandFailedError => ex + raise ex unless ignore_exception?(ex.message, IGNORED_HELM_ERRORS) + + puts "Ignoring the following Helm error:\n#{ex}\n" + rescue Quality::KubernetesClient::CommandFailedError => ex + raise ex unless ignore_exception?(ex.message, IGNORED_KUBERNETES_ERRORS) + + puts "Ignoring the following Kubernetes error:\n#{ex}\n" + end + def threshold_time(days:) Time.now - days * 24 * 3600 end - def print_release_state(subject:, release_name:, release_date:, action:) - puts "\n#{subject} '#{release_name}' was last deployed on #{release_date}: #{action} it." + def ignore_exception?(exception_message, exceptions_ignored) + exception_message.match?(/(#{exceptions_ignored})/) + end + + def print_release_state(subject:, release_name:, release_date:, action:, release_status: nil) + puts "\n#{subject} '#{release_name}' #{"(#{release_status}) " if release_status}was last deployed on #{release_date}: #{action} it.\n" end end diff --git a/spec/lib/quality/helm_client_spec.rb b/spec/lib/quality/helm_client_spec.rb index 553cd8719de..7abb9688d5a 100644 --- a/spec/lib/quality/helm_client_spec.rb +++ b/spec/lib/quality/helm_client_spec.rb @@ -1,62 +1,111 @@ # frozen_string_literal: true -require 'spec_helper' +require 'fast_spec_helper' RSpec.describe Quality::HelmClient do let(:namespace) { 'review-apps-ee' } let(:release_name) { 'my-release' } - let(:raw_helm_list_result) do + let(:raw_helm_list_page1) do <<~OUTPUT - NAME REVISION UPDATED STATUS CHART NAMESPACE - review-improve-re-2dsd9d 1 Tue Jul 31 15:53:17 2018 FAILED gitlab-0.3.4 #{namespace} - review-11-1-stabl-3r2fso 1 Mon Jul 30 22:44:14 2018 FAILED gitlab-0.3.3 #{namespace} - review-49375-css-fk664j 1 Thu Jul 19 11:01:30 2018 FAILED gitlab-0.2.4 #{namespace} + {"Next":"review-6709-group-t40qbv", + 
"Releases":[ + {"Name":"review-qa-60-reor-1mugd1", "Revision":1,"Updated":"Thu Oct 4 17:52:31 2018","Status":"FAILED", "Chart":"gitlab-1.1.3","AppVersion":"master","Namespace":"#{namespace}"}, + {"Name":"review-7846-fix-s-261vd6","Revision":1,"Updated":"Thu Oct 4 17:33:29 2018","Status":"FAILED","Chart":"gitlab-1.1.3","AppVersion":"master","Namespace":"#{namespace}"}, + {"Name":"review-7867-snowp-lzo3iy","Revision":1,"Updated":"Thu Oct 4 17:22:14 2018","Status":"DEPLOYED","Chart":"gitlab-1.1.3","AppVersion":"master","Namespace":"#{namespace}"}, + {"Name":"review-rename-geo-o4a780","Revision":1,"Updated":"Thu Oct 4 17:14:57 2018","Status":"DEPLOYED","Chart":"gitlab-1.1.3","AppVersion":"master","Namespace":"#{namespace}"}, + {"Name":"review-5781-opera-0k93fx","Revision":1,"Updated":"Thu Oct 4 17:06:15 2018","Status":"FAILED","Chart":"gitlab-1.1.3","AppVersion":"master","Namespace":"#{namespace}"}, + {"Name":"review-6709-group-2pzeec","Revision":1,"Updated":"Thu Oct 4 16:36:59 2018","Status":"FAILED","Chart":"gitlab-1.1.3","AppVersion":"master","Namespace":"#{namespace}"}, + {"Name":"review-ce-to-ee-2-l554mn","Revision":1,"Updated":"Thu Oct 4 16:27:02 2018","Status":"FAILED","Chart":"gitlab-1.1.3","AppVersion":"master","Namespace":"#{namespace}"}, + {"Name":"review-epics-e2e-m690eb","Revision":1,"Updated":"Thu Oct 4 16:08:26 2018","Status":"DEPLOYED","Chart":"gitlab-1.1.3","AppVersion":"master","Namespace":"#{namespace}"}, + {"Name":"review-7126-admin-06fae2","Revision":1,"Updated":"Thu Oct 4 15:56:35 2018","Status":"FAILED","Chart":"gitlab-1.1.3","AppVersion":"master","Namespace":"#{namespace}"}, + {"Name":"review-6983-promo-xyou11","Revision":1,"Updated":"Thu Oct 4 15:15:34 2018","Status":"FAILED","Chart":"gitlab-1.1.3","AppVersion":"master","Namespace":"#{namespace}"} + ]} + OUTPUT + end + let(:raw_helm_list_page2) do + <<~OUTPUT + {"Releases":[ + {"Name":"review-6709-group-t40qbv","Revision":1,"Updated":"Thu Oct 4 17:52:31 
2018","Status":"FAILED","Chart":"gitlab-1.1.3","AppVersion":"master","Namespace":"#{namespace}"} + ]} OUTPUT end subject { described_class.new(namespace: namespace) } describe '#releases' do - it 'calls helm list with default arguments' do + it 'raises an error if the Helm command fails' do expect(Gitlab::Popen).to receive(:popen_with_detail) - .with([%(helm list --namespace "#{namespace}")]) - .and_return(Gitlab::Popen::Result.new([], '')) + .with([%(helm list --namespace "#{namespace}" --tiller-namespace "#{namespace}" --output json)]) + .and_return(Gitlab::Popen::Result.new([], '', '', double(success?: false))) - subject.releases + expect { subject.releases.to_a }.to raise_error(described_class::CommandFailedError) end - it 'calls helm list with given arguments' do + it 'calls helm list with default arguments' do expect(Gitlab::Popen).to receive(:popen_with_detail) - .with([%(helm list --namespace "#{namespace}" --deployed)]) - .and_return(Gitlab::Popen::Result.new([], '')) + .with([%(helm list --namespace "#{namespace}" --tiller-namespace "#{namespace}" --output json)]) + .and_return(Gitlab::Popen::Result.new([], '', '', double(success?: true))) - subject.releases(args: ['--deployed']) + subject.releases.to_a + end + + it 'calls helm list with extra arguments' do + expect(Gitlab::Popen).to receive(:popen_with_detail) + .with([%(helm list --namespace "#{namespace}" --tiller-namespace "#{namespace}" --output json --deployed)]) + .and_return(Gitlab::Popen::Result.new([], '', '', double(success?: true))) + + subject.releases(args: ['--deployed']).to_a end it 'returns a list of Release objects' do expect(Gitlab::Popen).to receive(:popen_with_detail) - .with([%(helm list --namespace "#{namespace}" --deployed)]) - .and_return(Gitlab::Popen::Result.new([], raw_helm_list_result)) + .with([%(helm list --namespace "#{namespace}" --tiller-namespace "#{namespace}" --output json --deployed)]) + .and_return(Gitlab::Popen::Result.new([], raw_helm_list_page2, '', 
double(success?: true))) - releases = subject.releases(args: ['--deployed']) + releases = subject.releases(args: ['--deployed']).to_a - expect(releases.size).to eq(3) - expect(releases[0].name).to eq('review-improve-re-2dsd9d') - expect(releases[0].revision).to eq(1) - expect(releases[0].last_update).to eq(Time.parse('Tue Jul 31 15:53:17 2018')) - expect(releases[0].status).to eq('FAILED') - expect(releases[0].chart).to eq('gitlab-0.3.4') - expect(releases[0].namespace).to eq(namespace) + expect(releases.size).to eq(1) + expect(releases[0]).to have_attributes( + name: 'review-6709-group-t40qbv', + revision: 1, + last_update: Time.parse('Thu Oct 4 17:52:31 2018'), + status: 'FAILED', + chart: 'gitlab-1.1.3', + app_version: 'master', + namespace: namespace + ) + end + + it 'automatically paginates releases' do + expect(Gitlab::Popen).to receive(:popen_with_detail).ordered + .with([%(helm list --namespace "#{namespace}" --tiller-namespace "#{namespace}" --output json)]) + .and_return(Gitlab::Popen::Result.new([], raw_helm_list_page1, '', double(success?: true))) + expect(Gitlab::Popen).to receive(:popen_with_detail).ordered + .with([%(helm list --namespace "#{namespace}" --tiller-namespace "#{namespace}" --output json --offset review-6709-group-t40qbv)]) + .and_return(Gitlab::Popen::Result.new([], raw_helm_list_page2, '', double(success?: true))) + + releases = subject.releases.to_a + + expect(releases.size).to eq(11) + expect(releases.last.name).to eq('review-6709-group-t40qbv') end end describe '#delete' do + it 'raises an error if the Helm command fails' do + expect(Gitlab::Popen).to receive(:popen_with_detail) + .with([%(helm delete --tiller-namespace "#{namespace}" --purge #{release_name})]) + .and_return(Gitlab::Popen::Result.new([], '', '', double(success?: false))) + + expect { subject.delete(release_name: release_name) }.to raise_error(described_class::CommandFailedError) + end + it 'calls helm delete with default arguments' do expect(Gitlab::Popen).to 
receive(:popen_with_detail) - .with(["helm delete --purge #{release_name}"]) - .and_return(Gitlab::Popen::Result.new([], '', '', 0)) + .with([%(helm delete --tiller-namespace "#{namespace}" --purge #{release_name})]) + .and_return(Gitlab::Popen::Result.new([], '', '', double(success?: true))) - expect(subject.delete(release_name: release_name).status).to eq(0) + expect(subject.delete(release_name: release_name)).to eq('') end end end diff --git a/spec/lib/quality/kubernetes_client_spec.rb b/spec/lib/quality/kubernetes_client_spec.rb index 3c0c0d0977a..f35d9464d48 100644 --- a/spec/lib/quality/kubernetes_client_spec.rb +++ b/spec/lib/quality/kubernetes_client_spec.rb @@ -1,25 +1,33 @@ # frozen_string_literal: true -require 'spec_helper' +require 'fast_spec_helper' RSpec.describe Quality::KubernetesClient do - subject { described_class.new(namespace: 'review-apps-ee') } + let(:namespace) { 'review-apps-ee' } + let(:release_name) { 'my-release' } + + subject { described_class.new(namespace: namespace) } describe '#cleanup' do + it 'raises an error if the Kubernetes command fails' do + expect(Gitlab::Popen).to receive(:popen_with_detail) + .with([%(kubectl --namespace "#{namespace}" delete ) \ + 'ingress,svc,pdb,hpa,deploy,statefulset,job,pod,secret,configmap,pvc,secret,clusterrole,clusterrolebinding,role,rolebinding,sa ' \ + "--now -l release=\"#{release_name}\""]) + .and_return(Gitlab::Popen::Result.new([], '', '', double(success?: false))) + + expect { subject.cleanup(release_name: release_name) }.to raise_error(described_class::CommandFailedError) + end + it 'calls kubectl with the correct arguments' do - # popen_with_detail will receive an array with a bunch of arguments; we're - # only concerned with it having the correct namespace and release name - expect(Gitlab::Popen).to receive(:popen_with_detail) do |args| - expect(args) - .to satisfy_one { |arg| arg.start_with?('-n "review-apps-ee" get') } - expect(args) - .to satisfy_one { |arg| arg == 'grep "my-release"' 
} - expect(args) - .to satisfy_one { |arg| arg.end_with?('-n "review-apps-ee" delete') } - end + expect(Gitlab::Popen).to receive(:popen_with_detail) + .with([%(kubectl --namespace "#{namespace}" delete ) \ + 'ingress,svc,pdb,hpa,deploy,statefulset,job,pod,secret,configmap,pvc,secret,clusterrole,clusterrolebinding,role,rolebinding,sa ' \ + "--now -l release=\"#{release_name}\""]) + .and_return(Gitlab::Popen::Result.new([], '', '', double(success?: true))) # We're not verifying the output here, just silencing it - expect { subject.cleanup(release_name: 'my-release') }.to output.to_stdout + expect { subject.cleanup(release_name: release_name) }.to output.to_stdout end end end