diff --git a/app/services/clusters/gcp/kubernetes/fetch_kubernetes_token_service.rb b/app/services/clusters/gcp/kubernetes/fetch_kubernetes_token_service.rb index 4ad04ab801e..5d9bdc52d37 100644 --- a/app/services/clusters/gcp/kubernetes/fetch_kubernetes_token_service.rb +++ b/app/services/clusters/gcp/kubernetes/fetch_kubernetes_token_service.rb @@ -4,17 +4,30 @@ module Clusters module Gcp module Kubernetes class FetchKubernetesTokenService + DEFAULT_TOKEN_RETRY_DELAY = 5.seconds + TOKEN_RETRY_LIMIT = 5 + attr_reader :kubeclient, :service_account_token_name, :namespace - def initialize(kubeclient, service_account_token_name, namespace) + def initialize(kubeclient, service_account_token_name, namespace, token_retry_delay: DEFAULT_TOKEN_RETRY_DELAY) @kubeclient = kubeclient @service_account_token_name = service_account_token_name @namespace = namespace + @token_retry_delay = token_retry_delay end def execute - token_base64 = get_secret&.dig('data', 'token') - Base64.decode64(token_base64) if token_base64 + # Kubernetes will create the Secret and set the token asynchronously + # so it is necessary to retry + # https://kubernetes.io/docs/reference/access-authn-authz/service-accounts-admin/#token-controller + TOKEN_RETRY_LIMIT.times do + token_base64 = get_secret&.dig('data', 'token') + return Base64.decode64(token_base64) if token_base64 + + sleep @token_retry_delay + end + + nil end private diff --git a/changelogs/unreleased/63507-fix-race-condition-fetching-token.yml b/changelogs/unreleased/63507-fix-race-condition-fetching-token.yml new file mode 100644 index 00000000000..7f2b59fc9eb --- /dev/null +++ b/changelogs/unreleased/63507-fix-race-condition-fetching-token.yml @@ -0,0 +1,5 @@ +--- +title: Retry fetching Kubernetes Secret#token (#63507) +merge_request: 29922 +author: +type: fixed diff --git a/spec/services/clusters/gcp/kubernetes/fetch_kubernetes_token_service_spec.rb b/spec/services/clusters/gcp/kubernetes/fetch_kubernetes_token_service_spec.rb index a5806559b14..93c0dc37ade 100644 --- a/spec/services/clusters/gcp/kubernetes/fetch_kubernetes_token_service_spec.rb +++ b/spec/services/clusters/gcp/kubernetes/fetch_kubernetes_token_service_spec.rb @@ -17,7 +17,7 @@ describe Clusters::Gcp::Kubernetes::FetchKubernetesTokenService do ) end - subject { described_class.new(kubeclient, service_account_token_name, namespace).execute } + subject { described_class.new(kubeclient, service_account_token_name, namespace, token_retry_delay: 0).execute } before do stub_kubeclient_discover(api_url) @@ -26,8 +26,7 @@ describe Clusters::Gcp::Kubernetes::FetchKubernetesTokenService do context 'when params correct' do let(:decoded_token) { 'xxx.token.xxx' } let(:token) { Base64.encode64(decoded_token) } - - context 'when gitlab-token exists' do + context 'when the secret exists' do before do stub_kubeclient_get_secret( api_url, @@ -50,13 +49,62 @@ describe Clusters::Gcp::Kubernetes::FetchKubernetesTokenService do it { expect { subject }.to raise_error(Kubeclient::HttpError) } end - context 'when gitlab-token does not exist' do + context 'when the secret does not exist on the first try' do + before do + stub_kubeclient_get_secret_not_found_then_found( + api_url, + { + metadata_name: service_account_token_name, + namespace: namespace, + token: token + } + ) + end + + it 'retries and finds the token' do + expect(subject).to eq(decoded_token) + end + end + + context 'when the secret permanently does not exist' do before do stub_kubeclient_get_secret_error(api_url, service_account_token_name, namespace: namespace, status: 404) end it { is_expected.to be_nil } end + + context 'when the secret is missing a token on the first try' do + before do + stub_kubeclient_get_secret_missing_token_then_with_token( + api_url, + { + metadata_name: service_account_token_name, + namespace: namespace, + token: token + } + ) + end + + it 'retries and finds the token' do + expect(subject).to eq(decoded_token) + end + end + + context 'when the secret is permanently missing a token' do + before do + stub_kubeclient_get_secret( + api_url, + { + metadata_name: service_account_token_name, + namespace: namespace, + token: nil + } + ) + end + + it { is_expected.to be_nil } + end end end end diff --git a/spec/support/helpers/kubernetes_helpers.rb b/spec/support/helpers/kubernetes_helpers.rb index 011c4df0fe5..3c7bcba2b42 100644 --- a/spec/support/helpers/kubernetes_helpers.rb +++ b/spec/support/helpers/kubernetes_helpers.rb @@ -104,6 +104,26 @@ module KubernetesHelpers .to_return(status: [status, "Internal Server Error"]) end + def stub_kubeclient_get_secret_not_found_then_found(api_url, **options) + options[:metadata_name] ||= "default-token-1" + options[:namespace] ||= "default" + + WebMock.stub_request(:get, api_url + "/api/v1/namespaces/#{options[:namespace]}/secrets/#{options[:metadata_name]}") + .to_return(status: [404, "Not Found"]) + .then + .to_return(kube_response(kube_v1_secret_body(options))) + end + + def stub_kubeclient_get_secret_missing_token_then_with_token(api_url, **options) + options[:metadata_name] ||= "default-token-1" + options[:namespace] ||= "default" + + WebMock.stub_request(:get, api_url + "/api/v1/namespaces/#{options[:namespace]}/secrets/#{options[:metadata_name]}") + .to_return(kube_response(kube_v1_secret_body(options.merge(token: nil)))) + .then + .to_return(kube_response(kube_v1_secret_body(options))) + end + def stub_kubeclient_get_service_account(api_url, name, namespace: 'default') WebMock.stub_request(:get, api_url + "/api/v1/namespaces/#{namespace}/serviceaccounts/#{name}") .to_return(kube_response({})) @@ -184,11 +204,11 @@ module KubernetesHelpers "kind" => "SecretList", "apiVersion": "v1", "metadata": { - "name": options[:metadata_name] || "default-token-1", + "name": options.fetch(:metadata_name, "default-token-1"), "namespace": "kube-system" }, "data": { - "token": options[:token] || Base64.encode64('token-sample-123') + "token": options.fetch(:token, Base64.encode64('token-sample-123')) } } end