Merge branch 'max_retries_when' into 'master'

Allow to configure when to retry builds Closes gitlab-runner#3515 See merge request gitlab-org/gitlab-ce!21758
2018-11-07 15:02:18 +00:00 · 2018-11-07 15:02:18 +00:00 · f323d6148b
parent 5dc0577b84 0c59fdaab6
commit f323d6148b
12 changed files with 567 additions and 71 deletions
--- a/app/models/ci/build.rb
+++ b/app/models/ci/build.rb
@ -221,9 +221,7 @@ module Ci
        build.deployment&.drop
-        next if build.retries_max.zero?
+        if build.retry_failure?
        if build.retries_count < build.retries_max
          begin
            Ci::Build.retry(build, build.user)
          rescue Gitlab::Access::AccessDeniedError => ex
@ -321,7 +319,17 @@ module Ci
    end
    def retries_max
-      self.options.to_h.fetch(:retry, 0).to_i
+      normalized_retry.fetch(:max, 0)
    end
    def retry_when
      normalized_retry.fetch(:when, ['always'])
    end
    def retry_failure?
      return false if retries_max.zero? || retries_count >= retries_max
      retry_when.include?('always') || retry_when.include?(failure_reason.to_s)
    end
    def latest?
@ -886,6 +894,16 @@ module Ci
      options&.dig(:environment, :url) || persisted_environment&.external_url
    end
    # The format of the retry option changed in GitLab 11.5: Before it was
    # integer only, after it is a hash. New builds are created with the new
    # format, but builds created before GitLab 11.5 and saved in database still
    # have the old integer only format. This method returns the retry option
    # normalized as a hash in 11.5+ format.
    def normalized_retry
      value = options&.dig(:retry)
      value.is_a?(Integer) ? { max: value } : value.to_h
    end
    def build_attributes_from_config
      return {} unless pipeline.config_processor
--- a/changelogs/unreleased/max_retries_when.yml
+++ b/changelogs/unreleased/max_retries_when.yml
@ -0,0 +1,5 @@
 ---
 title: Allow to configure when to retry failed CI jobs
 merge_request: 21758
 author: Markus Doits
 type: added
--- a/doc/ci/yaml/README.md
+++ b/doc/ci/yaml/README.md
@ -74,7 +74,7 @@ A job is defined by a list of parameters that define the job behavior.
 | after_script  | no       | Override a set of commands that are executed after job |
 | environment   | no       | Defines a name of environment to which deployment is done by this job |
 | coverage      | no       | Define code coverage settings for a given job |
-| retry         | no       | Define how many times a job can be auto-retried in case of a failure |
+| retry         | no       | Define when and how many times a job can be auto-retried in case of a failure |
 | parallel      | no       | Defines how many instances of a job should be run in parallel |
 ### `extends`
@ -1433,18 +1433,20 @@ job1:
 ## `retry`
 > [Introduced][ce-12909] in GitLab 9.5.
 > [Behaviour expanded](https://gitlab.com/gitlab-org/gitlab-ce/merge_requests/21758)
 > in GitLab 11.5 to control on which failures to retry.
 `retry` allows you to configure how many times a job is going to be retried in
 case of a failure.
-When a job fails, and has `retry` configured it is going to be processed again
+When a job fails and has `retry` configured, it is going to be processed again
 up to the amount of times specified by the `retry` keyword.
 If `retry` is set to 2, and a job succeeds in a second run (first retry), it won't be retried
 again. `retry` value has to be a positive integer, equal or larger than 0, but
 lower or equal to 2 (two retries maximum, three runs in total).
-A simple example:
+A simple example to retry in all failure cases:
 ```yaml
 test:
@ -1452,6 +1454,57 @@ test:
  retry: 2
 ```
 By default, a job will be retried on all failure cases. To have a better control
 on which failures to retry, `retry` can be a hash with with the following keys:
 - `max`: The maximum number of retries.
 - `when`: The failure cases to retry.
 To retry only runner system failures at maximum two times:
 ```yaml
 test:
  script: rspec
  retry:
    max: 2
    when: runner_system_failure
 ```
 If there is another failure, other than a runner system failure, the job will
 not be retried.
 To retry on multiple failure cases, `when` can also be an array of failures:
 ```yaml
 test:
  script: rspec
  retry:
    max: 2
    when:
      - runner_system_failure
      - stuck_or_timeout_failure
 ```
 Possible values for `when` are:
 <!--
  Please make sure to update `RETRY_WHEN_IN_DOCUMENTATION` array in
  `spec/lib/gitlab/ci/config/entry/retry_spec.rb` if you change any of
  the documented values below. The test there makes sure that all documented
  values are really valid as a config option and therefore should always
  stay in sync with this documentation.
 -->
 - `always`: Retry on any failure (default).
 - `unknown_failure`: Retry when the failure reason is unknown.
 - `script_failure`: Retry when the script failed.
 - `api_failure`: Retry on API failure.
 - `stuck_or_timeout_failure`: Retry when the job got stuck or timed out.
 - `runner_system_failure`: Retry if there was a runner system failure (e.g. setting up the job failed).
 - `missing_dependency_failure`: Retry if a dependency was missing.
 - `runner_unsupported`: Retry if the runner was unsupported.
 ## `parallel`
 > [Introduced](https://gitlab.com/gitlab-org/gitlab-ce/merge_requests/22631) in GitLab 11.5.
--- a/lib/gitlab/ci/config/entry/job.rb
+++ b/lib/gitlab/ci/config/entry/job.rb
@ -26,16 +26,12 @@ module Gitlab
            with_options allow_nil: true do
              validates :tags, array_of_strings: true
              validates :allow_failure, boolean: true
              validates :retry, numericality: { only_integer: true,
                                                greater_than_or_equal_to: 0,
                                                less_than_or_equal_to: 2 }
              validates :parallel, numericality: { only_integer: true,
                                                   greater_than_or_equal_to: 2 }
              validates :when,
                inclusion: { in: %w[on_success on_failure always manual delayed],
                             message: 'should be on_success, on_failure, ' \
                                      'always, manual or delayed' }
              validates :dependencies, array_of_strings: true
              validates :extends, type: String
            end
@ -86,6 +82,9 @@ module Gitlab
          entry :coverage, Entry::Coverage,
            description: 'Coverage configuration for this job.'
          entry :retry, Entry::Retry,
               description: 'Retry configuration for this job.'
          helpers :before_script, :script, :stage, :type, :after_script,
                  :cache, :image, :services, :only, :except, :variables,
                  :artifacts, :commands, :environment, :coverage, :retry,
@ -160,7 +159,7 @@ module Gitlab
              environment: environment_defined? ? environment_value : nil,
              environment_name: environment_defined? ? environment_value[:name] : nil,
              coverage: coverage_defined? ? coverage_value : nil,
-              retry: retry_defined? ? retry_value.to_i : nil,
+              retry: retry_defined? ? retry_value : nil,
              parallel: parallel_defined? ? parallel_value.to_i : nil,
              artifacts: artifacts_value,
              after_script: after_script_value,
--- a/lib/gitlab/ci/config/entry/retry.rb
+++ b/lib/gitlab/ci/config/entry/retry.rb
@ -0,0 +1,90 @@
 module Gitlab
  module Ci
    class Config
      module Entry
        ##
        # Entry that represents a retry config for a job.
        #
        class Retry < Simplifiable
          strategy :SimpleRetry, if: -> (config) { config.is_a?(Integer) }
          strategy :FullRetry, if: -> (config) { config.is_a?(Hash) }
          class SimpleRetry < Entry::Node
            include Entry::Validatable
            validations do
              validates :config, numericality: { only_integer: true,
                                                 greater_than_or_equal_to: 0,
                                                 less_than_or_equal_to: 2 }
            end
            def value
              {
                max: config
              }
            end
            def location
              'retry'
            end
          end
          class FullRetry < Entry::Node
            include Entry::Validatable
            include Entry::Attributable
            ALLOWED_KEYS = %i[max when].freeze
            attributes :max, :when
            validations do
              validates :config, allowed_keys: ALLOWED_KEYS
              with_options allow_nil: true do
                validates :max, numericality: { only_integer: true,
                                                greater_than_or_equal_to: 0,
                                                less_than_or_equal_to: 2 }
                validates :when, array_of_strings_or_string: true
                validates :when,
                          allowed_array_values: { in: FullRetry.possible_retry_when_values },
                          if: -> (config) { config.when.is_a?(Array) }
                validates :when,
                          inclusion: { in: FullRetry.possible_retry_when_values },
                          if: -> (config) { config.when.is_a?(String) }
              end
            end
            def self.possible_retry_when_values
              @possible_retry_when_values ||= ::Ci::Build.failure_reasons.keys.map(&:to_s) + ['always']
            end
            def value
              super.tap do |config|
                # make sure that `when` is an array, because we allow it to
                # be passed as a String in config for simplicity
                config[:when] = Array.wrap(config[:when]) if config[:when]
              end
            end
            def location
              'retry'
            end
          end
          class UnknownStrategy < Entry::Node
            def errors
              ["#{location} has to be either an integer or a hash"]
            end
            def location
              'retry config'
            end
          end
          def self.default
          end
        end
      end
    end
  end
 end
--- a/lib/gitlab/ci/config/entry/validators.rb
+++ b/lib/gitlab/ci/config/entry/validators.rb
@ -7,10 +7,10 @@ module Gitlab
        module Validators
          class AllowedKeysValidator < ActiveModel::EachValidator
            def validate_each(record, attribute, value)
-              unknown_keys = record.config.try(:keys).to_a - options[:in]
+              unknown_keys = value.try(:keys).to_a - options[:in]
              if unknown_keys.any?
-                record.errors.add(:config, 'contains unknown keys: ' +
+                record.errors.add(attribute, "contains unknown keys: " +
                                             unknown_keys.join(', '))
              end
            end
@ -24,6 +24,16 @@ module Gitlab
            end
          end
          class AllowedArrayValuesValidator < ActiveModel::EachValidator
            def validate_each(record, attribute, value)
              unkown_values = value - options[:in]
              unless unkown_values.empty?
                record.errors.add(attribute, "contains unknown values: " +
                                              unkown_values.join(', '))
              end
            end
          end
          class ArrayOfStringsValidator < ActiveModel::EachValidator
            include LegacyValidationHelpers
@ -68,6 +78,14 @@ module Gitlab
            end
          end
          class HashOrIntegerValidator < ActiveModel::EachValidator
            def validate_each(record, attribute, value)
              unless value.is_a?(Hash) || value.is_a?(Integer)
                record.errors.add(attribute, 'should be a hash or an integer')
              end
            end
          end
          class KeyValidator < ActiveModel::EachValidator
            include LegacyValidationHelpers
--- a/spec/lib/gitlab/ci/config/entry/job_spec.rb
+++ b/spec/lib/gitlab/ci/config/entry/job_spec.rb
@ -10,7 +10,7 @@ describe Gitlab::Ci::Config::Entry::Job do
      let(:result) do
        %i[before_script script stage type after_script cache
           image services only except variables artifacts
-           environment coverage]
+           environment coverage retry]
      end
      it { is_expected.to match_array result }
@ -98,45 +98,6 @@ describe Gitlab::Ci::Config::Entry::Job do
        end
      end
      context 'when retry value is not correct' do
        context 'when it is not a numeric value' do
          let(:config) { { retry: true } }
          it 'returns error about invalid type' do
            expect(entry).not_to be_valid
            expect(entry.errors).to include 'job retry is not a number'
          end
        end
        context 'when it is lower than zero' do
          let(:config) { { retry: -1 } }
          it 'returns error about value too low' do
            expect(entry).not_to be_valid
            expect(entry.errors)
              .to include 'job retry must be greater than or equal to 0'
          end
        end
        context 'when it is not an integer' do
          let(:config) { { retry: 1.5 } }
          it 'returns error about wrong value' do
            expect(entry).not_to be_valid
            expect(entry.errors).to include 'job retry must be an integer'
          end
        end
        context 'when the value is too high' do
          let(:config) { { retry: 10 } }
          it 'returns error about value too high' do
            expect(entry).not_to be_valid
            expect(entry.errors).to include 'job retry must be less than or equal to 2'
          end
        end
      end
      context 'when parallel value is not correct' do
        context 'when it is not a numeric value' do
          let(:config) { { parallel: true } }
--- a/spec/lib/gitlab/ci/config/entry/retry_spec.rb
+++ b/spec/lib/gitlab/ci/config/entry/retry_spec.rb
@ -0,0 +1,236 @@
 require 'spec_helper'
 describe Gitlab::Ci::Config::Entry::Retry do
  let(:entry) { described_class.new(config) }
  shared_context 'when retry value is a numeric', :numeric do
    let(:config) { max }
    let(:max) {}
  end
  shared_context 'when retry value is a hash', :hash do
    let(:config) { { max: max, when: public_send(:when) }.compact }
    let(:when) {}
    let(:max) {}
  end
  describe '#value' do
    subject(:value) { entry.value }
    context 'when retry value is a numeric', :numeric do
      let(:max) { 2 }
      it 'is returned as a hash with max key' do
        expect(value).to eq(max: 2)
      end
    end
    context 'when retry value is a hash', :hash do
      context 'and `when` is a string' do
        let(:when) { 'unknown_failure' }
        it 'returns when wrapped in an array' do
          expect(value).to eq(when: ['unknown_failure'])
        end
      end
      context 'and `when` is an array' do
        let(:when) { %w[unknown_failure runner_system_failure] }
        it 'returns when as it was passed' do
          expect(value).to eq(when: %w[unknown_failure runner_system_failure])
        end
      end
    end
  end
  describe 'validation' do
    context 'when retry value is correct' do
      context 'when it is a numeric', :numeric do
        let(:max) { 2 }
        it 'is valid' do
          expect(entry).to be_valid
        end
      end
      context 'when it is a hash', :hash do
        context 'with max' do
          let(:max) { 2 }
          it 'is valid' do
            expect(entry).to be_valid
          end
        end
        context 'with string when' do
          let(:when) { 'unknown_failure' }
          it 'is valid' do
            expect(entry).to be_valid
          end
        end
        context 'with string when always' do
          let(:when) { 'always' }
          it 'is valid' do
            expect(entry).to be_valid
          end
        end
        context 'with array when' do
          let(:when) { %w[unknown_failure runner_system_failure] }
          it 'is valid' do
            expect(entry).to be_valid
          end
        end
        # Those values are documented at `doc/ci/yaml/README.md`. If any of
        # those values gets invalid, documentation must be updated. To make
        # sure this is catched, check explicitly that all of the documented
        # values are valid. If they are not it means the documentation and this
        # array must be updated.
        RETRY_WHEN_IN_DOCUMENTATION = %w[
            always
            unknown_failure
            script_failure
            api_failure
            stuck_or_timeout_failure
            runner_system_failure
            missing_dependency_failure
            runner_unsupported
        ].freeze
        RETRY_WHEN_IN_DOCUMENTATION.each do |reason|
          context "with when from documentation `#{reason}`" do
            let(:when) { reason }
            it 'is valid' do
              expect(entry).to be_valid
            end
          end
        end
        ::Ci::Build.failure_reasons.each_key do |reason|
          context "with when from CommitStatus.failure_reasons `#{reason}`" do
            let(:when) { reason }
            it 'is valid' do
              expect(entry).to be_valid
            end
          end
        end
      end
    end
    context 'when retry value is not correct' do
      context 'when it is not a numeric nor an array' do
        let(:config) { true }
        it 'returns error about invalid type' do
          expect(entry).not_to be_valid
          expect(entry.errors).to include 'retry config has to be either an integer or a hash'
        end
      end
      context 'when it is a numeric', :numeric do
        context 'when it is lower than zero' do
          let(:max) { -1 }
          it 'returns error about value too low' do
            expect(entry).not_to be_valid
            expect(entry.errors)
              .to include 'retry config must be greater than or equal to 0'
          end
        end
        context 'when it is not an integer' do
          let(:max) { 1.5 }
          it 'returns error about wrong value' do
            expect(entry).not_to be_valid
            expect(entry.errors).to include 'retry config has to be either an integer or a hash'
          end
        end
        context 'when the value is too high' do
          let(:max) { 10 }
          it 'returns error about value too high' do
            expect(entry).not_to be_valid
            expect(entry.errors).to include 'retry config must be less than or equal to 2'
          end
        end
      end
      context 'when it is a hash', :hash do
        context 'with unknown keys' do
          let(:config) { { max: 2, unknown_key: :something, one_more: :key } }
          it 'returns error about the unknown key' do
            expect(entry).not_to be_valid
            expect(entry.errors)
              .to include 'retry config contains unknown keys: unknown_key, one_more'
          end
        end
        context 'with max lower than zero' do
          let(:max) { -1 }
          it 'returns error about value too low' do
            expect(entry).not_to be_valid
            expect(entry.errors)
              .to include 'retry max must be greater than or equal to 0'
          end
        end
        context 'with max not an integer' do
          let(:max) { 1.5 }
          it 'returns error about wrong value' do
            expect(entry).not_to be_valid
            expect(entry.errors).to include 'retry max must be an integer'
          end
        end
        context 'iwth max too high' do
          let(:max) { 10 }
          it 'returns error about value too high' do
            expect(entry).not_to be_valid
            expect(entry.errors).to include 'retry max must be less than or equal to 2'
          end
        end
        context 'with when in wrong format' do
          let(:when) { true }
          it 'returns error about the wrong format' do
            expect(entry).not_to be_valid
            expect(entry.errors).to include 'retry when should be an array of strings or a string'
          end
        end
        context 'with an unknown when string' do
          let(:when) { 'unknown_reason' }
          it 'returns error about the wrong format' do
            expect(entry).not_to be_valid
            expect(entry.errors).to include 'retry when is not included in the list'
          end
        end
        context 'with an unknown failure reason in a when array' do
          let(:when) { %w[unknown_reason runner_system_failure] }
          it 'returns error about the wrong format' do
            expect(entry).not_to be_valid
            expect(entry.errors).to include 'retry when contains unknown values: unknown_reason'
          end
        end
      end
    end
  end
 end
--- a/spec/lib/gitlab/ci/yaml_processor_spec.rb
+++ b/spec/lib/gitlab/ci/yaml_processor_spec.rb
@ -53,11 +53,11 @@ module Gitlab
        describe 'retry entry' do
          context 'when retry count is specified' do
            let(:config) do
-              YAML.dump(rspec: { script: 'rspec', retry: 1 })
+              YAML.dump(rspec: { script: 'rspec', retry: { max: 1 } })
            end
            it 'includes retry count in build options attribute' do
-              expect(subject[:options]).to include(retry: 1)
+              expect(subject[:options]).to include(retry: { max: 1 })
            end
          end
--- a/spec/models/ci/build_spec.rb
+++ b/spec/models/ci/build_spec.rb
@ -1472,15 +1472,15 @@ describe Ci::Build do
    end
    describe '#retries_max' do
-      context 'when max retries value is defined' do
+      context 'with retries max config option' do
-        subject { create(:ci_build, options: { retry: 1 }) }
+        subject { create(:ci_build, options: { retry: { max: 1 } }) }
-        it 'returns a number of configured max retries' do
+        it 'returns the number of configured max retries' do
          expect(subject.retries_max).to eq 1
        end
      end
-      context 'when max retries value is not defined' do
+      context 'without retries max config option' do
        subject { create(:ci_build) }
        it 'returns zero' do
@ -1495,6 +1495,104 @@ describe Ci::Build do
          expect(subject.retries_max).to eq 0
        end
      end
      context 'with integer only config option' do
        subject { create(:ci_build, options: { retry: 1 }) }
        it 'returns the number of configured max retries' do
          expect(subject.retries_max).to eq 1
        end
      end
    end
    describe '#retry_when' do
      context 'with retries when config option' do
        subject { create(:ci_build, options: { retry: { when: ['some_reason'] } }) }
        it 'returns the configured when' do
          expect(subject.retry_when).to eq ['some_reason']
        end
      end
      context 'without retries when config option' do
        subject { create(:ci_build) }
        it 'returns always array' do
          expect(subject.retry_when).to eq ['always']
        end
      end
      context 'with integer only config option' do
        subject { create(:ci_build, options: { retry: 1 }) }
        it 'returns always array' do
          expect(subject.retry_when).to eq ['always']
        end
      end
    end
    describe '#retry_failure?' do
      subject { create(:ci_build) }
      context 'when retries max is zero' do
        before do
          expect(subject).to receive(:retries_max).at_least(:once).and_return(0)
        end
        it 'returns false' do
          expect(subject.retry_failure?).to eq false
        end
      end
      context 'when retries max equals retries count' do
        before do
          expect(subject).to receive(:retries_max).at_least(:once).and_return(1)
          expect(subject).to receive(:retries_count).at_least(:once).and_return(1)
        end
        it 'returns false' do
          expect(subject.retry_failure?).to eq false
        end
      end
      context 'when retries max is higher than retries count' do
        before do
          expect(subject).to receive(:retries_max).at_least(:once).and_return(2)
          expect(subject).to receive(:retries_count).at_least(:once).and_return(1)
        end
        context 'and retry when is always' do
          before do
            expect(subject).to receive(:retry_when).at_least(:once).and_return(['always'])
          end
          it 'returns true' do
            expect(subject.retry_failure?).to eq true
          end
        end
        context 'and retry when includes the failure_reason' do
          before do
            expect(subject).to receive(:failure_reason).at_least(:once).and_return('some_reason')
            expect(subject).to receive(:retry_when).at_least(:once).and_return(['some_reason'])
          end
          it 'returns true' do
            expect(subject.retry_failure?).to eq true
          end
        end
        context 'and retry when does not include failure_reason' do
          before do
            expect(subject).to receive(:failure_reason).at_least(:once).and_return('some_reason')
            expect(subject).to receive(:retry_when).at_least(:once).and_return(['some', 'other failure'])
          end
          it 'returns false' do
            expect(subject.retry_failure?).to eq false
          end
        end
      end
    end
  end
@ -2887,7 +2985,7 @@ describe Ci::Build do
    end
    context 'when build is configured to be retried' do
-      subject { create(:ci_build, :running, options: { retry: 3 }, project: project, user: user) }
+      subject { create(:ci_build, :running, options: { retry: { max: 3 } }, project: project, user: user) }
      it 'retries build and assigns the same user to it' do
        expect(described_class).to receive(:retry)
--- a/spec/services/ci/create_pipeline_service_spec.rb
+++ b/spec/services/ci/create_pipeline_service_spec.rb
@ -435,6 +435,7 @@ describe Ci::CreatePipelineService do
    end
    context 'when builds with auto-retries are configured' do
      context 'as an integer' do
        before do
          config = YAML.dump(rspec: { script: 'rspec', retry: 2 })
          stub_ci_pipeline_yaml_file(config)
@ -445,6 +446,23 @@ describe Ci::CreatePipelineService do
          expect(pipeline).to be_persisted
          expect(pipeline.builds.find_by(name: 'rspec').retries_max).to eq 2
          expect(pipeline.builds.find_by(name: 'rspec').retry_when).to eq ['always']
        end
      end
      context 'as hash' do
        before do
          config = YAML.dump(rspec: { script: 'rspec', retry: { max: 2, when: 'runner_system_failure' } })
          stub_ci_pipeline_yaml_file(config)
        end
        it 'correctly creates builds with auto-retry value configured' do
          pipeline = execute_service
          expect(pipeline).to be_persisted
          expect(pipeline.builds.find_by(name: 'rspec').retries_max).to eq 2
          expect(pipeline.builds.find_by(name: 'rspec').retry_when).to eq ['runner_system_failure']
        end
      end
    end
--- a/spec/services/ci/process_pipeline_service_spec.rb
+++ b/spec/services/ci/process_pipeline_service_spec.rb
@ -671,9 +671,9 @@ describe Ci::ProcessPipelineService, '#execute' do
  context 'when builds with auto-retries are configured' do
    before do
-      create_build('build:1', stage_idx: 0, user: user, options: { retry: 2 })
+      create_build('build:1', stage_idx: 0, user: user, options: { retry: { max: 2 } })
      create_build('test:1', stage_idx: 1, user: user, when: :on_failure)
-      create_build('test:2', stage_idx: 1, user: user, options: { retry: 1 })
+      create_build('test:2', stage_idx: 1, user: user, options: { retry: { max: 1 } })
    end
    it 'automatically retries builds in a valid order' do