diff --git a/app/models/ci/build.rb b/app/models/ci/build.rb index 5c1168e8ef5..889f8ce27a6 100644 --- a/app/models/ci/build.rb +++ b/app/models/ci/build.rb @@ -221,9 +221,7 @@ module Ci build.deployment&.drop - next if build.retries_max.zero? - - if build.retries_count < build.retries_max + if build.retry_failure? begin Ci::Build.retry(build, build.user) rescue Gitlab::Access::AccessDeniedError => ex @@ -321,7 +319,17 @@ module Ci end def retries_max - self.options.to_h.fetch(:retry, 0).to_i + normalized_retry.fetch(:max, 0) + end + + def retry_when + normalized_retry.fetch(:when, ['always']) + end + + def retry_failure? + return false if retries_max.zero? || retries_count >= retries_max + + retry_when.include?('always') || retry_when.include?(failure_reason.to_s) end def latest? @@ -886,6 +894,16 @@ module Ci options&.dig(:environment, :url) || persisted_environment&.external_url end + # The format of the retry option changed in GitLab 11.5: Before it was + # integer only, after it is a hash. New builds are created with the new + # format, but builds created before GitLab 11.5 and saved in database still + # have the old integer only format. This method returns the retry option + # normalized as a hash in 11.5+ format. + def normalized_retry + value = options&.dig(:retry) + value.is_a?(Integer) ? { max: value } : value.to_h + end + def build_attributes_from_config return {} unless pipeline.config_processor diff --git a/changelogs/unreleased/max_retries_when.yml b/changelogs/unreleased/max_retries_when.yml new file mode 100644 index 00000000000..dad3cd8a123 --- /dev/null +++ b/changelogs/unreleased/max_retries_when.yml @@ -0,0 +1,5 @@ +--- +title: Allow to configure when to retry failed CI jobs +merge_request: 21758 +author: Markus Doits +type: added diff --git a/doc/ci/yaml/README.md b/doc/ci/yaml/README.md index b3a55e48f4e..c827faace33 100644 --- a/doc/ci/yaml/README.md +++ b/doc/ci/yaml/README.md @@ -74,7 +74,7 @@ A job is defined by a list of parameters that define the job behavior. | after_script | no | Override a set of commands that are executed after job | | environment | no | Defines a name of environment to which deployment is done by this job | | coverage | no | Define code coverage settings for a given job | -| retry | no | Define how many times a job can be auto-retried in case of a failure | +| retry | no | Define when and how many times a job can be auto-retried in case of a failure | | parallel | no | Defines how many instances of a job should be run in parallel | ### `extends` @@ -1433,18 +1433,20 @@ job1: ## `retry` > [Introduced][ce-12909] in GitLab 9.5. +> [Behaviour expanded](https://gitlab.com/gitlab-org/gitlab-ce/merge_requests/21758) +> in GitLab 11.5 to control on which failures to retry. `retry` allows you to configure how many times a job is going to be retried in case of a failure. -When a job fails, and has `retry` configured it is going to be processed again +When a job fails and has `retry` configured, it is going to be processed again up to the amount of times specified by the `retry` keyword. If `retry` is set to 2, and a job succeeds in a second run (first retry), it won't be retried again. `retry` value has to be a positive integer, equal or larger than 0, but lower or equal to 2 (two retries maximum, three runs in total). -A simple example: +A simple example to retry in all failure cases: ```yaml test: @@ -1452,6 +1454,57 @@ test: retry: 2 ``` +By default, a job will be retried on all failure cases. To have a better control +on which failures to retry, `retry` can be a hash with with the following keys: + +- `max`: The maximum number of retries. +- `when`: The failure cases to retry. + +To retry only runner system failures at maximum two times: + +```yaml +test: + script: rspec + retry: + max: 2 + when: runner_system_failure +``` + +If there is another failure, other than a runner system failure, the job will +not be retried. + +To retry on multiple failure cases, `when` can also be an array of failures: + +```yaml +test: + script: rspec + retry: + max: 2 + when: + - runner_system_failure + - stuck_or_timeout_failure +``` + +Possible values for `when` are: + + + +- `always`: Retry on any failure (default). +- `unknown_failure`: Retry when the failure reason is unknown. +- `script_failure`: Retry when the script failed. +- `api_failure`: Retry on API failure. +- `stuck_or_timeout_failure`: Retry when the job got stuck or timed out. +- `runner_system_failure`: Retry if there was a runner system failure (e.g. setting up the job failed). +- `missing_dependency_failure`: Retry if a dependency was missing. +- `runner_unsupported`: Retry if the runner was unsupported. + + ## `parallel` > [Introduced](https://gitlab.com/gitlab-org/gitlab-ce/merge_requests/22631) in GitLab 11.5. diff --git a/lib/gitlab/ci/config/entry/job.rb b/lib/gitlab/ci/config/entry/job.rb index 362014b1a09..8e8c979f973 100644 --- a/lib/gitlab/ci/config/entry/job.rb +++ b/lib/gitlab/ci/config/entry/job.rb @@ -26,16 +26,12 @@ module Gitlab with_options allow_nil: true do validates :tags, array_of_strings: true validates :allow_failure, boolean: true - validates :retry, numericality: { only_integer: true, - greater_than_or_equal_to: 0, - less_than_or_equal_to: 2 } validates :parallel, numericality: { only_integer: true, greater_than_or_equal_to: 2 } validates :when, inclusion: { in: %w[on_success on_failure always manual delayed], message: 'should be on_success, on_failure, ' \ 'always, manual or delayed' } - validates :dependencies, array_of_strings: true validates :extends, type: String end @@ -86,6 +82,9 @@ module Gitlab entry :coverage, Entry::Coverage, description: 'Coverage configuration for this job.' + entry :retry, Entry::Retry, + description: 'Retry configuration for this job.' + helpers :before_script, :script, :stage, :type, :after_script, :cache, :image, :services, :only, :except, :variables, :artifacts, :commands, :environment, :coverage, :retry, @@ -160,7 +159,7 @@ module Gitlab environment: environment_defined? ? environment_value : nil, environment_name: environment_defined? ? environment_value[:name] : nil, coverage: coverage_defined? ? coverage_value : nil, - retry: retry_defined? ? retry_value.to_i : nil, + retry: retry_defined? ? retry_value : nil, parallel: parallel_defined? ? parallel_value.to_i : nil, artifacts: artifacts_value, after_script: after_script_value, diff --git a/lib/gitlab/ci/config/entry/retry.rb b/lib/gitlab/ci/config/entry/retry.rb new file mode 100644 index 00000000000..e39cc5de229 --- /dev/null +++ b/lib/gitlab/ci/config/entry/retry.rb @@ -0,0 +1,90 @@ +module Gitlab + module Ci + class Config + module Entry + ## + # Entry that represents a retry config for a job. + # + class Retry < Simplifiable + strategy :SimpleRetry, if: -> (config) { config.is_a?(Integer) } + strategy :FullRetry, if: -> (config) { config.is_a?(Hash) } + + class SimpleRetry < Entry::Node + include Entry::Validatable + + validations do + validates :config, numericality: { only_integer: true, + greater_than_or_equal_to: 0, + less_than_or_equal_to: 2 } + end + + def value + { + max: config + } + end + + def location + 'retry' + end + end + + class FullRetry < Entry::Node + include Entry::Validatable + include Entry::Attributable + + ALLOWED_KEYS = %i[max when].freeze + attributes :max, :when + + validations do + validates :config, allowed_keys: ALLOWED_KEYS + + with_options allow_nil: true do + validates :max, numericality: { only_integer: true, + greater_than_or_equal_to: 0, + less_than_or_equal_to: 2 } + + validates :when, array_of_strings_or_string: true + validates :when, + allowed_array_values: { in: FullRetry.possible_retry_when_values }, + if: -> (config) { config.when.is_a?(Array) } + validates :when, + inclusion: { in: FullRetry.possible_retry_when_values }, + if: -> (config) { config.when.is_a?(String) } + end + end + + def self.possible_retry_when_values + @possible_retry_when_values ||= ::Ci::Build.failure_reasons.keys.map(&:to_s) + ['always'] + end + + def value + super.tap do |config| + # make sure that `when` is an array, because we allow it to + # be passed as a String in config for simplicity + config[:when] = Array.wrap(config[:when]) if config[:when] + end + end + + def location + 'retry' + end + end + + class UnknownStrategy < Entry::Node + def errors + ["#{location} has to be either an integer or a hash"] + end + + def location + 'retry config' + end + end + + def self.default + end + end + end + end + end +end diff --git a/lib/gitlab/ci/config/entry/validators.rb b/lib/gitlab/ci/config/entry/validators.rb index 805d26ca8d8..a1d552fb2e5 100644 --- a/lib/gitlab/ci/config/entry/validators.rb +++ b/lib/gitlab/ci/config/entry/validators.rb @@ -7,11 +7,11 @@ module Gitlab module Validators class AllowedKeysValidator < ActiveModel::EachValidator def validate_each(record, attribute, value) - unknown_keys = record.config.try(:keys).to_a - options[:in] + unknown_keys = value.try(:keys).to_a - options[:in] if unknown_keys.any? - record.errors.add(:config, 'contains unknown keys: ' + - unknown_keys.join(', ')) + record.errors.add(attribute, "contains unknown keys: " + + unknown_keys.join(', ')) end end end @@ -24,6 +24,16 @@ module Gitlab end end + class AllowedArrayValuesValidator < ActiveModel::EachValidator + def validate_each(record, attribute, value) + unkown_values = value - options[:in] + unless unkown_values.empty? + record.errors.add(attribute, "contains unknown values: " + + unkown_values.join(', ')) + end + end + end + class ArrayOfStringsValidator < ActiveModel::EachValidator include LegacyValidationHelpers @@ -68,6 +78,14 @@ module Gitlab end end + class HashOrIntegerValidator < ActiveModel::EachValidator + def validate_each(record, attribute, value) + unless value.is_a?(Hash) || value.is_a?(Integer) + record.errors.add(attribute, 'should be a hash or an integer') + end + end + end + class KeyValidator < ActiveModel::EachValidator include LegacyValidationHelpers diff --git a/spec/lib/gitlab/ci/config/entry/job_spec.rb b/spec/lib/gitlab/ci/config/entry/job_spec.rb index f1a2946acda..ac9b0c674a5 100644 --- a/spec/lib/gitlab/ci/config/entry/job_spec.rb +++ b/spec/lib/gitlab/ci/config/entry/job_spec.rb @@ -10,7 +10,7 @@ describe Gitlab::Ci::Config::Entry::Job do let(:result) do %i[before_script script stage type after_script cache image services only except variables artifacts - environment coverage] + environment coverage retry] end it { is_expected.to match_array result } @@ -98,45 +98,6 @@ describe Gitlab::Ci::Config::Entry::Job do end end - context 'when retry value is not correct' do - context 'when it is not a numeric value' do - let(:config) { { retry: true } } - - it 'returns error about invalid type' do - expect(entry).not_to be_valid - expect(entry.errors).to include 'job retry is not a number' - end - end - - context 'when it is lower than zero' do - let(:config) { { retry: -1 } } - - it 'returns error about value too low' do - expect(entry).not_to be_valid - expect(entry.errors) - .to include 'job retry must be greater than or equal to 0' - end - end - - context 'when it is not an integer' do - let(:config) { { retry: 1.5 } } - - it 'returns error about wrong value' do - expect(entry).not_to be_valid - expect(entry.errors).to include 'job retry must be an integer' - end - end - - context 'when the value is too high' do - let(:config) { { retry: 10 } } - - it 'returns error about value too high' do - expect(entry).not_to be_valid - expect(entry.errors).to include 'job retry must be less than or equal to 2' - end - end - end - context 'when parallel value is not correct' do context 'when it is not a numeric value' do let(:config) { { parallel: true } } diff --git a/spec/lib/gitlab/ci/config/entry/retry_spec.rb b/spec/lib/gitlab/ci/config/entry/retry_spec.rb new file mode 100644 index 00000000000..164a9ed4c3d --- /dev/null +++ b/spec/lib/gitlab/ci/config/entry/retry_spec.rb @@ -0,0 +1,236 @@ +require 'spec_helper' + +describe Gitlab::Ci::Config::Entry::Retry do + let(:entry) { described_class.new(config) } + + shared_context 'when retry value is a numeric', :numeric do + let(:config) { max } + let(:max) {} + end + + shared_context 'when retry value is a hash', :hash do + let(:config) { { max: max, when: public_send(:when) }.compact } + let(:when) {} + let(:max) {} + end + + describe '#value' do + subject(:value) { entry.value } + + context 'when retry value is a numeric', :numeric do + let(:max) { 2 } + + it 'is returned as a hash with max key' do + expect(value).to eq(max: 2) + end + end + + context 'when retry value is a hash', :hash do + context 'and `when` is a string' do + let(:when) { 'unknown_failure' } + + it 'returns when wrapped in an array' do + expect(value).to eq(when: ['unknown_failure']) + end + end + + context 'and `when` is an array' do + let(:when) { %w[unknown_failure runner_system_failure] } + + it 'returns when as it was passed' do + expect(value).to eq(when: %w[unknown_failure runner_system_failure]) + end + end + end + end + + describe 'validation' do + context 'when retry value is correct' do + context 'when it is a numeric', :numeric do + let(:max) { 2 } + + it 'is valid' do + expect(entry).to be_valid + end + end + + context 'when it is a hash', :hash do + context 'with max' do + let(:max) { 2 } + + it 'is valid' do + expect(entry).to be_valid + end + end + + context 'with string when' do + let(:when) { 'unknown_failure' } + + it 'is valid' do + expect(entry).to be_valid + end + end + + context 'with string when always' do + let(:when) { 'always' } + + it 'is valid' do + expect(entry).to be_valid + end + end + + context 'with array when' do + let(:when) { %w[unknown_failure runner_system_failure] } + + it 'is valid' do + expect(entry).to be_valid + end + end + + # Those values are documented at `doc/ci/yaml/README.md`. If any of + # those values gets invalid, documentation must be updated. To make + # sure this is catched, check explicitly that all of the documented + # values are valid. If they are not it means the documentation and this + # array must be updated. + RETRY_WHEN_IN_DOCUMENTATION = %w[ + always + unknown_failure + script_failure + api_failure + stuck_or_timeout_failure + runner_system_failure + missing_dependency_failure + runner_unsupported + ].freeze + + RETRY_WHEN_IN_DOCUMENTATION.each do |reason| + context "with when from documentation `#{reason}`" do + let(:when) { reason } + + it 'is valid' do + expect(entry).to be_valid + end + end + end + + ::Ci::Build.failure_reasons.each_key do |reason| + context "with when from CommitStatus.failure_reasons `#{reason}`" do + let(:when) { reason } + + it 'is valid' do + expect(entry).to be_valid + end + end + end + end + end + + context 'when retry value is not correct' do + context 'when it is not a numeric nor an array' do + let(:config) { true } + + it 'returns error about invalid type' do + expect(entry).not_to be_valid + expect(entry.errors).to include 'retry config has to be either an integer or a hash' + end + end + + context 'when it is a numeric', :numeric do + context 'when it is lower than zero' do + let(:max) { -1 } + + it 'returns error about value too low' do + expect(entry).not_to be_valid + expect(entry.errors) + .to include 'retry config must be greater than or equal to 0' + end + end + + context 'when it is not an integer' do + let(:max) { 1.5 } + + it 'returns error about wrong value' do + expect(entry).not_to be_valid + expect(entry.errors).to include 'retry config has to be either an integer or a hash' + end + end + + context 'when the value is too high' do + let(:max) { 10 } + + it 'returns error about value too high' do + expect(entry).not_to be_valid + expect(entry.errors).to include 'retry config must be less than or equal to 2' + end + end + end + + context 'when it is a hash', :hash do + context 'with unknown keys' do + let(:config) { { max: 2, unknown_key: :something, one_more: :key } } + + it 'returns error about the unknown key' do + expect(entry).not_to be_valid + expect(entry.errors) + .to include 'retry config contains unknown keys: unknown_key, one_more' + end + end + + context 'with max lower than zero' do + let(:max) { -1 } + + it 'returns error about value too low' do + expect(entry).not_to be_valid + expect(entry.errors) + .to include 'retry max must be greater than or equal to 0' + end + end + + context 'with max not an integer' do + let(:max) { 1.5 } + + it 'returns error about wrong value' do + expect(entry).not_to be_valid + expect(entry.errors).to include 'retry max must be an integer' + end + end + + context 'iwth max too high' do + let(:max) { 10 } + + it 'returns error about value too high' do + expect(entry).not_to be_valid + expect(entry.errors).to include 'retry max must be less than or equal to 2' + end + end + + context 'with when in wrong format' do + let(:when) { true } + + it 'returns error about the wrong format' do + expect(entry).not_to be_valid + expect(entry.errors).to include 'retry when should be an array of strings or a string' + end + end + + context 'with an unknown when string' do + let(:when) { 'unknown_reason' } + + it 'returns error about the wrong format' do + expect(entry).not_to be_valid + expect(entry.errors).to include 'retry when is not included in the list' + end + end + + context 'with an unknown failure reason in a when array' do + let(:when) { %w[unknown_reason runner_system_failure] } + + it 'returns error about the wrong format' do + expect(entry).not_to be_valid + expect(entry.errors).to include 'retry when contains unknown values: unknown_reason' + end + end + end + end + end +end diff --git a/spec/lib/gitlab/ci/yaml_processor_spec.rb b/spec/lib/gitlab/ci/yaml_processor_spec.rb index dcfd54107a3..441e8214181 100644 --- a/spec/lib/gitlab/ci/yaml_processor_spec.rb +++ b/spec/lib/gitlab/ci/yaml_processor_spec.rb @@ -53,11 +53,11 @@ module Gitlab describe 'retry entry' do context 'when retry count is specified' do let(:config) do - YAML.dump(rspec: { script: 'rspec', retry: 1 }) + YAML.dump(rspec: { script: 'rspec', retry: { max: 1 } }) end it 'includes retry count in build options attribute' do - expect(subject[:options]).to include(retry: 1) + expect(subject[:options]).to include(retry: { max: 1 }) end end diff --git a/spec/models/ci/build_spec.rb b/spec/models/ci/build_spec.rb index 5bd2f096656..6849bc6db7a 100644 --- a/spec/models/ci/build_spec.rb +++ b/spec/models/ci/build_spec.rb @@ -1472,15 +1472,15 @@ describe Ci::Build do end describe '#retries_max' do - context 'when max retries value is defined' do - subject { create(:ci_build, options: { retry: 1 }) } + context 'with retries max config option' do + subject { create(:ci_build, options: { retry: { max: 1 } }) } - it 'returns a number of configured max retries' do + it 'returns the number of configured max retries' do expect(subject.retries_max).to eq 1 end end - context 'when max retries value is not defined' do + context 'without retries max config option' do subject { create(:ci_build) } it 'returns zero' do @@ -1495,6 +1495,104 @@ describe Ci::Build do expect(subject.retries_max).to eq 0 end end + + context 'with integer only config option' do + subject { create(:ci_build, options: { retry: 1 }) } + + it 'returns the number of configured max retries' do + expect(subject.retries_max).to eq 1 + end + end + end + + describe '#retry_when' do + context 'with retries when config option' do + subject { create(:ci_build, options: { retry: { when: ['some_reason'] } }) } + + it 'returns the configured when' do + expect(subject.retry_when).to eq ['some_reason'] + end + end + + context 'without retries when config option' do + subject { create(:ci_build) } + + it 'returns always array' do + expect(subject.retry_when).to eq ['always'] + end + end + + context 'with integer only config option' do + subject { create(:ci_build, options: { retry: 1 }) } + + it 'returns always array' do + expect(subject.retry_when).to eq ['always'] + end + end + end + + describe '#retry_failure?' do + subject { create(:ci_build) } + + context 'when retries max is zero' do + before do + expect(subject).to receive(:retries_max).at_least(:once).and_return(0) + end + + it 'returns false' do + expect(subject.retry_failure?).to eq false + end + end + + context 'when retries max equals retries count' do + before do + expect(subject).to receive(:retries_max).at_least(:once).and_return(1) + expect(subject).to receive(:retries_count).at_least(:once).and_return(1) + end + + it 'returns false' do + expect(subject.retry_failure?).to eq false + end + end + + context 'when retries max is higher than retries count' do + before do + expect(subject).to receive(:retries_max).at_least(:once).and_return(2) + expect(subject).to receive(:retries_count).at_least(:once).and_return(1) + end + + context 'and retry when is always' do + before do + expect(subject).to receive(:retry_when).at_least(:once).and_return(['always']) + end + + it 'returns true' do + expect(subject.retry_failure?).to eq true + end + end + + context 'and retry when includes the failure_reason' do + before do + expect(subject).to receive(:failure_reason).at_least(:once).and_return('some_reason') + expect(subject).to receive(:retry_when).at_least(:once).and_return(['some_reason']) + end + + it 'returns true' do + expect(subject.retry_failure?).to eq true + end + end + + context 'and retry when does not include failure_reason' do + before do + expect(subject).to receive(:failure_reason).at_least(:once).and_return('some_reason') + expect(subject).to receive(:retry_when).at_least(:once).and_return(['some', 'other failure']) + end + + it 'returns false' do + expect(subject.retry_failure?).to eq false + end + end + end end end @@ -2887,7 +2985,7 @@ describe Ci::Build do end context 'when build is configured to be retried' do - subject { create(:ci_build, :running, options: { retry: 3 }, project: project, user: user) } + subject { create(:ci_build, :running, options: { retry: { max: 3 } }, project: project, user: user) } it 'retries build and assigns the same user to it' do expect(described_class).to receive(:retry) diff --git a/spec/services/ci/create_pipeline_service_spec.rb b/spec/services/ci/create_pipeline_service_spec.rb index 054b7b1561c..5c87ed5c3c6 100644 --- a/spec/services/ci/create_pipeline_service_spec.rb +++ b/spec/services/ci/create_pipeline_service_spec.rb @@ -435,16 +435,34 @@ describe Ci::CreatePipelineService do end context 'when builds with auto-retries are configured' do - before do - config = YAML.dump(rspec: { script: 'rspec', retry: 2 }) - stub_ci_pipeline_yaml_file(config) + context 'as an integer' do + before do + config = YAML.dump(rspec: { script: 'rspec', retry: 2 }) + stub_ci_pipeline_yaml_file(config) + end + + it 'correctly creates builds with auto-retry value configured' do + pipeline = execute_service + + expect(pipeline).to be_persisted + expect(pipeline.builds.find_by(name: 'rspec').retries_max).to eq 2 + expect(pipeline.builds.find_by(name: 'rspec').retry_when).to eq ['always'] + end end - it 'correctly creates builds with auto-retry value configured' do - pipeline = execute_service + context 'as hash' do + before do + config = YAML.dump(rspec: { script: 'rspec', retry: { max: 2, when: 'runner_system_failure' } }) + stub_ci_pipeline_yaml_file(config) + end - expect(pipeline).to be_persisted - expect(pipeline.builds.find_by(name: 'rspec').retries_max).to eq 2 + it 'correctly creates builds with auto-retry value configured' do + pipeline = execute_service + + expect(pipeline).to be_persisted + expect(pipeline.builds.find_by(name: 'rspec').retries_max).to eq 2 + expect(pipeline.builds.find_by(name: 'rspec').retry_when).to eq ['runner_system_failure'] + end end end diff --git a/spec/services/ci/process_pipeline_service_spec.rb b/spec/services/ci/process_pipeline_service_spec.rb index 8c7258c42ad..538992b621e 100644 --- a/spec/services/ci/process_pipeline_service_spec.rb +++ b/spec/services/ci/process_pipeline_service_spec.rb @@ -671,9 +671,9 @@ describe Ci::ProcessPipelineService, '#execute' do context 'when builds with auto-retries are configured' do before do - create_build('build:1', stage_idx: 0, user: user, options: { retry: 2 }) + create_build('build:1', stage_idx: 0, user: user, options: { retry: { max: 2 } }) create_build('test:1', stage_idx: 1, user: user, when: :on_failure) - create_build('test:2', stage_idx: 1, user: user, options: { retry: 1 }) + create_build('test:2', stage_idx: 1, user: user, options: { retry: { max: 1 } }) end it 'automatically retries builds in a valid order' do