Merge branch 'max_retries_when' into 'master'

Allow to configure when to retry builds

Closes gitlab-runner#3515

See merge request gitlab-org/gitlab-ce!21758
This commit is contained in:
Grzegorz Bizon 2018-11-07 15:02:18 +00:00
commit f323d6148b
12 changed files with 567 additions and 71 deletions

View File

@ -221,9 +221,7 @@ module Ci
build.deployment&.drop
next if build.retries_max.zero?
if build.retries_count < build.retries_max
if build.retry_failure?
begin
Ci::Build.retry(build, build.user)
rescue Gitlab::Access::AccessDeniedError => ex
@ -321,7 +319,17 @@ module Ci
end
def retries_max
self.options.to_h.fetch(:retry, 0).to_i
normalized_retry.fetch(:max, 0)
end
def retry_when
normalized_retry.fetch(:when, ['always'])
end
def retry_failure?
return false if retries_max.zero? || retries_count >= retries_max
retry_when.include?('always') || retry_when.include?(failure_reason.to_s)
end
def latest?
@ -886,6 +894,16 @@ module Ci
options&.dig(:environment, :url) || persisted_environment&.external_url
end
# The format of the retry option changed in GitLab 11.5: Before it was
# integer only, after it is a hash. New builds are created with the new
# format, but builds created before GitLab 11.5 and saved in database still
# have the old integer only format. This method returns the retry option
# normalized as a hash in 11.5+ format.
def normalized_retry
value = options&.dig(:retry)
value.is_a?(Integer) ? { max: value } : value.to_h
end
def build_attributes_from_config
return {} unless pipeline.config_processor

View File

@ -0,0 +1,5 @@
---
title: Allow to configure when to retry failed CI jobs
merge_request: 21758
author: Markus Doits
type: added

View File

@ -74,7 +74,7 @@ A job is defined by a list of parameters that define the job behavior.
| after_script | no | Override a set of commands that are executed after job |
| environment | no | Defines a name of environment to which deployment is done by this job |
| coverage | no | Define code coverage settings for a given job |
| retry | no | Define how many times a job can be auto-retried in case of a failure |
| retry | no | Define when and how many times a job can be auto-retried in case of a failure |
| parallel | no | Defines how many instances of a job should be run in parallel |
### `extends`
@ -1433,18 +1433,20 @@ job1:
## `retry`
> [Introduced][ce-12909] in GitLab 9.5.
> [Behaviour expanded](https://gitlab.com/gitlab-org/gitlab-ce/merge_requests/21758)
> in GitLab 11.5 to control on which failures to retry.
`retry` allows you to configure how many times a job is going to be retried in
case of a failure.
When a job fails, and has `retry` configured it is going to be processed again
When a job fails and has `retry` configured, it is going to be processed again
up to the amount of times specified by the `retry` keyword.
If `retry` is set to 2, and a job succeeds in a second run (first retry), it won't be retried
again. `retry` value has to be a positive integer, equal or larger than 0, but
lower or equal to 2 (two retries maximum, three runs in total).
A simple example:
A simple example to retry in all failure cases:
```yaml
test:
@ -1452,6 +1454,57 @@ test:
retry: 2
```
By default, a job will be retried on all failure cases. To have a better control
on which failures to retry, `retry` can be a hash with with the following keys:
- `max`: The maximum number of retries.
- `when`: The failure cases to retry.
To retry only runner system failures at maximum two times:
```yaml
test:
script: rspec
retry:
max: 2
when: runner_system_failure
```
If there is another failure, other than a runner system failure, the job will
not be retried.
To retry on multiple failure cases, `when` can also be an array of failures:
```yaml
test:
script: rspec
retry:
max: 2
when:
- runner_system_failure
- stuck_or_timeout_failure
```
Possible values for `when` are:
<!--
Please make sure to update `RETRY_WHEN_IN_DOCUMENTATION` array in
`spec/lib/gitlab/ci/config/entry/retry_spec.rb` if you change any of
the documented values below. The test there makes sure that all documented
values are really valid as a config option and therefore should always
stay in sync with this documentation.
-->
- `always`: Retry on any failure (default).
- `unknown_failure`: Retry when the failure reason is unknown.
- `script_failure`: Retry when the script failed.
- `api_failure`: Retry on API failure.
- `stuck_or_timeout_failure`: Retry when the job got stuck or timed out.
- `runner_system_failure`: Retry if there was a runner system failure (e.g. setting up the job failed).
- `missing_dependency_failure`: Retry if a dependency was missing.
- `runner_unsupported`: Retry if the runner was unsupported.
## `parallel`
> [Introduced](https://gitlab.com/gitlab-org/gitlab-ce/merge_requests/22631) in GitLab 11.5.

View File

@ -26,16 +26,12 @@ module Gitlab
with_options allow_nil: true do
validates :tags, array_of_strings: true
validates :allow_failure, boolean: true
validates :retry, numericality: { only_integer: true,
greater_than_or_equal_to: 0,
less_than_or_equal_to: 2 }
validates :parallel, numericality: { only_integer: true,
greater_than_or_equal_to: 2 }
validates :when,
inclusion: { in: %w[on_success on_failure always manual delayed],
message: 'should be on_success, on_failure, ' \
'always, manual or delayed' }
validates :dependencies, array_of_strings: true
validates :extends, type: String
end
@ -86,6 +82,9 @@ module Gitlab
entry :coverage, Entry::Coverage,
description: 'Coverage configuration for this job.'
entry :retry, Entry::Retry,
description: 'Retry configuration for this job.'
helpers :before_script, :script, :stage, :type, :after_script,
:cache, :image, :services, :only, :except, :variables,
:artifacts, :commands, :environment, :coverage, :retry,
@ -160,7 +159,7 @@ module Gitlab
environment: environment_defined? ? environment_value : nil,
environment_name: environment_defined? ? environment_value[:name] : nil,
coverage: coverage_defined? ? coverage_value : nil,
retry: retry_defined? ? retry_value.to_i : nil,
retry: retry_defined? ? retry_value : nil,
parallel: parallel_defined? ? parallel_value.to_i : nil,
artifacts: artifacts_value,
after_script: after_script_value,

View File

@ -0,0 +1,90 @@
module Gitlab
module Ci
class Config
module Entry
##
# Entry that represents a retry config for a job.
#
class Retry < Simplifiable
strategy :SimpleRetry, if: -> (config) { config.is_a?(Integer) }
strategy :FullRetry, if: -> (config) { config.is_a?(Hash) }
class SimpleRetry < Entry::Node
include Entry::Validatable
validations do
validates :config, numericality: { only_integer: true,
greater_than_or_equal_to: 0,
less_than_or_equal_to: 2 }
end
def value
{
max: config
}
end
def location
'retry'
end
end
class FullRetry < Entry::Node
include Entry::Validatable
include Entry::Attributable
ALLOWED_KEYS = %i[max when].freeze
attributes :max, :when
validations do
validates :config, allowed_keys: ALLOWED_KEYS
with_options allow_nil: true do
validates :max, numericality: { only_integer: true,
greater_than_or_equal_to: 0,
less_than_or_equal_to: 2 }
validates :when, array_of_strings_or_string: true
validates :when,
allowed_array_values: { in: FullRetry.possible_retry_when_values },
if: -> (config) { config.when.is_a?(Array) }
validates :when,
inclusion: { in: FullRetry.possible_retry_when_values },
if: -> (config) { config.when.is_a?(String) }
end
end
def self.possible_retry_when_values
@possible_retry_when_values ||= ::Ci::Build.failure_reasons.keys.map(&:to_s) + ['always']
end
def value
super.tap do |config|
# make sure that `when` is an array, because we allow it to
# be passed as a String in config for simplicity
config[:when] = Array.wrap(config[:when]) if config[:when]
end
end
def location
'retry'
end
end
class UnknownStrategy < Entry::Node
def errors
["#{location} has to be either an integer or a hash"]
end
def location
'retry config'
end
end
def self.default
end
end
end
end
end
end

View File

@ -7,11 +7,11 @@ module Gitlab
module Validators
class AllowedKeysValidator < ActiveModel::EachValidator
def validate_each(record, attribute, value)
unknown_keys = record.config.try(:keys).to_a - options[:in]
unknown_keys = value.try(:keys).to_a - options[:in]
if unknown_keys.any?
record.errors.add(:config, 'contains unknown keys: ' +
unknown_keys.join(', '))
record.errors.add(attribute, "contains unknown keys: " +
unknown_keys.join(', '))
end
end
end
@ -24,6 +24,16 @@ module Gitlab
end
end
class AllowedArrayValuesValidator < ActiveModel::EachValidator
def validate_each(record, attribute, value)
unkown_values = value - options[:in]
unless unkown_values.empty?
record.errors.add(attribute, "contains unknown values: " +
unkown_values.join(', '))
end
end
end
class ArrayOfStringsValidator < ActiveModel::EachValidator
include LegacyValidationHelpers
@ -68,6 +78,14 @@ module Gitlab
end
end
class HashOrIntegerValidator < ActiveModel::EachValidator
def validate_each(record, attribute, value)
unless value.is_a?(Hash) || value.is_a?(Integer)
record.errors.add(attribute, 'should be a hash or an integer')
end
end
end
class KeyValidator < ActiveModel::EachValidator
include LegacyValidationHelpers

View File

@ -10,7 +10,7 @@ describe Gitlab::Ci::Config::Entry::Job do
let(:result) do
%i[before_script script stage type after_script cache
image services only except variables artifacts
environment coverage]
environment coverage retry]
end
it { is_expected.to match_array result }
@ -98,45 +98,6 @@ describe Gitlab::Ci::Config::Entry::Job do
end
end
context 'when retry value is not correct' do
context 'when it is not a numeric value' do
let(:config) { { retry: true } }
it 'returns error about invalid type' do
expect(entry).not_to be_valid
expect(entry.errors).to include 'job retry is not a number'
end
end
context 'when it is lower than zero' do
let(:config) { { retry: -1 } }
it 'returns error about value too low' do
expect(entry).not_to be_valid
expect(entry.errors)
.to include 'job retry must be greater than or equal to 0'
end
end
context 'when it is not an integer' do
let(:config) { { retry: 1.5 } }
it 'returns error about wrong value' do
expect(entry).not_to be_valid
expect(entry.errors).to include 'job retry must be an integer'
end
end
context 'when the value is too high' do
let(:config) { { retry: 10 } }
it 'returns error about value too high' do
expect(entry).not_to be_valid
expect(entry.errors).to include 'job retry must be less than or equal to 2'
end
end
end
context 'when parallel value is not correct' do
context 'when it is not a numeric value' do
let(:config) { { parallel: true } }

View File

@ -0,0 +1,236 @@
require 'spec_helper'
describe Gitlab::Ci::Config::Entry::Retry do
let(:entry) { described_class.new(config) }
shared_context 'when retry value is a numeric', :numeric do
let(:config) { max }
let(:max) {}
end
shared_context 'when retry value is a hash', :hash do
let(:config) { { max: max, when: public_send(:when) }.compact }
let(:when) {}
let(:max) {}
end
describe '#value' do
subject(:value) { entry.value }
context 'when retry value is a numeric', :numeric do
let(:max) { 2 }
it 'is returned as a hash with max key' do
expect(value).to eq(max: 2)
end
end
context 'when retry value is a hash', :hash do
context 'and `when` is a string' do
let(:when) { 'unknown_failure' }
it 'returns when wrapped in an array' do
expect(value).to eq(when: ['unknown_failure'])
end
end
context 'and `when` is an array' do
let(:when) { %w[unknown_failure runner_system_failure] }
it 'returns when as it was passed' do
expect(value).to eq(when: %w[unknown_failure runner_system_failure])
end
end
end
end
describe 'validation' do
context 'when retry value is correct' do
context 'when it is a numeric', :numeric do
let(:max) { 2 }
it 'is valid' do
expect(entry).to be_valid
end
end
context 'when it is a hash', :hash do
context 'with max' do
let(:max) { 2 }
it 'is valid' do
expect(entry).to be_valid
end
end
context 'with string when' do
let(:when) { 'unknown_failure' }
it 'is valid' do
expect(entry).to be_valid
end
end
context 'with string when always' do
let(:when) { 'always' }
it 'is valid' do
expect(entry).to be_valid
end
end
context 'with array when' do
let(:when) { %w[unknown_failure runner_system_failure] }
it 'is valid' do
expect(entry).to be_valid
end
end
# Those values are documented at `doc/ci/yaml/README.md`. If any of
# those values gets invalid, documentation must be updated. To make
# sure this is catched, check explicitly that all of the documented
# values are valid. If they are not it means the documentation and this
# array must be updated.
RETRY_WHEN_IN_DOCUMENTATION = %w[
always
unknown_failure
script_failure
api_failure
stuck_or_timeout_failure
runner_system_failure
missing_dependency_failure
runner_unsupported
].freeze
RETRY_WHEN_IN_DOCUMENTATION.each do |reason|
context "with when from documentation `#{reason}`" do
let(:when) { reason }
it 'is valid' do
expect(entry).to be_valid
end
end
end
::Ci::Build.failure_reasons.each_key do |reason|
context "with when from CommitStatus.failure_reasons `#{reason}`" do
let(:when) { reason }
it 'is valid' do
expect(entry).to be_valid
end
end
end
end
end
context 'when retry value is not correct' do
context 'when it is not a numeric nor an array' do
let(:config) { true }
it 'returns error about invalid type' do
expect(entry).not_to be_valid
expect(entry.errors).to include 'retry config has to be either an integer or a hash'
end
end
context 'when it is a numeric', :numeric do
context 'when it is lower than zero' do
let(:max) { -1 }
it 'returns error about value too low' do
expect(entry).not_to be_valid
expect(entry.errors)
.to include 'retry config must be greater than or equal to 0'
end
end
context 'when it is not an integer' do
let(:max) { 1.5 }
it 'returns error about wrong value' do
expect(entry).not_to be_valid
expect(entry.errors).to include 'retry config has to be either an integer or a hash'
end
end
context 'when the value is too high' do
let(:max) { 10 }
it 'returns error about value too high' do
expect(entry).not_to be_valid
expect(entry.errors).to include 'retry config must be less than or equal to 2'
end
end
end
context 'when it is a hash', :hash do
context 'with unknown keys' do
let(:config) { { max: 2, unknown_key: :something, one_more: :key } }
it 'returns error about the unknown key' do
expect(entry).not_to be_valid
expect(entry.errors)
.to include 'retry config contains unknown keys: unknown_key, one_more'
end
end
context 'with max lower than zero' do
let(:max) { -1 }
it 'returns error about value too low' do
expect(entry).not_to be_valid
expect(entry.errors)
.to include 'retry max must be greater than or equal to 0'
end
end
context 'with max not an integer' do
let(:max) { 1.5 }
it 'returns error about wrong value' do
expect(entry).not_to be_valid
expect(entry.errors).to include 'retry max must be an integer'
end
end
context 'iwth max too high' do
let(:max) { 10 }
it 'returns error about value too high' do
expect(entry).not_to be_valid
expect(entry.errors).to include 'retry max must be less than or equal to 2'
end
end
context 'with when in wrong format' do
let(:when) { true }
it 'returns error about the wrong format' do
expect(entry).not_to be_valid
expect(entry.errors).to include 'retry when should be an array of strings or a string'
end
end
context 'with an unknown when string' do
let(:when) { 'unknown_reason' }
it 'returns error about the wrong format' do
expect(entry).not_to be_valid
expect(entry.errors).to include 'retry when is not included in the list'
end
end
context 'with an unknown failure reason in a when array' do
let(:when) { %w[unknown_reason runner_system_failure] }
it 'returns error about the wrong format' do
expect(entry).not_to be_valid
expect(entry.errors).to include 'retry when contains unknown values: unknown_reason'
end
end
end
end
end
end

View File

@ -53,11 +53,11 @@ module Gitlab
describe 'retry entry' do
context 'when retry count is specified' do
let(:config) do
YAML.dump(rspec: { script: 'rspec', retry: 1 })
YAML.dump(rspec: { script: 'rspec', retry: { max: 1 } })
end
it 'includes retry count in build options attribute' do
expect(subject[:options]).to include(retry: 1)
expect(subject[:options]).to include(retry: { max: 1 })
end
end

View File

@ -1472,15 +1472,15 @@ describe Ci::Build do
end
describe '#retries_max' do
context 'when max retries value is defined' do
subject { create(:ci_build, options: { retry: 1 }) }
context 'with retries max config option' do
subject { create(:ci_build, options: { retry: { max: 1 } }) }
it 'returns a number of configured max retries' do
it 'returns the number of configured max retries' do
expect(subject.retries_max).to eq 1
end
end
context 'when max retries value is not defined' do
context 'without retries max config option' do
subject { create(:ci_build) }
it 'returns zero' do
@ -1495,6 +1495,104 @@ describe Ci::Build do
expect(subject.retries_max).to eq 0
end
end
context 'with integer only config option' do
subject { create(:ci_build, options: { retry: 1 }) }
it 'returns the number of configured max retries' do
expect(subject.retries_max).to eq 1
end
end
end
describe '#retry_when' do
context 'with retries when config option' do
subject { create(:ci_build, options: { retry: { when: ['some_reason'] } }) }
it 'returns the configured when' do
expect(subject.retry_when).to eq ['some_reason']
end
end
context 'without retries when config option' do
subject { create(:ci_build) }
it 'returns always array' do
expect(subject.retry_when).to eq ['always']
end
end
context 'with integer only config option' do
subject { create(:ci_build, options: { retry: 1 }) }
it 'returns always array' do
expect(subject.retry_when).to eq ['always']
end
end
end
describe '#retry_failure?' do
subject { create(:ci_build) }
context 'when retries max is zero' do
before do
expect(subject).to receive(:retries_max).at_least(:once).and_return(0)
end
it 'returns false' do
expect(subject.retry_failure?).to eq false
end
end
context 'when retries max equals retries count' do
before do
expect(subject).to receive(:retries_max).at_least(:once).and_return(1)
expect(subject).to receive(:retries_count).at_least(:once).and_return(1)
end
it 'returns false' do
expect(subject.retry_failure?).to eq false
end
end
context 'when retries max is higher than retries count' do
before do
expect(subject).to receive(:retries_max).at_least(:once).and_return(2)
expect(subject).to receive(:retries_count).at_least(:once).and_return(1)
end
context 'and retry when is always' do
before do
expect(subject).to receive(:retry_when).at_least(:once).and_return(['always'])
end
it 'returns true' do
expect(subject.retry_failure?).to eq true
end
end
context 'and retry when includes the failure_reason' do
before do
expect(subject).to receive(:failure_reason).at_least(:once).and_return('some_reason')
expect(subject).to receive(:retry_when).at_least(:once).and_return(['some_reason'])
end
it 'returns true' do
expect(subject.retry_failure?).to eq true
end
end
context 'and retry when does not include failure_reason' do
before do
expect(subject).to receive(:failure_reason).at_least(:once).and_return('some_reason')
expect(subject).to receive(:retry_when).at_least(:once).and_return(['some', 'other failure'])
end
it 'returns false' do
expect(subject.retry_failure?).to eq false
end
end
end
end
end
@ -2887,7 +2985,7 @@ describe Ci::Build do
end
context 'when build is configured to be retried' do
subject { create(:ci_build, :running, options: { retry: 3 }, project: project, user: user) }
subject { create(:ci_build, :running, options: { retry: { max: 3 } }, project: project, user: user) }
it 'retries build and assigns the same user to it' do
expect(described_class).to receive(:retry)

View File

@ -435,16 +435,34 @@ describe Ci::CreatePipelineService do
end
context 'when builds with auto-retries are configured' do
before do
config = YAML.dump(rspec: { script: 'rspec', retry: 2 })
stub_ci_pipeline_yaml_file(config)
context 'as an integer' do
before do
config = YAML.dump(rspec: { script: 'rspec', retry: 2 })
stub_ci_pipeline_yaml_file(config)
end
it 'correctly creates builds with auto-retry value configured' do
pipeline = execute_service
expect(pipeline).to be_persisted
expect(pipeline.builds.find_by(name: 'rspec').retries_max).to eq 2
expect(pipeline.builds.find_by(name: 'rspec').retry_when).to eq ['always']
end
end
it 'correctly creates builds with auto-retry value configured' do
pipeline = execute_service
context 'as hash' do
before do
config = YAML.dump(rspec: { script: 'rspec', retry: { max: 2, when: 'runner_system_failure' } })
stub_ci_pipeline_yaml_file(config)
end
expect(pipeline).to be_persisted
expect(pipeline.builds.find_by(name: 'rspec').retries_max).to eq 2
it 'correctly creates builds with auto-retry value configured' do
pipeline = execute_service
expect(pipeline).to be_persisted
expect(pipeline.builds.find_by(name: 'rspec').retries_max).to eq 2
expect(pipeline.builds.find_by(name: 'rspec').retry_when).to eq ['runner_system_failure']
end
end
end

View File

@ -671,9 +671,9 @@ describe Ci::ProcessPipelineService, '#execute' do
context 'when builds with auto-retries are configured' do
before do
create_build('build:1', stage_idx: 0, user: user, options: { retry: 2 })
create_build('build:1', stage_idx: 0, user: user, options: { retry: { max: 2 } })
create_build('test:1', stage_idx: 1, user: user, when: :on_failure)
create_build('test:2', stage_idx: 1, user: user, options: { retry: 1 })
create_build('test:2', stage_idx: 1, user: user, options: { retry: { max: 1 } })
end
it 'automatically retries builds in a valid order' do