Merge branch 'bvl-circuitbreaker-backoff' into 'master'
Circuitbreaker backoff and retries Closes #37383 and #38231 See merge request gitlab-org/gitlab-ce!14933
This commit is contained in:
commit
7fbefa3d04
|
@ -120,6 +120,15 @@ module ApplicationSettingsHelper
|
|||
message.html_safe
|
||||
end
|
||||
|
||||
def circuitbreaker_access_retries_help_text
|
||||
_('The number of attempts GitLab will make to access a storage.')
|
||||
end
|
||||
|
||||
def circuitbreaker_backoff_threshold_help_text
|
||||
_("The number of failures after which GitLab will start temporarily "\
|
||||
"disabling access to a storage shard on a host")
|
||||
end
|
||||
|
||||
def circuitbreaker_failure_wait_time_help_text
|
||||
_("When access to a storage fails. GitLab will prevent access to the "\
|
||||
"storage for the time specified here. This allows the filesystem to "\
|
||||
|
@ -144,6 +153,8 @@ module ApplicationSettingsHelper
|
|||
:akismet_api_key,
|
||||
:akismet_enabled,
|
||||
:auto_devops_enabled,
|
||||
:circuitbreaker_access_retries,
|
||||
:circuitbreaker_backoff_threshold,
|
||||
:circuitbreaker_failure_count_threshold,
|
||||
:circuitbreaker_failure_reset_time,
|
||||
:circuitbreaker_failure_wait_time,
|
||||
|
|
|
@ -16,17 +16,16 @@ module StorageHealthHelper
|
|||
def message_for_circuit_breaker(circuit_breaker)
|
||||
maximum_failures = circuit_breaker.failure_count_threshold
|
||||
current_failures = circuit_breaker.failure_count
|
||||
permanently_broken = circuit_breaker.circuit_broken? && current_failures >= maximum_failures
|
||||
|
||||
translation_params = { number_of_failures: current_failures,
|
||||
maximum_failures: maximum_failures,
|
||||
number_of_seconds: circuit_breaker.failure_wait_time }
|
||||
|
||||
if permanently_broken
|
||||
if circuit_breaker.circuit_broken?
|
||||
s_("%{number_of_failures} of %{maximum_failures} failures. GitLab will not "\
|
||||
"retry automatically. Reset storage information when the problem is "\
|
||||
"resolved.") % translation_params
|
||||
elsif circuit_breaker.circuit_broken?
|
||||
elsif circuit_breaker.backing_off?
|
||||
_("%{number_of_failures} of %{maximum_failures} failures. GitLab will "\
|
||||
"block access for %{number_of_seconds} seconds.") % translation_params
|
||||
else
|
||||
|
|
|
@ -153,13 +153,25 @@ class ApplicationSetting < ActiveRecord::Base
|
|||
presence: true,
|
||||
numericality: { greater_than_or_equal_to: 0 }
|
||||
|
||||
validates :circuitbreaker_failure_count_threshold,
|
||||
validates :circuitbreaker_backoff_threshold,
|
||||
:circuitbreaker_failure_count_threshold,
|
||||
:circuitbreaker_failure_wait_time,
|
||||
:circuitbreaker_failure_reset_time,
|
||||
:circuitbreaker_storage_timeout,
|
||||
presence: true,
|
||||
numericality: { only_integer: true, greater_than_or_equal_to: 0 }
|
||||
|
||||
validates :circuitbreaker_access_retries,
|
||||
presence: true,
|
||||
numericality: { only_integer: true, greater_than_or_equal_to: 1 }
|
||||
|
||||
validates_each :circuitbreaker_backoff_threshold do |record, attr, value|
|
||||
if value.to_i >= record.circuitbreaker_failure_count_threshold
|
||||
record.errors.add(attr, _("The circuitbreaker backoff threshold should be "\
|
||||
"lower than the failure count threshold"))
|
||||
end
|
||||
end
|
||||
|
||||
SUPPORTED_KEY_TYPES.each do |type|
|
||||
validates :"#{type}_key_restriction", presence: true, key_restriction: { type: type }
|
||||
end
|
||||
|
|
|
@ -533,29 +533,41 @@
|
|||
%fieldset
|
||||
%legend Git Storage Circuitbreaker settings
|
||||
.form-group
|
||||
= f.label :circuitbreaker_failure_count_threshold, _('Maximum git storage failures'), class: 'control-label col-sm-2'
|
||||
= f.label :circuitbreaker_access_retries, _('Number of access attempts'), class: 'control-label col-sm-2'
|
||||
.col-sm-10
|
||||
= f.number_field :circuitbreaker_failure_count_threshold, class: 'form-control'
|
||||
= f.number_field :circuitbreaker_access_retries, class: 'form-control'
|
||||
.help-block
|
||||
= circuitbreaker_failure_count_help_text
|
||||
= circuitbreaker_access_retries_help_text
|
||||
.form-group
|
||||
= f.label :circuitbreaker_storage_timeout, _('Seconds to wait for a storage access attempt'), class: 'control-label col-sm-2'
|
||||
.col-sm-10
|
||||
= f.number_field :circuitbreaker_storage_timeout, class: 'form-control'
|
||||
.help-block
|
||||
= circuitbreaker_storage_timeout_help_text
|
||||
.form-group
|
||||
= f.label :circuitbreaker_backoff_threshold, _('Number of failures before backing off'), class: 'control-label col-sm-2'
|
||||
.col-sm-10
|
||||
= f.number_field :circuitbreaker_backoff_threshold, class: 'form-control'
|
||||
.help-block
|
||||
= circuitbreaker_backoff_threshold_help_text
|
||||
.form-group
|
||||
= f.label :circuitbreaker_failure_wait_time, _('Seconds to wait after a storage failure'), class: 'control-label col-sm-2'
|
||||
.col-sm-10
|
||||
= f.number_field :circuitbreaker_failure_wait_time, class: 'form-control'
|
||||
.help-block
|
||||
= circuitbreaker_failure_wait_time_help_text
|
||||
.form-group
|
||||
= f.label :circuitbreaker_failure_count_threshold, _('Maximum git storage failures'), class: 'control-label col-sm-2'
|
||||
.col-sm-10
|
||||
= f.number_field :circuitbreaker_failure_count_threshold, class: 'form-control'
|
||||
.help-block
|
||||
= circuitbreaker_failure_count_help_text
|
||||
.form-group
|
||||
= f.label :circuitbreaker_failure_reset_time, _('Seconds before reseting failure information'), class: 'control-label col-sm-2'
|
||||
.col-sm-10
|
||||
= f.number_field :circuitbreaker_failure_reset_time, class: 'form-control'
|
||||
.help-block
|
||||
= circuitbreaker_failure_reset_time_help_text
|
||||
.form-group
|
||||
= f.label :circuitbreaker_storage_timeout, _('Seconds to wait for a storage access attempt'), class: 'control-label col-sm-2'
|
||||
.col-sm-10
|
||||
= f.number_field :circuitbreaker_storage_timeout, class: 'form-control'
|
||||
.help-block
|
||||
= circuitbreaker_storage_timeout_help_text
|
||||
|
||||
%fieldset
|
||||
%legend Repository Checks
|
||||
|
|
|
@ -0,0 +1,6 @@
|
|||
---
|
||||
title: Make the circuitbreaker more robust by adding higher thresholds, and multiple
|
||||
access attempts.
|
||||
merge_request: 14933
|
||||
author:
|
||||
type: fixed
|
|
@ -0,0 +1,16 @@
|
|||
class AddNewCircuitbreakerSettingsToApplicationSettings < ActiveRecord::Migration
|
||||
include Gitlab::Database::MigrationHelpers
|
||||
|
||||
DOWNTIME = false
|
||||
|
||||
def change
|
||||
add_column :application_settings,
|
||||
:circuitbreaker_access_retries,
|
||||
:integer,
|
||||
default: 3
|
||||
add_column :application_settings,
|
||||
:circuitbreaker_backoff_threshold,
|
||||
:integer,
|
||||
default: 80
|
||||
end
|
||||
end
|
|
@ -11,7 +11,7 @@
|
|||
#
|
||||
# It's strongly recommended that you check this file into your version control system.
|
||||
|
||||
ActiveRecord::Schema.define(version: 20171012101043) do
|
||||
ActiveRecord::Schema.define(version: 20171017145932) do
|
||||
|
||||
# These are extensions that must be enabled in order to support this database
|
||||
enable_extension "plpgsql"
|
||||
|
@ -138,6 +138,8 @@ ActiveRecord::Schema.define(version: 20171012101043) do
|
|||
t.integer "circuitbreaker_failure_wait_time", default: 30
|
||||
t.integer "circuitbreaker_failure_reset_time", default: 1800
|
||||
t.integer "circuitbreaker_storage_timeout", default: 30
|
||||
t.integer "circuitbreaker_access_retries", default: 3
|
||||
t.integer "circuitbreaker_backoff_threshold", default: 80
|
||||
end
|
||||
|
||||
create_table "audit_events", force: :cascade do |t|
|
||||
|
|
Binary file not shown.
Before Width: | Height: | Size: 208 KiB After Width: | Height: | Size: 327 KiB |
|
@ -109,6 +109,11 @@ This can be configured from the admin interface:
|
|||
|
||||
![circuitbreaker configuration](img/circuitbreaker_config.png)
|
||||
|
||||
**Number of access attempts**: The number of attempts GitLab will make to access a
|
||||
storage when probing a shard.
|
||||
|
||||
**Number of failures before backing off**: The number of failures after which
|
||||
GitLab will start temporarily disabling access to a storage shard on a host.
|
||||
|
||||
**Maximum git storage failures:** The number of failures of after which GitLab will
|
||||
completely prevent access to the storage. The number of failures can be reset in
|
||||
|
@ -126,6 +131,15 @@ mount is reset.
|
|||
**Seconds to wait for a storage access attempt:** The time in seconds GitLab will
|
||||
try to access storage. After this time a timeout error will be raised.
|
||||
|
||||
To enable the circuitbreaker for repository storage you can flip the feature flag from a rails console:
|
||||
|
||||
```
|
||||
Feature.enable('git_storage_circuit_breaker')
|
||||
```
|
||||
|
||||
Alternatively it can be enabled by setting `true` in the `GIT_STORAGE_CIRCUIT_BREAKER` environment variable.
|
||||
This approach would be used when enabling the circuit breaker on a single host.
|
||||
|
||||
When storage failures occur, this will be visible in the admin interface like this:
|
||||
|
||||
![failing storage](img/failing_storage.png)
|
||||
|
|
|
@ -69,6 +69,8 @@ PUT /application/settings
|
|||
| `after_sign_up_text` | string | no | Text shown to the user after signing up |
|
||||
| `akismet_api_key` | string | no | API key for akismet spam protection |
|
||||
| `akismet_enabled` | boolean | no | Enable or disable akismet spam protection |
|
||||
| `circuitbreaker_access_retries | integer | no | The number of attempts GitLab will make to access a storage. |
|
||||
| `circuitbreaker_backoff_threshold | integer | no | The number of failures after which GitLab will start temporarily disabling access to a storage shard on a host. |
|
||||
| `circuitbreaker_failure_count_threshold` | integer | no | The number of failures of after which GitLab will completely prevent access to the storage. |
|
||||
| `circuitbreaker_failure_reset_time` | integer | no | Time in seconds GitLab will keep storage failure information. When no failures occur during this time, the failure information is reset. |
|
||||
| `circuitbreaker_failure_wait_time` | integer | no | Time in seconds GitLab will block access to a failing storage to allow it to recover. |
|
||||
|
|
|
@ -12,6 +12,7 @@ module Gitlab
|
|||
|
||||
CircuitOpen = Class.new(Inaccessible)
|
||||
Misconfiguration = Class.new(Inaccessible)
|
||||
Failing = Class.new(Inaccessible)
|
||||
|
||||
REDIS_KEY_PREFIX = 'storage_accessible:'.freeze
|
||||
|
||||
|
|
|
@ -54,7 +54,7 @@ module Gitlab
|
|||
end
|
||||
|
||||
def perform
|
||||
return yield unless Feature.enabled?('git_storage_circuit_breaker')
|
||||
return yield unless enabled?
|
||||
|
||||
check_storage_accessible!
|
||||
|
||||
|
@ -64,10 +64,27 @@ module Gitlab
|
|||
def circuit_broken?
|
||||
return false if no_failures?
|
||||
|
||||
recent_failure = last_failure > failure_wait_time.seconds.ago
|
||||
too_many_failures = failure_count > failure_count_threshold
|
||||
failure_count > failure_count_threshold
|
||||
end
|
||||
|
||||
recent_failure || too_many_failures
|
||||
def backing_off?
|
||||
return false if no_failures?
|
||||
|
||||
recent_failure = last_failure > failure_wait_time.seconds.ago
|
||||
too_many_failures = failure_count > backoff_threshold
|
||||
|
||||
recent_failure && too_many_failures
|
||||
end
|
||||
|
||||
private
|
||||
|
||||
# The circuitbreaker can be enabled for the entire fleet using a Feature
|
||||
# flag.
|
||||
#
|
||||
# Enabling it for a single host can be done setting the
|
||||
# `GIT_STORAGE_CIRCUIT_BREAKER` environment variable.
|
||||
def enabled?
|
||||
ENV['GIT_STORAGE_CIRCUIT_BREAKER'].present? || Feature.enabled?('git_storage_circuit_breaker')
|
||||
end
|
||||
|
||||
def failure_info
|
||||
|
@ -83,7 +100,7 @@ module Gitlab
|
|||
return @storage_available if @storage_available
|
||||
|
||||
if @storage_available = Gitlab::Git::Storage::ForkedStorageCheck
|
||||
.storage_available?(storage_path, storage_timeout)
|
||||
.storage_available?(storage_path, storage_timeout, access_retries)
|
||||
track_storage_accessible
|
||||
else
|
||||
track_storage_inaccessible
|
||||
|
@ -94,7 +111,11 @@ module Gitlab
|
|||
|
||||
def check_storage_accessible!
|
||||
if circuit_broken?
|
||||
raise Gitlab::Git::Storage::CircuitOpen.new("Circuit for #{storage} is broken", failure_wait_time)
|
||||
raise Gitlab::Git::Storage::CircuitOpen.new("Circuit for #{storage} is broken", failure_reset_time)
|
||||
end
|
||||
|
||||
if backing_off?
|
||||
raise Gitlab::Git::Storage::Failing.new("Backing off access to #{storage}", failure_wait_time)
|
||||
end
|
||||
|
||||
unless storage_available?
|
||||
|
@ -131,12 +152,6 @@ module Gitlab
|
|||
end
|
||||
end
|
||||
|
||||
def cache_key
|
||||
@cache_key ||= "#{Gitlab::Git::Storage::REDIS_KEY_PREFIX}#{storage}:#{hostname}"
|
||||
end
|
||||
|
||||
private
|
||||
|
||||
def get_failure_info
|
||||
last_failure, failure_count = Gitlab::Git::Storage.redis.with do |redis|
|
||||
redis.hmget(cache_key, :last_failure, :failure_count)
|
||||
|
@ -146,6 +161,10 @@ module Gitlab
|
|||
|
||||
FailureInfo.new(last_failure, failure_count.to_i)
|
||||
end
|
||||
|
||||
def cache_key
|
||||
@cache_key ||= "#{Gitlab::Git::Storage::REDIS_KEY_PREFIX}#{storage}:#{hostname}"
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
@ -18,6 +18,14 @@ module Gitlab
|
|||
application_settings.circuitbreaker_storage_timeout
|
||||
end
|
||||
|
||||
def access_retries
|
||||
application_settings.circuitbreaker_access_retries
|
||||
end
|
||||
|
||||
def backoff_threshold
|
||||
application_settings.circuitbreaker_backoff_threshold
|
||||
end
|
||||
|
||||
private
|
||||
|
||||
def application_settings
|
||||
|
|
|
@ -4,8 +4,17 @@ module Gitlab
|
|||
module ForkedStorageCheck
|
||||
extend self
|
||||
|
||||
def storage_available?(path, timeout_seconds = 5)
|
||||
status = timeout_check(path, timeout_seconds)
|
||||
def storage_available?(path, timeout_seconds = 5, retries = 1)
|
||||
partial_timeout = timeout_seconds / retries
|
||||
status = timeout_check(path, partial_timeout)
|
||||
|
||||
# If the status check did not succeed the first time, we retry a few
|
||||
# more times to avoid one-off failures
|
||||
current_attempts = 1
|
||||
while current_attempts < retries && !status.success?
|
||||
status = timeout_check(path, partial_timeout)
|
||||
current_attempts += 1
|
||||
end
|
||||
|
||||
status.success?
|
||||
end
|
||||
|
|
|
@ -25,6 +25,10 @@ module Gitlab
|
|||
!!@error
|
||||
end
|
||||
|
||||
def backing_off?
|
||||
false
|
||||
end
|
||||
|
||||
def last_failure
|
||||
circuit_broken? ? Time.now : nil
|
||||
end
|
||||
|
|
|
@ -79,7 +79,9 @@ describe Gitlab::Git::Storage::CircuitBreaker, clean_gitlab_redis_shared_state:
|
|||
stub_application_setting(circuitbreaker_failure_count_threshold: 0,
|
||||
circuitbreaker_failure_wait_time: 1,
|
||||
circuitbreaker_failure_reset_time: 2,
|
||||
circuitbreaker_storage_timeout: 3)
|
||||
circuitbreaker_storage_timeout: 3,
|
||||
circuitbreaker_access_retries: 4,
|
||||
circuitbreaker_backoff_threshold: 5)
|
||||
end
|
||||
|
||||
describe '#failure_count_threshold' do
|
||||
|
@ -105,14 +107,43 @@ describe Gitlab::Git::Storage::CircuitBreaker, clean_gitlab_redis_shared_state:
|
|||
expect(circuit_breaker.storage_timeout).to eq(3)
|
||||
end
|
||||
end
|
||||
|
||||
describe '#access_retries' do
|
||||
it 'reads the value from settings' do
|
||||
expect(circuit_breaker.access_retries).to eq(4)
|
||||
end
|
||||
end
|
||||
|
||||
describe '#backoff_threshold' do
|
||||
it 'reads the value from settings' do
|
||||
expect(circuit_breaker.backoff_threshold).to eq(5)
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
describe '#perform' do
|
||||
it 'raises an exception with retry time when the circuit is open' do
|
||||
allow(circuit_breaker).to receive(:circuit_broken?).and_return(true)
|
||||
it 'raises the correct exception when the circuit is open' do
|
||||
set_in_redis(:last_failure, 1.day.ago.to_f)
|
||||
set_in_redis(:failure_count, 999)
|
||||
|
||||
expect { |b| circuit_breaker.perform(&b) }
|
||||
.to raise_error(Gitlab::Git::Storage::CircuitOpen)
|
||||
.to raise_error do |exception|
|
||||
expect(exception).to be_kind_of(Gitlab::Git::Storage::CircuitOpen)
|
||||
expect(exception.retry_after).to eq(1800)
|
||||
end
|
||||
end
|
||||
|
||||
it 'raises the correct exception when backing off' do
|
||||
Timecop.freeze do
|
||||
set_in_redis(:last_failure, 1.second.ago.to_f)
|
||||
set_in_redis(:failure_count, 90)
|
||||
|
||||
expect { |b| circuit_breaker.perform(&b) }
|
||||
.to raise_error do |exception|
|
||||
expect(exception).to be_kind_of(Gitlab::Git::Storage::Failing)
|
||||
expect(exception.retry_after).to eq(30)
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
it 'yields the block' do
|
||||
|
@ -122,6 +153,7 @@ describe Gitlab::Git::Storage::CircuitBreaker, clean_gitlab_redis_shared_state:
|
|||
|
||||
it 'checks if the storage is available' do
|
||||
expect(circuit_breaker).to receive(:check_storage_accessible!)
|
||||
.and_call_original
|
||||
|
||||
circuit_breaker.perform { 'hello world' }
|
||||
end
|
||||
|
@ -137,16 +169,82 @@ describe Gitlab::Git::Storage::CircuitBreaker, clean_gitlab_redis_shared_state:
|
|||
.to raise_error(Rugged::OSError)
|
||||
end
|
||||
|
||||
context 'with the feature disabled' do
|
||||
it 'returns the block without checking accessibility' do
|
||||
stub_feature_flags(git_storage_circuit_breaker: false)
|
||||
it 'tracks that the storage was accessible' do
|
||||
set_in_redis(:failure_count, 10)
|
||||
set_in_redis(:last_failure, Time.now.to_f)
|
||||
|
||||
expect(circuit_breaker).not_to receive(:circuit_broken?)
|
||||
circuit_breaker.perform { '' }
|
||||
|
||||
expect(value_from_redis(:failure_count).to_i).to eq(0)
|
||||
expect(value_from_redis(:last_failure)).to be_empty
|
||||
expect(circuit_breaker.failure_count).to eq(0)
|
||||
expect(circuit_breaker.last_failure).to be_nil
|
||||
end
|
||||
|
||||
it 'only performs the accessibility check once' do
|
||||
expect(Gitlab::Git::Storage::ForkedStorageCheck)
|
||||
.to receive(:storage_available?).once.and_call_original
|
||||
|
||||
2.times { circuit_breaker.perform { '' } }
|
||||
end
|
||||
|
||||
it 'calls the check with the correct arguments' do
|
||||
stub_application_setting(circuitbreaker_storage_timeout: 30,
|
||||
circuitbreaker_access_retries: 3)
|
||||
|
||||
expect(Gitlab::Git::Storage::ForkedStorageCheck)
|
||||
.to receive(:storage_available?).with(TestEnv.repos_path, 30, 3)
|
||||
.and_call_original
|
||||
|
||||
circuit_breaker.perform { '' }
|
||||
end
|
||||
|
||||
context 'with the feature disabled' do
|
||||
before do
|
||||
stub_feature_flags(git_storage_circuit_breaker: false)
|
||||
end
|
||||
|
||||
it 'returns the block without checking accessibility' do
|
||||
expect(circuit_breaker).not_to receive(:check_storage_accessible!)
|
||||
|
||||
result = circuit_breaker.perform { 'hello' }
|
||||
|
||||
expect(result).to eq('hello')
|
||||
end
|
||||
|
||||
it 'allows enabling the feature using an ENV var' do
|
||||
stub_env('GIT_STORAGE_CIRCUIT_BREAKER', 'true')
|
||||
expect(circuit_breaker).to receive(:check_storage_accessible!)
|
||||
|
||||
result = circuit_breaker.perform { 'hello' }
|
||||
|
||||
expect(result).to eq('hello')
|
||||
end
|
||||
end
|
||||
|
||||
context 'the storage is not available' do
|
||||
let(:storage_name) { 'broken' }
|
||||
|
||||
it 'raises the correct exception' do
|
||||
expect(circuit_breaker).to receive(:track_storage_inaccessible)
|
||||
|
||||
expect { circuit_breaker.perform { '' } }
|
||||
.to raise_error do |exception|
|
||||
expect(exception).to be_kind_of(Gitlab::Git::Storage::Inaccessible)
|
||||
expect(exception.retry_after).to eq(30)
|
||||
end
|
||||
end
|
||||
|
||||
it 'tracks that the storage was inaccessible' do
|
||||
Timecop.freeze do
|
||||
expect { circuit_breaker.perform { '' } }.to raise_error(Gitlab::Git::Storage::Inaccessible)
|
||||
|
||||
expect(value_from_redis(:failure_count).to_i).to eq(1)
|
||||
expect(value_from_redis(:last_failure)).not_to be_empty
|
||||
expect(circuit_breaker.failure_count).to eq(1)
|
||||
expect(circuit_breaker.last_failure).to be_within(1.second).of(Time.now)
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
|
@ -158,183 +256,40 @@ describe Gitlab::Git::Storage::CircuitBreaker, clean_gitlab_redis_shared_state:
|
|||
expect(circuit_breaker.circuit_broken?).to be_falsey
|
||||
end
|
||||
|
||||
it 'is broken when there was a recent failure' do
|
||||
Timecop.freeze do
|
||||
set_in_redis(:last_failure, 1.second.ago.to_f)
|
||||
set_in_redis(:failure_count, 1)
|
||||
|
||||
expect(circuit_breaker.circuit_broken?).to be_truthy
|
||||
end
|
||||
end
|
||||
|
||||
it 'is broken when there are too many failures' do
|
||||
set_in_redis(:last_failure, 1.day.ago.to_f)
|
||||
set_in_redis(:failure_count, 200)
|
||||
|
||||
expect(circuit_breaker.circuit_broken?).to be_truthy
|
||||
end
|
||||
end
|
||||
|
||||
describe '#backing_off?' do
|
||||
it 'is true when there was a recent failure' do
|
||||
Timecop.freeze do
|
||||
set_in_redis(:last_failure, 1.second.ago.to_f)
|
||||
set_in_redis(:failure_count, 90)
|
||||
|
||||
expect(circuit_breaker.backing_off?).to be_truthy
|
||||
end
|
||||
end
|
||||
|
||||
context 'the `failure_wait_time` is set to 0' do
|
||||
before do
|
||||
stub_application_setting(circuitbreaker_failure_wait_time: 0)
|
||||
end
|
||||
|
||||
it 'is working even when there is a recent failure' do
|
||||
it 'is working even when there are failures' do
|
||||
Timecop.freeze do
|
||||
set_in_redis(:last_failure, 0.seconds.ago.to_f)
|
||||
set_in_redis(:failure_count, 1)
|
||||
set_in_redis(:failure_count, 90)
|
||||
|
||||
expect(circuit_breaker.circuit_broken?).to be_falsey
|
||||
expect(circuit_breaker.backing_off?).to be_falsey
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
describe "storage_available?" do
|
||||
context 'the storage is available' do
|
||||
it 'tracks that the storage was accessible an raises the error' do
|
||||
expect(circuit_breaker).to receive(:track_storage_accessible)
|
||||
|
||||
circuit_breaker.storage_available?
|
||||
end
|
||||
|
||||
it 'only performs the check once' do
|
||||
expect(Gitlab::Git::Storage::ForkedStorageCheck)
|
||||
.to receive(:storage_available?).once.and_call_original
|
||||
|
||||
2.times { circuit_breaker.storage_available? }
|
||||
end
|
||||
end
|
||||
|
||||
context 'storage is not available' do
|
||||
let(:storage_name) { 'broken' }
|
||||
|
||||
it 'tracks that the storage was inaccessible' do
|
||||
expect(circuit_breaker).to receive(:track_storage_inaccessible)
|
||||
|
||||
circuit_breaker.storage_available?
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
describe '#check_storage_accessible!' do
|
||||
it 'raises an exception with retry time when the circuit is open' do
|
||||
allow(circuit_breaker).to receive(:circuit_broken?).and_return(true)
|
||||
|
||||
expect { circuit_breaker.check_storage_accessible! }
|
||||
.to raise_error do |exception|
|
||||
expect(exception).to be_kind_of(Gitlab::Git::Storage::CircuitOpen)
|
||||
expect(exception.retry_after).to eq(30)
|
||||
end
|
||||
end
|
||||
|
||||
context 'the storage is not available' do
|
||||
let(:storage_name) { 'broken' }
|
||||
|
||||
it 'raises an error' do
|
||||
expect(circuit_breaker).to receive(:track_storage_inaccessible)
|
||||
|
||||
expect { circuit_breaker.check_storage_accessible! }
|
||||
.to raise_error do |exception|
|
||||
expect(exception).to be_kind_of(Gitlab::Git::Storage::Inaccessible)
|
||||
expect(exception.retry_after).to eq(30)
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
describe '#track_storage_inaccessible' do
|
||||
around do |example|
|
||||
Timecop.freeze { example.run }
|
||||
end
|
||||
|
||||
it 'records the failure time in redis' do
|
||||
circuit_breaker.track_storage_inaccessible
|
||||
|
||||
failure_time = value_from_redis(:last_failure)
|
||||
|
||||
expect(Time.at(failure_time.to_i)).to be_within(1.second).of(Time.now)
|
||||
end
|
||||
|
||||
it 'sets the failure time on the breaker without reloading' do
|
||||
circuit_breaker.track_storage_inaccessible
|
||||
|
||||
expect(circuit_breaker).not_to receive(:get_failure_info)
|
||||
expect(circuit_breaker.last_failure).to eq(Time.now)
|
||||
end
|
||||
|
||||
it 'increments the failure count in redis' do
|
||||
set_in_redis(:failure_count, 10)
|
||||
|
||||
circuit_breaker.track_storage_inaccessible
|
||||
|
||||
expect(value_from_redis(:failure_count).to_i).to be(11)
|
||||
end
|
||||
|
||||
it 'increments the failure count on the breaker without reloading' do
|
||||
set_in_redis(:failure_count, 10)
|
||||
|
||||
circuit_breaker.track_storage_inaccessible
|
||||
|
||||
expect(circuit_breaker).not_to receive(:get_failure_info)
|
||||
expect(circuit_breaker.failure_count).to eq(11)
|
||||
end
|
||||
end
|
||||
|
||||
describe '#track_storage_accessible' do
|
||||
it 'sets the failure count to zero in redis' do
|
||||
set_in_redis(:failure_count, 10)
|
||||
|
||||
circuit_breaker.track_storage_accessible
|
||||
|
||||
expect(value_from_redis(:failure_count).to_i).to be(0)
|
||||
end
|
||||
|
||||
it 'sets the failure count to zero on the breaker without reloading' do
|
||||
set_in_redis(:failure_count, 10)
|
||||
|
||||
circuit_breaker.track_storage_accessible
|
||||
|
||||
expect(circuit_breaker).not_to receive(:get_failure_info)
|
||||
expect(circuit_breaker.failure_count).to eq(0)
|
||||
end
|
||||
|
||||
it 'removes the last failure time from redis' do
|
||||
set_in_redis(:last_failure, Time.now.to_i)
|
||||
|
||||
circuit_breaker.track_storage_accessible
|
||||
|
||||
expect(circuit_breaker).not_to receive(:get_failure_info)
|
||||
expect(circuit_breaker.last_failure).to be_nil
|
||||
end
|
||||
|
||||
it 'removes the last failure time from the breaker without reloading' do
|
||||
set_in_redis(:last_failure, Time.now.to_i)
|
||||
|
||||
circuit_breaker.track_storage_accessible
|
||||
|
||||
expect(value_from_redis(:last_failure)).to be_empty
|
||||
end
|
||||
|
||||
it 'wont connect to redis when there are no failures' do
|
||||
expect(Gitlab::Git::Storage.redis).to receive(:with).once
|
||||
.and_call_original
|
||||
expect(circuit_breaker).to receive(:track_storage_accessible)
|
||||
.and_call_original
|
||||
|
||||
circuit_breaker.track_storage_accessible
|
||||
end
|
||||
end
|
||||
|
||||
describe '#no_failures?' do
|
||||
it 'is false when a failure was tracked' do
|
||||
set_in_redis(:last_failure, Time.now.to_i)
|
||||
set_in_redis(:failure_count, 1)
|
||||
|
||||
expect(circuit_breaker.no_failures?).to be_falsey
|
||||
end
|
||||
end
|
||||
|
||||
describe '#last_failure' do
|
||||
it 'returns the last failure time' do
|
||||
time = Time.parse("2017-05-26 17:52:30")
|
||||
|
@ -351,10 +306,4 @@ describe Gitlab::Git::Storage::CircuitBreaker, clean_gitlab_redis_shared_state:
|
|||
expect(circuit_breaker.failure_count).to eq(7)
|
||||
end
|
||||
end
|
||||
|
||||
describe '#cache_key' do
|
||||
it 'includes storage and host' do
|
||||
expect(circuit_breaker.cache_key).to eq(cache_key)
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
@ -33,6 +33,21 @@ describe Gitlab::Git::Storage::ForkedStorageCheck, broken_storage: true, skip_da
|
|||
expect(runtime).to be < 1.0
|
||||
end
|
||||
|
||||
it 'will try the specified amount of times before failing' do
|
||||
allow(described_class).to receive(:check_filesystem_in_process) do
|
||||
Process.spawn("sleep 10")
|
||||
end
|
||||
|
||||
expect(Process).to receive(:spawn).with('sleep 10').twice
|
||||
.and_call_original
|
||||
|
||||
runtime = Benchmark.realtime do
|
||||
described_class.storage_available?(existing_path, 0.5, 2)
|
||||
end
|
||||
|
||||
expect(runtime).to be < 1.0
|
||||
end
|
||||
|
||||
describe 'when using paths with spaces' do
|
||||
let(:test_dir) { Rails.root.join('tmp', 'tests', 'storage_check') }
|
||||
let(:path_with_spaces) { File.join(test_dir, 'path with spaces') }
|
||||
|
|
|
@ -65,17 +65,6 @@ describe Gitlab::Git::Storage::NullCircuitBreaker do
|
|||
ours = described_class.public_instance_methods
|
||||
theirs = Gitlab::Git::Storage::CircuitBreaker.public_instance_methods
|
||||
|
||||
# These methods are not part of the public API, but are public to allow the
|
||||
# CircuitBreaker specs to operate. They should be made private over time.
|
||||
exceptions = %i[
|
||||
cache_key
|
||||
check_storage_accessible!
|
||||
no_failures?
|
||||
storage_available?
|
||||
track_storage_accessible
|
||||
track_storage_inaccessible
|
||||
]
|
||||
|
||||
expect(theirs - ours).to contain_exactly(*exceptions)
|
||||
expect(theirs - ours).to be_empty
|
||||
end
|
||||
end
|
||||
|
|
|
@ -115,7 +115,8 @@ describe ApplicationSetting do
|
|||
end
|
||||
|
||||
context 'circuitbreaker settings' do
|
||||
[:circuitbreaker_failure_count_threshold,
|
||||
[:circuitbreaker_backoff_threshold,
|
||||
:circuitbreaker_failure_count_threshold,
|
||||
:circuitbreaker_failure_wait_time,
|
||||
:circuitbreaker_failure_reset_time,
|
||||
:circuitbreaker_storage_timeout].each do |field|
|
||||
|
@ -125,6 +126,16 @@ describe ApplicationSetting do
|
|||
.is_greater_than_or_equal_to(0)
|
||||
end
|
||||
end
|
||||
|
||||
it 'requires the `backoff_threshold` to be lower than the `failure_count_threshold`' do
|
||||
setting.circuitbreaker_failure_count_threshold = 10
|
||||
setting.circuitbreaker_backoff_threshold = 15
|
||||
failure_message = "The circuitbreaker backoff threshold should be lower "\
|
||||
"than the failure count threshold"
|
||||
|
||||
expect(setting).not_to be_valid
|
||||
expect(setting.errors[:circuitbreaker_backoff_threshold]).to include(failure_message)
|
||||
end
|
||||
end
|
||||
|
||||
context 'repository storages' do
|
||||
|
|
Loading…
Reference in New Issue