diff --git a/app/helpers/application_settings_helper.rb b/app/helpers/application_settings_helper.rb index 1ee8911bb1a..cd1ecaadb85 100644 --- a/app/helpers/application_settings_helper.rb +++ b/app/helpers/application_settings_helper.rb @@ -120,6 +120,15 @@ module ApplicationSettingsHelper message.html_safe end + def circuitbreaker_access_retries_help_text + _('The number of attempts GitLab will make to access a storage.') + end + + def circuitbreaker_backoff_threshold_help_text + _("The number of failures after which GitLab will start temporarily "\ + "disabling access to a storage shard on a host") + end + def circuitbreaker_failure_wait_time_help_text _("When access to a storage fails. GitLab will prevent access to the "\ "storage for the time specified here. This allows the filesystem to "\ @@ -144,6 +153,8 @@ module ApplicationSettingsHelper :akismet_api_key, :akismet_enabled, :auto_devops_enabled, + :circuitbreaker_access_retries, + :circuitbreaker_backoff_threshold, :circuitbreaker_failure_count_threshold, :circuitbreaker_failure_reset_time, :circuitbreaker_failure_wait_time, diff --git a/app/helpers/storage_health_helper.rb b/app/helpers/storage_health_helper.rb index 544c9efb845..4d2180f7eee 100644 --- a/app/helpers/storage_health_helper.rb +++ b/app/helpers/storage_health_helper.rb @@ -16,17 +16,16 @@ module StorageHealthHelper def message_for_circuit_breaker(circuit_breaker) maximum_failures = circuit_breaker.failure_count_threshold current_failures = circuit_breaker.failure_count - permanently_broken = circuit_breaker.circuit_broken? && current_failures >= maximum_failures translation_params = { number_of_failures: current_failures, maximum_failures: maximum_failures, number_of_seconds: circuit_breaker.failure_wait_time } - if permanently_broken + if circuit_breaker.circuit_broken? s_("%{number_of_failures} of %{maximum_failures} failures. GitLab will not "\ "retry automatically. Reset storage information when the problem is "\ "resolved.") % translation_params - elsif circuit_breaker.circuit_broken? + elsif circuit_breaker.backing_off? _("%{number_of_failures} of %{maximum_failures} failures. GitLab will "\ "block access for %{number_of_seconds} seconds.") % translation_params else diff --git a/app/models/application_setting.rb b/app/models/application_setting.rb index 4dda276bb41..f266e7db6da 100644 --- a/app/models/application_setting.rb +++ b/app/models/application_setting.rb @@ -153,13 +153,25 @@ class ApplicationSetting < ActiveRecord::Base presence: true, numericality: { greater_than_or_equal_to: 0 } - validates :circuitbreaker_failure_count_threshold, + validates :circuitbreaker_backoff_threshold, + :circuitbreaker_failure_count_threshold, :circuitbreaker_failure_wait_time, :circuitbreaker_failure_reset_time, :circuitbreaker_storage_timeout, presence: true, numericality: { only_integer: true, greater_than_or_equal_to: 0 } + validates :circuitbreaker_access_retries, + presence: true, + numericality: { only_integer: true, greater_than_or_equal_to: 1 } + + validates_each :circuitbreaker_backoff_threshold do |record, attr, value| + if value.to_i >= record.circuitbreaker_failure_count_threshold + record.errors.add(attr, _("The circuitbreaker backoff threshold should be "\ + "lower than the failure count threshold")) + end + end + SUPPORTED_KEY_TYPES.each do |type| validates :"#{type}_key_restriction", presence: true, key_restriction: { type: type } end diff --git a/app/views/admin/application_settings/_form.html.haml b/app/views/admin/application_settings/_form.html.haml index 2b23af9212e..3a4d5ce0b5c 100644 --- a/app/views/admin/application_settings/_form.html.haml +++ b/app/views/admin/application_settings/_form.html.haml @@ -533,29 +533,41 @@ %fieldset %legend Git Storage Circuitbreaker settings .form-group - = f.label :circuitbreaker_failure_count_threshold, _('Maximum git storage failures'), class: 'control-label col-sm-2' + = f.label :circuitbreaker_access_retries, _('Number of access attempts'), class: 'control-label col-sm-2' .col-sm-10 - = f.number_field :circuitbreaker_failure_count_threshold, class: 'form-control' + = f.number_field :circuitbreaker_access_retries, class: 'form-control' .help-block - = circuitbreaker_failure_count_help_text + = circuitbreaker_access_retries_help_text + .form-group + = f.label :circuitbreaker_storage_timeout, _('Seconds to wait for a storage access attempt'), class: 'control-label col-sm-2' + .col-sm-10 + = f.number_field :circuitbreaker_storage_timeout, class: 'form-control' + .help-block + = circuitbreaker_storage_timeout_help_text + .form-group + = f.label :circuitbreaker_backoff_threshold, _('Number of failures before backing off'), class: 'control-label col-sm-2' + .col-sm-10 + = f.number_field :circuitbreaker_backoff_threshold, class: 'form-control' + .help-block + = circuitbreaker_backoff_threshold_help_text .form-group = f.label :circuitbreaker_failure_wait_time, _('Seconds to wait after a storage failure'), class: 'control-label col-sm-2' .col-sm-10 = f.number_field :circuitbreaker_failure_wait_time, class: 'form-control' .help-block = circuitbreaker_failure_wait_time_help_text + .form-group + = f.label :circuitbreaker_failure_count_threshold, _('Maximum git storage failures'), class: 'control-label col-sm-2' + .col-sm-10 + = f.number_field :circuitbreaker_failure_count_threshold, class: 'form-control' + .help-block + = circuitbreaker_failure_count_help_text .form-group = f.label :circuitbreaker_failure_reset_time, _('Seconds before reseting failure information'), class: 'control-label col-sm-2' .col-sm-10 = f.number_field :circuitbreaker_failure_reset_time, class: 'form-control' .help-block = circuitbreaker_failure_reset_time_help_text - .form-group - = f.label :circuitbreaker_storage_timeout, _('Seconds to wait for a storage access attempt'), class: 'control-label col-sm-2' - .col-sm-10 - = f.number_field :circuitbreaker_storage_timeout, class: 'form-control' - .help-block - = circuitbreaker_storage_timeout_help_text %fieldset %legend Repository Checks diff --git a/changelogs/unreleased/bvl-circuitbreaker-backoff.yml b/changelogs/unreleased/bvl-circuitbreaker-backoff.yml new file mode 100644 index 00000000000..5cb90e7c085 --- /dev/null +++ b/changelogs/unreleased/bvl-circuitbreaker-backoff.yml @@ -0,0 +1,6 @@ +--- +title: Make the circuitbreaker more robust by adding higher thresholds, and multiple + access attempts. +merge_request: 14933 +author: +type: fixed diff --git a/db/migrate/20171017145932_add_new_circuitbreaker_settings_to_application_settings.rb b/db/migrate/20171017145932_add_new_circuitbreaker_settings_to_application_settings.rb new file mode 100644 index 00000000000..07eb25c0b0f --- /dev/null +++ b/db/migrate/20171017145932_add_new_circuitbreaker_settings_to_application_settings.rb @@ -0,0 +1,16 @@ +class AddNewCircuitbreakerSettingsToApplicationSettings < ActiveRecord::Migration + include Gitlab::Database::MigrationHelpers + + DOWNTIME = false + + def change + add_column :application_settings, + :circuitbreaker_access_retries, + :integer, + default: 3 + add_column :application_settings, + :circuitbreaker_backoff_threshold, + :integer, + default: 80 + end +end diff --git a/db/schema.rb b/db/schema.rb index c2c04873d4d..530f08022be 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -11,7 +11,7 @@ # # It's strongly recommended that you check this file into your version control system. -ActiveRecord::Schema.define(version: 20171012101043) do +ActiveRecord::Schema.define(version: 20171017145932) do # These are extensions that must be enabled in order to support this database enable_extension "plpgsql" @@ -138,6 +138,8 @@ ActiveRecord::Schema.define(version: 20171012101043) do t.integer "circuitbreaker_failure_wait_time", default: 30 t.integer "circuitbreaker_failure_reset_time", default: 1800 t.integer "circuitbreaker_storage_timeout", default: 30 + t.integer "circuitbreaker_access_retries", default: 3 + t.integer "circuitbreaker_backoff_threshold", default: 80 end create_table "audit_events", force: :cascade do |t| diff --git a/doc/administration/img/circuitbreaker_config.png b/doc/administration/img/circuitbreaker_config.png index 9250d38297c..e811d173634 100644 Binary files a/doc/administration/img/circuitbreaker_config.png and b/doc/administration/img/circuitbreaker_config.png differ diff --git a/doc/administration/repository_storage_paths.md b/doc/administration/repository_storage_paths.md index efcabd69822..96f436fa7c3 100644 --- a/doc/administration/repository_storage_paths.md +++ b/doc/administration/repository_storage_paths.md @@ -109,6 +109,11 @@ This can be configured from the admin interface: ![circuitbreaker configuration](img/circuitbreaker_config.png) +**Number of access attempts**: The number of attempts GitLab will make to access a +storage when probing a shard. + +**Number of failures before backing off**: The number of failures after which +GitLab will start temporarily disabling access to a storage shard on a host. **Maximum git storage failures:** The number of failures of after which GitLab will completely prevent access to the storage. The number of failures can be reset in @@ -126,6 +131,15 @@ mount is reset. **Seconds to wait for a storage access attempt:** The time in seconds GitLab will try to access storage. After this time a timeout error will be raised. +To enable the circuitbreaker for repository storage you can flip the feature flag from a rails console: + +``` +Feature.enable('git_storage_circuit_breaker') +``` + +Alternatively it can be enabled by setting `true` in the `GIT_STORAGE_CIRCUIT_BREAKER` environment variable. +This approach would be used when enabling the circuit breaker on a single host. + When storage failures occur, this will be visible in the admin interface like this: ![failing storage](img/failing_storage.png) diff --git a/doc/api/settings.md b/doc/api/settings.md index 664f3ef7b77..4e24e4bbfc3 100644 --- a/doc/api/settings.md +++ b/doc/api/settings.md @@ -69,6 +69,8 @@ PUT /application/settings | `after_sign_up_text` | string | no | Text shown to the user after signing up | | `akismet_api_key` | string | no | API key for akismet spam protection | | `akismet_enabled` | boolean | no | Enable or disable akismet spam protection | +| `circuitbreaker_access_retries | integer | no | The number of attempts GitLab will make to access a storage. | +| `circuitbreaker_backoff_threshold | integer | no | The number of failures after which GitLab will start temporarily disabling access to a storage shard on a host. | | `circuitbreaker_failure_count_threshold` | integer | no | The number of failures of after which GitLab will completely prevent access to the storage. | | `circuitbreaker_failure_reset_time` | integer | no | Time in seconds GitLab will keep storage failure information. When no failures occur during this time, the failure information is reset. | | `circuitbreaker_failure_wait_time` | integer | no | Time in seconds GitLab will block access to a failing storage to allow it to recover. | diff --git a/lib/gitlab/git/storage.rb b/lib/gitlab/git/storage.rb index 08e6c29abad..99518c9b1e4 100644 --- a/lib/gitlab/git/storage.rb +++ b/lib/gitlab/git/storage.rb @@ -12,6 +12,7 @@ module Gitlab CircuitOpen = Class.new(Inaccessible) Misconfiguration = Class.new(Inaccessible) + Failing = Class.new(Inaccessible) REDIS_KEY_PREFIX = 'storage_accessible:'.freeze diff --git a/lib/gitlab/git/storage/circuit_breaker.rb b/lib/gitlab/git/storage/circuit_breaker.rb index 0456ad9a1f3..be7598ef011 100644 --- a/lib/gitlab/git/storage/circuit_breaker.rb +++ b/lib/gitlab/git/storage/circuit_breaker.rb @@ -54,7 +54,7 @@ module Gitlab end def perform - return yield unless Feature.enabled?('git_storage_circuit_breaker') + return yield unless enabled? check_storage_accessible! @@ -64,10 +64,27 @@ module Gitlab def circuit_broken? return false if no_failures? - recent_failure = last_failure > failure_wait_time.seconds.ago - too_many_failures = failure_count > failure_count_threshold + failure_count > failure_count_threshold + end - recent_failure || too_many_failures + def backing_off? + return false if no_failures? + + recent_failure = last_failure > failure_wait_time.seconds.ago + too_many_failures = failure_count > backoff_threshold + + recent_failure && too_many_failures + end + + private + + # The circuitbreaker can be enabled for the entire fleet using a Feature + # flag. + # + # Enabling it for a single host can be done setting the + # `GIT_STORAGE_CIRCUIT_BREAKER` environment variable. + def enabled? + ENV['GIT_STORAGE_CIRCUIT_BREAKER'].present? || Feature.enabled?('git_storage_circuit_breaker') end def failure_info @@ -83,7 +100,7 @@ module Gitlab return @storage_available if @storage_available if @storage_available = Gitlab::Git::Storage::ForkedStorageCheck - .storage_available?(storage_path, storage_timeout) + .storage_available?(storage_path, storage_timeout, access_retries) track_storage_accessible else track_storage_inaccessible @@ -94,7 +111,11 @@ module Gitlab def check_storage_accessible! if circuit_broken? - raise Gitlab::Git::Storage::CircuitOpen.new("Circuit for #{storage} is broken", failure_wait_time) + raise Gitlab::Git::Storage::CircuitOpen.new("Circuit for #{storage} is broken", failure_reset_time) + end + + if backing_off? + raise Gitlab::Git::Storage::Failing.new("Backing off access to #{storage}", failure_wait_time) end unless storage_available? @@ -131,12 +152,6 @@ module Gitlab end end - def cache_key - @cache_key ||= "#{Gitlab::Git::Storage::REDIS_KEY_PREFIX}#{storage}:#{hostname}" - end - - private - def get_failure_info last_failure, failure_count = Gitlab::Git::Storage.redis.with do |redis| redis.hmget(cache_key, :last_failure, :failure_count) @@ -146,6 +161,10 @@ module Gitlab FailureInfo.new(last_failure, failure_count.to_i) end + + def cache_key + @cache_key ||= "#{Gitlab::Git::Storage::REDIS_KEY_PREFIX}#{storage}:#{hostname}" + end end end end diff --git a/lib/gitlab/git/storage/circuit_breaker_settings.rb b/lib/gitlab/git/storage/circuit_breaker_settings.rb index d2313fe7c1b..257fe8cd8f0 100644 --- a/lib/gitlab/git/storage/circuit_breaker_settings.rb +++ b/lib/gitlab/git/storage/circuit_breaker_settings.rb @@ -18,6 +18,14 @@ module Gitlab application_settings.circuitbreaker_storage_timeout end + def access_retries + application_settings.circuitbreaker_access_retries + end + + def backoff_threshold + application_settings.circuitbreaker_backoff_threshold + end + private def application_settings diff --git a/lib/gitlab/git/storage/forked_storage_check.rb b/lib/gitlab/git/storage/forked_storage_check.rb index 91d8241f17b..1307f400700 100644 --- a/lib/gitlab/git/storage/forked_storage_check.rb +++ b/lib/gitlab/git/storage/forked_storage_check.rb @@ -4,8 +4,17 @@ module Gitlab module ForkedStorageCheck extend self - def storage_available?(path, timeout_seconds = 5) - status = timeout_check(path, timeout_seconds) + def storage_available?(path, timeout_seconds = 5, retries = 1) + partial_timeout = timeout_seconds / retries + status = timeout_check(path, partial_timeout) + + # If the status check did not succeed the first time, we retry a few + # more times to avoid one-off failures + current_attempts = 1 + while current_attempts < retries && !status.success? + status = timeout_check(path, partial_timeout) + current_attempts += 1 + end status.success? end diff --git a/lib/gitlab/git/storage/null_circuit_breaker.rb b/lib/gitlab/git/storage/null_circuit_breaker.rb index 60c6791a7e4..a12d52d295f 100644 --- a/lib/gitlab/git/storage/null_circuit_breaker.rb +++ b/lib/gitlab/git/storage/null_circuit_breaker.rb @@ -25,6 +25,10 @@ module Gitlab !!@error end + def backing_off? + false + end + def last_failure circuit_broken? ? Time.now : nil end diff --git a/spec/lib/gitlab/git/storage/circuit_breaker_spec.rb b/spec/lib/gitlab/git/storage/circuit_breaker_spec.rb index c8d532df059..72dabca793a 100644 --- a/spec/lib/gitlab/git/storage/circuit_breaker_spec.rb +++ b/spec/lib/gitlab/git/storage/circuit_breaker_spec.rb @@ -79,7 +79,9 @@ describe Gitlab::Git::Storage::CircuitBreaker, clean_gitlab_redis_shared_state: stub_application_setting(circuitbreaker_failure_count_threshold: 0, circuitbreaker_failure_wait_time: 1, circuitbreaker_failure_reset_time: 2, - circuitbreaker_storage_timeout: 3) + circuitbreaker_storage_timeout: 3, + circuitbreaker_access_retries: 4, + circuitbreaker_backoff_threshold: 5) end describe '#failure_count_threshold' do @@ -105,14 +107,43 @@ describe Gitlab::Git::Storage::CircuitBreaker, clean_gitlab_redis_shared_state: expect(circuit_breaker.storage_timeout).to eq(3) end end + + describe '#access_retries' do + it 'reads the value from settings' do + expect(circuit_breaker.access_retries).to eq(4) + end + end + + describe '#backoff_threshold' do + it 'reads the value from settings' do + expect(circuit_breaker.backoff_threshold).to eq(5) + end + end end describe '#perform' do - it 'raises an exception with retry time when the circuit is open' do - allow(circuit_breaker).to receive(:circuit_broken?).and_return(true) + it 'raises the correct exception when the circuit is open' do + set_in_redis(:last_failure, 1.day.ago.to_f) + set_in_redis(:failure_count, 999) expect { |b| circuit_breaker.perform(&b) } - .to raise_error(Gitlab::Git::Storage::CircuitOpen) + .to raise_error do |exception| + expect(exception).to be_kind_of(Gitlab::Git::Storage::CircuitOpen) + expect(exception.retry_after).to eq(1800) + end + end + + it 'raises the correct exception when backing off' do + Timecop.freeze do + set_in_redis(:last_failure, 1.second.ago.to_f) + set_in_redis(:failure_count, 90) + + expect { |b| circuit_breaker.perform(&b) } + .to raise_error do |exception| + expect(exception).to be_kind_of(Gitlab::Git::Storage::Failing) + expect(exception.retry_after).to eq(30) + end + end end it 'yields the block' do @@ -122,6 +153,7 @@ describe Gitlab::Git::Storage::CircuitBreaker, clean_gitlab_redis_shared_state: it 'checks if the storage is available' do expect(circuit_breaker).to receive(:check_storage_accessible!) + .and_call_original circuit_breaker.perform { 'hello world' } end @@ -137,16 +169,82 @@ describe Gitlab::Git::Storage::CircuitBreaker, clean_gitlab_redis_shared_state: .to raise_error(Rugged::OSError) end - context 'with the feature disabled' do - it 'returns the block without checking accessibility' do - stub_feature_flags(git_storage_circuit_breaker: false) + it 'tracks that the storage was accessible' do + set_in_redis(:failure_count, 10) + set_in_redis(:last_failure, Time.now.to_f) - expect(circuit_breaker).not_to receive(:circuit_broken?) + circuit_breaker.perform { '' } + + expect(value_from_redis(:failure_count).to_i).to eq(0) + expect(value_from_redis(:last_failure)).to be_empty + expect(circuit_breaker.failure_count).to eq(0) + expect(circuit_breaker.last_failure).to be_nil + end + + it 'only performs the accessibility check once' do + expect(Gitlab::Git::Storage::ForkedStorageCheck) + .to receive(:storage_available?).once.and_call_original + + 2.times { circuit_breaker.perform { '' } } + end + + it 'calls the check with the correct arguments' do + stub_application_setting(circuitbreaker_storage_timeout: 30, + circuitbreaker_access_retries: 3) + + expect(Gitlab::Git::Storage::ForkedStorageCheck) + .to receive(:storage_available?).with(TestEnv.repos_path, 30, 3) + .and_call_original + + circuit_breaker.perform { '' } + end + + context 'with the feature disabled' do + before do + stub_feature_flags(git_storage_circuit_breaker: false) + end + + it 'returns the block without checking accessibility' do + expect(circuit_breaker).not_to receive(:check_storage_accessible!) result = circuit_breaker.perform { 'hello' } expect(result).to eq('hello') end + + it 'allows enabling the feature using an ENV var' do + stub_env('GIT_STORAGE_CIRCUIT_BREAKER', 'true') + expect(circuit_breaker).to receive(:check_storage_accessible!) + + result = circuit_breaker.perform { 'hello' } + + expect(result).to eq('hello') + end + end + + context 'the storage is not available' do + let(:storage_name) { 'broken' } + + it 'raises the correct exception' do + expect(circuit_breaker).to receive(:track_storage_inaccessible) + + expect { circuit_breaker.perform { '' } } + .to raise_error do |exception| + expect(exception).to be_kind_of(Gitlab::Git::Storage::Inaccessible) + expect(exception.retry_after).to eq(30) + end + end + + it 'tracks that the storage was inaccessible' do + Timecop.freeze do + expect { circuit_breaker.perform { '' } }.to raise_error(Gitlab::Git::Storage::Inaccessible) + + expect(value_from_redis(:failure_count).to_i).to eq(1) + expect(value_from_redis(:last_failure)).not_to be_empty + expect(circuit_breaker.failure_count).to eq(1) + expect(circuit_breaker.last_failure).to be_within(1.second).of(Time.now) + end + end end end @@ -158,183 +256,40 @@ describe Gitlab::Git::Storage::CircuitBreaker, clean_gitlab_redis_shared_state: expect(circuit_breaker.circuit_broken?).to be_falsey end - it 'is broken when there was a recent failure' do - Timecop.freeze do - set_in_redis(:last_failure, 1.second.ago.to_f) - set_in_redis(:failure_count, 1) - - expect(circuit_breaker.circuit_broken?).to be_truthy - end - end - it 'is broken when there are too many failures' do set_in_redis(:last_failure, 1.day.ago.to_f) set_in_redis(:failure_count, 200) expect(circuit_breaker.circuit_broken?).to be_truthy end + end + + describe '#backing_off?' do + it 'is true when there was a recent failure' do + Timecop.freeze do + set_in_redis(:last_failure, 1.second.ago.to_f) + set_in_redis(:failure_count, 90) + + expect(circuit_breaker.backing_off?).to be_truthy + end + end context 'the `failure_wait_time` is set to 0' do before do stub_application_setting(circuitbreaker_failure_wait_time: 0) end - it 'is working even when there is a recent failure' do + it 'is working even when there are failures' do Timecop.freeze do set_in_redis(:last_failure, 0.seconds.ago.to_f) - set_in_redis(:failure_count, 1) + set_in_redis(:failure_count, 90) - expect(circuit_breaker.circuit_broken?).to be_falsey + expect(circuit_breaker.backing_off?).to be_falsey end end end end - describe "storage_available?" do - context 'the storage is available' do - it 'tracks that the storage was accessible an raises the error' do - expect(circuit_breaker).to receive(:track_storage_accessible) - - circuit_breaker.storage_available? - end - - it 'only performs the check once' do - expect(Gitlab::Git::Storage::ForkedStorageCheck) - .to receive(:storage_available?).once.and_call_original - - 2.times { circuit_breaker.storage_available? } - end - end - - context 'storage is not available' do - let(:storage_name) { 'broken' } - - it 'tracks that the storage was inaccessible' do - expect(circuit_breaker).to receive(:track_storage_inaccessible) - - circuit_breaker.storage_available? - end - end - end - - describe '#check_storage_accessible!' do - it 'raises an exception with retry time when the circuit is open' do - allow(circuit_breaker).to receive(:circuit_broken?).and_return(true) - - expect { circuit_breaker.check_storage_accessible! } - .to raise_error do |exception| - expect(exception).to be_kind_of(Gitlab::Git::Storage::CircuitOpen) - expect(exception.retry_after).to eq(30) - end - end - - context 'the storage is not available' do - let(:storage_name) { 'broken' } - - it 'raises an error' do - expect(circuit_breaker).to receive(:track_storage_inaccessible) - - expect { circuit_breaker.check_storage_accessible! } - .to raise_error do |exception| - expect(exception).to be_kind_of(Gitlab::Git::Storage::Inaccessible) - expect(exception.retry_after).to eq(30) - end - end - end - end - - describe '#track_storage_inaccessible' do - around do |example| - Timecop.freeze { example.run } - end - - it 'records the failure time in redis' do - circuit_breaker.track_storage_inaccessible - - failure_time = value_from_redis(:last_failure) - - expect(Time.at(failure_time.to_i)).to be_within(1.second).of(Time.now) - end - - it 'sets the failure time on the breaker without reloading' do - circuit_breaker.track_storage_inaccessible - - expect(circuit_breaker).not_to receive(:get_failure_info) - expect(circuit_breaker.last_failure).to eq(Time.now) - end - - it 'increments the failure count in redis' do - set_in_redis(:failure_count, 10) - - circuit_breaker.track_storage_inaccessible - - expect(value_from_redis(:failure_count).to_i).to be(11) - end - - it 'increments the failure count on the breaker without reloading' do - set_in_redis(:failure_count, 10) - - circuit_breaker.track_storage_inaccessible - - expect(circuit_breaker).not_to receive(:get_failure_info) - expect(circuit_breaker.failure_count).to eq(11) - end - end - - describe '#track_storage_accessible' do - it 'sets the failure count to zero in redis' do - set_in_redis(:failure_count, 10) - - circuit_breaker.track_storage_accessible - - expect(value_from_redis(:failure_count).to_i).to be(0) - end - - it 'sets the failure count to zero on the breaker without reloading' do - set_in_redis(:failure_count, 10) - - circuit_breaker.track_storage_accessible - - expect(circuit_breaker).not_to receive(:get_failure_info) - expect(circuit_breaker.failure_count).to eq(0) - end - - it 'removes the last failure time from redis' do - set_in_redis(:last_failure, Time.now.to_i) - - circuit_breaker.track_storage_accessible - - expect(circuit_breaker).not_to receive(:get_failure_info) - expect(circuit_breaker.last_failure).to be_nil - end - - it 'removes the last failure time from the breaker without reloading' do - set_in_redis(:last_failure, Time.now.to_i) - - circuit_breaker.track_storage_accessible - - expect(value_from_redis(:last_failure)).to be_empty - end - - it 'wont connect to redis when there are no failures' do - expect(Gitlab::Git::Storage.redis).to receive(:with).once - .and_call_original - expect(circuit_breaker).to receive(:track_storage_accessible) - .and_call_original - - circuit_breaker.track_storage_accessible - end - end - - describe '#no_failures?' do - it 'is false when a failure was tracked' do - set_in_redis(:last_failure, Time.now.to_i) - set_in_redis(:failure_count, 1) - - expect(circuit_breaker.no_failures?).to be_falsey - end - end - describe '#last_failure' do it 'returns the last failure time' do time = Time.parse("2017-05-26 17:52:30") @@ -351,10 +306,4 @@ describe Gitlab::Git::Storage::CircuitBreaker, clean_gitlab_redis_shared_state: expect(circuit_breaker.failure_count).to eq(7) end end - - describe '#cache_key' do - it 'includes storage and host' do - expect(circuit_breaker.cache_key).to eq(cache_key) - end - end end diff --git a/spec/lib/gitlab/git/storage/forked_storage_check_spec.rb b/spec/lib/gitlab/git/storage/forked_storage_check_spec.rb index c708b15853a..39a5d020bb4 100644 --- a/spec/lib/gitlab/git/storage/forked_storage_check_spec.rb +++ b/spec/lib/gitlab/git/storage/forked_storage_check_spec.rb @@ -33,6 +33,21 @@ describe Gitlab::Git::Storage::ForkedStorageCheck, broken_storage: true, skip_da expect(runtime).to be < 1.0 end + it 'will try the specified amount of times before failing' do + allow(described_class).to receive(:check_filesystem_in_process) do + Process.spawn("sleep 10") + end + + expect(Process).to receive(:spawn).with('sleep 10').twice + .and_call_original + + runtime = Benchmark.realtime do + described_class.storage_available?(existing_path, 0.5, 2) + end + + expect(runtime).to be < 1.0 + end + describe 'when using paths with spaces' do let(:test_dir) { Rails.root.join('tmp', 'tests', 'storage_check') } let(:path_with_spaces) { File.join(test_dir, 'path with spaces') } diff --git a/spec/lib/gitlab/git/storage/null_circuit_breaker_spec.rb b/spec/lib/gitlab/git/storage/null_circuit_breaker_spec.rb index 7ee6d2f3709..5db37f55e03 100644 --- a/spec/lib/gitlab/git/storage/null_circuit_breaker_spec.rb +++ b/spec/lib/gitlab/git/storage/null_circuit_breaker_spec.rb @@ -65,17 +65,6 @@ describe Gitlab::Git::Storage::NullCircuitBreaker do ours = described_class.public_instance_methods theirs = Gitlab::Git::Storage::CircuitBreaker.public_instance_methods - # These methods are not part of the public API, but are public to allow the - # CircuitBreaker specs to operate. They should be made private over time. - exceptions = %i[ - cache_key - check_storage_accessible! - no_failures? - storage_available? - track_storage_accessible - track_storage_inaccessible - ] - - expect(theirs - ours).to contain_exactly(*exceptions) + expect(theirs - ours).to be_empty end end diff --git a/spec/models/application_setting_spec.rb b/spec/models/application_setting_spec.rb index 30495fd4f5e..47b7150d36f 100644 --- a/spec/models/application_setting_spec.rb +++ b/spec/models/application_setting_spec.rb @@ -115,7 +115,8 @@ describe ApplicationSetting do end context 'circuitbreaker settings' do - [:circuitbreaker_failure_count_threshold, + [:circuitbreaker_backoff_threshold, + :circuitbreaker_failure_count_threshold, :circuitbreaker_failure_wait_time, :circuitbreaker_failure_reset_time, :circuitbreaker_storage_timeout].each do |field| @@ -125,6 +126,16 @@ describe ApplicationSetting do .is_greater_than_or_equal_to(0) end end + + it 'requires the `backoff_threshold` to be lower than the `failure_count_threshold`' do + setting.circuitbreaker_failure_count_threshold = 10 + setting.circuitbreaker_backoff_threshold = 15 + failure_message = "The circuitbreaker backoff threshold should be lower "\ + "than the failure count threshold" + + expect(setting).not_to be_valid + expect(setting.errors[:circuitbreaker_backoff_threshold]).to include(failure_message) + end end context 'repository storages' do