Move the circuitbreaker check out in a separate process

Moving the check out of the general requests, makes sure we don't have
any slowdown in the regular requests.

To keep the process performing this checks small, the check is still
performed inside a unicorn. But that is called from a process running
on the same server.

Because the checks are now done outside normal request, we can have a
simpler failure strategy:

The check is now performed in the background every
`circuitbreaker_check_interval`. Failures are logged in redis. The
failures are reset when the check succeeds. Per check we will try
`circuitbreaker_access_retries` times within
`circuitbreaker_storage_timeout` seconds.

When the number of failures exceeds
`circuitbreaker_failure_count_threshold`, we will block access to the
storage.

After `failure_reset_time` of no checks, we will clear the stored
failures. This could happen when the process that performs the checks
is not running.
This commit is contained in:
Bob Van Landuyt 2017-11-13 16:52:07 +01:00
parent 12d33b883a
commit f1ae1e39ce
45 changed files with 983 additions and 362 deletions

View File

@ -5,7 +5,7 @@ class Admin::HealthCheckController < Admin::ApplicationController
end
def reset_storage_health
Gitlab::Git::Storage::CircuitBreaker.reset_all!
Gitlab::Git::Storage::FailureInfo.reset_all!
redirect_to admin_health_check_path,
notice: _('Git storage health information has been reset')
end

View File

@ -1,5 +1,5 @@
class HealthController < ActionController::Base
protect_from_forgery with: :exception
protect_from_forgery with: :exception, except: :storage_check
include RequiresWhitelistedMonitoringClient
CHECKS = [
@ -23,6 +23,15 @@ class HealthController < ActionController::Base
render_check_results(results)
end
def storage_check
results = Gitlab::Git::Storage::Checker.check_all
render json: {
check_interval: Gitlab::CurrentSettings.current_application_settings.circuitbreaker_check_interval,
results: results
}
end
private
def render_check_results(results)

View File

@ -124,17 +124,6 @@ module ApplicationSettingsHelper
_('The number of attempts GitLab will make to access a storage.')
end
def circuitbreaker_backoff_threshold_help_text
_("The number of failures after which GitLab will start temporarily "\
"disabling access to a storage shard on a host")
end
def circuitbreaker_failure_wait_time_help_text
_("When access to a storage fails. GitLab will prevent access to the "\
"storage for the time specified here. This allows the filesystem to "\
"recover. Repositories on failing shards are temporarly unavailable")
end
def circuitbreaker_failure_reset_time_help_text
_("The time in seconds GitLab will keep failure information. When no "\
"failures occur during this time, information about the mount is reset.")
@ -145,6 +134,11 @@ module ApplicationSettingsHelper
"timeout error will be raised.")
end
def circuitbreaker_check_interval_help_text
_("The time in seconds between storage checks. When a previous check did "\
"complete yet, GitLab will skip a check.")
end
def visible_attributes
[
:admin_notification_email,
@ -154,10 +148,9 @@ module ApplicationSettingsHelper
:akismet_enabled,
:auto_devops_enabled,
:circuitbreaker_access_retries,
:circuitbreaker_backoff_threshold,
:circuitbreaker_check_interval,
:circuitbreaker_failure_count_threshold,
:circuitbreaker_failure_reset_time,
:circuitbreaker_failure_wait_time,
:circuitbreaker_storage_timeout,
:clientside_sentry_dsn,
:clientside_sentry_enabled,

View File

@ -18,16 +18,12 @@ module StorageHealthHelper
current_failures = circuit_breaker.failure_count
translation_params = { number_of_failures: current_failures,
maximum_failures: maximum_failures,
number_of_seconds: circuit_breaker.failure_wait_time }
maximum_failures: maximum_failures }
if circuit_breaker.circuit_broken?
s_("%{number_of_failures} of %{maximum_failures} failures. GitLab will not "\
"retry automatically. Reset storage information when the problem is "\
"resolved.") % translation_params
elsif circuit_breaker.backing_off?
_("%{number_of_failures} of %{maximum_failures} failures. GitLab will "\
"block access for %{number_of_seconds} seconds.") % translation_params
else
_("%{number_of_failures} of %{maximum_failures} failures. GitLab will "\
"allow access on the next attempt.") % translation_params

View File

@ -153,11 +153,10 @@ class ApplicationSetting < ActiveRecord::Base
presence: true,
numericality: { greater_than_or_equal_to: 0 }
validates :circuitbreaker_backoff_threshold,
:circuitbreaker_failure_count_threshold,
:circuitbreaker_failure_wait_time,
validates :circuitbreaker_failure_count_threshold,
:circuitbreaker_failure_reset_time,
:circuitbreaker_storage_timeout,
:circuitbreaker_check_interval,
presence: true,
numericality: { only_integer: true, greater_than_or_equal_to: 0 }
@ -165,13 +164,6 @@ class ApplicationSetting < ActiveRecord::Base
presence: true,
numericality: { only_integer: true, greater_than_or_equal_to: 1 }
validates_each :circuitbreaker_backoff_threshold do |record, attr, value|
if value.to_i >= record.circuitbreaker_failure_count_threshold
record.errors.add(attr, _("The circuitbreaker backoff threshold should be "\
"lower than the failure count threshold"))
end
end
validates :gitaly_timeout_default,
presence: true,
numericality: { only_integer: true, greater_than_or_equal_to: 0 }

View File

@ -545,6 +545,12 @@
%fieldset
%legend Git Storage Circuitbreaker settings
.form-group
= f.label :circuitbreaker_check_interval, _('Check interval'), class: 'control-label col-sm-2'
.col-sm-10
= f.number_field :circuitbreaker_check_interval, class: 'form-control'
.help-block
= circuitbreaker_check_interval_help_text
.form-group
= f.label :circuitbreaker_access_retries, _('Number of access attempts'), class: 'control-label col-sm-2'
.col-sm-10
@ -557,18 +563,6 @@
= f.number_field :circuitbreaker_storage_timeout, class: 'form-control'
.help-block
= circuitbreaker_storage_timeout_help_text
.form-group
= f.label :circuitbreaker_backoff_threshold, _('Number of failures before backing off'), class: 'control-label col-sm-2'
.col-sm-10
= f.number_field :circuitbreaker_backoff_threshold, class: 'form-control'
.help-block
= circuitbreaker_backoff_threshold_help_text
.form-group
= f.label :circuitbreaker_failure_wait_time, _('Seconds to wait after a storage failure'), class: 'control-label col-sm-2'
.col-sm-10
= f.number_field :circuitbreaker_failure_wait_time, class: 'form-control'
.help-block
= circuitbreaker_failure_wait_time_help_text
.form-group
= f.label :circuitbreaker_failure_count_threshold, _('Maximum git storage failures'), class: 'control-label col-sm-2'
.col-sm-10

11
bin/storage_check Executable file
View File

@ -0,0 +1,11 @@
#!/usr/bin/env ruby
require 'optparse'
require 'net/http'
require 'json'
require 'socket'
require 'logger'
require_relative '../lib/gitlab/storage_check'
Gitlab::StorageCheck::CLI.start!(ARGV)

View File

@ -0,0 +1,5 @@
---
title: Monitor NFS shards for circuitbreaker in a separate process
merge_request: 15426
author:
type: changed

View File

@ -42,6 +42,7 @@ Rails.application.routes.draw do
scope path: '-' do
get 'liveness' => 'health#liveness'
get 'readiness' => 'health#readiness'
post 'storage_check' => 'health#storage_check'
resources :metrics, only: [:index]
mount Peek::Railtie => '/peek'

View File

@ -0,0 +1,20 @@
class AddCircuitbreakerCheckIntervalToApplicationSettings < ActiveRecord::Migration
include Gitlab::Database::MigrationHelpers
# Set this constant to true if this migration requires downtime.
DOWNTIME = false
disable_ddl_transaction!
def up
add_column_with_default :application_settings,
:circuitbreaker_check_interval,
:integer,
default: 1
end
def down
remove_column :application_settings,
:circuitbreaker_check_interval
end
end

View File

@ -0,0 +1,34 @@
# See http://doc.gitlab.com/ce/development/migration_style_guide.html
# for more information on how to write migrations for GitLab.
class UpdateCircuitbreakerDefaults < ActiveRecord::Migration
include Gitlab::Database::MigrationHelpers
DOWNTIME = false
class ApplicationSetting < ActiveRecord::Base; end
def up
change_column_default :application_settings,
:circuitbreaker_failure_count_threshold,
3
change_column_default :application_settings,
:circuitbreaker_storage_timeout,
15
ApplicationSetting.update_all(circuitbreaker_failure_count_threshold: 3,
circuitbreaker_storage_timeout: 15)
end
def down
change_column_default :application_settings,
:circuitbreaker_failure_count_threshold,
160
change_column_default :application_settings,
:circuitbreaker_storage_timeout,
30
ApplicationSetting.update_all(circuitbreaker_failure_count_threshold: 160,
circuitbreaker_storage_timeout: 30)
end
end

View File

@ -0,0 +1,26 @@
# See http://doc.gitlab.com/ce/development/migration_style_guide.html
# for more information on how to write migrations for GitLab.
class RemoveOldCircuitbreakerConfig < ActiveRecord::Migration
include Gitlab::Database::MigrationHelpers
DOWNTIME = false
def up
remove_column :application_settings,
:circuitbreaker_backoff_threshold
remove_column :application_settings,
:circuitbreaker_failure_wait_time
end
def down
add_column :application_settings,
:circuitbreaker_backoff_threshold,
:integer,
default: 80
add_column :application_settings,
:circuitbreaker_failure_wait_time,
:integer,
default: 30
end
end

View File

@ -135,12 +135,10 @@ ActiveRecord::Schema.define(version: 20171205190711) do
t.boolean "hashed_storage_enabled", default: false, null: false
t.boolean "project_export_enabled", default: true, null: false
t.boolean "auto_devops_enabled", default: false, null: false
t.integer "circuitbreaker_failure_count_threshold", default: 160
t.integer "circuitbreaker_failure_wait_time", default: 30
t.integer "circuitbreaker_failure_count_threshold", default: 3
t.integer "circuitbreaker_failure_reset_time", default: 1800
t.integer "circuitbreaker_storage_timeout", default: 30
t.integer "circuitbreaker_storage_timeout", default: 15
t.integer "circuitbreaker_access_retries", default: 3
t.integer "circuitbreaker_backoff_threshold", default: 80
t.boolean "throttle_unauthenticated_enabled", default: false, null: false
t.integer "throttle_unauthenticated_requests_per_period", default: 3600, null: false
t.integer "throttle_unauthenticated_period_in_seconds", default: 3600, null: false
@ -150,6 +148,7 @@ ActiveRecord::Schema.define(version: 20171205190711) do
t.boolean "throttle_authenticated_web_enabled", default: false, null: false
t.integer "throttle_authenticated_web_requests_per_period", default: 7200, null: false
t.integer "throttle_authenticated_web_period_in_seconds", default: 3600, null: false
t.integer "circuitbreaker_check_interval", default: 1, null: false
t.boolean "password_authentication_enabled_for_web"
t.boolean "password_authentication_enabled_for_git", default: true
t.integer "gitaly_timeout_default", default: 55, null: false

View File

@ -70,10 +70,9 @@ PUT /application/settings
| `akismet_api_key` | string | no | API key for akismet spam protection |
| `akismet_enabled` | boolean | no | Enable or disable akismet spam protection |
| `circuitbreaker_access_retries | integer | no | The number of attempts GitLab will make to access a storage. |
| `circuitbreaker_backoff_threshold | integer | no | The number of failures after which GitLab will start temporarily disabling access to a storage shard on a host. |
| `circuitbreaker_check_interval` | integer | no | Number of seconds in between storage checks. |
| `circuitbreaker_failure_count_threshold` | integer | no | The number of failures of after which GitLab will completely prevent access to the storage. |
| `circuitbreaker_failure_reset_time` | integer | no | Time in seconds GitLab will keep storage failure information. When no failures occur during this time, the failure information is reset. |
| `circuitbreaker_failure_wait_time` | integer | no | Time in seconds GitLab will block access to a failing storage to allow it to recover. |
| `circuitbreaker_storage_timeout` | integer | no | Seconds to wait for a storage access attempt |
| `clientside_sentry_dsn` | string | no | Required if `clientside_sentry_dsn` is enabled |
| `clientside_sentry_enabled` | boolean | no | Enable Sentry error reporting for the client side |

View File

@ -41,7 +41,7 @@ module API
detail 'This feature was introduced in GitLab 9.5'
end
delete do
Gitlab::Git::Storage::CircuitBreaker.reset_all!
Gitlab::Git::Storage::FailureInfo.reset_all!
end
end
end

View File

@ -0,0 +1,98 @@
module Gitlab
module Git
module Storage
class Checker
include CircuitBreakerSettings
attr_reader :storage_path, :storage, :hostname, :logger
def self.check_all(logger = Rails.logger)
threads = Gitlab.config.repositories.storages.keys.map do |storage_name|
Thread.new do
Thread.current[:result] = new(storage_name, logger).check_with_lease
end
end
threads.map do |thread|
thread.join
thread[:result]
end
end
def initialize(storage, logger = Rails.logger)
@storage = storage
config = Gitlab.config.repositories.storages[@storage]
@storage_path = config['path']
@logger = logger
@hostname = Gitlab::Environment.hostname
end
def check_with_lease
lease_key = "storage_check:#{cache_key}"
lease = Gitlab::ExclusiveLease.new(lease_key, timeout: storage_timeout)
result = { storage: storage, success: nil }
if uuid = lease.try_obtain
result[:success] = check
Gitlab::ExclusiveLease.cancel(lease_key, uuid)
else
logger.warn("#{hostname}: #{storage}: Skipping check, previous check still running")
end
result
end
def check
if Gitlab::Git::Storage::ForkedStorageCheck.storage_available?(storage_path, storage_timeout, access_retries)
track_storage_accessible
true
else
track_storage_inaccessible
logger.error("#{hostname}: #{storage}: Not accessible.")
false
end
end
private
def track_storage_inaccessible
first_failure = current_failure_info.first_failure || Time.now
last_failure = Time.now
Gitlab::Git::Storage.redis.with do |redis|
redis.pipelined do
redis.hset(cache_key, :first_failure, first_failure.to_i)
redis.hset(cache_key, :last_failure, last_failure.to_i)
redis.hincrby(cache_key, :failure_count, 1)
redis.expire(cache_key, failure_reset_time)
maintain_known_keys(redis)
end
end
end
def track_storage_accessible
Gitlab::Git::Storage.redis.with do |redis|
redis.pipelined do
redis.hset(cache_key, :first_failure, nil)
redis.hset(cache_key, :last_failure, nil)
redis.hset(cache_key, :failure_count, 0)
maintain_known_keys(redis)
end
end
end
def maintain_known_keys(redis)
expire_time = Time.now.to_i + failure_reset_time
redis.zadd(Gitlab::Git::Storage::REDIS_KNOWN_KEYS, expire_time, cache_key)
redis.zremrangebyscore(Gitlab::Git::Storage::REDIS_KNOWN_KEYS, '-inf', Time.now.to_i)
end
def current_failure_info
FailureInfo.load(cache_key)
end
end
end
end
end

View File

@ -4,22 +4,11 @@ module Gitlab
class CircuitBreaker
include CircuitBreakerSettings
FailureInfo = Struct.new(:last_failure, :failure_count)
attr_reader :storage,
:hostname,
:storage_path
:hostname
delegate :last_failure, :failure_count, to: :failure_info
def self.reset_all!
Gitlab::Git::Storage.redis.with do |redis|
all_storage_keys = redis.zrange(Gitlab::Git::Storage::REDIS_KNOWN_KEYS, 0, -1)
redis.del(*all_storage_keys) unless all_storage_keys.empty?
end
RequestStore.delete(:circuitbreaker_cache)
end
delegate :last_failure, :failure_count, :no_failures?,
to: :failure_info
def self.for_storage(storage)
cached_circuitbreakers = RequestStore.fetch(:circuitbreaker_cache) do
@ -46,9 +35,6 @@ module Gitlab
def initialize(storage, hostname)
@storage = storage
@hostname = hostname
config = Gitlab.config.repositories.storages[@storage]
@storage_path = config['path']
end
def perform
@ -65,15 +51,6 @@ module Gitlab
failure_count > failure_count_threshold
end
def backing_off?
return false if no_failures?
recent_failure = last_failure > failure_wait_time.seconds.ago
too_many_failures = failure_count > backoff_threshold
recent_failure && too_many_failures
end
private
# The circuitbreaker can be enabled for the entire fleet using a Feature
@ -86,88 +63,13 @@ module Gitlab
end
def failure_info
@failure_info ||= get_failure_info
end
# Memoizing the `storage_available` call means we only do it once per
# request when the storage is available.
#
# When the storage appears not available, and the memoized value is `false`
# we might want to try again.
def storage_available?
return @storage_available if @storage_available
if @storage_available = Gitlab::Git::Storage::ForkedStorageCheck
.storage_available?(storage_path, storage_timeout, access_retries)
track_storage_accessible
else
track_storage_inaccessible
end
@storage_available
@failure_info ||= FailureInfo.load(cache_key)
end
def check_storage_accessible!
if circuit_broken?
raise Gitlab::Git::Storage::CircuitOpen.new("Circuit for #{storage} is broken", failure_reset_time)
end
if backing_off?
raise Gitlab::Git::Storage::Failing.new("Backing off access to #{storage}", failure_wait_time)
end
unless storage_available?
raise Gitlab::Git::Storage::Inaccessible.new("#{storage} not accessible", failure_wait_time)
end
end
def no_failures?
last_failure.blank? && failure_count == 0
end
def track_storage_inaccessible
@failure_info = FailureInfo.new(Time.now, failure_count + 1)
Gitlab::Git::Storage.redis.with do |redis|
redis.pipelined do
redis.hset(cache_key, :last_failure, last_failure.to_i)
redis.hincrby(cache_key, :failure_count, 1)
redis.expire(cache_key, failure_reset_time)
maintain_known_keys(redis)
end
end
end
def track_storage_accessible
@failure_info = FailureInfo.new(nil, 0)
Gitlab::Git::Storage.redis.with do |redis|
redis.pipelined do
redis.hset(cache_key, :last_failure, nil)
redis.hset(cache_key, :failure_count, 0)
maintain_known_keys(redis)
end
end
end
def maintain_known_keys(redis)
expire_time = Time.now.to_i + failure_reset_time
redis.zadd(Gitlab::Git::Storage::REDIS_KNOWN_KEYS, expire_time, cache_key)
redis.zremrangebyscore(Gitlab::Git::Storage::REDIS_KNOWN_KEYS, '-inf', Time.now.to_i)
end
def get_failure_info
last_failure, failure_count = Gitlab::Git::Storage.redis.with do |redis|
redis.hmget(cache_key, :last_failure, :failure_count)
end
last_failure = Time.at(last_failure.to_i) if last_failure.present?
FailureInfo.new(last_failure, failure_count.to_i)
end
def cache_key
@cache_key ||= "#{Gitlab::Git::Storage::REDIS_KEY_PREFIX}#{storage}:#{hostname}"
end
end
end

View File

@ -6,10 +6,6 @@ module Gitlab
application_settings.circuitbreaker_failure_count_threshold
end
def failure_wait_time
application_settings.circuitbreaker_failure_wait_time
end
def failure_reset_time
application_settings.circuitbreaker_failure_reset_time
end
@ -22,8 +18,12 @@ module Gitlab
application_settings.circuitbreaker_access_retries
end
def backoff_threshold
application_settings.circuitbreaker_backoff_threshold
def check_interval
application_settings.circuitbreaker_check_interval
end
def cache_key
@cache_key ||= "#{Gitlab::Git::Storage::REDIS_KEY_PREFIX}#{storage}:#{hostname}"
end
private

View File

@ -0,0 +1,39 @@
module Gitlab
module Git
module Storage
class FailureInfo
attr_accessor :first_failure, :last_failure, :failure_count
def self.reset_all!
Gitlab::Git::Storage.redis.with do |redis|
all_storage_keys = redis.zrange(Gitlab::Git::Storage::REDIS_KNOWN_KEYS, 0, -1)
redis.del(*all_storage_keys) unless all_storage_keys.empty?
end
RequestStore.delete(:circuitbreaker_cache)
end
def self.load(cache_key)
first_failure, last_failure, failure_count = Gitlab::Git::Storage.redis.with do |redis|
redis.hmget(cache_key, :first_failure, :last_failure, :failure_count)
end
last_failure = Time.at(last_failure.to_i) if last_failure.present?
first_failure = Time.at(first_failure.to_i) if first_failure.present?
new(first_failure, last_failure, failure_count.to_i)
end
def initialize(first_failure, last_failure, failure_count)
@first_failure = first_failure
@last_failure = last_failure
@failure_count = failure_count
end
def no_failures?
first_failure.blank? && last_failure.blank? && failure_count == 0
end
end
end
end
end

View File

@ -11,6 +11,9 @@ module Gitlab
# These will always have nil values
attr_reader :storage_path
delegate :last_failure, :failure_count, :no_failures?,
to: :failure_info
def initialize(storage, hostname, error: nil)
@storage = storage
@hostname = hostname
@ -29,16 +32,17 @@ module Gitlab
false
end
def last_failure
circuit_broken? ? Time.now : nil
end
def failure_count
circuit_broken? ? failure_count_threshold : 0
end
def failure_info
Gitlab::Git::Storage::CircuitBreaker::FailureInfo.new(last_failure, failure_count)
@failure_info ||=
if circuit_broken?
Gitlab::Git::Storage::FailureInfo.new(Time.now,
Time.now,
failure_count_threshold)
else
Gitlab::Git::Storage::FailureInfo.new(nil,
nil,
0)
end
end
end
end

View File

@ -0,0 +1,11 @@
require_relative 'storage_check/cli'
require_relative 'storage_check/gitlab_caller'
require_relative 'storage_check/option_parser'
require_relative 'storage_check/response'
module Gitlab
module StorageCheck
ENDPOINT = '/-/storage_check'.freeze
Options = Struct.new(:target, :token, :interval, :dryrun)
end
end

View File

@ -0,0 +1,69 @@
module Gitlab
module StorageCheck
class CLI
def self.start!(args)
runner = new(Gitlab::StorageCheck::OptionParser.parse!(args))
runner.start_loop
end
attr_reader :logger, :options
def initialize(options)
@options = options
@logger = Logger.new(STDOUT)
end
def start_loop
logger.info "Checking #{options.target} every #{options.interval} seconds"
if options.dryrun
logger.info "Dryrun, exiting..."
return
end
begin
loop do
response = GitlabCaller.new(options).call!
log_response(response)
update_settings(response)
sleep options.interval
end
rescue Interrupt
logger.info "Ending storage-check"
end
end
def update_settings(response)
previous_interval = options.interval
if response.valid?
options.interval = response.check_interval || previous_interval
end
if previous_interval != options.interval
logger.info "Interval changed: #{options.interval} seconds"
end
end
def log_response(response)
unless response.valid?
return logger.error("Invalid response checking nfs storage: #{response.http_response.inspect}")
end
if response.responsive_shards.any?
logger.debug("Responsive shards: #{response.responsive_shards.join(', ')}")
end
warnings = []
if response.skipped_shards.any?
warnings << "Skipped shards: #{response.skipped_shards.join(', ')}"
end
if response.failing_shards.any?
warnings << "Failing shards: #{response.failing_shards.join(', ')}"
end
logger.warn(warnings.join(' - ')) if warnings.any?
end
end
end
end

View File

@ -0,0 +1,39 @@
require 'excon'
module Gitlab
module StorageCheck
class GitlabCaller
def initialize(options)
@options = options
end
def call!
Gitlab::StorageCheck::Response.new(get_response)
rescue Errno::ECONNREFUSED, Excon::Error
# Server not ready, treated as invalid response.
Gitlab::StorageCheck::Response.new(nil)
end
def get_response
scheme, *other_parts = URI.split(@options.target)
socket_path = if scheme == 'unix'
other_parts.compact.join
end
connection = Excon.new(@options.target, socket: socket_path)
connection.post(path: Gitlab::StorageCheck::ENDPOINT,
headers: headers)
end
def headers
@headers ||= begin
headers = {}
headers['Content-Type'] = headers['Accept'] = 'application/json'
headers['TOKEN'] = @options.token if @options.token
headers
end
end
end
end
end

View File

@ -0,0 +1,39 @@
module Gitlab
module StorageCheck
class OptionParser
def self.parse!(args)
# Start out with some defaults
options = Gitlab::StorageCheck::Options.new(nil, nil, 1, false)
parser = ::OptionParser.new do |opts|
opts.banner = "Usage: bin/storage_check [options]"
opts.on('-t=string', '--target string', 'URL or socket to trigger storage check') do |value|
options.target = value
end
opts.on('-T=string', '--token string', 'Health token to use') { |value| options.token = value }
opts.on('-i=n', '--interval n', ::OptionParser::DecimalInteger, 'Seconds between checks') do |value|
options.interval = value
end
opts.on('-d', '--dryrun', "Output what will be performed, but don't start the process") do |value|
options.dryrun = value
end
end
parser.parse!(args)
unless options.target
raise ::OptionParser::InvalidArgument.new('Provide a URI to provide checks')
end
if URI.parse(options.target).scheme.nil?
raise ::OptionParser::InvalidArgument.new('Add the scheme to the target, `unix://`, `https://` or `http://` are supported')
end
options
end
end
end
end

View File

@ -0,0 +1,77 @@
require 'json'
module Gitlab
module StorageCheck
class Response
attr_reader :http_response
def initialize(http_response)
@http_response = http_response
end
def valid?
@http_response && (200...299).cover?(@http_response.status) &&
@http_response.headers['Content-Type'].include?('application/json') &&
parsed_response
end
def check_interval
return nil unless parsed_response
parsed_response['check_interval']
end
def responsive_shards
divided_results[:responsive_shards]
end
def skipped_shards
divided_results[:skipped_shards]
end
def failing_shards
divided_results[:failing_shards]
end
private
def results
return [] unless parsed_response
parsed_response['results']
end
def divided_results
return @divided_results if @divided_results
@divided_results = {}
@divided_results[:responsive_shards] = []
@divided_results[:skipped_shards] = []
@divided_results[:failing_shards] = []
results.each do |info|
name = info['storage']
case info['success']
when true
@divided_results[:responsive_shards] << name
when false
@divided_results[:failing_shards] << name
else
@divided_results[:skipped_shards] << name
end
end
@divided_results
end
def parsed_response
return @parsed_response if defined?(@parsed_response)
@parsed_response = JSON.parse(@http_response.body)
rescue JSON::JSONError
@parsed_response = nil
end
end
end
end

View File

@ -0,0 +1,13 @@
require 'spec_helper'
describe 'bin/storage_check' do
it 'is executable' do
command = %w[bin/storage_check -t unix://the/path/to/a/unix-socket.sock -i 10 -d]
expected_output = 'Checking unix://the/path/to/a/unix-socket.sock every 10 seconds'
output, status = Gitlab::Popen.popen(command, Rails.root.to_s)
expect(status).to eq(0)
expect(output).to include(expected_output)
end
end

View File

@ -1,6 +1,6 @@
require 'spec_helper'
describe Admin::HealthCheckController, broken_storage: true do
describe Admin::HealthCheckController do
let(:admin) { create(:admin) }
before do
@ -17,7 +17,7 @@ describe Admin::HealthCheckController, broken_storage: true do
describe 'POST reset_storage_health' do
it 'resets all storage health information' do
expect(Gitlab::Git::Storage::CircuitBreaker).to receive(:reset_all!)
expect(Gitlab::Git::Storage::FailureInfo).to receive(:reset_all!)
post :reset_storage_health
end

View File

@ -14,6 +14,48 @@ describe HealthController do
stub_env('IN_MEMORY_APPLICATION_SETTINGS', 'false')
end
describe '#storage_check' do
before do
allow(Gitlab::RequestContext).to receive(:client_ip).and_return(whitelisted_ip)
end
subject { post :storage_check }
it 'checks all the configured storages' do
expect(Gitlab::Git::Storage::Checker).to receive(:check_all).and_call_original
subject
end
it 'returns the check interval' do
stub_env('IN_MEMORY_APPLICATION_SETTINGS', 'true')
stub_application_setting(circuitbreaker_check_interval: 10)
subject
expect(json_response['check_interval']).to eq(10)
end
context 'with failing storages', :broken_storage do
before do
stub_storage_settings(
broken: { path: 'tmp/tests/non-existent-repositories' }
)
end
it 'includes the failure information' do
subject
expected_results = [
{ 'storage' => 'broken', 'success' => false },
{ 'storage' => 'default', 'success' => true }
]
expect(json_response['results']).to eq(expected_results)
end
end
end
describe '#readiness' do
shared_context 'endpoint responding with readiness data' do
let(:request_params) { {} }

View File

@ -1,6 +1,6 @@
require 'spec_helper'
feature "Admin Health Check", :feature, :broken_storage do
feature "Admin Health Check", :feature do
include StubENV
before do
@ -36,6 +36,7 @@ feature "Admin Health Check", :feature, :broken_storage do
context 'when services are up' do
before do
stub_storage_settings({}) # Hide the broken storage
visit admin_health_check_path
end
@ -56,10 +57,8 @@ feature "Admin Health Check", :feature, :broken_storage do
end
end
context 'with repository storage failures' do
context 'with repository storage failures', :broken_storage do
before do
# Track a failure
Gitlab::Git::Storage::CircuitBreaker.for_storage('broken').perform { nil } rescue nil
visit admin_health_check_path
end
@ -67,9 +66,10 @@ feature "Admin Health Check", :feature, :broken_storage do
hostname = Gitlab::Environment.hostname
maximum_failures = Gitlab::CurrentSettings.current_application_settings
.circuitbreaker_failure_count_threshold
number_of_failures = maximum_failures + 1
expect(page).to have_content('broken: failed storage access attempt on host:')
expect(page).to have_content("#{hostname}: 1 of #{maximum_failures} failures.")
expect(page).to have_content("broken: #{number_of_failures} failed storage access attempts:")
expect(page).to have_content("#{hostname}: #{number_of_failures} of #{maximum_failures} failures.")
end
it 'allows resetting storage failures' do

View File

@ -0,0 +1,132 @@
require 'spec_helper'
describe Gitlab::Git::Storage::Checker, :clean_gitlab_redis_shared_state do
let(:storage_name) { 'default' }
let(:hostname) { Gitlab::Environment.hostname }
let(:cache_key) { "storage_accessible:#{storage_name}:#{hostname}" }
subject(:checker) { described_class.new(storage_name) }
def value_from_redis(name)
Gitlab::Git::Storage.redis.with do |redis|
redis.hmget(cache_key, name)
end.first
end
def set_in_redis(name, value)
Gitlab::Git::Storage.redis.with do |redis|
redis.hmset(cache_key, name, value)
end.first
end
describe '.check_all' do
it 'calls a check for each storage' do
fake_checker_default = double
fake_checker_broken = double
fake_logger = fake_logger
expect(described_class).to receive(:new).with('default', fake_logger) { fake_checker_default }
expect(described_class).to receive(:new).with('broken', fake_logger) { fake_checker_broken }
expect(fake_checker_default).to receive(:check_with_lease)
expect(fake_checker_broken).to receive(:check_with_lease)
described_class.check_all(fake_logger)
end
context 'with broken storage', :broken_storage do
it 'returns the results' do
expected_result = [
{ storage: 'default', success: true },
{ storage: 'broken', success: false }
]
expect(described_class.check_all).to eq(expected_result)
end
end
end
describe '#initialize' do
it 'assigns the settings' do
expect(checker.hostname).to eq(hostname)
expect(checker.storage).to eq('default')
expect(checker.storage_path).to eq(TestEnv.repos_path)
end
end
describe '#check_with_lease' do
it 'only allows one check at a time' do
expect(checker).to receive(:check).once { sleep 1 }
thread = Thread.new { checker.check_with_lease }
checker.check_with_lease
thread.join
end
it 'returns a result hash' do
expect(checker.check_with_lease).to eq(storage: 'default', success: true)
end
end
describe '#check' do
it 'tracks that the storage was accessible' do
set_in_redis(:failure_count, 10)
set_in_redis(:last_failure, Time.now.to_f)
checker.check
expect(value_from_redis(:failure_count).to_i).to eq(0)
expect(value_from_redis(:last_failure)).to be_empty
expect(value_from_redis(:first_failure)).to be_empty
end
it 'calls the check with the correct arguments' do
stub_application_setting(circuitbreaker_storage_timeout: 30,
circuitbreaker_access_retries: 3)
expect(Gitlab::Git::Storage::ForkedStorageCheck)
.to receive(:storage_available?).with(TestEnv.repos_path, 30, 3)
.and_call_original
checker.check
end
it 'returns `true`' do
expect(checker.check).to eq(true)
end
it 'maintains known storage keys' do
Timecop.freeze do
# Insert an old key to expire
old_entry = Time.now.to_i - 3.days.to_i
Gitlab::Git::Storage.redis.with do |redis|
redis.zadd(Gitlab::Git::Storage::REDIS_KNOWN_KEYS, old_entry, 'to_be_removed')
end
checker.check
known_keys = Gitlab::Git::Storage.redis.with do |redis|
redis.zrange(Gitlab::Git::Storage::REDIS_KNOWN_KEYS, 0, -1)
end
expect(known_keys).to contain_exactly(cache_key)
end
end
context 'the storage is not available', :broken_storage do
let(:storage_name) { 'broken' }
it 'tracks that the storage was inaccessible' do
Timecop.freeze do
expect { checker.check }.to change { value_from_redis(:failure_count).to_i }.by(1)
expect(value_from_redis(:last_failure)).not_to be_empty
expect(value_from_redis(:first_failure)).not_to be_empty
end
end
it 'returns `false`' do
expect(checker.check).to eq(false)
end
end
end
end

View File

@ -1,11 +1,18 @@
require 'spec_helper'
describe Gitlab::Git::Storage::CircuitBreaker, clean_gitlab_redis_shared_state: true, broken_storage: true do
describe Gitlab::Git::Storage::CircuitBreaker, :broken_storage do
let(:storage_name) { 'default' }
let(:circuit_breaker) { described_class.new(storage_name, hostname) }
let(:hostname) { Gitlab::Environment.hostname }
let(:cache_key) { "storage_accessible:#{storage_name}:#{hostname}" }
def set_in_redis(name, value)
Gitlab::Git::Storage.redis.with do |redis|
redis.zadd(Gitlab::Git::Storage::REDIS_KNOWN_KEYS, 0, cache_key)
redis.hmset(cache_key, name, value)
end.first
end
before do
# Override test-settings for the circuitbreaker with something more realistic
# for these specs.
@ -19,36 +26,7 @@ describe Gitlab::Git::Storage::CircuitBreaker, clean_gitlab_redis_shared_state:
)
end
def value_from_redis(name)
Gitlab::Git::Storage.redis.with do |redis|
redis.hmget(cache_key, name)
end.first
end
def set_in_redis(name, value)
Gitlab::Git::Storage.redis.with do |redis|
redis.zadd(Gitlab::Git::Storage::REDIS_KNOWN_KEYS, 0, cache_key)
redis.hmset(cache_key, name, value)
end.first
end
describe '.reset_all!' do
it 'clears all entries form redis' do
set_in_redis(:failure_count, 10)
described_class.reset_all!
key_exists = Gitlab::Git::Storage.redis.with { |redis| redis.exists(cache_key) }
expect(key_exists).to be_falsey
end
it 'does not break when there are no keys in redis' do
expect { described_class.reset_all! }.not_to raise_error
end
end
describe '.for_storage' do
describe '.for_storage', :request_store do
it 'only builds a single circuitbreaker per storage' do
expect(described_class).to receive(:new).once.and_call_original
@ -71,7 +49,6 @@ describe Gitlab::Git::Storage::CircuitBreaker, clean_gitlab_redis_shared_state:
it 'assigns the settings' do
expect(circuit_breaker.hostname).to eq(hostname)
expect(circuit_breaker.storage).to eq('default')
expect(circuit_breaker.storage_path).to eq(TestEnv.repos_path)
end
end
@ -91,9 +68,9 @@ describe Gitlab::Git::Storage::CircuitBreaker, clean_gitlab_redis_shared_state:
end
end
describe '#failure_wait_time' do
describe '#check_interval' do
it 'reads the value from settings' do
expect(circuit_breaker.failure_wait_time).to eq(1)
expect(circuit_breaker.check_interval).to eq(1)
end
end
@ -114,12 +91,6 @@ describe Gitlab::Git::Storage::CircuitBreaker, clean_gitlab_redis_shared_state:
expect(circuit_breaker.access_retries).to eq(4)
end
end
describe '#backoff_threshold' do
it 'reads the value from settings' do
expect(circuit_breaker.backoff_threshold).to eq(5)
end
end
end
describe '#perform' do
@ -134,19 +105,6 @@ describe Gitlab::Git::Storage::CircuitBreaker, clean_gitlab_redis_shared_state:
end
end
it 'raises the correct exception when backing off' do
Timecop.freeze do
set_in_redis(:last_failure, 1.second.ago.to_f)
set_in_redis(:failure_count, 90)
expect { |b| circuit_breaker.perform(&b) }
.to raise_error do |exception|
expect(exception).to be_kind_of(Gitlab::Git::Storage::Failing)
expect(exception.retry_after).to eq(30)
end
end
end
it 'yields the block' do
expect { |b| circuit_breaker.perform(&b) }
.to yield_control
@ -170,54 +128,6 @@ describe Gitlab::Git::Storage::CircuitBreaker, clean_gitlab_redis_shared_state:
.to raise_error(Rugged::OSError)
end
it 'tracks that the storage was accessible' do
set_in_redis(:failure_count, 10)
set_in_redis(:last_failure, Time.now.to_f)
circuit_breaker.perform { '' }
expect(value_from_redis(:failure_count).to_i).to eq(0)
expect(value_from_redis(:last_failure)).to be_empty
expect(circuit_breaker.failure_count).to eq(0)
expect(circuit_breaker.last_failure).to be_nil
end
it 'maintains known storage keys' do
Timecop.freeze do
# Insert an old key to expire
old_entry = Time.now.to_i - 3.days.to_i
Gitlab::Git::Storage.redis.with do |redis|
redis.zadd(Gitlab::Git::Storage::REDIS_KNOWN_KEYS, old_entry, 'to_be_removed')
end
circuit_breaker.perform { '' }
known_keys = Gitlab::Git::Storage.redis.with do |redis|
redis.zrange(Gitlab::Git::Storage::REDIS_KNOWN_KEYS, 0, -1)
end
expect(known_keys).to contain_exactly(cache_key)
end
end
it 'only performs the accessibility check once' do
expect(Gitlab::Git::Storage::ForkedStorageCheck)
.to receive(:storage_available?).once.and_call_original
2.times { circuit_breaker.perform { '' } }
end
it 'calls the check with the correct arguments' do
stub_application_setting(circuitbreaker_storage_timeout: 30,
circuitbreaker_access_retries: 3)
expect(Gitlab::Git::Storage::ForkedStorageCheck)
.to receive(:storage_available?).with(TestEnv.repos_path, 30, 3)
.and_call_original
circuit_breaker.perform { '' }
end
context 'with the feature disabled' do
before do
stub_feature_flags(git_storage_circuit_breaker: false)
@ -240,31 +150,6 @@ describe Gitlab::Git::Storage::CircuitBreaker, clean_gitlab_redis_shared_state:
expect(result).to eq('hello')
end
end
context 'the storage is not available' do
let(:storage_name) { 'broken' }
it 'raises the correct exception' do
expect(circuit_breaker).to receive(:track_storage_inaccessible)
expect { circuit_breaker.perform { '' } }
.to raise_error do |exception|
expect(exception).to be_kind_of(Gitlab::Git::Storage::Inaccessible)
expect(exception.retry_after).to eq(30)
end
end
it 'tracks that the storage was inaccessible' do
Timecop.freeze do
expect { circuit_breaker.perform { '' } }.to raise_error(Gitlab::Git::Storage::Inaccessible)
expect(value_from_redis(:failure_count).to_i).to eq(1)
expect(value_from_redis(:last_failure)).not_to be_empty
expect(circuit_breaker.failure_count).to eq(1)
expect(circuit_breaker.last_failure).to be_within(1.second).of(Time.now)
end
end
end
end
describe '#circuit_broken?' do
@ -283,32 +168,6 @@ describe Gitlab::Git::Storage::CircuitBreaker, clean_gitlab_redis_shared_state:
end
end
describe '#backing_off?' do
it 'is true when there was a recent failure' do
Timecop.freeze do
set_in_redis(:last_failure, 1.second.ago.to_f)
set_in_redis(:failure_count, 90)
expect(circuit_breaker.backing_off?).to be_truthy
end
end
context 'the `failure_wait_time` is set to 0' do
before do
stub_application_setting(circuitbreaker_failure_wait_time: 0)
end
it 'is working even when there are failures' do
Timecop.freeze do
set_in_redis(:last_failure, 0.seconds.ago.to_f)
set_in_redis(:failure_count, 90)
expect(circuit_breaker.backing_off?).to be_falsey
end
end
end
end
describe '#last_failure' do
it 'returns the last failure time' do
time = Time.parse("2017-05-26 17:52:30")

View File

@ -0,0 +1,70 @@
require 'spec_helper'
describe Gitlab::Git::Storage::FailureInfo, :broken_storage do
let(:storage_name) { 'default' }
let(:hostname) { Gitlab::Environment.hostname }
let(:cache_key) { "storage_accessible:#{storage_name}:#{hostname}" }
def value_from_redis(name)
Gitlab::Git::Storage.redis.with do |redis|
redis.hmget(cache_key, name)
end.first
end
def set_in_redis(name, value)
Gitlab::Git::Storage.redis.with do |redis|
redis.zadd(Gitlab::Git::Storage::REDIS_KNOWN_KEYS, 0, cache_key)
redis.hmset(cache_key, name, value)
end.first
end
describe '.reset_all!' do
it 'clears all entries form redis' do
set_in_redis(:failure_count, 10)
described_class.reset_all!
key_exists = Gitlab::Git::Storage.redis.with { |redis| redis.exists(cache_key) }
expect(key_exists).to be_falsey
end
it 'does not break when there are no keys in redis' do
expect { described_class.reset_all! }.not_to raise_error
end
end
describe '.load' do
it 'loads failure information for a storage on a host' do
first_failure = Time.parse("2017-11-14 17:52:30")
last_failure = Time.parse("2017-11-14 18:54:37")
failure_count = 11
set_in_redis(:first_failure, first_failure.to_i)
set_in_redis(:last_failure, last_failure.to_i)
set_in_redis(:failure_count, failure_count.to_i)
info = described_class.load(cache_key)
expect(info.first_failure).to eq(first_failure)
expect(info.last_failure).to eq(last_failure)
expect(info.failure_count).to eq(failure_count)
end
end
describe '#no_failures?' do
it 'is true when there are no failures' do
info = described_class.new(nil, nil, 0)
expect(info.no_failures?).to be_truthy
end
it 'is false when there are failures' do
info = described_class.new(Time.parse("2017-11-14 17:52:30"),
Time.parse("2017-11-14 18:54:37"),
20)
expect(info.no_failures?).to be_falsy
end
end
end

View File

@ -1,6 +1,6 @@
require 'spec_helper'
describe Gitlab::Git::Storage::Health, clean_gitlab_redis_shared_state: true, broken_storage: true do
describe Gitlab::Git::Storage::Health, broken_storage: true do
let(:host1_key) { 'storage_accessible:broken:web01' }
let(:host2_key) { 'storage_accessible:default:kiq01' }

View File

@ -27,7 +27,7 @@ describe Gitlab::Git::Storage::NullCircuitBreaker do
end
describe '#failure_info' do
it { Timecop.freeze { expect(breaker.failure_info).to eq(Gitlab::Git::Storage::CircuitBreaker::FailureInfo.new(Time.now, breaker.failure_count_threshold)) } }
it { expect(breaker.failure_info.no_failures?).to be_falsy }
end
end
@ -49,7 +49,7 @@ describe Gitlab::Git::Storage::NullCircuitBreaker do
end
describe '#failure_info' do
it { expect(breaker.failure_info).to eq(Gitlab::Git::Storage::CircuitBreaker::FailureInfo.new(nil, 0)) }
it { expect(breaker.failure_info.no_failures?).to be_truthy }
end
end

View File

@ -0,0 +1,19 @@
require 'spec_helper'
describe Gitlab::StorageCheck::CLI do
let(:options) { Gitlab::StorageCheck::Options.new('unix://tmp/socket.sock', nil, 1, false) }
subject(:runner) { described_class.new(options) }
describe '#update_settings' do
it 'updates the interval when changed in a valid response and logs the change' do
fake_response = double
expect(fake_response).to receive(:valid?).and_return(true)
expect(fake_response).to receive(:check_interval).and_return(42)
expect(runner.logger).to receive(:info)
runner.update_settings(fake_response)
expect(options.interval).to eq(42)
end
end
end

View File

@ -0,0 +1,46 @@
require 'spec_helper'
describe Gitlab::StorageCheck::GitlabCaller do
let(:options) { Gitlab::StorageCheck::Options.new('unix://tmp/socket.sock', nil, nil, false) }
subject(:gitlab_caller) { described_class.new(options) }
describe '#call!' do
context 'when a socket is given' do
it 'calls a socket' do
fake_connection = double
expect(fake_connection).to receive(:post)
expect(Excon).to receive(:new).with('unix://tmp/socket.sock', socket: "tmp/socket.sock") { fake_connection }
gitlab_caller.call!
end
end
context 'when a host is given' do
let(:options) { Gitlab::StorageCheck::Options.new('http://localhost:8080', nil, nil, false) }
it 'it calls a http response' do
fake_connection = double
expect(Excon).to receive(:new).with('http://localhost:8080', socket: nil) { fake_connection }
expect(fake_connection).to receive(:post)
gitlab_caller.call!
end
end
end
describe '#headers' do
it 'Adds the JSON header' do
headers = gitlab_caller.headers
expect(headers['Content-Type']).to eq('application/json')
end
context 'when a token was provided' do
let(:options) { Gitlab::StorageCheck::Options.new('unix://tmp/socket.sock', 'atoken', nil, false) }
it 'adds it to the headers' do
expect(gitlab_caller.headers['TOKEN']).to eq('atoken')
end
end
end
end

View File

@ -0,0 +1,31 @@
require 'spec_helper'
describe Gitlab::StorageCheck::OptionParser do
describe '.parse!' do
it 'assigns all options' do
args = %w(--target unix://tmp/hello/world.sock --token thetoken --interval 42)
options = described_class.parse!(args)
expect(options.token).to eq('thetoken')
expect(options.interval).to eq(42)
expect(options.target).to eq('unix://tmp/hello/world.sock')
end
it 'requires the interval to be a number' do
args = %w(--target unix://tmp/hello/world.sock --interval fortytwo)
expect { described_class.parse!(args) }.to raise_error(OptionParser::InvalidArgument)
end
it 'raises an error if the scheme is not included' do
args = %w(--target tmp/hello/world.sock)
expect { described_class.parse!(args) }.to raise_error(OptionParser::InvalidArgument)
end
it 'raises an error if both socket and host are missing' do
expect { described_class.parse!([]) }.to raise_error(OptionParser::InvalidArgument)
end
end
end

View File

@ -0,0 +1,54 @@
require 'spec_helper'
describe Gitlab::StorageCheck::Response do
let(:fake_json) do
{
check_interval: 42,
results: [
{ storage: 'working', success: true },
{ storage: 'skipped', success: nil },
{ storage: 'failing', success: false }
]
}.to_json
end
let(:fake_http_response) do
fake_response = instance_double("Excon::Response - Status check")
allow(fake_response).to receive(:status).and_return(200)
allow(fake_response).to receive(:body).and_return(fake_json)
allow(fake_response).to receive(:headers).and_return('Content-Type' => 'application/json')
fake_response
end
let(:response) { described_class.new(fake_http_response) }
describe '#valid?' do
it 'is valid for a success response with parseable JSON' do
expect(response).to be_valid
end
end
describe '#check_interval' do
it 'returns the result from the JSON' do
expect(response.check_interval).to eq(42)
end
end
describe '#responsive_shards' do
it 'contains the names of working shards' do
expect(response.responsive_shards).to contain_exactly('working')
end
end
describe '#skipped_shards' do
it 'contains the names of skipped shards' do
expect(response.skipped_shards).to contain_exactly('skipped')
end
end
describe '#failing_shards' do
it 'contains the name of failing shards' do
expect(response.failing_shards).to contain_exactly('failing')
end
end
end

View File

@ -115,9 +115,8 @@ describe ApplicationSetting do
end
context 'circuitbreaker settings' do
[:circuitbreaker_backoff_threshold,
:circuitbreaker_failure_count_threshold,
:circuitbreaker_failure_wait_time,
[:circuitbreaker_failure_count_threshold,
:circuitbreaker_check_interval,
:circuitbreaker_failure_reset_time,
:circuitbreaker_storage_timeout].each do |field|
it "Validates #{field} as number" do
@ -126,16 +125,6 @@ describe ApplicationSetting do
.is_greater_than_or_equal_to(0)
end
end
it 'requires the `backoff_threshold` to be lower than the `failure_count_threshold`' do
setting.circuitbreaker_failure_count_threshold = 10
setting.circuitbreaker_backoff_threshold = 15
failure_message = "The circuitbreaker backoff threshold should be lower "\
"than the failure count threshold"
expect(setting).not_to be_valid
expect(setting.errors[:circuitbreaker_backoff_threshold]).to include(failure_message)
end
end
context 'repository storages' do

View File

@ -29,7 +29,9 @@ describe Repository do
def expect_to_raise_storage_error
expect { yield }.to raise_error do |exception|
storage_exceptions = [Gitlab::Git::Storage::Inaccessible, Gitlab::Git::CommandError, GRPC::Unavailable]
expect(exception.class).to be_in(storage_exceptions)
known_exception = storage_exceptions.select { |e| exception.is_a?(e) }
expect(known_exception).not_to be_nil
end
end
@ -634,9 +636,7 @@ describe Repository do
end
describe '#fetch_ref' do
# Setting the var here, sidesteps the stub that makes gitaly raise an error
# before the actual test call
set(:broken_repository) { create(:project, :broken_storage).repository }
let(:broken_repository) { create(:project, :broken_storage).repository }
describe 'when storage is broken', :broken_storage do
it 'should raise a storage error' do

View File

@ -47,7 +47,7 @@ describe API::CircuitBreakers do
describe 'DELETE circuit_breakers/repository_storage' do
it 'clears all circuit_breakers' do
expect(Gitlab::Git::Storage::CircuitBreaker).to receive(:reset_all!)
expect(Gitlab::Git::Storage::FailureInfo).to receive(:reset_all!)
delete api('/circuit_breakers/repository_storage', admin)

View File

@ -54,7 +54,7 @@ describe API::Settings, 'Settings' do
dsa_key_restriction: 2048,
ecdsa_key_restriction: 384,
ed25519_key_restriction: 256,
circuitbreaker_failure_wait_time: 2
circuitbreaker_check_interval: 2
expect(response).to have_gitlab_http_status(200)
expect(json_response['default_projects_limit']).to eq(3)
@ -75,7 +75,7 @@ describe API::Settings, 'Settings' do
expect(json_response['dsa_key_restriction']).to eq(2048)
expect(json_response['ecdsa_key_restriction']).to eq(384)
expect(json_response['ed25519_key_restriction']).to eq(256)
expect(json_response['circuitbreaker_failure_wait_time']).to eq(2)
expect(json_response['circuitbreaker_check_interval']).to eq(2)
end
end

View File

@ -121,18 +121,6 @@ RSpec.configure do |config|
reset_delivered_emails!
end
# Stub the `ForkedStorageCheck.storage_available?` method unless
# `:broken_storage` metadata is defined
#
# This check can be slow and is unnecessary in a test environment where we
# know the storage is available, because we create it at runtime
config.before(:example) do |example|
unless example.metadata[:broken_storage]
allow(Gitlab::Git::Storage::ForkedStorageCheck)
.to receive(:storage_available?).and_return(true)
end
end
config.around(:each, :use_clean_rails_memory_store_caching) do |example|
caching_store = Rails.cache
Rails.cache = ActiveSupport::Cache::MemoryStore.new

View File

@ -12,6 +12,25 @@ RSpec.configure do |config|
raise GRPC::Unavailable.new('Gitaly broken in this spec')
end
Gitlab::Git::Storage::CircuitBreaker.reset_all!
# Track the maximum number of failures
first_failure = Time.parse("2017-11-14 17:52:30")
last_failure = Time.parse("2017-11-14 18:54:37")
failure_count = Gitlab::CurrentSettings
.current_application_settings
.circuitbreaker_failure_count_threshold + 1
cache_key = "#{Gitlab::Git::Storage::REDIS_KEY_PREFIX}broken:#{Gitlab::Environment.hostname}"
Gitlab::Git::Storage.redis.with do |redis|
redis.pipelined do
redis.zadd(Gitlab::Git::Storage::REDIS_KNOWN_KEYS, 0, cache_key)
redis.hset(cache_key, :first_failure, first_failure.to_i)
redis.hset(cache_key, :last_failure, last_failure.to_i)
redis.hset(cache_key, :failure_count, failure_count.to_i)
end
end
end
config.after(:each, :broken_storage) do
Gitlab::Git::Storage.redis.with(&:flushall)
end
end

View File

@ -43,6 +43,8 @@ module StubConfiguration
end
def stub_storage_settings(messages)
messages.deep_stringify_keys!
# Default storage is always required
messages['default'] ||= Gitlab.config.repositories.storages.default
messages.each do |storage_name, storage_settings|