Add a Circuitbreaker for storage paths

This commit is contained in:
Bob Van Landuyt 2017-05-17 18:17:15 +02:00 committed by Bob Van Landuyt
parent 990feb9f2b
commit 3598e60bf2
40 changed files with 1421 additions and 37 deletions

View file

@ -1,5 +1,12 @@
# Admin page combining the application health checks with the health of the
# configured Git storages.
class Admin::HealthCheckController < Admin::ApplicationController
  # Exposes the errors of the 'standard' health checks and the storages that
  # are currently failing to the view.
  def show
    @errors = HealthCheck::Utils.process_checks(%w[standard])
    @failing_storage_statuses = Gitlab::Git::Storage::Health.for_failing_storages
  end

  # Clears all tracked storage failures and returns to the health check page
  # with a confirmation notice.
  def reset_storage_health
    Gitlab::Git::Storage::CircuitBreaker.reset_all!

    redirect_to(
      admin_health_check_path,
      notice: _('Git storage health information has been reset')
    )
  end
end

View file

@ -52,6 +52,15 @@ class ApplicationController < ActionController::Base
head :forbidden, retry_after: Gitlab::Auth::UniqueIpsLimiter.config.unique_ips_limit_time_window
end
# An inaccessible Git storage or a GRPC::Unavailable error is reported and
# logged, then answered with a 503 instead of a generic failure.
rescue_from Gitlab::Git::Storage::Inaccessible, GRPC::Unavailable do |exception|
  Raven.capture_exception(exception) if sentry_enabled?
  log_exception(exception)

  # Not every rescued error responds to `retry_after`, and for the storage
  # errors the value is optional (it defaults to nil). Only send the header
  # when there is an actual value instead of emitting a nil header.
  retry_after = exception.retry_after if exception.respond_to?(:retry_after)
  headers['Retry-After'] = retry_after if retry_after

  render_503
end
# Redirects back when the request carries a referer, otherwise to `default`.
# `options` is handed to `redirect_to` unchanged.
def redirect_back_or_default(default: root_path, options: {})
  target =
    if request.referer.present?
      :back
    else
      default
    end

  redirect_to(target, options)
end
@ -152,6 +161,19 @@ class ApplicationController < ActionController::Base
head :unprocessable_entity
end
# Answers with "503 Service Unavailable": HTML requests get the `public/503`
# file rendered without a layout, every other format gets headers only.
def render_503
  respond_to do |format|
    format.html do
      error_page = Rails.root.join("public", "503")

      render(file: error_page, layout: false, status: :service_unavailable)
    end

    format.any do
      head(:service_unavailable)
    end
  end
end
def no_cache_headers
response.headers["Cache-Control"] = "no-cache, no-store, max-age=0, must-revalidate"
response.headers["Pragma"] = "no-cache"

View file

@ -0,0 +1,37 @@
# View helpers that turn Git storage health information into translated,
# human readable messages.
module StorageHealthHelper
  # Builds the HTML-safe headline for a failing storage.
  #
  # The storage name and the host names are escaped before they are merged
  # into the translation; the wording is pluralised on the total number of
  # failures of the storage.
  def failing_storage_health_message(storage_health)
    params = {
      storage_name: content_tag(:strong, h(storage_health.storage_name)),
      host_names: h(storage_health.failing_on_hosts.to_sentence),
      failed_attempts: storage_health.total_failures
    }

    headline = n_('%{storage_name}: failed storage access attempt on host:',
                  '%{storage_name}: %{failed_attempts} failed storage access attempts:',
                  storage_health.total_failures)

    (headline % params).html_safe
  end

  # Describes the state of a single circuit breaker: how many failures were
  # counted against the threshold and what happens to the next access attempt.
  def message_for_circuit_breaker(circuit_breaker)
    threshold = circuit_breaker.failure_count_threshold
    failures = circuit_breaker.failure_count
    # Broken *and* at the threshold: access is blocked until a manual reset.
    blocked_until_reset = circuit_breaker.circuit_broken? && failures >= threshold

    params = {
      number_of_failures: failures,
      maximum_failures: threshold,
      number_of_seconds: circuit_breaker.failure_wait_time
    }

    template =
      if blocked_until_reset
        s_("%{number_of_failures} of %{maximum_failures} failures. GitLab will not "\
           "retry automatically. Reset storage information when the problem is "\
           "resolved.")
      elsif circuit_breaker.circuit_broken?
        _("%{number_of_failures} of %{maximum_failures} failures. GitLab will "\
          "block access for %{number_of_seconds} seconds.")
      else
        _("%{number_of_failures} of %{maximum_failures} failures. GitLab will "\
          "allow access on the next attempt.")
      end

    template % params
  end
end

View file

@ -133,12 +133,13 @@ class Repository
ref ||= root_ref
args = %W(
#{Gitlab.config.git.bin_path} log #{ref} --pretty=%H --skip #{offset}
log #{ref} --pretty=%H --skip #{offset}
--max-count #{limit} --grep=#{query} --regexp-ignore-case
)
args = args.concat(%W(-- #{path})) if path.present?
git_log_results = Gitlab::Popen.popen(args, path_to_repo).first.lines
git_log_results = run_git(args).first.lines
git_log_results.map { |c| commit(c.chomp) }.compact
end
@ -622,8 +623,8 @@ class Repository
key = path.blank? ? "last_commit_id_for_path:#{sha}" : "last_commit_id_for_path:#{sha}:#{Digest::SHA1.hexdigest(path)}"
cache.fetch(key) do
args = %W(#{Gitlab.config.git.bin_path} rev-list --max-count=1 #{sha} -- #{path})
Gitlab::Popen.popen(args, path_to_repo).first.strip
args = %W(rev-list --max-count=1 #{sha} -- #{path})
run_git(args).first.strip
end
end
@ -678,8 +679,8 @@ class Repository
end
def refs_contains_sha(ref_type, sha)
args = %W(#{Gitlab.config.git.bin_path} #{ref_type} --contains #{sha})
names = Gitlab::Popen.popen(args, path_to_repo).first
args = %W(#{ref_type} --contains #{sha})
names = run_git(args).first
if names.respond_to?(:split)
names = names.split("\n").map(&:strip)
@ -957,15 +958,17 @@ class Repository
return [] if empty_repo? || query.blank?
offset = 2
args = %W(#{Gitlab.config.git.bin_path} grep -i -I -n --before-context #{offset} --after-context #{offset} -E -e #{Regexp.escape(query)} #{ref || root_ref})
Gitlab::Popen.popen(args, path_to_repo).first.scrub.split(/^--$/)
args = %W(grep -i -I -n --before-context #{offset} --after-context #{offset} -E -e #{Regexp.escape(query)} #{ref || root_ref})
run_git(args).first.scrub.split(/^--$/)
end
def search_files_by_name(query, ref)
return [] if empty_repo? || query.blank?
args = %W(#{Gitlab.config.git.bin_path} ls-tree --full-tree -r #{ref || root_ref} --name-status | #{Regexp.escape(query)})
Gitlab::Popen.popen(args, path_to_repo).first.lines.map(&:strip)
args = %W(ls-tree --full-tree -r #{ref || root_ref} --name-status | #{Regexp.escape(query)})
run_git(args).first.lines.map(&:strip)
end
def with_repo_branch_commit(start_repository, start_branch_name)
@ -1010,8 +1013,8 @@ class Repository
end
def fetch_ref(source_path, source_ref, target_ref)
args = %W(#{Gitlab.config.git.bin_path} fetch --no-tags -f #{source_path} #{source_ref}:#{target_ref})
Gitlab::Popen.popen(args, path_to_repo)
args = %W(fetch --no-tags -f #{source_path} #{source_ref}:#{target_ref})
run_git(args)
end
def create_ref(ref, ref_path)
@ -1092,6 +1095,12 @@ class Repository
private
# Runs the configured git binary with `args` in this repository's directory.
# The call is wrapped in the storage circuit breaker.
def run_git(args)
  circuit_breaker.perform do
    command = [Gitlab.config.git.bin_path, *args]

    Gitlab::Popen.popen(command, path_to_repo)
  end
end
def blob_data_at(sha, path)
blob = blob_at(sha, path)
return unless blob
@ -1101,8 +1110,10 @@ class Repository
end
# Whether the repository directory contains a `refs` entry. The filesystem
# access goes through the storage circuit breaker.
def refs_directory_exists?
  circuit_breaker.perform do
    refs_path = File.join(path_to_repo, 'refs')

    File.exist?(refs_path)
  end
end
def cache
# TODO: should we use UUIDs here? We could move repositories without clearing this cache
@ -1145,4 +1156,8 @@ class Repository
# Builds the Gitlab::Git::Repository for the project's storage, pointing at
# the disk path with a `.git` suffix.
def initialize_raw_repository
  storage_name = project.repository_storage
  relative_path = disk_path + '.git'

  Gitlab::Git::Repository.new(storage_name, relative_path)
end
# Memoized circuit breaker for the storage the project lives on.
def circuit_breaker
  return @circuit_breaker if @circuit_breaker

  storage_name = project.repository_storage
  @circuit_breaker = Gitlab::Git::Storage::CircuitBreaker.for_storage(storage_name)
end
end

View file

@ -0,0 +1,15 @@
-# Rendered with the failing storage health objects as `failing_storages`.
-# Lists every failing storage with a message per failing host, followed by a
-# button that posts to the reset_storage_health action.
- if failing_storages.any?
= _('There are problems accessing Git storage: ')
%ul
- failing_storages.each do |storage_health|
%li
= failing_storage_health_message(storage_health)
%ul
- storage_health.failing_circuit_breakers.each do |circuit_breaker|
%li
#{circuit_breaker.hostname}: #{message_for_circuit_breaker(circuit_breaker)}
= _("Access to failing storages has been temporarily disabled to allow the mount to recover. Reset storage information after the issue has been resolved to allow access again.")
.prepend-top-10
= button_to _("Reset git storage health information"), reset_storage_health_admin_health_check_path,
method: :post, class: 'btn btn-default'

View file

@ -1,22 +1,22 @@
- @no_container = true
- page_title "Health Check"
- page_title _('Health Check')
- no_errors = @errors.blank? && @failing_storage_statuses.blank?
= render 'admin/monitoring/head'
%div{ class: container_class }
%h3.page-title
Health Check
%h3.page-title= page_title
.bs-callout.clearfix
.pull-left
%p
Access token is
#{ s_('HealthCheck|Access token is') }
%code#health-check-token= current_application_settings.health_check_access_token
.prepend-top-10
= button_to "Reset health check access token", reset_health_check_token_admin_application_settings_path,
= button_to _("Reset health check access token"), reset_health_check_token_admin_application_settings_path,
method: :put, class: 'btn btn-default',
data: { confirm: 'Are you sure you want to reset the health check token?' }
data: { confirm: _('Are you sure you want to reset the health check token?') }
%p.light
Health information can be retrieved from the following endpoints. More information is available
= link_to 'here', help_page_path('user/admin_area/monitoring/health_check')
#{ _('Health information can be retrieved from the following endpoints. More information is available') }
= link_to s_('More information is available|here'), help_page_path('user/admin_area/monitoring/health_check')
%ul
%li
%code= readiness_url(token: current_application_settings.health_check_access_token)
@ -29,14 +29,15 @@
.panel.panel-default
.panel-heading
Current Status:
- if @errors.blank?
- if no_errors
= icon('circle', class: 'cgreen')
Healthy
#{ s_('HealthCheck|Healthy') }
- else
= icon('warning', class: 'cred')
Unhealthy
#{ s_('HealthCheck|Unhealthy') }
.panel-body
- if @errors.blank?
No Health Problems Detected
- if no_errors
#{ s_('HealthCheck|No Health Problems Detected') }
- else
= @errors
= render partial: 'failing_storages', object: @failing_storage_statuses

View file

@ -0,0 +1,4 @@
---
title: Block access to failing repository storage
merge_request: 11449
author:

View file

@ -506,6 +506,11 @@ production: &base
path: /home/git/repositories/
gitaly_address: unix:/home/git/gitlab/tmp/sockets/private/gitaly.socket # TCP connections are supported too (e.g. tcp://host:port)
# gitaly_token: 'special token' # Optional: override global gitaly.token for this storage.
failure_count_threshold: 10 # number of failures before stopping attempts
failure_wait_time: 30 # Seconds after an access failure before allowing access again
failure_reset_time: 1800 # Time in seconds to expire failures
storage_timeout: 5 # Time in seconds to wait before aborting a storage access attempt
## Backup settings
backup:
@ -638,6 +643,10 @@ test:
default:
path: tmp/tests/repositories/
gitaly_address: unix:tmp/tests/gitaly/gitaly.socket
broken:
path: tmp/tests/non-existent-repositories
gitaly_address: unix:tmp/tests/gitaly/gitaly.socket
gitaly:
enabled: true
token: secret

View file

@ -222,6 +222,7 @@ Settings.gitlab['default_branch_protection'] ||= 2
Settings.gitlab['default_can_create_group'] = true if Settings.gitlab['default_can_create_group'].nil?
Settings.gitlab['host'] ||= ENV['GITLAB_HOST'] || 'localhost'
Settings.gitlab['ssh_host'] ||= Settings.gitlab.host
Settings.gitlab['hostname'] ||= ENV['HOSTNAME'] || Socket.gethostname
Settings.gitlab['https'] = false if Settings.gitlab['https'].nil?
Settings.gitlab['port'] ||= ENV['GITLAB_PORT'] || (Settings.gitlab.https ? 443 : 80)
Settings.gitlab['relative_url_root'] ||= ENV['RAILS_RELATIVE_URL_ROOT'] || ''
@ -433,6 +434,17 @@ end
# Normalise every configured repository storage: absolute path, default
# circuit breaker settings and numeric values.
Settings.repositories.storages.values.each do |storage|
# Expand relative paths
storage['path'] = Settings.absolute(storage['path'])
# Fall back to the default failure settings when a value is not configured
storage['failure_count_threshold'] ||= 10
storage['failure_wait_time'] ||= 30
storage['failure_reset_time'] ||= 1800
storage['storage_timeout'] ||= 5
# Turn strings into numbers
storage['failure_count_threshold'] = storage['failure_count_threshold'].to_i
storage['failure_wait_time'] = storage['failure_wait_time'].to_i
storage['failure_reset_time'] = storage['failure_reset_time'].to_i
# Kept as a float: we might want to have a timeout shorter than 1 second.
storage['storage_timeout'] = storage['storage_timeout'].to_f
end
#

View file

@ -7,6 +7,13 @@ def find_parent_path(name, path)
Gitlab.config.repositories.storages.detect do |n, rs|
name != n && Pathname.new(rs['path']).realpath == parent
end
rescue Errno::EIO, Errno::ENOENT => e
warning = "WARNING: couldn't verify #{path} (#{name}). "\
"If this is an external storage, it might be offline."
message = "#{warning}\n#{e.message}"
Rails.logger.error("#{message}\n\t" + e.backtrace.join("\n\t"))
nil
end
def storage_validation_error(message)
@ -29,6 +36,15 @@ def validate_storages_config
if !repository_storage.is_a?(Hash) || repository_storage['path'].nil?
storage_validation_error("#{name} is not a valid storage, because it has no `path` key. Refer to gitlab.yml.example for an updated example")
end
%w(failure_count_threshold failure_wait_time failure_reset_time storage_timeout).each do |setting|
# Falling back to the defaults is fine!
next if repository_storage[setting].nil?
unless repository_storage[setting].to_f > 0
storage_validation_error("#{setting}, for storage `#{name}` needs to be greater than 0")
end
end
end
end

View file

@ -67,7 +67,9 @@ namespace :admin do
end
resource :logs, only: [:show]
resource :health_check, controller: 'health_check', only: [:show]
resource :health_check, controller: 'health_check', only: [:show] do
post :reset_storage_health
end
resource :background_jobs, controller: 'background_jobs', only: [:show]
resource :system_info, controller: 'system_info', only: [:show]
resources :requests_profiles, only: [:index, :show], param: :name, constraints: { name: /.+\.html/ }

Binary file not shown.

After

Width:  |  Height:  |  Size: 47 KiB

View file

@ -60,7 +60,7 @@ respectively.
path: /mnt/cephfs/repositories
```
1. [Restart GitLab] for the changes to take effect.
1. [Restart GitLab][restart-gitlab] for the changes to take effect.
>**Note:**
The [`gitlab_shell: repos_path` entry][repospath] in `gitlab.yml` will be
@ -97,9 +97,80 @@ be stored via the **Application Settings** in the Admin area.
Beginning with GitLab 8.13.4, multiple paths can be chosen. New projects will be
randomly placed on one of the selected paths.
## Handling failing repository storage
> [Introduced][ce-11449] in GitLab 9.5.
When GitLab detects that access to the repository storage fails repeatedly, it can
gracefully prevent attempts to access the storage. This might be useful when
the repositories are stored somewhere on the network.
The configuration could look as follows:
**For Omnibus installations**
1. Edit `/etc/gitlab/gitlab.rb`:
```ruby
git_data_dirs({
"default" => {
"path" => "/mnt/nfs-01/git-data",
"failure_count_threshold" => 10,
"failure_wait_time" => 30,
"failure_reset_time" => 1800,
"storage_timeout" => 5
}
})
```
1. Save the file and [reconfigure GitLab][reconfigure-gitlab] for the changes to take effect.
---
**For installations from source**
1. Edit `config/gitlab.yml`:
```yaml
repositories:
storages: # You must have at least a `default` storage path.
default:
path: /home/git/repositories/
failure_count_threshold: 10 # number of failures before stopping attempts
failure_wait_time: 30 # Seconds after last access failure before trying again
failure_reset_time: 1800 # Time in seconds to expire failures
storage_timeout: 5 # Time in seconds to wait before aborting a storage access attempt
```
1. Save the file and [restart GitLab][restart-gitlab] for the changes to take effect.
**`failure_count_threshold`:** The number of failures after which GitLab will
completely prevent access to the storage. The number of failures can be reset in
the admin interface: `https://gitlab.example.com/admin/health_check` or using the
[API](../api/repository_storage_health.md) to allow access to the storage again.
**`failure_wait_time`:** When access to a storage fails, GitLab will prevent
access to the storage for the time specified here (in seconds). This allows the
filesystem to recover without further access attempts from GitLab.
**`failure_reset_time`:** The time in seconds GitLab will keep failure
information. When no failures occur during this time, information about the
mount is reset.
**`storage_timeout`:** The time in seconds GitLab will try to access storage.
After this time a timeout error will be raised.
When storage failures occur, this will be visible in the admin interface like this:
![failing storage](img/failing_storage.png)
To allow access to all storages, click the `Reset git storage health information` button.
[ce-4578]: https://gitlab.com/gitlab-org/gitlab-ce/merge_requests/4578
[restart gitlab]: restart_gitlab.md#installations-from-source
[reconfigure gitlab]: restart_gitlab.md#omnibus-gitlab-reconfigure
[restart-gitlab]: restart_gitlab.md#installations-from-source
[reconfigure-gitlab]: restart_gitlab.md#omnibus-gitlab-reconfigure
[backups]: ../raketasks/backup_restore.md
[raketask]: https://gitlab.com/gitlab-org/gitlab-ce/blob/033e5423a2594e08a7ebcd2379bd2331f4c39032/lib/backup/repository.rb#L54-56
[repospath]: https://gitlab.com/gitlab-org/gitlab-ce/blob/8-9-stable/config/gitlab.yml.example#L457
[ce-11449]: https://gitlab.com/gitlab-org/gitlab-ce/merge_requests/11449

View file

@ -0,0 +1,74 @@
# Circuitbreaker API
> [Introduced][ce-11449] in GitLab 9.5.
The Circuitbreaker API is only accessible to administrators. All requests by
guests will respond with `401 Unauthorized`, and all requests by normal users
will respond with `403 Forbidden`.
## Repository Storages
### Get all storage information
Returns all currently configured storages and their health information.
```
GET /circuit_breakers/repository_storage
```
```bash
curl --header "PRIVATE-TOKEN: 9koXpg98eAheJpvBs5tK" https://gitlab.example.com/api/v4/circuit_breakers/repository_storage
```
```json
[
{
"storage_name": "default",
"failing_on_hosts": [],
"total_failures": 0
},
{
"storage_name": "broken",
"failing_on_hosts": [
"web01", "worker01"
],
"total_failures": 1
}
]
```
### Get failing storages
This returns a list of all currently failing storages.
```
GET /circuit_breakers/repository_storage/failing
```
```bash
curl --header "PRIVATE-TOKEN: 9koXpg98eAheJpvBs5tK" https://gitlab.example.com/api/v4/circuit_breakers/repository_storage/failing
```
```json
[
{
"storage_name":"broken",
"failing_on_hosts":["web01", "worker01"],
"total_failures":2
}
]
```
## Reset failing storage information
Use this to remove all failing storage information and allow access to the storage again.
```
DELETE /circuit_breakers/repository_storage
```
```bash
curl --request DELETE --header "PRIVATE-TOKEN: 9koXpg98eAheJpvBs5tK" https://gitlab.example.com/api/v4/circuit_breakers/repository_storage
```
[ce-11449]: https://gitlab.com/gitlab-org/gitlab-ce/merge_requests/11449

View file

@ -95,6 +95,7 @@ module API
mount ::API::Boards
mount ::API::Branches
mount ::API::BroadcastMessages
mount ::API::CircuitBreakers
mount ::API::Commits
mount ::API::CommitStatuses
mount ::API::DeployKeys

View file

@ -0,0 +1,50 @@
module API
  # Admin-only API exposing the health of the repository storage circuit
  # breakers and allowing them to be reset.
  class CircuitBreakers < Grape::API
    before { authenticated_as_admin! }

    resource :circuit_breakers do
      params do
        requires :type,
                 type: String,
                 desc: "The type of circuitbreaker",
                 values: ['repository_storage']
      end
      resource ':type' do
        namespace '', requirements: { type: 'repository_storage' } do
          helpers do
            # Health of the storages that currently have failures.
            def failing_storage_health
              @failing_storage_health ||= Gitlab::Git::Storage::Health.for_failing_storages
            end

            # Health of every configured storage. Memoized in its own
            # instance variable so it can never return (or pollute) the
            # value cached by `failing_storage_health`.
            def storage_health
              @storage_health ||= Gitlab::Git::Storage::Health.for_all_storages
            end
          end

          desc 'Get all git storages' do
            detail 'This feature was introduced in GitLab 9.5'
            success Entities::RepositoryStorageHealth
          end
          get do
            present storage_health, with: Entities::RepositoryStorageHealth
          end

          desc 'Get all failing git storages' do
            detail 'This feature was introduced in GitLab 9.5'
            success Entities::RepositoryStorageHealth
          end
          get 'failing' do
            present failing_storage_health, with: Entities::RepositoryStorageHealth
          end

          desc 'Reset all storage failures and open circuitbreaker' do
            detail 'This feature was introduced in GitLab 9.5'
          end
          delete do
            Gitlab::Git::Storage::CircuitBreaker.reset_all!
          end
        end
      end
    end
  end
end

View file

@ -954,5 +954,11 @@ module API
expose :ip_address
expose :submitted, as: :akismet_submitted
end
# API representation of the health of one repository storage: its name, the
# hosts it is failing on and the total number of failures.
class RepositoryStorageHealth < Grape::Entity
expose :storage_name
expose :failing_on_hosts
expose :total_failures
end
end
end

View file

@ -64,11 +64,17 @@ module Gitlab
end
def rugged
@rugged ||= Rugged::Repository.new(path, alternates: alternate_object_directories)
@rugged ||= circuit_breaker.perform do
Rugged::Repository.new(path, alternates: alternate_object_directories)
end
rescue Rugged::RepositoryError, Rugged::OSError
raise NoRepository.new('no repository for such path')
end
# Memoized circuit breaker guarding access to this repository's storage.
def circuit_breaker
  return @circuit_breaker if @circuit_breaker

  @circuit_breaker = Gitlab::Git::Storage::CircuitBreaker.for_storage(storage)
end
# Returns an Array of branch names
# sorted by name ASC
def branch_names

22
lib/gitlab/git/storage.rb Normal file
View file

@ -0,0 +1,22 @@
module Gitlab
  module Git
    # Errors, Redis key prefix and Redis access shared by the storage health
    # tracking classes.
    module Storage
      # Raised when a storage cannot be accessed. `retry_after` is an optional
      # hint for the caller and is nil unless given.
      class Inaccessible < StandardError
        attr_reader :retry_after

        def initialize(message = nil, retry_after = nil)
          @retry_after = retry_after
          super(message)
        end
      end

      # Raised when access is refused because the circuit is open.
      class CircuitOpen < Inaccessible
      end

      # Prefix of every Redis key holding storage failure information.
      REDIS_KEY_PREFIX = 'storage_accessible:'.freeze

      class << self
        # Redis wrapper used to store the failure information.
        def redis
          Gitlab::Redis::SharedState
        end
      end
    end
  end
end

View file

@ -0,0 +1,142 @@
module Gitlab
  module Git
    module Storage
      # Guards access to a repository storage. Failures are tracked in Redis
      # per storage and host; while the circuit is broken, `perform` raises
      # instead of touching the storage.
      class CircuitBreaker
        attr_reader :storage,
                    :hostname,
                    :storage_path,
                    :failure_count_threshold,
                    :failure_wait_time,
                    :failure_reset_time,
                    :storage_timeout

        # Removes the failure information of every storage and host from
        # Redis and drops the circuit breakers cached for this request.
        def self.reset_all!
          key_pattern = "#{Gitlab::Git::Storage::REDIS_KEY_PREFIX}*"

          Gitlab::Git::Storage.redis.with do |redis|
            stored_keys = redis.scan_each(match: key_pattern).to_a
            redis.del(*stored_keys) unless stored_keys.empty?
          end

          RequestStore.delete(:circuitbreaker_cache)
        end

        # Returns the circuit breaker for `storage`, building at most one per
        # storage name within the request store.
        def self.for_storage(storage)
          breakers = RequestStore.fetch(:circuitbreaker_cache) do
            Hash.new { |cache, storage_name| cache[storage_name] = new(storage_name) }
          end

          breakers[storage]
        end

        # Reads the failure settings of `storage` from the configuration.
        # `hostname` identifies the host the failures are tracked for.
        def initialize(storage, hostname = Gitlab.config.gitlab.hostname)
          @storage = storage
          @hostname = hostname

          settings = Gitlab.config.repositories.storages[@storage]
          @storage_path = settings['path']
          @failure_count_threshold = settings['failure_count_threshold']
          @failure_wait_time = settings['failure_wait_time']
          @failure_reset_time = settings['failure_reset_time']
          @storage_timeout = settings['storage_timeout']
        end

        # Yields when the storage may be used. With the
        # 'git_storage_circuit_breaker' feature disabled the block is always
        # run; otherwise CircuitOpen is raised while the circuit is broken and
        # Inaccessible when the availability check fails.
        def perform
          if Feature.enabled?('git_storage_circuit_breaker')
            if circuit_broken?
              raise Gitlab::Git::Storage::CircuitOpen.new("Circuit for #{storage} open", failure_wait_time)
            end

            check_storage_accessible!
          end

          yield
        end

        # True when the last failure is more recent than `failure_wait_time`
        # seconds ago, or when there were more failures than the threshold.
        #
        # NOTE(review): StorageHealthHelper treats `failure_count >= threshold`
        # as "will not retry automatically", while this check uses `>` -
        # confirm which of the two is intended.
        def circuit_broken?
          return false if no_failures?
          return true if last_failure > failure_wait_time.seconds.ago

          failure_count > failure_count_threshold
        end

        # Memoizing the `storage_available` call means we only do it once per
        # request when the storage is available.
        #
        # A falsy result is not kept by `||=`, so an unavailable storage is
        # checked again on the next call.
        def storage_available?
          @storage_available ||= Gitlab::Git::Storage::ForkedStorageCheck.storage_available?(storage_path, storage_timeout)
        end

        # Records the outcome of the availability check; raises Inaccessible
        # when the storage cannot be reached.
        def check_storage_accessible!
          unless storage_available?
            track_storage_inaccessible
            raise Gitlab::Git::Storage::Inaccessible.new("#{storage} not accessible", failure_wait_time)
          end

          track_storage_accessible
        end

        def no_failures?
          last_failure.blank? && failure_count.zero?
        end

        # Stores the time of this failure, increments the counter and makes
        # the failure information expire after `failure_reset_time`.
        def track_storage_inaccessible
          failed_at = Time.now
          @failure_info = [failed_at, failure_count + 1]

          Gitlab::Git::Storage.redis.with do |redis|
            redis.pipelined do
              redis.hset(cache_key, :last_failure, failed_at.to_i)
              redis.hincrby(cache_key, :failure_count, 1)
              redis.expire(cache_key, failure_reset_time)
            end
          end
        end

        # Clears the stored failures; nothing is written when there are none.
        def track_storage_accessible
          return if no_failures?

          @failure_info = [nil, 0]

          Gitlab::Git::Storage.redis.with do |redis|
            redis.pipelined do
              redis.hset(cache_key, :last_failure, nil)
              redis.hset(cache_key, :failure_count, 0)
            end
          end
        end

        def last_failure
          failure_info.first
        end

        def failure_count
          failure_info.last
        end

        # `[last_failure, failure_count]`, loaded from Redis on first use.
        def failure_info
          @failure_info ||= get_failure_info
        end

        # Reads the stored failure information. A present timestamp is turned
        # into a Time, a blank one is passed through as stored.
        def get_failure_info
          stored_time, stored_count = Gitlab::Git::Storage.redis.with do |redis|
            redis.hmget(cache_key, :last_failure, :failure_count)
          end

          failed_at = stored_time.present? ? Time.at(stored_time.to_i) : stored_time

          [failed_at, stored_count.to_i]
        end

        def cache_key
          @cache_key ||= "#{Gitlab::Git::Storage::REDIS_KEY_PREFIX}#{storage}:#{hostname}"
        end
      end
    end
  end
end

View file

@ -0,0 +1,59 @@
module Gitlab
  module Git
    module Storage
      # Checks whether a storage path can be reached by running `realpath`
      # and `stat` on it in a child process that is killed when it does not
      # finish in time.
      module ForkedStorageCheck
        extend self

        # Returns true when the check process exited successfully within
        # `timeout_seconds`, a falsy value otherwise.
        def storage_available?(path, timeout_seconds = 5)
          status = timeout_check(path, timeout_seconds)

          status.success?
        end

        # Runs the filesystem check for `path` and polls for its exit status
        # until `timeout_seconds` have passed. After the deadline the check is
        # killed. Returns the exit status of the check process.
        def timeout_check(path, timeout_seconds)
          filesystem_check_pid = check_filesystem_in_fork(path)

          deadline = timeout_seconds.seconds.from_now.utc
          wait_time = 0.01
          status = nil

          while status.nil?
            if deadline > Time.now.utc
              sleep(wait_time)
              # Non-blocking: `status` stays nil while the check is running.
              _pid, status = Process.wait2(filesystem_check_pid, Process::WNOHANG)
            else
              Process.kill('KILL', filesystem_check_pid)
              # Blocking wait, so we are sure the process is gone before continuing
              _pid, status = Process.wait2(filesystem_check_pid)
            end
          end

          status
        end

        # This call forks out into a process, that process will then be replaced
        # With an `exec` call, since we fork out into a shell, we can create a
        # child process without needing an ActiveRecord-connection.
        #
        # Inside the shell, we use `& wait` to fork another child. We do this
        # to prevent leaving a zombie process when the parent gets killed by the
        # timeout.
        #
        # https://stackoverflow.com/questions/27892975/what-causes-activerecord-breaking-postgres-connection-after-forking
        # https://stackoverflow.com/questions/22012943/activerecordstatementinvalid-runtimeerror-the-connection-cannot-be-reused-in
        #
        # Returns the pid of the forked process.
        def check_filesystem_in_fork(path)
          fork do
            STDOUT.reopen('/dev/null')
            STDERR.reopen('/dev/null')

            exec("(#{test_script(path)}) & wait %1")
          end
        end

        # Shell snippet that resolves `path` and stats the result.
        #
        # `$testpath` is quoted when handed to `stat`: unquoted, a resolved
        # path containing whitespace would be split into several arguments and
        # a perfectly reachable storage would be reported as unavailable.
        def test_script(path)
          "testpath=\"$(realpath #{Shellwords.escape(path)})\" && stat \"$testpath\""
        end
      end
    end
  end
end

View file

@ -0,0 +1,101 @@
module Gitlab
  module Git
    module Storage
      # Failure information of one storage, collected over all hosts that
      # stored failure counts for it in Redis.
      class Health
        attr_reader :storage_name, :info

        # Redis key pattern matching the entries of every host for a storage.
        def self.pattern_for_storage(storage_name)
          "#{Gitlab::Git::Storage::REDIS_KEY_PREFIX}#{storage_name}:*"
        end

        # Builds a Health object for every configured storage.
        def self.for_all_storages
          storage_names = Gitlab.config.repositories.storages.keys

          results_per_storage = Gitlab::Git::Storage.redis.with do |redis|
            keys_per_storage = all_keys_for_storages(storage_names, redis)

            # We need to make sure all keys are actually loaded as an array.
            # Otherwise when using the enumerator of the `scan_each` within a
            # second pipeline, it will be assumed unloaded, which would make
            # the result unusable inside the pipeline.
            loaded_keys_per_storage = keys_per_storage.each_with_object({}) do |(storage_name, keys), loaded|
              loaded[storage_name] = keys.to_a
            end

            load_for_keys(loaded_keys_per_storage, redis)
          end

          results_per_storage.map do |name, info|
            # The counts were requested inside a pipeline; unwrap them now.
            info.each { |host_info| host_info[:failure_count] = host_info[:failure_count].value.to_i }

            new(name, info)
          end
        end

        # Maps every storage name to the `scan_each` result for its keys.
        def self.all_keys_for_storages(storage_names, redis)
          keys_per_storage = {}

          redis.pipelined do
            storage_names.each do |storage_name|
              pattern = pattern_for_storage(storage_name)
              keys_per_storage[storage_name] = redis.scan_each(match: pattern)
            end
          end

          keys_per_storage
        end

        # Maps every storage name to an array of `{ name:, failure_count: }`
        # hashes, one per key; the counts are requested in a single pipeline.
        def self.load_for_keys(keys_per_storage, redis)
          info_for_keys = {}

          redis.pipelined do
            keys_per_storage.each do |storage_name, keys|
              info_for_keys[storage_name] = keys.map do |key|
                { name: key, failure_count: redis.hget(key, :failure_count) }
              end
            end
          end

          info_for_keys
        end

        # Only the storages that have at least one failure.
        def self.for_failing_storages
          for_all_storages.select(&:failing?)
        end

        def initialize(storage_name, info)
          @storage_name = storage_name
          @info = info
        end

        # The per-host entries with a failure count above zero.
        def failing_info
          @failing_info ||= info.select { |host_info| host_info[:failure_count].positive? }
        end

        def failing?
          failing_info.any?
        end

        # Host names, taken from the last `:`-separated part of each key.
        def failing_on_hosts
          @failing_on_hosts ||= failing_info.map { |host_info| host_info[:name].split(':').last }
        end

        def failing_circuit_breakers
          @failing_circuit_breakers ||= failing_on_hosts.map do |hostname|
            CircuitBreaker.new(storage_name, hostname)
          end
        end

        # Sum of the failure counts of all failing hosts.
        def total_failures
          @total_failures ||= failing_info.reduce(0) do |total, host_info|
            total + host_info[:failure_count]
          end
        end
      end
    end
  end
end

View file

@ -10,7 +10,9 @@ module Gitlab
def readiness
repository_storages.map do |storage_name|
begin
if !storage_stat_test(storage_name)
if !storage_circuitbreaker_test(storage_name)
HealthChecks::Result.new(false, 'circuitbreaker tripped', shard: storage_name)
elsif !storage_stat_test(storage_name)
HealthChecks::Result.new(false, 'cannot stat storage', shard: storage_name)
else
with_temp_file(storage_name) do |tmp_file_path|
@ -36,7 +38,8 @@ module Gitlab
[
storage_stat_metrics(storage_name),
storage_write_metrics(storage_name),
storage_read_metrics(storage_name)
storage_read_metrics(storage_name),
storage_circuitbreaker_metrics(storage_name)
].flatten
end
end
@ -121,6 +124,12 @@ module Gitlab
file_contents == RANDOM_STRING
end
# Returns "OK" when the circuit breaker of `storage_name` allows access and
# nil when it reports the storage as inaccessible.
def storage_circuitbreaker_test(storage_name)
  begin
    breaker = Gitlab::Git::Storage::CircuitBreaker.new(storage_name)
    breaker.perform { "OK" }
  rescue Gitlab::Git::Storage::Inaccessible
    nil
  end
end
def storage_stat_metrics(storage_name)
operation_metrics(:filesystem_accessible, :filesystem_access_latency_seconds, shard: storage_name) do
with_timing { storage_stat_test(storage_name) }
@ -143,6 +152,14 @@ module Gitlab
end
end
end
# Reports the timed result of the circuit breaker test for `storage_name`
# under the :filesystem_circuitbreaker metrics, labelled with the shard.
def storage_circuitbreaker_metrics(storage_name)
  labels = { shard: storage_name }

  operation_metrics(:filesystem_circuitbreaker, :filesystem_circuitbreaker_latency_seconds, **labels) do
    with_timing do
      storage_circuitbreaker_test(storage_name)
    end
  end
end
end
end
end

View file

@ -0,0 +1,25 @@
require 'spec_helper'
# Controller specs for the admin health check page.
# NOTE(review): `broken_storage: true` presumably prepares the `broken` test
# storage - confirm in the spec support code.
describe Admin::HealthCheckController, broken_storage: true do
let(:admin) { create(:admin) }
before do
sign_in(admin)
end
describe 'GET show' do
it 'loads the git storage health information' do
get :show
expect(assigns[:failing_storage_statuses]).not_to be_nil
end
end
describe 'POST reset_storage_health' do
it 'resets all storage health information' do
# Only verifies that the reset is delegated to the circuit breaker.
expect(Gitlab::Git::Storage::CircuitBreaker).to receive(:reset_all!)
post :reset_storage_health
end
end
end

View file

@ -108,6 +108,30 @@ describe ApplicationController do
end
end
# The anonymous controller always raises a storage error that carries a
# retry time of 100, so the rescue handler's response can be inspected.
describe 'rescue from Gitlab::Git::Storage::Inaccessible' do
  controller(described_class) do
    def index
      raise Gitlab::Git::Storage::Inaccessible.new('broken', 100)
    end
  end

  before do
    sign_in(create(:user))
  end

  it 'renders a 503 when storage is not available' do
    get :index

    expect(response.status).to eq(503)
  end

  it 'includes a Retry-After header' do
    get :index

    expect(response.headers['Retry-After']).to eq(100)
  end
end
describe 'response format' do
controller(described_class) do
def index

View file

@ -107,6 +107,20 @@ describe ProjectsController do
end
end
context 'when the storage is not available', broken_storage: true do
let(:project) { create(:project, :broken_storage) }
before do
project.add_developer(user)
sign_in(user)
end
it 'renders a 503' do
get :show, namespace_id: project.namespace, id: project
expect(response).to have_http_status(503)
end
end
context "project with empty repo" do
let(:empty_project) { create(:project_empty_repo, :public) }

View file

@ -54,6 +54,12 @@ FactoryGirl.define do
avatar { File.open(Rails.root.join('spec/fixtures/dk.png')) }
end
trait :broken_storage do
after(:create) do |project|
project.update_column(:repository_storage, 'broken')
end
end
# Test repository - https://gitlab.com/gitlab-org/gitlab-test
trait :repository do
path { 'gitlabhq' }

View file

@ -1,6 +1,6 @@
require 'spec_helper'
feature "Admin Health Check" do
feature "Admin Health Check", feature: true, broken_storage: true do
include StubENV
before do
@ -55,4 +55,26 @@ feature "Admin Health Check" do
expect(page).to have_content('The server is on fire')
end
end
context 'with repository storage failures' do
before do
# Track a failure
Gitlab::Git::Storage::CircuitBreaker.for_storage('broken').perform { nil } rescue nil
visit admin_health_check_path
end
it 'shows storage failure information' do
hostname = Gitlab.config.gitlab.hostname
expect(page).to have_content('broken: failed storage access attempt on host:')
expect(page).to have_content("#{hostname}: 1 of 10 failures.")
end
it 'allows resetting storage failures' do
click_button 'Reset git storage health information'
expect(page).to have_content('Git storage health information has been reset')
expect(page).not_to have_content('failed storage access attempt')
end
end
end

View file

@ -0,0 +1,20 @@
require 'spec_helper'
describe StorageHealthHelper do
  describe '#failing_storage_health_message' do
    # A storage name containing markup, paired with an empty failure-info list.
    let(:markup_name) { "<script>alert('storage name');)</script>" }
    let(:health) { Gitlab::Git::Storage::Health.new(markup_name, []) }

    it 'escapes storage names' do
      escaped_storage_name = '&lt;script&gt;alert(&#39;storage name&#39;);)&lt;/script&gt;'

      expect(helper.failing_storage_health_message(health)).to include(escaped_storage_name)
    end
  end
end

View file

@ -23,6 +23,16 @@ describe '6_validations' do
end
end
# A non-numeric failure_count_threshold must be reported by the config validation.
context 'when one of the settings is incorrect' do
  before do
    invalid_settings = { 'path' => 'tmp/tests/paths/a/b/c', 'failure_count_threshold' => 'not a number' }

    mock_storages('foo' => invalid_settings)
  end

  it 'throws an error' do
    expect { validate_storages_config }.to raise_error(/failure_count_threshold/)
  end
end
context 'with invalid storage names' do
before do
mock_storages('name with spaces' => { 'path' => 'tmp/tests/paths/a/b/c' })
@ -84,6 +94,17 @@ describe '6_validations' do
expect { validate_storages_paths }.not_to raise_error
end
end
# A storage whose path does not exist is logged as an error but does not abort validation.
describe 'inaccessible storage' do
  before do
    missing_path = 'tmp/tests/a/path/that/does/not/exist'

    mock_storages('foo' => { 'path' => missing_path })
  end

  it 'passes through with a warning' do
    expect(Rails.logger).to receive(:error)

    expect { validate_storages_paths }.not_to raise_error
  end
end
end
def mock_storages(storages)

View file

@ -2,6 +2,17 @@ require 'spec_helper'
require_relative '../../config/initializers/1_settings'
describe Settings do
describe '#repositories' do
  it 'assigns the default failure attributes' do
    repository_settings = Gitlab.config.repositories.storages['broken']
    # Expected values for the 'broken' storage, checked in this order.
    expected_defaults = {
      'failure_count_threshold' => 10,
      'failure_wait_time' => 30,
      'failure_reset_time' => 1800,
      'storage_timeout' => 5
    }

    expected_defaults.each do |setting, default|
      expect(repository_settings[setting]).to eq(default)
    end
  end
end
describe '#host_without_www' do
context 'URL with protocol' do
it 'returns the host' do

View file

@ -48,8 +48,9 @@ describe Gitlab::Cache::Ci::ProjectPipelineStatus, :clean_gitlab_redis_cache do
described_class.load_in_batch_for_projects([project_without_status])
end
it 'only connects to redis_cache twice' do
# Once to load, once to store in the cache
it 'only connects to redis twice' do
# Stub circuitbreaker so it doesn't count the redis connections in there
stub_circuit_breaker(project_without_status)
expect(Gitlab::Redis::Cache).to receive(:with).exactly(2).and_call_original
described_class.load_in_batch_for_projects([project_without_status])
@ -301,4 +302,13 @@ describe Gitlab::Cache::Ci::ProjectPipelineStatus, :clean_gitlab_redis_cache do
end
end
end
# Replaces the circuit breaker of the project's repository (and of its raw
# repository) with a double whose #perform simply yields.
def stub_circuit_breaker(project)
  passthrough_breaker = double
  allow(passthrough_breaker).to receive(:perform).and_yield

  [project.repository.raw_repository, project.repository].each do |repo|
    allow(repo).to receive(:circuit_breaker).and_return(passthrough_breaker)
  end
end
end

View file

@ -55,6 +55,20 @@ describe Gitlab::Git::Repository, seed_helper: true do
end
describe "#rugged" do
# Opening rugged for a repository on the 'broken' storage must raise a storage error.
describe 'when storage is broken', broken_storage: true do
  it 'raises a storage exception when storage is not available' do
    repo_on_broken_storage = described_class.new('broken', 'a/path.git')

    expect do
      repo_on_broken_storage.rugged
    end.to raise_error(Gitlab::Git::Storage::Inaccessible)
  end
end
# On the 'default' storage, a path without a repository yields NoRepository.
it 'raises a no repository exception when there is no repo' do
  missing_repo = described_class.new('default', 'a/path.git')

  expect do
    missing_repo.rugged
  end.to raise_error(Gitlab::Git::Repository::NoRepository)
end
context 'with no Git env stored' do
before do
expect(Gitlab::Git::Env).to receive(:all).and_return({})

View file

@ -0,0 +1,265 @@
require 'spec_helper'
describe Gitlab::Git::Storage::CircuitBreaker, clean_gitlab_redis_shared_state: true, broken_storage: true do
# Breaker under test, built for the 'default' storage.
let(:circuit_breaker) { described_class.new('default') }
# Hostname taken from the GitLab configuration; it is part of the redis key below.
let(:hostname) { Gitlab.config.gitlab.hostname }
# Redis hash key the helpers below read from and write to.
let(:cache_key) { "storage_accessible:default:#{hostname}" }
# Reads a single field of the breaker's redis hash (stored under cache_key).
def value_from_redis(name)
  values = Gitlab::Git::Storage.redis.with { |redis| redis.hmget(cache_key, name) }

  values.first
end
# Writes a single field of the breaker's redis hash (stored under cache_key).
# The reply of the write is not meaningful to callers, so nothing is
# extracted from it.
def set_in_redis(name, value)
  Gitlab::Git::Storage.redis.with do |redis|
    redis.hmset(cache_key, name, value)
  end
end
describe '.reset_all!' do
  # After a reset, the hash stored for this storage/host must no longer exist.
  it 'clears all entries from redis' do
    set_in_redis(:failure_count, 10)

    described_class.reset_all!

    key_exists = Gitlab::Git::Storage.redis.with { |redis| redis.exists(cache_key) }

    expect(key_exists).to be_falsey
  end
end
describe '.for_storage' do
  # Two lookups for the same storage must hit the constructor only once.
  it 'only builds a single circuitbreaker per storage' do
    expect(described_class).to receive(:new).once.and_call_original

    first_lookup = described_class.for_storage('default')

    expect(first_lookup).to be_a(described_class)
    expect(described_class.for_storage('default')).to eq(first_lookup)
  end
end
describe '#initialize' do
  it 'assigns the settings' do
    # Reader name => value expected on a breaker built for the 'default' storage.
    expected_settings = {
      hostname: hostname,
      storage: 'default',
      storage_path: TestEnv.repos_path,
      failure_count_threshold: 10,
      failure_wait_time: 30,
      failure_reset_time: 1800,
      storage_timeout: 5
    }

    expected_settings.each do |reader, value|
      expect(circuit_breaker.public_send(reader)).to eq(value)
    end
  end
end
# Specs for #perform: the block is yielded and its value returned, storage
# accessibility is checked, errors from the block propagate, and a broken
# circuit raises CircuitOpen.
describe '#perform' do
# NOTE(review): the description mentions a retry time, but only the error
# class is asserted here.
it 'raises an exception with retry time when the circuit is open' do
allow(circuit_breaker).to receive(:circuit_broken?).and_return(true)
expect { |b| circuit_breaker.perform(&b) }
.to raise_error(Gitlab::Git::Storage::CircuitOpen)
end
it 'yields the block' do
expect { |b| circuit_breaker.perform(&b) }
.to yield_control
end
it 'checks if the storage is available' do
expect(circuit_breaker).to receive(:check_storage_accessible!)
circuit_breaker.perform { 'hello world' }
end
it 'returns the value of the block' do
result = circuit_breaker.perform { 'return value' }
expect(result).to eq('return value')
end
# Errors raised inside the block are not swallowed by the breaker.
it 'raises possible errors' do
expect { circuit_breaker.perform { raise Rugged::OSError.new('Broken') } }
.to raise_error(Rugged::OSError)
end
context 'with the feature disabled' do
# With the git_storage_circuit_breaker flag off, circuit_broken? must not be consulted.
it 'returns the block without checking accessibility' do
stub_feature_flags(git_storage_circuit_breaker: false)
expect(circuit_breaker).not_to receive(:circuit_broken?)
result = circuit_breaker.perform { 'hello' }
expect(result).to eq('hello')
end
end
end
# Specs for #circuit_broken?, driven by the last_failure and failure_count
# fields stored in redis.
describe '#circuit_broken?' do
  it 'is closed when there is no last failure' do
    set_in_redis(:last_failure, nil)
    set_in_redis(:failure_count, 0)

    expect(circuit_breaker.circuit_broken?).to be_falsey
  end

  it 'is open when there was a recent failure' do
    Timecop.freeze do
      set_in_redis(:last_failure, 1.second.ago.to_f)
      set_in_redis(:failure_count, 1)

      expect(circuit_breaker.circuit_broken?).to be_truthy
    end
  end

  # The last failure is a day old here, so only the failure count (200)
  # can be what keeps the circuit open.
  it 'is open when there are too many failures' do
    set_in_redis(:last_failure, 1.day.ago.to_f)
    set_in_redis(:failure_count, 200)

    expect(circuit_breaker.circuit_broken?).to be_truthy
  end
end
# Specs for #check_storage_accessible!: success is tracked silently, failure
# is tracked and raised as Inaccessible with a retry time.
describe '#check_storage_accessible!' do
  context 'when the storage is available' do
    # No error is expected in this case; only the tracking call is asserted.
    it 'tracks that the storage was accessible' do
      expect(circuit_breaker).to receive(:track_storage_accessible)

      circuit_breaker.check_storage_accessible!
    end
  end

  context 'when the storage is not available' do
    let(:circuit_breaker) { described_class.new('broken') }

    it 'tracks that the storage was unavailable and raises an error with retry time' do
      expect(circuit_breaker).to receive(:track_storage_inaccessible)

      expect { circuit_breaker.check_storage_accessible! }
        .to raise_error do |exception|
          expect(exception).to be_kind_of(Gitlab::Git::Storage::Inaccessible)
          expect(exception.retry_after).to eq(30)
        end
    end
  end
end
# Specs for #track_storage_inaccessible; every example runs with time frozen.
describe '#track_storage_inaccessible' do
  around do |example|
    Timecop.freeze { example.run }
  end

  it 'records the failure time in redis' do
    circuit_breaker.track_storage_inaccessible

    recorded_at = Time.at(value_from_redis(:last_failure).to_i)

    expect(recorded_at).to be_within(1.second).of(Time.now)
  end

  it 'sets the failure time on the breaker without reloading' do
    circuit_breaker.track_storage_inaccessible

    # From here on, reading the value must not go back to get_failure_info.
    expect(circuit_breaker).not_to receive(:get_failure_info)
    expect(circuit_breaker.last_failure).to eq(Time.now)
  end

  it 'increments the failure count in redis' do
    set_in_redis(:failure_count, 10)

    circuit_breaker.track_storage_inaccessible

    stored_count = value_from_redis(:failure_count).to_i
    expect(stored_count).to be(11)
  end

  it 'increments the failure count on the breaker without reloading' do
    set_in_redis(:failure_count, 10)

    circuit_breaker.track_storage_inaccessible

    expect(circuit_breaker).not_to receive(:get_failure_info)
    expect(circuit_breaker.failure_count).to eq(11)
  end
end
# Specs for #track_storage_accessible: failure information is cleared both in
# redis and on the breaker instance.
describe '#track_storage_accessible' do
  it 'sets the failure count to zero in redis' do
    set_in_redis(:failure_count, 10)

    circuit_breaker.track_storage_accessible

    expect(value_from_redis(:failure_count).to_i).to be(0)
  end

  it 'sets the failure count to zero on the breaker without reloading' do
    set_in_redis(:failure_count, 10)

    circuit_breaker.track_storage_accessible

    expect(circuit_breaker).not_to receive(:get_failure_info)
    expect(circuit_breaker.failure_count).to eq(0)
  end

  # This example reads the value from the breaker instance, not from redis.
  it 'removes the last failure time from the breaker without reloading' do
    set_in_redis(:last_failure, Time.now.to_i)

    circuit_breaker.track_storage_accessible

    expect(circuit_breaker).not_to receive(:get_failure_info)
    expect(circuit_breaker.last_failure).to be_nil
  end

  # This example reads the value back from redis.
  it 'removes the last failure time from redis' do
    set_in_redis(:last_failure, Time.now.to_i)

    circuit_breaker.track_storage_accessible

    expect(value_from_redis(:last_failure)).to be_empty
  end

  # Asserts that the redis pool is used exactly once during the call.
  it 'wont connect to redis when there are no failures' do
    expect(Gitlab::Git::Storage.redis).to receive(:with).once
      .and_call_original
    expect(circuit_breaker).to receive(:track_storage_accessible)
      .and_call_original

    circuit_breaker.track_storage_accessible
  end
end
describe '#no_failures?' do
  it 'is false when a failure was tracked' do
    # Store one failure, with its timestamp, directly in redis.
    { last_failure: Time.now.to_i, failure_count: 1 }.each do |field, value|
      set_in_redis(field, value)
    end

    expect(circuit_breaker.no_failures?).to be_falsey
  end
end
describe '#last_failure' do
  # The time is stored as integer seconds and must come back as an equal Time.
  it 'returns the last failure time' do
    failed_at = Time.parse("2017-05-26 17:52:30")

    set_in_redis(:last_failure, failed_at.to_i)

    expect(circuit_breaker.last_failure).to eq(failed_at)
  end
end
describe '#failure_count' do
  # The count written to redis must be what the breaker reports.
  it 'returns the failure count' do
    stored_count = 7

    set_in_redis(:failure_count, stored_count)

    expect(circuit_breaker.failure_count).to eq(stored_count)
  end
end
describe '#cache_key' do
  # The expected key is the one built from the storage name and the hostname.
  it 'includes storage and host' do
    expected_key = cache_key

    expect(circuit_breaker.cache_key).to eq(expected_key)
  end
end
end

View file

@ -0,0 +1,27 @@
require 'spec_helper'
describe Gitlab::Git::Storage::ForkedStorageCheck, skip_database_cleaner: true do
  # A path that is guaranteed to exist: the test repositories path, created on demand.
  let(:existing_path) do
    existing_path = TestEnv.repos_path
    FileUtils.mkdir_p(existing_path)
    existing_path
  end

  describe '.storage_available?' do
    it 'detects when a storage is not available' do
      expect(described_class.storage_available?('/non/existant/path')).to be_falsey
    end

    it 'detects when a storage is available' do
      expect(described_class.storage_available?(existing_path)).to be_truthy
    end

    # The filesystem check is replaced by a fork that sleeps for 10 seconds,
    # far longer than the 0.5 passed as second argument.
    it 'returns false when the check takes too long' do
      allow(described_class).to receive(:check_filesystem_in_fork) do
        fork { sleep 10 }
      end

      expect(described_class.storage_available?(existing_path, 0.5)).to be_falsey
    end
  end
end

View file

@ -0,0 +1,85 @@
require 'spec_helper'
describe Gitlab::Git::Storage::Health, clean_gitlab_redis_shared_state: true, broken_storage: true do
# Redis keys used as fixtures; they end in the storage name and then the host name
# ('broken' on web01, 'default' on kiq01).
let(:host1_key) { 'storage_accessible:broken:web01' }
let(:host2_key) { 'storage_accessible:default:kiq01' }
# Stores `value` as the failure_count field of the hash at `cache_key`.
# The reply of the write is not meaningful to callers, so nothing is
# extracted from it.
def set_in_redis(cache_key, value)
  Gitlab::Git::Storage.redis.with do |redis|
    redis.hmset(cache_key, :failure_count, value)
  end
end
describe '.for_failing_storages' do
  # host1 ('broken') has failures recorded, host2 ('default') has none.
  it 'only includes health status for failures' do
    { host1_key => 10, host2_key => 0 }.each do |key, failures|
      set_in_redis(key, failures)
    end

    failing_names = described_class.for_failing_storages.map(&:storage_name)

    expect(failing_names).to contain_exactly('broken')
  end
end
# Specs for .load_for_keys, called with the keys of the 'broken' storage only.
describe '.load_for_keys' do
  subject do
    results = Gitlab::Git::Storage.redis.with do |redis|
      described_class.load_for_keys({ 'broken' => [host1_key] }, redis)
    end

    # Make sure the redis futures are loaded: each failure_count is resolved
    # through #value and converted to an integer so it can be compared.
    results.each_with_object({}) do |(name, info), result|
      info.each { |i| i[:failure_count] = i[:failure_count].value.to_i }
      result[name] = info
    end
  end

  it 'loads when there is no info in redis' do
    expect(subject).to eq('broken' => [{ name: host1_key, failure_count: 0 }])
  end

  # host2_key is written too, but was not requested and must not show up.
  it 'reads the correct values for a storage from redis' do
    set_in_redis(host1_key, 5)
    set_in_redis(host2_key, 7)

    expect(subject).to eq('broken' => [{ name: host1_key, failure_count: 5 }])
  end
end
describe '.for_all_storages' do
  # Both storages expected in the test configuration must be reported.
  it 'loads health status for all configured storages' do
    storage_names = described_class.for_all_storages.map(&:storage_name)

    expect(storage_names).to contain_exactly('default', 'broken')
  end
end
describe '#failing_info' do
  # Entries with a zero failure count are left out.
  it 'only contains storages that have failures' do
    healthy_entry = { name: host1_key, failure_count: 0 }
    failing_entry = { name: host2_key, failure_count: 3 }
    health = described_class.new('broken', [healthy_entry, failing_entry])

    expect(health.failing_info).to contain_exactly({ name: host2_key, failure_count: 3 })
  end
end
describe '#total_failures' do
  # 2 failures on one host plus 3 on the other.
  it 'sums up all the failures' do
    failure_info = [
      { name: host1_key, failure_count: 2 },
      { name: host2_key, failure_count: 3 }
    ]
    health = described_class.new('broken', failure_info)

    expect(health.total_failures).to eq(5)
  end
end
describe '#failing_on_hosts' do
  # Only host1_key (ending in web01) carries failures in this fixture.
  it 'collects only the failing hostnames' do
    failure_info = [
      { name: host1_key, failure_count: 2 },
      { name: host2_key, failure_count: 0 }
    ]
    health = described_class.new('broken', failure_info)

    expect(health.failing_on_hosts).to contain_exactly('web01')
  end
end
end

View file

@ -44,6 +44,15 @@ describe Gitlab::HealthChecks::FsShardsCheck do
describe '#readiness' do
subject { described_class.readiness }
# Readiness for the 'broken' storage must report a tripped circuit breaker.
context 'storage has a tripped circuitbreaker' do
  let(:repository_storages) { %w[broken] }
  let(:storages_paths) { Gitlab.config.repositories.storages }

  it do
    tripped_result = result_class.new(false, 'circuitbreaker tripped', shard: 'broken')

    is_expected.to include(tripped_result)
  end
end
context 'storage points to not existing folder' do
let(:storages_paths) do
{
@ -51,6 +60,10 @@ describe Gitlab::HealthChecks::FsShardsCheck do
}.with_indifferent_access
end
before do
allow(described_class).to receive(:storage_circuitbreaker_test) { true }
end
it { is_expected.to include(result_class.new(false, 'cannot stat storage', shard: :default)) }
end
@ -109,6 +122,7 @@ describe Gitlab::HealthChecks::FsShardsCheck do
expect(metrics).to include(an_object_having_attributes(name: :filesystem_access_latency_seconds, value: be >= 0))
expect(metrics).to include(an_object_having_attributes(name: :filesystem_read_latency_seconds, value: be >= 0))
expect(metrics).to include(an_object_having_attributes(name: :filesystem_write_latency_seconds, value: be >= 0))
expect(metrics).to include(an_object_having_attributes(name: :filesystem_circuitbreaker_latency_seconds, value: be >= 0))
end
end
@ -127,6 +141,7 @@ describe Gitlab::HealthChecks::FsShardsCheck do
expect(metrics).to include(an_object_having_attributes(name: :filesystem_access_latency_seconds, value: be >= 0))
expect(metrics).to include(an_object_having_attributes(name: :filesystem_read_latency_seconds, value: be >= 0))
expect(metrics).to include(an_object_having_attributes(name: :filesystem_write_latency_seconds, value: be >= 0))
expect(metrics).to include(an_object_having_attributes(name: :filesystem_circuitbreaker_latency_seconds, value: be >= 0))
end
it 'cleans up files used for metrics' do

View file

@ -1,11 +1,12 @@
require 'spec_helper'
describe Repository do
describe Repository, models: true do
include RepoHelpers
TestBlob = Struct.new(:path)
let(:project) { create(:project, :repository) }
let(:repository) { project.repository }
let(:broken_repository) { create(:project, :broken_storage).repository }
let(:user) { create(:user) }
let(:commit_options) do
@ -27,12 +28,26 @@ describe Repository do
let(:author_email) { 'user@example.org' }
let(:author_name) { 'John Doe' }
# Runs the given block and expects it to raise one of the two storage errors:
# Gitlab::Git::Storage::Inaccessible or GRPC::Unavailable.
def expect_to_raise_storage_error
  storage_errors = [Gitlab::Git::Storage::Inaccessible, GRPC::Unavailable]

  expect { yield }.to raise_error do |raised|
    expect(raised.class).to be_in(storage_errors)
  end
end
# Specs for #branch_names_contains, including the broken-storage case.
describe '#branch_names_contains' do
  subject { repository.branch_names_contains(sample_commit.id) }

  it { is_expected.to include('master') }
  it { is_expected.not_to include('feature') }
  it { is_expected.not_to include('fix') }

  describe 'when storage is broken', broken_storage: true do
    it 'should raise a storage error' do
      expect_to_raise_storage_error { broken_repository.branch_names_contains(sample_commit.id) }
    end
  end
end
describe '#tag_names_contains' do
@ -142,6 +157,14 @@ describe Repository do
subject { repository.last_commit_for_path(sample_commit.id, '.gitignore').id }
it { is_expected.to eq('c1acaa58bbcbc3eafe538cb8274ba387047b69f8') }
# Looking up the last commit for a path on the broken repository must raise a storage error.
describe 'when storage is broken', broken_storage: true do
  it 'should raise a storage error' do
    expect_to_raise_storage_error { broken_repository.last_commit_for_path(sample_commit.id, '.gitignore').id }
  end
end
end
describe '#last_commit_id_for_path' do
@ -158,6 +181,14 @@ describe Repository do
expect(cache).to receive(:fetch).with(key).and_return('c1acaa5')
is_expected.to eq('c1acaa5')
end
# Looking up the last commit id for a path on the broken repository must raise a storage error.
describe 'when storage is broken', broken_storage: true do
  it 'should raise a storage error' do
    expect_to_raise_storage_error { broken_repository.last_commit_id_for_path(sample_commit.id, '.gitignore') }
  end
end
end
describe '#commits' do
@ -196,6 +227,12 @@ describe Repository do
expect(commit_ids).to include('5937ac0a7beb003549fc5fd26fc247adbce4a52e')
end
# Searching commits by message on the broken repository must raise a storage error.
describe 'when storage is broken', broken_storage: true do
  it 'should raise a storage error' do
    expect_to_raise_storage_error do
      broken_repository.find_commits_by_message('s')
    end
  end
end
end
describe '#blob_at' do
@ -521,6 +558,14 @@ describe Repository do
expect(results).to match_array([])
end
# Searching file contents on the broken repository must raise a storage error.
describe 'when storage is broken', broken_storage: true do
  it 'should raise a storage error' do
    expect_to_raise_storage_error { broken_repository.search_files_by_content('feature', 'master') }
  end
end
describe 'result' do
subject { results.first }
@ -549,6 +594,22 @@ describe Repository do
expect(results).to match_array([])
end
# Searching file names on the broken repository must raise a storage error.
describe 'when storage is broken', broken_storage: true do
  it 'should raise a storage error' do
    expect_to_raise_storage_error do
      broken_repository.search_files_by_name('files', 'master')
    end
  end
end
end
describe '#fetch_ref' do
  describe 'when storage is broken', broken_storage: true do
    # The broken repository's own path is passed as the first argument to fetch_ref.
    it 'should raise a storage error' do
      source_path = broken_repository.path_to_repo

      expect_to_raise_storage_error do
        broken_repository.fetch_ref(source_path, '1', '2')
      end
    end
  end
end
describe '#create_ref' do
@ -966,6 +1027,12 @@ describe Repository do
expect(repository.exists?).to eq(false)
end
# Checking existence of the broken repository must raise a storage error.
context 'with broken storage', broken_storage: true do
  it 'should raise a storage error' do
    expect_to_raise_storage_error do
      broken_repository.exists?
    end
  end
end
end
describe '#exists?' do

View file

@ -0,0 +1,57 @@
require 'spec_helper'
describe API::CircuitBreakers do
# A user from the :user factory; the examples below expect a 403 for this user.
let(:user) { create(:user) }
# A user from the :admin factory; the examples below expect successful responses.
let(:admin) { create(:admin) }
# Specs for the storage health endpoints: access control plus the JSON shape
# returned to admins.
describe 'GET circuit_breakers/repository_storage' do
  it 'returns a 401 for anonymous users' do
    get api('/circuit_breakers/repository_storage')

    expect(response).to have_http_status(401)
  end

  it 'returns a 403 for users' do
    get api('/circuit_breakers/repository_storage', user)

    expect(response).to have_http_status(403)
  end

  it 'returns an Array of storages' do
    # The health lookup is stubbed with a single failing host for 'broken'.
    stubbed_health = [
      Gitlab::Git::Storage::Health.new('broken', [{ name: 'prefix:broken:web01', failure_count: 4 }])
    ]
    expect(Gitlab::Git::Storage::Health).to receive(:for_all_storages).and_return(stubbed_health)

    get api('/circuit_breakers/repository_storage', admin)

    expect(response).to have_http_status(200)
    expect(json_response).to be_kind_of(Array)

    first_storage = json_response.first
    expect(first_storage['storage_name']).to eq('broken')
    expect(first_storage['failing_on_hosts']).to eq(['web01'])
    expect(first_storage['total_failures']).to eq(4)
  end

  describe 'GET circuit_breakers/repository_storage/failing' do
    it 'returns an array of failing storages' do
      stubbed_health = [
        Gitlab::Git::Storage::Health.new('broken', [{ name: 'prefix:broken:web01', failure_count: 4 }])
      ]
      expect(Gitlab::Git::Storage::Health).to receive(:for_failing_storages).and_return(stubbed_health)

      get api('/circuit_breakers/repository_storage/failing', admin)

      expect(response).to have_http_status(200)
      expect(json_response).to be_kind_of(Array)
    end
  end
end
describe 'DELETE circuit_breakers/repository_storage' do
  # An admin DELETE must call reset_all! and answer with 204.
  it 'clears all circuit_breakers' do
    breaker_class = Gitlab::Git::Storage::CircuitBreaker
    expect(breaker_class).to receive(:reset_all!)

    delete api('/circuit_breakers/repository_storage', admin)

    expect(response).to have_http_status(204)
  end
end
end

View file

@ -2,4 +2,16 @@ RSpec.configure do |config|
config.before(:each, :repository) do
TestEnv.clean_test_path
end
# For groups tagged :broken_storage: remove the directory configured as the
# path of the 'broken' storage.
config.before(:all, :broken_storage) do
  broken_storage_path = Gitlab.config.repositories.storages.broken['path']

  FileUtils.rm_rf(broken_storage_path)
end
# For examples tagged :broken_storage: make every Gitaly call fail with
# GRPC::Unavailable and reset all circuit breaker state.
config.before(:each, :broken_storage) do
  gitaly_unavailable = proc { raise GRPC::Unavailable.new('Gitaly broken in this spec') }

  allow(Gitlab::GitalyClient).to receive(:call, &gitaly_unavailable)

  Gitlab::Git::Storage::CircuitBreaker.reset_all!
end
end