Add a Circuitbreaker for storage paths
commit 3598e60bf2 (parent 990feb9f2b)
40 changed files with 1421 additions and 37 deletions
@@ -1,5 +1,12 @@
class Admin::HealthCheckController < Admin::ApplicationController
  def show
    @errors = HealthCheck::Utils.process_checks(['standard'])
    @failing_storage_statuses = Gitlab::Git::Storage::Health.for_failing_storages
  end

  def reset_storage_health
    Gitlab::Git::Storage::CircuitBreaker.reset_all!
    redirect_to admin_health_check_path,
      notice: _('Git storage health information has been reset')
  end
end
@@ -52,6 +52,15 @@ class ApplicationController < ActionController::Base
    head :forbidden, retry_after: Gitlab::Auth::UniqueIpsLimiter.config.unique_ips_limit_time_window
  end

  rescue_from Gitlab::Git::Storage::Inaccessible, GRPC::Unavailable do |exception|
    Raven.capture_exception(exception) if sentry_enabled?
    log_exception(exception)

    headers['Retry-After'] = exception.retry_after if exception.respond_to?(:retry_after)

    render_503
  end

  def redirect_back_or_default(default: root_path, options: {})
    redirect_to request.referer.present? ? :back : default, options
  end
@@ -152,6 +161,19 @@ class ApplicationController < ActionController::Base
    head :unprocessable_entity
  end

  def render_503
    respond_to do |format|
      format.html do
        render(
          file: Rails.root.join("public", "503"),
          layout: false,
          status: :service_unavailable
        )
      end
      format.any { head :service_unavailable }
    end
  end

  def no_cache_headers
    response.headers["Cache-Control"] = "no-cache, no-store, max-age=0, must-revalidate"
    response.headers["Pragma"] = "no-cache"
37 app/helpers/storage_health_helper.rb Normal file
@@ -0,0 +1,37 @@
module StorageHealthHelper
  def failing_storage_health_message(storage_health)
    storage_name = content_tag(:strong, h(storage_health.storage_name))
    host_names = h(storage_health.failing_on_hosts.to_sentence)
    translation_params = { storage_name: storage_name,
                           host_names: host_names,
                           failed_attempts: storage_health.total_failures }

    translation = n_('%{storage_name}: failed storage access attempt on host:',
                     '%{storage_name}: %{failed_attempts} failed storage access attempts:',
                     storage_health.total_failures) % translation_params

    translation.html_safe
  end

  def message_for_circuit_breaker(circuit_breaker)
    maximum_failures = circuit_breaker.failure_count_threshold
    current_failures = circuit_breaker.failure_count
    permanently_broken = circuit_breaker.circuit_broken? && current_failures >= maximum_failures

    translation_params = { number_of_failures: current_failures,
                           maximum_failures: maximum_failures,
                           number_of_seconds: circuit_breaker.failure_wait_time }

    if permanently_broken
      s_("%{number_of_failures} of %{maximum_failures} failures. GitLab will not "\
         "retry automatically. Reset storage information when the problem is "\
         "resolved.") % translation_params
    elsif circuit_breaker.circuit_broken?
      _("%{number_of_failures} of %{maximum_failures} failures. GitLab will "\
        "block access for %{number_of_seconds} seconds.") % translation_params
    else
      _("%{number_of_failures} of %{maximum_failures} failures. GitLab will "\
        "allow access on the next attempt.") % translation_params
    end
  end
end
@@ -133,12 +133,13 @@ class Repository
    ref ||= root_ref

    args = %W(
      #{Gitlab.config.git.bin_path} log #{ref} --pretty=%H --skip #{offset}
      log #{ref} --pretty=%H --skip #{offset}
      --max-count #{limit} --grep=#{query} --regexp-ignore-case
    )
    args = args.concat(%W(-- #{path})) if path.present?

    git_log_results = Gitlab::Popen.popen(args, path_to_repo).first.lines
    git_log_results = run_git(args).first.lines

    git_log_results.map { |c| commit(c.chomp) }.compact
  end

@@ -622,8 +623,8 @@ class Repository
    key = path.blank? ? "last_commit_id_for_path:#{sha}" : "last_commit_id_for_path:#{sha}:#{Digest::SHA1.hexdigest(path)}"

    cache.fetch(key) do
      args = %W(#{Gitlab.config.git.bin_path} rev-list --max-count=1 #{sha} -- #{path})
      Gitlab::Popen.popen(args, path_to_repo).first.strip
      args = %W(rev-list --max-count=1 #{sha} -- #{path})
      run_git(args).first.strip
    end
  end

@@ -678,8 +679,8 @@ class Repository
  end

  def refs_contains_sha(ref_type, sha)
    args = %W(#{Gitlab.config.git.bin_path} #{ref_type} --contains #{sha})
    names = Gitlab::Popen.popen(args, path_to_repo).first
    args = %W(#{ref_type} --contains #{sha})
    names = run_git(args).first

    if names.respond_to?(:split)
      names = names.split("\n").map(&:strip)

@@ -957,15 +958,17 @@ class Repository
    return [] if empty_repo? || query.blank?

    offset = 2
    args = %W(#{Gitlab.config.git.bin_path} grep -i -I -n --before-context #{offset} --after-context #{offset} -E -e #{Regexp.escape(query)} #{ref || root_ref})
    Gitlab::Popen.popen(args, path_to_repo).first.scrub.split(/^--$/)
    args = %W(grep -i -I -n --before-context #{offset} --after-context #{offset} -E -e #{Regexp.escape(query)} #{ref || root_ref})

    run_git(args).first.scrub.split(/^--$/)
  end

  def search_files_by_name(query, ref)
    return [] if empty_repo? || query.blank?

    args = %W(#{Gitlab.config.git.bin_path} ls-tree --full-tree -r #{ref || root_ref} --name-status | #{Regexp.escape(query)})
    Gitlab::Popen.popen(args, path_to_repo).first.lines.map(&:strip)
    args = %W(ls-tree --full-tree -r #{ref || root_ref} --name-status | #{Regexp.escape(query)})

    run_git(args).first.lines.map(&:strip)
  end

  def with_repo_branch_commit(start_repository, start_branch_name)

@@ -1010,8 +1013,8 @@ class Repository
  end

  def fetch_ref(source_path, source_ref, target_ref)
    args = %W(#{Gitlab.config.git.bin_path} fetch --no-tags -f #{source_path} #{source_ref}:#{target_ref})
    Gitlab::Popen.popen(args, path_to_repo)
    args = %W(fetch --no-tags -f #{source_path} #{source_ref}:#{target_ref})
    run_git(args)
  end

  def create_ref(ref, ref_path)

@@ -1092,6 +1095,12 @@ class Repository
  private

  def run_git(args)
    circuit_breaker.perform do
      Gitlab::Popen.popen([Gitlab.config.git.bin_path, *args], path_to_repo)
    end
  end

  def blob_data_at(sha, path)
    blob = blob_at(sha, path)
    return unless blob

@@ -1101,8 +1110,10 @@ class Repository
  end

  def refs_directory_exists?
    circuit_breaker.perform do
      File.exist?(File.join(path_to_repo, 'refs'))
    end
  end

  def cache
    # TODO: should we use UUIDs here? We could move repositories without clearing this cache

@@ -1145,4 +1156,8 @@ class Repository
  def initialize_raw_repository
    Gitlab::Git::Repository.new(project.repository_storage, disk_path + '.git')
  end

  def circuit_breaker
    @circuit_breaker ||= Gitlab::Git::Storage::CircuitBreaker.for_storage(project.repository_storage)
  end
end
15 app/views/admin/health_check/_failing_storages.html.haml Normal file
@@ -0,0 +1,15 @@
- if failing_storages.any?
  = _('There are problems accessing Git storage: ')
  %ul
    - failing_storages.each do |storage_health|
      %li
        = failing_storage_health_message(storage_health)
        %ul
          - storage_health.failing_circuit_breakers.each do |circuit_breaker|
            %li
              #{circuit_breaker.hostname}: #{message_for_circuit_breaker(circuit_breaker)}

  = _("Access to failing storages has been temporarily disabled to allow the mount to recover. Reset storage information after the issue has been resolved to allow access again.")
  .prepend-top-10
    = button_to _("Reset git storage health information"), reset_storage_health_admin_health_check_path,
      method: :post, class: 'btn btn-default'
@@ -1,22 +1,22 @@
- @no_container = true
- page_title "Health Check"
- page_title _('Health Check')
- no_errors = @errors.blank? && @failing_storage_statuses.blank?
= render 'admin/monitoring/head'

%div{ class: container_class }
  %h3.page-title
    Health Check
  %h3.page-title= page_title
  .bs-callout.clearfix
    .pull-left
      %p
        Access token is
        #{ s_('HealthCheck|Access token is') }
        %code#health-check-token= current_application_settings.health_check_access_token
      .prepend-top-10
        = button_to "Reset health check access token", reset_health_check_token_admin_application_settings_path,
        = button_to _("Reset health check access token"), reset_health_check_token_admin_application_settings_path,
          method: :put, class: 'btn btn-default',
          data: { confirm: 'Are you sure you want to reset the health check token?' }
          data: { confirm: _('Are you sure you want to reset the health check token?') }
  %p.light
    Health information can be retrieved from the following endpoints. More information is available
    = link_to 'here', help_page_path('user/admin_area/monitoring/health_check')
    #{ _('Health information can be retrieved from the following endpoints. More information is available') }
    = link_to s_('More information is available|here'), help_page_path('user/admin_area/monitoring/health_check')
  %ul
    %li
      %code= readiness_url(token: current_application_settings.health_check_access_token)

@@ -29,14 +29,15 @@
  .panel.panel-default
    .panel-heading
      Current Status:
      - if @errors.blank?
      - if no_errors
        = icon('circle', class: 'cgreen')
        Healthy
        #{ s_('HealthCheck|Healthy') }
      - else
        = icon('warning', class: 'cred')
        Unhealthy
        #{ s_('HealthCheck|Unhealthy') }
    .panel-body
      - if @errors.blank?
        No Health Problems Detected
      - if no_errors
        #{ s_('HealthCheck|No Health Problems Detected') }
      - else
        = @errors
      = render partial: 'failing_storages', object: @failing_storage_statuses
4 changelogs/unreleased/bvl-nfs-circuitbreaker.yml Normal file
@@ -0,0 +1,4 @@
---
title: Block access to failing repository storage
merge_request: 11449
author:
@@ -506,6 +506,11 @@ production: &base
      path: /home/git/repositories/
      gitaly_address: unix:/home/git/gitlab/tmp/sockets/private/gitaly.socket # TCP connections are supported too (e.g. tcp://host:port)
      # gitaly_token: 'special token' # Optional: override global gitaly.token for this storage.
      failure_count_threshold: 10 # number of failures before stopping attempts
      failure_wait_time: 30 # Seconds after an access failure before allowing access again
      failure_reset_time: 1800 # Time in seconds to expire failures
      storage_timeout: 5 # Time in seconds to wait before aborting a storage access attempt

  ## Backup settings
  backup:

@@ -638,6 +643,10 @@ test:
    default:
      path: tmp/tests/repositories/
      gitaly_address: unix:tmp/tests/gitaly/gitaly.socket
    broken:
      path: tmp/tests/non-existent-repositories
      gitaly_address: unix:tmp/tests/gitaly/gitaly.socket

  gitaly:
    enabled: true
    token: secret
@@ -222,6 +222,7 @@ Settings.gitlab['default_branch_protection'] ||= 2
Settings.gitlab['default_can_create_group'] = true if Settings.gitlab['default_can_create_group'].nil?
Settings.gitlab['host'] ||= ENV['GITLAB_HOST'] || 'localhost'
Settings.gitlab['ssh_host'] ||= Settings.gitlab.host
Settings.gitlab['hostname'] ||= ENV['HOSTNAME'] || Socket.gethostname
Settings.gitlab['https'] = false if Settings.gitlab['https'].nil?
Settings.gitlab['port'] ||= ENV['GITLAB_PORT'] || (Settings.gitlab.https ? 443 : 80)
Settings.gitlab['relative_url_root'] ||= ENV['RAILS_RELATIVE_URL_ROOT'] || ''

@@ -433,6 +434,17 @@ end
Settings.repositories.storages.values.each do |storage|
  # Expand relative paths
  storage['path'] = Settings.absolute(storage['path'])
  # Set failure defaults
  storage['failure_count_threshold'] ||= 10
  storage['failure_wait_time'] ||= 30
  storage['failure_reset_time'] ||= 1800
  storage['storage_timeout'] ||= 5
  # Turn strings into numbers
  storage['failure_count_threshold'] = storage['failure_count_threshold'].to_i
  storage['failure_wait_time'] = storage['failure_wait_time'].to_i
  storage['failure_reset_time'] = storage['failure_reset_time'].to_i
  # We might want to have a timeout shorter than 1 second.
  storage['storage_timeout'] = storage['storage_timeout'].to_f
end

#
@@ -7,6 +7,13 @@ def find_parent_path(name, path)
  Gitlab.config.repositories.storages.detect do |n, rs|
    name != n && Pathname.new(rs['path']).realpath == parent
  end
rescue Errno::EIO, Errno::ENOENT => e
  warning = "WARNING: couldn't verify #{path} (#{name}). "\
            "If this is an external storage, it might be offline."
  message = "#{warning}\n#{e.message}"
  Rails.logger.error("#{message}\n\t" + e.backtrace.join("\n\t"))

  nil
end

def storage_validation_error(message)

@@ -29,6 +36,15 @@ def validate_storages_config
    if !repository_storage.is_a?(Hash) || repository_storage['path'].nil?
      storage_validation_error("#{name} is not a valid storage, because it has no `path` key. Refer to gitlab.yml.example for an updated example")
    end

    %w(failure_count_threshold failure_wait_time failure_reset_time storage_timeout).each do |setting|
      # Falling back to the defaults is fine!
      next if repository_storage[setting].nil?

      unless repository_storage[setting].to_f > 0
        storage_validation_error("#{setting}, for storage `#{name}` needs to be greater than 0")
      end
    end
  end
end
@@ -67,7 +67,9 @@ namespace :admin do
    end

    resource :logs, only: [:show]
    resource :health_check, controller: 'health_check', only: [:show]
    resource :health_check, controller: 'health_check', only: [:show] do
      post :reset_storage_health
    end
    resource :background_jobs, controller: 'background_jobs', only: [:show]
    resource :system_info, controller: 'system_info', only: [:show]
    resources :requests_profiles, only: [:index, :show], param: :name, constraints: { name: /.+\.html/ }
BIN doc/administration/img/failing_storage.png Normal file
Binary file not shown. (Added image, 47 KiB.)
@@ -60,7 +60,7 @@ respectively.
       path: /mnt/cephfs/repositories
   ```

1. [Restart GitLab] for the changes to take effect.
1. [Restart GitLab][restart-gitlab] for the changes to take effect.

>**Note:**
The [`gitlab_shell: repos_path` entry][repospath] in `gitlab.yml` will be

@@ -97,9 +97,80 @@ be stored via the **Application Settings** in the Admin area.
Beginning with GitLab 8.13.4, multiple paths can be chosen. New projects will be
randomly placed on one of the selected paths.

## Handling failing repository storage

> [Introduced][ce-11449] in GitLab 9.5.

When GitLab detects that access to the repository storage fails repeatedly, it
can gracefully prevent attempts to access the storage. This might be useful
when the repositories are stored somewhere on the network.

The configuration could look as follows:

**For Omnibus installations**

1. Edit `/etc/gitlab/gitlab.rb`:

    ```ruby
    git_data_dirs({
      "default" => {
        "path" => "/mnt/nfs-01/git-data",
        "failure_count_threshold" => 10,
        "failure_wait_time" => 30,
        "failure_reset_time" => 1800,
        "storage_timeout" => 5
      }
    })
    ```

1. Save the file and [reconfigure GitLab][reconfigure-gitlab] for the changes to take effect.

---

**For installations from source**

1. Edit `config/gitlab.yml`:

    ```yaml
    repositories:
      storages: # You must have at least a `default` storage path.
        default:
          path: /home/git/repositories/
          failure_count_threshold: 10 # number of failures before stopping attempts
          failure_wait_time: 30 # Seconds after last access failure before trying again
          failure_reset_time: 1800 # Time in seconds to expire failures
          storage_timeout: 5 # Time in seconds to wait before aborting a storage access attempt
    ```

1. Save the file and [restart GitLab][restart-gitlab] for the changes to take effect.

**`failure_count_threshold`:** The number of failures after which GitLab will
completely prevent access to the storage. The failure count can be reset in the
admin interface (`https://gitlab.example.com/admin/health_check`) or using the
[API](../api/repository_storage_health.md) to allow access to the storage again.

**`failure_wait_time`:** When access to a storage fails, GitLab will prevent
access to the storage for the time specified here. This gives the filesystem a
chance to recover without being hit by further access attempts.

**`failure_reset_time`:** The time in seconds GitLab will keep failure
information. When no failures occur during this time, information about the
mount is reset.

**`storage_timeout`:** The time in seconds GitLab will try to access storage.
After this time a timeout error will be raised.
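
For orientation, here is a minimal sketch of how these four settings interact,
mirroring the `CircuitBreaker#circuit_broken?` logic introduced in this commit.
The standalone method and its arguments are illustrative; in the real class the
thresholds come from `gitlab.yml` and the failure state from Redis:

```ruby
# Sketch only: mirrors CircuitBreaker#circuit_broken? from this commit.
# `last_failure` (a Time or nil) and `failure_count` are assumed to be the
# values the breaker loads from Redis for this storage and host.
def circuit_broken?(last_failure, failure_count,
                    failure_wait_time: 30, failure_count_threshold: 10)
  # No recorded failures: the circuit stays closed.
  return false if last_failure.nil? && failure_count.zero?

  # Broken while the last failure is younger than failure_wait_time seconds...
  recent_failure = last_failure && last_failure > Time.now - failure_wait_time
  # ...or, once the threshold is exceeded, until an explicit reset.
  too_many_failures = failure_count > failure_count_threshold

  recent_failure || too_many_failures
end
```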

When storage failures occur, this will be visible in the admin interface like this:

![failing storage](img/failing_storage.png)

To allow access to all storages, click the `Reset git storage health information` button.

[ce-4578]: https://gitlab.com/gitlab-org/gitlab-ce/merge_requests/4578
[restart gitlab]: restart_gitlab.md#installations-from-source
[reconfigure gitlab]: restart_gitlab.md#omnibus-gitlab-reconfigure
[restart-gitlab]: restart_gitlab.md#installations-from-source
[reconfigure-gitlab]: restart_gitlab.md#omnibus-gitlab-reconfigure
[backups]: ../raketasks/backup_restore.md
[raketask]: https://gitlab.com/gitlab-org/gitlab-ce/blob/033e5423a2594e08a7ebcd2379bd2331f4c39032/lib/backup/repository.rb#L54-56
[repospath]: https://gitlab.com/gitlab-org/gitlab-ce/blob/8-9-stable/config/gitlab.yml.example#L457
[ce-11449]: https://gitlab.com/gitlab-org/gitlab-ce/merge_requests/11449
74 doc/api/repository_storage_health.md Normal file
@@ -0,0 +1,74 @@
# Circuitbreaker API

> [Introduced][ce-11449] in GitLab 9.5.

The Circuitbreaker API is only accessible to administrators. All requests by
guests will respond with `401 Unauthorized`, and all requests by normal users
will respond with `403 Forbidden`.

## Repository Storages

### Get all storage information

Returns all currently configured storages and their health information.

```
GET /circuit_breakers/repository_storage
```

```bash
curl --header "PRIVATE-TOKEN: 9koXpg98eAheJpvBs5tK" https://gitlab.example.com/api/v4/circuit_breakers/repository_storage
```

```json
[
  {
    "storage_name": "default",
    "failing_on_hosts": [],
    "total_failures": 0
  },
  {
    "storage_name": "broken",
    "failing_on_hosts": [
      "web01", "worker01"
    ],
    "total_failures": 1
  }
]
```

### Get failing storages

This returns a list of all currently failing storages.

```
GET /circuit_breakers/repository_storage/failing
```

```bash
curl --header "PRIVATE-TOKEN: 9koXpg98eAheJpvBs5tK" https://gitlab.example.com/api/v4/circuit_breakers/repository_storage/failing
```

```json
[
  {
    "storage_name": "broken",
    "failing_on_hosts": ["web01", "worker01"],
    "total_failures": 2
  }
]
```

## Reset failing storage information

Use this to remove all failing storage information and allow access to the
storage again.

```
DELETE /circuit_breakers/repository_storage
```

```bash
curl --request DELETE --header "PRIVATE-TOKEN: 9koXpg98eAheJpvBs5tK" https://gitlab.example.com/api/v4/circuit_breakers/repository_storage
```

[ce-11449]: https://gitlab.com/gitlab-org/gitlab-ce/merge_requests/11449
@@ -95,6 +95,7 @@ module API
    mount ::API::Boards
    mount ::API::Branches
    mount ::API::BroadcastMessages
    mount ::API::CircuitBreakers
    mount ::API::Commits
    mount ::API::CommitStatuses
    mount ::API::DeployKeys
50 lib/api/circuit_breakers.rb Normal file
@@ -0,0 +1,50 @@
module API
  class CircuitBreakers < Grape::API
    before { authenticated_as_admin! }

    resource :circuit_breakers do
      params do
        requires :type,
                 type: String,
                 desc: "The type of circuitbreaker",
                 values: ['repository_storage']
      end
      resource ':type' do
        namespace '', requirements: { type: 'repository_storage' } do
          helpers do
            def failing_storage_health
              @failing_storage_health ||= Gitlab::Git::Storage::Health.for_failing_storages
            end

            def storage_health
              @failing_storage_health ||= Gitlab::Git::Storage::Health.for_all_storages
            end
          end

          desc 'Get all git storages' do
            detail 'This feature was introduced in GitLab 9.5'
            success Entities::RepositoryStorageHealth
          end
          get do
            present storage_health, with: Entities::RepositoryStorageHealth
          end

          desc 'Get all failing git storages' do
            detail 'This feature was introduced in GitLab 9.5'
            success Entities::RepositoryStorageHealth
          end
          get 'failing' do
            present failing_storage_health, with: Entities::RepositoryStorageHealth
          end

          desc 'Reset all storage failures and open circuitbreaker' do
            detail 'This feature was introduced in GitLab 9.5'
          end
          delete do
            Gitlab::Git::Storage::CircuitBreaker.reset_all!
          end
        end
      end
    end
  end
end
@@ -954,5 +954,11 @@ module API
      expose :ip_address
      expose :submitted, as: :akismet_submitted
    end

    class RepositoryStorageHealth < Grape::Entity
      expose :storage_name
      expose :failing_on_hosts
      expose :total_failures
    end
  end
end
@@ -64,11 +64,17 @@ module Gitlab
      end

      def rugged
        @rugged ||= Rugged::Repository.new(path, alternates: alternate_object_directories)
        @rugged ||= circuit_breaker.perform do
          Rugged::Repository.new(path, alternates: alternate_object_directories)
        end
      rescue Rugged::RepositoryError, Rugged::OSError
        raise NoRepository.new('no repository for such path')
      end

      def circuit_breaker
        @circuit_breaker ||= Gitlab::Git::Storage::CircuitBreaker.for_storage(storage)
      end

      # Returns an Array of branch names
      # sorted by name ASC
      def branch_names
22 lib/gitlab/git/storage.rb Normal file
@@ -0,0 +1,22 @@
module Gitlab
  module Git
    module Storage
      class Inaccessible < StandardError
        attr_reader :retry_after

        def initialize(message = nil, retry_after = nil)
          super(message)
          @retry_after = retry_after
        end
      end

      CircuitOpen = Class.new(Inaccessible)

      REDIS_KEY_PREFIX = 'storage_accessible:'.freeze

      def self.redis
        Gitlab::Redis::SharedState
      end
    end
  end
end
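
Note: `CircuitOpen` subclasses `Inaccessible`, so the retry hint travels on the
exception itself; this is what the `ApplicationController` change above turns
into a `Retry-After` response header. A small illustrative snippet (values are
examples only):

```ruby
# Illustrative only: the retry hint rides along on the exception.
begin
  raise Gitlab::Git::Storage::CircuitOpen.new('Circuit for default open', 30)
rescue Gitlab::Git::Storage::Inaccessible => e
  e.retry_after # => 30, surfaced to clients as the Retry-After header
end
```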
142 lib/gitlab/git/storage/circuit_breaker.rb Normal file
@@ -0,0 +1,142 @@
module Gitlab
  module Git
    module Storage
      class CircuitBreaker
        attr_reader :storage,
                    :hostname,
                    :storage_path,
                    :failure_count_threshold,
                    :failure_wait_time,
                    :failure_reset_time,
                    :storage_timeout

        def self.reset_all!
          pattern = "#{Gitlab::Git::Storage::REDIS_KEY_PREFIX}*"

          Gitlab::Git::Storage.redis.with do |redis|
            all_storage_keys = redis.scan_each(match: pattern).to_a
            redis.del(*all_storage_keys) unless all_storage_keys.empty?
          end

          RequestStore.delete(:circuitbreaker_cache)
        end

        def self.for_storage(storage)
          cached_circuitbreakers = RequestStore.fetch(:circuitbreaker_cache) do
            Hash.new do |hash, storage_name|
              hash[storage_name] = new(storage_name)
            end
          end

          cached_circuitbreakers[storage]
        end

        def initialize(storage, hostname = Gitlab.config.gitlab.hostname)
          @storage = storage
          @hostname = hostname

          config = Gitlab.config.repositories.storages[@storage]
          @storage_path = config['path']
          @failure_count_threshold = config['failure_count_threshold']
          @failure_wait_time = config['failure_wait_time']
          @failure_reset_time = config['failure_reset_time']
          @storage_timeout = config['storage_timeout']
        end

        def perform
          return yield unless Feature.enabled?('git_storage_circuit_breaker')

          if circuit_broken?
            raise Gitlab::Git::Storage::CircuitOpen.new("Circuit for #{storage} open", failure_wait_time)
          end

          check_storage_accessible!

          yield
        end

        def circuit_broken?
          return false if no_failures?

          recent_failure = last_failure > failure_wait_time.seconds.ago
          too_many_failures = failure_count > failure_count_threshold

          recent_failure || too_many_failures
        end

        # Memoizing the `storage_available` call means we only do it once per
        # request when the storage is available.
        #
        # When the storage appears not available, and the memoized value is `false`
        # we might want to try again.
        def storage_available?
          @storage_available ||= Gitlab::Git::Storage::ForkedStorageCheck.storage_available?(storage_path, storage_timeout)
        end

        def check_storage_accessible!
          if storage_available?
            track_storage_accessible
          else
            track_storage_inaccessible
            raise Gitlab::Git::Storage::Inaccessible.new("#{storage} not accessible", failure_wait_time)
          end
        end

        def no_failures?
          last_failure.blank? && failure_count == 0
        end

        def track_storage_inaccessible
          @failure_info = [Time.now, failure_count + 1]

          Gitlab::Git::Storage.redis.with do |redis|
            redis.pipelined do
              redis.hset(cache_key, :last_failure, last_failure.to_i)
              redis.hincrby(cache_key, :failure_count, 1)
              redis.expire(cache_key, failure_reset_time)
            end
          end
        end

        def track_storage_accessible
          return if no_failures?

          @failure_info = [nil, 0]

          Gitlab::Git::Storage.redis.with do |redis|
            redis.pipelined do
              redis.hset(cache_key, :last_failure, nil)
              redis.hset(cache_key, :failure_count, 0)
            end
          end
        end

        def last_failure
          failure_info.first
        end

        def failure_count
          failure_info.last
        end

        def failure_info
          @failure_info ||= get_failure_info
        end

        def get_failure_info
          last_failure, failure_count = Gitlab::Git::Storage.redis.with do |redis|
            redis.hmget(cache_key, :last_failure, :failure_count)
          end

          last_failure = Time.at(last_failure.to_i) if last_failure.present?

          [last_failure, failure_count.to_i]
        end

        def cache_key
          @cache_key ||= "#{Gitlab::Git::Storage::REDIS_KEY_PREFIX}#{storage}:#{hostname}"
        end
      end
    end
  end
end
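
For orientation, a minimal usage sketch of this class, following the
`Repository#run_git` and `ApplicationController` changes elsewhere in this
commit (the storage name 'default' and the logging are illustrative):

```ruby
# Sketch only: wrap a storage access in the breaker and handle the outcome.
breaker = Gitlab::Git::Storage::CircuitBreaker.for_storage('default')

begin
  breaker.perform do
    # Any filesystem access against the storage goes inside the block.
    File.exist?(File.join(breaker.storage_path, 'refs'))
  end
rescue Gitlab::Git::Storage::Inaccessible => e
  # Raised when the circuit is open or the storage check fails.
  Rails.logger.error("Storage unavailable, retry after #{e.retry_after}s")
end
```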
59 lib/gitlab/git/storage/forked_storage_check.rb Normal file
@@ -0,0 +1,59 @@
module Gitlab
  module Git
    module Storage
      module ForkedStorageCheck
        extend self

        def storage_available?(path, timeout_seconds = 5)
          status = timeout_check(path, timeout_seconds)

          status.success?
        end

        def timeout_check(path, timeout_seconds)
          filesystem_check_pid = check_filesystem_in_fork(path)

          deadline = timeout_seconds.seconds.from_now.utc
          wait_time = 0.01
          status = nil

          while status.nil?
            if deadline > Time.now.utc
              sleep(wait_time)
              _pid, status = Process.wait2(filesystem_check_pid, Process::WNOHANG)
            else
              Process.kill('KILL', filesystem_check_pid)
              # Blocking wait, so we are sure the process is gone before continuing
              _pid, status = Process.wait2(filesystem_check_pid)
            end
          end

          status
        end

        # This call forks out into a process; that process will then be
        # replaced with an `exec` call. Since we fork out into a shell, we can
        # create a child process without needing an ActiveRecord connection.
        #
        # Inside the shell, we use `& wait` to fork another child. We do this
        # to prevent leaving a zombie process when the parent gets killed by
        # the timeout.
        #
        # https://stackoverflow.com/questions/27892975/what-causes-activerecord-breaking-postgres-connection-after-forking
        # https://stackoverflow.com/questions/22012943/activerecordstatementinvalid-runtimeerror-the-connection-cannot-be-reused-in
        def check_filesystem_in_fork(path)
          fork do
            STDOUT.reopen('/dev/null')
            STDERR.reopen('/dev/null')

            exec("(#{test_script(path)}) & wait %1")
          end
        end

        def test_script(path)
          "testpath=\"$(realpath #{Shellwords.escape(path)})\" && stat $testpath"
        end
      end
    end
  end
end
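
A short usage sketch of the check on its own (the path is illustrative):

```ruby
# Returns true when `stat` on the (realpath of the) directory succeeds within
# the timeout; the check runs in a forked shell, as described above.
Gitlab::Git::Storage::ForkedStorageCheck.storage_available?('/mnt/nfs-01/git-data', 5)
```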
101 lib/gitlab/git/storage/health.rb Normal file
@@ -0,0 +1,101 @@
module Gitlab
  module Git
    module Storage
      class Health
        attr_reader :storage_name, :info

        def self.pattern_for_storage(storage_name)
          "#{Gitlab::Git::Storage::REDIS_KEY_PREFIX}#{storage_name}:*"
        end

        def self.for_all_storages
          storage_names = Gitlab.config.repositories.storages.keys
          results_per_storage = nil

          Gitlab::Git::Storage.redis.with do |redis|
            keys_per_storage = all_keys_for_storages(storage_names, redis)

            # We need to make sure all keys are actually loaded as an array.
            # Otherwise when using the enumerator of the `scan_each` within a
            # second pipeline, it will be assumed unloaded, which would make the
            # result unusable inside the pipeline.
            loaded_keys_per_storage = keys_per_storage.inject({}) do |loaded_keys, (storage_name, keys)|
              loaded_keys[storage_name] = keys.to_a
              loaded_keys
            end

            results_per_storage = load_for_keys(loaded_keys_per_storage, redis)
          end

          results_per_storage.map do |name, info|
            info.each { |i| i[:failure_count] = i[:failure_count].value.to_i }
            new(name, info)
          end
        end

        def self.all_keys_for_storages(storage_names, redis)
          keys_per_storage = nil

          redis.pipelined do
            keys_per_storage = storage_names.inject({}) do |result, storage_name|
              key = pattern_for_storage(storage_name)

              result.merge(storage_name => redis.scan_each(match: key))
            end
          end

          keys_per_storage
        end

        def self.load_for_keys(keys_per_storage, redis)
          info_for_keys = nil

          redis.pipelined do
            info_for_keys = keys_per_storage.inject({}) do |result, (storage_name, keys)|
              info_for_storage = keys.map do |key|
                { name: key, failure_count: redis.hget(key, :failure_count) }
              end

              result.merge(storage_name => info_for_storage)
            end
          end

          info_for_keys
        end

        def self.for_failing_storages
          for_all_storages.select(&:failing?)
        end

        def initialize(storage_name, info)
          @storage_name = storage_name
          @info = info
        end

        def failing_info
          @failing_info ||= info.select { |info_for_host| info_for_host[:failure_count] > 0 }
        end

        def failing?
          failing_info.any?
        end

        def failing_on_hosts
          @failing_on_hosts ||= failing_info.map do |info_for_host|
            info_for_host[:name].split(':').last
          end
        end

        def failing_circuit_breakers
          @failing_circuit_breakers ||= failing_on_hosts.map do |hostname|
            CircuitBreaker.new(storage_name, hostname)
          end
        end

        def total_failures
          @total_failures ||= failing_info.sum { |info_for_host| info_for_host[:failure_count] }
        end
      end
    end
  end
end
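
For reference, the Redis layout that `Health` and `CircuitBreaker` share, as
implied by `cache_key` and `failing_on_hosts` (storage and host names here are
illustrative):

```ruby
# One Redis hash per storage/host pair, keyed with REDIS_KEY_PREFIX:
#
#   storage_accessible:default:web01
#     last_failure  -> epoch seconds of the most recent failure (empty if none)
#     failure_count -> integer, expires with the key after failure_reset_time
#
# Health derives the hostname by splitting the key on ':' and taking the last
# segment, which is why failing_on_hosts returns plain hostnames.
```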
@@ -10,7 +10,9 @@ module Gitlab
      def readiness
        repository_storages.map do |storage_name|
          begin
            if !storage_stat_test(storage_name)
            if !storage_circuitbreaker_test(storage_name)
              HealthChecks::Result.new(false, 'circuitbreaker tripped', shard: storage_name)
            elsif !storage_stat_test(storage_name)
              HealthChecks::Result.new(false, 'cannot stat storage', shard: storage_name)
            else
              with_temp_file(storage_name) do |tmp_file_path|

@@ -36,7 +38,8 @@ module Gitlab
        [
          storage_stat_metrics(storage_name),
          storage_write_metrics(storage_name),
          storage_read_metrics(storage_name)
          storage_read_metrics(storage_name),
          storage_circuitbreaker_metrics(storage_name)
        ].flatten
      end
    end

@@ -121,6 +124,12 @@ module Gitlab
        file_contents == RANDOM_STRING
      end

      def storage_circuitbreaker_test(storage_name)
        Gitlab::Git::Storage::CircuitBreaker.new(storage_name).perform { "OK" }
      rescue Gitlab::Git::Storage::Inaccessible
        nil
      end

      def storage_stat_metrics(storage_name)
        operation_metrics(:filesystem_accessible, :filesystem_access_latency_seconds, shard: storage_name) do
          with_timing { storage_stat_test(storage_name) }

@@ -143,6 +152,14 @@ module Gitlab
          end
        end
      end

      def storage_circuitbreaker_metrics(storage_name)
        operation_metrics(:filesystem_circuitbreaker,
                          :filesystem_circuitbreaker_latency_seconds,
                          shard: storage_name) do
          with_timing { storage_circuitbreaker_test(storage_name) }
        end
      end
    end
  end
end
25 spec/controllers/admin/health_check_controller_spec.rb Normal file
@@ -0,0 +1,25 @@
require 'spec_helper'

describe Admin::HealthCheckController, broken_storage: true do
  let(:admin) { create(:admin) }

  before do
    sign_in(admin)
  end

  describe 'GET show' do
    it 'loads the git storage health information' do
      get :show

      expect(assigns[:failing_storage_statuses]).not_to be_nil
    end
  end

  describe 'POST reset_storage_health' do
    it 'resets all storage health information' do
      expect(Gitlab::Git::Storage::CircuitBreaker).to receive(:reset_all!)

      post :reset_storage_health
    end
  end
end
@@ -108,6 +108,30 @@ describe ApplicationController do
    end
  end

  describe 'rescue from Gitlab::Git::Storage::Inaccessible' do
    controller(described_class) do
      def index
        raise Gitlab::Git::Storage::Inaccessible.new('broken', 100)
      end
    end

    it 'renders a 503 when storage is not available' do
      sign_in(create(:user))

      get :index

      expect(response.status).to eq(503)
    end

    it 'includes a Retry-After header' do
      sign_in(create(:user))

      get :index

      expect(response.headers['Retry-After']).to eq(100)
    end
  end

  describe 'response format' do
    controller(described_class) do
      def index
@@ -107,6 +107,20 @@ describe ProjectsController do
      end
    end

    context 'when the storage is not available', broken_storage: true do
      let(:project) { create(:project, :broken_storage) }

      before do
        project.add_developer(user)
        sign_in(user)
      end

      it 'renders a 503' do
        get :show, namespace_id: project.namespace, id: project

        expect(response).to have_http_status(503)
      end
    end

    context "project with empty repo" do
      let(:empty_project) { create(:project_empty_repo, :public) }
@@ -54,6 +54,12 @@ FactoryGirl.define do
    avatar { File.open(Rails.root.join('spec/fixtures/dk.png')) }
  end

  trait :broken_storage do
    after(:create) do |project|
      project.update_column(:repository_storage, 'broken')
    end
  end

  # Test repository - https://gitlab.com/gitlab-org/gitlab-test
  trait :repository do
    path { 'gitlabhq' }
@@ -1,6 +1,6 @@
require 'spec_helper'

feature "Admin Health Check" do
feature "Admin Health Check", feature: true, broken_storage: true do
  include StubENV

  before do

@@ -55,4 +55,26 @@ feature "Admin Health Check" do
      expect(page).to have_content('The server is on fire')
    end
  end

  context 'with repository storage failures' do
    before do
      # Track a failure
      Gitlab::Git::Storage::CircuitBreaker.for_storage('broken').perform { nil } rescue nil
      visit admin_health_check_path
    end

    it 'shows storage failure information' do
      hostname = Gitlab.config.gitlab.hostname

      expect(page).to have_content('broken: failed storage access attempt on host:')
      expect(page).to have_content("#{hostname}: 1 of 10 failures.")
    end

    it 'allows resetting storage failures' do
      click_button 'Reset git storage health information'

      expect(page).to have_content('Git storage health information has been reset')
      expect(page).not_to have_content('failed storage access attempt')
    end
  end
end
20 spec/helpers/storage_health_helper_spec.rb Normal file
@@ -0,0 +1,20 @@
require 'spec_helper'

describe StorageHealthHelper do
  describe '#failing_storage_health_message' do
    let(:health) do
      Gitlab::Git::Storage::Health.new(
        "<script>alert('storage name');)</script>",
        []
      )
    end

    it 'escapes storage names' do
      escaped_storage_name = '&lt;script&gt;alert(&#39;storage name&#39;);)&lt;/script&gt;'

      result = helper.failing_storage_health_message(health)

      expect(result).to include(escaped_storage_name)
    end
  end
end
@@ -23,6 +23,16 @@ describe '6_validations' do
    end
  end

  context 'when one of the settings is incorrect' do
    before do
      mock_storages('foo' => { 'path' => 'tmp/tests/paths/a/b/c', 'failure_count_threshold' => 'not a number' })
    end

    it 'throws an error' do
      expect { validate_storages_config }.to raise_error(/failure_count_threshold/)
    end
  end

  context 'with invalid storage names' do
    before do
      mock_storages('name with spaces' => { 'path' => 'tmp/tests/paths/a/b/c' })

@@ -84,6 +94,17 @@ describe '6_validations' do
      expect { validate_storages_paths }.not_to raise_error
    end
  end

  describe 'inaccessible storage' do
    before do
      mock_storages('foo' => { 'path' => 'tmp/tests/a/path/that/does/not/exist' })
    end

    it 'passes through with a warning' do
      expect(Rails.logger).to receive(:error)
      expect { validate_storages_paths }.not_to raise_error
    end
  end
end

def mock_storages(storages)
@@ -2,6 +2,17 @@ require 'spec_helper'
require_relative '../../config/initializers/1_settings'

describe Settings do
  describe '#repositories' do
    it 'assigns the default failure attributes' do
      repository_settings = Gitlab.config.repositories.storages['broken']

      expect(repository_settings['failure_count_threshold']).to eq(10)
      expect(repository_settings['failure_wait_time']).to eq(30)
      expect(repository_settings['failure_reset_time']).to eq(1800)
      expect(repository_settings['storage_timeout']).to eq(5)
    end
  end

  describe '#host_without_www' do
    context 'URL with protocol' do
      it 'returns the host' do
@@ -48,8 +48,9 @@ describe Gitlab::Cache::Ci::ProjectPipelineStatus, :clean_gitlab_redis_cache do
      described_class.load_in_batch_for_projects([project_without_status])
    end

    it 'only connects to redis_cache twice' do
      # Once to load, once to store in the cache
    it 'only connects to redis twice' do
      # Stub circuitbreaker so it doesn't count the redis connections in there
      stub_circuit_breaker(project_without_status)
      expect(Gitlab::Redis::Cache).to receive(:with).exactly(2).and_call_original

      described_class.load_in_batch_for_projects([project_without_status])

@@ -301,4 +302,13 @@ describe Gitlab::Cache::Ci::ProjectPipelineStatus, :clean_gitlab_redis_cache do
      end
    end
  end

  def stub_circuit_breaker(project)
    fake_circuitbreaker = double
    allow(fake_circuitbreaker).to receive(:perform).and_yield
    allow(project.repository.raw_repository)
      .to receive(:circuit_breaker).and_return(fake_circuitbreaker)
    allow(project.repository)
      .to receive(:circuit_breaker).and_return(fake_circuitbreaker)
  end
end
@@ -55,6 +55,20 @@ describe Gitlab::Git::Repository, seed_helper: true do
  end

  describe "#rugged" do
    describe 'when storage is broken', broken_storage: true do
      it 'raises a storage exception when storage is not available' do
        broken_repo = described_class.new('broken', 'a/path.git')

        expect { broken_repo.rugged }.to raise_error(Gitlab::Git::Storage::Inaccessible)
      end
    end

    it 'raises a no repository exception when there is no repo' do
      broken_repo = described_class.new('default', 'a/path.git')

      expect { broken_repo.rugged }.to raise_error(Gitlab::Git::Repository::NoRepository)
    end

    context 'with no Git env stored' do
      before do
        expect(Gitlab::Git::Env).to receive(:all).and_return({})
265 spec/lib/gitlab/git/storage/circuit_breaker_spec.rb Normal file
@@ -0,0 +1,265 @@
require 'spec_helper'

describe Gitlab::Git::Storage::CircuitBreaker, clean_gitlab_redis_shared_state: true, broken_storage: true do
  let(:circuit_breaker) { described_class.new('default') }
  let(:hostname) { Gitlab.config.gitlab.hostname }
  let(:cache_key) { "storage_accessible:default:#{hostname}" }

  def value_from_redis(name)
    Gitlab::Git::Storage.redis.with do |redis|
      redis.hmget(cache_key, name)
    end.first
  end

  def set_in_redis(name, value)
    Gitlab::Git::Storage.redis.with do |redis|
      redis.hmset(cache_key, name, value)
    end.first
  end

  describe '.reset_all!' do
    it 'clears all entries from redis' do
      set_in_redis(:failure_count, 10)

      described_class.reset_all!

      key_exists = Gitlab::Git::Storage.redis.with { |redis| redis.exists(cache_key) }

      expect(key_exists).to be_falsey
    end
  end

  describe '.for_storage' do
    it 'only builds a single circuitbreaker per storage' do
      expect(described_class).to receive(:new).once.and_call_original

      breaker = described_class.for_storage('default')

      expect(breaker).to be_a(described_class)
      expect(described_class.for_storage('default')).to eq(breaker)
    end
  end

  describe '#initialize' do
    it 'assigns the settings' do
      expect(circuit_breaker.hostname).to eq(hostname)
      expect(circuit_breaker.storage).to eq('default')
      expect(circuit_breaker.storage_path).to eq(TestEnv.repos_path)
      expect(circuit_breaker.failure_count_threshold).to eq(10)
      expect(circuit_breaker.failure_wait_time).to eq(30)
      expect(circuit_breaker.failure_reset_time).to eq(1800)
      expect(circuit_breaker.storage_timeout).to eq(5)
    end
  end

  describe '#perform' do
    it 'raises an exception with retry time when the circuit is open' do
      allow(circuit_breaker).to receive(:circuit_broken?).and_return(true)

      expect { |b| circuit_breaker.perform(&b) }
        .to raise_error(Gitlab::Git::Storage::CircuitOpen)
    end

    it 'yields the block' do
      expect { |b| circuit_breaker.perform(&b) }
        .to yield_control
    end

    it 'checks if the storage is available' do
      expect(circuit_breaker).to receive(:check_storage_accessible!)

      circuit_breaker.perform { 'hello world' }
    end

    it 'returns the value of the block' do
      result = circuit_breaker.perform { 'return value' }

      expect(result).to eq('return value')
    end

    it 'raises possible errors' do
      expect { circuit_breaker.perform { raise Rugged::OSError.new('Broken') } }
        .to raise_error(Rugged::OSError)
    end

    context 'with the feature disabled' do
      it 'returns the block without checking accessibility' do
        stub_feature_flags(git_storage_circuit_breaker: false)

        expect(circuit_breaker).not_to receive(:circuit_broken?)

        result = circuit_breaker.perform { 'hello' }

        expect(result).to eq('hello')
      end
    end
  end

  describe '#circuit_broken?' do
    it 'is closed when there is no last failure' do
      set_in_redis(:last_failure, nil)
      set_in_redis(:failure_count, 0)

      expect(circuit_breaker.circuit_broken?).to be_falsey
    end

    it 'is open when there was a recent failure' do
      Timecop.freeze do
        set_in_redis(:last_failure, 1.second.ago.to_f)
        set_in_redis(:failure_count, 1)

        expect(circuit_breaker.circuit_broken?).to be_truthy
      end
    end

    it 'is open when there are too many failures' do
      set_in_redis(:last_failure, 1.day.ago.to_f)
      set_in_redis(:failure_count, 200)

      expect(circuit_breaker.circuit_broken?).to be_truthy
    end
  end

  describe '#check_storage_accessible!' do
    context 'when the storage is available' do
      it 'tracks that the storage was accessible' do
        expect(circuit_breaker).to receive(:track_storage_accessible)

        circuit_breaker.check_storage_accessible!
      end
    end

    context 'when the storage is not available' do
      let(:circuit_breaker) { described_class.new('broken') }

      it 'tracks that the storage was unavailable and raises an error with retry time' do
        expect(circuit_breaker).to receive(:track_storage_inaccessible)

        expect { circuit_breaker.check_storage_accessible! }
          .to raise_error do |exception|
          expect(exception).to be_kind_of(Gitlab::Git::Storage::Inaccessible)
          expect(exception.retry_after).to eq(30)
        end
      end
    end
  end

  describe '#track_storage_inaccessible' do
    around(:each) do |example|
      Timecop.freeze

      example.run

      Timecop.return
    end

    it 'records the failure time in redis' do
      circuit_breaker.track_storage_inaccessible

      failure_time = value_from_redis(:last_failure)

      expect(Time.at(failure_time.to_i)).to be_within(1.second).of(Time.now)
    end

    it 'sets the failure time on the breaker without reloading' do
      circuit_breaker.track_storage_inaccessible

      expect(circuit_breaker).not_to receive(:get_failure_info)
      expect(circuit_breaker.last_failure).to eq(Time.now)
    end

    it 'increments the failure count in redis' do
      set_in_redis(:failure_count, 10)

      circuit_breaker.track_storage_inaccessible

      expect(value_from_redis(:failure_count).to_i).to be(11)
    end

    it 'increments the failure count on the breaker without reloading' do
      set_in_redis(:failure_count, 10)

      circuit_breaker.track_storage_inaccessible

      expect(circuit_breaker).not_to receive(:get_failure_info)
      expect(circuit_breaker.failure_count).to eq(11)
    end
  end

  describe '#track_storage_accessible' do
    it 'sets the failure count to zero in redis' do
      set_in_redis(:failure_count, 10)

      circuit_breaker.track_storage_accessible

      expect(value_from_redis(:failure_count).to_i).to be(0)
    end

    it 'sets the failure count to zero on the breaker without reloading' do
      set_in_redis(:failure_count, 10)

      circuit_breaker.track_storage_accessible

      expect(circuit_breaker).not_to receive(:get_failure_info)
      expect(circuit_breaker.failure_count).to eq(0)
    end

    it 'removes the last failure time from the breaker without reloading' do
      set_in_redis(:last_failure, Time.now.to_i)

      circuit_breaker.track_storage_accessible

      expect(circuit_breaker).not_to receive(:get_failure_info)
      expect(circuit_breaker.last_failure).to be_nil
    end

    it 'removes the last failure time from redis' do
      set_in_redis(:last_failure, Time.now.to_i)

      circuit_breaker.track_storage_accessible

      expect(value_from_redis(:last_failure)).to be_empty
    end

    it "won't connect to redis when there are no failures" do
      expect(Gitlab::Git::Storage.redis).to receive(:with).once
        .and_call_original
      expect(circuit_breaker).to receive(:track_storage_accessible)
        .and_call_original

      circuit_breaker.track_storage_accessible
    end
  end

  describe '#no_failures?' do
    it 'is false when a failure was tracked' do
      set_in_redis(:last_failure, Time.now.to_i)
      set_in_redis(:failure_count, 1)

      expect(circuit_breaker.no_failures?).to be_falsey
    end
  end

  describe '#last_failure' do
    it 'returns the last failure time' do
      time = Time.parse("2017-05-26 17:52:30")
      set_in_redis(:last_failure, time.to_i)

      expect(circuit_breaker.last_failure).to eq(time)
    end
  end

  describe '#failure_count' do
    it 'returns the failure count' do
      set_in_redis(:failure_count, 7)

      expect(circuit_breaker.failure_count).to eq(7)
    end
  end

  describe '#cache_key' do
    it 'includes storage and host' do
      expect(circuit_breaker.cache_key).to eq(cache_key)
    end
  end
end
27 spec/lib/gitlab/git/storage/forked_storage_check_spec.rb Normal file
@@ -0,0 +1,27 @@
require 'spec_helper'

describe Gitlab::Git::Storage::ForkedStorageCheck, skip_database_cleaner: true do
  let(:existing_path) do
    existing_path = TestEnv.repos_path
    FileUtils.mkdir_p(existing_path)
    existing_path
  end

  describe '.storage_available?' do
    it 'detects when a storage is not available' do
      expect(described_class.storage_available?('/non/existent/path')).to be_falsey
    end

    it 'detects when a storage is available' do
      expect(described_class.storage_available?(existing_path)).to be_truthy
    end

    it 'returns false when the check takes too long' do
      allow(described_class).to receive(:check_filesystem_in_fork) do
        fork { sleep 10 }
      end

      expect(described_class.storage_available?(existing_path, 0.5)).to be_falsey
    end
  end
end
85
spec/lib/gitlab/git/storage/health_spec.rb
Normal file
85
spec/lib/gitlab/git/storage/health_spec.rb
Normal file
|
@ -0,0 +1,85 @@
require 'spec_helper'

describe Gitlab::Git::Storage::Health, clean_gitlab_redis_shared_state: true, broken_storage: true do
  let(:host1_key) { 'storage_accessible:broken:web01' }
  let(:host2_key) { 'storage_accessible:default:kiq01' }

  def set_in_redis(cache_key, value)
    Gitlab::Git::Storage.redis.with do |redis|
      redis.hmset(cache_key, :failure_count, value)
    end.first
  end

  describe '.for_failing_storages' do
    it 'only includes health status for failures' do
      set_in_redis(host1_key, 10)
      set_in_redis(host2_key, 0)

      expect(described_class.for_failing_storages.map(&:storage_name))
        .to contain_exactly('broken')
    end
  end

  describe '.load_for_keys' do
    let(:subject) do
      results = Gitlab::Git::Storage.redis.with do |redis|
        described_class.load_for_keys({ 'broken' => [host1_key] }, redis)
      end

      # Make sure the `Redis::Future` objects are loaded
      results.inject({}) do |result, (name, info)|
        info.each { |i| i[:failure_count] = i[:failure_count].value.to_i }

        result[name] = info

        result
      end
    end

    it 'loads when there is no info in redis' do
      expect(subject).to eq('broken' => [{ name: host1_key, failure_count: 0 }])
    end

    it 'reads the correct values for a storage from redis' do
      set_in_redis(host1_key, 5)
      set_in_redis(host2_key, 7)

      expect(subject).to eq('broken' => [{ name: host1_key, failure_count: 5 }])
    end
  end

  describe '.for_all_storages' do
    it 'loads health status for all configured storages' do
      healths = described_class.for_all_storages

      expect(healths.map(&:storage_name)).to contain_exactly('default', 'broken')
    end
  end

  describe '#failing_info' do
    it 'only contains storages that have failures' do
      health = described_class.new('broken', [{ name: host1_key, failure_count: 0 },
                                              { name: host2_key, failure_count: 3 }])

      expect(health.failing_info).to contain_exactly({ name: host2_key, failure_count: 3 })
    end
  end

  describe '#total_failures' do
    it 'sums up all the failures' do
      health = described_class.new('broken', [{ name: host1_key, failure_count: 2 },
                                              { name: host2_key, failure_count: 3 }])

      expect(health.total_failures).to eq(5)
    end
  end

  describe '#failing_on_hosts' do
    it 'collects only the failing hostnames' do
      health = described_class.new('broken', [{ name: host1_key, failure_count: 2 },
                                              { name: host2_key, failure_count: 0 }])

      expect(health.failing_on_hosts).to contain_exactly('web01')
    end
  end
end
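The `Redis::Future` comment in the spec refers to redis-rb pipelining: inside a pipelined block each command returns a future whose value only resolves after the block exits, which is why the spec forces resolution before comparing. A sketch of a load_for_keys-style batched read under that assumption; the method name and signature mirror the spec, the body is illustrative:

# Illustrative batched read with redis-rb futures: issue all HGETs in one
# round-trip, then resolve each future after the pipeline has executed.
def load_failure_counts(keys_per_storage, redis)
  futures = {}

  redis.pipelined do |pipeline|
    keys_per_storage.each do |storage_name, keys|
      futures[storage_name] = keys.map do |key|
        { name: key, failure_count: pipeline.hget(key, :failure_count) }
      end
    end
  end

  # The futures hold values only now; a nil value means no recorded failures.
  futures.transform_values do |infos|
    infos.map { |info| { name: info[:name], failure_count: info[:failure_count].value.to_i } }
  end
end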
@@ -44,6 +44,15 @@ describe Gitlab::HealthChecks::FsShardsCheck do
  describe '#readiness' do
    subject { described_class.readiness }

    context 'storage has a tripped circuitbreaker' do
      let(:repository_storages) { ['broken'] }
      let(:storages_paths) do
        Gitlab.config.repositories.storages
      end

      it { is_expected.to include(result_class.new(false, 'circuitbreaker tripped', shard: 'broken')) }
    end

    context 'storage points to a non-existing folder' do
      let(:storages_paths) do
        {
@@ -51,6 +60,10 @@ describe Gitlab::HealthChecks::FsShardsCheck do
        }.with_indifferent_access
      end

      before do
        allow(described_class).to receive(:storage_circuitbreaker_test) { true }
      end

      it { is_expected.to include(result_class.new(false, 'cannot stat storage', shard: :default)) }
    end

@@ -109,6 +122,7 @@ describe Gitlab::HealthChecks::FsShardsCheck do
        expect(metrics).to include(an_object_having_attributes(name: :filesystem_access_latency_seconds, value: be >= 0))
        expect(metrics).to include(an_object_having_attributes(name: :filesystem_read_latency_seconds, value: be >= 0))
        expect(metrics).to include(an_object_having_attributes(name: :filesystem_write_latency_seconds, value: be >= 0))
        expect(metrics).to include(an_object_having_attributes(name: :filesystem_circuitbreaker_latency_seconds, value: be >= 0))
      end
    end

@@ -127,6 +141,7 @@ describe Gitlab::HealthChecks::FsShardsCheck do
        expect(metrics).to include(an_object_having_attributes(name: :filesystem_access_latency_seconds, value: be >= 0))
        expect(metrics).to include(an_object_having_attributes(name: :filesystem_read_latency_seconds, value: be >= 0))
        expect(metrics).to include(an_object_having_attributes(name: :filesystem_write_latency_seconds, value: be >= 0))
        expect(metrics).to include(an_object_having_attributes(name: :filesystem_circuitbreaker_latency_seconds, value: be >= 0))
      end

      it 'cleans up files used for metrics' do
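For context, the readiness expectations above compare against result tuples of (success, message, labels). A sketch of the branching those contexts imply; Result and every predicate helper here are invented stand-ins, not the shipped FsShardsCheck code:

# Hypothetical shape of the per-shard readiness decision the contexts above
# exercise: a tripped breaker short-circuits before the filesystem is touched.
Result = Struct.new(:success, :message, :labels)

def readiness_result(shard)
  return Result.new(false, 'circuitbreaker tripped', shard: shard) if circuitbreaker_tripped?(shard)
  return Result.new(false, 'cannot stat storage', shard: shard) unless storage_stat_test(shard)

  Result.new(true, nil, shard: shard)
end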
@@ -1,11 +1,12 @@
require 'spec_helper'

-describe Repository do
+describe Repository, models: true do
  include RepoHelpers
  TestBlob = Struct.new(:path)

  let(:project) { create(:project, :repository) }
  let(:repository) { project.repository }
  let(:broken_repository) { create(:project, :broken_storage).repository }
  let(:user) { create(:user) }

  let(:commit_options) do
@@ -27,12 +28,26 @@ describe Repository do
  let(:author_email) { 'user@example.org' }
  let(:author_name) { 'John Doe' }

  def expect_to_raise_storage_error
    expect { yield }.to raise_error do |exception|
      expect(exception.class).to be_in([Gitlab::Git::Storage::Inaccessible, GRPC::Unavailable])
    end
  end

  describe '#branch_names_contains' do
    subject { repository.branch_names_contains(sample_commit.id) }

    it { is_expected.to include('master') }
    it { is_expected.not_to include('feature') }
    it { is_expected.not_to include('fix') }

    describe 'when storage is broken', broken_storage: true do
      it 'should raise a storage error' do
        expect_to_raise_storage_error do
          broken_repository.branch_names_contains(sample_commit.id)
        end
      end
    end
  end

  describe '#tag_names_contains' do
@@ -142,6 +157,14 @@ describe Repository do
    subject { repository.last_commit_for_path(sample_commit.id, '.gitignore').id }

    it { is_expected.to eq('c1acaa58bbcbc3eafe538cb8274ba387047b69f8') }

    describe 'when storage is broken', broken_storage: true do
      it 'should raise a storage error' do
        expect_to_raise_storage_error do
          broken_repository.last_commit_for_path(sample_commit.id, '.gitignore').id
        end
      end
    end
  end

  describe '#last_commit_id_for_path' do
@@ -158,6 +181,14 @@ describe Repository do
      expect(cache).to receive(:fetch).with(key).and_return('c1acaa5')

      is_expected.to eq('c1acaa5')
    end

    describe 'when storage is broken', broken_storage: true do
      it 'should raise a storage error' do
        expect_to_raise_storage_error do
          broken_repository.last_commit_id_for_path(sample_commit.id, '.gitignore')
        end
      end
    end
  end

  describe '#commits' do
@@ -196,6 +227,12 @@ describe Repository do

      expect(commit_ids).to include('5937ac0a7beb003549fc5fd26fc247adbce4a52e')
    end

    describe 'when storage is broken', broken_storage: true do
      it 'should raise a storage error' do
        expect_to_raise_storage_error { broken_repository.find_commits_by_message('s') }
      end
    end
  end

  describe '#blob_at' do
@@ -521,6 +558,14 @@ describe Repository do
      expect(results).to match_array([])
    end

    describe 'when storage is broken', broken_storage: true do
      it 'should raise a storage error' do
        expect_to_raise_storage_error do
          broken_repository.search_files_by_content('feature', 'master')
        end
      end
    end

    describe 'result' do
      subject { results.first }

@@ -549,6 +594,22 @@ describe Repository do

      expect(results).to match_array([])
    end

    describe 'when storage is broken', broken_storage: true do
      it 'should raise a storage error' do
        expect_to_raise_storage_error { broken_repository.search_files_by_name('files', 'master') }
      end
    end
  end

  describe '#fetch_ref' do
    describe 'when storage is broken', broken_storage: true do
      it 'should raise a storage error' do
        path = broken_repository.path_to_repo

        expect_to_raise_storage_error { broken_repository.fetch_ref(path, '1', '2') }
      end
    end
  end

  describe '#create_ref' do
@@ -966,6 +1027,12 @@ describe Repository do

      expect(repository.exists?).to eq(false)
    end

    context 'with broken storage', broken_storage: true do
      it 'should raise a storage error' do
        expect_to_raise_storage_error { broken_repository.exists? }
      end
    end
  end

  describe '#exists?' do
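The expect_to_raise_storage_error helper above takes an implicit block through yield and then asserts the raised exception is one of the two storage error classes. An equivalent, purely illustrative formulation as a custom RSpec matcher, in case the same check were wanted in expectation style:

# Illustrative alternative to the yield-based helper: a block-expectation
# matcher that passes when either storage exception class is raised.
RSpec::Matchers.define :raise_storage_error do
  supports_block_expectations

  match do |block|
    begin
      block.call
      false
    rescue Gitlab::Git::Storage::Inaccessible, GRPC::Unavailable
      true
    end
  end
end

# usage: expect { broken_repository.exists? }.to raise_storage_error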
57  spec/requests/api/circuit_breakers_spec.rb  Normal file
@@ -0,0 +1,57 @@
require 'spec_helper'

describe API::CircuitBreakers do
  let(:user) { create(:user) }
  let(:admin) { create(:admin) }

  describe 'GET circuit_breakers/repository_storage' do
    it 'returns a 401 for anonymous users' do
      get api('/circuit_breakers/repository_storage')

      expect(response).to have_http_status(401)
    end

    it 'returns a 403 for users' do
      get api('/circuit_breakers/repository_storage', user)

      expect(response).to have_http_status(403)
    end

    it 'returns an Array of storages' do
      expect(Gitlab::Git::Storage::Health).to receive(:for_all_storages) do
        [Gitlab::Git::Storage::Health.new('broken', [{ name: 'prefix:broken:web01', failure_count: 4 }])]
      end

      get api('/circuit_breakers/repository_storage', admin)

      expect(response).to have_http_status(200)
      expect(json_response).to be_kind_of(Array)
      expect(json_response.first['storage_name']).to eq('broken')
      expect(json_response.first['failing_on_hosts']).to eq(['web01'])
      expect(json_response.first['total_failures']).to eq(4)
    end

    describe 'GET circuit_breakers/repository_storage/failing' do
      it 'returns an array of failing storages' do
        expect(Gitlab::Git::Storage::Health).to receive(:for_failing_storages) do
          [Gitlab::Git::Storage::Health.new('broken', [{ name: 'prefix:broken:web01', failure_count: 4 }])]
        end

        get api('/circuit_breakers/repository_storage/failing', admin)

        expect(response).to have_http_status(200)
        expect(json_response).to be_kind_of(Array)
      end
    end
  end

  describe 'DELETE circuit_breakers/repository_storage' do
    it 'clears all circuit_breakers' do
      expect(Gitlab::Git::Storage::CircuitBreaker).to receive(:reset_all!)

      delete api('/circuit_breakers/repository_storage', admin)

      expect(response).to have_http_status(204)
    end
  end
end
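Read together, these examples pin down a small admin-only API surface: list all storages, list only the failing ones, and reset every breaker via DELETE. A sketch of Grape endpoints matching that contract; the entity name is an assumption, and authenticated_as_admin! stands in for whatever auth helper the real API class uses:

# Hypothetical sketch of the Grape API surface the requests above expect.
module API
  class CircuitBreakers < Grape::API
    before { authenticated_as_admin! } # admin-only, per the 401/403 examples

    namespace 'circuit_breakers/repository_storage' do
      get do
        present Gitlab::Git::Storage::Health.for_all_storages, with: Entities::RepositoryStorageHealth
      end

      get 'failing' do
        present Gitlab::Git::Storage::Health.for_failing_storages, with: Entities::RepositoryStorageHealth
      end

      delete do
        Gitlab::Git::Storage::CircuitBreaker.reset_all!
      end
    end
  end
end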
@@ -2,4 +2,16 @@ RSpec.configure do |config|
  config.before(:each, :repository) do
    TestEnv.clean_test_path
  end

  config.before(:all, :broken_storage) do
    FileUtils.rm_rf Gitlab.config.repositories.storages.broken['path']
  end

  config.before(:each, :broken_storage) do
    allow(Gitlab::GitalyClient).to receive(:call) do
      raise GRPC::Unavailable.new('Gitaly broken in this spec')
    end

    Gitlab::Git::Storage::CircuitBreaker.reset_all!
  end
end
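With these hooks in place, tagging an example group with broken_storage: true is enough to delete the broken storage path, stub every Gitaly call to raise GRPC::Unavailable, and reset all breakers before each example, exactly as the repository specs above rely on:

# The metadata alone activates the hooks; no per-spec setup is needed.
describe 'when storage is broken', broken_storage: true do
  it 'should raise a storage error' do
    expect_to_raise_storage_error { broken_repository.exists? }
  end
end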