Adds chaos endpoints to Sidekiq
This allows the chaos endpoints to be invoked in Sidekiq so that this environment can be tested for resilience.
This commit is contained in:
parent
f97a73fa39
commit
dc14c91d06
14 changed files with 302 additions and 42 deletions
|
@ -1,57 +1,38 @@
|
|||
# frozen_string_literal: true
|
||||
|
||||
class ChaosController < ActionController::Base
|
||||
before_action :validate_chaos_secret, unless: :development?
|
||||
before_action :request_start_time
|
||||
before_action :validate_chaos_secret, unless: :development_or_test?
|
||||
|
||||
def leakmem
|
||||
retainer = []
|
||||
# Add `n` 1mb chunks of memory to the retainer array
|
||||
memory_mb.times { retainer << "x" * 1.megabyte }
|
||||
|
||||
Kernel.sleep(duration_left)
|
||||
|
||||
render plain: "OK"
|
||||
do_chaos :leak_mem, Chaos::LeakMemWorker, memory_mb, duration_s
|
||||
end
|
||||
|
||||
def cpu_spin
|
||||
rand while Time.now < expected_end_time
|
||||
|
||||
render plain: "OK"
|
||||
do_chaos :cpu_spin, Chaos::CpuSpinWorker, duration_s
|
||||
end
|
||||
|
||||
def db_spin
|
||||
while Time.now < expected_end_time
|
||||
ActiveRecord::Base.connection.execute("SELECT 1")
|
||||
|
||||
end_interval_time = Time.now + [duration_s, interval_s].min
|
||||
rand while Time.now < end_interval_time
|
||||
end
|
||||
do_chaos :db_spin, Chaos::DbSpinWorker, duration_s, interval_s
|
||||
end
|
||||
|
||||
def sleep
|
||||
Kernel.sleep(duration_left)
|
||||
|
||||
render plain: "OK"
|
||||
do_chaos :sleep, Chaos::SleepWorker, duration_s
|
||||
end
|
||||
|
||||
def kill
|
||||
Process.kill("KILL", Process.pid)
|
||||
do_chaos :kill, Chaos::KillWorker
|
||||
end
|
||||
|
||||
private
|
||||
|
||||
def request_start_time
|
||||
@start_time ||= Time.now
|
||||
def do_chaos(method, worker, *args)
|
||||
if async
|
||||
worker.perform_async(*args)
|
||||
else
|
||||
Gitlab::Chaos.public_send(method, *args) # rubocop: disable GitlabSecurity/PublicSend
|
||||
end
|
||||
|
||||
def expected_end_time
|
||||
request_start_time + duration_s
|
||||
end
|
||||
|
||||
def duration_left
|
||||
# returns 0 if over time
|
||||
[expected_end_time - Time.now, 0].max
|
||||
render plain: "OK"
|
||||
end
|
||||
|
||||
def validate_chaos_secret
|
||||
|
@ -91,7 +72,12 @@ class ChaosController < ActionController::Base
|
|||
memory_mb.to_i
|
||||
end
|
||||
|
||||
def development?
|
||||
Rails.env.development?
|
||||
# Reports whether the request asked for the chaos operation to be run
# asynchronously, based on the `async` request parameter.
#
# A missing parameter is replaced by `false` before the value is handed to
# Gitlab::Utils.to_boolean for conversion.
def async
  requested = params[:async] || false

  Gitlab::Utils.to_boolean(requested)
end
|
||||
|
||||
# True when the application runs in the development or the test environment.
def development_or_test?
  env = Rails.env

  env.development? || env.test?
end
|
||||
end
|
||||
|
|
|
@ -3,6 +3,12 @@
|
|||
|
||||
- auto_merge:auto_merge_process
|
||||
|
||||
- chaos:chaos_cpu_spin
|
||||
- chaos:chaos_db_spin
|
||||
- chaos:chaos_kill
|
||||
- chaos:chaos_leak_mem
|
||||
- chaos:chaos_sleep
|
||||
|
||||
- cronjob:admin_email
|
||||
- cronjob:expire_build_artifacts
|
||||
- cronjob:gitlab_usage_ping
|
||||
|
|
12
app/workers/chaos/cpu_spin_worker.rb
Normal file
12
app/workers/chaos/cpu_spin_worker.rb
Normal file
|
@ -0,0 +1,12 @@
|
|||
# frozen_string_literal: true
|
||||
|
||||
module Chaos
  # Background worker for the CPU-spin chaos operation.
  #
  # The job is placed in the `chaos` queue namespace (via ChaosQueue) and
  # simply delegates to Gitlab::Chaos.cpu_spin in the process that runs it.
  class CpuSpinWorker
    include ApplicationWorker
    include ChaosQueue

    # duration_s - how long to spin, in seconds; passed through unchanged
    #              to Gitlab::Chaos.cpu_spin.
    def perform(duration_s)
      Gitlab::Chaos.cpu_spin(duration_s)
    end
  end
end
|
12
app/workers/chaos/db_spin_worker.rb
Normal file
12
app/workers/chaos/db_spin_worker.rb
Normal file
|
@ -0,0 +1,12 @@
|
|||
# frozen_string_literal: true
|
||||
|
||||
module Chaos
  # Background worker for the DB-spin chaos operation.
  #
  # The job is placed in the `chaos` queue namespace (via ChaosQueue) and
  # simply delegates to Gitlab::Chaos.db_spin in the process that runs it.
  class DbSpinWorker
    include ApplicationWorker
    include ChaosQueue

    # duration_s - total duration of the operation, in seconds.
    # interval_s - interval between database requests, in seconds.
    #
    # Both values are passed through unchanged to Gitlab::Chaos.db_spin.
    def perform(duration_s, interval_s)
      Gitlab::Chaos.db_spin(duration_s, interval_s)
    end
  end
end
|
12
app/workers/chaos/kill_worker.rb
Normal file
12
app/workers/chaos/kill_worker.rb
Normal file
|
@ -0,0 +1,12 @@
|
|||
# frozen_string_literal: true
|
||||
|
||||
module Chaos
  # Background worker for the kill chaos operation.
  #
  # The job is placed in the `chaos` queue namespace (via ChaosQueue) and
  # delegates to Gitlab::Chaos.kill, so the process that picks up the job is
  # the one that receives the signal.
  class KillWorker
    include ApplicationWorker
    include ChaosQueue

    # Takes no arguments; invokes Gitlab::Chaos.kill.
    def perform
      Gitlab::Chaos.kill
    end
  end
end
|
12
app/workers/chaos/leak_mem_worker.rb
Normal file
12
app/workers/chaos/leak_mem_worker.rb
Normal file
|
@ -0,0 +1,12 @@
|
|||
# frozen_string_literal: true
|
||||
|
||||
module Chaos
  # Background worker for the memory-leak chaos operation.
  #
  # The job is placed in the `chaos` queue namespace (via ChaosQueue) and
  # simply delegates to Gitlab::Chaos.leak_mem in the process that runs it.
  class LeakMemWorker
    include ApplicationWorker
    include ChaosQueue

    # memory_mb  - amount of memory to retain, in megabytes.
    # duration_s - how long the memory is retained, in seconds.
    #
    # Both values are passed through unchanged to Gitlab::Chaos.leak_mem.
    def perform(memory_mb, duration_s)
      Gitlab::Chaos.leak_mem(memory_mb, duration_s)
    end
  end
end
|
12
app/workers/chaos/sleep_worker.rb
Normal file
12
app/workers/chaos/sleep_worker.rb
Normal file
|
@ -0,0 +1,12 @@
|
|||
# frozen_string_literal: true
|
||||
|
||||
module Chaos
  # Background worker for the sleep chaos operation.
  #
  # The job is placed in the `chaos` queue namespace (via ChaosQueue) and
  # simply delegates to Gitlab::Chaos.sleep in the process that runs it.
  class SleepWorker
    include ApplicationWorker
    include ChaosQueue

    # duration_s - how long to sleep, in seconds; passed through unchanged
    #              to Gitlab::Chaos.sleep.
    def perform(duration_s)
      Gitlab::Chaos.sleep(duration_s)
    end
  end
end
|
9
app/workers/concerns/chaos_queue.rb
Normal file
9
app/workers/concerns/chaos_queue.rb
Normal file
|
@ -0,0 +1,9 @@
|
|||
# frozen_string_literal: true
|
||||
#
|
||||
# Concern shared by the chaos workers.
#
# When included, it calls `queue_namespace :chaos` on the including class so
# that the worker's queue lives under the `chaos` namespace.
module ChaosQueue
  extend ActiveSupport::Concern

  included { queue_namespace :chaos }
end
|
5
changelogs/unreleased/an-sidekiq-chaos.yml
Normal file
5
changelogs/unreleased/an-sidekiq-chaos.yml
Normal file
|
@ -0,0 +1,5 @@
|
|||
---
|
||||
title: Adds chaos endpoints to Sidekiq
|
||||
merge_request: 30814
|
||||
author:
|
||||
type: other
|
|
@ -116,7 +116,7 @@ Rails.application.routes.draw do
|
|||
end
|
||||
end
|
||||
|
||||
if ENV['GITLAB_CHAOS_SECRET'] || Rails.env.development?
|
||||
if ENV['GITLAB_CHAOS_SECRET'] || Rails.env.development? || Rails.env.test?
|
||||
resource :chaos, only: [] do
|
||||
get :leakmem
|
||||
get :cpu_spin
|
||||
|
|
|
@ -95,6 +95,7 @@
|
|||
- [update_project_statistics, 1]
|
||||
- [phabricator_import_import_tasks, 1]
|
||||
- [update_namespace_statistics, 1]
|
||||
- [chaos, 2]
|
||||
|
||||
# EE-specific queues
|
||||
- [ldap_group_sync, 2]
|
||||
|
|
|
@ -36,6 +36,10 @@ Replace `secret` with your own secret token.
|
|||
|
||||
Once you have enabled the chaos endpoints and restarted the application, you can start testing using the endpoints.
|
||||
|
||||
By default, when invoking a chaos endpoint, the web worker process which receives the request will handle it. This means, for example, that if the Kill
|
||||
operation is invoked, the Puma or Unicorn worker process handling the request will be killed. To test these operations in Sidekiq, the `async` parameter on
|
||||
each endpoint can be set to `true`. This will run the chaos process in a Sidekiq worker.
|
||||
|
||||
## Memory leaks
|
||||
|
||||
To simulate a memory leak in your application, use the `/-/chaos/leakmem` endpoint.
|
||||
|
@ -47,12 +51,14 @@ The memory is not retained after the request finishes. Once the request has comp
|
|||
GET /-/chaos/leakmem
|
||||
GET /-/chaos/leakmem?memory_mb=1024
|
||||
GET /-/chaos/leakmem?memory_mb=1024&duration_s=50
|
||||
GET /-/chaos/leakmem?memory_mb=1024&duration_s=50&async=true
|
||||
```
|
||||
|
||||
| Attribute | Type | Required | Description |
|
||||
| ------------ | ------- | -------- | ---------------------------------------------------------------------------------- |
|
||||
| ------------ | ------- | -------- | ------------------------------------------------------------------------------------ |
|
||||
| `memory_mb` | integer | no | How much memory, in MB, should be leaked. Defaults to 100MB. |
|
||||
| `duration_s` | integer | no | Minimum duration, in seconds, that the memory should be retained. Defaults to 30s. |
|
||||
| `async` | boolean | no | Set to true to leak memory in a Sidekiq background worker process |
|
||||
|
||||
```bash
|
||||
curl "http://localhost:3000/-/chaos/leakmem?memory_mb=1024&duration_s=10" --header 'X-Chaos-Secret: secret'
|
||||
|
@ -69,11 +75,13 @@ If you're using Unicorn, this is done by killing the worker process.
|
|||
```
|
||||
GET /-/chaos/cpu_spin
|
||||
GET /-/chaos/cpu_spin?duration_s=50
|
||||
GET /-/chaos/cpu_spin?duration_s=50&async=true
|
||||
```
|
||||
|
||||
| Attribute | Type | Required | Description |
|
||||
| ------------ | ------- | -------- | --------------------------------------------------------------------- |
|
||||
| `duration_s` | integer | no | Duration, in seconds, that the core will be utilised. Defaults to 30s |
|
||||
| `async` | boolean | no | Set to true to consume CPU in a Sidekiq background worker process |
|
||||
|
||||
```bash
|
||||
curl http://localhost:3000/-/chaos/cpu_spin?duration_s=60 --header 'X-Chaos-Secret: secret'
|
||||
|
@ -91,12 +99,14 @@ If you're using Unicorn, this is done by killing the worker process.
|
|||
```
|
||||
GET /-/chaos/db_spin
|
||||
GET /-/chaos/db_spin?duration_s=50
|
||||
GET /-/chaos/db_spin?duration_s=50&async=true
|
||||
```
|
||||
|
||||
| Attribute | Type | Required | Description |
|
||||
| ------------ | ------- | -------- | --------------------------------------------------------------------- |
|
||||
| ------------ | ------- | -------- | --------------------------------------------------------------------------- |
|
||||
| `interval_s` | float | no | Interval, in seconds, for every DB request. Defaults to 1s |
|
||||
| `duration_s` | integer | no | Duration, in seconds, that the core will be utilised. Defaults to 30s |
|
||||
| `async` | boolean | no | Set to true to perform the operation in a Sidekiq background worker process |
|
||||
|
||||
```bash
|
||||
curl "http://localhost:3000/-/chaos/db_spin?interval_s=1&duration_s=60" --header 'X-Chaos-Secret: secret'
|
||||
|
@ -112,11 +122,13 @@ As with the CPU Spin endpoint, this may lead to your request timing out if durat
|
|||
```
|
||||
GET /-/chaos/sleep
|
||||
GET /-/chaos/sleep?duration_s=50
|
||||
GET /-/chaos/sleep?duration_s=50&async=true
|
||||
```
|
||||
|
||||
| Attribute | Type | Required | Description |
|
||||
| ------------ | ------- | -------- | ---------------------------------------------------------------------- |
|
||||
| `duration_s` | integer | no | Duration, in seconds, that the request will sleep for. Defaults to 30s |
|
||||
| `async` | boolean | no | Set to true to sleep in a Sidekiq background worker process |
|
||||
|
||||
```bash
|
||||
curl http://localhost:3000/-/chaos/sleep?duration_s=60 --header 'X-Chaos-Secret: secret'
|
||||
|
@ -132,8 +144,13 @@ Since this endpoint uses the `KILL` signal, the worker is not given a chance to
|
|||
|
||||
```
|
||||
GET /-/chaos/kill
|
||||
GET /-/chaos/kill?async=true
|
||||
```
|
||||
|
||||
| Attribute | Type | Required | Description |
|
||||
| ------------ | ------- | -------- | ---------------------------------------------------------------------- |
|
||||
| `async` | boolean | no | Set to true to kill a Sidekiq background worker process |
|
||||
|
||||
```bash
|
||||
curl http://localhost:3000/-/chaos/kill --header 'X-Chaos-Secret: secret'
|
||||
curl http://localhost:3000/-/chaos/kill?token=secret
|
||||
|
|
49
lib/gitlab/chaos.rb
Normal file
49
lib/gitlab/chaos.rb
Normal file
|
@ -0,0 +1,49 @@
|
|||
# frozen_string_literal: true
|
||||
|
||||
module Gitlab
  # Chaos methods for GitLab.
  # See https://docs.gitlab.com/ee/development/chaos_endpoints.html for more details.
  #
  # All durations are given in seconds and are coerced with `to_f`. Elapsed
  # time is measured against the monotonic clock so that wall-clock
  # adjustments (NTP steps, manual changes) cannot shorten or lengthen an
  # operation.
  class Chaos
    # leak_mem will retain the specified amount of memory and sleep.
    # On return, the memory will be released.
    #
    # memory_mb  - number of 1 MB chunks to allocate (must respond to `times`).
    # duration_s - minimum time, in seconds, measured from the start of the
    #              call (allocation time included), to hold the memory.
    #
    # Returns the value of Kernel.sleep.
    def self.leak_mem(memory_mb, duration_s)
      deadline = monotonic_now + duration_s.to_f

      retainer = []
      # Add `n` 1mb chunks of memory to the retainer array
      memory_mb.times { retainer << "x" * 1.megabyte }

      # Allocation may already have used up the whole duration, in which case
      # there is nothing left to wait for.
      Kernel.sleep([deadline - monotonic_now, 0].max)
    end

    # cpu_spin will consume all CPU on a single core for the specified duration
    #
    # duration_s - time, in seconds, to keep spinning.
    def self.cpu_spin(duration_s)
      deadline = monotonic_now + duration_s.to_f

      rand while monotonic_now < deadline
    end

    # db_spin will query the database in a tight loop for the specified duration
    #
    # duration_s - total time, in seconds, the operation should run.
    # interval_s - time, in seconds, to busy-wait between two queries.
    def self.db_spin(duration_s, interval_s)
      deadline = monotonic_now + duration_s.to_f
      interval = interval_s.to_f

      while monotonic_now < deadline
        ActiveRecord::Base.connection.execute("SELECT 1")

        # Busy-wait until the next query is due, but never past the overall
        # deadline, so the operation does not overrun `duration_s` by up to
        # one full interval.
        end_interval_time = [monotonic_now + interval, deadline].min
        rand while monotonic_now < end_interval_time
      end
    end

    # sleep will sleep for the specified duration
    #
    # duration_s - time, in seconds, to sleep. Negative values are treated as
    #              zero (consistent with leak_mem) instead of letting
    #              Kernel.sleep raise an ArgumentError.
    #
    # Returns the value of Kernel.sleep.
    def self.sleep(duration_s)
      Kernel.sleep([duration_s.to_f, 0].max)
    end

    # Kill will send a SIGKILL signal to the current process
    def self.kill
      Process.kill("KILL", Process.pid)
    end

    # Current reading of the monotonic clock, in (fractional) seconds.
    def self.monotonic_now
      Process.clock_gettime(Process::CLOCK_MONOTONIC)
    end
    private_class_method :monotonic_now
  end
end
|
127
spec/controllers/chaos_controller_spec.rb
Normal file
127
spec/controllers/chaos_controller_spec.rb
Normal file
|
@ -0,0 +1,127 @@
|
|||
# frozen_string_literal: true
|
||||
|
||||
require 'spec_helper'
|
||||
|
||||
# Verifies that every chaos endpoint either calls the matching Gitlab::Chaos
# method directly (default) or enqueues the matching Chaos worker when the
# `async` parameter is set, and that the request answers with HTTP 200.
describe ChaosController do
  describe '#leakmem' do
    it 'calls synchronously' do
      expect(Gitlab::Chaos).to receive(:leak_mem).with(100, 30.seconds)

      get :leakmem

      expect(response).to have_gitlab_http_status(200)
    end

    it 'calls synchronously with params' do
      expect(Gitlab::Chaos).to receive(:leak_mem).with(1, 2.seconds)

      get :leakmem, params: { memory_mb: 1, duration_s: 2 }

      expect(response).to have_gitlab_http_status(200)
    end

    it 'calls asynchronously' do
      expect(Chaos::LeakMemWorker).to receive(:perform_async).with(100, 30.seconds)

      get :leakmem, params: { async: 1 }

      expect(response).to have_gitlab_http_status(200)
    end
  end

  describe '#cpu_spin' do
    it 'calls synchronously' do
      expect(Gitlab::Chaos).to receive(:cpu_spin).with(30.seconds)

      get :cpu_spin

      expect(response).to have_gitlab_http_status(200)
    end

    it 'calls synchronously with params' do
      expect(Gitlab::Chaos).to receive(:cpu_spin).with(3.seconds)

      get :cpu_spin, params: { duration_s: 3 }

      expect(response).to have_gitlab_http_status(200)
    end

    it 'calls asynchronously' do
      expect(Chaos::CpuSpinWorker).to receive(:perform_async).with(30.seconds)

      get :cpu_spin, params: { async: 1 }

      expect(response).to have_gitlab_http_status(200)
    end
  end

  describe '#db_spin' do
    it 'calls synchronously' do
      expect(Gitlab::Chaos).to receive(:db_spin).with(30.seconds, 1.second)

      get :db_spin

      expect(response).to have_gitlab_http_status(200)
    end

    it 'calls synchronously with params' do
      expect(Gitlab::Chaos).to receive(:db_spin).with(4.seconds, 5.seconds)

      get :db_spin, params: { duration_s: 4, interval_s: 5 }

      expect(response).to have_gitlab_http_status(200)
    end

    it 'calls asynchronously' do
      expect(Chaos::DbSpinWorker).to receive(:perform_async).with(30.seconds, 1.second)

      get :db_spin, params: { async: 1 }

      expect(response).to have_gitlab_http_status(200)
    end
  end

  describe '#sleep' do
    it 'calls synchronously' do
      expect(Gitlab::Chaos).to receive(:sleep).with(30.seconds)

      get :sleep

      expect(response).to have_gitlab_http_status(200)
    end

    it 'calls synchronously with params' do
      expect(Gitlab::Chaos).to receive(:sleep).with(5.seconds)

      get :sleep, params: { duration_s: 5 }

      expect(response).to have_gitlab_http_status(200)
    end

    it 'calls asynchronously' do
      expect(Chaos::SleepWorker).to receive(:perform_async).with(30.seconds)

      get :sleep, params: { async: 1 }

      expect(response).to have_gitlab_http_status(200)
    end
  end

  # The kill endpoint takes no tuning parameters, so there is no
  # "with params" example here.
  describe '#kill' do
    it 'calls synchronously' do
      expect(Gitlab::Chaos).to receive(:kill).with(no_args)

      get :kill

      expect(response).to have_gitlab_http_status(200)
    end

    it 'calls asynchronously' do
      expect(Chaos::KillWorker).to receive(:perform_async).with(no_args)

      get :kill, params: { async: 1 }

      expect(response).to have_gitlab_http_status(200)
    end
  end
end
|
Loading…
Reference in a new issue