Adds chaos endpoints to Sidekiq

This allows the chaos endpoints to be invoked in Sidekiq so that this
environment can be tested for resilience.
This commit is contained in:
Andrew Newdigate 2019-07-16 22:10:44 +02:00
parent f97a73fa39
commit dc14c91d06
14 changed files with 302 additions and 42 deletions

View file

@ -1,57 +1,38 @@
# frozen_string_literal: true
class ChaosController < ActionController::Base
before_action :validate_chaos_secret, unless: :development?
before_action :request_start_time
before_action :validate_chaos_secret, unless: :development_or_test?
def leakmem
retainer = []
# Add `n` 1mb chunks of memory to the retainer array
memory_mb.times { retainer << "x" * 1.megabyte }
Kernel.sleep(duration_left)
render plain: "OK"
do_chaos :leak_mem, Chaos::LeakMemWorker, memory_mb, duration_s
end
def cpu_spin
rand while Time.now < expected_end_time
render plain: "OK"
do_chaos :cpu_spin, Chaos::CpuSpinWorker, duration_s
end
def db_spin
while Time.now < expected_end_time
ActiveRecord::Base.connection.execute("SELECT 1")
end_interval_time = Time.now + [duration_s, interval_s].min
rand while Time.now < end_interval_time
end
do_chaos :db_spin, Chaos::DbSpinWorker, duration_s, interval_s
end
def sleep
Kernel.sleep(duration_left)
render plain: "OK"
do_chaos :sleep, Chaos::SleepWorker, duration_s
end
def kill
Process.kill("KILL", Process.pid)
do_chaos :kill, Chaos::KillWorker
end
private
def request_start_time
@start_time ||= Time.now
def do_chaos(method, worker, *args)
if async
worker.perform_async(*args)
else
Gitlab::Chaos.public_send(method, *args) # rubocop: disable GitlabSecurity/PublicSend
end
def expected_end_time
request_start_time + duration_s
end
def duration_left
# returns 0 if over time
[expected_end_time - Time.now, 0].max
render plain: "OK"
end
def validate_chaos_secret
@ -91,7 +72,12 @@ class ChaosController < ActionController::Base
memory_mb.to_i
end
def development?
Rails.env.development?
def async
async = params[:async] || false
Gitlab::Utils.to_boolean(async)
end
def development_or_test?
Rails.env.development? || Rails.env.test?
end
end

View file

@ -3,6 +3,12 @@
- auto_merge:auto_merge_process
- chaos:chaos_cpu_spin
- chaos:chaos_db_spin
- chaos:chaos_kill
- chaos:chaos_leak_mem
- chaos:chaos_sleep
- cronjob:admin_email
- cronjob:expire_build_artifacts
- cronjob:gitlab_usage_ping

View file

@ -0,0 +1,12 @@
# frozen_string_literal: true
module Chaos
  # Background worker for the `cpu_spin` chaos operation. It is the class the
  # chaos controller enqueues (via `perform_async`) when a request asks for
  # asynchronous execution, and it simply delegates to Gitlab::Chaos.
  class CpuSpinWorker
    include ApplicationWorker
    include ChaosQueue

    # duration_s - how long to spin, passed through unchanged to
    #              Gitlab::Chaos.cpu_spin.
    def perform(duration_s)
      Gitlab::Chaos.cpu_spin(duration_s)
    end
  end
end

View file

@ -0,0 +1,12 @@
# frozen_string_literal: true
module Chaos
  # Background worker for the `db_spin` chaos operation. It is the class the
  # chaos controller enqueues (via `perform_async`) when a request asks for
  # asynchronous execution, and it simply delegates to Gitlab::Chaos.
  class DbSpinWorker
    include ApplicationWorker
    include ChaosQueue

    # duration_s - total run time, passed through to Gitlab::Chaos.db_spin.
    # interval_s - pause between queries, passed through to
    #              Gitlab::Chaos.db_spin.
    def perform(duration_s, interval_s)
      Gitlab::Chaos.db_spin(duration_s, interval_s)
    end
  end
end

View file

@ -0,0 +1,12 @@
# frozen_string_literal: true
module Chaos
  # Background worker for the `kill` chaos operation. It is the class the
  # chaos controller enqueues (via `perform_async`) when a request asks for
  # asynchronous execution, and it simply delegates to Gitlab::Chaos.
  class KillWorker
    include ApplicationWorker
    include ChaosQueue

    # Takes no arguments; Gitlab::Chaos.kill acts on whichever process runs
    # this job.
    def perform
      Gitlab::Chaos.kill
    end
  end
end

View file

@ -0,0 +1,12 @@
# frozen_string_literal: true
module Chaos
  # Background worker for the `leak_mem` chaos operation. It is the class the
  # chaos controller enqueues (via `perform_async`) when a request asks for
  # asynchronous execution, and it simply delegates to Gitlab::Chaos.
  class LeakMemWorker
    include ApplicationWorker
    include ChaosQueue

    # memory_mb  - amount of memory to retain, passed through to
    #              Gitlab::Chaos.leak_mem.
    # duration_s - how long to retain it, passed through to
    #              Gitlab::Chaos.leak_mem.
    def perform(memory_mb, duration_s)
      Gitlab::Chaos.leak_mem(memory_mb, duration_s)
    end
  end
end

View file

@ -0,0 +1,12 @@
# frozen_string_literal: true
module Chaos
  # Background worker for the `sleep` chaos operation. It is the class the
  # chaos controller enqueues (via `perform_async`) when a request asks for
  # asynchronous execution, and it simply delegates to Gitlab::Chaos.
  class SleepWorker
    include ApplicationWorker
    include ChaosQueue

    # duration_s - how long to sleep, passed through unchanged to
    #              Gitlab::Chaos.sleep.
    def perform(duration_s)
      Gitlab::Chaos.sleep(duration_s)
    end
  end
end

View file

@ -0,0 +1,9 @@
# frozen_string_literal: true
#
# Concern shared by the chaos workers: when a class includes it, the
# `included` hook calls `queue_namespace :chaos` on that class.
module ChaosQueue
  extend ActiveSupport::Concern

  included { queue_namespace :chaos }
end

View file

@ -0,0 +1,5 @@
---
title: Adds chaos endpoints to Sidekiq
merge_request: 30814
author:
type: other

View file

@ -116,7 +116,7 @@ Rails.application.routes.draw do
end
end
if ENV['GITLAB_CHAOS_SECRET'] || Rails.env.development?
if ENV['GITLAB_CHAOS_SECRET'] || Rails.env.development? || Rails.env.test?
resource :chaos, only: [] do
get :leakmem
get :cpu_spin

View file

@ -95,6 +95,7 @@
- [update_project_statistics, 1]
- [phabricator_import_import_tasks, 1]
- [update_namespace_statistics, 1]
- [chaos, 2]
# EE-specific queues
- [ldap_group_sync, 2]

View file

@ -36,6 +36,10 @@ Replace `secret` with your own secret token.
Once you have enabled the chaos endpoints and restarted the application, you can start testing using the endpoints.
By default, when invoking a chaos endpoint, the web worker process which receives the request will handle it. This means, for example, that if the Kill
operation is invoked, the Puma or Unicorn worker process handling the request will be killed. To test these operations in Sidekiq, the `async` parameter on
each endpoint can be set to `true`. This will run the chaos process in a Sidekiq worker.
## Memory leaks
To simulate a memory leak in your application, use the `/-/chaos/leakmem` endpoint.
@ -47,12 +51,14 @@ The memory is not retained after the request finishes. Once the request has comp
GET /-/chaos/leakmem
GET /-/chaos/leakmem?memory_mb=1024
GET /-/chaos/leakmem?memory_mb=1024&duration_s=50
GET /-/chaos/leakmem?memory_mb=1024&duration_s=50&async=true
```
| Attribute | Type | Required | Description |
| ------------ | ------- | -------- | ---------------------------------------------------------------------------------- |
| ------------ | ------- | -------- | ------------------------------------------------------------------------------------ |
| `memory_mb` | integer | no | How much memory, in MB, should be leaked. Defaults to 100MB. |
| `duration_s` | integer | no | Minimum duration, in seconds, that the memory should be retained. Defaults to 30s. |
| `async` | boolean | no | Set to true to leak memory in a Sidekiq background worker process |
```bash
curl "http://localhost:3000/-/chaos/leakmem?memory_mb=1024&duration_s=10" --header 'X-Chaos-Secret: secret'
@ -69,11 +75,13 @@ If you're using Unicorn, this is done by killing the worker process.
```
GET /-/chaos/cpu_spin
GET /-/chaos/cpu_spin?duration_s=50
GET /-/chaos/cpu_spin?duration_s=50&async=true
```
| Attribute | Type | Required | Description |
| ------------ | ------- | -------- | --------------------------------------------------------------------- |
| `duration_s` | integer | no | Duration, in seconds, that the core will be utilised. Defaults to 30s |
| `async` | boolean | no | Set to true to consume CPU in a Sidekiq background worker process |
```bash
curl http://localhost:3000/-/chaos/cpu_spin?duration_s=60 --header 'X-Chaos-Secret: secret'
@ -91,12 +99,14 @@ If you're using Unicorn, this is done by killing the worker process.
```
GET /-/chaos/db_spin
GET /-/chaos/db_spin?duration_s=50
GET /-/chaos/db_spin?duration_s=50&async=true
```
| Attribute | Type | Required | Description |
| ------------ | ------- | -------- | --------------------------------------------------------------------- |
| ------------ | ------- | -------- | --------------------------------------------------------------------------- |
| `interval_s` | float | no | Interval, in seconds, for every DB request. Defaults to 1s |
| `duration_s` | integer | no | Duration, in seconds, that the core will be utilised. Defaults to 30s |
| `async` | boolean | no | Set to true to perform the operation in a Sidekiq background worker process |
```bash
curl "http://localhost:3000/-/chaos/db_spin?interval_s=1&duration_s=60" --header 'X-Chaos-Secret: secret'
@ -112,11 +122,13 @@ As with the CPU Spin endpoint, this may lead to your request timing out if durat
```
GET /-/chaos/sleep
GET /-/chaos/sleep?duration_s=50
GET /-/chaos/sleep?duration_s=50&async=true
```
| Attribute | Type | Required | Description |
| ------------ | ------- | -------- | ---------------------------------------------------------------------- |
| `duration_s` | integer | no | Duration, in seconds, that the request will sleep for. Defaults to 30s |
| `async` | boolean | no | Set to true to sleep in a Sidekiq background worker process |
```bash
curl http://localhost:3000/-/chaos/sleep?duration_s=60 --header 'X-Chaos-Secret: secret'
@ -132,8 +144,13 @@ Since this endpoint uses the `KILL` signal, the worker is not given a chance to
```
GET /-/chaos/kill
GET /-/chaos/kill?async=true
```
| Attribute | Type | Required | Description |
| ------------ | ------- | -------- | ---------------------------------------------------------------------- |
| `async` | boolean | no | Set to true to kill a Sidekiq background worker process |
```bash
curl http://localhost:3000/-/chaos/kill --header 'X-Chaos-Secret: secret'
curl http://localhost:3000/-/chaos/kill?token=secret

49
lib/gitlab/chaos.rb Normal file
View file

@ -0,0 +1,49 @@
# frozen_string_literal: true
module Gitlab
  # Chaos methods for GitLab.
  # See https://docs.gitlab.com/ee/development/chaos_endpoints.html for more details.
  #
  # Every method blocks the calling thread (or, for `kill`, terminates the
  # calling process), so the effect lands on whichever process invokes it.
  class Chaos
    # leak_mem will retain the specified amount of memory and sleep.
    # On return, the memory will be released.
    #
    # memory_mb  - number of 1 MB strings to allocate and hold.
    # duration_s - total time to hold the memory, measured from the start of
    #              the call. Time spent allocating counts towards it, so the
    #              sleep is only for whatever remains (never negative).
    def self.leak_mem(memory_mb, duration_s)
      start_time = Time.now

      retainer = []
      # Add `n` 1mb chunks of memory to the retainer array.
      # NOTE(review): `1.megabyte` is not core Ruby; presumably the
      # ActiveSupport numeric extension is loaded - confirm.
      memory_mb.times { retainer << "x" * 1.megabyte }

      duration_left = [start_time + duration_s - Time.now, 0].max
      Kernel.sleep(duration_left)
    end

    # cpu_spin will consume all CPU on a single core for the specified duration
    #
    # duration_s - how long to busy-loop, in seconds.
    def self.cpu_spin(duration_s)
      expected_end_time = Time.now + duration_s

      rand while Time.now < expected_end_time
    end

    # db_spin will repeatedly run `SELECT 1` against the database for the
    # specified duration, busy-waiting (not sleeping) between queries.
    #
    # duration_s - total run time, in seconds.
    # interval_s - time to busy-wait after each query, in seconds.
    def self.db_spin(duration_s, interval_s)
      expected_end_time = Time.now + duration_s

      while Time.now < expected_end_time
        ActiveRecord::Base.connection.execute("SELECT 1")

        # Never wait past the overall deadline: without this cap the final
        # pause could overrun `duration_s` by almost a whole interval.
        end_interval_time = [Time.now + interval_s, expected_end_time].min
        rand while Time.now < end_interval_time
      end
    end

    # sleep will sleep for the specified duration
    #
    # duration_s - passed straight to Kernel.sleep.
    def self.sleep(duration_s)
      Kernel.sleep(duration_s)
    end

    # Kill will send a SIGKILL signal to the current process
    def self.kill
      Process.kill("KILL", Process.pid)
    end
  end
end

View file

@ -0,0 +1,127 @@
# frozen_string_literal: true
require 'spec_helper'
# Controller spec for the chaos endpoints. For every action it checks that:
#   * with no `async` param the matching Gitlab::Chaos method is called,
#   * request params override the defaults (100 MB, 30 s duration, 1 s interval),
#   * with `async` set the matching Chaos::*Worker is enqueued instead.
# Gitlab::Chaos and the workers are mocked, so no chaos is actually performed.
describe ChaosController do
  describe '#leakmem' do
    it 'calls synchronously' do
      expect(Gitlab::Chaos).to receive(:leak_mem).with(100, 30.seconds)

      get :leakmem

      expect(response).to have_gitlab_http_status(200)
    end

    it 'calls synchronously with params' do
      expect(Gitlab::Chaos).to receive(:leak_mem).with(1, 2.seconds)

      get :leakmem, params: { memory_mb: 1, duration_s: 2 }

      expect(response).to have_gitlab_http_status(200)
    end

    it 'calls asynchronously' do
      expect(Chaos::LeakMemWorker).to receive(:perform_async).with(100, 30.seconds)

      get :leakmem, params: { async: 1 }

      expect(response).to have_gitlab_http_status(200)
    end
  end

  describe '#cpu_spin' do
    it 'calls synchronously' do
      expect(Gitlab::Chaos).to receive(:cpu_spin).with(30.seconds)

      get :cpu_spin

      expect(response).to have_gitlab_http_status(200)
    end

    it 'calls synchronously with params' do
      expect(Gitlab::Chaos).to receive(:cpu_spin).with(3.seconds)

      get :cpu_spin, params: { duration_s: 3 }

      expect(response).to have_gitlab_http_status(200)
    end

    it 'calls asynchronously' do
      expect(Chaos::CpuSpinWorker).to receive(:perform_async).with(30.seconds)

      get :cpu_spin, params: { async: 1 }

      expect(response).to have_gitlab_http_status(200)
    end
  end

  describe '#db_spin' do
    it 'calls synchronously' do
      expect(Gitlab::Chaos).to receive(:db_spin).with(30.seconds, 1.second)

      get :db_spin

      expect(response).to have_gitlab_http_status(200)
    end

    it 'calls synchronously with params' do
      expect(Gitlab::Chaos).to receive(:db_spin).with(4.seconds, 5.seconds)

      get :db_spin, params: { duration_s: 4, interval_s: 5 }

      expect(response).to have_gitlab_http_status(200)
    end

    it 'calls asynchronously' do
      expect(Chaos::DbSpinWorker).to receive(:perform_async).with(30.seconds, 1.second)

      get :db_spin, params: { async: 1 }

      expect(response).to have_gitlab_http_status(200)
    end
  end

  describe '#sleep' do
    it 'calls synchronously' do
      expect(Gitlab::Chaos).to receive(:sleep).with(30.seconds)

      get :sleep

      expect(response).to have_gitlab_http_status(200)
    end

    it 'calls synchronously with params' do
      expect(Gitlab::Chaos).to receive(:sleep).with(5.seconds)

      get :sleep, params: { duration_s: 5 }

      expect(response).to have_gitlab_http_status(200)
    end

    it 'calls asynchronously' do
      expect(Chaos::SleepWorker).to receive(:perform_async).with(30.seconds)

      get :sleep, params: { async: 1 }

      expect(response).to have_gitlab_http_status(200)
    end
  end

  describe '#kill' do
    it 'calls synchronously' do
      expect(Gitlab::Chaos).to receive(:kill).with(no_args)

      get :kill

      expect(response).to have_gitlab_http_status(200)
    end

    it 'calls asynchronously' do
      expect(Chaos::KillWorker).to receive(:perform_async).with(no_args)

      get :kill, params: { async: 1 }

      expect(response).to have_gitlab_http_status(200)
    end
  end
end