Adds direct monitoring for sidekiq metrics

This adds direct monitoring for Sidekiq metrics. This is done via
a Sidekiq middleware and a sampler that pulls from Sidekiq's API.
This commit is contained in:
Ryan Cobb 2019-07-29 11:53:12 +00:00 committed by Kamil Trzciński
parent 3bb3ac3d53
commit cfea48dffd
3 changed files with 106 additions and 0 deletions

View file

@ -33,6 +33,7 @@ Sidekiq.configure_server do |config|
config.redis = queues_config_hash config.redis = queues_config_hash
config.server_middleware do |chain| config.server_middleware do |chain|
chain.add Gitlab::SidekiqMiddleware::Metrics if Settings.monitoring.sidekiq_exporter
chain.add Gitlab::SidekiqMiddleware::ArgumentsLogger if ENV['SIDEKIQ_LOG_ARGUMENTS'] && !enable_json_logs chain.add Gitlab::SidekiqMiddleware::ArgumentsLogger if ENV['SIDEKIQ_LOG_ARGUMENTS'] && !enable_json_logs
chain.add Gitlab::SidekiqMiddleware::MemoryKiller if ENV['SIDEKIQ_MEMORY_KILLER_MAX_RSS'] chain.add Gitlab::SidekiqMiddleware::MemoryKiller if ENV['SIDEKIQ_MEMORY_KILLER_MAX_RSS']
chain.add Gitlab::SidekiqMiddleware::RequestStoreMiddleware unless ENV['SIDEKIQ_REQUEST_STORE'] == '0' chain.add Gitlab::SidekiqMiddleware::RequestStoreMiddleware unless ENV['SIDEKIQ_REQUEST_STORE'] == '0'

View file

@ -0,0 +1,48 @@
# frozen_string_literal: true
module Gitlab
  module SidekiqMiddleware
    # Middleware that records per-queue job metrics via ::Gitlab::Metrics:
    # a gauge of jobs currently running, counters for retried and failed
    # jobs, and a histogram of how long successful jobs took.
    class Metrics
      def initialize
        @metrics = init_metrics
      end

      # Runs the job (the given block) and records metrics around it.
      #
      # @param _worker [Object] the worker instance (unused)
      # @param job [Hash] the job payload; only 'retry_count' is read
      # @param queue [Object] queue identifier, used as the `queue` label
      # @raise re-raises whatever the job raises after counting the failure
      def call(_worker, job, queue)
        labels = create_labels(queue)

        @metrics[:sidekiq_running_jobs].increment(labels, 1)
        @metrics[:sidekiq_jobs_retried_total].increment(labels, 1) if job['retry_count'].present?

        # The duration is only observed when the job finishes without raising.
        elapsed = Benchmark.realtime { yield }
        @metrics[:sidekiq_jobs_completion_seconds].observe(labels, elapsed)
      rescue Exception # rubocop: disable Lint/RescueException
        # Count every failure, whatever its class, then let it propagate.
        @metrics[:sidekiq_jobs_failed_total].increment(labels, 1)
        raise
      ensure
        # Success or failure, the job is no longer running.
        @metrics[:sidekiq_running_jobs].increment(labels, -1)
      end

      private

      # Builds the metric objects this middleware writes to, keyed by name.
      def init_metrics
        registry = ::Gitlab::Metrics

        completion_seconds = registry.histogram(:sidekiq_jobs_completion_seconds, 'Seconds to complete sidekiq job')
        failed_total = registry.counter(:sidekiq_jobs_failed_total, 'Sidekiq jobs failed')
        retried_total = registry.counter(:sidekiq_jobs_retried_total, 'Sidekiq jobs retried')
        running_jobs = registry.gauge(:sidekiq_running_jobs, 'Number of Sidekiq jobs running', {}, :livesum)

        {
          sidekiq_jobs_completion_seconds: completion_seconds,
          sidekiq_jobs_failed_total: failed_total,
          sidekiq_jobs_retried_total: retried_total,
          sidekiq_running_jobs: running_jobs
        }
      end

      # Labels attached to every metric sample: just the queue.
      def create_labels(queue)
        { queue: queue }
      end
    end
  end
end

View file

@ -0,0 +1,57 @@
# frozen_string_literal: true
require 'spec_helper'
# Unit spec for the Sidekiq metrics middleware. Every metric object handed
# out by Gitlab::Metrics is replaced with a plain double, so any metric call
# that an example has not explicitly allowed or expected fails that example.
describe Gitlab::SidekiqMiddleware::Metrics do
  describe '#call' do
    let(:middleware) { described_class.new }
    let(:worker) { double(:worker) }
    let(:labels) { { queue: :test } }
    let(:completion_seconds_metric) { double('completion seconds metric') }
    let(:failed_total_metric) { double('failed total metric') }
    let(:retried_total_metric) { double('retried total metric') }
    let(:running_jobs_metric) { double('running jobs metric') }

    before do
      allow(Gitlab::Metrics).to receive(:histogram).with(:sidekiq_jobs_completion_seconds, anything).and_return(completion_seconds_metric)
      allow(Gitlab::Metrics).to receive(:counter).with(:sidekiq_jobs_failed_total, anything).and_return(failed_total_metric)
      allow(Gitlab::Metrics).to receive(:counter).with(:sidekiq_jobs_retried_total, anything).and_return(retried_total_metric)
      allow(Gitlab::Metrics).to receive(:gauge).with(:sidekiq_running_jobs, anything, {}, :livesum).and_return(running_jobs_metric)

      # The running-jobs gauge is touched on every call, so allow it globally.
      allow(running_jobs_metric).to receive(:increment)
    end

    it 'yields block' do
      allow(completion_seconds_metric).to receive(:observe)

      expect { |b| middleware.call(worker, {}, :test, &b) }.to yield_control.once
    end

    it 'sets metrics' do
      expect(running_jobs_metric).to receive(:increment).with(labels, 1)
      expect(running_jobs_metric).to receive(:increment).with(labels, -1)
      expect(completion_seconds_metric).to receive(:observe).with(labels, kind_of(Numeric))

      middleware.call(worker, {}, :test) { nil }
    end

    context 'when job is retried' do
      it 'sets sidekiq_jobs_retried_total metric' do
        allow(completion_seconds_metric).to receive(:observe)

        expect(retried_total_metric).to receive(:increment).with(labels, 1)

        middleware.call(worker, { 'retry_count' => 1 }, :test) { nil }
      end
    end

    context 'when error is raised' do
      it 'sets sidekiq_jobs_failed_total and reraises' do
        expect(failed_total_metric).to receive(:increment).with(labels, 1)

        # Pin the error class and message: a bare `raise_error` would also be
        # satisfied by an unrelated failure (e.g. an unexpected message sent
        # to one of the doubles), hiding a broken middleware.
        expect { middleware.call(worker, {}, :test) { raise 'job failed' } }
          .to raise_error(RuntimeError, 'job failed')
      end
    end
  end
end