Adds direct monitoring for sidekiq metrics
This adds direct monitoring for Sidekiq metrics. This is done via a Sidekiq middleware and a sampler that pulls from Sidekiq's API.
This commit is contained in:
parent
3bb3ac3d53
commit
cfea48dffd
|
@ -33,6 +33,7 @@ Sidekiq.configure_server do |config|
|
|||
config.redis = queues_config_hash
|
||||
|
||||
config.server_middleware do |chain|
|
||||
chain.add Gitlab::SidekiqMiddleware::Metrics if Settings.monitoring.sidekiq_exporter
|
||||
chain.add Gitlab::SidekiqMiddleware::ArgumentsLogger if ENV['SIDEKIQ_LOG_ARGUMENTS'] && !enable_json_logs
|
||||
chain.add Gitlab::SidekiqMiddleware::MemoryKiller if ENV['SIDEKIQ_MEMORY_KILLER_MAX_RSS']
|
||||
chain.add Gitlab::SidekiqMiddleware::RequestStoreMiddleware unless ENV['SIDEKIQ_REQUEST_STORE'] == '0'
|
||||
|
|
|
@ -0,0 +1,48 @@
|
|||
# frozen_string_literal: true
|
||||
|
||||
module Gitlab
  module SidekiqMiddleware
    # Middleware that records per-queue metrics around each job it wraps:
    # a gauge of running jobs, a retry counter, a failure counter and a
    # histogram of completion time.
    class Metrics
      def initialize
        @metrics = init_metrics
      end

      # Wraps the execution of a single job.
      #
      # @param _worker [Object] the worker instance (unused)
      # @param job [Hash] the job payload; only 'retry_count' is read
      # @param queue [Object] queue identifier, used as the `queue` label
      # @yield runs the wrapped job
      # @raise re-raises whatever was raised inside the method body, after
      #   counting it as a failure
      def call(_worker, job, queue)
        labels = create_labels(queue)
        @metrics[:sidekiq_running_jobs].increment(labels, 1)

        @metrics[:sidekiq_jobs_retried_total].increment(labels, 1) if job['retry_count'].present?

        elapsed = Benchmark.realtime { yield }
        # Only reached when the job did not raise, so failed jobs are not
        # part of the completion histogram.
        @metrics[:sidekiq_jobs_completion_seconds].observe(labels, elapsed)
      rescue Exception # rubocop: disable Lint/RescueException
        # Every exception class is counted, then propagated untouched.
        @metrics[:sidekiq_jobs_failed_total].increment(labels, 1)
        raise
      ensure
        # Balance the increment done on entry, whatever the outcome.
        @metrics[:sidekiq_running_jobs].increment(labels, -1)
      end

      private

      # Builds the lookup table of metric objects used by #call.
      def init_metrics
        completion = ::Gitlab::Metrics.histogram(:sidekiq_jobs_completion_seconds, 'Seconds to complete sidekiq job')
        failed = ::Gitlab::Metrics.counter(:sidekiq_jobs_failed_total, 'Sidekiq jobs failed')
        retried = ::Gitlab::Metrics.counter(:sidekiq_jobs_retried_total, 'Sidekiq jobs retried')
        running = ::Gitlab::Metrics.gauge(:sidekiq_running_jobs, 'Number of Sidekiq jobs running', {}, :livesum)

        {
          sidekiq_jobs_completion_seconds: completion,
          sidekiq_jobs_failed_total: failed,
          sidekiq_jobs_retried_total: retried,
          sidekiq_running_jobs: running
        }
      end

      # Label set attached to every metric update.
      def create_labels(queue)
        { queue: queue }
      end
    end
  end
end
|
|
@ -0,0 +1,57 @@
|
|||
# frozen_string_literal: true
|
||||
|
||||
require 'spec_helper'
|
||||
|
||||
describe Gitlab::SidekiqMiddleware::Metrics do
  describe '#call' do
    let(:middleware) { described_class.new }
    let(:worker) { double(:worker) }

    # Every metric update is expected to carry the queue passed to #call.
    let(:labels) { { queue: :test } }

    let(:completion_seconds_metric) { double('completion seconds metric') }
    let(:failed_total_metric) { double('failed total metric') }
    let(:retried_total_metric) { double('retried total metric') }
    let(:running_jobs_metric) { double('running jobs metric') }

    before do
      # Hand the middleware plain doubles instead of real metric objects.
      allow(Gitlab::Metrics).to receive(:histogram).with(:sidekiq_jobs_completion_seconds, anything).and_return(completion_seconds_metric)
      allow(Gitlab::Metrics).to receive(:counter).with(:sidekiq_jobs_failed_total, anything).and_return(failed_total_metric)
      allow(Gitlab::Metrics).to receive(:counter).with(:sidekiq_jobs_retried_total, anything).and_return(retried_total_metric)
      allow(Gitlab::Metrics).to receive(:gauge).with(:sidekiq_running_jobs, anything, {}, :livesum).and_return(running_jobs_metric)

      allow(running_jobs_metric).to receive(:increment)
    end

    it 'yields block' do
      allow(completion_seconds_metric).to receive(:observe)

      expect { |b| middleware.call(worker, {}, :test, &b) }.to yield_control.once
    end

    it 'sets metrics' do
      expect(running_jobs_metric).to receive(:increment).with(labels, 1)
      expect(running_jobs_metric).to receive(:increment).with(labels, -1)
      expect(completion_seconds_metric).to receive(:observe).with(labels, kind_of(Numeric))

      middleware.call(worker, {}, :test) { nil }
    end

    context 'when job is retried' do
      it 'sets sidekiq_jobs_retried_total metric' do
        allow(completion_seconds_metric).to receive(:observe)

        expect(retried_total_metric).to receive(:increment).with(labels, 1)

        middleware.call(worker, { 'retry_count' => 1 }, :test) { nil }
      end
    end

    context 'when error is raised' do
      it 'sets sidekiq_jobs_failed_total and reraises' do
        expect(failed_total_metric).to receive(:increment).with(labels, 1)

        # Pin the error class and message: a bare `raise_error` would also
        # pass on an unrelated failure inside the middleware itself.
        expect { middleware.call(worker, {}, :test) { raise StandardError, 'job failed' } }
          .to raise_error(StandardError, 'job failed')
      end
    end
  end
end
|
Loading…
Reference in New Issue