Adds direct monitoring for sidekiq metrics
This adds direct monitoring for Sidekiq metrics. It is done via a Sidekiq server middleware and a sampler that pulls from Sidekiq's API.
This commit is contained in:
parent
3bb3ac3d53
commit
cfea48dffd
3 changed files with 106 additions and 0 deletions
|
@ -33,6 +33,7 @@ Sidekiq.configure_server do |config|
|
||||||
config.redis = queues_config_hash
|
config.redis = queues_config_hash
|
||||||
|
|
||||||
config.server_middleware do |chain|
|
config.server_middleware do |chain|
|
||||||
|
chain.add Gitlab::SidekiqMiddleware::Metrics if Settings.monitoring.sidekiq_exporter
|
||||||
chain.add Gitlab::SidekiqMiddleware::ArgumentsLogger if ENV['SIDEKIQ_LOG_ARGUMENTS'] && !enable_json_logs
|
chain.add Gitlab::SidekiqMiddleware::ArgumentsLogger if ENV['SIDEKIQ_LOG_ARGUMENTS'] && !enable_json_logs
|
||||||
chain.add Gitlab::SidekiqMiddleware::MemoryKiller if ENV['SIDEKIQ_MEMORY_KILLER_MAX_RSS']
|
chain.add Gitlab::SidekiqMiddleware::MemoryKiller if ENV['SIDEKIQ_MEMORY_KILLER_MAX_RSS']
|
||||||
chain.add Gitlab::SidekiqMiddleware::RequestStoreMiddleware unless ENV['SIDEKIQ_REQUEST_STORE'] == '0'
|
chain.add Gitlab::SidekiqMiddleware::RequestStoreMiddleware unless ENV['SIDEKIQ_REQUEST_STORE'] == '0'
|
||||||
|
|
48
lib/gitlab/sidekiq_middleware/metrics.rb
Normal file
48
lib/gitlab/sidekiq_middleware/metrics.rb
Normal file
|
@ -0,0 +1,48 @@
|
||||||
|
# frozen_string_literal: true
|
||||||
|
|
||||||
|
module Gitlab
  module SidekiqMiddleware
    # Sidekiq server middleware that records per-queue job metrics through
    # ::Gitlab::Metrics:
    #
    # * sidekiq_running_jobs            - gauge, +1 when a job starts, -1 when it ends
    # * sidekiq_jobs_retried_total      - counter, +1 when the payload has a retry_count
    # * sidekiq_jobs_completion_seconds - histogram, wall time of jobs that finished
    #                                     without raising
    # * sidekiq_jobs_failed_total       - counter, +1 when the job itself raises
    class Metrics
      def initialize
        @metrics = init_metrics
      end

      # Runs the job (the given block) and records metrics around it.
      #
      # @param _worker [Object] the worker instance; not used
      # @param job [Hash] the job payload; only 'retry_count' is read
      # @param queue [Object] queue identifier, used as the :queue label value
      # @yield executes the next middleware / the job
      # @raise re-raises whatever the job raises, after counting the failure
      def call(_worker, job, queue)
        labels = create_labels(queue)

        # Incremented outside the begin/ensure so the gauge is only
        # decremented when this increment actually happened.
        @metrics[:sidekiq_running_jobs].increment(labels, 1)

        begin
          @metrics[:sidekiq_jobs_retried_total].increment(labels, 1) if job['retry_count'].present?

          realtime = Benchmark.realtime { run_job(labels) { yield } }

          # Only reached when the job did not raise; failed jobs are not
          # observed in the completion histogram.
          @metrics[:sidekiq_jobs_completion_seconds].observe(labels, realtime)
        ensure
          @metrics[:sidekiq_running_jobs].increment(labels, -1)
        end
      end

      private

      # Yields to the job and counts a failure when the job raises.
      #
      # The rescue is scoped to the job alone so that an error raised by the
      # instrumentation (for example by the histogram's `observe`) is not
      # reported as a failed job.
      def run_job(labels)
        yield
      rescue Exception # rubocop: disable Lint/RescueException
        @metrics[:sidekiq_jobs_failed_total].increment(labels, 1)
        raise
      end

      # Builds the metric objects once per middleware instance.
      def init_metrics
        {
          sidekiq_jobs_completion_seconds: ::Gitlab::Metrics.histogram(:sidekiq_jobs_completion_seconds, 'Seconds to complete sidekiq job'),
          sidekiq_jobs_failed_total: ::Gitlab::Metrics.counter(:sidekiq_jobs_failed_total, 'Sidekiq jobs failed'),
          sidekiq_jobs_retried_total: ::Gitlab::Metrics.counter(:sidekiq_jobs_retried_total, 'Sidekiq jobs retried'),
          # NOTE(review): :livesum presumably selects how the gauge is
          # aggregated across processes - confirm against ::Gitlab::Metrics.gauge.
          sidekiq_running_jobs: ::Gitlab::Metrics.gauge(:sidekiq_running_jobs, 'Number of Sidekiq jobs running', {}, :livesum)
        }
      end

      # Labels attached to every metric recorded for a job.
      def create_labels(queue)
        {
          queue: queue
        }
      end
    end
  end
end
|
57
spec/lib/gitlab/sidekiq_middleware/metrics_spec.rb
Normal file
57
spec/lib/gitlab/sidekiq_middleware/metrics_spec.rb
Normal file
|
@ -0,0 +1,57 @@
|
||||||
|
# frozen_string_literal: true
|
||||||
|
|
||||||
|
require 'spec_helper'
|
||||||
|
|
||||||
|
# Exercises the middleware against doubles standing in for the metric objects
# that Gitlab::Metrics would normally return.
describe Gitlab::SidekiqMiddleware::Metrics do
  describe '#call' do
    let(:middleware) { described_class.new }
    let(:worker) { double(:worker) }
    let(:labels) { { queue: :test } }

    let(:completion_seconds_metric) { double('completion seconds metric') }
    let(:failed_total_metric) { double('failed total metric') }
    let(:retried_total_metric) { double('retried total metric') }
    let(:running_jobs_metric) { double('running jobs metric') }

    before do
      allow(Gitlab::Metrics).to receive(:histogram).with(:sidekiq_jobs_completion_seconds, anything).and_return(completion_seconds_metric)
      allow(Gitlab::Metrics).to receive(:counter).with(:sidekiq_jobs_failed_total, anything).and_return(failed_total_metric)
      allow(Gitlab::Metrics).to receive(:counter).with(:sidekiq_jobs_retried_total, anything).and_return(retried_total_metric)
      allow(Gitlab::Metrics).to receive(:gauge).with(:sidekiq_running_jobs, anything, {}, :livesum).and_return(running_jobs_metric)

      allow(running_jobs_metric).to receive(:increment)
    end

    it 'yields block' do
      allow(completion_seconds_metric).to receive(:observe)

      expect { |b| middleware.call(worker, {}, :test, &b) }.to yield_control.once
    end

    it 'sets metrics' do
      expect(running_jobs_metric).to receive(:increment).with(labels, 1)
      expect(running_jobs_metric).to receive(:increment).with(labels, -1)
      expect(completion_seconds_metric).to receive(:observe).with(labels, kind_of(Numeric))

      middleware.call(worker, {}, :test) { nil }
    end

    context 'when job is retried' do
      it 'sets sidekiq_jobs_retried_total metric' do
        allow(completion_seconds_metric).to receive(:observe)

        expect(retried_total_metric).to receive(:increment).with(labels, 1)

        middleware.call(worker, { 'retry_count' => 1 }, :test) { nil }
      end
    end

    context 'when error is raised' do
      it 'sets sidekiq_jobs_failed_total and reraises' do
        expect(failed_total_metric).to receive(:increment).with(labels, 1)

        # Pin the error class and message: a bare `raise_error` would also
        # pass on unrelated errors (e.g. an unexpected message sent to one of
        # the doubles) that the middleware re-raises.
        expect { middleware.call(worker, {}, :test) { raise StandardError, 'job failed' } }
          .to raise_error(StandardError, 'job failed')
      end
    end
  end
end
|
Loading…
Reference in a new issue