diff --git a/config/initializers/sidekiq.rb b/config/initializers/sidekiq.rb index 166fb9b6916..9f88fb9895b 100644 --- a/config/initializers/sidekiq.rb +++ b/config/initializers/sidekiq.rb @@ -33,6 +33,7 @@ Sidekiq.configure_server do |config| config.redis = queues_config_hash config.server_middleware do |chain| + chain.add Gitlab::SidekiqMiddleware::Metrics if Settings.monitoring.sidekiq_exporter chain.add Gitlab::SidekiqMiddleware::ArgumentsLogger if ENV['SIDEKIQ_LOG_ARGUMENTS'] && !enable_json_logs chain.add Gitlab::SidekiqMiddleware::MemoryKiller if ENV['SIDEKIQ_MEMORY_KILLER_MAX_RSS'] chain.add Gitlab::SidekiqMiddleware::RequestStoreMiddleware unless ENV['SIDEKIQ_REQUEST_STORE'] == '0' diff --git a/lib/gitlab/sidekiq_middleware/metrics.rb b/lib/gitlab/sidekiq_middleware/metrics.rb new file mode 100644 index 00000000000..b06ffa9c121 --- /dev/null +++ b/lib/gitlab/sidekiq_middleware/metrics.rb @@ -0,0 +1,48 @@ +# frozen_string_literal: true + +module Gitlab + module SidekiqMiddleware + class Metrics + def initialize + @metrics = init_metrics + end + + def call(_worker, job, queue) + labels = create_labels(queue) + @metrics[:sidekiq_running_jobs].increment(labels, 1) + + if job['retry_count'].present? + @metrics[:sidekiq_jobs_retried_total].increment(labels, 1) + end + + realtime = Benchmark.realtime do + yield + end + + @metrics[:sidekiq_jobs_completion_seconds].observe(labels, realtime) + rescue Exception # rubocop: disable Lint/RescueException + @metrics[:sidekiq_jobs_failed_total].increment(labels, 1) + raise + ensure + @metrics[:sidekiq_running_jobs].increment(labels, -1) + end + + private + + def init_metrics + { + sidekiq_jobs_completion_seconds: ::Gitlab::Metrics.histogram(:sidekiq_jobs_completion_seconds, 'Seconds to complete sidekiq job'), + sidekiq_jobs_failed_total: ::Gitlab::Metrics.counter(:sidekiq_jobs_failed_total, 'Sidekiq jobs failed'), + sidekiq_jobs_retried_total: ::Gitlab::Metrics.counter(:sidekiq_jobs_retried_total, 'Sidekiq jobs retried'), + sidekiq_running_jobs: ::Gitlab::Metrics.gauge(:sidekiq_running_jobs, 'Number of Sidekiq jobs running', {}, :livesum) + } + end + + def create_labels(queue) + { + queue: queue + } + end + end + end +end diff --git a/spec/lib/gitlab/sidekiq_middleware/metrics_spec.rb b/spec/lib/gitlab/sidekiq_middleware/metrics_spec.rb new file mode 100644 index 00000000000..c6df1c6a0d8 --- /dev/null +++ b/spec/lib/gitlab/sidekiq_middleware/metrics_spec.rb @@ -0,0 +1,57 @@ +# frozen_string_literal: true + +require 'spec_helper' + +describe Gitlab::SidekiqMiddleware::Metrics do + describe '#call' do + let(:middleware) { described_class.new } + let(:worker) { double(:worker) } + + let(:completion_seconds_metric) { double('completion seconds metric') } + let(:failed_total_metric) { double('failed total metric') } + let(:retried_total_metric) { double('retried total metric') } + let(:running_jobs_metric) { double('running jobs metric') } + + before do + allow(Gitlab::Metrics).to receive(:histogram).with(:sidekiq_jobs_completion_seconds, anything).and_return(completion_seconds_metric) + allow(Gitlab::Metrics).to receive(:counter).with(:sidekiq_jobs_failed_total, anything).and_return(failed_total_metric) + allow(Gitlab::Metrics).to receive(:counter).with(:sidekiq_jobs_retried_total, anything).and_return(retried_total_metric) + allow(Gitlab::Metrics).to receive(:gauge).with(:sidekiq_running_jobs, anything, {}, :livesum).and_return(running_jobs_metric) + + allow(running_jobs_metric).to receive(:increment) + end + + it 'yields block' do + allow(completion_seconds_metric).to receive(:observe) + + expect { |b| middleware.call(worker, {}, :test, &b) }.to yield_control.once + end + + it 'sets metrics' do + labels = { queue: :test } + + expect(running_jobs_metric).to receive(:increment).with(labels, 1) + expect(running_jobs_metric).to receive(:increment).with(labels, -1) + expect(completion_seconds_metric).to receive(:observe).with(labels, kind_of(Numeric)) + + middleware.call(worker, {}, :test) { nil } + end + + context 'when job is retried' do + it 'sets sidekiq_jobs_retried_total metric' do + allow(completion_seconds_metric).to receive(:observe) + + expect(retried_total_metric).to receive(:increment) + + middleware.call(worker, { 'retry_count' => 1 }, :test) { nil } + end + end + + context 'when error is raised' do + it 'sets sidekiq_jobs_failed_total and reraises' do + expect(failed_total_metric).to receive(:increment) + expect { middleware.call(worker, {}, :test) { raise } }.to raise_error + end + end + end +end