diff --git a/changelogs/unreleased/an-fix-sidekiq-histogram-buckets.yml b/changelogs/unreleased/an-fix-sidekiq-histogram-buckets.yml new file mode 100644 index 00000000000..696b8f3987e --- /dev/null +++ b/changelogs/unreleased/an-fix-sidekiq-histogram-buckets.yml @@ -0,0 +1,5 @@ +--- +title: Allow latency measurements of sidekiq jobs taking > 2.5s +merge_request: 32001 +author: +type: fixed diff --git a/lib/gitlab/sidekiq_middleware/metrics.rb b/lib/gitlab/sidekiq_middleware/metrics.rb index b06ffa9c121..3dc9521ee8b 100644 --- a/lib/gitlab/sidekiq_middleware/metrics.rb +++ b/lib/gitlab/sidekiq_middleware/metrics.rb @@ -3,6 +3,10 @@ module Gitlab module SidekiqMiddleware class Metrics + # SIDEKIQ_LATENCY_BUCKETS are latency histogram buckets better suited to Sidekiq + # timeframes than the DEFAULT_BUCKET definition. Defined in seconds. + SIDEKIQ_LATENCY_BUCKETS = [0.1, 0.25, 0.5, 1, 2.5, 5, 10, 60, 300, 600].freeze + def initialize @metrics = init_metrics end @@ -31,7 +35,7 @@ module Gitlab def init_metrics { - sidekiq_jobs_completion_seconds: ::Gitlab::Metrics.histogram(:sidekiq_jobs_completion_seconds, 'Seconds to complete sidekiq job'), + sidekiq_jobs_completion_seconds: ::Gitlab::Metrics.histogram(:sidekiq_jobs_completion_seconds, 'Seconds to complete sidekiq job', buckets: SIDEKIQ_LATENCY_BUCKETS), sidekiq_jobs_failed_total: ::Gitlab::Metrics.counter(:sidekiq_jobs_failed_total, 'Sidekiq jobs failed'), sidekiq_jobs_retried_total: ::Gitlab::Metrics.counter(:sidekiq_jobs_retried_total, 'Sidekiq jobs retried'), sidekiq_running_jobs: ::Gitlab::Metrics.gauge(:sidekiq_running_jobs, 'Number of Sidekiq jobs running', {}, :livesum) diff --git a/spec/lib/gitlab/sidekiq_middleware/metrics_spec.rb b/spec/lib/gitlab/sidekiq_middleware/metrics_spec.rb index c6df1c6a0d8..e430599bd94 100644 --- a/spec/lib/gitlab/sidekiq_middleware/metrics_spec.rb +++ b/spec/lib/gitlab/sidekiq_middleware/metrics_spec.rb @@ -13,7 +13,7 @@ describe Gitlab::SidekiqMiddleware::Metrics do let(:running_jobs_metric) { double('running jobs metric') } before do - allow(Gitlab::Metrics).to receive(:histogram).with(:sidekiq_jobs_completion_seconds, anything).and_return(completion_seconds_metric) + allow(Gitlab::Metrics).to receive(:histogram).with(:sidekiq_jobs_completion_seconds, anything, anything).and_return(completion_seconds_metric) allow(Gitlab::Metrics).to receive(:counter).with(:sidekiq_jobs_failed_total, anything).and_return(failed_total_metric) allow(Gitlab::Metrics).to receive(:counter).with(:sidekiq_jobs_retried_total, anything).and_return(retried_total_metric) allow(Gitlab::Metrics).to receive(:gauge).with(:sidekiq_running_jobs, anything, {}, :livesum).and_return(running_jobs_metric)