Merge branch 'improve-jobs-queuing-time-metric' into 'master'

Partition job_queue_duration_seconds with jobs_running_for_project

Closes #43219

See merge request gitlab-org/gitlab-ce!17730
This commit is contained in:
Kamil Trzciński 2018-04-05 18:34:17 +00:00
commit e9e800f523
4 changed files with 104 additions and 4 deletions

View File

@ -4,6 +4,9 @@ module Ci
class RegisterJobService
attr_reader :runner
JOB_QUEUE_DURATION_SECONDS_BUCKETS = [1, 3, 10, 30].freeze
JOBS_RUNNING_FOR_PROJECT_MAX_BUCKET = 5.freeze
Result = Struct.new(:build, :valid?)
def initialize(runner)
@ -104,10 +107,22 @@ module Ci
end
def register_success(job)
job_queue_duration_seconds.observe({ shared_runner: @runner.shared? }, Time.now - job.created_at)
labels = { shared_runner: runner.shared?,
jobs_running_for_project: jobs_running_for_project(job) }
job_queue_duration_seconds.observe(labels, Time.now - job.queued_at)
attempt_counter.increment
end
def jobs_running_for_project(job)
return '+Inf' unless runner.shared?
# excluding currently started job
running_jobs_count = job.project.builds.running.where(runner: Ci::Runner.shared)
.limit(JOBS_RUNNING_FOR_PROJECT_MAX_BUCKET + 1).count - 1
running_jobs_count < JOBS_RUNNING_FOR_PROJECT_MAX_BUCKET ? running_jobs_count : "#{JOBS_RUNNING_FOR_PROJECT_MAX_BUCKET}+"
end
def failed_attempt_counter
@failed_attempt_counter ||= Gitlab::Metrics.counter(:job_register_attempts_failed_total, "Counts the times a runner tries to register a job")
end
@ -117,7 +132,7 @@ module Ci
end
def job_queue_duration_seconds
@job_queue_duration_seconds ||= Gitlab::Metrics.histogram(:job_queue_duration_seconds, 'Request handling execution time')
@job_queue_duration_seconds ||= Gitlab::Metrics.histogram(:job_queue_duration_seconds, 'Request handling execution time', {}, JOB_QUEUE_DURATION_SECONDS_BUCKETS)
end
end
end

View File

@ -0,0 +1,5 @@
---
title: Partition job_queue_duration_seconds with jobs_running_for_project
merge_request: 17730
author:
type: changed

View File

@ -62,6 +62,7 @@ FactoryBot.define do
end
trait :pending do
queued_at 'Di 29. Okt 09:50:59 CET 2013'
status 'pending'
end

View File

@ -370,10 +370,89 @@ module Ci
it_behaves_like 'validation is not active'
end
end
end
def execute(runner)
described_class.new(runner).execute.build
describe '#register_success' do
let!(:current_time) { Time.new(2018, 4, 5, 14, 0, 0) }
let!(:attempt_counter) { double('Gitlab::Metrics::NullMetric') }
let!(:job_queue_duration_seconds) { double('Gitlab::Metrics::NullMetric') }
before do
allow(Time).to receive(:now).and_return(current_time)
# Stub defaults for any metrics other than the ones we're testing
allow(Gitlab::Metrics).to receive(:counter)
.with(any_args)
.and_return(Gitlab::Metrics::NullMetric.instance)
allow(Gitlab::Metrics).to receive(:histogram)
.with(any_args)
.and_return(Gitlab::Metrics::NullMetric.instance)
# Stub tested metrics
allow(Gitlab::Metrics).to receive(:counter)
.with(:job_register_attempts_total, anything)
.and_return(attempt_counter)
allow(Gitlab::Metrics).to receive(:histogram)
.with(:job_queue_duration_seconds, anything, anything, anything)
.and_return(job_queue_duration_seconds)
project.update(shared_runners_enabled: true)
pending_job.update(created_at: current_time - 3600, queued_at: current_time - 1800)
end
shared_examples 'metrics collector' do
it 'increments attempt counter' do
allow(job_queue_duration_seconds).to receive(:observe)
expect(attempt_counter).to receive(:increment)
execute(runner)
end
it 'counts job queuing time histogram with expected labels' do
allow(attempt_counter).to receive(:increment)
expect(job_queue_duration_seconds).to receive(:observe)
.with({ shared_runner: expected_shared_runner,
jobs_running_for_project: expected_jobs_running_for_project_first_job }, 1800)
execute(runner)
end
context 'when project already has running jobs' do
let!(:build2) { create( :ci_build, :running, pipeline: pipeline, runner: shared_runner) }
let!(:build3) { create( :ci_build, :running, pipeline: pipeline, runner: shared_runner) }
it 'counts job queuing time histogram with expected labels' do
allow(attempt_counter).to receive(:increment)
expect(job_queue_duration_seconds).to receive(:observe)
.with({ shared_runner: expected_shared_runner,
jobs_running_for_project: expected_jobs_running_for_project_third_job }, 1800)
execute(runner)
end
end
end
context 'when shared runner is used' do
let(:runner) { shared_runner }
let(:expected_shared_runner) { true }
let(:expected_jobs_running_for_project_first_job) { 0 }
let(:expected_jobs_running_for_project_third_job) { 2 }
it_behaves_like 'metrics collector'
end
context 'when specific runner is used' do
let(:runner) { specific_runner }
let(:expected_shared_runner) { false }
let(:expected_jobs_running_for_project_first_job) { '+Inf' }
let(:expected_jobs_running_for_project_third_job) { '+Inf' }
it_behaves_like 'metrics collector'
end
end
def execute(runner)
described_class.new(runner).execute.build
end
end
end