Merge branch 'improve-jobs-queuing-time-metric' into 'master'
Partition job_queue_duration_seconds with jobs_running_for_project Closes #43219 See merge request gitlab-org/gitlab-ce!17730
This commit is contained in:
commit
e9e800f523
|
@ -4,6 +4,9 @@ module Ci
|
||||||
class RegisterJobService
|
class RegisterJobService
|
||||||
attr_reader :runner
|
attr_reader :runner
|
||||||
|
|
||||||
|
JOB_QUEUE_DURATION_SECONDS_BUCKETS = [1, 3, 10, 30].freeze
|
||||||
|
JOBS_RUNNING_FOR_PROJECT_MAX_BUCKET = 5.freeze
|
||||||
|
|
||||||
Result = Struct.new(:build, :valid?)
|
Result = Struct.new(:build, :valid?)
|
||||||
|
|
||||||
def initialize(runner)
|
def initialize(runner)
|
||||||
|
@ -104,10 +107,22 @@ module Ci
|
||||||
end
|
end
|
||||||
|
|
||||||
def register_success(job)
|
def register_success(job)
|
||||||
job_queue_duration_seconds.observe({ shared_runner: @runner.shared? }, Time.now - job.created_at)
|
labels = { shared_runner: runner.shared?,
|
||||||
|
jobs_running_for_project: jobs_running_for_project(job) }
|
||||||
|
|
||||||
|
job_queue_duration_seconds.observe(labels, Time.now - job.queued_at)
|
||||||
attempt_counter.increment
|
attempt_counter.increment
|
||||||
end
|
end
|
||||||
|
|
||||||
|
def jobs_running_for_project(job)
|
||||||
|
return '+Inf' unless runner.shared?
|
||||||
|
|
||||||
|
# excluding currently started job
|
||||||
|
running_jobs_count = job.project.builds.running.where(runner: Ci::Runner.shared)
|
||||||
|
.limit(JOBS_RUNNING_FOR_PROJECT_MAX_BUCKET + 1).count - 1
|
||||||
|
running_jobs_count < JOBS_RUNNING_FOR_PROJECT_MAX_BUCKET ? running_jobs_count : "#{JOBS_RUNNING_FOR_PROJECT_MAX_BUCKET}+"
|
||||||
|
end
|
||||||
|
|
||||||
def failed_attempt_counter
|
def failed_attempt_counter
|
||||||
@failed_attempt_counter ||= Gitlab::Metrics.counter(:job_register_attempts_failed_total, "Counts the times a runner tries to register a job")
|
@failed_attempt_counter ||= Gitlab::Metrics.counter(:job_register_attempts_failed_total, "Counts the times a runner tries to register a job")
|
||||||
end
|
end
|
||||||
|
@ -117,7 +132,7 @@ module Ci
|
||||||
end
|
end
|
||||||
|
|
||||||
def job_queue_duration_seconds
|
def job_queue_duration_seconds
|
||||||
@job_queue_duration_seconds ||= Gitlab::Metrics.histogram(:job_queue_duration_seconds, 'Request handling execution time')
|
@job_queue_duration_seconds ||= Gitlab::Metrics.histogram(:job_queue_duration_seconds, 'Request handling execution time', {}, JOB_QUEUE_DURATION_SECONDS_BUCKETS)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
|
@ -0,0 +1,5 @@
|
||||||
|
---
|
||||||
|
title: Partition job_queue_duration_seconds with jobs_running_for_project
|
||||||
|
merge_request: 17730
|
||||||
|
author:
|
||||||
|
type: changed
|
|
@ -62,6 +62,7 @@ FactoryBot.define do
|
||||||
end
|
end
|
||||||
|
|
||||||
trait :pending do
|
trait :pending do
|
||||||
|
queued_at 'Di 29. Okt 09:50:59 CET 2013'
|
||||||
status 'pending'
|
status 'pending'
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
|
@ -370,10 +370,89 @@ module Ci
|
||||||
it_behaves_like 'validation is not active'
|
it_behaves_like 'validation is not active'
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
end
|
||||||
|
|
||||||
def execute(runner)
|
describe '#register_success' do
|
||||||
described_class.new(runner).execute.build
|
let!(:current_time) { Time.new(2018, 4, 5, 14, 0, 0) }
|
||||||
|
let!(:attempt_counter) { double('Gitlab::Metrics::NullMetric') }
|
||||||
|
let!(:job_queue_duration_seconds) { double('Gitlab::Metrics::NullMetric') }
|
||||||
|
|
||||||
|
before do
|
||||||
|
allow(Time).to receive(:now).and_return(current_time)
|
||||||
|
|
||||||
|
# Stub defaults for any metrics other than the ones we're testing
|
||||||
|
allow(Gitlab::Metrics).to receive(:counter)
|
||||||
|
.with(any_args)
|
||||||
|
.and_return(Gitlab::Metrics::NullMetric.instance)
|
||||||
|
allow(Gitlab::Metrics).to receive(:histogram)
|
||||||
|
.with(any_args)
|
||||||
|
.and_return(Gitlab::Metrics::NullMetric.instance)
|
||||||
|
|
||||||
|
# Stub tested metrics
|
||||||
|
allow(Gitlab::Metrics).to receive(:counter)
|
||||||
|
.with(:job_register_attempts_total, anything)
|
||||||
|
.and_return(attempt_counter)
|
||||||
|
allow(Gitlab::Metrics).to receive(:histogram)
|
||||||
|
.with(:job_queue_duration_seconds, anything, anything, anything)
|
||||||
|
.and_return(job_queue_duration_seconds)
|
||||||
|
|
||||||
|
project.update(shared_runners_enabled: true)
|
||||||
|
pending_job.update(created_at: current_time - 3600, queued_at: current_time - 1800)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
shared_examples 'metrics collector' do
|
||||||
|
it 'increments attempt counter' do
|
||||||
|
allow(job_queue_duration_seconds).to receive(:observe)
|
||||||
|
expect(attempt_counter).to receive(:increment)
|
||||||
|
|
||||||
|
execute(runner)
|
||||||
|
end
|
||||||
|
|
||||||
|
it 'counts job queuing time histogram with expected labels' do
|
||||||
|
allow(attempt_counter).to receive(:increment)
|
||||||
|
expect(job_queue_duration_seconds).to receive(:observe)
|
||||||
|
.with({ shared_runner: expected_shared_runner,
|
||||||
|
jobs_running_for_project: expected_jobs_running_for_project_first_job }, 1800)
|
||||||
|
|
||||||
|
execute(runner)
|
||||||
|
end
|
||||||
|
|
||||||
|
context 'when project already has running jobs' do
|
||||||
|
let!(:build2) { create( :ci_build, :running, pipeline: pipeline, runner: shared_runner) }
|
||||||
|
let!(:build3) { create( :ci_build, :running, pipeline: pipeline, runner: shared_runner) }
|
||||||
|
|
||||||
|
it 'counts job queuing time histogram with expected labels' do
|
||||||
|
allow(attempt_counter).to receive(:increment)
|
||||||
|
expect(job_queue_duration_seconds).to receive(:observe)
|
||||||
|
.with({ shared_runner: expected_shared_runner,
|
||||||
|
jobs_running_for_project: expected_jobs_running_for_project_third_job }, 1800)
|
||||||
|
|
||||||
|
execute(runner)
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
context 'when shared runner is used' do
|
||||||
|
let(:runner) { shared_runner }
|
||||||
|
let(:expected_shared_runner) { true }
|
||||||
|
let(:expected_jobs_running_for_project_first_job) { 0 }
|
||||||
|
let(:expected_jobs_running_for_project_third_job) { 2 }
|
||||||
|
|
||||||
|
it_behaves_like 'metrics collector'
|
||||||
|
end
|
||||||
|
|
||||||
|
context 'when specific runner is used' do
|
||||||
|
let(:runner) { specific_runner }
|
||||||
|
let(:expected_shared_runner) { false }
|
||||||
|
let(:expected_jobs_running_for_project_first_job) { '+Inf' }
|
||||||
|
let(:expected_jobs_running_for_project_third_job) { '+Inf' }
|
||||||
|
|
||||||
|
it_behaves_like 'metrics collector'
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
def execute(runner)
|
||||||
|
described_class.new(runner).execute.build
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
Loading…
Reference in New Issue