Merge branch 'pawel/reduce_cardinality_of_prometheus_metrics' into 'master'

Reduce cardinality of some of GitLab's Prometheus metrics and fix observed duration reporting.

Closes #41045

See merge request gitlab-org/gitlab-ce!15881
This commit is contained in:
Douwe Maan 2017-12-21 15:56:45 +00:00
commit 92e15071c1
9 changed files with 39 additions and 26 deletions

View File

@ -0,0 +1,5 @@
---
title: Reduce the number of buckets in gitlab_cache_operation_duration_seconds metric
merge_request: 15881
author:
type: changed

View File

@ -38,6 +38,7 @@ module Gitlab
# This is memoized since this method is called for every instrumented # This is memoized since this method is called for every instrumented
# method. Loading data from an external cache on every method call slows # method. Loading data from an external cache on every method call slows
# things down too much. # things down too much.
# in milliseconds
@method_call_threshold ||= settings[:method_call_threshold] @method_call_threshold ||= settings[:method_call_threshold]
end end

View File

@ -35,8 +35,8 @@ module Gitlab
@transaction = transaction @transaction = transaction
@name = name @name = name
@labels = { module: @module_name, method: @method_name } @labels = { module: @module_name, method: @method_name }
@real_time = 0 @real_time = 0.0
@cpu_time = 0 @cpu_time = 0.0
@call_count = 0 @call_count = 0
end end
@ -54,7 +54,7 @@ module Gitlab
@call_count += 1 @call_count += 1
if call_measurement_enabled? && above_threshold? if call_measurement_enabled? && above_threshold?
self.class.call_duration_histogram.observe(@transaction.labels.merge(labels), real_time / 1000.0) self.class.call_duration_histogram.observe(@transaction.labels.merge(labels), real_time)
end end
retval retval
@ -65,8 +65,8 @@ module Gitlab
Metric.new( Metric.new(
Instrumentation.series, Instrumentation.series,
{ {
duration: real_time, duration: real_time.in_milliseconds.to_i,
cpu_duration: cpu_time, cpu_duration: cpu_time.in_milliseconds.to_i,
call_count: call_count call_count: call_count
}, },
method: @name method: @name
@ -76,7 +76,7 @@ module Gitlab
# Returns true if the total runtime of this method exceeds the method call # Returns true if the total runtime of this method exceeds the method call
# threshold. # threshold.
def above_threshold? def above_threshold?
real_time >= Metrics.method_call_threshold real_time.in_milliseconds >= Metrics.method_call_threshold
end end
def call_measurement_enabled? def call_measurement_enabled?

View File

@ -52,7 +52,7 @@ module Gitlab
metrics[:memory_usage].set(labels, System.memory_usage) metrics[:memory_usage].set(labels, System.memory_usage)
metrics[:file_descriptors].set(labels, System.file_descriptor_count) metrics[:file_descriptors].set(labels, System.file_descriptor_count)
metrics[:sampler_duration].observe(labels.merge(worker_label), (System.monotonic_time - start_time) / 1000.0) metrics[:sampler_duration].observe(labels.merge(worker_label), System.monotonic_time - start_time)
ensure ensure
GC::Profiler.clear GC::Profiler.clear
end end

View File

@ -66,7 +66,7 @@ module Gitlab
:gitlab_cache_operation_duration_seconds, :gitlab_cache_operation_duration_seconds,
'Cache access time', 'Cache access time',
Transaction::BASE_LABELS.merge({ action: nil }), Transaction::BASE_LABELS.merge({ action: nil }),
[0.001, 0.002, 0.005, 0.01, 0.02, 0.05, 0.1, 0.500, 2.0, 10.0] [0.001, 0.01, 0.1, 1, 10]
) )
end end

View File

@ -35,27 +35,27 @@ module Gitlab
if Process.const_defined?(:CLOCK_THREAD_CPUTIME_ID) if Process.const_defined?(:CLOCK_THREAD_CPUTIME_ID)
def self.cpu_time def self.cpu_time
Process Process
.clock_gettime(Process::CLOCK_THREAD_CPUTIME_ID, :millisecond) .clock_gettime(Process::CLOCK_THREAD_CPUTIME_ID, :float_second)
end end
else else
def self.cpu_time def self.cpu_time
Process Process
.clock_gettime(Process::CLOCK_PROCESS_CPUTIME_ID, :millisecond) .clock_gettime(Process::CLOCK_PROCESS_CPUTIME_ID, :float_second)
end end
end end
# Returns the current real time in a given precision. # Returns the current real time in a given precision.
# #
# Returns the time as a Fixnum. # Returns the time as a Float for precision = :float_second.
def self.real_time(precision = :millisecond) def self.real_time(precision = :float_second)
Process.clock_gettime(Process::CLOCK_REALTIME, precision) Process.clock_gettime(Process::CLOCK_REALTIME, precision)
end end
# Returns the current monotonic clock time in a given precision. # Returns the current monotonic clock time as seconds with microseconds precision.
# #
# Returns the time as a Fixnum. # Returns the time as a Float.
def self.monotonic_time(precision = :millisecond) def self.monotonic_time
Process.clock_gettime(Process::CLOCK_MONOTONIC, precision) Process.clock_gettime(Process::CLOCK_MONOTONIC, :float_second)
end end
end end
end end

View File

@ -35,6 +35,10 @@ module Gitlab
@finished_at ? (@finished_at - @started_at) : 0.0 @finished_at ? (@finished_at - @started_at) : 0.0
end end
def duration_milliseconds
duration.in_milliseconds.to_i
end
def allocated_memory def allocated_memory
@memory_after - @memory_before @memory_after - @memory_before
end end
@ -50,7 +54,7 @@ module Gitlab
@memory_after = System.memory_usage @memory_after = System.memory_usage
@finished_at = System.monotonic_time @finished_at = System.monotonic_time
self.class.metric_transaction_duration_seconds.observe(labels, duration * 1000) self.class.metric_transaction_duration_seconds.observe(labels, duration)
self.class.metric_transaction_allocated_memory_bytes.observe(labels, allocated_memory * 1024.0) self.class.metric_transaction_allocated_memory_bytes.observe(labels, allocated_memory * 1024.0)
Thread.current[THREAD_KEY] = nil Thread.current[THREAD_KEY] = nil
@ -97,7 +101,7 @@ module Gitlab
end end
def track_self def track_self
values = { duration: duration, allocated_memory: allocated_memory } values = { duration: duration_milliseconds, allocated_memory: allocated_memory }
@values.each do |name, value| @values.each do |name, value|
values[name] = value values[name] = value

View File

@ -96,14 +96,17 @@ describe Gitlab::Metrics::MethodCall do
describe '#to_metric' do describe '#to_metric' do
it 'returns a Metric instance' do it 'returns a Metric instance' do
expect(method_call).to receive(:real_time).and_return(4.0001)
expect(method_call).to receive(:cpu_time).and_return(3.0001)
method_call.measure { 'foo' } method_call.measure { 'foo' }
metric = method_call.to_metric metric = method_call.to_metric
expect(metric).to be_an_instance_of(Gitlab::Metrics::Metric) expect(metric).to be_an_instance_of(Gitlab::Metrics::Metric)
expect(metric.series).to eq('rails_method_calls') expect(metric.series).to eq('rails_method_calls')
expect(metric.values[:duration]).to be_a_kind_of(Numeric) expect(metric.values[:duration]).to eq(4000)
expect(metric.values[:cpu_duration]).to be_a_kind_of(Numeric) expect(metric.values[:cpu_duration]).to eq(3000)
expect(metric.values[:call_count]).to be_an(Integer) expect(metric.values[:call_count]).to be_an(Integer)
expect(metric.tags).to eq({ method: 'Foo#bar' }) expect(metric.tags).to eq({ method: 'Foo#bar' })
@ -116,13 +119,13 @@ describe Gitlab::Metrics::MethodCall do
end end
it 'returns false when the total call time is not above the threshold' do it 'returns false when the total call time is not above the threshold' do
expect(method_call).to receive(:real_time).and_return(9) expect(method_call).to receive(:real_time).and_return(0.009)
expect(method_call.above_threshold?).to eq(false) expect(method_call.above_threshold?).to eq(false)
end end
it 'returns true when the total call time is above the threshold' do it 'returns true when the total call time is above the threshold' do
expect(method_call).to receive(:real_time).and_return(9000) expect(method_call).to receive(:real_time).and_return(9)
expect(method_call.above_threshold?).to eq(true) expect(method_call.above_threshold?).to eq(true)
end end

View File

@ -29,19 +29,19 @@ describe Gitlab::Metrics::System do
describe '.cpu_time' do describe '.cpu_time' do
it 'returns a Fixnum' do it 'returns a Fixnum' do
expect(described_class.cpu_time).to be_an(Integer) expect(described_class.cpu_time).to be_an(Float)
end end
end end
describe '.real_time' do describe '.real_time' do
it 'returns a Fixnum' do it 'returns a Fixnum' do
expect(described_class.real_time).to be_an(Integer) expect(described_class.real_time).to be_an(Float)
end end
end end
describe '.monotonic_time' do describe '.monotonic_time' do
it 'returns a Fixnum' do it 'returns a Float' do
expect(described_class.monotonic_time).to be_an(Integer) expect(described_class.monotonic_time).to be_an(Float)
end end
end end
end end