diff --git a/changelogs/unreleased/pawel-reduce_cardinality_of_prometheus_metrics.yml b/changelogs/unreleased/pawel-reduce_cardinality_of_prometheus_metrics.yml new file mode 100644 index 00000000000..0cee0b634d6 --- /dev/null +++ b/changelogs/unreleased/pawel-reduce_cardinality_of_prometheus_metrics.yml @@ -0,0 +1,5 @@ +--- +title: Reduce the number of buckets in gitlab_cache_operation_duration_seconds metric +merge_request: 15881 +author: +type: changed diff --git a/lib/gitlab/metrics/influx_db.rb b/lib/gitlab/metrics/influx_db.rb index 6ea132fc5bf..877cebf6786 100644 --- a/lib/gitlab/metrics/influx_db.rb +++ b/lib/gitlab/metrics/influx_db.rb @@ -38,6 +38,7 @@ module Gitlab # This is memoized since this method is called for every instrumented # method. Loading data from an external cache on every method call slows # things down too much. + # in milliseconds @method_call_threshold ||= settings[:method_call_threshold] end diff --git a/lib/gitlab/metrics/method_call.rb b/lib/gitlab/metrics/method_call.rb index 9112164f22e..329b07af5db 100644 --- a/lib/gitlab/metrics/method_call.rb +++ b/lib/gitlab/metrics/method_call.rb @@ -35,8 +35,8 @@ module Gitlab @transaction = transaction @name = name @labels = { module: @module_name, method: @method_name } - @real_time = 0 - @cpu_time = 0 + @real_time = 0.0 + @cpu_time = 0.0 @call_count = 0 end @@ -54,7 +54,7 @@ module Gitlab @call_count += 1 if call_measurement_enabled? && above_threshold? - self.class.call_duration_histogram.observe(@transaction.labels.merge(labels), real_time / 1000.0) + self.class.call_duration_histogram.observe(@transaction.labels.merge(labels), real_time) end retval @@ -65,8 +65,8 @@ module Gitlab Metric.new( Instrumentation.series, { - duration: real_time, - cpu_duration: cpu_time, + duration: real_time.in_milliseconds.to_i, + cpu_duration: cpu_time.in_milliseconds.to_i, call_count: call_count }, method: @name @@ -76,7 +76,7 @@ module Gitlab # Returns true if the total runtime of this method exceeds the method call # threshold. def above_threshold? - real_time >= Metrics.method_call_threshold + real_time.in_milliseconds >= Metrics.method_call_threshold end def call_measurement_enabled? diff --git a/lib/gitlab/metrics/samplers/ruby_sampler.rb b/lib/gitlab/metrics/samplers/ruby_sampler.rb index b68800417a2..4e1ea62351f 100644 --- a/lib/gitlab/metrics/samplers/ruby_sampler.rb +++ b/lib/gitlab/metrics/samplers/ruby_sampler.rb @@ -52,7 +52,7 @@ module Gitlab metrics[:memory_usage].set(labels, System.memory_usage) metrics[:file_descriptors].set(labels, System.file_descriptor_count) - metrics[:sampler_duration].observe(labels.merge(worker_label), (System.monotonic_time - start_time) / 1000.0) + metrics[:sampler_duration].observe(labels.merge(worker_label), System.monotonic_time - start_time) ensure GC::Profiler.clear end diff --git a/lib/gitlab/metrics/subscribers/rails_cache.rb b/lib/gitlab/metrics/subscribers/rails_cache.rb index efd3c9daf79..250897a79c2 100644 --- a/lib/gitlab/metrics/subscribers/rails_cache.rb +++ b/lib/gitlab/metrics/subscribers/rails_cache.rb @@ -66,7 +66,7 @@ module Gitlab :gitlab_cache_operation_duration_seconds, 'Cache access time', Transaction::BASE_LABELS.merge({ action: nil }), - [0.001, 0.002, 0.005, 0.01, 0.02, 0.05, 0.1, 0.500, 2.0, 10.0] + [0.001, 0.01, 0.1, 1, 10] ) end diff --git a/lib/gitlab/metrics/system.rb b/lib/gitlab/metrics/system.rb index c2cbd3c16a1..e60e245cf89 100644 --- a/lib/gitlab/metrics/system.rb +++ b/lib/gitlab/metrics/system.rb @@ -35,27 +35,27 @@ module Gitlab if Process.const_defined?(:CLOCK_THREAD_CPUTIME_ID) def self.cpu_time Process - .clock_gettime(Process::CLOCK_THREAD_CPUTIME_ID, :millisecond) + .clock_gettime(Process::CLOCK_THREAD_CPUTIME_ID, :float_second) end else def self.cpu_time Process - .clock_gettime(Process::CLOCK_PROCESS_CPUTIME_ID, :millisecond) + .clock_gettime(Process::CLOCK_PROCESS_CPUTIME_ID, :float_second) end end # Returns the current real time in a given precision. # - # Returns the time as a Fixnum. - def self.real_time(precision = :millisecond) + # Returns the time as a Float for precision = :float_second. + def self.real_time(precision = :float_second) Process.clock_gettime(Process::CLOCK_REALTIME, precision) end - # Returns the current monotonic clock time in a given precision. + # Returns the current monotonic clock time as seconds with microseconds precision. # - # Returns the time as a Fixnum. - def self.monotonic_time(precision = :millisecond) - Process.clock_gettime(Process::CLOCK_MONOTONIC, precision) + # Returns the time as a Float. + def self.monotonic_time + Process.clock_gettime(Process::CLOCK_MONOTONIC, :float_second) end end end diff --git a/lib/gitlab/metrics/transaction.rb b/lib/gitlab/metrics/transaction.rb index ee3afc5ffdb..e7975c023a9 100644 --- a/lib/gitlab/metrics/transaction.rb +++ b/lib/gitlab/metrics/transaction.rb @@ -35,6 +35,10 @@ module Gitlab @finished_at ? (@finished_at - @started_at) : 0.0 end + def duration_milliseconds + duration.in_milliseconds.to_i + end + def allocated_memory @memory_after - @memory_before end @@ -50,7 +54,7 @@ module Gitlab @memory_after = System.memory_usage @finished_at = System.monotonic_time - self.class.metric_transaction_duration_seconds.observe(labels, duration * 1000) + self.class.metric_transaction_duration_seconds.observe(labels, duration) self.class.metric_transaction_allocated_memory_bytes.observe(labels, allocated_memory * 1024.0) Thread.current[THREAD_KEY] = nil @@ -97,7 +101,7 @@ module Gitlab end def track_self - values = { duration: duration, allocated_memory: allocated_memory } + values = { duration: duration_milliseconds, allocated_memory: allocated_memory } @values.each do |name, value| values[name] = value diff --git a/spec/lib/gitlab/metrics/method_call_spec.rb b/spec/lib/gitlab/metrics/method_call_spec.rb index 78767d06462..41a9d1d9c90 100644 --- a/spec/lib/gitlab/metrics/method_call_spec.rb +++ b/spec/lib/gitlab/metrics/method_call_spec.rb @@ -96,14 +96,17 @@ describe Gitlab::Metrics::MethodCall do describe '#to_metric' do it 'returns a Metric instance' do + expect(method_call).to receive(:real_time).and_return(4.0001) + expect(method_call).to receive(:cpu_time).and_return(3.0001) + method_call.measure { 'foo' } metric = method_call.to_metric expect(metric).to be_an_instance_of(Gitlab::Metrics::Metric) expect(metric.series).to eq('rails_method_calls') - expect(metric.values[:duration]).to be_a_kind_of(Numeric) - expect(metric.values[:cpu_duration]).to be_a_kind_of(Numeric) + expect(metric.values[:duration]).to eq(4000) + expect(metric.values[:cpu_duration]).to eq(3000) expect(metric.values[:call_count]).to be_an(Integer) expect(metric.tags).to eq({ method: 'Foo#bar' }) @@ -116,13 +119,13 @@ describe Gitlab::Metrics::MethodCall do end it 'returns false when the total call time is not above the threshold' do - expect(method_call).to receive(:real_time).and_return(9) + expect(method_call).to receive(:real_time).and_return(0.009) expect(method_call.above_threshold?).to eq(false) end it 'returns true when the total call time is above the threshold' do - expect(method_call).to receive(:real_time).and_return(9000) + expect(method_call).to receive(:real_time).and_return(9) expect(method_call.above_threshold?).to eq(true) end diff --git a/spec/lib/gitlab/metrics/system_spec.rb b/spec/lib/gitlab/metrics/system_spec.rb index 4d94d8705fb..14afcdf5daa 100644 --- a/spec/lib/gitlab/metrics/system_spec.rb +++ b/spec/lib/gitlab/metrics/system_spec.rb @@ -29,19 +29,19 @@ describe Gitlab::Metrics::System do describe '.cpu_time' do it 'returns a Fixnum' do - expect(described_class.cpu_time).to be_an(Integer) + expect(described_class.cpu_time).to be_an(Float) end end describe '.real_time' do it 'returns a Fixnum' do - expect(described_class.real_time).to be_an(Integer) + expect(described_class.real_time).to be_an(Float) end end describe '.monotonic_time' do - it 'returns a Fixnum' do - expect(described_class.monotonic_time).to be_an(Integer) + it 'returns a Float' do + expect(described_class.monotonic_time).to be_an(Float) end end end