diff --git a/Gemfile b/Gemfile index 6034323956c..53820459a16 100644 --- a/Gemfile +++ b/Gemfile @@ -283,7 +283,7 @@ group :metrics do gem 'influxdb', '~> 0.2', require: false # Prometheus - gem 'prometheus-client-mmap', '~>0.7.0.beta18' + gem 'prometheus-client-mmap', '~> 0.7.0.beta36' gem 'raindrops', '~> 0.18' end diff --git a/Gemfile.lock b/Gemfile.lock index 4787be92365..69d20bdbbb3 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -488,7 +488,7 @@ GEM mini_mime (0.1.4) mini_portile2 (2.3.0) minitest (5.7.0) - mmap2 (2.2.7) + mmap2 (2.2.9) mousetrap-rails (1.4.6) multi_json (1.12.2) multi_xml (0.6.0) @@ -625,8 +625,8 @@ GEM parser unparser procto (0.0.3) - prometheus-client-mmap (0.7.0.beta18) - mmap2 (~> 2.2, >= 2.2.7) + prometheus-client-mmap (0.7.0.beta36) + mmap2 (~> 2.2, >= 2.2.9) pry (0.10.4) coderay (~> 1.1.0) method_source (~> 0.8.1) @@ -1111,7 +1111,7 @@ DEPENDENCIES peek-sidekiq (~> 1.0.3) pg (~> 0.18.2) premailer-rails (~> 1.9.7) - prometheus-client-mmap (~> 0.7.0.beta18) + prometheus-client-mmap (~> 0.7.0.beta36) pry-byebug (~> 3.4.1) pry-rails (~> 0.3.4) rack-attack (~> 4.4.1) diff --git a/changelogs/unreleased/pawel-update_prometheus_gem_to_well_tested_version.yml b/changelogs/unreleased/pawel-update_prometheus_gem_to_well_tested_version.yml new file mode 100644 index 00000000000..a4133ae5cec --- /dev/null +++ b/changelogs/unreleased/pawel-update_prometheus_gem_to_well_tested_version.yml @@ -0,0 +1,5 @@ +--- +title: Reenable Prometheus metrics, add more control over Prometheus method instrumentation +merge_request: 15558 +author: +type: fixed diff --git a/config/initializers/7_prometheus_metrics.rb b/config/initializers/7_prometheus_metrics.rb index e8f33593fe0..43b1e943897 100644 --- a/config/initializers/7_prometheus_metrics.rb +++ b/config/initializers/7_prometheus_metrics.rb @@ -12,16 +12,20 @@ Prometheus::Client.configure do |config| end config.pid_provider = -> do - wid = Prometheus::Client::Support::Unicorn.worker_id - wid = Process.pid if wid.nil? - if wid.nil? + worker_id = Prometheus::Client::Support::Unicorn.worker_id + if worker_id.nil? "process_pid_#{Process.pid}" else - "worker_id_#{wid}" + "worker_id_#{worker_id}" end end end +Gitlab::Application.configure do |config| + # 0 should be Sentry to catch errors in this middleware + config.middleware.insert(1, Gitlab::Metrics::RequestsRackMiddleware) +end + Sidekiq.configure_server do |config| config.on(:startup) do Gitlab::Metrics::SidekiqMetricsExporter.instance.start diff --git a/config/initializers/8_metrics.rb b/config/initializers/8_metrics.rb index 7ef594836d6..45b39b2a38d 100644 --- a/config/initializers/8_metrics.rb +++ b/config/initializers/8_metrics.rb @@ -118,11 +118,6 @@ def instrument_classes(instrumentation) end # rubocop:enable Metrics/AbcSize -Gitlab::Application.configure do |config| - # 0 should be Sentry to catch errors in this middleware - config.middleware.insert(1, Gitlab::Metrics::RequestsRackMiddleware) -end - if Gitlab::Metrics.enabled? require 'pathname' require 'influxdb' diff --git a/lib/gitlab/metrics/method_call.rb b/lib/gitlab/metrics/method_call.rb index 90235095306..65d55576ac2 100644 --- a/lib/gitlab/metrics/method_call.rb +++ b/lib/gitlab/metrics/method_call.rb @@ -6,29 +6,15 @@ module Gitlab BASE_LABELS = { module: nil, method: nil }.freeze attr_reader :real_time, :cpu_time, :call_count, :labels - def self.call_real_duration_histogram - return @call_real_duration_histogram if @call_real_duration_histogram - - MUTEX.synchronize do - @call_real_duration_histogram ||= Gitlab::Metrics.histogram( - :gitlab_method_call_real_duration_seconds, - 'Method calls real duration', - Transaction::BASE_LABELS.merge(BASE_LABELS), - [0.1, 0.2, 0.5, 1, 2, 5, 10] - ) - end - end - - def self.call_cpu_duration_histogram - return @call_cpu_duration_histogram if @call_cpu_duration_histogram + def self.call_duration_histogram + return @call_duration_histogram if @call_duration_histogram MUTEX.synchronize do @call_duration_histogram ||= Gitlab::Metrics.histogram( - :gitlab_method_call_cpu_duration_seconds, - 'Method calls cpu duration', + :gitlab_method_call_duration_seconds, + 'Method calls real duration', Transaction::BASE_LABELS.merge(BASE_LABELS), - [0.1, 0.2, 0.5, 1, 2, 5, 10] - ) + [0.01, 0.05, 0.1, 0.5, 1]) end end @@ -59,8 +45,9 @@ module Gitlab @cpu_time += cpu_time @call_count += 1 - self.class.call_real_duration_histogram.observe(@transaction.labels.merge(labels), real_time / 1000.0) - self.class.call_cpu_duration_histogram.observe(@transaction.labels.merge(labels), cpu_time / 1000.0) + if call_measurement_enabled? && above_threshold? + self.class.call_duration_histogram.observe(@transaction.labels.merge(labels), real_time / 1000.0) + end retval end @@ -83,6 +70,10 @@ module Gitlab def above_threshold? real_time >= Metrics.method_call_threshold end + + def call_measurement_enabled? + Feature.get(:prometheus_metrics_method_instrumentation).enabled? + end end end end diff --git a/lib/gitlab/metrics/prometheus.rb b/lib/gitlab/metrics/prometheus.rb index 4f165d12a94..09103b4ca2d 100644 --- a/lib/gitlab/metrics/prometheus.rb +++ b/lib/gitlab/metrics/prometheus.rb @@ -17,9 +17,9 @@ module Gitlab end def prometheus_metrics_enabled? - # force disable prometheus_metrics until - # https://gitlab.com/gitlab-org/prometheus-client-mmap/merge_requests/11 is ready - false + return @prometheus_metrics_enabled if defined?(@prometheus_metrics_enabled) + + @prometheus_metrics_enabled = prometheus_metrics_enabled_unmemoized end def registry diff --git a/spec/lib/gitlab/metrics/method_call_spec.rb b/spec/lib/gitlab/metrics/method_call_spec.rb index f1e9e414e0d..5341addf911 100644 --- a/spec/lib/gitlab/metrics/method_call_spec.rb +++ b/spec/lib/gitlab/metrics/method_call_spec.rb @@ -13,16 +13,52 @@ describe Gitlab::Metrics::MethodCall do expect(method_call.call_count).to eq(1) end - it 'observes the performance of the supplied block' do - expect(described_class.call_real_duration_histogram) - .to receive(:observe) - .with({ module: :Foo, method: '#bar' }, be_a_kind_of(Numeric)) + context 'when measurement is above threshold' do + before do + allow(method_call).to receive(:above_threshold?).and_return(true) + end - expect(described_class.call_cpu_duration_histogram) - .to receive(:observe) - .with({ module: :Foo, method: '#bar' }, be_a_kind_of(Numeric)) + context 'prometheus instrumentation is enabled' do + before do + Feature.get(:prometheus_metrics_method_instrumentation).enable + end - method_call.measure { 'foo' } + it 'observes the performance of the supplied block' do + expect(described_class.call_duration_histogram) + .to receive(:observe) + .with({ module: :Foo, method: '#bar' }, be_a_kind_of(Numeric)) + + method_call.measure { 'foo' } + end + end + + context 'prometheus instrumentation is disabled' do + before do + Feature.get(:prometheus_metrics_method_instrumentation).disable + end + + it 'does not observe the performance' do + expect(described_class.call_duration_histogram) + .not_to receive(:observe) + + method_call.measure { 'foo' } + end + end + end + + context 'when measurement is below threshold' do + before do + allow(method_call).to receive(:above_threshold?).and_return(false) + + Feature.get(:prometheus_metrics_method_instrumentation).enable + end + + it 'does not observe the performance' do + expect(described_class.call_duration_histogram) + .not_to receive(:observe) + + method_call.measure { 'foo' } + end end end @@ -43,7 +79,13 @@ describe Gitlab::Metrics::MethodCall do end describe '#above_threshold?' do + before do + allow(Gitlab::Metrics).to receive(:method_call_threshold).and_return(100) + end + it 'returns false when the total call time is not above the threshold' do + expect(method_call).to receive(:real_time).and_return(9) + expect(method_call.above_threshold?).to eq(false) end