Call GC::Profiler.clear only in one place

Previously, both InfluxSampler and RubySampler relied on
`GC::Profiler.total_time`, which is the sum over the list of captured
GC events. Both samplers also called `GC::Profiler.clear`
asynchronously, which led to incorrect metric data: each sampler
wrongly assumed it was the only caller of `GC::Profiler.clear` and
could therefore rely on the results gathered between such calls.

We should ensure that `GC::Profiler.clear` is called in only one
place, making it possible to rely on the data accumulated between such wipes.

Also, we need to track the number of profiler reports we lost.
This commit is contained in:
Aleksei Lipniagov 2019-08-02 09:04:32 +00:00 committed by Kamil Trzciński
parent ebdd3a233e
commit 1f9edb7c4a
6 changed files with 44 additions and 53 deletions

View file

@ -0,0 +1,5 @@
---
title: Fix GC::Profiler metrics fetching
merge_request: 31331
author:
type: fixed

View file

@ -9,5 +9,10 @@ Peek.into Peek::Views::ActiveRecord
Peek.into Peek::Views::Gitaly Peek.into Peek::Views::Gitaly
Peek.into Peek::Views::RedisDetailed Peek.into Peek::Views::RedisDetailed
Peek.into Peek::Views::Rugged Peek.into Peek::Views::Rugged
Peek.into Peek::Views::GC
# `Peek::Views::GC` is currently disabled in production, as it runs with every request
# even if PerformanceBar is inactive and clears `GC::Profiler` reports we need for metrics.
# Check https://gitlab.com/gitlab-org/gitlab-ce/issues/65455
Peek.into Peek::Views::GC if Rails.env.development?
Peek.into Peek::Views::Tracing if Labkit::Tracing.tracing_url_enabled? Peek.into Peek::Views::Tracing if Labkit::Tracing.tracing_url_enabled?

View file

@ -15,19 +15,14 @@ module Gitlab
@last_step = nil @last_step = nil
@metrics = [] @metrics = []
@last_minor_gc = Delta.new(GC.stat[:minor_gc_count])
@last_major_gc = Delta.new(GC.stat[:major_gc_count])
end end
def sample def sample
sample_memory_usage sample_memory_usage
sample_file_descriptors sample_file_descriptors
sample_gc
flush flush
ensure ensure
GC::Profiler.clear
@metrics.clear @metrics.clear
end end
@ -43,23 +38,6 @@ module Gitlab
add_metric('file_descriptors', value: System.file_descriptor_count) add_metric('file_descriptors', value: System.file_descriptor_count)
end end
def sample_gc
time = GC::Profiler.total_time * 1000.0
stats = GC.stat.merge(total_time: time)
# We want the difference of GC runs compared to the last sample, not the
# total amount since the process started.
stats[:minor_gc_count] =
@last_minor_gc.compared_with(stats[:minor_gc_count])
stats[:major_gc_count] =
@last_major_gc.compared_with(stats[:major_gc_count])
stats[:count] = stats[:minor_gc_count] + stats[:major_gc_count]
add_metric('gc_statistics', stats)
end
def add_metric(series, values, tags = {}) def add_metric(series, values, tags = {})
prefix = sidekiq? ? 'sidekiq_' : 'rails_' prefix = sidekiq? ? 'sidekiq_' : 'rails_'

View file

@ -6,7 +6,11 @@ module Gitlab
module Metrics module Metrics
module Samplers module Samplers
class RubySampler < BaseSampler class RubySampler < BaseSampler
GC_REPORT_BUCKETS = [0.001, 0.002, 0.005, 0.01, 0.05, 0.1, 0.5].freeze
def initialize(interval) def initialize(interval)
GC::Profiler.clear
metrics[:process_start_time_seconds].set(labels, Time.now.to_i) metrics[:process_start_time_seconds].set(labels, Time.now.to_i)
super super
@ -37,7 +41,7 @@ module Gitlab
process_resident_memory_bytes: ::Gitlab::Metrics.gauge(with_prefix(:process, :resident_memory_bytes), 'Memory used', labels), process_resident_memory_bytes: ::Gitlab::Metrics.gauge(with_prefix(:process, :resident_memory_bytes), 'Memory used', labels),
process_start_time_seconds: ::Gitlab::Metrics.gauge(with_prefix(:process, :start_time_seconds), 'Process start time seconds'), process_start_time_seconds: ::Gitlab::Metrics.gauge(with_prefix(:process, :start_time_seconds), 'Process start time seconds'),
sampler_duration: ::Gitlab::Metrics.counter(with_prefix(:sampler, :duration_seconds_total), 'Sampler time', labels), sampler_duration: ::Gitlab::Metrics.counter(with_prefix(:sampler, :duration_seconds_total), 'Sampler time', labels),
total_time: ::Gitlab::Metrics.counter(with_prefix(:gc, :duration_seconds_total), 'Total GC time', labels) gc_duration_seconds: ::Gitlab::Metrics.histogram(with_prefix(:gc, :duration_seconds), 'GC time', labels, GC_REPORT_BUCKETS)
} }
GC.stat.keys.each do |key| GC.stat.keys.each do |key|
@ -57,20 +61,27 @@ module Gitlab
sample_gc sample_gc
metrics[:sampler_duration].increment(labels, System.monotonic_time - start_time) metrics[:sampler_duration].increment(labels, System.monotonic_time - start_time)
ensure
GC::Profiler.clear
end end
private private
def sample_gc def sample_gc
# Collect generic GC stats. # Observe all GC samples
sample_gc_reports.each do |report|
metrics[:gc_duration_seconds].observe(labels, report[:GC_TIME])
end
# Collect generic GC stats
GC.stat.each do |key, value| GC.stat.each do |key, value|
metrics[key].set(labels, value) metrics[key].set(labels, value)
end end
end
# Collect the GC time since last sample in float seconds. def sample_gc_reports
metrics[:total_time].increment(labels, GC::Profiler.total_time) GC::Profiler.enable
GC::Profiler.raw_data
ensure
GC::Profiler.clear
end end
def set_memory_usage_metrics def set_memory_usage_metrics

View file

@ -17,18 +17,10 @@ describe Gitlab::Metrics::Samplers::InfluxSampler do
it 'samples various statistics' do it 'samples various statistics' do
expect(sampler).to receive(:sample_memory_usage) expect(sampler).to receive(:sample_memory_usage)
expect(sampler).to receive(:sample_file_descriptors) expect(sampler).to receive(:sample_file_descriptors)
expect(sampler).to receive(:sample_gc)
expect(sampler).to receive(:flush) expect(sampler).to receive(:flush)
sampler.sample sampler.sample
end end
it 'clears any GC profiles' do
expect(sampler).to receive(:flush)
expect(GC::Profiler).to receive(:clear)
sampler.sample
end
end end
describe '#flush' do describe '#flush' do
@ -67,18 +59,6 @@ describe Gitlab::Metrics::Samplers::InfluxSampler do
end end
end end
describe '#sample_gc' do
it 'adds a metric containing garbage collection statistics' do
expect(GC::Profiler).to receive(:total_time).and_return(0.24)
expect(sampler).to receive(:add_metric)
.with(/gc_statistics/, an_instance_of(Hash))
.and_call_original
sampler.sample_gc
end
end
describe '#add_metric' do describe '#add_metric' do
it 'prefixes the series name for a Rails process' do it 'prefixes the series name for a Rails process' do
expect(sampler).to receive(:sidekiq?).and_return(false) expect(sampler).to receive(:sidekiq?).and_return(false)

View file

@ -59,17 +59,29 @@ describe Gitlab::Metrics::Samplers::RubySampler do
end end
it 'clears any GC profiles' do it 'clears any GC profiles' do
expect(GC::Profiler).to receive(:clear) expect(GC::Profiler).to receive(:clear).at_least(:once)
sampler.sample sampler.sample
end end
end end
describe '#sample_gc' do describe '#sample_gc' do
it 'adds a metric containing garbage collection time statistics' do let!(:sampler) { described_class.new(5) }
expect(GC::Profiler).to receive(:total_time).and_return(0.24)
expect(sampler.metrics[:total_time]).to receive(:increment).with({}, 0.24) let(:gc_reports) { [{ GC_TIME: 0.1 }, { GC_TIME: 0.2 }, { GC_TIME: 0.3 }] }
it 're-enables GC::Profiler if needed' do
expect(GC::Profiler).to receive(:enable)
sampler.sample
end
it 'observes GC cycles time' do
expect(sampler).to receive(:sample_gc_reports).and_return(gc_reports)
expect(sampler.metrics[:gc_duration_seconds]).to receive(:observe).with({}, 0.1).ordered
expect(sampler.metrics[:gc_duration_seconds]).to receive(:observe).with({}, 0.2).ordered
expect(sampler.metrics[:gc_duration_seconds]).to receive(:observe).with({}, 0.3).ordered
sampler.sample sampler.sample
end end