Call GC::Profiler.clear
only in one place
Previously, both InfluxSampler and RubySampler were relying on the `GC::Profiler.total_time` data which is the sum over the list of captured GC events. Also, both samplers asynchronously called `GC::Profiler.clear` which led to incorrect metric data because each sampler has the wrong assumption it is the only object who calls `GC::Profiler.clear` and thus could rely on the gathered results between such calls. We should ensure that `GC::Profiler.total_time` is called only in one place making it possible to rely on accumulated data between such wipes. Also, we need to track the amount of profiler reports we lost.
This commit is contained in:
parent
ebdd3a233e
commit
1f9edb7c4a
6 changed files with 44 additions and 53 deletions
|
@ -0,0 +1,5 @@
|
|||
---
|
||||
title: Fix GC::Profiler metrics fetching
|
||||
merge_request: 31331
|
||||
author:
|
||||
type: fixed
|
|
@ -9,5 +9,10 @@ Peek.into Peek::Views::ActiveRecord
|
|||
Peek.into Peek::Views::Gitaly
|
||||
Peek.into Peek::Views::RedisDetailed
|
||||
Peek.into Peek::Views::Rugged
|
||||
Peek.into Peek::Views::GC
|
||||
|
||||
# `Peek::Views::GC` is currently disabled in production, as it runs with every request
|
||||
# even if PerformanceBar is inactive and clears `GC::Profiler` reports we need for metrics.
|
||||
# Check https://gitlab.com/gitlab-org/gitlab-ce/issues/65455
|
||||
Peek.into Peek::Views::GC if Rails.env.development?
|
||||
|
||||
Peek.into Peek::Views::Tracing if Labkit::Tracing.tracing_url_enabled?
|
||||
|
|
|
@ -15,19 +15,14 @@ module Gitlab
|
|||
@last_step = nil
|
||||
|
||||
@metrics = []
|
||||
|
||||
@last_minor_gc = Delta.new(GC.stat[:minor_gc_count])
|
||||
@last_major_gc = Delta.new(GC.stat[:major_gc_count])
|
||||
end
|
||||
|
||||
def sample
|
||||
sample_memory_usage
|
||||
sample_file_descriptors
|
||||
sample_gc
|
||||
|
||||
flush
|
||||
ensure
|
||||
GC::Profiler.clear
|
||||
@metrics.clear
|
||||
end
|
||||
|
||||
|
@ -43,23 +38,6 @@ module Gitlab
|
|||
add_metric('file_descriptors', value: System.file_descriptor_count)
|
||||
end
|
||||
|
||||
def sample_gc
|
||||
time = GC::Profiler.total_time * 1000.0
|
||||
stats = GC.stat.merge(total_time: time)
|
||||
|
||||
# We want the difference of GC runs compared to the last sample, not the
|
||||
# total amount since the process started.
|
||||
stats[:minor_gc_count] =
|
||||
@last_minor_gc.compared_with(stats[:minor_gc_count])
|
||||
|
||||
stats[:major_gc_count] =
|
||||
@last_major_gc.compared_with(stats[:major_gc_count])
|
||||
|
||||
stats[:count] = stats[:minor_gc_count] + stats[:major_gc_count]
|
||||
|
||||
add_metric('gc_statistics', stats)
|
||||
end
|
||||
|
||||
def add_metric(series, values, tags = {})
|
||||
prefix = sidekiq? ? 'sidekiq_' : 'rails_'
|
||||
|
||||
|
|
|
@ -6,7 +6,11 @@ module Gitlab
|
|||
module Metrics
|
||||
module Samplers
|
||||
class RubySampler < BaseSampler
|
||||
GC_REPORT_BUCKETS = [0.001, 0.002, 0.005, 0.01, 0.05, 0.1, 0.5].freeze
|
||||
|
||||
def initialize(interval)
|
||||
GC::Profiler.clear
|
||||
|
||||
metrics[:process_start_time_seconds].set(labels, Time.now.to_i)
|
||||
|
||||
super
|
||||
|
@ -37,7 +41,7 @@ module Gitlab
|
|||
process_resident_memory_bytes: ::Gitlab::Metrics.gauge(with_prefix(:process, :resident_memory_bytes), 'Memory used', labels),
|
||||
process_start_time_seconds: ::Gitlab::Metrics.gauge(with_prefix(:process, :start_time_seconds), 'Process start time seconds'),
|
||||
sampler_duration: ::Gitlab::Metrics.counter(with_prefix(:sampler, :duration_seconds_total), 'Sampler time', labels),
|
||||
total_time: ::Gitlab::Metrics.counter(with_prefix(:gc, :duration_seconds_total), 'Total GC time', labels)
|
||||
gc_duration_seconds: ::Gitlab::Metrics.histogram(with_prefix(:gc, :duration_seconds), 'GC time', labels, GC_REPORT_BUCKETS)
|
||||
}
|
||||
|
||||
GC.stat.keys.each do |key|
|
||||
|
@ -57,20 +61,27 @@ module Gitlab
|
|||
sample_gc
|
||||
|
||||
metrics[:sampler_duration].increment(labels, System.monotonic_time - start_time)
|
||||
ensure
|
||||
GC::Profiler.clear
|
||||
end
|
||||
|
||||
private
|
||||
|
||||
def sample_gc
|
||||
# Collect generic GC stats.
|
||||
# Observe all GC samples
|
||||
sample_gc_reports.each do |report|
|
||||
metrics[:gc_duration_seconds].observe(labels, report[:GC_TIME])
|
||||
end
|
||||
|
||||
# Collect generic GC stats
|
||||
GC.stat.each do |key, value|
|
||||
metrics[key].set(labels, value)
|
||||
end
|
||||
end
|
||||
|
||||
# Collect the GC time since last sample in float seconds.
|
||||
metrics[:total_time].increment(labels, GC::Profiler.total_time)
|
||||
def sample_gc_reports
|
||||
GC::Profiler.enable
|
||||
GC::Profiler.raw_data
|
||||
ensure
|
||||
GC::Profiler.clear
|
||||
end
|
||||
|
||||
def set_memory_usage_metrics
|
||||
|
|
|
@ -17,18 +17,10 @@ describe Gitlab::Metrics::Samplers::InfluxSampler do
|
|||
it 'samples various statistics' do
|
||||
expect(sampler).to receive(:sample_memory_usage)
|
||||
expect(sampler).to receive(:sample_file_descriptors)
|
||||
expect(sampler).to receive(:sample_gc)
|
||||
expect(sampler).to receive(:flush)
|
||||
|
||||
sampler.sample
|
||||
end
|
||||
|
||||
it 'clears any GC profiles' do
|
||||
expect(sampler).to receive(:flush)
|
||||
expect(GC::Profiler).to receive(:clear)
|
||||
|
||||
sampler.sample
|
||||
end
|
||||
end
|
||||
|
||||
describe '#flush' do
|
||||
|
@ -67,18 +59,6 @@ describe Gitlab::Metrics::Samplers::InfluxSampler do
|
|||
end
|
||||
end
|
||||
|
||||
describe '#sample_gc' do
|
||||
it 'adds a metric containing garbage collection statistics' do
|
||||
expect(GC::Profiler).to receive(:total_time).and_return(0.24)
|
||||
|
||||
expect(sampler).to receive(:add_metric)
|
||||
.with(/gc_statistics/, an_instance_of(Hash))
|
||||
.and_call_original
|
||||
|
||||
sampler.sample_gc
|
||||
end
|
||||
end
|
||||
|
||||
describe '#add_metric' do
|
||||
it 'prefixes the series name for a Rails process' do
|
||||
expect(sampler).to receive(:sidekiq?).and_return(false)
|
||||
|
|
|
@ -59,17 +59,29 @@ describe Gitlab::Metrics::Samplers::RubySampler do
|
|||
end
|
||||
|
||||
it 'clears any GC profiles' do
|
||||
expect(GC::Profiler).to receive(:clear)
|
||||
expect(GC::Profiler).to receive(:clear).at_least(:once)
|
||||
|
||||
sampler.sample
|
||||
end
|
||||
end
|
||||
|
||||
describe '#sample_gc' do
|
||||
it 'adds a metric containing garbage collection time statistics' do
|
||||
expect(GC::Profiler).to receive(:total_time).and_return(0.24)
|
||||
let!(:sampler) { described_class.new(5) }
|
||||
|
||||
expect(sampler.metrics[:total_time]).to receive(:increment).with({}, 0.24)
|
||||
let(:gc_reports) { [{ GC_TIME: 0.1 }, { GC_TIME: 0.2 }, { GC_TIME: 0.3 }] }
|
||||
|
||||
it 're-enables GC::Profiler if needed' do
|
||||
expect(GC::Profiler).to receive(:enable)
|
||||
|
||||
sampler.sample
|
||||
end
|
||||
|
||||
it 'observes GC cycles time' do
|
||||
expect(sampler).to receive(:sample_gc_reports).and_return(gc_reports)
|
||||
|
||||
expect(sampler.metrics[:gc_duration_seconds]).to receive(:observe).with({}, 0.1).ordered
|
||||
expect(sampler.metrics[:gc_duration_seconds]).to receive(:observe).with({}, 0.2).ordered
|
||||
expect(sampler.metrics[:gc_duration_seconds]).to receive(:observe).with({}, 0.3).ordered
|
||||
|
||||
sampler.sample
|
||||
end
|
||||
|
|
Loading…
Reference in a new issue