gitlab-org--gitlab-foss/spec/lib/gitlab/metrics/sampler_spec.rb
Yorick Peterse 057eb824b5 Randomize metrics sample intervals
Sampling data at a fixed interval means we can potentially miss data
from events occurring between sampling intervals. For example, say we
sample data every 15 seconds but Unicorn workers get killed after 10
seconds. In this particular case it's possible to miss interesting data
as the sampler will never get to actually submitting data.

To work around this (at least for the most part) the sampling interval
is randomized as following:

1. Take the user specified sampling interval (15 seconds by default)
2. Divide it by 2 (referred to as "half" below)
3. Generate a range (using a step of 0.1) from -"half" to "half"
4. Every time the sampler goes to sleep we'll grab the user provided
   interval and add a randomly chosen "adjustment" to it while making
   sure we don't pick the same value twice in a row.

For a specified timeout of 15 this means the actual intervals can be
anywhere between 7.5 and 22.5, but never can the same interval be used
twice in a row.

The rationale behind this change is that on dev.gitlab.org I'm sometimes
seeing certain Gitlab::Git/Rugged objects being retained, but only for a
few minutes every 24 hours. Knowing the code of Gitlab and how much
memory it uses/leaks I suspect we're missing data due to workers getting
terminated before the sampler can write its data to InfluxDB.
2016-01-13 12:57:46 +01:00

139 lines
3.6 KiB
Ruby

require 'spec_helper'
describe Gitlab::Metrics::Sampler do
let(:sampler) { described_class.new(5) }
after do
Allocations.stop if Gitlab::Metrics.mri?
end
describe '#start' do
it 'gathers a sample at a given interval' do
expect(sampler).to receive(:sleep).with(a_kind_of(Numeric))
expect(sampler).to receive(:sample)
expect(sampler).to receive(:loop).and_yield
sampler.start.join
end
end
describe '#sample' do
it 'samples various statistics' do
expect(sampler).to receive(:sample_memory_usage)
expect(sampler).to receive(:sample_file_descriptors)
expect(sampler).to receive(:sample_objects)
expect(sampler).to receive(:sample_gc)
expect(sampler).to receive(:flush)
sampler.sample
end
it 'clears any GC profiles' do
expect(sampler).to receive(:flush)
expect(GC::Profiler).to receive(:clear)
sampler.sample
end
end
describe '#flush' do
it 'schedules the metrics using Sidekiq' do
expect(Gitlab::Metrics).to receive(:submit_metrics).
with([an_instance_of(Hash)])
sampler.sample_memory_usage
sampler.flush
end
end
describe '#sample_memory_usage' do
it 'adds a metric containing the memory usage' do
expect(Gitlab::Metrics::System).to receive(:memory_usage).
and_return(9000)
expect(sampler).to receive(:add_metric).
with(/memory_usage/, value: 9000).
and_call_original
sampler.sample_memory_usage
end
end
describe '#sample_file_descriptors' do
it 'adds a metric containing the amount of open file descriptors' do
expect(Gitlab::Metrics::System).to receive(:file_descriptor_count).
and_return(4)
expect(sampler).to receive(:add_metric).
with(/file_descriptors/, value: 4).
and_call_original
sampler.sample_file_descriptors
end
end
describe '#sample_objects' do
it 'adds a metric containing the amount of allocated objects' do
expect(sampler).to receive(:add_metric).
with(/object_counts/, an_instance_of(Hash), an_instance_of(Hash)).
at_least(:once).
and_call_original
sampler.sample_objects
end
end
describe '#sample_gc' do
it 'adds a metric containing garbage collection statistics' do
expect(GC::Profiler).to receive(:total_time).and_return(0.24)
expect(sampler).to receive(:add_metric).
with(/gc_statistics/, an_instance_of(Hash)).
and_call_original
sampler.sample_gc
end
end
describe '#add_metric' do
it 'prefixes the series name for a Rails process' do
expect(sampler).to receive(:sidekiq?).and_return(false)
expect(Gitlab::Metrics::Metric).to receive(:new).
with('rails_cats', { value: 10 }, {}).
and_call_original
sampler.add_metric('cats', value: 10)
end
it 'prefixes the series name for a Sidekiq process' do
expect(sampler).to receive(:sidekiq?).and_return(true)
expect(Gitlab::Metrics::Metric).to receive(:new).
with('sidekiq_cats', { value: 10 }, {}).
and_call_original
sampler.add_metric('cats', value: 10)
end
end
describe '#sleep_interval' do
it 'returns a Numeric' do
expect(sampler.sleep_interval).to be_a_kind_of(Numeric)
end
# Testing random behaviour is very hard, so treat this test as a basic smoke
# test instead of a very accurate behaviour/unit test.
it 'does not return the same interval twice in a row' do
last = nil
100.times do
interval = sampler.sleep_interval
expect(interval).to_not eq(last)
last = interval
end
end
end
end