057eb824b5
Sampling data at a fixed interval means we can potentially miss data from events occurring between sampling intervals. For example, say we sample data every 15 seconds but Unicorn workers get killed after 10 seconds. In this particular case it's possible to miss interesting data, as the sampler will never actually get to submit its data.

To work around this (at least for the most part) the sampling interval is randomized as follows:

1. Take the user-specified sampling interval (15 seconds by default).
2. Divide it by 2 (referred to as "half" below).
3. Generate a range (using a step of 0.1) from -"half" to "half".
4. Every time the sampler goes to sleep, grab the user-provided interval and add a randomly chosen "adjustment" to it, while making sure we don't pick the same value twice in a row.

For a specified interval of 15 seconds this means the actual intervals can be anywhere between 7.5 and 22.5 seconds, but the same interval can never be used twice in a row.

The rationale behind this change is that on dev.gitlab.org I'm sometimes seeing certain Gitlab::Git/Rugged objects being retained, but only for a few minutes every 24 hours. Knowing the code of GitLab and how much memory it uses/leaks, I suspect we're missing data due to workers getting terminated before the sampler can write its data to InfluxDB.
139 lines
3.6 KiB
Ruby
139 lines
3.6 KiB
Ruby
require 'spec_helper'
|
|
|
|
# Specs for Gitlab::Metrics::Sampler. Every example works on a sampler
# constructed with the argument 5.
describe Gitlab::Metrics::Sampler do
  let(:sampler) { described_class.new(5) }

  # Calls Allocations.stop after each example, but only when
  # Gitlab::Metrics.mri? is truthy.
  after do
    Allocations.stop if Gitlab::Metrics.mri?
  end

  describe '#start' do
    it 'gathers a sample at a given interval' do
      # `loop` is replaced by a single yield so the loop body runs only once;
      # `sleep` is mocked so the example does not actually wait.
      expect(sampler).to receive(:loop).and_yield
      expect(sampler).to receive(:sleep).with(a_kind_of(Numeric))
      expect(sampler).to receive(:sample)

      # NOTE(review): #start presumably returns a Thread; joining it waits for
      # the mocked loop body to finish before expectations are verified.
      sampler.start.join
    end
  end

  describe '#sample' do
    it 'samples various statistics' do
      expect(sampler).to receive(:sample_memory_usage)
      expect(sampler).to receive(:sample_file_descriptors)
      expect(sampler).to receive(:sample_objects)
      expect(sampler).to receive(:sample_gc)
      expect(sampler).to receive(:flush)

      sampler.sample
    end

    it 'clears any GC profiles' do
      expect(sampler).to receive(:flush)
      expect(GC::Profiler).to receive(:clear)

      sampler.sample
    end
  end

  describe '#flush' do
    it 'schedules the metrics using Sidekiq' do
      expect(Gitlab::Metrics)
        .to receive(:submit_metrics)
        .with([an_instance_of(Hash)])

      # Record one metric first so there is something to flush.
      sampler.sample_memory_usage
      sampler.flush
    end
  end

  describe '#sample_memory_usage' do
    it 'adds a metric containing the memory usage' do
      expect(Gitlab::Metrics::System)
        .to receive(:memory_usage)
        .and_return(9000)

      expect(sampler)
        .to receive(:add_metric)
        .with(/memory_usage/, value: 9000)
        .and_call_original

      sampler.sample_memory_usage
    end
  end

  describe '#sample_file_descriptors' do
    it 'adds a metric containing the amount of open file descriptors' do
      expect(Gitlab::Metrics::System)
        .to receive(:file_descriptor_count)
        .and_return(4)

      expect(sampler)
        .to receive(:add_metric)
        .with(/file_descriptors/, value: 4)
        .and_call_original

      sampler.sample_file_descriptors
    end
  end

  describe '#sample_objects' do
    it 'adds a metric containing the amount of allocated objects' do
      expect(sampler)
        .to receive(:add_metric)
        .with(/object_counts/, an_instance_of(Hash), an_instance_of(Hash))
        .at_least(:once)
        .and_call_original

      sampler.sample_objects
    end
  end

  describe '#sample_gc' do
    it 'adds a metric containing garbage collection statistics' do
      expect(GC::Profiler).to receive(:total_time).and_return(0.24)

      expect(sampler)
        .to receive(:add_metric)
        .with(/gc_statistics/, an_instance_of(Hash))
        .and_call_original

      sampler.sample_gc
    end
  end

  describe '#add_metric' do
    it 'prefixes the series name for a Rails process' do
      expect(sampler).to receive(:sidekiq?).and_return(false)

      expect(Gitlab::Metrics::Metric)
        .to receive(:new)
        .with('rails_cats', { value: 10 }, {})
        .and_call_original

      sampler.add_metric('cats', value: 10)
    end

    it 'prefixes the series name for a Sidekiq process' do
      expect(sampler).to receive(:sidekiq?).and_return(true)

      expect(Gitlab::Metrics::Metric)
        .to receive(:new)
        .with('sidekiq_cats', { value: 10 }, {})
        .and_call_original

      sampler.add_metric('cats', value: 10)
    end
  end

  describe '#sleep_interval' do
    it 'returns a Numeric' do
      expect(sampler.sleep_interval).to be_a_kind_of(Numeric)
    end

    # Randomized behaviour is hard to verify precisely, so this example is only
    # a basic smoke test rather than an exact behaviour/unit test.
    it 'does not return the same interval twice in a row' do
      previous = nil

      100.times do
        interval = sampler.sleep_interval

        # Each value is compared only against the one generated just before it.
        expect(interval).not_to eq(previous)

        previous = interval
      end
    end
  end
end
|