Cleanup ruby sampler metrics
* Use a simple counter for sampler duration instead of a histogram.
* Use a counter to collect GC time.
* Remove the unused objects metric.
* Clean up metric names to match Prometheus conventions.
* Prefix generic GC stats with `gc_stat`.
* Include the worker label on memory and file descriptor metrics.
This commit is contained in:
parent
348ad22d7a
commit
78a9991543
4 changed files with 34 additions and 13 deletions
5
changelogs/unreleased/bjk-48176_ruby_gc.yml
Normal file
5
changelogs/unreleased/bjk-48176_ruby_gc.yml
Normal file
|
@ -0,0 +1,5 @@
|
|||
---
|
||||
title: Cleanup Prometheus ruby metrics
|
||||
merge_request: 20039
|
||||
author: Ben Kochie
|
||||
type: fixed
|
|
@ -49,6 +49,20 @@ The following metrics are available:
|
|||
| filesystem_circuitbreaker | Gauge | 9.5 | Whether the circuit for a certain shard is broken |
|
||||
| circuitbreaker_storage_check_duration_seconds | Histogram | 10.3 | Time a single storage probe took |
|
||||
|
||||
### Ruby metrics
|
||||
|
||||
Some basic Ruby runtime metrics are available:
|
||||
|
||||
| Metric | Type | Since | Description |
|
||||
|:-------------------------------------- |:--------- |:----- |:----------- |
|
||||
| ruby_gc_duration_seconds_total | Counter | 11.1 | Time spent by Ruby in GC |
|
||||
| ruby_gc_stat_... | Gauge | 11.1 | Various metrics from [GC.stat] |
|
||||
| ruby_file_descriptors | Gauge | 11.1 | File descriptors per process |
|
||||
| ruby_memory_bytes | Gauge | 11.1 | Memory usage by process |
|
||||
| ruby_sampler_duration_seconds_total | Counter | 11.1 | Time spent collecting stats |
|
||||
|
||||
[GC.stat]: https://ruby-doc.org/core-2.3.0/GC.html#method-c-stat
|
||||
|
||||
## Metrics shared directory
|
||||
|
||||
GitLab's Prometheus client requires a directory to store metrics data shared between multi-process services.
|
||||
|
|
|
@ -22,27 +22,27 @@ module Gitlab
|
|||
|
||||
def init_metrics
|
||||
metrics = {}
|
||||
metrics[:sampler_duration] = Metrics.histogram(with_prefix(:sampler_duration, :seconds), 'Sampler time', { worker: nil })
|
||||
metrics[:total_time] = Metrics.gauge(with_prefix(:gc, :time_total), 'Total GC time', labels, :livesum)
|
||||
metrics[:sampler_duration] = Metrics.counter(with_prefix(:sampler, :duration_seconds_total), 'Sampler time', labels)
|
||||
metrics[:total_time] = Metrics.counter(with_prefix(:gc, :duration_seconds_total), 'Total GC time', labels)
|
||||
GC.stat.keys.each do |key|
|
||||
metrics[key] = Metrics.gauge(with_prefix(:gc, key), to_doc_string(key), labels, :livesum)
|
||||
metrics[key] = Metrics.gauge(with_prefix(:gc_stat, key), to_doc_string(key), labels, :livesum)
|
||||
end
|
||||
|
||||
metrics[:objects_total] = Metrics.gauge(with_prefix(:objects, :total), 'Objects total', labels.merge(class: nil), :livesum)
|
||||
metrics[:memory_usage] = Metrics.gauge(with_prefix(:memory, :usage_total), 'Memory used total', labels, :livesum)
|
||||
metrics[:file_descriptors] = Metrics.gauge(with_prefix(:file, :descriptors_total), 'File descriptors total', labels, :livesum)
|
||||
metrics[:memory_usage] = Metrics.gauge(with_prefix(:memory, :bytes), 'Memory used', labels, :livesum)
|
||||
metrics[:file_descriptors] = Metrics.gauge(with_prefix(:file, :descriptors), 'File descriptors used', labels, :livesum)
|
||||
|
||||
metrics
|
||||
end
|
||||
|
||||
def sample
|
||||
start_time = System.monotonic_time
|
||||
|
||||
metrics[:memory_usage].set(labels.merge(worker_label), System.memory_usage)
|
||||
metrics[:file_descriptors].set(labels.merge(worker_label), System.file_descriptor_count)
|
||||
|
||||
sample_gc
|
||||
|
||||
metrics[:memory_usage].set(labels, System.memory_usage)
|
||||
metrics[:file_descriptors].set(labels, System.file_descriptor_count)
|
||||
|
||||
metrics[:sampler_duration].observe(labels.merge(worker_label), System.monotonic_time - start_time)
|
||||
metrics[:sampler_duration].increment(labels, System.monotonic_time - start_time)
|
||||
ensure
|
||||
GC::Profiler.clear
|
||||
end
|
||||
|
@ -50,11 +50,13 @@ module Gitlab
|
|||
private
|
||||
|
||||
def sample_gc
|
||||
metrics[:total_time].set(labels, GC::Profiler.total_time * 1000)
|
||||
|
||||
# Collect generic GC stats.
|
||||
GC.stat.each do |key, value|
|
||||
metrics[key].set(labels, value)
|
||||
end
|
||||
|
||||
# Collect the GC time since last sample in float seconds.
|
||||
metrics[:total_time].increment(labels, GC::Profiler.total_time)
|
||||
end
|
||||
|
||||
def worker_label
|
||||
|
|
|
@ -45,7 +45,7 @@ describe Gitlab::Metrics::Samplers::RubySampler do
|
|||
it 'adds a metric containing garbage collection time statistics' do
|
||||
expect(GC::Profiler).to receive(:total_time).and_return(0.24)
|
||||
|
||||
expect(sampler.metrics[:total_time]).to receive(:set).with({}, 240)
|
||||
expect(sampler.metrics[:total_time]).to receive(:increment).with({}, 0.24)
|
||||
|
||||
sampler.sample
|
||||
end
|
||||
|
|
Loading…
Reference in a new issue