Add GC sampler and small refactor of samplers
This commit is contained in:
parent
52115b905a
commit
03b38a4a8d
12 changed files with 347 additions and 217 deletions
|
@ -19,3 +19,7 @@ Sidekiq.configure_server do |config|
|
|||
Gitlab::Metrics::SidekiqMetricsExporter.instance.start
|
||||
end
|
||||
end
|
||||
|
||||
# Start the Ruby GC sampler only when Prometheus metrics are enabled. The guard
# had been left commented out, which started the sampler thread unconditionally.
if Gitlab::Metrics.prometheus_metrics_enabled?
  Gitlab::Metrics::Samplers::RubySampler.initialize_instance(1.second).start
end
|
||||
|
|
|
@ -123,8 +123,8 @@ def instrument_classes(instrumentation)
|
|||
end
|
||||
# rubocop:enable Metrics/AbcSize
|
||||
|
||||
unless Sidekiq.server?
|
||||
Gitlab::Metrics::UnicornSampler.initialize_instance(Settings.monitoring.unicorn_sampler_interval).start
|
||||
unless Sidekiq.server?
|
||||
Gitlab::Metrics::Samplers::UnicornSampler.initialize_instance(Settings.monitoring.unicorn_sampler_interval).start
|
||||
end
|
||||
|
||||
Gitlab::Application.configure do |config|
|
||||
|
@ -192,7 +192,7 @@ if Gitlab::Metrics.enabled?
|
|||
|
||||
GC::Profiler.enable
|
||||
|
||||
Gitlab::Metrics::InfluxSampler.initialize_instance.start
|
||||
Gitlab::Metrics::Samplers::InfluxSampler.initialize_instance.start
|
||||
|
||||
module TrackNewRedisConnections
|
||||
def connect(*args)
|
||||
|
|
|
@ -1,63 +0,0 @@
|
|||
require 'logger'
|
||||
module Gitlab
|
||||
module Metrics
|
||||
class BaseSampler < Daemon
|
||||
# interval - The sampling interval in seconds.
|
||||
def initialize(interval)
|
||||
interval_half = interval.to_f / 2
|
||||
|
||||
@interval = interval
|
||||
@interval_steps = (-interval_half..interval_half).step(0.1).to_a
|
||||
|
||||
super()
|
||||
end
|
||||
|
||||
def safe_sample
|
||||
sample
|
||||
rescue => e
|
||||
Rails.logger.warn("#{self.class}: #{e}, stopping")
|
||||
stop
|
||||
end
|
||||
|
||||
def sample
|
||||
raise NotImplementedError
|
||||
end
|
||||
|
||||
# Returns the sleep interval with a random adjustment.
|
||||
#
|
||||
# The random adjustment is put in place to ensure we:
|
||||
#
|
||||
# 1. Don't generate samples at the exact same interval every time (thus
|
||||
# potentially missing anything that happens in between samples).
|
||||
# 2. Don't sample data at the same interval two times in a row.
|
||||
def sleep_interval
|
||||
while (step = @interval_steps.sample)
|
||||
if step != @last_step
|
||||
@last_step = step
|
||||
|
||||
return @interval + @last_step
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
private
|
||||
|
||||
attr_reader :running
|
||||
|
||||
def start_working
|
||||
@running = true
|
||||
sleep(sleep_interval)
|
||||
|
||||
while running
|
||||
safe_sample
|
||||
|
||||
sleep(sleep_interval)
|
||||
end
|
||||
end
|
||||
|
||||
def stop_working
|
||||
@running = false
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
|
@ -1,101 +0,0 @@
|
|||
module Gitlab
|
||||
module Metrics
|
||||
# Class that sends certain metrics to InfluxDB at a specific interval.
|
||||
#
|
||||
# This class is used to gather statistics that can't be directly associated
|
||||
# with a transaction such as system memory usage, garbage collection
|
||||
# statistics, etc.
|
||||
class InfluxSampler < BaseSampler
|
||||
# interval - The sampling interval in seconds.
|
||||
def initialize(interval = Metrics.settings[:sample_interval])
|
||||
super(interval)
|
||||
@last_step = nil
|
||||
|
||||
@metrics = []
|
||||
|
||||
@last_minor_gc = Delta.new(GC.stat[:minor_gc_count])
|
||||
@last_major_gc = Delta.new(GC.stat[:major_gc_count])
|
||||
|
||||
if Gitlab::Metrics.mri?
|
||||
require 'allocations'
|
||||
|
||||
Allocations.start
|
||||
end
|
||||
end
|
||||
|
||||
def sample
|
||||
sample_memory_usage
|
||||
sample_file_descriptors
|
||||
sample_objects
|
||||
sample_gc
|
||||
|
||||
flush
|
||||
ensure
|
||||
GC::Profiler.clear
|
||||
@metrics.clear
|
||||
end
|
||||
|
||||
def flush
|
||||
Metrics.submit_metrics(@metrics.map(&:to_hash))
|
||||
end
|
||||
|
||||
def sample_memory_usage
|
||||
add_metric('memory_usage', value: System.memory_usage)
|
||||
end
|
||||
|
||||
def sample_file_descriptors
|
||||
add_metric('file_descriptors', value: System.file_descriptor_count)
|
||||
end
|
||||
|
||||
if Metrics.mri?
|
||||
def sample_objects
|
||||
sample = Allocations.to_hash
|
||||
counts = sample.each_with_object({}) do |(klass, count), hash|
|
||||
name = klass.name
|
||||
|
||||
next unless name
|
||||
|
||||
hash[name] = count
|
||||
end
|
||||
|
||||
# Symbols aren't allocated so we'll need to add those manually.
|
||||
counts['Symbol'] = Symbol.all_symbols.length
|
||||
|
||||
counts.each do |name, count|
|
||||
add_metric('object_counts', { count: count }, type: name)
|
||||
end
|
||||
end
|
||||
else
|
||||
def sample_objects
|
||||
end
|
||||
end
|
||||
|
||||
def sample_gc
|
||||
time = GC::Profiler.total_time * 1000.0
|
||||
stats = GC.stat.merge(total_time: time)
|
||||
|
||||
# We want the difference of GC runs compared to the last sample, not the
|
||||
# total amount since the process started.
|
||||
stats[:minor_gc_count] =
|
||||
@last_minor_gc.compared_with(stats[:minor_gc_count])
|
||||
|
||||
stats[:major_gc_count] =
|
||||
@last_major_gc.compared_with(stats[:major_gc_count])
|
||||
|
||||
stats[:count] = stats[:minor_gc_count] + stats[:major_gc_count]
|
||||
|
||||
add_metric('gc_statistics', stats)
|
||||
end
|
||||
|
||||
def add_metric(series, values, tags = {})
|
||||
prefix = sidekiq? ? 'sidekiq_' : 'rails_'
|
||||
|
||||
@metrics << Metric.new("#{prefix}#{series}", values, tags)
|
||||
end
|
||||
|
||||
def sidekiq?
|
||||
Sidekiq.server?
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
96
lib/gitlab/metrics/samplers/base_sampler.rb
Normal file
96
lib/gitlab/metrics/samplers/base_sampler.rb
Normal file
|
@ -0,0 +1,96 @@
|
|||
require 'logger'
|
||||
module Gitlab
  module Metrics
    module Samplers
      # Base class for samplers that periodically call #sample on a background
      # thread. Subclasses implement #sample and may override #enabled?.
      class BaseSampler
        # Creates the per-class singleton sampler and registers an at_exit hook
        # that stops it.
        #
        # args - Arguments forwarded to the constructor.
        #
        # Returns the new instance.
        # Raises RuntimeError if the singleton was already initialized.
        def self.initialize_instance(*args)
          raise "#{name} singleton instance already initialized" if @instance

          @instance = new(*args)
          at_exit(&@instance.method(:stop))
          @instance
        end

        # Returns the singleton created by .initialize_instance, or nil.
        def self.instance
          @instance
        end

        attr_reader :running

        # interval - The sampling interval in seconds.
        def initialize(interval)
          interval_half = interval.to_f / 2

          @interval = interval
          @interval_steps = (-interval_half..interval_half).step(0.1).to_a
          @last_step = nil

          @mutex = Mutex.new
        end

        # Whether #start is allowed to spawn the sampling thread.
        def enabled?
          true
        end

        # Spawns the sampling thread unless the sampler is disabled or already
        # running.
        #
        # Returns the new Thread, or nil when nothing was started.
        def start
          return unless enabled?

          @mutex.synchronize do
            return if running

            @running = true

            @thread = Thread.new do
              sleep(sleep_interval)

              while running
                safe_sample

                sleep(sleep_interval)
              end
            end
          end
        end

        # Stops sampling. When called from another thread this wakes the
        # sampling thread and waits for it to finish.
        def stop
          # #safe_sample calls #stop from the sampling thread itself. A thread
          # cannot join itself (ThreadError), and taking the mutex here could
          # deadlock against another thread that holds it while joining us. So
          # just clear the flag; the loop in #start exits on its next check.
          if Thread.current.equal?(@thread)
            @running = false
            return
          end

          @mutex.synchronize do
            return unless running

            @running = false

            if @thread
              begin
                @thread.wakeup if @thread.alive?
              rescue ThreadError
                # The thread finished between the alive? check and the wakeup.
              end

              @thread.join
              @thread = nil
            end
          end
        end

        # Runs #sample; on any StandardError logs a warning and stops the
        # sampler instead of propagating the error.
        def safe_sample
          sample
        rescue => e
          Rails.logger.warn("#{self.class}: #{e}, stopping")
          stop
        end

        # Collects one round of samples. Must be implemented by subclasses.
        def sample
          raise NotImplementedError
        end

        # Returns the sleep interval with a random adjustment.
        #
        # The random adjustment is put in place to ensure we:
        #
        # 1. Don't generate samples at the exact same interval every time (thus
        #    potentially missing anything that happens in between samples).
        # 2. Don't sample data at the same interval two times in a row.
        def sleep_interval
          # With fewer than two candidate steps (very small intervals) there is
          # nothing to alternate between; looping for a "different" step would
          # never terminate.
          return @interval + @interval_steps.first.to_f if @interval_steps.size < 2

          while (step = @interval_steps.sample)
            if step != @last_step
              @last_step = step

              return @interval + @last_step
            end
          end
        end
      end
    end
  end
end
|
103
lib/gitlab/metrics/samplers/influx_sampler.rb
Normal file
103
lib/gitlab/metrics/samplers/influx_sampler.rb
Normal file
|
@ -0,0 +1,103 @@
|
|||
module Gitlab
  module Metrics
    module Samplers
      # Sampler that periodically submits process-wide measurements to InfluxDB.
      #
      # It covers data that cannot be tied to a single transaction: memory
      # usage, open file descriptors, object counts and garbage collection
      # statistics.
      class InfluxSampler < BaseSampler
        # interval - The sampling interval in seconds.
        def initialize(interval = Metrics.settings[:sample_interval])
          super(interval)

          @last_step = nil
          @metrics = []

          # Baselines used by #sample_gc to report per-sample GC run deltas.
          @last_minor_gc = Delta.new(GC.stat[:minor_gc_count])
          @last_major_gc = Delta.new(GC.stat[:major_gc_count])

          return unless Gitlab::Metrics.mri?

          require 'allocations'

          Allocations.start
        end

        # Gathers every metric and submits the batch. The GC profiler data and
        # the metric buffer are cleared afterwards even when a step raises.
        def sample
          begin
            sample_memory_usage
            sample_file_descriptors
            sample_objects
            sample_gc

            flush
          ensure
            GC::Profiler.clear
            @metrics.clear
          end
        end

        # Submits the buffered metrics, converted to hashes.
        def flush
          payload = @metrics.map(&:to_hash)

          Metrics.submit_metrics(payload)
        end

        def sample_memory_usage
          usage = System.memory_usage

          add_metric('memory_usage', value: usage)
        end

        def sample_file_descriptors
          descriptors = System.file_descriptor_count

          add_metric('file_descriptors', value: descriptors)
        end

        if Metrics.mri?
          # Adds one 'object_counts' metric per named class reported by
          # Allocations, plus an entry for Symbol.
          def sample_objects
            counts = {}

            Allocations.to_hash.each do |klass, count|
              class_name = klass.name

              # Classes without a name are skipped.
              counts[class_name] = count if class_name
            end

            # Symbols aren't allocated so we'll need to add those manually.
            counts['Symbol'] = Symbol.all_symbols.length

            counts.each do |class_name, count|
              add_metric('object_counts', { count: count }, type: class_name)
            end
          end
        else
          # Object counts are only sampled on MRI.
          def sample_objects
          end
        end

        # Adds a 'gc_statistics' metric built from GC.stat plus the profiler's
        # total time multiplied by 1000.
        def sample_gc
          time = GC::Profiler.total_time * 1000.0
          stats = GC.stat.merge(total_time: time)

          # We want the difference of GC runs compared to the last sample, not
          # the total amount since the process started.
          minor_runs = @last_minor_gc.compared_with(stats[:minor_gc_count])
          major_runs = @last_major_gc.compared_with(stats[:major_gc_count])

          stats[:minor_gc_count] = minor_runs
          stats[:major_gc_count] = major_runs
          stats[:count] = minor_runs + major_runs

          add_metric('gc_statistics', stats)
        end

        # Buffers a metric whose series name is prefixed with 'sidekiq_' or
        # 'rails_' depending on the current process.
        #
        # series - The series name, without prefix.
        # values - A Hash of values to store.
        # tags   - A Hash of tags to attach.
        def add_metric(series, values, tags = {})
          prefix =
            if sidekiq?
              'sidekiq_'
            else
              'rails_'
            end

          @metrics << Metric.new("#{prefix}#{series}", values, tags)
        end

        def sidekiq?
          Sidekiq.server?
        end
      end
    end
  end
end
|
89
lib/gitlab/metrics/samplers/ruby_sampler.rb
Normal file
89
lib/gitlab/metrics/samplers/ruby_sampler.rb
Normal file
|
@ -0,0 +1,89 @@
|
|||
module Gitlab
  module Metrics
    module Samplers
      # Sampler that publishes Ruby garbage collection statistics as gauges:
      # one gauge per GC.stat key plus one for the GC profiler's total time.
      class RubySampler < BaseSampler
        COUNTS = [:count, :minor_gc_count, :major_gc_count].freeze

        # Returns the Hash of gauges keyed by statistic name, building it on
        # first use.
        def metrics
          @metrics ||= init_metrics
        end

        # Returns the metric name for a statistic as a Symbol,
        # e.g. :ruby_gc_total_time.
        def with_prefix(name)
          "ruby_gc_#{name}".to_sym
        end

        # Returns the documentation string used for a statistic's gauge.
        def to_doc_string(name)
          name.to_s.humanize
        end

        # Returns the labels attached to every gauge value.
        def labels
          worker_label.merge(source_label)
        end

        # interval - The sampling interval in seconds.
        def initialize(interval)
          super(interval)

          # NOTE(review): profiler data is never cleared by this sampler;
          # confirm that is acceptable for long-running processes.
          GC::Profiler.enable

          # Build the gauges once and keep them, so #metrics does not have to
          # construct them a second time.
          @metrics = init_metrics
        end

        # Builds a gauge for the profiler's total time and for every GC.stat key.
        #
        # Returns a Hash mapping statistic name to gauge.
        def init_metrics
          metrics = {}
          metrics[:total_time] = Gitlab::Metrics.gauge(with_prefix(:total_time), to_doc_string(:total_time), labels, :livesum)

          GC.stat.each_key do |key|
            metrics[key] = Gitlab::Metrics.gauge(with_prefix(key), to_doc_string(key), labels, :livesum)
          end

          metrics
        end

        # Sets every gauge to the current value of its statistic.
        def sample
          metrics[:total_time].set(labels, GC::Profiler.total_time * 1000)

          GC.stat.each do |key, value|
            metrics[key].set(labels, value)
          end
        end

        # Returns a label identifying the kind of process being sampled.
        def source_label
          if Sidekiq.server?
            { source: 'sidekiq' }
          else
            { source: 'rails' }
          end
        end

        # Returns a label with the Unicorn worker number, 'master' when no
        # worker object exists, or an empty Hash when Unicorn is not loaded.
        def worker_label
          return {} unless defined?(Unicorn::Worker)

          # Without a block each_object returns an Enumerator, so #first is
          # nil when no worker object is found.
          worker = ObjectSpace.each_object(Unicorn::Worker).first

          if worker
            { unicorn: worker.nr }
          else
            { unicorn: 'master' }
          end
        end
      end
    end
  end
end
|
50
lib/gitlab/metrics/samplers/unicorn_sampler.rb
Normal file
50
lib/gitlab/metrics/samplers/unicorn_sampler.rb
Normal file
|
@ -0,0 +1,50 @@
|
|||
module Gitlab
  module Metrics
    module Samplers
      # Sampler that records the active and queued connection counts of every
      # Unicorn listener, as reported by Raindrops.
      class UnicornSampler < BaseSampler
        # interval - The sampling interval in seconds.
        def initialize(interval)
          super
        end

        # Gauge holding the number of active connections per listener.
        def unicorn_active_connections
          @unicorn_active_connections ||=
            Gitlab::Metrics.gauge(:unicorn_active_connections, 'Unicorn active connections', {}, :max)
        end

        # Gauge holding the number of queued connections per listener.
        def unicorn_queued_connections
          @unicorn_queued_connections ||=
            Gitlab::Metrics.gauge(:unicorn_queued_connections, 'Unicorn queued connections', {}, :max)
        end

        def enabled?
          listeners_present = unicorn_with_listeners?

          # Raindrops::Linux.tcp_listener_stats is only present on Linux
          listeners_present && Raindrops::Linux.respond_to?(:tcp_listener_stats)
        end

        # Records the stats of the TCP listeners first, then the UNIX socket
        # listeners.
        def sample
          record_listener_stats('tcp', Raindrops::Linux.tcp_listener_stats(tcp_listeners))
          record_listener_stats('unix', Raindrops::Linux.unix_listener_stats(unix_listeners))
        end

        private

        # Sets both gauges for every address in the given listener stats.
        #
        # type           - The listener type label ('tcp' or 'unix').
        # listener_stats - The address => stats pairs returned by Raindrops.
        def record_listener_stats(type, listener_stats)
          listener_stats.each do |addr, stats|
            unicorn_active_connections.set({ type: type, address: addr }, stats.active)
            unicorn_queued_connections.set({ type: type, address: addr }, stats.queued)
          end
        end

        # Listener names of the form "<no slashes>:<digits>".
        def tcp_listeners
          @tcp_listeners ||= begin
            tcp_pattern = %r{\A[^/]+:\d+\z}

            Unicorn.listener_names.grep(tcp_pattern)
          end
        end

        # Every listener name that is not a TCP listener.
        def unix_listeners
          @unix_listeners ||= Unicorn.listener_names - tcp_listeners
        end

        def unicorn_with_listeners?
          defined?(Unicorn) && Unicorn.listener_names.any?
        end
      end
    end
  end
end
|
|
@ -1,48 +0,0 @@
|
|||
module Gitlab
|
||||
module Metrics
|
||||
class UnicornSampler < BaseSampler
|
||||
def initialize(interval)
|
||||
super(interval)
|
||||
end
|
||||
|
||||
def unicorn_active_connections
|
||||
@unicorn_active_connections ||= Gitlab::Metrics.gauge(:unicorn_active_connections, 'Unicorn active connections', {}, :max)
|
||||
end
|
||||
|
||||
def unicorn_queued_connections
|
||||
@unicorn_queued_connections ||= Gitlab::Metrics.gauge(:unicorn_queued_connections, 'Unicorn queued connections', {}, :max)
|
||||
end
|
||||
|
||||
def enabled?
|
||||
# Raindrops::Linux.tcp_listener_stats is only present on Linux
|
||||
unicorn_with_listeners? && Raindrops::Linux.respond_to?(:tcp_listener_stats)
|
||||
end
|
||||
|
||||
def sample
|
||||
Raindrops::Linux.tcp_listener_stats(tcp_listeners).each do |addr, stats|
|
||||
unicorn_active_connections.set({ type: 'tcp', address: addr }, stats.active)
|
||||
unicorn_queued_connections.set({ type: 'tcp', address: addr }, stats.queued)
|
||||
end
|
||||
|
||||
Raindrops::Linux.unix_listener_stats(unix_listeners).each do |addr, stats|
|
||||
unicorn_active_connections.set({ type: 'unix', address: addr }, stats.active)
|
||||
unicorn_queued_connections.set({ type: 'unix', address: addr }, stats.queued)
|
||||
end
|
||||
end
|
||||
|
||||
private
|
||||
|
||||
def tcp_listeners
|
||||
@tcp_listeners ||= Unicorn.listener_names.grep(%r{\A[^/]+:\d+\z})
|
||||
end
|
||||
|
||||
def unix_listeners
|
||||
@unix_listeners ||= Unicorn.listener_names - tcp_listeners
|
||||
end
|
||||
|
||||
def unicorn_with_listeners?
|
||||
defined?(Unicorn) && Unicorn.listener_names.any?
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
0
spec/lib/gitlab/metrics/samplers/gc_sampler_spec.rb
Normal file
0
spec/lib/gitlab/metrics/samplers/gc_sampler_spec.rb
Normal file
|
@ -1,6 +1,6 @@
|
|||
require 'spec_helper'
|
||||
|
||||
describe Gitlab::Metrics::InfluxSampler do
|
||||
describe Gitlab::Metrics::Samplers::InfluxSampler do
|
||||
let(:sampler) { described_class.new(5) }
|
||||
|
||||
after do
|
|
@ -1,6 +1,6 @@
|
|||
require 'spec_helper'
|
||||
|
||||
describe Gitlab::Metrics::UnicornSampler do
|
||||
describe Gitlab::Metrics::Samplers::UnicornSampler do
|
||||
subject { described_class.new(1.second) }
|
||||
|
||||
describe '#sample' do
|
Loading…
Reference in a new issue