Add GC sampler and small refactor of samplers

This commit is contained in:
Pawel Chojnacki 2017-08-10 16:18:37 +02:00
parent 52115b905a
commit 03b38a4a8d
12 changed files with 347 additions and 217 deletions

View file

@ -19,3 +19,7 @@ Sidekiq.configure_server do |config|
Gitlab::Metrics::SidekiqMetricsExporter.instance.start
end
end
# if Gitlab::Metrics.prometheus_metrics_enabled?
Gitlab::Metrics::Samplers::RubySampler.initialize_instance(1.second).start
# end

View file

@ -123,8 +123,8 @@ def instrument_classes(instrumentation)
end
# rubocop:enable Metrics/AbcSize
unless Sidekiq.server?
Gitlab::Metrics::UnicornSampler.initialize_instance(Settings.monitoring.unicorn_sampler_interval).start
unless Sidekiq.server?
Gitlab::Metrics::Samplers::UnicornSampler.initialize_instance(Settings.monitoring.unicorn_sampler_interval).start
end
Gitlab::Application.configure do |config|
@ -192,7 +192,7 @@ if Gitlab::Metrics.enabled?
GC::Profiler.enable
Gitlab::Metrics::InfluxSampler.initialize_instance.start
Gitlab::Metrics::Samplers::InfluxSampler.initialize_instance.start
module TrackNewRedisConnections
def connect(*args)

View file

@ -1,63 +0,0 @@
require 'logger'
module Gitlab
module Metrics
class BaseSampler < Daemon
# interval - The sampling interval in seconds.
def initialize(interval)
interval_half = interval.to_f / 2
@interval = interval
@interval_steps = (-interval_half..interval_half).step(0.1).to_a
super()
end
def safe_sample
sample
rescue => e
Rails.logger.warn("#{self.class}: #{e}, stopping")
stop
end
def sample
raise NotImplementedError
end
# Returns the sleep interval with a random adjustment.
#
# The random adjustment is put in place to ensure we:
#
# 1. Don't generate samples at the exact same interval every time (thus
# potentially missing anything that happens in between samples).
# 2. Don't sample data at the same interval two times in a row.
def sleep_interval
while (step = @interval_steps.sample)
if step != @last_step
@last_step = step
return @interval + @last_step
end
end
end
private
attr_reader :running
def start_working
@running = true
sleep(sleep_interval)
while running
safe_sample
sleep(sleep_interval)
end
end
def stop_working
@running = false
end
end
end
end

View file

@ -1,101 +0,0 @@
module Gitlab
module Metrics
# Class that sends certain metrics to InfluxDB at a specific interval.
#
# This class is used to gather statistics that can't be directly associated
# with a transaction such as system memory usage, garbage collection
# statistics, etc.
class InfluxSampler < BaseSampler
# interval - The sampling interval in seconds.
def initialize(interval = Metrics.settings[:sample_interval])
super(interval)
@last_step = nil
@metrics = []
@last_minor_gc = Delta.new(GC.stat[:minor_gc_count])
@last_major_gc = Delta.new(GC.stat[:major_gc_count])
if Gitlab::Metrics.mri?
require 'allocations'
Allocations.start
end
end
def sample
sample_memory_usage
sample_file_descriptors
sample_objects
sample_gc
flush
ensure
GC::Profiler.clear
@metrics.clear
end
def flush
Metrics.submit_metrics(@metrics.map(&:to_hash))
end
def sample_memory_usage
add_metric('memory_usage', value: System.memory_usage)
end
def sample_file_descriptors
add_metric('file_descriptors', value: System.file_descriptor_count)
end
if Metrics.mri?
def sample_objects
sample = Allocations.to_hash
counts = sample.each_with_object({}) do |(klass, count), hash|
name = klass.name
next unless name
hash[name] = count
end
# Symbols aren't allocated so we'll need to add those manually.
counts['Symbol'] = Symbol.all_symbols.length
counts.each do |name, count|
add_metric('object_counts', { count: count }, type: name)
end
end
else
def sample_objects
end
end
def sample_gc
time = GC::Profiler.total_time * 1000.0
stats = GC.stat.merge(total_time: time)
# We want the difference of GC runs compared to the last sample, not the
# total amount since the process started.
stats[:minor_gc_count] =
@last_minor_gc.compared_with(stats[:minor_gc_count])
stats[:major_gc_count] =
@last_major_gc.compared_with(stats[:major_gc_count])
stats[:count] = stats[:minor_gc_count] + stats[:major_gc_count]
add_metric('gc_statistics', stats)
end
def add_metric(series, values, tags = {})
prefix = sidekiq? ? 'sidekiq_' : 'rails_'
@metrics << Metric.new("#{prefix}#{series}", values, tags)
end
def sidekiq?
Sidekiq.server?
end
end
end
end

View file

@ -0,0 +1,96 @@
require 'logger'
module Gitlab
module Metrics
module Samplers
class BaseSampler
def self.initialize_instance(*args)
raise "#{name} singleton instance already initialized" if @instance
@instance = new(*args)
at_exit(&@instance.method(:stop))
@instance
end
def self.instance
@instance
end
attr_reader :running
# interval - The sampling interval in seconds.
def initialize(interval)
interval_half = interval.to_f / 2
@interval = interval
@interval_steps = (-interval_half..interval_half).step(0.1).to_a
@mutex = Mutex.new
end
def enabled?
true
end
def start
return unless enabled?
@mutex.synchronize do
return if running
@running = true
@thread = Thread.new do
sleep(sleep_interval)
while running
safe_sample
sleep(sleep_interval)
end
end
end
end
def stop
@mutex.synchronize do
return unless running
@running = false
if @thread
@thread.wakeup if @thread.alive?
@thread.join
@thread = nil
end
end
end
def safe_sample
sample
rescue => e
Rails.logger.warn("#{self.class}: #{e}, stopping")
stop
end
def sample
raise NotImplementedError
end
# Returns the sleep interval with a random adjustment.
#
# The random adjustment is put in place to ensure we:
#
# 1. Don't generate samples at the exact same interval every time (thus
# potentially missing anything that happens in between samples).
# 2. Don't sample data at the same interval two times in a row.
def sleep_interval
while step = @interval_steps.sample
if step != @last_step
@last_step = step
return @interval + @last_step
end
end
end
end
end
end
end

View file

@ -0,0 +1,103 @@
module Gitlab
module Metrics
module Samplers
# Class that sends certain metrics to InfluxDB at a specific interval.
#
# This class is used to gather statistics that can't be directly associated
# with a transaction such as system memory usage, garbage collection
# statistics, etc.
class InfluxSampler < BaseSampler
# interval - The sampling interval in seconds.
def initialize(interval = Metrics.settings[:sample_interval])
super(interval)
@last_step = nil
@metrics = []
@last_minor_gc = Delta.new(GC.stat[:minor_gc_count])
@last_major_gc = Delta.new(GC.stat[:major_gc_count])
if Gitlab::Metrics.mri?
require 'allocations'
Allocations.start
end
end
def sample
sample_memory_usage
sample_file_descriptors
sample_objects
sample_gc
flush
ensure
GC::Profiler.clear
@metrics.clear
end
def flush
Metrics.submit_metrics(@metrics.map(&:to_hash))
end
def sample_memory_usage
add_metric('memory_usage', value: System.memory_usage)
end
def sample_file_descriptors
add_metric('file_descriptors', value: System.file_descriptor_count)
end
if Metrics.mri?
def sample_objects
sample = Allocations.to_hash
counts = sample.each_with_object({}) do |(klass, count), hash|
name = klass.name
next unless name
hash[name] = count
end
# Symbols aren't allocated so we'll need to add those manually.
counts['Symbol'] = Symbol.all_symbols.length
counts.each do |name, count|
add_metric('object_counts', { count: count }, type: name)
end
end
else
def sample_objects
end
end
def sample_gc
time = GC::Profiler.total_time * 1000.0
stats = GC.stat.merge(total_time: time)
# We want the difference of GC runs compared to the last sample, not the
# total amount since the process started.
stats[:minor_gc_count] =
@last_minor_gc.compared_with(stats[:minor_gc_count])
stats[:major_gc_count] =
@last_major_gc.compared_with(stats[:major_gc_count])
stats[:count] = stats[:minor_gc_count] + stats[:major_gc_count]
add_metric('gc_statistics', stats)
end
def add_metric(series, values, tags = {})
prefix = sidekiq? ? 'sidekiq_' : 'rails_'
@metrics << Metric.new("#{prefix}#{series}", values, tags)
end
def sidekiq?
Sidekiq.server?
end
end
end
end
end

View file

@ -0,0 +1,89 @@
module Gitlab
module Metrics
module Samplers
class RubySampler < BaseSampler
COUNTS = [:count, :minor_gc_count, :major_gc_count]
def metrics
@metrics ||= init_metrics
end
def with_prefix(name)
"ruby_gc_#{name}".to_sym
end
def to_doc_string(name)
name.to_s.humanize
end
def labels
worker_label.merge(source_label)
end
def initialize(interval)
super(interval)
GC::Profiler.enable
Rails.logger.info("123")
init_metrics
end
def init_metrics
metrics = {}
metrics[:total_time] = Gitlab::Metrics.gauge(with_prefix(:total_time), to_doc_string(:total_time), labels, :livesum)
GC.stat.keys.each do |key|
metrics[key] = Gitlab::Metrics.gauge(with_prefix(key), to_doc_string(key), labels, :livesum)
end
metrics
end
def sample
metrics[:total_time].set(labels, GC::Profiler.total_time * 1000)
GC.stat.each do |key, value|
metrics[key].set(labels, value)
end
end
def source_label
if Sidekiq.server?
{ source: 'sidekiq' }
else
{ source: 'rails' }
end
end
def worker_label
return {} unless defined?(Unicorn::Worker)
worker = if defined?(Unicorn::Worker)
ObjectSpace.each_object(Unicorn::Worker)&.first
end
if worker
{ unicorn: worker.nr }
else
{ unicorn: 'master' }
end
end
def sample_gc
time = GC::Profiler.total_time * 1000.0
stats = GC.stat.merge(total_time: time)
# We want the difference of GC runs compared to the last sample, not the
# total amount since the process started.
stats[:minor_gc_count] =
@last_minor_gc.compared_with(stats[:minor_gc_count])
stats[:major_gc_count] =
@last_major_gc.compared_with(stats[:major_gc_count])
stats[:count] = stats[:minor_gc_count] + stats[:major_gc_count]
add_metric('gc_statistics', stats)
end
end
end
end
end

View file

@ -0,0 +1,50 @@
module Gitlab
module Metrics
module Samplers
class UnicornSampler < BaseSampler
def initialize(interval)
super(interval)
end
def unicorn_active_connections
@unicorn_active_connections ||= Gitlab::Metrics.gauge(:unicorn_active_connections, 'Unicorn active connections', {}, :max)
end
def unicorn_queued_connections
@unicorn_queued_connections ||= Gitlab::Metrics.gauge(:unicorn_queued_connections, 'Unicorn queued connections', {}, :max)
end
def enabled?
# Raindrops::Linux.tcp_listener_stats is only present on Linux
unicorn_with_listeners? && Raindrops::Linux.respond_to?(:tcp_listener_stats)
end
def sample
Raindrops::Linux.tcp_listener_stats(tcp_listeners).each do |addr, stats|
unicorn_active_connections.set({ type: 'tcp', address: addr }, stats.active)
unicorn_queued_connections.set({ type: 'tcp', address: addr }, stats.queued)
end
Raindrops::Linux.unix_listener_stats(unix_listeners).each do |addr, stats|
unicorn_active_connections.set({ type: 'unix', address: addr }, stats.active)
unicorn_queued_connections.set({ type: 'unix', address: addr }, stats.queued)
end
end
private
def tcp_listeners
@tcp_listeners ||= Unicorn.listener_names.grep(%r{\A[^/]+:\d+\z})
end
def unix_listeners
@unix_listeners ||= Unicorn.listener_names - tcp_listeners
end
def unicorn_with_listeners?
defined?(Unicorn) && Unicorn.listener_names.any?
end
end
end
end
end

View file

@ -1,48 +0,0 @@
module Gitlab
module Metrics
class UnicornSampler < BaseSampler
def initialize(interval)
super(interval)
end
def unicorn_active_connections
@unicorn_active_connections ||= Gitlab::Metrics.gauge(:unicorn_active_connections, 'Unicorn active connections', {}, :max)
end
def unicorn_queued_connections
@unicorn_queued_connections ||= Gitlab::Metrics.gauge(:unicorn_queued_connections, 'Unicorn queued connections', {}, :max)
end
def enabled?
# Raindrops::Linux.tcp_listener_stats is only present on Linux
unicorn_with_listeners? && Raindrops::Linux.respond_to?(:tcp_listener_stats)
end
def sample
Raindrops::Linux.tcp_listener_stats(tcp_listeners).each do |addr, stats|
unicorn_active_connections.set({ type: 'tcp', address: addr }, stats.active)
unicorn_queued_connections.set({ type: 'tcp', address: addr }, stats.queued)
end
Raindrops::Linux.unix_listener_stats(unix_listeners).each do |addr, stats|
unicorn_active_connections.set({ type: 'unix', address: addr }, stats.active)
unicorn_queued_connections.set({ type: 'unix', address: addr }, stats.queued)
end
end
private
def tcp_listeners
@tcp_listeners ||= Unicorn.listener_names.grep(%r{\A[^/]+:\d+\z})
end
def unix_listeners
@unix_listeners ||= Unicorn.listener_names - tcp_listeners
end
def unicorn_with_listeners?
defined?(Unicorn) && Unicorn.listener_names.any?
end
end
end
end

View file

@ -1,6 +1,6 @@
require 'spec_helper'
describe Gitlab::Metrics::InfluxSampler do
describe Gitlab::Metrics::Samplers::InfluxSampler do
let(:sampler) { described_class.new(5) }
after do

View file

@ -1,6 +1,6 @@
require 'spec_helper'
describe Gitlab::Metrics::UnicornSampler do
describe Gitlab::Metrics::Samplers::UnicornSampler do
subject { described_class.new(1.second) }
describe '#sample' do