Merge branch '33410-unicorn_metrics' into 'master'
Instrument Unicorn with Ruby exporter Closes #33410 See merge request !12115
This commit is contained in:
commit
d453bb8635
14 changed files with 425 additions and 47 deletions
1
Gemfile
1
Gemfile
|
@ -285,6 +285,7 @@ group :metrics do
|
|||
|
||||
# Prometheus
|
||||
gem 'prometheus-client-mmap', '~>0.7.0.beta5'
|
||||
gem 'raindrops', '~> 0.18'
|
||||
end
|
||||
|
||||
group :development do
|
||||
|
|
|
@ -599,7 +599,7 @@ GEM
|
|||
premailer-rails (1.9.7)
|
||||
actionmailer (>= 3, < 6)
|
||||
premailer (~> 1.7, >= 1.7.9)
|
||||
prometheus-client-mmap (0.7.0.beta5)
|
||||
prometheus-client-mmap (0.7.0.beta7)
|
||||
mmap2 (~> 2.2.6)
|
||||
pry (0.10.4)
|
||||
coderay (~> 1.1.0)
|
||||
|
@ -658,7 +658,7 @@ GEM
|
|||
thor (>= 0.18.1, < 2.0)
|
||||
rainbow (2.2.2)
|
||||
rake
|
||||
raindrops (0.17.0)
|
||||
raindrops (0.18.0)
|
||||
rake (10.5.0)
|
||||
rblineprof (0.3.6)
|
||||
debugger-ruby_core_source (~> 1.3)
|
||||
|
@ -1062,6 +1062,7 @@ DEPENDENCIES
|
|||
rails-deprecated_sanitizer (~> 1.0.3)
|
||||
rails-i18n (~> 4.0.9)
|
||||
rainbow (~> 2.2)
|
||||
raindrops (~> 0.18)
|
||||
rblineprof (~> 0.3.6)
|
||||
rdoc (~> 4.2)
|
||||
recaptcha (~> 3.0)
|
||||
|
|
|
@ -543,6 +543,10 @@ production: &base
|
|||
# enabled: true
|
||||
# host: localhost
|
||||
# port: 3808
|
||||
prometheus:
|
||||
# Time between sampling of unicorn socket metrics, in seconds
|
||||
# unicorn_sampler_interval: 10
|
||||
|
||||
|
||||
#
|
||||
# 5. Extra customization
|
||||
|
|
|
@ -494,6 +494,12 @@ Settings.webpack.dev_server['enabled'] ||= false
|
|||
Settings.webpack.dev_server['host'] ||= 'localhost'
|
||||
Settings.webpack.dev_server['port'] ||= 3808
|
||||
|
||||
#
|
||||
# Prometheus metrics settings
|
||||
#
|
||||
Settings['prometheus'] ||= Settingslogic.new({})
|
||||
Settings.prometheus['unicorn_sampler_interval'] ||= 10
|
||||
|
||||
#
|
||||
# Testing settings
|
||||
#
|
||||
|
|
|
@ -119,6 +119,13 @@ def instrument_classes(instrumentation)
|
|||
end
|
||||
# rubocop:enable Metrics/AbcSize
|
||||
|
||||
Gitlab::Metrics::UnicornSampler.initialize_instance(Settings.prometheus.unicorn_sampler_interval).start
|
||||
|
||||
Gitlab::Application.configure do |config|
|
||||
# 0 should be Sentry to catch errors in this middleware
|
||||
config.middleware.insert(1, Gitlab::Metrics::ConnectionRackMiddleware)
|
||||
end
|
||||
|
||||
if Gitlab::Metrics.enabled?
|
||||
require 'pathname'
|
||||
require 'influxdb'
|
||||
|
@ -175,7 +182,7 @@ if Gitlab::Metrics.enabled?
|
|||
|
||||
GC::Profiler.enable
|
||||
|
||||
Gitlab::Metrics::Sampler.new.start
|
||||
Gitlab::Metrics::InfluxSampler.initialize_instance.start
|
||||
|
||||
module TrackNewRedisConnections
|
||||
def connect(*args)
|
||||
|
|
94
lib/gitlab/metrics/base_sampler.rb
Normal file
94
lib/gitlab/metrics/base_sampler.rb
Normal file
|
@ -0,0 +1,94 @@
|
|||
require 'logger'
|
||||
module Gitlab
  module Metrics
    # Base class for background samplers. A sampler runs #sample on its own
    # thread, sleeping a randomly adjusted interval between runs, until #stop
    # is called or #sample raises.
    #
    # Subclasses must implement #sample and may override #enabled?.
    class BaseSampler
      # Creates the singleton instance for the receiving class and registers an
      # at_exit hook that stops it.
      #
      # args - Arguments forwarded to the constructor.
      #
      # Returns the newly created instance.
      # Raises RuntimeError when an instance was already initialized.
      def self.initialize_instance(*args)
        raise "#{name} singleton instance already initialized" if @instance

        @instance = new(*args)
        at_exit(&@instance.method(:stop))
        @instance
      end

      # Returns the instance created by .initialize_instance, or nil.
      def self.instance
        @instance
      end

      attr_reader :running

      # interval - The sampling interval in seconds.
      def initialize(interval)
        interval_half = interval.to_f / 2

        @interval = interval
        @interval_steps = (-interval_half..interval_half).step(0.1).to_a
        @last_step = nil

        @mutex = Mutex.new
      end

      # Whether #start is allowed to spawn the sampling thread.
      def enabled?
        true
      end

      # Starts the sampling thread unless the sampler is disabled or already
      # running.
      #
      # Returns the new Thread, or nil when nothing was started.
      def start
        return unless enabled?

        @mutex.synchronize do
          return if running

          @running = true

          @thread = Thread.new do
            sleep(sleep_interval)

            while running
              safe_sample

              sleep(sleep_interval)
            end
          end
        end
      end

      # Stops the sampler and waits for the sampling thread to finish.
      #
      # May be called from the sampling thread itself (see #safe_sample); in
      # that case the thread is not joined, because a thread joining itself
      # raises ThreadError. The loop in #start then ends on its own once it
      # observes that #running is false.
      #
      # Returns nil.
      def stop
        @mutex.synchronize do
          return unless running

          @running = false

          if @thread
            unless @thread.equal?(Thread.current)
              @thread.wakeup if @thread.alive?
              @thread.join
            end

            @thread = nil
          end
        end
      end

      # Runs #sample; on any StandardError logs a warning and stops the
      # sampler instead of letting the exception escape.
      def safe_sample
        sample
      rescue => e
        Rails.logger.warn("#{self.class}: #{e}, stopping")
        stop
      end

      # Gathers one sample. Must be implemented by subclasses.
      def sample
        raise NotImplementedError
      end

      # Returns the sleep interval with a random adjustment.
      #
      # The random adjustment is put in place to ensure we:
      #
      # 1. Don't generate samples at the exact same interval every time (thus
      #    potentially missing anything that happens in between samples).
      # 2. Don't sample data at the same interval two times in a row.
      def sleep_interval
        # With a single candidate step (e.g. an interval of 0) the step can
        # never differ from the previous one, so the loop below would never
        # terminate; return the only possible value instead.
        return @interval + @interval_steps.first if @interval_steps.size == 1

        while step = @interval_steps.sample
          if step != @last_step
            @last_step = step

            return @interval + @last_step
          end
        end
      end
    end
  end
end
|
45
lib/gitlab/metrics/connection_rack_middleware.rb
Normal file
45
lib/gitlab/metrics/connection_rack_middleware.rb
Normal file
|
@ -0,0 +1,45 @@
|
|||
module Gitlab
  module Metrics
    # Rack middleware that counts requests, responses and uncaught errors and
    # records how long each call to the wrapped app takes.
    class ConnectionRackMiddleware
      class << self
        # Counter incremented once per incoming request.
        def rack_request_count
          @rack_request_count ||= Gitlab::Metrics.counter(:rack_request, 'Rack request count')
        end

        # Counter incremented once per response returned by the app.
        def rack_response_count
          @rack_response_count ||= Gitlab::Metrics.counter(:rack_response, 'Rack response count')
        end

        # Counter incremented when the app raises a StandardError.
        def rack_uncaught_errors_count
          @rack_uncaught_errors_count ||= Gitlab::Metrics.counter(:rack_uncaught_errors, 'Rack connections handling uncaught errors count')
        end

        # Histogram of the time spent in the wrapped app, in seconds.
        def rack_execution_time
          @rack_execution_time ||= Gitlab::Metrics.histogram(
            :rack_execution_time,
            'Rack connection handling execution time',
            {},
            [0.05, 0.1, 0.25, 0.5, 0.7, 1, 1.5, 2, 2.5, 3, 5, 7, 10]
          )
        end
      end

      # app - The next Rack application in the stack.
      def initialize(app)
        @app = app
      end

      # Calls the wrapped app and updates the metrics around it.
      #
      # env - The Rack environment; 'REQUEST_METHOD' is used as a label.
      #
      # Returns the [status, headers, body] triple produced by the app.
      # Errors raised by the app are counted and re-raised.
      def call(env)
        verb = env['REQUEST_METHOD'].downcase
        started_at = Time.now.to_f

        begin
          ConnectionRackMiddleware.rack_request_count.increment(method: verb)

          status, headers, body = @app.call(env)

          ConnectionRackMiddleware.rack_response_count.increment(method: verb, status: status)

          [status, headers, body]
        rescue
          ConnectionRackMiddleware.rack_uncaught_errors_count.increment
          raise
        ensure
          # Recorded on both the success and the failure path.
          ConnectionRackMiddleware.rack_execution_time.observe({}, Time.now.to_f - started_at)
        end
      end
    end
  end
end
|
|
@ -5,13 +5,10 @@ module Gitlab
|
|||
# This class is used to gather statistics that can't be directly associated
|
||||
# with a transaction such as system memory usage, garbage collection
|
||||
# statistics, etc.
|
||||
class Sampler
|
||||
class InfluxSampler < BaseSampler
|
||||
# interval - The sampling interval in seconds.
|
||||
def initialize(interval = Metrics.settings[:sample_interval])
|
||||
interval_half = interval.to_f / 2
|
||||
|
||||
@interval = interval
|
||||
@interval_steps = (-interval_half..interval_half).step(0.1).to_a
|
||||
super(interval)
|
||||
@last_step = nil
|
||||
|
||||
@metrics = []
|
||||
|
@ -26,18 +23,6 @@ module Gitlab
|
|||
end
|
||||
end
|
||||
|
||||
def start
|
||||
Thread.new do
|
||||
Thread.current.abort_on_exception = true
|
||||
|
||||
loop do
|
||||
sleep(sleep_interval)
|
||||
|
||||
sample
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
def sample
|
||||
sample_memory_usage
|
||||
sample_file_descriptors
|
||||
|
@ -111,23 +96,6 @@ module Gitlab
|
|||
def sidekiq?
|
||||
Sidekiq.server?
|
||||
end
|
||||
|
||||
# Returns the sleep interval with a random adjustment.
|
||||
#
|
||||
# The random adjustment is put in place to ensure we:
|
||||
#
|
||||
# 1. Don't generate samples at the exact same interval every time (thus
|
||||
# potentially missing anything that happens in between samples).
|
||||
# 2. Don't sample data at the same interval two times in a row.
|
||||
def sleep_interval
|
||||
while step = @interval_steps.sample
|
||||
if step != @last_step
|
||||
@last_step = step
|
||||
|
||||
return @interval + @last_step
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
|
@ -29,8 +29,8 @@ module Gitlab
|
|||
provide_metric(name) || registry.summary(name, docstring, base_labels)
|
||||
end
|
||||
|
||||
def gauge(name, docstring, base_labels = {})
|
||||
provide_metric(name) || registry.gauge(name, docstring, base_labels)
|
||||
def gauge(name, docstring, base_labels = {}, multiprocess_mode = :all)
|
||||
provide_metric(name) || registry.gauge(name, docstring, base_labels, multiprocess_mode)
|
||||
end
|
||||
|
||||
def histogram(name, docstring, base_labels = {}, buckets = ::Prometheus::Client::Histogram::DEFAULT_BUCKETS)
|
||||
|
|
48
lib/gitlab/metrics/unicorn_sampler.rb
Normal file
48
lib/gitlab/metrics/unicorn_sampler.rb
Normal file
|
@ -0,0 +1,48 @@
|
|||
module Gitlab
  module Metrics
    # Sampler that publishes the active and queued connection counts of every
    # Unicorn listener, as reported by Raindrops::Linux.
    class UnicornSampler < BaseSampler
      # interval - The sampling interval in seconds.
      def initialize(interval)
        super(interval)
      end

      # Gauge holding the number of active connections per listener.
      def unicorn_active_connections
        @unicorn_active_connections ||= Gitlab::Metrics.gauge(:unicorn_active_connections, 'Unicorn active connections', {}, :max)
      end

      # Gauge holding the number of queued connections per listener.
      def unicorn_queued_connections
        @unicorn_queued_connections ||= Gitlab::Metrics.gauge(:unicorn_queued_connections, 'Unicorn queued connections', {}, :max)
      end

      # Sampling only makes sense inside Unicorn with at least one listener.
      def enabled?
        listening = unicorn_with_listeners?
        return listening unless listening

        # Raindrops::Linux.tcp_listener_stats is only present on Linux
        Raindrops::Linux.respond_to?(:tcp_listener_stats)
      end

      # Reads the TCP listener stats, then the UNIX socket listener stats, and
      # updates both gauges for every address returned.
      def sample
        record_listener_stats('tcp', Raindrops::Linux.tcp_listener_stats(tcp_listeners))
        record_listener_stats('unix', Raindrops::Linux.unix_listener_stats(unix_listeners))
      end

      private

      # Sets both gauges for each address in stats_by_address, labelled with
      # the given listener type.
      def record_listener_stats(type, stats_by_address)
        stats_by_address.each do |address, listener_stats|
          unicorn_active_connections.set({ type: type, address: address }, listener_stats.active)
          unicorn_queued_connections.set({ type: type, address: address }, listener_stats.queued)
        end
      end

      # Listener names of the form "host:port".
      def tcp_listeners
        @tcp_listeners ||= Unicorn.listener_names.grep(%r{\A[^/]+:\d+\z})
      end

      # Every listener name that is not a TCP listener.
      def unix_listeners
        @unix_listeners ||= Unicorn.listener_names - tcp_listeners
      end

      def unicorn_with_listeners?
        defined?(Unicorn) && Unicorn.listener_names.any?
      end
    end
  end
end
|
|
@ -1,17 +1,25 @@
|
|||
require 'spec_helper'
|
||||
require_relative '../../config/initializers/8_metrics'
|
||||
|
||||
describe 'instrument_classes', lib: true do
|
||||
let(:config) { double(:config) }
|
||||
|
||||
let(:unicorn_sampler) { double(:unicorn_sampler) }
|
||||
let(:influx_sampler) { double(:influx_sampler) }
|
||||
|
||||
before do
|
||||
allow(config).to receive(:instrument_method)
|
||||
allow(config).to receive(:instrument_methods)
|
||||
allow(config).to receive(:instrument_instance_method)
|
||||
allow(config).to receive(:instrument_instance_methods)
|
||||
allow(Gitlab::Metrics::UnicornSampler).to receive(:initialize_instance).and_return(unicorn_sampler)
|
||||
allow(Gitlab::Metrics::InfluxSampler).to receive(:initialize_instance).and_return(influx_sampler)
|
||||
allow(unicorn_sampler).to receive(:start)
|
||||
allow(influx_sampler).to receive(:start)
|
||||
allow(Gitlab::Application).to receive(:configure)
|
||||
end
|
||||
|
||||
it 'can autoload and instrument all files' do
|
||||
require_relative '../../config/initializers/8_metrics'
|
||||
expect { instrument_classes(config) }.not_to raise_error
|
||||
end
|
||||
end
|
||||
|
|
88
spec/lib/gitlab/metrics/connection_rack_middleware_spec.rb
Normal file
88
spec/lib/gitlab/metrics/connection_rack_middleware_spec.rb
Normal file
|
@ -0,0 +1,88 @@
|
|||
require 'spec_helper'
|
||||
|
||||
# Verifies that the middleware updates the request, response, error and
# timing metrics around the wrapped Rack app.
describe Gitlab::Metrics::ConnectionRackMiddleware do
  let(:app) { double('app') }
  subject { described_class.new(app) }

  # Freeze time so the elapsed-time expectations below are exact.
  around do |example|
    Timecop.freeze { example.run }
  end

  describe '#call' do
    let(:status) { 100 }
    let(:env) { { 'REQUEST_METHOD' => 'GET' } }
    let(:stack_result) { [status, {}, 'body'] }

    before do
      allow(app).to receive(:call).and_return(stack_result)
    end

    context '@app.call succeeds with 200' do
      before do
        allow(app).to receive(:call).and_return([200, nil, nil])
      end

      it 'increments response count with status label' do
        expect(described_class).to receive_message_chain(:rack_response_count, :increment).with(include(status: 200, method: 'get'))

        subject.call(env)
      end

      it 'increments requests count' do
        expect(described_class).to receive_message_chain(:rack_request_count, :increment).with(method: 'get')

        subject.call(env)
      end

      it 'measures execution time' do
        execution_time = 10
        allow(app).to receive(:call) do
          Timecop.freeze(execution_time.seconds)

          # Return a Rack triple so the response counter is not labelled with
          # the Time object that Timecop.freeze returns.
          [200, nil, nil]
        end

        expect(described_class).to receive_message_chain(:rack_execution_time, :observe).with({}, execution_time)

        subject.call(env)
      end
    end

    context '@app.call throws exception' do
      let(:rack_response_count) { double('rack_response_count') }

      before do
        allow(app).to receive(:call).and_raise(StandardError)
        allow(described_class).to receive(:rack_response_count).and_return(rack_response_count)
      end

      it 'increments exceptions count' do
        expect(described_class).to receive_message_chain(:rack_uncaught_errors_count, :increment)

        expect { subject.call(env) }.to raise_error(StandardError)
      end

      it 'increments requests count' do
        expect(described_class).to receive_message_chain(:rack_request_count, :increment).with(method: 'get')

        expect { subject.call(env) }.to raise_error(StandardError)
      end

      it "doesn't increment response count" do
        expect(described_class.rack_response_count).not_to receive(:increment)

        expect { subject.call(env) }.to raise_error(StandardError)
      end

      it 'measures execution time' do
        execution_time = 10
        allow(app).to receive(:call) do
          Timecop.freeze(execution_time.seconds)
          raise StandardError
        end

        expect(described_class).to receive_message_chain(:rack_execution_time, :observe).with({}, execution_time)

        expect { subject.call(env) }.to raise_error(StandardError)
      end
    end
  end
end
|
|
@ -1,6 +1,6 @@
|
|||
require 'spec_helper'
|
||||
|
||||
describe Gitlab::Metrics::Sampler do
|
||||
describe Gitlab::Metrics::InfluxSampler do
|
||||
let(:sampler) { described_class.new(5) }
|
||||
|
||||
after do
|
||||
|
@ -8,10 +8,10 @@ describe Gitlab::Metrics::Sampler do
|
|||
end
|
||||
|
||||
describe '#start' do
|
||||
it 'gathers a sample at a given interval' do
|
||||
expect(sampler).to receive(:sleep).with(a_kind_of(Numeric))
|
||||
expect(sampler).to receive(:sample)
|
||||
expect(sampler).to receive(:loop).and_yield
|
||||
it 'runs once and gathers a sample at a given interval' do
|
||||
expect(sampler).to receive(:sleep).with(a_kind_of(Numeric)).twice
|
||||
expect(sampler).to receive(:sample).once
|
||||
expect(sampler).to receive(:running).and_return(false, true, false)
|
||||
|
||||
sampler.start.join
|
||||
end
|
108
spec/lib/gitlab/metrics/unicorn_sampler_spec.rb
Normal file
108
spec/lib/gitlab/metrics/unicorn_sampler_spec.rb
Normal file
|
@ -0,0 +1,108 @@
|
|||
require 'spec_helper'
|
||||
|
||||
# Verifies that the sampler queries Raindrops for the right listeners, labels
# the gauges correctly, and only spawns a thread when enabled.
describe Gitlab::Metrics::UnicornSampler do
  subject { described_class.new(1.second) }

  describe '#sample' do
    let(:unicorn) { double('unicorn') }
    let(:raindrops) { double('raindrops') }
    let(:stats) { double('stats') }

    before do
      # Replace both constants so no real Unicorn or Raindrops is needed.
      stub_const('Unicorn', unicorn)
      stub_const('Raindrops::Linux', raindrops)
      allow(raindrops).to receive(:unix_listener_stats).and_return({})
      allow(raindrops).to receive(:tcp_listener_stats).and_return({})
    end

    context 'unicorn listens on unix sockets' do
      let(:socket_address) { '/some/sock' }
      let(:sockets) { [socket_address] }

      before do
        allow(unicorn).to receive(:listener_names).and_return(sockets)
      end

      it 'samples socket data' do
        expect(raindrops).to receive(:unix_listener_stats).with(sockets)

        subject.sample
      end

      context 'stats collected' do
        before do
          allow(stats).to receive(:active).and_return('active')
          allow(stats).to receive(:queued).and_return('queued')
          allow(raindrops).to receive(:unix_listener_stats).and_return({ socket_address => stats })
        end

        it 'updates metrics type unix and with addr' do
          labels = { type: 'unix', address: socket_address }

          expect(subject).to receive_message_chain(:unicorn_active_connections, :set).with(labels, 'active')
          expect(subject).to receive_message_chain(:unicorn_queued_connections, :set).with(labels, 'queued')

          subject.sample
        end
      end
    end

    context 'unicorn listens on tcp sockets' do
      let(:tcp_socket_address) { '0.0.0.0:8080' }
      let(:tcp_sockets) { [tcp_socket_address] }

      before do
        allow(unicorn).to receive(:listener_names).and_return(tcp_sockets)
      end

      it 'samples socket data' do
        expect(raindrops).to receive(:tcp_listener_stats).with(tcp_sockets)

        subject.sample
      end

      context 'stats collected' do
        before do
          allow(stats).to receive(:active).and_return('active')
          allow(stats).to receive(:queued).and_return('queued')
          allow(raindrops).to receive(:tcp_listener_stats).and_return({ tcp_socket_address => stats })
        end

        it 'updates metrics type tcp and with addr' do
          labels = { type: 'tcp', address: tcp_socket_address }

          expect(subject).to receive_message_chain(:unicorn_active_connections, :set).with(labels, 'active')
          expect(subject).to receive_message_chain(:unicorn_queued_connections, :set).with(labels, 'queued')

          subject.sample
        end
      end
    end
  end

  describe '#start' do
    context 'when enabled' do
      before do
        allow(subject).to receive(:enabled?).and_return(true)
      end

      it 'creates new thread' do
        expect(Thread).to receive(:new)

        subject.start
      end
    end

    context 'when disabled' do
      before do
        allow(subject).to receive(:enabled?).and_return(false)
      end

      it "doesn't create new thread" do
        expect(Thread).not_to receive(:new)

        subject.start
      end
    end
  end
end
|
Loading…
Reference in a new issue