From 26ac691a688cb569a7345d8f31a406d467240bb2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Chojnacki?= Date: Tue, 4 Jul 2017 15:28:34 +0000 Subject: [PATCH] Instrument Unicorn with Ruby exporter --- Gemfile | 1 + Gemfile.lock | 5 +- config/gitlab.yml.example | 4 + config/initializers/1_settings.rb | 6 + config/initializers/8_metrics.rb | 9 +- lib/gitlab/metrics/base_sampler.rb | 94 +++++++++++++++ .../metrics/connection_rack_middleware.rb | 45 ++++++++ .../metrics/{sampler.rb => influx_sampler.rb} | 40 +------ lib/gitlab/metrics/prometheus.rb | 4 +- lib/gitlab/metrics/unicorn_sampler.rb | 48 ++++++++ spec/initializers/8_metrics_spec.rb | 10 +- .../connection_rack_middleware_spec.rb | 88 ++++++++++++++ ...sampler_spec.rb => influx_sampler_spec.rb} | 10 +- .../gitlab/metrics/unicorn_sampler_spec.rb | 108 ++++++++++++++++++ 14 files changed, 425 insertions(+), 47 deletions(-) create mode 100644 lib/gitlab/metrics/base_sampler.rb create mode 100644 lib/gitlab/metrics/connection_rack_middleware.rb rename lib/gitlab/metrics/{sampler.rb => influx_sampler.rb} (71%) create mode 100644 lib/gitlab/metrics/unicorn_sampler.rb create mode 100644 spec/lib/gitlab/metrics/connection_rack_middleware_spec.rb rename spec/lib/gitlab/metrics/{sampler_spec.rb => influx_sampler_spec.rb} (94%) create mode 100644 spec/lib/gitlab/metrics/unicorn_sampler_spec.rb diff --git a/Gemfile b/Gemfile index 71d2eb1557c..323827ed01b 100644 --- a/Gemfile +++ b/Gemfile @@ -285,6 +285,7 @@ group :metrics do # Prometheus gem 'prometheus-client-mmap', '~>0.7.0.beta5' + gem 'raindrops', '~> 0.18' end group :development do diff --git a/Gemfile.lock b/Gemfile.lock index f4ddd30da1b..e6ea006356c 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -599,7 +599,7 @@ GEM premailer-rails (1.9.7) actionmailer (>= 3, < 6) premailer (~> 1.7, >= 1.7.9) - prometheus-client-mmap (0.7.0.beta5) + prometheus-client-mmap (0.7.0.beta7) mmap2 (~> 2.2.6) pry (0.10.4) coderay (~> 1.1.0) @@ -658,7 +658,7 @@ GEM thor (>= 0.18.1, < 2.0) rainbow (2.2.2) rake - raindrops (0.17.0) + raindrops (0.18.0) rake (10.5.0) rblineprof (0.3.6) debugger-ruby_core_source (~> 1.3) @@ -1062,6 +1062,7 @@ DEPENDENCIES rails-deprecated_sanitizer (~> 1.0.3) rails-i18n (~> 4.0.9) rainbow (~> 2.2) + raindrops (~> 0.18) rblineprof (~> 0.3.6) rdoc (~> 4.2) recaptcha (~> 3.0) diff --git a/config/gitlab.yml.example b/config/gitlab.yml.example index 43a8c0078ca..4b81fd90f59 100644 --- a/config/gitlab.yml.example +++ b/config/gitlab.yml.example @@ -543,6 +543,10 @@ production: &base # enabled: true # host: localhost # port: 3808 + prometheus: + # Time between sampling of unicorn socket metrics, in seconds + # unicorn_sampler_interval: 10 + # # 5. Extra customization diff --git a/config/initializers/1_settings.rb b/config/initializers/1_settings.rb index 8ddf8e4d2e4..cb11d2c34f4 100644 --- a/config/initializers/1_settings.rb +++ b/config/initializers/1_settings.rb @@ -494,6 +494,12 @@ Settings.webpack.dev_server['enabled'] ||= false Settings.webpack.dev_server['host'] ||= 'localhost' Settings.webpack.dev_server['port'] ||= 3808 +# +# Prometheus metrics settings +# +Settings['prometheus'] ||= Settingslogic.new({}) +Settings.prometheus['unicorn_sampler_interval'] ||= 10 + # # Testing settings # diff --git a/config/initializers/8_metrics.rb b/config/initializers/8_metrics.rb index a0a63ddf8f0..d56fd7a6cfa 100644 --- a/config/initializers/8_metrics.rb +++ b/config/initializers/8_metrics.rb @@ -119,6 +119,13 @@ def instrument_classes(instrumentation) end # rubocop:enable Metrics/AbcSize +Gitlab::Metrics::UnicornSampler.initialize_instance(Settings.prometheus.unicorn_sampler_interval).start + +Gitlab::Application.configure do |config| + # 0 should be Sentry to catch errors in this middleware + config.middleware.insert(1, Gitlab::Metrics::ConnectionRackMiddleware) +end + if Gitlab::Metrics.enabled? require 'pathname' require 'influxdb' @@ -175,7 +182,7 @@ if Gitlab::Metrics.enabled? GC::Profiler.enable - Gitlab::Metrics::Sampler.new.start + Gitlab::Metrics::InfluxSampler.initialize_instance.start module TrackNewRedisConnections def connect(*args) diff --git a/lib/gitlab/metrics/base_sampler.rb b/lib/gitlab/metrics/base_sampler.rb new file mode 100644 index 00000000000..219accfc029 --- /dev/null +++ b/lib/gitlab/metrics/base_sampler.rb @@ -0,0 +1,94 @@ +require 'logger' +module Gitlab + module Metrics + class BaseSampler + def self.initialize_instance(*args) + raise "#{name} singleton instance already initialized" if @instance + @instance = new(*args) + at_exit(&@instance.method(:stop)) + @instance + end + + def self.instance + @instance + end + + attr_reader :running + + # interval - The sampling interval in seconds. + def initialize(interval) + interval_half = interval.to_f / 2 + + @interval = interval + @interval_steps = (-interval_half..interval_half).step(0.1).to_a + + @mutex = Mutex.new + end + + def enabled? + true + end + + def start + return unless enabled? + + @mutex.synchronize do + return if running + @running = true + + @thread = Thread.new do + sleep(sleep_interval) + + while running + safe_sample + + sleep(sleep_interval) + end + end + end + end + + def stop + @mutex.synchronize do + return unless running + + @running = false + + if @thread + @thread.wakeup if @thread.alive? + @thread.join + @thread = nil + end + end + end + + def safe_sample + sample + rescue => e + Rails.logger.warn("#{self.class}: #{e}, stopping") + stop + end + + def sample + raise NotImplementedError + end + + # Returns the sleep interval with a random adjustment. + # + # The random adjustment is put in place to ensure we: + # + # 1. Don't generate samples at the exact same interval every time (thus + # potentially missing anything that happens in between samples). + # 2. Don't sample data at the same interval two times in a row. + def sleep_interval + while step = @interval_steps.sample + if step != @last_step + @last_step = step + + return @interval + @last_step + end + end + end + end + end +end diff --git a/lib/gitlab/metrics/connection_rack_middleware.rb b/lib/gitlab/metrics/connection_rack_middleware.rb new file mode 100644 index 00000000000..b3da360be8f --- /dev/null +++ b/lib/gitlab/metrics/connection_rack_middleware.rb @@ -0,0 +1,45 @@ +module Gitlab + module Metrics + class ConnectionRackMiddleware + def initialize(app) + @app = app + end + + def self.rack_request_count + @rack_request_count ||= Gitlab::Metrics.counter(:rack_request, 'Rack request count') + end + + def self.rack_response_count + @rack_response_count ||= Gitlab::Metrics.counter(:rack_response, 'Rack response count') + end + + def self.rack_uncaught_errors_count + @rack_uncaught_errors_count ||= Gitlab::Metrics.counter(:rack_uncaught_errors, 'Rack connections handling uncaught errors count') + end + + def self.rack_execution_time + @rack_execution_time ||= Gitlab::Metrics.histogram(:rack_execution_time, 'Rack connection handling execution time', + {}, [0.05, 0.1, 0.25, 0.5, 0.7, 1, 1.5, 2, 2.5, 3, 5, 7, 10]) + end + + def call(env) + method = env['REQUEST_METHOD'].downcase + started = Time.now.to_f + begin + ConnectionRackMiddleware.rack_request_count.increment(method: method) + + status, headers, body = @app.call(env) + + ConnectionRackMiddleware.rack_response_count.increment(method: method, status: status) + [status, headers, body] + rescue + ConnectionRackMiddleware.rack_uncaught_errors_count.increment + raise + ensure + elapsed = Time.now.to_f - started + ConnectionRackMiddleware.rack_execution_time.observe({}, elapsed) + end + end + end + end +end diff --git a/lib/gitlab/metrics/sampler.rb b/lib/gitlab/metrics/influx_sampler.rb similarity index 71% rename from lib/gitlab/metrics/sampler.rb rename to lib/gitlab/metrics/influx_sampler.rb index 0000450d9bb..6db1dd755b7 100644 --- a/lib/gitlab/metrics/sampler.rb +++ b/lib/gitlab/metrics/influx_sampler.rb @@ -5,14 +5,11 @@ module Gitlab # This class is used to gather statistics that can't be directly associated # with a transaction such as system memory usage, garbage collection # statistics, etc. - class Sampler + class InfluxSampler < BaseSampler # interval - The sampling interval in seconds. def initialize(interval = Metrics.settings[:sample_interval]) - interval_half = interval.to_f / 2 - - @interval = interval - @interval_steps = (-interval_half..interval_half).step(0.1).to_a - @last_step = nil + super(interval) + @last_step = nil @metrics = [] @@ -26,18 +23,6 @@ module Gitlab end end - def start - Thread.new do - Thread.current.abort_on_exception = true - - loop do - sleep(sleep_interval) - - sample - end - end - end - def sample sample_memory_usage sample_file_descriptors @@ -86,7 +71,7 @@ module Gitlab end def sample_gc - time = GC::Profiler.total_time * 1000.0 + time = GC::Profiler.total_time * 1000.0 stats = GC.stat.merge(total_time: time) # We want the difference of GC runs compared to the last sample, not the @@ -111,23 +96,6 @@ module Gitlab def sidekiq? Sidekiq.server? end - - # Returns the sleep interval with a random adjustment. - # - # The random adjustment is put in place to ensure we: - # - # 1. Don't generate samples at the exact same interval every time (thus - # potentially missing anything that happens in between samples). - # 2. Don't sample data at the same interval two times in a row. - def sleep_interval - while step = @interval_steps.sample - if step != @last_step - @last_step = step - - return @interval + @last_step - end - end - end end end end diff --git a/lib/gitlab/metrics/prometheus.rb b/lib/gitlab/metrics/prometheus.rb index 9d314a56e58..fb7bbc7cfc7 100644 --- a/lib/gitlab/metrics/prometheus.rb +++ b/lib/gitlab/metrics/prometheus.rb @@ -29,8 +29,8 @@ module Gitlab provide_metric(name) || registry.summary(name, docstring, base_labels) end - def gauge(name, docstring, base_labels = {}) - provide_metric(name) || registry.gauge(name, docstring, base_labels) + def gauge(name, docstring, base_labels = {}, multiprocess_mode = :all) + provide_metric(name) || registry.gauge(name, docstring, base_labels, multiprocess_mode) end def histogram(name, docstring, base_labels = {}, buckets = ::Prometheus::Client::Histogram::DEFAULT_BUCKETS) diff --git a/lib/gitlab/metrics/unicorn_sampler.rb b/lib/gitlab/metrics/unicorn_sampler.rb new file mode 100644 index 00000000000..f6987252039 --- /dev/null +++ b/lib/gitlab/metrics/unicorn_sampler.rb @@ -0,0 +1,48 @@ +module Gitlab + module Metrics + class UnicornSampler < BaseSampler + def initialize(interval) + super(interval) + end + + def unicorn_active_connections + @unicorn_active_connections ||= Gitlab::Metrics.gauge(:unicorn_active_connections, 'Unicorn active connections', {}, :max) + end + + def unicorn_queued_connections + @unicorn_queued_connections ||= Gitlab::Metrics.gauge(:unicorn_queued_connections, 'Unicorn queued connections', {}, :max) + end + + def enabled? + # Raindrops::Linux.tcp_listener_stats is only present on Linux + unicorn_with_listeners? && Raindrops::Linux.respond_to?(:tcp_listener_stats) + end + + def sample + Raindrops::Linux.tcp_listener_stats(tcp_listeners).each do |addr, stats| + unicorn_active_connections.set({ type: 'tcp', address: addr }, stats.active) + unicorn_queued_connections.set({ type: 'tcp', address: addr }, stats.queued) + end + + Raindrops::Linux.unix_listener_stats(unix_listeners).each do |addr, stats| + unicorn_active_connections.set({ type: 'unix', address: addr }, stats.active) + unicorn_queued_connections.set({ type: 'unix', address: addr }, stats.queued) + end + end + + private + + def tcp_listeners + @tcp_listeners ||= Unicorn.listener_names.grep(%r{\A[^/]+:\d+\z}) + end + + def unix_listeners + @unix_listeners ||= Unicorn.listener_names - tcp_listeners + end + + def unicorn_with_listeners? + defined?(Unicorn) && Unicorn.listener_names.any? + end + end + end +end diff --git a/spec/initializers/8_metrics_spec.rb b/spec/initializers/8_metrics_spec.rb index a507d7f7f2b..d4189f902fd 100644 --- a/spec/initializers/8_metrics_spec.rb +++ b/spec/initializers/8_metrics_spec.rb @@ -1,17 +1,25 @@ require 'spec_helper' -require_relative '../../config/initializers/8_metrics' describe 'instrument_classes', lib: true do let(:config) { double(:config) } + let(:unicorn_sampler) { double(:unicorn_sampler) } + let(:influx_sampler) { double(:influx_sampler) } + before do allow(config).to receive(:instrument_method) allow(config).to receive(:instrument_methods) allow(config).to receive(:instrument_instance_method) allow(config).to receive(:instrument_instance_methods) + allow(Gitlab::Metrics::UnicornSampler).to receive(:initialize_instance).and_return(unicorn_sampler) + allow(Gitlab::Metrics::InfluxSampler).to receive(:initialize_instance).and_return(influx_sampler) + allow(unicorn_sampler).to receive(:start) + allow(influx_sampler).to receive(:start) + allow(Gitlab::Application).to receive(:configure) end it 'can autoload and instrument all files' do + require_relative '../../config/initializers/8_metrics' expect { instrument_classes(config) }.not_to raise_error end end diff --git a/spec/lib/gitlab/metrics/connection_rack_middleware_spec.rb b/spec/lib/gitlab/metrics/connection_rack_middleware_spec.rb new file mode 100644 index 00000000000..94251af305f --- /dev/null +++ b/spec/lib/gitlab/metrics/connection_rack_middleware_spec.rb @@ -0,0 +1,88 @@ +require 'spec_helper' + +describe Gitlab::Metrics::ConnectionRackMiddleware do + let(:app) { double('app') } + subject { described_class.new(app) } + + around do |example| + Timecop.freeze { example.run } + end + + describe '#call' do + let(:status) { 100 } + let(:env) { { 'REQUEST_METHOD' => 'GET' } } + let(:stack_result) { [status, {}, 'body'] } + + before do + allow(app).to receive(:call).and_return(stack_result) + end + + context '@app.call succeeds with 200' do + before do + allow(app).to receive(:call).and_return([200, nil, nil]) + end + + it 'increments response count with status label' do + expect(described_class).to receive_message_chain(:rack_response_count, :increment).with(include(status: 200, method: 'get')) + + subject.call(env) + end + + it 'increments requests count' do + expect(described_class).to receive_message_chain(:rack_request_count, :increment).with(method: 'get') + + subject.call(env) + end + + it 'measures execution time' do + execution_time = 10 + allow(app).to receive(:call) do |*args| + Timecop.freeze(execution_time.seconds) + end + + expect(described_class).to receive_message_chain(:rack_execution_time, :observe).with({}, execution_time) + + subject.call(env) + end + end + + context '@app.call throws exception' do + let(:rack_response_count) { double('rack_response_count') } + + before do + allow(app).to receive(:call).and_raise(StandardError) + allow(described_class).to receive(:rack_response_count).and_return(rack_response_count) + end + + it 'increments exceptions count' do + expect(described_class).to receive_message_chain(:rack_uncaught_errors_count, :increment) + + expect { subject.call(env) }.to raise_error(StandardError) + end + + it 'increments requests count' do + expect(described_class).to receive_message_chain(:rack_request_count, :increment).with(method: 'get') + + expect { subject.call(env) }.to raise_error(StandardError) + end + + it "does't increment response count" do + expect(described_class.rack_response_count).not_to receive(:increment) + + expect { subject.call(env) }.to raise_error(StandardError) + end + + it 'measures execution time' do + execution_time = 10 + allow(app).to receive(:call) do |*args| + Timecop.freeze(execution_time.seconds) + raise StandardError + end + + expect(described_class).to receive_message_chain(:rack_execution_time, :observe).with({}, execution_time) + + expect { subject.call(env) }.to raise_error(StandardError) + end + end + end +end diff --git a/spec/lib/gitlab/metrics/sampler_spec.rb b/spec/lib/gitlab/metrics/influx_sampler_spec.rb similarity index 94% rename from spec/lib/gitlab/metrics/sampler_spec.rb rename to spec/lib/gitlab/metrics/influx_sampler_spec.rb index d07ce6f81af..0bc68d64276 100644 --- a/spec/lib/gitlab/metrics/sampler_spec.rb +++ b/spec/lib/gitlab/metrics/influx_sampler_spec.rb @@ -1,6 +1,6 @@ require 'spec_helper' -describe Gitlab::Metrics::Sampler do +describe Gitlab::Metrics::InfluxSampler do let(:sampler) { described_class.new(5) } after do @@ -8,10 +8,10 @@ describe Gitlab::Metrics::Sampler do end describe '#start' do - it 'gathers a sample at a given interval' do - expect(sampler).to receive(:sleep).with(a_kind_of(Numeric)) - expect(sampler).to receive(:sample) - expect(sampler).to receive(:loop).and_yield + it 'runs once and gathers a sample at a given interval' do + expect(sampler).to receive(:sleep).with(a_kind_of(Numeric)).twice + expect(sampler).to receive(:sample).once + expect(sampler).to receive(:running).and_return(false, true, false) sampler.start.join end diff --git a/spec/lib/gitlab/metrics/unicorn_sampler_spec.rb b/spec/lib/gitlab/metrics/unicorn_sampler_spec.rb new file mode 100644 index 00000000000..dc0d1f2e940 --- /dev/null +++ b/spec/lib/gitlab/metrics/unicorn_sampler_spec.rb @@ -0,0 +1,108 @@ +require 'spec_helper' + +describe Gitlab::Metrics::UnicornSampler do + subject { described_class.new(1.second) } + + describe '#sample' do + let(:unicorn) { double('unicorn') } + let(:raindrops) { double('raindrops') } + let(:stats) { double('stats') } + + before do + stub_const('Unicorn', unicorn) + stub_const('Raindrops::Linux', raindrops) + allow(raindrops).to receive(:unix_listener_stats).and_return({}) + allow(raindrops).to receive(:tcp_listener_stats).and_return({}) + end + + context 'unicorn listens on unix sockets' do + let(:socket_address) { '/some/sock' } + let(:sockets) { [socket_address] } + + before do + allow(unicorn).to receive(:listener_names).and_return(sockets) + end + + it 'samples socket data' do + expect(raindrops).to receive(:unix_listener_stats).with(sockets) + + subject.sample + end + + context 'stats collected' do + before do + allow(stats).to receive(:active).and_return('active') + allow(stats).to receive(:queued).and_return('queued') + allow(raindrops).to receive(:unix_listener_stats).and_return({ socket_address => stats }) + end + + it 'updates metrics type unix and with addr' do + labels = { type: 'unix', address: socket_address } + + expect(subject).to receive_message_chain(:unicorn_active_connections, :set).with(labels, 'active') + expect(subject).to receive_message_chain(:unicorn_queued_connections, :set).with(labels, 'queued') + + subject.sample + end + end + end + + context 'unicorn listens on tcp sockets' do + let(:tcp_socket_address) { '0.0.0.0:8080' } + let(:tcp_sockets) { [tcp_socket_address] } + + before do + allow(unicorn).to receive(:listener_names).and_return(tcp_sockets) + end + + it 'samples socket data' do + expect(raindrops).to receive(:tcp_listener_stats).with(tcp_sockets) + + subject.sample + end + + context 'stats collected' do + before do + allow(stats).to receive(:active).and_return('active') + allow(stats).to receive(:queued).and_return('queued') + allow(raindrops).to receive(:tcp_listener_stats).and_return({ tcp_socket_address => stats }) + end + + it 'updates metrics type unix and with addr' do + labels = { type: 'tcp', address: tcp_socket_address } + + expect(subject).to receive_message_chain(:unicorn_active_connections, :set).with(labels, 'active') + expect(subject).to receive_message_chain(:unicorn_queued_connections, :set).with(labels, 'queued') + + subject.sample + end + end + end + end + + describe '#start' do + context 'when enabled' do + before do + allow(subject).to receive(:enabled?).and_return(true) + end + + it 'creates new thread' do + expect(Thread).to receive(:new) + + subject.start + end + end + + context 'when disabled' do + before do + allow(subject).to receive(:enabled?).and_return(false) + end + + it "doesn't create new thread" do + expect(Thread).not_to receive(:new) + + subject.start + end + end + end +end