diff --git a/doc/administration/monitoring/prometheus/gitlab_metrics.md b/doc/administration/monitoring/prometheus/gitlab_metrics.md index 4529bd3bee3..3dcd1593099 100644 --- a/doc/administration/monitoring/prometheus/gitlab_metrics.md +++ b/doc/administration/monitoring/prometheus/gitlab_metrics.md @@ -43,10 +43,11 @@ The following metrics are available: | redis_ping_latency_seconds | Gauge | 9.4 | Round trip time of the redis ping | | user_session_logins_total | Counter | 9.4 | Counter of how many users have logged in | | upload_file_does_not_exist | Counter | 10.7 in EE, 11.5 in CE | Number of times an upload record could not find its file | -| failed_login_captcha_total | Gauge | 11.0 | Counter of failed CAPTCHA attempts during login | -| successful_login_captcha_total | Gauge | 11.0 | Counter of successful CAPTCHA attempts during login | -| unicorn_active_connections | Gauge | 11.0 | The number of active Unicorn connections (workers) | -| unicorn_queued_connections | Gauge | 11.0 | The number of queued Unicorn connections | +| failed_login_captcha_total | Gauge | 11.0 | Counter of failed CAPTCHA attempts during login | +| successful_login_captcha_total | Gauge | 11.0 | Counter of successful CAPTCHA attempts during login | +| unicorn_active_connections | Gauge | 11.0 | The number of active Unicorn connections (workers) | +| unicorn_queued_connections | Gauge | 11.0 | The number of queued Unicorn connections | +| unicorn_workers | Gauge | 12.0 | The number of Unicorn workers | ## Sidekiq Metrics available for Geo **[PREMIUM]** @@ -86,8 +87,8 @@ the `monitoring.sidekiq_exporter` configuration option in `gitlab.yml`. | geo_wikis_checksum_mismatch_count | Gauge | 10.7 | Number of wikis that checksum mismatch on secondary | url | geo_repositories_checked_count | Gauge | 11.1 | Number of repositories that have been checked via `git fsck` | url | geo_repositories_checked_failed_count | Gauge | 11.1 | Number of repositories that have a failure from `git fsck` | url -| geo_repositories_retrying_verification_count | Gauge | 11.2 | Number of repositories verification failures that Geo is actively trying to correct on secondary | url -| geo_wikis_retrying_verification_count | Gauge | 11.2 | Number of wikis verification failures that Geo is actively trying to correct on secondary | url +| geo_repositories_retrying_verification_count | Gauge | 11.2 | Number of repositories verification failures that Geo is actively trying to correct on secondary | url +| geo_wikis_retrying_verification_count | Gauge | 11.2 | Number of wikis verification failures that Geo is actively trying to correct on secondary | url ### Ruby metrics @@ -100,6 +101,10 @@ Some basic Ruby runtime metrics are available: | ruby_file_descriptors | Gauge | 11.1 | File descriptors per process | | ruby_memory_bytes | Gauge | 11.1 | Memory usage by process | | ruby_sampler_duration_seconds_total | Counter | 11.1 | Time spent collecting stats | +| ruby_process_cpu_seconds_total | Gauge | 12.0 | Total amount of CPU time per process | +| ruby_process_max_fds | Gauge | 12.0 | Maximum number of open file descriptors per process | +| ruby_process_resident_memory_bytes | Gauge | 12.0 | Memory usage by process, measured in bytes | +| ruby_process_start_time_seconds | Gauge | 12.0 | The elapsed time between system boot and the process started, measured in seconds | [GC.stat]: https://ruby-doc.org/core-2.3.0/GC.html#method-c-stat diff --git a/lib/gitlab/metrics/samplers/ruby_sampler.rb b/lib/gitlab/metrics/samplers/ruby_sampler.rb index 18a69321905..4d9c43f37e7 100644 --- a/lib/gitlab/metrics/samplers/ruby_sampler.rb +++ b/lib/gitlab/metrics/samplers/ruby_sampler.rb @@ -23,25 +23,32 @@ module Gitlab end def init_metrics - metrics = {} - metrics[:sampler_duration] = ::Gitlab::Metrics.counter(with_prefix(:sampler, :duration_seconds_total), 'Sampler time', labels) - metrics[:total_time] = ::Gitlab::Metrics.counter(with_prefix(:gc, :duration_seconds_total), 'Total GC time', labels) + metrics = { + file_descriptors: ::Gitlab::Metrics.gauge(with_prefix(:file, :descriptors), 'File descriptors used', labels, :livesum), + memory_bytes: ::Gitlab::Metrics.gauge(with_prefix(:memory, :bytes), 'Memory used', labels, :livesum), + process_cpu_seconds_total: ::Gitlab::Metrics.gauge(with_prefix(:process, :cpu_seconds_total), 'Process CPU seconds total'), + process_max_fds: ::Gitlab::Metrics.gauge(with_prefix(:process, :max_fds), 'Process max fds'), + process_resident_memory_bytes: ::Gitlab::Metrics.gauge(with_prefix(:process, :resident_memory_bytes), 'Memory used', labels, :livesum), + process_start_time_seconds: ::Gitlab::Metrics.gauge(with_prefix(:process, :start_time_seconds), 'Process start time seconds'), + sampler_duration: ::Gitlab::Metrics.counter(with_prefix(:sampler, :duration_seconds_total), 'Sampler time', labels), + total_time: ::Gitlab::Metrics.counter(with_prefix(:gc, :duration_seconds_total), 'Total GC time', labels) + } + GC.stat.keys.each do |key| metrics[key] = ::Gitlab::Metrics.gauge(with_prefix(:gc_stat, key), to_doc_string(key), labels, :livesum) end - metrics[:memory_usage] = ::Gitlab::Metrics.gauge(with_prefix(:memory, :bytes), 'Memory used', labels, :livesum) - metrics[:file_descriptors] = ::Gitlab::Metrics.gauge(with_prefix(:file, :descriptors), 'File descriptors used', labels, :livesum) - metrics end def sample start_time = System.monotonic_time - metrics[:memory_usage].set(labels.merge(worker_label), System.memory_usage) metrics[:file_descriptors].set(labels.merge(worker_label), System.file_descriptor_count) - + metrics[:process_cpu_seconds_total].set(labels.merge(worker_label), ::Gitlab::Metrics::System.cpu_time) + metrics[:process_max_fds].set(labels.merge(worker_label), ::Gitlab::Metrics::System.max_open_file_descriptors) + metrics[:process_start_time_seconds].set(labels.merge(worker_label), ::Gitlab::Metrics::System.process_start_time) + set_memory_usage_metrics sample_gc metrics[:sampler_duration].increment(labels, System.monotonic_time - start_time) @@ -61,6 +68,14 @@ module Gitlab metrics[:total_time].increment(labels, GC::Profiler.total_time) end + def set_memory_usage_metrics + memory_usage = System.memory_usage + memory_labels = labels.merge(worker_label) + + metrics[:memory_bytes].set(memory_labels, memory_usage) + metrics[:process_resident_memory_bytes].set(memory_labels, memory_usage) + end + def worker_label return {} unless defined?(Unicorn::Worker) diff --git a/lib/gitlab/metrics/samplers/unicorn_sampler.rb b/lib/gitlab/metrics/samplers/unicorn_sampler.rb index bec64e864b3..9af7e0afed4 100644 --- a/lib/gitlab/metrics/samplers/unicorn_sampler.rb +++ b/lib/gitlab/metrics/samplers/unicorn_sampler.rb @@ -4,16 +4,16 @@ module Gitlab module Metrics module Samplers class UnicornSampler < BaseSampler - def initialize(interval) - super(interval) + def metrics + @metrics ||= init_metrics end - def unicorn_active_connections - @unicorn_active_connections ||= ::Gitlab::Metrics.gauge(:unicorn_active_connections, 'Unicorn active connections', {}, :max) - end - - def unicorn_queued_connections - @unicorn_queued_connections ||= ::Gitlab::Metrics.gauge(:unicorn_queued_connections, 'Unicorn queued connections', {}, :max) + def init_metrics + { + unicorn_active_connections: ::Gitlab::Metrics.gauge(:unicorn_active_connections, 'Unicorn active connections', {}, :max), + unicorn_queued_connections: ::Gitlab::Metrics.gauge(:unicorn_queued_connections, 'Unicorn queued connections', {}, :max), + unicorn_workers: ::Gitlab::Metrics.gauge(:unicorn_workers, 'Unicorn workers') + } end def enabled? @@ -23,14 +23,13 @@ module Gitlab def sample Raindrops::Linux.tcp_listener_stats(tcp_listeners).each do |addr, stats| - unicorn_active_connections.set({ socket_type: 'tcp', socket_address: addr }, stats.active) - unicorn_queued_connections.set({ socket_type: 'tcp', socket_address: addr }, stats.queued) + set_unicorn_connection_metrics('tcp', addr, stats) + end + Raindrops::Linux.unix_listener_stats(unix_listeners).each do |addr, stats| + set_unicorn_connection_metrics('unix', addr, stats) end - Raindrops::Linux.unix_listener_stats(unix_listeners).each do |addr, stats| - unicorn_active_connections.set({ socket_type: 'unix', socket_address: addr }, stats.active) - unicorn_queued_connections.set({ socket_type: 'unix', socket_address: addr }, stats.queued) - end + metrics[:unicorn_workers].set({}, unicorn_workers_count) end private @@ -39,6 +38,13 @@ module Gitlab @tcp_listeners ||= Unicorn.listener_names.grep(%r{\A[^/]+:\d+\z}) end + def set_unicorn_connection_metrics(type, addr, stats) + labels = { socket_type: type, socket_address: addr } + + metrics[:unicorn_active_connections].set(labels, stats.active) + metrics[:unicorn_queued_connections].set(labels, stats.queued) + end + def unix_listeners @unix_listeners ||= Unicorn.listener_names - tcp_listeners end @@ -46,6 +52,10 @@ module Gitlab def unicorn_with_listeners? defined?(Unicorn) && Unicorn.listener_names.any? end + + def unicorn_workers_count + `pgrep -f '[u]nicorn_rails worker.+ #{Rails.root.to_s}'`.split.count + end end end end diff --git a/lib/gitlab/metrics/system.rb b/lib/gitlab/metrics/system.rb index 426496855e3..33c0de91c11 100644 --- a/lib/gitlab/metrics/system.rb +++ b/lib/gitlab/metrics/system.rb @@ -23,6 +23,22 @@ module Gitlab def self.file_descriptor_count Dir.glob('/proc/self/fd/*').length end + + def self.max_open_file_descriptors + match = File.read('/proc/self/limits').match(/Max open files\s*(\d+)/) + + return unless match && match[1] + + match[1].to_i + end + + def self.process_start_time + fields = File.read('/proc/self/stat').split + + # fields[21] is linux proc stat field "(22) starttime". + # The value is expressed in clock ticks, divide by clock ticks for seconds. + ( fields[21].to_i || 0 ) / clk_tck + end else def self.memory_usage 0.0 @@ -31,6 +47,14 @@ module Gitlab def self.file_descriptor_count 0 end + + def self.max_open_file_descriptors + 0 + end + + def self.process_start_time + 0 + end end # THREAD_CPUTIME is not supported on OS X @@ -59,6 +83,10 @@ module Gitlab def self.monotonic_time Process.clock_gettime(Process::CLOCK_MONOTONIC, :float_second) end + + def self.clk_tck + @clk_tck ||= `getconf CLK_TCK`.to_i + end end end end diff --git a/spec/lib/gitlab/metrics/samplers/ruby_sampler_spec.rb b/spec/lib/gitlab/metrics/samplers/ruby_sampler_spec.rb index 7972ff253fe..aaf8c9fa2a0 100644 --- a/spec/lib/gitlab/metrics/samplers/ruby_sampler_spec.rb +++ b/spec/lib/gitlab/metrics/samplers/ruby_sampler_spec.rb @@ -10,17 +10,20 @@ describe Gitlab::Metrics::Samplers::RubySampler do describe '#sample' do it 'samples various statistics' do - expect(Gitlab::Metrics::System).to receive(:memory_usage) + expect(Gitlab::Metrics::System).to receive(:cpu_time) expect(Gitlab::Metrics::System).to receive(:file_descriptor_count) + expect(Gitlab::Metrics::System).to receive(:memory_usage) + expect(Gitlab::Metrics::System).to receive(:process_start_time) + expect(Gitlab::Metrics::System).to receive(:max_open_file_descriptors) expect(sampler).to receive(:sample_gc) sampler.sample end - it 'adds a metric containing the memory usage' do + it 'adds a metric containing the process resident memory bytes' do expect(Gitlab::Metrics::System).to receive(:memory_usage).and_return(9000) - expect(sampler.metrics[:memory_usage]).to receive(:set).with({}, 9000) + expect(sampler.metrics[:process_resident_memory_bytes]).to receive(:set).with({}, 9000) sampler.sample end @@ -34,6 +37,27 @@ describe Gitlab::Metrics::Samplers::RubySampler do sampler.sample end + it 'adds a metric containing the process total cpu time' do + expect(Gitlab::Metrics::System).to receive(:cpu_time).and_return(0.51) + expect(sampler.metrics[:process_cpu_seconds_total]).to receive(:set).with({}, 0.51) + + sampler.sample + end + + it 'adds a metric containing the process start time' do + expect(Gitlab::Metrics::System).to receive(:process_start_time).and_return(12345) + expect(sampler.metrics[:process_start_time_seconds]).to receive(:set).with({}, 12345) + + sampler.sample + end + + it 'adds a metric containing the process max file descriptors' do + expect(Gitlab::Metrics::System).to receive(:max_open_file_descriptors).and_return(1024) + expect(sampler.metrics[:process_max_fds]).to receive(:set).with({}, 1024) + + sampler.sample + end + it 'clears any GC profiles' do expect(GC::Profiler).to receive(:clear) diff --git a/spec/lib/gitlab/metrics/samplers/unicorn_sampler_spec.rb b/spec/lib/gitlab/metrics/samplers/unicorn_sampler_spec.rb index 4b03f3c2532..090e456644f 100644 --- a/spec/lib/gitlab/metrics/samplers/unicorn_sampler_spec.rb +++ b/spec/lib/gitlab/metrics/samplers/unicorn_sampler_spec.rb @@ -39,8 +39,8 @@ describe Gitlab::Metrics::Samplers::UnicornSampler do it 'updates metrics type unix and with addr' do labels = { socket_type: 'unix', socket_address: socket_address } - expect(subject).to receive_message_chain(:unicorn_active_connections, :set).with(labels, 'active') - expect(subject).to receive_message_chain(:unicorn_queued_connections, :set).with(labels, 'queued') + expect(subject.metrics[:unicorn_active_connections]).to receive(:set).with(labels, 'active') + expect(subject.metrics[:unicorn_queued_connections]).to receive(:set).with(labels, 'queued') subject.sample end @@ -50,7 +50,6 @@ describe Gitlab::Metrics::Samplers::UnicornSampler do context 'unicorn listens on tcp sockets' do let(:tcp_socket_address) { '0.0.0.0:8080' } let(:tcp_sockets) { [tcp_socket_address] } - before do allow(unicorn).to receive(:listener_names).and_return(tcp_sockets) end @@ -71,13 +70,29 @@ describe Gitlab::Metrics::Samplers::UnicornSampler do it 'updates metrics type unix and with addr' do labels = { socket_type: 'tcp', socket_address: tcp_socket_address } - expect(subject).to receive_message_chain(:unicorn_active_connections, :set).with(labels, 'active') - expect(subject).to receive_message_chain(:unicorn_queued_connections, :set).with(labels, 'queued') + expect(subject.metrics[:unicorn_active_connections]).to receive(:set).with(labels, 'active') + expect(subject.metrics[:unicorn_queued_connections]).to receive(:set).with(labels, 'queued') subject.sample end end end + + context 'additional metrics' do + let(:unicorn_workers) { 2 } + + before do + allow(unicorn).to receive(:listener_names).and_return([""]) + allow(::Gitlab::Metrics::System).to receive(:cpu_time).and_return(3.14) + allow(subject).to receive(:unicorn_workers_count).and_return(unicorn_workers) + end + + it "sets additional metrics" do + expect(subject.metrics[:unicorn_workers]).to receive(:set).with({}, unicorn_workers) + + subject.sample + end + end end describe '#start' do diff --git a/spec/lib/gitlab/metrics/system_spec.rb b/spec/lib/gitlab/metrics/system_spec.rb index 14afcdf5daa..b0603d96eb2 100644 --- a/spec/lib/gitlab/metrics/system_spec.rb +++ b/spec/lib/gitlab/metrics/system_spec.rb @@ -13,6 +13,18 @@ describe Gitlab::Metrics::System do expect(described_class.file_descriptor_count).to be > 0 end end + + describe '.max_open_file_descriptors' do + it 'returns the max allowed open file descriptors' do + expect(described_class.max_open_file_descriptors).to be > 0 + end + end + + describe '.process_start_time' do + it 'returns the process start time' do + expect(described_class.process_start_time).to be > 0 + end + end else describe '.memory_usage' do it 'returns 0.0' do @@ -25,6 +37,18 @@ describe Gitlab::Metrics::System do expect(described_class.file_descriptor_count).to eq(0) end end + + describe '.max_open_file_descriptors' do + it 'returns 0' do + expect(described_class.max_open_file_descriptors).to eq(0) + end + end + + describe 'process_start_time' do + it 'returns 0' do + expect(described_class.process_start_time).to eq(0) + end + end end describe '.cpu_time' do