Merge branch '61964-unicorn-instrumentation' into 'master'
Adds ruby and unicorn instrumentation Closes #61964 See merge request gitlab-org/gitlab-ce!28499
This commit is contained in:
commit
327cae1da7
7 changed files with 157 additions and 36 deletions
|
@ -43,10 +43,11 @@ The following metrics are available:
|
|||
| redis_ping_latency_seconds | Gauge | 9.4 | Round trip time of the redis ping |
|
||||
| user_session_logins_total | Counter | 9.4 | Counter of how many users have logged in |
|
||||
| upload_file_does_not_exist | Counter | 10.7 in EE, 11.5 in CE | Number of times an upload record could not find its file |
|
||||
| failed_login_captcha_total | Gauge | 11.0 | Counter of failed CAPTCHA attempts during login |
|
||||
| successful_login_captcha_total | Gauge | 11.0 | Counter of successful CAPTCHA attempts during login |
|
||||
| unicorn_active_connections | Gauge | 11.0 | The number of active Unicorn connections (workers) |
|
||||
| unicorn_queued_connections | Gauge | 11.0 | The number of queued Unicorn connections |
|
||||
| failed_login_captcha_total | Gauge | 11.0 | Counter of failed CAPTCHA attempts during login |
|
||||
| successful_login_captcha_total | Gauge | 11.0 | Counter of successful CAPTCHA attempts during login |
|
||||
| unicorn_active_connections | Gauge | 11.0 | The number of active Unicorn connections (workers) |
|
||||
| unicorn_queued_connections | Gauge | 11.0 | The number of queued Unicorn connections |
|
||||
| unicorn_workers | Gauge | 12.0 | The number of Unicorn workers |
|
||||
|
||||
## Sidekiq Metrics available for Geo **[PREMIUM]**
|
||||
|
||||
|
@ -86,8 +87,8 @@ the `monitoring.sidekiq_exporter` configuration option in `gitlab.yml`.
|
|||
| geo_wikis_checksum_mismatch_count | Gauge | 10.7 | Number of wikis with a checksum mismatch on secondary | url
|
||||
| geo_repositories_checked_count | Gauge | 11.1 | Number of repositories that have been checked via `git fsck` | url
|
||||
| geo_repositories_checked_failed_count | Gauge | 11.1 | Number of repositories that have a failure from `git fsck` | url
|
||||
| geo_repositories_retrying_verification_count | Gauge | 11.2 | Number of repository verification failures that Geo is actively trying to correct on secondary | url
|
||||
| geo_wikis_retrying_verification_count | Gauge | 11.2 | Number of wiki verification failures that Geo is actively trying to correct on secondary | url
|
||||
| geo_repositories_retrying_verification_count | Gauge | 11.2 | Number of repository verification failures that Geo is actively trying to correct on secondary | url
|
||||
| geo_wikis_retrying_verification_count | Gauge | 11.2 | Number of wiki verification failures that Geo is actively trying to correct on secondary | url
|
||||
|
||||
### Ruby metrics
|
||||
|
||||
|
@ -100,6 +101,10 @@ Some basic Ruby runtime metrics are available:
|
|||
| ruby_file_descriptors | Gauge | 11.1 | File descriptors per process |
|
||||
| ruby_memory_bytes | Gauge | 11.1 | Memory usage by process |
|
||||
| ruby_sampler_duration_seconds_total | Counter | 11.1 | Time spent collecting stats |
|
||||
| ruby_process_cpu_seconds_total | Gauge | 12.0 | Total amount of CPU time per process |
|
||||
| ruby_process_max_fds | Gauge | 12.0 | Maximum number of open file descriptors per process |
|
||||
| ruby_process_resident_memory_bytes | Gauge | 12.0 | Memory usage by process, measured in bytes |
|
||||
| ruby_process_start_time_seconds | Gauge | 12.0 | Time elapsed between system boot and the start of the process, measured in seconds |
|
||||
|
||||
[GC.stat]: https://ruby-doc.org/core-2.3.0/GC.html#method-c-stat
|
||||
|
||||
|
|
|
@ -23,25 +23,32 @@ module Gitlab
|
|||
end
|
||||
|
||||
def init_metrics
|
||||
metrics = {}
|
||||
metrics[:sampler_duration] = ::Gitlab::Metrics.counter(with_prefix(:sampler, :duration_seconds_total), 'Sampler time', labels)
|
||||
metrics[:total_time] = ::Gitlab::Metrics.counter(with_prefix(:gc, :duration_seconds_total), 'Total GC time', labels)
|
||||
metrics = {
|
||||
file_descriptors: ::Gitlab::Metrics.gauge(with_prefix(:file, :descriptors), 'File descriptors used', labels, :livesum),
|
||||
memory_bytes: ::Gitlab::Metrics.gauge(with_prefix(:memory, :bytes), 'Memory used', labels, :livesum),
|
||||
process_cpu_seconds_total: ::Gitlab::Metrics.gauge(with_prefix(:process, :cpu_seconds_total), 'Process CPU seconds total'),
|
||||
process_max_fds: ::Gitlab::Metrics.gauge(with_prefix(:process, :max_fds), 'Process max fds'),
|
||||
process_resident_memory_bytes: ::Gitlab::Metrics.gauge(with_prefix(:process, :resident_memory_bytes), 'Memory used', labels, :livesum),
|
||||
process_start_time_seconds: ::Gitlab::Metrics.gauge(with_prefix(:process, :start_time_seconds), 'Process start time seconds'),
|
||||
sampler_duration: ::Gitlab::Metrics.counter(with_prefix(:sampler, :duration_seconds_total), 'Sampler time', labels),
|
||||
total_time: ::Gitlab::Metrics.counter(with_prefix(:gc, :duration_seconds_total), 'Total GC time', labels)
|
||||
}
|
||||
|
||||
GC.stat.keys.each do |key|
|
||||
metrics[key] = ::Gitlab::Metrics.gauge(with_prefix(:gc_stat, key), to_doc_string(key), labels, :livesum)
|
||||
end
|
||||
|
||||
metrics[:memory_usage] = ::Gitlab::Metrics.gauge(with_prefix(:memory, :bytes), 'Memory used', labels, :livesum)
|
||||
metrics[:file_descriptors] = ::Gitlab::Metrics.gauge(with_prefix(:file, :descriptors), 'File descriptors used', labels, :livesum)
|
||||
|
||||
metrics
|
||||
end
|
||||
|
||||
def sample
|
||||
start_time = System.monotonic_time
|
||||
|
||||
metrics[:memory_usage].set(labels.merge(worker_label), System.memory_usage)
|
||||
metrics[:file_descriptors].set(labels.merge(worker_label), System.file_descriptor_count)
|
||||
|
||||
metrics[:process_cpu_seconds_total].set(labels.merge(worker_label), ::Gitlab::Metrics::System.cpu_time)
|
||||
metrics[:process_max_fds].set(labels.merge(worker_label), ::Gitlab::Metrics::System.max_open_file_descriptors)
|
||||
metrics[:process_start_time_seconds].set(labels.merge(worker_label), ::Gitlab::Metrics::System.process_start_time)
|
||||
set_memory_usage_metrics
|
||||
sample_gc
|
||||
|
||||
metrics[:sampler_duration].increment(labels, System.monotonic_time - start_time)
|
||||
|
@ -61,6 +68,14 @@ module Gitlab
|
|||
metrics[:total_time].increment(labels, GC::Profiler.total_time)
|
||||
end
|
||||
|
||||
# Records the current memory usage under two gauges.
#
# Reads System.memory_usage once and writes that same value to both the
# :memory_bytes and the :process_resident_memory_bytes metrics, using
# labels merged with worker_label as the label set for each.
def set_memory_usage_metrics
|
||||
memory_usage = System.memory_usage
|
||||
memory_labels = labels.merge(worker_label)
|
||||
|
||||
metrics[:memory_bytes].set(memory_labels, memory_usage)
|
||||
metrics[:process_resident_memory_bytes].set(memory_labels, memory_usage)
|
||||
end
|
||||
|
||||
def worker_label
|
||||
return {} unless defined?(Unicorn::Worker)
|
||||
|
||||
|
|
|
@ -4,16 +4,16 @@ module Gitlab
|
|||
module Metrics
|
||||
module Samplers
|
||||
class UnicornSampler < BaseSampler
|
||||
def initialize(interval)
|
||||
super(interval)
|
||||
def metrics
|
||||
@metrics ||= init_metrics
|
||||
end
|
||||
|
||||
def unicorn_active_connections
|
||||
@unicorn_active_connections ||= ::Gitlab::Metrics.gauge(:unicorn_active_connections, 'Unicorn active connections', {}, :max)
|
||||
end
|
||||
|
||||
def unicorn_queued_connections
|
||||
@unicorn_queued_connections ||= ::Gitlab::Metrics.gauge(:unicorn_queued_connections, 'Unicorn queued connections', {}, :max)
|
||||
def init_metrics
|
||||
{
|
||||
unicorn_active_connections: ::Gitlab::Metrics.gauge(:unicorn_active_connections, 'Unicorn active connections', {}, :max),
|
||||
unicorn_queued_connections: ::Gitlab::Metrics.gauge(:unicorn_queued_connections, 'Unicorn queued connections', {}, :max),
|
||||
unicorn_workers: ::Gitlab::Metrics.gauge(:unicorn_workers, 'Unicorn workers')
|
||||
}
|
||||
end
|
||||
|
||||
def enabled?
|
||||
|
@ -23,14 +23,13 @@ module Gitlab
|
|||
|
||||
def sample
|
||||
Raindrops::Linux.tcp_listener_stats(tcp_listeners).each do |addr, stats|
|
||||
unicorn_active_connections.set({ socket_type: 'tcp', socket_address: addr }, stats.active)
|
||||
unicorn_queued_connections.set({ socket_type: 'tcp', socket_address: addr }, stats.queued)
|
||||
set_unicorn_connection_metrics('tcp', addr, stats)
|
||||
end
|
||||
Raindrops::Linux.unix_listener_stats(unix_listeners).each do |addr, stats|
|
||||
set_unicorn_connection_metrics('unix', addr, stats)
|
||||
end
|
||||
|
||||
Raindrops::Linux.unix_listener_stats(unix_listeners).each do |addr, stats|
|
||||
unicorn_active_connections.set({ socket_type: 'unix', socket_address: addr }, stats.active)
|
||||
unicorn_queued_connections.set({ socket_type: 'unix', socket_address: addr }, stats.queued)
|
||||
end
|
||||
metrics[:unicorn_workers].set({}, unicorn_workers_count)
|
||||
end
|
||||
|
||||
private
|
||||
|
@ -39,6 +38,13 @@ module Gitlab
|
|||
@tcp_listeners ||= Unicorn.listener_names.grep(%r{\A[^/]+:\d+\z})
|
||||
end
|
||||
|
||||
# Writes the connection statistics of one listener to the connection gauges.
#
# type  - value stored under the :socket_type label (callers in #sample pass
#         'tcp' or 'unix').
# addr  - value stored under the :socket_address label.
# stats - object responding to #active and #queued; those values are set on
#         the :unicorn_active_connections and :unicorn_queued_connections
#         metrics respectively.
def set_unicorn_connection_metrics(type, addr, stats)
|
||||
labels = { socket_type: type, socket_address: addr }
|
||||
|
||||
metrics[:unicorn_active_connections].set(labels, stats.active)
|
||||
metrics[:unicorn_queued_connections].set(labels, stats.queued)
|
||||
end
|
||||
|
||||
# Memoized list of Unicorn listener names that are not in tcp_listeners,
# i.e. whatever remains of Unicorn.listener_names after the TCP entries are
# subtracted.
def unix_listeners
|
||||
@unix_listeners ||= Unicorn.listener_names - tcp_listeners
|
||||
end
|
||||
|
@ -46,6 +52,10 @@ module Gitlab
|
|||
# Truthy only when the Unicorn constant is defined and
# Unicorn.listener_names contains at least one entry; falsy (nil or false)
# otherwise.
def unicorn_with_listeners?
|
||||
defined?(Unicorn) && Unicorn.listener_names.any?
|
||||
end
|
||||
|
||||
# Counts the running Unicorn worker processes of this Rails application.
#
# Runs `pgrep -f` with a pattern that matches command lines of the form
# "unicorn_rails worker ... <Rails.root>" and counts the PIDs it prints.
# pgrep is started directly from an argument array (no shell), so characters
# in Rails.root such as quotes cannot break or alter the command.
#
# Returns the number of matching processes as an Integer; 0 when nothing
# matches or when pgrep is not installed.
def unicorn_workers_count
  pattern = "[u]nicorn_rails worker.+ #{Rails.root}"

  IO.popen(['pgrep', '-f', pattern], &:read).split.count
rescue Errno::ENOENT
  # Without a shell a missing pgrep binary raises instead of yielding empty
  # output; report it the same way as "no workers found".
  0
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
@ -23,6 +23,22 @@ module Gitlab
|
|||
# Returns the number of entries matching /proc/self/fd/*, used as the count
# of file descriptors of the current process.
def self.file_descriptor_count
|
||||
Dir.glob('/proc/self/fd/*').length
|
||||
end
|
||||
|
||||
# Returns the open-file limit of the current process as an Integer.
#
# Reads the "Max open files" row of /proc/self/limits and returns the first
# number on it (the soft-limit column according to proc(5)). Returns 0 when
# the row is missing or its value is not numeric, so the result is always
# usable as a gauge value and agrees with the stub variant of this method,
# which also reports 0.
def self.max_open_file_descriptors
  limits = File.read('/proc/self/limits')

  # String#[] with a capture index yields nil when nothing matches, and
  # nil.to_i is 0, so no separate nil check is required.
  limits[/Max open files\s*(\d+)/, 1].to_i
end
|
||||
|
||||
# Returns how long after system boot the current process was started, in
# whole seconds.
#
# Reads /proc/self/stat, takes the Linux proc stat field "(22) starttime"
# (expressed in clock ticks) and divides it by clk_tck using integer
# division. Returns 0 when the stat line cannot be parsed.
def self.process_start_time
  stat = File.read('/proc/self/stat')

  # Field "(2) comm" is the executable name wrapped in parentheses and may
  # itself contain spaces or parentheses, so splitting the whole line on
  # whitespace can shift every later field. Everything after the last ")"
  # is plain space-separated data.
  comm_end = stat.rindex(')')
  return 0 unless comm_end

  # The remainder starts at field (3), which puts field (22) at index 19.
  fields = stat[(comm_end + 1)..-1].split

  # The value is expressed in clock ticks, divide by clock ticks for seconds.
  fields[19].to_i / clk_tck
end
|
||||
else
|
||||
def self.memory_usage
|
||||
0.0
|
||||
|
@ -31,6 +47,14 @@ module Gitlab
|
|||
# Stub variant defined after the `else` of the surrounding conditional (its
# condition is not visible here): always reports 0 file descriptors.
def self.file_descriptor_count
|
||||
0
|
||||
end
|
||||
|
||||
# Stub variant defined after the `else` of the surrounding conditional (its
# condition is not visible here): always reports a limit of 0.
def self.max_open_file_descriptors
|
||||
0
|
||||
end
|
||||
|
||||
# Stub variant defined after the `else` of the surrounding conditional (its
# condition is not visible here): always reports a start time of 0.
def self.process_start_time
|
||||
0
|
||||
end
|
||||
end
|
||||
|
||||
# THREAD_CPUTIME is not supported on OS X
|
||||
|
@ -59,6 +83,10 @@ module Gitlab
|
|||
# Returns the current reading of the monotonic clock
# (Process::CLOCK_MONOTONIC) as a Float number of seconds. Intended for
# measuring elapsed time by subtracting two readings.
def self.monotonic_time
|
||||
Process.clock_gettime(Process::CLOCK_MONOTONIC, :float_second)
|
||||
end
|
||||
|
||||
# Returns the number of clock ticks per second (sysconf CLK_TCK) as an
# Integer. The value is looked up once and memoized in @clk_tck.
#
# The lookup is done in-process through Etc.sysconf instead of running the
# external `getconf CLK_TCK` command, so it spawns no subprocess and does not
# depend on a getconf binary being installed.
def self.clk_tck
  @clk_tck ||= begin
    require 'etc' # stdlib; a no-op when already loaded

    Etc.sysconf(Etc::SC_CLK_TCK)
  end
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
@ -10,17 +10,20 @@ describe Gitlab::Metrics::Samplers::RubySampler do
|
|||
|
||||
describe '#sample' do
|
||||
it 'samples various statistics' do
|
||||
expect(Gitlab::Metrics::System).to receive(:memory_usage)
|
||||
expect(Gitlab::Metrics::System).to receive(:cpu_time)
|
||||
expect(Gitlab::Metrics::System).to receive(:file_descriptor_count)
|
||||
expect(Gitlab::Metrics::System).to receive(:memory_usage)
|
||||
expect(Gitlab::Metrics::System).to receive(:process_start_time)
|
||||
expect(Gitlab::Metrics::System).to receive(:max_open_file_descriptors)
|
||||
expect(sampler).to receive(:sample_gc)
|
||||
|
||||
sampler.sample
|
||||
end
|
||||
|
||||
it 'adds a metric containing the memory usage' do
|
||||
it 'adds a metric containing the process resident memory bytes' do
|
||||
expect(Gitlab::Metrics::System).to receive(:memory_usage).and_return(9000)
|
||||
|
||||
expect(sampler.metrics[:memory_usage]).to receive(:set).with({}, 9000)
|
||||
expect(sampler.metrics[:process_resident_memory_bytes]).to receive(:set).with({}, 9000)
|
||||
|
||||
sampler.sample
|
||||
end
|
||||
|
@ -34,6 +37,27 @@ describe Gitlab::Metrics::Samplers::RubySampler do
|
|||
sampler.sample
|
||||
end
|
||||
|
||||
it 'adds a metric containing the process total cpu time' do
|
||||
expect(Gitlab::Metrics::System).to receive(:cpu_time).and_return(0.51)
|
||||
expect(sampler.metrics[:process_cpu_seconds_total]).to receive(:set).with({}, 0.51)
|
||||
|
||||
sampler.sample
|
||||
end
|
||||
|
||||
it 'adds a metric containing the process start time' do
|
||||
expect(Gitlab::Metrics::System).to receive(:process_start_time).and_return(12345)
|
||||
expect(sampler.metrics[:process_start_time_seconds]).to receive(:set).with({}, 12345)
|
||||
|
||||
sampler.sample
|
||||
end
|
||||
|
||||
it 'adds a metric containing the process max file descriptors' do
|
||||
expect(Gitlab::Metrics::System).to receive(:max_open_file_descriptors).and_return(1024)
|
||||
expect(sampler.metrics[:process_max_fds]).to receive(:set).with({}, 1024)
|
||||
|
||||
sampler.sample
|
||||
end
|
||||
|
||||
it 'clears any GC profiles' do
|
||||
expect(GC::Profiler).to receive(:clear)
|
||||
|
||||
|
|
|
@ -39,8 +39,8 @@ describe Gitlab::Metrics::Samplers::UnicornSampler do
|
|||
it 'updates metrics type unix and with addr' do
|
||||
labels = { socket_type: 'unix', socket_address: socket_address }
|
||||
|
||||
expect(subject).to receive_message_chain(:unicorn_active_connections, :set).with(labels, 'active')
|
||||
expect(subject).to receive_message_chain(:unicorn_queued_connections, :set).with(labels, 'queued')
|
||||
expect(subject.metrics[:unicorn_active_connections]).to receive(:set).with(labels, 'active')
|
||||
expect(subject.metrics[:unicorn_queued_connections]).to receive(:set).with(labels, 'queued')
|
||||
|
||||
subject.sample
|
||||
end
|
||||
|
@ -50,7 +50,6 @@ describe Gitlab::Metrics::Samplers::UnicornSampler do
|
|||
context 'unicorn listens on tcp sockets' do
|
||||
let(:tcp_socket_address) { '0.0.0.0:8080' }
|
||||
let(:tcp_sockets) { [tcp_socket_address] }
|
||||
|
||||
before do
|
||||
allow(unicorn).to receive(:listener_names).and_return(tcp_sockets)
|
||||
end
|
||||
|
@ -71,13 +70,29 @@ describe Gitlab::Metrics::Samplers::UnicornSampler do
|
|||
it 'updates metrics type unix and with addr' do
|
||||
labels = { socket_type: 'tcp', socket_address: tcp_socket_address }
|
||||
|
||||
expect(subject).to receive_message_chain(:unicorn_active_connections, :set).with(labels, 'active')
|
||||
expect(subject).to receive_message_chain(:unicorn_queued_connections, :set).with(labels, 'queued')
|
||||
expect(subject.metrics[:unicorn_active_connections]).to receive(:set).with(labels, 'active')
|
||||
expect(subject.metrics[:unicorn_queued_connections]).to receive(:set).with(labels, 'queued')
|
||||
|
||||
subject.sample
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
context 'additional metrics' do
|
||||
let(:unicorn_workers) { 2 }
|
||||
|
||||
before do
|
||||
allow(unicorn).to receive(:listener_names).and_return([""])
|
||||
allow(::Gitlab::Metrics::System).to receive(:cpu_time).and_return(3.14)
|
||||
allow(subject).to receive(:unicorn_workers_count).and_return(unicorn_workers)
|
||||
end
|
||||
|
||||
it "sets additional metrics" do
|
||||
expect(subject.metrics[:unicorn_workers]).to receive(:set).with({}, unicorn_workers)
|
||||
|
||||
subject.sample
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
describe '#start' do
|
||||
|
|
|
@ -13,6 +13,18 @@ describe Gitlab::Metrics::System do
|
|||
expect(described_class.file_descriptor_count).to be > 0
|
||||
end
|
||||
end
|
||||
|
||||
describe '.max_open_file_descriptors' do
|
||||
it 'returns the max allowed open file descriptors' do
|
||||
expect(described_class.max_open_file_descriptors).to be > 0
|
||||
end
|
||||
end
|
||||
|
||||
describe '.process_start_time' do
|
||||
it 'returns the process start time' do
|
||||
expect(described_class.process_start_time).to be > 0
|
||||
end
|
||||
end
|
||||
else
|
||||
describe '.memory_usage' do
|
||||
it 'returns 0.0' do
|
||||
|
@ -25,6 +37,18 @@ describe Gitlab::Metrics::System do
|
|||
expect(described_class.file_descriptor_count).to eq(0)
|
||||
end
|
||||
end
|
||||
|
||||
describe '.max_open_file_descriptors' do
|
||||
it 'returns 0' do
|
||||
expect(described_class.max_open_file_descriptors).to eq(0)
|
||||
end
|
||||
end
|
||||
|
||||
# Example group for the class method in the `else` branch of this spec, where
# the method is expected to return 0. Named with a leading dot like the
# sibling groups for class methods (e.g. '.max_open_file_descriptors').
describe '.process_start_time' do
  it 'returns 0' do
    expect(described_class.process_start_time).to eq(0)
  end
end
|
||||
end
|
||||
|
||||
describe '.cpu_time' do
|
||||
|
|
Loading…
Reference in a new issue