Custom queries for prometheus
- Tests for Prometheus queries - Fix rubocop warnings - Remove unused method. Add more queries to deployment queries. - Wrap BaseQuery in module hierarchy. Rename Prometheus class to PrometheusClient.
This commit is contained in:
parent
e7b53dd678
commit
4f824d2aec
13 changed files with 146 additions and 53 deletions
|
@ -11,7 +11,7 @@ class Projects::DeploymentsController < Projects::ApplicationController
|
|||
end
|
||||
|
||||
def metrics
|
||||
@metrics = deployment.metrics(1.hour)
|
||||
@metrics = deployment.metrics
|
||||
|
||||
if @metrics&.any?
|
||||
render json: @metrics, status: :ok
|
||||
|
|
|
@ -103,14 +103,10 @@ class Deployment < ActiveRecord::Base
|
|||
project.monitoring_service.present?
|
||||
end
|
||||
|
||||
def metrics(timeframe)
|
||||
def metrics
|
||||
return {} unless has_metrics?
|
||||
|
||||
half_timeframe = timeframe / 2
|
||||
timeframe_start = created_at - half_timeframe
|
||||
timeframe_end = created_at + half_timeframe
|
||||
|
||||
metrics = project.monitoring_service.metrics(environment, timeframe_start: timeframe_start, timeframe_end: timeframe_end)
|
||||
metrics = project.monitoring_service.deployment_metrics(self)
|
||||
metrics&.merge(deployment_time: created_at.to_i) || {}
|
||||
end
|
||||
|
||||
|
|
|
@ -150,7 +150,7 @@ class Environment < ActiveRecord::Base
|
|||
end
|
||||
|
||||
def metrics
|
||||
project.monitoring_service.metrics(self) if has_metrics?
|
||||
project.monitoring_service.environment_metrics(self) if has_metrics?
|
||||
end
|
||||
|
||||
# An environment name is not necessarily suitable for use in URLs, DNS
|
||||
|
|
|
@ -9,8 +9,11 @@ class MonitoringService < Service
|
|||
%w()
|
||||
end
|
||||
|
||||
# Environments have a number of metrics
|
||||
def metrics(environment, timeframe_start: nil, timeframe_end: nil)
|
||||
def environment_metrics(environment)
|
||||
raise NotImplementedError
|
||||
end
|
||||
|
||||
def deployment_metrics(deployment)
|
||||
raise NotImplementedError
|
||||
end
|
||||
end
|
||||
|
|
|
@ -63,45 +63,30 @@ class PrometheusService < MonitoringService
|
|||
{ success: false, result: err }
|
||||
end
|
||||
|
||||
def metrics(environment, timeframe_start: nil, timeframe_end: nil)
|
||||
with_reactive_cache(environment.slug, timeframe_start, timeframe_end) do |data|
|
||||
data
|
||||
def environment_metrics(environment, **args)
|
||||
with_reactive_cache(Gitlab::Prometheus::Queries::EnvironmentQuery.name, environment.id, &:itself)
|
||||
end
|
||||
|
||||
def deployment_metrics(deployment)
|
||||
with_reactive_cache(Gitlab::Prometheus::Queries::DeploymentQuery.name, deployment.id, &:itself)
|
||||
end
|
||||
|
||||
# Cache metrics for specific environment
|
||||
def calculate_reactive_cache(environment_slug, timeframe_start, timeframe_end)
|
||||
def calculate_reactive_cache(query_class_name, *args)
|
||||
return unless active? && project && !project.pending_delete?
|
||||
|
||||
timeframe_start = Time.parse(timeframe_start) if timeframe_start
|
||||
timeframe_end = Time.parse(timeframe_end) if timeframe_end
|
||||
|
||||
timeframe_start ||= 8.hours.ago
|
||||
timeframe_end ||= Time.now
|
||||
|
||||
memory_query = %{(sum(container_memory_usage_bytes{container_name!="POD",environment="#{environment_slug}"}) / count(container_memory_usage_bytes{container_name!="POD",environment="#{environment_slug}"})) /1024/1024}
|
||||
cpu_query = %{sum(rate(container_cpu_usage_seconds_total{container_name!="POD",environment="#{environment_slug}"}[2m])) / count(container_cpu_usage_seconds_total{container_name!="POD",environment="#{environment_slug}"}) * 100}
|
||||
metrics = Kernel.const_get(query_class_name).new(client).query(*args)
|
||||
|
||||
{
|
||||
success: true,
|
||||
metrics: {
|
||||
# Average Memory used in MB
|
||||
memory_values: client.query_range(memory_query, start: timeframe_start, stop: timeframe_end),
|
||||
memory_current: client.query(memory_query, time: timeframe_end),
|
||||
memory_previous: client.query(memory_query, time: timeframe_start),
|
||||
# Average CPU Utilization
|
||||
cpu_values: client.query_range(cpu_query, start: timeframe_start, stop: timeframe_end),
|
||||
cpu_current: client.query(cpu_query, time: timeframe_end),
|
||||
cpu_previous: client.query(cpu_query, time: timeframe_start)
|
||||
},
|
||||
metrics: metrics,
|
||||
last_update: Time.now.utc
|
||||
}
|
||||
|
||||
rescue Gitlab::PrometheusError => err
|
||||
{ success: false, result: err.message }
|
||||
end
|
||||
|
||||
def client
|
||||
@prometheus ||= Gitlab::Prometheus.new(api_url: api_url)
|
||||
@prometheus ||= Gitlab::PrometheusClient.new(api_url: api_url)
|
||||
end
|
||||
end
|
||||
|
|
26
lib/gitlab/prometheus/queries/base_query.rb
Normal file
26
lib/gitlab/prometheus/queries/base_query.rb
Normal file
|
@ -0,0 +1,26 @@
|
|||
module Gitlab
  module Prometheus
    module Queries
      # Common base for Prometheus query objects.
      #
      # A query object wraps a Prometheus client and knows how to build the
      # PromQL expressions it needs. Subclasses implement #query.
      class BaseQuery
        attr_accessor :client

        # Exposes the client's calls as #client_query_range and #client_query.
        delegate :query_range, :query, to: :client, prefix: true

        # client - object responding to #query and #query_range.
        def initialize(client)
          @client = client
        end

        # Runs the query. Concrete query classes must override this.
        def query(*args)
          raise NotImplementedError
        end

        # PromQL expression: average container memory usage for the
        # environment, in bytes divided by 2^20.
        def raw_memory_usage_query(environment_slug)
          selector = %{container_name!="POD",environment="#{environment_slug}"}

          "avg(container_memory_usage_bytes{#{selector}}) / 2^20"
        end

        # PromQL expression: average 2-minute rate of container CPU seconds
        # for the environment, multiplied by 100.
        def raw_cpu_usage_query(environment_slug)
          selector = %{container_name!="POD",environment="#{environment_slug}"}

          "avg(rate(container_cpu_usage_seconds_total{#{selector}}[2m])) * 100"
        end
      end
    end
  end
end
|
26
lib/gitlab/prometheus/queries/deployment_query.rb
Normal file
26
lib/gitlab/prometheus/queries/deployment_query.rb
Normal file
|
@ -0,0 +1,26 @@
|
|||
module Gitlab::Prometheus::Queries
  # Collects memory and CPU metrics for the environment of a deployment,
  # in a window of 30 minutes either side of the deployment's creation time.
  class DeploymentQuery < BaseQuery
    # deployment_id - id of the Deployment to collect metrics for.
    #
    # Returns a Hash with :memory_values, :memory_before, :memory_after,
    # :cpu_values, :cpu_before and :cpu_after, or nil when no deployment
    # with that id exists.
    def query(deployment_id)
      deployment = Deployment.find_by(id: deployment_id)
      # find_by returns nil for a missing record (e.g. one deleted after the
      # id was handed over); without this guard the next line would raise
      # NoMethodError on nil.
      return unless deployment

      environment_slug = deployment.environment.slug

      memory_query = raw_memory_usage_query(environment_slug)
      # NOTE(review): unlike raw_memory_usage_query this expression is not
      # divided by 2^20, so the before/after values appear to be on a
      # different scale than :memory_values - confirm this is intended.
      memory_avg_query = %{avg(avg_over_time(container_memory_usage_bytes{container_name!="POD",environment="#{environment_slug}"}[30m]))}
      cpu_query = raw_cpu_usage_query(environment_slug)
      cpu_avg_query = %{avg(rate(container_cpu_usage_seconds_total{container_name!="POD",environment="#{environment_slug}"}[30m])) * 100}

      deployed_at = deployment.created_at.to_f
      timeframe_start = (deployment.created_at - 30.minutes).to_f
      timeframe_end = (deployment.created_at + 30.minutes).to_f

      {
        memory_values: client_query_range(memory_query, start: timeframe_start, stop: timeframe_end),
        # The 30m averages evaluated at the deployment time and at the end of
        # the window cover the half hour before and after the deployment.
        memory_before: client_query(memory_avg_query, time: deployed_at),
        memory_after: client_query(memory_avg_query, time: timeframe_end),

        cpu_values: client_query_range(cpu_query, start: timeframe_start, stop: timeframe_end),
        cpu_before: client_query(cpu_avg_query, time: deployed_at),
        cpu_after: client_query(cpu_avg_query, time: timeframe_end)
      }
    end
  end
end
|
20
lib/gitlab/prometheus/queries/environment_query.rb
Normal file
20
lib/gitlab/prometheus/queries/environment_query.rb
Normal file
|
@ -0,0 +1,20 @@
|
|||
module Gitlab::Prometheus::Queries
  # Collects memory and CPU metrics for an environment over the last 8 hours.
  class EnvironmentQuery < BaseQuery
    # environment_id - id of the Environment to collect metrics for.
    #
    # Returns a Hash with :memory_values, :memory_current, :cpu_values and
    # :cpu_current, or nil when no environment with that id exists.
    def query(environment_id)
      environment = Environment.find_by(id: environment_id)
      # find_by returns nil for a missing record; bail out instead of raising
      # NoMethodError on nil when reading the slug.
      return unless environment

      environment_slug = environment.slug
      timeframe_start = 8.hours.ago.to_f
      timeframe_end = Time.now.to_f

      memory_query = raw_memory_usage_query(environment_slug)
      cpu_query = raw_cpu_usage_query(environment_slug)

      {
        memory_values: client_query_range(memory_query, start: timeframe_start, stop: timeframe_end),
        memory_current: client_query(memory_query, time: timeframe_end),
        cpu_values: client_query_range(cpu_query, start: timeframe_start, stop: timeframe_end),
        cpu_current: client_query(cpu_query, time: timeframe_end)
      }
    end
  end
end
|
|
@ -2,7 +2,7 @@ module Gitlab
|
|||
PrometheusError = Class.new(StandardError)
|
||||
|
||||
# Helper methods to interact with Prometheus network services & resources
|
||||
class Prometheus
|
||||
class PrometheusClient
|
||||
attr_reader :api_url
|
||||
|
||||
def initialize(api_url:)
|
||||
|
@ -15,7 +15,7 @@ module Gitlab
|
|||
|
||||
def query(query, time: Time.now)
|
||||
get_result('vector') do
|
||||
json_api_get('query', query: query, time: time.utc.to_f)
|
||||
json_api_get('query', query: query, time: time.to_f)
|
||||
end
|
||||
end
|
||||
|
31
spec/lib/gitlab/prometheus/queries/deployment_query_spec.rb
Normal file
31
spec/lib/gitlab/prometheus/queries/deployment_query_spec.rb
Normal file
|
@ -0,0 +1,31 @@
|
|||
require 'spec_helper'
|
||||
|
||||
describe Gitlab::Prometheus::Queries::DeploymentQuery, lib: true do
  # The slug is set explicitly so the expected PromQL strings below do not
  # depend on the value the environment factory happens to generate.
  let(:environment) { create(:environment, slug: 'environment-slug') }
  let(:deployment) { create(:deployment, environment: environment) }

  # Bare double: the stubbed calls return nil, which is what the final
  # expectation on the result hash relies on.
  let(:client) { double('prometheus_client') }
  subject { described_class.new(client) }

  it 'sends appropriate queries to prometheus' do
    start_time = (deployment.created_at - 30.minutes).to_f
    stop_time = (deployment.created_at + 30.minutes).to_f

    expect(client).to receive(:query_range).with('avg(container_memory_usage_bytes{container_name!="POD",environment="environment-slug"}) / 2^20',
                                                 start: start_time, stop: stop_time)
    expect(client).to receive(:query).with('avg(avg_over_time(container_memory_usage_bytes{container_name!="POD",environment="environment-slug"}[30m]))',
                                           time: deployment.created_at.to_f)
    expect(client).to receive(:query).with('avg(avg_over_time(container_memory_usage_bytes{container_name!="POD",environment="environment-slug"}[30m]))',
                                           time: stop_time)

    expect(client).to receive(:query_range).with('avg(rate(container_cpu_usage_seconds_total{container_name!="POD",environment="environment-slug"}[2m])) * 100',
                                                 start: start_time, stop: stop_time)
    expect(client).to receive(:query).with('avg(rate(container_cpu_usage_seconds_total{container_name!="POD",environment="environment-slug"}[30m])) * 100',
                                           time: deployment.created_at.to_f)
    expect(client).to receive(:query).with('avg(rate(container_cpu_usage_seconds_total{container_name!="POD",environment="environment-slug"}[30m])) * 100',
                                           time: stop_time)

    expect(subject.query(deployment.id)).to eq(memory_values: nil, memory_before: nil, memory_after: nil,
                                               cpu_values: nil, cpu_before: nil, cpu_after: nil)
  end
end
|
|
@ -1,6 +1,6 @@
|
|||
require 'spec_helper'
|
||||
|
||||
describe Gitlab::Prometheus, lib: true do
|
||||
describe Gitlab::PrometheusClient, lib: true do
|
||||
include PrometheusHelpers
|
||||
|
||||
subject { described_class.new(api_url: 'https://prometheus.example.com') }
|
|
@ -6,6 +6,7 @@ describe PrometheusService, models: true, caching: true do
|
|||
|
||||
let(:project) { create(:prometheus_project) }
|
||||
let(:service) { project.prometheus_service }
|
||||
let(:environment_query) { Gitlab::Prometheus::Queries::EnvironmentQuery }
|
||||
|
||||
describe "Associations" do
|
||||
it { is_expected.to belong_to :project }
|
||||
|
@ -45,32 +46,39 @@ describe PrometheusService, models: true, caching: true do
|
|||
end
|
||||
end
|
||||
|
||||
describe '#metrics' do
|
||||
describe '#environment_metrics' do
|
||||
let(:environment) { build_stubbed(:environment, slug: 'env-slug') }
|
||||
|
||||
around do |example|
|
||||
Timecop.freeze { example.run }
|
||||
end
|
||||
|
||||
context 'with valid data without time range' do
|
||||
subject { service.metrics(environment) }
|
||||
context 'with valid data' do
|
||||
subject { service.environment_metrics(environment) }
|
||||
|
||||
before do
|
||||
stub_reactive_cache(service, prometheus_data, 'env-slug', nil, nil)
|
||||
stub_reactive_cache(service, prometheus_data, environment_query, environment.id)
|
||||
end
|
||||
|
||||
it 'returns reactive data' do
|
||||
is_expected.to eq(prometheus_data)
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
context 'with valid data with time range' do
|
||||
let(:t_start) { 1.hour.ago.utc }
|
||||
let(:t_end) { Time.now.utc }
|
||||
subject { service.metrics(environment, timeframe_start: t_start, timeframe_end: t_end) }
|
||||
describe '#deployment_metrics' do
|
||||
let(:deployment) { build_stubbed(:deployment)}
|
||||
let(:deployment_query) { Gitlab::Prometheus::Queries::DeploymentQuery }
|
||||
|
||||
around do |example|
|
||||
Timecop.freeze { example.run }
|
||||
end
|
||||
|
||||
context 'with valid data' do
|
||||
subject { service.deployment_metrics(deployment) }
|
||||
|
||||
before do
|
||||
stub_reactive_cache(service, prometheus_data, 'env-slug', t_start, t_end)
|
||||
stub_reactive_cache(service, prometheus_data, deployment_query, deployment.id)
|
||||
end
|
||||
|
||||
it 'returns reactive data' do
|
||||
|
@ -80,14 +88,14 @@ describe PrometheusService, models: true, caching: true do
|
|||
end
|
||||
|
||||
describe '#calculate_reactive_cache' do
|
||||
let(:environment) { build_stubbed(:environment, slug: 'env-slug') }
|
||||
let(:environment) { create(:environment, slug: 'env-slug') }
|
||||
|
||||
around do |example|
|
||||
Timecop.freeze { example.run }
|
||||
end
|
||||
|
||||
subject do
|
||||
service.calculate_reactive_cache(environment.slug, nil, nil)
|
||||
service.calculate_reactive_cache(environment_query.to_s, environment.id)
|
||||
end
|
||||
|
||||
context 'when service is inactive' do
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
module PrometheusHelpers
|
||||
def prometheus_memory_query(environment_slug)
|
||||
%{(sum(container_memory_usage_bytes{container_name!="POD",environment="#{environment_slug}"}) / count(container_memory_usage_bytes{container_name!="POD",environment="#{environment_slug}"})) /1024/1024}
|
||||
%{avg(container_memory_usage_bytes{container_name!="POD",environment="#{environment_slug}"}) / 2^20}
|
||||
end
|
||||
|
||||
def prometheus_cpu_query(environment_slug)
|
||||
%{sum(rate(container_cpu_usage_seconds_total{container_name!="POD",environment="#{environment_slug}"}[2m])) / count(container_cpu_usage_seconds_total{container_name!="POD",environment="#{environment_slug}"}) * 100}
|
||||
%{avg(rate(container_cpu_usage_seconds_total{container_name!="POD",environment="#{environment_slug}"}[2m])) * 100}
|
||||
end
|
||||
|
||||
def prometheus_ping_url(prometheus_query)
|
||||
|
@ -88,10 +88,8 @@ module PrometheusHelpers
|
|||
metrics: {
|
||||
memory_values: prometheus_values_body('matrix').dig(:data, :result),
|
||||
memory_current: prometheus_value_body('vector').dig(:data, :result),
|
||||
memory_previous: prometheus_value_body('vector').dig(:data, :result),
|
||||
cpu_values: prometheus_values_body('matrix').dig(:data, :result),
|
||||
cpu_current: prometheus_value_body('vector').dig(:data, :result),
|
||||
cpu_previous: prometheus_value_body('vector').dig(:data, :result)
|
||||
},
|
||||
last_update: last_update
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue