Import common metrics into database.
This MR backports the PrometheusMetric model to CE and adds two fields, `common` and `identifier`, to figure out what kind of metric is used.
This commit is contained in:
parent
05ee94beb7
commit
0a9d771bcb
22 changed files with 665 additions and 22 deletions
|
@ -232,6 +232,8 @@ class Project < ActiveRecord::Base
|
|||
has_many :clusters, through: :cluster_project, class_name: 'Clusters::Cluster'
|
||||
has_many :cluster_ingresses, through: :clusters, source: :application_ingress, class_name: 'Clusters::Applications::Ingress'
|
||||
|
||||
has_many :prometheus_metrics
|
||||
|
||||
# Container repositories need to remove data from the container registry,
|
||||
# which is not managed by the DB. Hence we're still using dependent: :destroy
|
||||
# here.
|
||||
|
|
96
app/models/prometheus_metric.rb
Normal file
96
app/models/prometheus_metric.rb
Normal file
|
@ -0,0 +1,96 @@
|
|||
class PrometheusMetric < ActiveRecord::Base
|
||||
belongs_to :project, validate: true, inverse_of: :prometheus_metrics
|
||||
|
||||
enum group: {
|
||||
# built-in groups
|
||||
nginx_ingress: -1,
|
||||
ha_proxy: -2,
|
||||
aws_elb: -3,
|
||||
nginx: -4,
|
||||
kubernetes: -5,
|
||||
|
||||
# custom/user groups
|
||||
business: 0,
|
||||
response: 1,
|
||||
system: 2
|
||||
}
|
||||
|
||||
validates :title, presence: true
|
||||
validates :query, presence: true
|
||||
validates :group, presence: true
|
||||
validates :y_label, presence: true
|
||||
validates :unit, presence: true
|
||||
|
||||
validate :require_project
|
||||
|
||||
scope :common, -> { where(common: true) }
|
||||
|
||||
GROUP_TITLES = {
|
||||
# built-in groups
|
||||
nginx_ingress: _('Response metrics (NGINX Ingress)'),
|
||||
ha_proxy: _('Response metrics (HA Proxy)'),
|
||||
aws_elb: _('Response metrics (AWS ELB)'),
|
||||
nginx: _('Response metrics (NGINX)'),
|
||||
kubernetes: _('System metrics (Kubernetes)'),
|
||||
|
||||
# custom/user groups
|
||||
business: _('Business metrics (Custom)'),
|
||||
response: _('Response metrics (Custom)'),
|
||||
system: _('System metrics (Custom)')
|
||||
}.freeze
|
||||
|
||||
REQUIRED_METRICS = {
|
||||
nginx_ingress: %w(nginx_upstream_responses_total nginx_upstream_response_msecs_avg),
|
||||
ha_proxy: %w(haproxy_frontend_http_requests_total haproxy_frontend_http_responses_total),
|
||||
aws_elb: %w(aws_elb_request_count_sum aws_elb_latency_average aws_elb_httpcode_backend_5_xx_sum),
|
||||
nginx: %w(nginx_server_requests nginx_server_requestMsec),
|
||||
kubernetes: %w(container_memory_usage_bytes container_cpu_usage_seconds_total)
|
||||
}.freeze
|
||||
|
||||
def group_title
|
||||
GROUP_TITLES[group.to_sym]
|
||||
end
|
||||
|
||||
def required_metrics
|
||||
(REQUIRED_METRICS[group.to_sym] || []).map(&:to_s)
|
||||
end
|
||||
|
||||
def to_query_metric
|
||||
Gitlab::Prometheus::Metric.new(id: id, title: title, required_metrics: required_metrics, weight: 0, y_label: y_label, queries: queries)
|
||||
end
|
||||
|
||||
def queries
|
||||
[
|
||||
{
|
||||
query_range: query,
|
||||
unit: unit,
|
||||
label: legend,
|
||||
series: query_series
|
||||
}
|
||||
]
|
||||
end
|
||||
|
||||
# Series definition placed under the `series` key of this metric's query.
#
# Only the 'Status Code' legend has one: a Hash labelled 'status_code' whose
# `when` list pairs each status class with a color. Any other legend
# yields nil.
def query_series
  return unless legend == 'Status Code'

  status_colors = { '2xx' => 'green', '4xx' => 'orange', '5xx' => 'red' }

  {
    label: 'status_code',
    when: status_colors.map { |status, color| { value: status, color: color } }
  }
end
|
||||
|
||||
private
|
||||
|
||||
# Validation: a metric is either common (no project) or project-specific
# (project required). Adds an error on :project for the two invalid
# combinations and does nothing for the two valid ones.
def require_project
  owner = project
  shared = common?

  if owner && shared
    errors.add(:project, "cannot be set if this is common metric")
  elsif !owner && !shared
    errors.add(:project, "has to be set when this is project-specific metric")
  end
end
|
||||
end
|
5
changelogs/unreleased/alerts-for-built-in-metrics.yml
Normal file
5
changelogs/unreleased/alerts-for-built-in-metrics.yml
Normal file
|
@ -0,0 +1,5 @@
|
|||
---
|
||||
title: Import all common metrics into database
|
||||
merge_request:
|
||||
author:
|
||||
type: changed
|
|
@ -7,7 +7,8 @@
|
|||
- nginx_upstream_responses_total
|
||||
weight: 1
|
||||
queries:
|
||||
- query_range: 'sum(rate(nginx_upstream_responses_total{upstream=~"%{kube_namespace}-%{ci_environment_slug}-.*"}[2m])) by (status_code)'
|
||||
- id: response_metrics_nginx_ingress_throughput_status_code
|
||||
query_range: 'sum(rate(nginx_upstream_responses_total{upstream=~"%{kube_namespace}-%{ci_environment_slug}-.*"}[2m])) by (status_code)'
|
||||
unit: req / sec
|
||||
label: Status Code
|
||||
series:
|
||||
|
@ -25,7 +26,8 @@
|
|||
- nginx_upstream_response_msecs_avg
|
||||
weight: 1
|
||||
queries:
|
||||
- query_range: 'avg(nginx_upstream_response_msecs_avg{upstream=~"%{kube_namespace}-%{ci_environment_slug}-.*"})'
|
||||
- id: response_metrics_nginx_ingress_latency_pod_average
|
||||
query_range: 'avg(nginx_upstream_response_msecs_avg{upstream=~"%{kube_namespace}-%{ci_environment_slug}-.*"})'
|
||||
label: Pod average
|
||||
unit: ms
|
||||
- title: "HTTP Error Rate"
|
||||
|
@ -34,7 +36,8 @@
|
|||
- nginx_upstream_responses_total
|
||||
weight: 1
|
||||
queries:
|
||||
- query_range: 'sum(rate(nginx_upstream_responses_total{status_code="5xx", upstream=~"%{kube_namespace}-%{ci_environment_slug}-.*"}[2m])) / sum(rate(nginx_upstream_responses_total{upstream=~"%{kube_namespace}-%{ci_environment_slug}-.*"}[2m])) * 100'
|
||||
- id: response_metrics_nginx_ingress_http_error_rate
|
||||
query_range: 'sum(rate(nginx_upstream_responses_total{status_code="5xx", upstream=~"%{kube_namespace}-%{ci_environment_slug}-.*"}[2m])) / sum(rate(nginx_upstream_responses_total{upstream=~"%{kube_namespace}-%{ci_environment_slug}-.*"}[2m])) * 100'
|
||||
label: 5xx Errors
|
||||
unit: "%"
|
||||
- group: Response metrics (HA Proxy)
|
||||
|
@ -46,10 +49,12 @@
|
|||
- haproxy_frontend_http_requests_total
|
||||
weight: 1
|
||||
queries:
|
||||
- query_range: 'sum(rate(haproxy_frontend_http_requests_total{%{environment_filter}}[2m])) by (code)'
|
||||
- id: response_metrics_ha_proxy_throughput_status_code
|
||||
query_range: 'sum(rate(haproxy_frontend_http_requests_total{%{environment_filter}}[2m])) by (code)'
|
||||
unit: req / sec
|
||||
label: Status Code
|
||||
series:
|
||||
- label: code
|
||||
- label: status_code
|
||||
when:
|
||||
- value: 2xx
|
||||
color: green
|
||||
|
@ -63,7 +68,8 @@
|
|||
- haproxy_frontend_http_responses_total
|
||||
weight: 1
|
||||
queries:
|
||||
- query_range: 'sum(rate(haproxy_frontend_http_responses_total{code="5xx",%{environment_filter}}[2m])) / sum(rate(haproxy_frontend_http_responses_total{%{environment_filter}}[2m]))'
|
||||
- id: response_metrics_ha_proxy_http_error_rate
|
||||
query_range: 'sum(rate(haproxy_frontend_http_responses_total{code="5xx",%{environment_filter}}[2m])) / sum(rate(haproxy_frontend_http_responses_total{%{environment_filter}}[2m]))'
|
||||
label: HTTP Errors
|
||||
unit: "%"
|
||||
- group: Response metrics (AWS ELB)
|
||||
|
@ -75,7 +81,8 @@
|
|||
- aws_elb_request_count_sum
|
||||
weight: 1
|
||||
queries:
|
||||
- query_range: 'sum(aws_elb_request_count_sum{%{environment_filter}}) / 60'
|
||||
- id: response_metrics_aws_elb_throughput_requests
|
||||
query_range: 'sum(aws_elb_request_count_sum{%{environment_filter}}) / 60'
|
||||
label: Total
|
||||
unit: req / sec
|
||||
- title: "Latency"
|
||||
|
@ -84,7 +91,8 @@
|
|||
- aws_elb_latency_average
|
||||
weight: 1
|
||||
queries:
|
||||
- query_range: 'avg(aws_elb_latency_average{%{environment_filter}}) * 1000'
|
||||
- id: response_metrics_aws_elb_latency_average
|
||||
query_range: 'avg(aws_elb_latency_average{%{environment_filter}}) * 1000'
|
||||
label: Average
|
||||
unit: ms
|
||||
- title: "HTTP Error Rate"
|
||||
|
@ -94,7 +102,8 @@
|
|||
- aws_elb_httpcode_backend_5_xx_sum
|
||||
weight: 1
|
||||
queries:
|
||||
- query_range: 'sum(aws_elb_httpcode_backend_5_xx_sum{%{environment_filter}}) / sum(aws_elb_request_count_sum{%{environment_filter}})'
|
||||
- id: response_metrics_aws_elb_http_error_rate
|
||||
query_range: 'sum(aws_elb_httpcode_backend_5_xx_sum{%{environment_filter}}) / sum(aws_elb_request_count_sum{%{environment_filter}})'
|
||||
label: HTTP Errors
|
||||
unit: "%"
|
||||
- group: Response metrics (NGINX)
|
||||
|
@ -106,7 +115,8 @@
|
|||
- nginx_server_requests
|
||||
weight: 1
|
||||
queries:
|
||||
- query_range: 'sum(rate(nginx_server_requests{server_zone!="*", server_zone!="_", %{environment_filter}}[2m])) by (code)'
|
||||
- id: response_metrics_nginx_throughput_status_code
|
||||
query_range: 'sum(rate(nginx_server_requests{server_zone!="*", server_zone!="_", %{environment_filter}}[2m])) by (code)'
|
||||
unit: req / sec
|
||||
label: Status Code
|
||||
series:
|
||||
|
@ -124,7 +134,8 @@
|
|||
- nginx_server_requestMsec
|
||||
weight: 1
|
||||
queries:
|
||||
- query_range: 'avg(nginx_server_requestMsec{%{environment_filter}})'
|
||||
- id: response_metrics_nginx_latency
|
||||
query_range: 'avg(nginx_server_requestMsec{%{environment_filter}})'
|
||||
label: Upstream
|
||||
unit: ms
|
||||
- title: "HTTP Error Rate"
|
||||
|
@ -133,7 +144,8 @@
|
|||
- nginx_server_requests
|
||||
weight: 1
|
||||
queries:
|
||||
- query_range: 'sum(rate(nginx_server_requests{code="5xx", %{environment_filter}}[2m]))'
|
||||
- id: response_metrics_nginx_http_error_rate
|
||||
query_range: 'sum(rate(nginx_server_requests{code="5xx", %{environment_filter}}[2m]))'
|
||||
label: HTTP Errors
|
||||
unit: "errors / sec"
|
||||
- group: System metrics (Kubernetes)
|
||||
|
@ -145,7 +157,8 @@
|
|||
- container_memory_usage_bytes
|
||||
weight: 4
|
||||
queries:
|
||||
- query_range: 'avg(sum(container_memory_usage_bytes{container_name!="POD",pod_name=~"^%{ci_environment_slug}-(.*)",namespace="%{kube_namespace}"}) by (job)) without (job) /1024/1024/1024'
|
||||
- id: system_metrics_kubernetes_container_memory_total
|
||||
query_range: 'avg(sum(container_memory_usage_bytes{container_name!="POD",pod_name=~"^%{ci_environment_slug}-(.*)",namespace="%{kube_namespace}"}) by (job)) without (job) /1024/1024/1024'
|
||||
label: Total
|
||||
unit: GB
|
||||
- title: "Core Usage (Total)"
|
||||
|
@ -154,7 +167,8 @@
|
|||
- container_cpu_usage_seconds_total
|
||||
weight: 3
|
||||
queries:
|
||||
- query_range: 'avg(sum(rate(container_cpu_usage_seconds_total{container_name!="POD",pod_name=~"^%{ci_environment_slug}-(.*)",namespace="%{kube_namespace}"}[15m])) by (job)) without (job)'
|
||||
- id: system_metrics_kubernetes_container_cores_total
|
||||
query_range: 'avg(sum(rate(container_cpu_usage_seconds_total{container_name!="POD",pod_name=~"^%{ci_environment_slug}-(.*)",namespace="%{kube_namespace}"}[15m])) by (job)) without (job)'
|
||||
label: Total
|
||||
unit: "cores"
|
||||
- title: "Memory Usage (Pod average)"
|
||||
|
@ -163,7 +177,8 @@
|
|||
- container_memory_usage_bytes
|
||||
weight: 2
|
||||
queries:
|
||||
- query_range: 'avg(sum(container_memory_usage_bytes{container_name!="POD",pod_name=~"^%{ci_environment_slug}-(.*)",namespace="%{kube_namespace}"}) by (job)) without (job) / count(avg(container_memory_usage_bytes{container_name!="POD",pod_name=~"^%{ci_environment_slug}-(.*)",namespace="%{kube_namespace}"}) without (job)) /1024/1024'
|
||||
- id: system_metrics_kubernetes_container_memory_average
|
||||
query_range: 'avg(sum(container_memory_usage_bytes{container_name!="POD",pod_name=~"^%{ci_environment_slug}-(.*)",namespace="%{kube_namespace}"}) by (job)) without (job) / count(avg(container_memory_usage_bytes{container_name!="POD",pod_name=~"^%{ci_environment_slug}-(.*)",namespace="%{kube_namespace}"}) without (job)) /1024/1024'
|
||||
label: Pod average
|
||||
unit: MB
|
||||
- title: "Core Usage (Pod average)"
|
||||
|
@ -172,6 +187,12 @@
|
|||
- container_cpu_usage_seconds_total
|
||||
weight: 1
|
||||
queries:
|
||||
- query_range: 'avg(sum(rate(container_cpu_usage_seconds_total{container_name!="POD",pod_name=~"^%{ci_environment_slug}-(.*)",namespace="%{kube_namespace}"}[15m])) by (job)) without (job) / count(sum(rate(container_cpu_usage_seconds_total{container_name!="POD",pod_name=~"^%{ci_environment_slug}-(.*)",namespace="%{kube_namespace}"}[15m])) by (pod_name))'
|
||||
- id: system_metrics_kubernetes_container_core_usage
|
||||
query_range: 'avg(sum(rate(container_cpu_usage_seconds_total{container_name!="POD",pod_name=~"^%{ci_environment_slug}-(.*)",namespace="%{kube_namespace}"}[15m])) by (job)) without (job) / count(sum(rate(container_cpu_usage_seconds_total{container_name!="POD",pod_name=~"^%{ci_environment_slug}-(.*)",namespace="%{kube_namespace}"}[15m])) by (pod_name))'
|
||||
label: Pod average
|
||||
unit: "cores"
|
||||
unit: "cores"
|
||||
- id: system_metrics_kubernetes_container_core_usage_canary
|
||||
query_range: 'avg(sum(rate(container_cpu_usage_seconds_total{container_name!="POD",pod_name=~"^%{ci_environment_slug}-canary-(.*)",namespace="%{kube_namespace}"}[15m])) by (job)) without (job) / count(sum(rate(container_cpu_usage_seconds_total{container_name!="POD",pod_name=~"^%{ci_environment_slug}-canary-(.*)",namespace="%{kube_namespace}"}[15m])) by (pod_name))'
|
||||
label: Pod average
|
||||
unit: "cores"
|
||||
track: canary
|
3
db/fixtures/development/99_common_metrics.rb
Normal file
3
db/fixtures/development/99_common_metrics.rb
Normal file
|
@ -0,0 +1,3 @@
|
|||
require_relative '../importers/common_metrics_importer.rb'
|
||||
|
||||
::Importers::CommonMetricsImporter.new.execute
|
3
db/fixtures/production/999_common_metrics.rb
Normal file
3
db/fixtures/production/999_common_metrics.rb
Normal file
|
@ -0,0 +1,3 @@
|
|||
require_relative '../importers/common_metrics_importer.rb'
|
||||
|
||||
::Importers::CommonMetricsImporter.new.execute
|
101
db/importers/common_metrics_importer.rb
Normal file
101
db/importers/common_metrics_importer.rb
Normal file
|
@ -0,0 +1,101 @@
|
|||
# frozen_string_literal: true
|
||||
|
||||
module Importers
|
||||
class PrometheusMetric < ActiveRecord::Base
|
||||
enum group: {
|
||||
# built-in groups
|
||||
nginx_ingress: -1,
|
||||
ha_proxy: -2,
|
||||
aws_elb: -3,
|
||||
nginx: -4,
|
||||
kubernetes: -5,
|
||||
|
||||
# custom groups
|
||||
business: 0,
|
||||
response: 1,
|
||||
system: 2,
|
||||
}
|
||||
|
||||
scope :common, -> { where(common: true) }
|
||||
|
||||
GROUP_TITLES = {
|
||||
business: _('Business metrics (Custom)'),
|
||||
response: _('Response metrics (Custom)'),
|
||||
system: _('System metrics (Custom)'),
|
||||
nginx_ingress: _('Response metrics (NGINX Ingress)'),
|
||||
ha_proxy: _('Response metrics (HA Proxy)'),
|
||||
aws_elb: _('Response metrics (AWS ELB)'),
|
||||
nginx: _('Response metrics (NGINX)'),
|
||||
kubernetes: _('System metrics (Kubernetes)')
|
||||
}.freeze
|
||||
end
|
||||
|
||||
class CommonMetricsImporter
|
||||
MissingQueryId = Class.new(StandardError)
|
||||
|
||||
attr_reader :content
|
||||
|
||||
def initialize(file = 'config/prometheus/common_metrics.yml')
|
||||
@content = YAML.load_file(file)
|
||||
end
|
||||
|
||||
def execute
|
||||
process_content do |id, attributes|
|
||||
find_or_build_metric!(id)
|
||||
.update!(**attributes)
|
||||
end
|
||||
end
|
||||
|
||||
private
|
||||
|
||||
def process_content(&blk)
|
||||
content.map do |group|
|
||||
process_group(group, &blk)
|
||||
end
|
||||
end
|
||||
|
||||
def process_group(group, &blk)
|
||||
attributes = {
|
||||
group: find_group_title_key(group['group'])
|
||||
}
|
||||
|
||||
group['metrics'].map do |metric|
|
||||
process_metric(metric, attributes, &blk)
|
||||
end
|
||||
end
|
||||
|
||||
def process_metric(metric, attributes, &blk)
|
||||
attributes = attributes.merge(
|
||||
title: metric['title'],
|
||||
y_label: metric['y_label'])
|
||||
|
||||
metric['queries'].map do |query|
|
||||
process_metric_query(query, attributes, &blk)
|
||||
end
|
||||
end
|
||||
|
||||
def process_metric_query(query, attributes, &blk)
|
||||
attributes = attributes.merge(
|
||||
legend: query['label'],
|
||||
query: query['query_range'],
|
||||
unit: query['unit'])
|
||||
|
||||
blk.call(query['id'], attributes)
|
||||
end
|
||||
|
||||
def find_or_build_metric!(id)
|
||||
raise MissingQueryId unless id
|
||||
|
||||
PrometheusMetric.common.find_by(identifier: id) ||
|
||||
PrometheusMetric.new(common: true, identifier: id)
|
||||
end
|
||||
|
||||
# Translates a group title read from the YAML file into the value stored for
# that group in the PrometheusMetric `group` enum.
#
# title - the group's display title, e.g. 'Response metrics (NGINX Ingress)'.
#
# Returns the enum value for the matching group.
# Raises ArgumentError when the title matches no known group, so that a typo
# in the YAML fails here with a readable message instead of a nil group being
# handed on to update!.
def find_group_title_key(title)
  group_name = find_group_title(title)
  raise ArgumentError, "Unknown metrics group title: #{title.inspect}" unless group_name

  PrometheusMetric.groups[group_name]
end
|
||||
|
||||
def find_group_title(title)
|
||||
PrometheusMetric::GROUP_TITLES.invert[title]
|
||||
end
|
||||
end
|
||||
end
|
16
db/migrate/20180101160629_create_prometheus_metrics.rb
Normal file
16
db/migrate/20180101160629_create_prometheus_metrics.rb
Normal file
|
@ -0,0 +1,16 @@
|
|||
class CreatePrometheusMetrics < ActiveRecord::Migration
|
||||
DOWNTIME = false
|
||||
|
||||
def change
|
||||
create_table :prometheus_metrics do |t|
|
||||
t.references :project, index: true, foreign_key: { on_delete: :cascade }, null: false
|
||||
t.string :title, null: false
|
||||
t.string :query, null: false
|
||||
t.string :y_label
|
||||
t.string :unit
|
||||
t.string :legend
|
||||
t.integer :group, null: false, index: true
|
||||
t.timestamps_with_timezone null: false
|
||||
end
|
||||
end
|
||||
end
|
|
@ -0,0 +1,15 @@
|
|||
class AddCommonToPrometheusMetrics < ActiveRecord::Migration
|
||||
include Gitlab::Database::MigrationHelpers
|
||||
|
||||
DOWNTIME = false
|
||||
|
||||
disable_ddl_transaction!
|
||||
|
||||
def up
|
||||
add_column_with_default(:prometheus_metrics, :common, :boolean, default: false)
|
||||
end
|
||||
|
||||
def down
|
||||
remove_column(:prometheus_metrics, :common)
|
||||
end
|
||||
end
|
|
@ -0,0 +1,11 @@
|
|||
class ChangeProjectIdForPrometheusMetrics < ActiveRecord::Migration
|
||||
include Gitlab::Database::MigrationHelpers
|
||||
|
||||
DOWNTIME = false
|
||||
|
||||
disable_ddl_transaction!
|
||||
|
||||
def change
|
||||
change_column_null :prometheus_metrics, :project_id, true
|
||||
end
|
||||
end
|
|
@ -0,0 +1,15 @@
|
|||
class AddIndexOnDefaultPrometheusMetrics < ActiveRecord::Migration
|
||||
include Gitlab::Database::MigrationHelpers
|
||||
|
||||
DOWNTIME = false
|
||||
|
||||
disable_ddl_transaction!
|
||||
|
||||
def up
|
||||
add_concurrent_index :prometheus_metrics, :common
|
||||
end
|
||||
|
||||
# Reverts #up by dropping the index on `common`.
#
# The column must match the one indexed in #up; removing any other column's
# index here would leave the `common` index in place after a rollback.
def down
  remove_concurrent_index :prometheus_metrics, :common
end
|
||||
end
|
|
@ -0,0 +1,11 @@
|
|||
# frozen_string_literal: true
|
||||
|
||||
class AddIdentifierToPrometheusMetric < ActiveRecord::Migration
|
||||
include Gitlab::Database::MigrationHelpers
|
||||
|
||||
DOWNTIME = false
|
||||
|
||||
# Adds the nullable string column `identifier` to prometheus_metrics.
#
# NOTE: no uniqueness is enforced at the database level. `unique` is an index
# option, not a column option, so passing it to add_column had no effect; it
# is omitted here to avoid implying a guarantee that does not exist. A unique
# index would have to be added in a separate migration.
def change
  add_column :prometheus_metrics, :identifier, :string
end
|
||||
end
|
15
db/migrate/20180831164909_import_common_metrics.rb
Normal file
15
db/migrate/20180831164909_import_common_metrics.rb
Normal file
|
@ -0,0 +1,15 @@
|
|||
class ImportCommonMetrics < ActiveRecord::Migration
|
||||
include Gitlab::Database::MigrationHelpers
|
||||
|
||||
require_relative '../importers/common_metrics_importer.rb'
|
||||
|
||||
DOWNTIME = false
|
||||
|
||||
def up
|
||||
Importers::CommonMetricsImporter.new.execute
|
||||
end
|
||||
|
||||
def down
|
||||
# no-op
|
||||
end
|
||||
end
|
21
db/schema.rb
21
db/schema.rb
|
@ -11,7 +11,7 @@
|
|||
#
|
||||
# It's strongly recommended that you check this file into your version control system.
|
||||
|
||||
ActiveRecord::Schema.define(version: 20180826111825) do
|
||||
ActiveRecord::Schema.define(version: 20180831164909) do
|
||||
|
||||
# These are extensions that must be enabled in order to support this database
|
||||
enable_extension "plpgsql"
|
||||
|
@ -1696,6 +1696,24 @@ ActiveRecord::Schema.define(version: 20180826111825) do
|
|||
add_index "projects", ["star_count"], name: "index_projects_on_star_count", using: :btree
|
||||
add_index "projects", ["visibility_level"], name: "index_projects_on_visibility_level", using: :btree
|
||||
|
||||
create_table "prometheus_metrics", force: :cascade do |t|
|
||||
t.integer "project_id"
|
||||
t.string "title", null: false
|
||||
t.string "query", null: false
|
||||
t.string "y_label"
|
||||
t.string "unit"
|
||||
t.string "legend"
|
||||
t.integer "group", null: false
|
||||
t.datetime_with_timezone "created_at", null: false
|
||||
t.datetime_with_timezone "updated_at", null: false
|
||||
t.boolean "common", default: false, null: false
|
||||
t.string "identifier"
|
||||
end
|
||||
|
||||
add_index "prometheus_metrics", ["common"], name: "index_prometheus_metrics_on_common", using: :btree
|
||||
add_index "prometheus_metrics", ["group"], name: "index_prometheus_metrics_on_group", using: :btree
|
||||
add_index "prometheus_metrics", ["project_id"], name: "index_prometheus_metrics_on_project_id", using: :btree
|
||||
|
||||
create_table "protected_branch_merge_access_levels", force: :cascade do |t|
|
||||
t.integer "protected_branch_id", null: false
|
||||
t.integer "access_level", default: 40, null: false
|
||||
|
@ -2375,6 +2393,7 @@ ActiveRecord::Schema.define(version: 20180826111825) do
|
|||
add_foreign_key "project_import_data", "projects", name: "fk_ffb9ee3a10", on_delete: :cascade
|
||||
add_foreign_key "project_mirror_data", "projects", on_delete: :cascade
|
||||
add_foreign_key "project_statistics", "projects", on_delete: :cascade
|
||||
add_foreign_key "prometheus_metrics", "projects", on_delete: :cascade
|
||||
add_foreign_key "protected_branch_merge_access_levels", "protected_branches", name: "fk_8a3072ccb3", on_delete: :cascade
|
||||
add_foreign_key "protected_branch_push_access_levels", "protected_branches", name: "fk_9ffc86a3d9", on_delete: :cascade
|
||||
add_foreign_key "protected_branches", "projects", name: "fk_7a9c6d93e7", on_delete: :cascade
|
||||
|
|
|
@ -20,6 +20,45 @@ GitLab uses the defined queries and fills in the environment specific variables.
|
|||
|
||||
## Adding to the library
|
||||
|
||||
We strive to support the 2-4 most important metrics for each common system service that supports Prometheus. If you are looking for support for a particular exporter which has not yet been added to the library, additions can be made [to the `additional_metrics.yml`](https://gitlab.com/gitlab-org/gitlab-ce/blob/master/config/prometheus/additional_metrics.yml) file.
|
||||
We strive to support the 2-4 most important metrics for each common system service that supports Prometheus. If you are looking for support for a particular exporter which has not yet been added to the library, additions can be made [to the `common_metrics.yml`](https://gitlab.com/gitlab-org/gitlab-ce/blob/master/config/prometheus/common_metrics.yml) file.
|
||||
|
||||
> Note: The library is only for monitoring public, common, system services which all customers can benefit from. Support for monitoring [customer proprietary metrics](https://gitlab.com/gitlab-org/gitlab-ee/issues/2273) will be added in a subsequent release.
|
||||
### Query identifier
|
||||
|
||||
Every query of a metric you add must have a unique identifier (`id`).
|
||||
The identifier is used to find and update the stored metric later, when its definition changes.
|
||||
|
||||
```yaml
|
||||
- group: Response metrics (NGINX Ingress)
|
||||
metrics:
|
||||
- title: "Throughput"
|
||||
y_label: "Requests / Sec"
|
||||
queries:
|
||||
- id: response_metrics_nginx_ingress_throughput_status_code
|
||||
query_range: 'sum(rate(nginx_upstream_responses_total{upstream=~"%{kube_namespace}-%{ci_environment_slug}-.*"}[2m])) by (status_code)'
|
||||
unit: req / sec
|
||||
label: Status Code
|
||||
```
|
||||
|
||||
### Update existing metrics
|
||||
|
||||
After you add a _common_ metric or change an existing one, you have to create a new database migration that will query and update all existing metrics.
|
||||
|
||||
**Note: If a query metric (which is identified by `id:`) is removed, it will not be removed from the database by default.**
|
||||
**You might want to add an additional database migration that decides what to do with the removed metric.**
|
||||
**For example: you might be interested in migrating all dependent data to a different metric.**
|
||||
|
||||
```ruby
|
||||
class ImportCommonMetrics < ActiveRecord::Migration
|
||||
require_relative '../importers/common_metrics_importer.rb'
|
||||
|
||||
DOWNTIME = false
|
||||
|
||||
def up
|
||||
Importers::CommonMetricsImporter.new.execute
|
||||
end
|
||||
|
||||
def down
|
||||
# no-op
|
||||
end
|
||||
end
|
||||
```
|
||||
|
|
|
@ -5,7 +5,7 @@ module Gitlab
|
|||
MUTEX = Mutex.new
|
||||
extend self
|
||||
|
||||
def load_groups_from_yaml(file_name = 'additional_metrics.yml')
|
||||
def load_groups_from_yaml(file_name)
|
||||
yaml_metrics_raw(file_name).map(&method(:group_from_entry))
|
||||
end
|
||||
|
||||
|
|
|
@ -4,10 +4,13 @@ module Gitlab
|
|||
include ActiveModel::Model
|
||||
|
||||
attr_accessor :name, :priority, :metrics
|
||||
|
||||
validates :name, :priority, :metrics, presence: true
|
||||
|
||||
def self.common_metrics
|
||||
AdditionalMetricsParser.load_groups_from_yaml
|
||||
::PrometheusMetric.common.group_by(&:group_title).map do |name, metrics|
|
||||
MetricGroup.new(name: name, priority: 0, metrics: metrics.map(&:to_query_metric))
|
||||
end
|
||||
end
|
||||
|
||||
# EE only
|
||||
|
|
119
spec/db/importers/common_metrics_importer_spec.rb
Normal file
119
spec/db/importers/common_metrics_importer_spec.rb
Normal file
|
@ -0,0 +1,119 @@
|
|||
require 'rails_helper'
|
||||
require Rails.root.join("db", "importers", "common_metrics_importer.rb")
|
||||
|
||||
describe Importers::PrometheusMetric do
|
||||
it 'group enum equals ::PrometheusMetric' do
|
||||
expect(described_class.groups).to eq(::PrometheusMetric.groups)
|
||||
end
|
||||
|
||||
it 'GROUP_TITLES equals ::PrometheusMetric' do
|
||||
expect(described_class::GROUP_TITLES).to eq(::PrometheusMetric::GROUP_TITLES)
|
||||
end
|
||||
end
|
||||
|
||||
describe Importers::CommonMetricsImporter do
|
||||
subject { described_class.new }
|
||||
|
||||
context "does import common_metrics.yml" do
|
||||
let(:groups) { subject.content }
|
||||
let(:metrics) { groups.map { |group| group['metrics'] }.flatten }
|
||||
let(:queries) { metrics.map { |group| group['queries'] }.flatten }
|
||||
let(:query_ids) { queries.map { |query| query['id'] } }
|
||||
|
||||
before do
|
||||
subject.execute
|
||||
end
|
||||
|
||||
it "has the same amount of groups" do
|
||||
expect(PrometheusMetric.common.group(:group).count.count).to eq(groups.count)
|
||||
end
|
||||
|
||||
it "has the same amount of metrics" do
|
||||
expect(PrometheusMetric.common.group(:group, :title).count.count).to eq(metrics.count)
|
||||
end
|
||||
|
||||
it "has the same amount of queries" do
|
||||
expect(PrometheusMetric.common.count).to eq(queries.count)
|
||||
end
|
||||
|
||||
it "does not have duplicate IDs" do
|
||||
expect(query_ids).to eq(query_ids.uniq)
|
||||
end
|
||||
|
||||
it "imports all IDs" do
  # Compare without regard to order: the `common` scope adds no ORDER BY, so
  # the rows returned by pluck are not guaranteed to follow the YAML order.
  expect(PrometheusMetric.common.pluck(:identifier)).to match_array(query_ids)
end
|
||||
end
|
||||
|
||||
context 'does import properly all fields' do
|
||||
let(:query_identifier) { 'response-metric' }
|
||||
let(:group) do
|
||||
{
|
||||
group: 'Response metrics (NGINX Ingress)',
|
||||
metrics: [{
|
||||
title: "Throughput",
|
||||
y_label: "Requests / Sec",
|
||||
queries: [{
|
||||
id: query_identifier,
|
||||
query_range: 'my-query',
|
||||
unit: 'my-unit',
|
||||
label: 'status code'
|
||||
}]
|
||||
}]
|
||||
}
|
||||
end
|
||||
|
||||
before do
|
||||
expect(subject).to receive(:content) { [group.deep_stringify_keys] }
|
||||
end
|
||||
|
||||
shared_examples 'stores metric' do
|
||||
let(:metric) { PrometheusMetric.find_by(identifier: query_identifier) }
|
||||
|
||||
it 'with all data' do
|
||||
expect(metric.group).to eq('nginx_ingress')
|
||||
expect(metric.title).to eq('Throughput')
|
||||
expect(metric.y_label).to eq('Requests / Sec')
|
||||
expect(metric.unit).to eq('my-unit')
|
||||
expect(metric.legend).to eq('status code')
|
||||
expect(metric.query).to eq('my-query')
|
||||
end
|
||||
end
|
||||
|
||||
context 'if ID is missing' do
|
||||
let(:query_identifier) { }
|
||||
|
||||
it 'raises exception' do
|
||||
expect { subject.execute }.to raise_error(described_class::MissingQueryId)
|
||||
end
|
||||
end
|
||||
|
||||
context 'for existing common metric with different ID' do
|
||||
let!(:existing_metric) { create(:prometheus_metric, :common, identifier: 'my-existing-metric') }
|
||||
|
||||
before do
|
||||
subject.execute
|
||||
end
|
||||
|
||||
it_behaves_like 'stores metric' do
|
||||
it 'and existing metric is not changed' do
|
||||
expect(metric).not_to eq(existing_metric)
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
context 'when metric with ID exists ' do
|
||||
let!(:existing_metric) { create(:prometheus_metric, :common, identifier: 'response-metric') }
|
||||
|
||||
before do
|
||||
subject.execute
|
||||
end
|
||||
|
||||
it_behaves_like 'stores metric' do
|
||||
it 'and existing metric is changed' do
|
||||
expect(metric).to eq(existing_metric)
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
16
spec/factories/prometheus_metrics.rb
Normal file
16
spec/factories/prometheus_metrics.rb
Normal file
|
@ -0,0 +1,16 @@
|
|||
FactoryBot.define do
|
||||
factory :prometheus_metric, class: PrometheusMetric do
|
||||
title 'title'
|
||||
query 'avg(metric)'
|
||||
y_label 'y_label'
|
||||
unit 'm/s'
|
||||
group :business
|
||||
project
|
||||
legend 'legend'
|
||||
|
||||
trait :common do
|
||||
common true
|
||||
project nil
|
||||
end
|
||||
end
|
||||
end
|
22
spec/lib/gitlab/prometheus/metric_group_spec.rb
Normal file
22
spec/lib/gitlab/prometheus/metric_group_spec.rb
Normal file
|
@ -0,0 +1,22 @@
|
|||
require 'rails_helper'
|
||||
|
||||
describe Gitlab::Prometheus::MetricGroup do
|
||||
describe '.common_metrics' do
|
||||
set(:project_metric) { create(:prometheus_metric) }
|
||||
set(:common_metric_group_a) { create(:prometheus_metric, :common, group: :aws_elb) }
|
||||
set(:common_metric_group_b_q1) { create(:prometheus_metric, :common, group: :kubernetes) }
|
||||
set(:common_metric_group_b_q2) { create(:prometheus_metric, :common, group: :kubernetes) }
|
||||
|
||||
subject { described_class.common_metrics }
|
||||
|
||||
it 'returns exactly two groups' do
|
||||
expect(subject.map(&:name)).to contain_exactly('Response metrics (AWS ELB)', 'System metrics (Kubernetes)')
|
||||
end
|
||||
|
||||
it 'returns exactly three metric queries' do
|
||||
expect(subject.map(&:metrics).flatten.map(&:queries)).to contain_exactly(
|
||||
common_metric_group_a.queries, common_metric_group_b_q1.queries,
|
||||
common_metric_group_b_q2.queries)
|
||||
end
|
||||
end
|
||||
end
|
14
spec/migrations/import_common_metrics_spec.rb
Normal file
14
spec/migrations/import_common_metrics_spec.rb
Normal file
|
@ -0,0 +1,14 @@
|
|||
require 'spec_helper'
|
||||
require Rails.root.join('db', 'migrate', '20180831164909_import_common_metrics.rb')
|
||||
|
||||
describe ImportCommonMetrics, :migration do
|
||||
describe '#up' do
|
||||
it "imports all prometheus metrics" do
|
||||
expect(PrometheusMetric.common).to be_empty
|
||||
|
||||
migrate!
|
||||
|
||||
expect(PrometheusMetric.common).not_to be_empty
|
||||
end
|
||||
end
|
||||
end
|
97
spec/models/prometheus_metric_spec.rb
Normal file
97
spec/models/prometheus_metric_spec.rb
Normal file
|
@ -0,0 +1,97 @@
|
|||
require 'spec_helper'
|
||||
|
||||
describe PrometheusMetric do
|
||||
subject { build(:prometheus_metric) }
|
||||
let(:other_project) { build(:project) }
|
||||
|
||||
it { is_expected.to belong_to(:project) }
|
||||
it { is_expected.to validate_presence_of(:title) }
|
||||
it { is_expected.to validate_presence_of(:query) }
|
||||
it { is_expected.to validate_presence_of(:group) }
|
||||
|
||||
describe 'common metrics' do
|
||||
using RSpec::Parameterized::TableSyntax
|
||||
|
||||
where(:common, :project, :result) do
|
||||
false | other_project | true
|
||||
false | nil | false
|
||||
true | other_project | false
|
||||
true | nil | true
|
||||
end
|
||||
|
||||
with_them do
|
||||
before do
|
||||
subject.common = common
|
||||
subject.project = project
|
||||
end
|
||||
|
||||
it { expect(subject.valid?).to eq(result) }
|
||||
end
|
||||
end
|
||||
|
||||
describe '#query_series' do
|
||||
using RSpec::Parameterized::TableSyntax
|
||||
|
||||
where(:legend, :type) do
|
||||
'Some other legend' | NilClass
|
||||
'Status Code' | Hash
|
||||
end
|
||||
|
||||
with_them do
|
||||
before do
|
||||
subject.legend = legend
|
||||
end
|
||||
|
||||
it { expect(subject.query_series).to be_a(type) }
|
||||
end
|
||||
end
|
||||
|
||||
describe '#group_title' do
|
||||
shared_examples 'group_title' do |group, title|
|
||||
subject { build(:prometheus_metric, group: group).group_title }
|
||||
|
||||
it "returns text #{title} for group #{group}" do
|
||||
expect(subject).to eq(title)
|
||||
end
|
||||
end
|
||||
|
||||
it_behaves_like 'group_title', :business, 'Business metrics (Custom)'
|
||||
it_behaves_like 'group_title', :response, 'Response metrics (Custom)'
|
||||
it_behaves_like 'group_title', :system, 'System metrics (Custom)'
|
||||
end
|
||||
|
||||
describe '#to_query_metric' do
|
||||
it 'converts to queryable metric object' do
|
||||
expect(subject.to_query_metric).to be_instance_of(Gitlab::Prometheus::Metric)
|
||||
end
|
||||
|
||||
it 'queryable metric object has title' do
|
||||
expect(subject.to_query_metric.title).to eq(subject.title)
|
||||
end
|
||||
|
||||
it 'queryable metric object has y_label' do
|
||||
expect(subject.to_query_metric.y_label).to eq(subject.y_label)
|
||||
end
|
||||
|
||||
it 'queryable metric has no required_metric' do
|
||||
expect(subject.to_query_metric.required_metrics).to eq([])
|
||||
end
|
||||
|
||||
it 'queryable metric has weight 0' do
|
||||
expect(subject.to_query_metric.weight).to eq(0)
|
||||
end
|
||||
|
||||
it 'queryable metrics has query description' do
|
||||
queries = [
|
||||
{
|
||||
query_range: subject.query,
|
||||
unit: subject.unit,
|
||||
label: subject.legend,
|
||||
series: nil
|
||||
}
|
||||
]
|
||||
|
||||
expect(subject.to_query_metric.queries).to eq(queries)
|
||||
end
|
||||
end
|
||||
end
|
Loading…
Reference in a new issue