# gitlab-org--gitlab-foss/config/prometheus/pod_metrics.yml
#
# 116 lines
# 2.7 KiB
# YAML

# Dashboard identity. `priority` is a bare integer; presumably an ordering
# hint among dashboards (confirm against the consumer).
dashboard: 'K8s pod health'
priority: 2

# Template variables. `pod` is declared as a `metric_label_values` variable
# drawn from the `pod` label of `container_memory_working_set_bytes`; the panel
# queries in this file reference it through the {{pod}} placeholder.
templating:
  variables:
    pod:
      label: 'Pod name'
      type: metric_label_values
      options:
        series_selector: 'container_memory_working_set_bytes'
        label: 'pod'
panel_groups:
# CPU panels for the pod selected through {{pod}}; both aggregate by container.
- group: CPU metrics
  panels:
  # 5-minute rate of container_cpu_usage_seconds_total, summed per container.
  # Series whose container label is "POD" are excluded.
  - title: 'CPU usage'
    type: 'line-chart'
    y_label: 'Cores per container'
    metrics:
    - id: pod_cpu_usage_seconds_total
      query_range: >-
        sum(
          rate(container_cpu_usage_seconds_total{pod="{{pod}}",container!="POD"}[5m])
        )
        by (container)
      unit: 'cores'
      label: container
  # 5-minute rate of container_cpu_cfs_throttled_seconds_total, summed per
  # container.
  # NOTE(review): unlike "CPU usage" above, this selector does not exclude
  # container="POD", and a rate of throttled seconds is labelled in cores;
  # confirm both are intentional.
  - title: 'CPU throttling'
    type: 'line-chart'
    y_label: 'Cores per container'
    metrics:
    - id: pod_cpu_cfs_throttle
      query_range: >-
        sum(
          rate(container_cpu_cfs_throttled_seconds_total{pod="{{pod}}"}[5m])
        )
        by (container)
      unit: 'cores'
      label: container
# Memory panel for the pod selected through {{pod}}.
- group: Memory metrics
  panels:
  # Sum of container_memory_working_set_bytes per container. The metric is
  # read directly (no rate()); series with container="POD" are excluded.
  - title: 'Memory usage working set'
    type: 'line-chart'
    y_label: 'Working set memory'
    metrics:
    - id: pod_memory_working_set
      query_range: >-
        sum(
          container_memory_working_set_bytes{pod="{{pod}}",container!="POD"}
        ) by (container)
      unit: 'bytes'
      label: container
# Network throughput panels for the pod selected through {{pod}}. Both queries
# aggregate by pod (not by container) and apply no container filter.
- group: Network metrics
  panels:
  # 5-minute rate of container_network_receive_bytes_total, summed by pod.
  - title: 'Network Receive (In)'
    type: 'line-chart'
    y_label: 'Received (bytes/sec)'
    metrics:
    - id: pod_network_receive
      query_range: >-
        sum(
          rate(
            container_network_receive_bytes_total{pod="{{pod}}"}[5m]
          )
        ) by (pod)
      # rate() yields a per-second value, so the unit is bytes per second,
      # matching the y_label and the spelling used by the disk panels.
      unit: 'bytes / sec'
      label: pod
  # 5-minute rate of container_network_transmit_bytes_total, summed by pod.
  - title: 'Network Transmit (Out)'
    type: 'line-chart'
    y_label: 'Transmitted (bytes/sec)'
    metrics:
    - id: pod_network_transmit
      query_range: >-
        sum(
          rate(
            container_network_transmit_bytes_total{pod="{{pod}}"}[5m]
          )
        ) by (pod)
      # Per-second rate again; quoted like every other unit in this file.
      unit: 'bytes / sec'
      label: pod
# Disk throughput panels for the pod selected through {{pod}}. Both queries
# exclude container="POD" and group by container and device.
- group: Disk metrics
  panels:
  # 5-minute rate of container_fs_reads_bytes_total per (container, device).
  - title: 'Disk Reads'
    type: 'line-chart'
    y_label: 'Disk reads (bytes/sec)'
    metrics:
    - id: pod_disk_reads
      query_range: >-
        sum(
          rate(
            container_fs_reads_bytes_total{pod="{{pod}}", container!="POD"}[5m]
          )
        ) by (container,device)
      unit: 'bytes / sec'
      # The label names the two grouping labels of the query; presumably the
      # consumer expands the placeholders per series (confirm). It must stay
      # quoted: a value starting with `{` would otherwise parse as a flow
      # mapping.
      label: '{{container}} {{device}}'
  # 5-minute rate of container_fs_writes_bytes_total per (container, device).
  - title: 'Disk Writes'
    type: 'line-chart'
    y_label: 'Disk writes (bytes/sec)'
    metrics:
    - id: pod_disk_writes
      query_range: >-
        sum(
          rate(
            container_fs_writes_bytes_total{pod="{{pod}}", container!="POD"}[5m]
          )
        ) by (container,device)
      unit: 'bytes / sec'
      # Same placeholder label as the reads panel; quoted for the same reason.
      label: '{{container}} {{device}}'