# Dashboard definition with per-pod CPU, memory, network and disk panels.
# Every query is filtered with pod="{{pod}}"; the `pod` variable is declared
# under `templating.variables` below (presumably substituted by the dashboard
# renderer before the query is sent -- confirm against the consumer).
dashboard: 'K8s pod health'
priority: 2

templating:
  variables:
    # Selectable pod name, populated from the `pod` label values of the
    # series matched by `series_selector`.
    pod:
      label: 'Pod name'
      type: metric_label_values
      options:
        series_selector: 'container_memory_working_set_bytes'
        label: 'pod'

panel_groups:
  - group: CPU metrics
    panels:
      - title: "CPU usage"
        type: "line-chart"
        y_label: "Cores per container"
        metrics:
          - id: pod_cpu_usage_seconds_total
            # Per-container rate of CPU seconds over a 5m window; series
            # whose container label is "POD" are excluded.
            query_range: >-
              sum(
                rate(container_cpu_usage_seconds_total{pod="{{pod}}",container!="POD"}[5m])
              )
              by (container)
            unit: "cores"
            label: container

      - title: "CPU throttling"
        type: "line-chart"
        y_label: "Cores per container"
        metrics:
          - id: pod_cpu_cfs_throttle
            # NOTE(review): unlike the CPU usage query above, this one does
            # not filter container!="POD" -- confirm whether that is intended.
            # NOTE(review): unit "cores" for a throttled-seconds rate looks
            # questionable; left unchanged pending confirmation.
            query_range: >-
              sum(
                rate(container_cpu_cfs_throttled_seconds_total{pod="{{pod}}"}[5m])
              )
              by (container)
            unit: "cores"
            label: container

  - group: Memory metrics
    panels:
      - title: "Memory usage working set"
        type: "line-chart"
        y_label: "Working set memory"
        metrics:
          - id: pod_memory_working_set
            # Instantaneous working-set bytes summed per container (no rate).
            query_range: >-
              sum(
                container_memory_working_set_bytes{pod="{{pod}}",container!="POD"}
              )
              by (container)
            unit: "bytes"
            label: container

  - group: Network metrics
    panels:
      - title: "Network Receive (In)"
        type: "line-chart"
        y_label: "Received (bytes/sec)"
        metrics:
          - id: pod_network_receive
            # Aggregated per pod rather than per container.
            query_range: >-
              sum(
                rate(
                  container_network_receive_bytes_total{pod="{{pod}}"}[5m]
                )
              )
              by (pod)
            # The query is a rate over a byte counter, so the unit is
            # bytes per second -- matches y_label and the disk panels.
            unit: "bytes / sec"
            label: pod

      - title: "Network Transmit (Out)"
        type: "line-chart"
        y_label: "Transmitted (bytes/sec)"
        metrics:
          - id: pod_network_transmit
            # Aggregated per pod rather than per container.
            query_range: >-
              sum(
                rate(
                  container_network_transmit_bytes_total{pod="{{pod}}"}[5m]
                )
              )
              by (pod)
            # The query is a rate over a byte counter, so the unit is
            # bytes per second -- matches y_label and the disk panels.
            unit: "bytes / sec"
            label: pod

  - group: Disk metrics
    panels:
      - title: "Disk Reads"
        type: "line-chart"
        y_label: "Disk reads (bytes/sec)"
        metrics:
          - id: pod_disk_reads
            # One series per (container, device) pair.
            query_range: >-
              sum(
                rate(
                  container_fs_reads_bytes_total{pod="{{pod}}", container!="POD"}[5m]
                )
              )
              by (container,device)
            unit: "bytes / sec"
            # Quoted because a plain scalar starting with `{` would be
            # parsed as a YAML flow mapping.
            label: "{{container}} {{device}}"

      - title: "Disk Writes"
        type: "line-chart"
        y_label: "Disk writes (bytes/sec)"
        metrics:
          - id: pod_disk_writes
            # One series per (container, device) pair.
            query_range: >-
              sum(
                rate(
                  container_fs_writes_bytes_total{pod="{{pod}}", container!="POD"}[5m]
                )
              )
              by (container,device)
            unit: "bytes / sec"
            # Quoted because a plain scalar starting with `{` would be
            # parsed as a YAML flow mapping.
            label: "{{container}} {{device}}"