# Overview dashboard: host resource usage (memory, CPU) plus error-ratio
# panels for the web and API services, each plotted against its SLO lines.
dashboard: 'Overview'
priority: 1

templating:
  variables:
    # Free-text variable referenced as {{instance}} in the resource-usage
    # queries below, where it is used as a regex matcher (instance=~"...").
    # The default '.+' matches every non-empty instance label.
    instance:
      type: 'text'
      label: 'Instance label regex'
      options:
        default_value: '.+'

panel_groups:
  - group: 'Resource usage'
    panels:
      - title: "Memory usage"
        type: "line-chart"
        y_label: "% memory used"
        metrics:
          # Percent used = (1 - available / total) * 100. "Available" is
          # MemAvailable when that series exists; otherwise the `or` falls
          # back to MemFree + Buffers + Cached + Slab.
          - id: node_memory_usage_percentage
            query_range: '(1 - (node_memory_MemAvailable_bytes{instance=~"{{instance}}"} or (node_memory_MemFree_bytes{instance=~"{{instance}}"} + node_memory_Buffers_bytes{instance=~"{{instance}}"} + node_memory_Cached_bytes{instance=~"{{instance}}"} + node_memory_Slab_bytes{instance=~"{{instance}}"})) / node_memory_MemTotal_bytes{instance=~"{{instance}}"}) * 100'
            unit: "%"
            label: instance

      - title: "CPU usage"
        type: "line-chart"
        y_label: "% CPU used"
        metrics:
          # Percent busy = (1 - idle rate) * 100, averaged with the `mode`
          # and `cpu` labels dropped, using irate over a 5m window.
          - id: node_cpu_usage_percentage
            query_range: '(avg without (mode,cpu) (1 - irate(node_cpu_seconds_total{mode="idle",instance=~"{{instance}}"}[5m]))) * 100'
            unit: "%"
            label: instance

  # NOTE(review): {{ci_environment_slug}} is not declared under `templating`
  # in this file; presumably the dashboard renderer supplies it - confirm.
  - group: Web Service
    panels:
      - title: Web Service - Error Ratio
        type: line-chart
        y_label: "Unhandled Exceptions (%)"
        metrics:
          # Worst error ratio seen in each 1m window, as a percentage.
          - id: wser_web_service
            query_range: 'max(max_over_time(gitlab_service_errors:ratio{environment="{{ci_environment_slug}}", type="web", stage="main"}[1m])) by (type) * 100'
            unit: "%"
            label: "Error Ratio"
          # Environment-specific SLO, falling back to the SLO across all
          # environments. The `or` is parenthesised because `*` binds tighter
          # than `or` in PromQL; without the parentheses only the fallback
          # branch would be scaled to a percentage.
          - id: wser_degradation_slo
            query_range: '(avg(slo:max:gitlab_service_errors:ratio{environment="{{ci_environment_slug}}", type="web", stage="main"}) or avg(slo:max:gitlab_service_errors:ratio{type="web"})) * 100'
            unit: "%"
            label: "Degradation SLO"
          # Outage threshold is twice the degradation SLO.
          - id: wser_outage_slo
            query_range: '2 * (avg(slo:max:gitlab_service_errors:ratio{environment="{{ci_environment_slug}}", type="web", stage="main"}) or avg(slo:max:gitlab_service_errors:ratio{type="web"})) * 100'
            unit: "%"
            label: "Outage SLO"

  - group: API Service
    panels:
      - title: API Service - Error Ratio
        type: line-chart
        y_label: "Unhandled Exceptions (%)"
        metrics:
          # Worst error ratio seen in each 1m window, as a percentage.
          # The id is kept as-is because it may be referenced elsewhere.
          - id: aser_web_service
            query_range: 'max(max_over_time(gitlab_service_errors:ratio{environment="{{ci_environment_slug}}", type="api", stage="main"}[1m])) by (type) * 100'
            unit: "%"
            label: "Error Ratio"
          # Environment-specific SLO, falling back to the API SLO across all
          # environments (the fallback selects type="api" so this panel never
          # plots the web service's SLO). The `or` is parenthesised because
          # `*` binds tighter than `or` in PromQL.
          - id: aser_degradation_slo
            query_range: '(avg(slo:max:gitlab_service_errors:ratio{environment="{{ci_environment_slug}}", type="api", stage="main"}) or avg(slo:max:gitlab_service_errors:ratio{type="api"})) * 100'
            unit: "%"
            label: "Degradation SLO"
          # Outage threshold is twice the degradation SLO.
          - id: aser_outage_slo
            query_range: '2 * (avg(slo:max:gitlab_service_errors:ratio{environment="{{ci_environment_slug}}", type="api", stage="main"}) or avg(slo:max:gitlab_service_errors:ratio{type="api"})) * 100'
            unit: "%"
            label: "Outage SLO"