# Monitoring service that reads memory and CPU metrics for an environment
# from a Prometheus server through its HTTP API.
class PrometheusService < MonitoringService
  include ReactiveCaching

  # Cache entries are keyed by the service's model name and its project id.
  self.reactive_cache_key = ->(service) { [service.class.model_name.singular, service.project_id] }
  self.reactive_cache_lease_timeout = 30.seconds
  self.reactive_cache_refresh_interval = 30.seconds
  self.reactive_cache_lifetime = 1.minute

  # Access to prometheus is directly through the API
  prop_accessor :api_url

  # The API URL is only required (and format-checked) once the service is
  # activated.
  with_options presence: true, if: :activated? do
    validates :api_url, url: true
  end

  after_save :clear_reactive_cache!

  # Ensures `properties` is a hash rather than nil.
  def initialize_properties
    self.properties = {} if properties.nil?
  end

  def title
    'Prometheus'
  end

  def description
    'Prometheus monitoring'
  end

  # Markdown help text for the service settings.
  def help
    <<-MD.strip_heredoc
      Retrieves the Kubernetes node metrics `container_cpu_usage_seconds_total`
      and `container_memory_usage_bytes` from the configured Prometheus server.
      If you are not using [Auto-Deploy](https://docs.gitlab.com/ee/ci/autodeploy/index.html)
      or have set up your own Prometheus server, an `environment` label is required on each metric to
      [identify the Environment](https://docs.gitlab.com/ce/user/project/integrations/prometheus.html#metrics-and-labels).
    MD
  end

  def self.to_param
    'prometheus'
  end

  # Form fields describing the configurable properties of this service.
  def fields
    [
      {
        type: 'text',
        name: 'api_url',
        title: 'API URL',
        placeholder: 'Prometheus API Base URL, like http://prometheus.example.com/'
      }
    ]
  end

  # Check we can connect to the Prometheus API
  #
  # Any arguments are accepted and ignored.
  #
  # Returns a hash with `:success` and `:result`. On failure `:result` is the
  # error message string, the same shape `calculate_reactive_cache` uses for
  # its failures.
  def test(*args)
    client.ping

    { success: true, result: 'Checked API endpoint' }
  rescue Gitlab::PrometheusError => err
    { success: false, result: err.message }
  end

  # Returns the reactive-cache data for the given environment, looked up by
  # the environment's slug.
  # NOTE(review): presumably nil until the cache has been populated - confirm
  # against ReactiveCaching.
  def metrics(environment)
    with_reactive_cache(environment.slug) do |data|
      data
    end
  end

  # Cache metrics for specific environment
  #
  # Returns nil when the service is not active, has no project, or the
  # project is pending deletion. Otherwise returns a hash with the metric
  # values and a `last_update` timestamp, or `{ success: false, result: ... }`
  # when the Prometheus client raises Gitlab::PrometheusError.
  def calculate_reactive_cache(environment_slug)
    return unless active? && project && !project.pending_delete?

    memory_query = %{(sum(container_memory_usage_bytes{container_name!="POD",environment="#{environment_slug}"}) / count(container_memory_usage_bytes{container_name!="POD",environment="#{environment_slug}"})) /1024/1024}
    cpu_query = %{sum(rate(container_cpu_usage_seconds_total{container_name!="POD",environment="#{environment_slug}"}[2m])) / count(container_cpu_usage_seconds_total{container_name!="POD",environment="#{environment_slug}"}) * 100}

    # Computed once so both range queries start at the same instant.
    range_start = 8.hours.ago

    {
      success: true,
      metrics: {
        # Average Memory used in MB
        memory_values: client.query_range(memory_query, start: range_start),
        memory_current: client.query(memory_query),
        # Average CPU Utilization
        cpu_values: client.query_range(cpu_query, start: range_start),
        cpu_current: client.query(cpu_query)
      },
      last_update: Time.now.utc
    }
  rescue Gitlab::PrometheusError => err
    { success: false, result: err.message }
  end

  # Memoized Prometheus API client built from `api_url`.
  # NOTE(review): the client is built once per instance, so a later change to
  # `api_url` on the same object is not picked up - confirm that is intended.
  def client
    @prometheus ||= Gitlab::Prometheus.new(api_url: api_url)
  end
end