From 7c2b7f702c3df99be0d3f60d92c67a291e8b6d7c Mon Sep 17 00:00:00 2001
From: Andrei Stoicescu <astoicescu@gitlab.com>
Date: Thu, 5 Mar 2020 15:06:47 +0200
Subject: [PATCH] Move system metrics block to top of config file

---
 config/prometheus/common_metrics.yml | 152 +++++++++++++--------------
 1 file changed, 76 insertions(+), 76 deletions(-)

Note: the relocated "System metrics (Kubernetes)" group keeps the y_axis
settings (format / precision) that the block had at its old position, so
this change is a pure move. The blob id on the "index" line below was not
regenerated after restoring those keys.

diff --git a/config/prometheus/common_metrics.yml b/config/prometheus/common_metrics.yml
index aa739614c9df2..85833cc19689f 100644
--- a/config/prometheus/common_metrics.yml
+++ b/config/prometheus/common_metrics.yml
@@ -1,6 +1,82 @@
 dashboard: 'Environment metrics'
 priority: 1
 panel_groups:
+- group: System metrics (Kubernetes)
+  priority: 15
+  panels:
+  - title: "Memory Usage (Total)"
+    type: "area-chart"
+    y_label: "Total Memory Used (GB)"
+    y_axis:
+      format: "gibibytes"
+    weight: 4
+    metrics:
+    - id: system_metrics_kubernetes_container_memory_total
+      query_range: 'avg(sum(container_memory_usage_bytes{container_name!="POD",pod_name=~"^%{ci_environment_slug}-(.*)",namespace="%{kube_namespace}"}) by (job)) without (job)  /1024/1024/1024'
+      label: Total (GB)
+      unit: GB
+  - title: "Core Usage (Total)"
+    type: "area-chart"
+    y_label: "Total Cores"
+    weight: 3
+    metrics:
+    - id: system_metrics_kubernetes_container_cores_total
+      query_range: 'avg(sum(rate(container_cpu_usage_seconds_total{container_name!="POD",pod_name=~"^%{ci_environment_slug}-(.*)",namespace="%{kube_namespace}"}[15m])) by (job)) without (job)'
+      label: Total (cores)
+      unit: "cores"
+  - title: "Memory Usage (Pod average)"
+    type: "line-chart"
+    y_label: "Memory Used per Pod (MB)"
+    y_axis:
+      format: "mebibytes"
+    weight: 2
+    metrics:
+    - id: system_metrics_kubernetes_container_memory_average
+      query_range: 'avg(sum(container_memory_usage_bytes{container_name!="POD",pod_name=~"^%{ci_environment_slug}-([^c].*|c([^a]|a([^n]|n([^a]|a([^r]|r[^y])))).*|)-(.*)",namespace="%{kube_namespace}"}) by (job)) without (job) / count(avg(container_memory_usage_bytes{container_name!="POD",pod_name=~"^%{ci_environment_slug}-([^c].*|c([^a]|a([^n]|n([^a]|a([^r]|r[^y])))).*|)-(.*)",namespace="%{kube_namespace}"}) without (job)) /1024/1024'
+      label: Pod average (MB)
+      unit: MB
+  - title: "Canary: Memory Usage (Pod Average)"
+    type: "line-chart"
+    y_label: "Memory Used per Pod (MB)"
+    y_axis:
+      format: "mebibytes"
+    weight: 2
+    metrics:
+    - id: system_metrics_kubernetes_container_memory_average_canary
+      query_range: 'avg(sum(container_memory_usage_bytes{container_name!="POD",pod_name=~"^%{ci_environment_slug}-canary-(.*)",namespace="%{kube_namespace}"}) by (job)) without (job) / count(avg(container_memory_usage_bytes{container_name!="POD",pod_name=~"^%{ci_environment_slug}-canary-(.*)",namespace="%{kube_namespace}"}) without (job)) /1024/1024'
+      label: Pod average (MB)
+      unit: MB
+      track: canary
+  - title: "Core Usage (Pod Average)"
+    type: "line-chart"
+    y_label: "Cores per Pod"
+    weight: 1
+    metrics:
+    - id: system_metrics_kubernetes_container_core_usage
+      query_range: 'avg(sum(rate(container_cpu_usage_seconds_total{container_name!="POD",pod_name=~"^%{ci_environment_slug}-([^c].*|c([^a]|a([^n]|n([^a]|a([^r]|r[^y])))).*|)-(.*)",namespace="%{kube_namespace}"}[15m])) by (job)) without (job) / count(sum(rate(container_cpu_usage_seconds_total{container_name!="POD",pod_name=~"^%{ci_environment_slug}-([^c].*|c([^a]|a([^n]|n([^a]|a([^r]|r[^y])))).*|)-(.*)",namespace="%{kube_namespace}"}[15m])) by (pod_name))'
+      label: Pod average (cores)
+      unit: "cores"
+  - title: "Canary: Core Usage (Pod Average)"
+    type: "line-chart"
+    y_label: "Cores per Pod"
+    weight: 1
+    metrics:
+    - id: system_metrics_kubernetes_container_core_usage_canary
+      query_range: 'avg(sum(rate(container_cpu_usage_seconds_total{container_name!="POD",pod_name=~"^%{ci_environment_slug}-canary-(.*)",namespace="%{kube_namespace}"}[15m])) by (job)) without (job) / count(sum(rate(container_cpu_usage_seconds_total{container_name!="POD",pod_name=~"^%{ci_environment_slug}-canary-(.*)",namespace="%{kube_namespace}"}[15m])) by (pod_name))'
+      label: Pod average (cores)
+      unit: "cores"
+      track: canary
+  - title: "Knative function invocations"
+    type: "area-chart"
+    y_label: "Invocations"
+    y_axis:
+      precision: 0
+    weight: 1
+    metrics:
+    - id: system_metrics_knative_function_invocation_count
+      query_range: 'sum(ceil(rate(istio_requests_total{destination_service_namespace="%{kube_namespace}", destination_service=~"%{function_name}.*"}[1m])*60))'
+      label: invocations / minute
+      unit: requests
 # NGINX Ingress metrics for pre-0.16.0 versions
 - group: Response metrics (NGINX Ingress VTS)
   priority: 10
@@ -150,79 +226,3 @@ panel_groups:
       query_range: 'sum(rate(nginx_server_requests{code="5xx", %{environment_filter}}[2m]))'
       label: HTTP Errors
       unit: "errors / sec"
-- group: System metrics (Kubernetes)
-  priority: 15
-  panels:
-  - title: "Memory Usage (Total)"
-    type: "area-chart"
-    y_label: "Total Memory Used (GB)"
-    y_axis:
-      format: "gibibytes"
-    weight: 4
-    metrics:
-    - id: system_metrics_kubernetes_container_memory_total
-      query_range: 'avg(sum(container_memory_usage_bytes{container_name!="POD",pod_name=~"^%{ci_environment_slug}-(.*)",namespace="%{kube_namespace}"}) by (job)) without (job)  /1024/1024/1024'
-      label: Total (GB)
-      unit: GB
-  - title: "Core Usage (Total)"
-    type: "area-chart"
-    y_label: "Total Cores"
-    weight: 3
-    metrics:
-    - id: system_metrics_kubernetes_container_cores_total
-      query_range: 'avg(sum(rate(container_cpu_usage_seconds_total{container_name!="POD",pod_name=~"^%{ci_environment_slug}-(.*)",namespace="%{kube_namespace}"}[15m])) by (job)) without (job)'
-      label: Total (cores)
-      unit: "cores"
-  - title: "Memory Usage (Pod average)"
-    type: "line-chart"
-    y_label: "Memory Used per Pod (MB)"
-    y_axis:
-      format: "mebibytes"
-    weight: 2
-    metrics:
-    - id: system_metrics_kubernetes_container_memory_average
-      query_range: 'avg(sum(container_memory_usage_bytes{container_name!="POD",pod_name=~"^%{ci_environment_slug}-([^c].*|c([^a]|a([^n]|n([^a]|a([^r]|r[^y])))).*|)-(.*)",namespace="%{kube_namespace}"}) by (job)) without (job) / count(avg(container_memory_usage_bytes{container_name!="POD",pod_name=~"^%{ci_environment_slug}-([^c].*|c([^a]|a([^n]|n([^a]|a([^r]|r[^y])))).*|)-(.*)",namespace="%{kube_namespace}"}) without (job)) /1024/1024'
-      label: Pod average (MB)
-      unit: MB
-  - title: "Canary: Memory Usage (Pod Average)"
-    type: "line-chart"
-    y_label: "Memory Used per Pod (MB)"
-    y_axis:
-      format: "mebibytes"
-    weight: 2
-    metrics:
-    - id: system_metrics_kubernetes_container_memory_average_canary
-      query_range: 'avg(sum(container_memory_usage_bytes{container_name!="POD",pod_name=~"^%{ci_environment_slug}-canary-(.*)",namespace="%{kube_namespace}"}) by (job)) without (job) / count(avg(container_memory_usage_bytes{container_name!="POD",pod_name=~"^%{ci_environment_slug}-canary-(.*)",namespace="%{kube_namespace}"}) without (job)) /1024/1024'
-      label: Pod average (MB)
-      unit: MB
-      track: canary
-  - title: "Core Usage (Pod Average)"
-    type: "line-chart"
-    y_label: "Cores per Pod"
-    weight: 1
-    metrics:
-    - id: system_metrics_kubernetes_container_core_usage
-      query_range: 'avg(sum(rate(container_cpu_usage_seconds_total{container_name!="POD",pod_name=~"^%{ci_environment_slug}-([^c].*|c([^a]|a([^n]|n([^a]|a([^r]|r[^y])))).*|)-(.*)",namespace="%{kube_namespace}"}[15m])) by (job)) without (job) / count(sum(rate(container_cpu_usage_seconds_total{container_name!="POD",pod_name=~"^%{ci_environment_slug}-([^c].*|c([^a]|a([^n]|n([^a]|a([^r]|r[^y])))).*|)-(.*)",namespace="%{kube_namespace}"}[15m])) by (pod_name))'
-      label: Pod average (cores)
-      unit: "cores"
-  - title: "Canary: Core Usage (Pod Average)"
-    type: "line-chart"
-    y_label: "Cores per Pod"
-    weight: 1
-    metrics:
-    - id: system_metrics_kubernetes_container_core_usage_canary
-      query_range: 'avg(sum(rate(container_cpu_usage_seconds_total{container_name!="POD",pod_name=~"^%{ci_environment_slug}-canary-(.*)",namespace="%{kube_namespace}"}[15m])) by (job)) without (job) / count(sum(rate(container_cpu_usage_seconds_total{container_name!="POD",pod_name=~"^%{ci_environment_slug}-canary-(.*)",namespace="%{kube_namespace}"}[15m])) by (pod_name))'
-      label: Pod average (cores)
-      unit: "cores"
-      track: canary
-  - title: "Knative function invocations"
-    type: "area-chart"
-    y_label: "Invocations"
-    y_axis:
-      precision: 0
-    weight: 1
-    metrics:
-    - id: system_metrics_knative_function_invocation_count
-      query_range: 'sum(ceil(rate(istio_requests_total{destination_service_namespace="%{kube_namespace}", destination_service=~"%{function_name}.*"}[1m])*60))'
-      label: invocations / minute
-      unit: requests
-- 
GitLab