From 7fff7cd2ac33b46feed7fe7ec8866e08e87aa0aa Mon Sep 17 00:00:00 2001 From: genofire Date: Tue, 28 Feb 2023 14:18:26 +0100 Subject: [PATCH] feat(helm): add prometheus resources to monitor (#2958) ### Pull Request Checklist * [x] I have added Go unit tests or [Complement integration tests](https://github.com/matrix-org/complement) for this PR _or_ I have justified why this PR doesn't need tests * [x] Pull request includes a [sign off below using a legally identifiable name](https://matrix-org.github.io/dendrite/development/contributing#sign-off) _or_ I have already signed off privately --- I do not know how you run helm-docs; otherwise I would like to add the following somewhere: ````markdown * Works well with [Prometheus Operator](https://prometheus-operator.dev/) ([Helmchart](https://artifacthub.io/packages/helm/prometheus-community/kube-prometheus-stack)) and their setup of [Grafana](https://grafana.com/grafana/), by enabling the following values: ```yaml prometheus: servicemonitor: enabled: true labels: release: "kube-prometheus-stack" rules: enabled: true # will deploy alert rules labels: release: "kube-prometheus-stack" grafana: dashboards: enabled: true # will deploy default dashboards ``` PS: The label `release=kube-prometheus-stack` is set up by the helmchart of the Prometheus Operator. For Grafana Dashboards it may be necessary to enable scanning in the correct namespaces (or ALL), enabled by `sidecar.dashboards.searchNamespace` in [Helmchart of grafana](https://artifacthub.io/packages/helm/grafana/grafana) (which is part of PrometheusOperator, so `grafana.sidecar.dashboards.searchNamespace`) ```` Maybe also put the screenshot of that Grafana dashboard somewhere: https://grafana.com/grafana/dashboards/13916-dendrite/ --- @S7evinK could you take a look?
Signed-off-by: genofire --- helm/dendrite/.helm-docs/monitoring.gotmpl | 22 + helm/dendrite/Chart.yaml | 2 +- helm/dendrite/README.md | 34 +- helm/dendrite/README.md.gotmpl | 1 + helm/dendrite/ci/ct-ingress-values.yaml | 5 + .../grafana_dashboards/dendrite-rev1.json | 1119 +++++++++++++++++ .../configmap_grafana_dashboards.yaml | 16 + helm/dendrite/templates/prometheus-rules.yaml | 16 + helm/dendrite/templates/secrets.yaml | 28 +- helm/dendrite/templates/servicemonitor.yaml | 26 + helm/dendrite/values.yaml | 23 + 11 files changed, 1282 insertions(+), 10 deletions(-) create mode 100644 helm/dendrite/.helm-docs/monitoring.gotmpl create mode 100644 helm/dendrite/grafana_dashboards/dendrite-rev1.json create mode 100644 helm/dendrite/templates/configmap_grafana_dashboards.yaml create mode 100644 helm/dendrite/templates/prometheus-rules.yaml create mode 100644 helm/dendrite/templates/servicemonitor.yaml diff --git a/helm/dendrite/.helm-docs/monitoring.gotmpl b/helm/dendrite/.helm-docs/monitoring.gotmpl new file mode 100644 index 000000000..3618a1c1a --- /dev/null +++ b/helm/dendrite/.helm-docs/monitoring.gotmpl @@ -0,0 +1,22 @@ +{{ define "chart.monitoringSection" }} +## Monitoring + +[![Grafana Dashboard](https://grafana.com/api/dashboards/13916/images/9894/image)](https://grafana.com/grafana/dashboards/13916-dendrite/) + +* Works well with [Prometheus Operator](https://prometheus-operator.dev/) ([Helmchart](https://artifacthub.io/packages/helm/prometheus-community/kube-prometheus-stack)) and their setup of [Grafana](https://grafana.com/grafana/), by enabling the following values: +```yaml +prometheus: + servicemonitor: + enabled: true + labels: + release: "kube-prometheus-stack" + rules: + enabled: true # will deploy alert rules + labels: + release: "kube-prometheus-stack" +grafana: + dashboards: + enabled: true # will deploy default dashboards +``` +PS: The label `release=kube-prometheus-stack` is setup with the helmchart of the Prometheus Operator. 
For Grafana Dashboards it may be necessary to enable scanning in the correct namespaces (or ALL), enabled by `sidecar.dashboards.searchNamespace` in [Helmchart of grafana](https://artifacthub.io/packages/helm/grafana/grafana) (which is part of PrometheusOperator, so `grafana.sidecar.dashboards.searchNamespace`) +{{ end }} \ No newline at end of file diff --git a/helm/dendrite/Chart.yaml b/helm/dendrite/Chart.yaml index 3944a76db..dc2764939 100644 --- a/helm/dendrite/Chart.yaml +++ b/helm/dendrite/Chart.yaml @@ -1,6 +1,6 @@ apiVersion: v2 name: dendrite -version: "0.11.1" +version: "0.11.2" appVersion: "0.11.1" description: Dendrite Matrix Homeserver type: application diff --git a/helm/dendrite/README.md b/helm/dendrite/README.md index 8bcc82e6e..51587b766 100644 --- a/helm/dendrite/README.md +++ b/helm/dendrite/README.md @@ -1,6 +1,6 @@ # dendrite -![Version: 0.11.1](https://img.shields.io/badge/Version-0.11.1-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: 0.11.1](https://img.shields.io/badge/AppVersion-0.11.1-informational?style=flat-square) +![Version: 0.11.2](https://img.shields.io/badge/Version-0.11.2-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: 0.11.1](https://img.shields.io/badge/AppVersion-0.11.1-informational?style=flat-square) Dendrite Matrix Homeserver Status: **NOT PRODUCTION READY** @@ -146,3 +146,35 @@ Create a folder `appservices` and place your configurations in there. 
The confi | ingress.tls | list | `[]` | | | service.type | string | `"ClusterIP"` | | | service.port | int | `8008` | | +| prometheus.servicemonitor.enabled | bool | `false` | Enable ServiceMonitor for Prometheus-Operator for scrape metric-endpoint | +| prometheus.servicemonitor.labels | object | `{}` | Extra Labels on ServiceMonitor for selector of Prometheus Instance | +| prometheus.rules.enabled | bool | `false` | Enable PrometheusRules for Prometheus-Operator for setup alerting | +| prometheus.rules.labels | object | `{}` | Extra Labels on PrometheusRules for selector of Prometheus Instance | +| prometheus.rules.additionalRules | list | `[]` | additional alertrules (no default alertrules are provided) | +| grafana.dashboards.enabled | bool | `false` | | +| grafana.dashboards.labels | object | `{"grafana_dashboard":"1"}` | Extra Labels on ConfigMap for selector of grafana sidecar | +| grafana.dashboards.annotations | object | `{}` | Extra Annotations on ConfigMap additional config in grafana sidecar | + +## Monitoring + +[![Grafana Dashboard](https://grafana.com/api/dashboards/13916/images/9894/image)](https://grafana.com/grafana/dashboards/13916-dendrite/) + +* Works well with [Prometheus Operator](https://prometheus-operator.dev/) ([Helmchart](https://artifacthub.io/packages/helm/prometheus-community/kube-prometheus-stack)) and their setup of [Grafana](https://grafana.com/grafana/), by enabling the following values: +```yaml +prometheus: + servicemonitor: + enabled: true + labels: + release: "kube-prometheus-stack" + rules: + enabled: true # will deploy alert rules + labels: + release: "kube-prometheus-stack" +grafana: + dashboards: + enabled: true # will deploy default dashboards +``` +PS: The label `release=kube-prometheus-stack` is setup with the helmchart of the Prometheus Operator. 
For Grafana Dashboards it may be necessary to enable scanning in the correct namespaces (or ALL), enabled by `sidecar.dashboards.searchNamespace` in [Helmchart of grafana](https://artifacthub.io/packages/helm/grafana/grafana) (which is part of PrometheusOperator, so `grafana.sidecar.dashboards.searchNamespace`) + +---------------------------------------------- +Autogenerated from chart metadata using [helm-docs vv1.11.0](https://github.com/norwoodj/helm-docs/releases/vv1.11.0) \ No newline at end of file diff --git a/helm/dendrite/README.md.gotmpl b/helm/dendrite/README.md.gotmpl index 7c32f7b02..9411733ce 100644 --- a/helm/dendrite/README.md.gotmpl +++ b/helm/dendrite/README.md.gotmpl @@ -10,4 +10,5 @@ {{ template "chart.sourcesSection" . }} {{ template "chart.requirementsSection" . }} {{ template "chart.valuesSection" . }} +{{ template "chart.monitoringSection" . }} {{ template "helm-docs.versionFooter" . }} \ No newline at end of file diff --git a/helm/dendrite/ci/ct-ingress-values.yaml b/helm/dendrite/ci/ct-ingress-values.yaml index 28311d33e..f3f58b5ca 100644 --- a/helm/dendrite/ci/ct-ingress-values.yaml +++ b/helm/dendrite/ci/ct-ingress-values.yaml @@ -11,3 +11,8 @@ dendrite_config: ingress: enabled: true + +# dashboard is an ConfigMap with labels - it does not harm on testing +grafana: + dashboards: + enabled: true diff --git a/helm/dendrite/grafana_dashboards/dendrite-rev1.json b/helm/dendrite/grafana_dashboards/dendrite-rev1.json new file mode 100644 index 000000000..206e8af87 --- /dev/null +++ b/helm/dendrite/grafana_dashboards/dendrite-rev1.json @@ -0,0 +1,1119 @@ +{ + "__inputs": [ + { + "name": "DS_INFLUXDB_DOMOTICA", + "label": "", + "description": "", + "type": "datasource", + "pluginId": "influxdb", + "pluginName": "InfluxDB" + }, + { + "name": "DS_PROMETHEUS", + "label": "Prometheus", + "description": "", + "type": "datasource", + "pluginId": "prometheus", + "pluginName": "Prometheus" + } + ], + "__requires": [ + { + "type": "grafana", + "id": 
"grafana", + "name": "Grafana", + "version": "7.4.2" + }, + { + "type": "panel", + "id": "graph", + "name": "Graph", + "version": "" + }, + { + "type": "panel", + "id": "heatmap", + "name": "Heatmap", + "version": "" + }, + { + "type": "datasource", + "id": "influxdb", + "name": "InfluxDB", + "version": "1.0.0" + }, + { + "type": "datasource", + "id": "prometheus", + "name": "Prometheus", + "version": "1.0.0" + }, + { + "type": "panel", + "id": "stat", + "name": "Stat", + "version": "" + } + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "description": "Dendrite dashboard from https://github.com/matrix-org/dendrite/", + "editable": true, + "gnetId": 13916, + "graphTooltip": 0, + "id": null, + "iteration": 1613683251329, + "links": [], + "panels": [ + { + "collapsed": false, + "datasource": "${DS_INFLUXDB_DOMOTICA}", + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 4, + "panels": [], + "title": "Overview", + "type": "row" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 5, + "w": 10, + "x": 0, + "y": 1 + }, + "hiddenSeries": false, + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.4.2", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": 
"rate(process_cpu_seconds_total{instance=\"$instance\",job=~\"$job\",index=~\"$index\"}[$bucket_size])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{job}}-{{index}} ", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "CPU usage", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": null, + "format": "percentunit", + "label": null, + "logBase": 1, + "max": "1", + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "datasource": "${DS_PROMETHEUS}", + "description": "Total number of registered users", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": {}, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 10, + "y": 1 + }, + "id": 20, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "7.4.2", + "targets": [ + { + "exemplar": false, + "expr": "dendrite_clientapi_reg_users_total", + "instant": false, + "interval": "", + "legendFormat": "Users", + "refId": "A" + } + ], + "title": "Registerd Users", + "type": "stat" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "description": "The number of sync requests that are active right now and are 
waiting to be woken by a notifier", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 5, + "w": 10, + "x": 14, + "y": 1 + }, + "hiddenSeries": false, + "id": 6, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.4.2", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(dendrite_syncapi_active_sync_requests{instance=\"$instance\"}[$bucket_size]))without (job,index)", + "hide": false, + "interval": "", + "legendFormat": "active", + "refId": "A" + }, + { + "expr": "sum(rate(dendrite_syncapi_waiting_sync_requests{instance=\"$instance\"}[$bucket_size]))without (job,index)", + "hide": false, + "interval": "", + "legendFormat": "waiting", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Sync API", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:232", + "format": "hertz", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "$$hashKey": "object:233", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "cards": { + "cardPadding": null, + "cardRound": null + }, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateOranges", + "exponent": 0.5, + "mode": "spectrum" + }, + 
"dataFormat": "tsbuckets", + "datasource": "${DS_PROMETHEUS}", + "description": "How long it takes to build and submit a new event from the client API to the roomserver", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 24, + "x": 0, + "y": 6 + }, + "heatmap": {}, + "hideZeroBuckets": false, + "highlightCards": true, + "id": 24, + "legend": { + "show": false + }, + "pluginVersion": "7.4.2", + "reverseYBuckets": false, + "targets": [ + { + "expr": "dendrite_clientapi_sendevent_duration_millis_bucket{action=\"build\",instance=\"$instance\"}", + "interval": "", + "legendFormat": "{{le}}", + "refId": "A" + } + ], + "title": "Sendevent Duration", + "tooltip": { + "show": true, + "showHistogram": false + }, + "type": "heatmap", + "xAxis": { + "show": true + }, + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": null, + "format": "s", + "logBase": 1, + "max": null, + "min": "0", + "show": true, + "splitFactor": null + }, + "yBucketBound": "auto", + "yBucketNumber": null, + "yBucketSize": null + }, + { + "collapsed": false, + "datasource": "${DS_INFLUXDB_DOMOTICA}", + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 11 + }, + "id": 8, + "panels": [], + "title": "Federation", + "type": "row" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "description": "Collection of queues for sending transactions to other matrix servers", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 24, + "x": 0, + "y": 12 + }, + "hiddenSeries": false, + "id": 10, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": 
"7.4.2", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "dendrite_federationsender_destination_queues_running", + "interval": "", + "legendFormat": "Queue Running", + "refId": "A" + }, + { + "expr": "dendrite_federationsender_destination_queues_total", + "hide": false, + "interval": "", + "legendFormat": "Queue Total", + "refId": "B" + }, + { + "expr": "dendrite_federationsender_destination_queues_backing_off", + "hide": false, + "interval": "", + "legendFormat": "Backing Off", + "refId": "C" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Federation Sender Destination", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:443", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "$$hashKey": "object:444", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "collapsed": false, + "datasource": "${DS_INFLUXDB_DOMOTICA}", + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 18 + }, + "id": 26, + "panels": [], + "title": "Rooms", + "type": "row" + }, + { + "cards": { + "cardPadding": null, + "cardRound": null + }, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateOranges", + "exponent": 0.5, + "mode": "spectrum" + }, + "dataFormat": "timeseries", + "datasource": "${DS_PROMETHEUS}", + "description": "How long it takes the roomserver to process an event", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 
19 + }, + "heatmap": {}, + "hideZeroBuckets": false, + "highlightCards": true, + "id": 28, + "legend": { + "show": false + }, + "pluginVersion": "7.4.2", + "reverseYBuckets": false, + "targets": [ + { + "expr": "sum(rate(dendrite_roomserver_processroomevent_duration_millis_bucket{instance=\"$instance\"}[$bucket_size])) by (le)", + "interval": "", + "legendFormat": "{{le}}", + "refId": "A" + } + ], + "title": "Room Event Processing", + "tooltip": { + "show": true, + "showHistogram": false + }, + "type": "heatmap", + "xAxis": { + "show": true + }, + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": null, + "format": "s", + "logBase": 1, + "max": null, + "min": null, + "show": true, + "splitFactor": null + }, + "yBucketBound": "auto", + "yBucketNumber": null, + "yBucketSize": null + }, + { + "collapsed": false, + "datasource": "${DS_INFLUXDB_DOMOTICA}", + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 26 + }, + "id": 12, + "panels": [], + "title": "Caches", + "type": "row" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 27 + }, + "hiddenSeries": false, + "id": 14, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.4.2", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "dendrite_caching_in_memory_lru_server_key", + "interval": "", + "legendFormat": "Server keys", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], 
+ "timeShift": null, + "title": "Server Keys", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:667", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "$$hashKey": "object:668", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 27 + }, + "hiddenSeries": false, + "id": 16, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.4.2", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "dendrite_caching_in_memory_lru_federation_event", + "interval": "", + "legendFormat": "Federation Event", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Federation Events", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:784", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + 
{ + "$$hashKey": "object:785", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 27 + }, + "hiddenSeries": false, + "id": 18, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.4.2", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "dendrite_caching_in_memory_lru_roomserver_room_ids", + "interval": "", + "legendFormat": "Room IDs", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Room IDs", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:898", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "$$hashKey": "object:899", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "refresh": "10s", + "schemaVersion": 27, + "style": "dark", + "tags": [ + "matrix", + "dendrite" + ], + "templating": { + "list": [ + { + "current": { + "selected": false, + "text": "Prometheus", + "value": 
"Prometheus" + }, + "description": null, + "error": null, + "hide": 0, + "includeAll": false, + "label": null, + "multi": false, + "name": "datasource", + "options": [], + "query": "prometheus", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "type": "datasource" + }, + { + "auto": true, + "auto_count": 100, + "auto_min": "30s", + "current": { + "selected": false, + "text": "auto", + "value": "$__auto_interval_bucket_size" + }, + "description": null, + "error": null, + "hide": 0, + "label": "Bucket Size", + "name": "bucket_size", + "options": [ + { + "selected": true, + "text": "auto", + "value": "$__auto_interval_bucket_size" + }, + { + "selected": false, + "text": "30s", + "value": "30s" + }, + { + "selected": false, + "text": "1m", + "value": "1m" + }, + { + "selected": false, + "text": "2m", + "value": "2m" + }, + { + "selected": false, + "text": "5m", + "value": "5m" + }, + { + "selected": false, + "text": "10m", + "value": "10m" + }, + { + "selected": false, + "text": "15m", + "value": "15m" + } + ], + "query": "30s,1m,2m,5m,10m,15m", + "queryValue": "", + "refresh": 2, + "skipUrlSync": false, + "type": "interval" + }, + { + "allValue": null, + "current": {}, + "datasource": "${DS_PROMETHEUS}", + "definition": "label_values(dendrite_caching_in_memory_lru_roominfo, instance)", + "description": null, + "error": null, + "hide": 0, + "includeAll": false, + "label": null, + "multi": false, + "name": "instance", + "options": [], + "query": { + "query": "label_values(dendrite_caching_in_memory_lru_roominfo, instance)", + "refId": "StandardVariableQuery" + }, + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": {}, + "datasource": "${DS_PROMETHEUS}", + "definition": "label_values(dendrite_caching_in_memory_lru_roominfo, job)", + "description": null, + "error": null, + "hide": 0, + "includeAll": true, + 
"label": "Job", + "multi": true, + "name": "job", + "options": [], + "query": { + "query": "label_values(dendrite_caching_in_memory_lru_roominfo, job)", + "refId": "StandardVariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": ".*", + "current": {}, + "datasource": "${DS_PROMETHEUS}", + "definition": "label_values(dendrite_caching_in_memory_lru_roominfo, index)", + "description": null, + "error": null, + "hide": 0, + "includeAll": true, + "label": null, + "multi": true, + "name": "index", + "options": [], + "query": { + "query": "label_values(dendrite_caching_in_memory_lru_roominfo, index)", + "refId": "StandardVariableQuery" + }, + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 3, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-3h", + "to": "now" + }, + "timepicker": {}, + "timezone": "", + "title": "Dendrite", + "uid": "RoRt1jEGz", + "version": 8 +} \ No newline at end of file diff --git a/helm/dendrite/templates/configmap_grafana_dashboards.yaml b/helm/dendrite/templates/configmap_grafana_dashboards.yaml new file mode 100644 index 000000000..e2abc4909 --- /dev/null +++ b/helm/dendrite/templates/configmap_grafana_dashboards.yaml @@ -0,0 +1,16 @@ +{{- if .Values.grafana.dashboards.enabled }} +{{- range $path, $bytes := .Files.Glob "grafana_dashboards/*" }} +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "dendrite.fullname" $ }}-grafana-dashboards-{{ base $path }} + labels: + {{- include "dendrite.labels" $ | nindent 4 }} + {{- /* emit the extra labels only when some are set: toYaml of an empty map renders a bare flow-map line next to the common labels, which is invalid YAML */}}{{- with $.Values.grafana.dashboards.labels }}{{- toYaml . | nindent 4 }}{{- end }} + annotations: + {{- toYaml $.Values.grafana.dashboards.annotations | nindent 4 }} +data: + {{- ($.Files.Glob $path ).AsConfig | nindent 2 }} +{{- end }} +{{- end }} diff --git
a/helm/dendrite/templates/prometheus-rules.yaml b/helm/dendrite/templates/prometheus-rules.yaml new file mode 100644 index 000000000..6693a4ed9 --- /dev/null +++ b/helm/dendrite/templates/prometheus-rules.yaml @@ -0,0 +1,16 @@ +{{- if and ( .Values.prometheus.rules.enabled ) ( .Capabilities.APIVersions.Has "monitoring.coreos.com/v1" ) }} +--- +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: {{ include "dendrite.fullname" . }} + labels: + {{- include "dendrite.labels" . | nindent 4 }} + {{- /* emit the extra labels only when some are set: with the default empty map, toYaml renders a bare flow-map line next to the common labels, which is invalid YAML */}}{{- with .Values.prometheus.rules.labels }}{{- toYaml . | nindent 4 }}{{- end }} +spec: + groups: + {{- if .Values.prometheus.rules.additionalRules }} + - name: {{ template "dendrite.name" . }}-Additional + rules: {{- toYaml .Values.prometheus.rules.additionalRules | nindent 4 }} + {{- end }} +{{- end }} diff --git a/helm/dendrite/templates/secrets.yaml b/helm/dendrite/templates/secrets.yaml index d4b8ecbf2..2084c9a56 100644 --- a/helm/dendrite/templates/secrets.yaml +++ b/helm/dendrite/templates/secrets.yaml @@ -1,15 +1,15 @@ -{{ if (gt (len (.Files.Glob "appservices/*")) 0) }} +{{- if (gt (len (.Files.Glob "appservices/*")) 0) }} --- apiVersion: v1 kind: Secret metadata: name: {{ include "dendrite.fullname" . }}-appservices-conf - namespace: {{ .Release.Namespace }} type: Opaque data: {{ (.Files.Glob "appservices/*").AsSecrets | indent 2 }} -{{ end }} -{{ if and .Values.signing_key.create (not .Values.signing_key.existingSecret) }} +{{- end }} + +{{- if and .Values.signing_key.create (not .Values.signing_key.existingSecret) }} --- apiVersion: v1 kind: Secret @@ -17,17 +17,29 @@ metadata: annotations: helm.sh/resource-policy: keep name: {{ include "dendrite.fullname" .
}}-signing-key - namespace: {{ .Release.Namespace }} type: Opaque -{{ end }} +{{- end }} + +{{- with .Values.dendrite_config.global.metrics }} +{{- if .enabled }} +--- +apiVersion: v1 +kind: Secret +metadata: + name: {{ include "dendrite.fullname" $ }}-metrics-basic-auth +type: Opaque +stringData: + user: {{ .basic_auth.user | quote }} + password: {{ .basic_auth.password | quote }} +{{- end }} +{{- end }} --- apiVersion: v1 kind: Secret -type: Opaque metadata: name: {{ include "dendrite.fullname" . }}-conf - namespace: {{ .Release.Namespace }} +type: Opaque stringData: dendrite.yaml: | {{ toYaml ( mustMergeOverwrite .Values.dendrite_config ( fromYaml (include "override.config" .) ) .Values.dendrite_config ) | nindent 4 }} \ No newline at end of file diff --git a/helm/dendrite/templates/servicemonitor.yaml b/helm/dendrite/templates/servicemonitor.yaml new file mode 100644 index 000000000..3819c7d02 --- /dev/null +++ b/helm/dendrite/templates/servicemonitor.yaml @@ -0,0 +1,26 @@ +{{- if and + (and .Values.prometheus.servicemonitor.enabled .Values.dendrite_config.global.metrics.enabled ) + ( .Capabilities.APIVersions.Has "monitoring.coreos.com/v1" ) +}} +--- +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: {{ include "dendrite.fullname" . }} + labels: + {{- include "dendrite.labels" . | nindent 4 }} + {{- /* emit the extra labels only when some are set: with the default empty map, toYaml renders a bare flow-map line next to the common labels, which is invalid YAML */}}{{- with .Values.prometheus.servicemonitor.labels }}{{- toYaml . | nindent 4 }}{{- end }} +spec: + endpoints: + - port: http + basicAuth: + username: + name: {{ include "dendrite.fullname" . }}-metrics-basic-auth + key: "user" + password: + name: {{ include "dendrite.fullname" . }}-metrics-basic-auth + key: "password" + selector: + matchLabels: + {{- include "dendrite.selectorLabels" .
| nindent 6 }} +{{- end }} diff --git a/helm/dendrite/values.yaml b/helm/dendrite/values.yaml index 848241ab6..c219d27f8 100644 --- a/helm/dendrite/values.yaml +++ b/helm/dendrite/values.yaml @@ -348,3 +348,26 @@ ingress: service: type: ClusterIP port: 8008 + +prometheus: + servicemonitor: + # -- Enable ServiceMonitor for Prometheus-Operator for scrape metric-endpoint + enabled: false + # -- Extra Labels on ServiceMonitor for selector of Prometheus Instance + labels: {} + rules: + # -- Enable PrometheusRules for Prometheus-Operator for setup alerting + enabled: false + # -- Extra Labels on PrometheusRules for selector of Prometheus Instance + labels: {} + # -- additional alertrules (no default alertrules are provided) + additionalRules: [] + +grafana: + dashboards: + enabled: false + # -- Extra Labels on ConfigMap for selector of grafana sidecar + labels: + grafana_dashboard: "1" + # -- Extra Annotations on ConfigMap additional config in grafana sidecar + annotations: {}