From 2f16e031e3baae6f8972bb572381efa27e93e0bf Mon Sep 17 00:00:00 2001 From: Nicolas Lamirault Date: Sun, 7 Mar 2021 08:08:09 +0100 Subject: [PATCH] Add: Prometheus mixin v2.250 Signed-off-by: Nicolas Lamirault --- charts/prometheus-mixin/.helmignore | 23 ++ charts/prometheus-mixin/Chart.yaml | 54 +++++ charts/prometheus-mixin/LICENSE | 195 ++++++++++++++++ charts/prometheus-mixin/README.md | 28 +++ charts/prometheus-mixin/ci/test-values.yaml | 8 + charts/prometheus-mixin/templates/NOTES.txt | 4 + .../prometheus-mixin/templates/_helpers.tpl | 66 ++++++ charts/prometheus-mixin/templates/alerts.yaml | 216 ++++++++++++++++++ charts/prometheus-mixin/values.schema.json | 22 ++ charts/prometheus-mixin/values.yaml | 12 + 10 files changed, 628 insertions(+) create mode 100644 charts/prometheus-mixin/.helmignore create mode 100644 charts/prometheus-mixin/Chart.yaml create mode 100644 charts/prometheus-mixin/LICENSE create mode 100644 charts/prometheus-mixin/README.md create mode 100644 charts/prometheus-mixin/ci/test-values.yaml create mode 100644 charts/prometheus-mixin/templates/NOTES.txt create mode 100644 charts/prometheus-mixin/templates/_helpers.tpl create mode 100644 charts/prometheus-mixin/templates/alerts.yaml create mode 100644 charts/prometheus-mixin/values.schema.json create mode 100644 charts/prometheus-mixin/values.yaml diff --git a/charts/prometheus-mixin/.helmignore b/charts/prometheus-mixin/.helmignore new file mode 100644 index 00000000..0e8a0eb3 --- /dev/null +++ b/charts/prometheus-mixin/.helmignore @@ -0,0 +1,23 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. +.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/charts/prometheus-mixin/Chart.yaml b/charts/prometheus-mixin/Chart.yaml new file mode 100644 index 00000000..cb46f420 --- /dev/null +++ b/charts/prometheus-mixin/Chart.yaml @@ -0,0 +1,54 @@ +# Copyright (C) 2020 Nicolas Lamirault + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +apiVersion: v2 +name: prometheus-mixin +description: A Helm chart for Prometheus Mixin +home: https://charts.portefaix.xyz +icon: https://upload.wikimedia.org/wikipedia/commons/thumb/3/38/Prometheus_software_logo.svg/242px-Prometheus_software_logo.svg.png +sources: + - https://github.com/nlamirault/portefaix-hub/tree/master/charts/prometheus-mixin +type: application +keywords: + - prometheus + - monitoring-mixin + - portefaix + +# This is the chart version. This version number should be incremented each time you make changes +# to the chart and its templates, including the app version. +# Versions are expected to follow Semantic Versioning (https://semver.org/) +version: 0.1.0 + +# This is the version number of the application being deployed. This version number should be +# incremented each time you make changes to the application. Versions are not expected to +# follow Semantic Versioning. They should reflect the version the application is using. +appVersion: 0.46.0 + +maintainers: + - name: nlamirault + email: nicolas.lamirault@gmail.com + +# https://artifacthub.io/docs/topics/repositories/ +annotations: + artifacthub.io/changes: | + - Artifact Hub informations + - Refactoring Prometheus rules configuration + artifacthub.io/links: | + - name: prometheus + url: https://github.com/prometheus/prometheus + - name: Portefaix + url: https://portefaix.xyz + artifacthub.io/maintainers: | + - name: nlamirault + email: nicolas.lamirault@gmail.com diff --git a/charts/prometheus-mixin/LICENSE b/charts/prometheus-mixin/LICENSE new file mode 100644 index 00000000..70a865f1 --- /dev/null +++ b/charts/prometheus-mixin/LICENSE @@ -0,0 +1,195 @@ + +Apache License +Version 2.0, January 2004 +http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + +"License" shall mean the terms and conditions for use, reproduction, and +distribution as defined by Sections 1 through 9 of this document. + +"Licensor" shall mean the copyright owner or entity authorized by the copyright +owner that is granting the License. + +"Legal Entity" shall mean the union of the acting entity and all other entities +that control, are controlled by, or are under common control with that entity. +For the purposes of this definition, "control" means (i) the power, direct or +indirect, to cause the direction or management of such entity, whether by +contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the +outstanding shares, or (iii) beneficial ownership of such entity. + +"You" (or "Your") shall mean an individual or Legal Entity exercising +permissions granted by this License. + +"Source" form shall mean the preferred form for making modifications, including +but not limited to software source code, documentation source, and configuration +files. + +"Object" form shall mean any form resulting from mechanical transformation or +translation of a Source form, including but not limited to compiled object code, +generated documentation, and conversions to other media types. + +"Work" shall mean the work of authorship, whether in Source or Object form, made +available under the License, as indicated by a copyright notice that is included +in or attached to the work (an example is provided in the Appendix below). + +"Derivative Works" shall mean any work, whether in Source or Object form, that +is based on (or derived from) the Work and for which the editorial revisions, +annotations, elaborations, or other modifications represent, as a whole, an +original work of authorship. For the purposes of this License, Derivative Works +shall not include works that remain separable from, or merely link (or bind by +name) to the interfaces of, the Work and Derivative Works thereof. + +"Contribution" shall mean any work of authorship, including the original version +of the Work and any modifications or additions to that Work or Derivative Works +thereof, that is intentionally submitted to Licensor for inclusion in the Work +by the copyright owner or by an individual or Legal Entity authorized to submit +on behalf of the copyright owner. For the purposes of this definition, +"submitted" means any form of electronic, verbal, or written communication sent +to the Licensor or its representatives, including but not limited to +communication on electronic mailing lists, source code control systems, and +issue tracking systems that are managed by, or on behalf of, the Licensor for +the purpose of discussing and improving the Work, but excluding communication +that is conspicuously marked or otherwise designated in writing by the copyright +owner as "Not a Contribution." + +"Contributor" shall mean Licensor and any individual or Legal Entity on behalf +of whom a Contribution has been received by Licensor and subsequently +incorporated within the Work. + +2. Grant of Copyright License. + +Subject to the terms and conditions of this License, each Contributor hereby +grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, +irrevocable copyright license to reproduce, prepare Derivative Works of, +publicly display, publicly perform, sublicense, and distribute the Work and such +Derivative Works in Source or Object form. + +3. Grant of Patent License. + +Subject to the terms and conditions of this License, each Contributor hereby +grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, +irrevocable (except as stated in this section) patent license to make, have +made, use, offer to sell, sell, import, and otherwise transfer the Work, where +such license applies only to those patent claims licensable by such Contributor +that are necessarily infringed by their Contribution(s) alone or by combination +of their Contribution(s) with the Work to which such Contribution(s) was +submitted. If You institute patent litigation against any entity (including a +cross-claim or counterclaim in a lawsuit) alleging that the Work or a +Contribution incorporated within the Work constitutes direct or contributory +patent infringement, then any patent licenses granted to You under this License +for that Work shall terminate as of the date such litigation is filed. + +4. Redistribution. + +You may reproduce and distribute copies of the Work or Derivative Works thereof +in any medium, with or without modifications, and in Source or Object form, +provided that You meet the following conditions: + +You must give any other recipients of the Work or Derivative Works a copy of +this License; and +You must cause any modified files to carry prominent notices stating that You +changed the files; and +You must retain, in the Source form of any Derivative Works that You distribute, +all copyright, patent, trademark, and attribution notices from the Source form +of the Work, excluding those notices that do not pertain to any part of the +Derivative Works; and +If the Work includes a "NOTICE" text file as part of its distribution, then any +Derivative Works that You distribute must include a readable copy of the +attribution notices contained within such NOTICE file, excluding those notices +that do not pertain to any part of the Derivative Works, in at least one of the +following places: within a NOTICE text file distributed as part of the +Derivative Works; within the Source form or documentation, if provided along +with the Derivative Works; or, within a display generated by the Derivative +Works, if and wherever such third-party notices normally appear. The contents of +the NOTICE file are for informational purposes only and do not modify the +License. You may add Your own attribution notices within Derivative Works that +You distribute, alongside or as an addendum to the NOTICE text from the Work, +provided that such additional attribution notices cannot be construed as +modifying the License. +You may add Your own copyright statement to Your modifications and may provide +additional or different license terms and conditions for use, reproduction, or +distribution of Your modifications, or for any such Derivative Works as a whole, +provided Your use, reproduction, and distribution of the Work otherwise complies +with the conditions stated in this License. + +5. Submission of Contributions. + +Unless You explicitly state otherwise, any Contribution intentionally submitted +for inclusion in the Work by You to the Licensor shall be under the terms and +conditions of this License, without any additional terms or conditions. +Notwithstanding the above, nothing herein shall supersede or modify the terms of +any separate license agreement you may have executed with Licensor regarding +such Contributions. + +6. Trademarks. + +This License does not grant permission to use the trade names, trademarks, +service marks, or product names of the Licensor, except as required for +reasonable and customary use in describing the origin of the Work and +reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. + +Unless required by applicable law or agreed to in writing, Licensor provides the +Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, +including, without limitation, any warranties or conditions of TITLE, +NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are +solely responsible for determining the appropriateness of using or +redistributing the Work and assume any risks associated with Your exercise of +permissions under this License. + +8. Limitation of Liability. + +In no event and under no legal theory, whether in tort (including negligence), +contract, or otherwise, unless required by applicable law (such as deliberate +and grossly negligent acts) or agreed to in writing, shall any Contributor be +liable to You for damages, including any direct, indirect, special, incidental, +or consequential damages of any character arising as a result of this License or +out of the use or inability to use the Work (including but not limited to +damages for loss of goodwill, work stoppage, computer failure or malfunction, or +any and all other commercial damages or losses), even if such Contributor has +been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. + +While redistributing the Work or Derivative Works thereof, You may choose to +offer, and charge a fee for, acceptance of support, warranty, indemnity, or +other liability obligations and/or rights consistent with this License. However, +in accepting such obligations, You may act only on Your own behalf and on Your +sole responsibility, not on behalf of any other Contributor, and only if You +agree to indemnify, defend, and hold each Contributor harmless for any liability +incurred by, or claims asserted against, such Contributor by reason of your +accepting any such warranty or additional liability. + +END OF TERMS AND CONDITIONS + +APPENDIX: How to apply the Apache License to your work + +To apply the Apache License to your work, attach the following boilerplate +notice, with the fields enclosed by brackets "[]" replaced with your own +identifying information. (Don't include the brackets!) The text should be +enclosed in the appropriate comment syntax for the file format. We also +recommend that a file or class name and description of purpose be included on +the same "printed page" as the copyright notice for easier identification within +third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + + diff --git a/charts/prometheus-mixin/README.md b/charts/prometheus-mixin/README.md new file mode 100644 index 00000000..7ed68572 --- /dev/null +++ b/charts/prometheus-mixin/README.md @@ -0,0 +1,28 @@ +# prometheus-mixin + +![Version: 0.1.0](https://img.shields.io/badge/Version-0.1.0-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: 0.46.0](https://img.shields.io/badge/AppVersion-0.46.0-informational?style=flat-square) + +A Helm chart for Prometheus Mixin + +**Homepage:** + +## Maintainers + +| Name | Email | Url | +| ---- | ------ | --- | +| nlamirault | nicolas.lamirault@gmail.com | | + +## Source Code + +* + +## Values + +| Key | Type | Default | Description | +|-----|------|---------|-------------| +| additionalAnnotations | object | `{}` | Additional annotations to add to the Prometheus rules | +| additionalLabels | object | `{}` | Additional labels to add to the Prometheus rules | +| fullnameOverride | string | `""` | Provide a name to substitute for the full names of resources | + +---------------------------------------------- +Autogenerated from chart metadata using [helm-docs v1.4.0](https://github.com/norwoodj/helm-docs/releases/v1.4.0) diff --git a/charts/prometheus-mixin/ci/test-values.yaml b/charts/prometheus-mixin/ci/test-values.yaml new file mode 100644 index 00000000..65ba26f4 --- /dev/null +++ b/charts/prometheus-mixin/ci/test-values.yaml @@ -0,0 +1,8 @@ +# Values for CI +# + +additionalLabels: + test: github + +additionalAnnotations: + made-by: portefaix diff --git a/charts/prometheus-mixin/templates/NOTES.txt b/charts/prometheus-mixin/templates/NOTES.txt new file mode 100644 index 00000000..227ac438 --- /dev/null +++ b/charts/prometheus-mixin/templates/NOTES.txt @@ -0,0 +1,4 @@ +{{ $.Chart.Name }} has been installed. Check its status by running: + kubectl --namespace {{ template "prometheus-mixin.namespace" . }} get prometheusrules + +Visit https://github.com/nlamirault/portefaix-hub/charts/prometheus-mixin. diff --git a/charts/prometheus-mixin/templates/_helpers.tpl b/charts/prometheus-mixin/templates/_helpers.tpl new file mode 100644 index 00000000..01df56e5 --- /dev/null +++ b/charts/prometheus-mixin/templates/_helpers.tpl @@ -0,0 +1,66 @@ +{{/* vim: set filetype=mustache: */}} + +{{/* +Expand the name of the chart. +*/}} +{{- define "prometheus-mixin.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. +*/}} +{{- define "prometheus-mixin.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "prometheus-mixin.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "prometheus-mixin.selectorLabels" -}} +app.kubernetes.io/name: {{ include "prometheus-mixin.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} + +{{/* +Common labels +*/}} +{{- define "prometheus-mixin.labels" -}} +helm.sh/chart: {{ include "prometheus-mixin.chart" . }} +{{ include "prometheus-mixin.selectorLabels" . }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.Version | quote }} +{{- end }} +app.kubernetes.io/component: monitoring-mixin +app.kubernetes.io/part-of: {{ include "prometheus-mixin.name" . }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{/* +Allow the release namespace to be overridden +*/}} +{{- define "prometheus-mixin.namespace" -}} + {{- if .Values.namespaceOverride -}} + {{- .Values.namespaceOverride -}} + {{- else -}} + {{- .Release.Namespace -}} + {{- end -}} +{{- end -}} \ No newline at end of file diff --git a/charts/prometheus-mixin/templates/alerts.yaml b/charts/prometheus-mixin/templates/alerts.yaml new file mode 100644 index 00000000..d65acdda --- /dev/null +++ b/charts/prometheus-mixin/templates/alerts.yaml @@ -0,0 +1,216 @@ +--- +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: prometheus-mixin-alerts + labels: + app: {{ include "prometheus-mixin.name" . }} +{{ include "prometheus-mixin.labels" . | indent 4 }} +{{- if .Values.additionalLabels }} +{{ toYaml .Values.additionalLabels | indent 4 }} +{{- end }} +{{- if .Values.additionalAnnotations }} + annotations: +{{ toYaml .Values.additionalAnnotations | indent 4 }} +{{- end }} +spec: + groups: + - name: prometheus + rules: + - alert: PrometheusBadConfig + annotations: + description: Prometheus {{`{{`}}$labels.instance{{`}}`}} has failed to reload its configuration. + summary: Failed Prometheus configuration reload. + expr: | + # Without max_over_time, failed scrapes could create false negatives, see + # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details. + max_over_time(prometheus_config_last_reload_successful{job="prometheus"}[5m]) == 0 + for: 10m + labels: + severity: critical + - alert: PrometheusNotificationQueueRunningFull + annotations: + description: Alert notification queue of Prometheus {{`{{`}}$labels.instance{{`}}`}} is running full. + summary: Prometheus alert notification queue predicted to run full in less than 30m. + expr: | + # Without min_over_time, failed scrapes could create false negatives, see + # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details. + ( + predict_linear(prometheus_notifications_queue_length{job="prometheus"}[5m], 60 * 30) + > + min_over_time(prometheus_notifications_queue_capacity{job="prometheus"}[5m]) + ) + for: 15m + labels: + severity: warning + - alert: PrometheusErrorSendingAlertsToSomeAlertmanagers + annotations: + description: '{{`{{`}} printf "%.1f" $value {{`}}`}}% errors while sending alerts from Prometheus {{`{{`}}$labels.instance{{`}}`}} to Alertmanager {{`{{`}}$labels.alertmanager{{`}}`}}.' + summary: Prometheus has encountered more than 1% errors sending alerts to a specific Alertmanager. + expr: | + ( + rate(prometheus_notifications_errors_total{job="prometheus"}[5m]) + / + rate(prometheus_notifications_sent_total{job="prometheus"}[5m]) + ) + * 100 + > 1 + for: 15m + labels: + severity: warning + - alert: PrometheusNotConnectedToAlertmanagers + annotations: + description: Prometheus {{`{{`}}$labels.instance{{`}}`}} is not connected to any Alertmanagers. + summary: Prometheus is not connected to any Alertmanagers. + expr: | + # Without max_over_time, failed scrapes could create false negatives, see + # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details. + max_over_time(prometheus_notifications_alertmanagers_discovered{job="prometheus"}[5m]) < 1 + for: 10m + labels: + severity: warning + - alert: PrometheusTSDBReloadsFailing + annotations: + description: Prometheus {{`{{`}}$labels.instance{{`}}`}} has detected {{`{{`}}$value | humanize{{`}}`}} reload failures over the last 3h. + summary: Prometheus has issues reloading blocks from disk. + expr: | + increase(prometheus_tsdb_reloads_failures_total{job="prometheus"}[3h]) > 0 + for: 4h + labels: + severity: warning + - alert: PrometheusTSDBCompactionsFailing + annotations: + description: Prometheus {{`{{`}}$labels.instance{{`}}`}} has detected {{`{{`}}$value | humanize{{`}}`}} compaction failures over the last 3h. + summary: Prometheus has issues compacting blocks. + expr: | + increase(prometheus_tsdb_compactions_failed_total{job="prometheus"}[3h]) > 0 + for: 4h + labels: + severity: warning + - alert: PrometheusNotIngestingSamples + annotations: + description: Prometheus {{`{{`}}$labels.instance{{`}}`}} is not ingesting samples. + summary: Prometheus is not ingesting samples. + expr: | + ( + rate(prometheus_tsdb_head_samples_appended_total{job="prometheus"}[5m]) <= 0 + and + ( + sum without(scrape_job) (prometheus_target_metadata_cache_entries{job="prometheus"}) > 0 + or + sum without(rule_group) (prometheus_rule_group_rules{job="prometheus"}) > 0 + ) + ) + for: 10m + labels: + severity: warning + - alert: PrometheusDuplicateTimestamps + annotations: + description: Prometheus {{`{{`}}$labels.instance{{`}}`}} is dropping {{`{{`}} printf "%.4g" $value {{`}}`}} samples/s with different values but duplicated timestamp. + summary: Prometheus is dropping samples with duplicate timestamps. + expr: | + rate(prometheus_target_scrapes_sample_duplicate_timestamp_total{job="prometheus"}[5m]) > 0 + for: 10m + labels: + severity: warning + - alert: PrometheusOutOfOrderTimestamps + annotations: + description: Prometheus {{`{{`}}$labels.instance{{`}}`}} is dropping {{`{{`}} printf "%.4g" $value {{`}}`}} samples/s with timestamps arriving out of order. + summary: Prometheus drops samples with out-of-order timestamps. + expr: | + rate(prometheus_target_scrapes_sample_out_of_order_total{job="prometheus"}[5m]) > 0 + for: 10m + labels: + severity: warning + - alert: PrometheusRemoteStorageFailures + annotations: + description: Prometheus {{`{{`}}$labels.instance{{`}}`}} failed to send {{`{{`}} printf "%.1f" $value {{`}}`}}% of the samples to {{`{{`}} $labels.remote_name{{`}}`}}:{{`{{`}} $labels.url {{`}}`}} + summary: Prometheus fails to send samples to remote storage. + expr: | + ( + (rate(prometheus_remote_storage_failed_samples_total{job="prometheus"}[5m]) or rate(prometheus_remote_storage_samples_failed_total{job="prometheus"}[5m])) + / + ( + (rate(prometheus_remote_storage_failed_samples_total{job="prometheus"}[5m]) or rate(prometheus_remote_storage_samples_failed_total{job="prometheus"}[5m])) + + + (rate(prometheus_remote_storage_succeeded_samples_total{job="prometheus"}[5m]) or rate(prometheus_remote_storage_samples_total{job="prometheus"}[5m])) + ) + ) + * 100 + > 1 + for: 15m + labels: + severity: critical + - alert: PrometheusRemoteWriteBehind + annotations: + description: Prometheus {{`{{`}}$labels.instance{{`}}`}} remote write is {{`{{`}} printf "%.1f" $value {{`}}`}}s behind for {{`{{`}} $labels.remote_name{{`}}`}}:{{`{{`}} $labels.url {{`}}`}}. + summary: Prometheus remote write is behind. + expr: | + # Without max_over_time, failed scrapes could create false negatives, see + # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details. + ( + max_over_time(prometheus_remote_storage_highest_timestamp_in_seconds{job="prometheus"}[5m]) + - ignoring(remote_name, url) group_right + max_over_time(prometheus_remote_storage_queue_highest_sent_timestamp_seconds{job="prometheus"}[5m]) + ) + > 120 + for: 15m + labels: + severity: critical + - alert: PrometheusRemoteWriteDesiredShards + annotations: + description: Prometheus {{`{{`}}$labels.instance{{`}}`}} remote write desired shards calculation wants to run {{`{{`}} $value {{`}}`}} shards for queue {{`{{`}} $labels.remote_name{{`}}`}}:{{`{{`}} $labels.url {{`}}`}}, which is more than the max of {{`{{`}} printf `prometheus_remote_storage_shards_max{instance="%s",job="prometheus"}` $labels.instance | query | first | value {{`}}`}}. + summary: Prometheus remote write desired shards calculation wants to run more than configured max shards. + expr: | + # Without max_over_time, failed scrapes could create false negatives, see + # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details. + ( + max_over_time(prometheus_remote_storage_shards_desired{job="prometheus"}[5m]) + > + max_over_time(prometheus_remote_storage_shards_max{job="prometheus"}[5m]) + ) + for: 15m + labels: + severity: warning + - alert: PrometheusRuleFailures + annotations: + description: Prometheus {{`{{`}}$labels.instance{{`}}`}} has failed to evaluate {{`{{`}} printf "%.0f" $value {{`}}`}} rules in the last 5m. + summary: Prometheus is failing rule evaluations. + expr: | + increase(prometheus_rule_evaluation_failures_total{job="prometheus"}[5m]) > 0 + for: 15m + labels: + severity: critical + - alert: PrometheusMissingRuleEvaluations + annotations: + description: Prometheus {{`{{`}}$labels.instance{{`}}`}} has missed {{`{{`}} printf "%.0f" $value {{`}}`}} rule group evaluations in the last 5m. + summary: Prometheus is missing rule evaluations due to slow rule group evaluation. + expr: | + increase(prometheus_rule_group_iterations_missed_total{job="prometheus"}[5m]) > 0 + for: 15m + labels: + severity: warning + - alert: PrometheusTargetLimitHit + annotations: + description: Prometheus {{`{{`}}$labels.instance{{`}}`}} has dropped {{`{{`}} printf "%.0f" $value {{`}}`}} targets because the number of targets exceeded the configured target_limit. + summary: Prometheus has dropped targets because some scrape configs have exceeded the targets limit. + expr: | + increase(prometheus_target_scrape_pool_exceeded_target_limit_total{job="prometheus"}[5m]) > 0 + for: 15m + labels: + severity: warning + - alert: PrometheusErrorSendingAlertsToAnyAlertmanager + annotations: + description: '{{`{{`}} printf "%.1f" $value {{`}}`}}% minimum errors while sending alerts from Prometheus {{`{{`}}$labels.instance{{`}}`}} to any Alertmanager.' + summary: Prometheus encounters more than 3% errors sending alerts to any Alertmanager. + expr: | + min without (alertmanager) ( + rate(prometheus_notifications_errors_total{job="prometheus",alertmanager!~``}[5m]) + / + rate(prometheus_notifications_sent_total{job="prometheus",alertmanager!~``}[5m]) + ) + * 100 + > 3 + for: 15m + labels: + severity: critical diff --git a/charts/prometheus-mixin/values.schema.json b/charts/prometheus-mixin/values.schema.json new file mode 100644 index 00000000..1c920363 --- /dev/null +++ b/charts/prometheus-mixin/values.schema.json @@ -0,0 +1,22 @@ +{ + "$schema": "http://json-schema.org/schema#", + "title": "Prometheus Mixin Chart JSON Schema", + "type": "object", + "properties": { + "fullnameOverride": { + "type": "string", + "default": "" + }, + "additionalLabels": { + "title": "Additional labels to add to the Prometheus rules", + "type": "object", + "default": {} + }, + "additionalAnnotations": { + "title": "Additional annotations to add to the Prometheus rules", + "type": "object", + "default": {} + } + }, + "required": [] +} \ No newline at end of file diff --git a/charts/prometheus-mixin/values.yaml b/charts/prometheus-mixin/values.yaml new file mode 100644 index 00000000..94690617 --- /dev/null +++ b/charts/prometheus-mixin/values.yaml @@ -0,0 +1,12 @@ +# Default values for prometheus-mixin. +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. + +# -- Provide a name to substitute for the full names of resources +fullnameOverride: "" + +# -- Additional labels to add to the Prometheus rules +additionalLabels: {} + +# -- Additional annotations to add to the Prometheus rules +additionalAnnotations: {}