From f5df5915b610771c5ac094f1823b8b93ed7d5265 Mon Sep 17 00:00:00 2001
From: Arun Kumar Mohan
Date: Fri, 29 Oct 2021 18:53:01 +0530
Subject: [PATCH] ceph: fix 'CephMonQuorumLost' alert

Only the 'Running' mons with result value of '1' should be counted.

Signed-off-by: Arun Kumar Mohan
---
 .../kubernetes/ceph/monitoring/prometheus-ceph-v14-rules.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cluster/examples/kubernetes/ceph/monitoring/prometheus-ceph-v14-rules.yaml b/cluster/examples/kubernetes/ceph/monitoring/prometheus-ceph-v14-rules.yaml
index d5507ace9fae3..991f71371bd67 100644
--- a/cluster/examples/kubernetes/ceph/monitoring/prometheus-ceph-v14-rules.yaml
+++ b/cluster/examples/kubernetes/ceph/monitoring/prometheus-ceph-v14-rules.yaml
@@ -90,7 +90,7 @@ spec:
         severity_level: critical
         storage_type: ceph
       expr: |
-        count(kube_pod_status_phase{pod=~"rook-ceph-mon-.*", phase=~"Running|running"}) by (namespace) < 2
+        count(kube_pod_status_phase{pod=~"rook-ceph-mon-.*", phase=~"Running|running"} == 1) by (namespace) < 2
       for: 5m
       labels:
         severity: critical