Patch summary (free text before the first "diff --git" header):

  * CephMgrIsAbsent: alert when the mgr target is scraped but down
    (up == 0) OR when no `up` series exists for it at all (absent(up)).
    `up == 0` alone returns an empty result once the target disappears
    from service discovery, so the alert would never fire in that case.
    The change is applied to both the Jsonnet source and the generated
    rules manifest.
  * cluster:ceph_node_down:join_kube: aggregate by (node, namespace)
    instead of by (node) only.

NOTE(review): the leading whitespace of the hunk lines below was
reconstructed from a whitespace-collapsed copy of this patch; confirm
it against the target files before applying. The "index" blob ids are
carried over from the original patch and will not match the amended
alert expression.

diff --git a/alerts/absent_alerts.libsonnet b/alerts/absent_alerts.libsonnet
index e8630e7..b032117 100644
--- a/alerts/absent_alerts.libsonnet
+++ b/alerts/absent_alerts.libsonnet
@@ -7,7 +7,7 @@
           {
             alert: 'CephMgrIsAbsent',
             expr: |||
-              absent(up{%(cephExporterSelector)s} == 1)
+              up{%(cephExporterSelector)s} == 0 or absent(up{%(cephExporterSelector)s})
             ||| % $._config,
             'for': $._config.mgrIsAbsentAlertTime,
             labels: {
diff --git a/extras/manifests/prometheus-ceph-rules.yaml b/extras/manifests/prometheus-ceph-rules.yaml
index e590291..cc46355 100644
--- a/extras/manifests/prometheus-ceph-rules.yaml
+++ b/extras/manifests/prometheus-ceph-rules.yaml
@@ -11,7 +11,7 @@ spec:
   - name: ceph.rules
     rules:
     - expr: |
-        kube_node_status_condition{condition="Ready",job="kube-state-metrics",status="true"} * on (node) group_right() max(label_replace(ceph_disk_occupation{job="rook-ceph-mgr"},"node","$1","exported_instance","(.*)")) by (node)
+        kube_node_status_condition{condition="Ready",job="kube-state-metrics",status="true"} * on (node) group_right() max(label_replace(ceph_disk_occupation{job="rook-ceph-mgr"},"node","$1","exported_instance","(.*)")) by (node, namespace)
       record: cluster:ceph_node_down:join_kube
     - expr: |
         avg(topk by (ceph_daemon) (1, label_replace(label_replace(ceph_disk_occupation{job="rook-ceph-mgr"}, "instance", "$1", "exported_instance", "(.*)"), "device", "$1", "device", "/dev/(.*)")) * on(instance, device) group_right(ceph_daemon) topk by (instance,device) (1,(irate(node_disk_read_time_seconds_total[1m]) + irate(node_disk_write_time_seconds_total[1m]) / (clamp_min(irate(node_disk_reads_completed_total[1m]), 1) + irate(node_disk_writes_completed_total[1m])))))
@@ -42,7 +42,7 @@ spec:
         severity_level: critical
         storage_type: ceph
       expr: |
-        absent(up{job="rook-ceph-mgr"} == 1)
+        up{job="rook-ceph-mgr"} == 0 or absent(up{job="rook-ceph-mgr"})
       for: 5m
       labels:
         severity: critical
diff --git a/rules/rules.libsonnet b/rules/rules.libsonnet
index af5d821..7cb199f 100644
--- a/rules/rules.libsonnet
+++ b/rules/rules.libsonnet
@@ -7,7 +7,7 @@
           {
             record: 'cluster:ceph_node_down:join_kube',
             expr: |||
-              kube_node_status_condition{condition="Ready",job="kube-state-metrics",status="true"} * on (node) group_right() max(label_replace(ceph_disk_occupation{%(cephExporterSelector)s},"node","$1","exported_instance","(.*)")) by (node)
+              kube_node_status_condition{condition="Ready",job="kube-state-metrics",status="true"} * on (node) group_right() max(label_replace(ceph_disk_occupation{%(cephExporterSelector)s},"node","$1","exported_instance","(.*)")) by (node, namespace)
             ||| % $._config,
           },
           {