-
Notifications
You must be signed in to change notification settings - Fork 1
/
appuio_cloud_memory.promql
93 lines (93 loc) · 5.45 KB
/
appuio_cloud_memory.promql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
# Query for the `appuio_cloud_memory` product.
#
# Per (cluster_id, namespace, label_appuio_io_node_class) it takes the larger of
# used memory (container working set) and requested memory, adds the memory
# equivalent of CPU requests that exceed the cluster's memory-per-core ratio,
# joins the tenant from the namespace labels, enforces a 128 MiB floor,
# averages over a short sliding window, sums the per-minute averages and
# converts the result from bytes to MiB.
#
# Resulting labels:
#   category = "<cluster_id>:<namespace>"
#   product  = "appuio_cloud_memory:<cluster_id>:<tenant_id>:<namespace>:<label_appuio_io_node_class>"
#
# Sum values over one hour.
# The `[59m:1m]` subquery at the end of this call evaluates the one-minute average once per minute
# over a 59-minute range.
# NOTE(review): whether that yields 59 or 60 samples depends on how the Prometheus version in use
# treats the range boundaries - confirm against the deployed version.
sum_over_time(
# Average over a one-minute time frame.
# The `[45s:15s]` subquery at the end of this call evaluates the inner expression every 15 seconds
# over a 45-second range.
# NOTE: This is a sliding window. Results vary based on the query's execution time.
avg_over_time(
# Add the final product label by joining the base product with the cluster ID, the tenant, the namespace
# and the node class, separated by `:`.
label_join(
# Add the category label by joining the cluster ID and the namespace, separated by `:`.
label_join(
# Add the base product identifier. The `.*` pattern matches any (including an empty) existing
# `product` value, so the label is always set to the constant.
label_replace(
clamp_min(
# Get the maximum of requested and used memory.
# TODO Is there a better way to get the maximum of two vectors?
# NOTE(review): both comparisons below only return series that exist on both sides. A label set that
# has usage but no memory request (or the reverse) therefore appears to produce no result at all -
# confirm that this is intended.
(
(
# Select used memory if higher.
#
# The node class is attached by joining on (cluster_id, node) with `kube_node_labels`. The
# `... or on(cluster_id, node) ...` expression prefers the series with a non-empty
# `label_appuio_io_node_class` and falls back to the series with an empty one for nodes that have
# no class. The same join is repeated for every operand in this query.
(
sum by(cluster_id, namespace, label_appuio_io_node_class) (container_memory_working_set_bytes{image!=""}
* on(cluster_id, node) group_left(label_appuio_io_node_class) (min by(cluster_id, node, label_appuio_io_node_class) (kube_node_labels{label_appuio_io_node_class!=""} or on(cluster_id, node) kube_node_labels{label_appuio_io_node_class=""})))
# IMPORTANT: One of the two clauses must include equality. If strictly greater-than and strictly
# less-than were used, series with equal values would be dropped.
>=
# Requested memory, restricted to pods matched against `kube_pod_status_phase{phase="Running"}`.
# NOTE(review): presumably that metric is 1 for running pods and 0 otherwise, so the multiplication
# zeroes out requests of non-running pods - verify against the exporter.
sum by(cluster_id, namespace, label_appuio_io_node_class) (kube_pod_container_resource_requests{resource="memory"}
* on(uid, cluster_id, pod, namespace) group_left kube_pod_status_phase{phase="Running"}
* on(cluster_id, node) group_left(label_appuio_io_node_class) (min by(cluster_id, node, label_appuio_io_node_class) (kube_node_labels{label_appuio_io_node_class!=""} or on(cluster_id, node) kube_node_labels{label_appuio_io_node_class=""})))
)
or
# Select reserved memory if higher.
(
# IMPORTANT: The desired time series must always be first.
# A filtering comparison returns the value of its left-hand operand, so the operand whose value
# should be kept has to be on the left.
sum by(cluster_id, namespace, label_appuio_io_node_class) (kube_pod_container_resource_requests{resource="memory"}
* on(uid, cluster_id, pod, namespace) group_left kube_pod_status_phase{phase="Running"}
* on(cluster_id, node) group_left(label_appuio_io_node_class) (min by(cluster_id, node, label_appuio_io_node_class) (kube_node_labels{label_appuio_io_node_class!=""} or on(cluster_id, node) kube_node_labels{label_appuio_io_node_class=""})))
>
sum by(cluster_id, namespace, label_appuio_io_node_class) (container_memory_working_set_bytes{image!=""}
* on(cluster_id, node) group_left(label_appuio_io_node_class) (min by(cluster_id, node, label_appuio_io_node_class) (kube_node_labels{label_appuio_io_node_class!=""} or on(cluster_id, node) kube_node_labels{label_appuio_io_node_class=""})))
)
)
# Add the memory equivalent of CPU requests that exceed the memory-to-CPU ratio provided by the
# platform: max(cpu_request * ratio - memory_request, 0).
# NOTE(review): `+` only returns series present on both sides, and the ratio table below lists only
# two cluster IDs. Series from any other cluster, or without CPU requests, therefore appear to be
# dropped from the whole result - confirm that this is intended.
+ clamp_min(
# Convert CPU request to their memory equivalent.
sum by(cluster_id, namespace, label_appuio_io_node_class) (
kube_pod_container_resource_requests{resource="cpu"} * on(uid, cluster_id, pod, namespace) group_left kube_pod_status_phase{phase="Running"}
* on(cluster_id, node) group_left(label_appuio_io_node_class) (min by(cluster_id, node, label_appuio_io_node_class) (kube_node_labels{label_appuio_io_node_class!=""} or on(cluster_id, node) kube_node_labels{label_appuio_io_node_class=""}))
# Build that ratio from static values (bytes of memory per CPU core, one series per cluster ID).
* on(cluster_id) group_left()(
# Build a time series of ratio for Cloudscale LPG 2 (4096 MiB/core)
label_replace(vector(4294967296), "cluster_id", "c-appuio-cloudscale-lpg-2", "", "")
# Build a time series of ratio for Exoscale GVA-2 0 (5086 MiB/core)
or label_replace(vector(5333057536), "cluster_id", "c-appuio-exoscale-ch-gva-2-0", "", "")
)
)
# Subtract memory request
- sum by(cluster_id, namespace, label_appuio_io_node_class) (kube_pod_container_resource_requests{resource="memory"} * on(uid, cluster_id, pod, namespace) group_left kube_pod_status_phase{phase="Running"}
* on(cluster_id, node) group_left(label_appuio_io_node_class) (min by(cluster_id, node, label_appuio_io_node_class) (kube_node_labels{label_appuio_io_node_class!=""} or on(cluster_id, node) kube_node_labels{label_appuio_io_node_class=""}))
# Only values above zero are in violation; negative differences are clamped to zero.
), 0)
)
*
# Join namespace label `label_appuio_io_organization` as `tenant_id`.
# Only namespaces with a non-empty organization label are selected; because this is a matching
# join, namespaces without that label are not part of the result.
on(cluster_id, namespace)
group_left(tenant_id)
label_replace(
kube_namespace_labels{label_appuio_io_organization=~".+"},
"tenant_id",
"$1",
"label_appuio_io_organization", "(.*)"
),
# At least return 128MiB (expressed in bytes; applied per series before averaging).
128 * 1024 * 1024
),
"product",
"appuio_cloud_memory",
"product",
".*"
),
"category",
":",
"cluster_id",
"namespace"
),
"product",
":",
"product",
"cluster_id",
"tenant_id",
"namespace",
"label_appuio_io_node_class"
)[45s:15s]
)[59m:1m]
)
# Convert to MiB
/ 1024 / 1024