Skip to content

Commit

Permalink
Merge pull request #105 from appuio/fix-mem-query-on-label-change
Browse files Browse the repository at this point in the history
Fix memory query on unrelated node label changes
  • Loading branch information
bastjan committed Dec 21, 2022
2 parents f87485a + c853f0b commit b5960e3
Show file tree
Hide file tree
Showing 8 changed files with 252 additions and 121 deletions.
3 changes: 2 additions & 1 deletion Makefile
Expand Up @@ -56,8 +56,9 @@ gen-golden: ensure-prometheus docker-compose-down ping-postgres ## Update golden
@$(COMPOSE_CMD) $(compose_args) down

.PHONY: fmt
fmt: ## Run 'go fmt' and `jsonnetfmt` against code
	go fmt ./...
	find . \( -name '*.jsonnet' -o -name '*.libsonnet' \) -exec jsonnetfmt -i -- {} \;

.PHONY: vet
vet: ## Run 'go vet' against code
Expand Down
12 changes: 6 additions & 6 deletions pkg/db/seeds/appuio_cloud_memory.promql
Expand Up @@ -17,31 +17,31 @@ sum_over_time(
# Select used memory if higher.
(
sum by(cluster_id, namespace, label_appuio_io_node_class) (container_memory_working_set_bytes{image!=""}
* on(node) group_left(label_appuio_io_node_class) (kube_node_labels{label_appuio_io_node_class!=""} or on(node) kube_node_labels{label_appuio_io_node_class=""}))
* on(cluster_id, node) group_left(label_appuio_io_node_class) (min by(cluster_id, node, label_appuio_io_node_class) (kube_node_labels{label_appuio_io_node_class!=""} or on(cluster_id, node) kube_node_labels{label_appuio_io_node_class=""})))
# IMPORTANT: one clause must include equality (>=). If only greater-than and less-than were used, equal values would be dropped.
>=
sum by(cluster_id, namespace, label_appuio_io_node_class) (kube_pod_container_resource_requests{resource="memory"}
* on(uid, cluster_id, pod, namespace) group_left kube_pod_status_phase{phase="Running"}
* on(node) group_left(label_appuio_io_node_class) (kube_node_labels{label_appuio_io_node_class!=""} or on(node) kube_node_labels{label_appuio_io_node_class=""}))
* on(cluster_id, node) group_left(label_appuio_io_node_class) (min by(cluster_id, node, label_appuio_io_node_class) (kube_node_labels{label_appuio_io_node_class!=""} or on(cluster_id, node) kube_node_labels{label_appuio_io_node_class=""})))
)
or
# Select reserved memory if higher.
(
# IMPORTANT: The desired time series must always be first.
sum by(cluster_id, namespace, label_appuio_io_node_class) (kube_pod_container_resource_requests{resource="memory"}
* on(uid, cluster_id, pod, namespace) group_left kube_pod_status_phase{phase="Running"}
* on(node) group_left(label_appuio_io_node_class) (kube_node_labels{label_appuio_io_node_class!=""} or on(node) kube_node_labels{label_appuio_io_node_class=""}))
* on(cluster_id, node) group_left(label_appuio_io_node_class) (min by(cluster_id, node, label_appuio_io_node_class) (kube_node_labels{label_appuio_io_node_class!=""} or on(cluster_id, node) kube_node_labels{label_appuio_io_node_class=""})))
>
sum by(cluster_id, namespace, label_appuio_io_node_class) (container_memory_working_set_bytes{image!=""}
* on(node) group_left(label_appuio_io_node_class) (kube_node_labels{label_appuio_io_node_class!=""} or on(node) kube_node_labels{label_appuio_io_node_class=""}))
* on(cluster_id, node) group_left(label_appuio_io_node_class) (min by(cluster_id, node, label_appuio_io_node_class) (kube_node_labels{label_appuio_io_node_class!=""} or on(cluster_id, node) kube_node_labels{label_appuio_io_node_class=""})))
)
)
# Add CPU requests in violation to the ratio provided by the platform.
+ clamp_min(
# Convert CPU request to their memory equivalent.
sum by(cluster_id, namespace, label_appuio_io_node_class) (
kube_pod_container_resource_requests{resource="cpu"} * on(uid, cluster_id, pod, namespace) group_left kube_pod_status_phase{phase="Running"}
* on(node) group_left(label_appuio_io_node_class) (kube_node_labels{label_appuio_io_node_class!=""} or on(node) kube_node_labels{label_appuio_io_node_class=""})
* on(cluster_id, node) group_left(label_appuio_io_node_class) (min by(cluster_id, node, label_appuio_io_node_class) (kube_node_labels{label_appuio_io_node_class!=""} or on(cluster_id, node) kube_node_labels{label_appuio_io_node_class=""}))
# Build that ratio from static values
* on(cluster_id) group_left()(
# Build a time series of ratio for Cloudscale LPG 2 (4096 MiB/core)
Expand All @@ -52,7 +52,7 @@ sum_over_time(
)
# Subtract memory request
- sum by(cluster_id, namespace, label_appuio_io_node_class) (kube_pod_container_resource_requests{resource="memory"} * on(uid, cluster_id, pod, namespace) group_left kube_pod_status_phase{phase="Running"}
* on(node) group_left(label_appuio_io_node_class) (kube_node_labels{label_appuio_io_node_class!=""} or on(node) kube_node_labels{label_appuio_io_node_class=""})
* on(cluster_id, node) group_left(label_appuio_io_node_class) (min by(cluster_id, node, label_appuio_io_node_class) (kube_node_labels{label_appuio_io_node_class!=""} or on(cluster_id, node) kube_node_labels{label_appuio_io_node_class=""}))
# Only values above zero are in violation.
), 0)
)
Expand Down
6 changes: 4 additions & 2 deletions pkg/db/seeds/appuio_cloud_memory_sub_cpu.promql
@@ -1,3 +1,5 @@
# Calculates CPU requests higher than memory requests respecting the fair-use ratio

# Sum values over one hour.
sum_over_time(
# Average over a one-minute time frame.
Expand All @@ -14,7 +16,7 @@ sum_over_time(
sum by(cluster_id, namespace, label_appuio_io_node_class) (
# Get the CPU requests
kube_pod_container_resource_requests{resource="cpu"} * on(uid, cluster_id, pod, namespace) group_left kube_pod_status_phase{phase="Running"}
* on(node) group_left(label_appuio_io_node_class) (kube_node_labels{label_appuio_io_node_class!=""} or on(node) kube_node_labels{label_appuio_io_node_class=""})
* on(cluster_id, node) group_left(label_appuio_io_node_class) (min by(cluster_id, node, label_appuio_io_node_class) (kube_node_labels{label_appuio_io_node_class!=""} or on(cluster_id, node) kube_node_labels{label_appuio_io_node_class=""}))
# Convert them to their memory equivalent by multiplying them by the memory to CPU ratio
# Build that ratio from static values
* on(cluster_id) group_left()(
Expand All @@ -25,7 +27,7 @@ sum_over_time(
)
)
- sum by(cluster_id, namespace, label_appuio_io_node_class) (kube_pod_container_resource_requests{resource="memory"} * on(uid, cluster_id, pod, namespace) group_left kube_pod_status_phase{phase="Running"}
* on(node) group_left(label_appuio_io_node_class) (kube_node_labels{label_appuio_io_node_class!=""} or on(node) kube_node_labels{label_appuio_io_node_class=""}))
* on(cluster_id, node) group_left(label_appuio_io_node_class) (min by(cluster_id, node, label_appuio_io_node_class) (kube_node_labels{label_appuio_io_node_class!=""} or on(cluster_id, node) kube_node_labels{label_appuio_io_node_class=""})))
)
*
# Join namespace label `label_appuio_io_organization` as `tenant_id`.
Expand Down
6 changes: 4 additions & 2 deletions pkg/db/seeds/appuio_cloud_memory_sub_memory.promql
@@ -1,3 +1,5 @@
# Calculates memory requests higher than the real memory usage

# Sum values over one hour.
sum_over_time(
# Average over a one-minute time frame.
Expand All @@ -14,11 +16,11 @@ sum_over_time(
clamp_min(
sum by(cluster_id, namespace, label_appuio_io_node_class) (kube_pod_container_resource_requests{resource="memory"}
* on(uid, cluster_id, pod, namespace) group_left kube_pod_status_phase{phase="Running"}
* on(node) group_left(label_appuio_io_node_class) (kube_node_labels{label_appuio_io_node_class!=""} or on(node) kube_node_labels{label_appuio_io_node_class=""})),
* on(cluster_id, node) group_left(label_appuio_io_node_class) (min by(cluster_id, node, label_appuio_io_node_class) (kube_node_labels{label_appuio_io_node_class!=""} or on(cluster_id, node) kube_node_labels{label_appuio_io_node_class=""}))),
128 * 1024 * 1024
)
- sum by(cluster_id, namespace, label_appuio_io_node_class) (container_memory_working_set_bytes{image!=""}
* on(node) group_left(label_appuio_io_node_class) (kube_node_labels{label_appuio_io_node_class!=""} or on(node) kube_node_labels{label_appuio_io_node_class=""}))
* on(cluster_id, node) group_left(label_appuio_io_node_class) (min by(cluster_id, node, label_appuio_io_node_class) (kube_node_labels{label_appuio_io_node_class!=""} or on(cluster_id, node) kube_node_labels{label_appuio_io_node_class=""})))
),
0
)
Expand Down
211 changes: 211 additions & 0 deletions pkg/db/seeds/promtest/appuio_cloud_memory.jsonnet
@@ -0,0 +1,211 @@
local c = import 'common.libsonnet';

local query = importstr '../appuio_cloud_memory.promql';
local subCPUQuery = importstr '../appuio_cloud_memory_sub_cpu.promql';
local subMemoryQuery = importstr '../appuio_cloud_memory_sub_memory.promql';

// Labels attached to every raw input series below.
// NOTE(review): tenant_id here is the scrape-time label and equals cluster_id;
// the query output carries the namespace organization ('cherry-pickers-inc')
// as tenant_id instead — see baseCalculatedLabels.
local commonLabels = {
cluster_id: 'c-appuio-cloudscale-lpg-2',
tenant_id: 'c-appuio-cloudscale-lpg-2',
};

// Input fixture: one running pod with a minimal (= 1 byte) memory request and
// usage and a zero CPU request, plus the node and namespace metadata series the
// query joins against. Each series carries 10 samples ('1x10' in promtool
// series notation).
local baseSeries = {
// Node metadata series providing the node class the query groups by.
flexNodeLabel: c.series('kube_node_labels', commonLabels {
label_appuio_io_node_class: 'flex',
label_kubernetes_io_hostname: 'flex-x666',
node: 'flex-x666',
}, '1x10'),
// Namespace metadata series providing the organization label that the
// query joins in as tenant_id.
testprojectNamespaceOrgLabel: c.series('kube_namespace_labels', commonLabels {
namespace: 'testproject',
label_appuio_io_organization: 'cherry-pickers-inc',
}, '1x10'),

// Labels shared by all series describing the running pod (hidden local,
// not emitted as a series itself).
local podLbls = commonLabels {
namespace: 'testproject',
pod: 'running-pod',
uid: '35e3a8b1-b46d-496c-b2b7-1b52953bf904',
},
// Phases
runningPodPhase: c.series('kube_pod_status_phase', podLbls {
phase: 'Running',
}, '1x10'),
// Requests
runningPodMemoryRequests: c.series('kube_pod_container_resource_requests', podLbls {
resource: 'memory',
node: 'flex-x666',
}, '1x10'),
runningPodCPURequests: c.series('kube_pod_container_resource_requests', podLbls {
resource: 'cpu',
node: 'flex-x666',
}, '0x10'),
// Real usage
runningPodMemoryUsage: c.series('container_memory_working_set_bytes', podLbls {
image: 'busybox',
node: 'flex-x666',
}, '1x10'),
};

// Labels expected on the query result for the base fixture.
// product format: <query>:<cluster>:<tenant>:<namespace>:<node class>.
local baseCalculatedLabels = {
category: 'c-appuio-cloudscale-lpg-2:testproject',
cluster_id: 'c-appuio-cloudscale-lpg-2',
label_appuio_io_node_class: 'flex',
namespace: 'testproject',
product: 'appuio_cloud_memory:c-appuio-cloudscale-lpg-2:cherry-pickers-inc:testproject:flex',
tenant_id: 'cherry-pickers-inc',
};

// Constants mirrored from the query so expected values stay readable.
// minMemoryRequestMib: minimum billed memory request per pod, in MiB.
// cloudscaleFairUseRatio: 4294967296 bytes = 4 GiB; memory-per-core fair-use
// ratio for the Cloudscale LPG 2 cluster (cf. "4096 MiB/core" in the query).
local minMemoryRequestMib = 128;
local cloudscaleFairUseRatio = 4294967296;

// Exported promtool test suite exercising the memory billing query and its
// two subtraction sub-queries against the fixture above.
{
tests: [
// Baseline: expected billed value is minMemoryRequestMib per sample,
// summed over the 10 samples.
c.test('minimal pod',
baseSeries,
query,
{
labels: c.formatLabels(baseCalculatedLabels),
value: minMemoryRequestMib * 10,
}),
// Usage above the request: the working-set value wins (500 MiB here).
c.test('pod with higher memory usage',
baseSeries {
runningPodMemoryUsage+: {
values: '%sx10' % (500 * 1024 * 1024),
},
},
query,
{
labels: c.formatLabels(baseCalculatedLabels),
value: 500 * 10,
}),
// Request above the usage: the request wins.
c.test('pod with higher memory requests',
baseSeries {
runningPodMemoryRequests+: {
values: '%sx10' % (500 * 1024 * 1024),
},
},
query,
{
labels: c.formatLabels(baseCalculatedLabels),
value: 500 * 10,
}),
// A 1-core CPU request is converted to its memory equivalent via the
// per-cluster fair-use ratio and added on top of the memory value.
c.test('pod with CPU requests violating fair use',
baseSeries {
runningPodCPURequests+: {
values: '1x10',
},
},
query,
{
labels: c.formatLabels(baseCalculatedLabels),
// See per cluster fair use ratio in query
// value: 2.048E+04,
value: (cloudscaleFairUseRatio / 1024 / 1024) * 10,
}),
// Pods in phase Succeeded must not contribute to the billed amount;
// the expected value is unchanged from the baseline.
c.test('non-running pods are not counted',
baseSeries {
local lbls = commonLabels {
namespace: 'testproject',
pod: 'succeeded-pod',
uid: '2a7a6e32-0840-4ac3-bab4-52d7e16f4a0a',
},
succeededPodPhase: c.series('kube_pod_status_phase', lbls {
phase: 'Succeeded',
}, '1x10'),
succeededPodMemoryRequests: c.series('kube_pod_container_resource_requests', lbls {
resource: 'memory',
node: 'flex-x666',
}, '1x10'),
succeededPodCPURequests: c.series('kube_pod_container_resource_requests', lbls {
node: 'flex-x666',
resource: 'cpu',
}, '1x10'),
},
query,
{
labels: c.formatLabels(baseCalculatedLabels),
value: minMemoryRequestMib * 10,
}),
// Regression test for the fix in this PR: a changed value of an unrelated
// node label (label_csi_driver_id) creates a second, temporarily
// overlapping kube_node_labels series; the min by(...) added to the query
// must deduplicate it instead of erroring.
c.test('unrelated kube node label changes do not throw errors - there is an overlap since series go stale only after a few missed scrapes',
baseSeries {
flexNodeLabel+: {
_labels+:: {
label_csi_driver_id: 'A09B8DDE-5435-4D74-923C-4866513E8F02',
},
values: '1x10 _x10 stale',
},
flexNodeLabelUpdated: self.flexNodeLabel {
_labels+:: {
label_csi_driver_id: '18539CC3-0B6C-4E72-82BD-90A9BEF7D807',
},
values: '_x5 1x15',
},
},
query,
{
labels: c.formatLabels(baseCalculatedLabels),
value: minMemoryRequestMib * 10,
}),
// Same as above, but the unrelated label is newly added instead of changed.
c.test('unrelated kube node label adds do not throw errors - there is an overlap since series go stale only after a few missed scrapes',
baseSeries {
flexNodeLabel+: {
values: '1x10 _x10 stale',
},
flexNodeLabelUpdated: self.flexNodeLabel {
_labels+:: {
label_csi_driver_id: '18539CC3-0B6C-4E72-82BD-90A9BEF7D807',
},
values: '_x5 1x15',
},
},
query,
{
labels: c.formatLabels(baseCalculatedLabels),
value: minMemoryRequestMib * 10,
}),
// The node-class label itself appears mid-window: the samples split
// between the classless product key and the 'flex' product key.
c.test('node class adds do not throw errors - there is an overlap since series go stale only after a few missed scrapes',
baseSeries {
flexNodeLabel+: {
_labels+:: {
label_appuio_io_node_class:: null,
},
values: '1x10 _x10 stale',
},
flexNodeLabelUpdated: super.flexNodeLabel {
values: '_x5 1x15',
},
},
query,
[
// NOTE(review): the two samples total 11 * minMemoryRequestMib (8 + 3)
// rather than 10; presumably an artifact of the overlap window or
// intra-minute switching between the two label sets - TODO confirm.
{
labels: c.formatLabels(baseCalculatedLabels),
value: minMemoryRequestMib * 8,
},
{
labels: c.formatLabels(baseCalculatedLabels {
label_appuio_io_node_class:: null,
product: 'appuio_cloud_memory:c-appuio-cloudscale-lpg-2:cherry-pickers-inc:testproject:',
}),
value: minMemoryRequestMib * 3,
},
]),

// Sanity checks for the two subtraction sub-queries on the base fixture.
c.test('sub CPU requests query sanity check',
baseSeries,
subCPUQuery,
{
labels: c.formatLabels(baseCalculatedLabels),
value: 0,
}),
// 128 MiB clamp minus the 1-byte real usage (expressed in MiB), per sample.
c.test('sub memory requests query sanity check',
baseSeries,
subMemoryQuery,
{
labels: c.formatLabels(baseCalculatedLabels),
value: (minMemoryRequestMib - (1 / 1024 / 1024)) * 10,
}),
],
}
26 changes: 23 additions & 3 deletions pkg/db/seeds/promtest/common.libsonnet
@@ -1,13 +1,33 @@
// Shared helpers for the promtool-style unit tests of the billing queries.

// Formats a label set as a Prometheus selector suffix,
// e.g. '{cluster_id="c-x", node="n1"}'.
// Key order follows std.objectFields (sorted field names).
local formatLabels = function(labels)
  local lf = std.join(', ', std.map(function(l) '%s="%s"' % [l, labels[l]], std.objectFields(labels)));
  '{%s}' % [lf];

// returns a series object with correctly formatted labels.
// labels can be modified post creation using `_labels`; the visible
// `series` string is recomputed from the hidden `_name`/`_labels` fields.
local series = function(name, labels, values) {
  _name:: name,
  _labels:: labels,
  series: self._name + formatLabels(self._labels),
  values: values,
};

// returns a test object with the given series and samples. Sample interval is 30s.
// the evaluation time is set one hour in the future since all our queries operate on a 1h window.
// `series` and `samples` each accept either a single object or an array.
local test = function(name, series, query, samples) {
  name: name,
  interval: '30s',
  input_series: if std.isArray(series) then series else std.objectValues(series),
  promql_expr_test: [
    {
      expr: query,
      eval_time: '1h',
      exp_samples: if std.isArray(samples) then samples else [samples],
    },
  ],
};

{
  series: series,
  formatLabels: formatLabels,
  test: test,
}

0 comments on commit b5960e3

Please sign in to comment.