Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix memory query on unrelated node label changes #105

Merged
merged 5 commits on Dec 21, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
3 changes: 2 additions & 1 deletion Makefile
Expand Up @@ -56,8 +56,9 @@ gen-golden: ensure-prometheus docker-compose-down ping-postgres ## Update golden
@$(COMPOSE_CMD) $(compose_args) down

.PHONY: fmt
fmt: ## Run 'go fmt' against code
fmt: ## Run 'go fmt' and `jsonnetfmt` against code
go fmt ./...
find . \( -name '*.jsonnet' -o -name '*.libsonnet' \) -exec jsonnetfmt -i -- {} \;

.PHONY: vet
vet: ## Run 'go vet' against code
Expand Down
12 changes: 6 additions & 6 deletions pkg/db/seeds/appuio_cloud_memory.promql
Expand Up @@ -17,31 +17,31 @@ sum_over_time(
# Select used memory if higher.
(
sum by(cluster_id, namespace, label_appuio_io_node_class) (container_memory_working_set_bytes{image!=""}
* on(node) group_left(label_appuio_io_node_class) (kube_node_labels{label_appuio_io_node_class!=""} or on(node) kube_node_labels{label_appuio_io_node_class=""}))
* on(cluster_id, node) group_left(label_appuio_io_node_class) (min by(cluster_id, node, label_appuio_io_node_class) (kube_node_labels{label_appuio_io_node_class!=""} or on(cluster_id, node) kube_node_labels{label_appuio_io_node_class=""})))
# IMPORTANT: one clause must use an equality comparison. If only greater-than and less-than are used, equal values will be dropped.
>=
sum by(cluster_id, namespace, label_appuio_io_node_class) (kube_pod_container_resource_requests{resource="memory"}
* on(uid, cluster_id, pod, namespace) group_left kube_pod_status_phase{phase="Running"}
* on(node) group_left(label_appuio_io_node_class) (kube_node_labels{label_appuio_io_node_class!=""} or on(node) kube_node_labels{label_appuio_io_node_class=""}))
* on(cluster_id, node) group_left(label_appuio_io_node_class) (min by(cluster_id, node, label_appuio_io_node_class) (kube_node_labels{label_appuio_io_node_class!=""} or on(cluster_id, node) kube_node_labels{label_appuio_io_node_class=""})))
)
or
# Select reserved memory if higher.
(
# IMPORTANT: The desired time series must always be first.
sum by(cluster_id, namespace, label_appuio_io_node_class) (kube_pod_container_resource_requests{resource="memory"}
* on(uid, cluster_id, pod, namespace) group_left kube_pod_status_phase{phase="Running"}
* on(node) group_left(label_appuio_io_node_class) (kube_node_labels{label_appuio_io_node_class!=""} or on(node) kube_node_labels{label_appuio_io_node_class=""}))
* on(cluster_id, node) group_left(label_appuio_io_node_class) (min by(cluster_id, node, label_appuio_io_node_class) (kube_node_labels{label_appuio_io_node_class!=""} or on(cluster_id, node) kube_node_labels{label_appuio_io_node_class=""})))
>
sum by(cluster_id, namespace, label_appuio_io_node_class) (container_memory_working_set_bytes{image!=""}
* on(node) group_left(label_appuio_io_node_class) (kube_node_labels{label_appuio_io_node_class!=""} or on(node) kube_node_labels{label_appuio_io_node_class=""}))
* on(cluster_id, node) group_left(label_appuio_io_node_class) (min by(cluster_id, node, label_appuio_io_node_class) (kube_node_labels{label_appuio_io_node_class!=""} or on(cluster_id, node) kube_node_labels{label_appuio_io_node_class=""})))
)
)
# Add CPU requests in violation to the ratio provided by the platform.
+ clamp_min(
# Convert CPU request to their memory equivalent.
sum by(cluster_id, namespace, label_appuio_io_node_class) (
kube_pod_container_resource_requests{resource="cpu"} * on(uid, cluster_id, pod, namespace) group_left kube_pod_status_phase{phase="Running"}
* on(node) group_left(label_appuio_io_node_class) (kube_node_labels{label_appuio_io_node_class!=""} or on(node) kube_node_labels{label_appuio_io_node_class=""})
* on(cluster_id, node) group_left(label_appuio_io_node_class) (min by(cluster_id, node, label_appuio_io_node_class) (kube_node_labels{label_appuio_io_node_class!=""} or on(cluster_id, node) kube_node_labels{label_appuio_io_node_class=""}))
# Build that ratio from static values
* on(cluster_id) group_left()(
# Build a time series of ratio for Cloudscale LPG 2 (4096 MiB/core)
Expand All @@ -52,7 +52,7 @@ sum_over_time(
)
# Subtract memory request
- sum by(cluster_id, namespace, label_appuio_io_node_class) (kube_pod_container_resource_requests{resource="memory"} * on(uid, cluster_id, pod, namespace) group_left kube_pod_status_phase{phase="Running"}
* on(node) group_left(label_appuio_io_node_class) (kube_node_labels{label_appuio_io_node_class!=""} or on(node) kube_node_labels{label_appuio_io_node_class=""})
* on(cluster_id, node) group_left(label_appuio_io_node_class) (min by(cluster_id, node, label_appuio_io_node_class) (kube_node_labels{label_appuio_io_node_class!=""} or on(cluster_id, node) kube_node_labels{label_appuio_io_node_class=""}))
# Only values above zero are in violation.
), 0)
)
Expand Down
6 changes: 4 additions & 2 deletions pkg/db/seeds/appuio_cloud_memory_sub_cpu.promql
@@ -1,3 +1,5 @@
# Calculates CPU requests higher than memory requests respecting the fair-use ratio

# Sum values over one hour.
sum_over_time(
# Average over a one-minute time frame.
Expand All @@ -14,7 +16,7 @@ sum_over_time(
sum by(cluster_id, namespace, label_appuio_io_node_class) (
# Get the CPU requests
kube_pod_container_resource_requests{resource="cpu"} * on(uid, cluster_id, pod, namespace) group_left kube_pod_status_phase{phase="Running"}
* on(node) group_left(label_appuio_io_node_class) (kube_node_labels{label_appuio_io_node_class!=""} or on(node) kube_node_labels{label_appuio_io_node_class=""})
* on(cluster_id, node) group_left(label_appuio_io_node_class) (min by(cluster_id, node, label_appuio_io_node_class) (kube_node_labels{label_appuio_io_node_class!=""} or on(cluster_id, node) kube_node_labels{label_appuio_io_node_class=""}))
# Convert them to their memory equivalent by multiplying them by the memory to CPU ratio
# Build that ratio from static values
* on(cluster_id) group_left()(
Expand All @@ -25,7 +27,7 @@ sum_over_time(
)
)
- sum by(cluster_id, namespace, label_appuio_io_node_class) (kube_pod_container_resource_requests{resource="memory"} * on(uid, cluster_id, pod, namespace) group_left kube_pod_status_phase{phase="Running"}
* on(node) group_left(label_appuio_io_node_class) (kube_node_labels{label_appuio_io_node_class!=""} or on(node) kube_node_labels{label_appuio_io_node_class=""}))
* on(cluster_id, node) group_left(label_appuio_io_node_class) (min by(cluster_id, node, label_appuio_io_node_class) (kube_node_labels{label_appuio_io_node_class!=""} or on(cluster_id, node) kube_node_labels{label_appuio_io_node_class=""})))
)
*
# Join namespace label `label_appuio_io_organization` as `tenant_id`.
Expand Down
6 changes: 4 additions & 2 deletions pkg/db/seeds/appuio_cloud_memory_sub_memory.promql
@@ -1,3 +1,5 @@
# Calculates memory requests higher than the real memory usage

# Sum values over one hour.
sum_over_time(
# Average over a one-minute time frame.
Expand All @@ -14,11 +16,11 @@ sum_over_time(
clamp_min(
sum by(cluster_id, namespace, label_appuio_io_node_class) (kube_pod_container_resource_requests{resource="memory"}
* on(uid, cluster_id, pod, namespace) group_left kube_pod_status_phase{phase="Running"}
* on(node) group_left(label_appuio_io_node_class) (kube_node_labels{label_appuio_io_node_class!=""} or on(node) kube_node_labels{label_appuio_io_node_class=""})),
* on(cluster_id, node) group_left(label_appuio_io_node_class) (min by(cluster_id, node, label_appuio_io_node_class) (kube_node_labels{label_appuio_io_node_class!=""} or on(cluster_id, node) kube_node_labels{label_appuio_io_node_class=""}))),
128 * 1024 * 1024
)
- sum by(cluster_id, namespace, label_appuio_io_node_class) (container_memory_working_set_bytes{image!=""}
* on(node) group_left(label_appuio_io_node_class) (kube_node_labels{label_appuio_io_node_class!=""} or on(node) kube_node_labels{label_appuio_io_node_class=""}))
* on(cluster_id, node) group_left(label_appuio_io_node_class) (min by(cluster_id, node, label_appuio_io_node_class) (kube_node_labels{label_appuio_io_node_class!=""} or on(cluster_id, node) kube_node_labels{label_appuio_io_node_class=""})))
),
0
)
Expand Down
211 changes: 211 additions & 0 deletions pkg/db/seeds/promtest/appuio_cloud_memory.jsonnet
@@ -0,0 +1,211 @@
local c = import 'common.libsonnet';

local query = importstr '../appuio_cloud_memory.promql';
local subCPUQuery = importstr '../appuio_cloud_memory_sub_cpu.promql';
local subMemoryQuery = importstr '../appuio_cloud_memory_sub_memory.promql';

// Labels shared by every synthetic input series in this test suite.
// NOTE(review): tenant_id here equals the cluster id; the queries derive the
// real tenant from kube_namespace_labels (label_appuio_io_organization), which
// is why the expected output carries tenant_id 'cherry-pickers-inc' instead.
local commonLabels = {
cluster_id: 'c-appuio-cloudscale-lpg-2',
tenant_id: 'c-appuio-cloudscale-lpg-2',
};

// One running pod, minimal (=1 byte) memory request and usage, no CPU request
// 10 samples
// Baseline fixture shared by all tests. Field names (flexNodeLabel,
// runningPodMemoryRequests, ...) are part of this object's interface:
// individual tests override them with `+:` mixins, so do not rename them.
local baseSeries = {
// Node-label series; tests for issue #105 overlap a second copy of this
// series to simulate label changes on the node.
flexNodeLabel: c.series('kube_node_labels', commonLabels {
label_appuio_io_node_class: 'flex',
label_kubernetes_io_hostname: 'flex-x666',
node: 'flex-x666',
}, '1x10'),
// Namespace labels supply the organization the query bills as tenant_id.
testprojectNamespaceOrgLabel: c.series('kube_namespace_labels', commonLabels {
namespace: 'testproject',
label_appuio_io_organization: 'cherry-pickers-inc',
}, '1x10'),

// Labels identifying the single running pod; local (hidden) so it is not
// emitted as an input series itself.
local podLbls = commonLabels {
namespace: 'testproject',
pod: 'running-pod',
uid: '35e3a8b1-b46d-496c-b2b7-1b52953bf904',
},
// Phases
runningPodPhase: c.series('kube_pod_status_phase', podLbls {
phase: 'Running',
}, '1x10'),
// Requests
runningPodMemoryRequests: c.series('kube_pod_container_resource_requests', podLbls {
resource: 'memory',
node: 'flex-x666',
}, '1x10'),
// CPU request is zero so the fair-use CPU clause contributes nothing by default.
runningPodCPURequests: c.series('kube_pod_container_resource_requests', podLbls {
resource: 'cpu',
node: 'flex-x666',
}, '0x10'),
// Real usage
runningPodMemoryUsage: c.series('container_memory_working_set_bytes', podLbls {
image: 'busybox',
node: 'flex-x666',
}, '1x10'),
};

// Label set expected on every sample the billing query returns for the
// baseline fixture. `product` is the colon-separated tuple
// query:cluster:tenant:namespace:node-class.
local baseCalculatedLabels = {
category: 'c-appuio-cloudscale-lpg-2:testproject',
cluster_id: 'c-appuio-cloudscale-lpg-2',
label_appuio_io_node_class: 'flex',
namespace: 'testproject',
product: 'appuio_cloud_memory:c-appuio-cloudscale-lpg-2:cherry-pickers-inc:testproject:flex',
tenant_id: 'cherry-pickers-inc',
};

// Constants from the query
// 128 MiB: minimum billable memory request — the queries clamp_min requests
// to 128 * 1024 * 1024 bytes.
local minMemoryRequestMib = 128;
// 4294967296 bytes = 4096 MiB per core: the Cloudscale LPG 2 fair-use
// ratio used to convert CPU requests into their memory equivalent.
local cloudscaleFairUseRatio = 4294967296;

// promtool unit tests for the memory billing query and its two sub-queries.
// Expected `value`s are in MiB summed over the 10 one-minute samples.
{
tests: [
// Baseline: a 1-byte request/usage pod is billed the 128 MiB minimum
// for each of the 10 samples.
c.test('minimal pod',
baseSeries,
query,
{
labels: c.formatLabels(baseCalculatedLabels),
value: minMemoryRequestMib * 10,
}),
// Usage above the clamped minimum wins over the (1-byte) request.
c.test('pod with higher memory usage',
baseSeries {
runningPodMemoryUsage+: {
values: '%sx10' % (500 * 1024 * 1024),
},
},
query,
{
labels: c.formatLabels(baseCalculatedLabels),
value: 500 * 10,
}),
// Requests above both usage and the minimum win.
c.test('pod with higher memory requests',
baseSeries {
runningPodMemoryRequests+: {
values: '%sx10' % (500 * 1024 * 1024),
},
},
query,
{
labels: c.formatLabels(baseCalculatedLabels),
value: 500 * 10,
}),
// A 1-core CPU request is converted to memory via the per-cluster
// fair-use ratio (4096 MiB/core on this cluster).
c.test('pod with CPU requests violating fair use',
baseSeries {
runningPodCPURequests+: {
values: '1x10',
},
},
query,
{
labels: c.formatLabels(baseCalculatedLabels),
// See per cluster fair use ratio in query
// value: 2.048E+04,
value: (cloudscaleFairUseRatio / 1024 / 1024) * 10,
}),
// Pods not in phase Running must not be billed: the join on
// kube_pod_status_phase{phase="Running"} filters this Succeeded pod out,
// so the expected value stays the baseline minimum.
c.test('non-running pods are not counted',
baseSeries {
local lbls = commonLabels {
namespace: 'testproject',
pod: 'succeeded-pod',
uid: '2a7a6e32-0840-4ac3-bab4-52d7e16f4a0a',
},
succeededPodPhase: c.series('kube_pod_status_phase', lbls {
phase: 'Succeeded',
}, '1x10'),
succeededPodMemoryRequests: c.series('kube_pod_container_resource_requests', lbls {
resource: 'memory',
node: 'flex-x666',
}, '1x10'),
succeededPodCPURequests: c.series('kube_pod_container_resource_requests', lbls {
node: 'flex-x666',
resource: 'cpu',
}, '1x10'),
},
query,
{
labels: c.formatLabels(baseCalculatedLabels),
value: minMemoryRequestMib * 10,
}),
// Regression test for #105: an unrelated node label changing value yields
// two temporarily overlapping kube_node_labels series; the
// `min by(cluster_id, node, label_appuio_io_node_class)` added to the
// queries must deduplicate them instead of failing the one-to-one match.
c.test('unrelated kube node label changes do not throw errors - there is an overlap since series go stale only after a few missed scrapes',
baseSeries {
flexNodeLabel+: {
_labels+:: {
label_csi_driver_id: 'A09B8DDE-5435-4D74-923C-4866513E8F02',
},
values: '1x10 _x10 stale',
},
flexNodeLabelUpdated: self.flexNodeLabel {
_labels+:: {
label_csi_driver_id: '18539CC3-0B6C-4E72-82BD-90A9BEF7D807',
},
values: '_x5 1x15',
},
},
query,
{
labels: c.formatLabels(baseCalculatedLabels),
value: minMemoryRequestMib * 10,
}),
// Same as above, but the unrelated label is newly added instead of changed.
c.test('unrelated kube node label adds do not throw errors - there is an overlap since series go stale only after a few missed scrapes',
baseSeries {
flexNodeLabel+: {
values: '1x10 _x10 stale',
},
flexNodeLabelUpdated: self.flexNodeLabel {
_labels+:: {
label_csi_driver_id: '18539CC3-0B6C-4E72-82BD-90A9BEF7D807',
},
values: '_x5 1x15',
},
},
query,
{
labels: c.formatLabels(baseCalculatedLabels),
value: minMemoryRequestMib * 10,
}),
// When the node-class label itself appears mid-window, the billed usage is
// split between the class-less product and the 'flex' product.
c.test('node class adds do not throw errors - there is an overlap since series go stale only after a few missed scrapes',
baseSeries {
flexNodeLabel+: {
_labels+:: {
label_appuio_io_node_class:: null,
},
values: '1x10 _x10 stale',
},
flexNodeLabelUpdated: super.flexNodeLabel {
values: '_x5 1x15',
},
},
query,
[
// NOTE(review): the two expected samples sum to 11 * minMemoryRequestMib
// (8 + 3), one more than the 10 billed elsewhere — presumably the overlap
// window double-counts at the boundary; the exact 8/3 split looks
// interval/alignment dependent. TODO confirm.
{
labels: c.formatLabels(baseCalculatedLabels),
value: minMemoryRequestMib * 8,
},
{
labels: c.formatLabels(baseCalculatedLabels {
label_appuio_io_node_class:: null,
product: 'appuio_cloud_memory:c-appuio-cloudscale-lpg-2:cherry-pickers-inc:testproject:',
}),
value: minMemoryRequestMib * 3,
},
]),

// Sanity: with a zero CPU request the CPU sub-query reports no violation.
c.test('sub CPU requests query sanity check',
baseSeries,
subCPUQuery,
{
labels: c.formatLabels(baseCalculatedLabels),
value: 0,
}),
// Sanity: the memory sub-query reports the clamped 128 MiB minimum minus
// the 1-byte actual usage, per sample.
c.test('sub memory requests query sanity check',
baseSeries,
subMemoryQuery,
{
labels: c.formatLabels(baseCalculatedLabels),
value: (minMemoryRequestMib - (1 / 1024 / 1024)) * 10,
}),
],
}
26 changes: 23 additions & 3 deletions pkg/db/seeds/promtest/common.libsonnet
@@ -1,13 +1,33 @@
local formatLabels = function(labels)
local lf = std.join(', ', std.map(function(l) '%s="%s"' % [ l, labels[l] ], std.objectFields(labels)));
"{%s}" % [ lf ];
local lf = std.join(', ', std.map(function(l) '%s="%s"' % [l, labels[l]], std.objectFields(labels)));
'{%s}' % [lf];

// returns a series object with correctly formatted labels.
// labels can be modified post creation using `_labels`.
local series = function(name, labels, values) {
series: name+formatLabels(labels),
_name:: name,
_labels:: labels,
series: self._name + formatLabels(self._labels),
values: values,
};

// returns a test object with the given series and samples. Sample interval is 30s
// the evaluation time is set one hour in the future since all our queries operate on a 1h window
local test = function(name, series, query, samples) {
name: name,
interval: '30s',
input_series: if std.isArray(series) then series else std.objectValues(series),
promql_expr_test: [
{
expr: query,
eval_time: '1h',
exp_samples: if std.isArray(samples) then samples else [samples],
},
],
};

{
series: series,
formatLabels: formatLabels,
test: test,
}