Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #105 from appuio/fix-mem-query-on-label-change
Fix memory query on unrelated node label changes
- Loading branch information
Showing
8 changed files
with
252 additions
and
121 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,211 @@ | ||
local c = import 'common.libsonnet'; | ||
|
||
local query = importstr '../appuio_cloud_memory.promql'; | ||
local subCPUQuery = importstr '../appuio_cloud_memory_sub_cpu.promql'; | ||
local subMemoryQuery = importstr '../appuio_cloud_memory_sub_memory.promql'; | ||
|
||
// Labels shared by every input series in this file; each series extends them via `commonLabels { ... }`. | ||
local commonLabels = { | ||
cluster_id: 'c-appuio-cloudscale-lpg-2', | ||
// Same value as cluster_id on the input side; the expected result labels (baseCalculatedLabels) use 'cherry-pickers-inc' instead. | ||
// NOTE(review): presumably the query derives the output tenant_id from the namespace's organization label - verify against the query. | ||
tenant_id: 'c-appuio-cloudscale-lpg-2', | ||
}; | ||
|
||
// Baseline input series reused by the tests below: one node label series, one namespace label series and one running pod. | ||
// One running pod, minimal (=1 byte) memory request and usage, no CPU request | ||
// 10 samples | ||
// NOTE(review): promtool's expanding notation 'axn' is the start value plus n further samples, so '1x10' may actually be 11 samples - confirm. | ||
local baseSeries = { | ||
// Node label series: node flex-x666 carries node class 'flex'. | ||
flexNodeLabel: c.series('kube_node_labels', commonLabels { | ||
label_appuio_io_node_class: 'flex', | ||
label_kubernetes_io_hostname: 'flex-x666', | ||
node: 'flex-x666', | ||
}, '1x10'), | ||
// Namespace label series: namespace testproject carries organization 'cherry-pickers-inc'. | ||
testprojectNamespaceOrgLabel: c.series('kube_namespace_labels', commonLabels { | ||
namespace: 'testproject', | ||
label_appuio_io_organization: 'cherry-pickers-inc', | ||
}, '1x10'), | ||
|
||
// Labels identifying the single pod; shared by all pod-level series below. | ||
local podLbls = commonLabels { | ||
namespace: 'testproject', | ||
pod: 'running-pod', | ||
uid: '35e3a8b1-b46d-496c-b2b7-1b52953bf904', | ||
}, | ||
// Phases | ||
runningPodPhase: c.series('kube_pod_status_phase', podLbls { | ||
phase: 'Running', | ||
}, '1x10'), | ||
// Requests | ||
runningPodMemoryRequests: c.series('kube_pod_container_resource_requests', podLbls { | ||
resource: 'memory', | ||
node: 'flex-x666', | ||
}, '1x10'), | ||
// The CPU request series is all zeroes ('0x10'), i.e. "no CPU request". | ||
runningPodCPURequests: c.series('kube_pod_container_resource_requests', podLbls { | ||
resource: 'cpu', | ||
node: 'flex-x666', | ||
}, '0x10'), | ||
// Real usage | ||
runningPodMemoryUsage: c.series('container_memory_working_set_bytes', podLbls { | ||
image: 'busybox', | ||
node: 'flex-x666', | ||
}, '1x10'), | ||
}; | ||
|
||
// Labels expected on the query result for the baseline series. | ||
// Tests pass this object (optionally extended) through c.formatLabels to build the expected sample's label string. | ||
local baseCalculatedLabels = { | ||
category: 'c-appuio-cloudscale-lpg-2:testproject', | ||
cluster_id: 'c-appuio-cloudscale-lpg-2', | ||
label_appuio_io_node_class: 'flex', | ||
namespace: 'testproject', | ||
// The last segment ('flex') matches label_appuio_io_node_class; the test without a node class expects the same string ending in ':'. | ||
product: 'appuio_cloud_memory:c-appuio-cloudscale-lpg-2:cherry-pickers-inc:testproject:flex', | ||
tenant_id: 'cherry-pickers-inc', | ||
}; | ||
|
||
// Constants from the query | ||
// Used below as the expected per-sample result for a pod with minimal requests and usage (tests expect minMemoryRequestMib * 10). | ||
local minMemoryRequestMib = 128; | ||
// 4294967296 = 4 * 1024^3; the fair-use test divides it by 1024 twice before comparing with the query result. | ||
// NOTE(review): presumably bytes of memory per requested CPU on this cluster - confirm against the query. | ||
local cloudscaleFairUseRatio = 4294967296; | ||
|
||
// Exported test suite: each c.test(name, input series, query, expected sample(s)) entry becomes one test case. | ||
{ | ||
tests: [ | ||
// Baseline: the unmodified base series are expected to yield minMemoryRequestMib * 10. | ||
c.test('minimal pod', | ||
baseSeries, | ||
query, | ||
{ | ||
labels: c.formatLabels(baseCalculatedLabels), | ||
value: minMemoryRequestMib * 10, | ||
}), | ||
// Working-set usage overridden to 500 * 1024 * 1024 for every sample; expected result is 500 * 10. | ||
c.test('pod with higher memory usage', | ||
baseSeries { | ||
runningPodMemoryUsage+: { | ||
values: '%sx10' % (500 * 1024 * 1024), | ||
}, | ||
}, | ||
query, | ||
{ | ||
labels: c.formatLabels(baseCalculatedLabels), | ||
value: 500 * 10, | ||
}), | ||
// Memory request overridden to 500 * 1024 * 1024 for every sample; expected result is 500 * 10. | ||
c.test('pod with higher memory requests', | ||
baseSeries { | ||
runningPodMemoryRequests+: { | ||
values: '%sx10' % (500 * 1024 * 1024), | ||
}, | ||
}, | ||
query, | ||
{ | ||
labels: c.formatLabels(baseCalculatedLabels), | ||
value: 500 * 10, | ||
}), | ||
// CPU request raised from 0 to 1 while memory stays minimal; expected result is derived from cloudscaleFairUseRatio. | ||
c.test('pod with CPU requests violating fair use', | ||
baseSeries { | ||
runningPodCPURequests+: { | ||
values: '1x10', | ||
}, | ||
}, | ||
query, | ||
{ | ||
labels: c.formatLabels(baseCalculatedLabels), | ||
// See per cluster fair use ratio in query | ||
// value: 2.048E+04, | ||
value: (cloudscaleFairUseRatio / 1024 / 1024) * 10, | ||
}), | ||
// Adds a second pod in phase 'Succeeded' with memory and CPU requests; the expected result is the same as for the baseline. | ||
c.test('non-running pods are not counted', | ||
baseSeries { | ||
local lbls = commonLabels { | ||
namespace: 'testproject', | ||
pod: 'succeeded-pod', | ||
uid: '2a7a6e32-0840-4ac3-bab4-52d7e16f4a0a', | ||
}, | ||
succeededPodPhase: c.series('kube_pod_status_phase', lbls { | ||
phase: 'Succeeded', | ||
}, '1x10'), | ||
succeededPodMemoryRequests: c.series('kube_pod_container_resource_requests', lbls { | ||
resource: 'memory', | ||
node: 'flex-x666', | ||
}, '1x10'), | ||
succeededPodCPURequests: c.series('kube_pod_container_resource_requests', lbls { | ||
node: 'flex-x666', | ||
resource: 'cpu', | ||
}, '1x10'), | ||
}, | ||
query, | ||
{ | ||
labels: c.formatLabels(baseCalculatedLabels), | ||
value: minMemoryRequestMib * 10, | ||
}), | ||
// The node label series gets a label_csi_driver_id and is followed by a copy with a different id. | ||
// The old series keeps its samples before going stale ('1x10 _x10 stale') while the new one starts after five missing samples ('_x5 1x15'). | ||
c.test('unrelated kube node label changes do not throw errors - there is an overlap since series go stale only after a few missed scrapes', | ||
baseSeries { | ||
flexNodeLabel+: { | ||
_labels+:: { | ||
label_csi_driver_id: 'A09B8DDE-5435-4D74-923C-4866513E8F02', | ||
}, | ||
values: '1x10 _x10 stale', | ||
}, | ||
// self.flexNodeLabel is the overridden series above; only the driver id and the values differ. | ||
flexNodeLabelUpdated: self.flexNodeLabel { | ||
_labels+:: { | ||
label_csi_driver_id: '18539CC3-0B6C-4E72-82BD-90A9BEF7D807', | ||
}, | ||
values: '_x5 1x15', | ||
}, | ||
}, | ||
query, | ||
{ | ||
labels: c.formatLabels(baseCalculatedLabels), | ||
value: minMemoryRequestMib * 10, | ||
}), | ||
// Same as the previous test, except the original series has no label_csi_driver_id at all; the label only appears on the updated series. | ||
c.test('unrelated kube node label adds do not throw errors - there is an overlap since series go stale only after a few missed scrapes', | ||
baseSeries { | ||
flexNodeLabel+: { | ||
values: '1x10 _x10 stale', | ||
}, | ||
flexNodeLabelUpdated: self.flexNodeLabel { | ||
_labels+:: { | ||
label_csi_driver_id: '18539CC3-0B6C-4E72-82BD-90A9BEF7D807', | ||
}, | ||
values: '_x5 1x15', | ||
}, | ||
}, | ||
query, | ||
{ | ||
labels: c.formatLabels(baseCalculatedLabels), | ||
value: minMemoryRequestMib * 10, | ||
}), | ||
// The original node label series has its node class label hidden, so formatLabels (which iterates std.objectFields) no longer emits it. | ||
// The updated series is built from super.flexNodeLabel, i.e. the baseSeries definition that still carries the node class. | ||
c.test('node class adds do not throw errors - there is an overlap since series go stale only after a few missed scrapes', | ||
baseSeries { | ||
flexNodeLabel+: { | ||
_labels+:: { | ||
label_appuio_io_node_class:: null, | ||
}, | ||
values: '1x10 _x10 stale', | ||
}, | ||
flexNodeLabelUpdated: super.flexNodeLabel { | ||
values: '_x5 1x15', | ||
}, | ||
}, | ||
query, | ||
[ | ||
// NOTE: the two expected values below add up to 11 * minMemoryRequestMib (8 + 3) rather than 10. The reason is unclear; it might have something to do with the intervals or intra minute switching | ||
{ | ||
labels: c.formatLabels(baseCalculatedLabels), | ||
value: minMemoryRequestMib * 8, | ||
}, | ||
// Second expected sample: no node class label, and the product string ends in ':'. | ||
{ | ||
labels: c.formatLabels(baseCalculatedLabels { | ||
label_appuio_io_node_class:: null, | ||
product: 'appuio_cloud_memory:c-appuio-cloudscale-lpg-2:cherry-pickers-inc:testproject:', | ||
}), | ||
value: minMemoryRequestMib * 3, | ||
}, | ||
]), | ||
|
||
// Sanity checks of the two sub-queries against the unmodified base series. | ||
c.test('sub CPU requests query sanity check', | ||
baseSeries, | ||
subCPUQuery, | ||
{ | ||
labels: c.formatLabels(baseCalculatedLabels), | ||
value: 0, | ||
}), | ||
// Expected value is minMemoryRequestMib minus 1 / 1024 / 1024 (the 1-byte request scaled down twice by 1024), times 10. | ||
c.test('sub memory requests query sanity check', | ||
baseSeries, | ||
subMemoryQuery, | ||
{ | ||
labels: c.formatLabels(baseCalculatedLabels), | ||
value: (minMemoryRequestMib - (1 / 1024 / 1024)) * 10, | ||
}), | ||
], | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,13 +1,33 @@ | ||
// Renders a label object as a brace-wrapped list of key="value" pairs joined with ', ', e.g. {a="1", b="2"}. | ||
// Only the fields returned by std.objectFields are emitted, so hidden (`::`) fields are left out. | ||
// NOTE(review): this listing appears to be a rendered diff without +/- markers; the two `lf` / format-string pairs below look like the old and the reformatted version of the same two lines - confirm which pair is current. | ||
local formatLabels = function(labels) | ||
local lf = std.join(', ', std.map(function(l) '%s="%s"' % [ l, labels[l] ], std.objectFields(labels))); | ||
"{%s}" % [ lf ]; | ||
local lf = std.join(', ', std.map(function(l) '%s="%s"' % [l, labels[l]], std.objectFields(labels))); | ||
'{%s}' % [lf]; | ||
|
||
// returns a series object with correctly formatted labels. | ||
// labels can be modified post creation using `_labels`. | ||
// `_name` and `_labels` are hidden fields; `series` is computed from them via `self`, so overriding `_labels` on a derived object changes the rendered series string. | ||
// NOTE(review): this listing appears to be a rendered diff without +/- markers; the first `series:` line below looks like the pre-change version superseded by the `self._name` variant - confirm. | ||
local series = function(name, labels, values) { | ||
series: name+formatLabels(labels), | ||
_name:: name, | ||
_labels:: labels, | ||
series: self._name + formatLabels(self._labels), | ||
values: values, | ||
}; | ||
|
||
// returns a test object with the given series and samples. Sample interval is 30s | ||
// the evaluation time is set one hour in the future since all our queries operate on a 1h window | ||
// `series` may be an array of series objects or an object whose values are series objects. | ||
// `samples` may be a single expected sample or an array of expected samples. | ||
local test = function(name, series, query, samples) { | ||
name: name, | ||
interval: '30s', | ||
// Objects are flattened to their field values so callers can name and override individual series. | ||
input_series: if std.isArray(series) then series else std.objectValues(series), | ||
promql_expr_test: [ | ||
{ | ||
expr: query, | ||
eval_time: '1h', | ||
// A single expected sample is wrapped into a one-element array. | ||
exp_samples: if std.isArray(samples) then samples else [samples], | ||
}, | ||
], | ||
}; | ||
|
||
// Public interface of this library: the series constructor, the label formatter and the test constructor. | ||
{ | ||
series: series, | ||
formatLabels: formatLabels, | ||
test: test, | ||
} |
Oops, something went wrong.