From 5de33abb8f14463072e0f54d83256a09e995e75a Mon Sep 17 00:00:00 2001 From: Sebastian Widmer Date: Fri, 2 Dec 2022 16:57:53 +0100 Subject: [PATCH 1/5] Fix memory query on unrelated node label change --- pkg/db/seeds/appuio_cloud_memory.promql | 12 +- .../seeds/appuio_cloud_memory_sub_cpu.promql | 6 +- .../appuio_cloud_memory_sub_memory.promql | 6 +- .../promtest/appuio_cloud_memory.jsonnet | 229 ++++++++++++++++++ pkg/db/seeds/promtest/common.libsonnet | 16 ++ pkg/db/seeds/promtest/query.jsonnet | 105 -------- pkg/db/seeds/queries_test.go | 4 +- 7 files changed, 261 insertions(+), 117 deletions(-) create mode 100644 pkg/db/seeds/promtest/appuio_cloud_memory.jsonnet delete mode 100644 pkg/db/seeds/promtest/query.jsonnet diff --git a/pkg/db/seeds/appuio_cloud_memory.promql b/pkg/db/seeds/appuio_cloud_memory.promql index dc0787b..a3ee705 100644 --- a/pkg/db/seeds/appuio_cloud_memory.promql +++ b/pkg/db/seeds/appuio_cloud_memory.promql @@ -17,12 +17,12 @@ sum_over_time( # Select used memory if higher. ( sum by(cluster_id, namespace, label_appuio_io_node_class) (container_memory_working_set_bytes{image!=""} - * on(node) group_left(label_appuio_io_node_class) (kube_node_labels{label_appuio_io_node_class!=""} or on(node) kube_node_labels{label_appuio_io_node_class=""})) + * on(cluster_id, node) group_left(label_appuio_io_node_class) (min by(cluster_id, node, label_appuio_io_node_class) (kube_node_labels{label_appuio_io_node_class!=""} or on(cluster_id, node) kube_node_labels{label_appuio_io_node_class=""}))) # IMPORTANT: one clause must use equal. If used grater and lesser than, equal values will be dropped. 
>= sum by(cluster_id, namespace, label_appuio_io_node_class) (kube_pod_container_resource_requests{resource="memory"} * on(uid, cluster_id, pod, namespace) group_left kube_pod_status_phase{phase="Running"} - * on(node) group_left(label_appuio_io_node_class) (kube_node_labels{label_appuio_io_node_class!=""} or on(node) kube_node_labels{label_appuio_io_node_class=""})) + * on(cluster_id, node) group_left(label_appuio_io_node_class) (min by(cluster_id, node, label_appuio_io_node_class) (kube_node_labels{label_appuio_io_node_class!=""} or on(cluster_id, node) kube_node_labels{label_appuio_io_node_class=""}))) ) or # Select reserved memory if higher. @@ -30,10 +30,10 @@ sum_over_time( # IMPORTANT: The desired time series must always be first. sum by(cluster_id, namespace, label_appuio_io_node_class) (kube_pod_container_resource_requests{resource="memory"} * on(uid, cluster_id, pod, namespace) group_left kube_pod_status_phase{phase="Running"} - * on(node) group_left(label_appuio_io_node_class) (kube_node_labels{label_appuio_io_node_class!=""} or on(node) kube_node_labels{label_appuio_io_node_class=""})) + * on(cluster_id, node) group_left(label_appuio_io_node_class) (min by(cluster_id, node, label_appuio_io_node_class) (kube_node_labels{label_appuio_io_node_class!=""} or on(cluster_id, node) kube_node_labels{label_appuio_io_node_class=""}))) > sum by(cluster_id, namespace, label_appuio_io_node_class) (container_memory_working_set_bytes{image!=""} - * on(node) group_left(label_appuio_io_node_class) (kube_node_labels{label_appuio_io_node_class!=""} or on(node) kube_node_labels{label_appuio_io_node_class=""})) + * on(cluster_id, node) group_left(label_appuio_io_node_class) (min by(cluster_id, node, label_appuio_io_node_class) (kube_node_labels{label_appuio_io_node_class!=""} or on(cluster_id, node) kube_node_labels{label_appuio_io_node_class=""}))) ) ) # Add CPU requests in violation to the ratio provided by the platform. 
@@ -41,7 +41,7 @@ sum_over_time( # Convert CPU request to their memory equivalent. sum by(cluster_id, namespace, label_appuio_io_node_class) ( kube_pod_container_resource_requests{resource="cpu"} * on(uid, cluster_id, pod, namespace) group_left kube_pod_status_phase{phase="Running"} - * on(node) group_left(label_appuio_io_node_class) (kube_node_labels{label_appuio_io_node_class!=""} or on(node) kube_node_labels{label_appuio_io_node_class=""}) + * on(cluster_id, node) group_left(label_appuio_io_node_class) (min by(cluster_id, node, label_appuio_io_node_class) (kube_node_labels{label_appuio_io_node_class!=""} or on(cluster_id, node) kube_node_labels{label_appuio_io_node_class=""})) # Build that ratio from static values * on(cluster_id) group_left()( # Build a time series of ratio for Cloudscale LPG 2 (4096 MiB/core) @@ -52,7 +52,7 @@ sum_over_time( ) # Subtract memory request - sum by(cluster_id, namespace, label_appuio_io_node_class) (kube_pod_container_resource_requests{resource="memory"} * on(uid, cluster_id, pod, namespace) group_left kube_pod_status_phase{phase="Running"} - * on(node) group_left(label_appuio_io_node_class) (kube_node_labels{label_appuio_io_node_class!=""} or on(node) kube_node_labels{label_appuio_io_node_class=""}) + * on(cluster_id, node) group_left(label_appuio_io_node_class) (min by(cluster_id, node, label_appuio_io_node_class) (kube_node_labels{label_appuio_io_node_class!=""} or on(cluster_id, node) kube_node_labels{label_appuio_io_node_class=""})) # Only values above zero are in violation. ), 0) ) diff --git a/pkg/db/seeds/appuio_cloud_memory_sub_cpu.promql b/pkg/db/seeds/appuio_cloud_memory_sub_cpu.promql index 44f0a15..aaa5f21 100644 --- a/pkg/db/seeds/appuio_cloud_memory_sub_cpu.promql +++ b/pkg/db/seeds/appuio_cloud_memory_sub_cpu.promql @@ -1,3 +1,5 @@ +# Calculates CPU requests higher than memory requests respecting the fair-use ratio + # Sum values over one hour. sum_over_time( # Average over a one-minute time frame. 
@@ -14,7 +16,7 @@ sum_over_time( sum by(cluster_id, namespace, label_appuio_io_node_class) ( # Get the CPU requests kube_pod_container_resource_requests{resource="cpu"} * on(uid, cluster_id, pod, namespace) group_left kube_pod_status_phase{phase="Running"} - * on(node) group_left(label_appuio_io_node_class) (kube_node_labels{label_appuio_io_node_class!=""} or on(node) kube_node_labels{label_appuio_io_node_class=""}) + * on(cluster_id, node) group_left(label_appuio_io_node_class) (min by(cluster_id, node, label_appuio_io_node_class) (kube_node_labels{label_appuio_io_node_class!=""} or on(cluster_id, node) kube_node_labels{label_appuio_io_node_class=""})) # Convert them to their memory equivalent by multiplying them by the memory to CPU ratio # Build that ratio from static values * on(cluster_id) group_left()( @@ -25,7 +27,7 @@ sum_over_time( ) ) - sum by(cluster_id, namespace, label_appuio_io_node_class) (kube_pod_container_resource_requests{resource="memory"} * on(uid, cluster_id, pod, namespace) group_left kube_pod_status_phase{phase="Running"} - * on(node) group_left(label_appuio_io_node_class) (kube_node_labels{label_appuio_io_node_class!=""} or on(node) kube_node_labels{label_appuio_io_node_class=""})) + * on(cluster_id, node) group_left(label_appuio_io_node_class) (min by(cluster_id, node, label_appuio_io_node_class) (kube_node_labels{label_appuio_io_node_class!=""} or on(cluster_id, node) kube_node_labels{label_appuio_io_node_class=""}))) ) * # Join namespace label `label_appuio_io_organization` as `tenant_id`. diff --git a/pkg/db/seeds/appuio_cloud_memory_sub_memory.promql b/pkg/db/seeds/appuio_cloud_memory_sub_memory.promql index 679ba90..85f80bb 100644 --- a/pkg/db/seeds/appuio_cloud_memory_sub_memory.promql +++ b/pkg/db/seeds/appuio_cloud_memory_sub_memory.promql @@ -1,3 +1,5 @@ +# Calculates memory requests higher than the real memory usage + # Sum values over one hour. sum_over_time( # Average over a one-minute time frame. 
@@ -14,11 +16,11 @@ sum_over_time( clamp_min( sum by(cluster_id, namespace, label_appuio_io_node_class) (kube_pod_container_resource_requests{resource="memory"} * on(uid, cluster_id, pod, namespace) group_left kube_pod_status_phase{phase="Running"} - * on(node) group_left(label_appuio_io_node_class) (kube_node_labels{label_appuio_io_node_class!=""} or on(node) kube_node_labels{label_appuio_io_node_class=""})), + * on(cluster_id, node) group_left(label_appuio_io_node_class) (min by(cluster_id, node, label_appuio_io_node_class) (kube_node_labels{label_appuio_io_node_class!=""} or on(cluster_id, node) kube_node_labels{label_appuio_io_node_class=""}))), 128 * 1024 * 1024 ) - sum by(cluster_id, namespace, label_appuio_io_node_class) (container_memory_working_set_bytes{image!=""} - * on(node) group_left(label_appuio_io_node_class) (kube_node_labels{label_appuio_io_node_class!=""} or on(node) kube_node_labels{label_appuio_io_node_class=""})) + * on(cluster_id, node) group_left(label_appuio_io_node_class) (min by(cluster_id, node, label_appuio_io_node_class) (kube_node_labels{label_appuio_io_node_class!=""} or on(cluster_id, node) kube_node_labels{label_appuio_io_node_class=""}))) ), 0 ) diff --git a/pkg/db/seeds/promtest/appuio_cloud_memory.jsonnet b/pkg/db/seeds/promtest/appuio_cloud_memory.jsonnet new file mode 100644 index 0000000..990cbe6 --- /dev/null +++ b/pkg/db/seeds/promtest/appuio_cloud_memory.jsonnet @@ -0,0 +1,229 @@ +local c = import 'common.libsonnet'; + +local query = importstr '../appuio_cloud_memory.promql'; +local subCPUQuery = importstr '../appuio_cloud_memory_sub_cpu.promql'; +local subMemoryQuery = importstr '../appuio_cloud_memory_sub_memory.promql'; + +local commonLabels = { + cluster_id: 'c-appuio-cloudscale-lpg-2', + tenant_id: 'c-appuio-cloudscale-lpg-2', +}; + +// One running pod, minimal (=1 byte) memory request and usage, no CPU request +// 10 samples +local baseSeries = { + local runningUID = '35e3a8b1-b46d-496c-b2b7-1b52953bf904', + + 
flexNodeLabel: c.series('kube_node_labels', commonLabels { + label_appuio_io_node_class: 'flex', + label_kubernetes_io_hostname: 'flex-x666', + node: 'flex-x666', + }, '1x10'), + testprojectNamespaceOrgLabel: c.series('kube_namespace_labels', commonLabels { + namespace: 'testproject', + label_appuio_io_organization: 'cherry-pickers-inc', + }, '1x10'), + // Phases + runningPodPhase: c.series('kube_pod_status_phase', commonLabels { + namespace: 'testproject', + phase: 'Running', + pod: 'running-pod', + uid: runningUID, + }, '1x10'), + // Requests + runningPodMemoryRequests: c.series('kube_pod_container_resource_requests', commonLabels { + namespace: 'testproject', + pod: 'running-pod', + resource: 'memory', + node: 'flex-x666', + uid: runningUID, + }, '1x10'), + runningPodCPURequests: c.series('kube_pod_container_resource_requests', commonLabels { + namespace: 'testproject', + pod: 'running-pod', + node: 'flex-x666', + resource: 'cpu', + uid: runningUID, + }, '0x10'), + // Real usage + runningPodMemoryUsage: c.series('container_memory_working_set_bytes', commonLabels { + image: 'busybox', + namespace: 'testproject', + pod: 'running-pod', + node: 'flex-x666', + uid: runningUID, + }, '1x10'), +}; + +local baseCalculatedLabels = { + category: 'c-appuio-cloudscale-lpg-2:testproject', + cluster_id: 'c-appuio-cloudscale-lpg-2', + label_appuio_io_node_class: 'flex', + namespace: 'testproject', + product: 'appuio_cloud_memory:c-appuio-cloudscale-lpg-2:cherry-pickers-inc:testproject:flex', + tenant_id: 'cherry-pickers-inc', +}; + +{ + tests: [ + c.test('minimal pod', + baseSeries, + query, + { + labels: c.formatLabels(baseCalculatedLabels), + // Minimum value is 128MiB + value: 128 * 10, + }), + c.test('pod with higher memory usage', + baseSeries { + runningPodMemoryUsage+: { + values: '%sx10' % (500 * 1024 * 1024), + }, + }, + query, + { + labels: c.formatLabels(baseCalculatedLabels), + value: 500 * 10, + }), + c.test('pod with higher memory requests', + baseSeries { + 
runningPodMemoryRequests+: { + values: '%sx10' % (500 * 1024 * 1024), + }, + }, + query, + { + labels: c.formatLabels(baseCalculatedLabels), + value: 500 * 10, + }), + c.test('pod with CPU requests violating fair use', + baseSeries { + runningPodCPURequests+: { + values: '%sx10' % 0.5, + }, + }, + query, + { + labels: c.formatLabels(baseCalculatedLabels), + // See per cluster fair use ratio in query + value: 2.048E+04, + }), + c.test('pod with CPU requests violating fair use', + baseSeries { + runningPodCPURequests+: { + values: '%sx10' % 0.5, + }, + }, + query, + { + labels: c.formatLabels(baseCalculatedLabels), + // See per cluster fair use ratio in query + value: 2.048E+04, + }), + c.test('non-running pods are not counted', + baseSeries { + local lbls = commonLabels { + namespace: 'testproject', + pod: 'succeeded-pod', + uid: '2a7a6e32-0840-4ac3-bab4-52d7e16f4a0a', + }, + succeededPodPhase: c.series('kube_pod_status_phase', lbls { + phase: 'Succeeded', + }, '1x10'), + succeededPodMemoryRequests: c.series('kube_pod_container_resource_requests', lbls { + resource: 'memory', + node: 'flex-x666', + }, '1x10'), + succeededPodCPURequests: c.series('kube_pod_container_resource_requests', lbls { + node: 'flex-x666', + resource: 'cpu', + }, '1x10'), + }, + query, + { + labels: c.formatLabels(baseCalculatedLabels), + value: 128 * 10, + }), + c.test('unrelated kube node label changes do not throw errors - there is an overlap since series go stale only after a few missed scrapes', + baseSeries { + flexNodeLabel: c.series('kube_node_labels', commonLabels { + label_csi_driver_id: 'A09B8DDE-5435-4D74-923C-4866513E8F02', + label_appuio_io_node_class: 'flex', + label_kubernetes_io_hostname: 'flex-x666', + node: 'flex-x666', + }, '1x10 _x10 stale'), + flexNodeLabelUpdated: c.series('kube_node_labels', commonLabels { + label_csi_driver_id: '18539CC3-0B6C-4E72-82BD-90A9BEF7D807', + label_appuio_io_node_class: 'flex', + label_kubernetes_io_hostname: 'flex-x666', + node: 'flex-x666', 
+ }, '_x5 1x15'), + }, + query, + { + labels: c.formatLabels(baseCalculatedLabels), + value: 128 * 10, + }), + c.test('unrelated kube node label adds do not throw errors - there is an overlap since series go stale only after a few missed scrapes', + baseSeries { + flexNodeLabel: c.series('kube_node_labels', commonLabels { + label_appuio_io_node_class: 'flex', + label_kubernetes_io_hostname: 'flex-x666', + node: 'flex-x666', + }, '1x10 _x10 stale'), + flexNodeLabelUpdated: c.series('kube_node_labels', commonLabels { + label_csi_driver_id: '18539CC3-0B6C-4E72-82BD-90A9BEF7D807', + label_appuio_io_node_class: 'flex', + label_kubernetes_io_hostname: 'flex-x666', + node: 'flex-x666', + }, '_x5 1x15'), + }, + query, + { + labels: c.formatLabels(baseCalculatedLabels), + value: 128 * 10, + }), + c.test('node class adds do not throw errors - there is an overlap since series go stale only after a few missed scrapes', + baseSeries { + flexNodeLabel: c.series('kube_node_labels', commonLabels { + label_kubernetes_io_hostname: 'flex-x666', + node: 'flex-x666', + }, '1x10 _x10 stale'), + flexNodeLabelUpdated: c.series('kube_node_labels', commonLabels { + label_appuio_io_node_class: 'flex', + label_kubernetes_io_hostname: 'flex-x666', + node: 'flex-x666', + }, '_x5 1x15'), + }, + query, + [ + // I'm not sure why this is 11 * 128, might have something to do with the intervals or intra minute switching + { + labels: c.formatLabels(baseCalculatedLabels), + value: 128 * 8, + }, + { + labels: c.formatLabels(baseCalculatedLabels { + label_appuio_io_node_class:: null, + product: 'appuio_cloud_memory:c-appuio-cloudscale-lpg-2:cherry-pickers-inc:testproject:', + }), + value: 128 * 3, + }, + ]), + + c.test('sub CPU requests query sanity check', + baseSeries, + subCPUQuery, + { + labels: c.formatLabels(baseCalculatedLabels), + value: 0, + }), + c.test('sub memory requests query sanity check', + baseSeries, + subMemoryQuery, + { + labels: c.formatLabels(baseCalculatedLabels), + value: (128 - 
(1 / 1024 / 1024)) * 10, + }), + ], +} diff --git a/pkg/db/seeds/promtest/common.libsonnet b/pkg/db/seeds/promtest/common.libsonnet index 0544b44..0b56f4e 100644 --- a/pkg/db/seeds/promtest/common.libsonnet +++ b/pkg/db/seeds/promtest/common.libsonnet @@ -7,7 +7,23 @@ local series = function(name, labels, values) { values: values, }; +// returns a test object with the given series and samples. Sample interval is 30s +// the evaluation time is set one hour in the future since all our queries operate on a 1h window +local test = function(name, series, query, samples) { + name: name, + interval: '30s', + input_series: if std.isArray(series) then series else std.objectValues(series), + promql_expr_test: [ + { + expr: query, + eval_time: '1h', + exp_samples: if std.isArray(samples) then samples else [ samples ], + }, + ], +}; + { series: series, formatLabels: formatLabels, + test: test, } diff --git a/pkg/db/seeds/promtest/query.jsonnet b/pkg/db/seeds/promtest/query.jsonnet deleted file mode 100644 index 94665ac..0000000 --- a/pkg/db/seeds/promtest/query.jsonnet +++ /dev/null @@ -1,105 +0,0 @@ -local c = import 'common.libsonnet'; - -local query = importstr '../appuio_cloud_memory.promql'; - -local commonLabels = { - cluster_id: 'c-appuio-cloudscale-lpg-2', - tenant_id: 'c-appuio-cloudscale-lpg-2', -}; - -{ - tests: [ - { - interval: '30s', - local runningUID = '35e3a8b1-b46d-496c-b2b7-1b52953bf904', - local succeededUID = '2a7a6e32-0840-4ac3-bab4-52d7e16f4a0a', - input_series: [ - c.series('kube_node_labels', commonLabels { - label_appuio_io_node_class: 'flex', - label_kubernetes_io_hostname: 'flex-x666', - node: 'flex-x666', - }, '1+0x10'), - c.series('kube_namespace_labels', commonLabels { - namespace: 'testproject', - label_appuio_io_organization: 'cherry-pickers-inc', - }, '1+0x10'), - // Phases - c.series('kube_pod_status_phase', commonLabels { - namespace: 'testproject', - phase: 'Succeeded', - pod: 'succeeded-pod', - uid: succeededUID, - }, '1+0x10'), - 
c.series('kube_pod_status_phase', commonLabels { - namespace: 'testproject', - phase: 'Running', - pod: 'running-pod', - uid: runningUID, - }, '1+0x10'), - // Requests - c.series('kube_pod_container_resource_requests', commonLabels { - namespace: 'testproject', - pod: 'succeeded-pod', - resource: 'memory', - node: 'flex-x666', - uid: succeededUID, - }, '1+0x10'), - c.series('kube_pod_container_resource_requests', commonLabels { - namespace: 'testproject', - pod: 'running-pod', - resource: 'memory', - node: 'flex-x666', - uid: runningUID, - }, '1+0x10'), - c.series('kube_pod_container_resource_requests', commonLabels { - namespace: 'testproject', - pod: 'succeeded-pod', - node: 'flex-x666', - resource: 'cpu', - uid: succeededUID, - }, '0+0x10'), - c.series('kube_pod_container_resource_requests', commonLabels { - namespace: 'testproject', - pod: 'running-pod', - node: 'flex-x666', - resource: 'cpu', - uid: runningUID, - }, '0+0x10'), - // Real usage - c.series('container_memory_working_set_bytes', commonLabels { - image: 'busybox', - namespace: 'testproject', - pod: 'succeeded-pod', - node: 'flex-x666', - uid: succeededUID, - }, '1+0x10'), - c.series('container_memory_working_set_bytes', commonLabels { - image: 'busybox', - namespace: 'testproject', - pod: 'running-pod', - node: 'flex-x666', - uid: runningUID, - }, '1+0x10'), - ], - promql_expr_test: [ - { - expr: query, - eval_time: '1h', - exp_samples: [ - { - labels: c.formatLabels({ - category: 'c-appuio-cloudscale-lpg-2:testproject', - cluster_id: 'c-appuio-cloudscale-lpg-2', - label_appuio_io_node_class: 'flex', - namespace: 'testproject', - product: 'appuio_cloud_memory:c-appuio-cloudscale-lpg-2:cherry-pickers-inc:testproject:flex', - tenant_id: 'cherry-pickers-inc', - }), - value: 128 * 10, - }, - ], - }, - ], - }, - ], -} diff --git a/pkg/db/seeds/queries_test.go b/pkg/db/seeds/queries_test.go index 0042eb9..d884d47 100644 --- a/pkg/db/seeds/queries_test.go +++ b/pkg/db/seeds/queries_test.go @@ -38,10 +38,10 
@@ func runPromtool(t *testing.T, tmp string) { cmd.Stdout = &stdout assert.NoError(t, cmd.Run()) // Not using t.Log to keep formatting sane - fmt.Println("STDERR") - fmt.Println(stderr.String()) fmt.Println("STDOUT") fmt.Println(stdout.String()) + fmt.Println("STDERR") + fmt.Println(stderr.String()) } func renderJsonnet(t *testing.T, tFile string) string { From 1703eb443aa353135fce05bc502f5c460726475b Mon Sep 17 00:00:00 2001 From: Sebastian Widmer Date: Wed, 21 Dec 2022 11:08:22 +0100 Subject: [PATCH 2/5] Enforce jsonnet formatting --- Makefile | 3 ++- pkg/db/seeds/promtest/common.libsonnet | 8 ++++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/Makefile b/Makefile index b1e4785..41763ab 100644 --- a/Makefile +++ b/Makefile @@ -56,8 +56,9 @@ gen-golden: ensure-prometheus docker-compose-down ping-postgres ## Update golden @$(COMPOSE_CMD) $(compose_args) down .PHONY: fmt -fmt: ## Run 'go fmt' against code +fmt: ## Run 'go fmt' and `jsonnetfmt` against code go fmt ./... + find . 
\( -name '*.jsonnet' -o -name '*.libsonnet' \) -exec jsonnetfmt -i -- {} \; .PHONY: vet vet: ## Run 'go vet' against code diff --git a/pkg/db/seeds/promtest/common.libsonnet b/pkg/db/seeds/promtest/common.libsonnet index 0b56f4e..b1278de 100644 --- a/pkg/db/seeds/promtest/common.libsonnet +++ b/pkg/db/seeds/promtest/common.libsonnet @@ -1,9 +1,9 @@ local formatLabels = function(labels) - local lf = std.join(', ', std.map(function(l) '%s="%s"' % [ l, labels[l] ], std.objectFields(labels))); - "{%s}" % [ lf ]; + local lf = std.join(', ', std.map(function(l) '%s="%s"' % [l, labels[l]], std.objectFields(labels))); + '{%s}' % [lf]; local series = function(name, labels, values) { - series: name+formatLabels(labels), + series: name + formatLabels(labels), values: values, }; @@ -17,7 +17,7 @@ local test = function(name, series, query, samples) { { expr: query, eval_time: '1h', - exp_samples: if std.isArray(samples) then samples else [ samples ], + exp_samples: if std.isArray(samples) then samples else [samples], }, ], }; From 43d48a79bf6c9ac22098d47654e7c94d54fa33f6 Mon Sep 17 00:00:00 2001 From: Sebastian Widmer Date: Wed, 21 Dec 2022 11:58:54 +0100 Subject: [PATCH 3/5] less magic numbers --- .../promtest/appuio_cloud_memory.jsonnet | 38 ++++++++----------- 1 file changed, 15 insertions(+), 23 deletions(-) diff --git a/pkg/db/seeds/promtest/appuio_cloud_memory.jsonnet b/pkg/db/seeds/promtest/appuio_cloud_memory.jsonnet index 990cbe6..ff4a911 100644 --- a/pkg/db/seeds/promtest/appuio_cloud_memory.jsonnet +++ b/pkg/db/seeds/promtest/appuio_cloud_memory.jsonnet @@ -64,6 +64,10 @@ local baseCalculatedLabels = { tenant_id: 'cherry-pickers-inc', }; +// Constants from the query +local minMemoryRequestMib = 128; +local cloudscaleFairUseRatio = 4294967296; + { tests: [ c.test('minimal pod', @@ -71,8 +75,7 @@ local baseCalculatedLabels = { query, { labels: c.formatLabels(baseCalculatedLabels), - // Minimum value is 128MiB - value: 128 * 10, + value: minMemoryRequestMib * 10, }), 
c.test('pod with higher memory usage', baseSeries { @@ -99,26 +102,15 @@ local baseCalculatedLabels = { c.test('pod with CPU requests violating fair use', baseSeries { runningPodCPURequests+: { - values: '%sx10' % 0.5, - }, - }, - query, - { - labels: c.formatLabels(baseCalculatedLabels), - // See per cluster fair use ratio in query - value: 2.048E+04, - }), - c.test('pod with CPU requests violating fair use', - baseSeries { - runningPodCPURequests+: { - values: '%sx10' % 0.5, + values: '1x10', }, }, query, { labels: c.formatLabels(baseCalculatedLabels), // See per cluster fair use ratio in query - value: 2.048E+04, + // value: 4.096E+04, + value: (cloudscaleFairUseRatio / 1024 / 1024) * 10, }), c.test('non-running pods are not counted', baseSeries { @@ -142,7 +134,7 @@ local baseCalculatedLabels = { query, { labels: c.formatLabels(baseCalculatedLabels), - value: 128 * 10, + value: minMemoryRequestMib * 10, }), c.test('unrelated kube node label changes do not throw errors - there is an overlap since series go stale only after a few missed scrapes', baseSeries { @@ -162,7 +154,7 @@ local baseCalculatedLabels = { query, { labels: c.formatLabels(baseCalculatedLabels), - value: 128 * 10, + value: minMemoryRequestMib * 10, }), c.test('unrelated kube node label adds do not throw errors - there is an overlap since series go stale only after a few missed scrapes', baseSeries { @@ -181,7 +173,7 @@ local baseCalculatedLabels = { query, { labels: c.formatLabels(baseCalculatedLabels), - value: 128 * 10, + value: minMemoryRequestMib * 10, }), c.test('node class adds do not throw errors - there is an overlap since series go stale only after a few missed scrapes', baseSeries { @@ -197,17 +189,17 @@ local baseCalculatedLabels = { }, query, [ - // I'm not sure why this is 11 * 128, might have something to do with the intervals or intra minute switching + // I'm not sure why this is 11 * minMemoryRequestMib, might have something to do with the intervals or intra minute switching { 
labels: c.formatLabels(baseCalculatedLabels), - value: 128 * 8, + value: minMemoryRequestMib * 8, }, { labels: c.formatLabels(baseCalculatedLabels { label_appuio_io_node_class:: null, product: 'appuio_cloud_memory:c-appuio-cloudscale-lpg-2:cherry-pickers-inc:testproject:', }), - value: 128 * 3, + value: minMemoryRequestMib * 3, }, ]), @@ -223,7 +215,7 @@ local baseCalculatedLabels = { subMemoryQuery, { labels: c.formatLabels(baseCalculatedLabels), - value: (128 - (1 / 1024 / 1024)) * 10, + value: (minMemoryRequestMib - (1 / 1024 / 1024)) * 10, }), ], } From 9c4605350a959d9ea24109daf82bb8f3846a6b36 Mon Sep 17 00:00:00 2001 From: Sebastian Widmer Date: Wed, 21 Dec 2022 15:20:43 +0100 Subject: [PATCH 4/5] Reuse pod labels --- .../promtest/appuio_cloud_memory.jsonnet | 30 +++++++------------ 1 file changed, 11 insertions(+), 19 deletions(-) diff --git a/pkg/db/seeds/promtest/appuio_cloud_memory.jsonnet b/pkg/db/seeds/promtest/appuio_cloud_memory.jsonnet index ff4a911..702e38f 100644 --- a/pkg/db/seeds/promtest/appuio_cloud_memory.jsonnet +++ b/pkg/db/seeds/promtest/appuio_cloud_memory.jsonnet @@ -12,8 +12,6 @@ local commonLabels = { // One running pod, minimal (=1 byte) memory request and usage, no CPU request // 10 samples local baseSeries = { - local runningUID = '35e3a8b1-b46d-496c-b2b7-1b52953bf904', - flexNodeLabel: c.series('kube_node_labels', commonLabels { label_appuio_io_node_class: 'flex', label_kubernetes_io_hostname: 'flex-x666', @@ -23,35 +21,29 @@ local baseSeries = { namespace: 'testproject', label_appuio_io_organization: 'cherry-pickers-inc', }, '1x10'), - // Phases - runningPodPhase: c.series('kube_pod_status_phase', commonLabels { + + local podLbls = commonLabels { namespace: 'testproject', - phase: 'Running', pod: 'running-pod', - uid: runningUID, + uid: '35e3a8b1-b46d-496c-b2b7-1b52953bf904', + }, + // Phases + runningPodPhase: c.series('kube_pod_status_phase', podLbls { + phase: 'Running', }, '1x10'), // Requests - runningPodMemoryRequests: 
c.series('kube_pod_container_resource_requests', commonLabels { - namespace: 'testproject', - pod: 'running-pod', + runningPodMemoryRequests: c.series('kube_pod_container_resource_requests', podLbls { resource: 'memory', node: 'flex-x666', - uid: runningUID, }, '1x10'), - runningPodCPURequests: c.series('kube_pod_container_resource_requests', commonLabels { - namespace: 'testproject', - pod: 'running-pod', - node: 'flex-x666', + runningPodCPURequests: c.series('kube_pod_container_resource_requests', podLbls { resource: 'cpu', - uid: runningUID, + node: 'flex-x666', }, '0x10'), // Real usage - runningPodMemoryUsage: c.series('container_memory_working_set_bytes', commonLabels { + runningPodMemoryUsage: c.series('container_memory_working_set_bytes', podLbls { image: 'busybox', - namespace: 'testproject', - pod: 'running-pod', node: 'flex-x666', - uid: runningUID, }, '1x10'), }; From c853f0b38f6a15ec13894aef59c04d2728481462 Mon Sep 17 00:00:00 2001 From: Sebastian Widmer Date: Wed, 21 Dec 2022 15:22:52 +0100 Subject: [PATCH 5/5] Allow modifying series labels in the hierarchy --- .../promtest/appuio_cloud_memory.jsonnet | 62 +++++++++---------- pkg/db/seeds/promtest/common.libsonnet | 6 +- 2 files changed, 35 insertions(+), 33 deletions(-) diff --git a/pkg/db/seeds/promtest/appuio_cloud_memory.jsonnet b/pkg/db/seeds/promtest/appuio_cloud_memory.jsonnet index 702e38f..8003590 100644 --- a/pkg/db/seeds/promtest/appuio_cloud_memory.jsonnet +++ b/pkg/db/seeds/promtest/appuio_cloud_memory.jsonnet @@ -130,18 +130,18 @@ local cloudscaleFairUseRatio = 4294967296; }), c.test('unrelated kube node label changes do not throw errors - there is an overlap since series go stale only after a few missed scrapes', baseSeries { - flexNodeLabel: c.series('kube_node_labels', commonLabels { - label_csi_driver_id: 'A09B8DDE-5435-4D74-923C-4866513E8F02', - label_appuio_io_node_class: 'flex', - label_kubernetes_io_hostname: 'flex-x666', - node: 'flex-x666', - }, '1x10 _x10 stale'), - 
flexNodeLabelUpdated: c.series('kube_node_labels', commonLabels { - label_csi_driver_id: '18539CC3-0B6C-4E72-82BD-90A9BEF7D807', - label_appuio_io_node_class: 'flex', - label_kubernetes_io_hostname: 'flex-x666', - node: 'flex-x666', - }, '_x5 1x15'), + flexNodeLabel+: { + _labels+:: { + label_csi_driver_id: 'A09B8DDE-5435-4D74-923C-4866513E8F02', + }, + values: '1x10 _x10 stale', + }, + flexNodeLabelUpdated: self.flexNodeLabel { + _labels+:: { + label_csi_driver_id: '18539CC3-0B6C-4E72-82BD-90A9BEF7D807', + }, + values: '_x5 1x15', + }, }, query, { @@ -150,17 +150,15 @@ local cloudscaleFairUseRatio = 4294967296; }), c.test('unrelated kube node label adds do not throw errors - there is an overlap since series go stale only after a few missed scrapes', baseSeries { - flexNodeLabel: c.series('kube_node_labels', commonLabels { - label_appuio_io_node_class: 'flex', - label_kubernetes_io_hostname: 'flex-x666', - node: 'flex-x666', - }, '1x10 _x10 stale'), - flexNodeLabelUpdated: c.series('kube_node_labels', commonLabels { - label_csi_driver_id: '18539CC3-0B6C-4E72-82BD-90A9BEF7D807', - label_appuio_io_node_class: 'flex', - label_kubernetes_io_hostname: 'flex-x666', - node: 'flex-x666', - }, '_x5 1x15'), + flexNodeLabel+: { + values: '1x10 _x10 stale', + }, + flexNodeLabelUpdated: self.flexNodeLabel { + _labels+:: { + label_csi_driver_id: '18539CC3-0B6C-4E72-82BD-90A9BEF7D807', + }, + values: '_x5 1x15', + }, }, query, { @@ -169,15 +167,15 @@ local cloudscaleFairUseRatio = 4294967296; }), c.test('node class adds do not throw errors - there is an overlap since series go stale only after a few missed scrapes', baseSeries { - flexNodeLabel: c.series('kube_node_labels', commonLabels { - label_kubernetes_io_hostname: 'flex-x666', - node: 'flex-x666', - }, '1x10 _x10 stale'), - flexNodeLabelUpdated: c.series('kube_node_labels', commonLabels { - label_appuio_io_node_class: 'flex', - label_kubernetes_io_hostname: 'flex-x666', - node: 'flex-x666', - }, '_x5 1x15'), + 
flexNodeLabel+: { + _labels+:: { + label_appuio_io_node_class:: null, + }, + values: '1x10 _x10 stale', + }, + flexNodeLabelUpdated: super.flexNodeLabel { + values: '_x5 1x15', + }, }, query, [ diff --git a/pkg/db/seeds/promtest/common.libsonnet b/pkg/db/seeds/promtest/common.libsonnet index b1278de..b3dd5e7 100644 --- a/pkg/db/seeds/promtest/common.libsonnet +++ b/pkg/db/seeds/promtest/common.libsonnet @@ -2,8 +2,12 @@ local formatLabels = function(labels) local lf = std.join(', ', std.map(function(l) '%s="%s"' % [l, labels[l]], std.objectFields(labels))); '{%s}' % [lf]; +// returns a series object with correctly formatted labels. +// labels can be modified post creation using `_labels`. local series = function(name, labels, values) { - series: name + formatLabels(labels), + _name:: name, + _labels:: labels, + series: self._name + formatLabels(self._labels), values: values, };