support scheduler_plugin_execution_duration_seconds in scheduler_perf #124578

Merged
19 changes: 18 additions & 1 deletion pkg/scheduler/metrics/metrics.go
@@ -38,7 +38,24 @@ const (
 	Binding = "binding"
 )
 
-// Below are possible values for the extension_point label.
+// ExtentionPoints is a list of possible values for the extension_point label.
+var ExtentionPoints = []string{
+	PreFilter,
+	Filter,
+	PreFilterExtensionAddPod,
+	PreFilterExtensionRemovePod,
+	PostFilter,
+	PreScore,
+	Score,
+	ScoreExtensionNormalize,
+	PreBind,
+	Bind,
+	PostBind,
+	Reserve,
+	Unreserve,
+	Permit,
+}
 
 const (
 	PreFilter = "PreFilter"
 	Filter = "Filter"
61 changes: 53 additions & 8 deletions test/integration/scheduler_perf/scheduler_perf.go
@@ -53,6 +53,7 @@ import (
 	"k8s.io/kubernetes/pkg/scheduler/apis/config"
 	"k8s.io/kubernetes/pkg/scheduler/apis/config/scheme"
 	"k8s.io/kubernetes/pkg/scheduler/apis/config/validation"
+	"k8s.io/kubernetes/pkg/scheduler/framework/plugins/names"
 	frameworkruntime "k8s.io/kubernetes/pkg/scheduler/framework/runtime"
 	"k8s.io/kubernetes/pkg/scheduler/metrics"
 	"k8s.io/kubernetes/test/integration/framework"
@@ -90,23 +91,65 @@ const (
 	configFile = "config/performance-config.yaml"
 	extensionPointsLabelName = "extension_point"
 	resultLabelName = "result"
+	pluginLabelName = "plugin"
 )

 var (
 	defaultMetricsCollectorConfig = metricsCollectorConfig{
-		Metrics: map[string]*labelValues{
+		Metrics: map[string][]*labelValues{

Review thread:

Member: I wonder if the metrics in scheduler-perf are under control like other metrics, that is, whether we should list the stable metrics:
https://kubernetes.io/docs/reference/instrumentation/metrics/#list-of-stable-kubernetes-metrics
WDYT? @logicalhan

Member (@utam0k, May 2, 2024): This is a bit outside the responsibility of this PR, but let me ask you a question for our future.

Member: I don't think we're going to be able to parse this file..

Member: It looks... challenging from a static analysis perspective.

Member (Author): Probably I don't understand what @utam0k means. Why should we in the first place, actually? All metrics used in scheduler_perf are existing metrics that are exposed from the scheduler, not something like scheduler_perf's original metrics.

Member: Sorry, you are right.

"scheduler_framework_extension_point_duration_seconds": {
label: extensionPointsLabelName,
values: []string{"Filter", "Score"},
{
label: extensionPointsLabelName,
values: metrics.ExtentionPoints,
},
},
"scheduler_scheduling_attempt_duration_seconds": {
label: resultLabelName,
values: []string{metrics.ScheduledResult, metrics.UnschedulableResult, metrics.ErrorResult},
{
label: resultLabelName,
values: []string{metrics.ScheduledResult, metrics.UnschedulableResult, metrics.ErrorResult},
},
},
"scheduler_pod_scheduling_duration_seconds": nil,
"scheduler_plugin_execution_duration_seconds": {
{
label: pluginLabelName,
values: PluginNames,
},
{
label: extensionPointsLabelName,
values: metrics.ExtentionPoints,
},
},
"scheduler_pod_scheduling_duration_seconds": nil,
"scheduler_pod_scheduling_sli_duration_seconds": nil,
},
}

+	// PluginNames is the names of the plugins that scheduler_perf collects metrics for.
+	// We export this variable because people outside k/k may want to add their own custom plugins.
+	PluginNames = []string{
+		names.PrioritySort,
+		names.DefaultBinder,
+		names.DefaultPreemption,
+		names.DynamicResources,
+		names.ImageLocality,
+		names.InterPodAffinity,
+		names.NodeAffinity,
+		names.NodeName,
+		names.NodePorts,
+		names.NodeResourcesBalancedAllocation,
+		names.NodeResourcesFit,
+		names.NodeUnschedulable,
+		names.NodeVolumeLimits,
+		names.AzureDiskLimits,
+		names.CinderLimits,
+		names.EBSLimits,
+		names.GCEPDLimits,
+		names.PodTopologySpread,
+		names.SchedulingGates,
+		names.TaintToleration,
+		names.VolumeBinding,
+		names.VolumeRestrictions,
+		names.VolumeZone,
+	}
 )

 // testCase defines a set of test cases that intends to test the performance of
@@ -668,7 +711,9 @@ func withCleanup(tCtx ktesting.TContext, enabled bool) ktesting.TContext {
 var perfSchedulingLabelFilter = flag.String("perf-scheduling-label-filter", "performance", "comma-separated list of labels which a testcase must have (no prefix or +) or must not have (-), used by BenchmarkPerfScheduling")
 
 // RunBenchmarkPerfScheduling runs the scheduler performance tests.
-// Optionally, you can pass your own scheduler plugin via outOfTreePluginRegistry.
+//
+// You can pass your own scheduler plugins via outOfTreePluginRegistry.
+// Also, you may want to add your plugins to the PluginNames variable in this package.
 func RunBenchmarkPerfScheduling(b *testing.B, outOfTreePluginRegistry frameworkruntime.Registry) {
 	testCases, err := getTestCases(configFile)
 	if err != nil {
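
The registry argument and the exported PluginNames list are the two hooks an out-of-tree consumer needs: the registry lets the scheduler instantiate the plugin, and PluginNames tells the metrics collector which plugin labels to keep for scheduler_plugin_execution_duration_seconds. Below is a minimal sketch of wiring them together; the customPlugin type, its factory, and the benchmark function are all hypothetical, assuming the frameworkruntime.PluginFactory signature current at the time of this PR.

package benchmark_test

import (
	"context"
	"testing"

	"k8s.io/apimachinery/pkg/runtime"
	"k8s.io/kubernetes/pkg/scheduler/framework"
	frameworkruntime "k8s.io/kubernetes/pkg/scheduler/framework/runtime"
	benchmark "k8s.io/kubernetes/test/integration/scheduler_perf"
)

// customPlugin is a hypothetical out-of-tree plugin; a real one would also
// implement extension-point interfaces such as framework.FilterPlugin.
type customPlugin struct{}

func (*customPlugin) Name() string { return "CustomPlugin" }

func newCustomPlugin(_ context.Context, _ runtime.Object, _ framework.Handle) (framework.Plugin, error) {
	return &customPlugin{}, nil
}

func BenchmarkPerfSchedulingWithCustomPlugin(b *testing.B) {
	// List the plugin name so that the collector also filters
	// scheduler_plugin_execution_duration_seconds by this plugin.
	benchmark.PluginNames = append(benchmark.PluginNames, "CustomPlugin")

	// Pass the plugin factory so the scheduler can instantiate it.
	benchmark.RunBenchmarkPerfScheduling(b, frameworkruntime.Registry{
		"CustomPlugin": newCustomPlugin,
	})
}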
36 changes: 30 additions & 6 deletions test/integration/scheduler_perf/util.go
@@ -247,7 +247,7 @@ type labelValues struct {
 // metricsCollectorConfig is the config to be marshalled to YAML config file.
 // NOTE: The mapping here means only one filter is supported; any of the values listed in `values` can be collected.
 type metricsCollectorConfig struct {
-	Metrics map[string]*labelValues
+	Metrics map[string][]*labelValues
 }

 // metricsCollector collects metrics from legacyregistry.DefaultGatherer.Gather() endpoint.
@@ -270,17 +270,15 @@ func (*metricsCollector) run(tCtx ktesting.TContext) {
 
 func (pc *metricsCollector) collect() []DataItem {
 	var dataItems []DataItem
-	for metric, labelVals := range pc.Metrics {
+	for metric, labelValsSlice := range pc.Metrics {
 		// no filter is specified, aggregate all the metrics within the same metricFamily.
-		if labelVals == nil {
+		if labelValsSlice == nil {
 			dataItem := collectHistogramVec(metric, pc.labels, nil)
 			if dataItem != nil {
 				dataItems = append(dataItems, *dataItem)
 			}
 		} else {
-			// fetch the metric from metricFamily which match each of the lvMap.
-			for _, value := range labelVals.values {
-				lvMap := map[string]string{labelVals.label: value}
+			for _, lvMap := range uniqueLVCombos(labelValsSlice) {
 				dataItem := collectHistogramVec(metric, pc.labels, lvMap)
 				if dataItem != nil {
 					dataItems = append(dataItems, *dataItem)
@@ -291,6 +289,32 @@ func (pc *metricsCollector) collect() []DataItem {
 	return dataItems
 }

+// uniqueLVCombos lists all possible combinations of label values.
+// e.g., if there are 3 labelValues, each of which has 2 values,
+// the result would be {A: a1, B: b1, C: c1}, {A: a2, B: b1, C: c1}, {A: a1, B: b2, C: c1}, ... (2^3 = 8 combinations).
+func uniqueLVCombos(lvs []*labelValues) []map[string]string {
+	if len(lvs) == 0 {
+		return []map[string]string{{}}
+	}
+
+	remainingCombos := uniqueLVCombos(lvs[1:])
+
+	results := make([]map[string]string, 0)
+
+	current := lvs[0]
+	for _, value := range current.values {
+		for _, combo := range remainingCombos {
+			newCombo := make(map[string]string, len(combo)+1)
+			for k, v := range combo {
+				newCombo[k] = v
+			}
+			newCombo[current.label] = value
+			results = append(results, newCombo)
+		}
+	}
+	return results
+}

 func collectHistogramVec(metric string, labels map[string]string, lvMap map[string]string) *DataItem {
 	vec, err := testutil.GetHistogramVecFromGatherer(legacyregistry.DefaultGatherer, metric, lvMap)
 	if err != nil {
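
With a slice of labelValues per metric, uniqueLVCombos expands the entries into the Cartesian product of their values, so for scheduler_plugin_execution_duration_seconds the collector probes every (plugin, extension_point) pair; with the 23 plugin names and 14 extension points listed above, that is 23 x 14 = 322 combinations. Here is a sketch of the expansion, not part of the diff: it reuses the package's unexported labelValues type and label-name constants, with the value lists trimmed to two entries each.

package benchmark

import "fmt"

// ExampleUniqueLVCombos shows how the collector expands a two-label config
// into concrete label filters, in the order uniqueLVCombos produces them.
func ExampleUniqueLVCombos() {
	combos := uniqueLVCombos([]*labelValues{
		{label: pluginLabelName, values: []string{"NodeAffinity", "TaintToleration"}},
		{label: extensionPointsLabelName, values: []string{"Filter", "Score"}},
	})
	for _, c := range combos {
		fmt.Println(c[pluginLabelName], c[extensionPointsLabelName])
	}
	// Output:
	// NodeAffinity Filter
	// NodeAffinity Score
	// TaintToleration Filter
	// TaintToleration Score
}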
87 changes: 87 additions & 0 deletions test/integration/scheduler_perf/util_test.go
@@ -0,0 +1,87 @@
/*
Copyright 2024 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package benchmark

import (
	"reflect"
	"testing"
)

func Test_uniqueLVCombos(t *testing.T) {
	type args struct {
		lvs []*labelValues
	}
	tests := []struct {
		name string
		args args
		want []map[string]string
	}{
		{
			name: "empty input",
			args: args{
				lvs: []*labelValues{},
			},
			want: []map[string]string{{}},
		},
		{
			name: "single label, multiple values",
			args: args{
				lvs: []*labelValues{
					{"A", []string{"a1", "a2"}},
				},
			},
			want: []map[string]string{
				{"A": "a1"},
				{"A": "a2"},
			},
		},
		{
			name: "multiple labels, single value each",
			args: args{
				lvs: []*labelValues{
					{"A", []string{"a1"}},
					{"B", []string{"b1"}},
				},
			},
			want: []map[string]string{
				{"A": "a1", "B": "b1"},
			},
		},
		{
			name: "multiple labels, multiple values",
			args: args{
				lvs: []*labelValues{
					{"A", []string{"a1", "a2"}},
					{"B", []string{"b1", "b2"}},
				},
			},
			want: []map[string]string{
				{"A": "a1", "B": "b1"},
				{"A": "a1", "B": "b2"},
				{"A": "a2", "B": "b1"},
				{"A": "a2", "B": "b2"},
			},
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			if got := uniqueLVCombos(tt.args.lvs); !reflect.DeepEqual(got, tt.want) {
				t.Errorf("uniqueLVCombos() = %v, want %v", got, tt.want)
			}
		})
	}
}