
Commit 6217d62

Authored Sep 15, 2022
test: Flakiness Fix for Kubelet Configuration (#2511)
1 parent ef4111a · commit 6217d62

File tree (5 files changed: +80 −57 lines changed)

  .github/ISSUE_TEMPLATE/bug.yaml
  Makefile
  pkg/controllers/consolidation/suite_test.go
  test/suites/integration/ttl_test.go → test/suites/integration/emptiness_test.go
  test/suites/integration/kubelet_config_test.go
 

‎.github/ISSUE_TEMPLATE/bug.yaml

+6 −2

@@ -2,13 +2,17 @@ name: Bug Report
 description: Report a bug in Karpenter
 labels: bug
 body:
-  - type: input
+  - type: textarea
     id: version
     attributes:
       label: Version
       description: |
         https://github.com/aws/karpenter/releases
-      placeholder: ex. kubectl version | grep Server
+      value: |
+        <!-- helm ls -A --all -o json | jq '.[] | select(.name=="karpenter") | .app_version' -r -->
+        __Karpenter Version:__ v0.0.0
+        <!-- kubectl version | grep Server -->
+        __Kubernetes Version:__ v1.0.0
     validations:
       required: true
 

‎Makefile

+3 −2

@@ -1,13 +1,13 @@
 export K8S_VERSION ?= 1.23.x
 export KUBEBUILDER_ASSETS ?= ${HOME}/.kubebuilder/bin
+export CLUSTER_NAME ?= $(shell kubectl config view --minify -o jsonpath='{.clusters[].name}' | rev | cut -d"/" -f1 | rev | cut -d"." -f1)
 
 ## Inject the app version into project.Version
 LDFLAGS ?= -ldflags=-X=github.com/aws/karpenter/pkg/utils/project.Version=$(shell git describe --tags --always)
 GOFLAGS ?= $(LDFLAGS)
 WITH_GOFLAGS = GOFLAGS="$(GOFLAGS)"
 
 ## Extra helm options
-CLUSTER_NAME ?= $(shell kubectl config view --minify -o jsonpath='{.clusters[].name}' | rev | cut -d"/" -f1 | rev | cut -d"." -f1)
 CLUSTER_ENDPOINT ?= $(shell kubectl config view --minify -o jsonpath='{.clusters[].cluster.server}')
 AWS_ACCOUNT_ID ?= $(shell aws sts get-caller-identity --query Account --output text)
 KARPENTER_IAM_ROLE_ARN ?= arn:aws:iam::${AWS_ACCOUNT_ID}:role/${CLUSTER_NAME}-karpenter
@@ -20,6 +20,7 @@ TEST_FILTER ?= .*
 
 # CR for local builds of Karpenter
 SYSTEM_NAMESPACE ?= karpenter
+KARPENTER_VERSION ?= $(shell git tag --sort=committerdate | tail -1)
 KO_DOCKER_REPO ?= ${AWS_ACCOUNT_ID}.dkr.ecr.${AWS_DEFAULT_REGION}.amazonaws.com/karpenter
 GETTING_STARTED_SCRIPT_DIR = website/content/en/preview/getting-started/getting-started-with-eksctl/scripts
 
@@ -49,7 +50,7 @@ battletest: ## Run randomized, racing, code coveraged, tests
 
 e2etests: ## Run the e2e suite against your local cluster
 	go clean -testcache
-	CLUSTER_NAME=${CLUSTER_NAME} go test -p 1 -timeout 180m -v ./test/suites/... -run=${TEST_FILTER}
+	go test -p 1 -timeout 180m -v ./test/suites/... -run=${TEST_FILTER}
 
 benchmark:
 	go test -tags=test_performance -run=NoTests -bench=. ./...

‎pkg/controllers/consolidation/suite_test.go

+1 −1

@@ -1367,7 +1367,7 @@ var _ = Describe("Parallelization", func() {
 			nodes := &v1.NodeList{}
 			g.Expect(env.Client.List(ctx, nodes)).To(Succeed())
 			g.Expect(len(nodes.Items)).To(Equal(2))
-		}).Should(Succeed())
+		}, time.Second*10).Should(Succeed())
 
 		// Add a new pending pod that should schedule while node is not yet deleted
 		pods := ExpectProvisionedNoBinding(ctx, env.Client, provisioningController, test.UnschedulablePod())
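The change above passes an explicit timeout to Gomega's Eventually; without one, Eventually gives up after its default one-second window, which is what made the parallelization test flaky when the second node took longer to appear. A minimal, self-contained sketch of the same pattern, assuming a plain go-test context (the test name and the checkNodes closure are illustrative stand-ins, not code from this commit):

package example_test

import (
	"testing"
	"time"

	. "github.com/onsi/gomega"
)

func TestEventuallyWithTimeout(t *testing.T) {
	gt := NewWithT(t)

	// checkNodes stands in for the assertion the real test retries
	// (listing nodes and expecting exactly two of them).
	checkNodes := func(g Gomega) {
		g.Expect(2).To(Equal(2))
	}

	// Passing time.Second*10 overrides Gomega's default 1s timeout, giving
	// the cluster state time to converge before the assertion gives up.
	// A polling interval can be passed as a further argument if needed.
	gt.Eventually(checkNodes, time.Second*10).Should(Succeed())
}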

test/suites/integration/ttl_test.go → test/suites/integration/emptiness_test.go

+1 −33

@@ -29,7 +29,7 @@ import (
 	. "github.com/onsi/gomega"
 )
 
-var _ = Describe("TTL Empty", func() {
+var _ = Describe("Emptiness", func() {
 	BeforeEach(func() { env.BeforeEach() })
 	AfterEach(func() { env.AfterEach() })
 
@@ -60,35 +60,3 @@ var _ = Describe("TTL Empty", func() {
 		}
 	})
 })
-
-var _ = Describe("TTL Expired", func() {
-	It("should terminate an expired node", func() {
-		provider := test.AWSNodeTemplate(v1alpha1.AWSNodeTemplateSpec{AWS: awsv1alpha1.AWS{
-			SecurityGroupSelector: map[string]string{"karpenter.sh/discovery": env.ClusterName},
-			SubnetSelector:        map[string]string{"karpenter.sh/discovery": env.ClusterName},
-		}})
-		provisioner := test.Provisioner(test.ProvisionerOptions{
-			ProviderRef: &v1alpha5.ProviderRef{Name: provider.Name},
-		})
-
-		const numPods = 1
-		deployment := test.Deployment(test.DeploymentOptions{Replicas: numPods})
-
-		env.ExpectCreated(provisioner, provider, deployment)
-		env.EventuallyExpectHealthyPodCount(labels.SelectorFromSet(deployment.Spec.Selector.MatchLabels), numPods)
-		env.ExpectCreatedNodeCount("==", 1)
-
-		persistedDep := deployment.DeepCopy()
-		deployment.Spec.Replicas = ptr.Int32(0)
-		Expect(env.Client.Patch(env, deployment, client.MergeFrom(persistedDep))).To(Succeed())
-
-		persistedProv := provisioner.DeepCopy()
-		provisioner.Spec.TTLSecondsUntilExpired = ptr.Int64(10)
-		Expect(env.Client.Patch(env, provisioner, client.MergeFrom(persistedProv))).To(Succeed())
-
-		nodes := env.Monitor.GetCreatedNodes()
-		for i := range nodes {
-			env.EventuallyExpectNotFound(&nodes[i])
-		}
-	})
-})

‎test/suites/integration/kubelet_config_test.go

+69 −19

@@ -15,26 +15,24 @@ limitations under the License.
 package integration_test
 
 import (
+	"math"
+
 	. "github.com/onsi/ginkgo/v2"
 	. "github.com/onsi/gomega"
 	appsv1 "k8s.io/api/apps/v1"
 	v1 "k8s.io/api/core/v1"
+	"k8s.io/apimachinery/pkg/util/sets"
 	"knative.dev/pkg/ptr"
 
 	"github.com/aws/karpenter/pkg/apis/awsnodetemplate/v1alpha1"
 	"github.com/aws/karpenter/pkg/apis/provisioning/v1alpha5"
 	awsv1alpha1 "github.com/aws/karpenter/pkg/cloudprovider/aws/apis/v1alpha1"
+	"github.com/aws/karpenter/pkg/scheduling"
 	"github.com/aws/karpenter/pkg/test"
 )
 
 var _ = Describe("KubeletConfiguration Overrides", func() {
 	It("should schedule pods onto separate nodes when maxPods is set", func() {
-		// Get the total number of daemonsets so that we see how many pods will be taken up
-		// by daemonset overhead
-		dsList := &appsv1.DaemonSetList{}
-		Expect(env.Client.List(env.Context, dsList)).To(Succeed())
-		dsCount := len(dsList.Items)
-
 		provider := test.AWSNodeTemplate(v1alpha1.AWSNodeTemplateSpec{AWS: awsv1alpha1.AWS{
 			SecurityGroupSelector: map[string]string{"karpenter.sh/discovery": env.ClusterName},
 			SubnetSelector:        map[string]string{"karpenter.sh/discovery": env.ClusterName},
@@ -43,26 +41,36 @@ var _ = Describe("KubeletConfiguration Overrides", func() {
 		// MaxPods needs to account for the daemonsets that will run on the nodes
 		provisioner := test.Provisioner(test.ProvisionerOptions{
 			ProviderRef: &v1alpha5.ProviderRef{Name: provider.Name},
-			Kubelet: &v1alpha5.KubeletConfiguration{
-				MaxPods: ptr.Int32(1 + int32(dsCount)),
+			Requirements: []v1.NodeSelectorRequirement{
+				{
+					Key:      v1.LabelOSStable,
+					Operator: v1.NodeSelectorOpIn,
+					Values:   []string{string(v1.Linux)},
+				},
 			},
 		})
 
+		// Get the DS pod count and use it to calculate the DS pod overhead
+		dsCount := getDaemonSetPodCount(provisioner)
+		provisioner.Spec.KubeletConfiguration = &v1alpha5.KubeletConfiguration{
+			MaxPods: ptr.Int32(1 + int32(dsCount)),
+		}
+
 		pods := []*v1.Pod{test.Pod(), test.Pod(), test.Pod()}
 		env.ExpectCreated(provisioner, provider)
 		for _, pod := range pods {
 			env.ExpectCreated(pod)
 		}
 		env.EventuallyExpectHealthy(pods...)
 		env.ExpectCreatedNodeCount("==", 3)
+
+		nodeNames := sets.NewString()
+		for _, pod := range pods {
+			nodeNames.Insert(pod.Spec.NodeName)
+		}
+		Expect(len(nodeNames)).To(BeNumerically("==", 3))
 	})
 	It("should schedule pods onto separate nodes when podsPerCore is set", func() {
-		// Get the total number of daemonsets so that we see how many pods will be taken up
-		// by daemonset overhead
-		dsList := &appsv1.DaemonSetList{}
-		Expect(env.Client.List(env.Context, dsList)).To(Succeed())
-		dsCount := len(dsList.Items)
-
 		provider := test.AWSNodeTemplate(v1alpha1.AWSNodeTemplateSpec{AWS: awsv1alpha1.AWS{
 			SecurityGroupSelector: map[string]string{"karpenter.sh/discovery": env.ClusterName},
 			SubnetSelector:        map[string]string{"karpenter.sh/discovery": env.ClusterName},
@@ -71,25 +79,46 @@ var _ = Describe("KubeletConfiguration Overrides", func() {
 		// This will have 4 pods available on each node (2 taken by daemonset pods)
 		provisioner := test.Provisioner(test.ProvisionerOptions{
 			ProviderRef: &v1alpha5.ProviderRef{Name: provider.Name},
-			Kubelet: &v1alpha5.KubeletConfiguration{
-				PodsPerCore: ptr.Int32(2 + int32(dsCount)),
-			},
 			Requirements: []v1.NodeSelectorRequirement{
 				{
 					Key:      awsv1alpha1.LabelInstanceCPU,
 					Operator: v1.NodeSelectorOpIn,
-					Values:   []string{"1"},
+					Values:   []string{"2"},
+				},
+				{
+					Key:      v1.LabelOSStable,
+					Operator: v1.NodeSelectorOpIn,
+					Values:   []string{string(v1.Linux)},
 				},
 			},
 		})
 
+		// Get the DS pod count and use it to calculate the DS pod overhead
+		// We calculate podsPerCore to split the test pods and the DS pods between two nodes:
+		//   1. If # of DS pods is odd, we will have i.e. ceil((3+2)/2) = 3
+		//      Since we restrict node to two cores, we will allow 6 pods. One node will have 3
+		//      DS pods and 3 test pods. Other node will have 1 test pod and 3 DS pods
+		//   2. If # of DS pods is even, we will have i.e. ceil((4+2)/2) = 3
+		//      Since we restrict node to two cores, we will allow 6 pods. Both nodes will have
+		//      4 DS pods and 2 test pods.
+		dsCount := getDaemonSetPodCount(provisioner)
+		provisioner.Spec.KubeletConfiguration = &v1alpha5.KubeletConfiguration{
+			PodsPerCore: ptr.Int32(int32(math.Ceil(float64(2+dsCount) / 2))),
+		}
+
 		pods := []*v1.Pod{test.Pod(), test.Pod(), test.Pod(), test.Pod()}
 		env.ExpectCreated(provisioner, provider)
 		for _, pod := range pods {
 			env.ExpectCreated(pod)
 		}
 		env.EventuallyExpectHealthy(pods...)
 		env.ExpectCreatedNodeCount("==", 2)
+
+		nodeNames := sets.NewString()
+		for _, pod := range pods {
+			nodeNames.Insert(pod.Spec.NodeName)
+		}
+		Expect(len(nodeNames)).To(BeNumerically("==", 2))
 	})
 	It("should ignore podsPerCore value when Bottlerocket is used", func() {
 		provider := test.AWSNodeTemplate(v1alpha1.AWSNodeTemplateSpec{AWS: awsv1alpha1.AWS{
@@ -108,7 +137,7 @@ var _ = Describe("KubeletConfiguration Overrides", func() {
 				{
 					Key:      awsv1alpha1.LabelInstanceCPU,
 					Operator: v1.NodeSelectorOpIn,
-					Values:   []string{"1"},
+					Values:   []string{"2"},
 				},
 			},
 		})
@@ -122,3 +151,24 @@ var _ = Describe("KubeletConfiguration Overrides", func() {
 		env.ExpectCreatedNodeCount("==", 1)
 	})
 })
+
+// Performs the same logic as the scheduler to get the number of daemonset
+// pods that we estimate we will need to schedule as overhead to each node
+func getDaemonSetPodCount(provisioner *v1alpha5.Provisioner) int {
+	daemonSetList := &appsv1.DaemonSetList{}
+	Expect(env.Client.List(env.Context, daemonSetList)).To(Succeed())
+
+	count := 0
+	for _, daemonSet := range daemonSetList.Items {
+		p := &v1.Pod{Spec: daemonSet.Spec.Template.Spec}
+		nodeTemplate := scheduling.NewNodeTemplate(provisioner)
+		if err := nodeTemplate.Taints.Tolerates(p); err != nil {
+			continue
+		}
+		if err := nodeTemplate.Requirements.Compatible(scheduling.NewPodRequirements(p)); err != nil {
+			continue
+		}
+		count++
+	}
+	return count
+}
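The comment block in the podsPerCore test encodes a small calculation: the provisioner pins nodes to 2-vCPU instances, so the kubelet admits podsPerCore × 2 pods per node, and ceil((2 + dsCount) / 2) leaves room for the daemonset overhead plus two test pods per node, which splits the four test pods across exactly two nodes. A standalone sketch of that arithmetic, with example daemonset counts chosen for illustration (the podsPerCore helper below is hypothetical, not part of the commit):

package main

import (
	"fmt"
	"math"
)

// podsPerCore mirrors the arithmetic in the test above: spread the daemonset
// overhead plus two test pods per node across nodes restricted to two cores.
func podsPerCore(dsCount int) int32 {
	return int32(math.Ceil(float64(2+dsCount) / 2))
}

func main() {
	for _, dsCount := range []int{3, 4} {
		ppc := podsPerCore(dsCount)
		// With 2 cores per node, the kubelet allows ppc*2 pods per node.
		fmt.Printf("dsCount=%d -> podsPerCore=%d -> pods per 2-core node=%d\n",
			dsCount, ppc, ppc*2)
	}
}

For dsCount = 3 this prints podsPerCore = 3 (6 pods per node: one node takes 3 daemonset plus 3 test pods, the other 3 daemonset plus 1 test pod); for dsCount = 4 it also prints 3 (each node takes 4 daemonset plus 2 test pods), matching the two cases enumerated in the test comment.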
