From ee83ca74af6a155e0338d0087743bc76035aacea Mon Sep 17 00:00:00 2001 From: Travis Nielsen Date: Fri, 3 Dec 2021 15:35:44 -0700 Subject: [PATCH 1/2] osd: truncate osd prepare job names further In K8s 1.22 there is a bug in the job name generation that the job name is truncated an additional 10 characters. This can cause an issue in the generated pod name if it then ends in a non-alphanumeric character. In that case, we more aggressively generate a hashed job name. Signed-off-by: Travis Nielsen --- pkg/operator/ceph/cluster/cleanup.go | 2 +- .../ceph/cluster/osd/provision_spec.go | 2 +- pkg/operator/k8sutil/k8sutil.go | 18 ++++++++++++++- pkg/operator/k8sutil/k8sutil_test.go | 22 +++++++++++++++++++ 4 files changed, 41 insertions(+), 3 deletions(-) diff --git a/pkg/operator/ceph/cluster/cleanup.go b/pkg/operator/ceph/cluster/cleanup.go index 6d8eff2a95e7..a1bc76254aae 100644 --- a/pkg/operator/ceph/cluster/cleanup.go +++ b/pkg/operator/ceph/cluster/cleanup.go @@ -72,7 +72,7 @@ func (c *ClusterController) startClusterCleanUp(context context.Context, cluster func (c *ClusterController) startCleanUpJobs(cluster *cephv1.CephCluster, cephHosts []string, monSecret, clusterFSID string) { for _, hostName := range cephHosts { logger.Infof("starting clean up job on node %q", hostName) - jobName := k8sutil.TruncateNodeName("cluster-cleanup-job-%s", hostName) + jobName := k8sutil.TruncateNodeNameForJob("cluster-cleanup-job-%s", hostName) podSpec := c.cleanUpJobTemplateSpec(cluster, monSecret, clusterFSID) podSpec.Spec.NodeSelector = map[string]string{v1.LabelHostname: hostName} labels := controller.AppLabels(CleanupAppName, cluster.Namespace) diff --git a/pkg/operator/ceph/cluster/osd/provision_spec.go b/pkg/operator/ceph/cluster/osd/provision_spec.go index 18722fd4d0ee..3ee7d2f78702 100644 --- a/pkg/operator/ceph/cluster/osd/provision_spec.go +++ b/pkg/operator/ceph/cluster/osd/provision_spec.go @@ -55,7 +55,7 @@ func (c *Cluster) makeJob(osdProps osdProperties, provisionConfig *provisionConf job := &batch.Job{ ObjectMeta: metav1.ObjectMeta{ - Name: k8sutil.TruncateNodeName(prepareAppNameFmt, osdProps.crushHostname), + Name: k8sutil.TruncateNodeNameForJob(prepareAppNameFmt, osdProps.crushHostname), Namespace: c.clusterInfo.Namespace, Labels: map[string]string{ k8sutil.AppAttr: prepareAppName, diff --git a/pkg/operator/k8sutil/k8sutil.go b/pkg/operator/k8sutil/k8sutil.go index 20c6fbec2109..737203a67bd5 100644 --- a/pkg/operator/k8sutil/k8sutil.go +++ b/pkg/operator/k8sutil/k8sutil.go @@ -85,6 +85,17 @@ func Hash(s string) string { return hex.EncodeToString(h[:16]) } +// TruncateNodeNameForJob hashes the nodeName in case it would case the name to be longer than 63 characters +// and avoids for a K8s 1.22 bug in the job pod name generation. If the job name contains a . or - in a certain +// position, the pod will fail to create. +func TruncateNodeNameForJob(format, nodeName string) string { + // In k8s 1.22, the job name is truncated an additional 10 characters which can cause an issue + // in the generated pod name if it then ends in a non-alphanumeric character. In that case, + // we more aggressively generate a hashed job name. + jobNameShortenLength := 10 + return truncateNodeName(format, nodeName, validation.DNS1035LabelMaxLength-jobNameShortenLength) +} + // TruncateNodeName hashes the nodeName in case it would case the name to be longer than 63 characters // WARNING If your format and nodeName as a hash, are longer than 63 chars it won't be truncated! // Your format alone should only be 31 chars at max because of MD5 hash being 32 chars. @@ -94,7 +105,12 @@ func Hash(s string) string { // Do **NOT** edit this function in a way that would change its output as it needs to // provide consistent mappings from string to hash across versions of rook. func TruncateNodeName(format, nodeName string) string { - if len(nodeName)+len(fmt.Sprintf(format, "")) > validation.DNS1035LabelMaxLength { + return truncateNodeName(format, nodeName, validation.DNS1035LabelMaxLength) +} + +// truncateNodeName takes the max length desired for a string and hashes the value if needed to shorten it. +func truncateNodeName(format, nodeName string, maxLength int) string { + if len(nodeName)+len(fmt.Sprintf(format, "")) > maxLength { hashed := Hash(nodeName) logger.Infof("format and nodeName longer than %d chars, nodeName %s will be %s", validation.DNS1035LabelMaxLength, nodeName, hashed) nodeName = hashed diff --git a/pkg/operator/k8sutil/k8sutil_test.go b/pkg/operator/k8sutil/k8sutil_test.go index db3ce0e33232..173d83f44804 100644 --- a/pkg/operator/k8sutil/k8sutil_test.go +++ b/pkg/operator/k8sutil/k8sutil_test.go @@ -52,6 +52,28 @@ func TestTruncateNodeName(t *testing.T) { } } +func TestTruncateJobName(t *testing.T) { + // An entry's key is the result. The first value in the []string is the format and the second is the nodeName + tests := map[string][]string{ + "rook-ceph-osd-prepare-k8s01": { // 27 chars + "rook-ceph-osd-prepare-%s", // 22 chars (without format) + "k8s01", // 5 chars + }, + "rook-ceph-osd-prepare-k8s-worker-500.this.is.a.not.so": { // 53 chars + "rook-ceph-osd-prepare-%s", // 22 chars (without format) + "k8s-worker-500.this.is.a.not.so", // 31 chars + }, + // 54 chars, but ok that it is longer than 53 since it ends in an alphanumeric char + "rook-ceph-osd-prepare-4d2c3e33ccd2764180d42c20dce1d66a": { + "rook-ceph-osd-prepare-%s", // 22 chars (without format) + "k8s-worker-ends-with-a-long-name", // 32 chars + }, + } + for result, params := range tests { + assert.Equal(t, result, TruncateNodeNameForJob(params[0], params[1])) + } +} + func TestValidateLabelValue(t *testing.T) { // The key is the result, and the value is the input. tests := map[string]string{ From 9d2aa1f6bddb8894de68dd49ee5bf0e211604a06 Mon Sep 17 00:00:00 2001 From: Travis Nielsen Date: Fri, 3 Dec 2021 15:37:49 -0700 Subject: [PATCH 2/2] test: generate long node name depending on test suite The generation of a long node name in the integration tests was being done based on the k8s version. In the past, older K8s versions did not support the changing name. Now it's more maintainable if we generate the long name depending on the test suite. Signed-off-by: Travis Nielsen --- pkg/operator/k8sutil/k8sutil.go | 2 +- tests/framework/installer/ceph_installer.go | 2 +- tests/framework/installer/ceph_settings.go | 1 + tests/integration/ceph_helm_test.go | 3 ++- tests/integration/ceph_smoke_test.go | 1 + 5 files changed, 6 insertions(+), 3 deletions(-) diff --git a/pkg/operator/k8sutil/k8sutil.go b/pkg/operator/k8sutil/k8sutil.go index 737203a67bd5..c96019ba2fc1 100644 --- a/pkg/operator/k8sutil/k8sutil.go +++ b/pkg/operator/k8sutil/k8sutil.go @@ -112,7 +112,7 @@ func TruncateNodeName(format, nodeName string) string { func truncateNodeName(format, nodeName string, maxLength int) string { if len(nodeName)+len(fmt.Sprintf(format, "")) > maxLength { hashed := Hash(nodeName) - logger.Infof("format and nodeName longer than %d chars, nodeName %s will be %s", validation.DNS1035LabelMaxLength, nodeName, hashed) + logger.Infof("format and nodeName longer than %d chars, nodeName %s will be %s", maxLength, nodeName, hashed) nodeName = hashed } return fmt.Sprintf(format, nodeName) diff --git a/tests/framework/installer/ceph_installer.go b/tests/framework/installer/ceph_installer.go index 19e8e57eff5f..1112b07a3bd2 100644 --- a/tests/framework/installer/ceph_installer.go +++ b/tests/framework/installer/ceph_installer.go @@ -920,7 +920,7 @@ func NewCephInstaller(t func() *testing.T, clientset *kubernetes.Clientset, sett k8shelper: k8shelp, helmHelper: utils.NewHelmHelper(testHelmPath()), k8sVersion: version.String(), - changeHostnames: k8shelp.VersionAtLeast("v1.18.0"), + changeHostnames: settings.ChangeHostName, T: t, } flag.Parse() diff --git a/tests/framework/installer/ceph_settings.go b/tests/framework/installer/ceph_settings.go index c93bb4c03661..f10998acbd1c 100644 --- a/tests/framework/installer/ceph_settings.go +++ b/tests/framework/installer/ceph_settings.go @@ -45,6 +45,7 @@ type TestCephSettings struct { SkipCleanupPolicy bool DirectMountToolbox bool EnableVolumeReplication bool + ChangeHostName bool RookVersion string CephVersion cephv1.CephVersionSpec } diff --git a/tests/integration/ceph_helm_test.go b/tests/integration/ceph_helm_test.go index 7455423d6d21..9ddf44317b73 100644 --- a/tests/integration/ceph_helm_test.go +++ b/tests/integration/ceph_helm_test.go @@ -72,6 +72,7 @@ func (h *HelmSuite) SetupSuite() { SkipOSDCreation: false, EnableAdmissionController: false, EnableDiscovery: true, + ChangeHostName: true, RookVersion: installer.LocalBuildTag, CephVersion: installer.OctopusVersion, } @@ -91,7 +92,7 @@ func (h *HelmSuite) AfterTest(suiteName, testName string) { // Test to make sure all rook components are installed and Running func (h *HelmSuite) TestARookInstallViaHelm() { checkIfRookClusterIsInstalled(h.Suite, h.k8shelper, h.settings.Namespace, h.settings.Namespace, 1) - checkIfRookClusterHasHealthyIngress(h.Suite, h.k8shelper, h.settings.Namespace) + checkIfRookClusterHasHealthyIngress(h.Suite, h.k8shelper, h.settings.Namespace) } // Test BlockCreation on Rook that was installed via Helm diff --git a/tests/integration/ceph_smoke_test.go b/tests/integration/ceph_smoke_test.go index 32c140e5b8d0..0edb000ae18d 100644 --- a/tests/integration/ceph_smoke_test.go +++ b/tests/integration/ceph_smoke_test.go @@ -94,6 +94,7 @@ func (s *SmokeSuite) SetupSuite() { EnableAdmissionController: true, UseCrashPruner: true, EnableVolumeReplication: true, + ChangeHostName: true, RookVersion: installer.LocalBuildTag, CephVersion: installer.ReturnCephVersion(), }