diff --git a/pkg/operator/ceph/cluster/cleanup.go b/pkg/operator/ceph/cluster/cleanup.go index c0ed852e2ca2..624a0bb0b1e5 100644 --- a/pkg/operator/ceph/cluster/cleanup.go +++ b/pkg/operator/ceph/cluster/cleanup.go @@ -72,7 +72,7 @@ func (c *ClusterController) startClusterCleanUp(stopCleanupCh chan struct{}, clu func (c *ClusterController) startCleanUpJobs(cluster *cephv1.CephCluster, cephHosts []string, monSecret, clusterFSID string) { for _, hostName := range cephHosts { logger.Infof("starting clean up job on node %q", hostName) - jobName := k8sutil.TruncateNodeName("cluster-cleanup-job-%s", hostName) + jobName := k8sutil.TruncateNodeNameForJob("cluster-cleanup-job-%s", hostName) podSpec := c.cleanUpJobTemplateSpec(cluster, monSecret, clusterFSID) podSpec.Spec.NodeSelector = map[string]string{v1.LabelHostname: hostName} labels := controller.AppLabels(CleanupAppName, cluster.Namespace) diff --git a/pkg/operator/ceph/cluster/osd/provision_spec.go b/pkg/operator/ceph/cluster/osd/provision_spec.go index 396c1adfb322..d2dc349946c7 100644 --- a/pkg/operator/ceph/cluster/osd/provision_spec.go +++ b/pkg/operator/ceph/cluster/osd/provision_spec.go @@ -55,7 +55,7 @@ func (c *Cluster) makeJob(osdProps osdProperties, provisionConfig *provisionConf job := &batch.Job{ ObjectMeta: metav1.ObjectMeta{ - Name: k8sutil.TruncateNodeName(prepareAppNameFmt, osdProps.crushHostname), + Name: k8sutil.TruncateNodeNameForJob(prepareAppNameFmt, osdProps.crushHostname), Namespace: c.clusterInfo.Namespace, Labels: map[string]string{ k8sutil.AppAttr: prepareAppName, diff --git a/pkg/operator/k8sutil/k8sutil.go b/pkg/operator/k8sutil/k8sutil.go index f3a44c76b0c4..0fb83f720156 100644 --- a/pkg/operator/k8sutil/k8sutil.go +++ b/pkg/operator/k8sutil/k8sutil.go @@ -88,6 +88,17 @@ func Hash(s string) string { return hex.EncodeToString(h[:16]) } +// TruncateNodeNameForJob hashes the nodeName in case it would case the name to be longer than 63 characters +// and avoids for a K8s 1.22 bug in the job pod name generation. If the job name contains a . or - in a certain +// position, the pod will fail to create. +func TruncateNodeNameForJob(format, nodeName string) string { + // In k8s 1.22, the job name is truncated an additional 10 characters which can cause an issue + // in the generated pod name if it then ends in a non-alphanumeric character. In that case, + // we more aggressively generate a hashed job name. + jobNameShortenLength := 10 + return truncateNodeName(format, nodeName, validation.DNS1035LabelMaxLength-jobNameShortenLength) +} + // TruncateNodeName hashes the nodeName in case it would case the name to be longer than 63 characters // WARNING If your format and nodeName as a hash, are longer than 63 chars it won't be truncated! // Your format alone should only be 31 chars at max because of MD5 hash being 32 chars. @@ -97,9 +108,14 @@ func Hash(s string) string { // Do **NOT** edit this function in a way that would change its output as it needs to // provide consistent mappings from string to hash across versions of rook. func TruncateNodeName(format, nodeName string) string { - if len(nodeName)+len(fmt.Sprintf(format, "")) > validation.DNS1035LabelMaxLength { + return truncateNodeName(format, nodeName, validation.DNS1035LabelMaxLength) +} + +// truncateNodeName takes the max length desired for a string and hashes the value if needed to shorten it. +func truncateNodeName(format, nodeName string, maxLength int) string { + if len(nodeName)+len(fmt.Sprintf(format, "")) > maxLength { hashed := Hash(nodeName) - logger.Infof("format and nodeName longer than %d chars, nodeName %s will be %s", validation.DNS1035LabelMaxLength, nodeName, hashed) + logger.Infof("format and nodeName longer than %d chars, nodeName %s will be %s", maxLength, nodeName, hashed) nodeName = hashed } return fmt.Sprintf(format, nodeName) diff --git a/pkg/operator/k8sutil/k8sutil_test.go b/pkg/operator/k8sutil/k8sutil_test.go index db3ce0e33232..173d83f44804 100644 --- a/pkg/operator/k8sutil/k8sutil_test.go +++ b/pkg/operator/k8sutil/k8sutil_test.go @@ -52,6 +52,28 @@ func TestTruncateNodeName(t *testing.T) { } } +func TestTruncateJobName(t *testing.T) { + // An entry's key is the result. The first value in the []string is the format and the second is the nodeName + tests := map[string][]string{ + "rook-ceph-osd-prepare-k8s01": { // 27 chars + "rook-ceph-osd-prepare-%s", // 22 chars (without format) + "k8s01", // 5 chars + }, + "rook-ceph-osd-prepare-k8s-worker-500.this.is.a.not.so": { // 53 chars + "rook-ceph-osd-prepare-%s", // 22 chars (without format) + "k8s-worker-500.this.is.a.not.so", // 31 chars + }, + // 54 chars, but ok that it is longer than 53 since it ends in an alphanumeric char + "rook-ceph-osd-prepare-4d2c3e33ccd2764180d42c20dce1d66a": { + "rook-ceph-osd-prepare-%s", // 22 chars (without format) + "k8s-worker-ends-with-a-long-name", // 32 chars + }, + } + for result, params := range tests { + assert.Equal(t, result, TruncateNodeNameForJob(params[0], params[1])) + } +} + func TestValidateLabelValue(t *testing.T) { // The key is the result, and the value is the input. tests := map[string]string{ diff --git a/tests/framework/installer/ceph_installer.go b/tests/framework/installer/ceph_installer.go index 12b40c44a21f..d6a27574f6b9 100644 --- a/tests/framework/installer/ceph_installer.go +++ b/tests/framework/installer/ceph_installer.go @@ -919,7 +919,7 @@ func NewCephInstaller(t func() *testing.T, clientset *kubernetes.Clientset, sett k8shelper: k8shelp, helmHelper: utils.NewHelmHelper(testHelmPath()), k8sVersion: version.String(), - changeHostnames: k8shelp.VersionAtLeast("v1.18.0"), + changeHostnames: settings.ChangeHostName, T: t, } flag.Parse() diff --git a/tests/framework/installer/ceph_settings.go b/tests/framework/installer/ceph_settings.go index bb11e2dcd6ac..fb929b559a2b 100644 --- a/tests/framework/installer/ceph_settings.go +++ b/tests/framework/installer/ceph_settings.go @@ -46,6 +46,7 @@ type TestCephSettings struct { SkipCleanupPolicy bool DirectMountToolbox bool EnableVolumeReplication bool + ChangeHostName bool RookVersion string CephVersion cephv1.CephVersionSpec } diff --git a/tests/integration/ceph_helm_test.go b/tests/integration/ceph_helm_test.go index b0a1f5d741e9..b2c1a7951ed5 100644 --- a/tests/integration/ceph_helm_test.go +++ b/tests/integration/ceph_helm_test.go @@ -73,6 +73,7 @@ func (h *HelmSuite) SetupSuite() { SkipOSDCreation: false, EnableAdmissionController: false, EnableDiscovery: true, + ChangeHostName: true, RookVersion: installer.LocalBuildTag, CephVersion: installer.OctopusVersion, } diff --git a/tests/integration/ceph_smoke_test.go b/tests/integration/ceph_smoke_test.go index 052d37f2afdd..a9f7257573bc 100644 --- a/tests/integration/ceph_smoke_test.go +++ b/tests/integration/ceph_smoke_test.go @@ -100,6 +100,7 @@ func (s *SmokeSuite) SetupSuite() { EnableAdmissionController: true, UseCrashPruner: true, EnableVolumeReplication: true, + ChangeHostName: true, RookVersion: installer.LocalBuildTag, CephVersion: installer.PacificVersion, }