Skip to content

Commit

Permalink
Merge pull request #9312 from travisn/truncate-job-name
Browse files Browse the repository at this point in the history
Truncate job name of the osd prepare job further to avoid pod generation failure on K8s 1.22
  • Loading branch information
leseb committed Dec 7, 2021
2 parents 21e9243 + 9d2aa1f commit 1f33859
Show file tree
Hide file tree
Showing 8 changed files with 47 additions and 6 deletions.
2 changes: 1 addition & 1 deletion pkg/operator/ceph/cluster/cleanup.go
Expand Up @@ -72,7 +72,7 @@ func (c *ClusterController) startClusterCleanUp(context context.Context, cluster
func (c *ClusterController) startCleanUpJobs(cluster *cephv1.CephCluster, cephHosts []string, monSecret, clusterFSID string) {
for _, hostName := range cephHosts {
logger.Infof("starting clean up job on node %q", hostName)
jobName := k8sutil.TruncateNodeName("cluster-cleanup-job-%s", hostName)
jobName := k8sutil.TruncateNodeNameForJob("cluster-cleanup-job-%s", hostName)
podSpec := c.cleanUpJobTemplateSpec(cluster, monSecret, clusterFSID)
podSpec.Spec.NodeSelector = map[string]string{v1.LabelHostname: hostName}
labels := controller.AppLabels(CleanupAppName, cluster.Namespace)
Expand Down
2 changes: 1 addition & 1 deletion pkg/operator/ceph/cluster/osd/provision_spec.go
Expand Up @@ -55,7 +55,7 @@ func (c *Cluster) makeJob(osdProps osdProperties, provisionConfig *provisionConf

job := &batch.Job{
ObjectMeta: metav1.ObjectMeta{
Name: k8sutil.TruncateNodeName(prepareAppNameFmt, osdProps.crushHostname),
Name: k8sutil.TruncateNodeNameForJob(prepareAppNameFmt, osdProps.crushHostname),
Namespace: c.clusterInfo.Namespace,
Labels: map[string]string{
k8sutil.AppAttr: prepareAppName,
Expand Down
20 changes: 18 additions & 2 deletions pkg/operator/k8sutil/k8sutil.go
Expand Up @@ -85,6 +85,17 @@ func Hash(s string) string {
return hex.EncodeToString(h[:16])
}

// TruncateNodeNameForJob hashes the nodeName in case it would case the name to be longer than 63 characters
// and avoids for a K8s 1.22 bug in the job pod name generation. If the job name contains a . or - in a certain
// position, the pod will fail to create.
func TruncateNodeNameForJob(format, nodeName string) string {
// In k8s 1.22, the job name is truncated an additional 10 characters which can cause an issue
// in the generated pod name if it then ends in a non-alphanumeric character. In that case,
// we more aggressively generate a hashed job name.
jobNameShortenLength := 10
return truncateNodeName(format, nodeName, validation.DNS1035LabelMaxLength-jobNameShortenLength)
}

// TruncateNodeName hashes the nodeName in case it would case the name to be longer than 63 characters
// WARNING If your format and nodeName as a hash, are longer than 63 chars it won't be truncated!
// Your format alone should only be 31 chars at max because of MD5 hash being 32 chars.
Expand All @@ -94,9 +105,14 @@ func Hash(s string) string {
// Do **NOT** edit this function in a way that would change its output as it needs to
// provide consistent mappings from string to hash across versions of rook.
func TruncateNodeName(format, nodeName string) string {
if len(nodeName)+len(fmt.Sprintf(format, "")) > validation.DNS1035LabelMaxLength {
return truncateNodeName(format, nodeName, validation.DNS1035LabelMaxLength)
}

// truncateNodeName takes the max length desired for a string and hashes the value if needed to shorten it.
func truncateNodeName(format, nodeName string, maxLength int) string {
if len(nodeName)+len(fmt.Sprintf(format, "")) > maxLength {
hashed := Hash(nodeName)
logger.Infof("format and nodeName longer than %d chars, nodeName %s will be %s", validation.DNS1035LabelMaxLength, nodeName, hashed)
logger.Infof("format and nodeName longer than %d chars, nodeName %s will be %s", maxLength, nodeName, hashed)
nodeName = hashed
}
return fmt.Sprintf(format, nodeName)
Expand Down
22 changes: 22 additions & 0 deletions pkg/operator/k8sutil/k8sutil_test.go
Expand Up @@ -52,6 +52,28 @@ func TestTruncateNodeName(t *testing.T) {
}
}

func TestTruncateJobName(t *testing.T) {
// An entry's key is the result. The first value in the []string is the format and the second is the nodeName
tests := map[string][]string{
"rook-ceph-osd-prepare-k8s01": { // 27 chars
"rook-ceph-osd-prepare-%s", // 22 chars (without format)
"k8s01", // 5 chars
},
"rook-ceph-osd-prepare-k8s-worker-500.this.is.a.not.so": { // 53 chars
"rook-ceph-osd-prepare-%s", // 22 chars (without format)
"k8s-worker-500.this.is.a.not.so", // 31 chars
},
// 54 chars, but ok that it is longer than 53 since it ends in an alphanumeric char
"rook-ceph-osd-prepare-4d2c3e33ccd2764180d42c20dce1d66a": {
"rook-ceph-osd-prepare-%s", // 22 chars (without format)
"k8s-worker-ends-with-a-long-name", // 32 chars
},
}
for result, params := range tests {
assert.Equal(t, result, TruncateNodeNameForJob(params[0], params[1]))
}
}

func TestValidateLabelValue(t *testing.T) {
// The key is the result, and the value is the input.
tests := map[string]string{
Expand Down
2 changes: 1 addition & 1 deletion tests/framework/installer/ceph_installer.go
Expand Up @@ -920,7 +920,7 @@ func NewCephInstaller(t func() *testing.T, clientset *kubernetes.Clientset, sett
k8shelper: k8shelp,
helmHelper: utils.NewHelmHelper(testHelmPath()),
k8sVersion: version.String(),
changeHostnames: k8shelp.VersionAtLeast("v1.18.0"),
changeHostnames: settings.ChangeHostName,
T: t,
}
flag.Parse()
Expand Down
1 change: 1 addition & 0 deletions tests/framework/installer/ceph_settings.go
Expand Up @@ -45,6 +45,7 @@ type TestCephSettings struct {
SkipCleanupPolicy bool
DirectMountToolbox bool
EnableVolumeReplication bool
ChangeHostName bool
RookVersion string
CephVersion cephv1.CephVersionSpec
}
Expand Down
3 changes: 2 additions & 1 deletion tests/integration/ceph_helm_test.go
Expand Up @@ -72,6 +72,7 @@ func (h *HelmSuite) SetupSuite() {
SkipOSDCreation: false,
EnableAdmissionController: false,
EnableDiscovery: true,
ChangeHostName: true,
RookVersion: installer.LocalBuildTag,
CephVersion: installer.OctopusVersion,
}
Expand All @@ -91,7 +92,7 @@ func (h *HelmSuite) AfterTest(suiteName, testName string) {
// Test to make sure all rook components are installed and Running
func (h *HelmSuite) TestARookInstallViaHelm() {
checkIfRookClusterIsInstalled(h.Suite, h.k8shelper, h.settings.Namespace, h.settings.Namespace, 1)
checkIfRookClusterHasHealthyIngress(h.Suite, h.k8shelper, h.settings.Namespace)
checkIfRookClusterHasHealthyIngress(h.Suite, h.k8shelper, h.settings.Namespace)
}

// Test BlockCreation on Rook that was installed via Helm
Expand Down
1 change: 1 addition & 0 deletions tests/integration/ceph_smoke_test.go
Expand Up @@ -94,6 +94,7 @@ func (s *SmokeSuite) SetupSuite() {
EnableAdmissionController: true,
UseCrashPruner: true,
EnableVolumeReplication: true,
ChangeHostName: true,
RookVersion: installer.LocalBuildTag,
CephVersion: installer.ReturnCephVersion(),
}
Expand Down

0 comments on commit 1f33859

Please sign in to comment.