Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Truncate job name of the osd prepare job further to avoid pod generation failure on K8s 1.22 #9312

Merged
merged 2 commits on Dec 7, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 1 addition & 1 deletion pkg/operator/ceph/cluster/cleanup.go
Expand Up @@ -72,7 +72,7 @@ func (c *ClusterController) startClusterCleanUp(context context.Context, cluster
func (c *ClusterController) startCleanUpJobs(cluster *cephv1.CephCluster, cephHosts []string, monSecret, clusterFSID string) {
for _, hostName := range cephHosts {
logger.Infof("starting clean up job on node %q", hostName)
jobName := k8sutil.TruncateNodeName("cluster-cleanup-job-%s", hostName)
jobName := k8sutil.TruncateNodeNameForJob("cluster-cleanup-job-%s", hostName)
podSpec := c.cleanUpJobTemplateSpec(cluster, monSecret, clusterFSID)
podSpec.Spec.NodeSelector = map[string]string{v1.LabelHostname: hostName}
labels := controller.AppLabels(CleanupAppName, cluster.Namespace)
Expand Down
2 changes: 1 addition & 1 deletion pkg/operator/ceph/cluster/osd/provision_spec.go
Expand Up @@ -55,7 +55,7 @@ func (c *Cluster) makeJob(osdProps osdProperties, provisionConfig *provisionConf

job := &batch.Job{
ObjectMeta: metav1.ObjectMeta{
Name: k8sutil.TruncateNodeName(prepareAppNameFmt, osdProps.crushHostname),
Name: k8sutil.TruncateNodeNameForJob(prepareAppNameFmt, osdProps.crushHostname),
Namespace: c.clusterInfo.Namespace,
Labels: map[string]string{
k8sutil.AppAttr: prepareAppName,
Expand Down
20 changes: 18 additions & 2 deletions pkg/operator/k8sutil/k8sutil.go
Expand Up @@ -85,6 +85,17 @@ func Hash(s string) string {
return hex.EncodeToString(h[:16])
}

// TruncateNodeNameForJob hashes the nodeName in case it would cause the name to be longer than 63 characters
// and works around a K8s 1.22 bug in the job pod name generation. If the job name contains a . or - in a certain
// position, the pod will fail to create.
func TruncateNodeNameForJob(format, nodeName string) string {
	// In k8s 1.22, the job name is truncated an additional 10 characters which can cause an issue
	// in the generated pod name if it then ends in a non-alphanumeric character. In that case,
	// we more aggressively generate a hashed job name.
	const jobNameShortenLength = 10
	return truncateNodeName(format, nodeName, validation.DNS1035LabelMaxLength-jobNameShortenLength)
}

// TruncateNodeName hashes the nodeName in case it would cause the name to be longer than 63 characters
// WARNING: if the format combined with the hashed nodeName is still longer than 63 chars, it won't be truncated!
// Your format alone should only be 31 chars at max because of the MD5 hash being 32 chars.
Expand All @@ -94,9 +105,14 @@ func Hash(s string) string {
// Do **NOT** edit this function in a way that would change its output as it needs to
// provide consistent mappings from string to hash across versions of rook.
func TruncateNodeName(format, nodeName string) string {
if len(nodeName)+len(fmt.Sprintf(format, "")) > validation.DNS1035LabelMaxLength {
return truncateNodeName(format, nodeName, validation.DNS1035LabelMaxLength)
}

// truncateNodeName takes the max length desired for a string and hashes the value if needed to shorten it.
func truncateNodeName(format, nodeName string, maxLength int) string {
if len(nodeName)+len(fmt.Sprintf(format, "")) > maxLength {
hashed := Hash(nodeName)
logger.Infof("format and nodeName longer than %d chars, nodeName %s will be %s", validation.DNS1035LabelMaxLength, nodeName, hashed)
logger.Infof("format and nodeName longer than %d chars, nodeName %s will be %s", maxLength, nodeName, hashed)
nodeName = hashed
}
return fmt.Sprintf(format, nodeName)
Expand Down
22 changes: 22 additions & 0 deletions pkg/operator/k8sutil/k8sutil_test.go
Expand Up @@ -52,6 +52,28 @@ func TestTruncateNodeName(t *testing.T) {
}
}

// TestTruncateJobName verifies that TruncateNodeNameForJob leaves short names intact
// and hashes node names whose formatted result would exceed the K8s 1.22-safe job
// name length (63 - 10 = 53 chars), unless the result ends in an alphanumeric char.
func TestTruncateJobName(t *testing.T) {
	cases := []struct {
		format   string
		nodeName string
		expected string
	}{
		{
			format:   "rook-ceph-osd-prepare-%s", // 22 chars (without format)
			nodeName: "k8s01",                     // 5 chars
			expected: "rook-ceph-osd-prepare-k8s01", // 27 chars
		},
		{
			format:   "rook-ceph-osd-prepare-%s",        // 22 chars (without format)
			nodeName: "k8s-worker-500.this.is.a.not.so", // 31 chars
			expected: "rook-ceph-osd-prepare-k8s-worker-500.this.is.a.not.so", // 53 chars
		},
		{
			format:   "rook-ceph-osd-prepare-%s",         // 22 chars (without format)
			nodeName: "k8s-worker-ends-with-a-long-name", // 32 chars
			// 54 chars, but ok that it is longer than 53 since it ends in an alphanumeric char
			expected: "rook-ceph-osd-prepare-4d2c3e33ccd2764180d42c20dce1d66a",
		},
	}
	for _, tc := range cases {
		assert.Equal(t, tc.expected, TruncateNodeNameForJob(tc.format, tc.nodeName))
	}
}

func TestValidateLabelValue(t *testing.T) {
// The key is the result, and the value is the input.
tests := map[string]string{
Expand Down
2 changes: 1 addition & 1 deletion tests/framework/installer/ceph_installer.go
Expand Up @@ -920,7 +920,7 @@ func NewCephInstaller(t func() *testing.T, clientset *kubernetes.Clientset, sett
k8shelper: k8shelp,
helmHelper: utils.NewHelmHelper(testHelmPath()),
k8sVersion: version.String(),
changeHostnames: k8shelp.VersionAtLeast("v1.18.0"),
changeHostnames: settings.ChangeHostName,
T: t,
}
flag.Parse()
Expand Down
1 change: 1 addition & 0 deletions tests/framework/installer/ceph_settings.go
Expand Up @@ -45,6 +45,7 @@ type TestCephSettings struct {
SkipCleanupPolicy bool
DirectMountToolbox bool
EnableVolumeReplication bool
ChangeHostName bool
RookVersion string
CephVersion cephv1.CephVersionSpec
}
Expand Down
3 changes: 2 additions & 1 deletion tests/integration/ceph_helm_test.go
Expand Up @@ -72,6 +72,7 @@ func (h *HelmSuite) SetupSuite() {
SkipOSDCreation: false,
EnableAdmissionController: false,
EnableDiscovery: true,
ChangeHostName: true,
RookVersion: installer.LocalBuildTag,
CephVersion: installer.OctopusVersion,
}
Expand All @@ -91,7 +92,7 @@ func (h *HelmSuite) AfterTest(suiteName, testName string) {
// Test to make sure all rook components are installed and Running
// Test to make sure all rook components are installed and Running
func (h *HelmSuite) TestARookInstallViaHelm() {
	checkIfRookClusterIsInstalled(h.Suite, h.k8shelper, h.settings.Namespace, h.settings.Namespace, 1)
	checkIfRookClusterHasHealthyIngress(h.Suite, h.k8shelper, h.settings.Namespace)
}

// Test BlockCreation on Rook that was installed via Helm
Expand Down
1 change: 1 addition & 0 deletions tests/integration/ceph_smoke_test.go
Expand Up @@ -94,6 +94,7 @@ func (s *SmokeSuite) SetupSuite() {
EnableAdmissionController: true,
UseCrashPruner: true,
EnableVolumeReplication: true,
ChangeHostName: true,
RookVersion: installer.LocalBuildTag,
CephVersion: installer.ReturnCephVersion(),
}
Expand Down