Skip to content

Commit

Permalink
mds: skip sanity check during upgrades to 16.2.7
Browse files Browse the repository at this point in the history
The mons need to be configured to skip the mds sanity check
during upgrade to 16.2.7 or the mons may crash during that
upgrade.

Signed-off-by: Travis Nielsen <tnielsen@redhat.com>
  • Loading branch information
travisn committed Dec 14, 2021
1 parent 9299e32 commit c49c9ee
Showing 1 changed file with 46 additions and 0 deletions.
46 changes: 46 additions & 0 deletions pkg/operator/ceph/cluster/cluster.go
Expand Up @@ -92,6 +92,13 @@ func (c *cluster) reconcileCephDaemons(rookImage string, cephVersion cephver.Cep
}
c.ClusterInfo.SetName(c.namespacedName.Name)

// Execute actions before the monitors are reconciled
logger.Debug("monitors are about to reconcile, executing pre actions")
err = c.preMonStartupActions()
if err != nil {
return errors.Wrap(err, "failed to execute pre actions before reconciling the ceph monitors")
}

// Start the mon pods
controller.UpdateCondition(c.ClusterInfo.Context, c.context, c.namespacedName, cephv1.ConditionProgressing, v1.ConditionTrue, cephv1.ClusterProgressingReason, "Configuring Ceph Mons")
clusterInfo, err := c.mons.Start(c.ClusterInfo, rookImage, cephVersion, *c.Spec)
Expand Down Expand Up @@ -443,6 +450,38 @@ func (c *cluster) replaceDefaultCrushMap(newRoot string) (err error) {
return nil
}

// preMonStartupActions is a collection of actions to run before the monitors are reconciled.
//
// It always returns nil: a failure to apply the mds sanity check workaround is
// only logged as a warning so that it never blocks the mon reconcile.
func (c *cluster) preMonStartupActions() error {
	// Ask the mons to skip the mds sanity checks due to a ceph upgrade issue
	// for the mds.
	if err := c.skipMDSSanityChecks(true); err != nil {
		// If there is an error, just print it and continue. Likely there is not a
		// negative consequence of continuing since several complex conditions must exist to hit
		// the upgrade issue where the sanity checks need to be disabled.
		//
		// The message names the operation that actually failed: enabling the
		// mon_mds_skip_sanity option (which disables the sanity checks).
		logger.Warningf("failed to enable mon_mds_skip_sanity to skip the mds sanity checks. %v", err)
	}

	return nil
}

// skipMDSSanityChecks toggles the "mon_mds_skip_sanity" option in the "mon"
// section of the mon config store.
//
// When skip is true the option is set to "1"; when skip is false the option is
// deleted from the store. Any error reported by the store is returned to the
// caller as-is.
func (c *cluster) skipMDSSanityChecks(skip bool) error {
	const (
		section = "mon"
		option  = "mon_mds_skip_sanity"
	)

	// In a new cluster, there is no need for this change.
	// TODO: Check if a new cluster

	store := config.GetMonStore(c.context, c.ClusterInfo)

	if !skip {
		// Remove the override so the option is no longer set in the store.
		return store.Delete(section, option)
	}

	// Set the override for the duration of the upgrade.
	return store.Set(section, option, "1")
}

// postMonStartupActions is a collection of actions to run once the monitors are up and running
// It gets executed right after the main mon Start() method
// Basically, it is executed between the monitors and the manager sequence
Expand All @@ -464,6 +503,13 @@ func (c *cluster) postMonStartupActions() error {
return errors.Wrap(err, "failed to enable Ceph messenger version 2")
}

// Re-enable the mds sanity checks
if err := c.skipMDSSanityChecks(false); err != nil {
// If there is an error, just print it and continue. We can just try again
// at the next reconcile.
logger.Warningf("failed to re-enable the mon_mds_skip_sanity. %v", err)
}

crushRoot := client.GetCrushRootFromSpec(c.Spec)
if crushRoot != "default" {
// Remove the root=default and replicated_rule which are created by
Expand Down

0 comments on commit c49c9ee

Please sign in to comment.