Skip to content

Commit

Permalink
Merge pull request #9447 from rook/mergify/bp/release-1.8/pr-9418
Browse files Browse the repository at this point in the history
mds: Skip sanity check during upgrades to 16.2.7 (backport #9418)
  • Loading branch information
travisn committed Dec 16, 2021
2 parents 37a20ba + 8645d24 commit 7b9c691
Show file tree
Hide file tree
Showing 2 changed files with 128 additions and 0 deletions.
48 changes: 48 additions & 0 deletions pkg/operator/ceph/cluster/cluster.go
Expand Up @@ -92,6 +92,14 @@ func (c *cluster) reconcileCephDaemons(rookImage string, cephVersion cephver.Cep
}
c.ClusterInfo.SetName(c.namespacedName.Name)

// Execute actions before the monitors are up and running, if needed during upgrades.
// These actions would be skipped in a new cluster.
logger.Debug("monitors are about to reconcile, executing pre actions")
err = c.preMonStartupActions(cephVersion)
if err != nil {
return errors.Wrap(err, "failed to execute actions before reconciling the ceph monitors")
}

// Start the mon pods
controller.UpdateCondition(c.ClusterInfo.Context, c.context, c.namespacedName, cephv1.ConditionProgressing, v1.ConditionTrue, cephv1.ClusterProgressingReason, "Configuring Ceph Mons")
clusterInfo, err := c.mons.Start(c.ClusterInfo, rookImage, cephVersion, *c.Spec)
Expand Down Expand Up @@ -443,6 +451,37 @@ func (c *cluster) replaceDefaultCrushMap(newRoot string) (err error) {
return nil
}

// preMonStartupActions is a collection of actions to run before the monitors are reconciled.
func (c *cluster) preMonStartupActions(cephVersion cephver.CephVersion) error {
// Disable the mds sanity checks for the mons due to a ceph upgrade issue
// for the mds to Pacific if 16.2.7 or greater. We keep it more general for any
// Pacific upgrade greater than 16.2.7 in case they skip updrading directly to 16.2.7.
if c.isUpgrade && cephVersion.IsPacific() && cephVersion.IsAtLeast(cephver.CephVersion{Major: 16, Minor: 2, Extra: 7}) {
if err := c.skipMDSSanityChecks(true); err != nil {
// If there is an error, just print it and continue. Likely there is not a
// negative consequence of continuing since several complex conditions must exist to hit
// the upgrade issue where the sanity checks need to be disabled.
logger.Warningf("failed to disable the mon_mds_skip_sanity. %v", err)
}
}
return nil
}

func (c *cluster) skipMDSSanityChecks(skip bool) error {
// In a running cluster disable the mds skip sanity setting during upgrades.
monStore := config.GetMonStore(c.context, c.ClusterInfo)
if skip {
if err := monStore.Set("mon", "mon_mds_skip_sanity", "1"); err != nil {
return err
}
} else {
if err := monStore.Delete("mon", "mon_mds_skip_sanity"); err != nil {
return err
}
}
return nil
}

// postMonStartupActions is a collection of actions to run once the monitors are up and running
// It gets executed right after the main mon Start() method
// Basically, it is executed between the monitors and the manager sequence
Expand All @@ -464,6 +503,15 @@ func (c *cluster) postMonStartupActions() error {
return errors.Wrap(err, "failed to enable Ceph messenger version 2")
}

// Always ensure the skip mds sanity checks setting is cleared, for all Pacific deployments
if c.ClusterInfo.CephVersion.IsPacific() {
if err := c.skipMDSSanityChecks(false); err != nil {
// If there is an error, just print it and continue. We can just try again
// at the next reconcile.
logger.Warningf("failed to re-enable the mon_mds_skip_sanity. %v", err)
}
}

crushRoot := client.GetCrushRootFromSpec(c.Spec)
if crushRoot != "default" {
// Remove the root=default and replicated_rule which are created by
Expand Down
80 changes: 80 additions & 0 deletions pkg/operator/ceph/cluster/cluster_test.go
Expand Up @@ -19,11 +19,17 @@ package cluster

import (
"testing"
"time"

"github.com/pkg/errors"
cephv1 "github.com/rook/rook/pkg/apis/ceph.rook.io/v1"
"github.com/rook/rook/pkg/clusterd"
"github.com/rook/rook/pkg/daemon/ceph/client"
cephclient "github.com/rook/rook/pkg/daemon/ceph/client"
cephver "github.com/rook/rook/pkg/operator/ceph/version"
testop "github.com/rook/rook/pkg/operator/test"
exectest "github.com/rook/rook/pkg/util/exec/test"
"github.com/stretchr/testify/assert"
)

func TestPreClusterStartValidation(t *testing.T) {
Expand Down Expand Up @@ -69,3 +75,77 @@ func TestPreClusterStartValidation(t *testing.T) {
})
}
}

func TestPreMonChecks(t *testing.T) {
executor := &exectest.MockExecutor{}
context := &clusterd.Context{Executor: executor}
setSkipSanity := false
unsetSkipSanity := false
executor.MockExecuteCommandWithTimeout = func(timeout time.Duration, command string, args ...string) (string, error) {
logger.Infof("Command: %s %v", command, args)
if args[0] == "config" {
if args[1] == "set" {
setSkipSanity = true
assert.Equal(t, "mon", args[2])
assert.Equal(t, "mon_mds_skip_sanity", args[3])
assert.Equal(t, "1", args[4])
return "", nil
}
if args[1] == "rm" {
unsetSkipSanity = true
assert.Equal(t, "mon", args[2])
assert.Equal(t, "mon_mds_skip_sanity", args[3])
return "", nil
}
}
return "", errors.Errorf("unexpected ceph command %q", args)
}
c := cluster{context: context, ClusterInfo: cephclient.AdminTestClusterInfo("cluster")}

t.Run("no upgrade", func(t *testing.T) {
v := cephver.CephVersion{Major: 16, Minor: 2, Extra: 7}
c.isUpgrade = false
err := c.preMonStartupActions(v)
assert.NoError(t, err)
assert.False(t, setSkipSanity)
assert.False(t, unsetSkipSanity)
})

t.Run("upgrade below version", func(t *testing.T) {
setSkipSanity = false
unsetSkipSanity = false
v := cephver.CephVersion{Major: 16, Minor: 2, Extra: 6}
c.isUpgrade = true
err := c.preMonStartupActions(v)
assert.NoError(t, err)
assert.False(t, setSkipSanity)
assert.False(t, unsetSkipSanity)
})

t.Run("upgrade to applicable version", func(t *testing.T) {
setSkipSanity = false
unsetSkipSanity = false
v := cephver.CephVersion{Major: 16, Minor: 2, Extra: 7}
c.isUpgrade = true
err := c.preMonStartupActions(v)
assert.NoError(t, err)
assert.True(t, setSkipSanity)
assert.False(t, unsetSkipSanity)

// This will be called during the post mon checks
err = c.skipMDSSanityChecks(false)
assert.NoError(t, err)
assert.True(t, unsetSkipSanity)
})

t.Run("upgrade to quincy", func(t *testing.T) {
setSkipSanity = false
unsetSkipSanity = false
v := cephver.CephVersion{Major: 17, Minor: 2, Extra: 0}
c.isUpgrade = true
err := c.preMonStartupActions(v)
assert.NoError(t, err)
assert.False(t, setSkipSanity)
assert.False(t, unsetSkipSanity)
})
}

0 comments on commit 7b9c691

Please sign in to comment.