From ee791b09fd5ed4878886eefe3a747fd59e1cec21 Mon Sep 17 00:00:00 2001
From: Joseph Sawaya
Date: Mon, 9 Aug 2021 15:59:15 -0400
Subject: [PATCH] ceph: update CephNFS to use ".nfs" pool in newer ceph versions

This commit updates the CephNFS CR to make the RADOS settings optional
for Ceph versions equal to or greater than 16.2.7, due to the NFS module
changes in Ceph. The changes in Ceph make it so the RADOS pool is always
".nfs" and the RADOS namespace is always the name of the NFS cluster.

This commit also handles Ceph Pacific versions before 16.2.7, where the
default pool name is "nfs-ganesha" instead of ".nfs".

Closes: https://github.com/rook/rook/issues/8450
Signed-off-by: Joseph Sawaya
---
 Documentation/ceph-nfs-crd.md                     |   4 +
 .../charts/rook-ceph/templates/resources.yaml     |   2 +-
 cluster/examples/kubernetes/ceph/crds.yaml        |   2 +-
 .../examples/kubernetes/ceph/nfs-test.yaml        |   1 +
 pkg/apis/ceph.rook.io/v1/types.go                 |   4 +-
 pkg/operator/ceph/nfs/controller.go               |  21 +++
 pkg/operator/ceph/nfs/controller_test.go          | 130 ++++++++++++++++++
 pkg/operator/ceph/nfs/nfs.go                      |  36 ++++-
 8 files changed, 196 insertions(+), 4 deletions(-)

diff --git a/Documentation/ceph-nfs-crd.md b/Documentation/ceph-nfs-crd.md
index 9b10aa47fd12..527db5846219 100644
--- a/Documentation/ceph-nfs-crd.md
+++ b/Documentation/ceph-nfs-crd.md
@@ -25,6 +25,8 @@ metadata:
   name: my-nfs
   namespace: rook-ceph
 spec:
+  # The rados property is not used in Ceph versions equal to or greater than
+  # 16.2.7; see the note in the RADOS settings section below.
   rados:
     # RADOS pool where NFS client recovery data and per-daemon configs are
     # stored. In this example the data pool for the "myfs" filesystem is used.
@@ -91,6 +93,8 @@ ceph dashboard set-ganesha-clusters-rados-pool-namespace <cluster_id>:<pool_name
 
+> **NOTE**: The RADOS settings aren't used in Ceph versions equal to or greater than Pacific 16.2.7; default values are used instead: ".nfs" for the RADOS pool and the CephNFS CR's name for the RADOS namespace. However, RADOS settings are mandatory for versions preceding Pacific 16.2.7.
+
 > **NOTE**: Don't use EC pools for NFS because ganesha uses omap in the recovery objects and grace db. EC pools do not support omap.
 
 ## EXPORT Block Configuration
 
diff --git a/cluster/charts/rook-ceph/templates/resources.yaml b/cluster/charts/rook-ceph/templates/resources.yaml
index 5e1bf1caa132..a3952c88c30a 100644
--- a/cluster/charts/rook-ceph/templates/resources.yaml
+++ b/cluster/charts/rook-ceph/templates/resources.yaml
@@ -5655,6 +5655,7 @@ spec:
             properties:
               rados:
                 description: RADOS is the Ganesha RADOS specification
+                nullable: true
                 properties:
                   namespace:
                     description: Namespace is the RADOS namespace where NFS client recovery data is stored.
@@ -6257,7 +6258,6 @@ spec:
                 - active
                 type: object
             required:
-            - rados
             - server
             type: object
           status:
diff --git a/cluster/examples/kubernetes/ceph/crds.yaml b/cluster/examples/kubernetes/ceph/crds.yaml
index 0fb957cf76d9..41ed3867202c 100644
--- a/cluster/examples/kubernetes/ceph/crds.yaml
+++ b/cluster/examples/kubernetes/ceph/crds.yaml
@@ -5653,6 +5653,7 @@ spec:
             properties:
               rados:
                 description: RADOS is the Ganesha RADOS specification
+                nullable: true
                 properties:
                   namespace:
                     description: Namespace is the RADOS namespace where NFS client recovery data is stored.
@@ -6255,7 +6256,6 @@ spec:
                 - active
                 type: object
             required:
-            - rados
             - server
             type: object
           status:
diff --git a/cluster/examples/kubernetes/ceph/nfs-test.yaml b/cluster/examples/kubernetes/ceph/nfs-test.yaml
index 46770bdb62b6..4d8ee6966053 100644
--- a/cluster/examples/kubernetes/ceph/nfs-test.yaml
+++ b/cluster/examples/kubernetes/ceph/nfs-test.yaml
@@ -4,6 +4,7 @@ metadata:
   name: my-nfs
   namespace: rook-ceph # namespace:cluster
 spec:
+  # rados settings aren't necessary in Ceph versions equal to or greater than Pacific 16.2.7
   rados:
     # RADOS pool where NFS client recovery data is stored.
     # In this example the data pool for the "myfs" filesystem is used.
diff --git a/pkg/apis/ceph.rook.io/v1/types.go b/pkg/apis/ceph.rook.io/v1/types.go
index b20d945532b6..148a79f179b0 100755
--- a/pkg/apis/ceph.rook.io/v1/types.go
+++ b/pkg/apis/ceph.rook.io/v1/types.go
@@ -1619,7 +1619,9 @@ type CephNFSList struct {
 // NFSGaneshaSpec represents the spec of an nfs ganesha server
 type NFSGaneshaSpec struct {
 	// RADOS is the Ganesha RADOS specification
-	RADOS GaneshaRADOSSpec `json:"rados"`
+	// +nullable
+	// +optional
+	RADOS GaneshaRADOSSpec `json:"rados,omitempty"`
 
 	// Server is the Ganesha Server specification
 	Server GaneshaServerSpec `json:"server"`
diff --git a/pkg/operator/ceph/nfs/controller.go b/pkg/operator/ceph/nfs/controller.go
index 586d03b77356..348cacd13733 100644
--- a/pkg/operator/ceph/nfs/controller.go
+++ b/pkg/operator/ceph/nfs/controller.go
@@ -31,6 +31,7 @@ import (
 	"github.com/rook/rook/pkg/operator/ceph/config"
 	opcontroller "github.com/rook/rook/pkg/operator/ceph/controller"
 	"github.com/rook/rook/pkg/operator/ceph/reporting"
+	"github.com/rook/rook/pkg/operator/ceph/version"
 	"github.com/rook/rook/pkg/operator/k8sutil"
 	appsv1 "k8s.io/api/apps/v1"
 	v1 "k8s.io/api/core/v1"
@@ -50,6 +51,9 @@ const (
 	controllerName = "ceph-nfs-controller"
 )
 
+// Version of Ceph where the NFS default pool name changes to ".nfs"
+var cephNFSChangeVersion = version.CephVersion{Major: 16, Minor: 2, Extra: 7}
+
 var logger = capnslog.NewPackageLogger("github.com/rook/rook", controllerName)
 
 // List of object resources to watch by the controller
@@ -247,10 +251,27 @@ func (r *ReconcileCephNFS) reconcile(request reconcile.Request) (reconcile.Resul
 	}
 	r.clusterInfo.CephVersion = *runningCephVersion
 
+	// Octopus: customization is allowed, so don't change the pool and namespace.
+	// Pacific before 16.2.7: no customization; the default pool name is "nfs-ganesha".
+	// Pacific 16.2.7 and later: no customization; the default pool name is ".nfs".
+	// This code changes the pool and namespace to the correct values if the version is Pacific.
+	// If the version precedes Pacific, nothing is changed and the values from the user-provided spec are used.
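+	// Note that this defaulting happens before validateGanesha and fetchOrCreatePool run
+	// below, so on Pacific both functions operate on the defaulted pool and namespace
+	// rather than on any user-provided RADOS settings.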
+	if r.clusterInfo.CephVersion.IsAtLeastPacific() {
+		if r.clusterInfo.CephVersion.IsAtLeast(cephNFSChangeVersion) {
+			cephNFS.Spec.RADOS.Pool = postNFSChangeDefaultPoolName
+		} else {
+			cephNFS.Spec.RADOS.Pool = preNFSChangeDefaultPoolName
+		}
+		cephNFS.Spec.RADOS.Namespace = cephNFS.Name
+	}
+
 	// validate the store settings
 	if err := validateGanesha(r.context, r.clusterInfo, cephNFS); err != nil {
 		return reconcile.Result{}, errors.Wrapf(err, "invalid ceph nfs %q arguments", cephNFS.Name)
 	}
+	if err := fetchOrCreatePool(r.context, r.clusterInfo, cephNFS); err != nil {
+		return reconcile.Result{}, errors.Wrap(err, "failed to fetch or create RADOS pool")
+	}
 
 	// CREATE/UPDATE
 	logger.Debug("reconciling ceph nfs deployments")
diff --git a/pkg/operator/ceph/nfs/controller_test.go b/pkg/operator/ceph/nfs/controller_test.go
index 6d2b7841b9fa..8378245ad7d0 100644
--- a/pkg/operator/ceph/nfs/controller_test.go
+++ b/pkg/operator/ceph/nfs/controller_test.go
@@ -29,7 +29,9 @@ import (
 	"github.com/rook/rook/pkg/client/clientset/versioned/scheme"
 	"github.com/rook/rook/pkg/clusterd"
 	"github.com/rook/rook/pkg/daemon/ceph/client"
+	"github.com/rook/rook/pkg/operator/ceph/cluster/mon"
 	"github.com/rook/rook/pkg/operator/ceph/version"
+	cephver "github.com/rook/rook/pkg/operator/ceph/version"
 	"github.com/rook/rook/pkg/operator/k8sutil"
 	"github.com/rook/rook/pkg/operator/test"
 	exectest "github.com/rook/rook/pkg/util/exec/test"
@@ -261,3 +263,131 @@ func TestGetGaneshaConfigObject(t *testing.T) {
 	logger.Infof("Config Object for Nautilus is %s", res)
 	assert.Equal(t, "conf-my-nfs.a", res)
 }
+
+func TestFetchOrCreatePool(t *testing.T) {
+	ctx := context.TODO()
+	cephNFS := &cephv1.CephNFS{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      name,
+			Namespace: namespace,
+		},
+		Spec: cephv1.NFSGaneshaSpec{
+			Server: cephv1.GaneshaServerSpec{
+				Active: 1,
+			},
+		},
+		TypeMeta: controllerTypeMeta,
+	}
+	executor := &exectest.MockExecutor{
+		MockExecuteCommandWithOutput: func(command string, args ...string) (string, error) {
+			return "", nil
+		},
+	}
+	clientset := test.New(t, 3)
+	c := &clusterd.Context{
+		Executor:      executor,
+		RookClientset: rookclient.NewSimpleClientset(),
+		Clientset:     clientset,
+	}
+	// Mock clusterInfo
+	secrets := map[string][]byte{
+		"fsid":         []byte(name),
+		"mon-secret":   []byte("monsecret"),
+		"admin-secret": []byte("adminsecret"),
+	}
+	secret := &v1.Secret{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      "rook-ceph-mon",
+			Namespace: namespace,
+		},
+		Data: secrets,
+		Type: k8sutil.RookType,
+	}
+	_, err := c.Clientset.CoreV1().Secrets(namespace).Create(ctx, secret, metav1.CreateOptions{})
+	assert.NoError(t, err)
+	clusterInfo, _, _, err := mon.LoadClusterInfo(c, ctx, namespace)
+	if err != nil {
+		t.Fatalf("failed to load cluster info: %v", err)
+	}
+
+	err = fetchOrCreatePool(c, clusterInfo, cephNFS)
+	assert.NoError(t, err)
+
+	executor = &exectest.MockExecutor{
+		MockExecuteCommandWithOutput: func(command string, args ...string) (string, error) {
+			if args[1] == "pool" && args[2] == "get" {
+				return "Error", errors.New("failed to get pool")
+			}
+			return "", nil
+		},
+	}
+
+	c.Executor = executor
+	err = fetchOrCreatePool(c, clusterInfo, cephNFS)
+	assert.Error(t, err)
+
+	executor = &exectest.MockExecutor{
+		MockExecuteCommandWithOutput: func(command string, args ...string) (string, error) {
+			if args[1] == "pool" && args[2] == "get" {
+				return "Error", errors.New("failed to get pool: unrecognized pool")
+			}
+			return "", nil
+		},
+	}
+
+	c.Executor = executor
+	err = fetchOrCreatePool(c, clusterInfo, cephNFS)
+	assert.Error(t, err)
+
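+	// On Pacific (16.2.6 here), an unrecognized pool should be created
+	// automatically rather than returning an error.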
+	clusterInfo.CephVersion = cephver.CephVersion{
+		Major: 16,
+		Minor: 2,
+		Extra: 6,
+	}
+
+	executor = &exectest.MockExecutor{
+		MockExecuteCommandWithOutput: func(command string, args ...string) (string, error) {
+			if args[1] == "pool" && args[2] == "get" {
+				return "Error", errors.New("failed to get pool: unrecognized pool")
+			}
+			return "", nil
+		},
+	}
+
+	c.Executor = executor
+	err = fetchOrCreatePool(c, clusterInfo, cephNFS)
+	assert.NoError(t, err)
+
+	executor = &exectest.MockExecutor{
+		MockExecuteCommandWithOutput: func(command string, args ...string) (string, error) {
+			if args[1] == "pool" && args[2] == "get" {
+				return "Error", errors.New("failed to get pool: unrecognized pool")
+			}
+			if args[1] == "pool" && args[2] == "create" {
+				return "Error", errors.New("creating pool failed")
+			}
+			return "", nil
+		},
+	}
+
+	c.Executor = executor
+	err = fetchOrCreatePool(c, clusterInfo, cephNFS)
+	assert.Error(t, err)
+
+	executor = &exectest.MockExecutor{
+		MockExecuteCommandWithOutput: func(command string, args ...string) (string, error) {
+			if args[1] == "pool" && args[2] == "get" {
+				return "Error", errors.New("unrecognized pool")
+			}
+			if args[1] == "pool" && args[2] == "application" {
+				return "Error", errors.New("enabling pool failed")
+			}
+			return "", nil
+		},
+	}
+
+	c.Executor = executor
+	err = fetchOrCreatePool(c, clusterInfo, cephNFS)
+	assert.Error(t, err)
+}
diff --git a/pkg/operator/ceph/nfs/nfs.go b/pkg/operator/ceph/nfs/nfs.go
index 3cf1dcd11a8e..644aca1396fe 100644
--- a/pkg/operator/ceph/nfs/nfs.go
+++ b/pkg/operator/ceph/nfs/nfs.go
@@ -19,6 +19,7 @@ package nfs
 
 import (
 	"fmt"
+	"strings"
 
 	"github.com/banzaicloud/k8s-objectmatcher/patch"
 	"github.com/pkg/errors"
@@ -36,6 +37,10 @@ import (
 
 const (
 	ganeshaRadosGraceCmd = "ganesha-rados-grace"
+	// Default RADOS pool name after the NFS changes in Ceph
+	postNFSChangeDefaultPoolName = ".nfs"
+	// Default RADOS pool name before the NFS changes in Ceph
+	preNFSChangeDefaultPoolName = "nfs-ganesha"
 )
 
 var updateDeploymentAndWait = opmon.UpdateCephDeploymentAndWait
@@ -264,16 +269,45 @@ func validateGanesha(context *clusterd.Context, clusterInfo *cephclient.ClusterI
 		return errors.New("missing RADOS.pool")
 	}
 
+	if n.Spec.RADOS.Namespace == "" {
+		return errors.New("missing RADOS.namespace")
+	}
+
 	// Ganesha server properties
 	if n.Spec.Server.Active == 0 {
 		return errors.New("at least one active server required")
 	}
 
+	return nil
+}
+
+// create and enable default RADOS pool
+func createDefaultNFSRADOSPool(context *clusterd.Context, clusterInfo *cephclient.ClusterInfo, defaultRadosPoolName string) error {
+	args := []string{"osd", "pool", "create", defaultRadosPoolName}
+	_, err := cephclient.NewCephCommand(context, clusterInfo, args).Run()
+	if err != nil {
+		return err
+	}
+	args = []string{"osd", "pool", "application", "enable", defaultRadosPoolName, "nfs"}
+	_, err = cephclient.NewCephCommand(context, clusterInfo, args).Run()
+	if err != nil {
+		return err
+	}
+	return nil
+}
+
+func fetchOrCreatePool(context *clusterd.Context, clusterInfo *cephclient.ClusterInfo, n *cephv1.CephNFS) error {
 	// The existence of the pool provided in n.Spec.RADOS.Pool is necessary otherwise addRADOSConfigFile() will fail
 	_, err := cephclient.GetPoolDetails(context, clusterInfo, n.Spec.RADOS.Pool)
 	if err != nil {
+		if strings.Contains(err.Error(), "unrecognized pool") && clusterInfo.CephVersion.IsAtLeastPacific() {
+			err := createDefaultNFSRADOSPool(context, clusterInfo, n.Spec.RADOS.Pool)
+			if err != nil {
+				return errors.Wrapf(err, "failed to find %q pool and unable to create it", n.Spec.RADOS.Pool)
+			}
+			return nil
+		}
 		return errors.Wrapf(err, "pool %q not found", n.Spec.RADOS.Pool)
 	}
-
 	return nil
 }
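
With this change applied, a CephNFS CR on a cluster running Ceph Pacific 16.2.7 or later can omit the rados block entirely: the operator defaults the pool to ".nfs" and the RADOS namespace to the CR's name before validation runs. A minimal sketch of such a CR, reusing the my-nfs/rook-ceph names from the examples above:

apiVersion: ceph.rook.io/v1
kind: CephNFS
metadata:
  name: my-nfs
  namespace: rook-ceph
spec:
  # No rados block needed: on Ceph >= 16.2.7 the operator fills in the
  # ".nfs" pool and uses the CR name ("my-nfs") as the RADOS namespace.
  server:
    active: 1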