Skip to content

Commit

Permalink
Merge pull request #8646 from parth-gr/mon-finalizer
Browse files Browse the repository at this point in the history
ceph: add finalizers to rook-ceph-mon secrets and configmap
  • Loading branch information
leseb committed Oct 8, 2021
2 parents 0e40d6d + 7c99858 commit c45fee8
Show file tree
Hide file tree
Showing 9 changed files with 170 additions and 24 deletions.
2 changes: 1 addition & 1 deletion Documentation/ceph-disaster-recovery.md
Expand Up @@ -320,7 +320,7 @@ kubectl create -f cluster.yaml
7. Scale up the operator

```shell
kubectl -n rook-ceph --replicas=1 deploy/rook-ceph-operator
kubectl -n rook-ceph scale --replicas=1 deploy/rook-ceph-operator
```

Watch the operator log to confirm that the reconcile completes successfully.
Expand Down
14 changes: 13 additions & 1 deletion Documentation/ceph-teardown.md
Expand Up @@ -148,7 +148,7 @@ If the cluster CRD still exists even though you have executed the delete command
When a Cluster CRD is created, a [finalizer](https://kubernetes.io/docs/tasks/access-kubernetes-api/extend-api-custom-resource-definitions/#finalizers) is added automatically by the Rook operator. The finalizer will allow the operator to ensure that before the cluster CRD is deleted, all block and file mounts will be cleaned up. Without proper cleanup, pods consuming the storage will be hung indefinitely until a system reboot.

The operator is responsible for removing the finalizer after the mounts have been cleaned up.
If for some reason the operator is not able to remove the finalizer (ie. the operator is not running anymore), you can delete the finalizer manually with the following command:
If for some reason the operator is not able to remove the finalizer (i.e., the operator is not running anymore), you can delete the finalizer manually with the following command:

```console
for CRD in $(kubectl get crd -n rook-ceph | awk '/ceph.rook.io/ {print $1}'); do
Expand All @@ -175,3 +175,15 @@ If the namespace is still stuck in Terminating state, you can check which resour
kubectl api-resources --verbs=list --namespaced -o name \
| xargs -n 1 kubectl get --show-kind --ignore-not-found -n rook-ceph
```

### Remove critical resource finalizers

Rook adds a finalizer `ceph.rook.io/disaster-protection` to resources critical to the Ceph cluster so that the resources will not be accidentally deleted.

The operator is responsible for removing the finalizers when a CephCluster is deleted.
If for some reason the operator is not able to remove the finalizers (i.e., the operator is not running anymore), you can remove the finalizers manually with the following commands:

```console
kubectl -n rook-ceph patch configmap rook-ceph-mon-endpoints --type merge -p '{"metadata":{"finalizers": [null]}}'
kubectl -n rook-ceph patch secrets rook-ceph-mon --type merge -p '{"metadata":{"finalizers": [null]}}'
```
2 changes: 2 additions & 0 deletions PendingReleaseNotes.md
Expand Up @@ -17,3 +17,5 @@ v1.8...

- The Rook Operator does not use "tini" as an init process. Instead, it uses the "rook" binary as the
init process and handles signals on its own.
- Rook adds a finalizer `ceph.rook.io/disaster-protection` to resources critical to the Ceph cluster
(the `rook-ceph-mon` secret and the `rook-ceph-mon-endpoints` configmap) so that the resources will not be accidentally deleted.
54 changes: 41 additions & 13 deletions pkg/operator/ceph/cluster/controller.go
Expand Up @@ -29,6 +29,7 @@ import (
"github.com/rook/rook/pkg/clusterd"
cephclient "github.com/rook/rook/pkg/daemon/ceph/client"
"github.com/rook/rook/pkg/daemon/ceph/osd/kms"
"github.com/rook/rook/pkg/operator/ceph/cluster/mon"
"github.com/rook/rook/pkg/operator/ceph/cluster/osd"
opcontroller "github.com/rook/rook/pkg/operator/ceph/controller"
"github.com/rook/rook/pkg/operator/ceph/csi"
Expand Down Expand Up @@ -312,10 +313,10 @@ func (r *ReconcileCephCluster) reconcileDelete(cephCluster *cephv1.CephCluster)
}
}

// Remove finalizer
err = removeFinalizer(r.client, nsName)
// Remove finalizers
err = r.removeFinalizers(r.client, nsName)
if err != nil {
return reconcile.Result{}, cephCluster, errors.Wrap(err, "failed to remove finalizer")
return reconcile.Result{}, cephCluster, errors.Wrap(err, "failed to remove finalizers")
}

// Return and do not requeue. Successful deletion.
Expand Down Expand Up @@ -461,23 +462,50 @@ func (c *ClusterController) checkPVPresentInCluster(drivers []string, clusterID
return false, nil
}

// removeFinalizer removes a finalizer
func removeFinalizer(client client.Client, name types.NamespacedName) error {
cephCluster := &cephv1.CephCluster{}
err := client.Get(context.TODO(), name, cephCluster)
// removeFinalizers removes the finalizers guarding a CephCluster and its
// critical mon resources.
//
// name identifies the CephCluster; the mon secret and the mon endpoints
// configmap are looked up in the same namespace. The finalizers are removed in
// this order:
//
//  1. the disaster-protection finalizer on the rook-ceph-mon secret,
//  2. the disaster-protection finalizer on the rook-ceph-mon-endpoints configmap,
//  3. the CephCluster's own finalizer.
//
// The CephCluster finalizer is deliberately removed last. Kubernetes finishes
// deleting an object as soon as its finalizer list becomes empty, so removing
// the CephCluster finalizer first would let the CephCluster disappear while the
// secret/configmap finalizers are still set. If one of the later removals then
// failed, the protected resources would stay stuck with their finalizer and
// there would be no CephCluster left to drive another deletion reconcile
// (NOTE(review): confirm against the reconcile entry point).
//
// It returns the first error encountered, wrapped with the resource it
// concerns. Resources that no longer exist are not treated as errors by
// removeFinalizer.
func (r *ReconcileCephCluster) removeFinalizers(client client.Client, name types.NamespacedName) error {
	// Remove finalizer for rook-ceph-mon secret
	secretName := types.NamespacedName{Name: mon.AppName, Namespace: name.Namespace}
	if err := r.removeFinalizer(client, secretName, &corev1.Secret{}, mon.DisasterProtectionFinalizerName); err != nil {
		return errors.Wrapf(err, "failed to remove finalizer for the secret %q", secretName.Name)
	}

	// Remove finalizer for rook-ceph-mon-endpoints configmap
	configMapName := types.NamespacedName{Name: mon.EndpointConfigMapName, Namespace: name.Namespace}
	if err := r.removeFinalizer(client, configMapName, &corev1.ConfigMap{}, mon.DisasterProtectionFinalizerName); err != nil {
		return errors.Wrapf(err, "failed to remove finalizer for the configmap %q", configMapName.Name)
	}

	// Remove cephcluster finalizer last so that a failure above can still be
	// retried while the CephCluster exists.
	if err := r.removeFinalizer(client, name, &cephv1.CephCluster{}, ""); err != nil {
		return errors.Wrap(err, "failed to remove cephcluster finalizer")
	}

	return nil
}

// removeFinalizer loads the object identified by name into obj and removes one
// finalizer from it.
//
// When finalizer is empty, the removal is delegated to
// opcontroller.RemoveFinalizer; otherwise the finalizer with exactly that name
// is removed through opcontroller.RemoveFinalizerWithName.
//
// A NotFound error from the initial Get is logged at debug level and reported
// as success (nil). Any other error is returned wrapped.
//
// NOTE(review): this span appears to show a diff hunk with removed and added
// lines interleaved (the statements that mention "CephCluster"/"ceph cluster"
// or the cephCluster variable look like the pre-change lines) — confirm against
// the committed file before relying on it.
func (r *ReconcileCephCluster) removeFinalizer(client client.Client, name types.NamespacedName, obj client.Object, finalizer string) error {
// Fetch the current state of the object using the operator manager context.
err := client.Get(r.opManagerContext, name, obj)
if err != nil {
// A missing object has nothing left to clean up.
if kerrors.IsNotFound(err) {
logger.Debug("CephCluster resource not found. Ignoring since object must be deleted.")
logger.Debugf("%s resource not found. Ignoring since object must be deleted.", name.Name)
return nil
}
return errors.Wrapf(err, "failed to retrieve ceph cluster %q to remove finalizer", name.Name)
return errors.Wrapf(err, "failed to retrieve %q to remove finalizer", name.Name)
}

err = opcontroller.RemoveFinalizer(client, cephCluster)
if err != nil {
return errors.Wrap(err, "failed to remove finalizer")
// An empty finalizer name selects opcontroller.RemoveFinalizer, which takes no name.
if finalizer == "" {
err = opcontroller.RemoveFinalizer(client, obj)
if err != nil {
return errors.Wrap(err, "failed to remove finalizer")
}
} else {
// Otherwise remove exactly the finalizer the caller asked for.
err = opcontroller.RemoveFinalizerWithName(client, obj, finalizer)
if err != nil {
return errors.Wrapf(err, "failed to remove finalizer %q", finalizer)
}
}

return nil
}

Expand Down
68 changes: 68 additions & 0 deletions pkg/operator/ceph/cluster/controller_test.go
Expand Up @@ -22,17 +22,22 @@ import (
"time"

cephv1 "github.com/rook/rook/pkg/apis/ceph.rook.io/v1"
"github.com/rook/rook/pkg/client/clientset/versioned/scheme"
"github.com/rook/rook/pkg/clusterd"
"github.com/rook/rook/pkg/operator/k8sutil"
"github.com/stretchr/testify/assert"
corev1 "k8s.io/api/core/v1"
kerrors "k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/api/meta"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/runtime/schema"
"k8s.io/apimachinery/pkg/types"
dynamicfake "k8s.io/client-go/dynamic/fake"
k8sfake "k8s.io/client-go/kubernetes/fake"
"k8s.io/client-go/tools/record"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/client/fake"
clientfake "sigs.k8s.io/controller-runtime/pkg/client/fake"
"sigs.k8s.io/controller-runtime/pkg/reconcile"
)
Expand Down Expand Up @@ -86,6 +91,8 @@ func TestReconcileDeleteCephCluster(t *testing.T) {
// Make sure it has the fake CephCluster that is to be deleted in it
client := clientfake.NewClientBuilder().WithScheme(scheme).WithRuntimeObjects(fakeCluster).Build()

err := corev1.AddToScheme(scheme)
assert.NoError(t, err)
// Create a ReconcileCephClient object with the scheme and fake client.
reconcileCephCluster := &ReconcileCephCluster{
client: client,
Expand Down Expand Up @@ -130,3 +137,64 @@ func TestReconcileDeleteCephCluster(t *testing.T) {
assert.True(t, kerrors.IsNotFound(err))
})
}

// TestRemoveFinalizers runs ReconcileCephCluster.removeFinalizer against a fake
// client for each supported object kind and checks that the single finalizer
// set on the fixture is gone afterwards.
func TestRemoveFinalizers(t *testing.T) {
	reconciler := &ReconcileCephCluster{opManagerContext: context.TODO()}
	testScheme := scheme.Scheme

	// Fixtures: one object per kind, each carrying exactly one finalizer.
	cluster := &cephv1.CephCluster{
		TypeMeta: metav1.TypeMeta{Kind: "CephCluster"},
		ObjectMeta: metav1.ObjectMeta{
			Name:       "test",
			Namespace:  "rook-ceph",
			Finalizers: []string{"cephcluster.ceph.rook.io"},
		},
	}
	monSecret := &corev1.Secret{
		TypeMeta: metav1.TypeMeta{Kind: "Secret"},
		ObjectMeta: metav1.ObjectMeta{
			Name:       "test",
			Namespace:  "rook-ceph",
			Finalizers: []string{"ceph.rook.io/disaster-protection"},
		},
	}

	cases := []struct {
		name         string
		finalizer    string
		object       client.Object
		groupVersion schema.GroupVersion
	}{
		{
			name:         "CephCluster",
			finalizer:    "cephcluster.ceph.rook.io",
			object:       cluster,
			groupVersion: schema.GroupVersion{Group: "ceph.rook.io", Version: "v1"},
		},
		{
			name:         "mon secret",
			finalizer:    "ceph.rook.io/disaster-protection",
			object:       monSecret,
			groupVersion: schema.GroupVersion{Group: "", Version: "v1"},
		},
	}

	for _, tc := range cases {
		t.Run("delete finalizer for "+tc.name, func(t *testing.T) {
			accessor, err := meta.Accessor(tc.object)
			assert.NoError(t, err)

			// Register the fixture's type and seed the fake client with it.
			seed := []runtime.Object{tc.object}
			testScheme.AddKnownTypes(tc.groupVersion, tc.object)
			fakeClient := fake.NewClientBuilder().WithScheme(testScheme).WithRuntimeObjects(seed...).Build()

			assert.NotEmpty(t, accessor.GetFinalizers())

			key := types.NamespacedName{Name: accessor.GetName(), Namespace: accessor.GetNamespace()}
			err = reconciler.removeFinalizer(fakeClient, key, tc.object, tc.finalizer)
			assert.NoError(t, err)

			// The object passed in is the one checked here.
			assert.Empty(t, accessor.GetFinalizers())
		})
	}
}
5 changes: 3 additions & 2 deletions pkg/operator/ceph/cluster/mon/config.go
Expand Up @@ -280,8 +280,9 @@ func createClusterAccessSecret(clientset kubernetes.Interface, namespace string,
}
secret := &v1.Secret{
ObjectMeta: metav1.ObjectMeta{
Name: AppName,
Namespace: namespace,
Name: AppName,
Namespace: namespace,
Finalizers: []string{DisasterProtectionFinalizerName},
},
Data: secrets,
Type: k8sutil.RookType,
Expand Down
7 changes: 5 additions & 2 deletions pkg/operator/ceph/cluster/mon/mon.go
Expand Up @@ -94,6 +94,8 @@ const (
// pods and waiting for kubernetes scheduling to complete.
canaryRetries = 30
canaryRetryDelaySeconds = 5

DisasterProtectionFinalizerName = cephv1.CustomResourceGroup + "/disaster-protection"
)

var (
Expand Down Expand Up @@ -1030,8 +1032,9 @@ func (c *Cluster) saveMonConfig() error {
func (c *Cluster) persistExpectedMonDaemons() error {
configMap := &v1.ConfigMap{
ObjectMeta: metav1.ObjectMeta{
Name: EndpointConfigMapName,
Namespace: c.Namespace,
Name: EndpointConfigMapName,
Namespace: c.Namespace,
Finalizers: []string{DisasterProtectionFinalizerName},
},
}
err := c.ownerInfo.SetControllerReference(configMap)
Expand Down
15 changes: 10 additions & 5 deletions pkg/operator/ceph/controller/finalizer.go
Expand Up @@ -75,21 +75,26 @@ func AddFinalizerIfNotPresent(client client.Client, obj client.Object) error {

// RemoveFinalizer removes the default finalizer from obj, i.e. the finalizer
// name that buildFinalizerName produces for the object's Kind. The Kind is read
// from obj as passed in, and the removal itself is delegated to
// RemoveFinalizerWithName.
func RemoveFinalizer(client client.Client, obj client.Object) error {
	kind := obj.GetObjectKind().GroupVersionKind().Kind
	return RemoveFinalizerWithName(client, obj, buildFinalizerName(kind))
}

// RemoveFinalizerWithName removes finalizer passed as an argument from an object
func RemoveFinalizerWithName(client client.Client, obj client.Object, finalizerName string) error {
err := client.Get(context.TODO(), types.NamespacedName{Name: obj.GetName(), Namespace: obj.GetNamespace()}, obj)
if err != nil {
return errors.Wrap(err, "failed to get the latest version of the object")
}
objectFinalizer := buildFinalizerName(obj.GetObjectKind().GroupVersionKind().Kind)
accessor, err := meta.Accessor(obj)
if err != nil {
return errors.Wrap(err, "failed to get meta information of object")
}

if contains(accessor.GetFinalizers(), objectFinalizer) {
logger.Infof("removing finalizer %q on %q", objectFinalizer, accessor.GetName())
accessor.SetFinalizers(remove(accessor.GetFinalizers(), objectFinalizer))
if contains(accessor.GetFinalizers(), finalizerName) {
logger.Infof("removing finalizer %q on %q", finalizerName, accessor.GetName())
accessor.SetFinalizers(remove(accessor.GetFinalizers(), finalizerName))
if err := client.Update(context.TODO(), obj); err != nil {
return errors.Wrapf(err, "failed to remove finalizer %q on %q", objectFinalizer, accessor.GetName())
return errors.Wrapf(err, "failed to remove finalizer %q on %q", finalizerName, accessor.GetName())
}
}

Expand Down
27 changes: 27 additions & 0 deletions pkg/operator/ceph/controller/finalizer_test.go
Expand Up @@ -76,3 +76,30 @@ func TestRemoveFinalizer(t *testing.T) {
assert.NoError(t, err)
assert.Empty(t, fakeObject.Finalizers)
}

// TestRemoveFinalizerWithName checks that RemoveFinalizerWithName strips an
// explicitly named finalizer from an object served by a fake client, leaving
// the object's finalizer list empty.
func TestRemoveFinalizerWithName(t *testing.T) {
	const finalizerName = "cephblockpool.ceph.rook.io"

	pool := &cephv1.CephBlockPool{
		TypeMeta: metav1.TypeMeta{Kind: "cephblockpool"},
		ObjectMeta: metav1.ObjectMeta{
			Name:       "test",
			Namespace:  "rook-ceph",
			Finalizers: []string{finalizerName},
		},
	}

	// Register the fixture's type and seed the fake client with it.
	testScheme := scheme.Scheme
	testScheme.AddKnownTypes(cephv1.SchemeGroupVersion, pool)
	seed := []runtime.Object{pool}
	fakeClient := fake.NewClientBuilder().WithScheme(testScheme).WithRuntimeObjects(seed...).Build()

	assert.NotEmpty(t, pool.Finalizers)

	err := RemoveFinalizerWithName(fakeClient, pool, finalizerName)
	assert.NoError(t, err)
	assert.Empty(t, pool.Finalizers)
}

0 comments on commit c45fee8

Please sign in to comment.