From 7402c2cce681103248202e0d09f85d7cc1159642 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?S=C3=A9bastien=20Han?=
Date: Tue, 23 Nov 2021 18:49:43 +0100
Subject: [PATCH] osd: check if osd is ok-to-stop before removal
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

If multiple removal jobs are fired in parallel, there is a risk of data
loss since we forcefully remove the OSD. The same risk exists for a
single OSD that is not safe to destroy. So we now check whether the OSD
is safe-to-destroy before proceeding with the removal. The check
retries every minute, forever, unless the --force-osd-removal flag is
passed.

Signed-off-by: Sébastien Han
---
 .github/workflows/canary-integration-test.yml | 15 +++++
 .../examples/kubernetes/ceph/osd-purge.yaml   | 14 ++++-
 cmd/rook/ceph/osd.go                          | 27 ++++++++-
 pkg/daemon/ceph/client/crash.go               | 14 +++--
 pkg/daemon/ceph/client/osd.go                 |  2 +-
 pkg/daemon/ceph/osd/remove.go                 | 55 ++++++++++++++---
 tests/scripts/collect-logs.sh                 |  4 ++
 7 files changed, 115 insertions(+), 16 deletions(-)

diff --git a/.github/workflows/canary-integration-test.yml b/.github/workflows/canary-integration-test.yml
index 5821161395de..80e8b9a1745f 100644
--- a/.github/workflows/canary-integration-test.yml
+++ b/.github/workflows/canary-integration-test.yml
@@ -69,6 +69,21 @@ jobs:
       - name: check-ownerreferences
         run: tests/scripts/github-action-helper.sh check_ownerreferences
 
+      - name: test osd removal jobs
+        run: |
+          kubectl -n rook-ceph delete deploy/rook-ceph-operator
+          kubectl -n rook-ceph delete deploy/rook-ceph-osd-1 --grace-period=0 --force
+          sed -i 's/<OSD-IDs>/1/' cluster/examples/kubernetes/ceph/osd-purge.yaml
+          # the CI must force the deletion since we use replica 1 on 2 OSDs
+          sed -i 's/false/true/' cluster/examples/kubernetes/ceph/osd-purge.yaml
+          sed -i 's|rook/ceph:master|rook/ceph:local-build|' cluster/examples/kubernetes/ceph/osd-purge.yaml
+          kubectl -n rook-ceph create -f cluster/examples/kubernetes/ceph/osd-purge.yaml
+          toolbox=$(kubectl get pod -l app=rook-ceph-tools -n rook-ceph -o jsonpath='{.items[*].metadata.name}')
+          kubectl -n rook-ceph exec $toolbox -- ceph status
+          timeout 120 sh -c "until kubectl -n rook-ceph exec $toolbox -- ceph osd tree|grep -qE 'osd.1.*.destroyed'; do echo 'waiting for ceph osd 1 to be destroyed'; sleep 1; done"
+          kubectl -n rook-ceph exec $toolbox -- ceph status
+          kubectl -n rook-ceph exec $toolbox -- ceph osd tree
+
       - name: collect common logs
         if: always()
         uses: ./.github/workflows/collect-logs
diff --git a/cluster/examples/kubernetes/ceph/osd-purge.yaml b/cluster/examples/kubernetes/ceph/osd-purge.yaml
index d00a6ebbec31..cda943794560 100644
--- a/cluster/examples/kubernetes/ceph/osd-purge.yaml
+++ b/cluster/examples/kubernetes/ceph/osd-purge.yaml
@@ -29,7 +29,19 @@ spec:
           # TODO: Insert the OSD ID in the last parameter that is to be removed
           # The OSD IDs are a comma-separated list. For example: "0" or "0,2".
           # If you want to preserve the OSD PVCs, set `--preserve-pvc true`.
-          args: ["ceph", "osd", "remove", "--preserve-pvc", "false", "--osd-ids", "<OSD-IDs>"]
+          #
+          # A --force-osd-removal option is available if the OSD should be destroyed even though the
+          # removal could lead to data loss.
+          args:
+            - "ceph"
+            - "osd"
+            - "remove"
+            - "--preserve-pvc"
+            - "false"
+            - "--force-osd-removal"
+            - "false"
+            - "--osd-ids"
+            - "<OSD-IDs>"
           env:
             - name: POD_NAMESPACE
               valueFrom:
diff --git a/cmd/rook/ceph/osd.go b/cmd/rook/ceph/osd.go
index a57d6d1f47d5..f7adb70b4458 100644
--- a/cmd/rook/ceph/osd.go
+++ b/cmd/rook/ceph/osd.go
@@ -20,6 +20,7 @@ import (
 	"context"
 	"encoding/json"
 	"os"
+	"strconv"
 	"strings"
 
 	"k8s.io/client-go/kubernetes"
@@ -71,7 +72,8 @@ var (
 	blockPath       string
 	lvBackedPV      bool
 	osdIDsToRemove  string
-	preservePVC     bool
+	preservePVC     string
+	forceOSDRemoval string
 )
 
 func addOSDFlags(command *cobra.Command) {
@@ -100,7 +102,8 @@ func addOSDFlags(command *cobra.Command) {
 
 	// flags for removing OSDs that are unhealthy or otherwise should be purged from the cluster
 	osdRemoveCmd.Flags().StringVar(&osdIDsToRemove, "osd-ids", "", "OSD IDs to remove from the cluster")
-	osdRemoveCmd.Flags().BoolVar(&preservePVC, "preserve-pvc", false, "Whether PVCs for OSDs will be deleted")
+	osdRemoveCmd.Flags().StringVar(&preservePVC, "preserve-pvc", "false", "Whether to preserve the OSD PVCs when the OSDs are removed")
+	osdRemoveCmd.Flags().StringVar(&forceOSDRemoval, "force-osd-removal", "false", "Whether to force remove the OSD")
 
 	// add the subcommands to the parent osd command
 	osdCmd.AddCommand(osdConfigCmd,
@@ -265,11 +268,29 @@ func removeOSDs(cmd *cobra.Command, args []string) error {
 
 	clusterInfo.Context = cmd.Context()
 
+	// We use strings instead of bools since the flag package has issues parsing bools, or
+	// perhaps it's the translation between YAML and code... It's unclear, but see:
+	// starting Rook v1.7.0-alpha.0.660.gb13faecc8 with arguments '/usr/local/bin/rook ceph osd remove --preserve-pvc false --force-osd-removal false --osd-ids 1'
+	// flag values: --force-osd-removal=true, --help=false, --log-level=DEBUG, --operator-image=,
+	// --osd-ids=1, --preserve-pvc=true, --service-account=
+	//
+	// The bools are passed as false but the flag package interprets them as true.
+
+	forceOSDRemovalBool, err := strconv.ParseBool(forceOSDRemoval)
+	if err != nil {
+		return errors.Wrapf(err, "failed to parse --force-osd-removal flag")
+	}
+	preservePVCBool, err := strconv.ParseBool(preservePVC)
+	if err != nil {
+		return errors.Wrapf(err, "failed to parse --preserve-pvc flag")
+	}
+
 	// Run OSD remove sequence
-	err := osddaemon.RemoveOSDs(context, &clusterInfo, strings.Split(osdIDsToRemove, ","), preservePVC)
+	err = osddaemon.RemoveOSDs(context, &clusterInfo, strings.Split(osdIDsToRemove, ","), preservePVCBool, forceOSDRemovalBool)
 	if err != nil {
 		rook.TerminateFatal(err)
 	}
+
 	return nil
 }
 
diff --git a/pkg/daemon/ceph/client/crash.go b/pkg/daemon/ceph/client/crash.go
index 8f8d478a9f0a..81366134a91e 100644
--- a/pkg/daemon/ceph/client/crash.go
+++ b/pkg/daemon/ceph/client/crash.go
@@ -57,26 +57,31 @@ type CrashList struct {
 
 // GetCrashList gets the list of Crashes.
 func GetCrashList(context *clusterd.Context, clusterInfo *ClusterInfo) ([]CrashList, error) {
-	crashargs := []string{"crash", "ls"}
-	output, err := NewCephCommand(context, clusterInfo, crashargs).Run()
+	crashArgs := []string{"crash", "ls"}
+	output, err := NewCephCommand(context, clusterInfo, crashArgs).Run()
 	if err != nil {
 		return nil, errors.Wrap(err, "failed to list ceph crash")
 	}
+
 	var crash []CrashList
 	err = json.Unmarshal(output, &crash)
 	if err != nil {
 		return nil, errors.Wrapf(err, "failed to unmarshal crash ls response. %s", string(output))
 	}
+
 	return crash, err
 }
 
 // ArchiveCrash archives the crash with respective crashID
 func ArchiveCrash(context *clusterd.Context, clusterInfo *ClusterInfo, crashID string) error {
-	crashsilenceargs := []string{"crash", "archive", crashID}
-	_, err := NewCephCommand(context, clusterInfo, crashsilenceargs).Run()
+	logger.Infof("silencing crash %q", crashID)
+	crashSilenceArgs := []string{"crash", "archive", crashID}
+	_, err := NewCephCommand(context, clusterInfo, crashSilenceArgs).Run()
 	if err != nil {
 		return errors.Wrapf(err, "failed to archive crash %q", crashID)
 	}
+
+	logger.Infof("successfully silenced crash %q", crashID)
 	return nil
 }
 
@@ -86,5 +91,6 @@ func GetCrash(context *clusterd.Context, clusterInfo *ClusterInfo) ([]CrashList,
 	if err != nil {
 		return nil, errors.Wrap(err, "failed to list ceph crash")
 	}
+
 	return crash, nil
 }
diff --git a/pkg/daemon/ceph/client/osd.go b/pkg/daemon/ceph/client/osd.go
index c571ec800057..4df569b07202 100644
--- a/pkg/daemon/ceph/client/osd.go
+++ b/pkg/daemon/ceph/client/osd.go
@@ -263,7 +263,7 @@ func OsdSafeToDestroy(context *clusterd.Context, clusterInfo *ClusterInfo, osdID
 
 	var output SafeToDestroyStatus
 	if err := json.Unmarshal(buf, &output); err != nil {
-		return false, errors.Wrap(err, "failed to unmarshal safe-to-destroy response")
+		return false, errors.Wrapf(err, "failed to unmarshal safe-to-destroy response. %s", string(buf))
 	}
 	if len(output.SafeToDestroy) != 0 && output.SafeToDestroy[0] == osdID {
 		return true, nil
diff --git a/pkg/daemon/ceph/osd/remove.go b/pkg/daemon/ceph/osd/remove.go
index 81ec4863ddb8..afee93a67ef3 100644
--- a/pkg/daemon/ceph/osd/remove.go
+++ b/pkg/daemon/ceph/osd/remove.go
@@ -19,6 +19,7 @@ package osd
 import (
 	"fmt"
 	"strconv"
+	"time"
 
 	kerrors "k8s.io/apimachinery/pkg/api/errors"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@@ -31,8 +32,7 @@ import (
 )
 
 // RemoveOSDs purges a list of OSDs from the cluster
-func RemoveOSDs(context *clusterd.Context, clusterInfo *client.ClusterInfo, osdsToRemove []string, preservePVC bool) error {
-
+func RemoveOSDs(context *clusterd.Context, clusterInfo *client.ClusterInfo, osdsToRemove []string, preservePVC, forceOSDRemoval bool) error {
 	// Generate the ceph config for running ceph commands similar to the operator
 	if err := client.WriteCephConfig(context, clusterInfo); err != nil {
 		return errors.Wrap(err, "failed to write the ceph config")
@@ -58,8 +58,43 @@ func RemoveOSDs(context *clusterd.Context, clusterInfo *client.ClusterInfo, osds
 		if status == upStatus {
 			logger.Infof("osd.%d is healthy. It cannot be removed unless it is 'down'", osdID)
 			continue
+		} else {
+			logger.Infof("osd.%d is marked 'DOWN'", osdID)
+		}
+
+		// Check whether we can remove the OSD.
+		// Loop forever until the osd is safe-to-destroy.
+		for {
+			isSafeToDestroy, err := client.OsdSafeToDestroy(context, clusterInfo, osdID)
+			if err != nil {
+				// If we want to force remove the OSD and there was an error, let's break out of
+				// the loop and proceed with the OSD removal
+				if forceOSDRemoval {
+					logger.Errorf("failed to check if osd %d is safe to destroy, but force removal is enabled so proceeding with removal. %v", osdID, err)
+					break
+				} else {
+					logger.Errorf("failed to check if osd %d is safe to destroy, retrying in 1m. %v", osdID, err)
+					time.Sleep(1 * time.Minute)
+					continue
+				}
+			}
+
+			// If no error and the OSD is safe to destroy, we can proceed with the OSD removal
+			if isSafeToDestroy {
+				logger.Infof("osd.%d is safe to destroy, proceeding", osdID)
+				break
+			} else {
+				// If we arrive here and forceOSDRemoval is true, we should proceed with the OSD removal
+				if forceOSDRemoval {
+					logger.Infof("osd.%d is NOT safe to destroy, but force removal is enabled so proceeding with removal", osdID)
+					break
+				}
+				// Else we wait until the OSD can be removed
+				logger.Warningf("osd.%d is NOT safe to destroy, retrying in 1m until success", osdID)
+				time.Sleep(1 * time.Minute)
+			}
 		}
-		logger.Infof("osd.%d is marked 'DOWN'. Removing it", osdID)
+
 		removeOSD(context, clusterInfo, osdID, preservePVC)
 	}
 
@@ -74,6 +109,7 @@ func removeOSD(clusterdContext *clusterd.Context, clusterInfo *client.ClusterInf
 	}
 
 	// Mark the OSD as out.
+	logger.Infof("marking osd.%d out", osdID)
 	args := []string{"osd", "out", fmt.Sprintf("osd.%d", osdID)}
 	_, err = client.NewCephCommand(clusterdContext, clusterInfo, args).Run()
 	if err != nil {
@@ -138,18 +174,21 @@ func removeOSD(clusterdContext *clusterd.Context, clusterInfo *client.ClusterInf
 	}
 
 	// purge the osd
-	purgeosdargs := []string{"osd", "purge", fmt.Sprintf("osd.%d", osdID), "--force", "--yes-i-really-mean-it"}
-	_, err = client.NewCephCommand(clusterdContext, clusterInfo, purgeosdargs).Run()
+	logger.Infof("destroying osd.%d", osdID)
+	purgeOSDArgs := []string{"osd", "destroy", fmt.Sprintf("osd.%d", osdID), "--yes-i-really-mean-it"}
+	_, err = client.NewCephCommand(clusterdContext, clusterInfo, purgeOSDArgs).Run()
 	if err != nil {
 		logger.Errorf("failed to purge osd.%d. %v", osdID, err)
 	}
 
 	// Attempting to remove the parent host. Errors can be ignored if there are other OSDs on the same host
-	hostargs := []string{"osd", "crush", "rm", hostName}
-	_, err = client.NewCephCommand(clusterdContext, clusterInfo, hostargs).Run()
+	logger.Infof("removing osd.%d from ceph", osdID)
+	hostArgs := []string{"osd", "crush", "rm", hostName}
+	_, err = client.NewCephCommand(clusterdContext, clusterInfo, hostArgs).Run()
 	if err != nil {
 		logger.Errorf("failed to remove CRUSH host %q. %v", hostName, err)
 	}
+
 	// call archiveCrash to silence crash warning in ceph health if any
 	archiveCrash(clusterdContext, clusterInfo, osdID)
 
@@ -167,6 +206,7 @@ func archiveCrash(clusterdContext *clusterd.Context, clusterInfo *client.Cluster
 		logger.Info("no ceph crash to silence")
 		return
 	}
+
 	var crashID string
 	for _, c := range crash {
 		if c.Entity == fmt.Sprintf("osd.%d", osdID) {
@@ -174,6 +214,7 @@ func archiveCrash(clusterdContext *clusterd.Context, clusterInfo *client.Cluster
 			break
 		}
 	}
+
 	err = client.ArchiveCrash(clusterdContext, clusterInfo, crashID)
 	if err != nil {
 		logger.Errorf("failed to archive the crash %q. %v", crashID, err)
diff --git a/tests/scripts/collect-logs.sh b/tests/scripts/collect-logs.sh
index f0bf62cebb43..fce61d60f88e 100755
--- a/tests/scripts/collect-logs.sh
+++ b/tests/scripts/collect-logs.sh
@@ -26,9 +26,13 @@ done
 kubectl -n "${OPERATOR_NAMESPACE}" logs deploy/rook-ceph-operator > "${LOG_DIR}"/operator-logs.txt
 kubectl -n "${OPERATOR_NAMESPACE}" get pods -o wide > "${LOG_DIR}"/operator-pods-list.txt
 kubectl -n "${CLUSTER_NAMESPACE}" get pods -o wide > "${LOG_DIR}"/cluster-pods-list.txt
+kubectl -n "${CLUSTER_NAMESPACE}" get jobs -o wide > "${LOG_DIR}"/cluster-jobs-list.txt
 prepare_job="$(kubectl -n "${CLUSTER_NAMESPACE}" get job -l app=rook-ceph-osd-prepare --output name | awk 'FNR <= 1')" # outputs job/<name>
+removal_job="$(kubectl -n "${CLUSTER_NAMESPACE}" get job -l app=rook-ceph-purge-osd --output name | awk 'FNR <= 1')" # outputs job/<name>
 kubectl -n "${CLUSTER_NAMESPACE}" describe "${prepare_job}" > "${LOG_DIR}"/osd-prepare-describe.txt
 kubectl -n "${CLUSTER_NAMESPACE}" logs "${prepare_job}" > "${LOG_DIR}"/osd-prepare-logs.txt
+kubectl -n "${CLUSTER_NAMESPACE}" describe "${removal_job}" > "${LOG_DIR}"/osd-removal-describe.txt
+kubectl -n "${CLUSTER_NAMESPACE}" logs "${removal_job}" > "${LOG_DIR}"/osd-removal-logs.txt
 kubectl -n "${CLUSTER_NAMESPACE}" logs deploy/rook-ceph-osd-0 --all-containers > "${LOG_DIR}"/rook-ceph-osd-0-logs.txt
 kubectl -n "${CLUSTER_NAMESPACE}" logs deploy/rook-ceph-osd-1 --all-containers > "${LOG_DIR}"/rook-ceph-osd-1-logs.txt
 kubectl get all -n "${OPERATOR_NAMESPACE}" -o wide > "${LOG_DIR}"/operator-wide.txt
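
Note for reviewers: a minimal sketch of how the new behaviour can be exercised by hand,
mirroring the CI step added in this patch. It assumes the `rook-ceph` namespace and the
toolbox pod from the Rook examples, and that the purge job pods carry the
`app=rook-ceph-purge-osd` label used by collect-logs.sh above; the manifest must be edited
first, and `--force-osd-removal true` should only be set when the potential data loss is
acceptable.

    # Edit osd-purge.yaml first: set --osd-ids to the OSD(s) to purge (e.g. "1") and,
    # only if data loss is acceptable, set --force-osd-removal to "true".
    kubectl -n rook-ceph create -f cluster/examples/kubernetes/ceph/osd-purge.yaml

    # Follow the removal job; it retries every minute while the OSD is not safe to destroy.
    kubectl -n rook-ceph logs -l app=rook-ceph-purge-osd -f

    # From the toolbox, confirm the OSD eventually shows as 'destroyed' in the CRUSH tree.
    toolbox=$(kubectl -n rook-ceph get pod -l app=rook-ceph-tools -o jsonpath='{.items[0].metadata.name}')
    kubectl -n rook-ceph exec "$toolbox" -- ceph osd tree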