From dfca98df150cbc48652bc33d77226eb2f0852e93 Mon Sep 17 00:00:00 2001 From: Blaine Gardner Date: Mon, 4 Oct 2021 13:06:46 -0600 Subject: [PATCH] rgw: replace period update --commit with function Replace calls to 'radosgw-admin period update --commit' with an idempotent function. Resolves #8879 Signed-off-by: Blaine Gardner --- pkg/operator/ceph/object/admin.go | 86 +++ pkg/operator/ceph/object/admin_test.go | 572 +++++++++++++++++++ pkg/operator/ceph/object/controller.go | 5 +- pkg/operator/ceph/object/controller_test.go | 85 ++- pkg/operator/ceph/object/objectstore.go | 48 +- pkg/operator/ceph/object/objectstore_test.go | 178 +++--- 6 files changed, 831 insertions(+), 143 deletions(-) diff --git a/pkg/operator/ceph/object/admin.go b/pkg/operator/ceph/object/admin.go index 1c60da838b4e8..bac57005ce118 100644 --- a/pkg/operator/ceph/object/admin.go +++ b/pkg/operator/ceph/object/admin.go @@ -18,13 +18,16 @@ package object import ( "context" + "encoding/json" "fmt" "net/http" "net/http/httputil" "regexp" + "strings" "github.com/ceph/go-ceph/rgw/admin" "github.com/coreos/pkg/capnslog" + "github.com/google/go-cmp/cmp" "github.com/pkg/errors" cephv1 "github.com/rook/rook/pkg/apis/ceph.rook.io/v1" "github.com/rook/rook/pkg/clusterd" @@ -292,6 +295,89 @@ func isInvalidFlagError(err error) bool { return exitCode == 22 } +// CommitConfigChanges commits changes to RGW configs for realm/zonegroup/zone changes idempotently. +// Under the hood, this updates the RGW config period and commits the change if changes are detected. +func CommitConfigChanges(c *Context) error { + currentPeriod, err := runAdminCommand(c, true, "period", "get") + if err != nil { + return errorOrIsNotFound(err, "failed to get the current RGW configuration period to see if it needs changed") + } + + // this stages the current config changees and returns what the new period config will look like + // without committing the changes + stagedPeriod, err := runAdminCommand(c, true, "period", "update") + if err != nil { + return errorOrIsNotFound(err, "failed to stage the current RGW configuration period") + } + + shouldCommit, err := periodWillChange(currentPeriod, stagedPeriod) + if err != nil { + return errors.Wrap(err, "failed to determine if the staged RGW configuration period is different from current") + } + + if !shouldCommit { + nsName := fmt.Sprintf("%s/%s", c.clusterInfo.Namespace, c.Name) + logger.Debugf("not committing changes to RGW configuration period for CephObjectStore %q", nsName) + return nil + } + // don't expect json output since we don't intend to use the output from the command + _, err = runAdminCommand(c, false, "period", "update", "--commit") + if err != nil { + return errorOrIsNotFound(err, "failed to commit RGW configuration period changes") + } + + return nil +} + +// return true if the configuration period will change if the staged period is committed +func periodWillChange(current, staged string) (bool, error) { + var currentJSON map[string]interface{} + var stagedJSON map[string]interface{} + var err error + + err = json.Unmarshal([]byte(current), ¤tJSON) + if err != nil { + return true, errors.Wrap(err, "failed to unmarshal current RGW configuration period") + } + err = json.Unmarshal([]byte(staged), &stagedJSON) + if err != nil { + return true, errors.Wrap(err, "failed to unmarshal staged RGW configuration period") + } + + // There are some values in the periods that we don't care to diff because they are always + // different in the staged period, even when no updates are needed. Sometimes, the values are + // reported as different in the staging output but aren't actually changed upon commit. + ignorePaths := cmp.FilterPath(func(path cmp.Path) bool { + // path.String() outputs nothing for the crude map[string]interface{} JSON struct + // Example of path.GoString() output for a long path in the period JSON: + // root["period_map"].(map[string]interface {})["short_zone_ids"].([]interface {})[0].(map[string]interface {})["val"].(float64) + switch path.GoString() { + case `root["id"]`: + // "id" is always changed in the staged period, but it doesn't always update. + return true + case `root["predecessor_uuid"]`: + // "predecessor_uuid" is always changed in the staged period, but it doesn't always update. + return true + case `root["realm_epoch"]`: + // "realm_epoch" is always incremented in the staged period, but it doesn't always increment. + return true + case `root["epoch"]`: + // Strangely, "epoch" is not incremented in the staged period even though it is always + // incremented upon an actual commit. It could be argued that this behavior is a bug. + // Ignore this value to handle the possibility that the behavior changes in the future. + return true + default: + return false + } + }, cmp.Ignore()) + + diff := cmp.Diff(currentJSON, stagedJSON, ignorePaths) + diff = strings.TrimSpace(diff) + logger.Debugf("RGW config period diff:\n%s", diff) + + return (diff != ""), nil +} + func GetAdminOPSUserCredentials(objContext *Context, spec *cephv1.ObjectStoreSpec) (string, string, error) { ns := objContext.clusterInfo.Namespace diff --git a/pkg/operator/ceph/object/admin_test.go b/pkg/operator/ceph/object/admin_test.go index 5e05060b23421..30b84e23ee514 100644 --- a/pkg/operator/ceph/object/admin_test.go +++ b/pkg/operator/ceph/object/admin_test.go @@ -21,6 +21,7 @@ import ( "testing" "time" + "github.com/pkg/errors" v1 "github.com/rook/rook/pkg/apis/ceph.rook.io/v1" "github.com/rook/rook/pkg/clusterd" "github.com/rook/rook/pkg/daemon/ceph/client" @@ -158,3 +159,574 @@ func TestRunAdminCommandNoMultisite(t *testing.T) { assert.EqualError(t, err, "no pods found with selector \"rook-ceph-mgr\"") }) } + +func TestCommitConfigChanges(t *testing.T) { + // control the return values from calling get/update on period + type commandReturns struct { + periodGetOutput string // empty implies error + periodUpdateOutput string // empty implies error + periodCommitError bool + } + + // control whether we should expect certain 'get' calls + type expectCommands struct { + // note: always expect period get to be called + periodUpdate bool + periodCommit bool + } + + // vars used to check if commands were called + var ( + periodGetCalled = false + periodUpdateCalled = false + periodCommitCalled = false + ) + + setupTest := func(returns commandReturns) *clusterd.Context { + // reset vars for checking if commands were called + periodGetCalled = false + periodUpdateCalled = false + periodCommitCalled = false + + executor := &exectest.MockExecutor{ + MockExecuteCommandWithTimeout: func(timeout time.Duration, command string, args ...string) (string, error) { + if command == "radosgw-admin" { + if args[0] == "period" { + if args[1] == "get" { + periodGetCalled = true + if returns.periodGetOutput == "" { + return "", errors.New("fake period get error") + } + return returns.periodGetOutput, nil + } + if args[1] == "update" { + if args[2] == "--commit" { + periodCommitCalled = true + if returns.periodCommitError { + return "", errors.New("fake period update --commit error") + } + return "", nil // success + } + periodUpdateCalled = true + if returns.periodUpdateOutput == "" { + return "", errors.New("fake period update (no --commit) error") + } + return returns.periodUpdateOutput, nil + } + } + } + + t.Fatalf("unhandled command: %s %v", command, args) + panic("unhandled command") + }, + } + + return &clusterd.Context{ + Executor: executor, + } + } + + expectNoErr := false // want no error + expectErr := true // want an error + + tests := []struct { + name string + commandReturns commandReturns + expectCommands expectCommands + wantErr bool + }{ + // a bit more background: creating a realm creates the first period epoch. When Rook creates + // zonegroup and zone, it results in many changes to the period. + {"real-world first reconcile (many changes, should commit period)", + commandReturns{ + periodGetOutput: firstPeriodGet, + periodUpdateOutput: firstPeriodUpdate, + }, + expectCommands{ + periodUpdate: true, + periodCommit: true, + }, + expectNoErr, + }, + // note: this also tests that we support the output changing in the future to increment "epoch" + {"real-world second reconcile (no changes, should not commit period)", + commandReturns{ + periodGetOutput: secondPeriodGet, + periodUpdateOutput: secondPeriodUpdateWithoutChanges, + }, + expectCommands{ + periodUpdate: true, + periodCommit: false, + }, + expectNoErr, + }, + {"second reconcile with changes", + commandReturns{ + periodGetOutput: secondPeriodGet, + periodUpdateOutput: secondPeriodUpdateWithChanges, + }, + expectCommands{ + periodUpdate: true, + periodCommit: true, + }, + expectNoErr, + }, + {"invalid get json", + commandReturns{ + periodGetOutput: `{"ids": [}`, // json obj with incomplete array that won't parse + periodUpdateOutput: firstPeriodUpdate, + }, + expectCommands{ + periodUpdate: true, + periodCommit: false, + }, + expectErr, + }, + {"invalid update json", + commandReturns{ + periodGetOutput: firstPeriodGet, + periodUpdateOutput: `{"ids": [}`, + }, + expectCommands{ + periodUpdate: true, + periodCommit: false, + }, + expectErr, + }, + {"fail period get", + commandReturns{ + periodGetOutput: "", // error + periodUpdateOutput: firstPeriodUpdate, + }, + expectCommands{ + periodUpdate: false, + periodCommit: false, + }, + expectErr, + }, + {"fail period update", + commandReturns{ + periodGetOutput: firstPeriodGet, + periodUpdateOutput: "", // error + }, + expectCommands{ + periodUpdate: true, + periodCommit: false, + }, + expectErr, + }, + {"fail period commit", + commandReturns{ + periodGetOutput: firstPeriodGet, + periodUpdateOutput: firstPeriodUpdate, + periodCommitError: true, + }, + expectCommands{ + periodUpdate: true, + periodCommit: true, + }, + expectErr, + }, + {"configs are removed", + commandReturns{ + periodGetOutput: secondPeriodUpdateWithChanges, + periodUpdateOutput: secondPeriodUpdateWithoutChanges, + }, + expectCommands{ + periodUpdate: true, + periodCommit: true, + }, + expectNoErr, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + ctx := setupTest(tt.commandReturns) + objCtx := NewContext(ctx, &client.ClusterInfo{Namespace: "my-cluster"}, "my-store") + + err := CommitConfigChanges(objCtx) + if tt.wantErr { + assert.Error(t, err) + } else { + assert.NoError(t, err) + } + assert.True(t, periodGetCalled) + assert.Equal(t, tt.expectCommands.periodUpdate, periodUpdateCalled) + assert.Equal(t, tt.expectCommands.periodCommit, periodCommitCalled) + }) + } +} + +// example real-world output from 'radosgw-admin period get' after initial realm, zonegroup, and +// zone creation and before 'radosgw-admin period update --commit' +const firstPeriodGet = `{ + "id": "5338e008-26db-4013-92f5-c51505a917e2", + "epoch": 1, + "predecessor_uuid": "", + "sync_status": [], + "period_map": { + "id": "5338e008-26db-4013-92f5-c51505a917e2", + "zonegroups": [], + "short_zone_ids": [] + }, + "master_zonegroup": "", + "master_zone": "", + "period_config": { + "bucket_quota": { + "enabled": false, + "check_on_raw": false, + "max_size": -1, + "max_size_kb": 0, + "max_objects": -1 + }, + "user_quota": { + "enabled": false, + "check_on_raw": false, + "max_size": -1, + "max_size_kb": 0, + "max_objects": -1 + } + }, + "realm_id": "94ba560d-a560-431d-8ed4-85a2891f9122", + "realm_name": "my-store", + "realm_epoch": 1 +}` + +// example real-world output from 'radosgw-admin period update' after initial realm, zonegroup, and +// zone creation and before 'radosgw-admin period update --commit' +const firstPeriodUpdate = `{ + "id": "94ba560d-a560-431d-8ed4-85a2891f9122:staging", + "epoch": 1, + "predecessor_uuid": "5338e008-26db-4013-92f5-c51505a917e2", + "sync_status": [], + "period_map": { + "id": "5338e008-26db-4013-92f5-c51505a917e2", + "zonegroups": [ + { + "id": "1580fd1d-a065-4484-82ff-329e9a779999", + "name": "my-store", + "api_name": "my-store", + "is_master": "true", + "endpoints": [ + "http://10.105.59.166:80" + ], + "hostnames": [], + "hostnames_s3website": [], + "master_zone": "cea71d3a-9d22-45fb-a4e8-04fc6a494a50", + "zones": [ + { + "id": "cea71d3a-9d22-45fb-a4e8-04fc6a494a50", + "name": "my-store", + "endpoints": [ + "http://10.105.59.166:80" + ], + "log_meta": "false", + "log_data": "false", + "bucket_index_max_shards": 11, + "read_only": "false", + "tier_type": "", + "sync_from_all": "true", + "sync_from": [], + "redirect_zone": "" + } + ], + "placement_targets": [ + { + "name": "default-placement", + "tags": [], + "storage_classes": [ + "STANDARD" + ] + } + ], + "default_placement": "default-placement", + "realm_id": "94ba560d-a560-431d-8ed4-85a2891f9122", + "sync_policy": { + "groups": [] + } + } + ], + "short_zone_ids": [ + { + "key": "cea71d3a-9d22-45fb-a4e8-04fc6a494a50", + "val": 1698422904 + } + ] + }, + "master_zonegroup": "1580fd1d-a065-4484-82ff-329e9a779999", + "master_zone": "cea71d3a-9d22-45fb-a4e8-04fc6a494a50", + "period_config": { + "bucket_quota": { + "enabled": false, + "check_on_raw": false, + "max_size": -1, + "max_size_kb": 0, + "max_objects": -1 + }, + "user_quota": { + "enabled": false, + "check_on_raw": false, + "max_size": -1, + "max_size_kb": 0, + "max_objects": -1 + } + }, + "realm_id": "94ba560d-a560-431d-8ed4-85a2891f9122", + "realm_name": "my-store", + "realm_epoch": 2 +}` + +// example real-world output from 'radosgw-admin period get' after the first period commit +const secondPeriodGet = `{ + "id": "600c23a6-2452-4fc0-96b4-0c78b9b7c439", + "epoch": 1, + "predecessor_uuid": "5338e008-26db-4013-92f5-c51505a917e2", + "sync_status": [], + "period_map": { + "id": "600c23a6-2452-4fc0-96b4-0c78b9b7c439", + "zonegroups": [ + { + "id": "1580fd1d-a065-4484-82ff-329e9a779999", + "name": "my-store", + "api_name": "my-store", + "is_master": "true", + "endpoints": [ + "http://10.105.59.166:80" + ], + "hostnames": [], + "hostnames_s3website": [], + "master_zone": "cea71d3a-9d22-45fb-a4e8-04fc6a494a50", + "zones": [ + { + "id": "cea71d3a-9d22-45fb-a4e8-04fc6a494a50", + "name": "my-store", + "endpoints": [ + "http://10.105.59.166:80" + ], + "log_meta": "false", + "log_data": "false", + "bucket_index_max_shards": 11, + "read_only": "false", + "tier_type": "", + "sync_from_all": "true", + "sync_from": [], + "redirect_zone": "" + } + ], + "placement_targets": [ + { + "name": "default-placement", + "tags": [], + "storage_classes": [ + "STANDARD" + ] + } + ], + "default_placement": "default-placement", + "realm_id": "94ba560d-a560-431d-8ed4-85a2891f9122", + "sync_policy": { + "groups": [] + } + } + ], + "short_zone_ids": [ + { + "key": "cea71d3a-9d22-45fb-a4e8-04fc6a494a50", + "val": 1698422904 + } + ] + }, + "master_zonegroup": "1580fd1d-a065-4484-82ff-329e9a779999", + "master_zone": "cea71d3a-9d22-45fb-a4e8-04fc6a494a50", + "period_config": { + "bucket_quota": { + "enabled": false, + "check_on_raw": false, + "max_size": -1, + "max_size_kb": 0, + "max_objects": -1 + }, + "user_quota": { + "enabled": false, + "check_on_raw": false, + "max_size": -1, + "max_size_kb": 0, + "max_objects": -1 + } + }, + "realm_id": "94ba560d-a560-431d-8ed4-85a2891f9122", + "realm_name": "my-store", + "realm_epoch": 2 +}` + +// example real-world output from 'radosgw-admin period update' after the first period commit, +// and with no changes since the first commit +// note: output was modified to increment the epoch to make sure this code works in case the "epoch" +// behavior changes in radosgw-admin in the future +const secondPeriodUpdateWithoutChanges = `{ + "id": "94ba560d-a560-431d-8ed4-85a2891f9122:staging", + "epoch": 2, + "predecessor_uuid": "600c23a6-2452-4fc0-96b4-0c78b9b7c439", + "sync_status": [], + "period_map": { + "id": "600c23a6-2452-4fc0-96b4-0c78b9b7c439", + "zonegroups": [ + { + "id": "1580fd1d-a065-4484-82ff-329e9a779999", + "name": "my-store", + "api_name": "my-store", + "is_master": "true", + "endpoints": [ + "http://10.105.59.166:80" + ], + "hostnames": [], + "hostnames_s3website": [], + "master_zone": "cea71d3a-9d22-45fb-a4e8-04fc6a494a50", + "zones": [ + { + "id": "cea71d3a-9d22-45fb-a4e8-04fc6a494a50", + "name": "my-store", + "endpoints": [ + "http://10.105.59.166:80" + ], + "log_meta": "false", + "log_data": "false", + "bucket_index_max_shards": 11, + "read_only": "false", + "tier_type": "", + "sync_from_all": "true", + "sync_from": [], + "redirect_zone": "" + } + ], + "placement_targets": [ + { + "name": "default-placement", + "tags": [], + "storage_classes": [ + "STANDARD" + ] + } + ], + "default_placement": "default-placement", + "realm_id": "94ba560d-a560-431d-8ed4-85a2891f9122", + "sync_policy": { + "groups": [] + } + } + ], + "short_zone_ids": [ + { + "key": "cea71d3a-9d22-45fb-a4e8-04fc6a494a50", + "val": 1698422904 + } + ] + }, + "master_zonegroup": "1580fd1d-a065-4484-82ff-329e9a779999", + "master_zone": "cea71d3a-9d22-45fb-a4e8-04fc6a494a50", + "period_config": { + "bucket_quota": { + "enabled": false, + "check_on_raw": false, + "max_size": -1, + "max_size_kb": 0, + "max_objects": -1 + }, + "user_quota": { + "enabled": false, + "check_on_raw": false, + "max_size": -1, + "max_size_kb": 0, + "max_objects": -1 + } + }, + "realm_id": "94ba560d-a560-431d-8ed4-85a2891f9122", + "realm_name": "my-store", + "realm_epoch": 3 +}` + +// example output from 'radosgw-admin period update' after the first period commit, +// and with un-committed changes since the first commit (endpoint added to zonegroup and zone) +const secondPeriodUpdateWithChanges = `{ + "id": "94ba560d-a560-431d-8ed4-85a2891f9122:staging", + "epoch": 1, + "predecessor_uuid": "600c23a6-2452-4fc0-96b4-0c78b9b7c439", + "sync_status": [], + "period_map": { + "id": "600c23a6-2452-4fc0-96b4-0c78b9b7c439", + "zonegroups": [ + { + "id": "1580fd1d-a065-4484-82ff-329e9a779999", + "name": "my-store", + "api_name": "my-store", + "is_master": "true", + "endpoints": [ + "http://10.105.59.166:80", + "https://10.105.59.166:443" + ], + "hostnames": [], + "hostnames_s3website": [], + "master_zone": "cea71d3a-9d22-45fb-a4e8-04fc6a494a50", + "zones": [ + { + "id": "cea71d3a-9d22-45fb-a4e8-04fc6a494a50", + "name": "my-store", + "endpoints": [ + "http://10.105.59.166:80", + "https://10.105.59.166:443" + ], + "log_meta": "false", + "log_data": "false", + "bucket_index_max_shards": 11, + "read_only": "false", + "tier_type": "", + "sync_from_all": "true", + "sync_from": [], + "redirect_zone": "" + } + ], + "placement_targets": [ + { + "name": "default-placement", + "tags": [], + "storage_classes": [ + "STANDARD" + ] + } + ], + "default_placement": "default-placement", + "realm_id": "94ba560d-a560-431d-8ed4-85a2891f9122", + "sync_policy": { + "groups": [] + } + } + ], + "short_zone_ids": [ + { + "key": "cea71d3a-9d22-45fb-a4e8-04fc6a494a50", + "val": 1698422904 + } + ] + }, + "master_zonegroup": "1580fd1d-a065-4484-82ff-329e9a779999", + "master_zone": "cea71d3a-9d22-45fb-a4e8-04fc6a494a50", + "period_config": { + "bucket_quota": { + "enabled": false, + "check_on_raw": false, + "max_size": -1, + "max_size_kb": 0, + "max_objects": -1 + }, + "user_quota": { + "enabled": false, + "check_on_raw": false, + "max_size": -1, + "max_size_kb": 0, + "max_objects": -1 + } + }, + "realm_id": "94ba560d-a560-431d-8ed4-85a2891f9122", + "realm_name": "my-store", + "realm_epoch": 3 +}` diff --git a/pkg/operator/ceph/object/controller.go b/pkg/operator/ceph/object/controller.go index 85e3370f06e0d..dab69a5d2dfc5 100644 --- a/pkg/operator/ceph/object/controller.go +++ b/pkg/operator/ceph/object/controller.go @@ -76,6 +76,9 @@ var controllerTypeMeta = metav1.TypeMeta{ var currentAndDesiredCephVersion = opcontroller.CurrentAndDesiredCephVersion +// allow this to be overridden for unit tests +var cephObjectStoreDependents = CephObjectStoreDependents + // ReconcileCephObjectStore reconciles a cephObjectStore object type ReconcileCephObjectStore struct { client client.Client @@ -252,7 +255,7 @@ func (r *ReconcileCephObjectStore) reconcile(request reconcile.Request) (reconci return reconcile.Result{}, cephObjectStore, errors.Wrapf(err, "failed to check for object buckets. failed to get admin ops API context") } - deps, err := CephObjectStoreDependents(r.context, r.clusterInfo, cephObjectStore, objCtx, opsCtx) + deps, err := cephObjectStoreDependents(r.context, r.clusterInfo, cephObjectStore, objCtx, opsCtx) if err != nil { return reconcile.Result{}, cephObjectStore, err } diff --git a/pkg/operator/ceph/object/controller_test.go b/pkg/operator/ceph/object/controller_test.go index 8bc9282c39c11..44d6626e991f3 100644 --- a/pkg/operator/ceph/object/controller_test.go +++ b/pkg/operator/ceph/object/controller_test.go @@ -28,12 +28,14 @@ import ( "github.com/pkg/errors" cephv1 "github.com/rook/rook/pkg/apis/ceph.rook.io/v1" rookclient "github.com/rook/rook/pkg/client/clientset/versioned/fake" + rookfake "github.com/rook/rook/pkg/client/clientset/versioned/fake" "github.com/rook/rook/pkg/client/clientset/versioned/scheme" "github.com/rook/rook/pkg/clusterd" "github.com/rook/rook/pkg/daemon/ceph/client" cephver "github.com/rook/rook/pkg/operator/ceph/version" "github.com/rook/rook/pkg/operator/k8sutil" "github.com/rook/rook/pkg/operator/test" + "github.com/rook/rook/pkg/util/dependents" exectest "github.com/rook/rook/pkg/util/exec/test" "github.com/stretchr/testify/assert" v1 "k8s.io/api/core/v1" @@ -285,7 +287,20 @@ func TestCephObjectStoreController(t *testing.T) { capnslog.SetGlobalLogLevel(capnslog.DEBUG) os.Setenv("ROOK_LOG_LEVEL", "DEBUG") + commitConfigChangesOrig := commitConfigChanges + defer func() { commitConfigChanges = commitConfigChangesOrig }() + + // make sure joining multisite calls to commit config changes + calledCommitConfigChanges := false + commitConfigChanges = func(c *Context) error { + calledCommitConfigChanges = true + return nil + } + setupNewEnvironment := func(additionalObjects ...runtime.Object) *ReconcileCephObjectStore { + // reset var we use to check if we have called to commit config changes + calledCommitConfigChanges = false + // A Pool resource with metadata and spec. objectStore := &cephv1.CephObjectStore{ ObjectMeta: metav1.ObjectMeta{ @@ -360,6 +375,7 @@ func TestCephObjectStoreController(t *testing.T) { res, err := r.Reconcile(ctx, req) assert.NoError(t, err) assert.True(t, res.Requeue) + assert.False(t, calledCommitConfigChanges) }) t.Run("error - ceph cluster not ready", func(t *testing.T) { @@ -381,6 +397,7 @@ func TestCephObjectStoreController(t *testing.T) { res, err := r.Reconcile(ctx, req) assert.NoError(t, err) assert.True(t, res.Requeue) + assert.False(t, calledCommitConfigChanges) }) // set up an environment that has a ready ceph cluster, and return the reconciler for it @@ -483,6 +500,9 @@ func TestCephObjectStoreController(t *testing.T) { // we don't actually care if Requeue is true if there is an error assert.True(t, res.Requeue) assert.Contains(t, err.Error(), "failed to start rgw health checker") assert.Contains(t, err.Error(), "induced error creating admin ops API connection") + + // health checker should start up after committing config changes + assert.True(t, calledCommitConfigChanges) }) t.Run("success - object store is running", func(t *testing.T) { @@ -498,8 +518,8 @@ func TestCephObjectStoreController(t *testing.T) { assert.Equal(t, cephv1.ConditionProgressing, objectStore.Status.Phase, objectStore) assert.NotEmpty(t, objectStore.Status.Info["endpoint"], objectStore) assert.Equal(t, "http://rook-ceph-rgw-my-store.rook-ceph.svc:80", objectStore.Status.Info["endpoint"], objectStore) + assert.True(t, calledCommitConfigChanges) }) - } func TestCephObjectStoreControllerMultisite(t *testing.T) { @@ -635,6 +655,16 @@ func TestCephObjectStoreControllerMultisite(t *testing.T) { }, } + commitConfigChangesOrig := commitConfigChanges + defer func() { commitConfigChanges = commitConfigChangesOrig }() + + // make sure joining multisite calls to commit config changes + calledCommitConfigChanges := false + commitConfigChanges = func(c *Context) error { + calledCommitConfigChanges = true + return nil + } + clientset := test.New(t, 3) c := &clusterd.Context{ Executor: executor, @@ -668,9 +698,52 @@ func TestCephObjectStoreControllerMultisite(t *testing.T) { }, } - res, err := r.Reconcile(ctx, req) - assert.NoError(t, err) - assert.False(t, res.Requeue) - err = r.client.Get(context.TODO(), req.NamespacedName, objectStore) - assert.NoError(t, err) + currentAndDesiredCephVersion = func(rookImage string, namespace string, jobName string, ownerInfo *k8sutil.OwnerInfo, context *clusterd.Context, cephClusterSpec *cephv1.ClusterSpec, clusterInfo *client.ClusterInfo) (*cephver.CephVersion, *cephver.CephVersion, error) { + return &cephver.Pacific, &cephver.Pacific, nil + } + + t.Run("create an object store", func(t *testing.T) { + res, err := r.Reconcile(ctx, req) + assert.NoError(t, err) + assert.False(t, res.Requeue) + assert.True(t, calledCommitConfigChanges) + err = r.client.Get(context.TODO(), req.NamespacedName, objectStore) + assert.NoError(t, err) + }) + + t.Run("delete the same store", func(t *testing.T) { + calledCommitConfigChanges = false + + // no dependents + dependentsChecked := false + cephObjectStoreDependentsOrig := cephObjectStoreDependents + defer func() { cephObjectStoreDependents = cephObjectStoreDependentsOrig }() + cephObjectStoreDependents = func(clusterdCtx *clusterd.Context, clusterInfo *client.ClusterInfo, store *cephv1.CephObjectStore, objCtx *Context, opsCtx *AdminOpsContext) (*dependents.DependentList, error) { + dependentsChecked = true + return &dependents.DependentList{}, nil + } + + err = r.client.Get(context.TODO(), req.NamespacedName, objectStore) + assert.NoError(t, err) + objectStore.DeletionTimestamp = &metav1.Time{ + Time: time.Now(), + } + err = r.client.Update(ctx, objectStore) + + // have to also track the same objects in the rook clientset + r.context.RookClientset = rookfake.NewSimpleClientset( + objectRealm, + objectZoneGroup, + objectZone, + objectStore, + ) + + res, err := r.Reconcile(ctx, req) + assert.NoError(t, err) + assert.False(t, res.Requeue) + + assert.True(t, dependentsChecked) + + assert.True(t, calledCommitConfigChanges) + }) } diff --git a/pkg/operator/ceph/object/objectstore.go b/pkg/operator/ceph/object/objectstore.go index dcb70f3c67675..7356423e3bed6 100644 --- a/pkg/operator/ceph/object/objectstore.go +++ b/pkg/operator/ceph/object/objectstore.go @@ -86,6 +86,9 @@ type realmType struct { Realms []string `json:"realms"` } +// allow commitConfigChanges to be overridden for unit testing +var commitConfigChanges = CommitConfigChanges + func deleteRealmAndPools(objContext *Context, spec cephv1.ObjectStoreSpec) error { if spec.IsMultisite() { // since pools for object store are created by the zone, the object store only needs to be removed from the zone @@ -146,12 +149,11 @@ func removeObjectStoreFromMultisite(objContext *Context, spec cephv1.ObjectStore logger.Infof("WARNING: No other zone in realm %q can commit to the period or pull the realm until you create another object-store in zone %q", objContext.Realm, objContext.Zone) } - // the period will help notify other zones of changes if there are multi-zones - _, err = runAdminCommand(objContext, false, "period", "update", "--commit") - if err != nil { - return errors.Wrap(err, "failed to update period after removing an endpoint from the zone") + // this will notify other zones of changes if there are multi-zones + if err := commitConfigChanges(objContext); err != nil { + nsName := fmt.Sprintf("%s/%s", objContext.clusterInfo.Namespace, objContext.Name) + return errors.Wrapf(err, "failed to commit config changes after removing CephObjectStore %q from multi-site", nsName) } - logger.Infof("successfully updated period for realm %q after removal of object-store %q", objContext.Realm, objContext.Name) return nil } @@ -362,13 +364,11 @@ func createMultisite(objContext *Context, endpointArg string) error { realmArg := fmt.Sprintf("--rgw-realm=%s", objContext.Realm) zoneGroupArg := fmt.Sprintf("--rgw-zonegroup=%s", objContext.ZoneGroup) - updatePeriod := false // create the realm if it doesn't exist yet output, err := RunAdminCommandNoMultisite(objContext, true, "realm", "get", realmArg) if err != nil { // ENOENT means “No such file or directory” if code, err := exec.ExtractExitCode(err); err == nil && code == int(syscall.ENOENT) { - updatePeriod = true output, err = RunAdminCommandNoMultisite(objContext, false, "realm", "create", realmArg) if err != nil { return errorOrIsNotFound(err, "failed to create ceph realm %q, for reason %q", objContext.ZoneGroup, output) @@ -384,7 +384,6 @@ func createMultisite(objContext *Context, endpointArg string) error { if err != nil { // ENOENT means “No such file or directory” if code, err := exec.ExtractExitCode(err); err == nil && code == int(syscall.ENOENT) { - updatePeriod = true output, err = RunAdminCommandNoMultisite(objContext, false, "zonegroup", "create", "--master", realmArg, zoneGroupArg, endpointArg) if err != nil { return errorOrIsNotFound(err, "failed to create ceph zone group %q, for reason %q", objContext.ZoneGroup, output) @@ -400,7 +399,6 @@ func createMultisite(objContext *Context, endpointArg string) error { if err != nil { // ENOENT means “No such file or directory” if code, err := exec.ExtractExitCode(err); err == nil && code == int(syscall.ENOENT) { - updatePeriod = true output, err = runAdminCommand(objContext, false, "zone", "create", "--master", endpointArg) if err != nil { return errorOrIsNotFound(err, "failed to create ceph zone %q, for reason %q", objContext.Zone, output) @@ -411,27 +409,9 @@ func createMultisite(objContext *Context, endpointArg string) error { } } - // check if the period exists - output, err = runAdminCommand(objContext, false, "period", "get") - if err != nil { - code, err := exec.ExtractExitCode(err) - // ENOENT means “No such file or directory” - if err == nil && code == int(syscall.ENOENT) { - // period does not exist and so needs to be created - logger.Debugf("period must be updated for CephObjectStore %q because it does not exist", objContext.Name) - updatePeriod = true - } else { - return errorOrIsNotFound(err, "'radosgw-admin period get' failed with code %d, for reason %q", strconv.Itoa(code), output) - } - } - - if updatePeriod { - // the period will help notify other zones of changes if there are multi-zones - _, err = runAdminCommand(objContext, false, "period", "update", "--commit") - if err != nil { - return errorOrIsNotFound(err, "failed to update period") - } - logger.Debugf("updated period for realm %q", objContext.Realm) + if err := commitConfigChanges(objContext); err != nil { + nsName := fmt.Sprintf("%s/%s", objContext.clusterInfo.Namespace, objContext.Name) + return errors.Wrapf(err, "failed to commit config changes after creating multisite config for CephObjectStore %q", nsName) } logger.Infof("Multisite for object-store: realm=%s, zonegroup=%s, zone=%s", objContext.Realm, objContext.ZoneGroup, objContext.Zone) @@ -471,11 +451,11 @@ func joinMultisite(objContext *Context, endpointArg, zoneEndpoints, namespace st } logger.Debugf("endpoints for zone %q are now %q", objContext.Zone, zoneEndpoints) - // the period will help notify other zones of changes if there are multi-zones - _, err = RunAdminCommandNoMultisite(objContext, false, "period", "update", "--commit", realmArg, zoneGroupArg, zoneArg) - if err != nil { - return errorOrIsNotFound(err, "failed to update period") + if err := commitConfigChanges(objContext); err != nil { + nsName := fmt.Sprintf("%s/%s", objContext.clusterInfo.Namespace, objContext.Name) + return errors.Wrapf(err, "failed to commit config changes for CephObjectStore %q when joining multisite ", nsName) } + logger.Infof("added object store %q to realm %q, zonegroup %q, zone %q", objContext.Name, objContext.Realm, objContext.ZoneGroup, objContext.Zone) // create system user for realm for master zone in master zonegorup for multisite scenario diff --git a/pkg/operator/ceph/object/objectstore_test.go b/pkg/operator/ceph/object/objectstore_test.go index 5b952340eee09..51cbec7c4053d 100644 --- a/pkg/operator/ceph/object/objectstore_test.go +++ b/pkg/operator/ceph/object/objectstore_test.go @@ -301,40 +301,40 @@ func TestMockExecHelperProcess(t *testing.T) { func Test_createMultisite(t *testing.T) { // control the return values from calling get/create/update on resources type commandReturns struct { - realmExists bool - zoneGroupExists bool - zoneExists bool - periodExists bool - failCreateRealm bool - failCreateZoneGroup bool - failCreateZone bool - failUpdatePeriod bool + realmExists bool + zoneGroupExists bool + zoneExists bool + failCreateRealm bool + failCreateZoneGroup bool + failCreateZone bool + failCommitConfigChanges bool } // control whether we should expect certain 'get' calls type expectCommands struct { - getRealm bool - createRealm bool - getZoneGroup bool - createZoneGroup bool - getZone bool - createZone bool - getPeriod bool - updatePeriod bool + getRealm bool + createRealm bool + getZoneGroup bool + createZoneGroup bool + getZone bool + createZone bool + commitConfigChanges bool } // vars used for testing if calls were made var ( - calledGetRealm = false - calledGetZoneGroup = false - calledGetZone = false - calledGetPeriod = false - calledCreateRealm = false - calledCreateZoneGroup = false - calledCreateZone = false - calledUpdatePeriod = false + calledGetRealm = false + calledGetZoneGroup = false + calledGetZone = false + calledCreateRealm = false + calledCreateZoneGroup = false + calledCreateZone = false + calledCommitConfigChanges = false ) + commitConfigChangesOrig := commitConfigChanges + defer func() { commitConfigChanges = commitConfigChangesOrig }() + enoentIfNotExist := func(resourceExists bool) (string, error) { if !resourceExists { return "", exectest.MockExecCommandReturns(t, "", "", int(syscall.ENOENT)) @@ -357,8 +357,15 @@ func Test_createMultisite(t *testing.T) { calledCreateZoneGroup = false calledGetZone = false calledCreateZone = false - calledGetPeriod = false - calledUpdatePeriod = false + calledCommitConfigChanges = false + + commitConfigChanges = func(c *Context) error { + calledCommitConfigChanges = true + if env.failCommitConfigChanges { + return errors.New("fake error from CommitConfigChanges") + } + return nil + } return &exectest.MockExecutor{ MockExecuteCommandWithTimeout: func(timeout time.Duration, command string, arg ...string) (string, error) { @@ -391,19 +398,10 @@ func Test_createMultisite(t *testing.T) { calledCreateZone = true return errorIfFail(env.failCreateZone) } - case "period": - switch arg[1] { - case "get": - calledGetPeriod = true - return enoentIfNotExist(env.periodExists) - case "update": - calledUpdatePeriod = true - return errorIfFail(env.failUpdatePeriod) - } } } t.Fatalf("unhandled command: %s %v", command, arg) - return "", nil + panic("unhandled command") }, } } @@ -417,19 +415,18 @@ func Test_createMultisite(t *testing.T) { expectCommands expectCommands wantErr bool }{ - {"create realm, zonegroup, and zone; period update", + {"create realm, zonegroup, and zone; commit config", commandReturns{ // nothing exists, and all should succeed }, expectCommands{ - getRealm: true, - createRealm: true, - getZoneGroup: true, - createZoneGroup: true, - getZone: true, - createZone: true, - getPeriod: true, - updatePeriod: true, + getRealm: true, + createRealm: true, + getZoneGroup: true, + createZoneGroup: true, + getZone: true, + createZone: true, + commitConfigChanges: true, }, expectNoErr}, {"fail creating realm", @@ -468,85 +465,63 @@ func Test_createMultisite(t *testing.T) { // when we fail to create zone, we should not continue }, expectErr}, - {"fail period update", + {"fail commit config", commandReturns{ - failUpdatePeriod: true, + failCommitConfigChanges: true, }, expectCommands{ - getRealm: true, - createRealm: true, - getZoneGroup: true, - createZoneGroup: true, - getZone: true, - createZone: true, - getPeriod: true, - updatePeriod: true, + getRealm: true, + createRealm: true, + getZoneGroup: true, + createZoneGroup: true, + getZone: true, + createZone: true, + commitConfigChanges: true, }, expectErr}, - {"realm exists; create zonegroup and zone; period update", + {"realm exists; create zonegroup and zone; commit config", commandReturns{ realmExists: true, }, expectCommands{ - getRealm: true, - createRealm: false, - getZoneGroup: true, - createZoneGroup: true, - getZone: true, - createZone: true, - getPeriod: true, - updatePeriod: true, - }, - expectNoErr}, - {"realm and zonegroup exist; create zone; period update", - commandReturns{ - realmExists: true, - zoneGroupExists: true, - }, - expectCommands{ - getRealm: true, - createRealm: false, - getZoneGroup: true, - createZoneGroup: false, - getZone: true, - createZone: true, - getPeriod: true, - updatePeriod: true, + getRealm: true, + createRealm: false, + getZoneGroup: true, + createZoneGroup: true, + getZone: true, + createZone: true, + commitConfigChanges: true, }, expectNoErr}, - {"realm, zonegroup, and zone exist; period update", + {"realm and zonegroup exist; create zone; commit config", commandReturns{ realmExists: true, zoneGroupExists: true, - zoneExists: true, }, expectCommands{ - getRealm: true, - createRealm: false, - getZoneGroup: true, - createZoneGroup: false, - getZone: true, - createZone: false, - getPeriod: true, - updatePeriod: true, + getRealm: true, + createRealm: false, + getZoneGroup: true, + createZoneGroup: false, + getZone: true, + createZone: true, + commitConfigChanges: true, }, expectNoErr}, - {"realm, zonegroup, zone, and period all exist", + {"realm, zonegroup, and zone exist; commit config", commandReturns{ realmExists: true, zoneGroupExists: true, zoneExists: true, - periodExists: true, }, expectCommands{ - getRealm: true, - createRealm: false, - getZoneGroup: true, - createZoneGroup: false, - getZone: true, - createZone: false, - getPeriod: true, - updatePeriod: false, + getRealm: true, + createRealm: false, + getZoneGroup: true, + createZoneGroup: false, + getZone: true, + createZone: false, + commitConfigChanges: true, }, expectNoErr}, } @@ -566,8 +541,7 @@ func Test_createMultisite(t *testing.T) { assert.Equal(t, tt.expectCommands.createZoneGroup, calledCreateZoneGroup) assert.Equal(t, tt.expectCommands.getZone, calledGetZone) assert.Equal(t, tt.expectCommands.createZone, calledCreateZone) - assert.Equal(t, tt.expectCommands.getPeriod, calledGetPeriod) - assert.Equal(t, tt.expectCommands.updatePeriod, calledUpdatePeriod) + assert.Equal(t, tt.expectCommands.commitConfigChanges, calledCommitConfigChanges) if tt.wantErr { assert.Error(t, err) } else {