From 1649df0462860a3d6c38aa68459fe922583f7236 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Juan=20Miguel=20Olmo=20Mart=C3=ADnez?= Date: Thu, 23 Sep 2021 13:59:53 +0200 Subject: [PATCH] ceph: retry commands when they fail MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit fixes: https://github.com/rook/rook/issues/8759 Signed-off-by: Juan Miguel Olmo Martínez --- tests/integration/ceph_mgr_test.go | 45 ++++++++++++++++++++++-------- 1 file changed, 34 insertions(+), 11 deletions(-) diff --git a/tests/integration/ceph_mgr_test.go b/tests/integration/ceph_mgr_test.go index e7c1baebb3f6..312dd68d1227 100644 --- a/tests/integration/ceph_mgr_test.go +++ b/tests/integration/ceph_mgr_test.go @@ -33,6 +33,10 @@ import ( "github.com/stretchr/testify/suite" ) +const ( + defaultTries = 3 +) + // ************************************************** // *** Mgr operations covered by TestMgrSmokeSuite *** // @@ -94,7 +98,7 @@ func (s *CephMgrSuite) SetupSuite() { Mons: 1, UseCSI: true, SkipOSDCreation: true, - EnableDiscovery: true, + EnableDiscovery: false, RookVersion: installer.LocalBuildTag, CephVersion: installer.MasterVersion, } @@ -113,9 +117,28 @@ func (s *CephMgrSuite) TearDownSuite() { s.installer.UninstallRook() } -func (s *CephMgrSuite) execute(command []string) (error, string) { - orchCommand := append([]string{"orch"}, command...) - return s.installer.Execute("ceph", orchCommand, s.namespace) +func (s *CephMgrSuite) executeWithRetry(command []string, maxRetries int) (string, error) { + tries := 0 + orchestratorCommand := append([]string{"orch"}, command...) 
+ for { + err, output := s.installer.Execute("ceph", orchestratorCommand, s.namespace) + tries++ + if err != nil { + if maxRetries == 1 { + return output, err + } + if tries == maxRetries { + return "", fmt.Errorf("max retries(%d) reached, last err: %v", tries, err) + } + logger.Infof("retrying command <%v>: last error: %v", command, err) + continue + } + return output, nil + } +} + +func (s *CephMgrSuite) execute(command []string) (string, error) { + return s.executeWithRetry(command, 1) } func (s *CephMgrSuite) prepareLocalStorageClass(storageClassName string) { @@ -152,7 +175,7 @@ func (s *CephMgrSuite) enableOrchestratorModule() { } logger.Info("Setting orchestrator backend to Rook .... ") - err, output = s.execute([]string{"set", "backend", "rook"}) + output, err = s.execute([]string{"set", "backend", "rook"}) logger.Infof("output: %s", output) if err != nil { logger.Infof("Not possible to set rook as backend orchestrator module: %q", err) @@ -170,7 +193,7 @@ func (s *CephMgrSuite) waitForOrchestrationModule() { for timeout := 0; timeout < 30; timeout++ { logger.Info("Waiting for rook orchestrator module enabled and ready ...") - err, output := s.execute([]string{"status", "--format", "json"}) + output, err := s.execute([]string{"status", "--format", "json"}) logger.Infof("%s", output) if err == nil { logger.Info("Ceph orchestrator ready to execute commands") @@ -210,14 +233,14 @@ func (s *CephMgrSuite) waitForOrchestrationModule() { } func (s *CephMgrSuite) TestDeviceLs() { logger.Info("Testing .... ") - err, device_list := s.execute([]string{"device", "ls"}) + deviceList, err := s.executeWithRetry([]string{"device", "ls"}, defaultTries) assert.Nil(s.T(), err) - logger.Infof("output = %s", device_list) + logger.Infof("output = %s", deviceList) } func (s *CephMgrSuite) TestStatus() { logger.Info("Testing .... 
") - err, status := s.execute([]string{"status"}) + status, err := s.executeWithRetry([]string{"status"}, defaultTries) assert.Nil(s.T(), err) logger.Infof("output = %s", status) @@ -236,7 +259,7 @@ func (s *CephMgrSuite) TestHostLs() { logger.Info("Testing .... ") // Get the orchestrator hosts - err, output := s.execute([]string{"host", "ls", "json"}) + output, err := s.executeWithRetry([]string{"host", "ls", "json"}, defaultTries) assert.Nil(s.T(), err) logger.Infof("output = %s", output) @@ -271,7 +294,7 @@ func (s *CephMgrSuite) TestHostLs() { func (s *CephMgrSuite) TestServiceLs() { logger.Info("Testing .... ") - err, output := s.execute([]string{"ls", "--format", "json"}) + output, err := s.executeWithRetry([]string{"ls", "--format", "json"}, defaultTries) assert.Nil(s.T(), err) logger.Infof("output = %s", output)