diff --git a/.github/workflows/canary-integration-test.yml b/.github/workflows/canary-integration-test.yml index e92f79f35fd74..d5f8a26346ab4 100644 --- a/.github/workflows/canary-integration-test.yml +++ b/.github/workflows/canary-integration-test.yml @@ -953,6 +953,23 @@ jobs: - name: write an object to one cluster, read from the other run: tests/scripts/github-action-helper.sh write_object_to_cluster1_read_from_cluster2 + - name: verify the RGW configuration period was committed for both clusters + run: | + committed_msg="committing changes to RGW configuration period for CephObjectStore" + tests/scripts/github-action-helper.sh verify_operator_log_message "${committed_msg}" rook-ceph + tests/scripts/github-action-helper.sh verify_operator_log_message "${committed_msg}" rook-ceph-secondary + + - name: restart operators for both clusters + run: | + tests/scripts/github-action-helper.sh restart_operator rook-ceph + tests/scripts/github-action-helper.sh restart_operator rook-ceph-secondary + + - name: wait for both clusters to report that the RGW configuration period was NOT committed on the second reconcile + run: | + not_committed_msg="there are no changes to commit for RGW configuration period for CephObjectStore" + tests/scripts/github-action-helper.sh wait_for_operator_log_message "${not_committed_msg}" 60 rook-ceph + tests/scripts/github-action-helper.sh wait_for_operator_log_message "${not_committed_msg}" 20 rook-ceph-secondary + - name: upload test result uses: actions/upload-artifact@v2 if: always() diff --git a/pkg/operator/ceph/object/admin.go b/pkg/operator/ceph/object/admin.go index bac57005ce118..5fedaa51c29d0 100644 --- a/pkg/operator/ceph/object/admin.go +++ b/pkg/operator/ceph/object/admin.go @@ -315,11 +315,14 @@ func CommitConfigChanges(c *Context) error { return errors.Wrap(err, "failed to determine if the staged RGW configuration period is different from current") } + nsName := fmt.Sprintf("%s/%s", c.clusterInfo.Namespace, c.Name) if 
!shouldCommit { - nsName := fmt.Sprintf("%s/%s", c.clusterInfo.Namespace, c.Name) - logger.Debugf("not committing changes to RGW configuration period for CephObjectStore %q", nsName) + // DO NOT MODIFY THE MESSAGE BELOW. It is checked in integration tests. + logger.Infof("there are no changes to commit for RGW configuration period for CephObjectStore %q", nsName) return nil } + // DO NOT MODIFY THE MESSAGE BELOW. It is checked in integration tests. + logger.Infof("committing changes to RGW configuration period for CephObjectStore %q", nsName) // don't expect json output since we don't intend to use the output from the command _, err = runAdminCommand(c, false, "period", "update", "--commit") if err != nil { diff --git a/tests/scripts/github-action-helper.sh b/tests/scripts/github-action-helper.sh index 3f5d913c0e24c..2e5fe2616d7c9 100755 --- a/tests/scripts/github-action-helper.sh +++ b/tests/scripts/github-action-helper.sh @@ -14,7 +14,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-set -xe
+set -xeEo pipefail
 
 #############
 # VARIABLES #
@@ -237,6 +237,47 @@ function wait_for_rgw_pods() {
 
 }
 
+# verify_operator_log_message checks that the rook-ceph-operator log contains a message.
+# Matching lines are printed to stdout; the function returns non-zero when nothing matches
+# or when kubectl fails (pipefail is enabled at the top of this script).
+function verify_operator_log_message() {
+  local message="${1:?the log message to look for is required}" # param 1: the message to verify exists
+  local namespace="${2:-rook-ceph}" # optional param 2: the namespace of the CephCluster (default: rook-ceph)
+  # --fixed-strings: the message is literal text, not a regular expression
+  kubectl --namespace "$namespace" logs deployment/rook-ceph-operator | grep --fixed-strings -- "$message"
+}
+
+# wait_for_operator_log_message polls the operator log every 5 seconds until the message
+# appears. It returns 0 as soon as the message is found; otherwise it prints "timed out"
+# to stderr and returns 1 once the timeout (in seconds) has elapsed.
+function wait_for_operator_log_message() {
+  local message="${1:?the log message to look for is required}" # param 1: the message to look for
+  local timeout="${2:?a timeout in seconds is required}" # param 2: the timeout for waiting for the message to exist
+  local namespace="${3:-rook-ceph}" # optional param 3: the namespace of the CephCluster (default: rook-ceph)
+  local start_time="${SECONDS}" # local so the start time does not leak into the global scope
+  while [[ $(( SECONDS - start_time )) -lt $timeout ]]; do
+    if verify_operator_log_message "$message" "$namespace"; then return 0; fi
+    sleep 5
+  done
+  echo "timed out" >&2 && return 1
+}
+
+# restart_operator deletes the rook-ceph-operator pod, waits up to 20 seconds for a pod
+# matching the operator selector to report phase Running, and then lists the operator pods.
+function restart_operator () {
+  local namespace="${1:-rook-ceph}" # optional param 1: the namespace of the CephCluster (default: rook-ceph)
+  local -a get_pod_cmd=(kubectl --namespace "$namespace" get pod --selector app=rook-ceph-operator --no-headers)
+  local pod # declared separately so a failing command substitution is not masked by 'local'
+  pod="$("${get_pod_cmd[@]}" --output name)"
+  # --output name gives "pod/<name>", so don't specify the kind here
+  kubectl --namespace "$namespace" delete "$pod" # waits for pod to be deleted but not new pod to be running
+  # pass the command as positional args so every word keeps its quoting inside the child shell
+  timeout 20 bash -c \
+    'until [[ -n "$("$@" --field-selector=status.phase=Running 2>/dev/null)" ]] ; do echo waiting && sleep 1; done' \
+    bash "${get_pod_cmd[@]}"
+  "${get_pod_cmd[@]}"
+}
+
 function deploy_second_rook_cluster() {
   BLOCK=$(sudo lsblk|awk '/14G/ {print $1}'| head -1)
   cd cluster/examples/kubernetes/ceph/
@@ -275,7 +316,5 @@ EOF
 FUNCTION="$1"
 shift # remove function arg now that we've recorded it
 # call the function with the remainder of the user-provided args
-if ! $FUNCTION "$@"; then
-  echo "Call to $FUNCTION was not successful" >&2
-  exit 1
-fi
+# -e, -E, and -o=pipefail will ensure this script returns a failure if a part of the function fails
+$FUNCTION "$@"