From 6c00276994578798cf3c0df7614c40287010f71d Mon Sep 17 00:00:00 2001
From: Blaine Gardner
Date: Tue, 5 Oct 2021 14:26:17 -0600
Subject: [PATCH] rgw: add integration test for committing period

Add a verification to the RGW multisite integration test that the RGW
period is committed on the first reconcile and not committed on the
second reconcile. Doing this in the multisite test verifies the
behavior on both the primary and the secondary multisite cluster.

To add this test, the github-action-helper.sh script had to be modified to:
1. actually deploy the version of Rook under test
2. adjust how functions are called so that `-e` is not lost in a subshell
3. fix the wait_for_prepare_pod helper, which had a failure in the middle
   of its operation that was silently masked in the past

Signed-off-by: Blaine Gardner
---
 .github/workflows/canary-integration-test.yml | 13 +++
 pkg/operator/ceph/object/admin.go             |  5 +-
 tests/scripts/github-action-helper.sh         | 99 ++++++++++++-------
 3 files changed, 81 insertions(+), 36 deletions(-)

diff --git a/.github/workflows/canary-integration-test.yml b/.github/workflows/canary-integration-test.yml
index e92f79f35fd74..bc727a91f6145 100644
--- a/.github/workflows/canary-integration-test.yml
+++ b/.github/workflows/canary-integration-test.yml
@@ -953,6 +953,19 @@ jobs:
       - name: write an object to one cluster, read from the other
         run: tests/scripts/github-action-helper.sh write_object_to_cluster1_read_from_cluster2
 
+      # if this test fails, it could mean the RGW `period get` or `period update` output has changed
+      - name: RGW configuration period should be committed on first reconcile and not be committed on second reconcile
+        run: |
+          ns_name_primary='"rook-ceph/multisite-store"' # double quotes intended
+          ns_name_secondary='"rook-ceph-secondary/zone-b-multisite-store"' # double quotes intended
+          committed_msg="committing changes to RGW configuration period for CephObjectStore"
+          tests/scripts/github-action-helper.sh verify_operator_log_message "${committed_msg} ${ns_name_primary}"
+          tests/scripts/github-action-helper.sh verify_operator_log_message "${committed_msg} ${ns_name_secondary}"
+          tests/scripts/github-action-helper.sh restart_operator
+          not_committed_msg="there are no changes to commit for RGW configuration period for CephObjectStore"
+          tests/scripts/github-action-helper.sh wait_for_operator_log_message "${not_committed_msg} ${ns_name_primary}" 60
+          tests/scripts/github-action-helper.sh wait_for_operator_log_message "${not_committed_msg} ${ns_name_secondary}" 20
+
       - name: upload test result
         uses: actions/upload-artifact@v2
         if: always()
diff --git a/pkg/operator/ceph/object/admin.go b/pkg/operator/ceph/object/admin.go
index 2f8e89491c668..b671832269161 100644
--- a/pkg/operator/ceph/object/admin.go
+++ b/pkg/operator/ceph/object/admin.go
@@ -318,9 +318,12 @@ func CommitConfigChanges(c *Context) error {
 	// DO NOT MODIFY nsName here. It is part of the integration test checks noted below.
 	nsName := fmt.Sprintf("%s/%s", c.clusterInfo.Namespace, c.Name)
 	if !shouldCommit {
-		logger.Debugf("not committing changes to RGW configuration period for CephObjectStore %q", nsName)
+		// DO NOT MODIFY THE MESSAGE BELOW. It is checked in integration tests.
+		logger.Infof("there are no changes to commit for RGW configuration period for CephObjectStore %q", nsName)
 		return nil
 	}
+	// DO NOT MODIFY THE MESSAGE BELOW. It is checked in integration tests.
+ logger.Infof("committing changes to RGW configuration period for CephObjectStore %q", nsName) // don't expect json output since we don't intend to use the output from the command _, err = runAdminCommand(c, false, "period", "update", "--commit") if err != nil { diff --git a/tests/scripts/github-action-helper.sh b/tests/scripts/github-action-helper.sh index 3f5d913c0e24c..ed7bde8a07be8 100755 --- a/tests/scripts/github-action-helper.sh +++ b/tests/scripts/github-action-helper.sh @@ -14,7 +14,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -set -xe +set -xeEo pipefail ############# # VARIABLES # @@ -145,8 +145,8 @@ function validate_yaml() { } function create_cluster_prerequisites() { - cd cluster/examples/kubernetes/ceph - kubectl create -f crds.yaml -f common.yaml + # this might be called from another function that has already done a cd + ( cd cluster/examples/kubernetes/ceph && kubectl create -f crds.yaml -f common.yaml ) } function deploy_manifest_with_local_build() { @@ -169,23 +169,22 @@ function deploy_cluster() { } function wait_for_prepare_pod() { - timeout 180 bash <<-'EOF' - while true; do - if [[ "$(kubectl -n rook-ceph get pod -l app=rook-ceph-osd-prepare --field-selector=status.phase=Running)" -gt 1 ]]; then - break - fi - sleep 5 - done - kubectl -n rook-ceph logs --follow pod/$(kubectl -n rook-ceph get pod -l app=rook-ceph-osd-prepare -o jsonpath='{.items[0].metadata.name}') -EOF - timeout 60 bash <<-'EOF' - until kubectl -n rook-ceph logs $(kubectl -n rook-ceph get pod -l app=rook-ceph-osd,ceph_daemon_id=0 -o jsonpath='{.items[*].metadata.name}') --all-containers || true; do - echo "waiting for osd container" - sleep 1 - done -EOF - kubectl -n rook-ceph describe job/"$(kubectl -n rook-ceph get pod -l app=rook-ceph-osd-prepare -o jsonpath='{.items[*].metadata.name}')" || true - kubectl -n rook-ceph describe deploy/rook-ceph-osd-0 || true + get_pod_cmd=(kubectl --namespace rook-ceph get pod --no-headers --output name) + timeout 180 bash -c \ + "until [[ -n \"\$(${get_pod_cmd[*]} --selector app=rook-ceph-osd-prepare --field-selector=status.phase=Running)\" ]]; do \ + echo waiting for osd prepare pod to be running && sleep 5; \ + done" + pod="$("${get_pod_cmd[@]}" --selector app=rook-ceph-osd-prepare)" + kubectl --namespace rook-ceph logs --follow "$pod" + timeout 60 bash -c \ + "until [[ -n \"\$(${get_pod_cmd[*]} --selector app=rook-ceph-osd,ceph_daemon_id=0 --field-selector=status.phase=Running)\" ]]; do \ + echo waiting for osd pod to be running && sleep 1; \ + done" + pod="$("${get_pod_cmd[@]}" --selector app=rook-ceph-osd,ceph_daemon_id=0)" || true + kubectl --namespace rook-ceph logs "$pod" || true + job="$(kubectl --namespace rook-ceph get job --selector app=rook-ceph-osd-prepare --output name)" || true + kubectl -n rook-ceph describe "$job" || true + kubectl -n rook-ceph describe deployment/rook-ceph-osd-0 || true } function wait_for_ceph_to_be_ready() { @@ -217,12 +216,26 @@ function create_LV_on_disk() { function deploy_first_rook_cluster() { BLOCK=$(sudo lsblk|awk '/14G/ {print $1}'| head -1) + create_cluster_prerequisites cd cluster/examples/kubernetes/ceph/ - kubectl create -f crds.yaml -f common.yaml -f operator.yaml + deploy_manifest_with_local_build operator.yaml yq w -i -d1 cluster-test.yaml spec.dashboard.enabled false yq w -i -d1 cluster-test.yaml spec.storage.useAllDevices false yq w -i -d1 cluster-test.yaml spec.storage.deviceFilter "${BLOCK}"1 - kubectl create -f cluster-test.yaml -f toolbox.yaml 
+  kubectl create -f cluster-test.yaml
+  deploy_manifest_with_local_build toolbox.yaml
+}
+
+function deploy_second_rook_cluster() {
+  BLOCK=$(sudo lsblk|awk '/14G/ {print $1}'| head -1)
+  cd cluster/examples/kubernetes/ceph/
+  NAMESPACE=rook-ceph-secondary envsubst < common-second-cluster.yaml | kubectl create -f -
+  sed -i 's/namespace: rook-ceph/namespace: rook-ceph-secondary/g' cluster-test.yaml
+  yq w -i -d1 cluster-test.yaml spec.storage.deviceFilter "${BLOCK}"2
+  yq w -i -d1 cluster-test.yaml spec.dataDirHostPath "/var/lib/rook-external"
+  kubectl create -f cluster-test.yaml
+  yq w -i toolbox.yaml metadata.namespace rook-ceph-secondary
+  deploy_manifest_with_local_build toolbox.yaml
 }
 
 function wait_for_rgw_pods() {
@@ -237,15 +250,33 @@
 }
 
-function deploy_second_rook_cluster() {
-  BLOCK=$(sudo lsblk|awk '/14G/ {print $1}'| head -1)
-  cd cluster/examples/kubernetes/ceph/
-  NAMESPACE=rook-ceph-secondary envsubst < common-second-cluster.yaml | kubectl create -f -
-  sed -i 's/namespace: rook-ceph/namespace: rook-ceph-secondary/g' cluster-test.yaml
-  yq w -i -d1 cluster-test.yaml spec.storage.deviceFilter "${BLOCK}"2
-  yq w -i -d1 cluster-test.yaml spec.dataDirHostPath "/var/lib/rook-external"
-  yq w -i toolbox.yaml metadata.namespace rook-ceph-secondary
-  kubectl create -f cluster-test.yaml -f toolbox.yaml
+function verify_operator_log_message() {
+  local message="$1" # param 1: the message to verify exists
+  local namespace="${2:-rook-ceph}" # optional param 2: the namespace of the CephCluster (default: rook-ceph)
+  kubectl --namespace "$namespace" logs deployment/rook-ceph-operator | grep "$message"
+}
+
+function wait_for_operator_log_message() {
+  local message="$1" # param 1: the message to look for
+  local timeout="$2" # param 2: the timeout for waiting for the message to exist
+  local namespace="${3:-rook-ceph}" # optional param 3: the namespace of the CephCluster (default: rook-ceph)
+  start_time="${SECONDS}"
+  while [[ $(( SECONDS - start_time )) -lt $timeout ]]; do
+    if verify_operator_log_message "$message" "$namespace"; then return 0; fi
+    sleep 5
+  done
+  echo "timed out" >&2 && return 1
+}
+
+function restart_operator() {
+  local namespace="${1:-rook-ceph}" # optional param 1: the namespace of the CephCluster (default: rook-ceph)
+  get_pod_cmd=(kubectl --namespace "$namespace" get pod --selector app=rook-ceph-operator --no-headers)
+  pod="$("${get_pod_cmd[@]}" --output name)"
+  # --output name gives "pod/<name>", so don't specify the kind here
+  kubectl --namespace "$namespace" delete "$pod" # waits for pod to be deleted but not new pod to be running
+  timeout 20 bash -c \
+    "until [[ -n \"\$(${get_pod_cmd[*]} --field-selector=status.phase=Running 2>/dev/null)\" ]] ; do echo waiting && sleep 1; done"
+  "${get_pod_cmd[@]}"
 }
 
 function write_object_to_cluster1_read_from_cluster2() {
@@ -275,7 +306,5 @@ EOF
 FUNCTION="$1"
 shift # remove function arg now that we've recorded it
 # call the function with the remainder of the user-provided args
-if ! $FUNCTION "$@"; then
-  echo "Call to $FUNCTION was not successful" >&2
-  exit 1
-fi
+# -e, -E, and -o pipefail will ensure this script returns a failure if a part of the function fails
+$FUNCTION "$@"
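
Note on change 2 in the commit message (illustration only; not part of the
patch): bash suspends errexit (`set -e`) for the entire body of a function
that is invoked as part of an `if` condition or negated with `!`. Under the
old `if ! $FUNCTION "$@"` dispatcher, a command failing in the middle of a
helper was therefore ignored, and the helper's exit status collapsed to that
of its last command. The sketch below uses a hypothetical helper to show the
masking:

    #!/usr/bin/env bash
    set -e

    helper() {
      false                    # this failing step should abort under `set -e`...
      echo "helper kept going" # ...but it still runs when the caller tests the result
    }

    # errexit is suppressed for the whole body of a function called behind `if !`:
    # `false` is ignored, the final `echo` succeeds, the function returns 0, and
    # the error branch below never fires.
    if ! helper; then
      echo "never reached" >&2
    fi

    # Called bare under `set -e`, the same function aborts the script at `false`.
    helper

This is also why the mid-operation failure in wait_for_prepare_pod (change 3)
never failed CI before: the `if !` call swallowed it. With `set -xeEo pipefail`
and the bare `$FUNCTION "$@"` call, any intermediate failure now propagates as
the script's exit status.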