diff --git a/.github/workflows/canary-integration-test.yml b/.github/workflows/canary-integration-test.yml index e92f79f35fd74..bc727a91f6145 100644 --- a/.github/workflows/canary-integration-test.yml +++ b/.github/workflows/canary-integration-test.yml @@ -953,6 +953,19 @@ jobs: - name: write an object to one cluster, read from the other run: tests/scripts/github-action-helper.sh write_object_to_cluster1_read_from_cluster2 + # if this test fails, it could mean the RGW `period get` or `period update` output has changed + - name: RGW configuration period should be committed on first reconcile and not be committed on second reconcile + run: | + ns_name_primary='"rook-ceph/multisite-store"' # double quotes intended + ns_name_secondary='"rook-ceph-secondary/zone-b-multisite-store"' # double quotes intended + committed_msg="committing changes to RGW configuration period for CephObjectStore" + tests/scripts/github-action-helper.sh verify_operator_log_message "${committed_msg} ${ns_name_primary}" + tests/scripts/github-action-helper.sh verify_operator_log_message "${committed_msg} ${ns_name_secondary}" + tests/scripts/github-action-helper.sh restart_operator + not_committed_msg="there are no changes to commit for RGW configuration period for CephObjectStore" + tests/scripts/github-action-helper.sh wait_for_operator_log_message "${not_committed_msg} ${ns_name_primary}" 60 + tests/scripts/github-action-helper.sh wait_for_operator_log_message "${not_committed_msg} ${ns_name_secondary}" 20 + - name: upload test result uses: actions/upload-artifact@v2 if: always() diff --git a/pkg/operator/ceph/object/admin.go b/pkg/operator/ceph/object/admin.go index 2f8e89491c668..b671832269161 100644 --- a/pkg/operator/ceph/object/admin.go +++ b/pkg/operator/ceph/object/admin.go @@ -318,9 +318,12 @@ func CommitConfigChanges(c *Context) error { // DO NOT MODIFY nsName here. It is part of the integration test checks noted below. 
nsName := fmt.Sprintf("%s/%s", c.clusterInfo.Namespace, c.Name) if !shouldCommit { - logger.Debugf("not committing changes to RGW configuration period for CephObjectStore %q", nsName) + // DO NOT MODIFY THE MESSAGE BELOW. It is checked in integration tests. + logger.Infof("there are no changes to commit for RGW configuration period for CephObjectStore %q", nsName) return nil } + // DO NOT MODIFY THE MESSAGE BELOW. It is checked in integration tests. + logger.Infof("committing changes to RGW configuration period for CephObjectStore %q", nsName) // don't expect json output since we don't intend to use the output from the command _, err = runAdminCommand(c, false, "period", "update", "--commit") if err != nil { diff --git a/tests/scripts/github-action-helper.sh b/tests/scripts/github-action-helper.sh index 3f5d913c0e24c..ed7bde8a07be8 100755 --- a/tests/scripts/github-action-helper.sh +++ b/tests/scripts/github-action-helper.sh @@ -14,7 +14,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-set -xe +set -xeEo pipefail ############# # VARIABLES # @@ -145,8 +145,8 @@ function validate_yaml() { } function create_cluster_prerequisites() { - cd cluster/examples/kubernetes/ceph - kubectl create -f crds.yaml -f common.yaml + # this might be called from another function that has already done a cd + ( cd cluster/examples/kubernetes/ceph && kubectl create -f crds.yaml -f common.yaml ) } function deploy_manifest_with_local_build() { @@ -169,23 +169,22 @@ function deploy_cluster() { } function wait_for_prepare_pod() { - timeout 180 bash <<-'EOF' - while true; do - if [[ "$(kubectl -n rook-ceph get pod -l app=rook-ceph-osd-prepare --field-selector=status.phase=Running)" -gt 1 ]]; then - break - fi - sleep 5 - done - kubectl -n rook-ceph logs --follow pod/$(kubectl -n rook-ceph get pod -l app=rook-ceph-osd-prepare -o jsonpath='{.items[0].metadata.name}') -EOF - timeout 60 bash <<-'EOF' - until kubectl -n rook-ceph logs $(kubectl -n rook-ceph get pod -l app=rook-ceph-osd,ceph_daemon_id=0 -o jsonpath='{.items[*].metadata.name}') --all-containers || true; do - echo "waiting for osd container" - sleep 1 - done -EOF - kubectl -n rook-ceph describe job/"$(kubectl -n rook-ceph get pod -l app=rook-ceph-osd-prepare -o jsonpath='{.items[*].metadata.name}')" || true - kubectl -n rook-ceph describe deploy/rook-ceph-osd-0 || true + get_pod_cmd=(kubectl --namespace rook-ceph get pod --no-headers --output name) + timeout 180 bash -c \ + "until [[ -n \"\$(${get_pod_cmd[*]} --selector app=rook-ceph-osd-prepare --field-selector=status.phase=Running)\" ]]; do \ + echo waiting for osd prepare pod to be running && sleep 5; \ + done" + pod="$("${get_pod_cmd[@]}" --selector app=rook-ceph-osd-prepare)" + kubectl --namespace rook-ceph logs --follow "$pod" + timeout 60 bash -c \ + "until [[ -n \"\$(${get_pod_cmd[*]} --selector app=rook-ceph-osd,ceph_daemon_id=0 --field-selector=status.phase=Running)\" ]]; do \ + echo waiting for osd pod to be running && sleep 1; \ + done" + 
pod="$("${get_pod_cmd[@]}" --selector app=rook-ceph-osd,ceph_daemon_id=0)" || true + kubectl --namespace rook-ceph logs "$pod" || true + job="$(kubectl --namespace rook-ceph get job --selector app=rook-ceph-osd-prepare --output name)" || true + kubectl -n rook-ceph describe "$job" || true + kubectl -n rook-ceph describe deployment/rook-ceph-osd-0 || true } function wait_for_ceph_to_be_ready() { @@ -217,12 +216,26 @@ function create_LV_on_disk() { function deploy_first_rook_cluster() { BLOCK=$(sudo lsblk|awk '/14G/ {print $1}'| head -1) + create_cluster_prerequisites cd cluster/examples/kubernetes/ceph/ - kubectl create -f crds.yaml -f common.yaml -f operator.yaml + deploy_manifest_with_local_build operator.yaml yq w -i -d1 cluster-test.yaml spec.dashboard.enabled false yq w -i -d1 cluster-test.yaml spec.storage.useAllDevices false yq w -i -d1 cluster-test.yaml spec.storage.deviceFilter "${BLOCK}"1 - kubectl create -f cluster-test.yaml -f toolbox.yaml + kubectl create -f cluster-test.yaml + deploy_manifest_with_local_build toolbox.yaml +} + +function deploy_second_rook_cluster() { + BLOCK=$(sudo lsblk|awk '/14G/ {print $1}'| head -1) + cd cluster/examples/kubernetes/ceph/ + NAMESPACE=rook-ceph-secondary envsubst < common-second-cluster.yaml | kubectl create -f - + sed -i 's/namespace: rook-ceph/namespace: rook-ceph-secondary/g' cluster-test.yaml + yq w -i -d1 cluster-test.yaml spec.storage.deviceFilter "${BLOCK}"2 + yq w -i -d1 cluster-test.yaml spec.dataDirHostPath "/var/lib/rook-external" + kubectl create -f cluster-test.yaml + yq w -i toolbox.yaml metadata.namespace rook-ceph-secondary + deploy_manifest_with_local_build toolbox.yaml toolbox.yaml } function wait_for_rgw_pods() { @@ -237,15 +250,33 @@ function wait_for_rgw_pods() { } -function deploy_second_rook_cluster() { - BLOCK=$(sudo lsblk|awk '/14G/ {print $1}'| head -1) - cd cluster/examples/kubernetes/ceph/ - NAMESPACE=rook-ceph-secondary envsubst < common-second-cluster.yaml | kubectl create -f - - sed -i 
's/namespace: rook-ceph/namespace: rook-ceph-secondary/g' cluster-test.yaml - yq w -i -d1 cluster-test.yaml spec.storage.deviceFilter "${BLOCK}"2 - yq w -i -d1 cluster-test.yaml spec.dataDirHostPath "/var/lib/rook-external" - yq w -i toolbox.yaml metadata.namespace rook-ceph-secondary - kubectl create -f cluster-test.yaml -f toolbox.yaml +function verify_operator_log_message() { + local message="$1" # param 1: the message to verify exists + local namespace="${2:-rook-ceph}" # optional param 2: the namespace of the CephCluster (default: rook-ceph) + kubectl --namespace "$namespace" logs deployment/rook-ceph-operator | grep "$message" +} + +function wait_for_operator_log_message() { + local message="$1" # param 1: the message to look for + local timeout="$2" # param 2: the timeout for waiting for the message to exist + local namespace="${3:-rook-ceph}" # optional param 3: the namespace of the CephCluster (default: rook-ceph) + start_time="${SECONDS}" + while [[ $(( SECONDS - start_time )) -lt $timeout ]]; do + if verify_operator_log_message "$message" "$namespace"; then return 0; fi + sleep 5 + done + echo "timed out" >&2 && return 1 +} + +function restart_operator () { + local namespace="${1:-rook-ceph}" # optional param 1: the namespace of the CephCluster (default: rook-ceph) + get_pod_cmd=(kubectl --namespace "$namespace" get pod --selector app=rook-ceph-operator --no-headers) + pod="$("${get_pod_cmd[@]}" --output name)" + # --output name gives "pod/NAME", so don't specify the kind here + kubectl --namespace "$namespace" delete "$pod" # waits for pod to be deleted but not new pod to be running + timeout 20 bash -c \ + "until [[ -n \"\$(${get_pod_cmd[*]} --field-selector=status.phase=Running 2>/dev/null)\" ]] ; do echo waiting && sleep 1; done" + "${get_pod_cmd[@]}" } function write_object_to_cluster1_read_from_cluster2() { @@ -275,7 +306,5 @@ EOF FUNCTION="$1" shift # remove function arg now that we've recorded it # call the function with the remainder of the 
user-provided args -if ! $FUNCTION "$@"; then - echo "Call to $FUNCTION was not successful" >&2 - exit 1 -fi +# -e, -E, and -o pipefail will ensure this script returns a failure if a part of the function fails +$FUNCTION "$@"