Skip to content

Commit

Permalink
test: run RGW multisite test in nightly job
Browse files Browse the repository at this point in the history
In the nightly job, run the test with the latest Ceph version so we can
detect if there are RGW changes in Ceph that might break multisite. Use
a reusable GitHub action workflow to duplicate as little code as
possible.

Signed-off-by: Blaine Gardner <blaine.gardner@redhat.com>
  • Loading branch information
BlaineEXE committed Oct 6, 2021
1 parent 4463793 commit cb6992c
Show file tree
Hide file tree
Showing 4 changed files with 158 additions and 79 deletions.
101 changes: 101 additions & 0 deletions .github/actions/rgw-multisite-test/action.yml
@@ -0,0 +1,101 @@
# Composite action that runs the RGW multisite integration test: it sets up
# minikube, builds Rook, deploys two Ceph clusters (namespaces rook-ceph and
# rook-ceph-secondary), and verifies object replication and operator log output.
# The steps call scripts under tests/scripts/ by relative path, so the calling
# job must check out the repository before using this action.
name: RGW Multisite Test
description: Reusable workflow to test RGW multisite integration
inputs:
  github-token:
    description: GITHUB_TOKEN from the calling workflow
    required: true
  ceph-image:
    description: Ceph image to use for the workflow (e.g., quay.io/ceph/ceph:v16.2.5)
    required: false

runs:
  using: "composite"
  steps:
    - name: setup golang
      uses: actions/setup-go@v2
      with:
        # quoted so the version is always read as a string, never as a float
        go-version: "1.16"

    - name: install deps
      shell: bash --noprofile --norc -eo pipefail -x {0}
      run: |
        tests/scripts/github-action-helper.sh install_deps
        sudo apt-get install -y s3cmd

    - name: setup minikube
      uses: manusa/actions-setup-minikube@v2.3.1
      with:
        minikube version: 'v1.18.1'
        kubernetes version: 'v1.19.2'
        start args: --memory 6g --cpus=2
        github token: ${{ inputs.github-token }}

    - name: use local disk into two partitions
      shell: bash --noprofile --norc -eo pipefail -x {0}
      run: |
        BLOCK=$(sudo lsblk --paths|awk '/14G/ {print $1}'| head -1)
        tests/scripts/github-action-helper.sh use_local_disk
        tests/scripts/create-bluestore-partitions.sh --disk "$BLOCK" --osd-count 2
        sudo lsblk

    - name: build rook
      shell: bash --noprofile --norc -eo pipefail -x {0}
      run: tests/scripts/github-action-helper.sh build_rook

    - name: set Ceph version in CephCluster manifest
      shell: bash --noprofile --norc -eo pipefail -x {0}
      env:
        # Hand the input to the script through the environment instead of
        # expanding it inside the script text, so its value can never be
        # interpreted as shell code. An unset input arrives as an empty string.
        CEPH_IMAGE: ${{ inputs.ceph-image }}
      run: |
        tests/scripts/github-action-helper.sh replace_ceph_image \
          "cluster/examples/kubernetes/ceph/cluster-test.yaml" "${CEPH_IMAGE}"

    - name: deploy first cluster rook
      shell: bash --noprofile --norc -eo pipefail -x {0}
      run: |
        tests/scripts/github-action-helper.sh deploy_first_rook_cluster
        kubectl create -f cluster/examples/kubernetes/ceph/object-multisite-test.yaml
        # wait for multisite store to be created
        tests/scripts/github-action-helper.sh wait_for_rgw_pods rook-ceph

    - name: prep second cluster pull realm config
      shell: bash --noprofile --norc -eo pipefail -x {0}
      run: |
        cd cluster/examples/kubernetes/ceph/
        # point the pull-realm manifest at the first cluster's RGW service
        IP_ADDR=$(kubectl -n rook-ceph get svc rook-ceph-rgw-multisite-store -o jsonpath="{.spec.clusterIP}")
        yq w -i -d1 object-multisite-pull-realm-test.yaml spec.pull.endpoint http://${IP_ADDR}:80
        # swap the placeholder keys in the manifest for the keys stored in the realm-a-keys secret
        BASE64_ACCESS_KEY=$(kubectl -n rook-ceph get secrets realm-a-keys -o jsonpath="{.data.access-key}")
        BASE64_SECRET_KEY=$(kubectl -n rook-ceph get secrets realm-a-keys -o jsonpath="{.data.secret-key}")
        sed -i 's/VzFjNFltMVdWRTFJWWxZelZWQT0=/'"$BASE64_ACCESS_KEY"'/g' object-multisite-pull-realm-test.yaml
        sed -i 's/WVY1MFIxeExkbG84U3pKdlRseEZXVGR3T3k1U1dUSS9KaTFoUVE9PQ==/'"$BASE64_SECRET_KEY"'/g' object-multisite-pull-realm-test.yaml

    - name: deploy second cluster rook
      shell: bash --noprofile --norc -eo pipefail -x {0}
      run: |
        tests/scripts/github-action-helper.sh deploy_second_rook_cluster
        kubectl create -f cluster/examples/kubernetes/ceph/object-multisite-pull-realm-test.yaml
        # wait for realms to be pulled and zone-b-multisite-store to be created
        tests/scripts/github-action-helper.sh wait_for_rgw_pods rook-ceph-secondary

    - name: wait for ceph cluster 1 to be ready
      shell: bash --noprofile --norc -eo pipefail -x {0}
      run: |
        mkdir test
        tests/scripts/validate_cluster.sh osd 1
        kubectl -n rook-ceph get pods

    - name: write an object to one cluster, read from the other
      shell: bash --noprofile --norc -eo pipefail -x {0}
      run: tests/scripts/github-action-helper.sh write_object_to_cluster1_read_from_cluster2

    # if this test fails, it could mean the RGW `period get` or `period update` output has changed
    - name: RGW configuration period should be committed on first reconcile and not be committed on second reconcile
      shell: bash --noprofile --norc -eo pipefail -x {0}
      run: |
        ns_name_primary='"rook-ceph/multisite-store"' # double quotes intended
        ns_name_secondary='"rook-ceph-secondary/zone-b-multisite-store"' # double quotes intended
        committed_msg="committing changes to RGW configuration period for CephObjectStore"
        tests/scripts/github-action-helper.sh verify_operator_log_message "${committed_msg} ${ns_name_primary}"
        tests/scripts/github-action-helper.sh verify_operator_log_message "${committed_msg} ${ns_name_secondary}"
        tests/scripts/github-action-helper.sh restart_operator
        not_committed_msg="there are no changes to commit for RGW configuration period for CephObjectStore"
        tests/scripts/github-action-helper.sh wait_for_operator_log_message "${not_committed_msg} ${ns_name_primary}" 120
        tests/scripts/github-action-helper.sh wait_for_operator_log_message "${not_committed_msg} ${ns_name_secondary}" 90
86 changes: 11 additions & 75 deletions .github/workflows/canary-integration-test.yml
Expand Up @@ -883,88 +883,24 @@ jobs:
uses: mxschmitt/action-tmate@v3
timeout-minutes: 60

# rgw-multisite-testing:
# uses: ./.github/workflows/z_rgw-multisite.yml
# secrets:
# github-token: ${{ secrets.GITHUB_TOKEN }}

rgw-multisite-testing:
runs-on: ubuntu-18.04
if: "!contains(github.event.pull_request.labels.*.name, 'skip-ci')"
runs-on: ubuntu-18.04
steps:
- name: checkout
uses: actions/checkout@v2
- uses: actions/checkout@v2
with:
fetch-depth: 0

- name: setup golang
uses: actions/setup-go@v2
- name: run RGW multisite test
uses: ./.github/actions/rgw-multisite-test
with:
go-version: 1.16

- name: install deps
run: |
tests/scripts/github-action-helper.sh install_deps
sudo apt-get install -y s3cmd
- name: setup minikube
uses: manusa/actions-setup-minikube@v2.3.1
with:
minikube version: 'v1.18.1'
kubernetes version: 'v1.19.2'
start args: --memory 6g --cpus=2
github token: ${{ secrets.GITHUB_TOKEN }}

- name: use local disk into two partitions
run: |
BLOCK=$(sudo lsblk --paths|awk '/14G/ {print $1}'| head -1)
tests/scripts/github-action-helper.sh use_local_disk
tests/scripts/create-bluestore-partitions.sh --disk "$BLOCK" --osd-count 2
sudo lsblk
- name: build rook
run: tests/scripts/github-action-helper.sh build_rook

- name: deploy first cluster rook
run: |
tests/scripts/github-action-helper.sh deploy_first_rook_cluster
kubectl create -f cluster/examples/kubernetes/ceph/object-multisite-test.yaml
# wait for multisite store to be created
tests/scripts/github-action-helper.sh wait_for_rgw_pods rook-ceph
- name: prep second cluster pull realm config
run: |
cd cluster/examples/kubernetes/ceph/
IP_ADDR=$(kubectl -n rook-ceph get svc rook-ceph-rgw-multisite-store -o jsonpath="{.spec.clusterIP}")
yq w -i -d1 object-multisite-pull-realm-test.yaml spec.pull.endpoint http://${IP_ADDR}:80
BASE64_ACCESS_KEY=$(kubectl -n rook-ceph get secrets realm-a-keys -o jsonpath="{.data.access-key}")
BASE64_SECRET_KEY=$(kubectl -n rook-ceph get secrets realm-a-keys -o jsonpath="{.data.secret-key}")
sed -i 's/VzFjNFltMVdWRTFJWWxZelZWQT0=/'"$BASE64_ACCESS_KEY"'/g' object-multisite-pull-realm-test.yaml
sed -i 's/WVY1MFIxeExkbG84U3pKdlRseEZXVGR3T3k1U1dUSS9KaTFoUVE9PQ==/'"$BASE64_SECRET_KEY"'/g' object-multisite-pull-realm-test.yaml
- name: deploy second cluster rook
run: |
tests/scripts/github-action-helper.sh deploy_second_rook_cluster
kubectl create -f cluster/examples/kubernetes/ceph/object-multisite-pull-realm-test.yaml
# wait for realms to be pulled and zone-b-multisite-store to be created
tests/scripts/github-action-helper.sh wait_for_rgw_pods rook-ceph-secondary
- name: wait for ceph cluster 1 to be ready
run: |
mkdir test
tests/scripts/validate_cluster.sh osd 1
kubectl -n rook-ceph get pods
- name: write an object to one cluster, read from the other
run: tests/scripts/github-action-helper.sh write_object_to_cluster1_read_from_cluster2

# if this test fails, it could mean the RGW `period get` or `period update` output has changed
- name: RGW configuration period should be committed on first reconcile and not be committed on second reconcile
run: |
ns_name_primary='"rook-ceph/multisite-store"' # double quotes intended
ns_name_secondary='"rook-ceph-secondary/zone-b-multisite-store"' # double quotes intended
committed_msg="committing changes to RGW configuration period for CephObjectStore"
tests/scripts/github-action-helper.sh verify_operator_log_message "${committed_msg} ${ns_name_primary}"
tests/scripts/github-action-helper.sh verify_operator_log_message "${committed_msg} ${ns_name_secondary}"
tests/scripts/github-action-helper.sh restart_operator
not_committed_msg="there are no changes to commit for RGW configuration period for CephObjectStore"
tests/scripts/github-action-helper.sh wait_for_operator_log_message "${not_committed_msg} ${ns_name_primary}" 120
tests/scripts/github-action-helper.sh wait_for_operator_log_message "${not_committed_msg} ${ns_name_secondary}" 90
github-token: ${{ secrets.GITHUB_TOKEN }}
# ceph-image: # use default

- name: upload test result
uses: actions/upload-artifact@v2
Expand Down
34 changes: 34 additions & 0 deletions .github/workflows/daily-nightly-canary.yml
@@ -0,0 +1,34 @@
# Scheduled workflow that runs the RGW multisite test once per Ceph development
# image tag listed in the matrix, so RGW changes in Ceph that break multisite
# are detected early.
name: Daily nightly canary on Ceph devel
on:
  schedule:
    - cron: "0 0 * * *"  # every day at midnight (UTC)

defaults:
  run:
    # reference: https://docs.github.com/en/actions/reference/workflow-syntax-for-github-actions#using-a-specific-shell
    shell: bash --noprofile --norc -eo pipefail -x {0}

jobs:
  rgw-multisite-test-with-ceph-devel:
    runs-on: ubuntu-18.04
    strategy:
      # let every Ceph image run to completion; a failure on one tag must not
      # cancel the jobs that test the other tags
      fail-fast: false
      matrix:
        ceph-image-tag: ['latest-master-devel', 'latest-octopus-devel', 'latest-pacific-devel']

    steps:
      - uses: actions/checkout@v2
        with:
          fetch-depth: 0

      - name: run RGW multisite test
        uses: ./.github/actions/rgw-multisite-test
        with:
          github-token: ${{ secrets.GITHUB_TOKEN }}
          ceph-image: quay.io/ceph/daemon-base:${{ matrix.ceph-image-tag }}

      - name: upload test result
        uses: actions/upload-artifact@v2
        if: always()
        with:
          # include the matrix tag so the three jobs do not upload into the
          # same artifact and overwrite each other's results
          name: rgw-multisite-testing-${{ matrix.ceph-image-tag }}
          path: test
16 changes: 12 additions & 4 deletions tests/scripts/github-action-helper.sh
Expand Up @@ -150,10 +150,20 @@ function create_cluster_prerequisites() {
}

# Rewrite every `image: rook/ceph:<tag>` reference in a manifest to use the
# `local-build` tag, then create the manifest's resources with kubectl.
# parameter 1: path of the manifest to edit in place and deploy
function deploy_manifest_with_local_build() {
  local manifest="$1"
  # `.*` consumes the whole existing tag (including any `-suffix`), so a single
  # substitution covers every tag format
  sed -i "s|image: rook/ceph:.*|image: rook/ceph:local-build|g" "${manifest}"
  kubectl create -f "${manifest}"
}

# Replace every `image: ...ceph/ceph:<tag>` reference in a manifest with the
# given Ceph image. When no image is given, the file is left untouched and the
# function succeeds.
function replace_ceph_image() {
  local file="$1" # parameter 1: the file in which to replace the ceph image
  local ceph_image="${2:-}" # parameter 2: the new ceph image to use
  if [[ -z ${ceph_image} ]]; then
    echo "No Ceph image given. Not adjusting manifests."
    return 0
  fi
  # `&`, `\`, and the `|` delimiter are special in a sed replacement; escape
  # them so the image string is always inserted literally
  local escaped_image
  escaped_image=$(printf '%s' "${ceph_image}" | sed 's/[&|\\]/\\&/g')
  sed -i "s|image: .*ceph/ceph:.*|image: ${escaped_image}|g" "${file}"
}

function deploy_cluster() {
cd cluster/examples/kubernetes/ceph
deploy_manifest_with_local_build operator.yaml
Expand Down Expand Up @@ -218,8 +228,6 @@ function deploy_first_rook_cluster() {
BLOCK=$(sudo lsblk|awk '/14G/ {print $1}'| head -1)
create_cluster_prerequisites
cd cluster/examples/kubernetes/ceph/

sed -i 's/ROOK_LOG_LEVEL: "INFO"/ROOK_LOG_LEVEL: "DEBUG"/' operator.yaml # TODO: testing only
deploy_manifest_with_local_build operator.yaml
yq w -i -d1 cluster-test.yaml spec.dashboard.enabled false
yq w -i -d1 cluster-test.yaml spec.storage.useAllDevices false
Expand All @@ -237,7 +245,7 @@ function deploy_second_rook_cluster() {
yq w -i -d1 cluster-test.yaml spec.dataDirHostPath "/var/lib/rook-external"
kubectl create -f cluster-test.yaml
yq w -i toolbox.yaml metadata.namespace rook-ceph-secondary
deploy_manifest_with_local_build toolbox.yaml toolbox.yaml
deploy_manifest_with_local_build toolbox.yaml
}

function wait_for_rgw_pods() {
Expand Down

0 comments on commit cb6992c

Please sign in to comment.