Skip to content

Commit

Permalink
chore(ci): Downgrade telepresence (#1641)
Browse files Browse the repository at this point in the history
The latest spate of E2E failures with random timeouts seems to be related
to the new release of Telepresence. This PR pins the version of
Telepresence to the last-known good version.

This PR also includes a change to dump pod logs on test failure to help
with debugging.

Signed-off-by: Charith Ellawala <charith@cerbos.dev>
  • Loading branch information
charithe committed Jun 14, 2023
1 parent 37762a8 commit d2dbcd0
Show file tree
Hide file tree
Showing 5 changed files with 39 additions and 9 deletions.
5 changes: 3 additions & 2 deletions .github/workflows/e2e.yaml
Expand Up @@ -21,10 +21,10 @@ jobs:
- name: Install Helmfile and Telepresence
run: |
mkdir bin
curl -fL https://github.com/helmfile/helmfile/releases/download/v0.153.1/helmfile_0.153.1_linux_amd64.tar.gz -o bin/helmfile.tar.gz
curl -fL https://github.com/helmfile/helmfile/releases/download/v0.154.0/helmfile_0.154.0_linux_amd64.tar.gz -o bin/helmfile.tar.gz
tar -xf bin/helmfile.tar.gz -C bin
chmod +x bin/helmfile
curl -fL https://app.getambassador.io/download/tel2/linux/amd64/latest/telepresence -o bin/telepresence
curl -fL https://ambassador-labs.gateway.scarf.sh/telepresenceio/telepresence/releases/download/v2.13.2/telepresence-linux-amd64 -o bin/telepresence
chmod +x bin/telepresence
echo "$(pwd)/bin" >> $GITHUB_PATH
mkdir -p ~/.config/telepresence
Expand All @@ -33,6 +33,7 @@ jobs:
helm: 60s
trafficManagerAPI: 30s
EOF
go install github.com/stern/stern@latest
- name: Initialize Helmfile
run: helmfile init --force
Expand Down
2 changes: 1 addition & 1 deletion e2e/run.sh
Expand Up @@ -42,5 +42,5 @@ if [[ "$#" -gt "0" ]]; then
# E.g. e2e/run.sh ./mysql/... -args -run-id=xxxxx -no-cleanup
run_tests "$@"
else
run_tests ./... -args -no-cleanup="$E2E_NO_CLEANUP" -command-timeout=4m
run_tests ./... -args -no-cleanup="$E2E_NO_CLEANUP" -command-timeout=5m
fi
12 changes: 9 additions & 3 deletions internal/server/tests.go
Expand Up @@ -133,28 +133,34 @@ func (tr *TestRunner) executeGRPCTestCase(grpcConn *grpc.ClientConn, tc *private
ctx, cancelFunc := context.WithTimeout(context.Background(), tr.Timeout)
defer cancelFunc()

backoffConf := backoff.WithContext(
backoff.WithMaxRetries(
backoff.NewConstantBackOff(time.Millisecond*retryBackoffDelay),
tr.CerbosClientMaxRetries),
ctx)

switch call := tc.CallKind.(type) {
case *privatev1.ServerTestCase_CheckResourceSet:
cerbosClient := svcv1.NewCerbosServiceClient(grpcConn)
want = call.CheckResourceSet.WantResponse
err = backoff.Retry(func() error {
have, err = cerbosClient.CheckResourceSet(ctx, call.CheckResourceSet.Input)
return err
}, backoff.WithMaxRetries(backoff.NewConstantBackOff(time.Millisecond*retryBackoffDelay), tr.CerbosClientMaxRetries))
}, backoffConf)
case *privatev1.ServerTestCase_CheckResourceBatch:
cerbosClient := svcv1.NewCerbosServiceClient(grpcConn)
want = call.CheckResourceBatch.WantResponse
err = backoff.Retry(func() error {
have, err = cerbosClient.CheckResourceBatch(ctx, call.CheckResourceBatch.Input)
return err
}, backoff.WithMaxRetries(backoff.NewConstantBackOff(time.Millisecond*retryBackoffDelay), tr.CerbosClientMaxRetries))
}, backoffConf)
case *privatev1.ServerTestCase_CheckResources:
cerbosClient := svcv1.NewCerbosServiceClient(grpcConn)
want = call.CheckResources.WantResponse
err = backoff.Retry(func() error {
have, err = cerbosClient.CheckResources(ctx, call.CheckResources.Input)
return err
}, backoff.WithMaxRetries(backoff.NewConstantBackOff(time.Millisecond*retryBackoffDelay), tr.CerbosClientMaxRetries))
}, backoffConf)
case *privatev1.ServerTestCase_PlaygroundValidate:
playgroundClient := svcv1.NewCerbosPlaygroundServiceClient(grpcConn)
want = call.PlaygroundValidate.WantResponse
Expand Down
13 changes: 13 additions & 0 deletions internal/test/e2e/setup.go
Expand Up @@ -36,6 +36,14 @@ func Teardown(ctx Ctx) error {
}

// Cmd runs the command called name with the supplied args by delegating to
// execCmd with output display switched off. It returns whatever error execCmd
// reports.
func Cmd(ctx Ctx, name string, args ...string) error {
	const showOutput = false
	return execCmd(ctx, showOutput, name, args...)
}

// CmdWithOutput runs the command called name with the supplied args by
// delegating to execCmd with output display switched on. It returns whatever
// error execCmd reports.
func CmdWithOutput(ctx Ctx, name string, args ...string) error {
	const showOutput = true
	return execCmd(ctx, showOutput, name, args...)
}

func execCmd(ctx Ctx, showOutput bool, name string, args ...string) error {
c := cmd.NewCmd(name, args...)
c.Env = ctx.Environ()

Expand All @@ -47,6 +55,9 @@ func Cmd(ctx Ctx, name string, args ...string) error {
select {
case done := <-status:
if done.Complete && done.Error == nil && done.Exit == 0 {
if showOutput {
dumpOutput(ctx, done)
}
return nil
}

Expand Down Expand Up @@ -83,9 +94,11 @@ func checkCerbosIsUp(ctx Ctx) func() error {
ctx.Logf("Checking whether Cerbos is up")
resp, err := client.Get(healthURL)
if err != nil {
ctx.Logf("Error during healthcheck: %v", err)
return err
}
if resp.StatusCode != http.StatusOK {
ctx.Logf("Received health status: %q", resp.Status)
return fmt.Errorf("received status %q", resp.Status)
}

Expand Down
16 changes: 13 additions & 3 deletions internal/test/e2e/tests.go
Expand Up @@ -7,21 +7,24 @@ package e2e

import (
"crypto/tls"
"fmt"
"testing"
"time"

"github.com/cerbos/cerbos/client"
"github.com/cerbos/cerbos/internal/server"
"github.com/stretchr/testify/require"
"google.golang.org/grpc"
"google.golang.org/grpc/credentials"
"google.golang.org/grpc/credentials/insecure"

"github.com/cerbos/cerbos/client"
"github.com/cerbos/cerbos/internal/server"
)

// Suite name constants and the shared timeout for the e2e tests.
// NOTE(review): the *Suite values look like identifiers of test suites selected
// through the suite options — confirm against the callers of RunSuites.
const (
AdminSuite = "admin"
ChecksSuite = "checks"
PlanResourcesSuite = "plan_resources"
// testTimeout is the value RunSuites assigns to the test runner's Timeout.
testTimeout = 90 * time.Second // Things are slower inside Kind
)

type Opt func(*suiteOpt)
Expand Down Expand Up @@ -101,6 +104,13 @@ func RunSuites(t *testing.T, opts ...Opt) {
}

require.NoError(t, Setup(ctx))
t.Cleanup(func() {
if t.Failed() {
if err := CmdWithOutput(ctx, "stern", ".*", fmt.Sprintf("--namespace=%s", ctx.Namespace()), "--no-follow"); err != nil {
t.Logf("Failed to grab logs: %v", err)
}
}
})

if sopt.postSetup != nil {
ctx.Logf("Running PostSetup function")
Expand All @@ -109,7 +119,7 @@ func RunSuites(t *testing.T, opts ...Opt) {
}

tr := server.LoadTestCases(t, sopt.suites...)
tr.Timeout = 30 * time.Second // Things are slower inside Kind
tr.Timeout = testTimeout

if sopt.overlayMaxRetries != 0 {
tr.WithCerbosClientRetries(sopt.overlayMaxRetries)
Expand Down

0 comments on commit d2dbcd0

Please sign in to comment.