Skip to content

Commit

Permalink
Tolerate temporary errors from etcdserver
Browse files Browse the repository at this point in the history
There are cases when the etcdserver is temporarily unavailable and the
errors that we get back from kube-apiserver reflect that error. It looks
like we bail out immediately when these errors happen currently. We
should retry until timeout is reached when this sort of errors happen.

Signed-off-by: Davanum Srinivas <davanum@gmail.com>
(cherry picked from commit ebc79fa)
  • Loading branch information
dims authored and hickeyma committed Oct 7, 2022
1 parent ce66412 commit 802a229
Show file tree
Hide file tree
Showing 4 changed files with 71 additions and 2 deletions.
3 changes: 2 additions & 1 deletion go.mod
Expand Up @@ -31,9 +31,11 @@ require (
github.com/spf13/pflag v1.0.5
github.com/stretchr/testify v1.8.0
github.com/xeipuuv/gojsonschema v1.2.0
go.etcd.io/etcd/api/v3 v3.5.4
golang.org/x/crypto v0.0.0-20220525230936-793ad666bf5e
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211
golang.org/x/text v0.3.7
google.golang.org/grpc v1.47.0
k8s.io/api v0.25.0
k8s.io/apiextensions-apiserver v0.25.0
k8s.io/apimachinery v0.25.0
Expand Down Expand Up @@ -149,7 +151,6 @@ require (
golang.org/x/time v0.0.0-20220210224613-90d013bbcef8 // indirect
google.golang.org/appengine v1.6.7 // indirect
google.golang.org/genproto v0.0.0-20220502173005-c8bf987b8c21 // indirect
google.golang.org/grpc v1.47.0 // indirect
google.golang.org/protobuf v1.28.0 // indirect
gopkg.in/inf.v0 v0.9.1 // indirect
gopkg.in/yaml.v2 v2.4.0 // indirect
Expand Down
2 changes: 2 additions & 0 deletions go.sum
Expand Up @@ -619,6 +619,8 @@ github.com/yvasiyarov/newrelic_platform_go v0.0.0-20140908184405-b21fdbd4370f/go
github.com/ziutek/mymysql v1.5.4 h1:GB0qdRGsTwQSBVYuVShFBKaXSnSnYYC2d9knnE1LHFs=
github.com/ziutek/mymysql v1.5.4/go.mod h1:LMSpPZ6DbqWFxNCHW77HeMg9I646SAhApZ/wKdgO/C0=
go.etcd.io/etcd/api/v3 v3.5.0/go.mod h1:cbVKeC6lCfl7j/8jBhAK6aIYO9XOjdptoxU/nLQcPvs=
go.etcd.io/etcd/api/v3 v3.5.4 h1:OHVyt3TopwtUQ2GKdd5wu3PmmipR4FTwCqoEjSyRdIc=
go.etcd.io/etcd/api/v3 v3.5.4/go.mod h1:5GB2vv4A4AOn3yk7MftYGHkUfGtDHnEraIjym4dYz5A=
go.etcd.io/etcd/client/pkg/v3 v3.5.0/go.mod h1:IJHfcCEKxYu1Os13ZdwCwIUTUVGYTSAM3YSwc9/Ac1g=
go.etcd.io/etcd/client/v2 v2.305.0/go.mod h1:h9puh54ZTgAKtEbut2oe9P4L/oqKCVB6xsXlzd7alYQ=
go.opencensus.io v0.21.0/go.mod h1:mSImk1erAIZhrmZN+AvHh14ztQfjbGwt4TtuofqLduU=
Expand Down
26 changes: 25 additions & 1 deletion pkg/kube/wait.go
Expand Up @@ -22,6 +22,9 @@ import (
"time"

"github.com/pkg/errors"
"go.etcd.io/etcd/api/v3/v3rpc/rpctypes"
"google.golang.org/grpc/codes"

appsv1 "k8s.io/api/apps/v1"
appsv1beta1 "k8s.io/api/apps/v1beta1"
appsv1beta2 "k8s.io/api/apps/v1beta2"
Expand All @@ -32,7 +35,6 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/runtime"

"k8s.io/apimachinery/pkg/util/wait"
)

Expand All @@ -42,6 +44,22 @@ type waiter struct {
log func(string, ...interface{})
}

// isServiceUnavailable helps figure out if the error is caused by etcd not being available
// see https://pkg.go.dev/go.etcd.io/etcd/api/v3/v3rpc/rpctypes for `codes.Unavailable`
// we use this to check if the etcdserver is not available we should retry in case
// this is a temporary situation
func isServiceUnavailable(err error) bool {
if err != nil {
err = rpctypes.Error(err)
if ev, ok := err.(rpctypes.EtcdError); ok {
if ev.Code() == codes.Unavailable {
return true
}
}
}
return false
}

// waitForResources polls to get the current status of all pods, PVCs, Services and
// Jobs(optional) until all are ready or a timeout is reached
func (w *waiter) waitForResources(created ResourceList) error {
Expand All @@ -54,6 +72,9 @@ func (w *waiter) waitForResources(created ResourceList) error {
for _, v := range created {
ready, err := w.c.IsReady(ctx, v)
if !ready || err != nil {
if isServiceUnavailable(err) {
return false, nil
}
return false, err
}
}
Expand All @@ -72,6 +93,9 @@ func (w *waiter) waitForDeletedResources(deleted ResourceList) error {
for _, v := range deleted {
err := v.Get()
if err == nil || !apierrors.IsNotFound(err) {
if isServiceUnavailable(err) {
return false, nil
}
return false, err
}
}
Expand Down
42 changes: 42 additions & 0 deletions pkg/kube/wait_test.go
@@ -0,0 +1,42 @@
/*
Copyright The Helm Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package kube // import "helm.sh/helm/v3/pkg/kube"

import (
"errors"
"testing"

"go.etcd.io/etcd/api/v3/v3rpc/rpctypes"
)

func Test_isServiceUnavailable(t *testing.T) {
tests := []struct {
err error
expect bool
}{
{err: nil, expect: false},
{err: errors.New("random error from somewhere"), expect: false},
{err: rpctypes.ErrGRPCLeaderChanged, expect: true},
{err: errors.New("etcdserver: leader changed"), expect: true},
}

for _, tt := range tests {
if isServiceUnavailable(tt.err) != tt.expect {
t.Errorf("failed test for %q (expect equal: %t)", tt.err, tt.expect)
}
}
}

0 comments on commit 802a229

Please sign in to comment.