Skip to content

Commit

Permalink
Make sure preferred peer for stepdown is healthy.
Browse files Browse the repository at this point in the history
Signed-off-by: Derek Collison <derek@nats.io>
  • Loading branch information
derekcollison committed Feb 23, 2023
1 parent 9d97264 commit b70b706
Show file tree
Hide file tree
Showing 2 changed files with 32 additions and 12 deletions.
4 changes: 2 additions & 2 deletions server/jetstream_cluster_2_test.go
Expand Up @@ -199,10 +199,10 @@ func TestJetStreamClusterMultiRestartBug(t *testing.T) {
if err != nil {
t.Fatalf("Unexpected error: %v", err)
}
- checkFor(t, 10*time.Second, 250*time.Millisecond, func() error {
+ checkFor(t, 20*time.Second, 250*time.Millisecond, func() error {
si, _ := js2.StreamInfo("TEST")
if si == nil || si.Cluster == nil {
- t.Fatalf("Did not get stream info")
+ return fmt.Errorf("No stream info or cluster")
}
for _, pi := range si.Cluster.Replicas {
if !pi.Current {
Expand Down
40 changes: 30 additions & 10 deletions server/raft.go
Expand Up @@ -1281,7 +1281,6 @@ func (n *raft) StepDown(preferred ...string) error {
n.debug("Being asked to stepdown")

// See if we have up to date followers.
- nowts := time.Now().UnixNano()
maybeLeader := noLeader
if len(preferred) > 0 {
if preferred[0] != _EMPTY_ {
Expand All @@ -1290,21 +1289,42 @@ func (n *raft) StepDown(preferred ...string) error {
preferred = nil
}
}
+ // Can't pick ourselves.
+ if maybeLeader != noLeader && maybeLeader == n.id {
+ maybeLeader = noLeader
+ preferred = nil
+ }
+
- for peer, ps := range n.peers {
- // If not us and alive and caughtup.
- if peer != n.id && (nowts-ps.ts) < int64(hbInterval*3) {
- if maybeLeader != noLeader && maybeLeader != peer {
+ nowts := time.Now().UnixNano()
+
+ // If we have a preferred check it first.
+ if maybeLeader != noLeader {
+ var isHealthy bool
+ if ps, ok := n.peers[maybeLeader]; ok {
+ si, ok := n.s.nodeToInfo.Load(maybeLeader)
+ isHealthy = ok && !si.(nodeInfo).offline && (nowts-ps.ts) < int64(hbInterval*3)
+ }
+ if !isHealthy {
+ maybeLeader = noLeader
+ }
+ }
+
+ // If we do not have a preferred at this point pick the first healthy one.
+ // Make sure not ourselves.
+ if maybeLeader == noLeader {
+ for peer, ps := range n.peers {
+ if peer == n.id {
continue
}
- if si, ok := n.s.nodeToInfo.Load(peer); !ok || si.(nodeInfo).offline {
- continue
+ si, ok := n.s.nodeToInfo.Load(peer)
+ isHealthy := ok && !si.(nodeInfo).offline && (nowts-ps.ts) < int64(hbInterval*3)
+ if isHealthy {
+ maybeLeader = peer
+ break
}
- n.debug("Looking at %q which is %v behind", peer, time.Duration(nowts-ps.ts))
- maybeLeader = peer
- break
}
}

stepdown := n.stepdown
n.Unlock()

Expand Down

0 comments on commit b70b706

Please sign in to comment.