Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[IMPROVED] Make sure preferred peer for stepdown is healthy. #3905

Merged
Merged 1 commit on Feb 23, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
4 changes: 2 additions & 2 deletions server/jetstream_cluster_2_test.go
Expand Up @@ -199,10 +199,10 @@ func TestJetStreamClusterMultiRestartBug(t *testing.T) {
if err != nil {
t.Fatalf("Unexpected error: %v", err)
}
checkFor(t, 10*time.Second, 250*time.Millisecond, func() error {
checkFor(t, 20*time.Second, 250*time.Millisecond, func() error {
si, _ := js2.StreamInfo("TEST")
if si == nil || si.Cluster == nil {
t.Fatalf("Did not get stream info")
return fmt.Errorf("No stream info or cluster")
}
for _, pi := range si.Cluster.Replicas {
if !pi.Current {
Expand Down
40 changes: 30 additions & 10 deletions server/raft.go
Expand Up @@ -1281,7 +1281,6 @@ func (n *raft) StepDown(preferred ...string) error {
n.debug("Being asked to stepdown")

// See if we have up to date followers.
nowts := time.Now().UnixNano()
maybeLeader := noLeader
if len(preferred) > 0 {
if preferred[0] != _EMPTY_ {
Expand All @@ -1290,21 +1289,42 @@ func (n *raft) StepDown(preferred ...string) error {
preferred = nil
}
}
// Can't pick ourselves.
if maybeLeader == n.id {
maybeLeader = noLeader
preferred = nil
}

for peer, ps := range n.peers {
// If not us and alive and caughtup.
if peer != n.id && (nowts-ps.ts) < int64(hbInterval*3) {
if maybeLeader != noLeader && maybeLeader != peer {
nowts := time.Now().UnixNano()

// If we have a preferred check it first.
if maybeLeader != noLeader {
var isHealthy bool
if ps, ok := n.peers[maybeLeader]; ok {
si, ok := n.s.nodeToInfo.Load(maybeLeader)
isHealthy = ok && !si.(nodeInfo).offline && (nowts-ps.ts) < int64(hbInterval*3)
}
if !isHealthy {
maybeLeader = noLeader
}
}

// If we do not have a preferred at this point pick the first healthy one.
// Make sure not ourselves.
if maybeLeader == noLeader {
for peer, ps := range n.peers {
if peer == n.id {
continue
}
if si, ok := n.s.nodeToInfo.Load(peer); !ok || si.(nodeInfo).offline {
continue
si, ok := n.s.nodeToInfo.Load(peer)
isHealthy := ok && !si.(nodeInfo).offline && (nowts-ps.ts) < int64(hbInterval*3)
if isHealthy {
maybeLeader = peer
break
}
n.debug("Looking at %q which is %v behind", peer, time.Duration(nowts-ps.ts))
maybeLeader = peer
break
}
}

stepdown := n.stepdown
n.Unlock()

Expand Down