Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[FIXED] A peer-remove of an R1 could brick the stream. #4420

Merged
merged 1 commit into from
Aug 23, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
7 changes: 6 additions & 1 deletion server/jetstream_cluster.go
Expand Up @@ -5184,7 +5184,12 @@ func (cc *jetStreamCluster) remapStreamAssignment(sa *streamAssignment, removePe
return true
}

// If we are here let's remove the peer at least.
// If R1 just return to avoid bricking the stream.
if sa.Group.node == nil || len(sa.Group.Peers) == 1 {
return false
}

// If we are here let's remove the peer at least, as long as we are R>1
for i, peer := range sa.Group.Peers {
if peer == removePeer {
sa.Group.Peers[i] = sa.Group.Peers[len(sa.Group.Peers)-1]
Expand Down
31 changes: 31 additions & 0 deletions server/jetstream_super_cluster_test.go
Expand Up @@ -4016,3 +4016,34 @@ func TestJetStreamSuperClusterMovingR1Stream(t *testing.T) {
return nil
})
}

// https://github.com/nats-io/nats-server/issues/4396
func TestJetStreamSuperClusterR1StreamPeerRemove(t *testing.T) {
sc := createJetStreamSuperCluster(t, 1, 3)
defer sc.shutdown()

nc, js := jsClientConnect(t, sc.serverByName("C1-S1"))
defer nc.Close()

_, err := js.AddStream(&nats.StreamConfig{
Name: "TEST",
Subjects: []string{"foo"},
Replicas: 1,
})
require_NoError(t, err)

si, err := js.StreamInfo("TEST")
require_NoError(t, err)

// Call peer remove on the only peer the leader.
resp, err := nc.Request(fmt.Sprintf(JSApiStreamRemovePeerT, "TEST"), []byte(`{"peer":"`+si.Cluster.Leader+`"}`), time.Second)
require_NoError(t, err)
var rpr JSApiStreamRemovePeerResponse
require_NoError(t, json.Unmarshal(resp.Data, &rpr))
require_False(t, rpr.Success)
require_True(t, rpr.Error.ErrCode == 10075)

// Stream should still be in place and useable.
_, err = js.StreamInfo("TEST")
require_NoError(t, err)
}