Skip to content

Commit

Permalink
[FIXED] A peer-remove of an R1 could brick the stream. (#4420)
Browse files Browse the repository at this point in the history
We should not remove a peer from a stream when we can not find a
replacement unless R>1.

Signed-off-by: Derek Collison <derek@nats.io>

Resolves #4396
  • Loading branch information
derekcollison committed Aug 23, 2023
2 parents dc09bb7 + ddb7f9f commit 5a926f1
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 1 deletion.
7 changes: 6 additions & 1 deletion server/jetstream_cluster.go
Expand Up @@ -5184,7 +5184,12 @@ func (cc *jetStreamCluster) remapStreamAssignment(sa *streamAssignment, removePe
return true
}

// If we are here let's remove the peer at least.
// If R1 just return to avoid bricking the stream.
if sa.Group.node == nil || len(sa.Group.Peers) == 1 {
return false
}

// If we are here let's remove the peer at least, as long as we are R>1
for i, peer := range sa.Group.Peers {
if peer == removePeer {
sa.Group.Peers[i] = sa.Group.Peers[len(sa.Group.Peers)-1]
Expand Down
31 changes: 31 additions & 0 deletions server/jetstream_super_cluster_test.go
Expand Up @@ -4016,3 +4016,34 @@ func TestJetStreamSuperClusterMovingR1Stream(t *testing.T) {
return nil
})
}

// https://github.com/nats-io/nats-server/issues/4396
func TestJetStreamSuperClusterR1StreamPeerRemove(t *testing.T) {
sc := createJetStreamSuperCluster(t, 1, 3)
defer sc.shutdown()

nc, js := jsClientConnect(t, sc.serverByName("C1-S1"))
defer nc.Close()

_, err := js.AddStream(&nats.StreamConfig{
Name: "TEST",
Subjects: []string{"foo"},
Replicas: 1,
})
require_NoError(t, err)

si, err := js.StreamInfo("TEST")
require_NoError(t, err)

// Call peer remove on the only peer the leader.
resp, err := nc.Request(fmt.Sprintf(JSApiStreamRemovePeerT, "TEST"), []byte(`{"peer":"`+si.Cluster.Leader+`"}`), time.Second)
require_NoError(t, err)
var rpr JSApiStreamRemovePeerResponse
require_NoError(t, json.Unmarshal(resp.Data, &rpr))
require_False(t, rpr.Success)
require_True(t, rpr.Error.ErrCode == 10075)

// Stream should still be in place and useable.
_, err = js.StreamInfo("TEST")
require_NoError(t, err)
}

0 comments on commit 5a926f1

Please sign in to comment.