Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Do not fail healthz in single server mode on failed snapshot restore. #4100

Merged
merged 3 commits into from Apr 25, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
34 changes: 34 additions & 0 deletions server/jetstream_test.go
Expand Up @@ -19968,3 +19968,37 @@ func TestJetStreamKVHistoryRegression(t *testing.T) {
})
}
}

// TestJetStreamSnapshotRestoreStallAndHealthz checks that healthz keeps
// reporting "ok" for a single JetStream server when the snapshot restore
// staging directory exists at the root of the store directory.
func TestJetStreamSnapshotRestoreStallAndHealthz(t *testing.T) {
	srv := RunBasicJetStreamServer(t)
	defer srv.Shutdown()

	conn, jsc := jsClientConnect(t, srv)
	defer conn.Close()

	streamCfg := &nats.StreamConfig{
		Name:     "ORDERS",
		Subjects: []string{"orders.*"},
	}
	_, err := jsc.AddStream(streamCfg)
	require_NoError(t, err)

	// Put some messages into the stream before probing health.
	const numMsgs = 1000
	for sent := 0; sent < numMsgs; sent++ {
		sendStreamMsg(t, conn, "orders.created", "new order")
	}

	// requireHealthy fails the test unless healthz reports status "ok"
	// with an empty error string.
	requireHealthy := func() {
		t.Helper()
		status := srv.healthz(nil)
		if status.Status == "ok" && status.Error == _EMPTY_ {
			return
		}
		t.Fatalf("Expected health to be ok, got %+v", status)
	}

	// Baseline: healthy before the staging directory exists.
	requireHealthy()

	// Simulate the staging directory used for restores. It is normally cleaned
	// up, but since it's at the root of the storage directory we make sure its
	// presence does not affect healthz.
	stagingDir := filepath.Join(srv.getJetStream().config.StoreDir, snapStagingDir)
	err = os.MkdirAll(stagingDir, defaultDirPerms)
	require_NoError(t, err)

	// Health must still be ok with the staging directory in place.
	requireHealthy()
}
3 changes: 3 additions & 0 deletions server/monitor.go
Expand Up @@ -3083,6 +3083,9 @@ func (s *Server) healthz(opts *HealthzOptions) *HealthStatus {
// Whip through account folders and pull each stream name.
fis, _ := os.ReadDir(sdir)
for _, fi := range fis {
if fi.Name() == snapStagingDir {
continue
}
acc, err := s.LookupAccount(fi.Name())
if err != nil {
health.Status = na
Expand Down
4 changes: 2 additions & 2 deletions server/norace_test.go
Expand Up @@ -6397,8 +6397,8 @@ func TestNoRaceJetStreamConsumerCreateTimeNumPending(t *testing.T) {
case <-time.After(5 * time.Second):
}

// Should stay under 5ms now, but for Travis variability say 25ms.
threshold := 25 * time.Millisecond
// Should stay under 5ms now, but for Travis variability say 50ms.
threshold := 50 * time.Millisecond

start := time.Now()
_, err = js.PullSubscribe("events.*", "dlc")
Expand Down