Skip to content

Commit

Permalink
Add logfile_max_num feature (#4548)
Browse files Browse the repository at this point in the history
### Changes proposed in this pull request:

NATS Server 2.9 has `logfile_size_limit` option which allows the
operator to set an optional byte limit on the NATS Server log file which
when met causes a "rotation" such that the current log file is renamed
(original file name appended with a time stamp to nanosecond accuracy)
and a new log file is instantiated.

This PR is a new `logfile_max_num` companion option (alias
`log_max_num`) which allows the operator to designate that the server
should prune the **total number of log files** -- the currently active
log file plus backups -- to the maximum setting.

A max value of `0` (the implicit default) or a negative number has
meaning of unlimited log files (no maximum) as this is an opt-in
feature.

A max value of `1` is effectively a truncate-only logging pattern as any
backup made at rotation will subsequently be purged.

A max value of `2` will maintain the active log file plus the latest
backup. And so on...

> The currently active log file is never purged. Only backups are
purged.

When enabled, backup log deletion is evaluated inline after each
successful rotation event. To be considered for log deletion, backup log
files MUST adhere to the file naming format used in log rotation as well
as agree with the current `logfile` name and location. Any other files
or sub-directories in the log directory will be ignored. E.g. if an
operator makes a manual copy of the log file to `logfile.bak` that file
will not be evaluated as a backup.

### Typical use case:

This feature is useful in a constrained hosting environment for NATS
Server, for example an embedded, edge-compute, or IoT device scenario,
in which _more featureful_ platform or operating system log management
features do not exist or the complexity is not required.
  • Loading branch information
derekcollison committed Sep 18, 2023
2 parents 8f0e65f + 349e718 commit 22514a0
Show file tree
Hide file tree
Showing 4 changed files with 211 additions and 7 deletions.
74 changes: 67 additions & 7 deletions logger/log.go
Expand Up @@ -18,6 +18,8 @@ import (
"fmt"
"log"
"os"
"path/filepath"
"strings"
"sync"
"sync/atomic"
"time"
Expand Down Expand Up @@ -128,13 +130,14 @@ type fileLogger struct {
out int64
canRotate int32
sync.Mutex
l *Logger
f writerAndCloser
limit int64
olimit int64
pid string
time bool
closed bool
l *Logger
f writerAndCloser
limit int64
olimit int64
pid string
time bool
closed bool
maxNumFiles int
}

func newFileLogger(filename, pidPrefix string, time bool) (*fileLogger, error) {
Expand Down Expand Up @@ -169,6 +172,12 @@ func (l *fileLogger) setLimit(limit int64) {
}
}

func (l *fileLogger) setMaxNumFiles(max int) {
l.Lock()
l.maxNumFiles = max
l.Unlock()
}

func (l *fileLogger) logDirect(label, format string, v ...interface{}) int {
var entrya = [256]byte{}
var entry = entrya[:0]
Expand All @@ -190,6 +199,41 @@ func (l *fileLogger) logDirect(label, format string, v ...interface{}) int {
return len(entry)
}

func (l *fileLogger) logPurge(fname string) {
var backups []string
lDir := filepath.Dir(fname)
lBase := filepath.Base(fname)
entries, err := os.ReadDir(lDir)
if err != nil {
l.logDirect(l.l.errorLabel, "Unable to read directory %q for log purge (%v), will attempt next rotation", lDir, err)
return
}
for _, entry := range entries {
if entry.IsDir() || entry.Name() == lBase || !strings.HasPrefix(entry.Name(), lBase) {
continue
}
if stamp, found := strings.CutPrefix(entry.Name(), fmt.Sprintf("%s%s", lBase, ".")); found {
_, err := time.Parse("2006:01:02:15:04:05.999999999", strings.Replace(stamp, ".", ":", 5))
if err == nil {
backups = append(backups, entry.Name())
}
}
}
currBackups := len(backups)
maxBackups := l.maxNumFiles - 1
if currBackups > maxBackups {
// backups sorted oldest to latest based on timestamped lexical filename (ReadDir)
for i := 0; i < currBackups-maxBackups; i++ {
if err := os.Remove(filepath.Join(lDir, string(os.PathSeparator), backups[i])); err != nil {
l.logDirect(l.l.errorLabel, "Unable to remove backup log file %q (%v), will attempt next rotation", backups[i], err)
// Bail fast, we'll try again next rotation
return
}
l.logDirect(l.l.infoLabel, "Purged log file %q", backups[i])
}
}
}

func (l *fileLogger) Write(b []byte) (int, error) {
if atomic.LoadInt32(&l.canRotate) == 0 {
n, err := l.f.Write(b)
Expand Down Expand Up @@ -225,6 +269,9 @@ func (l *fileLogger) Write(b []byte) (int, error) {
n := l.logDirect(l.l.infoLabel, "Rotated log, backup saved as %q", bak)
l.out = int64(n)
l.limit = l.olimit
if l.maxNumFiles > 0 {
l.logPurge(fname)
}
}
}
l.Unlock()
Expand Down Expand Up @@ -257,6 +304,19 @@ func (l *Logger) SetSizeLimit(limit int64) error {
return nil
}

// SetMaxNumFiles sets the number of archived log files that will be retained
func (l *Logger) SetMaxNumFiles(max int) error {
l.Lock()
if l.fl == nil {
l.Unlock()
return fmt.Errorf("can set log max number of files only for file logger")
}
fl := l.fl
l.Unlock()
fl.setMaxNumFiles(max)
return nil
}

// NewTestLogger creates a logger with output directed to Stderr with a prefix.
// Useful for tracing in tests when multiple servers are in the same pid
func NewTestLogger(prefix string, time bool) *Logger {
Expand Down
10 changes: 10 additions & 0 deletions server/log.go
Expand Up @@ -72,6 +72,16 @@ func (s *Server) ConfigureLogger() {
l.SetSizeLimit(opts.LogSizeLimit)
}
}
if opts.LogMaxFiles > 0 {
if l, ok := log.(*srvlog.Logger); ok {
al := int(opts.LogMaxFiles)
if int64(al) != opts.LogMaxFiles {
// set to default (no max) on overflow
al = 0
}
l.SetMaxNumFiles(al)
}
}
} else if opts.RemoteSyslog != "" {
log = srvlog.NewRemoteSysLogger(opts.RemoteSyslog, opts.Debug, opts.Trace)
} else if syslog {
Expand Down
3 changes: 3 additions & 0 deletions server/opts.go
Expand Up @@ -311,6 +311,7 @@ type Options struct {
PortsFileDir string `json:"-"`
LogFile string `json:"-"`
LogSizeLimit int64 `json:"-"`
LogMaxFiles int64 `json:"-"`
Syslog bool `json:"-"`
RemoteSyslog string `json:"-"`
Routes []*url.URL `json:"-"`
Expand Down Expand Up @@ -999,6 +1000,8 @@ func (o *Options) processConfigFileLine(k string, v interface{}, errors *[]error
o.LogFile = v.(string)
case "logfile_size_limit", "log_size_limit":
o.LogSizeLimit = v.(int64)
case "logfile_max_num", "log_max_num":
o.LogMaxFiles = v.(int64)
case "syslog":
o.Syslog = v.(bool)
trackExplicitVal(o, &o.inConfig, "Syslog", o.Syslog)
Expand Down
131 changes: 131 additions & 0 deletions test/log_test.go
@@ -0,0 +1,131 @@
package test

import (
"fmt"
"os"
"path/filepath"
"runtime"
"testing"
"time"

"github.com/nats-io/nats-server/v2/server"
)

func RunServerWithLogging(opts *server.Options) *server.Server {
if opts == nil {
opts = &DefaultTestOptions
}
opts.NoLog = false
opts.Cluster.PoolSize = -1
opts.Cluster.Compression.Mode = server.CompressionOff
opts.LeafNode.Compression.Mode = server.CompressionOff
s, err := server.NewServer(opts)
if err != nil || s == nil {
panic(fmt.Sprintf("No NATS Server object returned: %v", err))
}
s.ConfigureLogger()
go s.Start()
if !s.ReadyForConnections(10 * time.Second) {
panic("Unable to start NATS Server in Go Routine")
}
return s
}

func TestLogMaxArchives(t *testing.T) {
// With logfile_size_limit set to small 100 characters, plain startup rotates 8 times
for _, test := range []struct {
name string
config string
totEntriesExpected int
}{
{
"Default implicit, no max logs, expect 0 purged logs",
`
port: -1
log_file: %s
logfile_size_limit: 100
`,
9,
},
{
"Default explicit, no max logs, expect 0 purged logs",
`
port: -1
log_file: %s
logfile_size_limit: 100
logfile_max_num: 0
`,
9,
},
{
"Default explicit - negative val, no max logs, expect 0 purged logs",
`
port: -1
log_file: %s
logfile_size_limit: 100
logfile_max_num: -42
`,
9,
},
{
"1-max num, expect 8 purged logs",
`
port: -1
log_file: %s
logfile_size_limit: 100
logfile_max_num: 1
`,
1,
},
{
"5-max num, expect 4 purged logs; use opt alias",
`
port: -1
log_file: %s
log_size_limit: 100
log_max_num: 5
`,
5,
},
{
"100-max num, expect 0 purged logs",
`
port: -1
log_file: %s
logfile_size_limit: 100
logfile_max_num: 100
`,
9,
},
} {
t.Run(test.name, func(t *testing.T) {
d, err := os.MkdirTemp("", "logtest")
if err != nil {
t.Fatalf("Error creating temp dir: %v", err)
}
content := fmt.Sprintf(test.config, filepath.Join(d, "nats-server.log"))
// server config does not like plain windows backslash
if runtime.GOOS == "windows" {
content = filepath.ToSlash(content)
}
opts, err := server.ProcessConfigFile(createConfFile(t, []byte(content)))
if err != nil {
t.Fatalf("Error processing config file: %v", err)
}
s := RunServerWithLogging(opts)
if s == nil {
t.Fatalf("No NATS Server object returned")
}
s.Shutdown()
// Windows filesystem can be a little pokey on the flush, so wait a bit after shutdown...
time.Sleep(500 * time.Millisecond)
entries, err := os.ReadDir(d)
if err != nil {
t.Fatalf("Error reading dir: %v", err)
}
if len(entries) != test.totEntriesExpected {
t.Fatalf("Expected %d log files, got %d", test.totEntriesExpected, len(entries))
}
})
}
}

0 comments on commit 22514a0

Please sign in to comment.