-
Notifications
You must be signed in to change notification settings - Fork 4.1k
/
activity_log.go
2055 lines (1774 loc) · 61.5 KB
/
activity_log.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
package vault
import (
"context"
"crypto/sha256"
"encoding/base64"
"encoding/json"
"errors"
"fmt"
"os"
"sort"
"strconv"
"strings"
"sync"
"time"
"unicode/utf8"
"github.com/golang/protobuf/proto"
log "github.com/hashicorp/go-hclog"
"github.com/hashicorp/vault/helper/metricsutil"
"github.com/hashicorp/vault/helper/namespace"
"github.com/hashicorp/vault/helper/timeutil"
"github.com/hashicorp/vault/sdk/logical"
"github.com/hashicorp/vault/vault/activity"
"github.com/mitchellh/copystructure"
)
const (
// activitySubPath is the directory under the system view where
// the log will be stored.
activitySubPath = "counters/activity/"
activityEntityBasePath = "log/entity/"
activityTokenBasePath = "log/directtokens/"
activityQueryBasePath = "queries/"
activityConfigKey = "config"
activityIntentLogKey = "endofmonth"
// for testing purposes (public as needed)
ActivityLogPrefix = "sys/counters/activity/log/"
ActivityPrefix = "sys/counters/activity/"
// Time to wait on perf standby before sending fragment
activityFragmentStandbyTime = 10 * time.Minute
// Time between writes of segment to storage
activitySegmentInterval = 10 * time.Minute
// Timeout on RPC calls.
activityFragmentSendTimeout = 1 * time.Minute
// Timeout on storage calls.
activitySegmentWriteTimeout = 1 * time.Minute
// Number of entity records to store per segment
// Estimated as 512KiB / 64 bytes = 8192, rounded down
activitySegmentEntityCapacity = 8000
// Maximum number of segments per month
activityLogMaxSegmentPerMonth = 81
// Number of records (entity or token) to store in a
// standby fragment before sending it to the active node.
// Estimates as 8KiB / 64 bytes = 128
activityFragmentStandbyCapacity = 128
// Delimiter between the string fields used to generate a client
// ID for tokens without entities. This is the 0 character, which
// is a non-printable string. Please see unicode.IsPrint for details.
clientIDTWEDelimiter = rune('\x00')
// Delimiter between each policy in the sorted policies used to
// generate a client ID for tokens without entities. This is the 127
// character, which is a non-printable string. Please see unicode.IsPrint
// for details.
sortedPoliciesTWEDelimiter = rune('\x7F')
// trackedTWESegmentPeriod is a time period of a little over a month, and represents
// the amount of time that needs to pass after a 1.9 or later upgrade to result in
// all fragments and segments no longer storing token counts in the directtokens
// storage path.
trackedTWESegmentPeriod = 35 * 24
)
type segmentInfo struct {
startTimestamp int64
currentClients *activity.EntityActivityLog
clientSequenceNumber uint64
// DEPRECATED
// This field is needed for backward compatibility with fragments
// and segments created with vault versions before 1.9.
tokenCount *activity.TokenCount
}
type clients struct {
distinctEntities uint64
distinctNonEntities uint64
}
// ActivityLog tracks unique entity counts and non-entity token counts.
// It handles assembling log fragments (and sending them to the active
// node), writing log segments, and precomputing queries.
type ActivityLog struct {
core *Core
configOverrides *ActivityLogCoreConfig
// ActivityLog.l protects the configuration settings, except enable, and any modifications
// to the current segment.
// Acquire "l" before fragmentLock if both must be held.
l sync.RWMutex
// fragmentLock protects enable, activeClients, fragment, standbyFragmentsReceived
fragmentLock sync.RWMutex
// enabled indicates if the activity log is enabled for this cluster.
// This is protected by fragmentLock so we can check with only
// a single synchronization call.
enabled bool
// log destination
logger log.Logger
// metrics sink
metrics metricsutil.Metrics
// view is the storage location used by ActivityLog,
// defaults to sys/activity.
view *BarrierView
// nodeID is the ID to use for all fragments that
// are generated.
// TODO: use secondary ID when available?
nodeID string
// current log fragment (may be nil)
fragment *activity.LogFragment
fragmentCreation time.Time
// Channel to signal a new fragment has been created
// so it's appropriate to start the timer.
newFragmentCh chan struct{}
// Channel for sending fragment immediately
sendCh chan struct{}
// Channel for writing fragment immediately
writeCh chan struct{}
// Channel to stop background processing
doneCh chan struct{}
// track metadata and contents of the most recent log segment
currentSegment segmentInfo
// Fragments received from performance standbys
standbyFragmentsReceived []*activity.LogFragment
// precomputed queries
queryStore *activity.PrecomputedQueryStore
defaultReportMonths int
retentionMonths int
// channel closed by delete worker when done
deleteDone chan struct{}
// channel closed when deletion at startup is done
// (for unit test robustness)
retentionDone chan struct{}
// for testing: is config currently being invalidated. protected by l
configInvalidationInProgress bool
// clientTracker tracks active clients this month. Protected by fragmentLock.
clientTracker *ClientTracker
}
type ClientTracker struct {
// All known active clients this month; use fragmentLock read-locked
// to check whether it already exists.
activeClients map[string]struct{}
entityCountByNamespaceID map[string]uint64
nonEntityCountByNamespaceID map[string]uint64
}
// These non-persistent configuration options allow us to disable
// parts of the implementation for integration testing.
// The default values should turn everything on.
type ActivityLogCoreConfig struct {
// Enable activity log even if the feature flag not set
ForceEnable bool
// Do not start timers to send or persist fragments.
DisableTimers bool
}
// NewActivityLog creates an activity log.
func NewActivityLog(core *Core, logger log.Logger, view *BarrierView, metrics metricsutil.Metrics) (*ActivityLog, error) {
hostname, err := os.Hostname()
if err != nil {
return nil, err
}
a := &ActivityLog{
core: core,
configOverrides: &core.activityLogConfig,
logger: logger,
view: view,
metrics: metrics,
nodeID: hostname,
newFragmentCh: make(chan struct{}, 1),
sendCh: make(chan struct{}, 1), // buffered so it can be triggered by fragment size
writeCh: make(chan struct{}, 1), // same for full segment
doneCh: make(chan struct{}, 1),
clientTracker: &ClientTracker{
activeClients: make(map[string]struct{}),
entityCountByNamespaceID: make(map[string]uint64),
nonEntityCountByNamespaceID: make(map[string]uint64),
},
currentSegment: segmentInfo{
startTimestamp: 0,
currentClients: &activity.EntityActivityLog{
Clients: make([]*activity.EntityRecord, 0),
},
// tokenCount is deprecated, but must still exist for the current segment
// so the fragment that was using TWEs before the 1.9 changes
// can be flushed to the current segment.
tokenCount: &activity.TokenCount{
CountByNamespaceID: make(map[string]uint64),
},
clientSequenceNumber: 0,
},
standbyFragmentsReceived: make([]*activity.LogFragment, 0),
}
config, err := a.loadConfigOrDefault(core.activeContext)
if err != nil {
return nil, err
}
a.SetConfigInit(config)
a.queryStore = activity.NewPrecomputedQueryStore(
logger,
view.SubView(activityQueryBasePath),
config.RetentionMonths)
return a, nil
}
// saveCurrentSegmentToStorage updates the record of Entities or
// Non Entity Tokens in persistent storage
// :force: forces a save of tokens/entities even if the in-memory log is empty
func (a *ActivityLog) saveCurrentSegmentToStorage(ctx context.Context, force bool) error {
// Prevent simultaneous changes to segment
a.l.Lock()
defer a.l.Unlock()
return a.saveCurrentSegmentToStorageLocked(ctx, force)
}
// Must be called with l held.
// :force: forces a save of tokens/entities even if the in-memory log is empty
func (a *ActivityLog) saveCurrentSegmentToStorageLocked(ctx context.Context, force bool) error {
defer a.metrics.MeasureSinceWithLabels([]string{"core", "activity", "segment_write"},
time.Now(), []metricsutil.Label{})
// Swap out the pending fragments
a.fragmentLock.Lock()
localFragment := a.fragment
a.fragment = nil
standbys := a.standbyFragmentsReceived
a.standbyFragmentsReceived = make([]*activity.LogFragment, 0)
a.fragmentLock.Unlock()
// If segment start time is zero, do not update or write
// (even if force is true). This can happen if activityLog is
// disabled after a save as been triggered.
if a.currentSegment.startTimestamp == 0 {
return nil
}
// Measure the current fragment
if localFragment != nil {
a.metrics.IncrCounterWithLabels([]string{"core", "activity", "fragment_size"},
float32(len(localFragment.Clients)),
[]metricsutil.Label{
{"type", "entity"},
})
a.metrics.IncrCounterWithLabels([]string{"core", "activity", "fragment_size"},
float32(len(localFragment.NonEntityTokens)),
[]metricsutil.Label{
{"type", "direct_token"},
})
}
// Collect new entities and new tokens.
saveChanges := false
newEntities := make(map[string]*activity.EntityRecord)
for _, f := range append(standbys, localFragment) {
if f == nil {
continue
}
for _, e := range f.Clients {
// We could sort by timestamp to see which is first.
// We'll ignore that; the order of the append above means
// that we choose entries in localFragment over those
// from standby nodes.
newEntities[e.ClientID] = e
saveChanges = true
}
// As of 1.9, a fragment should no longer have any NonEntityTokens. However
// in order to not lose any information about the current segment during the
// month when the client upgrades to 1.9, we must retain this functionality.
for ns, val := range f.NonEntityTokens {
// We track these pre-1.9 values in the old location, which is
// a.currentSegment.tokenCount, as opposed to the counter that stores tokens
// without entities that have client IDs, namely
// a.clientTracker.nonEntityCountByNamespaceID. This preserves backward
// compatibility for the precomputedQueryWorkers and the segment storing
// logic.
a.currentSegment.tokenCount.CountByNamespaceID[ns] += val
saveChanges = true
}
}
if !saveChanges {
return nil
}
// Will all new entities fit? If not, roll over to a new segment.
available := activitySegmentEntityCapacity - len(a.currentSegment.currentClients.Clients)
remaining := available - len(newEntities)
excess := 0
if remaining < 0 {
excess = -remaining
}
segmentClients := a.currentSegment.currentClients.Clients
excessClients := make([]*activity.EntityRecord, 0, excess)
for _, record := range newEntities {
if available > 0 {
segmentClients = append(segmentClients, record)
available -= 1
} else {
excessClients = append(excessClients, record)
}
}
a.currentSegment.currentClients.Clients = segmentClients
err := a.saveCurrentSegmentInternal(ctx, force)
if err != nil {
// The current fragment(s) have already been placed into the in-memory
// segment, but we may lose any excess (in excessClients).
// There isn't a good way to unwind the transaction on failure,
// so we may just lose some records.
return err
}
if available <= 0 {
if a.currentSegment.clientSequenceNumber >= activityLogMaxSegmentPerMonth {
// Cannot send as Warn because it will repeat too often,
// and disabling/renabling would be complicated.
a.logger.Trace("too many segments in current month", "dropped", len(excessClients))
return nil
}
// Rotate to next segment
a.currentSegment.clientSequenceNumber += 1
if len(excessClients) > activitySegmentEntityCapacity {
a.logger.Warn("too many new active entities, dropping tail", "entities", len(excessClients))
excessClients = excessClients[:activitySegmentEntityCapacity]
}
a.currentSegment.currentClients.Clients = excessClients
err := a.saveCurrentSegmentInternal(ctx, force)
if err != nil {
return err
}
}
return nil
}
// :force: forces a save of tokens/entities even if the in-memory log is empty
func (a *ActivityLog) saveCurrentSegmentInternal(ctx context.Context, force bool) error {
entityPath := fmt.Sprintf("log/entity/%d/%d", a.currentSegment.startTimestamp, a.currentSegment.clientSequenceNumber)
// RFC (VLT-120) defines this as 1-indexed, but it should be 0-indexed
tokenPath := fmt.Sprintf("log/directtokens/%d/0", a.currentSegment.startTimestamp)
for _, client := range a.currentSegment.currentClients.Clients {
// Explicitly catch and throw clear error message if client ID creation and storage
// results in a []byte that doesn't assert into a valid string.
if !utf8.ValidString(client.ClientID) {
return fmt.Errorf("client ID %q is not a valid string:", client.ClientID)
}
}
if len(a.currentSegment.currentClients.Clients) > 0 || force {
entities, err := proto.Marshal(a.currentSegment.currentClients)
if err != nil {
return err
}
a.logger.Trace("writing segment", "path", entityPath)
err = a.view.Put(ctx, &logical.StorageEntry{
Key: entityPath,
Value: entities,
})
if err != nil {
return err
}
}
// We must still allow for the tokenCount of the current segment to
// be written to storage, since if we remove this code we will incur
// data loss for one segment's worth of TWEs.
if len(a.currentSegment.tokenCount.CountByNamespaceID) > 0 || force {
// We can get away with simply using the oldest version stored because
// the storing of versions was introduced at the same time as this code.
oldestVersion, oldestUpgradeTime, err := a.core.FindOldestVersionTimestamp()
switch {
case err != nil:
a.logger.Error(fmt.Sprintf("unable to retrieve oldest version timestamp: %s", err.Error()))
case len(a.currentSegment.tokenCount.CountByNamespaceID) > 0 &&
(oldestUpgradeTime.Add(time.Duration(trackedTWESegmentPeriod * time.Hour)).Before(time.Now())):
a.logger.Error(fmt.Sprintf("storing nonzero token count over a month after vault was upgraded to %s", oldestVersion))
default:
if len(a.currentSegment.tokenCount.CountByNamespaceID) > 0 {
a.logger.Info("storing nonzero token count")
}
}
tokenCount, err := proto.Marshal(a.currentSegment.tokenCount)
if err != nil {
return err
}
a.logger.Trace("writing segment", "path", tokenPath)
err = a.view.Put(ctx, &logical.StorageEntry{
Key: tokenPath,
Value: tokenCount,
})
if err != nil {
return err
}
}
return nil
}
// parseSegmentNumberFromPath returns the segment number from a path
// (and if it exists - it is the last element in the path)
func parseSegmentNumberFromPath(path string) (int, bool) {
// as long as both s and sep are not "", len(elems) >= 1
elems := strings.Split(path, "/")
segmentNum, err := strconv.Atoi(elems[len(elems)-1])
if err != nil {
return 0, false
}
return segmentNum, true
}
// availableLogs returns the start_time(s) (in UTC) associated with months for which logs exist,
// sorted last to first
func (a *ActivityLog) availableLogs(ctx context.Context) ([]time.Time, error) {
paths := make([]string, 0)
for _, basePath := range []string{activityEntityBasePath, activityTokenBasePath} {
p, err := a.view.List(ctx, basePath)
if err != nil {
return nil, err
}
paths = append(paths, p...)
}
pathSet := make(map[time.Time]struct{})
out := make([]time.Time, 0)
for _, path := range paths {
// generate a set of unique start times
time, err := timeutil.ParseTimeFromPath(path)
if err != nil {
return nil, err
}
if _, present := pathSet[time]; !present {
pathSet[time] = struct{}{}
out = append(out, time)
}
}
sort.Slice(out, func(i, j int) bool {
// sort in reverse order to make processing most recent segment easier
return out[i].After(out[j])
})
a.logger.Trace("scanned existing logs", "out", out)
return out, nil
}
// getMostRecentActivityLogSegment gets the times (in UTC) associated with the most recent
// contiguous set of activity logs, sorted in decreasing order (latest to earliest)
func (a *ActivityLog) getMostRecentActivityLogSegment(ctx context.Context) ([]time.Time, error) {
logTimes, err := a.availableLogs(ctx)
if err != nil {
return nil, err
}
return timeutil.GetMostRecentContiguousMonths(logTimes), nil
}
// getLastEntitySegmentNumber returns the (non-negative) last segment number for the :startTime:, if it exists
func (a *ActivityLog) getLastEntitySegmentNumber(ctx context.Context, startTime time.Time) (uint64, bool, error) {
p, err := a.view.List(ctx, activityEntityBasePath+fmt.Sprint(startTime.Unix())+"/")
if err != nil {
return 0, false, err
}
highestNum := -1
for _, path := range p {
if num, ok := parseSegmentNumberFromPath(path); ok {
if num > highestNum {
highestNum = num
}
}
}
if highestNum < 0 {
// numbers less than 0 are invalid. if a negative number is the highest value, there isn't a segment
return 0, false, nil
}
return uint64(highestNum), true, nil
}
// WalkEntitySegments loads each of the entity segments for a particular start time
func (a *ActivityLog) WalkEntitySegments(ctx context.Context,
startTime time.Time,
walkFn func(*activity.EntityActivityLog)) error {
basePath := activityEntityBasePath + fmt.Sprint(startTime.Unix()) + "/"
pathList, err := a.view.List(ctx, basePath)
if err != nil {
return err
}
for _, path := range pathList {
raw, err := a.view.Get(ctx, basePath+path)
if err != nil {
return err
}
if raw == nil {
a.logger.Warn("expected log segment not found", "startTime", startTime, "segment", path)
continue
}
out := &activity.EntityActivityLog{}
err = proto.Unmarshal(raw.Value, out)
if err != nil {
return fmt.Errorf("unable to parse segment %v%v: %w", basePath, path, err)
}
walkFn(out)
}
return nil
}
// WalkTokenSegments loads each of the token segments (expected 1) for a particular start time
func (a *ActivityLog) WalkTokenSegments(ctx context.Context,
startTime time.Time,
walkFn func(*activity.TokenCount)) error {
basePath := activityTokenBasePath + fmt.Sprint(startTime.Unix()) + "/"
pathList, err := a.view.List(ctx, basePath)
if err != nil {
return err
}
for _, path := range pathList {
raw, err := a.view.Get(ctx, basePath+path)
if err != nil {
return err
}
if raw == nil {
a.logger.Trace("no tokens without entities stored without tracking", "startTime", startTime, "segment", path)
continue
}
out := &activity.TokenCount{}
err = proto.Unmarshal(raw.Value, out)
if err != nil {
return fmt.Errorf("unable to parse token segment %v%v: %w", basePath, path, err)
}
walkFn(out)
}
return nil
}
// loadPriorEntitySegment populates the in-memory tracker for entity IDs that have
// been active "this month"
func (a *ActivityLog) loadPriorEntitySegment(ctx context.Context, startTime time.Time, sequenceNum uint64) error {
path := activityEntityBasePath + fmt.Sprint(startTime.Unix()) + "/" + strconv.FormatUint(sequenceNum, 10)
data, err := a.view.Get(ctx, path)
if err != nil {
return err
}
out := &activity.EntityActivityLog{}
err = proto.Unmarshal(data.Value, out)
if err != nil {
return err
}
a.l.RLock()
a.fragmentLock.Lock()
// Handle the (unlikely) case where the end of the month has been reached while background loading.
// Or the feature has been disabled.
if a.enabled && startTime.Unix() == a.currentSegment.startTimestamp {
for _, ent := range out.Clients {
a.clientTracker.addClient(ent)
}
}
a.fragmentLock.Unlock()
a.l.RUnlock()
return nil
}
// loadCurrentClientSegment loads the most recent segment (for "this month") into memory
// (to append new entries), and to the activeClients to avoid duplication
// call with fragmentLock and l held
func (a *ActivityLog) loadCurrentClientSegment(ctx context.Context, startTime time.Time, sequenceNum uint64) error {
path := activityEntityBasePath + fmt.Sprint(startTime.Unix()) + "/" + strconv.FormatUint(sequenceNum, 10)
data, err := a.view.Get(ctx, path)
if err != nil {
return err
}
out := &activity.EntityActivityLog{}
err = proto.Unmarshal(data.Value, out)
if err != nil {
return err
}
if !a.core.perfStandby {
a.currentSegment = segmentInfo{
startTimestamp: startTime.Unix(),
currentClients: &activity.EntityActivityLog{
Clients: out.Clients,
},
tokenCount: a.currentSegment.tokenCount,
clientSequenceNumber: sequenceNum,
}
} else {
// populate this for edge case checking (if end of month passes while background loading on standby)
a.currentSegment.startTimestamp = startTime.Unix()
}
for _, ent := range out.Clients {
a.clientTracker.addClient(ent)
}
return nil
}
// tokenCountExists checks if there's a token log for :startTime:
// this function should be called with the lock held
func (a *ActivityLog) tokenCountExists(ctx context.Context, startTime time.Time) (bool, error) {
p, err := a.view.List(ctx, activityTokenBasePath+fmt.Sprint(startTime.Unix())+"/")
if err != nil {
return false, err
}
for _, path := range p {
if num, ok := parseSegmentNumberFromPath(path); ok && num == 0 {
return true, nil
}
}
return false, nil
}
// loadTokenCount populates the in-memory representation of activity token count
// this function should be called with the lock held
func (a *ActivityLog) loadTokenCount(ctx context.Context, startTime time.Time) error {
tokenCountExists, err := a.tokenCountExists(ctx, startTime)
if err != nil {
return err
}
if !tokenCountExists {
return nil
}
path := activityTokenBasePath + fmt.Sprint(startTime.Unix()) + "/0"
data, err := a.view.Get(ctx, path)
if err != nil {
return err
}
out := &activity.TokenCount{}
err = proto.Unmarshal(data.Value, out)
if err != nil {
return err
}
// An empty map is unmarshaled as nil
if out.CountByNamespaceID == nil {
out.CountByNamespaceID = make(map[string]uint64)
}
// We must load the tokenCount of the current segment into the activity log
// so that TWEs counted before the introduction of a client ID for TWEs are
// still reported in the partial client counts.
a.currentSegment.tokenCount = out
return nil
}
// entityBackgroundLoader loads entity activity log records for start_date `t`
func (a *ActivityLog) entityBackgroundLoader(ctx context.Context, wg *sync.WaitGroup, t time.Time, seqNums <-chan uint64) {
defer wg.Done()
for seqNum := range seqNums {
select {
case <-a.doneCh:
a.logger.Info("background processing told to halt while loading entities", "time", t, "sequence", seqNum)
return
default:
}
err := a.loadPriorEntitySegment(ctx, t, seqNum)
if err != nil {
a.logger.Error("error loading entity activity log", "time", t, "sequence", seqNum, "err", err)
}
}
}
// Initialize a new current segment, based on the current time.
// Call with fragmentLock and l held.
func (a *ActivityLog) startNewCurrentLogLocked(now time.Time) {
a.logger.Trace("initializing new log")
a.resetCurrentLog()
a.currentSegment.startTimestamp = now.Unix()
}
// Should be called with fragmentLock and l held.
func (a *ActivityLog) newMonthCurrentLogLocked(currentTime time.Time) {
a.logger.Trace("continuing log to new month")
a.resetCurrentLog()
monthStart := timeutil.StartOfMonth(currentTime.UTC())
a.currentSegment.startTimestamp = monthStart.Unix()
}
// Initialize a new current segment, based on the given time
// should be called with fragmentLock and l held.
func (a *ActivityLog) newSegmentAtGivenTime(t time.Time) {
timestamp := t.Unix()
a.logger.Trace("starting a segment", "timestamp", timestamp)
a.resetCurrentLog()
a.currentSegment.startTimestamp = timestamp
}
// Reset all the current segment state.
// Should be called with fragmentLock and l held.
func (a *ActivityLog) resetCurrentLog() {
a.currentSegment.startTimestamp = 0
a.currentSegment.currentClients = &activity.EntityActivityLog{
Clients: make([]*activity.EntityRecord, 0),
}
// We must still initialize the tokenCount to recieve tokenCounts from fragments
// during the month where customers upgrade to 1.9
a.currentSegment.tokenCount = &activity.TokenCount{
CountByNamespaceID: make(map[string]uint64),
}
a.currentSegment.clientSequenceNumber = 0
a.fragment = nil
a.clientTracker.activeClients = make(map[string]struct{})
a.clientTracker.entityCountByNamespaceID = make(map[string]uint64)
a.clientTracker.nonEntityCountByNamespaceID = make(map[string]uint64)
a.standbyFragmentsReceived = make([]*activity.LogFragment, 0)
}
func (a *ActivityLog) deleteLogWorker(startTimestamp int64, whenDone chan struct{}) {
ctx := namespace.RootContext(nil)
entityPath := fmt.Sprintf("%v%v/", activityEntityBasePath, startTimestamp)
tokenPath := fmt.Sprintf("%v%v/", activityTokenBasePath, startTimestamp)
// TODO: handle seal gracefully, if we're still working?
entitySegments, err := a.view.List(ctx, entityPath)
if err != nil {
a.logger.Error("could not list entity paths", "error", err)
return
}
for _, p := range entitySegments {
err = a.view.Delete(ctx, entityPath+p)
if err != nil {
a.logger.Error("could not delete entity log", "error", err)
}
}
tokenSegments, err := a.view.List(ctx, tokenPath)
if err != nil {
a.logger.Error("could not list token paths", "error", err)
return
}
for _, p := range tokenSegments {
err = a.view.Delete(ctx, tokenPath+p)
if err != nil {
a.logger.Error("could not delete token log", "error", err)
}
}
// Allow whoever started this as a goroutine to wait for it to finish.
close(whenDone)
}
func (a *ActivityLog) WaitForDeletion() {
a.l.Lock()
// May be nil, if never set
doneCh := a.deleteDone
a.l.Unlock()
if doneCh != nil {
select {
case <-doneCh:
break
}
}
}
// refreshFromStoredLog loads the appropriate entities/tokencounts for active and performance standbys
// the most recent segment is loaded synchronously, and older segments are loaded in the background
// this function expects stateLock to be held
func (a *ActivityLog) refreshFromStoredLog(ctx context.Context, wg *sync.WaitGroup, now time.Time) error {
a.l.Lock()
defer a.l.Unlock()
a.fragmentLock.Lock()
defer a.fragmentLock.Unlock()
decreasingLogTimes, err := a.getMostRecentActivityLogSegment(ctx)
if err != nil {
return err
}
if len(decreasingLogTimes) == 0 {
if a.enabled {
if a.core.perfStandby {
// reset the log without updating the timestamp
a.resetCurrentLog()
} else {
a.startNewCurrentLogLocked(now)
}
}
return nil
}
mostRecent := decreasingLogTimes[0]
if !a.enabled {
a.logger.Debug("activity log not enabled, skipping refresh from storage")
if !a.core.perfStandby && timeutil.IsCurrentMonth(mostRecent, now) {
a.logger.Debug("activity log is disabled, cleaning up logs for the current month")
go a.deleteLogWorker(mostRecent.Unix(), make(chan struct{}))
}
return nil
}
if timeutil.IsPreviousMonth(mostRecent, now) {
// no activity logs to load for this month. if we are enabled, interpret
// it as having missed the rotation, so let it fall through and load
// if we missed generating the precomputed query, activeFragmentWorker()
// will clean things up when it runs next
a.logger.Trace("no log segments for current month", "mostRecent", mostRecent)
a.logger.Info("rotating activity log to new month")
} else if mostRecent.After(now) {
// we can't do anything if the most recent log is in the future
a.logger.Warn("timestamp from log to load is in the future", "timestamp", mostRecent)
return nil
} else if !timeutil.IsCurrentMonth(mostRecent, now) {
// the most recent log in storage is 2+ months in the past
a.logger.Warn("most recent log in storage is 2 or more months in the past.", "timestamp", mostRecent)
if a.core.perfStandby {
// reset the log without updating the timestamp
a.resetCurrentLog()
} else {
a.startNewCurrentLogLocked(now)
}
return nil
}
// load token counts from storage into memory. As of 1.9, this functionality
// is still required since without it, we would lose replicated TWE counts for the
// current segment.
if !a.core.perfStandby {
err = a.loadTokenCount(ctx, mostRecent)
if err != nil {
return err
}
}
// load entity logs from storage into memory
lastSegment, segmentsExist, err := a.getLastEntitySegmentNumber(ctx, mostRecent)
if err != nil {
return err
}
if !segmentsExist {
a.logger.Trace("no entity segments for current month")
return nil
}
err = a.loadCurrentClientSegment(ctx, mostRecent, lastSegment)
if err != nil || lastSegment == 0 {
return err
}
lastSegment--
seqNums := make(chan uint64, lastSegment+1)
wg.Add(1)
go a.entityBackgroundLoader(ctx, wg, mostRecent, seqNums)
for n := int(lastSegment); n >= 0; n-- {
seqNums <- uint64(n)
}
close(seqNums)
return nil
}
// This version is used during construction
func (a *ActivityLog) SetConfigInit(config activityConfig) {
switch config.Enabled {
case "enable":
a.enabled = true
case "default":
a.enabled = activityLogEnabledDefault
case "disable":
a.enabled = false
}
if a.configOverrides.ForceEnable {
a.enabled = true
}
a.defaultReportMonths = config.DefaultReportMonths
a.retentionMonths = config.RetentionMonths
}
// This version reacts to user changes
func (a *ActivityLog) SetConfig(ctx context.Context, config activityConfig) {
a.l.Lock()
defer a.l.Unlock()
// enabled is protected by fragmentLock
a.fragmentLock.Lock()
originalEnabled := a.enabled
switch config.Enabled {
case "enable":
a.enabled = true
case "default":
a.enabled = activityLogEnabledDefault
case "disable":
a.enabled = false
}
if a.enabled != originalEnabled {
a.logger.Info("activity log enable changed", "original", originalEnabled, "current", a.enabled)
}
if !a.enabled && a.currentSegment.startTimestamp != 0 {
a.logger.Trace("deleting current segment")
a.deleteDone = make(chan struct{})
go a.deleteLogWorker(a.currentSegment.startTimestamp, a.deleteDone)
a.resetCurrentLog()
}
forceSave := false
if a.enabled && a.currentSegment.startTimestamp == 0 {
a.startNewCurrentLogLocked(time.Now().UTC())
// Force a save so we can distinguish between
//
// Month N-1: present
// Month N: <blank because we missed the month end>
//
// and
//
// Month N-1: present
// Month N: <blank because disabled and re-enabled>
forceSave = true
}
a.fragmentLock.Unlock()
if forceSave {
// l is still held here
a.saveCurrentSegmentInternal(ctx, true)
}
a.defaultReportMonths = config.DefaultReportMonths
a.retentionMonths = config.RetentionMonths
// check for segments out of retention period, if it has changed
go a.retentionWorker(time.Now(), a.retentionMonths)
}