From 60daf78ad00bee1ddca3bdbd897eaf7121c6dd5a Mon Sep 17 00:00:00 2001
From: Tom Proctor
Date: Fri, 15 Oct 2021 15:47:46 +0100
Subject: [PATCH 01/10] agent/cache: Store leases in-order in persistent cache so that restore respects dependencies
---
 command/agent/cache/cacheboltdb/bolt.go | 184 ++++++++++++++-----
 command/agent/cache/cacheboltdb/bolt_test.go | 36 ++--
 command/agent/cache/lease_cache.go | 114 +++++-------
 command/agent/cache/lease_cache_test.go | 32 ++--
 4 files changed, 223 insertions(+), 143 deletions(-)

diff --git a/command/agent/cache/cacheboltdb/bolt.go b/command/agent/cache/cacheboltdb/bolt.go
index 0a39c9cc15c61..968bffbaaff13 100644
--- a/command/agent/cache/cacheboltdb/bolt.go
+++ b/command/agent/cache/cacheboltdb/bolt.go
@@ -2,6 +2,7 @@ package cacheboltdb

 import (
 	"context"
+	"encoding/binary"
 	"fmt"
 	"os"
 	"path/filepath"
@@ -17,7 +18,7 @@ import (
 const (
 	// Keep track of schema version for future migrations
 	storageVersionKey = "version"
-	storageVersion = "1"
+	storageVersion = "2" // v2 merges auth-lease and secret-lease buckets into one ordered bucket

 	// DatabaseFileName - filename for the persistent cache file
 	DatabaseFileName = "vault-agent-cache.db"
@@ -26,15 +27,29 @@ const (
 	// bootstrapping keys
 	metaBucketName = "meta"

-	// SecretLeaseType - Bucket/type for leases with secret info
+	// DEPRECATED: SecretLeaseType - v1 Bucket/type for leases with secret info
 	SecretLeaseType = "secret-lease"

-	// AuthLeaseType - Bucket/type for leases with auth info
+	// DEPRECATED: AuthLeaseType - v1 Bucket/type for leases with auth info
 	AuthLeaseType = "auth-lease"

 	// TokenType - Bucket/type for auto-auth tokens
 	TokenType = "token"

+	// LeaseType - v2 Bucket/type for auth AND secret leases.
+	//
+	// This bucket stores keys in the same order they were created using
+	// auto-incrementing keys and the fact that BoltDB stores keys in byte
+	// slice order. This means when we iterate through this bucket during
+	// restore, we will always restore parent tokens before their children,
+	// allowing us to correctly attach child contexts to their parent's context.
+	LeaseType = "lease"
+
+	// LookupType - v2 Bucket/type to map from a cachememdb index ID to an
+	// auto-incrementing BoltDB key. Facilitates deletes from the lease
+	// bucket using an ID instead of the auto-incrementing BoltDB key. 
+ lookupType = "lookup" + // AutoAuthToken - key for the latest auto-auth token AutoAuthToken = "auto-auth-token" @@ -99,25 +114,88 @@ func createBoltSchema(tx *bolt.Tx) error { if err != nil { return fmt.Errorf("failed to set storage version: %w", err) } - case string(version) != storageVersion: + + return createV2BoltSchema(tx) + + case string(version) == storageVersion: + return createV2BoltSchema(tx) + + case string(version) == "1": + return migrateFromV1ToV2Schema(tx) + + default: return fmt.Errorf("storage migration from %s to %s not implemented", string(version), storageVersion) } +} +func createV2BoltSchema(tx *bolt.Tx) error { // create the buckets for tokens and leases - _, err = tx.CreateBucketIfNotExists([]byte(TokenType)) - if err != nil { - return fmt.Errorf("failed to create token bucket: %w", err) + for _, bucket := range []string{TokenType, LeaseType, lookupType} { + if _, err := tx.CreateBucketIfNotExists([]byte(bucket)); err != nil { + return fmt.Errorf("failed to create token bucket: %w", err) + } + } + + return nil +} + +func migrateFromV1ToV2Schema(tx *bolt.Tx) error { + if err := createV2BoltSchema(tx); err != nil { + return err + } + + for _, leaseType := range []string{AuthLeaseType, SecretLeaseType} { + if bucket := tx.Bucket([]byte(leaseType)); bucket != nil { + bucket.ForEach(func(key, value []byte) error { + autoIncKey, err := autoIncrementedLeaseKey(tx, string(key)) + if err != nil { + return fmt.Errorf("error migrating %s %q key to auto incremented key: %w", leaseType, string(key), err) + } + if err := tx.Bucket([]byte(LeaseType)).Put(autoIncKey, value); err != nil { + return fmt.Errorf("error migrating %s %q from v1 to v2 schema: %w", leaseType, string(key), err) + } + return nil + }) + + if err := tx.DeleteBucket([]byte(leaseType)); err != nil { + return fmt.Errorf("failed to clean up %s bucket during v1 to v2 schema migration: %w", leaseType, err) + } + } + } + + if err := tx.Bucket([]byte(metaBucketName)).Put([]byte(storageVersionKey), []byte(storageVersion)); err != nil { + return fmt.Errorf("failed to update schema from v1 to v2: %w", err) } - _, err = tx.CreateBucketIfNotExists([]byte(AuthLeaseType)) + + return nil +} + +func autoIncrementedLeaseKey(tx *bolt.Tx, id string) ([]byte, error) { + leaseBucket := tx.Bucket([]byte(LeaseType)) + keyValue, err := leaseBucket.NextSequence() if err != nil { - return fmt.Errorf("failed to create auth lease bucket: %w", err) + return nil, fmt.Errorf("failed to generate lookup key for id %q: %w", id, err) } - _, err = tx.CreateBucketIfNotExists([]byte(SecretLeaseType)) + + key := make([]byte, 8) + // MUST be big endian, because keys are ordered by byte slice comparison + // which progressively compares each byte in the slice starting at index 0. 
+ // BigEndian in the range [255-257] looks like this: + // [0 0 0 0 0 0 0 255] + // [0 0 0 0 0 0 1 0] + // [0 0 0 0 0 0 1 1] + // LittleEndian in the same range looks like this: + // [255 0 0 0 0 0 0 0] + // [0 1 0 0 0 0 0 0] + // [1 1 0 0 0 0 0 0] + binary.BigEndian.PutUint64(key, keyValue) + + err = tx.Bucket([]byte(lookupType)).Put([]byte(id), key) if err != nil { - return fmt.Errorf("failed to create secret lease bucket: %w", err) + return nil, err } - return nil + return key, nil } // Set an index (token or lease) in bolt storage @@ -133,44 +211,56 @@ func (b *BoltStorage) Set(ctx context.Context, id string, plaintext []byte, inde } return b.db.Update(func(tx *bolt.Tx) error { - s := tx.Bucket([]byte(indexType)) - if s == nil { - return fmt.Errorf("bucket %q not found", indexType) - } - // If this is an auto-auth token, also stash it in the meta bucket for - // easy retrieval upon restore - if indexType == TokenType { + var key []byte + switch indexType { + case LeaseType: + // If this is a lease type, generate an auto-incrementing key and + // store an ID -> key lookup entry + key, err = autoIncrementedLeaseKey(tx, id) + if err != nil { + return err + } + case TokenType: + // If this is an auto-auth token, also stash it in the meta bucket for + // easy retrieval upon restore + key = []byte(id) meta := tx.Bucket([]byte(metaBucketName)) if err := meta.Put([]byte(AutoAuthToken), protoBlob); err != nil { return fmt.Errorf("failed to set latest auto-auth token: %w", err) } + default: + return fmt.Errorf("called Set for unsupported type %q", indexType) } - return s.Put([]byte(id), protoBlob) - }) -} - -func getBucketIDs(b *bolt.Bucket) ([][]byte, error) { - ids := [][]byte{} - err := b.ForEach(func(k, v []byte) error { - ids = append(ids, k) - return nil + s := tx.Bucket([]byte(indexType)) + if s == nil { + return fmt.Errorf("bucket %q not found", indexType) + } + return s.Put(key, protoBlob) }) - return ids, err } -// Delete an index (token or lease) by id from bolt storage -func (b *BoltStorage) Delete(id string) error { +// Delete an index (token or lease) by key from bolt storage +func (b *BoltStorage) Delete(id string, indexType string) error { return b.db.Update(func(tx *bolt.Tx) error { - // Since Delete returns a nil error if the key doesn't exist, just call - // delete in all three index buckets without checking existence first - if err := tx.Bucket([]byte(TokenType)).Delete([]byte(id)); err != nil { - return fmt.Errorf("failed to delete %q from token bucket: %w", id, err) + key := []byte(id) + if indexType == LeaseType { + key = tx.Bucket([]byte(lookupType)).Get(key) + if key == nil { + return fmt.Errorf("failed to lookup bolt DB key for id %q", id) + } + + err := tx.Bucket([]byte(lookupType)).Delete([]byte(id)) + if err != nil { + return fmt.Errorf("failed to delete %q from lookup bucket: %w", id, err) + } } - if err := tx.Bucket([]byte(AuthLeaseType)).Delete([]byte(id)); err != nil { - return fmt.Errorf("failed to delete %q from auth lease bucket: %w", id, err) + + bucket := tx.Bucket([]byte(indexType)) + if bucket == nil { + return fmt.Errorf("bucket %q not found during delete", indexType) } - if err := tx.Bucket([]byte(SecretLeaseType)).Delete([]byte(id)); err != nil { - return fmt.Errorf("failed to delete %q from secret lease bucket: %w", id, err) + if err := bucket.Delete(key); err != nil { + return fmt.Errorf("failed to delete %q from %q bucket: %w", id, indexType, err) } b.logger.Trace("deleted index from bolt db", "id", id) return nil @@ -193,10 +283,14 @@ func (b 
*BoltStorage) GetByType(ctx context.Context, indexType string) ([][]byte err := b.db.View(func(tx *bolt.Tx) error { var errors *multierror.Error - tx.Bucket([]byte(indexType)).ForEach(func(id, ciphertext []byte) error { + bucket := tx.Bucket([]byte(indexType)) + if bucket == nil { + return fmt.Errorf("bucket %q not found", indexType) + } + bucket.ForEach(func(key, ciphertext []byte) error { plaintext, err := b.decrypt(ctx, ciphertext) if err != nil { - errors = multierror.Append(errors, fmt.Errorf("error decrypting index id %s: %w", id, err)) + errors = multierror.Append(errors, fmt.Errorf("error decrypting entry %s: %w", string(key), err)) return nil } @@ -247,11 +341,11 @@ func (b *BoltStorage) GetRetrievalToken() ([]byte, error) { var token []byte err := b.db.View(func(tx *bolt.Tx) error { - keyBucket := tx.Bucket([]byte(metaBucketName)) - if keyBucket == nil { + metaBucket := tx.Bucket([]byte(metaBucketName)) + if metaBucket == nil { return fmt.Errorf("bucket %q not found", metaBucketName) } - value := keyBucket.Get([]byte(RetrievalTokenMaterial)) + value := metaBucket.Get([]byte(RetrievalTokenMaterial)) if value != nil { token = make([]byte, len(value)) copy(token, value) @@ -286,7 +380,7 @@ func (b *BoltStorage) Close() error { // the schema/layout func (b *BoltStorage) Clear() error { return b.db.Update(func(tx *bolt.Tx) error { - for _, name := range []string{AuthLeaseType, SecretLeaseType, TokenType} { + for _, name := range []string{TokenType, LeaseType, lookupType} { b.logger.Trace("deleting bolt bucket", "name", name) if err := tx.DeleteBucket([]byte(name)); err != nil { return err diff --git a/command/agent/cache/cacheboltdb/bolt_test.go b/command/agent/cache/cacheboltdb/bolt_test.go index 8dfafc4ee63b2..f42c774326637 100644 --- a/command/agent/cache/cacheboltdb/bolt_test.go +++ b/command/agent/cache/cacheboltdb/bolt_test.go @@ -36,13 +36,13 @@ func TestBolt_SetGet(t *testing.T) { }) require.NoError(t, err) - secrets, err := b.GetByType(ctx, SecretLeaseType) + secrets, err := b.GetByType(ctx, LeaseType) assert.NoError(t, err) require.Len(t, secrets, 0) - err = b.Set(ctx, "test1", []byte("hello"), SecretLeaseType) + err = b.Set(ctx, "test1", []byte("hello"), LeaseType) assert.NoError(t, err) - secrets, err = b.GetByType(ctx, SecretLeaseType) + secrets, err = b.GetByType(ctx, LeaseType) assert.NoError(t, err) require.Len(t, secrets, 1) assert.Equal(t, []byte("hello"), secrets[0]) @@ -62,19 +62,19 @@ func TestBoltDelete(t *testing.T) { }) require.NoError(t, err) - err = b.Set(ctx, "secret-test1", []byte("hello1"), SecretLeaseType) + err = b.Set(ctx, "secret-test1", []byte("hello1"), LeaseType) require.NoError(t, err) - err = b.Set(ctx, "secret-test2", []byte("hello2"), SecretLeaseType) + err = b.Set(ctx, "secret-test2", []byte("hello2"), LeaseType) require.NoError(t, err) - secrets, err := b.GetByType(ctx, SecretLeaseType) + secrets, err := b.GetByType(ctx, LeaseType) require.NoError(t, err) assert.Len(t, secrets, 2) assert.ElementsMatch(t, [][]byte{[]byte("hello1"), []byte("hello2")}, secrets) - err = b.Delete("secret-test1") + err = b.Delete("secret-test1", LeaseType) require.NoError(t, err) - secrets, err = b.GetByType(ctx, SecretLeaseType) + secrets, err = b.GetByType(ctx, LeaseType) require.NoError(t, err) require.Len(t, secrets, 1) assert.Equal(t, []byte("hello2"), secrets[0]) @@ -95,19 +95,20 @@ func TestBoltClear(t *testing.T) { require.NoError(t, err) // Populate the bolt db - err = b.Set(ctx, "secret-test1", []byte("hello"), SecretLeaseType) + err = b.Set(ctx, 
"secret-test1", []byte("hello1"), LeaseType) require.NoError(t, err) - secrets, err := b.GetByType(ctx, SecretLeaseType) + secrets, err := b.GetByType(ctx, LeaseType) require.NoError(t, err) require.Len(t, secrets, 1) - assert.Equal(t, []byte("hello"), secrets[0]) + assert.Equal(t, []byte("hello1"), secrets[0]) - err = b.Set(ctx, "auth-test1", []byte("hello"), AuthLeaseType) + err = b.Set(ctx, "auth-test1", []byte("hello2"), LeaseType) require.NoError(t, err) - auths, err := b.GetByType(ctx, AuthLeaseType) + auths, err := b.GetByType(ctx, LeaseType) require.NoError(t, err) - require.Len(t, auths, 1) - assert.Equal(t, []byte("hello"), auths[0]) + require.Len(t, auths, 2) + assert.Equal(t, []byte("hello1"), auths[0]) + assert.Equal(t, []byte("hello2"), auths[1]) err = b.Set(ctx, "token-test1", []byte("hello"), TokenType) require.NoError(t, err) @@ -119,10 +120,7 @@ func TestBoltClear(t *testing.T) { // Clear the bolt db, and check that it's indeed clear err = b.Clear() require.NoError(t, err) - secrets, err = b.GetByType(ctx, SecretLeaseType) - require.NoError(t, err) - assert.Len(t, secrets, 0) - auths, err = b.GetByType(ctx, AuthLeaseType) + auths, err = b.GetByType(ctx, LeaseType) require.NoError(t, err) assert.Len(t, auths, 0) tokens, err = b.GetByType(ctx, TokenType) diff --git a/command/agent/cache/lease_cache.go b/command/agent/cache/lease_cache.go index ad6d71c0c902f..4dc99fa89b5bb 100644 --- a/command/agent/cache/lease_cache.go +++ b/command/agent/cache/lease_cache.go @@ -357,7 +357,7 @@ func (c *LeaseCache) Send(ctx context.Context, req *SendRequest) (*SendResponse, index.Lease = secret.LeaseID index.LeaseToken = req.Token - index.Type = cacheboltdb.SecretLeaseType + index.Type = cacheboltdb.LeaseType case secret.Auth != nil: c.logger.Debug("processing auth response", "method", req.Request.Method, "path", req.Request.URL.Path) @@ -387,7 +387,7 @@ func (c *LeaseCache) Send(ctx context.Context, req *SendRequest) (*SendResponse, index.Token = secret.Auth.ClientToken index.TokenAccessor = secret.Auth.Accessor - index.Type = cacheboltdb.AuthLeaseType + index.Type = cacheboltdb.LeaseType default: // We shouldn't be hitting this, but will err on the side of caution and @@ -459,7 +459,7 @@ func (c *LeaseCache) startRenewing(ctx context.Context, index *cachememdb.Index, return } c.logger.Debug("evicting index from cache", "id", id, "method", req.Request.Method, "path", req.Request.URL.Path) - err := c.Evict(id) + err := c.Evict(index) if err != nil { c.logger.Error("failed to evict index", "id", id, "error", err) return @@ -556,7 +556,9 @@ func computeIndexID(req *SendRequest) (string, error) { // Append req.Token into the byte slice. 
This is needed since auto-auth'ed
 	// requests set the token directly into SendRequest.Token
-	b.Write([]byte(req.Token))
+	if _, err := b.Write([]byte(req.Token)); err != nil {
+		return "", fmt.Errorf("failed to write token to hash input: %w", err)
+	}

 	return hex.EncodeToString(cryptoutil.Blake2b256Hash(string(b.Bytes()))), nil
 }
@@ -921,12 +923,12 @@ func (c *LeaseCache) Set(ctx context.Context, index *cachememdb.Index) error {
 	}

 	if c.ps != nil {
-		b, err := index.Serialize()
+		plaintext, err := index.Serialize()
 		if err != nil {
 			return err
 		}

-		if err := c.ps.Set(ctx, index.ID, b, index.Type); err != nil {
+		if err := c.ps.Set(ctx, index.ID, plaintext, index.Type); err != nil {
 			return err
 		}
 		c.logger.Trace("set entry in persistent storage", "type", index.Type, "path", index.RequestPath, "id", index.ID)
@@ -937,16 +939,16 @@

 // Evict removes an Index from the cachememdb, and also removes it from the
 // persistent cache (if enabled)
-func (c *LeaseCache) Evict(id string) error {
-	if err := c.db.Evict(cachememdb.IndexNameID, id); err != nil {
+func (c *LeaseCache) Evict(index *cachememdb.Index) error {
+	if err := c.db.Evict(cachememdb.IndexNameID, index.ID); err != nil {
 		return err
 	}

 	if c.ps != nil {
-		if err := c.ps.Delete(id); err != nil {
+		if err := c.ps.Delete(index.ID, index.Type); err != nil {
 			return err
 		}
-		c.logger.Trace("deleted item from persistent storage", "id", id)
+		c.logger.Trace("deleted item from persistent storage", "id", index.ID)
 	}

 	return nil
@@ -970,39 +972,52 @@
 // tokens first, since restoring a lease's renewal context and watcher requires
 // looking up the token in the cachememdb.
 func (c *LeaseCache) Restore(ctx context.Context, storage *cacheboltdb.BoltStorage) error {
-	var errors *multierror.Error
+	var errs *multierror.Error

 	// Process tokens first
 	tokens, err := storage.GetByType(ctx, cacheboltdb.TokenType)
 	if err != nil {
-		errors = multierror.Append(errors, err)
+		errs = multierror.Append(errs, err)
 	} else {
 		if err := c.restoreTokens(tokens); err != nil {
-			errors = multierror.Append(errors, err)
+			errs = multierror.Append(errs, err)
 		}
 	}

-	// Then process auth leases
-	authLeases, err := storage.GetByType(ctx, cacheboltdb.AuthLeaseType)
+	// Then process leases
+	leases, err := storage.GetByType(ctx, cacheboltdb.LeaseType)
 	if err != nil {
-		errors = multierror.Append(errors, err)
+		errs = multierror.Append(errs, err)
 	} else {
-		if err := c.restoreLeases(authLeases); err != nil {
-			errors = multierror.Append(errors, err)
-		}
-	}
+		for _, lease := range leases {
+			newIndex, err := cachememdb.Deserialize(lease)
+			if err != nil {
+				errs = multierror.Append(errs, err)
+				continue
+			}

-	// Then process secret leases
-	secretLeases, err := storage.GetByType(ctx, cacheboltdb.SecretLeaseType)
-	if err != nil {
-		errors = multierror.Append(errors, err)
-	} else {
-		if err := c.restoreLeases(secretLeases); err != nil {
-			errors = multierror.Append(errors, err)
+			// Check if this lease has already expired
+			expired, err := c.hasExpired(time.Now().UTC(), newIndex)
+			if err != nil {
+				c.logger.Warn("failed to check if lease is expired", "id", newIndex.ID, "error", err)
+			}
+			if expired {
+				continue
+			}
+
+			if err := c.restoreLeaseRenewCtx(newIndex); err != nil {
+				errs = multierror.Append(errs, err)
+				continue
+			}
+			if err := c.db.Set(newIndex); err != nil {
+				errs = multierror.Append(errs, err)
+				continue
+			}
+			c.logger.Trace("restored lease", "id", 
newIndex.ID, "path", newIndex.RequestPath)
 		}
 	}

-	return errors.ErrorOrNil()
+	return errs.ErrorOrNil()
 }

 func (c *LeaseCache) restoreTokens(tokens [][]byte) error {
@@ -1025,39 +1040,6 @@ func (c *LeaseCache) restoreTokens(tokens [][]byte) error {
 	return errors.ErrorOrNil()
 }

-func (c *LeaseCache) restoreLeases(leases [][]byte) error {
-	var errors *multierror.Error
-
-	for _, lease := range leases {
-		newIndex, err := cachememdb.Deserialize(lease)
-		if err != nil {
-			errors = multierror.Append(errors, err)
-			continue
-		}
-
-		// Check if this lease has already expired
-		expired, err := c.hasExpired(time.Now().UTC(), newIndex)
-		if err != nil {
-			c.logger.Warn("failed to check if lease is expired", "id", newIndex.ID, "error", err)
-		}
-		if expired {
-			continue
-		}
-
-		if err := c.restoreLeaseRenewCtx(newIndex); err != nil {
-			errors = multierror.Append(errors, err)
-			continue
-		}
-		if err := c.db.Set(newIndex); err != nil {
-			errors = multierror.Append(errors, err)
-			continue
-		}
-		c.logger.Trace("restored lease", "id", newIndex.ID, "path", newIndex.RequestPath)
-	}
-
-	return errors.ErrorOrNil()
-}
-
 // restoreLeaseRenewCtx re-creates a RenewCtx for an index object and starts
 // the watcher go routine
 func (c *LeaseCache) restoreLeaseRenewCtx(index *cachememdb.Index) error {
@@ -1300,13 +1282,13 @@ func (c *LeaseCache) hasExpired(currentTime time.Time, index *cachememdb.Index)
 	elapsed := currentTime.Sub(index.LastRenewed)
 	var leaseDuration int
-	switch index.Type {
-	case cacheboltdb.AuthLeaseType:
-		leaseDuration = secret.Auth.LeaseDuration
-	case cacheboltdb.SecretLeaseType:
+	switch {
+	case secret.LeaseID != "":
 		leaseDuration = secret.LeaseDuration
+	case secret.Auth != nil:
+		leaseDuration = secret.Auth.LeaseDuration
 	default:
-		return false, fmt.Errorf("index type %q unexpected in expiration check", index.Type)
+		return false, errors.New("secret without lease encountered in expiration check")
 	}

 	if int(elapsed.Seconds()) > leaseDuration {
diff --git a/command/agent/cache/lease_cache_test.go b/command/agent/cache/lease_cache_test.go
index 66fb56be6b202..520eebaa9d6f3 100644
--- a/command/agent/cache/lease_cache_test.go
+++ b/command/agent/cache/lease_cache_test.go
@@ -175,7 +175,7 @@ func TestLeaseCache_SendCacheable(t *testing.T) {
 	lc := testNewLeaseCache(t, responses)

 	// Register a token so that the token and lease requests are cached
-	lc.RegisterAutoAuthToken("autoauthtoken")
+	require.NoError(t, lc.RegisterAutoAuthToken("autoauthtoken"))

 	// Make a request. A response with a new token is returned to the lease
 	// cache and that will be cached. 
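The ordering guarantee that patch 01 leans on can be seen in isolation with a few lines of standalone code. The sketch below is illustrative only and is not part of the patch; it assumes nothing beyond the go.etcd.io/bbolt dependency the patch already imports, and the bucket name and values are made up:

package main

import (
	"encoding/binary"
	"fmt"
	"log"

	bolt "go.etcd.io/bbolt"
)

func main() {
	db, err := bolt.Open("demo.db", 0o600, nil)
	if err != nil {
		log.Fatal(err)
	}
	defer db.Close()

	if err := db.Update(func(tx *bolt.Tx) error {
		b, err := tx.CreateBucketIfNotExists([]byte("lease"))
		if err != nil {
			return err
		}
		// Key each value the same way autoIncrementedLeaseKey does:
		// a bucket sequence number encoded big endian.
		for _, v := range []string{"parent", "child", "grandchild"} {
			seq, err := b.NextSequence()
			if err != nil {
				return err
			}
			key := make([]byte, 8)
			binary.BigEndian.PutUint64(key, seq)
			if err := b.Put(key, []byte(v)); err != nil {
				return err
			}
		}
		return nil
	}); err != nil {
		log.Fatal(err)
	}

	// ForEach visits keys in byte-slice order, so this prints parent,
	// child, grandchild: iteration order equals insertion order.
	_ = db.View(func(tx *bolt.Tx) error {
		return tx.Bucket([]byte("lease")).ForEach(func(k, v []byte) error {
			fmt.Println(string(v))
			return nil
		})
	})
}

Because each key is the big-endian encoding of a monotonically increasing sequence number, byte-wise comparison agrees with numeric comparison, which is what lets Restore see a parent token before any lease that was created under it.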
@@ -600,6 +600,7 @@ func TestLeaseCache_Concurrent_NonCacheable(t *testing.T) { defer cancel() wgDoneCh := make(chan struct{}) + errCh := make(chan error) go func() { var wg sync.WaitGroup @@ -618,7 +619,7 @@ func TestLeaseCache_Concurrent_NonCacheable(t *testing.T) { _, err := lc.Send(ctx, sendReq) if err != nil { - t.Fatal(err) + errCh <- err } }() } @@ -631,6 +632,8 @@ func TestLeaseCache_Concurrent_NonCacheable(t *testing.T) { case <-ctx.Done(): t.Fatalf("request timed out: %s", ctx.Err()) case <-wgDoneCh: + case err := <-errCh: + t.Fatal(err) } } @@ -649,6 +652,7 @@ func TestLeaseCache_Concurrent_Cacheable(t *testing.T) { var cacheCount atomic.Uint32 wgDoneCh := make(chan struct{}) + errCh := make(chan error) go func() { var wg sync.WaitGroup @@ -666,7 +670,7 @@ func TestLeaseCache_Concurrent_Cacheable(t *testing.T) { resp, err := lc.Send(ctx, sendReq) if err != nil { - t.Fatal(err) + errCh <- err } if resp.CacheMeta != nil && resp.CacheMeta.Hit { @@ -683,6 +687,8 @@ func TestLeaseCache_Concurrent_Cacheable(t *testing.T) { case <-ctx.Done(): t.Fatalf("request timed out: %s", ctx.Err()) case <-wgDoneCh: + case err := <-errCh: + t.Fatal(err) } // Ensure that all but one request got proxied. The other 99 should be @@ -827,7 +833,7 @@ func TestLeaseCache_PersistAndRestore(t *testing.T) { require.NotEmpty(t, deleteIDs) for _, deleteID := range deleteIDs { - err = boltStorage.Delete(deleteID) + err = boltStorage.Delete(deleteID, cacheboltdb.LeaseType) require.NoError(t, err) } @@ -911,7 +917,7 @@ func TestEvictPersistent(t *testing.T) { defer boltStorage.Close() lc := testNewLeaseCacheWithPersistence(t, responses, boltStorage) - lc.RegisterAutoAuthToken("autoauthtoken") + require.NoError(t, lc.RegisterAutoAuthToken("autoauthtoken")) // populate cache by sending request through sendReq := &SendRequest{ @@ -924,7 +930,7 @@ func TestEvictPersistent(t *testing.T) { assert.Nil(t, resp.CacheMeta) // Check bolt for the cached lease - secrets, err := lc.ps.GetByType(ctx, cacheboltdb.SecretLeaseType) + secrets, err := lc.ps.GetByType(ctx, cacheboltdb.LeaseType) require.NoError(t, err) assert.Len(t, secrets, 1) @@ -938,7 +944,7 @@ func TestEvictPersistent(t *testing.T) { time.Sleep(2 * time.Second) // Check that cached item is gone - secrets, err = lc.ps.GetByType(ctx, cacheboltdb.SecretLeaseType) + secrets, err = lc.ps.GetByType(ctx, cacheboltdb.LeaseType) require.NoError(t, err) assert.Len(t, secrets, 0) } @@ -978,7 +984,7 @@ func Test_hasExpired(t *testing.T) { newTestSendResponse(201, `{"lease_id": "foo", "renewable": true, "data": {"value": "foo"}, "lease_duration": 60}`), } lc := testNewLeaseCache(t, responses) - lc.RegisterAutoAuthToken("autoauthtoken") + require.NoError(t, lc.RegisterAutoAuthToken("autoauthtoken")) cacheTests := []struct { token string @@ -990,14 +996,14 @@ func Test_hasExpired(t *testing.T) { // auth lease token: "autoauthtoken", urlPath: "/v1/sample/auth", - leaseType: cacheboltdb.AuthLeaseType, + leaseType: cacheboltdb.LeaseType, wantStatusCode: responses[0].Response.StatusCode, }, { // secret lease token: "autoauthtoken", urlPath: "/v1/sample/secret", - leaseType: cacheboltdb.SecretLeaseType, + leaseType: cacheboltdb.LeaseType, wantStatusCode: responses[1].Response.StatusCode, }, } @@ -1039,13 +1045,13 @@ func TestLeaseCache_hasExpired_wrong_type(t *testing.T) { Content-Type: application/json Date: Tue, 02 Mar 2021 17:54:16 GMT -{"auth": {"client_token": "testtoken", "renewable": true, "lease_duration": 60}}`), +{}`), } lc := testNewLeaseCache(t, nil) expired, err := 
lc.hasExpired(time.Now().UTC(), index) assert.False(t, expired) - assert.EqualError(t, err, `index type "token" unexpected in expiration check`) + assert.EqualError(t, err, `secret without lease encountered in expiration check`) } func TestLeaseCacheRestore_expired(t *testing.T) { @@ -1061,7 +1067,7 @@ func TestLeaseCacheRestore_expired(t *testing.T) { lc := testNewLeaseCacheWithPersistence(t, responses, boltStorage) // Register an auto-auth token so that the token and lease requests are cached in mem - lc.RegisterAutoAuthToken("autoauthtoken") + require.NoError(t, lc.RegisterAutoAuthToken("autoauthtoken")) cacheTests := []struct { token string From b316b6b1e6a9ff35529b715dbce39bcacee8efa2 Mon Sep 17 00:00:00 2001 From: Tom Proctor Date: Fri, 15 Oct 2021 16:02:18 +0100 Subject: [PATCH 02/10] Add stress test for correct dependency restore order --- command/agent/cache/lease_cache_test.go | 213 +++++++++++++++++++----- 1 file changed, 173 insertions(+), 40 deletions(-) diff --git a/command/agent/cache/lease_cache_test.go b/command/agent/cache/lease_cache_test.go index 520eebaa9d6f3..7b2cc28659215 100644 --- a/command/agent/cache/lease_cache_test.go +++ b/command/agent/cache/lease_cache_test.go @@ -1,14 +1,18 @@ package cache import ( + "bytes" "context" "fmt" + "io" "io/ioutil" "net/http" "net/http/httptest" "net/url" "os" "reflect" + "regexp" + "strconv" "strings" "sync" "testing" @@ -32,21 +36,25 @@ import ( func testNewLeaseCache(t *testing.T, responses []*SendResponse) *LeaseCache { t.Helper() + return testNewLeaseCacheWithLogWriter(t, responses, hclog.DefaultOutput) +} + +func testNewLeaseCacheWithLogWriter(t *testing.T, responses []*SendResponse, w io.Writer) *LeaseCache { + t.Helper() + client, err := api.NewClient(api.DefaultConfig()) if err != nil { t.Fatal(err) } - lc, err := NewLeaseCache(&LeaseCacheConfig{ Client: client, BaseContext: context.Background(), Proxier: newMockProxier(responses), - Logger: logging.NewVaultLogger(hclog.Trace).Named("cache.leasecache"), + Logger: logging.NewVaultLoggerWithWriter(w, hclog.Trace).Named("cache.leasecache"), }) if err != nil { t.Fatal(err) } - return lc } @@ -717,6 +725,45 @@ func setupBoltStorage(t *testing.T) (tempCacheDir string, boltStorage *cachebolt return tempCacheDir, boltStorage } +func compareBeforeAndAfter(t *testing.T, before, after *LeaseCache, beforeLen, afterLen int) { + beforeDB, err := before.db.GetByPrefix(cachememdb.IndexNameID) + require.NoError(t, err) + assert.Len(t, beforeDB, beforeLen) + afterDB, err := after.db.GetByPrefix(cachememdb.IndexNameID) + require.NoError(t, err) + assert.Len(t, afterDB, afterLen) + for _, cachedItem := range beforeDB { + if strings.Contains(cachedItem.RequestPath, "expect-missing") { + continue + } + restoredItem, err := after.db.Get(cachememdb.IndexNameID, cachedItem.ID) + require.NoError(t, err) + + assert.NoError(t, err) + assert.Equal(t, cachedItem.ID, restoredItem.ID) + assert.Equal(t, cachedItem.Lease, restoredItem.Lease) + assert.Equal(t, cachedItem.LeaseToken, restoredItem.LeaseToken) + assert.Equal(t, cachedItem.Namespace, restoredItem.Namespace) + assert.Equal(t, cachedItem.RequestHeader, restoredItem.RequestHeader) + assert.Equal(t, cachedItem.RequestMethod, restoredItem.RequestMethod) + assert.Equal(t, cachedItem.RequestPath, restoredItem.RequestPath) + assert.Equal(t, cachedItem.RequestToken, restoredItem.RequestToken) + assert.Equal(t, cachedItem.Response, restoredItem.Response) + assert.Equal(t, cachedItem.Token, restoredItem.Token) + assert.Equal(t, 
cachedItem.TokenAccessor, restoredItem.TokenAccessor) + assert.Equal(t, cachedItem.TokenParent, restoredItem.TokenParent) + + // check what we can in the renewal context + assert.NotEmpty(t, restoredItem.RenewCtxInfo.CancelFunc) + assert.NotZero(t, restoredItem.RenewCtxInfo.DoneCh) + require.NotEmpty(t, restoredItem.RenewCtxInfo.Ctx) + assert.Equal(t, + cachedItem.RenewCtxInfo.Ctx.Value(contextIndexID), + restoredItem.RenewCtxInfo.Ctx.Value(contextIndexID), + ) + } +} + func TestLeaseCache_PersistAndRestore(t *testing.T) { // Emulate responses from the api proxy. The first two use the auto-auth // token, and the others use another token. @@ -848,43 +895,8 @@ func TestLeaseCache_PersistAndRestore(t *testing.T) { assert.Len(t, errors.Errors, 1) assert.Contains(t, errors.Error(), "could not find parent Token testtoken2") - // Now compare before and after - beforeDB, err := lc.db.GetByPrefix(cachememdb.IndexNameID) - require.NoError(t, err) - assert.Len(t, beforeDB, 7) - for _, cachedItem := range beforeDB { - if strings.Contains(cachedItem.RequestPath, "expect-missing") { - continue - } - restoredItem, err := restoredCache.db.Get(cachememdb.IndexNameID, cachedItem.ID) - require.NoError(t, err) - - assert.NoError(t, err) - assert.Equal(t, cachedItem.ID, restoredItem.ID) - assert.Equal(t, cachedItem.Lease, restoredItem.Lease) - assert.Equal(t, cachedItem.LeaseToken, restoredItem.LeaseToken) - assert.Equal(t, cachedItem.Namespace, restoredItem.Namespace) - assert.Equal(t, cachedItem.RequestHeader, restoredItem.RequestHeader) - assert.Equal(t, cachedItem.RequestMethod, restoredItem.RequestMethod) - assert.Equal(t, cachedItem.RequestPath, restoredItem.RequestPath) - assert.Equal(t, cachedItem.RequestToken, restoredItem.RequestToken) - assert.Equal(t, cachedItem.Response, restoredItem.Response) - assert.Equal(t, cachedItem.Token, restoredItem.Token) - assert.Equal(t, cachedItem.TokenAccessor, restoredItem.TokenAccessor) - assert.Equal(t, cachedItem.TokenParent, restoredItem.TokenParent) - - // check what we can in the renewal context - assert.NotEmpty(t, restoredItem.RenewCtxInfo.CancelFunc) - assert.NotZero(t, restoredItem.RenewCtxInfo.DoneCh) - require.NotEmpty(t, restoredItem.RenewCtxInfo.Ctx) - assert.Equal(t, - cachedItem.RenewCtxInfo.Ctx.Value(contextIndexID), - restoredItem.RenewCtxInfo.Ctx.Value(contextIndexID), - ) - } - afterDB, err := restoredCache.db.GetByPrefix(cachememdb.IndexNameID) - require.NoError(t, err) - assert.Len(t, afterDB, 5) + // Now compare the cache contents before and after + compareBeforeAndAfter(t, lc, restoredCache, 7, 5) // And finally send the cache requests once to make sure they're all being // served from the restoredCache unless they were intended to be missing after restore. 
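The stress test introduced in the next hunk exercises why this ordering matters: a restored lease's renewal context is derived from its parent token's context, so the parent must already be present in the cachememdb when the child is processed. Below is a minimal sketch of that parent/child relationship, using hypothetical variable names rather than the agent's actual types:

package main

import (
	"context"
	"fmt"
)

func main() {
	// The parent token's context has to exist first...
	parentCtx, cancelParent := context.WithCancel(context.Background())

	// ...so that the child lease's renewal context can be derived from it.
	childCtx, cancelChild := context.WithCancel(parentCtx)
	defer cancelChild()

	// Cancelling the parent cancels every derived child context with it,
	// mirroring how revoking a token should stop renewal of its leases.
	cancelParent()
	<-childCtx.Done()
	fmt.Println(childCtx.Err()) // context.Canceled
}

Restoring a child before its parent would leave no parent context to derive from, which is exactly the "could not find parent Token" failure asserted above.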
@@ -905,6 +917,127 @@ func TestLeaseCache_PersistAndRestore(t *testing.T) { } } +func TestLeaseCache_PersistAndRestore_WithManyDependencies(t *testing.T) { + tempDir, boltStorage := setupBoltStorage(t) + defer os.RemoveAll(tempDir) + defer boltStorage.Close() + + var requests []*SendRequest + var responses []*SendResponse + + // helper func to generate new auth leases with a child secret lease attached + authAndSecretLease := func(id int, parentToken, newToken string) { + t.Helper() + requests = append(requests, &SendRequest{ + Token: parentToken, + Request: httptest.NewRequest( + "PUT", + fmt.Sprintf("http://example.com/v1/auth/approle-%d/login", id), + strings.NewReader(""), + ), + }) + responses = append(responses, newTestSendResponse(200, fmt.Sprintf(`{"auth": {"client_token": "%s", "renewable": true, "lease_duration": 600}}`, newToken))) + + // Fetch a leased secret using the new token + requests = append(requests, &SendRequest{ + Token: newToken, + Request: httptest.NewRequest( + "GET", + fmt.Sprintf("http://example.com/v1/kv/%d", id), + strings.NewReader(""), + ), + }) + responses = append(responses, newTestSendResponse(200, fmt.Sprintf(`{"lease_id": "secret-%d-lease", "renewable": true, "data": {"number": %d}, "lease_duration": 600}`, id, id))) + } + + // Pathological case: a long chain of child tokens + authAndSecretLease(0, "autoauthtoken", "many-ancestors-token;0") + for i := 1; i <= 50; i++ { + // Create a new generation of child token + authAndSecretLease(i, fmt.Sprintf("many-ancestors-token;%d", i-1), fmt.Sprintf("many-ancestors-token;%d", i)) + } + + // Lots of sibling tokens with auto auth token as their parent + for i := 51; i <= 100; i++ { + authAndSecretLease(i, "autoauthtoken", fmt.Sprintf("many-siblings-token;%d", i)) + } + + // Also create some extra siblings for an auth token further down the chain + for i := 101; i <= 110; i++ { + authAndSecretLease(i, "many-ancestors-token;25", fmt.Sprintf("many-siblings-for-ancestor-token;%d", i)) + } + + lc := testNewLeaseCacheWithPersistence(t, responses, boltStorage) + + // Register an auto-auth token so that the token and lease requests are cached + err := lc.RegisterAutoAuthToken("autoauthtoken") + require.NoError(t, err) + + for _, req := range requests { + // Send once to cache + resp, err := lc.Send(context.Background(), req) + require.NoError(t, err) + assert.Equal(t, 200, resp.Response.StatusCode, "expected success") + assert.Nil(t, resp.CacheMeta) + } + + logBuffer := &bytes.Buffer{} + restoredCache := testNewLeaseCacheWithLogWriter(t, nil, logBuffer) + + err = restoredCache.Restore(context.Background(), boltStorage) + require.NoError(t, err) + + // Now compare the cache contents before and after + compareBeforeAndAfter(t, lc, restoredCache, 223, 223) + + // Clear the cache so that there's no further logging in the background and + // we can safely read from the log buffer. 
+ require.NoError(t, restoredCache.handleCacheClear(context.Background(), &cacheClearInput{Type: "all"})) + + // Ensure leases were restored in the correct order + maxAppRoleAncestor := -1 + restoredTokens := make(map[int]struct{}) + var approleTokensFound, secretsFound int + approleRegex := regexp.MustCompile("restored lease.*path=/v1/auth/approle-(\\d+)/login") + kvRegex := regexp.MustCompile("restored lease.*path=/v1/kv/(\\d+)") + for { + line, err := logBuffer.ReadBytes(byte('\n')) + if err == io.EOF { + break + } + t.Log(string(line[:len(line)-1])) + if matches := approleRegex.FindSubmatch(line); len(matches) >= 2 { + id, err := strconv.Atoi(string(matches[1])) + require.NoError(t, err) + approleTokensFound++ + restoredTokens[id] = struct{}{} + + // These tokens are all children of each other, and should be restored in + // strictly ascending order. + if id <= 50 { + require.Equal(t, maxAppRoleAncestor+1, id) + maxAppRoleAncestor = id + } + // These tokens should not start getting restored until their parent(25) is restored. + if id >= 101 { + require.GreaterOrEqual(t, maxAppRoleAncestor, 25) + } + } else if matches := kvRegex.FindSubmatch(line); len(matches) >= 2 { + id, err := strconv.Atoi(string(matches[1])) + require.NoError(t, err) + secretsFound++ + + // Every secret lease should be restored after its own parent (the same number) + _, restored := restoredTokens[id] + require.True(t, restored, "kv lease restored before its parent token", id) + } + } + + assert.Equal(t, 111, approleTokensFound) + assert.Equal(t, 111, secretsFound) + assert.Equal(t, 50, maxAppRoleAncestor, "expected to find all approle logins in the range 0-50") +} + func TestEvictPersistent(t *testing.T) { ctx := context.Background() From 2b743772ca270319ffb0b6428267df229c8dac74 Mon Sep 17 00:00:00 2001 From: Tom Proctor Date: Mon, 18 Oct 2021 12:03:15 +0100 Subject: [PATCH 03/10] Extra trace log --- command/agent/cache/lease_cache.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/command/agent/cache/lease_cache.go b/command/agent/cache/lease_cache.go index 4dc99fa89b5bb..8ecea7d93aeb6 100644 --- a/command/agent/cache/lease_cache.go +++ b/command/agent/cache/lease_cache.go @@ -996,6 +996,8 @@ func (c *LeaseCache) Restore(ctx context.Context, storage *cacheboltdb.BoltStora continue } + c.logger.Trace("restoring lease", "id", newIndex.ID, "path", newIndex.RequestPath) + // Check if this lease has already expired expired, err := c.hasExpired(time.Now().UTC(), newIndex) if err != nil { From 1ea9967e333fed23be1623ae175a535ddc5e2904 Mon Sep 17 00:00:00 2001 From: Tom Proctor Date: Mon, 18 Oct 2021 16:40:46 +0100 Subject: [PATCH 04/10] Don't rely on log order for test --- changelog/12843.txt | 3 ++ command/agent/cache/lease_cache.go | 11 +++++++ command/agent/cache/lease_cache_test.go | 39 +++++++++++++++---------- 3 files changed, 38 insertions(+), 15 deletions(-) create mode 100644 changelog/12843.txt diff --git a/changelog/12843.txt b/changelog/12843.txt new file mode 100644 index 0000000000000..2beee7f660d4b --- /dev/null +++ b/changelog/12843.txt @@ -0,0 +1,3 @@ +```release-note:improvement +agent/cache: Process persistent cache leases in dependency order during restore to ensure child leases are always correctly restored +``` diff --git a/command/agent/cache/lease_cache.go b/command/agent/cache/lease_cache.go index 8ecea7d93aeb6..642e613abe88f 100644 --- a/command/agent/cache/lease_cache.go +++ b/command/agent/cache/lease_cache.go @@ -972,6 +972,10 @@ func (c *LeaseCache) Flush() error { // tokens first, since 
restoring a lease's renewal context and watcher requires // looking up the token in the cachememdb. func (c *LeaseCache) Restore(ctx context.Context, storage *cacheboltdb.BoltStorage) error { + return c.restoreWithChannel(ctx, storage, nil) +} + +func (c *LeaseCache) restoreWithChannel(ctx context.Context, storage *cacheboltdb.BoltStorage, ch chan *cachememdb.Index) error { var errs *multierror.Error // Process tokens first @@ -1015,10 +1019,17 @@ func (c *LeaseCache) Restore(ctx context.Context, storage *cacheboltdb.BoltStora errs = multierror.Append(errs, err) continue } + if ch != nil { + ch <- newIndex + } c.logger.Trace("restored lease", "id", newIndex.ID, "path", newIndex.RequestPath) } } + if ch != nil { + close(ch) + } + return errs.ErrorOrNil() } diff --git a/command/agent/cache/lease_cache_test.go b/command/agent/cache/lease_cache_test.go index 7b2cc28659215..138aea7946f07 100644 --- a/command/agent/cache/lease_cache_test.go +++ b/command/agent/cache/lease_cache_test.go @@ -984,29 +984,38 @@ func TestLeaseCache_PersistAndRestore_WithManyDependencies(t *testing.T) { logBuffer := &bytes.Buffer{} restoredCache := testNewLeaseCacheWithLogWriter(t, nil, logBuffer) - err = restoredCache.Restore(context.Background(), boltStorage) + ch := make(chan *cachememdb.Index) + var restoredLeases []*cachememdb.Index + var wg sync.WaitGroup + wg.Add(1) + go func() { + defer wg.Done() + for { + select { + case index, ok := <-ch: + if !ok { + return + } + restoredLeases = append(restoredLeases, index) + } + } + }() + err = restoredCache.restoreWithChannel(context.Background(), boltStorage, ch) require.NoError(t, err) + wg.Wait() // Now compare the cache contents before and after compareBeforeAndAfter(t, lc, restoredCache, 223, 223) - // Clear the cache so that there's no further logging in the background and - // we can safely read from the log buffer. 
- require.NoError(t, restoredCache.handleCacheClear(context.Background(), &cacheClearInput{Type: "all"})) - // Ensure leases were restored in the correct order + approleRegex := regexp.MustCompile("/v1/auth/approle-(\\d+)/login") + kvRegex := regexp.MustCompile("/v1/kv/(\\d+)") maxAppRoleAncestor := -1 restoredTokens := make(map[int]struct{}) var approleTokensFound, secretsFound int - approleRegex := regexp.MustCompile("restored lease.*path=/v1/auth/approle-(\\d+)/login") - kvRegex := regexp.MustCompile("restored lease.*path=/v1/kv/(\\d+)") - for { - line, err := logBuffer.ReadBytes(byte('\n')) - if err == io.EOF { - break - } - t.Log(string(line[:len(line)-1])) - if matches := approleRegex.FindSubmatch(line); len(matches) >= 2 { + for _, lease := range restoredLeases { + t.Log(lease.RequestPath) + if matches := approleRegex.FindStringSubmatch(lease.RequestPath); len(matches) >= 2 { id, err := strconv.Atoi(string(matches[1])) require.NoError(t, err) approleTokensFound++ @@ -1022,7 +1031,7 @@ func TestLeaseCache_PersistAndRestore_WithManyDependencies(t *testing.T) { if id >= 101 { require.GreaterOrEqual(t, maxAppRoleAncestor, 25) } - } else if matches := kvRegex.FindSubmatch(line); len(matches) >= 2 { + } else if matches := kvRegex.FindStringSubmatch(lease.RequestPath); len(matches) >= 2 { id, err := strconv.Atoi(string(matches[1])) require.NoError(t, err) secretsFound++ From a0560edb5c21d75d7b95e78ada085e406563ddbd Mon Sep 17 00:00:00 2001 From: Tom Proctor Date: Tue, 19 Oct 2021 15:42:13 +0100 Subject: [PATCH 05/10] Fix hardcoded error message --- command/agent/cache/cacheboltdb/bolt.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/command/agent/cache/cacheboltdb/bolt.go b/command/agent/cache/cacheboltdb/bolt.go index 968bffbaaff13..4b93b0bed9405 100644 --- a/command/agent/cache/cacheboltdb/bolt.go +++ b/command/agent/cache/cacheboltdb/bolt.go @@ -132,7 +132,7 @@ func createV2BoltSchema(tx *bolt.Tx) error { // create the buckets for tokens and leases for _, bucket := range []string{TokenType, LeaseType, lookupType} { if _, err := tx.CreateBucketIfNotExists([]byte(bucket)); err != nil { - return fmt.Errorf("failed to create token bucket: %w", err) + return fmt.Errorf("failed to create %s bucket: %w", bucket, err) } } From 6284dbfde014b2a58aead9fd2066bde7bb0f4455 Mon Sep 17 00:00:00 2001 From: Tom Proctor Date: Mon, 25 Oct 2021 16:11:23 +0100 Subject: [PATCH 06/10] Tidy up test --- command/agent/cache/lease_cache.go | 11 ----- command/agent/cache/lease_cache_test.go | 57 ++++++++----------------- 2 files changed, 17 insertions(+), 51 deletions(-) diff --git a/command/agent/cache/lease_cache.go b/command/agent/cache/lease_cache.go index 642e613abe88f..8ecea7d93aeb6 100644 --- a/command/agent/cache/lease_cache.go +++ b/command/agent/cache/lease_cache.go @@ -972,10 +972,6 @@ func (c *LeaseCache) Flush() error { // tokens first, since restoring a lease's renewal context and watcher requires // looking up the token in the cachememdb. 
func (c *LeaseCache) Restore(ctx context.Context, storage *cacheboltdb.BoltStorage) error {
-	return c.restoreWithChannel(ctx, storage, nil)
-}
-
-func (c *LeaseCache) restoreWithChannel(ctx context.Context, storage *cacheboltdb.BoltStorage, ch chan *cachememdb.Index) error {
 	var errs *multierror.Error

 	// Process tokens first
@@ -1019,17 +1015,10 @@ func (c *LeaseCache) restoreWithChannel(ctx context.Context, storage *cacheboltd
 			errs = multierror.Append(errs, err)
 			continue
 		}
-		if ch != nil {
-			ch <- newIndex
-		}
 		c.logger.Trace("restored lease", "id", newIndex.ID, "path", newIndex.RequestPath)
 	}
 	}

-	if ch != nil {
-		close(ch)
-	}
-
 	return errs.ErrorOrNil()
 }

diff --git a/command/agent/cache/lease_cache_test.go b/command/agent/cache/lease_cache_test.go
index 138aea7946f07..7e5976711a9f1 100644
--- a/command/agent/cache/lease_cache_test.go
+++ b/command/agent/cache/lease_cache_test.go
@@ -1,10 +1,8 @@
 package cache

 import (
-	"bytes"
 	"context"
 	"fmt"
-	"io"
 	"io/ioutil"
 	"net/http"
 	"net/http/httptest"
@@ -36,12 +34,6 @@ import (
 func testNewLeaseCache(t *testing.T, responses []*SendResponse) *LeaseCache {
 	t.Helper()
-	return testNewLeaseCacheWithLogWriter(t, responses, hclog.DefaultOutput)
-}
-
-func testNewLeaseCacheWithLogWriter(t *testing.T, responses []*SendResponse, w io.Writer) *LeaseCache {
-	t.Helper()
-
 	client, err := api.NewClient(api.DefaultConfig())
 	if err != nil {
 		t.Fatal(err)
 	}
@@ -50,7 +42,7 @@ func testNewLeaseCacheWithLogWriter(t *testing.T, responses []*SendResponse, w i
 		Client: client,
 		BaseContext: context.Background(),
 		Proxier: newMockProxier(responses),
-		Logger: logging.NewVaultLoggerWithWriter(w, hclog.Trace).Named("cache.leasecache"),
+		Logger: logging.NewVaultLogger(hclog.Trace).Named("cache.leasecache"),
 	})
 	if err != nil {
 		t.Fatal(err)
 	}
@@ -981,41 +973,19 @@ func TestLeaseCache_PersistAndRestore_WithManyDependencies(t *testing.T) {
 		assert.Nil(t, resp.CacheMeta)
 	}

-	logBuffer := &bytes.Buffer{}
-	restoredCache := testNewLeaseCacheWithLogWriter(t, nil, logBuffer)
-
-	ch := make(chan *cachememdb.Index)
-	var restoredLeases []*cachememdb.Index
-	var wg sync.WaitGroup
-	wg.Add(1)
-	go func() {
-		defer wg.Done()
-		for {
-			select {
-			case index, ok := <-ch:
-				if !ok {
-					return
-				}
-				restoredLeases = append(restoredLeases, index)
-			}
-		}
-	}()
-	err = restoredCache.restoreWithChannel(context.Background(), boltStorage, ch)
-	require.NoError(t, err)
-	wg.Wait()
-
-	// Now compare the cache contents before and after
-	compareBeforeAndAfter(t, lc, restoredCache, 223, 223)
-
-	// Ensure leases were restored in the correct order
+	// Ensure leases are retrieved in the correct order
 	approleRegex := regexp.MustCompile("/v1/auth/approle-(\\d+)/login")
 	kvRegex := regexp.MustCompile("/v1/kv/(\\d+)")
 	maxAppRoleAncestor := -1
 	restoredTokens := make(map[int]struct{})
 	var approleTokensFound, secretsFound int
-	for _, lease := range restoredLeases {
-		t.Log(lease.RequestPath)
-		if matches := approleRegex.FindStringSubmatch(lease.RequestPath); len(matches) >= 2 {
+
+	leases, err := boltStorage.GetByType(context.Background(), cacheboltdb.LeaseType)
+	require.NoError(t, err)
+	for _, lease := range leases {
+		index, err := cachememdb.Deserialize(lease)
+		require.NoError(t, err)
+		t.Log(index.RequestPath)
+		if matches := approleRegex.FindStringSubmatch(index.RequestPath); len(matches) >= 2 {
 			id, err := strconv.Atoi(string(matches[1]))
 			require.NoError(t, err)
 			approleTokensFound++
@@ -1031,7 +1001,7 @@ func TestLeaseCache_PersistAndRestore_WithManyDependencies(t *testing.T) {
 			restoredTokens[id] = struct{}{}

 			// These tokens are all children of each other, and should be restored in
 			// strictly ascending order.
 			if id <= 50 {
 				require.Equal(t, maxAppRoleAncestor+1, id)
 				maxAppRoleAncestor = id
 			}
 			// These tokens should not start getting restored until their parent(25) is restored.
 			if id >= 101 {
 				require.GreaterOrEqual(t, 
maxAppRoleAncestor, 25) } - } else if matches := kvRegex.FindStringSubmatch(lease.RequestPath); len(matches) >= 2 { + } else if matches := kvRegex.FindStringSubmatch(index.RequestPath); len(matches) >= 2 { id, err := strconv.Atoi(string(matches[1])) require.NoError(t, err) secretsFound++ @@ -1045,6 +1015,13 @@ func TestLeaseCache_PersistAndRestore_WithManyDependencies(t *testing.T) { assert.Equal(t, 111, approleTokensFound) assert.Equal(t, 111, secretsFound) assert.Equal(t, 50, maxAppRoleAncestor, "expected to find all approle logins in the range 0-50") + + restoredCache := testNewLeaseCache(t, nil) + err = restoredCache.Restore(context.Background(), boltStorage) + require.NoError(t, err) + + // Now compare the cache contents before and after + compareBeforeAndAfter(t, lc, restoredCache, 223, 223) } func TestEvictPersistent(t *testing.T) { From 19c1b61110ffb5df3fd2e63c3dc8f76bf852df40 Mon Sep 17 00:00:00 2001 From: Tom Proctor Date: Mon, 25 Oct 2021 17:04:45 +0100 Subject: [PATCH 07/10] Add migration test --- command/agent/cache/cacheboltdb/bolt.go | 68 +++++++++++------ command/agent/cache/cacheboltdb/bolt_test.go | 78 ++++++++++++++++++++ 2 files changed, 124 insertions(+), 22 deletions(-) diff --git a/command/agent/cache/cacheboltdb/bolt.go b/command/agent/cache/cacheboltdb/bolt.go index 4b93b0bed9405..8e3cc73dfa5a4 100644 --- a/command/agent/cache/cacheboltdb/bolt.go +++ b/command/agent/cache/cacheboltdb/bolt.go @@ -27,11 +27,11 @@ const ( // bootstrapping keys metaBucketName = "meta" - // DEPRECATED: SecretLeaseType - v1 Bucket/type for leases with secret info - SecretLeaseType = "secret-lease" + // DEPRECATED: secretLeaseType - v1 Bucket/type for leases with secret info + secretLeaseType = "secret-lease" - // DEPRECATED: AuthLeaseType - v1 Bucket/type for leases with auth info - AuthLeaseType = "auth-lease" + // DEPRECATED: authLeaseType - v1 Bucket/type for leases with auth info + authLeaseType = "auth-lease" // TokenType - Bucket/type for auto-auth tokens TokenType = "token" @@ -86,7 +86,7 @@ func NewBoltStorage(config *BoltStorageConfig) (*BoltStorage, error) { return nil, err } err = db.Update(func(tx *bolt.Tx) error { - return createBoltSchema(tx) + return createBoltSchema(tx, storageVersion) }) if err != nil { return nil, err @@ -100,36 +100,60 @@ func NewBoltStorage(config *BoltStorageConfig) (*BoltStorage, error) { return bs, nil } -func createBoltSchema(tx *bolt.Tx) error { - // create the meta bucket at the top level +func createBoltSchema(tx *bolt.Tx, createVersion string) error { + switch { + case createVersion == "1": + if err := createV1BoltSchema(tx); err != nil { + return err + } + case createVersion == "2": + if err := createV2BoltSchema(tx); err != nil { + return err + } + default: + return fmt.Errorf("schema version %s not supported", createVersion) + } + meta, err := tx.CreateBucketIfNotExists([]byte(metaBucketName)) if err != nil { return fmt.Errorf("failed to create bucket %s: %w", metaBucketName, err) } - // check and set file version in the meta bucket + + // Check and set file version in the meta bucket. 
version := meta.Get([]byte(storageVersionKey)) switch { case version == nil: - err = meta.Put([]byte(storageVersionKey), []byte(storageVersion)) + err = meta.Put([]byte(storageVersionKey), []byte(createVersion)) if err != nil { return fmt.Errorf("failed to set storage version: %w", err) } - return createV2BoltSchema(tx) + return nil - case string(version) == storageVersion: - return createV2BoltSchema(tx) + case string(version) == createVersion: + return nil - case string(version) == "1": + case string(version) == "1" && createVersion == "2": return migrateFromV1ToV2Schema(tx) default: - return fmt.Errorf("storage migration from %s to %s not implemented", string(version), storageVersion) + return fmt.Errorf("storage migration from %s to %s not implemented", string(version), createVersion) } } +func createV1BoltSchema(tx *bolt.Tx) error { + // Create the buckets for tokens and leases. + for _, bucket := range []string{TokenType, authLeaseType, secretLeaseType} { + if _, err := tx.CreateBucketIfNotExists([]byte(bucket)); err != nil { + return fmt.Errorf("failed to create %s bucket: %w", bucket, err) + } + } + + return nil +} + func createV2BoltSchema(tx *bolt.Tx) error { - // create the buckets for tokens and leases + // Create the buckets for tokens and leases. for _, bucket := range []string{TokenType, LeaseType, lookupType} { if _, err := tx.CreateBucketIfNotExists([]byte(bucket)); err != nil { return fmt.Errorf("failed to create %s bucket: %w", bucket, err) @@ -144,21 +168,21 @@ func migrateFromV1ToV2Schema(tx *bolt.Tx) error { return err } - for _, leaseType := range []string{AuthLeaseType, SecretLeaseType} { - if bucket := tx.Bucket([]byte(leaseType)); bucket != nil { + for _, v1BucketType := range []string{authLeaseType, secretLeaseType} { + if bucket := tx.Bucket([]byte(v1BucketType)); bucket != nil { bucket.ForEach(func(key, value []byte) error { autoIncKey, err := autoIncrementedLeaseKey(tx, string(key)) if err != nil { - return fmt.Errorf("error migrating %s %q key to auto incremented key: %w", leaseType, string(key), err) + return fmt.Errorf("error migrating %s %q key to auto incremented key: %w", v1BucketType, string(key), err) } if err := tx.Bucket([]byte(LeaseType)).Put(autoIncKey, value); err != nil { - return fmt.Errorf("error migrating %s %q from v1 to v2 schema: %w", leaseType, string(key), err) + return fmt.Errorf("error migrating %s %q from v1 to v2 schema: %w", v1BucketType, string(key), err) } return nil }) - if err := tx.DeleteBucket([]byte(leaseType)); err != nil { - return fmt.Errorf("failed to clean up %s bucket during v1 to v2 schema migration: %w", leaseType, err) + if err := tx.DeleteBucket([]byte(v1BucketType)); err != nil { + return fmt.Errorf("failed to clean up %s bucket during v1 to v2 schema migration: %w", v1BucketType, err) } } } @@ -386,7 +410,7 @@ func (b *BoltStorage) Clear() error { return err } } - return createBoltSchema(tx) + return createBoltSchema(tx, storageVersion) }) } diff --git a/command/agent/cache/cacheboltdb/bolt_test.go b/command/agent/cache/cacheboltdb/bolt_test.go index f42c774326637..867c04157c52d 100644 --- a/command/agent/cache/cacheboltdb/bolt_test.go +++ b/command/agent/cache/cacheboltdb/bolt_test.go @@ -2,15 +2,21 @@ package cacheboltdb import ( "context" + "fmt" "io/ioutil" "os" "path" + "path/filepath" + "strings" "testing" + "time" + "github.com/golang/protobuf/proto" "github.com/hashicorp/go-hclog" "github.com/hashicorp/vault/command/agent/cache/keymanager" "github.com/stretchr/testify/assert" 
"github.com/stretchr/testify/require" + bolt "go.etcd.io/bbolt" ) func getTestKeyManager(t *testing.T) keymanager.KeyManager { @@ -257,3 +263,75 @@ func Test_SetGetRetrievalToken(t *testing.T) { }) } } + +func TestBolt_MigtrateFromV1ToV2Schema(t *testing.T) { + ctx := context.Background() + + path, err := ioutil.TempDir("", "bolt-test") + require.NoError(t, err) + defer os.RemoveAll(path) + + dbPath := filepath.Join(path, DatabaseFileName) + db, err := bolt.Open(dbPath, 0o600, &bolt.Options{Timeout: 1 * time.Second}) + require.NoError(t, err) + err = db.Update(func(tx *bolt.Tx) error { + return createBoltSchema(tx, "1") + }) + require.NoError(t, err) + b := &BoltStorage{ + db: db, + logger: hclog.Default(), + wrapper: getTestKeyManager(t).Wrapper(), + } + + // Manually insert some items into the v1 schema. + err = db.Update(func(tx *bolt.Tx) error { + blob, err := b.wrapper.Encrypt(ctx, []byte("ignored-contents"), []byte("")) + if err != nil { + return fmt.Errorf("error encrypting contents: %w", err) + } + protoBlob, err := proto.Marshal(blob) + if err != nil { + return err + } + + if err := tx.Bucket([]byte(authLeaseType)).Put([]byte("test-auth-id-1"), protoBlob); err != nil { + return err + } + if err := tx.Bucket([]byte(authLeaseType)).Put([]byte("test-auth-id-2"), protoBlob); err != nil { + return err + } + if err := tx.Bucket([]byte(secretLeaseType)).Put([]byte("test-secret-id-1"), protoBlob); err != nil { + return err + } + + return nil + }) + require.NoError(t, err) + + // Check we have the contents we would expect for the v1 schema. + leases, err := b.GetByType(ctx, authLeaseType) + require.NoError(t, err) + assert.Len(t, leases, 2) + leases, err = b.GetByType(ctx, secretLeaseType) + require.NoError(t, err) + assert.Len(t, leases, 1) + leases, err = b.GetByType(ctx, LeaseType) + require.Error(t, err) + assert.True(t, strings.Contains(err.Error(), "not found")) + + // Now migrate to the v2 schema. + err = db.Update(migrateFromV1ToV2Schema) + require.NoError(t, err) + + // Check all the leases have been migrated into one bucket. 
+	leases, err = b.GetByType(ctx, authLeaseType)
+	require.Error(t, err)
+	assert.True(t, strings.Contains(err.Error(), "not found"))
+	leases, err = b.GetByType(ctx, secretLeaseType)
+	require.Error(t, err)
+	assert.True(t, strings.Contains(err.Error(), "not found"))
+	leases, err = b.GetByType(ctx, LeaseType)
+	require.NoError(t, err)
+	assert.Len(t, leases, 3)
+}

From 1abadccb66a8eae8feffeca350c1c035bf4a2413 Mon Sep 17 00:00:00 2001
From: Tom Proctor
Date: Mon, 25 Oct 2021 17:26:57 +0100
Subject: [PATCH 08/10] Simplify test
---
 command/agent/cache/lease_cache_test.go | 61 ++++++------------------
 1 file changed, 13 insertions(+), 48 deletions(-)

diff --git a/command/agent/cache/lease_cache_test.go b/command/agent/cache/lease_cache_test.go
index 7e5976711a9f1..0575c9d0311f4 100644
--- a/command/agent/cache/lease_cache_test.go
+++ b/command/agent/cache/lease_cache_test.go
@@ -9,8 +9,6 @@ import (
 	"net/url"
 	"os"
 	"reflect"
-	"regexp"
-	"strconv"
 	"strings"
 	"sync"
 	"testing"
@@ -916,28 +914,25 @@ func TestLeaseCache_PersistAndRestore_WithManyDependencies(t *testing.T) {
 	var requests []*SendRequest
 	var responses []*SendResponse
+	var orderedRequestPaths []string

 	// helper func to generate new auth leases with a child secret lease attached
 	authAndSecretLease := func(id int, parentToken, newToken string) {
 		t.Helper()
+		path := fmt.Sprintf("/v1/auth/approle-%d/login", id)
+		orderedRequestPaths = append(orderedRequestPaths, path)
 		requests = append(requests, &SendRequest{
-			Token: parentToken,
-			Request: httptest.NewRequest(
-				"PUT",
-				fmt.Sprintf("http://example.com/v1/auth/approle-%d/login", id),
-				strings.NewReader(""),
-			),
+			Token: parentToken,
+			Request: httptest.NewRequest("PUT", "http://example.com"+path, strings.NewReader("")),
 		})
 		responses = append(responses, newTestSendResponse(200, fmt.Sprintf(`{"auth": {"client_token": "%s", "renewable": true, "lease_duration": 600}}`, newToken)))

 		// Fetch a leased secret using the new token
+		path = fmt.Sprintf("/v1/kv/%d", id)
+		orderedRequestPaths = append(orderedRequestPaths, path)
 		requests = append(requests, &SendRequest{
-			Token: newToken,
-			Request: httptest.NewRequest(
-				"GET",
-				fmt.Sprintf("http://example.com/v1/kv/%d", id),
-				strings.NewReader(""),
-			),
+			Token: newToken,
+			Request: httptest.NewRequest("GET", "http://example.com"+path, strings.NewReader("")),
 		})
 		responses = append(responses, newTestSendResponse(200, fmt.Sprintf(`{"lease_id": "secret-%d-lease", "renewable": true, "data": {"number": %d}, "lease_duration": 600}`, id, id)))
 	}
@@ -974,47 +969,17 @@ func TestLeaseCache_PersistAndRestore_WithManyDependencies(t *testing.T) {
 	}

 	// Ensure leases are retrieved in the correct order
-	approleRegex := regexp.MustCompile("/v1/auth/approle-(\\d+)/login")
-	kvRegex := regexp.MustCompile("/v1/kv/(\\d+)")
-	maxAppRoleAncestor := -1
-	restoredTokens := make(map[int]struct{})
-	var approleTokensFound, secretsFound int
+	var processed int

 	leases, err := boltStorage.GetByType(context.Background(), cacheboltdb.LeaseType)
 	require.NoError(t, err)
 	for _, lease := range leases {
 		index, err := cachememdb.Deserialize(lease)
 		require.NoError(t, err)
-		t.Log(index.RequestPath)
-		if matches := approleRegex.FindStringSubmatch(index.RequestPath); len(matches) >= 2 {
-			id, err := strconv.Atoi(string(matches[1]))
-			require.NoError(t, err)
-			approleTokensFound++
-			restoredTokens[id] = struct{}{}
-
-			// These tokens are all children of each other, and should be restored in
-			// strictly ascending order. 
if id <= 50 {
-				require.Equal(t, maxAppRoleAncestor+1, id)
-				maxAppRoleAncestor = id
-			}
-			// These tokens should not start getting restored until their parent(25) is restored.
-			if id >= 101 {
-				require.GreaterOrEqual(t, maxAppRoleAncestor, 25)
-			}
-		} else if matches := kvRegex.FindStringSubmatch(index.RequestPath); len(matches) >= 2 {
-			id, err := strconv.Atoi(string(matches[1]))
-			require.NoError(t, err)
-			secretsFound++
-
-			// Every secret lease should be restored after its own parent (the same number)
-			_, restored := restoredTokens[id]
-			require.True(t, restored, "kv lease restored before its parent token", id)
-		}
+		require.Equal(t, orderedRequestPaths[processed], index.RequestPath)
+		processed++
 	}

-	assert.Equal(t, 111, approleTokensFound)
-	assert.Equal(t, 111, secretsFound)
-	assert.Equal(t, 50, maxAppRoleAncestor, "expected to find all approle logins in the range 0-50")
+	assert.Equal(t, len(orderedRequestPaths), processed)

 	restoredCache := testNewLeaseCache(t, nil)
 	err = restoredCache.Restore(context.Background(), boltStorage)
 	require.NoError(t, err)

 	// Now compare the cache contents before and after
 	compareBeforeAndAfter(t, lc, restoredCache, 223, 223)
 }

From b3d25c655a1506ec6e64374aac38fbd5a626a0d9 Mon Sep 17 00:00:00 2001
From: Tom Proctor
Date: Tue, 26 Oct 2021 12:44:22 +0100
Subject: [PATCH 09/10] Correct casing in comment
---
 command/agent/cache/cacheboltdb/bolt.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/command/agent/cache/cacheboltdb/bolt.go b/command/agent/cache/cacheboltdb/bolt.go
index 8e3cc73dfa5a4..afdb9bf6e4308 100644
--- a/command/agent/cache/cacheboltdb/bolt.go
+++ b/command/agent/cache/cacheboltdb/bolt.go
@@ -45,7 +45,7 @@ const (
 	// allowing us to correctly attach child contexts to their parent's context.
 	LeaseType = "lease"

-	// LookupType - v2 Bucket/type to map from a cachememdb index ID to an
+	// lookupType - v2 Bucket/type to map from a cachememdb index ID to an
 	// auto-incrementing BoltDB key. Facilitates deletes from the lease
 	// bucket using an ID instead of the auto-incrementing BoltDB key. 
lookupType = "lookup"

From 93cc29f96697aa1d63f30b63d9fefdd5c2064a6e Mon Sep 17 00:00:00 2001
From: Tom Proctor
Date: Wed, 27 Oct 2021 11:05:14 +0100
Subject: [PATCH 10/10] Additional migration test scenario
---
 command/agent/cache/cacheboltdb/bolt.go | 8 +++-
 command/agent/cache/cacheboltdb/bolt_test.go | 43 +++++++++++++++++++-
 2 files changed, 48 insertions(+), 3 deletions(-)

diff --git a/command/agent/cache/cacheboltdb/bolt.go b/command/agent/cache/cacheboltdb/bolt.go
index 8e3cc73dfa5a4..041cb167ac729 100644
--- a/command/agent/cache/cacheboltdb/bolt.go
+++ b/command/agent/cache/cacheboltdb/bolt.go
@@ -187,7 +187,11 @@ func migrateFromV1ToV2Schema(tx *bolt.Tx) error {
 		}
 	}

-	if err := tx.Bucket([]byte(metaBucketName)).Put([]byte(storageVersionKey), []byte(storageVersion)); err != nil {
+	meta, err := tx.CreateBucketIfNotExists([]byte(metaBucketName))
+	if err != nil {
+		return fmt.Errorf("failed to create meta bucket: %w", err)
+	}
+	if err := meta.Put([]byte(storageVersionKey), []byte(storageVersion)); err != nil {
 		return fmt.Errorf("failed to update schema from v1 to v2: %w", err)
 	}

@@ -314,7 +318,7 @@ func (b *BoltStorage) GetByType(ctx context.Context, indexType string) ([][]byte
 		bucket.ForEach(func(key, ciphertext []byte) error {
 			plaintext, err := b.decrypt(ctx, ciphertext)
 			if err != nil {
-				errors = multierror.Append(errors, fmt.Errorf("error decrypting entry %s: %w", string(key), err))
+				errors = multierror.Append(errors, fmt.Errorf("error decrypting entry %s: %w", key, err))
 				return nil
 			}

diff --git a/command/agent/cache/cacheboltdb/bolt_test.go b/command/agent/cache/cacheboltdb/bolt_test.go
index 867c04157c52d..ceb621005fd0c 100644
--- a/command/agent/cache/cacheboltdb/bolt_test.go
+++ b/command/agent/cache/cacheboltdb/bolt_test.go
@@ -264,7 +264,7 @@ func Test_SetGetRetrievalToken(t *testing.T) {
 	}
 }

-func TestBolt_MigtrateFromV1ToV2Schema(t *testing.T) {
+func TestBolt_MigrateFromV1ToV2Schema(t *testing.T) {
 	ctx := context.Background()

 	path, err := ioutil.TempDir("", "bolt-test")
@@ -335,3 +335,44 @@
 	require.NoError(t, err)
 	assert.Len(t, leases, 3)
 }
+
+func TestBolt_MigrateFromInvalidToV2Schema(t *testing.T) {
+	ctx := context.Background()
+
+	path, err := ioutil.TempDir("", "bolt-test")
+	require.NoError(t, err)
+	defer os.RemoveAll(path)
+
+	dbPath := filepath.Join(path, DatabaseFileName)
+	db, err := bolt.Open(dbPath, 0o600, &bolt.Options{Timeout: 1 * time.Second})
+	require.NoError(t, err)
+	b := &BoltStorage{
+		db: db,
+		logger: hclog.Default(),
+		wrapper: getTestKeyManager(t).Wrapper(),
+	}
+
+	// All GetByType calls should fail as there's no schema
+	for _, bucket := range []string{authLeaseType, secretLeaseType, LeaseType} {
+		_, err = b.GetByType(ctx, bucket)
+		require.Error(t, err)
+		assert.True(t, strings.Contains(err.Error(), "not found"))
+	}
+
+	// Now migrate to the v2 schema.
+	err = db.Update(migrateFromV1ToV2Schema)
+	require.NoError(t, err)
+
+	// Deprecated auth and secret lease buckets still shouldn't exist,
+	// so GetByType for them should still fail with bucket not found
+	for _, bucket := range []string{authLeaseType, secretLeaseType} {
+		_, err = b.GetByType(ctx, bucket)
+		require.Error(t, err)
+		assert.True(t, strings.Contains(err.Error(), "not found"))
+	}
+
+	// GetByType for LeaseType should now return an empty result
+	leases, err := b.GetByType(ctx, LeaseType)
+	require.NoError(t, err)
+	require.Len(t, leases, 0)
+}
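Taken together, the series makes the upgrade path automatic: reopening an existing cache file through NewBoltStorage runs createBoltSchema, which reads the stored version from the meta bucket and migrates a v1 file to v2 in place. The sketch below illustrates that call path under stated assumptions: it is not code from the patch, the directory argument is a placeholder, and the BoltStorageConfig field names mirror the test setup earlier in the series, so treat them as assumptions if the surrounding code differs.

package example

import (
	"context"

	hclog "github.com/hashicorp/go-hclog"
	"github.com/hashicorp/vault/command/agent/cache/cacheboltdb"
	"github.com/hashicorp/vault/command/agent/cache/keymanager"
)

// openAndListLeases reopens an agent's persistent cache. If the file on
// disk still carries the v1 schema, it is migrated to v2 during open.
func openAndListLeases(ctx context.Context, dir string, km keymanager.KeyManager) ([][]byte, error) {
	bs, err := cacheboltdb.NewBoltStorage(&cacheboltdb.BoltStorageConfig{
		Path:    dir, // directory containing vault-agent-cache.db (placeholder)
		Logger:  hclog.Default(),
		Wrapper: km.Wrapper(), // encrypts/decrypts the stored entries
	})
	if err != nil {
		return nil, err
	}
	defer bs.Close()

	// Former auth-lease and secret-lease entries now come back from the
	// single ordered lease bucket, parents before children, in the shape
	// LeaseCache.Restore expects.
	return bs.GetByType(ctx, cacheboltdb.LeaseType)
}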