From 14cffc38bb774c6a776d022e6145800501805214 Mon Sep 17 00:00:00 2001 From: Tom Proctor Date: Wed, 27 Oct 2021 11:36:48 +0100 Subject: [PATCH] agent/cache: Store leases in-order in persistent cache so that restore respects dependencies (#12843) --- changelog/12843.txt | 3 + command/agent/cache/cacheboltdb/bolt.go | 230 ++++++++++++++----- command/agent/cache/cacheboltdb/bolt_test.go | 155 +++++++++++-- command/agent/cache/lease_cache.go | 116 ++++------ command/agent/cache/lease_cache_test.go | 194 +++++++++++----- 5 files changed, 507 insertions(+), 191 deletions(-) create mode 100644 changelog/12843.txt diff --git a/changelog/12843.txt b/changelog/12843.txt new file mode 100644 index 0000000000000..2beee7f660d4b --- /dev/null +++ b/changelog/12843.txt @@ -0,0 +1,3 @@ +```release-note:improvement +agent/cache: Process persistent cache leases in dependency order during restore to ensure child leases are always correctly restored +``` diff --git a/command/agent/cache/cacheboltdb/bolt.go b/command/agent/cache/cacheboltdb/bolt.go index 0a39c9cc15c61..c91b47a0da90b 100644 --- a/command/agent/cache/cacheboltdb/bolt.go +++ b/command/agent/cache/cacheboltdb/bolt.go @@ -2,6 +2,7 @@ package cacheboltdb import ( "context" + "encoding/binary" "fmt" "os" "path/filepath" @@ -17,7 +18,7 @@ import ( const ( // Keep track of schema version for future migrations storageVersionKey = "version" - storageVersion = "1" + storageVersion = "2" // v2 merges auth-lease and secret-lease buckets into one ordered bucket // DatabaseFileName - filename for the persistent cache file DatabaseFileName = "vault-agent-cache.db" @@ -26,15 +27,29 @@ const ( // bootstrapping keys metaBucketName = "meta" - // SecretLeaseType - Bucket/type for leases with secret info - SecretLeaseType = "secret-lease" + // DEPRECATED: secretLeaseType - v1 Bucket/type for leases with secret info + secretLeaseType = "secret-lease" - // AuthLeaseType - Bucket/type for leases with auth info - AuthLeaseType = "auth-lease" + // DEPRECATED: authLeaseType - v1 Bucket/type for leases with auth info + authLeaseType = "auth-lease" // TokenType - Bucket/type for auto-auth tokens TokenType = "token" + // LeaseType - v2 Bucket/type for auth AND secret leases. + // + // This bucket stores keys in the same order they were created using + // auto-incrementing keys and the fact that BoltDB stores keys in byte + // slice order. This means when we iterate through this bucket during + // restore, we will always restore parent tokens before their children, + // allowing us to correctly attach child contexts to their parent's context. + LeaseType = "lease" + + // lookupType - v2 Bucket/type to map from a memcachedb index ID to an + // auto-incrementing BoltDB key. Facilitates deletes from the lease + // bucket using an ID instead of the auto-incrementing BoltDB key. + lookupType = "lookup" + // AutoAuthToken - key for the latest auto-auth token AutoAuthToken = "auto-auth-token" @@ -71,7 +86,7 @@ func NewBoltStorage(config *BoltStorageConfig) (*BoltStorage, error) { return nil, err } err = db.Update(func(tx *bolt.Tx) error { - return createBoltSchema(tx) + return createBoltSchema(tx, storageVersion) }) if err != nil { return nil, err @@ -85,39 +100,130 @@ func NewBoltStorage(config *BoltStorageConfig) (*BoltStorage, error) { return bs, nil } -func createBoltSchema(tx *bolt.Tx) error { - // create the meta bucket at the top level +func createBoltSchema(tx *bolt.Tx, createVersion string) error { + switch { + case createVersion == "1": + if err := createV1BoltSchema(tx); err != nil { + return err + } + case createVersion == "2": + if err := createV2BoltSchema(tx); err != nil { + return err + } + default: + return fmt.Errorf("schema version %s not supported", createVersion) + } + meta, err := tx.CreateBucketIfNotExists([]byte(metaBucketName)) if err != nil { return fmt.Errorf("failed to create bucket %s: %w", metaBucketName, err) } - // check and set file version in the meta bucket + + // Check and set file version in the meta bucket. version := meta.Get([]byte(storageVersionKey)) switch { case version == nil: - err = meta.Put([]byte(storageVersionKey), []byte(storageVersion)) + err = meta.Put([]byte(storageVersionKey), []byte(createVersion)) if err != nil { return fmt.Errorf("failed to set storage version: %w", err) } - case string(version) != storageVersion: - return fmt.Errorf("storage migration from %s to %s not implemented", string(version), storageVersion) + + return nil + + case string(version) == createVersion: + return nil + + case string(version) == "1" && createVersion == "2": + return migrateFromV1ToV2Schema(tx) + + default: + return fmt.Errorf("storage migration from %s to %s not implemented", string(version), createVersion) + } +} + +func createV1BoltSchema(tx *bolt.Tx) error { + // Create the buckets for tokens and leases. + for _, bucket := range []string{TokenType, authLeaseType, secretLeaseType} { + if _, err := tx.CreateBucketIfNotExists([]byte(bucket)); err != nil { + return fmt.Errorf("failed to create %s bucket: %w", bucket, err) + } + } + + return nil +} + +func createV2BoltSchema(tx *bolt.Tx) error { + // Create the buckets for tokens and leases. + for _, bucket := range []string{TokenType, LeaseType, lookupType} { + if _, err := tx.CreateBucketIfNotExists([]byte(bucket)); err != nil { + return fmt.Errorf("failed to create %s bucket: %w", bucket, err) + } + } + + return nil +} + +func migrateFromV1ToV2Schema(tx *bolt.Tx) error { + if err := createV2BoltSchema(tx); err != nil { + return err } - // create the buckets for tokens and leases - _, err = tx.CreateBucketIfNotExists([]byte(TokenType)) + for _, v1BucketType := range []string{authLeaseType, secretLeaseType} { + if bucket := tx.Bucket([]byte(v1BucketType)); bucket != nil { + bucket.ForEach(func(key, value []byte) error { + autoIncKey, err := autoIncrementedLeaseKey(tx, string(key)) + if err != nil { + return fmt.Errorf("error migrating %s %q key to auto incremented key: %w", v1BucketType, string(key), err) + } + if err := tx.Bucket([]byte(LeaseType)).Put(autoIncKey, value); err != nil { + return fmt.Errorf("error migrating %s %q from v1 to v2 schema: %w", v1BucketType, string(key), err) + } + return nil + }) + + if err := tx.DeleteBucket([]byte(v1BucketType)); err != nil { + return fmt.Errorf("failed to clean up %s bucket during v1 to v2 schema migration: %w", v1BucketType, err) + } + } + } + + meta, err := tx.CreateBucketIfNotExists([]byte(metaBucketName)) if err != nil { - return fmt.Errorf("failed to create token bucket: %w", err) + return fmt.Errorf("failed to create meta bucket: %w", err) } - _, err = tx.CreateBucketIfNotExists([]byte(AuthLeaseType)) + if err := meta.Put([]byte(storageVersionKey), []byte(storageVersion)); err != nil { + return fmt.Errorf("failed to update schema from v1 to v2: %w", err) + } + + return nil +} + +func autoIncrementedLeaseKey(tx *bolt.Tx, id string) ([]byte, error) { + leaseBucket := tx.Bucket([]byte(LeaseType)) + keyValue, err := leaseBucket.NextSequence() if err != nil { - return fmt.Errorf("failed to create auth lease bucket: %w", err) + return nil, fmt.Errorf("failed to generate lookup key for id %q: %w", id, err) } - _, err = tx.CreateBucketIfNotExists([]byte(SecretLeaseType)) + + key := make([]byte, 8) + // MUST be big endian, because keys are ordered by byte slice comparison + // which progressively compares each byte in the slice starting at index 0. + // BigEndian in the range [255-257] looks like this: + // [0 0 0 0 0 0 0 255] + // [0 0 0 0 0 0 1 0] + // [0 0 0 0 0 0 1 1] + // LittleEndian in the same range looks like this: + // [255 0 0 0 0 0 0 0] + // [0 1 0 0 0 0 0 0] + // [1 1 0 0 0 0 0 0] + binary.BigEndian.PutUint64(key, keyValue) + + err = tx.Bucket([]byte(lookupType)).Put([]byte(id), key) if err != nil { - return fmt.Errorf("failed to create secret lease bucket: %w", err) + return nil, err } - return nil + return key, nil } // Set an index (token or lease) in bolt storage @@ -133,44 +239,56 @@ func (b *BoltStorage) Set(ctx context.Context, id string, plaintext []byte, inde } return b.db.Update(func(tx *bolt.Tx) error { - s := tx.Bucket([]byte(indexType)) - if s == nil { - return fmt.Errorf("bucket %q not found", indexType) - } - // If this is an auto-auth token, also stash it in the meta bucket for - // easy retrieval upon restore - if indexType == TokenType { + var key []byte + switch indexType { + case LeaseType: + // If this is a lease type, generate an auto-incrementing key and + // store an ID -> key lookup entry + key, err = autoIncrementedLeaseKey(tx, id) + if err != nil { + return err + } + case TokenType: + // If this is an auto-auth token, also stash it in the meta bucket for + // easy retrieval upon restore + key = []byte(id) meta := tx.Bucket([]byte(metaBucketName)) if err := meta.Put([]byte(AutoAuthToken), protoBlob); err != nil { return fmt.Errorf("failed to set latest auto-auth token: %w", err) } + default: + return fmt.Errorf("called Set for unsupported type %q", indexType) } - return s.Put([]byte(id), protoBlob) - }) -} - -func getBucketIDs(b *bolt.Bucket) ([][]byte, error) { - ids := [][]byte{} - err := b.ForEach(func(k, v []byte) error { - ids = append(ids, k) - return nil + s := tx.Bucket([]byte(indexType)) + if s == nil { + return fmt.Errorf("bucket %q not found", indexType) + } + return s.Put(key, protoBlob) }) - return ids, err } -// Delete an index (token or lease) by id from bolt storage -func (b *BoltStorage) Delete(id string) error { +// Delete an index (token or lease) by key from bolt storage +func (b *BoltStorage) Delete(id string, indexType string) error { return b.db.Update(func(tx *bolt.Tx) error { - // Since Delete returns a nil error if the key doesn't exist, just call - // delete in all three index buckets without checking existence first - if err := tx.Bucket([]byte(TokenType)).Delete([]byte(id)); err != nil { - return fmt.Errorf("failed to delete %q from token bucket: %w", id, err) + key := []byte(id) + if indexType == LeaseType { + key = tx.Bucket([]byte(lookupType)).Get(key) + if key == nil { + return fmt.Errorf("failed to lookup bolt DB key for id %q", id) + } + + err := tx.Bucket([]byte(lookupType)).Delete([]byte(id)) + if err != nil { + return fmt.Errorf("failed to delete %q from lookup bucket: %w", id, err) + } } - if err := tx.Bucket([]byte(AuthLeaseType)).Delete([]byte(id)); err != nil { - return fmt.Errorf("failed to delete %q from auth lease bucket: %w", id, err) + + bucket := tx.Bucket([]byte(indexType)) + if bucket == nil { + return fmt.Errorf("bucket %q not found during delete", indexType) } - if err := tx.Bucket([]byte(SecretLeaseType)).Delete([]byte(id)); err != nil { - return fmt.Errorf("failed to delete %q from secret lease bucket: %w", id, err) + if err := bucket.Delete(key); err != nil { + return fmt.Errorf("failed to delete %q from %q bucket: %w", id, indexType, err) } b.logger.Trace("deleted index from bolt db", "id", id) return nil @@ -193,10 +311,14 @@ func (b *BoltStorage) GetByType(ctx context.Context, indexType string) ([][]byte err := b.db.View(func(tx *bolt.Tx) error { var errors *multierror.Error - tx.Bucket([]byte(indexType)).ForEach(func(id, ciphertext []byte) error { + bucket := tx.Bucket([]byte(indexType)) + if bucket == nil { + return fmt.Errorf("bucket %q not found", indexType) + } + bucket.ForEach(func(key, ciphertext []byte) error { plaintext, err := b.decrypt(ctx, ciphertext) if err != nil { - errors = multierror.Append(errors, fmt.Errorf("error decrypting index id %s: %w", id, err)) + errors = multierror.Append(errors, fmt.Errorf("error decrypting entry %s: %w", key, err)) return nil } @@ -247,11 +369,11 @@ func (b *BoltStorage) GetRetrievalToken() ([]byte, error) { var token []byte err := b.db.View(func(tx *bolt.Tx) error { - keyBucket := tx.Bucket([]byte(metaBucketName)) - if keyBucket == nil { + metaBucket := tx.Bucket([]byte(metaBucketName)) + if metaBucket == nil { return fmt.Errorf("bucket %q not found", metaBucketName) } - value := keyBucket.Get([]byte(RetrievalTokenMaterial)) + value := metaBucket.Get([]byte(RetrievalTokenMaterial)) if value != nil { token = make([]byte, len(value)) copy(token, value) @@ -286,13 +408,13 @@ func (b *BoltStorage) Close() error { // the schema/layout func (b *BoltStorage) Clear() error { return b.db.Update(func(tx *bolt.Tx) error { - for _, name := range []string{AuthLeaseType, SecretLeaseType, TokenType} { + for _, name := range []string{TokenType, LeaseType, lookupType} { b.logger.Trace("deleting bolt bucket", "name", name) if err := tx.DeleteBucket([]byte(name)); err != nil { return err } } - return createBoltSchema(tx) + return createBoltSchema(tx, storageVersion) }) } diff --git a/command/agent/cache/cacheboltdb/bolt_test.go b/command/agent/cache/cacheboltdb/bolt_test.go index 8dfafc4ee63b2..ceb621005fd0c 100644 --- a/command/agent/cache/cacheboltdb/bolt_test.go +++ b/command/agent/cache/cacheboltdb/bolt_test.go @@ -2,15 +2,21 @@ package cacheboltdb import ( "context" + "fmt" "io/ioutil" "os" "path" + "path/filepath" + "strings" "testing" + "time" + "github.com/golang/protobuf/proto" "github.com/hashicorp/go-hclog" "github.com/hashicorp/vault/command/agent/cache/keymanager" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" + bolt "go.etcd.io/bbolt" ) func getTestKeyManager(t *testing.T) keymanager.KeyManager { @@ -36,13 +42,13 @@ func TestBolt_SetGet(t *testing.T) { }) require.NoError(t, err) - secrets, err := b.GetByType(ctx, SecretLeaseType) + secrets, err := b.GetByType(ctx, LeaseType) assert.NoError(t, err) require.Len(t, secrets, 0) - err = b.Set(ctx, "test1", []byte("hello"), SecretLeaseType) + err = b.Set(ctx, "test1", []byte("hello"), LeaseType) assert.NoError(t, err) - secrets, err = b.GetByType(ctx, SecretLeaseType) + secrets, err = b.GetByType(ctx, LeaseType) assert.NoError(t, err) require.Len(t, secrets, 1) assert.Equal(t, []byte("hello"), secrets[0]) @@ -62,19 +68,19 @@ func TestBoltDelete(t *testing.T) { }) require.NoError(t, err) - err = b.Set(ctx, "secret-test1", []byte("hello1"), SecretLeaseType) + err = b.Set(ctx, "secret-test1", []byte("hello1"), LeaseType) require.NoError(t, err) - err = b.Set(ctx, "secret-test2", []byte("hello2"), SecretLeaseType) + err = b.Set(ctx, "secret-test2", []byte("hello2"), LeaseType) require.NoError(t, err) - secrets, err := b.GetByType(ctx, SecretLeaseType) + secrets, err := b.GetByType(ctx, LeaseType) require.NoError(t, err) assert.Len(t, secrets, 2) assert.ElementsMatch(t, [][]byte{[]byte("hello1"), []byte("hello2")}, secrets) - err = b.Delete("secret-test1") + err = b.Delete("secret-test1", LeaseType) require.NoError(t, err) - secrets, err = b.GetByType(ctx, SecretLeaseType) + secrets, err = b.GetByType(ctx, LeaseType) require.NoError(t, err) require.Len(t, secrets, 1) assert.Equal(t, []byte("hello2"), secrets[0]) @@ -95,19 +101,20 @@ func TestBoltClear(t *testing.T) { require.NoError(t, err) // Populate the bolt db - err = b.Set(ctx, "secret-test1", []byte("hello"), SecretLeaseType) + err = b.Set(ctx, "secret-test1", []byte("hello1"), LeaseType) require.NoError(t, err) - secrets, err := b.GetByType(ctx, SecretLeaseType) + secrets, err := b.GetByType(ctx, LeaseType) require.NoError(t, err) require.Len(t, secrets, 1) - assert.Equal(t, []byte("hello"), secrets[0]) + assert.Equal(t, []byte("hello1"), secrets[0]) - err = b.Set(ctx, "auth-test1", []byte("hello"), AuthLeaseType) + err = b.Set(ctx, "auth-test1", []byte("hello2"), LeaseType) require.NoError(t, err) - auths, err := b.GetByType(ctx, AuthLeaseType) + auths, err := b.GetByType(ctx, LeaseType) require.NoError(t, err) - require.Len(t, auths, 1) - assert.Equal(t, []byte("hello"), auths[0]) + require.Len(t, auths, 2) + assert.Equal(t, []byte("hello1"), auths[0]) + assert.Equal(t, []byte("hello2"), auths[1]) err = b.Set(ctx, "token-test1", []byte("hello"), TokenType) require.NoError(t, err) @@ -119,10 +126,7 @@ func TestBoltClear(t *testing.T) { // Clear the bolt db, and check that it's indeed clear err = b.Clear() require.NoError(t, err) - secrets, err = b.GetByType(ctx, SecretLeaseType) - require.NoError(t, err) - assert.Len(t, secrets, 0) - auths, err = b.GetByType(ctx, AuthLeaseType) + auths, err = b.GetByType(ctx, LeaseType) require.NoError(t, err) assert.Len(t, auths, 0) tokens, err = b.GetByType(ctx, TokenType) @@ -259,3 +263,116 @@ func Test_SetGetRetrievalToken(t *testing.T) { }) } } + +func TestBolt_MigrateFromV1ToV2Schema(t *testing.T) { + ctx := context.Background() + + path, err := ioutil.TempDir("", "bolt-test") + require.NoError(t, err) + defer os.RemoveAll(path) + + dbPath := filepath.Join(path, DatabaseFileName) + db, err := bolt.Open(dbPath, 0o600, &bolt.Options{Timeout: 1 * time.Second}) + require.NoError(t, err) + err = db.Update(func(tx *bolt.Tx) error { + return createBoltSchema(tx, "1") + }) + require.NoError(t, err) + b := &BoltStorage{ + db: db, + logger: hclog.Default(), + wrapper: getTestKeyManager(t).Wrapper(), + } + + // Manually insert some items into the v1 schema. + err = db.Update(func(tx *bolt.Tx) error { + blob, err := b.wrapper.Encrypt(ctx, []byte("ignored-contents"), []byte("")) + if err != nil { + return fmt.Errorf("error encrypting contents: %w", err) + } + protoBlob, err := proto.Marshal(blob) + if err != nil { + return err + } + + if err := tx.Bucket([]byte(authLeaseType)).Put([]byte("test-auth-id-1"), protoBlob); err != nil { + return err + } + if err := tx.Bucket([]byte(authLeaseType)).Put([]byte("test-auth-id-2"), protoBlob); err != nil { + return err + } + if err := tx.Bucket([]byte(secretLeaseType)).Put([]byte("test-secret-id-1"), protoBlob); err != nil { + return err + } + + return nil + }) + require.NoError(t, err) + + // Check we have the contents we would expect for the v1 schema. + leases, err := b.GetByType(ctx, authLeaseType) + require.NoError(t, err) + assert.Len(t, leases, 2) + leases, err = b.GetByType(ctx, secretLeaseType) + require.NoError(t, err) + assert.Len(t, leases, 1) + leases, err = b.GetByType(ctx, LeaseType) + require.Error(t, err) + assert.True(t, strings.Contains(err.Error(), "not found")) + + // Now migrate to the v2 schema. + err = db.Update(migrateFromV1ToV2Schema) + require.NoError(t, err) + + // Check all the leases have been migrated into one bucket. + leases, err = b.GetByType(ctx, authLeaseType) + require.Error(t, err) + assert.True(t, strings.Contains(err.Error(), "not found")) + leases, err = b.GetByType(ctx, secretLeaseType) + require.Error(t, err) + assert.True(t, strings.Contains(err.Error(), "not found")) + leases, err = b.GetByType(ctx, LeaseType) + require.NoError(t, err) + assert.Len(t, leases, 3) +} + +func TestBolt_MigrateFromInvalidToV2Schema(t *testing.T) { + ctx := context.Background() + + path, err := ioutil.TempDir("", "bolt-test") + require.NoError(t, err) + defer os.RemoveAll(path) + + dbPath := filepath.Join(path, DatabaseFileName) + db, err := bolt.Open(dbPath, 0o600, &bolt.Options{Timeout: 1 * time.Second}) + require.NoError(t, err) + b := &BoltStorage{ + db: db, + logger: hclog.Default(), + wrapper: getTestKeyManager(t).Wrapper(), + } + + // All GetByType calls should fail as there's no schema + for _, bucket := range []string{authLeaseType, secretLeaseType, LeaseType} { + _, err = b.GetByType(ctx, bucket) + require.Error(t, err) + assert.True(t, strings.Contains(err.Error(), "not found")) + } + + // Now migrate to the v2 schema. + err = db.Update(migrateFromV1ToV2Schema) + require.NoError(t, err) + + // Deprecated auth and secret lease buckets still shouldn't exist + // All GetByType calls should fail as there's no schema + for _, bucket := range []string{authLeaseType, secretLeaseType} { + _, err = b.GetByType(ctx, bucket) + require.Error(t, err) + assert.True(t, strings.Contains(err.Error(), "not found")) + } + + // GetByType for LeaseType should now return an empty result + leases, err := b.GetByType(ctx, LeaseType) + require.NoError(t, err) + require.Len(t, leases, 0) +} diff --git a/command/agent/cache/lease_cache.go b/command/agent/cache/lease_cache.go index ad6d71c0c902f..8ecea7d93aeb6 100644 --- a/command/agent/cache/lease_cache.go +++ b/command/agent/cache/lease_cache.go @@ -357,7 +357,7 @@ func (c *LeaseCache) Send(ctx context.Context, req *SendRequest) (*SendResponse, index.Lease = secret.LeaseID index.LeaseToken = req.Token - index.Type = cacheboltdb.SecretLeaseType + index.Type = cacheboltdb.LeaseType case secret.Auth != nil: c.logger.Debug("processing auth response", "method", req.Request.Method, "path", req.Request.URL.Path) @@ -387,7 +387,7 @@ func (c *LeaseCache) Send(ctx context.Context, req *SendRequest) (*SendResponse, index.Token = secret.Auth.ClientToken index.TokenAccessor = secret.Auth.Accessor - index.Type = cacheboltdb.AuthLeaseType + index.Type = cacheboltdb.LeaseType default: // We shouldn't be hitting this, but will err on the side of caution and @@ -459,7 +459,7 @@ func (c *LeaseCache) startRenewing(ctx context.Context, index *cachememdb.Index, return } c.logger.Debug("evicting index from cache", "id", id, "method", req.Request.Method, "path", req.Request.URL.Path) - err := c.Evict(id) + err := c.Evict(index) if err != nil { c.logger.Error("failed to evict index", "id", id, "error", err) return @@ -556,7 +556,9 @@ func computeIndexID(req *SendRequest) (string, error) { // Append req.Token into the byte slice. This is needed since auto-auth'ed // requests sets the token directly into SendRequest.Token - b.Write([]byte(req.Token)) + if _, err := b.Write([]byte(req.Token)); err != nil { + return "", fmt.Errorf("failed to write token to hash input: %w", err) + } return hex.EncodeToString(cryptoutil.Blake2b256Hash(string(b.Bytes()))), nil } @@ -921,12 +923,12 @@ func (c *LeaseCache) Set(ctx context.Context, index *cachememdb.Index) error { } if c.ps != nil { - b, err := index.Serialize() + plaintext, err := index.Serialize() if err != nil { return err } - if err := c.ps.Set(ctx, index.ID, b, index.Type); err != nil { + if err := c.ps.Set(ctx, index.ID, plaintext, index.Type); err != nil { return err } c.logger.Trace("set entry in persistent storage", "type", index.Type, "path", index.RequestPath, "id", index.ID) @@ -937,16 +939,16 @@ func (c *LeaseCache) Set(ctx context.Context, index *cachememdb.Index) error { // Evict removes an Index from the cachememdb, and also removes it from the // persistent cache (if enabled) -func (c *LeaseCache) Evict(id string) error { - if err := c.db.Evict(cachememdb.IndexNameID, id); err != nil { +func (c *LeaseCache) Evict(index *cachememdb.Index) error { + if err := c.db.Evict(cachememdb.IndexNameID, index.ID); err != nil { return err } if c.ps != nil { - if err := c.ps.Delete(id); err != nil { + if err := c.ps.Delete(index.ID, index.Type); err != nil { return err } - c.logger.Trace("deleted item from persistent storage", "id", id) + c.logger.Trace("deleted item from persistent storage", "id", index.ID) } return nil @@ -970,39 +972,54 @@ func (c *LeaseCache) Flush() error { // tokens first, since restoring a lease's renewal context and watcher requires // looking up the token in the cachememdb. func (c *LeaseCache) Restore(ctx context.Context, storage *cacheboltdb.BoltStorage) error { - var errors *multierror.Error + var errs *multierror.Error // Process tokens first tokens, err := storage.GetByType(ctx, cacheboltdb.TokenType) if err != nil { - errors = multierror.Append(errors, err) + errs = multierror.Append(errs, err) } else { if err := c.restoreTokens(tokens); err != nil { - errors = multierror.Append(errors, err) + errs = multierror.Append(errs, err) } } - // Then process auth leases - authLeases, err := storage.GetByType(ctx, cacheboltdb.AuthLeaseType) + // Then process leases + leases, err := storage.GetByType(ctx, cacheboltdb.LeaseType) if err != nil { - errors = multierror.Append(errors, err) + errs = multierror.Append(errs, err) } else { - if err := c.restoreLeases(authLeases); err != nil { - errors = multierror.Append(errors, err) - } - } + for _, lease := range leases { + newIndex, err := cachememdb.Deserialize(lease) + if err != nil { + errs = multierror.Append(errs, err) + continue + } - // Then process secret leases - secretLeases, err := storage.GetByType(ctx, cacheboltdb.SecretLeaseType) - if err != nil { - errors = multierror.Append(errors, err) - } else { - if err := c.restoreLeases(secretLeases); err != nil { - errors = multierror.Append(errors, err) + c.logger.Trace("restoring lease", "id", newIndex.ID, "path", newIndex.RequestPath) + + // Check if this lease has already expired + expired, err := c.hasExpired(time.Now().UTC(), newIndex) + if err != nil { + c.logger.Warn("failed to check if lease is expired", "id", newIndex.ID, "error", err) + } + if expired { + continue + } + + if err := c.restoreLeaseRenewCtx(newIndex); err != nil { + errs = multierror.Append(errs, err) + continue + } + if err := c.db.Set(newIndex); err != nil { + errs = multierror.Append(errs, err) + continue + } + c.logger.Trace("restored lease", "id", newIndex.ID, "path", newIndex.RequestPath) } } - return errors.ErrorOrNil() + return errs.ErrorOrNil() } func (c *LeaseCache) restoreTokens(tokens [][]byte) error { @@ -1025,39 +1042,6 @@ func (c *LeaseCache) restoreTokens(tokens [][]byte) error { return errors.ErrorOrNil() } -func (c *LeaseCache) restoreLeases(leases [][]byte) error { - var errors *multierror.Error - - for _, lease := range leases { - newIndex, err := cachememdb.Deserialize(lease) - if err != nil { - errors = multierror.Append(errors, err) - continue - } - - // Check if this lease has already expired - expired, err := c.hasExpired(time.Now().UTC(), newIndex) - if err != nil { - c.logger.Warn("failed to check if lease is expired", "id", newIndex.ID, "error", err) - } - if expired { - continue - } - - if err := c.restoreLeaseRenewCtx(newIndex); err != nil { - errors = multierror.Append(errors, err) - continue - } - if err := c.db.Set(newIndex); err != nil { - errors = multierror.Append(errors, err) - continue - } - c.logger.Trace("restored lease", "id", newIndex.ID, "path", newIndex.RequestPath) - } - - return errors.ErrorOrNil() -} - // restoreLeaseRenewCtx re-creates a RenewCtx for an index object and starts // the watcher go routine func (c *LeaseCache) restoreLeaseRenewCtx(index *cachememdb.Index) error { @@ -1300,13 +1284,13 @@ func (c *LeaseCache) hasExpired(currentTime time.Time, index *cachememdb.Index) elapsed := currentTime.Sub(index.LastRenewed) var leaseDuration int - switch index.Type { - case cacheboltdb.AuthLeaseType: - leaseDuration = secret.Auth.LeaseDuration - case cacheboltdb.SecretLeaseType: + switch { + case secret.LeaseID != "": leaseDuration = secret.LeaseDuration + case secret.Auth != nil: + leaseDuration = secret.Auth.LeaseDuration default: - return false, fmt.Errorf("index type %q unexpected in expiration check", index.Type) + return false, errors.New("secret without lease encountered in expiration check") } if int(elapsed.Seconds()) > leaseDuration { diff --git a/command/agent/cache/lease_cache_test.go b/command/agent/cache/lease_cache_test.go index 66fb56be6b202..0575c9d0311f4 100644 --- a/command/agent/cache/lease_cache_test.go +++ b/command/agent/cache/lease_cache_test.go @@ -36,7 +36,6 @@ func testNewLeaseCache(t *testing.T, responses []*SendResponse) *LeaseCache { if err != nil { t.Fatal(err) } - lc, err := NewLeaseCache(&LeaseCacheConfig{ Client: client, BaseContext: context.Background(), @@ -46,7 +45,6 @@ func testNewLeaseCache(t *testing.T, responses []*SendResponse) *LeaseCache { if err != nil { t.Fatal(err) } - return lc } @@ -175,7 +173,7 @@ func TestLeaseCache_SendCacheable(t *testing.T) { lc := testNewLeaseCache(t, responses) // Register an token so that the token and lease requests are cached - lc.RegisterAutoAuthToken("autoauthtoken") + require.NoError(t, lc.RegisterAutoAuthToken("autoauthtoken")) // Make a request. A response with a new token is returned to the lease // cache and that will be cached. @@ -600,6 +598,7 @@ func TestLeaseCache_Concurrent_NonCacheable(t *testing.T) { defer cancel() wgDoneCh := make(chan struct{}) + errCh := make(chan error) go func() { var wg sync.WaitGroup @@ -618,7 +617,7 @@ func TestLeaseCache_Concurrent_NonCacheable(t *testing.T) { _, err := lc.Send(ctx, sendReq) if err != nil { - t.Fatal(err) + errCh <- err } }() } @@ -631,6 +630,8 @@ func TestLeaseCache_Concurrent_NonCacheable(t *testing.T) { case <-ctx.Done(): t.Fatalf("request timed out: %s", ctx.Err()) case <-wgDoneCh: + case err := <-errCh: + t.Fatal(err) } } @@ -649,6 +650,7 @@ func TestLeaseCache_Concurrent_Cacheable(t *testing.T) { var cacheCount atomic.Uint32 wgDoneCh := make(chan struct{}) + errCh := make(chan error) go func() { var wg sync.WaitGroup @@ -666,7 +668,7 @@ func TestLeaseCache_Concurrent_Cacheable(t *testing.T) { resp, err := lc.Send(ctx, sendReq) if err != nil { - t.Fatal(err) + errCh <- err } if resp.CacheMeta != nil && resp.CacheMeta.Hit { @@ -683,6 +685,8 @@ func TestLeaseCache_Concurrent_Cacheable(t *testing.T) { case <-ctx.Done(): t.Fatalf("request timed out: %s", ctx.Err()) case <-wgDoneCh: + case err := <-errCh: + t.Fatal(err) } // Ensure that all but one request got proxied. The other 99 should be @@ -711,6 +715,45 @@ func setupBoltStorage(t *testing.T) (tempCacheDir string, boltStorage *cachebolt return tempCacheDir, boltStorage } +func compareBeforeAndAfter(t *testing.T, before, after *LeaseCache, beforeLen, afterLen int) { + beforeDB, err := before.db.GetByPrefix(cachememdb.IndexNameID) + require.NoError(t, err) + assert.Len(t, beforeDB, beforeLen) + afterDB, err := after.db.GetByPrefix(cachememdb.IndexNameID) + require.NoError(t, err) + assert.Len(t, afterDB, afterLen) + for _, cachedItem := range beforeDB { + if strings.Contains(cachedItem.RequestPath, "expect-missing") { + continue + } + restoredItem, err := after.db.Get(cachememdb.IndexNameID, cachedItem.ID) + require.NoError(t, err) + + assert.NoError(t, err) + assert.Equal(t, cachedItem.ID, restoredItem.ID) + assert.Equal(t, cachedItem.Lease, restoredItem.Lease) + assert.Equal(t, cachedItem.LeaseToken, restoredItem.LeaseToken) + assert.Equal(t, cachedItem.Namespace, restoredItem.Namespace) + assert.Equal(t, cachedItem.RequestHeader, restoredItem.RequestHeader) + assert.Equal(t, cachedItem.RequestMethod, restoredItem.RequestMethod) + assert.Equal(t, cachedItem.RequestPath, restoredItem.RequestPath) + assert.Equal(t, cachedItem.RequestToken, restoredItem.RequestToken) + assert.Equal(t, cachedItem.Response, restoredItem.Response) + assert.Equal(t, cachedItem.Token, restoredItem.Token) + assert.Equal(t, cachedItem.TokenAccessor, restoredItem.TokenAccessor) + assert.Equal(t, cachedItem.TokenParent, restoredItem.TokenParent) + + // check what we can in the renewal context + assert.NotEmpty(t, restoredItem.RenewCtxInfo.CancelFunc) + assert.NotZero(t, restoredItem.RenewCtxInfo.DoneCh) + require.NotEmpty(t, restoredItem.RenewCtxInfo.Ctx) + assert.Equal(t, + cachedItem.RenewCtxInfo.Ctx.Value(contextIndexID), + restoredItem.RenewCtxInfo.Ctx.Value(contextIndexID), + ) + } +} + func TestLeaseCache_PersistAndRestore(t *testing.T) { // Emulate responses from the api proxy. The first two use the auto-auth // token, and the others use another token. @@ -827,7 +870,7 @@ func TestLeaseCache_PersistAndRestore(t *testing.T) { require.NotEmpty(t, deleteIDs) for _, deleteID := range deleteIDs { - err = boltStorage.Delete(deleteID) + err = boltStorage.Delete(deleteID, cacheboltdb.LeaseType) require.NoError(t, err) } @@ -842,43 +885,8 @@ func TestLeaseCache_PersistAndRestore(t *testing.T) { assert.Len(t, errors.Errors, 1) assert.Contains(t, errors.Error(), "could not find parent Token testtoken2") - // Now compare before and after - beforeDB, err := lc.db.GetByPrefix(cachememdb.IndexNameID) - require.NoError(t, err) - assert.Len(t, beforeDB, 7) - for _, cachedItem := range beforeDB { - if strings.Contains(cachedItem.RequestPath, "expect-missing") { - continue - } - restoredItem, err := restoredCache.db.Get(cachememdb.IndexNameID, cachedItem.ID) - require.NoError(t, err) - - assert.NoError(t, err) - assert.Equal(t, cachedItem.ID, restoredItem.ID) - assert.Equal(t, cachedItem.Lease, restoredItem.Lease) - assert.Equal(t, cachedItem.LeaseToken, restoredItem.LeaseToken) - assert.Equal(t, cachedItem.Namespace, restoredItem.Namespace) - assert.Equal(t, cachedItem.RequestHeader, restoredItem.RequestHeader) - assert.Equal(t, cachedItem.RequestMethod, restoredItem.RequestMethod) - assert.Equal(t, cachedItem.RequestPath, restoredItem.RequestPath) - assert.Equal(t, cachedItem.RequestToken, restoredItem.RequestToken) - assert.Equal(t, cachedItem.Response, restoredItem.Response) - assert.Equal(t, cachedItem.Token, restoredItem.Token) - assert.Equal(t, cachedItem.TokenAccessor, restoredItem.TokenAccessor) - assert.Equal(t, cachedItem.TokenParent, restoredItem.TokenParent) - - // check what we can in the renewal context - assert.NotEmpty(t, restoredItem.RenewCtxInfo.CancelFunc) - assert.NotZero(t, restoredItem.RenewCtxInfo.DoneCh) - require.NotEmpty(t, restoredItem.RenewCtxInfo.Ctx) - assert.Equal(t, - cachedItem.RenewCtxInfo.Ctx.Value(contextIndexID), - restoredItem.RenewCtxInfo.Ctx.Value(contextIndexID), - ) - } - afterDB, err := restoredCache.db.GetByPrefix(cachememdb.IndexNameID) - require.NoError(t, err) - assert.Len(t, afterDB, 5) + // Now compare the cache contents before and after + compareBeforeAndAfter(t, lc, restoredCache, 7, 5) // And finally send the cache requests once to make sure they're all being // served from the restoredCache unless they were intended to be missing after restore. @@ -899,6 +907,88 @@ func TestLeaseCache_PersistAndRestore(t *testing.T) { } } +func TestLeaseCache_PersistAndRestore_WithManyDependencies(t *testing.T) { + tempDir, boltStorage := setupBoltStorage(t) + defer os.RemoveAll(tempDir) + defer boltStorage.Close() + + var requests []*SendRequest + var responses []*SendResponse + var orderedRequestPaths []string + + // helper func to generate new auth leases with a child secret lease attached + authAndSecretLease := func(id int, parentToken, newToken string) { + t.Helper() + path := fmt.Sprintf("/v1/auth/approle-%d/login", id) + orderedRequestPaths = append(orderedRequestPaths, path) + requests = append(requests, &SendRequest{ + Token: parentToken, + Request: httptest.NewRequest("PUT", "http://example.com"+path, strings.NewReader("")), + }) + responses = append(responses, newTestSendResponse(200, fmt.Sprintf(`{"auth": {"client_token": "%s", "renewable": true, "lease_duration": 600}}`, newToken))) + + // Fetch a leased secret using the new token + path = fmt.Sprintf("/v1/kv/%d", id) + orderedRequestPaths = append(orderedRequestPaths, path) + requests = append(requests, &SendRequest{ + Token: newToken, + Request: httptest.NewRequest("GET", "http://example.com"+path, strings.NewReader("")), + }) + responses = append(responses, newTestSendResponse(200, fmt.Sprintf(`{"lease_id": "secret-%d-lease", "renewable": true, "data": {"number": %d}, "lease_duration": 600}`, id, id))) + } + + // Pathological case: a long chain of child tokens + authAndSecretLease(0, "autoauthtoken", "many-ancestors-token;0") + for i := 1; i <= 50; i++ { + // Create a new generation of child token + authAndSecretLease(i, fmt.Sprintf("many-ancestors-token;%d", i-1), fmt.Sprintf("many-ancestors-token;%d", i)) + } + + // Lots of sibling tokens with auto auth token as their parent + for i := 51; i <= 100; i++ { + authAndSecretLease(i, "autoauthtoken", fmt.Sprintf("many-siblings-token;%d", i)) + } + + // Also create some extra siblings for an auth token further down the chain + for i := 101; i <= 110; i++ { + authAndSecretLease(i, "many-ancestors-token;25", fmt.Sprintf("many-siblings-for-ancestor-token;%d", i)) + } + + lc := testNewLeaseCacheWithPersistence(t, responses, boltStorage) + + // Register an auto-auth token so that the token and lease requests are cached + err := lc.RegisterAutoAuthToken("autoauthtoken") + require.NoError(t, err) + + for _, req := range requests { + // Send once to cache + resp, err := lc.Send(context.Background(), req) + require.NoError(t, err) + assert.Equal(t, 200, resp.Response.StatusCode, "expected success") + assert.Nil(t, resp.CacheMeta) + } + + // Ensure leases are retrieved in the correct order + var processed int + + leases, err := boltStorage.GetByType(context.Background(), cacheboltdb.LeaseType) + for _, lease := range leases { + index, err := cachememdb.Deserialize(lease) + require.NoError(t, err) + require.Equal(t, orderedRequestPaths[processed], index.RequestPath) + processed++ + } + + assert.Equal(t, len(orderedRequestPaths), processed) + + restoredCache := testNewLeaseCache(t, nil) + err = restoredCache.Restore(context.Background(), boltStorage) + require.NoError(t, err) + + // Now compare the cache contents before and after + compareBeforeAndAfter(t, lc, restoredCache, 223, 223) +} + func TestEvictPersistent(t *testing.T) { ctx := context.Background() @@ -911,7 +1001,7 @@ func TestEvictPersistent(t *testing.T) { defer boltStorage.Close() lc := testNewLeaseCacheWithPersistence(t, responses, boltStorage) - lc.RegisterAutoAuthToken("autoauthtoken") + require.NoError(t, lc.RegisterAutoAuthToken("autoauthtoken")) // populate cache by sending request through sendReq := &SendRequest{ @@ -924,7 +1014,7 @@ func TestEvictPersistent(t *testing.T) { assert.Nil(t, resp.CacheMeta) // Check bolt for the cached lease - secrets, err := lc.ps.GetByType(ctx, cacheboltdb.SecretLeaseType) + secrets, err := lc.ps.GetByType(ctx, cacheboltdb.LeaseType) require.NoError(t, err) assert.Len(t, secrets, 1) @@ -938,7 +1028,7 @@ func TestEvictPersistent(t *testing.T) { time.Sleep(2 * time.Second) // Check that cached item is gone - secrets, err = lc.ps.GetByType(ctx, cacheboltdb.SecretLeaseType) + secrets, err = lc.ps.GetByType(ctx, cacheboltdb.LeaseType) require.NoError(t, err) assert.Len(t, secrets, 0) } @@ -978,7 +1068,7 @@ func Test_hasExpired(t *testing.T) { newTestSendResponse(201, `{"lease_id": "foo", "renewable": true, "data": {"value": "foo"}, "lease_duration": 60}`), } lc := testNewLeaseCache(t, responses) - lc.RegisterAutoAuthToken("autoauthtoken") + require.NoError(t, lc.RegisterAutoAuthToken("autoauthtoken")) cacheTests := []struct { token string @@ -990,14 +1080,14 @@ func Test_hasExpired(t *testing.T) { // auth lease token: "autoauthtoken", urlPath: "/v1/sample/auth", - leaseType: cacheboltdb.AuthLeaseType, + leaseType: cacheboltdb.LeaseType, wantStatusCode: responses[0].Response.StatusCode, }, { // secret lease token: "autoauthtoken", urlPath: "/v1/sample/secret", - leaseType: cacheboltdb.SecretLeaseType, + leaseType: cacheboltdb.LeaseType, wantStatusCode: responses[1].Response.StatusCode, }, } @@ -1039,13 +1129,13 @@ func TestLeaseCache_hasExpired_wrong_type(t *testing.T) { Content-Type: application/json Date: Tue, 02 Mar 2021 17:54:16 GMT -{"auth": {"client_token": "testtoken", "renewable": true, "lease_duration": 60}}`), +{}`), } lc := testNewLeaseCache(t, nil) expired, err := lc.hasExpired(time.Now().UTC(), index) assert.False(t, expired) - assert.EqualError(t, err, `index type "token" unexpected in expiration check`) + assert.EqualError(t, err, `secret without lease encountered in expiration check`) } func TestLeaseCacheRestore_expired(t *testing.T) { @@ -1061,7 +1151,7 @@ func TestLeaseCacheRestore_expired(t *testing.T) { lc := testNewLeaseCacheWithPersistence(t, responses, boltStorage) // Register an auto-auth token so that the token and lease requests are cached in mem - lc.RegisterAutoAuthToken("autoauthtoken") + require.NoError(t, lc.RegisterAutoAuthToken("autoauthtoken")) cacheTests := []struct { token string