From 44dcb1e2595ed957bdd308767ba412bbd6d6e668 Mon Sep 17 00:00:00 2001 From: Tom Proctor Date: Mon, 4 Oct 2021 18:13:57 +0100 Subject: [PATCH 1/4] agent: tolerate partial restore failure from persistent cache --- command/agent/cache/lease_cache.go | 56 ++++++++---- command/agent/cache/lease_cache_test.go | 116 ++++++++++++++++-------- 2 files changed, 114 insertions(+), 58 deletions(-) diff --git a/command/agent/cache/lease_cache.go b/command/agent/cache/lease_cache.go index a8b2d4bd88cea..f45282dbf763c 100644 --- a/command/agent/cache/lease_cache.go +++ b/command/agent/cache/lease_cache.go @@ -17,6 +17,7 @@ import ( "time" hclog "github.com/hashicorp/go-hclog" + "github.com/hashicorp/go-multierror" "github.com/hashicorp/go-secure-stdlib/base62" "github.com/hashicorp/vault/api" "github.com/hashicorp/vault/command/agent/cache/cacheboltdb" @@ -969,56 +970,68 @@ func (c *LeaseCache) Flush() error { // tokens first, since restoring a lease's renewal context and watcher requires // looking up the token in the cachememdb. func (c *LeaseCache) Restore(ctx context.Context, storage *cacheboltdb.BoltStorage) error { + var errors *multierror.Error + // Process tokens first tokens, err := storage.GetByType(ctx, cacheboltdb.TokenType) if err != nil { - return err - } - if err := c.restoreTokens(tokens); err != nil { - return err + errors = multierror.Append(errors, err) + } else { + if err := c.restoreTokens(tokens); err != nil { + errors = multierror.Append(errors, err) + } } // Then process auth leases authLeases, err := storage.GetByType(ctx, cacheboltdb.AuthLeaseType) if err != nil { - return err - } - if err := c.restoreLeases(authLeases); err != nil { - return err + errors = multierror.Append(errors, err) + } else { + if err := c.restoreLeases(authLeases); err != nil { + errors = multierror.Append(errors, err) + } } // Then process secret leases secretLeases, err := storage.GetByType(ctx, cacheboltdb.SecretLeaseType) if err != nil { - return err - } - if err := c.restoreLeases(secretLeases); err != nil { - return err + errors = multierror.Append(errors, err) + } else { + if err := c.restoreLeases(secretLeases); err != nil { + errors = multierror.Append(errors, err) + } } - return nil + return errors.ErrorOrNil() } func (c *LeaseCache) restoreTokens(tokens [][]byte) error { + var errors *multierror.Error + for _, token := range tokens { newIndex, err := cachememdb.Deserialize(token) if err != nil { - return err + errors = multierror.Append(errors, err) + continue } newIndex.RenewCtxInfo = c.createCtxInfo(nil) if err := c.db.Set(newIndex); err != nil { - return err + errors = multierror.Append(errors, err) + continue } c.logger.Trace("restored token", "id", newIndex.ID) } - return nil + return errors.ErrorOrNil() } func (c *LeaseCache) restoreLeases(leases [][]byte) error { + var errors *multierror.Error + for _, lease := range leases { newIndex, err := cachememdb.Deserialize(lease) if err != nil { - return err + errors = multierror.Append(errors, err) + continue } // Check if this lease has already expired @@ -1031,14 +1044,17 @@ func (c *LeaseCache) restoreLeases(leases [][]byte) error { } if err := c.restoreLeaseRenewCtx(newIndex); err != nil { - return err + errors = multierror.Append(errors, err) + continue } if err := c.db.Set(newIndex); err != nil { - return err + errors = multierror.Append(errors, err) + continue } c.logger.Trace("restored lease", "id", newIndex.ID, "path", newIndex.RequestPath) } - return nil + + return errors.ErrorOrNil() } // restoreLeaseRenewCtx re-creates a RenewCtx for an index object and starts diff --git a/command/agent/cache/lease_cache_test.go b/command/agent/cache/lease_cache_test.go index 32842c059d3e1..fccdc2d6fe3c2 100644 --- a/command/agent/cache/lease_cache_test.go +++ b/command/agent/cache/lease_cache_test.go @@ -16,6 +16,7 @@ import ( "github.com/go-test/deep" hclog "github.com/hashicorp/go-hclog" + "github.com/hashicorp/go-multierror" "github.com/hashicorp/vault/api" "github.com/hashicorp/vault/command/agent/cache/cacheboltdb" "github.com/hashicorp/vault/command/agent/cache/cachememdb" @@ -711,13 +712,18 @@ func setupBoltStorage(t *testing.T) (tempCacheDir string, boltStorage *cachebolt } func TestLeaseCache_PersistAndRestore(t *testing.T) { - // Emulate 4 responses from the api proxy. The first two use the auto-auth - // token, and the last two use another token. + // Emulate responses from the api proxy. The first two use the auto-auth + // token, and the others use another token. responses := []*SendResponse{ newTestSendResponse(200, `{"auth": {"client_token": "testtoken", "renewable": true, "lease_duration": 600}}`), newTestSendResponse(201, `{"lease_id": "foo", "renewable": true, "data": {"value": "foo"}, "lease_duration": 600}`), newTestSendResponse(202, `{"auth": {"client_token": "testtoken2", "renewable": true, "orphan": true, "lease_duration": 600}}`), newTestSendResponse(203, `{"lease_id": "secret2-lease", "renewable": true, "data": {"number": "two"}, "lease_duration": 600}`), + // 204 No content gets special handling - avoid. + newTestSendResponse(250, `{"auth": {"client_token": "testtoken3", "renewable": true, "orphan": true, "lease_duration": 600}}`), + newTestSendResponse(251, `{"lease_id": "secret3-lease", "renewable": true, "data": {"number": "three"}, "lease_duration": 600}`), + newTestSendResponse(252, `{"auth": {"client_token": "testtoken2", "renewable": true, "orphan": true, "lease_duration": 600}}`), + newTestSendResponse(253, `{"lease_id": "secret2-lease", "renewable": true, "data": {"number": "two"}, "lease_duration": 600}`), } tempDir, boltStorage := setupBoltStorage(t) @@ -726,59 +732,80 @@ func TestLeaseCache_PersistAndRestore(t *testing.T) { lc := testNewLeaseCacheWithPersistence(t, responses, boltStorage) // Register an auto-auth token so that the token and lease requests are cached - lc.RegisterAutoAuthToken("autoauthtoken") + err := lc.RegisterAutoAuthToken("autoauthtoken") + assert.NoError(t, err) cacheTests := []struct { - token string - method string - urlPath string - body string - wantStatusCode int + token string + method string + urlPath string + body string + delete bool + missing bool }{ { // Make a request. A response with a new token is returned to the // lease cache and that will be cached. - token: "autoauthtoken", - method: "GET", - urlPath: "http://example.com/v1/sample/api", - body: `{"value": "input"}`, - wantStatusCode: responses[0].Response.StatusCode, + token: "autoauthtoken", + method: "GET", + urlPath: "http://example.com/v1/sample/api", + body: `{"value": "input"}`, }, { // Modify the request a little bit to ensure the second response is // returned to the lease cache. - token: "autoauthtoken", - method: "GET", - urlPath: "http://example.com/v1/sample/api", - body: `{"value": "input_changed"}`, - wantStatusCode: responses[1].Response.StatusCode, + token: "autoauthtoken", + method: "GET", + urlPath: "http://example.com/v1/sample/api", + body: `{"value": "input_changed"}`, }, { // Simulate an approle login to get another token - method: "PUT", - urlPath: "http://example.com/v1/auth/approle/login", - body: `{"role_id": "my role", "secret_id": "my secret"}`, - wantStatusCode: responses[2].Response.StatusCode, + method: "PUT", + urlPath: "http://example.com/v1/auth/approle-expect-missing/login", + body: `{"role_id": "my role", "secret_id": "my secret"}`, + delete: true, + missing: true, }, { // Test caching with the token acquired from the approle login - token: "testtoken2", - method: "GET", - urlPath: "http://example.com/v1/sample2/api", - body: `{"second": "input"}`, - wantStatusCode: responses[3].Response.StatusCode, + token: "testtoken2", + method: "GET", + urlPath: "http://example.com/v1/sample-expect-missing/api", + body: `{"second": "input"}`, + missing: true, + }, + { + // Simulate another approle login to get another token + method: "PUT", + urlPath: "http://example.com/v1/auth/approle/login", + body: `{"role_id": "my role", "secret_id": "my secret"}`, + }, + { + // Test caching with the token acquired from the latest approle login + token: "testtoken3", + method: "GET", + urlPath: "http://example.com/v1/sample3/api", + body: `{"third": "input"}`, }, } - for _, ct := range cacheTests { + var deleteID string + for i, ct := range cacheTests { // Send once to cache sendReq := &SendRequest{ Token: ct.token, Request: httptest.NewRequest(ct.method, ct.urlPath, strings.NewReader(ct.body)), } + if ct.delete { + deleteID, err = computeIndexID(sendReq) + assert.NoError(t, err) + // Now reset the body after calculating the index + sendReq.Request = httptest.NewRequest(ct.method, ct.urlPath, strings.NewReader(ct.body)) + } resp, err := lc.Send(context.Background(), sendReq) require.NoError(t, err) - assert.Equal(t, resp.Response.StatusCode, ct.wantStatusCode, "expected proxied response") + assert.Equal(t, responses[i].Response.StatusCode, resp.Response.StatusCode, "expected proxied response") assert.Nil(t, resp.CacheMeta) // Send again to test cache. If this isn't cached, the response returned @@ -789,24 +816,32 @@ func TestLeaseCache_PersistAndRestore(t *testing.T) { } respCached, err := lc.Send(context.Background(), sendCacheReq) require.NoError(t, err, "failed to send request %+v", ct) - assert.Equal(t, respCached.Response.StatusCode, ct.wantStatusCode, "expected proxied response") + assert.Equal(t, responses[i].Response.StatusCode, respCached.Response.StatusCode, "expected proxied response") require.NotNil(t, respCached.CacheMeta) assert.True(t, respCached.CacheMeta.Hit) } + err = boltStorage.Delete(deleteID) + assert.NoError(t, err) + // Now we know the cache is working, so try restoring from the persisted // cache's storage restoredCache := testNewLeaseCache(t, nil) - err := restoredCache.Restore(context.Background(), boltStorage) - assert.NoError(t, err) + err = restoredCache.Restore(context.Background(), boltStorage) + errors, ok := err.(*multierror.Error) + require.True(t, ok) + assert.Len(t, errors.Errors, 1) + assert.Contains(t, errors.Error(), "could not find parent Token testtoken2") // Now compare before and after beforeDB, err := lc.db.GetByPrefix(cachememdb.IndexNameID) require.NoError(t, err) - assert.Len(t, beforeDB, 5) - + assert.Len(t, beforeDB, 7) for _, cachedItem := range beforeDB { + if strings.Contains(cachedItem.RequestPath, "expect-missing") { + continue + } restoredItem, err := restoredCache.db.Get(cachememdb.IndexNameID, cachedItem.ID) require.NoError(t, err) @@ -839,16 +874,21 @@ func TestLeaseCache_PersistAndRestore(t *testing.T) { // And finally send the cache requests once to make sure they're all being // served from the restoredCache - for _, ct := range cacheTests { + for i, ct := range cacheTests { sendCacheReq := &SendRequest{ Token: ct.token, Request: httptest.NewRequest(ct.method, ct.urlPath, strings.NewReader(ct.body)), } respCached, err := restoredCache.Send(context.Background(), sendCacheReq) require.NoError(t, err, "failed to send request %+v", ct) - assert.Equal(t, respCached.Response.StatusCode, ct.wantStatusCode, "expected proxied response") - require.NotNil(t, respCached.CacheMeta) - assert.True(t, respCached.CacheMeta.Hit) + if ct.missing { + assert.Equal(t, responses[i+4].Response.StatusCode, respCached.Response.StatusCode, "expected proxied response") + require.Nil(t, respCached.CacheMeta) + } else { + assert.Equal(t, responses[i].Response.StatusCode, respCached.Response.StatusCode, "expected proxied response") + require.NotNil(t, respCached.CacheMeta) + assert.True(t, respCached.CacheMeta.Hit) + } } } From a1952ac29ab0d81115a2fa15a84230d8c9dd4ff2 Mon Sep 17 00:00:00 2001 From: Tom Proctor Date: Tue, 5 Oct 2021 14:22:00 +0100 Subject: [PATCH 2/4] Fix test --- command/agent/cache/lease_cache_test.go | 43 +++++++++++++------------ 1 file changed, 22 insertions(+), 21 deletions(-) diff --git a/command/agent/cache/lease_cache_test.go b/command/agent/cache/lease_cache_test.go index fccdc2d6fe3c2..628832e38523e 100644 --- a/command/agent/cache/lease_cache_test.go +++ b/command/agent/cache/lease_cache_test.go @@ -717,13 +717,13 @@ func TestLeaseCache_PersistAndRestore(t *testing.T) { responses := []*SendResponse{ newTestSendResponse(200, `{"auth": {"client_token": "testtoken", "renewable": true, "lease_duration": 600}}`), newTestSendResponse(201, `{"lease_id": "foo", "renewable": true, "data": {"value": "foo"}, "lease_duration": 600}`), + // The auth token will get manually deleted from the bolt DB storage, causing both of the following two responses + // to be missing from the cache after a restore, because the lease is a child of the auth token. newTestSendResponse(202, `{"auth": {"client_token": "testtoken2", "renewable": true, "orphan": true, "lease_duration": 600}}`), newTestSendResponse(203, `{"lease_id": "secret2-lease", "renewable": true, "data": {"number": "two"}, "lease_duration": 600}`), // 204 No content gets special handling - avoid. newTestSendResponse(250, `{"auth": {"client_token": "testtoken3", "renewable": true, "orphan": true, "lease_duration": 600}}`), newTestSendResponse(251, `{"lease_id": "secret3-lease", "renewable": true, "data": {"number": "three"}, "lease_duration": 600}`), - newTestSendResponse(252, `{"auth": {"client_token": "testtoken2", "renewable": true, "orphan": true, "lease_duration": 600}}`), - newTestSendResponse(253, `{"lease_id": "secret2-lease", "renewable": true, "data": {"number": "two"}, "lease_duration": 600}`), } tempDir, boltStorage := setupBoltStorage(t) @@ -736,12 +736,12 @@ func TestLeaseCache_PersistAndRestore(t *testing.T) { assert.NoError(t, err) cacheTests := []struct { - token string - method string - urlPath string - body string - delete bool - missing bool + token string + method string + urlPath string + body string + deleteFromPersistentStore bool // If true, will be deleted from bolt DB to induce an error on restore + expectMissingAfterRestore bool // If true, the response is not expected to be present in the restored cache }{ { // Make a request. A response with a new token is returned to the @@ -761,11 +761,11 @@ func TestLeaseCache_PersistAndRestore(t *testing.T) { }, { // Simulate an approle login to get another token - method: "PUT", - urlPath: "http://example.com/v1/auth/approle-expect-missing/login", - body: `{"role_id": "my role", "secret_id": "my secret"}`, - delete: true, - missing: true, + method: "PUT", + urlPath: "http://example.com/v1/auth/approle-expect-missing/login", + body: `{"role_id": "my role", "secret_id": "my secret"}`, + deleteFromPersistentStore: true, + expectMissingAfterRestore: true, }, { // Test caching with the token acquired from the approle login @@ -773,7 +773,8 @@ func TestLeaseCache_PersistAndRestore(t *testing.T) { method: "GET", urlPath: "http://example.com/v1/sample-expect-missing/api", body: `{"second": "input"}`, - missing: true, + // This will be missing from the restored cache because its parent token was deleted + expectMissingAfterRestore: true, }, { // Simulate another approle login to get another token @@ -797,7 +798,7 @@ func TestLeaseCache_PersistAndRestore(t *testing.T) { Token: ct.token, Request: httptest.NewRequest(ct.method, ct.urlPath, strings.NewReader(ct.body)), } - if ct.delete { + if ct.deleteFromPersistentStore { deleteID, err = computeIndexID(sendReq) assert.NoError(t, err) // Now reset the body after calculating the index @@ -825,8 +826,9 @@ func TestLeaseCache_PersistAndRestore(t *testing.T) { assert.NoError(t, err) // Now we know the cache is working, so try restoring from the persisted - // cache's storage - restoredCache := testNewLeaseCache(t, nil) + // cache's storage. Responses 3 and 4 have been cleared from the cache, so + // re-send those. + restoredCache := testNewLeaseCache(t, responses[2:4]) err = restoredCache.Restore(context.Background(), boltStorage) errors, ok := err.(*multierror.Error) @@ -873,7 +875,7 @@ func TestLeaseCache_PersistAndRestore(t *testing.T) { assert.Len(t, afterDB, 5) // And finally send the cache requests once to make sure they're all being - // served from the restoredCache + // served from the restoredCache unless they were intended to be missing after restore. for i, ct := range cacheTests { sendCacheReq := &SendRequest{ Token: ct.token, @@ -881,11 +883,10 @@ func TestLeaseCache_PersistAndRestore(t *testing.T) { } respCached, err := restoredCache.Send(context.Background(), sendCacheReq) require.NoError(t, err, "failed to send request %+v", ct) - if ct.missing { - assert.Equal(t, responses[i+4].Response.StatusCode, respCached.Response.StatusCode, "expected proxied response") + assert.Equal(t, responses[i].Response.StatusCode, respCached.Response.StatusCode, "expected proxied response") + if ct.expectMissingAfterRestore { require.Nil(t, respCached.CacheMeta) } else { - assert.Equal(t, responses[i].Response.StatusCode, respCached.Response.StatusCode, "expected proxied response") require.NotNil(t, respCached.CacheMeta) assert.True(t, respCached.CacheMeta.Hit) } From 869d62f59c91a1daeee1f40dae074055b9c118f4 Mon Sep 17 00:00:00 2001 From: Tom Proctor Date: Tue, 5 Oct 2021 14:31:06 +0100 Subject: [PATCH 3/4] Add changelog --- changelog/12688.txt | 7 +++---- changelog/12718.txt | 3 +++ 2 files changed, 6 insertions(+), 4 deletions(-) create mode 100644 changelog/12718.txt diff --git a/changelog/12688.txt b/changelog/12688.txt index 56624722b98ee..e57e56bf7f32b 100644 --- a/changelog/12688.txt +++ b/changelog/12688.txt @@ -1,4 +1,3 @@ -+```release-note:improvement -+auth/kubernetes: validate JWT against the provided role on alias look ahead operations -+``` - +```release-note:improvement +auth/kubernetes: validate JWT against the provided role on alias look ahead operations +``` diff --git a/changelog/12718.txt b/changelog/12718.txt new file mode 100644 index 0000000000000..1e786d78c569f --- /dev/null +++ b/changelog/12718.txt @@ -0,0 +1,3 @@ +```release-note:improvement +agent/cache: tolerate partial restore failure from persistent cache +``` From 7136280aa50e255d07b7930cb5ec034d4cb8a94a Mon Sep 17 00:00:00 2001 From: Tom Proctor Date: Thu, 7 Oct 2021 14:35:31 +0100 Subject: [PATCH 4/4] Review comments: improved consistency, test robustness, comments, assertions --- command/agent/cache/lease_cache.go | 1 + command/agent/cache/lease_cache_test.go | 18 ++++++++++++------ 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/command/agent/cache/lease_cache.go b/command/agent/cache/lease_cache.go index f45282dbf763c..ad6d71c0c902f 100644 --- a/command/agent/cache/lease_cache.go +++ b/command/agent/cache/lease_cache.go @@ -1021,6 +1021,7 @@ func (c *LeaseCache) restoreTokens(tokens [][]byte) error { } c.logger.Trace("restored token", "id", newIndex.ID) } + return errors.ErrorOrNil() } diff --git a/command/agent/cache/lease_cache_test.go b/command/agent/cache/lease_cache_test.go index 628832e38523e..66fb56be6b202 100644 --- a/command/agent/cache/lease_cache_test.go +++ b/command/agent/cache/lease_cache_test.go @@ -714,6 +714,8 @@ func setupBoltStorage(t *testing.T) (tempCacheDir string, boltStorage *cachebolt func TestLeaseCache_PersistAndRestore(t *testing.T) { // Emulate responses from the api proxy. The first two use the auto-auth // token, and the others use another token. + // The test re-sends each request to ensure that the response is cached + // so the number of responses and cacheTests specified should always be equal. responses := []*SendResponse{ newTestSendResponse(200, `{"auth": {"client_token": "testtoken", "renewable": true, "lease_duration": 600}}`), newTestSendResponse(201, `{"lease_id": "foo", "renewable": true, "data": {"value": "foo"}, "lease_duration": 600}`), @@ -733,7 +735,7 @@ func TestLeaseCache_PersistAndRestore(t *testing.T) { // Register an auto-auth token so that the token and lease requests are cached err := lc.RegisterAutoAuthToken("autoauthtoken") - assert.NoError(t, err) + require.NoError(t, err) cacheTests := []struct { token string @@ -791,7 +793,7 @@ func TestLeaseCache_PersistAndRestore(t *testing.T) { }, } - var deleteID string + var deleteIDs []string for i, ct := range cacheTests { // Send once to cache sendReq := &SendRequest{ @@ -799,8 +801,9 @@ func TestLeaseCache_PersistAndRestore(t *testing.T) { Request: httptest.NewRequest(ct.method, ct.urlPath, strings.NewReader(ct.body)), } if ct.deleteFromPersistentStore { - deleteID, err = computeIndexID(sendReq) - assert.NoError(t, err) + deleteID, err := computeIndexID(sendReq) + require.NoError(t, err) + deleteIDs = append(deleteIDs, deleteID) // Now reset the body after calculating the index sendReq.Request = httptest.NewRequest(ct.method, ct.urlPath, strings.NewReader(ct.body)) } @@ -822,8 +825,11 @@ func TestLeaseCache_PersistAndRestore(t *testing.T) { assert.True(t, respCached.CacheMeta.Hit) } - err = boltStorage.Delete(deleteID) - assert.NoError(t, err) + require.NotEmpty(t, deleteIDs) + for _, deleteID := range deleteIDs { + err = boltStorage.Delete(deleteID) + require.NoError(t, err) + } // Now we know the cache is working, so try restoring from the persisted // cache's storage. Responses 3 and 4 have been cleared from the cache, so