Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix early rotation for roles with WALs, handle multiple WALs per role #28

Merged
merged 23 commits into from Sep 21, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
12 changes: 7 additions & 5 deletions backend_test.go
Expand Up @@ -22,7 +22,7 @@ var (

func getBackend(throwsErr bool) (*backend, logical.Storage) {
config := &logical.BackendConfig{
Logger: logging.NewVaultLogger(log.Error),
Logger: logging.NewVaultLogger(log.Debug),

System: &logical.StaticSystemView{
DefaultLeaseTTLVal: defaultLeaseTTLVal,
Expand All @@ -42,7 +42,9 @@ func getBackend(throwsErr bool) (*backend, logical.Storage) {
b.cancelQueue = cancel

// Load queue and kickoff new periodic ticker
go b.initQueue(ictx, &logical.InitializationRequest{config.StorageView})
b.initQueue(ictx, &logical.InitializationRequest{
Storage: config.StorageView,
})

return b, config.StorageView
}
Expand All @@ -66,15 +68,15 @@ func (f *fakeLdapClient) Get(_ *client.Config, _ string) (*client.Entry, error)
return client.NewEntry(entry), err
}

func (f *fakeLdapClient) UpdatePassword(conf *client.Config, dn string, newPassword string) error {
func (f *fakeLdapClient) UpdatePassword(_ *client.Config, _ string, _ string) error {
var err error
if f.throwErrs {
err = errors.New("forced error")
}
return err
}

func (f *fakeLdapClient) UpdateRootPassword(conf *client.Config, newPassword string) error {
func (f *fakeLdapClient) UpdateRootPassword(_ *client.Config, _ string) error {
var err error
if f.throwErrs {
err = errors.New("forced error")
Expand All @@ -90,7 +92,7 @@ func (f *fakeLdapClient) Del(_ *client.Config, _ *ldap.DelRequest) error {
return fmt.Errorf("not implemented")
}

func (f *fakeLdapClient) Execute(conf *client.Config, entries []*ldif.Entry, continueOnError bool) (err error) {
func (f *fakeLdapClient) Execute(_ *client.Config, _ []*ldif.Entry, _ bool) (err error) {
return fmt.Errorf("not implemented")
}

Expand Down
23 changes: 17 additions & 6 deletions path_rotate.go
Expand Up @@ -24,12 +24,12 @@ func (b *backend) pathRotateCredentials() []*framework.Path {
Fields: map[string]*framework.FieldSchema{},
Operations: map[logical.Operation]framework.OperationHandler{
logical.UpdateOperation: &framework.PathOperation{
Callback: b.pathRotateCredentialsUpdate,
Callback: b.pathRotateRootCredentialsUpdate,
ForwardPerformanceStandby: true,
ForwardPerformanceSecondary: true,
},
logical.CreateOperation: &framework.PathOperation{
Callback: b.pathRotateCredentialsUpdate,
Callback: b.pathRotateRootCredentialsUpdate,
ForwardPerformanceStandby: true,
ForwardPerformanceSecondary: true,
},
Expand Down Expand Up @@ -64,7 +64,7 @@ func (b *backend) pathRotateCredentials() []*framework.Path {
}
}

func (b *backend) pathRotateCredentialsUpdate(ctx context.Context, req *logical.Request, data *framework.FieldData) (*logical.Response, error) {
func (b *backend) pathRotateRootCredentialsUpdate(ctx context.Context, req *logical.Request, data *framework.FieldData) (*logical.Response, error) {
if _, hasTimeout := ctx.Deadline(); !hasTimeout {
var cancel func()
ctx, cancel = context.WithTimeout(ctx, defaultCtxTimeout)
Expand Down Expand Up @@ -133,10 +133,14 @@ func (b *backend) pathRotateRoleCredentialsUpdate(ctx context.Context, req *logi
}
}

resp, err := b.setStaticAccountPassword(ctx, req.Storage, &setStaticAccountInput{
input := &setStaticAccountInput{
RoleName: name,
Role: role,
})
}
if walID, ok := item.Value.(string); ok {
input.WALID = walID
}
resp, err := b.setStaticAccountPassword(ctx, req.Storage, input)
if err != nil {
b.Logger().Warn("unable to rotate credentials in rotate-role", "error", err)
// Update the priority to re-try this rotation and re-add the item to
Expand All @@ -149,6 +153,8 @@ func (b *backend) pathRotateRoleCredentialsUpdate(ctx context.Context, req *logi
}
} else {
item.Priority = resp.RotationTime.Add(role.StaticAccount.RotationPeriod).Unix()
// Clear any stored WAL ID as we must have successfully deleted our WAL to get here.
item.Value = ""
}

// Add their rotation to the queue. We use pushErr here to distinguish between
Expand All @@ -159,9 +165,14 @@ func (b *backend) pathRotateRoleCredentialsUpdate(ctx context.Context, req *logi
return nil, pushErr
}

if err != nil {
return nil, fmt.Errorf("unable to finish rotating credentials; retries will "+
"continue in the background but it is also safe to retry manually: %w", err)
}

// We're not returning creds here because we do not know if its been processed
// by the queue.
return nil, err
return nil, nil
}

// rollBackPassword uses naive exponential backoff to retry updating to an old password,
Expand Down
52 changes: 44 additions & 8 deletions path_static_roles.go
Expand Up @@ -5,6 +5,7 @@ import (
"fmt"
"time"

"github.com/hashicorp/go-multierror"
"github.com/hashicorp/vault/sdk/framework"
"github.com/hashicorp/vault/sdk/helper/locksutil"
"github.com/hashicorp/vault/sdk/logical"
Expand Down Expand Up @@ -146,7 +147,28 @@ func (b *backend) pathStaticRoleDelete(ctx context.Context, req *logical.Request
return nil, err
}

return nil, nil
walIDs, err := framework.ListWAL(ctx, req.Storage)
if err != nil {
return nil, err
}
var merr *multierror.Error
for _, walID := range walIDs {
wal, err := b.findStaticWAL(ctx, req.Storage, walID)
if err != nil {
merr = multierror.Append(merr, err)
continue
}
if wal != nil && name == wal.RoleName {
b.Logger().Debug("deleting WAL for deleted role", "WAL ID", walID, "role", name)
err = framework.DeleteWAL(ctx, req.Storage, walID)
if err != nil {
b.Logger().Debug("failed to delete WAL for deleted role", "WAL ID", walID, "error", err)
merr = multierror.Append(merr, err)
}
}
}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wonder if we want to do the WAL delete before the role deletion, that way we have an avenue to retry the operation if the WAL cleanup fails?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I tried implementing this, but I think I'd prefer to leave WAL cleanup as a best effort, because although it has its own consistency problems, the other order does as well. If we delete WALs before the role and fail, we'll need to push the item back onto the queue, which itself could fail. In practice though, deleting roles and WALs both rely on the same storage layer, so at least these partial failure scenarios should be very rare.

We could address the inconsistency of leaving WALs lying around fairly robustly by keying WALs on a UUID whose lifetime is attached to a role's storage entry instead of keying WALs on the role name. That obviously wouldn't be a backport candidate though. WDYT?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Makes sense, let's leave it as is for now and revisit any improvements in a subsequent PR


return nil, merr.ErrorOrNil()
}

func (b *backend) pathStaticRoleRead(ctx context.Context, req *logical.Request, d *framework.FieldData) (*logical.Response, error) {
Expand Down Expand Up @@ -221,6 +243,7 @@ func (b *backend) pathStaticRoleCreateUpdate(ctx context.Context, req *logical.R

// Only call setStaticAccountPassword if we're creating the role for the
// first time
var item *queue.Item
switch req.Operation {
case logical.CreateOperation:
// setStaticAccountPassword calls Storage.Put and saves the role to storage
Expand All @@ -229,10 +252,24 @@ func (b *backend) pathStaticRoleCreateUpdate(ctx context.Context, req *logical.R
Role: role,
})
if err != nil {
if resp != nil && resp.WALID != "" {
b.Logger().Debug("deleting WAL for failed role creation", "WAL ID", resp.WALID, "role", name)
walDeleteErr := framework.DeleteWAL(ctx, req.Storage, resp.WALID)
if walDeleteErr != nil {
b.Logger().Debug("failed to delete WAL for failed role creation", "WAL ID", resp.WALID, "error", walDeleteErr)
var merr *multierror.Error
merr = multierror.Append(merr, err)
merr = multierror.Append(merr, fmt.Errorf("failed to clean up WAL from failed role creation: %w", walDeleteErr))
err = merr.ErrorOrNil()
}
}
return nil, err
}
briankassouf marked this conversation as resolved.
Show resolved Hide resolved
// guard against RotationTime not being set or zero-value
lvr = resp.RotationTime
item = &queue.Item{
Key: name,
}
case logical.UpdateOperation:
// store updated Role
entry, err := logical.StorageEntryJSON(staticRolePath+name, role)
Expand All @@ -244,20 +281,19 @@ func (b *backend) pathStaticRoleCreateUpdate(ctx context.Context, req *logical.R
}

// In case this is an update, remove any previous version of the item from
// the queue

// the queue. The existing item could be tracking a WAL ID for this role,
// so it's important to keep the existing item rather than recreate it.
//TODO: Add retry logic
_, err = b.popFromRotationQueueByKey(name)
item, err = b.popFromRotationQueueByKey(name)
if err != nil {
return nil, err
}
}

item.Priority = lvr.Add(role.StaticAccount.RotationPeriod).Unix()

// Add their rotation to the queue
if err := b.pushItem(&queue.Item{
Key: name,
Priority: lvr.Add(role.StaticAccount.RotationPeriod).Unix(),
}); err != nil {
if err := b.pushItem(item); err != nil {
return nil, err
}

Expand Down
148 changes: 148 additions & 0 deletions path_static_roles_test.go
Expand Up @@ -2,6 +2,7 @@ package openldap

import (
"context"
"strings"
"testing"

"github.com/hashicorp/vault/sdk/logical"
Expand Down Expand Up @@ -452,3 +453,150 @@ func TestListRoles(t *testing.T) {
}
})
}

func TestWALsStillTrackedAfterUpdate(t *testing.T) {
ctx := context.Background()
b, storage := getBackend(false)
defer b.Cleanup(ctx)
configureOpenLDAPMount(t, b, storage)

createRole(t, b, storage, "hashicorp")

generateWALFromFailedRotation(t, b, storage, "hashicorp")
requireWALs(t, storage, 1)

_, err := b.HandleRequest(ctx, &logical.Request{
Operation: logical.UpdateOperation,
Path: staticRolePath + "hashicorp",
Storage: storage,
Data: map[string]interface{}{
"username": "hashicorp",
"dn": "uid=hashicorp,ou=users,dc=hashicorp,dc=com",
"rotation_period": "600s",
},
})
if err != nil {
t.Fatal(err)
}
walIDs := requireWALs(t, storage, 1)

// Now when we trigger a manual rotate, it should use the WAL's new password
// which will tell us that the in-memory structure still kept track of the
// WAL in addition to it still being in storage.
wal, err := b.findStaticWAL(ctx, storage, walIDs[0])
if err != nil {
t.Fatal(err)
}
_, err = b.HandleRequest(context.Background(), &logical.Request{
Operation: logical.UpdateOperation,
Path: "rotate-role/hashicorp",
Storage: storage,
})
if err != nil {
t.Fatal(err)
}
role, err := b.staticRole(ctx, storage, "hashicorp")
if err != nil {
t.Fatal(err)
}
if role.StaticAccount.Password != wal.NewPassword {
t.Fatal()
}
requireWALs(t, storage, 0)
}

func TestWALsDeletedOnRoleCreationFailed(t *testing.T) {
ctx := context.Background()
b, storage := getBackend(true)
defer b.Cleanup(ctx)
configureOpenLDAPMount(t, b, storage)

for i := 0; i < 3; i++ {
resp, err := b.HandleRequest(ctx, &logical.Request{
Operation: logical.CreateOperation,
Path: staticRolePath + "hashicorp",
Storage: storage,
Data: map[string]interface{}{
"username": "hashicorp",
"dn": "uid=hashicorp,ou=users,dc=hashicorp,dc=com",
"rotation_period": "5s",
},
})
if err == nil {
t.Fatal("expected error from OpenLDAP")
}
if !strings.Contains(err.Error(), "forced error") {
t.Fatal("expected forced error message", resp, err)
}
}

requireWALs(t, storage, 0)
}

func TestWALsDeletedOnRoleDeletion(t *testing.T) {
ctx := context.Background()
b, storage := getBackend(false)
defer b.Cleanup(ctx)
configureOpenLDAPMount(t, b, storage)

// Create the roles
roleNames := []string{"hashicorp", "2"}
for _, roleName := range roleNames {
createRole(t, b, storage, roleName)
}

// Fail to rotate the roles
for _, roleName := range roleNames {
generateWALFromFailedRotation(t, b, storage, roleName)
}

// Should have 2 WALs hanging around
requireWALs(t, storage, 2)

// Delete one of the static roles
_, err := b.HandleRequest(ctx, &logical.Request{
Operation: logical.DeleteOperation,
Path: "static-role/hashicorp",
Storage: storage,
})
if err != nil {
t.Fatal(err)
}

// 1 WAL should be cleared by the delete
requireWALs(t, storage, 1)
}

func configureOpenLDAPMount(t *testing.T, b *backend, storage logical.Storage) {
t.Helper()
resp, err := b.HandleRequest(context.Background(), &logical.Request{
Operation: logical.CreateOperation,
Path: configPath,
Storage: storage,
Data: map[string]interface{}{
"binddn": "tester",
"bindpass": "pa$$w0rd",
"url": "ldap://138.91.247.105",
"certificate": validCertificate,
},
})
if err != nil || (resp != nil && resp.IsError()) {
t.Fatalf("err:%s resp:%#v\n", err, resp)
}
}

func createRole(t *testing.T, b *backend, storage logical.Storage, roleName string) {
_, err := b.HandleRequest(context.Background(), &logical.Request{
Operation: logical.CreateOperation,
Path: "static-role/" + roleName,
Storage: storage,
Data: map[string]interface{}{
"username": roleName,
"dn": "uid=hashicorp,ou=users,dc=hashicorp,dc=com",
"rotation_period": "86400s",
},
})
if err != nil {
t.Fatal(err)
}
}