From 09af2ac68ed919ec236311730ba37c7e191f14cf Mon Sep 17 00:00:00 2001 From: Rakshith R Date: Mon, 21 Nov 2022 16:29:58 +0530 Subject: [PATCH] osd: re-open encrypted disk during osd-prepare-job if closed This commit implements this corner case during osd-prepare job. ``` The encrypted block is not opened, this is an extreme corner case The OSD deployment has been removed manually AND the node rebooted So we need to re-open the block to re-hydrate the OSDInfo. Handling this case would mean, writing the encryption key on a temporary file, then call luksOpen to open the encrypted block and then call ceph-volume to list against the opened encrypted block. We don't implement this, yet and return an error. ``` When underlying PVC for osd are CSI provisioned, the encrypted device is closed when PVC is unmounted due to osd pod being deleted. Therefore, this may occur more frequently and needs to be handled. This commit implements the fix for the same. Signed-off-by: Rakshith R --- pkg/daemon/ceph/osd/encryption.go | 16 ++++- pkg/daemon/ceph/osd/volume.go | 31 ++++++---- pkg/operator/ceph/cluster/osd/config.go | 6 +- pkg/operator/ceph/cluster/osd/config_test.go | 4 +- pkg/operator/ceph/cluster/osd/spec.go | 8 +-- pkg/util/exec/exec.go | 18 ++++++ pkg/util/exec/exec_test.go | 63 ++++++++++++++++++++ pkg/util/exec/test/mockexec.go | 10 ++++ pkg/util/exec/translate_exec.go | 6 ++ 9 files changed, 141 insertions(+), 21 deletions(-) diff --git a/pkg/daemon/ceph/osd/encryption.go b/pkg/daemon/ceph/osd/encryption.go index 50a9e57f822e1..40fb84bc0e47a 100644 --- a/pkg/daemon/ceph/osd/encryption.go +++ b/pkg/daemon/ceph/osd/encryption.go @@ -22,6 +22,7 @@ import ( "path" "regexp" "strings" + "time" "github.com/pkg/errors" v1 "github.com/rook/rook/pkg/apis/ceph.rook.io/v1" @@ -32,8 +33,9 @@ import ( ) const ( - cryptsetupBinary = "cryptsetup" - dmsetupBinary = "dmsetup" + cryptsetupBinary = "cryptsetup" + dmsetupBinary = "dmsetup" + luksOpenCmdTimeOut = 10 * time.Second ) var ( @@ -160,6 +162,16 @@ func dumpLUKS(context *clusterd.Context, disk string) (string, error) { return cryptsetupOut, nil } +func openEncryptedDevice(context *clusterd.Context, disk, target, passphrase string) error { + args := []string{"luksOpen", "--verbose", "--allow-discards", disk, target} + err := context.Executor.ExecuteCommandWithStdin(luksOpenCmdTimeOut, cryptsetupBinary, &passphrase, args...) + if err != nil { + return errors.Wrapf(err, "failed to open encrypted device %q", disk) + } + + return nil +} + func isCephEncryptedBlock(context *clusterd.Context, currentClusterFSID string, disk string) bool { metadata, err := dumpLUKS(context, disk) if err != nil { diff --git a/pkg/daemon/ceph/osd/volume.go b/pkg/daemon/ceph/osd/volume.go index fcd616ddb8519..3f0eef5fe888c 100644 --- a/pkg/daemon/ceph/osd/volume.go +++ b/pkg/daemon/ceph/osd/volume.go @@ -956,17 +956,28 @@ func GetCephVolumeRawOSDs(context *clusterd.Context, clusterInfo *client.Cluster } } if encryptedBlock == "" { - // The encrypted block is not opened, this is an extreme corner case - // The OSD deployment has been removed manually AND the node rebooted - // So we need to re-open the block to re-hydrate the OSDInfo. - // - // Handling this case would mean, writing the encryption key on a temporary file, then call - // luksOpen to open the encrypted block and then call ceph-volume to list against the opened - // encrypted block. - // We don't implement this, yet and return an error. - return nil, errors.Errorf("failed to find the encrypted block device for %q, not opened?", block) - } + // The encrypted block is not opened. + // The OSD deployment has been removed manually. + // Let's re-open the block to re-hydrate the OSDInfo. + logger.Debugf("encrypted block device %q is not open, opening it now", block) + passphrase := os.Getenv(oposd.CephVolumeEncryptedKeyEnvVarName) + if passphrase == "" { + return nil, errors.Errorf("encryption passphrase is empty in env var %q", oposd.CephVolumeEncryptedKeyEnvVarName) + } + pvcName := os.Getenv(oposd.PVCNameEnvVarName) + if pvcName == "" { + return nil, errors.Errorf("pvc name is empty in env var %q", oposd.PVCNameEnvVarName) + } + target := oposd.EncryptionDMName(pvcName, oposd.DmcryptBlockType) + err = openEncryptedDevice(context, block, target, passphrase) + if err != nil { + return nil, errors.Wrapf(err, "failed to open encrypted block device %q on %q", block, target) + } + + // ceph-volume prefers to use /dev/mapper/ + encryptedBlock = oposd.EncryptionDMPath(pvcName, oposd.DmcryptBlockType) + } // If we have one child device, it should be the encrypted block but still verifying it isDeviceEncrypted, err := sys.IsDeviceEncrypted(context.Executor, encryptedBlock) if err != nil { diff --git a/pkg/operator/ceph/cluster/osd/config.go b/pkg/operator/ceph/cluster/osd/config.go index 55ac64b9c0822..ef57ee2799dc1 100644 --- a/pkg/operator/ceph/cluster/osd/config.go +++ b/pkg/operator/ceph/cluster/osd/config.go @@ -46,12 +46,12 @@ func encryptionKeyPath() string { return path.Join(opconfig.EtcCephDir, encryptionKeyFileName) } -func encryptionDMName(pvcName, blockType string) string { +func EncryptionDMName(pvcName, blockType string) string { return fmt.Sprintf("%s-%s", pvcName, blockType) } -func encryptionDMPath(pvcName, blockType string) string { - return path.Join("/dev/mapper", encryptionDMName(pvcName, blockType)) +func EncryptionDMPath(pvcName, blockType string) string { + return path.Join("/dev/mapper", EncryptionDMName(pvcName, blockType)) } func encryptionBlockDestinationCopy(mountPath, blockType string) string { diff --git a/pkg/operator/ceph/cluster/osd/config_test.go b/pkg/operator/ceph/cluster/osd/config_test.go index 4a8cd9e1c12dc..333aef3eedb97 100644 --- a/pkg/operator/ceph/cluster/osd/config_test.go +++ b/pkg/operator/ceph/cluster/osd/config_test.go @@ -46,9 +46,9 @@ func TestEncryptionBlockDestinationCopy(t *testing.T) { } func TestEncryptionDMPath(t *testing.T) { - assert.Equal(t, "/dev/mapper/set1-data-0-6rqdn-block-dmcrypt", encryptionDMPath("set1-data-0-6rqdn", DmcryptBlockType)) + assert.Equal(t, "/dev/mapper/set1-data-0-6rqdn-block-dmcrypt", EncryptionDMPath("set1-data-0-6rqdn", DmcryptBlockType)) } func TestEncryptionDMName(t *testing.T) { - assert.Equal(t, "set1-data-0-6rqdn-block-dmcrypt", encryptionDMName("set1-data-0-6rqdn", DmcryptBlockType)) + assert.Equal(t, "set1-data-0-6rqdn-block-dmcrypt", EncryptionDMName("set1-data-0-6rqdn", DmcryptBlockType)) } diff --git a/pkg/operator/ceph/cluster/osd/spec.go b/pkg/operator/ceph/cluster/osd/spec.go index 9185d3557b5ef..033f2f1f160d4 100644 --- a/pkg/operator/ceph/cluster/osd/spec.go +++ b/pkg/operator/ceph/cluster/osd/spec.go @@ -871,7 +871,7 @@ func (c *Cluster) generateEncryptionOpenBlockContainer(resources v1.ResourceRequ Command: []string{ "/bin/bash", "-c", - fmt.Sprintf(openEncryptedBlock, c.clusterInfo.FSID, pvcName, encryptionKeyPath(), encryptionBlockDestinationCopy(mountPath, blockType), encryptionDMName(pvcName, cryptBlockType), encryptionDMPath(pvcName, cryptBlockType)), + fmt.Sprintf(openEncryptedBlock, c.clusterInfo.FSID, pvcName, encryptionKeyPath(), encryptionBlockDestinationCopy(mountPath, blockType), EncryptionDMName(pvcName, cryptBlockType), EncryptionDMPath(pvcName, cryptBlockType)), }, VolumeMounts: []v1.VolumeMount{getPvcOSDBridgeMountActivate(mountPath, volumeMountPVCName), getDeviceMapperMount()}, SecurityContext: controller.PrivilegedContext(true), @@ -958,7 +958,7 @@ func (c *Cluster) generateEncryptionCopyBlockContainer(resources v1.ResourceRequ Command: []string{ "/bin/bash", "-c", - fmt.Sprintf(blockDevMapper, encryptionDMPath(pvcName, blockType), path.Join(mountPath, blockName)), + fmt.Sprintf(blockDevMapper, EncryptionDMPath(pvcName, blockType), path.Join(mountPath, blockName)), }, // volumeMountPVCName is crucial, especially when the block we copy is the metadata block // its value must be the name of the block PV so that all init containers use the same bridge (the emptyDir shared by all the init containers) @@ -1187,7 +1187,7 @@ func (c *Cluster) getExpandEncryptedPVCInitContainer(mountPath string, osdProps Command: []string{ "cryptsetup", }, - Args: []string{"--verbose", "resize", encryptionDMName(osdProps.pvc.ClaimName, DmcryptBlockType)}, + Args: []string{"--verbose", "resize", EncryptionDMName(osdProps.pvc.ClaimName, DmcryptBlockType)}, VolumeMounts: volMount, SecurityContext: controller.PrivilegedContext(true), Resources: osdProps.resources, @@ -1218,7 +1218,7 @@ func (c *Cluster) getEncryptedStatusPVCInitContainer(mountPath string, osdProps Command: []string{ "cryptsetup", }, - Args: []string{"--verbose", "status", encryptionDMName(osdProps.pvc.ClaimName, DmcryptBlockType)}, + Args: []string{"--verbose", "status", EncryptionDMName(osdProps.pvc.ClaimName, DmcryptBlockType)}, VolumeMounts: []v1.VolumeMount{getPvcOSDBridgeMountActivate(mountPath, osdProps.pvc.ClaimName)}, SecurityContext: controller.PrivilegedContext(true), Resources: osdProps.resources, diff --git a/pkg/util/exec/exec.go b/pkg/util/exec/exec.go index ad28e1f489150..54f40cbcbdcac 100644 --- a/pkg/util/exec/exec.go +++ b/pkg/util/exec/exec.go @@ -48,6 +48,7 @@ type Executor interface { ExecuteCommandWithOutput(command string, arg ...string) (string, error) ExecuteCommandWithCombinedOutput(command string, arg ...string) (string, error) ExecuteCommandWithTimeout(timeout time.Duration, command string, arg ...string) (string, error) + ExecuteCommandWithStdin(timeout time.Duration, command string, stdin *string, arg ...string) error } // CommandExecutor is the type of the Executor @@ -58,6 +59,14 @@ func (c *CommandExecutor) ExecuteCommand(command string, arg ...string) error { return c.ExecuteCommandWithEnv([]string{}, command, arg...) } +// ExecuteCommandWithStdin starts a process, provides stdin and wait for its completion with timeout. +func (c *CommandExecutor) ExecuteCommandWithStdin(timeout time.Duration, command string, stdin *string, arg ...string) error { + output, err := executeCommandWithTimeout(timeout, command, stdin, arg...) + logger.Infof("Command %q output: %q", command, output) + + return err +} + // ExecuteCommandWithEnv starts a process with env variables and wait for its completion func (*CommandExecutor) ExecuteCommandWithEnv(env []string, command string, arg ...string) error { cmd, stdout, stderr, err := startCommand(env, command, arg...) @@ -83,6 +92,11 @@ func IsTimeout(err error) bool { // ExecuteCommandWithTimeout starts a process and wait for its completion with timeout. func (*CommandExecutor) ExecuteCommandWithTimeout(timeout time.Duration, command string, arg ...string) (string, error) { + return executeCommandWithTimeout(timeout, command, nil, arg...) +} + +// executeCommandWithTimeout starts a process, provides stdin and wait for its completion with timeout. +func executeCommandWithTimeout(timeout time.Duration, command string, stdin *string, arg ...string) (string, error) { logCommand(command, arg...) //nolint:gosec // Rook controls the input to the exec arguments cmd := exec.Command(command, arg...) @@ -91,6 +105,10 @@ func (*CommandExecutor) ExecuteCommandWithTimeout(timeout time.Duration, command cmd.Stdout = &b cmd.Stderr = &b + if stdin != nil { + cmd.Stdin = strings.NewReader(*stdin) + } + if err := cmd.Start(); err != nil { return "", err } diff --git a/pkg/util/exec/exec_test.go b/pkg/util/exec/exec_test.go index 1940e67c30926..3774373c6427c 100644 --- a/pkg/util/exec/exec_test.go +++ b/pkg/util/exec/exec_test.go @@ -19,6 +19,7 @@ package exec import ( "os/exec" "testing" + "time" "github.com/pkg/errors" exectest "github.com/rook/rook/pkg/util/exec/test" @@ -116,3 +117,65 @@ func TestFakeTimeoutError(t *testing.T) { assert.True(t, IsTimeout(exectest.FakeTimeoutError("blah"))) assert.True(t, IsTimeout(exectest.FakeTimeoutError(""))) } + +func TestExecuteCommandWithTimeout(t *testing.T) { + type args struct { + timeout time.Duration + command string + stdin *string + arg []string + } + testString := "hello" + tests := []struct { + name string + args args + want string + wantErr bool + }{ + { + name: "test stdin", + args: args{ + timeout: 30 * time.Second, + command: "cat", + stdin: &testString, + arg: []string{}, + }, + want: testString, + wantErr: false, + }, + { + name: "test nil stdin", + args: args{ + timeout: 30 * time.Second, + command: "echo", + stdin: nil, + arg: []string{testString}, + }, + want: testString, + wantErr: false, + }, + { + name: "test timeout", + args: args{ + timeout: 0 * time.Second, + command: "cat", + stdin: &testString, + arg: []string{}, + }, + want: "", + wantErr: true, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, err := executeCommandWithTimeout(tt.args.timeout, tt.args.command, tt.args.stdin, tt.args.arg...) + if (err != nil) != tt.wantErr { + t.Errorf("executeCommandWithTimeout() error = %v, wantErr %v", err, tt.wantErr) + return + } + if got != tt.want { + t.Errorf("executeCommandWithTimeout() = %v, want %v", got, tt.want) + } + }) + } +} diff --git a/pkg/util/exec/test/mockexec.go b/pkg/util/exec/test/mockexec.go index 77ab95c41a11d..e81912bc3eafc 100644 --- a/pkg/util/exec/test/mockexec.go +++ b/pkg/util/exec/test/mockexec.go @@ -34,6 +34,7 @@ type MockExecutor struct { MockExecuteCommandWithOutput func(command string, arg ...string) (string, error) MockExecuteCommandWithCombinedOutput func(command string, arg ...string) (string, error) MockExecuteCommandWithTimeout func(timeout time.Duration, command string, arg ...string) (string, error) + MockExecuteCommandWithStdin func(timeout time.Duration, command string, stdin *string, arg ...string) error } // ExecuteCommand mocks ExecuteCommand @@ -45,6 +46,15 @@ func (e *MockExecutor) ExecuteCommand(command string, arg ...string) error { return nil } +// ExecuteCommandWithStdin starts a process, provides stdin and wait for its completion with timeout. +func (e *MockExecutor) ExecuteCommandWithStdin(timeout time.Duration, command string, stdin *string, arg ...string) error { + if e.MockExecuteCommand != nil { + return e.MockExecuteCommandWithStdin(timeout, command, stdin, arg...) + } + + return nil +} + // ExecuteCommandWithEnv mocks ExecuteCommandWithEnv func (e *MockExecutor) ExecuteCommandWithEnv(env []string, command string, arg ...string) error { if e.MockExecuteCommandWithEnv != nil { diff --git a/pkg/util/exec/translate_exec.go b/pkg/util/exec/translate_exec.go index ec0b750f475ee..0419f23dc8a4d 100644 --- a/pkg/util/exec/translate_exec.go +++ b/pkg/util/exec/translate_exec.go @@ -38,6 +38,12 @@ func (e *TranslateCommandExecutor) ExecuteCommand(command string, arg ...string) return e.Executor.ExecuteCommand(transCommand, transArgs...) } +// ExecuteCommandWithStdin starts a process, provides stdin and wait for its completion with timeout. +func (e *TranslateCommandExecutor) ExecuteCommandWithStdin(timeout time.Duration, command string, stdin *string, arg ...string) error { + transCommand, transArgs := e.Translator(command, arg...) + return e.Executor.ExecuteCommandWithStdin(timeout, transCommand, stdin, transArgs...) +} + // ExecuteCommandWithEnv starts a process with an env variable and wait for its completion func (e *TranslateCommandExecutor) ExecuteCommandWithEnv(env []string, command string, arg ...string) error { transCommand, transArgs := e.Translator(command, arg...)