Skip to content

Commit

Permalink
osd: re-open encrypted disk during osd-prepare-job if closed
Browse files Browse the repository at this point in the history
This commit implements this corner case during osd-prepare job.
```
The encrypted block is not opened, this is an extreme corner case
The OSD deployment has been removed manually AND the node rebooted
So we need to re-open the block to re-hydrate the OSDInfo.

Handling this case would mean, writing the encryption key on a
temporary file, then call luksOpen to open the encrypted block and
then call ceph-volume to list against the opened encrypted block.
We don't implement this, yet and return an error.
```
When underlying PVC for osd are CSI provisioned, the encrypted device
is closed when PVC is unmounted due to osd pod being deleted.
Therefore, this may occur more frequently and needs to be handled.
This commit implements the fix for the same.

Signed-off-by: Rakshith R <rar@redhat.com>
  • Loading branch information
Rakshith-R committed Nov 29, 2022
1 parent fdc2ab5 commit 09af2ac
Show file tree
Hide file tree
Showing 9 changed files with 141 additions and 21 deletions.
16 changes: 14 additions & 2 deletions pkg/daemon/ceph/osd/encryption.go
Expand Up @@ -22,6 +22,7 @@ import (
"path"
"regexp"
"strings"
"time"

"github.com/pkg/errors"
v1 "github.com/rook/rook/pkg/apis/ceph.rook.io/v1"
Expand All @@ -32,8 +33,9 @@ import (
)

const (
cryptsetupBinary = "cryptsetup"
dmsetupBinary = "dmsetup"
cryptsetupBinary = "cryptsetup"
dmsetupBinary = "dmsetup"
luksOpenCmdTimeOut = 10 * time.Second
)

var (
Expand Down Expand Up @@ -160,6 +162,16 @@ func dumpLUKS(context *clusterd.Context, disk string) (string, error) {
return cryptsetupOut, nil
}

func openEncryptedDevice(context *clusterd.Context, disk, target, passphrase string) error {
args := []string{"luksOpen", "--verbose", "--allow-discards", disk, target}
err := context.Executor.ExecuteCommandWithStdin(luksOpenCmdTimeOut, cryptsetupBinary, &passphrase, args...)
if err != nil {
return errors.Wrapf(err, "failed to open encrypted device %q", disk)
}

return nil
}

func isCephEncryptedBlock(context *clusterd.Context, currentClusterFSID string, disk string) bool {
metadata, err := dumpLUKS(context, disk)
if err != nil {
Expand Down
31 changes: 21 additions & 10 deletions pkg/daemon/ceph/osd/volume.go
Expand Up @@ -956,17 +956,28 @@ func GetCephVolumeRawOSDs(context *clusterd.Context, clusterInfo *client.Cluster
}
}
if encryptedBlock == "" {
// The encrypted block is not opened, this is an extreme corner case
// The OSD deployment has been removed manually AND the node rebooted
// So we need to re-open the block to re-hydrate the OSDInfo.
//
// Handling this case would mean, writing the encryption key on a temporary file, then call
// luksOpen to open the encrypted block and then call ceph-volume to list against the opened
// encrypted block.
// We don't implement this, yet and return an error.
return nil, errors.Errorf("failed to find the encrypted block device for %q, not opened?", block)
}
// The encrypted block is not opened.
// The OSD deployment has been removed manually.
// Let's re-open the block to re-hydrate the OSDInfo.
logger.Debugf("encrypted block device %q is not open, opening it now", block)
passphrase := os.Getenv(oposd.CephVolumeEncryptedKeyEnvVarName)
if passphrase == "" {
return nil, errors.Errorf("encryption passphrase is empty in env var %q", oposd.CephVolumeEncryptedKeyEnvVarName)
}
pvcName := os.Getenv(oposd.PVCNameEnvVarName)
if pvcName == "" {
return nil, errors.Errorf("pvc name is empty in env var %q", oposd.PVCNameEnvVarName)
}

target := oposd.EncryptionDMName(pvcName, oposd.DmcryptBlockType)
err = openEncryptedDevice(context, block, target, passphrase)
if err != nil {
return nil, errors.Wrapf(err, "failed to open encrypted block device %q on %q", block, target)
}

// ceph-volume prefers to use /dev/mapper/<name>
encryptedBlock = oposd.EncryptionDMPath(pvcName, oposd.DmcryptBlockType)
}
// If we have one child device, it should be the encrypted block but still verifying it
isDeviceEncrypted, err := sys.IsDeviceEncrypted(context.Executor, encryptedBlock)
if err != nil {
Expand Down
6 changes: 3 additions & 3 deletions pkg/operator/ceph/cluster/osd/config.go
Expand Up @@ -46,12 +46,12 @@ func encryptionKeyPath() string {
return path.Join(opconfig.EtcCephDir, encryptionKeyFileName)
}

func encryptionDMName(pvcName, blockType string) string {
func EncryptionDMName(pvcName, blockType string) string {
return fmt.Sprintf("%s-%s", pvcName, blockType)
}

func encryptionDMPath(pvcName, blockType string) string {
return path.Join("/dev/mapper", encryptionDMName(pvcName, blockType))
func EncryptionDMPath(pvcName, blockType string) string {
return path.Join("/dev/mapper", EncryptionDMName(pvcName, blockType))
}

func encryptionBlockDestinationCopy(mountPath, blockType string) string {
Expand Down
4 changes: 2 additions & 2 deletions pkg/operator/ceph/cluster/osd/config_test.go
Expand Up @@ -46,9 +46,9 @@ func TestEncryptionBlockDestinationCopy(t *testing.T) {
}

func TestEncryptionDMPath(t *testing.T) {
assert.Equal(t, "/dev/mapper/set1-data-0-6rqdn-block-dmcrypt", encryptionDMPath("set1-data-0-6rqdn", DmcryptBlockType))
assert.Equal(t, "/dev/mapper/set1-data-0-6rqdn-block-dmcrypt", EncryptionDMPath("set1-data-0-6rqdn", DmcryptBlockType))
}

func TestEncryptionDMName(t *testing.T) {
assert.Equal(t, "set1-data-0-6rqdn-block-dmcrypt", encryptionDMName("set1-data-0-6rqdn", DmcryptBlockType))
assert.Equal(t, "set1-data-0-6rqdn-block-dmcrypt", EncryptionDMName("set1-data-0-6rqdn", DmcryptBlockType))
}
8 changes: 4 additions & 4 deletions pkg/operator/ceph/cluster/osd/spec.go
Expand Up @@ -871,7 +871,7 @@ func (c *Cluster) generateEncryptionOpenBlockContainer(resources v1.ResourceRequ
Command: []string{
"/bin/bash",
"-c",
fmt.Sprintf(openEncryptedBlock, c.clusterInfo.FSID, pvcName, encryptionKeyPath(), encryptionBlockDestinationCopy(mountPath, blockType), encryptionDMName(pvcName, cryptBlockType), encryptionDMPath(pvcName, cryptBlockType)),
fmt.Sprintf(openEncryptedBlock, c.clusterInfo.FSID, pvcName, encryptionKeyPath(), encryptionBlockDestinationCopy(mountPath, blockType), EncryptionDMName(pvcName, cryptBlockType), EncryptionDMPath(pvcName, cryptBlockType)),
},
VolumeMounts: []v1.VolumeMount{getPvcOSDBridgeMountActivate(mountPath, volumeMountPVCName), getDeviceMapperMount()},
SecurityContext: controller.PrivilegedContext(true),
Expand Down Expand Up @@ -958,7 +958,7 @@ func (c *Cluster) generateEncryptionCopyBlockContainer(resources v1.ResourceRequ
Command: []string{
"/bin/bash",
"-c",
fmt.Sprintf(blockDevMapper, encryptionDMPath(pvcName, blockType), path.Join(mountPath, blockName)),
fmt.Sprintf(blockDevMapper, EncryptionDMPath(pvcName, blockType), path.Join(mountPath, blockName)),
},
// volumeMountPVCName is crucial, especially when the block we copy is the metadata block
// its value must be the name of the block PV so that all init containers use the same bridge (the emptyDir shared by all the init containers)
Expand Down Expand Up @@ -1187,7 +1187,7 @@ func (c *Cluster) getExpandEncryptedPVCInitContainer(mountPath string, osdProps
Command: []string{
"cryptsetup",
},
Args: []string{"--verbose", "resize", encryptionDMName(osdProps.pvc.ClaimName, DmcryptBlockType)},
Args: []string{"--verbose", "resize", EncryptionDMName(osdProps.pvc.ClaimName, DmcryptBlockType)},
VolumeMounts: volMount,
SecurityContext: controller.PrivilegedContext(true),
Resources: osdProps.resources,
Expand Down Expand Up @@ -1218,7 +1218,7 @@ func (c *Cluster) getEncryptedStatusPVCInitContainer(mountPath string, osdProps
Command: []string{
"cryptsetup",
},
Args: []string{"--verbose", "status", encryptionDMName(osdProps.pvc.ClaimName, DmcryptBlockType)},
Args: []string{"--verbose", "status", EncryptionDMName(osdProps.pvc.ClaimName, DmcryptBlockType)},
VolumeMounts: []v1.VolumeMount{getPvcOSDBridgeMountActivate(mountPath, osdProps.pvc.ClaimName)},
SecurityContext: controller.PrivilegedContext(true),
Resources: osdProps.resources,
Expand Down
18 changes: 18 additions & 0 deletions pkg/util/exec/exec.go
Expand Up @@ -48,6 +48,7 @@ type Executor interface {
ExecuteCommandWithOutput(command string, arg ...string) (string, error)
ExecuteCommandWithCombinedOutput(command string, arg ...string) (string, error)
ExecuteCommandWithTimeout(timeout time.Duration, command string, arg ...string) (string, error)
ExecuteCommandWithStdin(timeout time.Duration, command string, stdin *string, arg ...string) error
}

// CommandExecutor is the type of the Executor
Expand All @@ -58,6 +59,14 @@ func (c *CommandExecutor) ExecuteCommand(command string, arg ...string) error {
return c.ExecuteCommandWithEnv([]string{}, command, arg...)
}

// ExecuteCommandWithStdin starts a process, provides stdin and wait for its completion with timeout.
func (c *CommandExecutor) ExecuteCommandWithStdin(timeout time.Duration, command string, stdin *string, arg ...string) error {
output, err := executeCommandWithTimeout(timeout, command, stdin, arg...)
logger.Infof("Command %q output: %q", command, output)

return err
}

// ExecuteCommandWithEnv starts a process with env variables and wait for its completion
func (*CommandExecutor) ExecuteCommandWithEnv(env []string, command string, arg ...string) error {
cmd, stdout, stderr, err := startCommand(env, command, arg...)
Expand All @@ -83,6 +92,11 @@ func IsTimeout(err error) bool {

// ExecuteCommandWithTimeout starts a process and wait for its completion with timeout.
func (*CommandExecutor) ExecuteCommandWithTimeout(timeout time.Duration, command string, arg ...string) (string, error) {
return executeCommandWithTimeout(timeout, command, nil, arg...)
}

// executeCommandWithTimeout starts a process, provides stdin and wait for its completion with timeout.
func executeCommandWithTimeout(timeout time.Duration, command string, stdin *string, arg ...string) (string, error) {
logCommand(command, arg...)
//nolint:gosec // Rook controls the input to the exec arguments
cmd := exec.Command(command, arg...)
Expand All @@ -91,6 +105,10 @@ func (*CommandExecutor) ExecuteCommandWithTimeout(timeout time.Duration, command
cmd.Stdout = &b
cmd.Stderr = &b

if stdin != nil {
cmd.Stdin = strings.NewReader(*stdin)
}

if err := cmd.Start(); err != nil {
return "", err
}
Expand Down
63 changes: 63 additions & 0 deletions pkg/util/exec/exec_test.go
Expand Up @@ -19,6 +19,7 @@ package exec
import (
"os/exec"
"testing"
"time"

"github.com/pkg/errors"
exectest "github.com/rook/rook/pkg/util/exec/test"
Expand Down Expand Up @@ -116,3 +117,65 @@ func TestFakeTimeoutError(t *testing.T) {
assert.True(t, IsTimeout(exectest.FakeTimeoutError("blah")))
assert.True(t, IsTimeout(exectest.FakeTimeoutError("")))
}

func TestExecuteCommandWithTimeout(t *testing.T) {
type args struct {
timeout time.Duration
command string
stdin *string
arg []string
}
testString := "hello"
tests := []struct {
name string
args args
want string
wantErr bool
}{
{
name: "test stdin",
args: args{
timeout: 30 * time.Second,
command: "cat",
stdin: &testString,
arg: []string{},
},
want: testString,
wantErr: false,
},
{
name: "test nil stdin",
args: args{
timeout: 30 * time.Second,
command: "echo",
stdin: nil,
arg: []string{testString},
},
want: testString,
wantErr: false,
},
{
name: "test timeout",
args: args{
timeout: 0 * time.Second,
command: "cat",
stdin: &testString,
arg: []string{},
},
want: "",
wantErr: true,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
got, err := executeCommandWithTimeout(tt.args.timeout, tt.args.command, tt.args.stdin, tt.args.arg...)
if (err != nil) != tt.wantErr {
t.Errorf("executeCommandWithTimeout() error = %v, wantErr %v", err, tt.wantErr)
return
}
if got != tt.want {
t.Errorf("executeCommandWithTimeout() = %v, want %v", got, tt.want)
}
})
}
}
10 changes: 10 additions & 0 deletions pkg/util/exec/test/mockexec.go
Expand Up @@ -34,6 +34,7 @@ type MockExecutor struct {
MockExecuteCommandWithOutput func(command string, arg ...string) (string, error)
MockExecuteCommandWithCombinedOutput func(command string, arg ...string) (string, error)
MockExecuteCommandWithTimeout func(timeout time.Duration, command string, arg ...string) (string, error)
MockExecuteCommandWithStdin func(timeout time.Duration, command string, stdin *string, arg ...string) error
}

// ExecuteCommand mocks ExecuteCommand
Expand All @@ -45,6 +46,15 @@ func (e *MockExecutor) ExecuteCommand(command string, arg ...string) error {
return nil
}

// ExecuteCommandWithStdin starts a process, provides stdin and wait for its completion with timeout.
func (e *MockExecutor) ExecuteCommandWithStdin(timeout time.Duration, command string, stdin *string, arg ...string) error {
if e.MockExecuteCommand != nil {
return e.MockExecuteCommandWithStdin(timeout, command, stdin, arg...)
}

return nil
}

// ExecuteCommandWithEnv mocks ExecuteCommandWithEnv
func (e *MockExecutor) ExecuteCommandWithEnv(env []string, command string, arg ...string) error {
if e.MockExecuteCommandWithEnv != nil {
Expand Down
6 changes: 6 additions & 0 deletions pkg/util/exec/translate_exec.go
Expand Up @@ -38,6 +38,12 @@ func (e *TranslateCommandExecutor) ExecuteCommand(command string, arg ...string)
return e.Executor.ExecuteCommand(transCommand, transArgs...)
}

// ExecuteCommandWithStdin starts a process, provides stdin and wait for its completion with timeout.
func (e *TranslateCommandExecutor) ExecuteCommandWithStdin(timeout time.Duration, command string, stdin *string, arg ...string) error {
transCommand, transArgs := e.Translator(command, arg...)
return e.Executor.ExecuteCommandWithStdin(timeout, transCommand, stdin, transArgs...)
}

// ExecuteCommandWithEnv starts a process with an env variable and wait for its completion
func (e *TranslateCommandExecutor) ExecuteCommandWithEnv(env []string, command string, arg ...string) error {
transCommand, transArgs := e.Translator(command, arg...)
Expand Down

0 comments on commit 09af2ac

Please sign in to comment.