Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

osd: re-open encrypted disk during osd-prepare-job if closed #11338

Merged
merged 1 commit into from Dec 1, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
16 changes: 14 additions & 2 deletions pkg/daemon/ceph/osd/encryption.go
Expand Up @@ -22,6 +22,7 @@ import (
"path"
"regexp"
"strings"
"time"

"github.com/pkg/errors"
v1 "github.com/rook/rook/pkg/apis/ceph.rook.io/v1"
Expand All @@ -32,8 +33,9 @@ import (
)

const (
cryptsetupBinary = "cryptsetup"
dmsetupBinary = "dmsetup"
cryptsetupBinary = "cryptsetup"
dmsetupBinary = "dmsetup"
luksOpenCmdTimeOut = 90 * time.Second
)

var (
Expand Down Expand Up @@ -160,6 +162,16 @@ func dumpLUKS(context *clusterd.Context, disk string) (string, error) {
return cryptsetupOut, nil
}

func openEncryptedDevice(context *clusterd.Context, disk, target, passphrase string) error {
args := []string{"luksOpen", "--verbose", "--allow-discards", disk, target}
err := context.Executor.ExecuteCommandWithStdin(luksOpenCmdTimeOut, cryptsetupBinary, &passphrase, args...)
leseb marked this conversation as resolved.
Show resolved Hide resolved
if err != nil {
return errors.Wrapf(err, "failed to open encrypted device %q", disk)
}

return nil
}

func isCephEncryptedBlock(context *clusterd.Context, currentClusterFSID string, disk string) bool {
metadata, err := dumpLUKS(context, disk)
if err != nil {
Expand Down
35 changes: 25 additions & 10 deletions pkg/daemon/ceph/osd/volume.go
Expand Up @@ -956,17 +956,32 @@ func GetCephVolumeRawOSDs(context *clusterd.Context, clusterInfo *client.Cluster
}
}
if encryptedBlock == "" {
// The encrypted block is not opened, this is an extreme corner case
// The OSD deployment has been removed manually AND the node rebooted
// So we need to re-open the block to re-hydrate the OSDInfo.
//
// Handling this case would mean, writing the encryption key on a temporary file, then call
// luksOpen to open the encrypted block and then call ceph-volume to list against the opened
// encrypted block.
// We don't implement this, yet and return an error.
return nil, errors.Errorf("failed to find the encrypted block device for %q, not opened?", block)
}
// The encrypted block is not opened.
// The encrypted device is closed in some cases when
// the OSD deployment has been removed manually accompanied
// by any of following cases:
// - node reboot
// - csi managed PVC being unmounted etc
// Let's re-open the block to re-hydrate the OSDInfo.
logger.Debugf("encrypted block device %q is not open, opening it now", block)
passphrase := os.Getenv(oposd.CephVolumeEncryptedKeyEnvVarName)
leseb marked this conversation as resolved.
Show resolved Hide resolved
if passphrase == "" {
return nil, errors.Errorf("encryption passphrase is empty in env var %q", oposd.CephVolumeEncryptedKeyEnvVarName)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To consider in a future PR, we should write the secret to a file instead of using an env var, as mentioned in #10994.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe we should do that in this PR so that we don't risk forgetting to fix it

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That requires changes in quite a lot of places actually.
It is better to do it in a separate future pr.

}
pvcName := os.Getenv(oposd.PVCNameEnvVarName)
if pvcName == "" {
return nil, errors.Errorf("pvc name is empty in env var %q", oposd.PVCNameEnvVarName)
}

target := oposd.EncryptionDMName(pvcName, oposd.DmcryptBlockType)
err = openEncryptedDevice(context, block, target, passphrase)
if err != nil {
return nil, errors.Wrapf(err, "failed to open encrypted block device %q on %q", block, target)
}

// ceph-volume prefers to use /dev/mapper/<name>
encryptedBlock = oposd.EncryptionDMPath(pvcName, oposd.DmcryptBlockType)
}
// If we have one child device, it should be the encrypted block but still verifying it
isDeviceEncrypted, err := sys.IsDeviceEncrypted(context.Executor, encryptedBlock)
if err != nil {
Expand Down
6 changes: 3 additions & 3 deletions pkg/operator/ceph/cluster/osd/config.go
Expand Up @@ -46,12 +46,12 @@ func encryptionKeyPath() string {
return path.Join(opconfig.EtcCephDir, encryptionKeyFileName)
}

func encryptionDMName(pvcName, blockType string) string {
func EncryptionDMName(pvcName, blockType string) string {
return fmt.Sprintf("%s-%s", pvcName, blockType)
}

func encryptionDMPath(pvcName, blockType string) string {
return path.Join("/dev/mapper", encryptionDMName(pvcName, blockType))
func EncryptionDMPath(pvcName, blockType string) string {
return path.Join("/dev/mapper", EncryptionDMName(pvcName, blockType))
}

func encryptionBlockDestinationCopy(mountPath, blockType string) string {
Expand Down
4 changes: 2 additions & 2 deletions pkg/operator/ceph/cluster/osd/config_test.go
Expand Up @@ -46,9 +46,9 @@ func TestEncryptionBlockDestinationCopy(t *testing.T) {
}

func TestEncryptionDMPath(t *testing.T) {
assert.Equal(t, "/dev/mapper/set1-data-0-6rqdn-block-dmcrypt", encryptionDMPath("set1-data-0-6rqdn", DmcryptBlockType))
assert.Equal(t, "/dev/mapper/set1-data-0-6rqdn-block-dmcrypt", EncryptionDMPath("set1-data-0-6rqdn", DmcryptBlockType))
}

func TestEncryptionDMName(t *testing.T) {
assert.Equal(t, "set1-data-0-6rqdn-block-dmcrypt", encryptionDMName("set1-data-0-6rqdn", DmcryptBlockType))
assert.Equal(t, "set1-data-0-6rqdn-block-dmcrypt", EncryptionDMName("set1-data-0-6rqdn", DmcryptBlockType))
}
8 changes: 4 additions & 4 deletions pkg/operator/ceph/cluster/osd/spec.go
Expand Up @@ -871,7 +871,7 @@ func (c *Cluster) generateEncryptionOpenBlockContainer(resources v1.ResourceRequ
Command: []string{
"/bin/bash",
"-c",
fmt.Sprintf(openEncryptedBlock, c.clusterInfo.FSID, pvcName, encryptionKeyPath(), encryptionBlockDestinationCopy(mountPath, blockType), encryptionDMName(pvcName, cryptBlockType), encryptionDMPath(pvcName, cryptBlockType)),
fmt.Sprintf(openEncryptedBlock, c.clusterInfo.FSID, pvcName, encryptionKeyPath(), encryptionBlockDestinationCopy(mountPath, blockType), EncryptionDMName(pvcName, cryptBlockType), EncryptionDMPath(pvcName, cryptBlockType)),
},
VolumeMounts: []v1.VolumeMount{getPvcOSDBridgeMountActivate(mountPath, volumeMountPVCName), getDeviceMapperMount()},
SecurityContext: controller.PrivilegedContext(true),
Expand Down Expand Up @@ -958,7 +958,7 @@ func (c *Cluster) generateEncryptionCopyBlockContainer(resources v1.ResourceRequ
Command: []string{
"/bin/bash",
"-c",
fmt.Sprintf(blockDevMapper, encryptionDMPath(pvcName, blockType), path.Join(mountPath, blockName)),
fmt.Sprintf(blockDevMapper, EncryptionDMPath(pvcName, blockType), path.Join(mountPath, blockName)),
},
// volumeMountPVCName is crucial, especially when the block we copy is the metadata block
// its value must be the name of the block PV so that all init containers use the same bridge (the emptyDir shared by all the init containers)
Expand Down Expand Up @@ -1187,7 +1187,7 @@ func (c *Cluster) getExpandEncryptedPVCInitContainer(mountPath string, osdProps
Command: []string{
"cryptsetup",
},
Args: []string{"--verbose", "resize", encryptionDMName(osdProps.pvc.ClaimName, DmcryptBlockType)},
Args: []string{"--verbose", "resize", EncryptionDMName(osdProps.pvc.ClaimName, DmcryptBlockType)},
VolumeMounts: volMount,
SecurityContext: controller.PrivilegedContext(true),
Resources: osdProps.resources,
Expand Down Expand Up @@ -1218,7 +1218,7 @@ func (c *Cluster) getEncryptedStatusPVCInitContainer(mountPath string, osdProps
Command: []string{
"cryptsetup",
},
Args: []string{"--verbose", "status", encryptionDMName(osdProps.pvc.ClaimName, DmcryptBlockType)},
Args: []string{"--verbose", "status", EncryptionDMName(osdProps.pvc.ClaimName, DmcryptBlockType)},
VolumeMounts: []v1.VolumeMount{getPvcOSDBridgeMountActivate(mountPath, osdProps.pvc.ClaimName)},
SecurityContext: controller.PrivilegedContext(true),
Resources: osdProps.resources,
Expand Down
18 changes: 18 additions & 0 deletions pkg/util/exec/exec.go
Expand Up @@ -48,6 +48,7 @@ type Executor interface {
ExecuteCommandWithOutput(command string, arg ...string) (string, error)
ExecuteCommandWithCombinedOutput(command string, arg ...string) (string, error)
ExecuteCommandWithTimeout(timeout time.Duration, command string, arg ...string) (string, error)
ExecuteCommandWithStdin(timeout time.Duration, command string, stdin *string, arg ...string) error
}

// CommandExecutor is the type of the Executor
Expand All @@ -58,6 +59,14 @@ func (c *CommandExecutor) ExecuteCommand(command string, arg ...string) error {
return c.ExecuteCommandWithEnv([]string{}, command, arg...)
}

// ExecuteCommandWithStdin starts a process, provides stdin and wait for its completion with timeout.
func (c *CommandExecutor) ExecuteCommandWithStdin(timeout time.Duration, command string, stdin *string, arg ...string) error {
output, err := executeCommandWithTimeout(timeout, command, stdin, arg...)
logger.Infof("Command %q output: %q", command, output)

return err
}

// ExecuteCommandWithEnv starts a process with env variables and wait for its completion
func (*CommandExecutor) ExecuteCommandWithEnv(env []string, command string, arg ...string) error {
cmd, stdout, stderr, err := startCommand(env, command, arg...)
Expand All @@ -83,6 +92,11 @@ func IsTimeout(err error) bool {

// ExecuteCommandWithTimeout starts a process and wait for its completion with timeout.
func (*CommandExecutor) ExecuteCommandWithTimeout(timeout time.Duration, command string, arg ...string) (string, error) {
return executeCommandWithTimeout(timeout, command, nil, arg...)
}

// executeCommandWithTimeout starts a process, provides stdin and wait for its completion with timeout.
func executeCommandWithTimeout(timeout time.Duration, command string, stdin *string, arg ...string) (string, error) {
logCommand(command, arg...)
//nolint:gosec // Rook controls the input to the exec arguments
cmd := exec.Command(command, arg...)
Expand All @@ -91,6 +105,10 @@ func (*CommandExecutor) ExecuteCommandWithTimeout(timeout time.Duration, command
cmd.Stdout = &b
cmd.Stderr = &b

if stdin != nil {
cmd.Stdin = strings.NewReader(*stdin)
}

if err := cmd.Start(); err != nil {
return "", err
}
Expand Down
63 changes: 63 additions & 0 deletions pkg/util/exec/exec_test.go
Expand Up @@ -19,6 +19,7 @@ package exec
import (
"os/exec"
"testing"
"time"

"github.com/pkg/errors"
exectest "github.com/rook/rook/pkg/util/exec/test"
Expand Down Expand Up @@ -116,3 +117,65 @@ func TestFakeTimeoutError(t *testing.T) {
assert.True(t, IsTimeout(exectest.FakeTimeoutError("blah")))
assert.True(t, IsTimeout(exectest.FakeTimeoutError("")))
}

func TestExecuteCommandWithTimeout(t *testing.T) {
type args struct {
timeout time.Duration
command string
stdin *string
arg []string
}
testString := "hello"
tests := []struct {
name string
args args
want string
wantErr bool
}{
{
name: "test stdin",
args: args{
timeout: 30 * time.Second,
command: "cat",
stdin: &testString,
arg: []string{},
},
want: testString,
wantErr: false,
},
{
name: "test nil stdin",
args: args{
timeout: 30 * time.Second,
command: "echo",
stdin: nil,
arg: []string{testString},
},
want: testString,
wantErr: false,
},
{
name: "test timeout",
args: args{
timeout: 0 * time.Second,
command: "cat",
stdin: &testString,
arg: []string{},
},
want: "",
wantErr: true,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
got, err := executeCommandWithTimeout(tt.args.timeout, tt.args.command, tt.args.stdin, tt.args.arg...)
if (err != nil) != tt.wantErr {
t.Errorf("executeCommandWithTimeout() error = %v, wantErr %v", err, tt.wantErr)
return
}
if got != tt.want {
t.Errorf("executeCommandWithTimeout() = %v, want %v", got, tt.want)
}
})
}
}
10 changes: 10 additions & 0 deletions pkg/util/exec/test/mockexec.go
Expand Up @@ -34,6 +34,7 @@ type MockExecutor struct {
MockExecuteCommandWithOutput func(command string, arg ...string) (string, error)
MockExecuteCommandWithCombinedOutput func(command string, arg ...string) (string, error)
MockExecuteCommandWithTimeout func(timeout time.Duration, command string, arg ...string) (string, error)
MockExecuteCommandWithStdin func(timeout time.Duration, command string, stdin *string, arg ...string) error
}

// ExecuteCommand mocks ExecuteCommand
Expand All @@ -45,6 +46,15 @@ func (e *MockExecutor) ExecuteCommand(command string, arg ...string) error {
return nil
}

// ExecuteCommandWithStdin starts a process, provides stdin and wait for its completion with timeout.
func (e *MockExecutor) ExecuteCommandWithStdin(timeout time.Duration, command string, stdin *string, arg ...string) error {
if e.MockExecuteCommand != nil {
return e.MockExecuteCommandWithStdin(timeout, command, stdin, arg...)
}

return nil
}

// ExecuteCommandWithEnv mocks ExecuteCommandWithEnv
func (e *MockExecutor) ExecuteCommandWithEnv(env []string, command string, arg ...string) error {
if e.MockExecuteCommandWithEnv != nil {
Expand Down
6 changes: 6 additions & 0 deletions pkg/util/exec/translate_exec.go
Expand Up @@ -38,6 +38,12 @@ func (e *TranslateCommandExecutor) ExecuteCommand(command string, arg ...string)
return e.Executor.ExecuteCommand(transCommand, transArgs...)
}

// ExecuteCommandWithStdin starts a process, provides stdin and wait for its completion with timeout.
func (e *TranslateCommandExecutor) ExecuteCommandWithStdin(timeout time.Duration, command string, stdin *string, arg ...string) error {
transCommand, transArgs := e.Translator(command, arg...)
return e.Executor.ExecuteCommandWithStdin(timeout, transCommand, stdin, transArgs...)
}

// ExecuteCommandWithEnv starts a process with an env variable and wait for its completion
func (e *TranslateCommandExecutor) ExecuteCommandWithEnv(env []string, command string, arg ...string) error {
transCommand, transArgs := e.Translator(command, arg...)
Expand Down