Skip to content

Commit

Permalink
Merge pull request #9520 from rook/mergify/bp/release-1.8/pr-9468
Browse files Browse the repository at this point in the history
core: rgw: allow specifying daemon startup probes (backport #9468)
  • Loading branch information
mergify[bot] committed Jan 4, 2022
2 parents a5f8e42 + 86c2301 commit f5bcc01
Show file tree
Hide file tree
Showing 18 changed files with 688 additions and 36 deletions.
26 changes: 20 additions & 6 deletions Documentation/ceph-cluster-crd.md
Expand Up @@ -617,8 +617,9 @@ Currently three health checks are implemented:
* `osd`: health check on the ceph osds
* `status`: ceph health status check, periodically check the Ceph health state and reflects it in the CephCluster CR status field.

The liveness probe of each daemon can also be controlled via `livenessProbe`, the setting is valid for `mon`, `mgr` and `osd`.
Here is a complete example for both `daemonHealth` and `livenessProbe`:
The liveness probe and startup probe of each daemon can also be controlled via `livenessProbe` and
`startupProbe` respectively. The settings are valid for `mon`, `mgr` and `osd`.
Here is a complete example for `daemonHealth`, `livenessProbe`, and `startupProbe`:

```yaml
healthCheck:
Expand All @@ -639,21 +640,34 @@ healthCheck:
disabled: false
osd:
disabled: false
startupProbe:
mon:
disabled: false
mgr:
disabled: false
osd:
disabled: false
```

The probe itself can also be overridden, refer to the [Kubernetes documentation](https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/#define-a-liveness-command).
The probe's timing values and thresholds (but not the probe itself) can also be overridden.
For more info, refer to the
[Kubernetes documentation](https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/#define-a-liveness-command).

For example, you could change the `mgr` probe by applying:

```yaml
healthCheck:
startupProbe:
mgr:
disabled: false
probe:
initialDelaySeconds: 3
periodSeconds: 3
failureThreshold: 30
livenessProbe:
mgr:
disabled: false
probe:
httpGet:
path: /
port: 9283
initialDelaySeconds: 3
periodSeconds: 3
```
Expand Down
16 changes: 16 additions & 0 deletions Documentation/ceph-object-store-crd.md
Expand Up @@ -163,6 +163,12 @@ Rook-Ceph will by default monitor the state of the object store endpoints.
The following CRD settings are available:

* `healthCheck`: main object store health monitoring section
* `bucket`: Rook checks that the object store is usable regularly. This is explained in more
detail below. Use this config to disable or change the interval at which Rook verifies the
object store connectivity.
* `startupProbe`: Disable, or override timing and threshold values of the object gateway startup probe.
* `livenessProbe`: Disable, or override timing and threshold values of the object gateway liveness probe.
* `readinessProbe`: Disable, or override timing and threshold values of the object gateway readiness probe.

Here is a complete example:

Expand All @@ -171,6 +177,16 @@ healthCheck:
bucket:
disabled: false
interval: 60s
startupProbe:
disabled: false
livenessProbe:
disabled: false
periodSeconds: 5
failureThreshold: 4
readinessProbe:
disabled: false
periodSeconds: 5
failureThreshold: 2
```

The endpoint health check procedure is the following:
Expand Down
197 changes: 196 additions & 1 deletion deploy/charts/rook-ceph/templates/resources.yaml

Large diffs are not rendered by default.

10 changes: 9 additions & 1 deletion deploy/examples/cluster.yaml
Expand Up @@ -265,11 +265,19 @@ spec:
status:
disabled: false
interval: 60s
# Change pod liveness probe, it works for all mon,mgr,osd daemons
# Change pod liveness probe timing or threshold values. Works for all mon,mgr,osd daemons.
livenessProbe:
mon:
disabled: false
mgr:
disabled: false
osd:
disabled: false
# Change pod startup probe timing or threshold values. Works for all mon,mgr,osd daemons.
startupProbe:
mon:
disabled: false
mgr:
disabled: false
osd:
disabled: false
197 changes: 196 additions & 1 deletion deploy/examples/crds.yaml

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions deploy/examples/object.yaml
Expand Up @@ -108,6 +108,8 @@ spec:
disabled: false
interval: 60s
# Configure the pod probes for the rgw daemon
startupProbe:
disabled: false
livenessProbe:
disabled: false
readinessProbe:
Expand Down
Expand Up @@ -20,6 +20,10 @@ import (
corev1 "k8s.io/api/core/v1"
)

/*
* Liveness probes
*/

// GetMonLivenessProbe returns the liveness probe for the MON service
func GetMonLivenessProbe(l CephClusterHealthCheckSpec) *corev1.Probe {
return l.LivenessProbe[ResourcesKeyMon].Probe
Expand All @@ -39,3 +43,27 @@ func GetOSDLivenessProbe(l CephClusterHealthCheckSpec) *corev1.Probe {
func GetMdsLivenessProbe(l CephClusterHealthCheckSpec) *corev1.Probe {
return l.LivenessProbe[ResourcesKeyMDS].Probe
}

/*
* Startup probes
*/

// GetMonStartupProbe returns the startup probe override for the MON service.
// Returns nil when no override is configured for the mon daemon. The nil
// guard avoids a nil pointer dereference when the map entry is absent or
// explicitly nil (e.g. a null value in the user-supplied YAML spec).
func GetMonStartupProbe(l CephClusterHealthCheckSpec) *corev1.Probe {
	if spec, ok := l.StartupProbe[ResourcesKeyMon]; ok && spec != nil {
		return spec.Probe
	}
	return nil
}

// GetMgrStartupProbe returns the startup probe override for the MGR service.
// Returns nil when no override is configured for the mgr daemon. The nil
// guard avoids a nil pointer dereference when the map entry is absent or
// explicitly nil (e.g. a null value in the user-supplied YAML spec).
func GetMgrStartupProbe(l CephClusterHealthCheckSpec) *corev1.Probe {
	if spec, ok := l.StartupProbe[ResourcesKeyMgr]; ok && spec != nil {
		return spec.Probe
	}
	return nil
}

// GetOSDStartupProbe returns the startup probe override for the OSD service.
// Returns nil when no override is configured for the osd daemon. The nil
// guard avoids a nil pointer dereference when the map entry is absent or
// explicitly nil (e.g. a null value in the user-supplied YAML spec).
func GetOSDStartupProbe(l CephClusterHealthCheckSpec) *corev1.Probe {
	if spec, ok := l.StartupProbe[ResourcesKeyOSD]; ok && spec != nil {
		return spec.Probe
	}
	return nil
}

// GetMdsStartupProbe returns the startup probe override for the MDS service.
// Returns nil when no override is configured for the mds daemon. The nil
// guard avoids a nil pointer dereference when the map entry is absent or
// explicitly nil (e.g. a null value in the user-supplied YAML spec).
func GetMdsStartupProbe(l CephClusterHealthCheckSpec) *corev1.Probe {
	if spec, ok := l.StartupProbe[ResourcesKeyMDS]; ok && spec != nil {
		return spec.Probe
	}
	return nil
}
7 changes: 6 additions & 1 deletion pkg/apis/ceph.rook.io/v1/types.go
Expand Up @@ -59,9 +59,12 @@ type CephClusterHealthCheckSpec struct {
// +optional
// +nullable
DaemonHealth DaemonHealthSpec `json:"daemonHealth,omitempty"`
// LivenessProbe allows to change the livenessprobe configuration for a given daemon
// LivenessProbe allows changing the livenessProbe configuration for a given daemon
// +optional
LivenessProbe map[KeyType]*ProbeSpec `json:"livenessProbe,omitempty"`
// StartupProbe allows changing the startupProbe configuration for a given daemon
// +optional
StartupProbe map[KeyType]*ProbeSpec `json:"startupProbe,omitempty"`
}

// DaemonHealthSpec is a daemon health check
Expand Down Expand Up @@ -1309,6 +1312,8 @@ type BucketHealthCheckSpec struct {
LivenessProbe *ProbeSpec `json:"livenessProbe,omitempty"`
// +optional
ReadinessProbe *ProbeSpec `json:"readinessProbe,omitempty"`
// +optional
StartupProbe *ProbeSpec `json:"startupProbe,omitempty"`
}

// HealthCheckSpec represents the health check of an object store bucket
Expand Down
20 changes: 20 additions & 0 deletions pkg/apis/ceph.rook.io/v1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion pkg/operator/ceph/cluster/mgr/spec.go
Expand Up @@ -184,11 +184,12 @@ func (c *Cluster) makeMgrDaemonContainer(mgrConfig *mgrConfig) v1.Container {
),
Resources: cephv1.GetMgrResources(c.spec.Resources),
SecurityContext: controller.PodSecurityContext(),
StartupProbe: controller.GenerateStartupProbeExecDaemon(config.MgrType, mgrConfig.DaemonID),
LivenessProbe: controller.GenerateLivenessProbeExecDaemon(config.MgrType, mgrConfig.DaemonID),
WorkingDir: config.VarLogCephDir,
}

// If the liveness probe is enabled
container = config.ConfigureStartupProbe(cephv1.KeyMgr, container, c.spec.HealthCheck)
container = config.ConfigureLivenessProbe(cephv1.KeyMgr, container, c.spec.HealthCheck)

// If host networking is enabled, we don't need a bind addr that is different from the public addr
Expand Down
3 changes: 2 additions & 1 deletion pkg/operator/ceph/cluster/mon/spec.go
Expand Up @@ -313,6 +313,7 @@ func (c *Cluster) makeMonDaemonContainer(monConfig *monConfig) corev1.Container
k8sutil.PodIPEnvVar(podIPEnvVar),
),
Resources: cephv1.GetMonResources(c.spec.Resources),
StartupProbe: controller.GenerateStartupProbeExecDaemon(config.MonType, monConfig.DaemonName),
LivenessProbe: controller.GenerateLivenessProbeExecDaemon(config.MonType, monConfig.DaemonName),
WorkingDir: config.VarLogCephDir,
}
Expand All @@ -326,7 +327,7 @@ func (c *Cluster) makeMonDaemonContainer(monConfig *monConfig) corev1.Container
}
}

// If the liveness probe is enabled
container = config.ConfigureStartupProbe(cephv1.KeyMon, container, c.spec.HealthCheck)
container = config.ConfigureLivenessProbe(cephv1.KeyMon, container, c.spec.HealthCheck)

// If host networking is enabled, we don't need a bind addr that is different from the public addr
Expand Down
3 changes: 2 additions & 1 deletion pkg/operator/ceph/cluster/osd/spec.go
Expand Up @@ -552,6 +552,7 @@ func (c *Cluster) makeDeployment(osdProps osdProperties, osd OSDInfo, provisionC
Env: envVars,
Resources: osdProps.resources,
SecurityContext: securityContext,
StartupProbe: controller.GenerateStartupProbeExecDaemon(opconfig.OsdType, osdID),
LivenessProbe: controller.GenerateLivenessProbeExecDaemon(opconfig.OsdType, osdID),
WorkingDir: opconfig.VarLogCephDir,
},
Expand All @@ -571,7 +572,7 @@ func (c *Cluster) makeDeployment(osdProps osdProperties, osd OSDInfo, provisionC
podTemplateSpec.Spec.Containers = append(podTemplateSpec.Spec.Containers, *controller.LogCollectorContainer(fmt.Sprintf("ceph-osd.%s", osdID), c.clusterInfo.Namespace, c.spec))
}

// If the liveness probe is enabled
podTemplateSpec.Spec.Containers[0] = opconfig.ConfigureStartupProbe(cephv1.KeyOSD, podTemplateSpec.Spec.Containers[0], c.spec.HealthCheck)
podTemplateSpec.Spec.Containers[0] = opconfig.ConfigureLivenessProbe(cephv1.KeyOSD, podTemplateSpec.Spec.Containers[0], c.spec.HealthCheck)

if c.spec.Network.IsHost() {
Expand Down
26 changes: 26 additions & 0 deletions pkg/operator/ceph/config/livenessprobe.go
Expand Up @@ -51,6 +51,32 @@ func ConfigureLivenessProbe(daemon cephv1.KeyType, container v1.Container, healt
return container
}

// ConfigureStartupProbe returns the container with its startup probe adjusted
// according to the user-supplied healthCheck spec for the given daemon:
//   - no override configured: the container's Rook-default probe is kept;
//   - override with Disabled=true: the startup probe is removed entirely;
//   - override with a Probe: the user's timing/threshold values are merged
//     over the Rook default via GetProbeWithDefaults (the probe handler
//     itself always remains Rook's).
func ConfigureStartupProbe(daemon cephv1.KeyType, container v1.Container, healthCheck cephv1.CephClusterHealthCheckSpec) v1.Container {
	// Map each daemon type to its startup probe accessor.
	probeFnMap := map[cephv1.KeyType]fn{
		cephv1.KeyMon: cephv1.GetMonStartupProbe,
		cephv1.KeyMgr: cephv1.GetMgrStartupProbe,
		cephv1.KeyOSD: cephv1.GetOSDStartupProbe,
		cephv1.KeyMds: cephv1.GetMdsStartupProbe,
	}

	probeSpec, ok := healthCheck.StartupProbe[daemon]
	if !ok || probeSpec == nil {
		// No user override for this daemon (guard against a present-but-nil
		// map entry, e.g. a null value in the YAML spec): keep the default.
		return container
	}

	if probeSpec.Disabled {
		container.StartupProbe = nil
		return container
	}

	probeFn, ok := probeFnMap[daemon]
	if !ok {
		// Daemon type without a registered startup probe accessor: nothing to override.
		return container
	}

	// If the spec value is not empty, apply it along with defaults for any
	// fields the user did not specify, overwriting the Rook-created probe.
	if probe := probeFn(healthCheck); probe != nil {
		container.StartupProbe = GetProbeWithDefaults(probe, container.StartupProbe)
	}

	return container
}

func GetProbeWithDefaults(desiredProbe, currentProbe *v1.Probe) *v1.Probe {
newProbe := *desiredProbe

Expand Down
92 changes: 90 additions & 2 deletions pkg/operator/ceph/config/livenessprobe_test.go
Expand Up @@ -62,8 +62,8 @@ func configLivenessProbeHelper(t *testing.T, keyType cephv1.KeyType) {
args args
want v1.Container
}{
{"probe-enabled", args{keyType, container, cephv1.CephClusterHealthCheckSpec{}}, container},
{"probe-disabled", args{keyType, container, cephv1.CephClusterHealthCheckSpec{LivenessProbe: l}}, v1.Container{}},
{string(keyType) + "_probe-enabled", args{keyType, container, cephv1.CephClusterHealthCheckSpec{}}, container},
{string(keyType) + "_probe-disabled", args{keyType, container, cephv1.CephClusterHealthCheckSpec{LivenessProbe: l}}, v1.Container{}},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
Expand All @@ -74,6 +74,94 @@ func configLivenessProbeHelper(t *testing.T, keyType cephv1.KeyType) {
}
}

// TestConfigureStartupProbe exercises startup probe configuration for every
// supported daemon type, then runs one end-to-end check that user-supplied
// timing/threshold values override the defaults while the probe handler is
// always the Rook-provided one.
func TestConfigureStartupProbe(t *testing.T) {
	for _, keyType := range []cephv1.KeyType{
		cephv1.KeyMds,
		cephv1.KeyMon,
		cephv1.KeyMgr,
		cephv1.KeyOSD,
	} {
		configStartupProbeHelper(t, keyType)
	}

	t.Run("integration check: configured probes should override values", func(t *testing.T) {
		rookDefault := &v1.Probe{
			Handler: v1.Handler{
				HTTPGet: &v1.HTTPGetAction{
					Path: "/",
					Port: intstr.FromInt(8443),
				},
			},
		}
		override := &v1.Probe{
			Handler: v1.Handler{
				HTTPGet: &v1.HTTPGetAction{
					Path: "/custom/path",
					Port: intstr.FromInt(8080),
				},
			},
			InitialDelaySeconds: 999,
			TimeoutSeconds:      888,
			PeriodSeconds:       777,
			SuccessThreshold:    666,
			FailureThreshold:    555,
		}

		healthCheckSpec := cephv1.CephClusterHealthCheckSpec{
			StartupProbe: map[cephv1.KeyType]*cephv1.ProbeSpec{
				cephv1.KeyMon: {
					Disabled: false,
					Probe:    override,
				},
			},
		}

		got := ConfigureStartupProbe(cephv1.KeyMon, v1.Container{StartupProbe: rookDefault}, healthCheckSpec)

		// The resulting startup probe carries the user's timing values, but
		// the handler must always be the Rook-given default.
		want := *override
		want.Handler = rookDefault.Handler
		assert.Equal(t, &want, got.StartupProbe)
	})
}

// configStartupProbeHelper runs the basic enabled/disabled startup probe
// cases for a single daemon type.
func configStartupProbeHelper(t *testing.T, keyType cephv1.KeyType) {
	probe := &v1.Probe{
		Handler: v1.Handler{
			HTTPGet: &v1.HTTPGetAction{
				Path: "/",
				Port: intstr.FromInt(8080),
			},
		},
	}
	withProbe := v1.Container{StartupProbe: probe}
	disabledSpec := map[cephv1.KeyType]*cephv1.ProbeSpec{keyType: {Disabled: true}}

	cases := []struct {
		name        string
		healthCheck cephv1.CephClusterHealthCheckSpec
		want        v1.Container
	}{
		// An empty health check spec leaves the Rook-default probe untouched.
		{string(keyType) + "_probe-enabled", cephv1.CephClusterHealthCheckSpec{}, withProbe},
		// A disabled spec clears the probe, leaving an empty container.
		{string(keyType) + "_probe-disabled", cephv1.CephClusterHealthCheckSpec{StartupProbe: disabledSpec}, v1.Container{}},
	}
	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			got := ConfigureStartupProbe(keyType, withProbe, tc.healthCheck)
			if !reflect.DeepEqual(got, tc.want) {
				t.Errorf("ConfigureStartupProbe() = %v, want %v", got, tc.want)
			}
		})
	}
}

func TestGetProbeWithDefaults(t *testing.T) {
t.Run("using default probe", func(t *testing.T) {
currentProb := &v1.Probe{
Expand Down

0 comments on commit f5bcc01

Please sign in to comment.