core: support priority class for crashcollector
Support priorityClassName for crashcollectors, as is already done for mons, mgrs, and osds.

https://rook.io/docs/rook/v1.8/ceph-cluster-crd.html#priority-class-names-configuration-settings

The main use case is assigning a high priority class to crashcollectors so that they can preempt normal pods
under heavy load. Without this feature, we might lose crash information.

Closes: rook#9500

Signed-off-by: Satoru Takeuchi <satoru.takeuchi@gmail.com>
satoru-takeuchi committed Jan 19, 2022
1 parent c6ca35e commit 78feac0
Showing 7 changed files with 86 additions and 9 deletions.
3 changes: 2 additions & 1 deletion Documentation/ceph-cluster-crd.md
@@ -601,10 +601,11 @@ Priority class names can be specified so that the Rook components will have those

You can set priority class names for Rook components with the following key-value pairs:

-* `all`: Set priority class names for MGRs, Mons, OSDs.
+* `all`: Set priority class names for MGRs, Mons, OSDs, and crashcollectors.
* `mgr`: Set priority class names for MGRs.
* `mon`: Set priority class names for Mons.
* `osd`: Set priority class names for OSDs.
+* `crashcollector`: Set priority class names for crashcollectors.

The specific component keys will act as overrides to `all`.

1 change: 1 addition & 0 deletions deploy/charts/rook-ceph-cluster/values.yaml
@@ -238,6 +238,7 @@ cephClusterSpec:
# mon: rook-ceph-mon-priority-class
# osd: rook-ceph-osd-priority-class
# mgr: rook-ceph-mgr-priority-class
# crashcollector: rook-ceph-crashcollector-priority-class

storage: # cluster level storage configuration and selection
useAllNodes: true
1 change: 1 addition & 0 deletions deploy/examples/cluster.yaml
@@ -207,6 +207,7 @@ spec:
# mon: rook-ceph-mon-priority-class
# osd: rook-ceph-osd-priority-class
# mgr: rook-ceph-mgr-priority-class
# crashcollector: rook-ceph-crashcollector-priority-class
storage: # cluster level storage configuration and selection
useAllNodes: true
useAllDevices: true
8 changes: 8 additions & 0 deletions pkg/apis/ceph.rook.io/v1/priorityclasses.go
@@ -55,3 +55,11 @@ func GetCleanupPriorityClassName(p PriorityClassNamesSpec) string {
    }
    return p[KeyCleanup]
}

// GetCrashCollectorPriorityClassName returns the priority class name for the crashcollector
func GetCrashCollectorPriorityClassName(p PriorityClassNamesSpec) string {
    if _, ok := p[KeyCrashCollector]; !ok {
        return p.All()
    }
    return p[KeyCrashCollector]
}
10 changes: 6 additions & 4 deletions pkg/apis/ceph.rook.io/v1/priorityclasses_test.go
@@ -30,6 +30,7 @@ all: all-class
mgr: mgr-class
mon: mon-class
osd: osd-class
crashcollector: crashcollector-class
`)

    // convert the raw spec yaml into JSON
@@ -43,10 +43,11 @@ osd: osd-class

    // the unmarshalled priority class names spec should equal the expected spec below
    expected := PriorityClassNamesSpec{
-        "all": "all-class",
-        "mgr": "mgr-class",
-        "mon": "mon-class",
-        "osd": "osd-class",
+        "all":            "all-class",
+        "mgr":            "mgr-class",
+        "mon":            "mon-class",
+        "osd":            "osd-class",
+        "crashcollector": "crashcollector-class",
    }
    assert.Equal(t, expected, priorityClassNames)
}
9 changes: 5 additions & 4 deletions pkg/operator/ceph/cluster/crash/crash.go
@@ -113,10 +113,11 @@ func (r *ReconcileNode) createOrUpdateCephCrash(node corev1.Node, tolerations []
            Containers: []corev1.Container{
                getCrashDaemonContainer(cephCluster, *cephVersion),
            },
-            Tolerations:   tolerations,
-            RestartPolicy: corev1.RestartPolicyAlways,
-            HostNetwork:   cephCluster.Spec.Network.IsHost(),
-            Volumes:       volumes,
+            Tolerations:       tolerations,
+            RestartPolicy:     corev1.RestartPolicyAlways,
+            HostNetwork:       cephCluster.Spec.Network.IsHost(),
+            Volumes:           volumes,
+            PriorityClassName: cephv1.GetCrashCollectorPriorityClassName(cephCluster.Spec.PriorityClassNames),
        },
    }
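For context (not part of this change), the `PriorityClassName` assigned above refers to a cluster-scoped Kubernetes PriorityClass that must already exist. As the commit message notes, the point is to let crashcollector pods preempt normal pods under heavy load, so the class needs a high value. A minimal client-go sketch of creating such a class, with an illustrative name and value (neither is defined by this commit):

package crashpriority

import (
    "context"

    schedulingv1 "k8s.io/api/scheduling/v1"
    metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    "k8s.io/client-go/kubernetes"
)

// createCrashCollectorPriorityClass creates a high-value PriorityClass so that
// crashcollector pods can preempt normal-priority pods under heavy load.
func createCrashCollectorPriorityClass(ctx context.Context, clientset kubernetes.Interface) error {
    pc := &schedulingv1.PriorityClass{
        // Name and Value are illustrative assumptions, not defined by this commit.
        ObjectMeta:  metav1.ObjectMeta{Name: "rook-ceph-crashcollector-priority-class"},
        Value:       1000000,
        Description: "High priority for Rook crashcollector pods",
    }
    _, err := clientset.SchedulingV1().PriorityClasses().Create(ctx, pc, metav1.CreateOptions{})
    return err
}

Referencing that class via `priorityClassNames.crashcollector` (or `all`) is what allows the crashcollector to preempt normal pods when the node is under pressure.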

63 changes: 63 additions & 0 deletions pkg/operator/ceph/cluster/crash/crash_test.go
@@ -18,21 +18,26 @@ package crash

import (
    "context"
    "fmt"
    "testing"

    cephv1 "github.com/rook/rook/pkg/apis/ceph.rook.io/v1"
    rookclient "github.com/rook/rook/pkg/client/clientset/versioned/fake"
    "github.com/rook/rook/pkg/client/clientset/versioned/scheme"
    "github.com/rook/rook/pkg/clusterd"
    cephver "github.com/rook/rook/pkg/operator/ceph/version"
    "github.com/rook/rook/pkg/operator/k8sutil"
    "github.com/rook/rook/pkg/operator/test"
    "github.com/stretchr/testify/assert"
    appsv1 "k8s.io/api/apps/v1"
    v1 "k8s.io/api/batch/v1"
    "k8s.io/api/batch/v1beta1"
    corev1 "k8s.io/api/core/v1"
    kerrors "k8s.io/apimachinery/pkg/api/errors"
    metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    "k8s.io/apimachinery/pkg/types"
    "sigs.k8s.io/controller-runtime/pkg/client/fake"
    "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
    cntrlutil "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
)

@@ -105,3 +105,61 @@ func TestCreateOrUpdateCephCron(t *testing.T) {
    assert.Error(t, err)
    assert.True(t, kerrors.IsNotFound(err))
}

func TestCreateOrUpdateCephCrash(t *testing.T) {
    cephCluster := cephv1.CephCluster{
        ObjectMeta: metav1.ObjectMeta{Namespace: "rook-ceph"},
    }
    cephCluster.Spec.Labels = cephv1.LabelsSpec{}
    cephCluster.Spec.PriorityClassNames = cephv1.PriorityClassNamesSpec{}
    cephVersion := &cephver.CephVersion{Major: 16, Minor: 2, Extra: 0}
    ctx := context.TODO()
    context := &clusterd.Context{
        Clientset:     test.New(t, 1),
        RookClientset: rookclient.NewSimpleClientset(),
    }

    s := scheme.Scheme
    err := appsv1.AddToScheme(s)
    if err != nil {
        assert.Fail(t, "failed to build scheme")
    }
    r := &ReconcileNode{
        scheme:  s,
        client:  fake.NewClientBuilder().WithScheme(s).WithRuntimeObjects().Build(),
        context: context,
    }

    node := corev1.Node{}
    nodeSelector := map[string]string{corev1.LabelHostname: "testnode"}
    node.SetLabels(nodeSelector)
    tolerations := []corev1.Toleration{corev1.Toleration{}}
    res, err := r.createOrUpdateCephCrash(node, tolerations, cephCluster, cephVersion)
    assert.NoError(t, err)
    assert.Equal(t, controllerutil.OperationResult("created"), res)
    name := k8sutil.TruncateNodeName(fmt.Sprintf("%s-%%s", AppName), "testnode")
    deploy := appsv1.Deployment{}
    err = r.client.Get(ctx, types.NamespacedName{Namespace: "rook-ceph", Name: name}, &deploy)
    assert.NoError(t, err)
    podSpec := deploy.Spec.Template
    assert.Equal(t, nodeSelector, podSpec.Spec.NodeSelector)
    assert.Equal(t, "", podSpec.ObjectMeta.Labels["foo"])
    assert.Equal(t, tolerations, podSpec.Spec.Tolerations)
    assert.Equal(t, false, podSpec.Spec.HostNetwork)
    assert.Equal(t, "", podSpec.Spec.PriorityClassName)

    cephCluster.Spec.Labels[cephv1.KeyCrashCollector] = map[string]string{"foo": "bar"}
    cephCluster.Spec.Network.HostNetwork = true
    cephCluster.Spec.PriorityClassNames[cephv1.KeyCrashCollector] = "test-priority-class"
    tolerations = []corev1.Toleration{corev1.Toleration{Key: "key", Operator: "Equal", Value: "value", Effect: "NoSchedule"}}
    res, err = r.createOrUpdateCephCrash(node, tolerations, cephCluster, cephVersion)
    assert.NoError(t, err)
    assert.Equal(t, controllerutil.OperationResult("updated"), res)
    err = r.client.Get(ctx, types.NamespacedName{Namespace: "rook-ceph", Name: name}, &deploy)
    assert.NoError(t, err)
    podSpec = deploy.Spec.Template
    assert.Equal(t, "bar", podSpec.ObjectMeta.Labels["foo"])
    assert.Equal(t, tolerations, podSpec.Spec.Tolerations)
    assert.Equal(t, true, podSpec.Spec.HostNetwork)
    assert.Equal(t, "test-priority-class", podSpec.Spec.PriorityClassName)
}
