-
Notifications
You must be signed in to change notification settings - Fork 2.7k
/
multus-validation.yaml
173 lines (173 loc) · 8.99 KB
/
multus-validation.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
####################################################################################################
# This manifest contains a Kubernetes Job and supporting definitions for running Rook's Multus
# validation tool. The Multus validation tool's help text is copied below. Modify the Job
# definition in this manifest based on the needs of your cluster.
####################################################################################################
# THE BELOW HELP TEXT IS AUTO-GENERATED BY `make docs`
#
# Run a validation test that determines whether the current Multus and system
# configurations will support Rook with Multus.
#
# This should be run BEFORE Rook is installed.
#
# This is a fairly long-running test. It starts up a web server and many
# clients to verify that Multus network communication works properly.
#
# It does *not* perform any load testing. Networks that cannot support high
# volumes of Ceph traffic may still encounter runtime issues. This may be
# particularly noticeable with high I/O load or during OSD rebalancing
# (see: https://docs.ceph.com/en/latest/architecture/#rebalancing).
# For example, during a Rook or Ceph cluster upgrade.
#
# Override the kube config file location by setting the KUBECONFIG environment variable.
#
# Usage:
# rook multus validation run [--public-network=<nad-name>] [--cluster-network=<nad-name>] [flags]
#
# Flags:
# --cluster-network string The name of the Network Attachment Definition (NAD) that will be used for Ceph's cluster network. This should be a namespaced name in the form <namespace>/<name> if the NAD is defined in a different namespace from the cluster namespace.
# -c, --config string The validation test config file to use. This cannot be used with other flags.
# --daemons-per-node int The number of validation test daemons to run per node. It is recommended to set this to the maximum number of Ceph daemons that can run on any node in the worst case of node failure(s). The default value is set to the worst-case value for a Rook Ceph cluster with 3 portable OSDs, 3 portable monitors, and where all optional child resources have been created with 1 daemon such that they all might run on a single node in a failure scenario. If you aren't sure what to choose for this value, add 1 for each additional OSD beyond 3. (default 19)
# --flaky-threshold-seconds timeoutSeconds This is the time window in which validation clients are all expected to become 'Ready' together. Validation clients are all started at approximately the same time, and they should all stabilize at approximately the same time. Once the first validation client becomes 'Ready', the tool checks that all of the remaining clients become 'Ready' before this threshold duration elapses. In networks that have connectivity issues, limited bandwidth, or high latency, clients will contend for network traffic with each other, causing some clients to randomly fail and become 'Ready' later than others. These randomly-failing clients are considered 'flaky.' Adjust this value to reflect expectations for the underlying network. For fast and reliable networks, this can be set to a smaller value. For networks that are intended to be slow, this can be set to a larger value. Additionally, for very large Kubernetes clusters, it may take longer for all clients to start, and it therefore may take longer for all clients to become 'Ready'; in that case, this value can be set slightly higher. (default 30s)
# -h, --help help for run
# -n, --namespace string The namespace for validation test resources. It is recommended to set this to the namespace in which Rook's Ceph cluster will be installed. (default "rook-ceph")
# --nginx-image string The Nginx image used for the validation server and clients. (default "quay.io/nginx/nginx-unprivileged:stable-alpine")
# --public-network string The name of the Network Attachment Definition (NAD) that will be used for Ceph's public network. This should be a namespaced name in the form <namespace>/<name> if the NAD is defined in a different namespace from the cluster namespace.
# --timeout-minutes timeoutMinutes The time to wait for resources to change to the expected state. For example, for the test web server to start, for test clients to become ready, or for test resources to be deleted. At longest, this may need to reflect the time it takes for client pods to pull images, get address assignments, and then for each client to determine that its network connection is stable. Minimum: 1 minute. Recommended: 2 minutes or more. (default 3m0s)
#
# Global Flags:
# --log-level string logging level for logging/tracing output (valid values: ERROR,WARNING,INFO,DEBUG) (default "INFO")
# THE ABOVE HELP TEXT IS AUTO-GENERATED BY `make docs`
####################################################################################################
---
# ServiceAccount used by the Job that validates the Multus configuration.
# The trailing `# namespace:cluster` marker is kept verbatim
# (presumably consumed by namespace-rewriting tooling; verify before editing).
apiVersion: v1
kind: ServiceAccount
metadata:
  name: rook-ceph-multus-validation
  namespace: rook-ceph # namespace:cluster
# Optional: uncomment to attach an image pull secret to this ServiceAccount.
# imagePullSecrets:
#   - name: my-registry-secret
---
# Namespaced permissions for the aspects of the Multus validation job that
# require access to the operator/cluster namespace.
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
  name: rook-ceph-multus-validation
  namespace: rook-ceph # namespace:cluster
rules:
  # Core API group: ConfigMaps (including their finalizers) and Pods.
  - apiGroups:
      - ""
    resources:
      - configmaps
      - configmaps/finalizers
      - pods
    verbs:
      - get
      - list
      - create
      - update
      - delete
  # DaemonSets: list, create, and delete (individually or as a collection).
  - apiGroups:
      - apps
    resources:
      - daemonsets
    verbs:
      - list
      - create
      - delete
      - deletecollection
  # Network Attachment Definitions: read-only lookup by name.
  - apiGroups:
      - k8s.cni.cncf.io
    resources:
      - network-attachment-definitions
    verbs:
      - get
  # Jobs: read and delete only; this Role does not allow creating Jobs.
  - apiGroups:
      - batch
    resources:
      - jobs
    verbs:
      - get
      - list
      - delete
---
# Binds the rook-ceph-multus-validation Role to the ServiceAccount of the same
# name, allowing the Multus validation job to run in this namespace.
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  name: rook-ceph-multus-validation
  namespace: rook-ceph # namespace:cluster
subjects:
  - kind: ServiceAccount
    name: rook-ceph-multus-validation
    namespace: rook-ceph # namespace:cluster
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: Role
  name: rook-ceph-multus-validation
---
# Job that runs the Multus validation tool (`rook multus validation run`).
# NOTE(review): no backoffLimit is set, so the Job API default applies when the
# pod fails; confirm that retrying a failed validation run is intended.
apiVersion: batch/v1
kind: Job
metadata:
  name: rook-ceph-multus-validation
  namespace: rook-ceph # namespace:cluster
  labels:
    app: rook-ceph-multus-validation
spec:
  template:
    metadata:
      labels:
        app: rook-ceph-multus-validation
    spec:
      serviceAccountName: rook-ceph-multus-validation
      # The container is never restarted in place after it exits.
      restartPolicy: Never
      containers:
        - name: multus-validation
          image: rook/ceph:v1.13.5
          command:
            - rook
          # Subcommand plus optional flags; see the help text at the top of this manifest.
          args:
            - multus
            - validation
            - run
            # - "--public-network=<NAD-NAME>" # uncomment and replace NAD name if using public network
            # - "--cluster-network=<NAD-NAME>" # uncomment and replace NAD name if using cluster network
            # - "--nginx-image=<IMAGE>" # uncomment and replace IMAGE with the nginx image you want to use for the validation server and clients
            # - "--daemons-per-node=<COUNT>" # uncomment and replace COUNT with the maximum number of daemons that should be running on each node during validation
          env:
            # Passes the pod's own namespace to the container.
            - name: POD_NAMESPACE
              valueFrom:
                fieldRef:
                  fieldPath: metadata.namespace
            # Presumably the environment form of the --log-level global flag
            # (valid values: ERROR, WARNING, INFO, DEBUG); verify against the tool.
            - name: ROOK_LOG_LEVEL
              value: DEBUG
---
# This RoleBinding lets the job's service account use the `psp:rook` ClusterRole so that the
# job can run in Kubernetes environments using Pod Security Policies (PSPs)
# apiVersion: rbac.authorization.k8s.io/v1
# kind: RoleBinding
# metadata:
#   name: rook-ceph-multus-validation-psp
#   namespace: rook-ceph # namespace:cluster
# roleRef:
#   apiGroup: rbac.authorization.k8s.io
#   kind: ClusterRole
#   name: psp:rook
# subjects:
#   - kind: ServiceAccount
#     name: rook-ceph-multus-validation
#     namespace: rook-ceph # namespace:cluster
# ---
# SecurityContextConstraints (SCC) for the Multus validation job's service account (OpenShift)
# kind: SecurityContextConstraints
# apiVersion: security.openshift.io/v1
# metadata:
#   name: rook-ceph-multus-validation
# allowPrivilegedContainer: true
# allowHostDirVolumePlugin: true
# allowHostPID: false
# # set to true if running rook with host networking enabled
# allowHostNetwork: true
# # set to true if running rook with the provider as host
# allowHostPorts: true
# priority:
# allowedCapabilities: ["MKNOD"]
# allowHostIPC: true
# readOnlyRootFilesystem: false
# # drop all default privileges
# requiredDropCapabilities: ["All"]
# defaultAddCapabilities: []
# runAsUser:
#   type: RunAsAny
# seLinuxContext:
#   type: RunAsAny
# fsGroup:
#   type: RunAsAny
# supplementalGroups:
#   type: RunAsAny
# seccompProfiles:
#   - "*"
# volumes:
#   - configMap
#   - emptyDir
#   - projected
# users:
#   - system:serviceaccount:rook-ceph:rook-ceph-multus-validation # serviceaccount:namespace:cluster
---