Skip to content

Commit

Permalink
Merge pull request #2176 from travisn/backport-hostname-change
Browse files Browse the repository at this point in the history
Backport: Configure OSDs when hostname is different from node name
  • Loading branch information
travisn committed Oct 1, 2018
2 parents 5ef4bb4 + f8f4d08 commit be9fd85
Show file tree
Hide file tree
Showing 15 changed files with 143 additions and 29 deletions.
2 changes: 1 addition & 1 deletion Documentation/helm-operator.md
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ The following tables lists the configurable parameters of the rook-operator char
| Parameter | Description | Default |
| ------------------------- | --------------------------------------------------------------- | ------------------------------------------------------ |
| `image.repository` | Image | `rook/ceph` |
| `image.tag` | Image tag | `v0.8.2` |
| `image.tag` | Image tag | `v0.8.3` |
| `image.pullPolicy` | Image pull policy | `IfNotPresent` |
| `rbacEnable` | If true, create & use RBAC resources | `true` |
| `pspEnable` | If true, create & use PSP resources | `true` |
Expand Down
2 changes: 1 addition & 1 deletion Documentation/toolbox.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ spec:
dnsPolicy: ClusterFirstWithHostNet
containers:
- name: rook-ceph-tools
image: rook/ceph-toolbox:v0.8.2
image: rook/ceph-toolbox:v0.8.3
imagePullPolicy: IfNotPresent
env:
- name: ROOK_ADMIN_SECRET
Expand Down
2 changes: 1 addition & 1 deletion cluster/examples/coreos/after-reboot-daemonset.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ spec:
effect: NoSchedule
containers:
- name: ceph-after-reboot-check
image: rook/ceph-toolbox:v0.8.2
image: rook/ceph-toolbox:v0.8.3
imagePullPolicy: IfNotPresent
command: ["/scripts/status-check.sh"]
env:
Expand Down
2 changes: 1 addition & 1 deletion cluster/examples/coreos/before-reboot-daemonset.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ spec:
effect: NoSchedule
containers:
- name: ceph-before-reboot-check
image: rook/ceph-toolbox:v0.8.2
image: rook/ceph-toolbox:v0.8.3
imagePullPolicy: IfNotPresent
command: ["/scripts/status-check.sh"]
env:
Expand Down
2 changes: 1 addition & 1 deletion cluster/examples/kubernetes/ceph/operator.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -289,7 +289,7 @@ spec:
serviceAccountName: rook-ceph-system
containers:
- name: rook-ceph-operator
image: rook/ceph:v0.8.2
image: rook/ceph:v0.8.3
args: ["ceph", "operator"]
volumeMounts:
- mountPath: /var/lib/rook
Expand Down
2 changes: 1 addition & 1 deletion cluster/examples/kubernetes/ceph/toolbox.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ spec:
dnsPolicy: ClusterFirstWithHostNet
containers:
- name: rook-ceph-tools
image: rook/ceph-toolbox:v0.8.2
image: rook/ceph-toolbox:v0.8.3
imagePullPolicy: IfNotPresent
env:
- name: ROOK_ADMIN_SECRET
Expand Down
2 changes: 1 addition & 1 deletion cluster/examples/kubernetes/cockroachdb/operator.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ spec:
serviceAccountName: rook-cockroachdb-operator
containers:
- name: rook-cockroachdb-operator
image: rook/cockroachdb:v0.8.2
image: rook/cockroachdb:v0.8.3
args: ["cockroachdb", "operator"]
env:
- name: POD_NAME
Expand Down
2 changes: 1 addition & 1 deletion cluster/examples/kubernetes/minio/operator.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ spec:
serviceAccountName: rook-minio-operator
containers:
- name: rook-minio-operator
image: rook/minio:v0.8.2
image: rook/minio:v0.8.3
args: ["minio", "operator"]
env:
- name: POD_NAME
Expand Down
13 changes: 12 additions & 1 deletion pkg/operator/ceph/cluster/osd/osd.go
Original file line number Diff line number Diff line change
Expand Up @@ -151,9 +151,20 @@ func (c *Cluster) Start() error {
logger.Warningf("failed to get storage nodes from namespace %s: %v", rookSystemNS, err)
return err
}
hostnameMap, err := k8sutil.GetNodeHostNames(c.context.Clientset)
if err != nil {
logger.Warningf("failed to get node hostnames: %v", err)
return err
}
for nodeName := range allNodeDevices {
hostname, ok := hostnameMap[nodeName]
if !ok || nodeName == "" {
// fall back to the node name if no hostname is set
logger.Warningf("failed to get hostname for node %s. %+v", nodeName, err)
hostname = nodeName
}
storageNode := rookalpha.Node{
Name: nodeName,
Name: hostname,
}
c.Storage.Nodes = append(c.Storage.Nodes, storageNode)
}
Expand Down
22 changes: 11 additions & 11 deletions pkg/operator/ceph/cluster/osd/pod.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ const (
func (c *Cluster) makeJob(nodeName string, devices []rookalpha.Device,
selection rookalpha.Selection, resources v1.ResourceRequirements, storeConfig config.StoreConfig, metadataDevice, location string) (*batch.Job, error) {

podSpec, err := c.provisionPodTemplateSpec(devices, selection, resources, storeConfig, metadataDevice, location, v1.RestartPolicyOnFailure)
podSpec, err := c.provisionPodTemplateSpec(devices, selection, resources, storeConfig, metadataDevice, nodeName, location, v1.RestartPolicyOnFailure)
if err != nil {
return nil, err
}
Expand Down Expand Up @@ -124,12 +124,12 @@ func (c *Cluster) makeDeployment(nodeName string, devices []rookalpha.Device, se
osdID := strconv.Itoa(osd.ID)
tiniEnvVar := v1.EnvVar{Name: "TINI_SUBREAPER", Value: ""}
envVars := []v1.EnvVar{
nodeNameEnvVar(),
nodeNameEnvVar(nodeName),
k8sutil.PodIPEnvVar(k8sutil.PrivateIPEnvVar),
k8sutil.PodIPEnvVar(k8sutil.PublicIPEnvVar),
tiniEnvVar,
}
configEnvVars := append(c.getConfigEnvVars(storeConfig, dataDir, location), []v1.EnvVar{
configEnvVars := append(c.getConfigEnvVars(storeConfig, dataDir, nodeName, location), []v1.EnvVar{
tiniEnvVar,
{Name: "ROOK_OSD_ID", Value: osdID},
}...)
Expand Down Expand Up @@ -244,7 +244,7 @@ func (c *Cluster) makeDeployment(nodeName string, devices []rookalpha.Device, se
}

func (c *Cluster) provisionPodTemplateSpec(devices []rookalpha.Device, selection rookalpha.Selection, resources v1.ResourceRequirements,
storeConfig config.StoreConfig, metadataDevice, location string, restart v1.RestartPolicy) (*v1.PodTemplateSpec, error) {
storeConfig config.StoreConfig, metadataDevice, nodeName, location string, restart v1.RestartPolicy) (*v1.PodTemplateSpec, error) {
volumes := []v1.Volume{k8sutil.ConfigOverrideVolume()}

if c.dataDirHostPath != "" {
Expand Down Expand Up @@ -277,7 +277,7 @@ func (c *Cluster) provisionPodTemplateSpec(devices []rookalpha.Device, selection

podSpec := v1.PodSpec{
ServiceAccountName: c.serviceAccount,
Containers: []v1.Container{c.provisionOSDContainer(devices, selection, resources, storeConfig, metadataDevice, location)},
Containers: []v1.Container{c.provisionOSDContainer(devices, selection, resources, storeConfig, metadataDevice, nodeName, location)},
RestartPolicy: restart,
Volumes: volumes,
HostNetwork: c.HostNetwork,
Expand All @@ -300,9 +300,9 @@ func (c *Cluster) provisionPodTemplateSpec(devices []rookalpha.Device, selection
}, nil
}

func (c *Cluster) getConfigEnvVars(storeConfig config.StoreConfig, dataDir, location string) []v1.EnvVar {
func (c *Cluster) getConfigEnvVars(storeConfig config.StoreConfig, dataDir, nodeName, location string) []v1.EnvVar {
envVars := []v1.EnvVar{
nodeNameEnvVar(),
nodeNameEnvVar(nodeName),
{Name: "ROOK_CLUSTER_ID", Value: string(c.ownerRef.UID)},
k8sutil.PodIPEnvVar(k8sutil.PrivateIPEnvVar),
k8sutil.PodIPEnvVar(k8sutil.PublicIPEnvVar),
Expand Down Expand Up @@ -338,9 +338,9 @@ func (c *Cluster) getConfigEnvVars(storeConfig config.StoreConfig, dataDir, loca
}

func (c *Cluster) provisionOSDContainer(devices []rookalpha.Device, selection rookalpha.Selection, resources v1.ResourceRequirements,
storeConfig config.StoreConfig, metadataDevice, location string) v1.Container {
storeConfig config.StoreConfig, metadataDevice, nodeName, location string) v1.Container {

envVars := c.getConfigEnvVars(storeConfig, k8sutil.DataDir, location)
envVars := c.getConfigEnvVars(storeConfig, k8sutil.DataDir, nodeName, location)
devMountNeeded := false
privileged := false

Expand Down Expand Up @@ -418,8 +418,8 @@ func (c *Cluster) provisionOSDContainer(devices []rookalpha.Device, selection ro
}
}

func nodeNameEnvVar() v1.EnvVar {
return v1.EnvVar{Name: "ROOK_NODE_NAME", ValueFrom: &v1.EnvVarSource{FieldRef: &v1.ObjectFieldSelector{FieldPath: "spec.nodeName"}}}
func nodeNameEnvVar(name string) v1.EnvVar {
return v1.EnvVar{Name: "ROOK_NODE_NAME", Value: name}
}

func dataDevicesEnvVar(dataDevices string) v1.EnvVar {
Expand Down
2 changes: 1 addition & 1 deletion pkg/operator/ceph/cluster/osd/pod_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ import (

func TestPodContainer(t *testing.T) {
cluster := &Cluster{Namespace: "myosd", Version: "23"}
c, err := cluster.provisionPodTemplateSpec([]rookalpha.Device{}, rookalpha.Selection{}, v1.ResourceRequirements{}, config.StoreConfig{}, "", "", v1.RestartPolicyAlways)
c, err := cluster.provisionPodTemplateSpec([]rookalpha.Device{}, rookalpha.Selection{}, v1.ResourceRequirements{}, config.StoreConfig{}, "", "node", "", v1.RestartPolicyAlways)
assert.NotNil(t, c)
assert.Nil(t, err)
assert.Equal(t, 1, len(c.Spec.Containers))
Expand Down
9 changes: 9 additions & 0 deletions pkg/operator/discover/discover.go
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,15 @@ func (d *Discover) createDiscoverDaemonSet(namespace, discoverImage, securityAcc

// ListDevices lists all devices discovered on all nodes or specific node if node name is provided.
func ListDevices(context *clusterd.Context, namespace, nodeName string) (map[string][]sys.LocalDisk, error) {
// convert the host name label to the k8s node name to look up the configmap with the devices
if len(nodeName) > 0 {
var err error
nodeName, err = k8sutil.GetNodeNameFromHostname(context.Clientset, nodeName)
if err != nil {
logger.Warningf("failed to get node name from hostname. %+v", err)
}
}

var devices map[string][]sys.LocalDisk
listOpts := metav1.ListOptions{LabelSelector: fmt.Sprintf("%s=%s", k8sutil.AppAttr, discoverDaemon.AppName)}
// wait for device discovery configmaps
Expand Down
43 changes: 41 additions & 2 deletions pkg/operator/k8sutil/node.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ import (
"k8s.io/apimachinery/pkg/labels"
"k8s.io/client-go/kubernetes"
helper "k8s.io/kubernetes/pkg/api/v1/helper"
"k8s.io/kubernetes/pkg/kubelet/apis"
)

func ValidNode(node v1.Node, placement rookalpha.Placement) (bool, error) {
Expand Down Expand Up @@ -92,10 +93,16 @@ func GetValidNodes(rookNodes []rookalpha.Node, clientset kubernetes.Interface, p

for _, node := range allNodes.Items {
for _, rookNode := range rookNodes {
if rookNode.Name == node.Name {
hostname := node.Labels[apis.LabelHostname]
if len(hostname) == 0 {
// fall back to the node name if the hostname label is not set
hostname = node.Name
}
if rookNode.Name == hostname || rookNode.Name == node.Name {
rookNode.Name = hostname
valid, err := ValidNode(node, placement)
if err != nil {
logger.Warning("failed to validate node %s %v", node.Name, err)
logger.Warning("failed to validate node %s %v", rookNode.Name, err)
} else if valid {
validNodes = append(validNodes, rookNode)
}
Expand All @@ -105,3 +112,35 @@ func GetValidNodes(rookNodes []rookalpha.Node, clientset kubernetes.Interface, p
}
return validNodes
}

// GetNodeNameFromHostname returns the name of the node resource looked up by the hostname label.
// Typically these will be the same name, but sometimes they are not, such as when nodes have a longer
// dns name, but the hostname is short.
func GetNodeNameFromHostname(clientset kubernetes.Interface, hostName string) (string, error) {
	// Select the node whose hostname label matches the given hostname.
	options := metav1.ListOptions{LabelSelector: fmt.Sprintf("%s=%s", apis.LabelHostname, hostName)}
	nodes, err := clientset.CoreV1().Nodes().List(options)
	if err != nil {
		// Fall back to the hostname itself so the caller still has a usable value.
		return hostName, err
	}

	// The hostname label is expected to be unique across nodes, so the first
	// (and normally only) match is the node we want.
	if len(nodes.Items) > 0 {
		return nodes.Items[0].Name, nil
	}
	return hostName, fmt.Errorf("node not found")
}

// GetNodeHostNames returns the name of the node resource mapped to their hostname label.
// Typically these will be the same name, but sometimes they are not, such as when nodes have a longer
// dns name, but the hostname is short.
func GetNodeHostNames(clientset kubernetes.Interface) (map[string]string, error) {
	nodes, err := clientset.CoreV1().Nodes().List(metav1.ListOptions{})
	if err != nil {
		return nil, err
	}

	// Pre-size the map since the number of entries is already known.
	nodeMap := make(map[string]string, len(nodes.Items))
	for i := range nodes.Items {
		// Index into the slice to avoid copying the full node struct per iteration.
		node := &nodes.Items[i]
		nodeMap[node.Name] = node.Labels[apis.LabelHostname]
	}
	return nodeMap, nil
}
23 changes: 17 additions & 6 deletions tests/framework/installer/install_helper.go
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ type InstallHelper struct {
helmHelper *utils.HelmHelper
Env objects.EnvironmentManifest
k8sVersion string
changeHostnames bool
T func() *testing.T
}

Expand All @@ -112,6 +113,11 @@ func (h *InstallHelper) CreateK8sRookOperator(namespace string) (err error) {
return err
}

if h.changeHostnames {
// give nodes a hostname that is different from its k8s node name to confirm that all the daemons will be initialized properly
h.k8shelper.ChangeHostnames()
}

rookOperator := h.installData.GetRookOperator(namespace)

_, err = h.k8shelper.KubectlWithStdin(rookOperator, createFromStdinArgs...)
Expand Down Expand Up @@ -459,6 +465,10 @@ func (h *InstallHelper) UninstallRookFromMultipleNS(helmInstalled bool, systemNa
logger.Infof("removing %s from node %s. err=%v", h.hostPathToDelete, node, err)
}
}
if h.changeHostnames {
// revert the hostname labels for the test
h.k8shelper.RestoreHostnames()
}
}

func (h *InstallHelper) cleanupDir(node, dir string) error {
Expand Down Expand Up @@ -528,12 +538,13 @@ func NewK8sRookhelper(clientset *kubernetes.Clientset, t func() *testing.T) *Ins
panic("failed to get kubectl client :" + err.Error())
}
ih := &InstallHelper{
k8shelper: k8shelp,
installData: NewK8sInstallData(),
helmHelper: utils.NewHelmHelper(),
Env: objects.Env,
k8sVersion: version.String(),
T: t,
k8shelper: k8shelp,
installData: NewK8sInstallData(),
helmHelper: utils.NewHelmHelper(),
Env: objects.Env,
k8sVersion: version.String(),
changeHostnames: k8shelp.VersionAtLeast("v1.11.0"),
T: t,
}
flag.Parse()
return ih
Expand Down
44 changes: 44 additions & 0 deletions tests/framework/utils/k8s_helper.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ import (
"k8s.io/client-go/kubernetes"
"k8s.io/client-go/rest"
storagev1util "k8s.io/kubernetes/pkg/apis/storage/v1/util"
"k8s.io/kubernetes/pkg/kubelet/apis"
"k8s.io/kubernetes/pkg/util/version"
)

Expand All @@ -57,6 +58,8 @@ const (
RetryLoop = 30
//RetryInterval param for test - wait time while in RetryLoop
RetryInterval = 5
//hostnameTestPrefix is a prefix added to the node hostname
hostnameTestPrefix = "testprefix-"
)

//CreateK8sHelper creates a instance of k8sHelper
Expand Down Expand Up @@ -1135,6 +1138,47 @@ func (k8sh *K8sHelper) GetExternalRGWServiceURL(storeName string, namespace stri
return endpoint, err
}

// ChangeHostnames modifies the node hostname label to run tests in an environment where the
// node name is different from the hostname label. Nodes that already carry the test prefix
// are left alone, making the helper idempotent across repeated test runs.
func (k8sh *K8sHelper) ChangeHostnames() error {
	nodes, err := k8sh.Clientset.CoreV1().Nodes().List(metav1.ListOptions{})
	if err != nil {
		return err
	}
	for i := range nodes.Items {
		// Index into the slice rather than ranging by value: v1.Node is a large struct,
		// and updating through a pointer avoids mutating a throwaway copy.
		node := &nodes.Items[i]
		hostname := node.Labels[apis.LabelHostname]
		if strings.HasPrefix(hostname, hostnameTestPrefix) {
			continue
		}
		node.Labels[apis.LabelHostname] = hostnameTestPrefix + hostname
		logger.Infof("changed hostname of node %s to %s", node.Name, node.Labels[apis.LabelHostname])
		if _, err := k8sh.Clientset.CoreV1().Nodes().Update(node); err != nil {
			return err
		}
	}

	return nil
}

// RestoreHostnames removes the test prefix from the node hostname labels, reverting the
// changes made by ChangeHostnames. The []string return value is always nil and exists
// only for interface compatibility with existing callers.
func (k8sh *K8sHelper) RestoreHostnames() ([]string, error) {
	nodes, err := k8sh.Clientset.CoreV1().Nodes().List(metav1.ListOptions{})
	if err != nil {
		return nil, err
	}
	for i := range nodes.Items {
		// Index into the slice rather than ranging by value: v1.Node is a large struct,
		// and updating through a pointer avoids mutating a throwaway copy.
		node := &nodes.Items[i]
		hostname := node.Labels[apis.LabelHostname]
		if !strings.HasPrefix(hostname, hostnameTestPrefix) {
			continue
		}
		// TrimPrefix is the idiomatic equivalent of the manual hostname[len(prefix):] slice.
		node.Labels[apis.LabelHostname] = strings.TrimPrefix(hostname, hostnameTestPrefix)
		logger.Infof("restoring hostname of node %s to %s", node.Name, node.Labels[apis.LabelHostname])
		if _, err := k8sh.Clientset.CoreV1().Nodes().Update(node); err != nil {
			return nil, err
		}
	}
	return nil, nil
}

//IsRookInstalled returns true is rook-ceph-mgr service is running(indicating rook is installed)
func (k8sh *K8sHelper) IsRookInstalled(namespace string) bool {
opts := metav1.GetOptions{}
Expand Down

0 comments on commit be9fd85

Please sign in to comment.