Configure Node
This assumes the box is already imaged.
Create affinity rules for new nodes
Become root
sudo su -
RUN THESE COMMANDS IF YOU HAVE FREE SPACE IN THE VOLUME GROUP (otherwise go to #4 and add a new disk)
Extend 80GB to /var/ and 10G to /var/log/
lvextend -L +10G /dev/vg1/lv_var_log
xfs_growfs /dev/vg1/lv_var_log
lvextend -L +80G /dev/vg1/lv_var
xfs_growfs /dev/vg1/lv_var
Add an 80GB HDD (Thick Provision Eager Zeroed) to the VM and extend 70GB to /var/ and 10G to /var/log/
#Run the commands below if the disk doesn't show up in lsblk
for x in 0 1 2; do echo "- - -" > /sys/class/scsi_host/host${x}/scan; done
find /sys/class/scsi_device/ -mindepth 1 -maxdepth 1 | while read f; do echo 1 > "${f}/device/rescan"; done
#Change the name if necessary
parted /dev/sdb
#run the below in (parted)
mklabel gpt
mkpart primary 2048s -1
quit
#Extend share
pvcreate /dev/sdb1 && vgextend vg1 /dev/sdb1 && lvextend -L +10G /dev/vg1/lv_var_log && xfs_growfs /dev/vg1/lv_var_log && lvextend -l +100%FREE /dev/vg1/lv_var && xfs_growfs /dev/vg1/lv_var
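Either path should leave the filesystems at their new sizes; an optional sanity check (assumes the vg1 layout used above):
#confirm the volume group and logical volumes grew, and the filesystems picked it up
vgs vg1 && lvs vg1
df -h /var /var/log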
Permit root login without a password and add the Bootstrap node's SSH key to the allowed key list
sed -i 's/PermitRootLogin.*/PermitRootLogin without-password/g' /etc/ssh/sshd_config
systemctl restart sshd
mkdir .ssh && chmod 700 .ssh && touch .ssh/authorized_keys && chmod 600 .ssh/authorized_keys
echo <serverAbcs1 ssh_key> > .ssh/authorized_keys
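Optional: from the Bootstrap node, confirm that key-based root login works before running the playbook later on (the hostname below is a placeholder):
#should print the node's hostname without prompting for a password
ssh -o BatchMode=yes root@<new_node_fqdn> hostname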
Run YUM update to prevent docker headaches later on
yum update -y
Rest of the configuration
#install python library
yum install -y pyOpenSSL python-rhsm-certificates jq python-configparser rng-tools python2-passlib
#remove immutable flag so docker can use this
chattr -i /etc/resolv.conf
#OpenShift plays with DNS resolution, so this is just to prevent long waits when troubleshooting issues
sed -i 's/^#\(UseDNS\).*$/\1 no/g' /etc/ssh/sshd_config && systemctl restart sshd
#This was a security lockdown feature, but it ends up scanning the docker containers and messing with them, causing them to break
/bin/rm -f /etc/cron.daily/unowned_files
#Required for the dnsmasq service
systemctl enable NetworkManager && systemctl start NetworkManager
#Make sure that running lots of pods doesn't cause us to run out of entropy on the hosts
systemctl enable rngd && systemctl start rngd
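Optional: a quick check that the host now has a healthy entropy pool with rngd running:
cat /proc/sys/kernel/random/entropy_avail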
CUDAML Specific: Create NIC bonding
cat > /etc/sysconfig/network-scripts/ifcfg-bond0 << EOF
DEVICE=bond0
TYPE=Bond
NAME=bond0
BONDING_MASTER=yes
BOOTPROTO=none
ONBOOT="yes"
IPV6INIT="no"
NM_CONTROLLED=no
BONDING_OPTS="mode=4 miimon=100 lacp_rate=1"
EOF
egrep IPADDR /etc/sysconfig/network-scripts/ifcfg-em1 >> /etc/sysconfig/network-scripts/ifcfg-bond0
egrep GATEWAY /etc/sysconfig/network-scripts/ifcfg-em1 >> /etc/sysconfig/network-scripts/ifcfg-bond0
egrep NETMASK /etc/sysconfig/network-scripts/ifcfg-em1 >> /etc/sysconfig/network-scripts/ifcfg-bond0
echo 'ZONE=public' >> /etc/sysconfig/network-scripts/ifcfg-bond0
cat > /etc/sysconfig/network-scripts/ifcfg-em1 << EOF
NAME=em1
DEVICE=em1
BOOTPROTO=none
ONBOOT=yes
NM_CONTROLLED=no
IPV6INIT=no
MASTER=bond0
SLAVE=yes
EOF
cat > /etc/sysconfig/network-scripts/ifcfg-em2 << EOF
NAME=em2
DEVICE=em2
BOOTPROTO=none
ONBOOT=yes
NM_CONTROLLED=no
IPV6INIT=no
MASTER=bond0
SLAVE=yes
EOF
cat > /etc/sysconfig/network-scripts/ifcfg-bond0.160 << EOF
DEVICE=bond0.160
NAME=bond0.160
BOOTPROTO=none
IPADDR="10.17.160.148"
NETMASK="255.255.252.0"
ONBOOT="yes"
VLAN=yes
IPV6INIT="no"
NM_CONTROLLED=no
ZONE=public
EOF
modprobe --first-time 8021q
systemctl restart network
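Optional: verify the bond and the VLAN interface came up as expected (names follow the config above):
cat /proc/net/bonding/bond0
ip -d addr show bond0.160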
Add Node to OpenShift Cluster
SSH to the OpenShift Bootstrap server and become root
ssh serverAbcs01.xyz.domain
sudo su -
Navigate to the ansible folder and edit the host file
cd ~/ansible
vi hosts-3.11
Add the new host FQDN along with the node type under [new_nodes]
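A hypothetical example of the [new_nodes] stanza (the hostname and node group below are placeholders; use whichever node group matches the node type in this cluster):
[new_nodes]
serverAbcn09.xyz.domain openshift_node_group_name='node-config-compute'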
Run the scale up Ansible Playbook
ansible-playbook -i hosts-3.11 ~/openshift-ansible-3.11/playbooks/openshift-node/scaleup.yml
SSH to a Master node and verify that the nodes were added
ssh serverAbcm01.xyz.domain
sudo su -
oc get nodes
Optional: If the nodes were added but show up as "Not Ready", and "oc describe node <node_name>" returns "Error 'NetworkPluginNotReady message:docker: network plugin is not ready: cni config uninitialized'"
cat > /etc/origin/node/resolv.conf <<EOF
nameserver 192.168.20.10
EOF
cat > /etc/dnsmasq.d/origin-upstream-dns.conf << EOF
server=192.168.20.10
EOF
#CUDAML BOX ONLY
cat > /etc/resolv.conf << EOF
domain xyz.domain
search cluster.local xyz.domain org11.net
nameserver <change me -- server IP>
EOF
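After rewriting the resolver files, the node services usually need a bounce before the CNI error clears; a minimal sketch (assumes dnsmasq and origin-node are the services involved on this node):
systemctl restart dnsmasq
systemctl restart origin-node
#then, from a master: oc get nodes (the node should flip to Ready)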
Set the new node as unschedulable
oc adm manage-node <node-name> --schedulable=false
SSH to the node that was created. Downgrade docker on the node (if needed, to MATCH the version on the master nodes) and reboot
Notice: MATCH the docker version to the Master nodes!
Previous versions we used have a bug that causes PLEG to become unhealthy and mark pods as unknown:
docker-1.13.1-91.git07f3374.el7.centos.x86_64
docker-common-1.13.1-91.git07f3374.el7.centos.x86_64
docker-client-1.13.1-91.git07f3374.el7.centos.x86_64
systemctl stop origin-node
systemctl stop docker
sudo yum downgrade -y docker-1.13.1-109.gitcccb291.el7.centos.x86_64 docker-common-1.13.1-109.gitcccb291.el7.centos.x86_64 docker-client-1.13.1-109.gitcccb291.el7.centos.x86_64
sudo reboot
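Once the node is back up, an optional check that the docker version now matches the masters and both services are running:
rpm -qa 'docker*' | sort
systemctl is-active docker origin-node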
Compute Node Specific Configuration
The steps below are only for OpenShift compute nodes.
- Perform these steps on the Bootstrap node
- Run oc login -u duc.le os.org11inc.xyz to log in to the cluster
- Make sure to change the hostnames in these scripts first
These scripts should already exist in /root/ansible/shiftn-setup (see serverAbcS01). Otherwise, create the directory and add the scripts below.
1-configure-for-cifs.sh
#!/bin/bash
KUBE_PLUG_DIR="/usr/libexec/kubernetes/kubelet-plugins/volume/exec/fstab~cifs"
for x in `seq 9 9`; do
  for y in m n i; do
    if [ "${x}" -gt "3" ] && [ "${y}" != "n" ]; then continue; fi;
    ssh PUNV-shift${y}0${x} "yum clean all; yum install -y cifs-utils; mkdir -p ${KUBE_PLUG_DIR};";
    cat - << "EOF" | ssh PUNV-shift${y}0${x} "cat > ${KUBE_PLUG_DIR}/cifs"
#!/bin/bash
set -u

# ====================================================================
# Example configuration:
# ====================================================================
# --------------------------------------------------------------------
# secret.yml:
# --------------------------------------------------------------------
# apiVersion: v1
# kind: Secret
# metadata:
#   name: cifs-secret
#   namespace: default
# type: fstab/cifs
# data:
#   username: 'ZXhhbXBsZQo='
#   password: 'c2VjcmV0Cg=='
#   domain: 'ZG9tYWluCg=='
#
# --------------------------------------------------------------------
# pod.yml:
# --------------------------------------------------------------------
# apiVersion: v1
# kind: Pod
# metadata:
#   name: busybox
#   namespace: default
# spec:
#   containers:
#   - name: busybox
#     image: busybox
#     command:
#       - sleep
#       - "3600"
#     imagePullPolicy: IfNotPresent
#     volumeMounts:
#     - name: test
#       mountPath: /data
#   volumes:
#   - name: test
#     flexVolume:
#       driver: "fstab/cifs"
#       fsType: "cifs"
#       secretRef:
#         name: "cifs-secret"
#       options:
#         networkPath: "//example-server/backup"
#         mountOptions: "dir_mode=0755,file_mode=0644,noperm"
# --------------------------------------------------------------------

# Uncomment the following lines to see how this plugin is called:
echo >> /tmp/cifs.log
date >> /tmp/cifs.log
echo "$@" >> /tmp/cifs.log

init() {
    assertBinaryInstalled mount.cifs cifs-utils
    assertBinaryInstalled jq jq
    assertBinaryInstalled mountpoint util-linux
    assertBinaryInstalled base64 coreutils
    echo '{ "status": "Success", "message": "The fstab/cifs flexvolume plugin was initialized successfully", "capabilities": { "attach": false, "selinuxRelabel": false, "fsGroup": false } }'
    exit 0
}

assertBinaryInstalled() {
    binary="$1"
    package="$2"
    if ! which "$binary" > /dev/null ; then
        errorExit "Failed to initialize the fstab/cifs flexvolume plugin. $binary command not found. Please install the $package package."
    fi
}

errorExit() {
    if [[ $# -ne 1 ]] ; then
        echo '{ "status": "Failure", "message": "Unknown error in the fstab/cifs flexvolume plugin." }'
    else
        jq -Mcn --arg message "$1" '{ "status": "Failure", "message": $message }'
    fi
    exit 1
}

doMount() {
    if [[ -z ${1:-} || -z ${2:-} ]] ; then
        errorExit "cifs mount: syntax error. usage: cifs mount <mount dir> <json options>"
    fi
    mountPoint="$1"
    shift
    json=$(printf '%s ' "${@}")
    if ! jq -e . > /dev/null 2>&1 <<< "$json" ; then
        errorExit "cifs mount: syntax error. invalid json: '$json'"
    fi
    networkPath="$(jq --raw-output -e '.networkPath' <<< "$json" 2>/dev/null)"
    if [[ $? -ne 0 ]] ; then
        errorExit "cifs mount: option networkPath missing in flexvolume configuration."
    fi
    mountOptions="$(jq --raw-output -e '.mountOptions' <<< "$json" 2>/dev/null)"
    if [[ $? -ne 0 ]] ; then
        errorExit "cifs mount: option mountOptions missing in flexvolume configuration."
    fi
    fsGroup="$(jq --raw-output -e '.["kubernetes.io/fsGroup"]' <<< "$json" 2>/dev/null)"
    if [[ $? -ne 0 ]] ; then
        errorExit "cifs mount: fsGroup not found."
    fi
    cifsUsernameBase64="$(jq --raw-output -e '.["kubernetes.io/secret/username"]' <<< "$json" 2>/dev/null)"
    if [[ $? -ne 0 ]] ; then
        errorExit "cifs mount: username not found. the flexVolume definition must contain a secretRef to a secret with username, password, and domain."
    fi
    cifsPasswordBase64="$(jq --raw-output -e '.["kubernetes.io/secret/password"]' <<< "$json" 2>/dev/null)"
    if [[ $? -ne 0 ]] ; then
        errorExit "cifs mount: password not found. the flexVolume definition must contain a secretRef to a secret with username, password, and domain."
    fi
    cifsDomainBase64="$(jq --raw-output -e '.["kubernetes.io/secret/domain"]' <<< "$json" 2>/dev/null)"
    if [[ $? -ne 0 ]] ; then
        errorExit "cifs mount: domain not found. the flexVolume definition must contain a secretRef to a secret with username, password, and domain."
    fi
    cifsUsername="$(base64 --decode <<< "$cifsUsernameBase64" 2>/dev/null)"
    if [[ $? -ne 0 ]] ; then
        errorExit "cifs mount: username secret is not base64 encoded."
    fi
    cifsPassword="$(base64 --decode <<< "$cifsPasswordBase64" 2>/dev/null)"
    if [[ $? -ne 0 ]] ; then
        errorExit "cifs mount: password secret is not base64 encoded."
    fi
    cifsDomain="$(base64 --decode <<< "$cifsDomainBase64" 2>/dev/null)"
    if [[ $? -ne 0 ]] ; then
        errorExit "cifs mount: domain secret is not base64 encoded."
    fi
    if ! mkdir -p "$mountPoint" > /dev/null 2>&1 ; then
        errorExit "cifs mount: failed to create mount directory: '$mountPoint'"
    fi
    if [[ $(mountpoint "$mountPoint") = *"is a mountpoint"* ]] ; then
        errorExit "cifs mount: there is already a filesystem mounted under the mount directory: '$mountPoint'"
    fi
    if [[ ! -z $(ls -A "$mountPoint" 2>/dev/null) ]] ; then
        errorExit "cifs mount: mount directory is not an empty directory: '$mountPoint'"
    fi
    result=$(mount -t cifs "$networkPath" "$mountPoint" -o "uid=$fsGroup,gid=$fsGroup,username=$cifsUsername,password=$cifsPassword,domain=$cifsDomain,context=system_u:object_r:container_file_t:s0,$mountOptions" 2>&1)
    if [[ $? -ne 0 ]] ; then
        errorExit "cifs mount: failed to mount the network path: $result"
    fi
    echo '{ "status": "Success" }'
    exit 0
}

doUnmount() {
    if [[ -z ${1:-} ]] ; then
        errorExit "cifs unmount: syntax error. usage: cifs unmount <mount dir>"
    fi
    mountPoint="$1"
    if [[ $(mountpoint "$mountPoint") != *"is a mountpoint"* ]] ; then
        errorExit "cifs unmount: no filesystem mounted under directory: '$mountPoint'"
    fi
    result=$(umount "$mountPoint" 2>&1)
    if [[ $? -ne 0 ]] ; then
        errorExit "cifs unmount: failed to unmount the network path: $result"
    fi
    echo '{ "status": "Success" }'
    exit 0
}

not_supported() {
    echo '{ "status": "Not supported" }'
    exit 1
}

command=${1:-}
if [[ -n $command ]]; then
    shift
fi

case "$command" in
    init)
        init "$@"
        ;;
    mount)
        doMount "$@"
        ;;
    unmount)
        doUnmount "$@"
        ;;
    *)
        not_supported "$@"
        ;;
esac
EOF
    ssh PUNV-shift${y}0${x} "chmod 755 ${KUBE_PLUG_DIR}/cifs;";
  done;
done;
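Optional: a quick manual smoke test of the installed driver on one of the targeted nodes (the hostname below follows the PUNV-shift${y}0${x} pattern from the loop; substitute a node the script actually touched). The init call just prints the capabilities JSON:
ssh PUNV-shiftn09 "/usr/libexec/kubernetes/kubelet-plugins/volume/exec/fstab~cifs/cifs init"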
2-configure-cifs-for-shiftn-only.sh
for x in `seq -w 9 16`; do
  echo "Copying SELinux module definition to server ...";
  cat - << "EOF" | ssh -q serverAbcn${x} "cat > ${HOME}/container_access_cifs.te"
module container_access_cifs 1.0;

require {
    type cifs_t;
    type container_t;
    class filesystem mount;
    class file { ioctl read write create getattr setattr lock append unlink link rename open };
    class dir { ioctl read write create getattr setattr lock unlink link rename add_name remove_name reparent search rmdir open };
}

#============= container_t ==============
allow container_t cifs_t:filesystem mount;
allow container_t cifs_t:file { ioctl read write create getattr setattr lock append unlink link rename open };
allow container_t cifs_t:dir { ioctl read write create getattr setattr lock unlink link rename add_name remove_name reparent search rmdir open };
EOF
  echo "Compiling SELinux module definition ...";
  ssh -q serverAbcn${x} "checkmodule -M -m -o container_access_cifs.mod container_access_cifs.te && semodule_package -o container_access_cifs.pp -m container_access_cifs.mod && semodule -i container_access_cifs.pp;";
done;
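Optional: confirm the SELinux module actually landed on each shiftn node (module name matches the .te file above):
for x in `seq -w 9 16`; do ssh -q serverAbcn${x} "semodule -l | grep container_access_cifs"; done;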
3-configure-for-trident.sh
#!/bin/bash
for x in `seq 9 9`; do
  for y in m n i; do
    if [ "${x}" -gt "3" ] && [ "${y}" != "n" ]; then continue; fi;
    ssh PUNV-shift${y}0${x} "yum install -y sg3_utils";
    #ssh -t serverAbc${y}${x} "systemctl status iscsi";
    ssh -t PUNV-shift${y}0${x} "systemctl restart iscsi";
  done;
done;
4-configure-docker-image-cleanup.sh
#!/bin/bash
CRON_FILE="/etc/cron.weekly/docker-image-cleanup.sh"
for x in `seq 9 9`; do
  for y in n i; do
    if [ "${x}" -gt "3" ] && [ "${y}" != "n" ]; then continue; fi;
    ssh -q PUNV-shift${y}0${x} "touch ${CRON_FILE}; chmod 755 ${CRON_FILE};";
    cat - << "EOF" | ssh -q PUNV-shift${y}0${x} "cat > ${CRON_FILE}"
#!/bin/sh
docker image prune -af
EOF
  done;
done;
5-configure-nodes-for-httpd-proxying.sh
#!/bin/bash
for x in `seq 9 9`; do
  #ssh serverAbcn0${x} "/usr/sbin/getsebool httpd_can_network_connect";
  ssh serverAbcn0${x} "/usr/sbin/setsebool -P httpd_can_network_connect 1";
done;
6-configure-docker-registries.sh
#!/bin/bash
orgA_DIR="/etc/docker/certs.d/orgA-docker.xyz.domain:5000"
LOCAL_DIR="/etc/docker/certs.d/docker-registry.default.svc:5000"
for x in `seq 9 9`; do
  for y in m n i; do
    if [ "${x}" -gt "3" ] && [ "${y}" != "n" ]; then continue; fi;
    ssh PUNV-shift${y}0${x} "mkdir -p ${orgA_DIR}; mkdir -p ${LOCAL_DIR};";
    ssh PUNV-shift${y}0${x} "ln -sf /etc/pki/ca-trust/source/anchors/hqi1v-opsica01-2017.crt ${orgA_DIR}/;";
    ssh PUNV-shift${y}0${x} "ln -sf /etc/origin/node/client-ca.crt ${LOCAL_DIR}/;";
  done;
done;
7-fix-docker-log-rotation.sh
#!/bin/bash
for x in `seq 14 16`; do
  for y in m n i; do
    if [ "${x}" -gt "3" ] && [ "${y}" != "n" ]; then continue; fi;
    ssh PUNV-shift${y}${x} "echo -e '{\n \"log-driver\": \"json-file\",\n \"log-opts\": {\"max-size\": \"50m\", \"max-file\": \"3\"}\n}' > /etc/docker/daemon.json; systemctl restart docker;";
  done;
done;
8-label-all-nodes-for-filebeat.sh
#!/bin/bash
for x in `seq 1 8`; do
  for y in m n i; do
    if [ "${x}" -gt "3" ] && [ "${y}" != "n" ]; then continue; fi;
    oc label node PUNV-shift${y}0${x}.xyz.domain logging-infra-filebeat=true;
  done;
done;
9-label-compute-nodes-as-normal.sh
#!/bin/bash
for x in `seq 1 8`; do
  oc label node serverAbcn0${x}.xyz.domain orgA-compute=normal;
done;
- SSH to the new node and Reboot it
SSH to the Master node and make the new node schedulable
oc adm manage-node <node_name> --schedulable=true
CUDA Node (Machine Learning) Specific Configuration
The steps below are only for OpenShift CUDA compute nodes.
SSH to an OpenShift master node, add labels, and assign node permissions to the machine learning team
#Add label
oc edit node serverxyzl07.xyz.domain -o yaml
labels:
....
logging-infra-filebeat: "true"
orgA-compute: gpu
openshift.com/gpu-accelerator: "true"
tfserving: "true"
#Add the new cuda node under "resourceNames" so the Machine Learning team can manage it
oc edit clusterrole machinelearning-view-nodes
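Optional: after saving, a quick grep to confirm the new node is listed (node name taken from the label step above):
oc get clusterrole machinelearning-view-nodes -o yaml | grep serverxyzl07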
SSH to a host with perl installed (or run yum install perl) and run the perl script below
#Create a perl script to generate an iqn for the iscsi connection
vi iqn.pl
:set paste
#!/usr/bin/perl
# Generate a sample IQN for the hostname
$|=1;
use strict;
$ARGV[0] || die "Usage: $0 hostname\n";
my $hostname = $ARGV[0];
#sample: iqn.1994-05.com.redhat:86a2b96edecc
my @chars = ('0'..'9', 'a'..'f');
my $len = 12;
my $string;
my $hex;
while ($len--){ $hex .= $chars[rand @chars] };
my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime();
$year+=1900;
my $iqn = "iqn.$year-$mon.net.org11.$hostname:$hex";
print $iqn, "\n";
#Make the script executable
chmod +x iqn.pl
#run the script with the node hostname
./iqn.pl XYZSERVER
#this will generate:
iqn.2020-0.net.org11.XYZSERVER7:39462cf8328b
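If perl isn't handy, a rough bash equivalent of the same idea (note: unlike the perl script, date +%Y-%m prints a 1-based, zero-padded month, so the output format differs slightly; the hostname is a placeholder):
printf 'iqn.%s.net.org11.%s:%s\n' "$(date +%Y-%m)" "XYZSERVER" "$(od -An -N6 -tx1 /dev/urandom | tr -d ' \n')"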
Go to Storage Management and add the iSCSI connection (we're using the v5000 in Vegas, so this is https://lvp0p-v5kmva-mgmt)
Add iSCSI to the CUDAML node and enable iSCSI
groupadd -g 1000460000 -r machinelearning-containers;
useradd -c "machinelearning container user" -M -r -s /sbin/nologin -u 1000460000 -g 1000460000 machinelearning-containers;
# Set iSCSI initiator name
cat > /etc/iscsi/initiatorname.iscsi << EOF
InitiatorName=iqn.2020-0.net.org11.serverxyzl06:2114872d8665
EOF
# Configure CHAP secrets
vim /etc/iscsi/iscsid.conf
node.session.auth.authmethod = CHAP
node.session.auth.username = iqn.2020-0.net.org11.XYZSRVERl07:39462cf8328b
node.session.auth.password = <CHAP_secret_for_host>
node.session.auth.password_in = <CHAP_secret_for_cluster>
discovery.sendtargets.auth.authmethod = CHAP
discovery.sendtargets.auth.username = iqn.2020-0.net.org11.XYZSERVER:39462cf8328b
discovery.sendtargets.auth.password = <CHAP_secret_for_host>
discovery.sendtargets.auth.password_in = <CHAP_secret_for_cluster>
# Set iSCSI session timeout
sed -i
's/^\(node.session.err_timeo.abort_timeout =\).*$/\1 60/g'
/etc/iscsi/iscsid.conf;
# Set iSCSI session max commands in queue
sed -i
's/^\(node.session.cmds_max =\).*$/\1 1024/g'
/etc/iscsi/iscsid.conf;
# Set iSCSI session queue depth
sed -i
's/^\(node.session.queue_depth =\).*$/\1 128/g'
/etc/iscsi/iscsid.conf;
cat > /etc/udev/rules.d/99-storwize.rules << "EOF"
ACTION=="add|change", KERNEL=="sd[a-z]", ATTRS{vendor}=="IBM", ATTR{queue/scheduler}="noop", ATTR{queue/add_random}="0", ATTR{queue/rq_affinity}="2", ATTR{queue/nr_requests}="1024", ATTR{queue/max_sectors_kb}="2048", RUN+="/sbin/hdparm -Q 64 /dev/%k", RUN+="/bin/sh -c 'echo 60 > /sys/$DEVPATH/device/timeout'"
EOF
udevadm control --reload
udevadm trigger
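Optional: spot-check one of the Storwize-backed disks to confirm the rule applied (<sdX> is a placeholder for an actual device name from lsblk):
cat /sys/block/<sdX>/queue/scheduler /sys/block/<sdX>/queue/nr_requests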
#Scan for iSCSI targets in Vegas v5k
iscsiadm -m discovery -t sendtargets -p 10.17.160.31:3260
iscsiadm -m discovery -t sendtargets -p 10.17.160.32:3260
iscsiadm -m discovery -t sendtargets -p 10.17.160.33:3260
iscsiadm -m discovery -t sendtargets -p 10.17.160.34:3260
#Add iSCSI nodes
iscsiadm -m node --portal 10.17.160.31:3260 --target iqn.1986-03.com.ibm:2145.lvp0p-v5kmva.node2 -l
iscsiadm -m node --portal 10.17.160.33:3260 --target iqn.1986-03.com.ibm:2145.lvp0p-v5kmva.node2 -l
iscsiadm -m node --portal 10.17.160.32:3260 --target iqn.1986-03.com.ibm:2145.lvp0p-v5kmva.node1 -l
iscsiadm -m node --portal 10.17.160.34:3260 --target iqn.1986-03.com.ibm:2145.lvp0p-v5kmva.node1 -l
#Set iSCSI startup to automatically connect to these nodes
iscsiadm -m node --portal 10.17.160.31:3260 --target iqn.1986-03.com.ibm:2145.lvp0p-v5kmva.node2 -o update -n node.startup -v automatic
iscsiadm -m node --portal 10.17.160.33:3260 --target iqn.1986-03.com.ibm:2145.lvp0p-v5kmva.node2 -o update -n node.startup -v automatic
iscsiadm -m node --portal 10.17.160.32:3260 --target iqn.1986-03.com.ibm:2145.lvp0p-v5kmva.node1 -o update -n node.startup -v automatic
iscsiadm -m node --portal 10.17.160.34:3260 --target iqn.1986-03.com.ibm:2145.lvp0p-v5kmva.node1 -o update -n node.startup -v automatic
iscsiadm -m node
systemctl restart multipathd
systemctl status multipathd
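Optional: verify the iSCSI sessions and multipath devices before moving on:
iscsiadm -m session
multipath -ll
lsblk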
Add NVIDIA Yum Repo and install NVIDIA packages
cat > /etc/yum.repos.d/nvidia.repo << "EOF"
[cuda]
name=Official Nvidia CUDA YUM Repo
gpgcheck=1
gpgkey=http://hqi1v-opspxe01.xyz.domain/public/nvidia/7/cuda/x86_64/7fa2af80.pub
baseurl=http://hqi1v-opspxe01.xyz.domain/public/nvidia/7/cuda/x86_64/latest
[libnvidia-container]
name=libnvidia-container
gpgcheck=1
gpgkey=http://hqi1v-opspxe01.xyz.domain/public/libnvidia-container/7/x86_64/gpgkey
baseurl=http://hqi1v-opspxe01.xyz.domain/public/libnvidia-container/7/x86_64/latest
[nvidia-container-runtime]
name=nvidia-container-runtime
gpgcheck=1
gpgkey=http://hqi1v-opspxe01.xyz.domain/public/nvidia-container-runtime/7/x86_64/gpgkey
baseurl=http://hqi1v-opspxe01.xyz.domain/public/nvidia-container-runtime/7/x86_64/latest
EOF
yum -y install kernel-devel-`uname -r`
yum -y install nvidia-driver nvidia-driver-devel nvidia-driver-cuda nvidia-modprobe
yum -y install nvidia-container-runtime-hook --nogpgcheck
Remove the default nouveau driver, load the yum-installed NVIDIA driver, and verify by running a docker container that uses the driver
#Remove default driver
modprobe -r nouveau
#Add NVIDIA driver
nvidia-modprobe && nvidia-modprobe -u
#Setup container using docker to test NVIDIA driver
docker run --privileged -it --rm docker.io/mirrorgooglecontainers/cuda-vector-add:v0.1
#Remove this image once the previous command produces a SUCCESS
docker rmi docker.io/mirrorgooglecontainers/cuda-vector-add:v0.1
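Optional: once the NVIDIA modules are loaded, a quick sanity check before the SELinux work below:
nvidia-smi
lsmod | grep nvidia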
Set up the SELinux context so that any arbitrary user in a docker container can use the CUDA library
#Change security context for this folder
chcon -t container_file_t /dev/nvidia*;
#Download this selinux post-processed file
curl -LO https://raw.githubusercontent.com/zvonkok/origin-ci-gpu/master/selinux/nvidia-container.pp
#Add to our SELINUX context
semodule -i nvidia-container.pp
#Reset SELINUX to the updated context
nvidia-container-cli -k list | restorecon -v -f -
restorecon -Rv /dev
restorecon -Rv /var/lib/kubelet
#Test using docker image again
docker run --user 1000:1000 --security-opt=no-new-privileges --cap-drop=ALL --security-opt label=type:nvidia_container_t -it --rm docker.io/mirrorgooglecontainers/cuda-vector-add:v0.1
#Remove once the previous command produces a SUCCESS
docker rmi docker.io/mirrorgooglecontainers/cuda-vector-add:v0.1
Set up the SELinux context so OpenShift can read/write CIFS in the CUDA Docker container, and reboot when finished
cat > container_mount_cifs.te << "EOF"
module container_mount_cifs 1.0;
require {
    type cifs_t;
    type container_t;
    class filesystem mount;
}
allow container_t cifs_t:filesystem mount;
EOF
checkmodule -M -m -o container_mount_cifs.mod container_mount_cifs.te;
semodule_package -o container_mount_cifs.pp -m container_mount_cifs.mod;
semodule -i container_mount_cifs.pp;
semanage fcontext --add --type container_file_t "/var/lib/origin/openshift.local.volumes/pods/[^/]+/volumes/fstab~cifs(/.*)?"
cat > container_access_cifs.te << "EOF"
module container_access_cifs 1.0;
require {
    type cifs_t;
    type container_t;
    class filesystem mount;
    class file { ioctl read write create getattr setattr lock append unlink link rename open };
    class dir { ioctl read write create getattr setattr lock unlink link rename add_name remove_name reparent search rmdir open };
}
allow container_t cifs_t:filesystem mount;
allow container_t cifs_t:file { ioctl read write create getattr setattr lock append unlink link rename open };
allow container_t cifs_t:dir { ioctl read write create getattr setattr lock unlink link rename add_name remove_name reparent search rmdir open };
EOF
checkmodule -M -m -o container_access_cifs.mod container_access_cifs.te;
semodule_package -o container_access_cifs.pp -m container_access_cifs.mod;
semodule -i container_access_cifs.pp;
cat > nvidia_container_access_cifs.te << "EOF"
module nvidia_container_access_cifs 1.0;
require {
    type cifs_t;
    type nvidia_container_t;
    class filesystem mount;
    class file { ioctl read write create getattr setattr lock append unlink link rename open };
    class dir { ioctl read write create getattr setattr lock unlink link rename add_name remove_name reparent search rmdir open };
}
allow nvidia_container_t cifs_t:filesystem mount;
allow nvidia_container_t cifs_t:file { ioctl read write create getattr setattr lock append unlink link rename open };
allow nvidia_container_t cifs_t:dir { ioctl read write create getattr setattr lock unlink link rename add_name remove_name reparent search rmdir open };
EOF
checkmodule -M -m -o nvidia_container_access_cifs.mod nvidia_container_access_cifs.te;
semodule_package -o nvidia_container_access_cifs.pp -m nvidia_container_access_cifs.mod;
semodule -i nvidia_container_access_cifs.pp;
#Test
docker run --user 1000:1000 --security-opt=no-new-privileges --cap-drop=ALL --security-opt label=type:nvidia_container_t -it --rm docker.io/mirrorgooglecontainers/cuda-vector-add:v0.1
semanage fcontext --add --type container_file_t --ftype c "/dev/nvidia.*"
sed -i 's/ -Z / --context=system_u:object_r:container_file_t:s0 /g' /usr/lib/udev/rules.d/60-nvidia-uvm.rules
cat >> /etc/rc.local << "EOFEOF"
/sbin/modprobe nvidia-uvm
if [ "$?" -eq 0 ]; then
    D=`grep nvidia-uvm /proc/devices | awk '{print $1}'`
    mknod -m 666 --context=system_u:object_r:container_file_t:s0 /dev/nvidia-uvm c $D 0
    mknod -m 666 --context=system_u:object_r:container_file_t:s0 /dev/nvidia-uvm-tools c $D 1
else
    exit 1
fi
EOFEOF
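The rc.local hook only runs at boot if the file is executable (a likely prerequisite on RHEL/CentOS 7); after the reboot below, the device nodes can be checked:
chmod +x /etc/rc.d/rc.local
#after the reboot: confirm the device nodes exist with the expected SELinux context
ls -lZ /dev/nvidia-uvm /dev/nvidia-uvm-tools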
reboot
Add the new cudaml entry to the /etc/hosts file on all cudaml nodes
[root@XYZ ~]# cat /etc/hosts
127.0.0.1       localhost localhost.localdomain localhost4 localhost4.localdomain4
::1             localhost localhost.localdomain localhost6 localhost6.localdomain6
10.17.148.141   serverxyzl01.xyz.domain
10.17.148.142   serverxyzl02.xyz.domain
10.17.148.143   serverxyzl03.xyz.domain
10.17.148.144   serverxyzl04.xyz.domain
10.17.148.146   serverxyzl05.xyz.domain
10.17.148.147   serverxyzl06.xyz.domain
10.17.148.148   serverxyzl07.xyz.domain
10.17.148.149   serverxyzl08.xyz.domain
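One hedged way to append the new entry on every existing cudaml node (the IP and FQDN are placeholders for the node being added; adjust the loop to match the hosts in the table above):
for n in `seq 1 7`; do
  ssh serverxyzl0${n}.xyz.domain "grep -q '<new_node_fqdn>' /etc/hosts || echo '<new_node_ip> <new_node_fqdn>' >> /etc/hosts"
done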
Setup PCS clustering
#install pre-req
yum install -y pcs lvm2-cluster gfs2-utils fence-agents-scsi
#Open firewall on old nodes
for tcpport in 2224 3121 5403 21064; do fwAddSrcIpPortProto 10.17.148.149 ${tcpport} tcp; done;
for udpport in 5404 5405; do fwAddSrcIpPortProto 10.17.148.149 ${udpport} udp; done;
#Open firewall on new nodes
for svr in `seq 141 144`; do
  for tcpport in 2224 3121 5403 21064; do fwAddSrcIpPortProto 10.17.148.${svr} ${tcpport} tcp; done;
  for udpport in 5404 5405; do fwAddSrcIpPortProto 10.17.148.${svr} ${udpport} udp; done;
done;
for svr in `seq 146 149`; do
  for tcpport in 2224 3121 5403 21064; do fwAddSrcIpPortProto 10.17.148.${svr} ${tcpport} tcp; done;
  for udpport in 5404 5405; do fwAddSrcIpPortProto 10.17.148.${svr} ${udpport} udp; done;
done;
#Apply changes
firewall-cmd --reload;
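fwAddSrcIpPortProto is a site-local helper, so the exact rule format may differ; an optional look at the resulting public zone after the reload:
firewall-cmd --zone=public --list-all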
#Setup pubkey so all those nodes can ssh to each other
cd .ssh/
cat > id_rsa.pub << EOF
<CHECK OTHER BOX FOR THIS KEY>
EOF
#Setup private key
touch id_rsa;
chmod 600 id_rsa;
cat > id_rsa << EOF
<CHECK OTHER BOX FOR THIS KEY>
EOF
cat id_rsa.pub >> authorized_keys;
#change hacluster password
passwd hacluster
New Password: <KEY IN LASTPASS>
Confirm Password: <KEY IN LASTPASS>
#Enable cluster
lvmconf --enable-cluster;
#Create directory for iscsi mount
mkdir -p /data/mvai-dev;
# Enable pacemaker daemon
systemctl enable pcsd;
systemctl start pcsd;
systemctl status pcsd;
#DO THIS PART ON THE 01 NODE (e.g. serverxyzl01.xyz.domain)
#Authenticate hacluster on the new node
pcs cluster auth serverxyzl07.xyz.domain
#Add 1 new journal to /dev/mapper/vg_mvai--dev-lv_data_mvai--dev
gfs2_jadd -j1 /dev/mapper/vg_mvai--dev-lv_data_mvai--dev
#Update STONITH host list with the new node
pcs stonith update iscsi-stonith-device pcmk_host_list="serverxyzl01.xyz.domain serverxyzl02.xyz.domain serverxyzl03.xyz.domain serverxyzl04.xyz.domain serverxyzl05.xyz.domain serverxyzl06.xyz.domain serverxyzl07.xyz.domain"
#Add the new node to the existing cluster
pcs cluster node add serverxyzl07.xyz.domain
#DO THIS PART ON NEW NODE:
pcs cluster start
pcs cluster enable
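Optional: confirm the new node joined and the cluster is healthy before the corosync tweak below:
pcs status
pcs status corosync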
#Add 'network.service' to the After variable in /etc/systemd/system/multi-user.target.wants/corosync.service
systemctl daemon-reload;
#Bounce the new node
reboot
SSH to a Master node in OpenShift and make the new node schedulable after it comes back up from rebooting
oc adm manage-node serverxyzl07.xyz.domain --schedulable