Commit 231d7f1e authored by Robert Dietrich

Fixed how the master node is determined and changed the respective if statements.

parent 782d9bcc
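The if-statement change replaces a substring match on the hostname with an exact string comparison. A minimal sketch with hypothetical node names shows why the old pattern could misfire on overlapping names:

#!/bin/bash
# hypothetical values: the node name contains the master's name as a prefix
PIKA_HOSTNAME=taurusi7011
MASTER_NODE=taurusi701

# old test: substring match succeeds although this is not the master node
if [[ ${PIKA_HOSTNAME} = *"${MASTER_NODE}"* ]]; then echo "substring: match"; fi

# new test: exact comparison only succeeds on the real master node
if [ "${PIKA_HOSTNAME}" = "${MASTER_NODE}" ]; then echo "exact: match"; fi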
 #!/bin/bash
-if [ "$SLURM_JOB_USER" != "rdietric" ] && [ "$SLURM_JOB_USER" != "fwinkler" ]; then
+#if [ "$SLURM_JOB_USER" != "rdietric" ] && [ "$SLURM_JOB_USER" != "fwinkler" ]; then
+# exit 0
+#fi
+if [[ $HOSTNAME = taurusi7* ]]; then
 exit 0
 fi
@@ -29,18 +33,38 @@ fi
 BATCHSYSTEM_ENV_FILE=${LOCAL_STORE}/pika_batchsystem_env_${PIKA_JOB_ID}
 # determine master node
-MASTER_NODE=`echo ${PIKA_JOB_NODELIST} | nodeset -e | cut -d ' ' -f 1 | cut -d. -f1`
+if [ -x "$(command -v ${PYTHON_ROOT}/bin/nodeset)" ]; then
+  MASTER_NODE=`echo ${PIKA_JOB_NODELIST} | ${PYTHON_ROOT}/bin/nodeset -e | cut -d ' ' -f 1 | cut -d. -f1`
+else
+  echo -e "Error: PIKA nodeset is NOT available!" >> $DEBUG_PATH 2>&1
+  if [ -x "$(command -v nodeset)" ]; then
+    echo "Trying system default nodeset." >> $DEBUG_PATH 2>&1
+    save_pyhome=$PYTHONHOME
+    save_pypath=$PYTHONPATH
+    unset PYTHONHOME
+    unset PYTHONPATH
+    MASTER_NODE=`echo ${PIKA_JOB_NODELIST} | nodeset -e | cut -d ' ' -f 1 | cut -d. -f1`
+    export PYTHONHOME=$save_pyhome
+    export PYTHONPATH=$save_pypath
+  else
+    echo -e "Error: nodeset is not available!" >> $DEBUG_PATH 2>&1
+  fi
+fi
+if [ -z "$MASTER_NODE" ]; then
+  echo "PIKA_JOB_NODELIST=${PIKA_JOB_NODELIST}" >> $DEBUG_PATH 2>&1
+fi
 echo -e "\nMASTER_NODE=$MASTER_NODE" >> $DEBUG_PATH 2>&1
 # this node's name
 PIKA_HOSTNAME=$(hostname | cut -d. -f1)
 echo "PIKA_HOSTNAME=$PIKA_HOSTNAME" >> $DEBUG_PATH 2>&1
 # update job metadata
 source ${PIKA_ROOT}/job_control/slurm/taurus/pika_update_metadata_epilog_include.sh >> $DEBUG_PATH 2>&1
-# Reset counters for exclusive jobs with monitoring enabled
-if [ $PIKA_MONITORING -eq 1 ] && [ $PIKA_JOB_EXCLUSIVE -eq 1 ]; then
-  echo "PUTNOTIF severity=okay time=$(date +%s) plugin=likwid message=rstCtrs" | nc -U ${PIKA_COLLECTD_SOCKET}
-fi
+# Reset counters for exclusive jobs with monitoring enabled (needed for LIKWID direct MSR access)
+#if [ $PIKA_MONITORING -eq 1 ] && [ $PIKA_JOB_EXCLUSIVE -eq 1 ]; then
+#  echo "PUTNOTIF severity=okay time=$(date +%s) plugin=likwid message=rstCtrs" | nc -U ${PIKA_COLLECTD_SOCKET}
+#fi
 # cleanup local data
 rm -f ${BATCHSYSTEM_ENV_FILE}
......
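For context, nodeset (from ClusterShell) expands a folded nodelist, and the pipeline above keeps only the short name of the first node. A sketch with a hypothetical nodelist:

# hypothetical folded nodelist as SLURM provides it
PIKA_JOB_NODELIST='taurusi[7001-7003]'
echo ${PIKA_JOB_NODELIST} | nodeset -e | cut -d ' ' -f 1 | cut -d. -f1
# nodeset -e  -> "taurusi7001 taurusi7002 taurusi7003"
# cut -d ' '  -> "taurusi7001" (first node)
# cut -d. -f1 -> strips a ".domain" suffix if the nodelist is fully qualified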
@@ -7,7 +7,7 @@
 # -1 -> redis server down or error in python script
 # master node retrieves additional job information
-if [[ ${PIKA_HOSTNAME} = *"${MASTER_NODE}"* ]]; then
+if [ "${PIKA_HOSTNAME}" = "${MASTER_NODE}" ]; then
   echo -e "\nGet job meta data (master node)" >> $DEBUG_PATH 2>&1
   PIKA_MONITORING=`python3 ${PIKA_ROOT}/job_control/slurm/taurus/pika_slurm_env_redis.py --jobid=${PIKA_JOB_ID} --env_file=${BATCHSYSTEM_ENV_FILE} --force 2>&1`
 else
@@ -24,6 +24,6 @@ fi
 echo -e "\nPIKA_MONITORING=$PIKA_MONITORING" >> $DEBUG_PATH 2>&1
 # write monitoring flag into file for master node
-if [[ ${PIKA_HOSTNAME} = *"${MASTER_NODE}"* ]]; then
+if [ "${PIKA_HOSTNAME}" = "${MASTER_NODE}" ]; then
   echo $PIKA_MONITORING > ${LOCAL_STORE}/pika_monitoring_${PIKA_JOB_ID}
 fi
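The flag file written above lets a later step on the master node pick up the monitoring decision without querying redis again; a hypothetical consumer:

# hypothetical reader in a later epilog step (same file name as written above)
PIKA_MONITORING=$(cat ${LOCAL_STORE}/pika_monitoring_${PIKA_JOB_ID})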
 #!/bin/bash
-if [ ${PIKA_HOSTNAME} = ${MASTER_NODE} ]; then
+if [ "${PIKA_HOSTNAME}" = "${MASTER_NODE}" ]; then
   # get utility functions
   source ${PIKA_ROOT}/pika_utils.sh >> $DEBUG_PATH 2>&1
@@ -38,12 +38,12 @@ if [ ${PIKA_HOSTNAME} = ${MASTER_NODE} ]; then
echo "JOB_NUM_NODES=$JOB_NUM_NODES" >> $DEBUG_PATH 2>&1
fi
echo -e "\nCheck for GPUs" >> $DEBUG_PATH 2>&1
#echo -e "\nCheck for GPUs" >> $DEBUG_PATH 2>&1
if [ -z "${SLURM_JOB_GPUS}" ]; then
echo "No GPUs on this node" >> $DEBUG_PATH 2>&1
echo -e "\nNo GPUs on this node" >> $DEBUG_PATH 2>&1
SLURM_JOB_GPUS=""
else
echo "SLURM_JOB_GPUS=$SLURM_JOB_GPUS" >> $DEBUG_PATH 2>&1
echo -e "\nSLURM_JOB_GPUS=$SLURM_JOB_GPUS" >> $DEBUG_PATH 2>&1
fi
# check if job is part of an array job
......
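SLURM_JOB_GPUS holds a comma-separated list of the GPU indices allocated on the node. A hypothetical way to derive a GPU count from it:

# hypothetical example value: SLURM_JOB_GPUS="0,1,2,3" on a four-GPU node
NUM_GPUS=$(echo "$SLURM_JOB_GPUS" | awk -F',' '{print NF}')
echo "NUM_GPUS=$NUM_GPUS" >> $DEBUG_PATH 2>&1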
 #!/bin/bash
-if [ "$SLURM_JOB_USER" != "rdietric" ] && [ "$SLURM_JOB_USER" != "fwinkler" ]; then
+#if [ "$SLURM_JOB_USER" != "rdietric" ] && [ "$SLURM_JOB_USER" != "fwinkler" ]; then
+# exit 0
+#fi
+if [[ $HOSTNAME = taurusi7* ]]; then
 exit 0
 fi
@@ -38,6 +42,15 @@ else
   DEBUG_PATH=/dev/null
 fi
+#### Developer Debugging ####
+if [ "$SLURM_JOB_USER" = "rdietric" ]; then
+  echo -e "\n### $SLURM_JOB_USER ###" >> $DEBUG_PATH 2>&1
+  export LD_LIBRARY_PATH=${PIKA_BUILD_PATH}/likwid/${LIKWID_VERSION}/lib:$LD_LIBRARY_PATH
+  /sw/taurus/tools/pika/daemon/collectd/collectd-plugins/c/topo >> $DEBUG_PATH 2>&1
+  echo "### End $SLURM_JOB_USER ###" >> $DEBUG_PATH 2>&1
+fi
+#############################
 # print date
 date >> $DEBUG_PATH 2>&1
@@ -70,21 +83,6 @@ if [ ! -f "/etc/cron.daily/pika_logrotate.sh" ]; then
   cp ${PIKA_ROOT}/daemon/logrotate/pika_logrotate.sh /etc/cron.daily >> $DEBUG_PATH 2>&1
 fi
-# determine master node
-MASTER_NODE=`echo ${PIKA_JOB_NODELIST} | nodeset -e | cut -d ' ' -f 1 | cut -d. -f1`
-# this node's name
-PIKA_HOSTNAME=$(hostname | cut -d. -f1)
-echo -e "\nMASTER_NODE=$MASTER_NODE" >> $DEBUG_PATH 2>&1
-echo -e "\nPIKA_HOSTNAME=$PIKA_HOSTNAME" >> $DEBUG_PATH 2>&1
-# file where job information is stored (can be sourced later)
-BATCHSYSTEM_ENV_FILE=${LOCAL_STORE}/pika_batchsystem_env_${PIKA_JOB_ID}
-# set defaults for all pika metadata provided by SLURM
-source ${PIKA_ROOT}/job_control/slurm/taurus/pika_slurm_env.sh >> $DEBUG_PATH 2>&1
 # check for Python installation
 echo -e "\nCheck PIKA python3:" >> $DEBUG_PATH 2>&1
 pika_python_bin=${PYTHON_ROOT}/bin/python3
@@ -101,6 +99,35 @@ else
   done
 fi
+# determine master node
+if [ -x "$(command -v ${PYTHON_ROOT}/bin/nodeset)" ]; then
+  MASTER_NODE=`echo ${PIKA_JOB_NODELIST} | ${PYTHON_ROOT}/bin/nodeset -e | cut -d ' ' -f 1 | cut -d. -f1`
+else
+  echo "Error: PIKA nodeset is NOT available!" >> $DEBUG_PATH 2>&1
+  if [ -x "$(command -v nodeset)" ]; then
+    echo "Trying system default nodeset." >> $DEBUG_PATH 2>&1
+    save_pyhome=$PYTHONHOME
+    save_pypath=$PYTHONPATH
+    unset PYTHONHOME
+    unset PYTHONPATH
+    MASTER_NODE=`echo ${PIKA_JOB_NODELIST} | nodeset -e | cut -d ' ' -f 1 | cut -d. -f1`
+    export PYTHONHOME=$save_pyhome
+    export PYTHONPATH=$save_pypath
+  else
+    echo -e "Error: nodeset is not available!" >> $DEBUG_PATH 2>&1
+  fi
+fi
+echo -e "\nMASTER_NODE=$MASTER_NODE" >> $DEBUG_PATH 2>&1
+# this node's name
+PIKA_HOSTNAME=$(hostname | cut -d. -f1)
+echo "PIKA_HOSTNAME=$PIKA_HOSTNAME" >> $DEBUG_PATH 2>&1
+# file where job information is stored (can be sourced later)
+BATCHSYSTEM_ENV_FILE=${LOCAL_STORE}/pika_batchsystem_env_${PIKA_JOB_ID}
+# set defaults for all pika metadata provided by SLURM
+source ${PIKA_ROOT}/job_control/slurm/taurus/pika_slurm_env.sh >> $DEBUG_PATH 2>&1
 ##### (7) get additional job metadata from redis
 PIKA_MONITORING=1
 if [ -x "$(command -v ${pika_python_bin})" ]; then
......
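The PYTHONHOME/PYTHONPATH handling around the system nodeset call is a save, unset, and restore pattern: the system nodeset is itself a Python tool, and PIKA's own Python environment variables would otherwise point it at the wrong interpreter libraries. A minimal sketch of the pattern, with a hypothetical invocation:

# save current values, run the tool in a clean Python environment, restore
save_pyhome=$PYTHONHOME
save_pypath=$PYTHONPATH
unset PYTHONHOME PYTHONPATH
nodeset -e 'taurusi[7001-7003]'   # hypothetical invocation of the system tool
export PYTHONHOME=$save_pyhome    # export restores visibility to child processes
export PYTHONPATH=$save_pypath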