...
 
Commits (6)
...@@ -2,15 +2,21 @@ ...@@ -2,15 +2,21 @@
source ../pika_install.conf source ../pika_install.conf
install_path=$PIKA_INSTALL_PATH
if [ `id -u` -ne 0 ]; then
install_path=$PIKA_BUILD_PATH
fi
# set collectd install path # set collectd install path
COLLECTD_INST_PATH=$PIKA_INSTALL_PATH/collectd/${COLLECTD_VERSION} COLLECTD_INST_PATH=$install_path/collectd/${COLLECTD_VERSION}
PYTHON_ROOT=$PIKA_INSTALL_PATH/python/$PYTHON_VERSION PYTHON_ROOT=$install_path/python/$PYTHON_VERSION
# expose Python3 path # expose Python3 path
export PATH=$PYTHON_ROOT/bin:$PATH export PATH=$PYTHON_ROOT/bin:$PATH
mkdir -p $PIKA_INSTALL_PATH/../sources mkdir -p $install_path/../sources
cd $PIKA_INSTALL_PATH/../sources cd $install_path/../sources
# download, unpack, remove source package # download, unpack, remove source package
if [ $COLLECTD_VERSION == 'GIT' ]; then if [ $COLLECTD_VERSION == 'GIT' ]; then
...@@ -69,7 +75,7 @@ export COLLECTD_ROOT=${COLLECTD_INST_PATH} ...@@ -69,7 +75,7 @@ export COLLECTD_ROOT=${COLLECTD_INST_PATH}
#export LIKWID_ROOT=$PIKA_ROOT/sw/pika/$PIKA_VERSION/likwid/$LIKWID_VERSION #export LIKWID_ROOT=$PIKA_ROOT/sw/pika/$PIKA_VERSION/likwid/$LIKWID_VERSION
export LIKWID_ROOT=${COLLECTD_INST_PATH}/../../likwid/$LIKWID_VERSION export LIKWID_ROOT=${COLLECTD_INST_PATH}/../../likwid/$LIKWID_VERSION
cd ${PIKA_ROOT}/daemon/collectd/collectd-plugins/c cd ${PIKA_ROOT}/daemon/collectd/collectd-plugins/c
make make likwid
# copy custom types into collectd installation # copy custom types into collectd installation
cp $PIKA_ROOT/daemon/collectd/custom_types.db ${COLLECTD_INST_PATH}/share/collectd/ cp $PIKA_ROOT/daemon/collectd/custom_types.db ${COLLECTD_INST_PATH}/share/collectd/
...@@ -2,6 +2,12 @@ ...@@ -2,6 +2,12 @@
source ../pika_install.conf source ../pika_install.conf
install_path=$PIKA_INSTALL_PATH
if [ `id -u` -ne 0 ]; then
install_path=$PIKA_BUILD_PATH
fi
# set compiler # set compiler
if [ -n "${PIKA_TARGET}" ] && [ ${PIKA_TARGET} == 'power' ]; then if [ -n "${PIKA_TARGET}" ] && [ ${PIKA_TARGET} == 'power' ]; then
COMPILER=GCCPOWER COMPILER=GCCPOWER
...@@ -9,11 +15,11 @@ else ...@@ -9,11 +15,11 @@ else
COMPILER=GCC #GCCPOWER for IBM Power systems COMPILER=GCC #GCCPOWER for IBM Power systems
fi fi
mkdir -p $PIKA_INSTALL_PATH/../sources mkdir -p $install_path/../sources
cd $PIKA_INSTALL_PATH/../sources cd $install_path/../sources
PYTHON_ROOT=$PIKA_INSTALL_PATH/python/${PYTHON_VERSION} PYTHON_ROOT=$install_path/python/${PYTHON_VERSION}
LIKWID_INST_PATH=$PIKA_INSTALL_PATH/likwid/${LIKWID_VERSION} LIKWID_INST_PATH=$install_path/likwid/${LIKWID_VERSION}
### Build Likwid ### Build Likwid
...@@ -65,8 +71,8 @@ cp config.mk config.mk.backup ...@@ -65,8 +71,8 @@ cp config.mk config.mk.backup
sed -i "/^PREFIX .*/ s|.*|PREFIX = $LIKWID_INST_PATH|" config.mk sed -i "/^PREFIX .*/ s|.*|PREFIX = $LIKWID_INST_PATH|" config.mk
# set access mode # set access mode
sed -i "/^ACCESSMODE = .*/ s|.*|ACCESSMODE = direct|" config.mk #sed -i "/^ACCESSMODE = .*/ s|.*|ACCESSMODE = direct|" config.mk
#sed -i "/^ACCESSMODE = .*/ s|.*|ACCESSMODE = perf_event|" config.mk sed -i "/^ACCESSMODE = .*/ s|.*|ACCESSMODE = perf_event|" config.mk
# do not build access daemon or frequency changer # do not build access daemon or frequency changer
sed -i "/^BUILDDAEMON = .*/ s|.*|BUILDDAEMON = false|" config.mk sed -i "/^BUILDDAEMON = .*/ s|.*|BUILDDAEMON = false|" config.mk
......
...@@ -3,16 +3,31 @@ ...@@ -3,16 +3,31 @@
source ../pika_install.conf source ../pika_install.conf
#delete old installation #delete old installation
rm -rf /opt/pika/ if [ `id -u` -ne 0 ]; then
if [ ! -z "$PIKA_BUILD_PATH" ] && [ -d "$PIKA_BUILD_PATH" ]; then
rm -rf $PIKA_BUILD_PATH/../sources
cd $PIKA_BUILD_PATH/..
rm -rf $PIKA_VERSION
cd -
else
echo Error with build path $PIKA_BUILD_PATH
fi
else
rm -rf /opt/pika/
fi
./install_python3.sh ./install_python3.sh 2>&1 | tee python_install.log
./install_likwid.sh ./install_likwid.sh 2>&1 | tee likwid_install.log
# collectd requires likwid and python # collectd requires likwid and python
./install_collectd.sh ./install_collectd.sh 2>&1 | tee collectd_install.log
# go to PIKA install root folder # go to PIKA install root folder
cd $PIKA_INSTALL_PATH/.. if [ `id -u` -ne 0 ]; then
cd $PIKA_BUILD_PATH/..
else
cd $PIKA_INSTALL_PATH/..
fi
#create tarball in /sw/taurus/tools/pika/archives #create tarball in /sw/taurus/tools/pika/archives
tar czf ${PIKA_PACKAGE_PATH} ${PIKA_VERSION} tar czf ${PIKA_PACKAGE_PATH} ${PIKA_VERSION}
...@@ -5,10 +5,16 @@ ...@@ -5,10 +5,16 @@
source ../pika_install.conf source ../pika_install.conf
mkdir -p $PIKA_INSTALL_PATH/../sources install_path=$PIKA_INSTALL_PATH
cd $PIKA_INSTALL_PATH/../sources
DEST_INST=${PIKA_INSTALL_PATH}/python/${PYTHON_VERSION} if [ `id -u` -ne 0 ]; then
install_path=$PIKA_BUILD_PATH
fi
mkdir -p $install_path/../sources
cd $install_path/../sources
DEST_INST=${install_path}/python/${PYTHON_VERSION}
# download python # download python
wget https://www.python.org/ftp/python/${PYTHON_VERSION}/Python-${PYTHON_VERSION}.tar.xz wget https://www.python.org/ftp/python/${PYTHON_VERSION}/Python-${PYTHON_VERSION}.tar.xz
...@@ -28,11 +34,15 @@ rm -rf Python-${PYTHON_VERSION}.tar.xz ...@@ -28,11 +34,15 @@ rm -rf Python-${PYTHON_VERSION}.tar.xz
export PATH=${DEST_INST}/bin:$PATH export PATH=${DEST_INST}/bin:$PATH
export LD_LIBRARY_PATH=${DEST_INST}/lib:${DEST_INST}/lib/python3.7:${DEST_INST}/lib/python3.6:$LD_LIBRARY_PATH export LD_LIBRARY_PATH=${DEST_INST}/lib:${DEST_INST}/lib/python3.7:${DEST_INST}/lib/python3.6:$LD_LIBRARY_PATH
PYTHONHOME=${DEST_INST}
PYTHONROOT=${DEST_INST}
pip3 install --upgrade pip pip3 install --upgrade pip
# install influxdb and mysql client # install influxdb and mysql client
pip3 install influxdb pip3 install influxdb
pip3 install mysql-connector #==2.1.4 #pip3 install mysql-connector #==2.1.4
pip3 install PyMySQL
#pip3 install nvidia-ml-py #pip3 install nvidia-ml-py
pip3 install ClusterShell pip3 install ClusterShell
#pip install python-memcached #pip install python-memcached
......
...@@ -9,7 +9,7 @@ COMPUTE_NODES=$(sinfo -o %N --noheader) ...@@ -9,7 +9,7 @@ COMPUTE_NODES=$(sinfo -o %N --noheader)
clush -t 30 -B -u 30 -w $COMPUTE_NODES "ls /opt/slurm/prolog.d/07_pika;ls /opt/slurm/epilog.d/03_pika" clush -t 30 -B -u 30 -w $COMPUTE_NODES "ls /opt/slurm/prolog.d/07_pika;ls /opt/slurm/epilog.d/03_pika"
#purge pika on all compute nodes #purge pika on all compute nodes
clush -t 30 -B -u 30 -w $COMPUTE_NODES "sudo /sw/taurus/tools/pika/job_control/slurm/taurus/pika_control.sh purge" clush -t 30 -B -u 90 -w $COMPUTE_NODES "sudo /sw/taurus/tools/pika/job_control/slurm/taurus/pika_control.sh purge" 2>&1 | tee pika_purge.txt
#install pika in all compute nodes #install pika in all compute nodes
clush -t 30 -B -u 30 -w $COMPUTE_NODES "sudo /sw/taurus/tools/pika/job_control/slurm/taurus/pika_control.sh install" clush -t 30 -B -u 90 -w $COMPUTE_NODES "sudo /sw/taurus/tools/pika/job_control/slurm/taurus/pika_control.sh install" 2>&1 | tee pika_install.txt
#!/bin/bash #!/bin/bash
if [ "$SLURM_JOB_USER" != "rdietric" ] && [ "$SLURM_JOB_USER" != "fwinkler" ]; then #if [ "$SLURM_JOB_USER" != "rdietric" ] && [ "$SLURM_JOB_USER" != "fwinkler" ]; then
# exit 0
#fi
if [[ $HOSTNAME = taurusi7* ]]; then
exit 0 exit 0
fi fi
...@@ -29,18 +33,38 @@ fi ...@@ -29,18 +33,38 @@ fi
BATCHSYSTEM_ENV_FILE=${LOCAL_STORE}/pika_batchsystem_env_${PIKA_JOB_ID} BATCHSYSTEM_ENV_FILE=${LOCAL_STORE}/pika_batchsystem_env_${PIKA_JOB_ID}
# determine master node # determine master node
MASTER_NODE=`echo ${PIKA_JOB_NODELIST} | nodeset -e | cut -d ' ' -f 1 | cut -d. -f1` if [ -x "$(command -v ${PYTHON_ROOT}/bin/nodeset)" ]; then
MASTER_NODE=`echo ${PIKA_JOB_NODELIST} | ${PYTHON_ROOT}/bin/nodeset -e | cut -d ' ' -f 1 | cut -d. -f1`
else
echo -e "Error: PIKA nodeset is NOT available!" >> $DEBUG_PATH 2>&1
if [ -x "$(command -v nodeset)" ]; then
echo "Try system default nodeset." >> $DEBUG_PATH 2>&1
save_pypath=$PYTHONHOME
unset PYTHONHOME
unset PYTHONPATH
MASTER_NODE=`echo ${PIKA_JOB_NODELIST} | nodeset -e | cut -d ' ' -f 1 | cut -d. -f1`
PYTHONHOME=$save_pypath
PYTHONPATH=$save_pypath
else
echo -e "Error: nodeset not available!" >> $DEBUG_PATH 2>&1
fi
fi
if [ "$MASTER_NODE" = "" ]; then
echo "PIKA_JOB_NODELIST=${PIKA_JOB_NODELIST}" >> $DEBUG_PATH 2>&1
fi
echo -e "\nMASTER_NODE=$MASTER_NODE" >> $DEBUG_PATH 2>&1
# this node's name # this node's name
PIKA_HOSTNAME=$(hostname | cut -d. -f1) PIKA_HOSTNAME=$(hostname | cut -d. -f1)
echo "PIKA_HOSTNAME=$PIKA_HOSTNAME" >> $DEBUG_PATH 2>&1
# update job metadata # update job metadata
source ${PIKA_ROOT}/job_control/slurm/taurus/pika_update_metadata_epilog_include.sh >> $DEBUG_PATH 2>&1 source ${PIKA_ROOT}/job_control/slurm/taurus/pika_update_metadata_epilog_include.sh >> $DEBUG_PATH 2>&1
# Reset counters for exclusive jobs AND monitoring enabled # Reset counters for exclusive jobs AND monitoring enabled (need for LIKWID direct MSR access)
if [ $PIKA_MONITORING -eq 1 ] && [ $PIKA_JOB_EXCLUSIVE -eq 1 ]; then #if [ $PIKA_MONITORING -eq 1 ] && [ $PIKA_JOB_EXCLUSIVE -eq 1 ]; then
echo "PUTNOTIF severity=okay time=$(date +%s) plugin=likwid message=rstCtrs" | nc -U ${PIKA_COLLECTD_SOCKET} # echo "PUTNOTIF severity=okay time=$(date +%s) plugin=likwid message=rstCtrs" | nc -U ${PIKA_COLLECTD_SOCKET}
fi #fi
# cleanup local data # cleanup local data
rm -f ${BATCHSYSTEM_ENV_FILE} rm -f ${BATCHSYSTEM_ENV_FILE}
......
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
# -1 -> redis server down or error in python script # -1 -> redis server down or error in python script
# master node retrieves additional job information # master node retrieves additional job information
if [[ ${PIKA_HOSTNAME} = *"${MASTER_NODE}"* ]]; then if [ "${PIKA_HOSTNAME}" = "${MASTER_NODE}" ]; then
echo -e "\nGet job meta data (master node)" >> $DEBUG_PATH 2>&1 echo -e "\nGet job meta data (master node)" >> $DEBUG_PATH 2>&1
PIKA_MONITORING=`python3 ${PIKA_ROOT}/job_control/slurm/taurus/pika_slurm_env_redis.py --jobid=${PIKA_JOB_ID} --env_file=${BATCHSYSTEM_ENV_FILE} --force 2>&1` PIKA_MONITORING=`python3 ${PIKA_ROOT}/job_control/slurm/taurus/pika_slurm_env_redis.py --jobid=${PIKA_JOB_ID} --env_file=${BATCHSYSTEM_ENV_FILE} --force 2>&1`
else else
...@@ -24,6 +24,6 @@ fi ...@@ -24,6 +24,6 @@ fi
echo -e "\nPIKA_MONITORING=$PIKA_MONITORING" >> $DEBUG_PATH 2>&1 echo -e "\nPIKA_MONITORING=$PIKA_MONITORING" >> $DEBUG_PATH 2>&1
# write monitoring flag into file for master node # write monitoring flag into file for master node
if [[ ${PIKA_HOSTNAME} = *"${MASTER_NODE}"* ]]; then if [ "${PIKA_HOSTNAME}" = "${MASTER_NODE}" ]; then
echo $PIKA_MONITORING > ${LOCAL_STORE}/pika_monitoring_${PIKA_JOB_ID} echo $PIKA_MONITORING > ${LOCAL_STORE}/pika_monitoring_${PIKA_JOB_ID}
fi fi
#!/bin/bash #!/bin/bash
if [ ${PIKA_HOSTNAME} = ${MASTER_NODE} ]; then if [ "${PIKA_HOSTNAME}" = "${MASTER_NODE}" ]; then
# get utility functions # get utility functions
source ${PIKA_ROOT}/pika_utils.sh >> $DEBUG_PATH 2>&1 source ${PIKA_ROOT}/pika_utils.sh >> $DEBUG_PATH 2>&1
...@@ -38,12 +38,12 @@ if [ ${PIKA_HOSTNAME} = ${MASTER_NODE} ]; then ...@@ -38,12 +38,12 @@ if [ ${PIKA_HOSTNAME} = ${MASTER_NODE} ]; then
echo "JOB_NUM_NODES=$JOB_NUM_NODES" >> $DEBUG_PATH 2>&1 echo "JOB_NUM_NODES=$JOB_NUM_NODES" >> $DEBUG_PATH 2>&1
fi fi
echo -e "\nCheck for GPUs" >> $DEBUG_PATH 2>&1 #echo -e "\nCheck for GPUs" >> $DEBUG_PATH 2>&1
if [ -z "${SLURM_JOB_GPUS}" ]; then if [ -z "${SLURM_JOB_GPUS}" ]; then
echo "No GPUs on this node" >> $DEBUG_PATH 2>&1 echo -e "\nNo GPUs on this node" >> $DEBUG_PATH 2>&1
SLURM_JOB_GPUS="" SLURM_JOB_GPUS=""
else else
echo "SLURM_JOB_GPUS=$SLURM_JOB_GPUS" >> $DEBUG_PATH 2>&1 echo -e "\nSLURM_JOB_GPUS=$SLURM_JOB_GPUS" >> $DEBUG_PATH 2>&1
fi fi
# check if job is part of an array job # check if job is part of an array job
......
...@@ -28,18 +28,29 @@ def main(job_id, debug_path, env_file, force): ...@@ -28,18 +28,29 @@ def main(job_id, debug_path, env_file, force):
debug_file = open(debug_file_path,'w') debug_file = open(debug_file_path,'w')
debug_file.write("debug before: {0} {1}\n".format(job_id, time.time())) debug_file.write("debug before: {0} {1}\n".format(job_id, time.time()))
slurm_env_string = None
haveConnectionError = False
try:
slurm_env_string = connection.get("prope_" + str(job_id))
except: # redis.exceptions.TimeoutError:
haveConnectionError = True
t = 0 t = 0
slurm_env_string = connection.get("prope_" + str(job_id))
while slurm_env_string == None and t < 10: while slurm_env_string == None and t < 10:
slurm_env_string = connection.get("prope_" + str(job_id)) try:
slurm_env_string = connection.get("prope_" + str(job_id))
except: # redis.exceptions.TimeoutError:
haveConnectionError = True
continue
sleep(1) sleep(1)
t = t + 1 t = t + 1
#pprint(slurm_env_string) #pprint(slurm_env_string)
if debug_path and debug_file: if debug_file:
time_attemps = "Time attemps = " + str(t) + str("\n") if haveConnectionError:
debug_file.write(time_attemps) debug_file.write("Redis connection error ocurred!\n")
debug_file.write("Connection attemps = {:d}\n".format(t))
debug_file.write("debug after: {0} {1}\n".format(job_id, time.time())) debug_file.write("debug after: {0} {1}\n".format(job_id, time.time()))
#f.close() #f.close()
......
#!/bin/bash #!/bin/bash
if [ ${PIKA_HOSTNAME} = ${MASTER_NODE} ]; then if [ "${PIKA_HOSTNAME}" = "${MASTER_NODE}" ]; then
# get utility functions # get utility functions
source ${PIKA_ROOT}/pika_utils.sh >> $DEBUG_PATH 2>&1 source ${PIKA_ROOT}/pika_utils.sh >> $DEBUG_PATH 2>&1
...@@ -10,8 +10,13 @@ if [ ${PIKA_HOSTNAME} = ${MASTER_NODE} ]; then ...@@ -10,8 +10,13 @@ if [ ${PIKA_HOSTNAME} = ${MASTER_NODE} ]; then
# get start time from prolog # get start time from prolog
LOCAL_TIME_STORE=${LOCAL_STORE}/pika_local_time_${PIKA_JOB_ID} LOCAL_TIME_STORE=${LOCAL_STORE}/pika_local_time_${PIKA_JOB_ID}
JOB_START=`cat ${LOCAL_TIME_STORE}/START_${PIKA_HOSTNAME}` if [ -d ${LOCAL_TIME_STORE} ] && [ -f ${LOCAL_TIME_STORE}/START_${PIKA_HOSTNAME} ]; then
rm -rf ${LOCAL_TIME_STORE} JOB_START=`cat ${LOCAL_TIME_STORE}/START_${PIKA_HOSTNAME}`
rm -rf ${LOCAL_TIME_STORE}
else
rm -rf ${LOCAL_TIME_STORE}
exit 0
fi
echo -e "\nJOB_START=${JOB_START}" >> $DEBUG_PATH 2>&1 echo -e "\nJOB_START=${JOB_START}" >> $DEBUG_PATH 2>&1
# save local end time # save local end time
...@@ -31,7 +36,11 @@ if [ ${PIKA_HOSTNAME} = ${MASTER_NODE} ]; then ...@@ -31,7 +36,11 @@ if [ ${PIKA_HOSTNAME} = ${MASTER_NODE} ]; then
# if Redis script worked overwrite metadata # if Redis script worked overwrite metadata
if [ "${PIKA_MONITORING}" -ge 0 ]; then if [ "${PIKA_MONITORING}" -ge 0 ]; then
source ${BATCHSYSTEM_ENV_FILE} >> $DEBUG_PATH 2>&1 if [ -f ${BATCHSYSTEM_ENV_FILE} ]; then
source ${BATCHSYSTEM_ENV_FILE} >> $DEBUG_PATH 2>&1
else
echo "${BATCHSYSTEM_ENV_FILE} does not exist!" >> $DEBUG_PATH 2>&1
fi
else else
echo -e "\nNo job metadata from redis available." >> $DEBUG_PATH 2>&1 echo -e "\nNo job metadata from redis available." >> $DEBUG_PATH 2>&1
source ${PIKA_ROOT}/job_control/slurm/taurus/pika_slurm_env.sh >> $DEBUG_PATH 2>&1 source ${PIKA_ROOT}/job_control/slurm/taurus/pika_slurm_env.sh >> $DEBUG_PATH 2>&1
......
#!/bin/bash #!/bin/bash
if [ "$SLURM_JOB_USER" != "rdietric" ] && [ "$SLURM_JOB_USER" != "fwinkler" ]; then #if [ "$SLURM_JOB_USER" != "rdietric" ] && [ "$SLURM_JOB_USER" != "fwinkler" ]; then
# exit 0
#fi
if [[ $HOSTNAME = taurusi7* ]]; then
exit 0 exit 0
fi fi
...@@ -38,6 +42,15 @@ else ...@@ -38,6 +42,15 @@ else
DEBUG_PATH=/dev/null DEBUG_PATH=/dev/null
fi fi
#### Developer Debugging ####
if [ "$SLURM_JOB_USER" = "rdietric" ]; then
echo -e "\n### $SLURM_JOB_USER ###" >> $DEBUG_PATH 2>&1
export LD_LIBRARY_PATH=${PIKA_BUILD_PATH}/likwid/${LIKWID_VERSION}/lib:$LD_LIBRARY_PATH
/sw/taurus/tools/pika/daemon/collectd/collectd-plugins/c/topo >> $DEBUG_PATH 2>&1
echo "### End $SLURM_JOB_USER ###" >> $DEBUG_PATH 2>&1
fi
#############################
# print date # print date
date >> $DEBUG_PATH 2>&1 date >> $DEBUG_PATH 2>&1
...@@ -70,21 +83,6 @@ if [ ! -f "/etc/cron.daily/pika_logrotate.sh" ]; then ...@@ -70,21 +83,6 @@ if [ ! -f "/etc/cron.daily/pika_logrotate.sh" ]; then
cp ${PIKA_ROOT}/daemon/logrotate/pika_logrotate.sh /etc/cron.daily >> $DEBUG_PATH 2>&1 cp ${PIKA_ROOT}/daemon/logrotate/pika_logrotate.sh /etc/cron.daily >> $DEBUG_PATH 2>&1
fi fi
# determine master node
MASTER_NODE=`echo ${PIKA_JOB_NODELIST} | nodeset -e | cut -d ' ' -f 1 | cut -d. -f1`
# this node's name
PIKA_HOSTNAME=$(hostname | cut -d. -f1)
echo -e "\nMASTER_NODE=$MASTER_NODE" >> $DEBUG_PATH 2>&1
echo -e "\nPIKA_HOSTNAME=$PIKA_HOSTNAME" >> $DEBUG_PATH 2>&1
# file where job information is stored (can be sourced later)
BATCHSYSTEM_ENV_FILE=${LOCAL_STORE}/pika_batchsystem_env_${PIKA_JOB_ID}
# set defaults for all pika metadata provided by SLURM
source ${PIKA_ROOT}/job_control/slurm/taurus/pika_slurm_env.sh >> $DEBUG_PATH 2>&1
# check for Python installation # check for Python installation
echo -e "\nCheck PIKA python3:" >> $DEBUG_PATH 2>&1 echo -e "\nCheck PIKA python3:" >> $DEBUG_PATH 2>&1
pika_python_bin=${PYTHON_ROOT}/bin/python3 pika_python_bin=${PYTHON_ROOT}/bin/python3
...@@ -101,6 +99,35 @@ else ...@@ -101,6 +99,35 @@ else
done done
fi fi
# determine master node
if [ -x "$(command -v ${PYTHON_ROOT}/bin/nodeset)" ]; then
MASTER_NODE=`echo ${PIKA_JOB_NODELIST} | ${PYTHON_ROOT}/bin/nodeset -e | cut -d ' ' -f 1 | cut -d. -f1`
else
echo "Error: PIKA nodeset is NOT available!" >> $DEBUG_PATH 2>&1
if [ -x "$(command -v nodeset)" ]; then
echo "Try system default nodeset." >> $DEBUG_PATH 2>&1
save_pypath=$PYTHONHOME
unset PYTHONHOME
unset PYTHONPATH
MASTER_NODE=`echo ${PIKA_JOB_NODELIST} | nodeset -e | cut -d ' ' -f 1 | cut -d. -f1`
PYTHONHOME=$save_pypath
PYTHONPATH=$save_pypath
else
echo -e "Error: nodeset not available!" >> $DEBUG_PATH 2>&1
fi
fi
echo -e "\nMASTER_NODE=$MASTER_NODE" >> $DEBUG_PATH 2>&1
# this node's name
PIKA_HOSTNAME=$(hostname | cut -d. -f1)
echo "PIKA_HOSTNAME=$PIKA_HOSTNAME" >> $DEBUG_PATH 2>&1
# file where job information is stored (can be sourced later)
BATCHSYSTEM_ENV_FILE=${LOCAL_STORE}/pika_batchsystem_env_${PIKA_JOB_ID}
# set defaults for all pika metadata provided by SLURM
source ${PIKA_ROOT}/job_control/slurm/taurus/pika_slurm_env.sh >> $DEBUG_PATH 2>&1
##### (7) get additional job metadata from redis ##### (7) get additional job metadata from redis
PIKA_MONITORING=1 PIKA_MONITORING=1
if [ -x "$(command -v ${pika_python_bin})" ]; then if [ -x "$(command -v ${pika_python_bin})" ]; then
......
...@@ -2,16 +2,21 @@ ...@@ -2,16 +2,21 @@
export PIKA_VERSION=1.1 export PIKA_VERSION=1.1
export COLLECTD_VERSION=5.10.0 export COLLECTD_VERSION=5.10.0
export LIKWID_VERSION=5.0.1 export LIKWID_VERSION=git #5.0.1
export PIKA_ROOT=/sw/taurus/tools/pika export PIKA_ROOT=/sw/taurus/tools/pika
if [[ $(hostname -s) = taurusml* ]]; then if [[ $(hostname -s) = taurusml* ]]; then
export PYTHON_VERSION=3.6.10 export PYTHON_VERSION=3.6.10
export PIKA_PACKAGE_PATH=${PIKA_ROOT}/archives/pika-${PIKA_VERSION}-ml.tar.gz export PIKA_PACKAGE_PATH=${PIKA_ROOT}/archives/pika-${PIKA_VERSION}-ml.tar.gz
#export PIKA_PACKAGE_PATH=${PIKA_ROOT}/archives/pika-${PIKA_VERSION}-direct-ml.tar.gz
export PIKA_COLLECTD_BATCH_SIZE=500
else else
export PYTHON_VERSION=3.7.6 export PYTHON_VERSION=3.7.7
export PIKA_PACKAGE_PATH=${PIKA_ROOT}/archives/pika-${PIKA_VERSION}.tar.gz export PIKA_PACKAGE_PATH=${PIKA_ROOT}/archives/pika-${PIKA_VERSION}.tar.gz
export PIKA_COLLECTD_BATCH_SIZE=200
fi fi
......