prolog.sh 4.56 KB
Newer Older
Frank Winkler's avatar
Frank Winkler committed
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36
#!/bin/bash

source /sw/taurus/tools/pika/pika-current.conf

##### (1) stop early when SLURM_JOB_ID is empty, otherwise publish it
if [[ -n "${SLURM_JOB_ID}" ]]; then
  # expose the job id under the PIKA name for everything that runs afterwards
  export PIKA_JOB_ID=${SLURM_JOB_ID}
else
  # no job id: write a note plus all SLURM-related environment variables to a
  # fixed report file, then leave the prolog with status 0
  missing_id_report=/tmp/pika_debug/slurm_job_id_not_available
  mkdir -p /tmp/pika_debug
  echo -e "\n SLURM_JOB_ID is not available. Exit prolog" > "${missing_id_report}" 2>&1
  env | grep SLURM >> "${missing_id_report}" 2>&1
  exit 0
fi


##### (2) check for active jobs and make job visible to other prologs #####
# Number of local running jobs: count the pika_prolog_<job id> marker files in
# LOCAL_STORE (one is created by the touch below). A glob is used instead of
# parsing `ls -l`, because the long listing also matches the pattern in the
# owner, group and symlink-target columns.
LOCAL_JOBS_RUNNING=0
for prolog_marker in "${LOCAL_STORE}"/pika_prolog_*; do
  # an unmatched glob stays literal, so only count entries that really exist
  if [[ -e "${prolog_marker}" || -L "${prolog_marker}" ]]; then
    LOCAL_JOBS_RUNNING=$((LOCAL_JOBS_RUNNING + 1))
  fi
done
# create prolog file (which is removed in epilog)
touch "${LOCAL_STORE}/pika_prolog_${PIKA_JOB_ID}"


##### (3) Generate debug file
# With PIKA_DEBUG=1 all later output is appended to a per-job file below
# /tmp/pika_debug; otherwise DEBUG_PATH points to /dev/null.
# An unset PIKA_DEBUG is treated as 0 instead of raising a test syntax error.
if [[ "${PIKA_DEBUG:-0}" -eq 1 ]]; then
   # the directory has to exist before find can search it
   mkdir -p /tmp/pika_debug

   # delete debug files older than 7 days; matching by -name keeps find quiet
   # when no such file exists (an unmatched shell glob would be passed on as a
   # literal, non-existent start path)
   find /tmp/pika_debug -maxdepth 1 -type f \
     \( -name 'pika_*' -o -name 'memcache_*' \) -mtime +7 -exec rm -f -- {} +

   DEBUG_PATH=/tmp/pika_debug/pika_${PIKA_JOB_ID}
   echo -e "Start prolog debugging..." > "$DEBUG_PATH" 2>&1
   # make the debug file readable for all users
   chmod o+r "$DEBUG_PATH"
else
   DEBUG_PATH=/dev/null
fi

37 38 39
# append the current date and time to the debug output
date >> "${DEBUG_PATH}" 2>&1

Frank Winkler's avatar
Frank Winkler committed
40 41 42 43 44 45 46 47 48 49 50

##### (4) stop early when SLURM_NODELIST is empty, otherwise publish it
if [[ -n "${SLURM_NODELIST}" ]]; then
  # expose the node list under the PIKA name for everything that runs afterwards
  export PIKA_JOB_NODELIST=${SLURM_NODELIST}
else
  # no node list: leave a note in the debug output and end the prolog with status 0
  echo -e "\n SLURM_NODELIST is not available. Exit prolog" >> "${DEBUG_PATH}" 2>&1
  exit 0
fi


##### (5) pika package installation
fwinkler's avatar
fwinkler committed
51 52
# path of the collectd setup lock file below LOCAL_STORE
# NOTE(review): not used in the visible part of this script; presumably read by
# one of the sourced include scripts - confirm
lock_collectd="${LOCAL_STORE}/pika_collectd_setup.lock"

Frank Winkler's avatar
Frank Winkler committed
53 54
# install pika python and likwid in /opt/pika if it is not already there
# Steps: stop a left-over pika_collectd daemon, remove old installations
# (/opt/pika, /opt/prope) and unpack PIKA_PACKAGE_PATH next to
# PIKA_INSTALL_PATH. All progress messages go to DEBUG_PATH.
if [[ ! -d "${PIKA_INSTALL_PATH}" ]]; then
  echo -e "\nInstall/Replace PIKA software stack to ${PIKA_INSTALL_PATH}" >> "$DEBUG_PATH" 2>&1

  # check if an old collectd daemon is still running, if so kill it
  echo -e "\nCheck if an old PIKA collectd is still running" >> "$DEBUG_PATH" 2>&1
  DAEMON="pika_collectd"
  COLLECTD_PID=$(ps -eo pid,cmd | grep -v grep | grep "$DAEMON" | awk '{print $1}')
  echo -e "\nCOLLECTD_PID=$COLLECTD_PID" >> "$DEBUG_PATH" 2>&1

  if [[ -z "$COLLECTD_PID" ]]; then
    echo -e "\ncollectd is not running." >> "$DEBUG_PATH" 2>&1
  else
    old_pika_collectd_procs=$(ps -eo pid,cmd | grep -v grep | grep -c "$DAEMON")
    echo -e "\nNumber of active old PIKA Collectd processes: ${old_pika_collectd_procs}. Try to terminate them." >> "$DEBUG_PATH" 2>&1

    # COLLECTD_PID may hold several PIDs (one per line), so it has to be
    # word-split and is left unquoted on purpose
    # shellcheck disable=SC2086
    kill -TERM $COLLECTD_PID
    sleep 1

    old_pika_collectd_procs=$(ps -eo pid,cmd | grep -v grep | grep -c "$DAEMON")
    if [[ "$old_pika_collectd_procs" -gt 0 ]]; then
      # TERM was not enough, escalate to KILL
      echo -e "\nkill -KILL $COLLECTD_PID" >> "$DEBUG_PATH" 2>&1
      # shellcheck disable=SC2086
      kill -KILL $COLLECTD_PID >> "$DEBUG_PATH" 2>&1

      # log the error only if processes are still listed after KILL
      # (it used to be written unconditionally, even after a clean stop)
      old_pika_collectd_procs=$(ps -eo pid,cmd | grep -v grep | grep -c "$DAEMON")
      if [[ "$old_pika_collectd_procs" -gt 0 ]]; then
        echo -e "\nError: Could not terminate old PIKA Collectd processes. ${old_pika_collectd_procs} are still running." >> "$DEBUG_PATH" 2>&1
      fi
    fi
  fi

  # delete old installation if it is still there
  if [[ -d "/opt/pika" ]]; then
    rm -rf /opt/pika
  fi

  # temporary: delete old prope installations
  if [[ -d "/opt/prope" ]]; then
    rm -rf /opt/prope
  fi

  # unpack the package into the parent directory of the install path
  mkdir -p "${PIKA_INSTALL_PATH}"
  echo -e "tar xzf ${PIKA_PACKAGE_PATH} -C ${PIKA_INSTALL_PATH}/.." >> "$DEBUG_PATH" 2>&1
  tar xzf "${PIKA_PACKAGE_PATH}" -C "${PIKA_INSTALL_PATH}/.." >> "$DEBUG_PATH" 2>&1
fi


##### (6) pika presetup
# setup logrotate
fwinkler's avatar
fwinkler committed
97
# check if pika_logrotate.sh is in /etc/cron.daily
fwinkler's avatar
fwinkler committed
98
if [[ ! -f "/etc/cron.daily/pika_logrotate.sh" ]]; then
  # copy the logrotate script from the PIKA tree into /etc/cron.daily;
  # output and errors of cp go to the debug file
  cp "${PIKA_ROOT}/daemon/logrotate/pika_logrotate.sh" /etc/cron.daily >> "$DEBUG_PATH" 2>&1
fi
Frank Winkler's avatar
Frank Winkler committed
101 102 103 104 105 106

# log which python3 executable is found on PATH
{
  echo -e "\nCheck python3 path:"
  which python3
} >> "$DEBUG_PATH" 2>&1

# determine master node
107
# Expand the job's node list with nodeset and keep the first node name,
# shortened to the part before the first dot.
# The node list is passed quoted: it may contain bracket ranges, which an
# unquoted expansion would subject to filename expansion.
MASTER_NODE=$(printf '%s\n' "${PIKA_JOB_NODELIST}" | nodeset -e | cut -d ' ' -f 1 | cut -d. -f1)
Frank Winkler's avatar
Frank Winkler committed
108 109

# this node's name
110
# host name of this node, cut at the first dot (domain part removed)
PIKA_HOSTNAME=$(hostname)
PIKA_HOSTNAME=${PIKA_HOSTNAME%%.*}
Frank Winkler's avatar
Frank Winkler committed
111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134

# log the master node and this node's short name
{
  echo -e "\nMASTER_NODE=$MASTER_NODE"
  echo -e "\nPIKA_HOSTNAME=$PIKA_HOSTNAME"
} >> "$DEBUG_PATH" 2>&1

# per-job file where job information is stored (can be sourced later)
BATCHSYSTEM_ENV_FILE="${LOCAL_STORE}/pika_batchsystem_env_${PIKA_JOB_ID}"

# directory that holds the include scripts sourced below; they run in this
# shell, in exactly this order, with their output appended to the debug file
pika_slurm_include_dir="${PIKA_ROOT}/job_control/slurm/taurus"

# set defaults for all pika metadata provided by SLURM
source "${pika_slurm_include_dir}/pika_slurm_env.sh" >> "$DEBUG_PATH" 2>&1


##### (7) get additional job metadata from redis
# monitoring is switched on by default before the metadata include runs
PIKA_MONITORING=1
source "${pika_slurm_include_dir}/pika_get_metadata_prolog_include.sh" >> "$DEBUG_PATH" 2>&1


##### (8) based on the PIKA_MONITORING value, start or stop collectd
source "${pika_slurm_include_dir}/pika_collectd_prolog_include.sh" >> "$DEBUG_PATH" 2>&1


##### (9) save job metadata
source "${pika_slurm_include_dir}/pika_save_metadata_prolog_include.sh" >> "$DEBUG_PATH" 2>&1

echo -e "\nProlog finished sucessfully!" >> "$DEBUG_PATH" 2>&1
135
# End the prolog with status 0, like every other exit path in this script.
# NOTE(review): presumably a non-zero prolog status would make the batch system
# treat the node or job as failed - confirm against the Slurm configuration.
exit 0