Commit ea8efe8e authored by Robert Dietrich

improved the install scripts and configuration files

parent b7d9f8ad
# The PIKA Collection Daemon
The PIKA node monitoring uses collectd to acquire and send metrics to InfluxDB.
......@@ -9,19 +9,23 @@ We prepared simple bash scripts that can be used to generate the monitoring pack
All scripts use the `source` command to read the environment from *pika_install.conf* and can be executed standalone.
Called as *root*, the *PIKA_INSTALL_PATH* is used as build and install directory, otherwise *PIKA_BUILD_PATH* is used.
*install_pika.sh* is the main script. It deletes any old installation of the PIKA package.
*install_pika.sh* is the main script.
It optionally accepts two parameters that change the PIKA version (*default: 1.2*) and the LIKWID access mode (*default: perf_event*): *-v PIKA_VERSION* and *direct*.
(*install_likwid.sh* also accepts the parameter *direct* to use the direct access mode to the MSR registers.)
Before starting the install process, the previous installation of the PIKA package is deleted (if it exists).
Then it runs *install_python3.sh*, *install_likwid.sh* and *install_collectd.sh*.
(For a manual installation, make sure that collectd is installed last.)
Finally, a tarball is created which can be unpacked on the target nodes.
Depending on the configuration (OS, software, hardware, etc.) different packages have to be created for each system.
This may require adding more `if` branches to the configuration files.
A PIKA package should be created on a node of the partition or system on which it will later run.
The default LIKWID access mode is perf_event. *install_pika.sh* and *install_likwid.sh* optionally accept the parameter *direct* to use the direct access mode to the MSR registers.
The folder compute_node/patches contains patches that can be applied to collectd and LIKWID.
The [collectd daemon patch](compute_node/patches/collectd-5.10.0_daemon.patch) adds the read alignment options (https://github.com/collectd/collectd/pull/3327).
The [collectd daemon patch](compute_node/patches/collectd-5.11.0_daemon.patch) adds the read alignment options (https://github.com/collectd/collectd/pull/3327).
The [LIKWID Set-Counters-Patch](compute_node/patches/pika_likwid-5.0.0_src.patch) adds an API function to (re)set the active counters.
All other patches are deprecated with the most recent versions of collectd and LIKWID.
......@@ -37,9 +41,9 @@ Some recommendations for the configuration (*/etc/influxdb/influxdb.conf*):
[meta]
retention-autocreate = false
[data]
index-version = "inmem"
index-version = "inmem" # this is the default
query-log-enabled = false
cache-max-memory-size = "2g"
cache-max-memory-size = "2g" # default is "1g"
[coordinator]
write-timeout = "29s"
max-concurrent-queries = 0
......
......@@ -2,14 +2,7 @@
source ../pika_install.conf
install_path=$PIKA_INSTALL_PATH
if [ `id -u` -ne 0 ]; then
install_path=$PIKA_BUILD_PATH
fi
# set collectd install path
COLLECTD_INST_PATH=$install_path/collectd/${COLLECTD_VERSION}
# overwrite Python3 root path (in case we build as user)
PYTHON_ROOT=$install_path/python/$PYTHON_VERSION
# expose Python3 path
......@@ -23,7 +16,8 @@ if [ $COLLECTD_VERSION == 'GIT' ]; then
echo "Get Collectd from git"
git clone https://github.com/rdietric/collectd.git collectd-git
cd collectd-git
git checkout prope
#git checkout prope
git checkout alignread
./build.sh
else
if [ $COLLECTD_VERSION == '5.10.0' ]; then
......@@ -39,11 +33,13 @@ else
fi
# apply the "AlignRead" patch to the daemon
echo "Patch file in $PWD: patch -p0 < $PIKA_PATCHES/collectd-${COLLECTD_VERSION}_daemon.patch"
patch -p0 < $PIKA_PATCHES/collectd-${COLLECTD_VERSION}_daemon.patch
echo "Patch file in $PWD: patch -p0 < $pika_patches_path/collectd-${COLLECTD_VERSION}_daemon.patch"
patch -p0 < $pika_patches_path/collectd-${COLLECTD_VERSION}_daemon.patch
echo "Patch file in $PWD: patch -p0 < $PIKA_PATCHES/collectd_gpu_nvidia.patch"
patch -p0 < $PIKA_PATCHES/collectd_gpu_nvidia.patch
if [ $COLLECTD_VERSION != '5.11.0' ]; then
echo "Patch file in $PWD: patch -p0 < $pika_patches_path/collectd_gpu_nvidia.patch"
patch -p0 < $pika_patches_path/collectd_gpu_nvidia.patch
fi
# export source directory
export COLLECTD_SRC=$PWD
......@@ -52,15 +48,21 @@ else
cd ..
fi
# check if configure exists, otherwise run build.sh
if [ ! -f configure ]; then
echo "configure does not exits. Run build.sh"
./build.sh
fi
# build collectd
echo "PYTHON_CONFIG=$PYTHON_ROOT/bin/python3-config ./configure --prefix=${COLLECTD_INST_PATH} --with-cuda=${CUDA_PATH}"
PYTHON_CONFIG=$PYTHON_ROOT/bin/python3-config ./configure --prefix=${COLLECTD_INST_PATH} --with-cuda=${CUDA_PATH}
echo "PYTHON_CONFIG=$PYTHON_ROOT/bin/python3-config ./configure --prefix=${COLLECTD_INSTALL_PATH} --with-cuda=${cuda_path}"
PYTHON_CONFIG=$PYTHON_ROOT/bin/python3-config ./configure --prefix=${COLLECTD_INSTALL_PATH} --with-cuda=${cuda_path}
# add the path where the nvml library is located, if building on a system without NVIDIA GPU
export LIBRARY_PATH=$LIBRARY_PATH:/sw/taurus/tools/pika/install/compute_node/x86
# add paths to plugin.h and collectd.h and to nvml.h as configure for the gpu-nvidia plugin is broken
export C_INCLUDE_PATH=$PWD/src:$PWD/src/daemon:$CUDA_PATH/include:$C_INCLUDE_PATH
export C_INCLUDE_PATH=$PWD/src:$PWD/src/daemon:$cuda_path/include:$C_INCLUDE_PATH
make -j
make install
......@@ -68,14 +70,13 @@ make install
# export collectd build directory
export COLLECTD_BUILD_DIR=$PWD
# export collectd install path
export COLLECTD_ROOT=${COLLECTD_INST_PATH}
# build likwid plugin
### build LIKWID plugin
# Makefile for the collectd LIKWID plugin requires COLLECTD_ROOT and LIKWID_ROOT
export COLLECTD_ROOT=${COLLECTD_INSTALL_PATH}
#export LIKWID_ROOT=$PIKA_ROOT/sw/pika/$PIKA_VERSION/likwid/$LIKWID_VERSION
export LIKWID_ROOT=${COLLECTD_INST_PATH}/../../likwid/$LIKWID_VERSION
export LIKWID_ROOT=${COLLECTD_INSTALL_PATH}/../../likwid/$LIKWID_VERSION
cd ${PIKA_ROOT}/daemon/collectd/collectd-plugins/c
make likwid
# copy custom types into collectd installation
cp $PIKA_ROOT/daemon/collectd/custom_types.db ${COLLECTD_INST_PATH}/share/collectd/
cp $PIKA_ROOT/daemon/collectd/custom_types.db ${COLLECTD_INSTALL_PATH}/share/collectd/
#!/bin/bash
source ../pika_install.conf
install_path=$PIKA_INSTALL_PATH
if [ `id -u` -ne 0 ]; then
install_path=$PIKA_BUILD_PATH
fi
source ../pika_install.conf "$@"
# set compiler
if [ -n "${PIKA_TARGET}" ] && [ ${PIKA_TARGET} == 'power' ]; then
if [ -n "${pika_target_arch}" ] && [ ${pika_target_arch} == 'power' ]; then
COMPILER=GCCPOWER
else
COMPILER=GCC #GCCPOWER for IBM Power systems
......@@ -31,6 +25,11 @@ if [ $LIKWID_VERSION == '5.0.0' ] || [ $LIKWID_VERSION == '5.0.1' ]; then
rm -f v${LIKWID_VERSION}.tar.gz
elif [ $LIKWID_VERSION == 'git' ]; then
git clone https://github.com/RRZE-HPC/likwid.git likwid-${LIKWID_VERSION}
if [ -n "$LIKWID_VERSION_SHA" ]; then
cd likwid-${LIKWID_VERSION}
git checkout $LIKWID_VERSION_SHA
cd ..
fi
else
wget https://github.com/RRZE-HPC/likwid/archive/likwid-${LIKWID_VERSION}.tar.gz
tar xfz likwid-${LIKWID_VERSION}.tar.gz
......@@ -43,24 +42,24 @@ cd likwid-${LIKWID_VERSION}
# apply PIKA patch (set counters)
if [ $LIKWID_VERSION == '5.0.1' ] || [ $LIKWID_VERSION == 'git' ]; then
patch -p0 < $PIKA_PATCHES/pika_likwid-5.0.0_src.patch
patch -p0 < $pika_patches_path/pika_likwid-5.0.0_src.patch
elif [ $LIKWID_VERSION == '4.3.4' ]; then
wget https://github.com/RRZE-HPC/likwid/releases/download/4.3.4/likwid-4.3.4-perf.patch
patch -p1 < likwid-4.3.4-perf.patch
patch -p0 < $PIKA_PATCHES/prope_likwid-4.3.3_src.patch
patch -p0 < $pika_patches_path/prope_likwid-4.3.3_src.patch
elif [ $LIKWID_VERSION == '4.3.3' ]; then
patch -p0 < $PIKA_PATCHES/prope_likwid-4.3.3_src.patch
patch -p0 < $pika_patches_path/prope_likwid-4.3.3_src.patch
else
patch -p0 < $PIKA_PATCHES/pika_likwid-${LIKWID_VERSION}_src.patch
patch -p0 < $pika_patches_path/pika_likwid-${LIKWID_VERSION}_src.patch
fi
if [ $LIKWID_VERSION == '5.0.1' ]; then
cd bench/perl
patch -p0 < $PIKA_PATCHES/likwid-5.0.1_power9.patch
patch -p0 < $pika_patches_path/likwid-5.0.1_power9.patch
cd ../..
cd src
patch -p0 < $PIKA_PATCHES/likwid-5.0.1_fixmemleak.patch
patch -p0 < $pika_patches_path/likwid-5.0.1_fixmemleak.patch
cd ..
fi
......@@ -71,8 +70,7 @@ cp config.mk config.mk.backup
sed -i "/^PREFIX .*/ s|.*|PREFIX = $LIKWID_INST_PATH|" config.mk
# set access mode
#sed -i "/^ACCESSMODE = .*/ s|.*|ACCESSMODE = direct|" config.mk
sed -i "/^ACCESSMODE = .*/ s|.*|ACCESSMODE = perf_event|" config.mk
sed -i "/^ACCESSMODE = .*/ s|.*|ACCESSMODE = $likwid_mode|" config.mk
# do not build access daemon or frequency changer
sed -i "/^BUILDDAEMON = .*/ s|.*|BUILDDAEMON = false|" config.mk
......@@ -83,8 +81,8 @@ if [ $COMPILER == 'GCCPOWER' ]; then
sed -i "/^COMPILER = .*/ s|.*|COMPILER = $COMPILER|" config.mk
fi
make -j4 -k
make install -k
make -j4 # -k
make install # -k
cd ..
# softlinks to our directory that we can still modify those files
......
#!/bin/bash
source ../pika_install.conf
source ../pika_install.conf "$@"
#delete old installation
if [ `id -u` -ne 0 ]; then
if [ ! -z "$PIKA_BUILD_PATH" ] && [ -d "$PIKA_BUILD_PATH" ]; then
rm -rf $PIKA_BUILD_PATH/../sources
cd $PIKA_BUILD_PATH/..
rm -rf $PIKA_VERSION
cd -
else
echo Error with build path $PIKA_BUILD_PATH
fi
else
rm -rf /opt/pika/
#delete previous installation
# if install path is NOT empty AND install path is a directory AND do not delete everything ;-)
if [ ! -z "$install_path" ] && [ -d "$install_path" ] && [ "$install_path" != "/" ]; then
echo Remove previous installation in $install_path
rm -rf $install_path/../sources
cd $install_path/..
rm -rf $PIKA_VERSION
cd -
fi
./install_python3.sh 2>&1 | tee python_install.log
./install_likwid.sh 2>&1 | tee likwid_install.log
# collectd requires likwid and python
# collectd requires Python, and its LIKWID plugin requires LIKWID
./install_collectd.sh 2>&1 | tee collectd_install.log
# go to PIKA install root folder
......
#!/bin/bash
### install python from sources ###
# requirements: libffi-devel
# to install python from sources, libffi-devel is required
source ../pika_install.conf
install_path=$PIKA_INSTALL_PATH
if [ `id -u` -ne 0 ]; then
install_path=$PIKA_BUILD_PATH
fi
mkdir -p $install_path/../sources
cd $install_path/../sources
......
#!/bin/bash
# Install-time environment for building the PIKA 1.0 package.
# Exports PIKA_ROOT, PIKA_PATCHES and CUDA_PATH (plus PIKA_TARGET on
# hosts whose short name starts with "taurusml").

# root of the PIKA checkout; the version-specific settings live below it
export PIKA_ROOT=/sw/taurus/tools/pika
source "${PIKA_ROOT}/pika-1.0.conf"

# directory holding the patch files
export PIKA_PATCHES="${PIKA_ROOT}/install/compute_node/patches"

# pick the CUDA installation (and target architecture) by host name
case "$(hostname -s)" in
  taurusml*)
    export PIKA_TARGET=power
    export CUDA_PATH=/usr/local/cuda-9.2
    ;;
  *)
    export CUDA_PATH=/sw/installed/CUDA/10.1.243
    ;;
esac
#!/bin/bash
# Install-time environment for building the PIKA 1.1 package.
# Exports PIKA_ROOT, PIKA_PATCHES and CUDA_PATH (plus PIKA_TARGET on
# hosts whose short name starts with "taurusml").

# root of the PIKA checkout; the version-specific settings live below it
export PIKA_ROOT=/sw/taurus/tools/pika
source "${PIKA_ROOT}/pika-1.1.conf"

# directory holding the patch files
export PIKA_PATCHES="${PIKA_ROOT}/install/compute_node/patches"

# default CUDA installation; replaced below on "taurusml" hosts
export CUDA_PATH=/sw/installed/CUDA/10.1.243
if [[ "$(hostname -s)" == taurusml* ]]; then
  export PIKA_TARGET=power
  export CUDA_PATH=/usr/local/cuda-9.2
fi
pika_install-0.9.conf
\ No newline at end of file
#!/bin/bash
# Common configuration for the PIKA install scripts; meant to be sourced.
#
# Optional arguments (the positional parameters visible while sourcing):
#   -v PIKA_VERSION   use pika-PIKA_VERSION.conf (default: 1.2)
#   direct            use the LIKWID access mode "direct"
#                     (default: perf_event)
#
# Sets: pika_version, likwid_mode, pika_patches_path, cuda_path,
#       install_path, pika_target_arch (only on "taurusml" hosts) and,
#       for the "direct" mode, PIKA_PACKAGE_PATH.
#
# NOTE(review): PIKA_ROOT is read before anything in this file defines it,
# so it presumably has to be present in the caller's environment - confirm.

# set defaults
pika_version=1.2 # should match PIKA_VERSION in pika-VERSION.conf
likwid_mode=perf_event

# parse command line arguments for "-v PIKA_VERSION" and "direct"
while [ "$1" != "" ]; do
  if [ "$1" = "-v" ] && [ "$2" != "" ]; then
    pika_version=$2
    echo "Build package for PIKA version $pika_version"
    # consume the option value so that it is not parsed as an argument itself
    shift
  elif [ "$1" = "direct" ]; then
    likwid_mode="$1"
    echo "Use LIKWID access mode $likwid_mode"
  fi
  shift
done

# build for a specific version of PIKA
source "${PIKA_ROOT}/pika-${pika_version}.conf"

# Overwrite the PIKA package path (append LIKWID mode).
# This has to happen AFTER the version configuration has been sourced:
# PIKA_VERSION is defined there, and that file exports its own
# PIKA_PACKAGE_PATH, which would replace a value assigned earlier.
if [ "$likwid_mode" = "direct" ]; then
  PIKA_PACKAGE_PATH=${PIKA_ROOT}/archives/pika-${PIKA_VERSION}-${likwid_mode}.tar.gz
fi

# set location of the patch files
pika_patches_path=${PIKA_ROOT}/install/compute_node/patches

# collectd requires CUDA to build the gpu_nvidia plugin
if [[ "$(hostname -s)" = taurusml* ]]; then
  pika_target_arch=power
  cuda_path=/usr/local/cuda-9.2
else
  cuda_path=/sw/installed/CUDA/10.1.243
fi

# set the install path depending on whether we are root or normal user
install_path=$PIKA_INSTALL_PATH
if [ "$(id -u)" -ne 0 ]; then
  install_path=$PIKA_BUILD_PATH
fi
#!/bin/bash
# Version configuration for PIKA 1.2: pins the component versions and
# selects host-dependent settings, then loads the common pika.conf.

# component versions
export PIKA_VERSION=1.2
export COLLECTD_VERSION=5.11.0
export LIKWID_VERSION=git # alternative: 5.0.1
# commit that is checked out when LIKWID is built from git
export LIKWID_VERSION_SHA=f25cd0be0f336426f31695ea6a70bb80802bc86a

export PIKA_ROOT=/sw/taurus/tools/pika

# host-dependent settings, selected by the short host name
case "$(hostname -s)" in
  taurusml*)
    export PYTHON_VERSION=3.6.10
    export PIKA_PACKAGE_PATH="${PIKA_ROOT}/archives/pika-${PIKA_VERSION}-ml.tar.gz"
    # direct access mode variant:
    #export PIKA_PACKAGE_PATH=${PIKA_ROOT}/archives/pika-${PIKA_VERSION}-direct-ml.tar.gz
    export PIKA_COLLECTD_BATCH_SIZE=400
    ;;
  taurusi7*)
    export PYTHON_VERSION=3.7.7
    export PIKA_PACKAGE_PATH="${PIKA_ROOT}/archives/pika-${PIKA_VERSION}.tar.gz"
    # direct access mode variant:
    #export PIKA_PACKAGE_PATH=${PIKA_ROOT}/archives/pika-${PIKA_VERSION}-direct.tar.gz
    #export PIKA_LIKWID_MODE=direct
    export PIKA_COLLECTD_BATCH_SIZE=1000
    ;;
  *)
    export PYTHON_VERSION=3.7.7
    export PIKA_PACKAGE_PATH="${PIKA_ROOT}/archives/pika-${PIKA_VERSION}.tar.gz"
    export PIKA_COLLECTD_BATCH_SIZE=200
    ;;
esac

# the common configuration requires the versions above to be set
source "${PIKA_ROOT}/pika.conf"
......@@ -37,4 +37,4 @@ export LD_LIBRARY_PATH=${PYTHON_ROOT}/lib:${PYTHON_ROOT}/lib/python${python_majo
export PATH=$PYTHON_ROOT/bin:$PATH
# expose the collectd install path
export COLLECTD_PATH=${PIKA_INSTALL_PATH}/collectd/${COLLECTD_VERSION}
export COLLECTD_INSTALL_PATH=${PIKA_INSTALL_PATH}/collectd/${COLLECTD_VERSION}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment