diff --git a/Dockerfile b/Dockerfile index e0121582e4dffee8ed997d687ec390dd8ca117c2..fddfc549e40058224829cf18f2bde1d2d636e420 100644 --- a/Dockerfile +++ b/Dockerfile @@ -14,7 +14,7 @@ RUN pip install mkdocs>=1.1.2 mkdocs-material>=7.1.0 mkdocs-htmlproofer-plugin== RUN apt-get update && apt-get install -y nodejs npm aspell git git-lfs -RUN npm install -g markdownlint-cli markdown-link-check +RUN npm install -g markdownlint-cli@0.32.2 markdown-link-check ########################################### # prepare git for automatic merging in CI # @@ -38,6 +38,9 @@ RUN echo 'test \! -e /docs/tud_theme/javascripts/mermaid.min.js && test -x /docs RUN echo 'exec "$@"' >> /entrypoint.sh RUN chmod u+x /entrypoint.sh +# Workaround https://gitlab.com/gitlab-org/gitlab-runner/-/issues/29022 +RUN git config --global --add safe.directory /docs + WORKDIR /docs CMD ["mkdocs", "build", "--verbose", "--strict"] diff --git a/doc.zih.tu-dresden.de/docs/jobs_and_resources/slurm.md b/doc.zih.tu-dresden.de/docs/jobs_and_resources/slurm.md index f4cdcd9de79a45aa10a32f4e5bdb2b4edcde5419..adaf75cdf9a356307f023a85620fbc9f482dc019 100644 --- a/doc.zih.tu-dresden.de/docs/jobs_and_resources/slurm.md +++ b/doc.zih.tu-dresden.de/docs/jobs_and_resources/slurm.md @@ -328,8 +328,8 @@ specifications for each component of the heterogeneous job should be separated w Running a job step on a specific component is supported by the option `--het-group`. ```console -marie@login$ salloc --ntasks 1 --cpus-per-task 4 --partition <partition> --mem=200G : \ - --ntasks 8 --cpus-per-task 1 --gres=gpu:8 --mem=80G --partition <partition> +marie@login$ salloc --ntasks=1 --cpus-per-task=4 --partition <partition> --mem=200G : \ + --ntasks=8 --cpus-per-task=1 --gres=gpu:8 --mem=80G --partition <partition> [...] marie@login$ srun ./my_application <args for master tasks> : ./my_application <args for worker tasks> ``` @@ -340,16 +340,16 @@ components by a line containing the directive `#SBATCH hetjob`. ```bash #!/bin/bash -#SBATCH --ntasks 1 -#SBATCH --cpus-per-task 4 -#SBATCH --partition <partition> +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=4 +#SBATCH --partition=<partition> #SBATCH --mem=200G #SBATCH hetjob # required to separate groups -#SBATCH --ntasks 8 -#SBATCH --cpus-per-task 1 +#SBATCH --ntasks=8 +#SBATCH --cpus-per-task=1 #SBATCH --gres=gpu:8 #SBATCH --mem=80G -#SBATCH --partition <partition> +#SBATCH --partition=<partition> srun ./my_application <args for master tasks> : ./my_application <args for worker tasks> @@ -474,7 +474,7 @@ at no extra cost. ??? example "Show all jobs since the beginning of year 2021" ```console - marie@login$ sacct -S 2021-01-01 [-E now] + marie@login$ sacct --starttime 2021-01-01 [--endtime now] ``` ## Jobs at Reservations @@ -501,24 +501,21 @@ as user to specify the requirements. These features should be thought of as chan (e.g., a filesystem get stuck on a certain node). A feature can be used with the Slurm option `-C, --constraint=<ARG>` like -`srun --constraint=fs_lustre_scratch2 ...` with `srun` or `sbatch`. Combinations like -`--constraint="fs_beegfs_global0`are allowed. For a detailed description of the possible -constraints, please refer to the [Slurm documentation](https://slurm.schedmd.com/srun.html). +`srun --constraint="fs_lustre_scratch2" [...]` with `srun` or `sbatch`. + +Multiple features can also be combined using AND, OR, matching OR, resource count etc. +E.g., `--constraint="fs_beegfs|fs_lustre_ssd"` requests for nodes with at least one of the +features `fs_beegfs` and `fs_lustre_ssd`. 
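+
+As an illustration only, the following sketch combines some of the filesystem features listed
+below (AND via `&`, OR via `|`, matching OR via `[...]`); adapt the feature names and the
+remaining job options to your needs:
+
+```console
+marie@login$ srun --constraint="fs_lustre_scratch2&fs_beegfs" [...]    # AND: nodes providing both filesystems
+marie@login$ srun --constraint="fs_beegfs|fs_lustre_ssd" [...]         # OR: nodes providing at least one of them
+marie@login$ sbatch --constraint="[fs_beegfs|fs_lustre_ssd]" [...]     # matching OR: all nodes share the same feature
+```
+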
For a detailed description of the possible +constraints, please refer to the [Slurm documentation](https://slurm.schedmd.com/srun.html#OPT_constraint). !!! hint A feature is checked only for scheduling. Running jobs are not affected by changing features. -### Available Features - -| Feature | Description | -|:--------|:-------------------------------------------------------------------------| -| DA | Subset of Haswell nodes with a high bandwidth to NVMe storage (island 6) | - -#### Filesystem Features +### Filesystem Features A feature `fs_*` is active if a certain filesystem is mounted and available on a node. Access to -these filesystems are tested every few minutes on each node and the Slurm features set accordingly. +these filesystems are tested every few minutes on each node and the Slurm features are set accordingly. | Feature | Description | [Workspace Name](../data_lifecycle/workspaces.md#extension-of-a-workspace) | |:---------------------|:-------------------------------------------------------------------|:---------------------------------------------------------------------------| diff --git a/doc.zih.tu-dresden.de/docs/jobs_and_resources/slurm_examples.md b/doc.zih.tu-dresden.de/docs/jobs_and_resources/slurm_examples.md index 703cae5642f256bdb124433d4d306537ff80375c..179450edea866047db26218b4665a349a46f4d03 100644 --- a/doc.zih.tu-dresden.de/docs/jobs_and_resources/slurm_examples.md +++ b/doc.zih.tu-dresden.de/docs/jobs_and_resources/slurm_examples.md @@ -186,7 +186,7 @@ When `srun` is used within a submission script, it inherits parameters from `sba `--ntasks=1`, `--cpus-per-task=4`, etc. So we actually implicitly run the following ```bash -srun --ntasks=1 --cpus-per-task=4 ... --partition=ml some-gpu-application +srun --ntasks=1 --cpus-per-task=4 [...] --partition=ml <some-gpu-application> ``` Now, our goal is to run four instances of this program concurrently in a single batch script. Of @@ -237,7 +237,7 @@ inherited from the surrounding `sbatch` context. 
The following line would be suf job in this example: ```bash -srun --exclusive --gres=gpu:1 --ntasks=1 some-gpu-application & +srun --exclusive --gres=gpu:1 --ntasks=1 <some-gpu-application> & ``` Yet, it adds some extra safety to leave them in, enabling the Slurm batch system to complain if not @@ -278,7 +278,8 @@ use up all resources in the nodes: #SBATCH --exclusive # ensure that nobody spoils my measurement on 2 x 2 x 8 cores #SBATCH --time=00:10:00 #SBATCH --job-name=Benchmark - #SBATCH --mail-user=your.name@tu-dresden.de + #SBATCH --mail-type=end + #SBATCH --mail-user=<your.email>@tu-dresden.de srun ./my_benchmark ``` @@ -313,14 +314,14 @@ name specific to the job: ```Bash #!/bin/bash - #SBATCH --array 0-9 + #SBATCH --array=0-9 #SBATCH --output=arraytest-%A_%a.out #SBATCH --error=arraytest-%A_%a.err #SBATCH --ntasks=864 #SBATCH --time=08:00:00 #SBATCH --job-name=Science1 #SBATCH --mail-type=end - #SBATCH --mail-user=your.name@tu-dresden.de + #SBATCH --mail-user=<your.email>@tu-dresden.de echo "Hi, I am step $SLURM_ARRAY_TASK_ID in this array job $SLURM_ARRAY_JOB_ID" ``` diff --git a/doc.zih.tu-dresden.de/docs/software/mathematics.md b/doc.zih.tu-dresden.de/docs/software/mathematics.md index d28c6eae651e4a9d7d9b6190c4768ff16e1e4cff..8562135e253ecb578ffb6b264d6a505965a9252a 100644 --- a/doc.zih.tu-dresden.de/docs/software/mathematics.md +++ b/doc.zih.tu-dresden.de/docs/software/mathematics.md @@ -562,3 +562,7 @@ To learn more about the MATLAB Parallel Computing Toolbox, check out these resou Tutorials](http://www.mathworks.com/products/parallel-computing/tutorials.html) * [Parallel Computing Videos](http://www.mathworks.com/products/parallel-computing/videos.html) * [Parallel Computing Webinars](http://www.mathworks.com/products/parallel-computing/webinars.html) +* [MATLAB NHR Tutorial Slides: Parallel Computing with MATLAB](https://event.zih.tu-dresden.de/nhr/matlab/module1/materials) +* [MATLAB NHR Tutorial Slides: Machine Learning with MATLAB](https://event.zih.tu-dresden.de/nhr/matlab/module2/materials) +* [MATLAB NHR Tutorial Slides: Deep Learning with MATLAB](https://event.zih.tu-dresden.de/nhr/matlab/module3/materials) +* [MATLAB NHR Tutorial Slides: Interoperability of MATLAB and Python](https://event.zih.tu-dresden.de/nhr/matlab/module4/materials) diff --git a/doc.zih.tu-dresden.de/docs/software/misc/must-error-01.png b/doc.zih.tu-dresden.de/docs/software/misc/must-error-01.png new file mode 100644 index 0000000000000000000000000000000000000000..d3f6fe02a9744724bd2084b75a5b8415eb41342c Binary files /dev/null and b/doc.zih.tu-dresden.de/docs/software/misc/must-error-01.png differ diff --git a/doc.zih.tu-dresden.de/docs/software/misc/must-error-02.png b/doc.zih.tu-dresden.de/docs/software/misc/must-error-02.png new file mode 100644 index 0000000000000000000000000000000000000000..fc91e2a5d4f81908474a7f60e2c457861a9ed311 Binary files /dev/null and b/doc.zih.tu-dresden.de/docs/software/misc/must-error-02.png differ diff --git a/doc.zih.tu-dresden.de/docs/software/misc/pika_cpu_idle.png b/doc.zih.tu-dresden.de/docs/software/misc/pika_cpu_idle.png new file mode 100644 index 0000000000000000000000000000000000000000..a9e499af841d2f01f33f94757955285700d04dae Binary files /dev/null and b/doc.zih.tu-dresden.de/docs/software/misc/pika_cpu_idle.png differ diff --git a/doc.zih.tu-dresden.de/docs/software/misc/pika_footprint.png b/doc.zih.tu-dresden.de/docs/software/misc/pika_footprint.png new file mode 100644 index 
0000000000000000000000000000000000000000..ef98039c2aaa6ae6e2e9291be3f2b05e5039ef78 Binary files /dev/null and b/doc.zih.tu-dresden.de/docs/software/misc/pika_footprint.png differ diff --git a/doc.zih.tu-dresden.de/docs/software/misc/pika_io_block.png b/doc.zih.tu-dresden.de/docs/software/misc/pika_io_block.png new file mode 100644 index 0000000000000000000000000000000000000000..5b3cb64c577ed533e8d3e01cb1fe247037ea53ca Binary files /dev/null and b/doc.zih.tu-dresden.de/docs/software/misc/pika_io_block.png differ diff --git a/doc.zih.tu-dresden.de/docs/software/misc/pika_mem_leak.png b/doc.zih.tu-dresden.de/docs/software/misc/pika_mem_leak.png new file mode 100644 index 0000000000000000000000000000000000000000..dea70e4cdbd580ab609ca160389adbe4d635a6f0 Binary files /dev/null and b/doc.zih.tu-dresden.de/docs/software/misc/pika_mem_leak.png differ diff --git a/doc.zih.tu-dresden.de/docs/software/misc/pika_smt_2.png b/doc.zih.tu-dresden.de/docs/software/misc/pika_smt_2.png new file mode 100644 index 0000000000000000000000000000000000000000..71306f4aff11c85a540093808284be733cea3c24 Binary files /dev/null and b/doc.zih.tu-dresden.de/docs/software/misc/pika_smt_2.png differ diff --git a/doc.zih.tu-dresden.de/docs/software/misc/pika_timelines.png b/doc.zih.tu-dresden.de/docs/software/misc/pika_timelines.png new file mode 100644 index 0000000000000000000000000000000000000000..3b4bf2c451796809a80a16b7773fcdfc6ea9d651 Binary files /dev/null and b/doc.zih.tu-dresden.de/docs/software/misc/pika_timelines.png differ diff --git a/doc.zih.tu-dresden.de/docs/software/mpi_usage_error_detection.md b/doc.zih.tu-dresden.de/docs/software/mpi_usage_error_detection.md index b604bf5398681458ac416336ea7c42a0b3a25b15..66ee5b4cc26e4a646203d9911ca429ed4dba2e41 100644 --- a/doc.zih.tu-dresden.de/docs/software/mpi_usage_error_detection.md +++ b/doc.zih.tu-dresden.de/docs/software/mpi_usage_error_detection.md @@ -6,51 +6,67 @@ applications with this interface is error prone and often time consuming. Some u may only manifest on some platforms or some application runs, which further complicates the detection of these errors. Thus, special debugging tools for MPI applications exist that automatically check whether an application conforms to the MPI standard and whether its MPI calls -are safe. At ZIH, we maintain and support MUST for this task, though different types of these tools -exist (see last section). +are safe. At ZIH, we maintain and support **MUST** for this task, though different types of these +tools exist (see last section). ## MUST -MUST checks if your application conforms to the MPI standard and will issue warnings if there are -errors or non-portable constructs. You can apply MUST without modifying your source code, though we -suggest to add the debugging flag "-g" during compilation. +[MUST](https://itc.rwth-aachen.de/must/) checks if your application conforms to the MPI +standard and will issue warnings if there are errors or non-portable constructs. You can apply MUST +without modifying your source code, though we suggest to add the debugging flag `-g` during +compilation. -See also [MUST Introduction Slides](misc/parallel_debugging_must.pdf). +See also [MUST Introduction Slides](misc/parallel_debugging_must.pdf) for a starting point. ### Setup and Modules You need to load a module file in order to use MUST. Each MUST installation uses a specific combination of a compiler and an MPI library, make sure to use a combination that fits your needs. 
-Right now we only provide a single combination on each system, contact us if you need further
+Right now we provide two combinations; please [contact us](../support/support.md) if you need further
 combinations. You can query for the available modules with:

 ```console
 marie@login$ module avail must
-   MUST/1.6.0-rc3-intel-2018a (L)
+   MUST/1.6.0-rc3-intel-2018a        MUST/1.7.2-intel-2020a (D)
 ```

 You can load a MUST module as follows:

 ```console
 marie@login$ module load MUST
-Module MUST/1.6.0-rc3-intel-2018a and 16 dependencies loaded.
+Module MUST/1.7.2-intel-2020a and 16 dependencies loaded.
 ```

 Besides loading a MUST module, no further changes are needed during compilation and linking.

 ### Running your Application with MUST

-In order to run your application with MUST you need to replace the `srun` command with `mustrun`:
+In order to launch your application with MUST, you need to replace the `srun` command with
+`mustrun --must:mpiexec srun --must:np --ntasks`:

 ```console
-marie@login$ mustrun -np <number of MPI processes> ./<your binary>
+marie@login$ mustrun --must:mpiexec srun --must:np --ntasks --ntasks <number of MPI processes> ./<your binary>
 ```

+Besides replacing the `srun` command, you need to be aware that **MUST always allocates an extra
+process**, i.e., if you issue a
+`mustrun --must:mpiexec srun --must:np --ntasks --ntasks 4 ./<your binary>` then
+MUST will start **5 processes** instead. This is usually not critical. However, in interactive and
+batch jobs **make sure to allocate an extra CPU for this task**.
+
 Suppose your application is called `fancy-program` and is normally run with 4 processes.
-The invocation should then be
+The MUST workflow should then be

 ```console
-marie@login$ mustrun -np 4 ./fancy-program
+marie@login$ module load MUST
+
+# Compile your application with the debugging flag "-g" on the correct architecture, e.g.:
+marie@login$ srun --ntasks 1 --partition <partition> mpicc -g -o fancy-program fancy-program.c
+
+# Allocate an interactive session with 1 extra process for MUST
+marie@login$ salloc --ntasks 5 --partition <partition>
+
+marie@login$ mustrun --must:mpiexec srun --must:np --ntasks --must:stacktrace backward --ntasks 4 ./fancy-program
 [MUST] MUST configuration ... centralized checks with fall-back application crash handling (very slow)
 [MUST] Weaver ... success
 [MUST] Code generation ... success
@@ -65,15 +81,23 @@ marie@login$ mustrun -np 4 ./fancy-program
 [MUST] Execution finished, inspect "/home/marie/MUST_Output.html"!
 ```

-Besides replacing the `srun` command you need to be aware that **MUST always allocates an extra
-process**, i.e. if you issue a `mustrun -np 4 ./a.out` then MUST will start 5 processes instead.
-This is usually not critical, however in batch jobs **make sure to allocate an extra CPU for this
-task**.
+??? hint "Twice `--ntasks`"
+
+    You might wonder about the two `--ntasks` arguments in the `mustrun` command outlined above.
+    `mustrun` is able to invoke another launcher instead of `mpiexec`. For ZIH systems, this is
+    `srun` (`--must:mpiexec srun`). Additionally, you need to specify which argument of that
+    launcher holds the number of application processes. For Slurm, it is `--ntasks <N>`. Thus, you
+    need to specify `--must:np --ntasks --ntasks <N>`.
+
+With the additional flag `--must:stacktrace backward`, you can produce an additional stacktrace
+with the line number of the error location, which allows you to pinpoint the error in your code.
+This might slow down code execution slightly.
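+
+For non-interactive runs, the pieces above can be combined in a batch script. The following is
+only a sketch with placeholder values (`<partition>`, time limit, job name); note that it
+allocates 5 tasks for 4 application processes, reserving the extra CPU for MUST:
+
+```bash
+#!/bin/bash
+
+#SBATCH --ntasks=5                 # 4 application processes + 1 extra process for MUST
+#SBATCH --partition=<partition>
+#SBATCH --time=00:30:00
+#SBATCH --job-name=must_check
+
+module load MUST
+
+mustrun --must:mpiexec srun --must:np --ntasks --ntasks 4 ./fancy-program
+```
+
+If your application is known not to crash, adding `--must:nocrash` (see below) reduces the
+overhead further.
+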
 Finally, MUST assumes that your application may crash at any time. To still gather correctness
 results under this assumption is extremely expensive in terms of performance overheads. Thus, if
 your application does not crash, you should add `--must:nocrash` to the `mustrun` command to make
 MUST aware of this knowledge. Overhead is drastically reduced with this switch.
+Further details on alternative launch modes are described in the MUST documentation.

 ### Result Files

@@ -81,18 +105,147 @@ After running your application with MUST you will have its output in the working
 application. The output is named `MUST_Output.html`. Open this files in a browser to analyze the
 results. The HTML file is color coded:

-- Entries in green represent notes and useful information
-- Entries in yellow represent warnings
-- Entries in red represent errors
+- Entries in green represent notes and useful information
+- Entries in yellow represent warnings
+- Entries in red represent errors
+
+### Example Usage of MUST
+
+In this section, we provide a detailed example explaining the usage of MUST. The example is taken
+from the [MUST documentation v1.7.2](https://hpc.rwth-aachen.de/must/files/Documentation-1.7.2.pdf).
+
+??? example "example.c"
+
+    This C program contains three MPI usage errors. Save it as `example.c`.
+
+    ```c
+    #include <stdio.h>
+    #include <mpi.h>
+
+    int main(int argc, char **argv) {
+        int rank, size, sBuf[2] = {1, 2}, rBuf[2];
+        MPI_Status status;
+        MPI_Datatype newType;
+
+        MPI_Init(&argc, &argv);
+        MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+        MPI_Comm_size(MPI_COMM_WORLD, &size);
+
+        // Enough tasks?
+        if (size < 2) {
+            printf("This test needs at least 2 processes!\n");
+            MPI_Finalize();
+            return 1;
+        }
+
+        // Say hello
+        printf("Hello, I am rank %d of %d processes.\n", rank, size);
+
+        // 1) Create a datatype
+        MPI_Type_contiguous(2, MPI_INT, &newType);
+        MPI_Type_commit(&newType);
+
+        // 2) Use MPI_Sendrecv to perform a ring communication
+        MPI_Sendrecv(sBuf, 1, newType, (rank + 1) % size, 123,
+                     rBuf, sizeof(int) * 2, MPI_BYTE, (rank - 1 + size) % size, 123,
+                     MPI_COMM_WORLD, &status);
+
+        // 3) Use MPI_Send and MPI_Recv to perform a ring communication
+        MPI_Send(sBuf, 1, newType, (rank + 1) % size, 456, MPI_COMM_WORLD);
+        MPI_Recv(rBuf, sizeof(int) * 2, MPI_BYTE, (rank - 1 + size) % size, 456,
+                 MPI_COMM_WORLD, &status);
+
+        // Say bye bye
+        printf("Signing off, rank %d.\n", rank);
+
+        MPI_Finalize();
+        return 0;
+    }
+    /*EOF*/
+    ```
+
+??? example "Compile and execute"
+
+    The first step is to prepare the environment by loading a MUST module.
+
+    ```console
+    marie@login$ module purge
+    marie@login$ module load MUST
+    Module MUST/1.7.2-intel-2020a and 16 dependencies loaded.
+    ```
+
+    Now, you compile the `example.c` program using the MPI compiler wrapper. The compiled binary
+    is called `example`.
+
+    ```console
+    marie@login$ mpicc example.c -g -o example
+    ```
+
+    Finally, you execute the example application on the compute nodes. As you can see, the
+    following command line will submit a job to the batch system.
+
+    ```console
+    marie@login$ mustrun --must:mpiexec srun --must:np --ntasks --ntasks 4 --time 00:10:00 ./example
+    [MUST] MUST configuration ... centralized checks with fall-back application crash handling (very slow)
+    [MUST] Information: overwritting old intermediate data in directory "/scratch/ws/0/marie-must/must_temp"!
+    [MUST] Using prebuilt infrastructure at /sw/installed/MUST/1.7.2-intel-2020a/modules/mode1-layer2
+    [MUST] Weaver ... success
+    [MUST] Generating P^nMPI configuration ... success
+    [MUST] Search for linked P^nMPI ... not found ... using LD_PRELOAD to load P^nMPI ... success
+    [MUST] Executing application:
+    srun: job 32765491 queued and waiting for resources
+    srun: job 32778008 has been allocated resources
+    Hello, I am rank 2 of 4 processes.
+    Hello, I am rank 3 of 4 processes.
+    Hello, I am rank 0 of 4 processes.
+    Hello, I am rank 1 of 4 processes.
+    ============MUST===============
+    ERROR: MUST detected a deadlock, detailed information is available in the MUST output file. You should either investigate details with a debugger or abort, the operation of MUST will stop from now.
+    ===============================
+    ```
+
+??? example "Analysis of MUST output files and MPI usage errors"
+
+    MUST produces a `MUST_Output.html` file and a directory `MUST_Output-files` with additional
+    HTML files. Copy the files to your local host, e.g.,
+
+    ```console
+    marie@local$ scp -r taurus.hrsk.tu-dresden.de:/scratch/ws/0/marie-must/{MUST_Output-files,MUST_Output.html} .
+    ```
+
+    and open the file `MUST_Output.html` using a web browser. Alternatively, you can open the HTML
+    file with a `firefox` instance on the HPC systems. This requires
+    [X11 forwarding via SSH](../access/ssh_login.md#x11-forwarding).
+
+    MUST detects all three MPI usage errors within this example:
+
+    * A type mismatch
+    * A send-send deadlock
+    * A leaked datatype
+
+    The type mismatch is reported as follows:
+
+    
+    {: align="center" summary="Type mismatch error report from MUST."}
+
+    MUST also offers a detailed page for the type mismatch error.
+
+    
+    {: summary="Detailed view of the type mismatch error in MUST." align="center"}
+
+    In order not to exceed the scope of this example, we do not explain the MPI usage errors in
+    more detail. Please feel free to deep-dive into the error descriptions provided in the official
+    [MUST documentation v1.7.2](https://hpc.rwth-aachen.de/must/files/Documentation-1.7.2.pdf)
+    (Sec. 4).

 ## Further MPI Correctness Tools

 Besides MUST, there exist further MPI correctness tools, these are:

-- Marmot (predecessor of MUST)
-- MPI checking library of the Intel Trace Collector
-- ISP (From Utah)
-- Umpire (predecessor of MUST)
+- Marmot (predecessor of MUST)
+- MPI checking library of the Intel Trace Collector
+- ISP (From Utah)
+- Umpire (predecessor of MUST)

 ISP provides a more thorough deadlock detection as it investigates alternative execution paths,
 however its overhead is drastically higher as a result. Contact our support if you have a specific
diff --git a/doc.zih.tu-dresden.de/docs/software/pika.md b/doc.zih.tu-dresden.de/docs/software/pika.md
index 3b9cd3fd7ff821f3dc5d76241b46b2645b9fc01b..40202decaedd121aab6d7bf00d7958377ff19d8c 100644
--- a/doc.zih.tu-dresden.de/docs/software/pika.md
+++ b/doc.zih.tu-dresden.de/docs/software/pika.md
@@ -6,9 +6,10 @@ systems have the possibility to visualize and analyze the efficiency of their jo
 !!! hint

-    To understand this small guide, it is recommended to open the
+    To understand this guide, it is recommended that you open the
     [web interface](https://selfservice.zih.tu-dresden.de/l/index.php/hpcportal/jobmonitoring/zih/jobs)
-    in a separate window. Furthermore, at least one real HPC job should have been submitted.
+    in a separate window. Furthermore, you should have submitted at least one real HPC job at ZIH
+    systems.

 ## Overview

@@ -20,11 +21,11 @@ for the visualization and analysis of job performance data.
## Table View and Job Search The analysis of HPC jobs in PIKA is designed as a top-down approach. Starting from the table view, -users can either analyze running or completed jobs. They can navigate from groups of jobs with the +you can either analyze running or completed jobs. You can navigate from groups of jobs with the same name to the metadata of an individual job and finally investigate the job’s runtime metrics in a timeline view. -To find jobs with specific properties, the table can be sorted by any column, e.g., by consumed CPU +To find jobs with specific properties, you can sort the table by any column, e.g., by consumed CPU hours to find jobs where an optimization has a large impact on the system utilization. Additionally, there is a filter mask to find jobs that match several properties. When a job has been selected, the timeline view opens. @@ -32,39 +33,63 @@ timeline view opens. ## Timeline Visualization PIKA provides timeline charts to visualize the resource utilization of a job over time. After a job -is completed, timeline charts can help to identify periods of inefficient resource usage. However, -they are also suitable for the live assessment of performance during the job’s runtime. In case of -unexpected performance behavior, users can cancel the job, thus avoiding long execution with subpar -performance. +is completed, timeline charts can help you to identify periods of inefficient resource usage. +However, they are also suitable for the live assessment of performance during the job’s runtime. In +case of unexpected performance behavior, you can cancel the job, thus avoiding long execution with +subpar performance. + +The following timeline visualization shows a job with 840 cores, spread over 35 (dual-socket +Haswell) nodes that have been allocated for exclusive use. + + +{: align="center"} PIKA provides the following runtime metrics: -|Metric| Hardware Unit| -|---|---| -|CPU Usage|CPU core| -|IPC (instructions per cycle)|CPU core| -|FLOPS (normalized to single precision) |CPU core| -|Main Memory Bandwidth|CPU socket| -|CPU Power|CPU socket| -|Main Memory Utilization|node| -|I/O Bandwidth (local, Lustre) |node| -|I/O Metadata (local, Lustre) |node| -|GPU Usage|GPU device| -|GPU Memory Utilization|GPU device| -|GPU Power Consumption|GPU device| -|GPU Temperature|GPU device| +|Metric| Hardware Unit| Sampling Frequency| +|---|---|---:| +|CPU Usage|CPU core|30s| +|IPC (instructions per cycle)|CPU core|60s| +|FLOPS (normalized to single precision) |CPU core|60s| +|Main Memory Bandwidth|CPU socket|60s| +|CPU Power|CPU socket|60s| +|Main Memory Utilization|node|30s| +|I/O Bandwidth (local, Lustre) |node|30s| +|I/O Metadata (local, Lustre) |node|30s| +|GPU Usage|GPU device|30s| +|GPU Memory Utilization|GPU device|30s| +|GPU Power Consumption|GPU device|30s| +|GPU Temperature|GPU device|30s| Each monitored metric is represented by a timeline, whereby metrics with the same unit and data -source are displayed in a common chart, e.g., different Lustre metadata operations. Each metric is +source are displayed in a common chart, e.g., different Lustre metadata operations. Each metric is measured with a certain granularity concerning the hardware, e.g. per hardware thread, per CPU socket or per node. +Most metrics are recorded every 30 seconds except IPC, FLOPS, Main Memory Bandwidth and Power +Consumption. The latter are determined every 60 seconds, as they are a combination of different +hardware counters, which leads to a higher measurement overhead. 
Depending on the architecture, +metrics such as normalized FLOPS (2 x double-precision + 1 x single-precision) can require +multiplexing, since single and double precision FLOPS cannot be measured simultaneously. +The sampling frequency cannot be changed by the user. !!! hint Be aware that CPU socket or node metrics can share the resources of other jobs running on the same CPU socket or node. This can result e.g., in cache perturbation and thus a sub-optimal - performance. To get valid performance data for those metrics, it is recommended to submit an - exclusive job! + performance. To get valid performance data for those metrics, it is recommended to submit an + exclusive job (`--exclusive`)! + +If the current partition supports simultaneous multithreading (SMT) the maximum number of hardware +threads per physical core is displayed in the SMT column. The Slurm configuration on ZIH systems +disables SMT by default. Therefore, in the example below, only a maximum CPU usage of 0.5 can be +achieved, since PIKA combines two hardware threads per physical core. If you want to use SMT, you +must set the Slurm environment variable `SLURM_HINT=multithread`. In this case, `srun` distributes +the tasks to all available hardware threads, thus a CPU usage of 1 can be reached. However, the SMT +configuration only refers to the `srun` command. For single node jobs without `srun` command the +tasks are automatically distributed to all available hardware threads. + + +{: align="center"} !!! note @@ -73,7 +98,8 @@ socket or per node. performance data per physical core. The following table explains different timeline visualization modes. -By default, each timeline shows the average value over all hardware units (HUs) per measured interval. +By default, each timeline shows the average value over all hardware units (HUs) per measured +interval. |Visualization Mode| Description| |---|---| @@ -108,9 +134,12 @@ usually contains an unlimited number of values. A scatter plot enables the comb footprint metrics (except for job states and job tags), which is particularly useful for investigating their correlation. + +{: align="center"} + ## Hints -If users wish to perform their own measurement of performance counters using performance tools other +If you wish to perform your own measurement of performance counters using performance tools other than PIKA, it is recommended to disable PIKA monitoring. This can be done using the following Slurm flags in the job script: @@ -123,7 +152,24 @@ flags in the job script: ## Known Issues -The PIKA metric FLOPS is not supported by the Intel Haswell cpu architecture. +The PIKA metric FLOPS is not supported by the Intel Haswell CPU architecture. However, PIKA provides this metric to show the computational intensity. **Do not rely on FLOPS on Haswell!** We use the event `AVX_INSTS_CALC` which counts the `insertf128` instruction. + +## Case Studies + +### Idle CPUs + + +{: align="center"} + +### Blocking I/O Operations + + +{: align="center"} + +### Memory Leaks + + +{: align="center"} diff --git a/doc.zih.tu-dresden.de/docs/software/tensorflow.md b/doc.zih.tu-dresden.de/docs/software/tensorflow.md index 58b99bd1c302c0ed65619fc200602f2732f84df1..f11ecb3ac94e3cc65cf671815d813bacc9b9815f 100644 --- a/doc.zih.tu-dresden.de/docs/software/tensorflow.md +++ b/doc.zih.tu-dresden.de/docs/software/tensorflow.md @@ -96,7 +96,7 @@ the notebook by pre-loading a specific TensorFlow module: You can also define your own Jupyter kernel for more specific tasks. 
 Please read about Jupyter kernels and virtual environments in our
-    [JupyterHub](../access/jupyterhub.md#creating-and-using-a-custom-environment) documentation.
+    [JupyterHub](../access/jupyterhub_custom_environments.md) documentation.

 ## TensorFlow in Containers
diff --git a/doc.zih.tu-dresden.de/docs/software/vampir.md b/doc.zih.tu-dresden.de/docs/software/vampir.md
index ebaa368e73f445422644b6159c1ab677fc50fecf..64dfd00d36eb8079406ce7a47ee55324d8de32fe 100644
--- a/doc.zih.tu-dresden.de/docs/software/vampir.md
+++ b/doc.zih.tu-dresden.de/docs/software/vampir.md
@@ -73,7 +73,17 @@ Launching VampirServer...
 Submitting slurm 30 minutes job (this might take a while)...
 ```

+This way, a job with a time limit of 30 minutes and default resources is submitted. This might fit
+your needs. If not, please feel free to request a customized job running VampirServer, e.g.
+
+```console
+marie@login$ vampirserver start --ntasks=8 --time=01:00:00 --mem-per-cpu=3000M --partition=romeo
+Launching VampirServer...
+Submitting slurm 01:00:00 minutes job (this might take a while)...
+```
+
 Above automatically allocates its resources via the respective batch system. If you want to start
 VampirServer without a batch allocation or from inside an interactive allocation, use

 ```console
diff --git a/doc.zih.tu-dresden.de/util/download-newest-mermaid.js.sh b/doc.zih.tu-dresden.de/util/download-newest-mermaid.js.sh
index 9986ad6f49e2e739f8a53d7911f4e346196d21a4..d01622e4bba4188479370be170339b1f01308074 100755
--- a/doc.zih.tu-dresden.de/util/download-newest-mermaid.js.sh
+++ b/doc.zih.tu-dresden.de/util/download-newest-mermaid.js.sh
@@ -6,4 +6,4 @@ scriptpath=${BASH_SOURCE[0]}
 basedir=`dirname "$scriptpath"`
 basedir=`dirname "$basedir"`
 cd $basedir/tud_theme/javascripts
-wget https://unpkg.com/mermaid/dist/mermaid.min.js
+wget https://unpkg.com/mermaid@9.4.0/dist/mermaid.min.js
diff --git a/doc.zih.tu-dresden.de/wordlist.aspell b/doc.zih.tu-dresden.de/wordlist.aspell
index 18b8555ca8c9e6f4756b57e9920c6fbeb5ba77df..54c0092c1c8cc3eda9c37f9780936ffe7ecc8b29 100644
--- a/doc.zih.tu-dresden.de/wordlist.aspell
+++ b/doc.zih.tu-dresden.de/wordlist.aspell
@@ -172,6 +172,7 @@ ifort
 ImageNet
 img
 Infiniband
+InfluxDB
 init
 inode
 Instrumenter
@@ -248,6 +249,7 @@ multicore
 multiphysics
 Multiphysics
 multithreaded
+multithreading
 Multithreading
 NAMD
 Nationales