diff --git a/Dockerfile b/Dockerfile
index 4b11d882809ca6eb1d574e20306bb2cd279f5a42..ec246a77566bbbba03b613ce49ec745a82f46f51 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,4 +1,4 @@
-FROM python:3.8-bullseye
+FROM python:3.12-bookworm
 
 SHELL ["/bin/bash", "-c"]
 
@@ -6,7 +6,7 @@ SHELL ["/bin/bash", "-c"]
 # Base #
 ########
 
-RUN pip install mkdocs>=1.1.2 mkdocs-material==8.5.11 mkdocs-htmlproofer-plugin==1.2.1 mkdocs-video==1.3.0
+RUN pip install "mkdocs>=1.6.0" mkdocs-material==9.5.22 mkdocs-htmlproofer-plugin==1.2.1 mkdocs-video==1.5.0
 
 ##########
 # Linter #
@@ -14,7 +14,7 @@ RUN pip install mkdocs>=1.1.2 mkdocs-material==8.5.11 mkdocs-htmlproofer-plugin=
 
 RUN apt-get update && apt-get install -y nodejs npm aspell git git-lfs
 
-RUN npm install -g markdownlint-cli@0.32.2 markdown-link-check
+RUN npm install -g markdownlint-cli@0.40.0 markdown-link-check
 
 ###########################################
 # prepare git for automatic merging in CI #
diff --git a/doc.zih.tu-dresden.de/docs/contrib/content_rules.md b/doc.zih.tu-dresden.de/docs/contrib/content_rules.md
index 80dbdbaeb0d3eb07690c8ddb16fbae0b0fc4b82e..7ded65dcd5782ae242607313c14feb8373c9cbb9 100644
--- a/doc.zih.tu-dresden.de/docs/contrib/content_rules.md
+++ b/doc.zih.tu-dresden.de/docs/contrib/content_rules.md
@@ -336,6 +336,7 @@ The table also holds a second placeholder, if, e.g., you need a second login to
 | E-mail | marie@tu-dresden.de | martin@tu-dresden.de |
 | Project title | `p_number_crunch` | `p_long_computations` |
 | Workspace title | `number_crunch` | `long_computations` |
+| Job ID | `123456` | `456789` |
 {: summary="Generic placeholders", align="bottom"}
 
 !!! example "Output of `ls` command"
diff --git a/doc.zih.tu-dresden.de/docs/jobs_and_resources/hardware_overview.md b/doc.zih.tu-dresden.de/docs/jobs_and_resources/hardware_overview.md
index 3bf43c019f85446223119d6e7c2405e04edcc250..c8d49918b8f905b92951b01427d5639f2bbdc465 100644
--- a/doc.zih.tu-dresden.de/docs/jobs_and_resources/hardware_overview.md
+++ b/doc.zih.tu-dresden.de/docs/jobs_and_resources/hardware_overview.md
@@ -110,7 +110,7 @@ CPUs.
 
 - 630 nodes, each with
     - 2 x Intel Xeon Platinum 8470 (52 cores) @ 2.00 GHz, Multithreading enabled
-    - 512 GB RAM
+    - 512 GB RAM (8 x 32 GB DDR5-4800 MT/s per socket)
     - 12 nodes provide 1.8 TB local storage on NVMe device at `/tmp`
     - All other nodes are diskless and have no or very limited local storage (i.e. `/tmp`)
 - Login nodes: `login[1-4].barnard.hpc.tu-dresden.de`
@@ -126,7 +126,7 @@ and is designed for AI and ML tasks.
 - 34 nodes, each with
     - 8 x NVIDIA A100-SXM4 Tensor Core-GPUs
     - 2 x AMD EPYC CPU 7352 (24 cores) @ 2.3 GHz, Multithreading available
-    - 1 TB RAM
+    - 1 TB RAM (16 x 32 GB DDR4-2933 MT/s per socket)
    - 3.5 TB local memory on NVMe device at `/tmp`
 - Login nodes: `login[1-2].alpha.hpc.tu-dresden.de`
 - Hostnames: `i[8001-8037].alpha.hpc.tu-dresden.de`
@@ -139,7 +139,7 @@ The cluster `Romeo` is a general purpose cluster by NEC based on AMD Rome CPUs.
 
 - 192 nodes, each with
     - 2 x AMD EPYC CPU 7702 (64 cores) @ 2.0 GHz, Multithreading available
-    - 512 GB RAM
+    - 512 GB RAM (8 x 32 GB DDR4-3200 MT/s per socket)
     - 200 GB local memory on SSD at `/tmp`
 - Login nodes: `login[1-2].romeo.hpc.tu-dresden.de`
 - Hostnames: `i[7001-7190].romeo.hpc.tu-dresden.de`
@@ -153,7 +153,7 @@ architecture.
 
 - 1 node, with
     - 32 x Intel(R) Xeon(R) Platinum 8276M CPU @ 2.20 GHz (28 cores)
-    - 47 TB RAM
+    - 47 TB RAM (12 x 128 GB DDR4-2933 MT/s per socket)
 - Configured as one single node
 - 48 TB RAM (usable: 47 TB - one TB is used for cache coherence protocols)
 - 370 TB of fast NVME storage available at `/nvme/<projectname>`
@@ -168,7 +168,7 @@ The cluster `Power9` by IBM is based on Power9 CPUs and provides NVIDIA V100 GPU
 
 - 32 nodes, each with
     - 2 x IBM Power9 CPU (2.80 GHz, 3.10 GHz boost, 22 cores)
-    - 256 GB RAM DDR4 2666 MHz
+    - 256 GB RAM (8 x 16 GB DDR4-2666 MT/s per socket)
     - 6 x NVIDIA VOLTA V100 with 32 GB HBM2
     - NVLINK bandwidth 150 GB/s between GPUs and host
 - Login nodes: `login[1-2].power9.hpc.tu-dresden.de`
diff --git a/doc.zih.tu-dresden.de/docs/jobs_and_resources/slurm.md b/doc.zih.tu-dresden.de/docs/jobs_and_resources/slurm.md
index ff2a9ceb43aca51cedecf234ca67b680799eabab..f96168d8dc01b2ce9425d0f5b226f59bf35aa800 100644
--- a/doc.zih.tu-dresden.de/docs/jobs_and_resources/slurm.md
+++ b/doc.zih.tu-dresden.de/docs/jobs_and_resources/slurm.md
@@ -165,30 +165,44 @@ allocation with desired switch count or the time limit expires. Acceptable time
 ## Interactive Jobs
 
 Interactive activities like editing, compiling, preparing experiments etc. are normally limited to
-the login nodes. For longer interactive sessions, you can allocate cores on the compute node with
-the command `salloc`. It takes the same options as `sbatch` to specify the required resources.
+the login nodes. For longer interactive sessions, you can allocate resources on the compute node
+with the command `salloc`. It takes the same options as `sbatch` to specify the required resources.
 
 `salloc` returns a new shell on the node where you submitted the job. You need to use the command
 `srun` in front of the following commands to have these commands executed on the allocated
-resources. If you allocate more than one task, please be aware that `srun` will run the command on
-each allocated task by default! To release the allocated resources, invoke the command `exit` or
+resources. If you request more than one task, please be aware that `srun` will run the command
+on each allocated task by default! To release the allocated resources, invoke the command `exit` or
 `scancel <jobid>`.
 
-```console
-marie@login$ salloc --nodes=2
-salloc: Pending job allocation 27410653
-salloc: job 27410653 queued and waiting for resources
-salloc: job 27410653 has been allocated resources
-salloc: Granted job allocation 27410653
-salloc: Waiting for resource configuration
-salloc: Nodes taurusi[6603-6604] are ready for job
-marie@login$ hostname
-tauruslogin5.taurus.hrsk.tu-dresden.de
-marie@login$ srun hostname
-taurusi6604.taurus.hrsk.tu-dresden.de
-taurusi6603.taurus.hrsk.tu-dresden.de
-marie@login$ exit # ending the resource allocation
-```
+!!! example "Example: Interactive allocation using `salloc`"
+
+    The following code listing depicts the allocation of two nodes with two tasks on each node with a
+    time limit of one hour on the cluster `Barnard` for interactive usage.
+
+    ```console linenums="1"
+    marie@login.barnard$ salloc --nodes=2 --ntasks-per-node=2 --time=01:00:00
+    salloc: Pending job allocation 1234567
+    salloc: job 1234567 queued and waiting for resources
+    salloc: job 1234567 has been allocated resources
+    salloc: Granted job allocation 1234567
+    salloc: Waiting for resource configuration
+    salloc: Nodes n[1184,1223] are ready for job
+    [...]
+    marie@login.barnard$ hostname
+    login1
+    marie@login.barnard$ srun hostname
+    n1184
+    n1184
+    n1223
+    n1223
+    marie@login.barnard$ exit # ending the resource allocation
+    ```
+
+    After Slurm successfully allocated resources for the job, a new shell is created on the submit
+    host (cf. lines 9-10).
+
+    In order to use the allocated resources, you need to invoke your commands with `srun` (cf. lines
+    11 ff).
 
 The command `srun` also creates an allocation, if it is running outside any `sbatch` or
 `salloc` allocation.
@@ -218,13 +232,6 @@ taurusi6604.taurus.hrsk.tu-dresden.de
 shell, as shown in the example above. If you missed adding `-l` at submitting the interactive
 session, no worry, you can source this files also later on manually (`source /etc/profile`).
 
-!!! note "Partition `interactive`"
-
-    A dedicated partition `interactive` is reserved for short jobs (< 8h) with no more than one job
-    per user. An interactive partition is available for every regular partition, e.g.
-    `alpha-interactive` for `alpha`. Please check the availability of nodes there with
-    `sinfo |grep 'interactive\|AVAIL' |less`
-
 ### Interactive X11/GUI Jobs
 
 Slurm will forward your X11 credentials to the first (or even all) node for a job with the
diff --git a/doc.zih.tu-dresden.de/docs/quickstart/getting_started.md b/doc.zih.tu-dresden.de/docs/quickstart/getting_started.md
index 95af8dde2310081e23e24c56ce092561e723aa3a..7296a067140c885708fc5de6fadf021651206dbc 100644
--- a/doc.zih.tu-dresden.de/docs/quickstart/getting_started.md
+++ b/doc.zih.tu-dresden.de/docs/quickstart/getting_started.md
@@ -148,9 +148,9 @@ To start we recommend the Lustre filesystem **horse**.
     The following command creates a workspace
 
     ```console
-    marie@login$ ws_allocate -F horse -r 7 -m marie@tu-dresden.de -n test-workspace -d 90
+    marie@login$ ws_allocate -F horse -r 7 -m marie@tu-dresden.de -n number_crunch -d 90
     Info: creating workspace.
-    /data/horse/ws/marie-test-workspace
+    /data/horse/ws/marie-number_crunch
     remaining extensions : 10
     remaining time in days: 90
     ```
@@ -160,10 +160,10 @@ To start we recommend the Lustre filesystem **horse**.
     - `ws_allocate` - command to allocate
     - `-F horse` - on the horse filesystem
     - `-r 7 -m marie@tu-dresden.de` - send a reminder to `marie@tu-dresden.de` 7 days before expiration
-    - `-n test-workspace` - workspace name
+    - `-n number_crunch` - workspace name
     - `-d 90` - a life time of 90 days
 
-    The path to this workspace is `/data/horse/ws/marie-test-workspace`. You will need it when
+    The path to this workspace is `/data/horse/ws/marie-number_crunch`. You will need it when
     transferring data or running jobs. Find more
     [information on workspaces in the compendium](../data_lifecycle/workspaces.md).
@@ -177,7 +177,7 @@ The approach depends on the data volume: up to 100 MB or above.
     Use the command `cp` to copy the file `example.R` from your ZIH home directory to a workspace:
 
     ```console
-    marie@login$ cp /home/marie/example.R /data/horse/ws/marie-test-workspace
+    marie@login$ cp /home/marie/example.R /data/horse/ws/marie-number_crunch
     ```
 
     Analogously use command `mv` to move a file.
@@ -191,7 +191,7 @@ The approach depends on the data volume: up to 100 MB or above.
     filesystem location to another:
 
     ```console
-    marie@login$ dtcp -r /walrus/ws/large-dataset /data/horse/ws/marie-test-workspace/data
+    marie@login$ dtcp -r /walrus/ws/large-dataset /data/horse/ws/marie-number_crunch/data
     ```
 
     Analogously use the command `dtmv` to move a file or folder.
@@ -199,31 +199,32 @@ The approach depends on the data volume: up to 100 MB or above.
 transfer section.
 
 ### Transferring Data *To/From* ZIH HPC Systems
-<!-- [NT] currently not available
+
 ???+ example "`scp` for transferring data to ZIH HPC systems"
 
     Copy the file `example.R` from your local machine to a workspace on the ZIH systems:
 
     ```console
-    marie@local$ scp /home/marie/Documents/example.R marie@dataport1.hpc.tu-dresden.de:/data/horse/ws/your_workspace/
+    marie@local$ scp /home/marie/Documents/example.R marie@dataport1.hpc.tu-dresden.de:/data/horse/ws/marie-number_crunch/
     Password:
-    example.R 100% 312 32.2KB/s 00:00``
+    example.R 100% 312 32.2KB/s 00:00
     ```
 
     Note, the target path contains `dataport1.hpc.tu-dresden.de`, which is one of the
-    so called [dataport nodes](../data_transfer/dataport_nodes.md) that allows for data transfer from/to the outside.
+    so-called [dataport nodes](../data_transfer/dataport_nodes.md) that allows for data transfer
+    from/to the outside.
 
 ???+ example "`scp` to transfer data from ZIH HPC systems to local machine"
 
     Copy the file `results.csv` from a workspace on the ZIH HPC systems to your local machine:
 
     ```console
-    marie@local$ scp marie@dataport1.hpc.tu-dresden.de:/data/horse/ws/marie-test-workspace/results.csv /home/marie/Documents/
+    marie@local$ scp marie@dataport1.hpc.tu-dresden.de:/data/horse/ws/marie-number_crunch/results.csv /home/marie/Documents/
     ```
 
     Feel free to explore further [examples](http://bropages.org/scp) of the `scp` command and
     possibilities of the [dataport nodes](../data_transfer/dataport_nodes.md).
--->
+
 !!! caution "Terabytes of data"
 
     If you are planning to move terabytes or even more from an outside machine into ZIH systems,
@@ -423,7 +424,7 @@ See [Slurm documentation](../jobs_and_resources/slurm.md#interactive-jobs) for m
 ```console
 marie@login$ srun --ntasks=1 --cpus-per-task=4 --time=1:00:00 --mem-per-cpu=1700 --pty bash -l #allocate 4 cores for the interactive job
 marie@compute$ module load Python #load necessary packages
-marie@compute$ cd /data/horse/ws/marie-test-workspace/ #go to your created workspace
+marie@compute$ cd /data/horse/ws/marie-number_crunch/ #go to your created workspace
 marie@compute$ python test.py #execute your file
 Hello, World!
 ```
diff --git a/doc.zih.tu-dresden.de/docs/software/cfd.md b/doc.zih.tu-dresden.de/docs/software/cfd.md
index 7e0eebc74081f188a1441eea1136df0ba636458e..5fcd9082f7aed6e738cbe1463b6445dedfcc9152 100644
--- a/doc.zih.tu-dresden.de/docs/software/cfd.md
+++ b/doc.zih.tu-dresden.de/docs/software/cfd.md
@@ -76,11 +76,10 @@ geometry and mesh generator cfx5pre, and the post-processor cfx5post.
     #SBATCH --mail-user=marie@tu-dresden.de # email address (only tu-dresden)
     #SBATCH --mail-type=ALL
 
-    module load ANSYS
-
-    nodeset -e $SLURM_JOB_NODELIST | xargs -n1 > hostsfile_job_$SLURM_JOBID.txt
-
-    fluent 2ddp -t$SLURM_NTASKS -g -mpi=intel -pinfiniband -cnf=hostsfile_job_$SLURM_JOBID.txt < input.in
+    module purge
+    module load release/23.10
+    module load ANSYS/2023R1
+    fluent 2ddp -t$SLURM_NTASKS -g -mpi=openmpi -pinfiniband -cnf=$(/software/util/slurm/bin/create_rankfile -f CCM) -i input.jou
     ```
 
 To use fluent interactively, please try:
diff --git a/doc.zih.tu-dresden.de/docs/software/python_virtual_environments.md b/doc.zih.tu-dresden.de/docs/software/python_virtual_environments.md
index 0b2f334437718c53e50109079b27633a9ee3bab5..5c4759850caf8d705b9880d05d1da22a4bb116a7 100644
--- a/doc.zih.tu-dresden.de/docs/software/python_virtual_environments.md
+++ b/doc.zih.tu-dresden.de/docs/software/python_virtual_environments.md
@@ -200,13 +200,14 @@ can deactivate the conda environment as follows:
     marie@alpha$ ws_allocate my_conda_virtualenv 100 # use a workspace for the environment
     marie@alpha$ cd /data/horse/ws/marie-my_conda_virtualenv
     marie@alpha$ module load Anaconda3
-    Module Anaconda3/2021.11 loaded.
+    Module Anaconda3/2022.05 loaded.
     marie@alpha$ conda create --prefix my-torch-env python=3.8
     Collecting package metadata (current_repodata.json): done
     Solving environment: done
     [...]
     Proceed ([y]/n)? y
     [...]
+    marie@alpha$ source $EBROOTANACONDA3/etc/profile.d/conda.sh
     marie@alpha$ conda activate my-torch-env
     (my-torch-env) marie@alpha$ conda install -c pytorch torchvision
     Collecting package metadata (current_repodata.json): done
diff --git a/doc.zih.tu-dresden.de/docs/software/visualization.md b/doc.zih.tu-dresden.de/docs/software/visualization.md
index 427bf746840383e69c9a4a85a84997d618cc9b15..3a4ce5b05caa6fbd6bff94c184304de30f151db2 100644
--- a/doc.zih.tu-dresden.de/docs/software/visualization.md
+++ b/doc.zih.tu-dresden.de/docs/software/visualization.md
@@ -158,7 +158,7 @@ processes.
 
 ```console
 marie@login$ module ParaView/5.7.0-osmesa
-marie@login$ srun --nodes=1 --ntasks=8 --mem-per-cpu=2500 --partition=interactive --pty pvserver --force-offscreen-rendering
+marie@login$ srun --nodes=1 --ntasks=8 --mem-per-cpu=2500 --pty pvserver --force-offscreen-rendering
 srun: job 2744818 queued and waiting for resources
 srun: job 2744818 has been allocated resources
 Waiting for client...
@@ -254,5 +254,5 @@ it into thinking your provided GL rendering version is higher than what it actua
 marie@login$ export MESA_GL_VERSION_OVERRIDE=3.2
 # 3rd, start the ParaView GUI inside an interactive job.
 Don't forget the --x11 parameter for X forwarding:
-marie@login$ srun --ntasks=1 --cpus-per-task=1 --partition=interactive --mem-per-cpu=2500 --pty --x11=first paraview
+marie@login$ srun --ntasks=1 --cpus-per-task=1 --mem-per-cpu=2500 --pty --x11=first paraview
 ```
diff --git a/doc.zih.tu-dresden.de/mkdocs.yml b/doc.zih.tu-dresden.de/mkdocs.yml
index d2b2bc9feb444046b96ee93b84c0826418e0985a..06d4aee3a10cd19514cbf93a1c920439cdd678a1 100644
--- a/doc.zih.tu-dresden.de/mkdocs.yml
+++ b/doc.zih.tu-dresden.de/mkdocs.yml
@@ -173,7 +173,6 @@ edit_uri: blob/preview/doc.zih.tu-dresden.de/docs/
 
 theme:
   # basetheme
-
   name: material
 
   # disable fonts being loaded from google fonts
@@ -221,7 +220,8 @@ markdown_extensions:
 
 plugins:
-  - search
+  - search:
+      separator: '[\s\-,:!=\[\]\(\)"/]+|(?!\b)(?=[A-Z][a-z])|\.(?!\d)|&[lg]t;'
   - markdown-caption
   # https://github.com/manuzhang/mkdocs-htmlproofer-plugin
   - htmlproofer:
diff --git a/doc.zih.tu-dresden.de/tud_theme/partials/header.html b/doc.zih.tu-dresden.de/tud_theme/partials/header.html
index 4b28fab9289088db78f1a0ee519a7f5594ad8d38..9fad2d191b113d4367536ec342f6a95d7438b8f6 100644
--- a/doc.zih.tu-dresden.de/tud_theme/partials/header.html
+++ b/doc.zih.tu-dresden.de/tud_theme/partials/header.html
@@ -69,7 +69,7 @@
       </div>
 
       <!-- Button to open search modal -->
-      {% if "search" in config["plugins"] %}
+      {% if "material/search" in config["plugins"] %}
       <label class="md-header__button md-icon" for="__search">
         {% include ".icons/material/magnify.svg" %}
       </label>
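
A quick way to sanity-check the tool bumps above is to build the updated image locally and run the documentation tooling inside it. This is only a sketch: the image tag `hpc-compendium-docs` and the bind-mount layout are assumptions, not part of this patch, and the repository's own CI wrapper scripts may invoke the tools differently; only standard `docker`, `mkdocs`, and `markdownlint` options are used.

```console
marie@local$ # build the image from the patched Dockerfile (assumed tag: hpc-compendium-docs)
marie@local$ docker build -t hpc-compendium-docs .
marie@local$ # render the site with the new mkdocs/mkdocs-material pins
marie@local$ docker run --rm -v "$PWD":/src -w /src/doc.zih.tu-dresden.de hpc-compendium-docs mkdocs build
marie@local$ # lint the markdown sources with the pinned markdownlint-cli 0.40.0
marie@local$ docker run --rm -v "$PWD":/src -w /src/doc.zih.tu-dresden.de hpc-compendium-docs markdownlint "docs/**/*.md"
```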