diff --git a/Dockerfile b/Dockerfile
index e0121582e4dffee8ed997d687ec390dd8ca117c2..fddfc549e40058224829cf18f2bde1d2d636e420 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -14,7 +14,7 @@ RUN pip install mkdocs>=1.1.2 mkdocs-material>=7.1.0 mkdocs-htmlproofer-plugin==
 
 RUN apt-get update && apt-get install -y nodejs npm aspell git git-lfs
 
-RUN npm install -g markdownlint-cli markdown-link-check
+RUN npm install -g markdownlint-cli@0.32.2 markdown-link-check
 
 ###########################################
 # prepare git for automatic merging in CI #
@@ -38,6 +38,9 @@ RUN echo 'test \! -e /docs/tud_theme/javascripts/mermaid.min.js && test -x /docs
 RUN echo 'exec "$@"' >> /entrypoint.sh
 RUN chmod u+x /entrypoint.sh
 
+# Workaround https://gitlab.com/gitlab-org/gitlab-runner/-/issues/29022
+RUN git config --global --add safe.directory /docs
+
 WORKDIR /docs
 
 CMD ["mkdocs", "build", "--verbose", "--strict"]
diff --git a/doc.zih.tu-dresden.de/docs/jobs_and_resources/slurm.md b/doc.zih.tu-dresden.de/docs/jobs_and_resources/slurm.md
index 6edd5359443cea01d19578a35d2a5ebf6f706696..adaf75cdf9a356307f023a85620fbc9f482dc019 100644
--- a/doc.zih.tu-dresden.de/docs/jobs_and_resources/slurm.md
+++ b/doc.zih.tu-dresden.de/docs/jobs_and_resources/slurm.md
@@ -328,8 +328,8 @@ specifications for each component of the heterogeneous job should be separated w
 Running a job step on a specific component is supported by the option `--het-group`.
 
 ```console
-marie@login$ salloc --ntasks 1 --cpus-per-task 4 --partition <partition> --mem=200G : \
-    --ntasks 8 --cpus-per-task 1 --gres=gpu:8 --mem=80G --partition <partition>
+marie@login$ salloc --ntasks=1 --cpus-per-task=4 --partition <partition> --mem=200G : \
+    --ntasks=8 --cpus-per-task=1 --gres=gpu:8 --mem=80G --partition <partition>
 [...]
 marie@login$ srun ./my_application <args for master tasks> : ./my_application <args for worker tasks>
 ```
@@ -340,16 +340,16 @@ components by a line containing the directive `#SBATCH hetjob`.
 
 ```bash
 #!/bin/bash
-#SBATCH --ntasks 1
-#SBATCH --cpus-per-task 4
-#SBATCH --partition <partition>
+#SBATCH --ntasks=1
+#SBATCH --cpus-per-task=4
+#SBATCH --partition=<partition>
 #SBATCH --mem=200G
 #SBATCH hetjob # required to separate groups
-#SBATCH --ntasks 8
-#SBATCH --cpus-per-task 1
+#SBATCH --ntasks=8
+#SBATCH --cpus-per-task=1
 #SBATCH --gres=gpu:8
 #SBATCH --mem=80G
-#SBATCH --partition <partition>
+#SBATCH --partition=<partition>
 
 srun ./my_application <args for master tasks> : ./my_application <args for worker tasks>
 
@@ -474,7 +474,7 @@ at no extra cost.
 ??? example "Show all jobs since the beginning of year 2021"
 
     ```console
-    marie@login$ sacct -S 2021-01-01 [-E now]
+    marie@login$ sacct --starttime 2021-01-01 [--endtime now]
     ```
 
 ## Jobs at Reservations
diff --git a/doc.zih.tu-dresden.de/docs/jobs_and_resources/slurm_examples.md b/doc.zih.tu-dresden.de/docs/jobs_and_resources/slurm_examples.md
index 6b86ae2ace0a893da87479ec00aaca778ee8a4c0..37f170cef51753b866c1bf80edbaa746b1d7570f 100644
--- a/doc.zih.tu-dresden.de/docs/jobs_and_resources/slurm_examples.md
+++ b/doc.zih.tu-dresden.de/docs/jobs_and_resources/slurm_examples.md
@@ -186,7 +186,7 @@ When `srun` is used within a submission script, it inherits parameters from `sba
 `--ntasks=1`, `--cpus-per-task=4`, etc. So we actually implicitly run the following
 
 ```bash
-srun --ntasks=1 --cpus-per-task=4 ... --partition=ml some-gpu-application
+srun --ntasks=1 --cpus-per-task=4 [...] --partition=ml <some-gpu-application>
 ```
 
 Now, our goal is to run four instances of this program concurrently in a single batch script. Of
@@ -237,7 +237,7 @@ inherited from the surrounding `sbatch` context. The following line would be suf
 job in this example:
 
 ```bash
-srun --exclusive --gres=gpu:1 --ntasks=1 some-gpu-application &
+srun --exclusive --gres=gpu:1 --ntasks=1 <some-gpu-application> &
 ```
 
 Yet, it adds some extra safety to leave them in, enabling the Slurm batch system to complain if not
@@ -278,7 +278,8 @@ use up all resources in the nodes:
     #SBATCH --exclusive # ensure that nobody spoils my measurement on 2 x 2 x 8 cores
     #SBATCH --time=00:10:00
     #SBATCH --job-name=Benchmark
-    #SBATCH --mail-user=your.name@tu-dresden.de
+    #SBATCH --mail-type=end
+    #SBATCH --mail-user=<your.email>@tu-dresden.de
 
     srun ./my_benchmark
     ```
@@ -313,14 +314,14 @@ name specific to the job:
 
     ```Bash
     #!/bin/bash
-    #SBATCH --array 0-9
+    #SBATCH --array=0-9
     #SBATCH --output=arraytest-%A_%a.out
     #SBATCH --error=arraytest-%A_%a.err
     #SBATCH --ntasks=864
    #SBATCH --time=08:00:00
     #SBATCH --job-name=Science1
     #SBATCH --mail-type=end
-    #SBATCH --mail-user=your.name@tu-dresden.de
+    #SBATCH --mail-user=<your.email>@tu-dresden.de
 
     echo "Hi, I am step $SLURM_ARRAY_TASK_ID in this array job $SLURM_ARRAY_JOB_ID"
     ```
diff --git a/doc.zih.tu-dresden.de/docs/software/mpi_usage_error_detection.md b/doc.zih.tu-dresden.de/docs/software/mpi_usage_error_detection.md
index 08ca90384e9120d6d0e87ef848d4618c9846539e..66ee5b4cc26e4a646203d9911ca429ed4dba2e41 100644
--- a/doc.zih.tu-dresden.de/docs/software/mpi_usage_error_detection.md
+++ b/doc.zih.tu-dresden.de/docs/software/mpi_usage_error_detection.md
@@ -66,7 +66,7 @@ marie@login$ srun --ntasks 1 --partition <partition> mpicc -g -o fancy-program f
 # Allocate interactive session with 1 extra process for MUST
 marie@login$ salloc --ntasks 5 --partition <partition>
 
-marie@login$ mustrun --must:mpiexec srun --must:np --ntasks --ntasks 4 --must:stacktrace backward ./fancy-program
+marie@login$ mustrun --must:mpiexec srun --must:np --ntasks --must:stacktrace backward --ntasks 4 ./fancy-program
 [MUST] MUST configuration ... centralized checks with fall-back application crash handling (very slow)
 [MUST] Weaver ... success
 [MUST] Code generation ... success
diff --git a/doc.zih.tu-dresden.de/docs/software/tensorflow.md b/doc.zih.tu-dresden.de/docs/software/tensorflow.md
index 58b99bd1c302c0ed65619fc200602f2732f84df1..f11ecb3ac94e3cc65cf671815d813bacc9b9815f 100644
--- a/doc.zih.tu-dresden.de/docs/software/tensorflow.md
+++ b/doc.zih.tu-dresden.de/docs/software/tensorflow.md
@@ -96,7 +96,7 @@ the notebook by pre-loading a specific TensorFlow module:
 
     You can also define your own Jupyter kernel for more specific tasks. Please read about Jupyter
     kernels and virtual environments in our
-    [JupyterHub](../access/jupyterhub.md#creating-and-using-a-custom-environment) documentation.
+    [JupyterHub](../access/jupyterhub_custom_environments.md) documentation.
 
 ## TensorFlow in Containers
 
diff --git a/doc.zih.tu-dresden.de/docs/software/vampir.md b/doc.zih.tu-dresden.de/docs/software/vampir.md
index ebaa368e73f445422644b6159c1ab677fc50fecf..64dfd00d36eb8079406ce7a47ee55324d8de32fe 100644
--- a/doc.zih.tu-dresden.de/docs/software/vampir.md
+++ b/doc.zih.tu-dresden.de/docs/software/vampir.md
@@ -73,7 +73,16 @@ Launching VampirServer...
 Submitting slurm 30 minutes job (this might take a while)...
 ```
 
+This way, a job with a time limit of 30 minutes and default resources is submitted. This might fit
+your needs. If not, please feel free to request a customized job running VampirServer, e.g.
+
+```console
+marie@login$ vampirserver start --ntasks=8 --time=01:00:00 --mem-per-cpu=3000M --partition=romeo
+Launching VampirServer...
+Submitting slurm 01:00:00 minutes job (this might take a while)...
+```
+
 Above automatically allocates its resources via the respective batch system. If you want to start
 VampirServer without a batch allocation or from inside an interactive allocation, use
 
 ```console
diff --git a/doc.zih.tu-dresden.de/util/download-newest-mermaid.js.sh b/doc.zih.tu-dresden.de/util/download-newest-mermaid.js.sh
index 9986ad6f49e2e739f8a53d7911f4e346196d21a4..d01622e4bba4188479370be170339b1f01308074 100755
--- a/doc.zih.tu-dresden.de/util/download-newest-mermaid.js.sh
+++ b/doc.zih.tu-dresden.de/util/download-newest-mermaid.js.sh
@@ -6,4 +6,4 @@ scriptpath=${BASH_SOURCE[0]}
 basedir=`dirname "$scriptpath"`
 basedir=`dirname "$basedir"`
 cd $basedir/tud_theme/javascripts
-wget https://unpkg.com/mermaid/dist/mermaid.min.js
+wget https://unpkg.com/mermaid@9.4.0/dist/mermaid.min.js