diff --git a/Dockerfile b/Dockerfile index 57490c2509a22302ba13ed4bd05d32f0d7b0fb51..b272bf553212534167e23e083d4a0c088700a025 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,5 +1,7 @@ FROM python:3.8-bullseye +SHELL ["/bin/bash", "-c"] + ######## # Base # ######## @@ -14,6 +16,19 @@ RUN apt update && apt install -y nodejs npm aspell git RUN npm install -g markdownlint-cli markdown-link-check +########################################### +# prepare git for automatic merging in CI # +########################################### +RUN git config --global user.name 'Gitlab Bot' +RUN git config --global user.email 'hpcsupport@zih.tu-dresden.de' + +RUN mkdir -p ~/.ssh + +#see output of `ssh-keyscan gitlab.hrz.tu-chemnitz.de` +RUN echo $'# gitlab.hrz.tu-chemnitz.de:22 SSH-2.0-OpenSSH_7.4\n\ +gitlab.hrz.tu-chemnitz.de ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQDNixJ1syD506jOtiLPxGhAXsNnVfweFfzseh9/WrNxbTgIhi09fLb5aZI2CfOOWIi4fQz07S+qGugChBs4lJenLYAu4b0IAnEv/n/Xnf7wITf/Wlba2VSKiXdDqbSmNbOQtbdBLNu1NSt+inFgrreaUxnIqvWX4pBDEEGBAgG9e2cteXjT/dHp4+vPExKEjM6Nsxw516Cqv5H1ZU7XUTHFUYQr0DoulykDoXU1i3odJqZFZQzcJQv/RrEzya/2bwaatzKfbgoZLlb18T2LjkP74b71DeFIQWV2e6e3vsNwl1NsvlInEcsSZB1TZP+mKke7JWiI6HW2IrlSaGqM8n4h\n\ +gitlab.hrz.tu-chemnitz.de ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIJ/cSNsKRPrfXCMjl+HsKrnrI3HgbCyKWiRa715S99BR\n' > ~/.ssh/known_hosts + WORKDIR /docs CMD ["mkdocs", "build", "--verbose", "--strict"] diff --git a/doc.zih.tu-dresden.de/README.md b/doc.zih.tu-dresden.de/README.md index f1d0e97563caae06b8859b8c0632e7dacc2fb641..bf1b82f52a145f959068fa063d9dbdf31fb2eae3 100644 --- a/doc.zih.tu-dresden.de/README.md +++ b/doc.zih.tu-dresden.de/README.md @@ -41,8 +41,6 @@ Now, create a local clone of your fork #### Install Dependencies See [Installation with Docker](#preview-using-mkdocs-with-dockerfile). -**TODO:** virtual environment -**TODO:** What we need for markdownlinter and checks? 
<!--- All branches are protected, i.e., only ZIH staff can create branches and push to them ---> diff --git a/doc.zih.tu-dresden.de/docs/access/desktop_cloud_visualization.md b/doc.zih.tu-dresden.de/docs/access/desktop_cloud_visualization.md index b9c0d1cd8f894c6944b52daa07fa09c772c73dc0..7395aad287f5c197ae8ba639491c493e87f2ffe9 100644 --- a/doc.zih.tu-dresden.de/docs/access/desktop_cloud_visualization.md +++ b/doc.zih.tu-dresden.de/docs/access/desktop_cloud_visualization.md @@ -11,7 +11,7 @@ if you want to know whether your browser is supported by DCV. **Check out our new documentation about** [Virtual Desktops](../software/virtual_desktops.md). -To start a JupyterHub session on the dcv partition (taurusi210\[4-8\]) with one GPU, six CPU cores +To start a JupyterHub session on the partition `dcv` (`taurusi210[4-8]`) with one GPU, six CPU cores and 2583 MB memory per core, click on: [https://taurus.hrsk.tu-dresden.de/jupyter/hub/spawn#/~(partition~'dcv~cpuspertask~'6~gres~'gpu*3a1~mempercpu~'2583~environment~'production)](https://taurus.hrsk.tu-dresden.de/jupyter/hub/spawn#/~(partition~'dcv~cpuspertask~'6~gres~'gpu*3a1~mempercpu~'2583~environment~'production)) Optionally, you can modify many different Slurm parameters. For this diff --git a/doc.zih.tu-dresden.de/docs/access/graphical_applications_with_webvnc.md b/doc.zih.tu-dresden.de/docs/access/graphical_applications_with_webvnc.md index 6837ace6473f9532e608778ec96049394b4c4494..c652738dc859beecf3dc9669fdde684dc49d04f3 100644 --- a/doc.zih.tu-dresden.de/docs/access/graphical_applications_with_webvnc.md +++ b/doc.zih.tu-dresden.de/docs/access/graphical_applications_with_webvnc.md @@ -38,7 +38,7 @@ marie@login$ srun --pty --partition=interactive --mem-per-cpu=2500 --cpus-per-ta [...] ``` -Of course, you can adjust the batch job parameters to your liking. Note that the default timelimit +Of course, you can adjust the batch job parameters to your liking. 
Note that the default time limit in partition `interactive` is only 30 minutes, so you should specify a longer one with `--time` (or `-t`). The script will automatically generate a self-signed SSL certificate and place it in your home diff --git a/doc.zih.tu-dresden.de/docs/access/jupyterhub.md b/doc.zih.tu-dresden.de/docs/access/jupyterhub.md index b6b0f25d3963da0529f26274a3daf4bdfcb0bbe0..f9a916195ecbf814cf426beb4d26885500b3b3de 100644 --- a/doc.zih.tu-dresden.de/docs/access/jupyterhub.md +++ b/doc.zih.tu-dresden.de/docs/access/jupyterhub.md @@ -1,7 +1,7 @@ # JupyterHub With our JupyterHub service we offer you a quick and easy way to work with Jupyter notebooks on ZIH -systems. This page covers starting and stopping JuperterHub sessions, error handling and customizing +systems. This page covers starting and stopping JupyterHub sessions, error handling and customizing the environment. We also provide a comprehensive documentation on how to use @@ -21,7 +21,8 @@ cannot give extensive support in every case. !!! note This service is only available for users with an active HPC project. - See [here](../access/overview.md) how to apply for an HPC project. + See [Application for Login and Resources](../application/overview.md), if you need to apply for + an HPC project. JupyterHub is available at [https://taurus.hrsk.tu-dresden.de/jupyter](https://taurus.hrsk.tu-dresden.de/jupyter). @@ -100,7 +101,7 @@ running the code. We currently offer one for Python, C++, MATLAB and R. ## Stop a Session -It is good practise to stop your session once your work is done. This releases resources for other +It is good practice to stop your session once your work is done. This releases resources for other users and your quota is less charged. If you just log out or close the window, your server continues running and **will not stop** until the Slurm job runtime hits the limit (usually 8 hours). 
@@ -147,8 +148,8 @@ Useful pages for valid batch system parameters: If the connection to your notebook server unexpectedly breaks, you will get this error message. Sometimes your notebook server might hit a batch system or hardware limit and gets killed. Then -usually the logfile of the corresponding batch job might contain useful information. These logfiles -are located in your `home` directory and have the name `jupyter-session-<jobid>.log`. +usually the log file of the corresponding batch job might contain useful information. These log +files are located in your `home` directory and have the name `jupyter-session-<jobid>.log`. ## Advanced Tips @@ -309,4 +310,4 @@ You can switch kernels of existing notebooks in the kernel menu: You have now the option to preload modules from the [module system](../software/modules.md). Select multiple modules that will be preloaded before your notebook server starts. The list of available modules depends on the module environment you want to start the session in (`scs5` or -`ml`). The right module environment will be chosen by your selected partition. +`ml`). The right module environment will be chosen by your selected partition. diff --git a/doc.zih.tu-dresden.de/docs/access/jupyterhub_for_teaching.md b/doc.zih.tu-dresden.de/docs/access/jupyterhub_for_teaching.md index 92ad16d1325173c384c7472658239baca3e26157..797d9fc8e455b14e40a5ec7f3737874b2ac500ae 100644 --- a/doc.zih.tu-dresden.de/docs/access/jupyterhub_for_teaching.md +++ b/doc.zih.tu-dresden.de/docs/access/jupyterhub_for_teaching.md @@ -1,7 +1,7 @@ # JupyterHub for Teaching -On this page we want to introduce to you some useful features if you -want to use JupyterHub for teaching. +On this page, we want to introduce to you some useful features if you want to use JupyterHub for +teaching. !!! note @@ -9,23 +9,21 @@ want to use JupyterHub for teaching. 
Please be aware of the following notes: -- ZIH systems operate at a lower availability level than your usual Enterprise Cloud VM. There - can always be downtimes, e.g. of the filesystems or the batch system. +- ZIH systems operate at a lower availability level than your usual Enterprise Cloud VM. There can + always be downtimes, e.g. of the filesystems or the batch system. - Scheduled downtimes are announced by email. Please plan your courses accordingly. - Access to HPC resources is handled through projects. See your course as a project. Projects need to be registered beforehand (more info on the page [Access](../application/overview.md)). - Don't forget to [add your users](../application/project_management.md#manage-project-members-dis-enable) - (eg. students or tutors) to your project. + (e.g. students or tutors) to your project. - It might be a good idea to [request a reservation](../jobs_and_resources/overview.md#exclusive-reservation-of-hardware) - of part of the compute resources for your project/course to - avoid unnecessary waiting times in the batch system queue. + of part of the compute resources for your project/course to avoid unnecessary waiting times in + the batch system queue. ## Clone a Repository With a Link -This feature bases on -[nbgitpuller](https://github.com/jupyterhub/nbgitpuller). -Documentation can be found at -[this page](https://jupyterhub.github.io/nbgitpuller/). +This feature is based on [nbgitpuller](https://github.com/jupyterhub/nbgitpuller). Further information +can be found in the [external documentation about nbgitpuller](https://jupyterhub.github.io/nbgitpuller/). This extension for Jupyter notebooks can clone every public git repository into the users work directory. It's offering a quick way to distribute notebooks and other material to your students. 
@@ -50,14 +48,14 @@ The following parameters are available: |---|---| |`repo` | path to git repository| |`branch` | branch in the repository to pull from default: `master`| -|`urlpath` | URL to redirect the user to a certain file [more info](https://jupyterhub.github.io/nbgitpuller/topic/url-options.html#urlpath)| +|`urlpath` | URL to redirect the user to a certain file, [more info about parameter urlpath](https://jupyterhub.github.io/nbgitpuller/topic/url-options.html#urlpath)| |`depth` | clone only a certain amount of latest commits not recommended| This [link generator](https://jupyterhub.github.io/nbgitpuller/link?hub=https://taurus.hrsk.tu-dresden.de/jupyter/) might help creating those links -## Spawner Options Passthrough with URL Parameters +## Spawn Options Pass-through with URL Parameters The spawn form now offers a quick start mode by passing URL parameters. diff --git a/doc.zih.tu-dresden.de/docs/access/ssh_login.md b/doc.zih.tu-dresden.de/docs/access/ssh_login.md index 69dc79576910d37b001aaaff4cfc43c8ab583b18..a0fef440151984abbe662fe8f096de166eae6dad 100644 --- a/doc.zih.tu-dresden.de/docs/access/ssh_login.md +++ b/doc.zih.tu-dresden.de/docs/access/ssh_login.md @@ -9,7 +9,7 @@ connection to enter the campus network. While active, it allows the user to conn HPC login nodes. For more information on our VPN and how to set it up, please visit the corresponding -[ZIH service catalogue page](https://tu-dresden.de/zih/dienste/service-katalog/arbeitsumgebung/zugang_datennetz/vpn). +[ZIH service catalog page](https://tu-dresden.de/zih/dienste/service-katalog/arbeitsumgebung/zugang_datennetz/vpn). 
## Connecting from Linux @@ -148,7 +148,11 @@ We recommend one of the following applications: * [MobaXTerm](https://mobaxterm.mobatek.net): [ZIH documentation](misc/basic_usage_of_MobaXterm.pdf) * [PuTTY](https://www.putty.org): [ZIH documentation](misc/basic_usage_of_PuTTY.pdf) - * OpenSSH Server: [docs](https://docs.microsoft.com/de-de/windows-server/administration/openssh/openssh_install_firstuse) + * For Windows 10 (1809 and higher): + * [Windows Terminal](https://www.microsoft.com/store/productId/9N0DX20HK701) + * Together with the built-in [OpenSSH Client](https://docs.microsoft.com/de-de/windows-server/administration/openssh/openssh_overview) + +## SSH Key Fingerprints The page [key fingerprints](key_fingerprints.md) holds the up-to-date fingerprints for the login nodes. Make sure they match. diff --git a/doc.zih.tu-dresden.de/docs/accessibility.md b/doc.zih.tu-dresden.de/docs/accessibility.md index 418d8a11c98be59a121a47f0d497dfce1a79aa05..ba40340fe0d9995c27b4013d06a01400dc279e87 100644 --- a/doc.zih.tu-dresden.de/docs/accessibility.md +++ b/doc.zih.tu-dresden.de/docs/accessibility.md @@ -39,4 +39,4 @@ Postanschrift: Archivstraße 1, 01097 Dresden E-Mail: <info.behindertenbeauftragter@sk.sachsen.de> Telefon: +49 351 564-12161 Fax: +49 351 564-12169 -Webseite: [https://www.inklusion.sachsen.de](https://www.inklusion.sachsen.de) +Webseite: [https://www.inklusion.sachsen.de/](https://www.inklusion.sachsen.de/) diff --git a/doc.zih.tu-dresden.de/docs/application/project_request_form.md b/doc.zih.tu-dresden.de/docs/application/project_request_form.md index b5b9e348a94c4178d382e5ca27d67047c06f1481..e829f316cb26f11b9b9048a889c8b5e918b2e870 100644 --- a/doc.zih.tu-dresden.de/docs/application/project_request_form.md +++ b/doc.zih.tu-dresden.de/docs/application/project_request_form.md @@ -36,15 +36,16 @@ Any project have: ## Third step: Hardware {loading=lazy width=300 style="float:right"} -This step inquire the required hardware. 
You can find the specifications -[here](../jobs_and_resources/hardware_overview.md). +This step inquires the required hardware. The +[hardware specifications](../jobs_and_resources/hardware_overview.md) might help you to estimate, +e.g., the compute time. -Please fill in the total computing time you expect in the project runtime. The compute time is +Please fill in the total computing time you expect in the project runtime. The compute time is given in cores per hour (CPU/h), this refers to the 'virtual' cores for nodes with hyperthreading. -If they require GPUs, then this is given as GPU units per hour (GPU/h). Please add 6 CPU hours per +If they require GPUs, then this is given as GPU units per hour (GPU/h). Please add 6 CPU hours per GPU hour in your application. -The project home is a shared storage in your project. Here you exchange data or install software +The project home is a shared storage in your project. Here you exchange data or install software for your project group in userspace. The directory is not intended for active calculations, for this the scratch is available. diff --git a/doc.zih.tu-dresden.de/docs/data_lifecycle/intermediate_archive.md b/doc.zih.tu-dresden.de/docs/data_lifecycle/intermediate_archive.md index 6aee19dd87cf1f9bcf589c2950ca11e5b99b1b65..bcfc86b6b35f01bc0a5a1eebffdf65ee6319d171 100644 --- a/doc.zih.tu-dresden.de/docs/data_lifecycle/intermediate_archive.md +++ b/doc.zih.tu-dresden.de/docs/data_lifecycle/intermediate_archive.md @@ -1,12 +1,12 @@ # Intermediate Archive With the "Intermediate Archive", ZIH is closing the gap between a normal disk-based filesystem and -[Longterm Archive](preservation_research_data.md). The Intermediate Archive is a hierarchical +[Long-term Archive](preservation_research_data.md). The Intermediate Archive is a hierarchical filesystem with disks for buffering and tapes for storing research data. Its intended use is the storage of research data for a maximal duration of 3 years. 
For storing the data after exceeding this time, the user has to supply essential metadata and migrate the files to -the [Longterm Archive](preservation_research_data.md). Until then, she/he has to keep track of her/his +the [Long-term Archive](preservation_research_data.md). Until then, she/he has to keep track of her/his files. Some more information: diff --git a/doc.zih.tu-dresden.de/docs/data_lifecycle/preservation_research_data.md b/doc.zih.tu-dresden.de/docs/data_lifecycle/preservation_research_data.md index 5c035e56d8a3fa647f9d847a08ed5be9ef903f93..79ae1cf00b45f8bf46bc054e1502fc9404417b75 100644 --- a/doc.zih.tu-dresden.de/docs/data_lifecycle/preservation_research_data.md +++ b/doc.zih.tu-dresden.de/docs/data_lifecycle/preservation_research_data.md @@ -1,4 +1,4 @@ -# Longterm Preservation for Research Data +# Long-term Preservation for Research Data ## Why should research data be preserved? @@ -55,7 +55,7 @@ Below are some examples: - ISBN - possible meta-data for an electronically saved image would be: - resolution of the image - - information about the colour depth of the picture + - information about the color depth of the picture - file format (jpg or tiff or ...) - file size how was this image created (digital camera, scanner, ...) - description of what the image shows @@ -79,6 +79,6 @@ information about managing research data. ## I want to store my research data at ZIH. How can I do that? -Longterm preservation of research data is under construction at ZIH and in a testing phase. +Long-term preservation of research data is under construction at ZIH and in a testing phase. Nevertheless you can already use the archiving service. If you would like to become a test user, please write an E-Mail to [Dr. Klaus Köhler](mailto:klaus.koehler@tu-dresden.de). 
diff --git a/doc.zih.tu-dresden.de/docs/data_lifecycle/quotas.md b/doc.zih.tu-dresden.de/docs/data_lifecycle/quotas.md deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/doc.zih.tu-dresden.de/docs/data_lifecycle/workspaces.md b/doc.zih.tu-dresden.de/docs/data_lifecycle/workspaces.md index d970f38b9abb883fca7864a7af01a6f7037623a8..cad27c4df4070206644612d85d7fadc7658e15f4 100644 --- a/doc.zih.tu-dresden.de/docs/data_lifecycle/workspaces.md +++ b/doc.zih.tu-dresden.de/docs/data_lifecycle/workspaces.md @@ -2,7 +2,7 @@ Storage systems differ in terms of capacity, streaming bandwidth, IOPS rate, etc. Price and efficiency don't allow to have it all in one. That is why fast parallel filesystems at ZIH have -restrictions with regards to **age of files** and [quota](quotas.md). The mechanism of workspaces +restrictions with regards to **age of files** and [quota](permanent.md#quotas). The mechanism of workspaces enables users to better manage their HPC data. The concept of workspaces is common and used at a large number of HPC centers. diff --git a/doc.zih.tu-dresden.de/docs/data_transfer/export_nodes.md b/doc.zih.tu-dresden.de/docs/data_transfer/export_nodes.md index 80ea758c57b09601cadd001aa018c56a2f219a3f..d492594b85f4e9d033d273749983e75458068dc1 100644 --- a/doc.zih.tu-dresden.de/docs/data_transfer/export_nodes.md +++ b/doc.zih.tu-dresden.de/docs/data_transfer/export_nodes.md @@ -9,9 +9,13 @@ that you cannot log in via SSH to the export nodes, but only use `scp`, `rsync` The export nodes are reachable under the hostname `taurusexport.hrsk.tu-dresden.de` (or `taurusexport3.hrsk.tu-dresden.de` and `taurusexport4.hrsk.tu-dresden.de`). +Please keep in mind that there are different +[filesystems](../data_lifecycle/file_systems.md#recommendations-for-filesystem-usage). Choose the +one that matches your needs. 
+ ## Access From Linux -There are at least three tool to exchange data between your local workstation and ZIH systems. All +There are at least three tools to exchange data between your local workstation and ZIH systems. They are explained in the following section in more detail. !!! important @@ -33,13 +37,27 @@ in a directory, the option `-r` has to be specified. marie@local$ scp -r <directory> taurusexport:<target-location> ``` -??? example "Example: Copy a file from ZIH systems to your workstation" + For example, if you want to copy your data file `mydata.csv` to the directory `input` in your + home directory, you would use the following: ```console - marie@login$ scp taurusexport:<file> <target-location> + marie@local$ scp mydata.csv taurusexport:input/ + ``` + +??? example "Example: Copy a file from ZIH systems to your workstation" + + ```bash + marie@local$ scp taurusexport:<file> <target-location> # Add -r to copy whole directory - marie@login$ scp -r taurusexport:<directory> <target-location> + marie@local$ scp -r taurusexport:<directory> <target-location> + ``` + + For example, if you have a directory named `output` in your home directory on ZIH systems and + you want to copy it to the directory `/tmp` on your workstation, you would use the following: + + ```console + marie@local$ scp -r taurusexport:output /tmp ``` ### SFTP @@ -115,6 +133,14 @@ the local machine. ## Access From Windows +### Command Line + +Windows 10 (1809 and higher) comes with +[built-in OpenSSH support](https://docs.microsoft.com/en-us/windows-server/administration/openssh/openssh_overview) +including the above described [SCP](#scp) and [SFTP](#sftp). + +### GUI - Using WinSCP + First you have to install [WinSCP](http://winscp.net/eng/download.php). 
Then you have to execute the WinSCP application and configure some diff --git a/doc.zih.tu-dresden.de/docs/index.md b/doc.zih.tu-dresden.de/docs/index.md index 60d43b4e73f285901931f652c55aedabc393c451..60f6f081cf4a1c2ea76663bccd65e9ff866597fb 100644 --- a/doc.zih.tu-dresden.de/docs/index.md +++ b/doc.zih.tu-dresden.de/docs/index.md @@ -26,4 +26,4 @@ Contributions from user-side are highly welcome. Please find out more in our [gu **2021-10-05** Offline-maintenance (black building test) -**2021-09-29** Introduction to HPC at ZIH ([slides](misc/HPC-Introduction.pdf)) +**2021-09-29** Introduction to HPC at ZIH ([HPC introduction slides](misc/HPC-Introduction.pdf)) diff --git a/doc.zih.tu-dresden.de/docs/jobs_and_resources/alpha_centauri.md b/doc.zih.tu-dresden.de/docs/jobs_and_resources/alpha_centauri.md index 3d342f628fc7abfeb851500d3cc6fc785d1a03e2..c2e1bac98c1aeaad9910a98d5b6282df4f3160d7 100644 --- a/doc.zih.tu-dresden.de/docs/jobs_and_resources/alpha_centauri.md +++ b/doc.zih.tu-dresden.de/docs/jobs_and_resources/alpha_centauri.md @@ -5,7 +5,8 @@ It has 34 nodes, each with: * 8 x NVIDIA A100-SXM4 (40 GB RAM) * 2 x AMD EPYC CPU 7352 (24 cores) @ 2.3 GHz with multi-threading enabled -* 1 TB RAM 3.5 TB `/tmp` local NVMe device +* 1 TB RAM +* 3.5 TB `/tmp` local NVMe device * Hostnames: `taurusi[8001-8034]` * Slurm partition `alpha` for batch jobs and `alpha-interactive` for interactive jobs @@ -64,7 +65,8 @@ True ### Python Virtual Environments -Virtual environments allow users to install additional python packages and create an isolated +[Virtual environments](../software/python_virtual_environments.md) allow users to install +additional python packages and create an isolated runtime environment. We recommend using `virtualenv` for this purpose. 
```console diff --git a/doc.zih.tu-dresden.de/docs/jobs_and_resources/binding_and_distribution_of_tasks.md b/doc.zih.tu-dresden.de/docs/jobs_and_resources/binding_and_distribution_of_tasks.md index 4677a625300c59a04160389f4cf9a3bf975018c8..ad411b78cff4c8c4fcd06c4028cb14b6c6438c4f 100644 --- a/doc.zih.tu-dresden.de/docs/jobs_and_resources/binding_and_distribution_of_tasks.md +++ b/doc.zih.tu-dresden.de/docs/jobs_and_resources/binding_and_distribution_of_tasks.md @@ -32,22 +32,24 @@ and `--distribution` for different job types. ## OpenMP Strategies -The illustration below shows the default binding of a pure OpenMP-job on a single node with 16 CPUs +The illustration below shows the default binding of a pure OpenMP job on a single node with 16 CPUs on which 16 threads are allocated. -```Bash -#!/bin/bash -#SBATCH --nodes=1 -#SBATCH --tasks-per-node=1 -#SBATCH --cpus-per-task=16 + +{: align=center} -export OMP_NUM_THREADS=16 +!!! example "Default binding and default distribution" -srun --ntasks 1 --cpus-per-task $OMP_NUM_THREADS ./application -``` + ```bash + #!/bin/bash + #SBATCH --nodes=1 + #SBATCH --tasks-per-node=1 + #SBATCH --cpus-per-task=16 - -{: align=center} + export OMP_NUM_THREADS=16 + + srun --ntasks 1 --cpus-per-task $OMP_NUM_THREADS ./application + ``` ## MPI Strategies @@ -74,8 +76,10 @@ node and odd on each second socket of each node. ### Core Bound -Note: With this command the tasks will be bound to a core for the entire runtime of your -application. +!!! note + + With this command the tasks will be bound to a core for the entire runtime of your + application. 
#### Distribution: block:block diff --git a/doc.zih.tu-dresden.de/docs/jobs_and_resources/partitions_and_limits.md b/doc.zih.tu-dresden.de/docs/jobs_and_resources/partitions_and_limits.md index edf5bae8582cff37ba5dca68d70c70a35438f341..1b0b7e4343c271fca4782e1de6b9038c9e771895 100644 --- a/doc.zih.tu-dresden.de/docs/jobs_and_resources/partitions_and_limits.md +++ b/doc.zih.tu-dresden.de/docs/jobs_and_resources/partitions_and_limits.md @@ -55,7 +55,7 @@ ZIH systems comprises different sets of nodes with different amount of installed where your job may be run. To achieve the shortest possible waiting time for your jobs, you should be aware of the limits shown in the following table. -??? hint "Partitions and memory limits" +???+ hint "Partitions and memory limits" | Partition | Nodes | # Nodes | Cores per Node | MB per Core | MB per Node | GPUs per Node | |:-------------------|:-----------------------------------------|:--------|:----------------|:------------|:------------|:------------------| diff --git a/doc.zih.tu-dresden.de/docs/jobs_and_resources/slurm_examples.md b/doc.zih.tu-dresden.de/docs/jobs_and_resources/slurm_examples.md index 2af016d0188ae4f926b45e7b8fdc14b039e8baa3..65e445f354d08a3473e226cc97c45ff6c01e8c48 100644 --- a/doc.zih.tu-dresden.de/docs/jobs_and_resources/slurm_examples.md +++ b/doc.zih.tu-dresden.de/docs/jobs_and_resources/slurm_examples.md @@ -58,10 +58,10 @@ For MPI-parallel jobs one typically allocates one core per task that has to be s ### Multiple Programs Running Simultaneously in a Job In this short example, our goal is to run four instances of a program concurrently in a **single** -batch script. Of course we could also start a batch script four times with `sbatch` but this is not -what we want to do here. Please have a look at -[this subsection](#multiple-programs-running-simultaneously-in-a-job) -in case you intend to run GPU programs simultaneously in a **single** job. +batch script. 
Of course, we could also start a batch script four times with `sbatch` but this is not +what we want to do here. However, you can also find an example about +[how to run GPU programs simultaneously in a single job](#running-multiple-gpu-applications-simultaneously-in-a-batch-job) +below. !!! example " " @@ -355,4 +355,4 @@ file) that will be executed one after each other with different CPU numbers: ## Array-Job with Afterok-Dependency and Datamover Usage -This is a *todo* +This part is under construction. diff --git a/doc.zih.tu-dresden.de/docs/software/big_data_frameworks.md b/doc.zih.tu-dresden.de/docs/software/big_data_frameworks.md index 247d35c545a70013e45160f6f45f67d9cca80e4b..df7fc8b56a8a015b5a13a8c871b5163b2c1d473d 100644 --- a/doc.zih.tu-dresden.de/docs/software/big_data_frameworks.md +++ b/doc.zih.tu-dresden.de/docs/software/big_data_frameworks.md @@ -1,4 +1,4 @@ -# Big Data Frameworks +# Big Data Analytics [Apache Spark](https://spark.apache.org/), [Apache Flink](https://flink.apache.org/) and [Apache Hadoop](https://hadoop.apache.org/) are frameworks for processing and integrating @@ -32,14 +32,13 @@ The steps are: Apache Spark can be used in [interactive](#interactive-jobs) and [batch](#batch-jobs) jobs as well as via [Jupyter notebooks](#jupyter-notebook). All three ways are outlined in the following. -The usage of Flink with Jupyter notebooks is currently under examination. ## Interactive Jobs ### Default Configuration -The Spark module is available in both `scs5` and `ml` environments. -Thus, Spark can be executed using different CPU architectures, e.g., Haswell and Power9. +The Spark and Flink modules are available in both `scs5` and `ml` environments. +Thus, Spark and Flink can be executed using different CPU architectures, e.g., Haswell and Power9. Let us assume that two nodes should be used for the computation. Use a `srun` command similar to the following to start an interactive session using the partition haswell. 
The following code @@ -61,8 +60,9 @@ Once you have the shell, load desired Big Data framework using the command marie@compute$ module load Flink ``` -Before the application can be started, the Spark cluster needs to be set up. To do this, configure -Spark first using configuration template at `$SPARK_HOME/conf`: +Before the application can be started, the cluster with the allocated nodes needs to be set up. To +do this, configure the cluster first using the configuration template at `$SPARK_HOME/conf` for +Spark or `$FLINK_ROOT_DIR/conf` for Flink: === "Spark" ```console @@ -74,7 +74,7 @@ Spark first using configuration template at `$SPARK_HOME/conf`: ``` This places the configuration in a directory called `cluster-conf-<JOB_ID>` in your `home` -directory, where `<JOB_ID>` stands for the id of the Slurm job. After that, you can start Spark in +directory, where `<JOB_ID>` stands for the id of the Slurm job. After that, you can start in the usual way: === "Spark" @@ -86,7 +86,7 @@ the usual way: marie@compute$ start-cluster.sh ``` -The Spark processes should now be set up and you can start your application, e. g.: +The necessary background processes should now be set up and you can start your application, e. g.: === "Spark" ```console @@ -237,50 +237,34 @@ example below: ## Jupyter Notebook -You can run Jupyter notebooks with Spark on the ZIH systems in a similar way as described on the -[JupyterHub](../access/jupyterhub.md) page. Interaction of Flink with JupyterHub is currently -under examination and will be posted here upon availability. +You can run Jupyter notebooks with Spark and Flink on the ZIH systems in a similar way as described +on the [JupyterHub](../access/jupyterhub.md) page. -### Preparation - -If you want to run Spark in Jupyter notebooks, you have to prepare it first. This is comparable -to [normal Python virtual environments](../software/python_virtual_environments.md#python-virtual-environment). 
-You start with an allocation: - -```console -marie@login$ srun --pty --ntasks=1 --cpus-per-task=2 --mem-per-cpu=2500 --time=01:00:00 bash -l -``` +### Spawning a Notebook -When a node is allocated, install the required packages: +Go to [https://taurus.hrsk.tu-dresden.de/jupyter](https://taurus.hrsk.tu-dresden.de/jupyter). +In the tab "Advanced", go to the field "Preload modules" and select the following Spark or Flink +module: -```console -marie@compute$ cd $HOME -marie@compute$ mkdir jupyter-kernel -marie@compute$ module load Python -marie@compute$ virtualenv --system-site-packages jupyter-kernel/env #Create virtual environment -[...] -marie@compute$ source jupyter-kernel/env/bin/activate #Activate virtual environment. -(env) marie@compute$ pip install ipykernel -[...] -(env) marie@compute$ python -m ipykernel install --user --name haswell-py3.7-spark --display-name="haswell-py3.7-spark" -Installed kernelspec haswell-py3.7-spark in [...] - -(env) marie@compute$ pip install findspark -(env) marie@compute$ deactivate -``` +=== "Spark" + ``` + Spark/3.0.1-Hadoop-2.7-Java-1.8-Python-3.7.4-GCCcore-8.3.0 + ``` +=== "Flink" + ``` + Flink/1.12.3-Java-1.8.0_161-OpenJDK-Python-3.7.4-GCCcore-8.3.0 + ``` -You are now ready to spawn a notebook with Spark. +When your Jupyter instance is started, you can set up Spark/Flink. Since the setup in the notebook +requires more steps than in an interactive session, we have created example notebooks that you can +use as a starting point for convenience: [SparkExample.ipynb](misc/SparkExample.ipynb), +[FlinkExample.ipynb](misc/FlinkExample.ipynb) -### Spawning a Notebook +!!! warning -Assuming that you have prepared everything as described above, you can go to -[https://taurus.hrsk.tu-dresden.de/jupyter](https://taurus.hrsk.tu-dresden.de/jupyter). -In the tab "Advanced", go to the field "Preload modules" and select one of the Spark modules. 
When -your Jupyter instance is started, check whether the kernel that you created in the preparation -phase (see above) is shown in the top right corner of the notebook. If it is not already selected, -select the kernel `haswell-py3.7-spark`. Then, you can set up Spark. Since the setup in the -notebook requires more steps than in an interactive session, we have created an example notebook -that you can use as a starting point for convenience: [SparkExample.ipynb](misc/SparkExample.ipynb) + The notebooks only work with the Spark or Flink module mentioned above. When using other + Spark/Flink modules, it is possible that you have to do additional or other steps in order to + make Spark/Flink running. !!! note diff --git a/doc.zih.tu-dresden.de/docs/software/building_software.md b/doc.zih.tu-dresden.de/docs/software/building_software.md index c3bd76ce331034247b162630b08d36f982ebc45d..c83932a16c1c0227cb160d4853cd1815626fc404 100644 --- a/doc.zih.tu-dresden.de/docs/software/building_software.md +++ b/doc.zih.tu-dresden.de/docs/software/building_software.md @@ -1,12 +1,12 @@ # Building Software While it is possible to do short compilations on the login nodes, it is generally considered good -practice to use a job for that, especially when using many parallel make processes. Note that -starting on December 6th 2016, the `/projects` filesystem will be mounted read-only on all compute +practice to use a job for that, especially when using many parallel make processes. Since 2016, +the `/projects` filesystem is mounted read-only on all compute nodes in order to prevent users from doing large I/O there (which is what the `/scratch` is for). -In consequence, you cannot compile in `/projects` within a job anymore. If you wish to install +In consequence, you cannot compile in `/projects` within a job. If you wish to install software for your project group anyway, you can use a build directory in the `/scratch` filesystem -instead: +instead. 
Every sane build system should allow you to keep your source code tree and your build directory separate, some even demand them to be different directories. Plus, you can set your installation @@ -17,16 +17,16 @@ For instance, when using CMake and keeping your source in `/projects`, you could ```console # save path to your source directory: -marie@login$ export SRCDIR=/projects/p_myproject/mysource +marie@login$ export SRCDIR=/projects/p_marie/mysource # create a build directory in /scratch: -marie@login$ mkdir /scratch/p_myproject/mysoftware_build +marie@login$ mkdir /scratch/p_marie/mysoftware_build # change to build directory within /scratch: -marie@login$ cd /scratch/p_myproject/mysoftware_build +marie@login$ cd /scratch/p_marie/mysoftware_build # create Makefiles: -marie@login$ cmake -DCMAKE_INSTALL_PREFIX=/projects/p_myproject/mysoftware $SRCDIR +marie@login$ cmake -DCMAKE_INSTALL_PREFIX=/projects/p_marie/mysoftware $SRCDIR # build in a job: marie@login$ srun --mem-per-cpu=1500 --cpus-per-task=12 --pty make -j 12 diff --git a/doc.zih.tu-dresden.de/docs/software/cfd.md b/doc.zih.tu-dresden.de/docs/software/cfd.md index 186d7b3a5a97a2daf06d8618c7c91dc91d7ab971..62ed65116e51ae8bbb593664f4bc48a3373d3a41 100644 --- a/doc.zih.tu-dresden.de/docs/software/cfd.md +++ b/doc.zih.tu-dresden.de/docs/software/cfd.md @@ -16,7 +16,7 @@ The OpenFOAM (Open Field Operation and Manipulation) CFD Toolbox can simulate an fluid flows involving chemical reactions, turbulence and heat transfer, to solid dynamics, electromagnetics and the pricing of financial options. OpenFOAM is developed primarily by [OpenCFD Ltd](https://www.openfoam.com) and is freely available and open-source, -licensed under the GNU General Public Licence. +licensed under the GNU General Public License. The command `module spider OpenFOAM` provides the list of installed OpenFOAM versions. 
In order to use OpenFOAM, it is mandatory to set the environment by sourcing the `bashrc` (for users running diff --git a/doc.zih.tu-dresden.de/docs/software/containers.md b/doc.zih.tu-dresden.de/docs/software/containers.md index bbb3e80772f3fcc71480e4555fb146f602806804..d15535933ef7f2b9e0330d07e35168f10fc22ded 100644 --- a/doc.zih.tu-dresden.de/docs/software/containers.md +++ b/doc.zih.tu-dresden.de/docs/software/containers.md @@ -12,10 +12,10 @@ Singularity. Information about the use of Singularity on ZIH systems can be foun In some cases using Singularity requires a Linux machine with root privileges (e.g. using the partition `ml`), the same architecture and a compatible kernel. For many reasons, users on ZIH systems cannot be granted root permissions. A solution is a Virtual Machine (VM) on the partition -`ml` which allows users to gain root permissions in an isolated environment. There are two main +`ml` which allows users to gain root permissions in an isolated environment. There are two main options on how to work with Virtual Machines on ZIH systems: -1. [VM tools](virtual_machines_tools.md): Automative algorithms for using virtual machines; +1. [VM tools](virtual_machines_tools.md): Automated algorithms for using virtual machines; 1. [Manual method](virtual_machines.md): It requires more operations but gives you more flexibility and reliability. @@ -35,7 +35,7 @@ execution. Follow the instructions for [locally installing Singularity](#local-i [container creation](#container-creation). Moreover, existing Docker container can easily be converted, see [Import a docker container](#importing-a-docker-container). -If you are already familar with Singularity, you might be more intressted in our [singularity +If you are already familiar with Singularity, you might be more interested in our [singularity recipes and hints](singularity_recipe_hints.md). 
### Local Installation diff --git a/doc.zih.tu-dresden.de/docs/software/custom_easy_build_environment.md b/doc.zih.tu-dresden.de/docs/software/custom_easy_build_environment.md index d482d89a45a3849054af19a75ccaf64daeb6e9eb..231ce447b0fa8157ebb9b4a8ea6dd9bb1542fa7b 100644 --- a/doc.zih.tu-dresden.de/docs/software/custom_easy_build_environment.md +++ b/doc.zih.tu-dresden.de/docs/software/custom_easy_build_environment.md @@ -1,133 +1,155 @@ # EasyBuild -Sometimes the \<a href="SoftwareModulesList" target="\_blank" -title="List of Modules">modules installed in the cluster\</a> are not -enough for your purposes and you need some other software or a different -version of a software. - -\<br />For most commonly used software, chances are high that there is -already a *recipe* that EasyBuild provides, which you can use. But what -is Easybuild? - -\<a href="<https://easybuilders.github.io/easybuild/>" -target="\_blank">EasyBuild\</a>\<span style="font-size: 1em;"> is the -software used to build and install software on, and create modules for, -Taurus.\</span> - -\<span style="font-size: 1em;">The aim of this page is to introduce -users to working with EasyBuild and to utilizing it to create -modules**.**\</span> - -**Prerequisites:** \<a href="Login" target="\_blank">access\</a> to the -Taurus system and basic knowledge about Linux, \<a href="SystemTaurus" -target="\_blank" title="SystemTaurus">Taurus\</a> and the \<a -href="RuntimeEnvironment" target="\_blank" -title="RuntimeEnvironment">modules system \</a>on Taurus. - -\<span style="font-size: 1em;">EasyBuild uses a configuration file -called recipe or "EasyConfig", which contains all the information about -how to obtain and build the software:\</span> +Sometimes the [modules](modules.md) installed in the cluster are not enough for your purposes and +you need some other software or a different version of a software. 
+ +For most commonly used software, chances are high that there is already a *recipe* that EasyBuild +provides, which you can use. But what is EasyBuild? + +[EasyBuild](https://easybuild.io/) is the software used to build and install +software on ZIH systems. + +The aim of this page is to introduce users to working with EasyBuild and to utilizing it to create +modules. + +## Prerequisites + +1. [Shell access](../access/ssh_login.md) to ZIH systems +1. basic knowledge about: + - [the ZIH system](../jobs_and_resources/hardware_overview.md) + - [the module system](modules.md) on ZIH systems + +EasyBuild uses a configuration file called recipe or "EasyConfig", which contains all the +information about how to obtain and build the software: - Name - Version - Toolchain (think: Compiler + some more) - Download URL -- Buildsystem (e.g. configure && make or cmake && make) +- Build system (e.g. `configure && make` or `cmake && make`) - Config parameters - Tests to ensure a successful build -The "Buildsystem" part is implemented in so-called "EasyBlocks" and -contains the common workflow. Sometimes those are specialized to -encapsulate behaviour specific to multiple/all versions of the software. -\<span style="font-size: 1em;">Everything is written in Python, which -gives authors a great deal of flexibility.\</span> +The build system part is implemented in so-called "EasyBlocks" and contains the common workflow. +Sometimes, those are specialized to encapsulate behavior specific to multiple/all versions of the +software. Everything is written in Python, which gives authors a great deal of flexibility. ## Set up a custom module environment and build your own modules -Installation of the new software (or version) does not require any -specific credentials. +Installation of the new software (or version) does not require any specific credentials. -\<br />Prerequisites: 1 An existing EasyConfig 1 a place to put your -modules. 
\<span style="font-size: 1em;">Step by step guide:\</span> +### Prerequisites -1\. Create a \<a href="WorkSpaces" target="\_blank">workspace\</a> where -you'll install your modules. You need a place where your modules will be -placed. This needs to be done only once : +1. An existing EasyConfig +1. a place to put your modules. - ws_allocate -F scratch EasyBuild 50 # +### Step by step guide -2\. Allocate nodes. You can do this with interactive jobs (see the -example below) and/or put commands in a batch file and source it. The -latter is recommended for non-interactive jobs, using the command sbatch -in place of srun. For the sake of illustration, we use an interactive -job as an example. The node parameters depend, to some extent, on the -architecture you want to use. ML nodes for the Power9 and others for the -x86. We will use Haswell nodes. +**Step 1:** Create a [workspace](../data_lifecycle/workspaces.md#allocate-a-workspace) where you +install your modules. You need a place where your modules are placed. This needs to be done only +once: - srun -p haswell -N 1 -c 4 --time=08:00:00 --pty /bin/bash +```console +marie@login$ ws_allocate -F scratch EasyBuild 50 +marie@login$ ws_list | grep 'directory.*EasyBuild' + workspace directory : /scratch/ws/1/marie-EasyBuild +``` -\*Using EasyBuild on the login nodes is not allowed\* +**Step 2:** Allocate nodes. You can do this with interactive jobs (see the example below) and/or +put commands in a batch file and source it. The latter is recommended for non-interactive jobs, +using the command `sbatch` instead of `srun`. For the sake of illustration, we use an +interactive job as an example. Depending on the partitions that you want the module to be usable on +later, you need to select nodes with the same architecture. Thus, use nodes from partition ml for +building, if you want to use the module on nodes of that partition. 
In this example, we assume +that we want to use the module on nodes with x86 architecture and thus, we use Haswell nodes. -3\. Load EasyBuild module. +```console +marie@login$ srun --partition=haswell --nodes=1 --cpus-per-task=4 --time=08:00:00 --pty /bin/bash -l +``` - module load EasyBuild +!!! warning -\<br />4. Specify Workspace. The rest of the guide is based on it. -Please create an environment variable called \`WORKSPACE\` with the -location of your Workspace: + Using EasyBuild on the login nodes is not allowed. - WORKSPACE=<location_of_your_workspace> # For example: WORKSPACE=/scratch/ws/anpo879a-EasyBuild +**Step 3:** Specify the workspace. The rest of the guide is based on it. Please create an +environment variable called `WORKSPACE` with the path to your workspace: -5\. Load the correct modenv according to your current or target -architecture: \`ml modenv/scs5\` for x86 (default) or \`modenv/ml\` for -Power9 (ml partition). Load EasyBuild module +```console +marie@compute$ export WORKSPACE=/scratch/ws/1/marie-EasyBuild #see output of ws_list above +``` - ml modenv/scs5 - module load EasyBuild +**Step 4:** Load the correct module environment `modenv` according to your current or target +architecture: -6\. Set up your environment: +=== "x86 (default, e. g. partition haswell)" + ```console + marie@compute$ module load modenv/scs5 + ``` +=== "Power9 (partition ml)" + ```console + marie@ml$ module load modenv/ml + ``` - export EASYBUILD_ALLOW_LOADED_MODULES=EasyBuild,modenv/scs5 - export EASYBUILD_DETECT_LOADED_MODULES=unload - export EASYBUILD_BUILDPATH="/tmp/${USER}-EasyBuild${SLURM_JOB_ID:-}" - export EASYBUILD_SOURCEPATH="${WORKSPACE}/sources" - export EASYBUILD_INSTALLPATH="${WORKSPACE}/easybuild-$(basename $(readlink -f /sw/installed))" - export EASYBUILD_INSTALLPATH_MODULES="${EASYBUILD_INSTALLPATH}/modules" - module use "${EASYBUILD_INSTALLPATH_MODULES}/all" - export LMOD_IGNORE_CACHE=1 +**Step 5:** Load module `EasyBuild` -7\. 
\<span style="font-size: 13px;">Now search for an existing -EasyConfig: \</span> +```console +marie@compute$ module load EasyBuild +``` - eb --search TensorFlow +**Step 6:** Set up your environment: -\<span style="font-size: 13px;">8. Build the EasyConfig and its -dependencies\</span> +```console +marie@compute$ export EASYBUILD_ALLOW_LOADED_MODULES=EasyBuild,modenv/scs5 +marie@compute$ export EASYBUILD_DETECT_LOADED_MODULES=unload +marie@compute$ export EASYBUILD_BUILDPATH="/tmp/${USER}-EasyBuild${SLURM_JOB_ID:-}" +marie@compute$ export EASYBUILD_SOURCEPATH="${WORKSPACE}/sources" +marie@compute$ export EASYBUILD_INSTALLPATH="${WORKSPACE}/easybuild-$(basename $(readlink -f /sw/installed))" +marie@compute$ export EASYBUILD_INSTALLPATH_MODULES="${EASYBUILD_INSTALLPATH}/modules" +marie@compute$ module use "${EASYBUILD_INSTALLPATH_MODULES}/all" +marie@compute$ export LMOD_IGNORE_CACHE=1 +``` - eb TensorFlow-1.8.0-fosscuda-2018a-Python-3.6.4.eb -r +**Step 7:** Now search for an existing EasyConfig: -\<span style="font-size: 13px;">After this is done (may take A LONG -time), you can load it just like any other module.\</span> +```console +marie@compute$ eb --search TensorFlow +``` -9\. To use your custom build modules you only need to rerun step 4, 5, 6 -and execute the usual: +**Step 8:** Build the EasyConfig and its dependencies (option `-r`) - module load <name_of_your_module> # For example module load TensorFlow-1.8.0-fosscuda-2018a-Python-3.6.4 +```console +marie@compute$ eb TensorFlow-1.8.0-fosscuda-2018a-Python-3.6.4.eb -r +``` -The key is the \`module use\` command which brings your modules into -scope so \`module load\` can find them and the LMOD_IGNORE_CACHE line -which makes LMod pick up the custom modules instead of searching the +This may take a long time. After this is done, you can load it just like any other module. 
+
+**Step 9:** To use your custom build modules you only need to rerun steps 3, 4, 5, 6 and execute
+the usual:
+
+```console
+marie@compute$ module load TensorFlow-1.8.0-fosscuda-2018a-Python-3.6.4 #replace with the name of your module
+```
+
+The key is the `module use` command, which brings your modules into scope, so `module load` can find
+them. The `LMOD_IGNORE_CACHE` line makes `LMod` pick up the custom modules instead of searching the
 system cache.
 
 ## Troubleshooting
 
-When building your EasyConfig fails, you can first check the log
-mentioned and scroll to the bottom to see what went wrong.
+When building your EasyConfig fails, you can first check the log mentioned and scroll to the bottom
+to see what went wrong.
+
+It might also be helpful to inspect the build environment EasyBuild uses. For that you can run:
+
+```console
+marie@compute$ eb myEC.eb --dump-env-script
+```
+
+This command creates a sourceable `.env`-file with `module load` and `export` commands that show
+what EasyBuild does before running, e.g., the configuration step.
 
-It might also be helpful to inspect the build environment EB uses. For
-that you can run \`eb myEC.eb --dump-env-script\` which creates a
-sourceable .env file with \`module load\` and \`export\` commands that
-show what EB does before running, e.g., the configure step.
+It might also be helpful to use -It might also be helpful to use '\<span style="font-size: 1em;">export -LMOD_IGNORE_CACHE=0'\</span> +```console +marie@compute$ export LMOD_IGNORE_CACHE=0 +``` diff --git a/doc.zih.tu-dresden.de/docs/software/data_analytics.md b/doc.zih.tu-dresden.de/docs/software/data_analytics.md index 44414493405bc36ffed74bb85fb805b331308af7..b4a5f7f8b9f86c9a47fec20b875970efd4d787b2 100644 --- a/doc.zih.tu-dresden.de/docs/software/data_analytics.md +++ b/doc.zih.tu-dresden.de/docs/software/data_analytics.md @@ -24,7 +24,8 @@ marie@compute$ module spider <software_name> Refer to the section covering [modules](modules.md) for further information on the modules system. Additional software or special versions of [individual modules](custom_easy_build_environment.md) -can be installed individually by each user. If possible, the use of virtual environments is +can be installed individually by each user. If possible, the use of +[virtual environments](python_virtual_environments.md) is recommended (e.g. for Python). Likewise, software can be used within [containers](containers.md). For the transfer of larger amounts of data into and within the system, the diff --git a/doc.zih.tu-dresden.de/docs/software/debuggers.md b/doc.zih.tu-dresden.de/docs/software/debuggers.md index d88ca5f068f0145e8acc46407feca93a14968522..0d4bda97f61fe6453d6027406ff88145c4204cfb 100644 --- a/doc.zih.tu-dresden.de/docs/software/debuggers.md +++ b/doc.zih.tu-dresden.de/docs/software/debuggers.md @@ -73,8 +73,8 @@ modified by DDT available, which has better support for Fortran 90 (e.g. 
derive 
 
 - Intuitive graphical user interface and great support for parallel applications
-- We have 1024 licences, so many user can use this tool for parallel debugging
-- Don't expect that debugging an MPI program with 100ths of process will always work without
+- We have 1024 licenses, so many users can use this tool for parallel debugging
+- Don't expect that debugging an MPI program with hundreds of processes will always work without
   problems
 - The more processes and nodes involved, the higher is the probability for timeouts or other
   problems
@@ -159,7 +159,7 @@ marie@login$ srun -n 1 valgrind ./myprog
 
 - Not recommended for MPI parallel programs, since usually the MPI library will throw
   a lot of errors. But you may use Valgrind the following way such that every rank
-  writes its own Valgrind logfile:
+  writes its own Valgrind log file:
 
   ```console
   marie@login$ module load Valgrind
diff --git a/doc.zih.tu-dresden.de/docs/software/fem_software.md b/doc.zih.tu-dresden.de/docs/software/fem_software.md
index 3be2314889bfe45f9554fb499c4d757337bef33d..160aeded633f50e9abfdfae6d74a7627257ca565 100644
--- a/doc.zih.tu-dresden.de/docs/software/fem_software.md
+++ b/doc.zih.tu-dresden.de/docs/software/fem_software.md
@@ -176,7 +176,7 @@ under:
 
 `<MaxNumberProcessors>2</MaxNumberProcessors>`
 
-that you can simply change to something like 16 oder 24. For now, you should stay within single-node
+that you can simply change to something like 16 or 24. For now, you should stay within single-node
 boundaries, because multi-node calculations require additional parameters. The number you choose
 should match your used `--cpus-per-task` parameter in your job file.
diff --git a/doc.zih.tu-dresden.de/docs/software/gpu_programming.md b/doc.zih.tu-dresden.de/docs/software/gpu_programming.md index 9847cc9dbfec4137eada70dbc23285c7825effc7..070176efcb2ab0f463da30675841ade0e0a585a3 100644 --- a/doc.zih.tu-dresden.de/docs/software/gpu_programming.md +++ b/doc.zih.tu-dresden.de/docs/software/gpu_programming.md @@ -2,8 +2,9 @@ ## Directive Based GPU Programming -Directives are special compiler commands in your C/C++ or Fortran source code. The tell the compiler -how to parallelize and offload work to a GPU. This section explains how to use this technique. +Directives are special compiler commands in your C/C++ or Fortran source code. They tell the +compiler how to parallelize and offload work to a GPU. This section explains how to use this +technique. ### OpenACC @@ -19,10 +20,11 @@ newer for full support for the NVIDIA Tesla K20x GPUs at ZIH. #### Using OpenACC with PGI compilers -* For compilaton please add the compiler flag `-acc`, to enable OpenACC interpreting by the compiler; -* `-Minfo` will tell you what the compiler is actually doing to your code; +* For compilation, please add the compiler flag `-acc` to enable OpenACC interpreting by the + compiler; +* `-Minfo` tells you what the compiler is actually doing to your code; * If you only want to use the created binary at ZIH resources, please also add `-ta=nvidia:keple`; -* OpenACC Turorial: intro1.pdf, intro2.pdf. +* OpenACC Tutorial: intro1.pdf, intro2.pdf. ### HMPP @@ -38,4 +40,4 @@ use the following slides as an introduction: * Introduction to CUDA; * Advanced Tuning for NVIDIA Kepler GPUs. -In order to compiler an application with CUDA use the `nvcc` compiler command. +In order to compile an application with CUDA use the `nvcc` compiler command. 
diff --git a/doc.zih.tu-dresden.de/docs/software/hyperparameter_optimization.md b/doc.zih.tu-dresden.de/docs/software/hyperparameter_optimization.md index 38190764e6c9efedb275ec9ff4324d916c851566..8f61fe49fd56642aaded82cf711ca92d0035b99f 100644 --- a/doc.zih.tu-dresden.de/docs/software/hyperparameter_optimization.md +++ b/doc.zih.tu-dresden.de/docs/software/hyperparameter_optimization.md @@ -270,9 +270,9 @@ This GUI guides through the configuration process and as result a configuration automatically according to the GUI input. If you are more familiar with using OmniOpt later on, this configuration file can be modified directly without using the GUI. -A screenshot of the GUI, including a properly configuration for the MNIST fashion example is shown -below. The GUI, in which the below displayed values are already entered, can be reached -[here](https://imageseg.scads.ai/omnioptgui/?maxevalserror=5&mem_per_worker=1000&number_of_parameters=3¶m_0_values=10%2C50%2C100¶m_1_values=8%2C16%2C32¶m_2_values=10%2C15%2C30¶m_0_name=out-layer1¶m_1_name=batchsize¶m_2_name=batchsize&account=&projectname=mnist_fashion_optimization_set_1&partition=alpha&searchtype=tpe.suggest¶m_0_type=hp.choice¶m_1_type=hp.choice¶m_2_type=hp.choice&max_evals=1000&objective_program=bash%20%3C%2Fpath%2Fto%2Fwrapper-script%2Frun-mnist-fashion.sh%3E%20--out-layer1%3D%28%24x_0%29%20--batchsize%3D%28%24x_1%29%20--epochs%3D%28%24x_2%29&workdir=%3C%2Fscratch%2Fws%2Fomniopt-workdir%2F%3E). 
+A screenshot of +[the GUI](https://imageseg.scads.ai/omnioptgui/?maxevalserror=5&mem_per_worker=1000&number_of_parameters=3¶m_0_values=10%2C50%2C100¶m_1_values=8%2C16%2C32¶m_2_values=10%2C15%2C30¶m_0_name=out-layer1¶m_1_name=batchsize¶m_2_name=batchsize&account=&projectname=mnist_fashion_optimization_set_1&partition=alpha&searchtype=tpe.suggest¶m_0_type=hp.choice¶m_1_type=hp.choice¶m_2_type=hp.choice&max_evals=1000&objective_program=bash%20%3C%2Fpath%2Fto%2Fwrapper-script%2Frun-mnist-fashion.sh%3E%20--out-layer1%3D%28%24x_0%29%20--batchsize%3D%28%24x_1%29%20--epochs%3D%28%24x_2%29&workdir=%3C%2Fscratch%2Fws%2Fomniopt-workdir%2F%3E), +including a properly configuration for the MNIST fashion example is shown below. Please modify the paths for `objective program` and `workdir` according to your needs. diff --git a/doc.zih.tu-dresden.de/docs/software/misc/FlinkExample.ipynb b/doc.zih.tu-dresden.de/docs/software/misc/FlinkExample.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..5a867b8750704ea92a318087d82bb0ca3355018d --- /dev/null +++ b/doc.zih.tu-dresden.de/docs/software/misc/FlinkExample.ipynb @@ -0,0 +1,159 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import sys\n", + "!{sys.executable} -m pip install apache-flink --user" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%bash\n", + "echo $FLINK_ROOT_DIR\n", + "echo $JAVA_HOME\n", + "hostname\n", + "if [ ! 
-d $HOME/jupyter-flink-conf ]\n", + "then\n", + "cp -r $FLINK_ROOT_DIR/conf $HOME/jupyter-flink-conf\n", + "chmod -R u+w $HOME/jupyter-flink-conf\n", + "fi" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import sys\n", + "import os\n", + "os.environ['FLINK_CONF_DIR'] = os.environ['HOME'] + '/cluster-conf-' + os.environ['SLURM_JOBID'] + '/flink'\n", + "os.environ['PYTHONPATH'] = os.environ['PYTHONPATH'] + ':' + os.environ['HOME'] + '/.local/lib/python3.6/site-packages'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!SHELL=/bin/bash bash framework-configure.sh flink $HOME/jupyter-flink-conf" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "exitcode = os.system('start-cluster.sh')\n", + "if not exitcode:\n", + " print(\"started Flink cluster successful\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%bash\n", + "echo \"This is a short story for you. In this story nothing is happening. 
Have a nice day!\" > myFlinkTestFile" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pyflink.datastream import StreamExecutionEnvironment\n", + "from pyflink.datastream.connectors import FileSource\n", + "from pyflink.datastream.connectors import StreamFormat\n", + "from pyflink.common.watermark_strategy import WatermarkStrategy\n", + "from pyflink.common.typeinfo import Types\n", + "\n", + "env = StreamExecutionEnvironment.get_execution_environment()\n", + "env.set_parallelism(2)\n", + "#set the Python executable for the workers\n", + "env.set_python_executable(sys.executable)\n", + "# define the source\n", + "ds = env.from_source(source=FileSource.for_record_stream_format(StreamFormat.text_line_format(),\n", + " \"myFlinkTestFile\").process_static_file_set().build(),\n", + " watermark_strategy=WatermarkStrategy.for_monotonous_timestamps(),\n", + " source_name=\"file_source\")\n", + "\n", + "def split(line):\n", + " yield from line.split()\n", + "\n", + " \n", + "# compute word count\n", + "ds = ds.flat_map(split) \\\n", + " .map(lambda i: (i, 1), output_type=Types.TUPLE([Types.STRING(), Types.INT()])) \\\n", + " .key_by(lambda i: i[0]) \\\n", + " .reduce(lambda i, j: (i[0], i[1] + j[1])) \\\n", + " .map(lambda i: print(i))\n", + "\n", + "# submit for execution\n", + "env.execute()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%bash\n", + "stop-cluster.sh" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!ps -ef | grep -i java" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!pkill -f \"java\"" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + 
"version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.10" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/doc.zih.tu-dresden.de/docs/software/misc/SparkExample.ipynb b/doc.zih.tu-dresden.de/docs/software/misc/SparkExample.ipynb index 67eb37e898667946a0a6dbdf60bc104908e12601..959b536b85dd3d5d01c79217b697506a7517d4f3 100644 --- a/doc.zih.tu-dresden.de/docs/software/misc/SparkExample.ipynb +++ b/doc.zih.tu-dresden.de/docs/software/misc/SparkExample.ipynb @@ -1,5 +1,24 @@ { "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import sys\n", + "!{sys.executable} -m pip install findspark --user" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!which python" + ] + }, { "cell_type": "code", "execution_count": null, @@ -27,7 +46,8 @@ "import sys\n", "import os\n", "os.environ['PYSPARK_PYTHON'] = sys.executable\n", - "os.environ['SPARK_CONF_DIR'] = os.environ['HOME'] + '/cluster-conf-' + os.environ['SLURM_JOBID'] + '/spark'" + "os.environ['SPARK_CONF_DIR'] = os.environ['HOME'] + '/cluster-conf-' + os.environ['SLURM_JOBID'] + '/spark'\n", + "os.environ['PYTHONPATH'] = os.environ['PYTHONPATH'] + ':' + os.environ['HOME'] + '/.local/lib/python3.6/site-packages'" ] }, { @@ -48,6 +68,16 @@ "!start-all.sh" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import findspark\n", + "findspark.init(os.environ['SPARK_HOME'])" + ] + }, { "cell_type": "code", "execution_count": null, @@ -116,20 +146,13 @@ "source": [ "!pkill -f \"pyspark-shell\"" ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { "kernelspec": { - "display_name": "haswell-py3.7-spark", + "display_name": "Python 3", 
"language": "python", - "name": "haswell-py3.7-spark" + "name": "python3" }, "language_info": { "codemirror_mode": { @@ -141,7 +164,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.4" + "version": "3.6.10" } }, "nbformat": 4, diff --git a/doc.zih.tu-dresden.de/docs/software/misc/zsh_autocd.png b/doc.zih.tu-dresden.de/docs/software/misc/zsh_autocd.png new file mode 100644 index 0000000000000000000000000000000000000000..1d30a13f2dcc3af6e706fe8849aff6ee0739a76c Binary files /dev/null and b/doc.zih.tu-dresden.de/docs/software/misc/zsh_autocd.png differ diff --git a/doc.zih.tu-dresden.de/docs/software/misc/zsh_autocomplete_parameters.png b/doc.zih.tu-dresden.de/docs/software/misc/zsh_autocomplete_parameters.png new file mode 100644 index 0000000000000000000000000000000000000000..374e34a84ee88d6c0c9d47c47af609d01fc2c63c Binary files /dev/null and b/doc.zih.tu-dresden.de/docs/software/misc/zsh_autocomplete_parameters.png differ diff --git a/doc.zih.tu-dresden.de/docs/software/misc/zsh_autosuggestion.png b/doc.zih.tu-dresden.de/docs/software/misc/zsh_autosuggestion.png new file mode 100644 index 0000000000000000000000000000000000000000..872ed226a3f66e78063ad610e5edd8c0463a2922 Binary files /dev/null and b/doc.zih.tu-dresden.de/docs/software/misc/zsh_autosuggestion.png differ diff --git a/doc.zih.tu-dresden.de/docs/software/misc/zsh_syntax_highlighting.png b/doc.zih.tu-dresden.de/docs/software/misc/zsh_syntax_highlighting.png new file mode 100644 index 0000000000000000000000000000000000000000..0e1e888c2bab317d1309289c07582dc08cdd1858 Binary files /dev/null and b/doc.zih.tu-dresden.de/docs/software/misc/zsh_syntax_highlighting.png differ diff --git a/doc.zih.tu-dresden.de/docs/software/misc/zsh_typo.png b/doc.zih.tu-dresden.de/docs/software/misc/zsh_typo.png new file mode 100644 index 0000000000000000000000000000000000000000..de04ba3d061cfb3c402e8b6d02bd7f60698e69c8 Binary files /dev/null and 
b/doc.zih.tu-dresden.de/docs/software/misc/zsh_typo.png differ diff --git a/doc.zih.tu-dresden.de/docs/software/modules.md b/doc.zih.tu-dresden.de/docs/software/modules.md index 58f200d25f01d52385626776b53c93f38e999397..b4aa437d270b4dda1a64f655d3c8a9db9238df2c 100644 --- a/doc.zih.tu-dresden.de/docs/software/modules.md +++ b/doc.zih.tu-dresden.de/docs/software/modules.md @@ -156,7 +156,7 @@ The command `module spider <modname>` allows searching for a specific software a environments. It will also display information on how to load a particular module when giving a precise module (with version) as the parameter. -??? example +??? example "Spider command" ```console marie@login$ module spider p7zip @@ -179,6 +179,54 @@ module (with version) as the parameter. ---------------------------------------------------------------------------------------------------------------------------------------------------------- ``` +In some cases a desired software is available as an extension of a module. + +??? example "Extension module" + ```console hl_lines="9" + marie@login$ module spider tensorboard + + -------------------------------------------------------------------------------------------------------------------------------- + tensorboard: + -------------------------------------------------------------------------------------------------------------------------------- + Versions: + tensorboard/2.4.1 (E) + + Names marked by a trailing (E) are extensions provided by another module. + [...] + ``` + + You retrieve further information using the `spider` command. + + ```console + marie@login$ module spider tensorboard/2.4.1 + + -------------------------------------------------------------------------------------------------------------------------------- + tensorboard: tensorboard/2.4.1 (E) + -------------------------------------------------------------------------------------------------------------------------------- + This extension is provided by the following modules. 
To access the extension you must load one of the following modules. Note that any module names in parentheses show the module location in the software hierarchy.
+
+       TensorFlow/2.4.1 (modenv/hiera GCC/10.2.0 CUDA/11.1.1 OpenMPI/4.0.5)
+       TensorFlow/2.4.1-fosscuda-2019b-Python-3.7.4 (modenv/ml)
+       TensorFlow/2.4.1-foss-2020b (modenv/scs5)
+
+    Names marked by a trailing (E) are extensions provided by another module.
+    ```
+
+    Finally, you can load the dependencies and `tensorboard/2.4.1` and check the version.
+
+    ```console
+    marie@login$ module load modenv/hiera GCC/10.2.0 CUDA/11.1.1 OpenMPI/4.0.5
+
+    The following have been reloaded with a version change:
+      1) modenv/scs5 => modenv/hiera
+
+    Module GCC/10.2.0, CUDA/11.1.1, OpenMPI/4.0.5 and 15 dependencies loaded.
+    marie@login$ module load TensorFlow/2.4.1
+    Module TensorFlow/2.4.1 and 34 dependencies loaded.
+    marie@login$ tensorboard --version
+    2.4.1
+    ```
+
 ## Per-Architecture Builds
 
 Since we have a heterogeneous cluster, we do individual builds of some of the software for each
@@ -206,7 +254,8 @@ Note that this will not work for meta-modules that do not have an installation d
 
 ## Advanced Usage
 
-For writing your own Modulefiles please have a look at the [Guide for writing project and private Modulefiles](private_modules.md).
+For writing your own module files please have a look at the
+[Guide for writing project and private module files](private_modules.md).
## Troubleshooting diff --git a/doc.zih.tu-dresden.de/docs/software/mpi_usage_error_detection.md b/doc.zih.tu-dresden.de/docs/software/mpi_usage_error_detection.md index 8d1d7e17a02c3dd2ab572216899cd37f7a9aee3a..b083e80cf9962a01a6580f8b5393912ebd2c3f40 100644 --- a/doc.zih.tu-dresden.de/docs/software/mpi_usage_error_detection.md +++ b/doc.zih.tu-dresden.de/docs/software/mpi_usage_error_detection.md @@ -40,7 +40,7 @@ Besides loading a MUST module, no further changes are needed during compilation ### Running your Application with MUST -In order to run your application with MUST you need to replace the srun command with mustrun: +In order to run your application with MUST you need to replace the `srun` command with `mustrun`: ```console marie@login$ mustrun -np <number of MPI processes> ./<your binary> @@ -65,14 +65,14 @@ marie@login$ mustrun -np 4 ./fancy-program [MUST] Execution finished, inspect "/home/marie/MUST_Output.html"! ``` -Besides replacing the srun command you need to be aware that **MUST always allocates an extra +Besides replacing the `srun` command you need to be aware that **MUST always allocates an extra process**, i.e. if you issue a `mustrun -np 4 ./a.out` then MUST will start 5 processes instead. This is usually not critical, however in batch jobs **make sure to allocate an extra CPU for this task**. Finally, MUST assumes that your application may crash at any time. To still gather correctness results under this assumption is extremely expensive in terms of performance overheads. Thus, if -your application does not crash, you should add an "--must:nocrash" to the mustrun command to make +your application does not crash, you should add `--must:nocrash` to the `mustrun` command to make MUST aware of this knowledge. Overhead is drastically reduced with this switch. 
### Result Files diff --git a/doc.zih.tu-dresden.de/docs/software/papi.md b/doc.zih.tu-dresden.de/docs/software/papi.md index 9d96cc58f4453692ad7b57abe3e56abda1539290..2de80b4e8a0f420a6b42cd01a3de027b5fb89be2 100644 --- a/doc.zih.tu-dresden.de/docs/software/papi.md +++ b/doc.zih.tu-dresden.de/docs/software/papi.md @@ -20,8 +20,8 @@ To collect performance events, PAPI provides two APIs, the *high-level* and *low The high-level API provides the ability to record performance events inside instrumented regions of serial, multi-processing (MPI, SHMEM) and thread (OpenMP, Pthreads) parallel applications. It is -designed for simplicity, not flexibility. For more details click -[here](https://bitbucket.org/icl/papi/wiki/PAPI-HL.md). +designed for simplicity, not flexibility. More details can be found in the +[PAPI wiki High-Level API description](https://bitbucket.org/icl/papi/wiki/PAPI-HL.md). The following code example shows the use of the high-level API by marking a code section. @@ -86,19 +86,19 @@ more output files in JSON format. ### Low-Level API -The low-level API manages hardware events in user-defined groups -called Event Sets. It is meant for experienced application programmers and tool developers wanting -fine-grained measurement and control of the PAPI interface. It provides access to both PAPI preset -and native events, and supports all installed components. For more details on the low-level API, -click [here](https://bitbucket.org/icl/papi/wiki/PAPI-LL.md). +The low-level API manages hardware events in user-defined groups called Event Sets. It is meant for +experienced application programmers and tool developers wanting fine-grained measurement and +control of the PAPI interface. It provides access to both PAPI preset and native events, and +supports all installed components. The PAPI wiki contains also a page with more details on the +[low-level API](https://bitbucket.org/icl/papi/wiki/PAPI-LL.md). 
## Usage on ZIH Systems Before you start a PAPI measurement, check which events are available on the desired architecture. -For this purpose PAPI offers the tools `papi_avail` and `papi_native_avail`. If you want to measure +For this purpose, PAPI offers the tools `papi_avail` and `papi_native_avail`. If you want to measure multiple events, please check which events can be measured concurrently using the tool -`papi_event_chooser`. For more details on the PAPI tools click -[here](https://bitbucket.org/icl/papi/wiki/PAPI-Overview.md#markdown-header-papi-utilities). +`papi_event_chooser`. The PAPI wiki contains more details on +[the PAPI tools](https://bitbucket.org/icl/papi/wiki/PAPI-Overview.md#markdown-header-papi-utilities). !!! hint @@ -133,8 +133,7 @@ compile your application against the PAPI library. !!! hint The PAPI modules on ZIH systems are only installed with the default `perf_event` component. If you - want to measure, e.g., GPU events, you have to install your own PAPI. Instructions on how to - download and install PAPI can be found - [here](https://bitbucket.org/icl/papi/wiki/Downloading-and-Installing-PAPI.md). To install PAPI - with additional components, you have to specify them during configure, for details click - [here](https://bitbucket.org/icl/papi/wiki/PAPI-Overview.md#markdown-header-components). + want to measure, e.g., GPU events, you have to install your own PAPI. Please see the + [external instructions on how to download and install PAPI](https://bitbucket.org/icl/papi/wiki/Downloading-and-Installing-PAPI.md). + To install PAPI with additional components, you have to specify them during configure as + described for the [Installation of Components](https://bitbucket.org/icl/papi/wiki/PAPI-Overview.md#markdown-header-components). 
diff --git a/doc.zih.tu-dresden.de/docs/software/pika.md b/doc.zih.tu-dresden.de/docs/software/pika.md index 36aab905dbf33602c64333e2a695070ffc0ad9db..deecced31ce928fcb2347a286d7f13a83ed05d17 100644 --- a/doc.zih.tu-dresden.de/docs/software/pika.md +++ b/doc.zih.tu-dresden.de/docs/software/pika.md @@ -2,19 +2,19 @@ PIKA is a hardware performance monitoring stack to identify inefficient HPC jobs. Users of ZIH systems have the possibility to visualize and analyze the efficiency of their jobs via the -[PIKA web interface](https://selfservice.zih.tu-dresden.de/l/index.php/hpcportal/jobmonitoring/z../jobs_and_resources). +[PIKA web interface](https://selfservice.zih.tu-dresden.de/l/index.php/hpcportal/jobmonitoring/zih/jobs). !!! hint To understand this small guide, it is recommended to open the - [web interface](https://selfservice.zih.tu-dresden.de/l/index.php/hpcportal/jobmonitoring/z../jobs_and_resources) + [web interface](https://selfservice.zih.tu-dresden.de/l/index.php/hpcportal/jobmonitoring/zih/jobs) in a separate window. Furthermore, at least one real HPC job should have been submitted. ## Overview PIKA consists of several components and tools. It uses the collection daemon collectd, InfluxDB to store time-series data and MariaDB to store job metadata. Furthermore, it provides a powerful -[web interface](https://selfservice.zih.tu-dresden.de/l/index.php/hpcportal/jobmonitoring/z../jobs_and_resources) +[web interface](https://selfservice.zih.tu-dresden.de/l/index.php/hpcportal/jobmonitoring/zih/jobs) for the visualization and analysis of job performance data. ## Table View and Job Search @@ -90,7 +90,7 @@ reason for further investigation, since not all HUs are equally utilized. To identify imbalances between HUs over time, the visualization modes *Best* and *Lowest* are a first indicator how much the HUs differ in terms of resource usage. The timelines *Best* and -*Lowest* show the recoded performance data of the best/lowest average HU over time. 
+*Lowest* show the recorded performance data of the best/lowest average HU over time. ## Footprint Visualization @@ -111,7 +111,7 @@ investigating their correlation. ## Hints If users wish to perform their own measurement of performance counters using performance tools other -than PIKA, it is recommended to disable PIKA monitoring. This can be done using the following slurm +than PIKA, it is recommended to disable PIKA monitoring. This can be done using the following Slurm flags in the job script: ```Bash diff --git a/doc.zih.tu-dresden.de/docs/software/python_virtual_environments.md b/doc.zih.tu-dresden.de/docs/software/python_virtual_environments.md index e19daeeb6731aa32eb993f2495e6ec443bebe2dd..67b10817c738b414a3302388b5cca3392ff96bb1 100644 --- a/doc.zih.tu-dresden.de/docs/software/python_virtual_environments.md +++ b/doc.zih.tu-dresden.de/docs/software/python_virtual_environments.md @@ -93,8 +93,6 @@ are in the virtual environment. You can deactivate the conda environment as foll (conda-env) marie@compute$ conda deactivate #Leave the virtual environment ``` -TODO: Link to this page from other DA/ML topics. insert link in alpha centauri - ??? example This is an example on partition Alpha. The example creates a virtual environment, and installs diff --git a/doc.zih.tu-dresden.de/docs/software/vampir.md b/doc.zih.tu-dresden.de/docs/software/vampir.md index 24a22c35acda9afcfa6e1e56bdd553da716ec245..9df5eb62a0d461da97fcb2ce28f461d9042e93a2 100644 --- a/doc.zih.tu-dresden.de/docs/software/vampir.md +++ b/doc.zih.tu-dresden.de/docs/software/vampir.md @@ -146,7 +146,7 @@ marie@local$ ssh -L 30000:taurusi1253:30055 taurus.hrsk.tu-dresden.de ``` Now, the port 30000 on your desktop is connected to the VampirServer port 30055 at the compute node -taurusi1253 of the ZIH system. Finally, start your local Vampir client and establish a remote +`taurusi1253` of the ZIH system. 
Finally, start your local Vampir client and establish a remote connection to `localhost`, port 30000 as described in the manual. ```console diff --git a/doc.zih.tu-dresden.de/docs/software/visualization.md b/doc.zih.tu-dresden.de/docs/software/visualization.md index 328acc490f5fa5c65e687d50bf9f43ceae44c541..f1e551c968cb4478069c98e691eef11bce7ccb01 100644 --- a/doc.zih.tu-dresden.de/docs/software/visualization.md +++ b/doc.zih.tu-dresden.de/docs/software/visualization.md @@ -49,10 +49,10 @@ marie@login$ mpiexec -bind-to -help` or from [mpich wiki](https://wiki.mpich.org/mpich/index.php/Using_the_Hydra_Process_Manager#Process-core_Binding%7Cwiki.mpich.org). -In the following, we provide two examples on how to use `pvbatch` from within a jobfile and an +In the following, we provide two examples on how to use `pvbatch` from within a job file and an interactive allocation. -??? example "Example jobfile" +??? example "Example job file" ```Bash #!/bin/bash @@ -97,7 +97,7 @@ cards (GPUs) specified by the device index. For that, make sure to use the modul *-egl*, e.g., `ParaView/5.9.0-RC1-egl-mpi-Python-3.8`, and pass the option `--egl-device-index=$CUDA_VISIBLE_DEVICES`. -??? example "Example jobfile" +??? example "Example job file" ```Bash #!/bin/bash @@ -171,7 +171,7 @@ are outputed.* This contains the node name which your job and server runs on. However, since the node names of the cluster are not present in the public domain name system (only cluster-internally), you cannot just use this line as-is for connection with your client. 
**You first have to resolve** the name to an IP -address on ZIH systems: Suffix the nodename with `-mn` to get the management network (ethernet) +address on ZIH systems: Suffix the node name with `-mn` to get the management network (ethernet) address, and pass it to a lookup-tool like `host` in another SSH session: ```console diff --git a/doc.zih.tu-dresden.de/docs/software/zsh.md b/doc.zih.tu-dresden.de/docs/software/zsh.md new file mode 100644 index 0000000000000000000000000000000000000000..147758a6a66dd84aeb040c80d0000110f4af882c --- /dev/null +++ b/doc.zih.tu-dresden.de/docs/software/zsh.md @@ -0,0 +1,238 @@ +# ZSH + +!!! warning + Though all efforts have been made to ensure the accuracy and + currency of the content on this website, please be advised that + some content might be out of date and there is no continuous + website support available. In case of any ambiguity or doubts, + users are advised to do their own research on the content's + accuracy and currency. + +The [ZSH](https://www.zsh.org), short for `z-shell`, is an alternative shell for Linux that offers +many convenience features for productive use that `bash`, the default shell, does not offer. + +This should be a short introduction to `zsh` and offer some examples that are especially useful +on ZIH systems. + +## `oh-my-zsh` + +`oh-my-zsh` is a plugin that adds many features to the `zsh` with a very simple install. Simply run: + +``` +marie@login$ sh -c "$(curl -fsSL https://raw.githubusercontent.com/ohmyzsh/ohmyzsh/master/tools/install.sh)" +``` + +and then, if it is not already your login shell, run `zsh` or re-login. + +The rest of this document assumes that you have `oh-my-zsh` installed and running. + +## Features + +### Themes + +There are many different themes for the `zsh`. See the +[GitHub-page of `oh-my-zsh`](https://github.com/ohmyzsh/ohmyzsh) for more details. + +### Auto-completion + +`zsh` offers more auto-completion features than `bash`. 
You can auto-complete programs, filenames, parameters, +`man`-pages and a lot more, and you can cycle through the suggestions with `TAB`-button. + + + +### Syntax-highlighting + +When you add this line to your `~/.zshrc` with `oh-my-zsh` installed, you get syntax-highlighting directly +in the shell: + +```bash +plugins+=( + zsh-syntax-highlighting +) +``` + + + +### Typo-correction + +With + +```bash +setopt correct_all +ENABLE_CORRECTION="true" +``` + +in `~/.zshrc` you get correction suggestions when the shell thinks +that it might be what you want, e.g. when a command +is expected to be handed an existing file. + + + +### Automatic `cd` + +Adding `AUTO_CD` to `~/.zshrc` file allows to leave out the `cd` when a folder name is provided. + +```bash +setopt AUTO_CD +``` + + + +### `fish`-like auto-suggestions + +Install [`zsh-autosuggestions`](https://github.com/zsh-users/zsh-autosuggestions) to get `fish`-shell-like +auto-suggestions of previous commands that start with the same letters and that you can complete with +the right arrow key. + + + +??? example "Addons for your shell" + === "`bash`" + ```bash + # Create a new directory and directly `cd` into it + mcd () { + mkdir -p $1 + cd $1 + } + + # Find the largest files in the current directory easily + function treesizethis { + du -k --max-depth=1 | sort -nr | awk ' + BEGIN { + split("KB,MB,GB,TB", Units, ","); + } + { + u = 1; + while ($1 >= 1024) { + $1 = $1 / 1024; + u += 1 + } + $1 = sprintf("%.1f %s", $1, Units[u]); + print $0; + } + ' + } + + #This allows you to run `slurmlogpath $SLURM_ID` and get the log-path directly in stdout: + function slurmlogpath { + scontrol show job $1 | sed -n -e 's/^\s*StdOut=//p' + } + + # `ftails` follow-tails a slurm-log. 
Call it without parameters to tail the only running job or + # get a list of running jobs or use `ftails $JOBID` to tail a specific job + function ftails { + JOBID=$1 + if [[ -z $JOBID ]]; then + JOBS=$(squeue --format="%i \\'%j\\' " --me | grep -v JOBID) + NUMBER_OF_JOBS=$(echo "$JOBS" | wc -l) + JOBID= + if [[ "$NUMBER_OF_JOBS" -eq 1 ]]; then + JOBID=$(echo $JOBS | sed -e "s/'//g" | sed -e 's/ .*//') + else + JOBS=$(echo $JOBS | tr -d '\n') + JOBID=$(eval "whiptail --title 'Choose jobs to tail' --menu 'Choose Job to tail' 25 78 16 $JOBS" 3>&1 1>&2 2>&3) + fi + fi + SLURMLOGPATH=$(slurmlogpath $JOBID) + if [[ -e $SLURMLOGPATH ]]; then + tail -n100 -f $SLURMLOGPATH + else + echo "No slurm-log-file found" + fi + } + + #With this, you only need to type `sq` instead of `squeue -u $USER`. + alias sq="squeue --me" + ``` + === "`zsh`" + ```bash + # Create a new directory and directly `cd` into it + mcd () { + mkdir -p $1 + cd $1 + } + + # Find the largest files in the current directory easily + function treesizethis { + du -k --max-depth=1 | sort -nr | awk ' + BEGIN { + split("KB,MB,GB,TB", Units, ","); + } + { + u = 1; + while ($1 >= 1024) { + $1 = $1 / 1024; + u += 1 + } + $1 = sprintf("%.1f %s", $1, Units[u]); + print $0; + } + ' + } + + #This allows you to run `slurmlogpath $SLURM_ID` and get the log-path directly in stdout: + function slurmlogpath { + scontrol show job $1 | sed -n -e 's/^\s*StdOut=//p' + } + + # `ftails` follow-tails a slurm-log. 
Call it without parameters to tail the only running job or + # get a list of running jobs or use `ftails $JOBID` to tail a specific job + function ftails { + JOBID=$1 + if [[ -z $JOBID ]]; then + JOBS=$(squeue --format="%i \\'%j\\' " --me | grep -v JOBID) + NUMBER_OF_JOBS=$(echo "$JOBS" | wc -l) + JOBID= + if [[ "$NUMBER_OF_JOBS" -eq 1 ]]; then + JOBID=$(echo $JOBS | sed -e "s/'//g" | sed -e 's/ .*//') + else + JOBS=$(echo $JOBS | tr -d '\n') + JOBID=$(eval "whiptail --title 'Choose jobs to tail' --menu 'Choose Job to tail' 25 78 16 $JOBS" 3>&1 1>&2 2>&3) + fi + fi + SLURMLOGPATH=$(slurmlogpath $JOBID) + if [[ -e $SLURMLOGPATH ]]; then + tail -n100 -f $SLURMLOGPATH + else + echo "No slurm-log-file found" + fi + } + + #With this, you only need to type `sq` instead of `squeue -u $USER`. + alias sq="squeue --me" + + #This will automatically replace `...` with `../..` and `....` with `../../..` + # and so on (each additional `.` adding another `/..`) when typing commands: + rationalise-dot() { + if [[ $LBUFFER = *.. ]]; then + LBUFFER+=/.. + else + LBUFFER+=. + fi + } + zle -N rationalise-dot + bindkey . rationalise-dot + + # This allows auto-completion for `module load`: + function _module { + MODULE_COMMANDS=( + '-t:Show computer parsable output' + 'load:Load a module' + 'unload:Unload a module' + 'spider:Search for a module' + 'avail:Show available modules' + 'list:List loaded modules' + ) + + MODULE_COMMANDS_STR=$(printf "\n'%s'" "${MODULE_COMMANDS[@]}") + + eval "_describe 'command' \"($MODULE_COMMANDS_STR)\"" + _values -s ' ' 'flags' $(ml -t avail | sed -e 's#/$##' | tr '\n' ' ') + } + + compdef _module "module" + ``` + +## Setting `zsh` as default-shell + +Please ask HPC support if you want to set the `zsh` as your default login shell. 
diff --git a/doc.zih.tu-dresden.de/docs/support/news_archive.md b/doc.zih.tu-dresden.de/docs/support/news_archive.md deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/doc.zih.tu-dresden.de/mkdocs.yml b/doc.zih.tu-dresden.de/mkdocs.yml index 4efbb60c85f44b6cb8d80c33cfb251c7a52003a3..51885fab70587470746e280a3874831521e96459 100644 --- a/doc.zih.tu-dresden.de/mkdocs.yml +++ b/doc.zih.tu-dresden.de/mkdocs.yml @@ -27,6 +27,7 @@ nav: - Private Modulefiles: software/private_modules.md - Custom EasyBuild Modules: software/custom_easy_build_environment.md - Python Virtual Environments: software/python_virtual_environments.md + - ZSH: software/zsh.md - Containers: - Singularity: software/containers.md - Singularity Recipes and Hints: software/singularity_recipe_hints.md @@ -69,7 +70,6 @@ nav: - PAPI Library: software/papi.md - Pika: software/pika.md - Perf Tools: software/perf_tools.md - - Score-P: software/scorep.md - Vampir: software/vampir.md - Data Life Cycle Management: - Overview: data_lifecycle/overview.md @@ -80,7 +80,6 @@ nav: - BeeGFS: data_lifecycle/beegfs.md - Warm Archive: data_lifecycle/warm_archive.md - Intermediate Archive: data_lifecycle/intermediate_archive.md - - Quotas: data_lifecycle/quotas.md - Workspaces: data_lifecycle/workspaces.md - Preservation of Research Data: data_lifecycle/preservation_research_data.md - Structuring Experiments: data_lifecycle/experiments.md @@ -102,7 +101,6 @@ nav: - Binding And Distribution Of Tasks: jobs_and_resources/binding_and_distribution_of_tasks.md - Support: - How to Ask for Support: support/support.md - - News Archive: support/news_archive.md - Archive of the Old Wiki: - Overview: archive/overview.md - Bio Informatics: archive/bioinformatics.md diff --git a/doc.zih.tu-dresden.de/util/check-bash-syntax.sh b/doc.zih.tu-dresden.de/util/check-bash-syntax.sh index 9f31effee3ebc3380af5ca892047aca6a9357139..ac0fcd4621741d7f094e29aaf772f283b64c284d 
100755 --- a/doc.zih.tu-dresden.de/util/check-bash-syntax.sh +++ b/doc.zih.tu-dresden.de/util/check-bash-syntax.sh @@ -47,12 +47,12 @@ branch="origin/${CI_MERGE_REQUEST_TARGET_BRANCH_NAME:-preview}" if [ $all_files = true ]; then echo "Search in all bash files." - files=`git ls-tree --full-tree -r --name-only HEAD $basedir/docs/ | grep .sh || true` + files=`git ls-tree --full-tree -r --name-only HEAD $basedir/docs/ | grep '\.sh$' || true` elif [[ ! -z $file ]]; then files=$file else echo "Search in git-changed files." - files=`git diff --name-only "$(git merge-base HEAD "$branch")" | grep .sh || true` + files=`git diff --name-only "$(git merge-base HEAD "$branch")" | grep '\.sh$' || true` fi diff --git a/doc.zih.tu-dresden.de/util/check-empty-page.sh b/doc.zih.tu-dresden.de/util/check-empty-page.sh new file mode 100755 index 0000000000000000000000000000000000000000..7c4fdc2cd07b167b39b0b0ece58e199df0df6d84 --- /dev/null +++ b/doc.zih.tu-dresden.de/util/check-empty-page.sh @@ -0,0 +1,11 @@ +#!/bin/bash + +set -euo pipefail + +scriptpath=${BASH_SOURCE[0]} +basedir=`dirname "$scriptpath"` +basedir=`dirname "$basedir"` + +if find $basedir -name \*.md -exec wc -l {} \; | grep '^0 '; then + exit 1 +fi diff --git a/doc.zih.tu-dresden.de/util/check-filesize.sh b/doc.zih.tu-dresden.de/util/check-filesize.sh new file mode 100755 index 0000000000000000000000000000000000000000..9b11b09c742a387513a265da28aca57d5533516b --- /dev/null +++ b/doc.zih.tu-dresden.de/util/check-filesize.sh @@ -0,0 +1,48 @@ +#!/bin/bash + +# BSD 3-Clause License +# +# Copyright (c) 2017, The Regents of the University of California, through +# Lawrence Berkeley National Laboratory (subject to receipt of any required +# approvals from the U.S. Dept. of Energy). All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +large_files_present=false +branch="origin/${CI_MERGE_REQUEST_TARGET_BRANCH_NAME:-preview}" +source_hash=`git merge-base HEAD "$branch"` + +for f in $(git diff $source_hash --name-only); do + fs=$(wc -c $f | awk '{print $1}') + if [ $fs -gt 1048576 ]; then + echo $f 'is over 1M ('$fs' bytes)' + large_files_present=true + fi +done + +if [ "$large_files_present" == true ]; then + exit 1 +fi diff --git a/doc.zih.tu-dresden.de/util/check-links.sh b/doc.zih.tu-dresden.de/util/check-links.sh index 49ee7f2dbe876ed8e82c0706631db3479c51fb16..a1b28c271d654f117f344490fd3875e70f77b15e 100755 --- a/doc.zih.tu-dresden.de/util/check-links.sh +++ b/doc.zih.tu-dresden.de/util/check-links.sh @@ -8,57 +8,96 @@ ## ## Author: Martin.Schroschk@tu-dresden.de -set -euo pipefail +set -eo pipefail + +scriptpath=${BASH_SOURCE[0]} +basedir=`dirname "$scriptpath"` +basedir=`dirname "$basedir"` usage() { - echo "Usage: bash $0" + cat <<-EOF +usage: $0 [file | -a] +If file is given, checks whether all links in it are reachable. +If parameter -a (or --all) is given instead of the file, checks all markdown files. +Otherwise, checks whether any changed file contains broken links. +EOF } -# Any arguments? -if [ $# -gt 0 ]; then - usage - exit 1 -fi - mlc=markdown-link-check if ! command -v $mlc &> /dev/null; then echo "INFO: $mlc not found in PATH (global module)" mlc=./node_modules/markdown-link-check/$mlc if ! command -v $mlc &> /dev/null; then echo "INFO: $mlc not found (local module)" - echo "INFO: See CONTRIBUTE.md for information." - echo "INFO: Abort." exit 1 fi fi echo "mlc: $mlc" +LINK_CHECK_CONFIG="$basedir/util/link-check-config.json" +if [ ! -f "$LINK_CHECK_CONFIG" ]; then + echo $LINK_CHECK_CONFIG does not exist + exit 1 +fi + branch="preview" if [ -n "$CI_MERGE_REQUEST_TARGET_BRANCH_NAME" ]; then branch="origin/$CI_MERGE_REQUEST_TARGET_BRANCH_NAME" fi -any_fails=false +function checkSingleFile(){ + theFile="$1" + if [ -e "$theFile" ]; then + echo "Checking links in $theFile" + if ! 
$mlc -q -c "$LINK_CHECK_CONFIG" -p "$theFile"; then + return 1 + fi + fi + return 0 +} -files=$(git diff --name-only "$(git merge-base HEAD "$branch")") +function checkFiles(){ +any_fails=false echo "Check files:" echo "$files" echo "" for f in $files; do - if [ "${f: -3}" == ".md" ]; then - # do not check links for deleted files - if [ "$f" != "doc.zih.tu-dresden.de/README.md" ]; then - if [ -e $f ]; then - echo "Checking links for $f" - if ! $mlc -q -p "$f"; then - any_fails=true - fi - fi - fi + if ! checkSingleFile "$f"; then + any_fails=true fi done if [ "$any_fails" == true ]; then exit 1 fi +} + +function checkAllFiles(){ +files=$(git ls-tree --full-tree -r --name-only HEAD $basedir/ | grep '.md$' || true) +checkFiles +} + +function checkChangedFiles(){ +files=$(git diff --name-only "$(git merge-base HEAD "$branch")" | grep '.md$' || true) +checkFiles +} + +if [ $# -eq 1 ]; then + case $1 in + help | -help | --help) + usage + exit + ;; + -a | --all) + checkAllFiles + ;; + *) + checkSingleFile "$1" + ;; + esac +elif [ $# -eq 0 ]; then + checkChangedFiles +else + usage +fi diff --git a/doc.zih.tu-dresden.de/util/check-no-floating.sh b/doc.zih.tu-dresden.de/util/check-no-floating.sh index 6f94039f3125f87502b1583e699140e15e0e5f5f..4fbc5affe7c670c9dc2d998447c29e3a1e99fe55 100755 --- a/doc.zih.tu-dresden.de/util/check-no-floating.sh +++ b/doc.zih.tu-dresden.de/util/check-no-floating.sh @@ -4,30 +4,41 @@ if [ ${#} -ne 1 ]; then echo "Usage: ${0} <path>" fi -DOCUMENT_ROOT=${1} +basedir=${1} +DOCUMENT_ROOT=${basedir}/docs +maxDepth=4 +expectedFooter="$DOCUMENT_ROOT/legal_notice.md $DOCUMENT_ROOT/accessibility.md $DOCUMENT_ROOT/data_protection_declaration.md" -check_md() { - awk -F'/' '{print $0,NF,$NF}' <<< "${1}" | while IFS=' ' read string depth md; do - #echo "string=${string} depth=${depth} md=${md}" +MSG=$(find ${DOCUMENT_ROOT} -name "*.md" | awk -F'/' '{print $0,NF}' | while IFS=' ' read string depth + do + #echo "string=${string} depth=${depth}" # max depth 
check - if [ "${depth}" -gt "5" ]; then - echo "max depth (4) exceeded for ${string}" - exit -1 + if [ "${depth}" -gt $maxDepth ]; then + echo "max depth ($maxDepth) exceeded for ${string}" fi + md=${string#${DOCUMENT_ROOT}/} + # md included in nav - if ! sed -n '/nav:/,/^$/p' ${2}/mkdocs.yml | grep --quiet ${md}; then - echo "${md} is not included in nav" - exit -1 + numberOfReferences=`sed -n '/nav:/,/^$/p' ${basedir}/mkdocs.yml | grep -c ${md}` + if [ $numberOfReferences -eq 0 ]; then + # fallback: md included in footer + if [[ "${expectedFooter}" =~ ${string} ]]; then + numberOfReferencesInFooter=`sed -n '/footer:/,/^$/p' ${basedir}/mkdocs.yml | grep -c /${md%.md}` + if [ $numberOfReferencesInFooter -eq 0 ]; then + echo "${md} is not included in footer" + elif [ $numberOfReferencesInFooter -ne 1 ]; then + echo "${md} is included $numberOfReferencesInFooter times in footer" + fi + else + echo "${md} is not included in nav" + fi + elif [ $numberOfReferences -ne 1 ]; then + echo "${md} is included $numberOfReferences times in nav" fi done -} - -export -f check_md - -#find ${DOCUMENT_ROOT}/docs -name "*.md" -exec bash -c 'check_md "${0#${1}}" "${1}"' {} ${DOCUMENT_ROOT} \; -MSG=$(find ${DOCUMENT_ROOT}/docs -name "*.md" -exec bash -c 'check_md "${0#${1}}" "${1}"' {} ${DOCUMENT_ROOT} \;) +) if [ ! 
-z "${MSG}" ]; then echo "${MSG}" exit -1 diff --git a/doc.zih.tu-dresden.de/util/check-spelling.sh b/doc.zih.tu-dresden.de/util/check-spelling.sh index 8448d0bbffe534b0fd676dbd00ca82e17e7d167d..0d574c1e6adeadacb895f31209b16a9d7f25a123 100755 --- a/doc.zih.tu-dresden.de/util/check-spelling.sh +++ b/doc.zih.tu-dresden.de/util/check-spelling.sh @@ -7,6 +7,7 @@ basedir=`dirname "$scriptpath"` basedir=`dirname "$basedir"` wordlistfile=$(realpath $basedir/wordlist.aspell) branch="origin/${CI_MERGE_REQUEST_TARGET_BRANCH_NAME:-preview}" +files_to_skip=(doc.zih.tu-dresden.de/docs/accessibility.md doc.zih.tu-dresden.de/docs/data_protection_declaration.md data_protection_declaration.md) aspellmode= if aspell dump modes | grep -q markdown; then aspellmode="--mode=markdown" @@ -14,9 +15,10 @@ fi function usage() { cat <<-EOF -usage: $0 [file] +usage: $0 [file | -a] If file is given, outputs all words of the file, that the spell checker cannot recognize. -If file is omitted, checks whether any changed file contains more unrecognizable words than before the change. +If parameter -a (or --all) is given instead of the file, checks all markdown files. +Otherwise, checks whether any changed file contains more unrecognizable words than before the change. If you are sure a word is correct, you can put it in $wordlistfile. EOF } @@ -29,12 +31,52 @@ function getNumberOfAspellOutputLines(){ getAspellOutput | wc -l } +function isWordlistSorted(){ + #Unfortunately, sort depends on locale and docker does not provide much. + #Therefore, it uses bytewise comparison. We avoid problems with the command tr. 
+ if sed 1d "$wordlistfile" | tr [:upper:] [:lower:] | sort -C; then + return 1 + fi + return 0 +} + +function shouldSkipFile(){ + printf '%s\n' "${files_to_skip[@]}" | grep -xq $1 +} + +function checkAllFiles(){ + any_fails=false + + if isWordlistSorted; then + echo "Unsorted wordlist in $wordlistfile" + any_fails=true + fi + + files=$(git ls-tree --full-tree -r --name-only HEAD $basedir/ | grep .md) + while read file; do + if [ "${file: -3}" == ".md" ]; then + if shouldSkipFile ${file}; then + echo "Skip $file" + else + echo "Check $file" + echo "-- File $file" + if { cat "$file" | getAspellOutput | tee /dev/fd/3 | grep -xq '.*'; } 3>&1; then + any_fails=true + fi + fi + fi + done <<< "$files" + + if [ "$any_fails" == true ]; then + return 1 + fi + return 0 +} + function isMistakeCountIncreasedByChanges(){ any_fails=false - #Unfortunately, sort depends on locale and docker does not provide much. - #Therefore, it uses bytewise comparison. We avoid problems with the command tr. - if ! sed 1d "$wordlistfile" | tr [:upper:] [:lower:] | sort -C; then + if isWordlistSorted; then echo "Unsorted wordlist in $wordlistfile" any_fails=true fi @@ -48,9 +90,7 @@ function isMistakeCountIncreasedByChanges(){ while read oldfile; do read newfile if [ "${newfile: -3}" == ".md" ]; then - if [[ $newfile == *"accessibility.md"* || - $newfile == *"data_protection_declaration.md"* || - $newfile == *"legal_notice.md"* ]]; then + if shouldSkipFile ${newfile:2}; then echo "Skip $newfile" else echo "Check $newfile" @@ -90,6 +130,9 @@ if [ $# -eq 1 ]; then usage exit ;; + -a | --all) + checkAllFiles + ;; *) cat "$1" | getAspellOutput ;; diff --git a/doc.zih.tu-dresden.de/util/grep-forbidden-patterns.sh b/doc.zih.tu-dresden.de/util/grep-forbidden-patterns.sh index 7895f576e46e66caa9e14f3d77a74deb918fdab0..f3cfa673ce063a674cb2f850d7f7da252a6ab093 100755 --- a/doc.zih.tu-dresden.de/util/grep-forbidden-patterns.sh +++ b/doc.zih.tu-dresden.de/util/grep-forbidden-patterns.sh @@ -23,7 +23,7 @@ s 
\<SLURM\> doc.zih.tu-dresden.de/docs/contrib/content_rules.md i file \+system HDFS Use \"ZIH systems\" or \"ZIH system\" instead of \"Taurus\". \"taurus\" is only allowed when used in ssh commands and other very specific situations. -doc.zih.tu-dresden.de/docs/contrib/content_rules.md +doc.zih.tu-dresden.de/docs/contrib/content_rules.md doc.zih.tu-dresden.de/docs/archive/phase2_migration.md i \<taurus\> taurus\.hrsk /taurus /TAURUS ssh ^[0-9]\+:Host taurus$ \"HRSKII\" should be avoided, use \"ZIH system\" instead. doc.zih.tu-dresden.de/docs/contrib/content_rules.md @@ -35,20 +35,20 @@ i hpc[ -]\+da\> i attachurl Replace \"todo\" with real content. - +doc.zih.tu-dresden.de/docs/archive/system_triton.md i \<todo\> <!--.*todo.*--> -Replace \"Coming soon\" with real content. +Replace variations of \"Coming soon\" with real content. -i \<coming soon\> +i \(\<coming soon\>\|This .* under construction\|posted here\) Avoid spaces at end of lines. - +doc.zih.tu-dresden.de/docs/accessibility.md i [[:space:]]$ When referencing partitions, put keyword \"partition\" in front of partition name, e. g. \"partition ml\", not \"ml partition\". doc.zih.tu-dresden.de/docs/contrib/content_rules.md -i \(alpha\|ml\|haswell\|romeo\|gpu\|smp\|julia\|hpdlf\|scs5\)-\?\(interactive\)\?[^a-z]*partition +i \(alpha\|ml\|haswell\|romeo\|gpu\|smp\|julia\|hpdlf\|scs5\|dcv\)-\?\(interactive\)\?[^a-z]*partition Give hints in the link text. Words such as \"here\" or \"this link\" are meaningless. doc.zih.tu-dresden.de/docs/contrib/content_rules.md -i \[\s\?\(documentation\|here\|this \(link\|page\|subsection\)\|slides\?\|manpage\)\s\?\] +i \[\s\?\(documentation\|here\|more info\|this \(link\|page\|subsection\)\|slides\?\|manpage\)\s\?\] Use \"workspace\" instead of \"work space\" or \"work-space\". 
doc.zih.tu-dresden.de/docs/contrib/content_rules.md i work[ -]\+space" diff --git a/doc.zih.tu-dresden.de/util/link-check-config.json b/doc.zih.tu-dresden.de/util/link-check-config.json new file mode 100644 index 0000000000000000000000000000000000000000..fdbb8373f2ebe4d14098d1af5eb62c15733c8f3c --- /dev/null +++ b/doc.zih.tu-dresden.de/util/link-check-config.json @@ -0,0 +1,10 @@ +{ + "ignorePatterns": [ + { + "pattern": "^https://gitlab.hrz.tu-chemnitz.de/zih/hpcsupport/hpc-compendium/-/merge_requests/new$" + }, + { + "pattern": "^https://doc.zih.tu-dresden.de/preview$" + } + ] +} diff --git a/doc.zih.tu-dresden.de/util/pre-commit b/doc.zih.tu-dresden.de/util/pre-commit index eb63bbea24052eb1dff4ec16a17b8b5aba275e18..1cc901e00efbece94209bfa6c4bbbc54aad682e9 100755 --- a/doc.zih.tu-dresden.de/util/pre-commit +++ b/doc.zih.tu-dresden.de/util/pre-commit @@ -75,6 +75,13 @@ then exit_ok=no fi +echo "Looking for empty files..." +docker run --name=hpc-compendium --rm -w /docs --mount src="$(pwd)",target=/docs,type=bind hpc-compendium ./doc.zih.tu-dresden.de/util/check-empty-page.sh +if [ $? 
-ne 0 ] +then + exit_ok=no +fi + if [ $exit_ok == yes ] then exit 0 diff --git a/doc.zih.tu-dresden.de/wordlist.aspell b/doc.zih.tu-dresden.de/wordlist.aspell index 443647e74a9cc4a7e17e92f381c914de04e1b0f3..262f5eeae1b153648d59418137b8bac2dc2cf5fb 100644 --- a/doc.zih.tu-dresden.de/wordlist.aspell +++ b/doc.zih.tu-dresden.de/wordlist.aspell @@ -1,5 +1,7 @@ personal_ws-1.1 en 203 Abaqus +Addon +Addons ALLREDUCE Altix Amber @@ -65,7 +67,11 @@ DockerHub dockerized dotfile dotfiles +downtime +downtimes +EasyBlocks EasyBuild +EasyConfig ecryptfs engl english @@ -81,6 +87,7 @@ filesystem filesystems flink Flink +FlinkExample FMA foreach Fortran @@ -88,6 +95,7 @@ Galilei Gauss Gaussian GBit +GDB GDDR GFLOPS gfortran @@ -135,13 +143,16 @@ img Infiniband init inode +Instrumenter IOPS IPs +ipynb ISA Itanium jobqueue jpg jss +jupyter Jupyter JupyterHub JupyterLab @@ -153,8 +164,8 @@ LAPACK lapply Leichtbau LINPACK -Linter linter +Linter lmod LoadLeveler localhost @@ -167,6 +178,7 @@ MathWorks matlab MEGWARE mem +Memcheck MiB Microarchitecture MIMD @@ -174,6 +186,7 @@ Miniconda mkdocs MKL MNIST +MobaXTerm modenv modenvs modulefile @@ -194,6 +207,7 @@ multithreaded Multithreading NAMD natively +nbgitpuller nbsp NCCL Neptun @@ -220,17 +234,18 @@ OpenBLAS OpenCL OpenGL OpenMP -OpenMPI openmpi +OpenMPI OpenSSH Opteron OTF overfitting -Pandarallel pandarallel +Pandarallel PAPI parallelization parallelize +parallelized parfor pdf perf @@ -245,21 +260,26 @@ PMI png PowerAI ppc -Pre pre +Pre Preload preloaded preloading +prepend preprocessing PSOCK +Pthread Pthreads pty +PuTTY pymdownx PythonAnaconda pytorch PyTorch Quantum queue +quickstart +Quickstart randint reachability README @@ -276,8 +296,8 @@ RSS RStudio Rsync runnable -Runtime runtime +Runtime sacct salloc Sandybridge @@ -304,6 +324,8 @@ Slurm SLURMCluster SMP SMT +SparkExample +spython squeue srun ssd @@ -325,8 +347,8 @@ TensorFlow TFLOPS Theano tmp -ToDo todo +ToDo toolchain toolchains torchvision @@ -340,7 +362,9 @@ 
undistinguishable unencrypted uplink userspace +Valgrind Vampir +VampirServer VampirTrace VampirTrace's VASP @@ -363,6 +387,7 @@ XLC XLF Xming yaml -ZIH zih +ZIH ZIH's +ZSH