From cf7c07457c47a5eafe0b19be194e924d6b7bd794 Mon Sep 17 00:00:00 2001
From: Martin Schroschk <martin.schroschk@tu-dresden.de>
Date: Wed, 20 Oct 2021 08:29:31 +0200
Subject: [PATCH] Review: use example syntax; fix spelling; add zip file

---
 .../PyTorch/example_PyTorch_parallel.zip      | Bin 4237 -> 0 bytes
 .../docs/software/distributed_training.md     | 245 +++++++++---------
 doc.zih.tu-dresden.de/wordlist.aspell         |   1 +
 3 files changed, 123 insertions(+), 123 deletions(-)
 delete mode 100644 Compendium_attachments/PyTorch/example_PyTorch_parallel.zip

diff --git a/Compendium_attachments/PyTorch/example_PyTorch_parallel.zip b/Compendium_attachments/PyTorch/example_PyTorch_parallel.zip
deleted file mode 100644
index 05c458be14d89e7862cb547b2db8aaefd9a654a1..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 4237
zcmai&S2P?9w}vsgAQ(h1(R=ia9)##^l+mJwM2S8a(K8q&j3Igr(M6On1W_YIiyCEg
z8NClt&i9>jch>sXzxI0f-tWb}e)oEebOD4wJUl#7ytGh1Lo9>Q1ezKTuUrWaj~$N{
z&jsq>`P#$9-YD42+t<m>{<VXzgNKKUhnV~8U@u3o85tgc?t0SpUj;xZ@bC$@0eE=-
z+xDzIQ+^Mo-tOCv%c)7^CJiH9+Yuc~+tG69OdJONAnQ?DmuwuvTv{9#x^<*Xx=r);
zuM&79$e8)>>Kr8%J5<`N)7)a_hbj1UbhKBqx}mxqXV4zX*E@<k6HQeBlGiY{HeSTf
z?I4&gHio`yfZ}Tl?)GzZOy?}8gfV{?WkoeTXn7AUSm0ZV7P1B|e5D2&9w>hynx`*F
zKFOM=*XmFefAnzBp-({9(6;I|y>mCvEuFVQn)y`96$_z6W33P>UoUS?IP4ORjX)^V
zp2go=DwbeA9Yh$md39vv=PdMx!tDz%Yi@%^B817$FjQKa?`g0QMOE&E1_O<a^Qk_n
zooO1J#vnYIWH93{5^(Ry9RNoAsw(~UTY<HXiS~L|cv&C`Xcb?PIsfxL@i<$K!*1BM
zNOd@hu^;4Uc88;NhMbwAAg`5lEzIYE@J;k=B`&{TK_bjsL}j4ioe!2Og;sl@D&CY@
zsMhACKQKo};S?+M!+_RH$ighSZ@=f%*hHCJ9%0&EXC>i5DWgFcRBq^>^Gz*6B8sDo
zWN(628vd9n4JAFT5)eHzy-ugD9v5S7@axr$73!{LK@aAg`-4;V=w>{y)nC(<b(VvH
zB&2vTVK}Kd<OCb=dArKS>(@A_Z3qD<K+7weWLGt|6mRuV38|-R8Q%ak4LL)qjd5GU
z-BL8%yw5J0qQy$Wyg5UeLwF>E@wkkhJoNa`QHB#7Q)0e=%X$A`N~eTqU#-TZ-QT$N
zU>4L&tDPAV+H>nqBm(g->dKI1T4+)Bje^s@t;a#I+O|GymT9F+b(tS+nUN;FpFWW_
zaw;g+78(lP`S3Zw_J>zG<D?>|x!5c9Y{O<2#lv`BiWUn@RUnEB&E`TBBi;;E8p*mb
z0A9J<%o?Dx-$&AL@xZt;YMNu~E{UW1V0;WGRbyEg$y(aW#p~l=<mN8m@K}|q8Shcg
zpfE>tT_{@g3e6@3*bEf+{MP<H9xfQD_&hVCy-?}pp@jUz6k*;W4Qh(5^+vF82TKOr
z*+|f6YgI_?4o4AW!#IhaNx2dB2ERHtB9mMJ_bUsSKK`SLZ7)mb86T>O4~YY>Sf0=P
zNzSvt^xct{aXeSuZ4pc_8Fa3G3;H2M)M?3l{Eb<85}0=ko#{wBQnK$taV&oB`wPt1
zHvg&m*JK{cncHqLJ0*1dnMD{0bpFh1Fr!y2cTdn;i@%rV$&s9CFs=}#(++aVYG@d;
zW~-YKZvNBQxS3(HSy_gXEiCKj-fiik)G%J7kOJ&kTY&iTZUaOkx4IfW6DhUwNR~7`
z6=#b3&0)V5+F5X!e@ngZ_Gj@q^e>!)!0wSqDUtef_D4>9lIUFc$)%%7%e>2z>+HCF
z_j=utz7L%>UCXU~XFHgA7KHN1ts}GM!P`8u<EwPFj?wC`zpLtzO@a`7N(APaSp(<5
z`q>fXm-jB~`-!+CE>WhiU{l+-Y~WI+L}E30|E8F48P!eiGXw~7yJGkwi1j96_yOO>
z`V~UJx^%o9qp=0Qg(y5PphRJnMSV_mZA|ah^PH`mD|T}2=j^R`P%ih~{Qr1tY!n1e
zO5v+Ji^`rg_x!fPf8$9h%ysTmo`4#9dB%F4df!FRJccwd$yGmOJ)F6P0!sgB2h3B1
zxP&Qxpk71<O^T2w@j%~BqAts|l+N{FynDosH&YfoBRR*76a`D$gGC&M<porEKYP(R
zS<pvx%fr1R@T~2;khxH0zTNaS&oF4k8wm83KOzp$%xb($<_lHq^=NsY$5cP@0{uO@
zU}tng-9b-QK?|zzzG-DQP;vDN+9fFApRGGn@QrureAD=wT`mh(P!0`0Uz|#G&3x?i
z5%T$?)n{WvMu!rmSQ?S$<jCH@iupA|vNew3m5F@ue8P9B8>o9jkX+vHgz*#33L8E|
z5#!{xtbYg663Onr1*>bU-Fw3QkzT!CMnqDtgejuQ=z4sYzIs1Vl9by}&YHiO*80b=
zP!_9zAHF;iIvdUVz_BJG0ONu=8?P1|oNx|H>E?_SG0=x*s{#~%2y6A$n2A)Su@ZBG
zlO%>qWB6rAvZPYJlBO_GPeLx#pj8Gu!=&1-w`0T{8T`-pBm-&#NirlQP7|X&j%E~O
z--BL@l}l5ozTapiQ$zi-{F2lV>5cqa+8rZ&{6#ZCd>7c^YXZ*{JVkd9?4E!|lE-(f
z<asCRR@AG~0Os{(N)f&3T(S%cg@#dZ`C&s;j4V<WlcvqdLfn*}z(guG6Y1STJ4<9|
zH3ZXte_6N1Zg64yH~0%)%kKq62a)<hM#dS=Vb7WhSys}mB4}%%h?EQyEViaM!>$=t
znRx7zut6Z%P%Al<&np;B^aVlx<Xovc#P&0B1B6+$h_-lw>^>YosWHprm(L>;IFmb6
zvRp;nVpf{V2KC~36>+EZ^5u~qi(;cT(w}d@&!#!|A=ug`T=;gVKybr1yDLGR7M;rM
zM^|?$N179n2T|xIqxQl|3e21Ycl@lQPGx?XxmspQ<CQpgU3}A)lKXzw%`gso_1otp
za6$1*W2rPe?4jZ6LuM7sPxF`SV(b*i?KP=bo%ZzpibMdju6)I>7*Ji((t^(^tABeN
z?ZL7kdT-otyGfpW6VP>jbTX$B$nsE3A(-IUn#L-0x7AqfV5l61^Df+kq`D}(yC?Hc
zslRo2UI60!1@X#;?-7ze&_n_YGNN6SQ^5{Qy6~Ljs6G!^>X!WGrRshWSt9vvu;ON_
zQ5%~qd4*ET+slf9XYQ+Obd8PeC)AB09p0AwF?^da9g#P%ZGOb}WfR=+rT6ima-E{*
z5u@LMizVnWoytlSj3)jDc$6~R&3Rza+@ZPo&DYQ&P~+MhL?&kMB|9n59?H#DUuo3k
z3;bn3DQIhucOZTx5~|Y5eE%u<aF$Odfcuth7@j&ftmCWWwMG{b@y?fjGBFZAaU=&I
zrj1~Es&$sR!<0D<>*oSg#(M9GLo$_%-M}B1jEsS2p`jyj$z(;CBB8DzdS^aj88!Qq
zWA3PR4LgF2CdJr%zY<&`PSZ!uvRl^m!!9jDPl|2ey#kp5yXo)&mQ5Xk=a?dYvd15}
zOEU@$$zfF4<0>9X;c^_tn^zYXjH^LN*-z+swGM>XnMUn%ApV7R!xf{eknE{mU(dUW
zFcVN26@&xM<t#}6Nq%W2=B|&qcQna2fk*Dd5g(|ko#1a3UPUkJL#VnA@c2lnntp00
zXF7l2M+DiSq$!x?Jpla~bk6WPJ(e^FKCpG1te({=MAf}`(#Izi7)(}}oNURk&Yo#z
zGglt}+i^^8fN({q7{DR^#p4WeJv%?7aHvPvVrk89u_k1sbcaua4SO6`D<!398a>~^
znxvqup|;Xr_8o|MsH~8X@i4|;qRPqcM2R!Ge}r(ahK)LYWe-RR)ey1dS!x=xp}@e8
z*g8%iOQf4VcN-GriJ@)np3!GNN{%uuy)B-a!(-WT5z@=5vtJ@uvk7*U<&NW{L@g7A
z`~u3*HAD9nMf$>*#(?_FVd<uk1%naUIcEKw*Jr_rTe$-dhRHR9x~wN-ir<aPSHaHb
zec7Xq=d5`JiA&77Bb~5qZJPTi8UhUKyNE<l&6vjUobUl|;E<KmaT<-^%|6LEJ~%e3
z@8?CE*htbpfug9L{W>E?-G_ANopBKkdH$*F1<6K2cDi<3i3d#FvRX=86CxnAFkY6s
z%Wn$y>1Mg;>J#JsK7%hZQg>p``^r}(OogRwDTM0CFLsmf%~_vD@7Ya-ZJLufwUqK>
zFg?x&9dePD$^GT&jHy-Srk`ttaKJs_{>cxlr`lz43(<z|67=i%WBW&#{qr{;v=X;z
z<#=Csm5C9l9r{t(Ud{L|HU<;Nth&s7twny<IdQqZ-J=#KqPI}O#z?S@IZ$rgYl!Yl
zPpxe9mXsl@Z+I6f>g(nJf#OG3f5wY!Hrq2BIKdUHdcw1Bun(RY0ESaOR!gKjrnNDN
zKPsUR<$w0LvME<AwNcpD1kD>myR?@XE)i<CGiFkHc#;0M?4aANhsjye{%5)GwoB<0
zVCBsf9d3#bPH(;!l69qtd66%~CQVmU+_xMi&scdZ9Fb2o>tNW^Yy4+RZq;S>J|L%C
zoCr5!2vqL(w<r1P9`!4G!H9t<7OZd8opRBNyb3mYF3%Dhn%zmAb3*BsKZ=E6N{QBS
zCmUMqJD+xNaV$zbS)(JUw%2QPvX7$hq!kJ3lhuDtHap&j0XzLQETlQ>A87gaIcr!b
zGK}jx>Dt{nsk*8UmPhHsc7l7x$0oJYl@Y6JN88_zIv)s$wwX=~J_{*uBh}dn5*X~k
z>JGaO8qtU4TFt<-Gg0`I4yG5yBdiGBlw56nvF~HXb2WYL{1|V6+hRyiJ~ofwaM36m
zwahwKInLnQVg9KKLD$uMy6SD6a_GWOZ5_ttqe;(Nkb|@u<xk~$GHe>W+kKv8%$m$L
z`Fm7pn=avM;U_zc9=X)&e=~1N&F{deuHDyoX1Bb%*MYP;du1UnFVA)<n$S9+FFLjG
z8Ivx<z32TWiT~Pel+YoW5qCq}xqO^sw4lcEtG)1?C;(5;0Q4*{nJtK!<WCS=@OQ&U
zX-YPyzX=&kuLEh*#|HS{kBz=~`X{=s=tvQ1e)`g*8+nU+cmi^_=P>*^2!EIKI^`rl
z*;QWK>Xpcc*U2R3baEwn!=NCjbY@S}?Pz=^U#!WfCy(kv!&xiuj?|<1a<>qF>VaH?
z=3L#zQ>4g2=sayVAMa50jPv=jeWI)0h7E?A;US5yV{0m-)wQD+p@+XGY!^j;e*bET
z(Aq_XqCVq8^rM4nDRlaDc7_f<i;l%XPds7m*Aq0yI>o#kY)<m?_<E+4o<%)%-^IG6
z%~0SKYAwi9cX~PeGMr=gQ_&6WUReD3SB=nu!7TCdrA)U2j^y)N6`6$+vQzj!FB_zL
z#<|OT6QJ-Ekl$5w7n`1P<Ho2n^`;Fm_a&k04*fP)YL)f&K)53@T<Sw`&?9rZs<7cn
zgrwFdmm4ha@amcTvTf(J6{E<<=?EOPKo`(#z6O)JedlvdP9RAve2tL5X~AvBc37fr
zK4P;tw-hl{+2Z@CDWMY8IxiZ=XXJc?fXu<TPW{xco8Ek9W#2SSExK(3w4|2gmTaUg
zrhi80+U5kZ5U>X8kb=0ArS}#=?%@GG#&`BlPZx~T!<u%aO=<K>CIYrJEm@hLMNG&%
zS(Vtd=g2SM`r>Xg&Vn5RRqN<koV#iJ%)a(#aZ%N2)_U1focs%sbC`3uEOt$F|GQ;D
zY^>1~;`Tzou&JoePVw*G&v!&dy7&a_fd2#q9{d9f@bK{eQDfx4+kZm@ME{2f{vRGN
U(j_AP_Zt77a{g!1-T&A95C7v42LJ#7

diff --git a/doc.zih.tu-dresden.de/docs/software/distributed_training.md b/doc.zih.tu-dresden.de/docs/software/distributed_training.md
index 98bbdfaa3..ab35c32ab 100644
--- a/doc.zih.tu-dresden.de/docs/software/distributed_training.md
+++ b/doc.zih.tu-dresden.de/docs/software/distributed_training.md
@@ -55,90 +55,91 @@ The Parameter Server holds the parameters and is responsible for updating
 the global state of the models.
 Each worker runs the training loop independently.
 
-#### Example
-
-In this case, we will go through an example with Multi Worker Mirrored Strategy.
-Multi-node training requires a `TF_CONFIG` environment variable to be set which will
-be different on each node.
-
-```console
-marie@compute$ TF_CONFIG='{"cluster": {"worker": ["10.1.10.58:12345", "10.1.10.250:12345"]}, "task": {"index": 0, "type": "worker"}}' python main.py
-```
-
-The `cluster` field describes how the cluster is set up (same on each node).
-Here, the cluster has two nodes referred to as workers.
-The `IP:port` information is listed in the `worker` array.
-The `task` field varies from node to node.
-It specifies the type and index of the node.
-In this case, the training job runs on worker 0, which is `10.1.10.58:12345`.
-We need to adapt this snippet for each node.
-The second node will have `'task': {'index': 1, 'type': 'worker'}`.
-
-With two modifications, we can parallelize the serial code:
-We need to initialize the distributed strategy:
-
-```python
-strategy = tf.distribute.experimental.MultiWorkerMirroredStrategy()
-```
-
-And define the model under the strategy scope:
-
-```python
-with strategy.scope():
-    model = resnet.resnet56(img_input=img_input, classes=NUM_CLASSES)
-    model.compile(
-        optimizer=opt,
-        loss='sparse_categorical_crossentropy',
-        metrics=['sparse_categorical_accuracy'])
-model.fit(train_dataset,
-    epochs=NUM_EPOCHS)
-```
-
-To run distributed training, the training script needs to be copied to all nodes,
-in this case on two nodes.
-TensorFlow is available as a module.
-Check for the version.
-The `TF_CONFIG` environment variable can be set as a prefix to the command.
-Now, run the script on the partition `alpha` simultaneously on both nodes:
-
-```bash
-#!/bin/bash
-
-#SBATCH --job-name=distr
-#SBATCH --partition=alpha
-#SBATCH --output=%j.out
-#SBATCH --error=%j.err
-#SBATCH --mem=64000
-#SBATCH --nodes=2
-#SBATCH --ntasks=2
-#SBATCH --ntasks-per-node=1
-#SBATCH --cpus-per-task=14
-#SBATCH --gres=gpu:1
-#SBATCH --time=01:00:00
-
-function print_nodelist {
+??? example "Multi Worker Mirrored Strategy"
+
+    In this case, we will go through an example with the Multi Worker Mirrored Strategy.
+    Multi-node training requires the `TF_CONFIG` environment variable to be set, and its
+    value differs on each node.
+
+    ```console
+    marie@compute$ TF_CONFIG='{"cluster": {"worker": ["10.1.10.58:12345", "10.1.10.250:12345"]}, "task": {"index": 0, "type": "worker"}}' python main.py
+    ```
+
+    The `cluster` field describes how the cluster is set up (same on each node).
+    Here, the cluster has two nodes referred to as workers.
+    The `IP:port` information is listed in the `worker` array.
+    The `task` field varies from node to node.
+    It specifies the type and index of the node.
+    In this case, the training job runs on worker 0, which is `10.1.10.58:12345`.
+    We need to adapt this snippet for each node.
+    The second node will have `'task': {'index': 1, 'type': 'worker'}`.
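+
+    As a sketch of how this could be scripted, the snippet below composes `TF_CONFIG`
+    from the worker list and the node's index. The helper name `build_tf_config` is our
+    own illustration, not part of the example code:
+
+    ```python
+    import json
+    import os
+
+    def build_tf_config(workers, index):
+        """Compose the TF_CONFIG value for one worker (hypothetical helper)."""
+        return json.dumps({
+            "cluster": {"worker": workers},
+            "task": {"index": index, "type": "worker"},
+        })
+
+    # Must happen before the strategy is created; use index=1 on the second node.
+    os.environ["TF_CONFIG"] = build_tf_config(
+        ["10.1.10.58:12345", "10.1.10.250:12345"], index=0)
+    ```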
+
+    With two modifications, we can parallelize the serial code.
+    First, we need to initialize the distributed strategy:
+
+    ```python
+    strategy = tf.distribute.experimental.MultiWorkerMirroredStrategy()
+    ```
+
+    Second, define the model within the strategy scope:
+
+    ```python
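+    # Assumes img_input, opt, NUM_CLASSES, and train_dataset come from the serial script.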
+    with strategy.scope():
+        model = resnet.resnet56(img_input=img_input, classes=NUM_CLASSES)
+        model.compile(
+            optimizer=opt,
+            loss='sparse_categorical_crossentropy',
+            metrics=['sparse_categorical_accuracy'])
+    model.fit(train_dataset,
+        epochs=NUM_EPOCHS)
+    ```
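+
+    One detail worth noting: the batch size used to build the `tf.data` pipeline is the
+    global batch size, which is divided among all replicas in sync. A minimal sketch,
+    assuming a per-replica batch size of 64:
+
+    ```python
+    per_replica_batch_size = 64  # assumed value, adapt to your model
+    global_batch_size = per_replica_batch_size * strategy.num_replicas_in_sync
+    train_dataset = train_dataset.batch(global_batch_size)
+    ```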
+
+    To run distributed training, the training script needs to be copied to all nodes,
+    in this case to both nodes.
+    TensorFlow is available as a module.
+    Check for available versions with `module spider TensorFlow`.
+    The `TF_CONFIG` environment variable can be set as a prefix to the command.
+    Now, run the script on the partition `alpha` simultaneously on both nodes:
+
+    ```bash
+    #!/bin/bash
+
+    #SBATCH --job-name=distr
+    #SBATCH --partition=alpha
+    #SBATCH --output=%j.out
+    #SBATCH --error=%j.err
+    #SBATCH --mem=64000
+    #SBATCH --nodes=2
+    #SBATCH --ntasks=2
+    #SBATCH --ntasks-per-node=1
+    #SBATCH --cpus-per-task=14
+    #SBATCH --gres=gpu:1
+    #SBATCH --time=01:00:00
+
+    function print_nodelist {
         scontrol show hostname $SLURM_NODELIST
-}
-NODE_1=$(print_nodelist | awk '{print $1}' | sort -u | head -n 1)
-NODE_2=$(print_nodelist | awk '{print $1}' | sort -u | tail -n 1)
-IP_1=$(dig +short ${NODE_1}.taurus.hrsk.tu-dresden.de)
-IP_2=$(dig +short ${NODE_2}.taurus.hrsk.tu-dresden.de)
+    }
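+    # Determine the two node hostnames and resolve them to IP addresses.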
+    NODE_1=$(print_nodelist | awk '{print $1}' | sort -u | head -n 1)
+    NODE_2=$(print_nodelist | awk '{print $1}' | sort -u | tail -n 1)
+    IP_1=$(dig +short ${NODE_1}.taurus.hrsk.tu-dresden.de)
+    IP_2=$(dig +short ${NODE_2}.taurus.hrsk.tu-dresden.de)
 
-module load modenv/hiera
-module load modenv/hiera GCC/10.2.0 CUDA/11.1.1 OpenMPI/4.0.5 TensorFlow/2.4.1
+    module load modenv/hiera
+    module load modenv/hiera GCC/10.2.0 CUDA/11.1.1 OpenMPI/4.0.5 TensorFlow/2.4.1
 
-# On the first node
-TF_CONFIG='{"cluster": {"worker": ["'"${NODE_1}"':33562", "'"${NODE_2}"':33561"]}, "task": {"index": 0, "type": "worker"}}' srun -w ${NODE_1} -N 1 --ntasks=1 --gres=gpu:1 python main_ddl.py &
+    # On the first node
+    TF_CONFIG='{"cluster": {"worker": ["'"${NODE_1}"':33562", "'"${NODE_2}"':33561"]}, "task": {"index": 0, "type": "worker"}}' srun -w ${NODE_1} -N 1 --ntasks=1 --gres=gpu:1 python main_ddl.py &
 
-# On the second node
-TF_CONFIG='{"cluster": {"worker": ["'"${NODE_1}"':33562", "'"${NODE_2}"':33561"]}, "task": {"index": 1, "type": "worker"}}' srun -w ${NODE_2} -N 1 --ntasks=1 --gres=gpu:1 python main_ddl.py &
+    # On the second node
+    TF_CONFIG='{"cluster": {"worker": ["'"${NODE_1}"':33562", "'"${NODE_2}"':33561"]}, "task": {"index": 1, "type": "worker"}}' srun -w ${NODE_2} -N 1 --ntasks=1 --gres=gpu:1 python main_ddl.py &
 
-wait
-```
+    wait
+    ```
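+
+    Inside the training script, you can check which role a process was assigned by
+    reading `TF_CONFIG` back. A short sketch, not part of the original example:
+
+    ```python
+    import json
+    import os
+
+    tf_config = json.loads(os.environ["TF_CONFIG"])
+    print(f"This process is worker {tf_config['task']['index']} of "
+          f"{len(tf_config['cluster']['worker'])} workers.")
+    ```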
 
 ### Distributed PyTorch
 
 !!! note
+
     This section is under construction
 
 PyTorch provides multiple ways to achieve data parallelism to train the deep learning models
@@ -179,23 +180,21 @@ See: Use `nn.parallel.DistributedDataParallel` instead of multiprocessing or `nn
 Check the [page](https://pytorch.org/docs/stable/notes/cuda.html#cuda-nn-ddp-instead) and
 [Distributed Data Parallel](https://pytorch.org/docs/stable/notes/ddp.html#ddp).
 
-Examples:
+??? example "Parallel Model"
 
-1. The parallel model.
-The main aim of this model to show the way how to effectively implement your
-neural network on several GPUs.
-It includes a comparison of different kinds of models and tips to improve the performance
-of your model.
-**Necessary** parameters for running this model are **2 GPU** and 14 cores.
+    The main aim of this model is to show how to effectively implement your neural network
+    on multiple GPUs. It includes a comparison of different kinds of models and tips to
+    improve the performance of your model.
+    **Necessary** parameters for running this model are **2 GPUs** and 14 cores.
 
-(example_PyTorch_parallel.zip)
+    Download: [example_PyTorch_parallel.zip (4.2 KB)](misc/example_PyTorch_parallel.zip)
 
-Remember that for using [JupyterHub service](../access/jupyterhub.md) for PyTorch you need to
-create and activate a virtual environment (kernel) with loaded essential modules.
+    Remember that when using the [JupyterHub service](../access/jupyterhub.md) for PyTorch,
+    you need to create and activate a virtual environment (kernel) with the essential
+    modules loaded.
 
-Run the example in the same way as the previous examples.
+    Run the example in the same way as the previous examples.
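+
+    As a rough sketch of the simplest variant such a comparison covers, `nn.DataParallel`
+    replicates a module over the visible GPUs. The `Net` model below is a hypothetical
+    stand-in, not taken from the example:
+
+    ```python
+    import torch
+    import torch.nn as nn
+
+    class Net(nn.Module):  # hypothetical stand-in for your model
+        def __init__(self):
+            super().__init__()
+            self.fc = nn.Linear(128, 10)
+
+        def forward(self, x):
+            return self.fc(x)
+
+    model = Net()
+    if torch.cuda.device_count() > 1:
+        # Replicate the model and split each input batch across the GPUs.
+        model = nn.DataParallel(model)
+    model.to("cuda")
+    ```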
 
-#### Distributed data-parallel
+#### Distributed Data-Parallel
 
 [DistributedDataParallel](https://pytorch.org/docs/stable/nn.html#torch.nn.parallel.DistributedDataParallel)
 (DDP) implements data parallelism at the module level which can run across multiple machines.
@@ -206,21 +205,21 @@ synchronize gradients and buffers.
 
 Please also look at the [official tutorial](https://pytorch.org/tutorials/intermediate/ddp_tutorial.html).
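+
+A minimal sketch of the per-process setup, assuming one process per GPU and that rank
+and world size are taken from the Slurm environment (the rendezvous address via
+`MASTER_ADDR`/`MASTER_PORT` must be exported beforehand, e.g. in the batch script):
+
+```python
+import os
+import torch
+import torch.distributed as dist
+from torch.nn.parallel import DistributedDataParallel as DDP
+
+# Assumption: one Slurm task per GPU; MASTER_ADDR/MASTER_PORT are exported.
+rank = int(os.environ["SLURM_PROCID"])
+world_size = int(os.environ["SLURM_NTASKS"])
+dist.init_process_group("nccl", init_method="env://",
+                        rank=rank, world_size=world_size)
+
+local_gpu = int(os.environ["SLURM_LOCALID"])  # task index within this node
+torch.cuda.set_device(local_gpu)
+model = torch.nn.Linear(128, 10).cuda()  # toy model for illustration
+ddp_model = DDP(model, device_ids=[local_gpu])
+```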
 
-To use distributed data parallelism on ZIH systems, please make sure the `--ntasks-per-node`
-parameter is equal to the number of GPUs you use per node.
+To use distributed data parallelism on ZIH systems, please make sure the value of the
+parameter `--ntasks-per-node=<N>` equals the number of GPUs you use per node.
 Also, it can be useful to increase the memory and CPU parameters if you run larger models.
 Memory can be increased up to:
 
 - `--mem=250G` and `--cpus-per-task=7` for the partition `ml`.
 - `--mem=60G` and `--cpus-per-task=6` for the partition `gpu2`.
 
-Keep in mind that only one memory parameter (`--mem-per-cpu=<MB>` or `--mem=<MB>`) can be specified
+Keep in mind that only one memory parameter (`--mem-per-cpu=<MB>` or `--mem=<MB>`) can be specified.
 
 ## External Distribution
 
 ### Horovod
 
-[Horovod](https://github.com/horovod/horovod) is the open source distributed training framework
+[Horovod](https://github.com/horovod/horovod) is the open-source distributed training framework
 for TensorFlow, Keras and PyTorch.
 It makes it easier to develop distributed deep learning projects and speeds them up.
 Horovod scales well to a large number of nodes and has a strong focus on efficient training on
@@ -235,7 +234,7 @@ the distributed code from TensorFlow for instance, with parameter servers.
 Horovod uses MPI and NCCL, which in some cases gives better results than
 pure TensorFlow and PyTorch.
 
-#### Horovod as a module
+#### Horovod as a Module
 
 Horovod is available as a module with **TensorFlow** or **PyTorch** for
 **all** module environments.
@@ -260,19 +259,19 @@ marie@compute$ module load Horovod/0.19.5-fosscuda-2019b-TensorFlow-2.2.0-Python
 
 Or if you want to use Horovod on the partition `alpha`, you can load it with the dependencies:
 
-```bash
+```console
 marie@alpha$ module spider Horovod                         #Check available modules
 marie@alpha$ module load modenv/hiera  GCC/10.2.0  CUDA/11.1.1  OpenMPI/4.0.5 Horovod/0.21.1-TensorFlow-2.4.1
 ```
 
-#### Horovod installation
+#### Horovod Installation
 
 However, if it is necessary to use another version of Horovod, it is possible to install it
 manually. For that, you need to create a [virtual environment](python_virtual_environments.md) and
 load the dependencies (e.g. MPI).
 Installing TensorFlow can take a few hours and is not recommended.
 
-##### Install Horovod for TensorFlow with python and pip
+##### Install Horovod for TensorFlow with Python and Pip
 
 This example shows the installation of Horovod for TensorFlow.
 Adapt as required and refer to the [Horovod documentation](https://horovod.readthedocs.io/en/stable/install_include.html)
@@ -299,13 +298,12 @@ Available Tensor Operations:
     [ ] CCL
     [X] MPI
     [ ] Gloo
-
 ```
 
 If you want to use OpenMPI, then specify `HOROVOD_GPU_ALLREDUCE=MPI`.
 For better performance, it is recommended to use NCCL instead of OpenMPI.
 
-##### Verify that Horovod works
+##### Verify Horovod Works
 
 ```pycon
 >>> import tensorflow
@@ -320,29 +318,30 @@ To have better performance it is recommended to use NCCL instead of OpenMPI.
 Hello from: 0
 ```
 
-#### Example
-
-Follow the steps in the [official examples](https://github.com/horovod/horovod/tree/master/examples)
-to parallelize your code.
-In Horovod, each GPU gets pinned to a process.
-You can easily start your job with the following bash script with four processes on two nodes:
-
-```bash
-#!/bin/bash
-#SBATCH --nodes=2
-#SBATCH --ntasks=4
-#SBATCH --ntasks-per-node=2
-#SBATCH --gres=gpu:2
-#SBATCH --partition=ml
-#SBATCH --mem=250G
-#SBATCH --time=01:00:00
-#SBATCH --output=run_horovod.out
-
-module load modenv/ml
-module load Horovod/0.19.5-fosscuda-2019b-TensorFlow-2.2.0-Python-3.7.4
-
-srun python <your_program.py>
-```
-
-Do not forget to specify the total number of tasks `--ntasks` and the number of tasks per node
-`--ntasks-per-node` which must match the number of GPUs per node.
+??? example "Horovod Batch Script"
+
+    Follow the steps in the
+    [official examples](https://github.com/horovod/horovod/tree/master/examples)
+    to parallelize your code.
+    In Horovod, each GPU gets pinned to a process (a sketch of the pinning follows below).
+    You can easily start your job with the following batch script, which runs four
+    processes on two nodes:
+
+    ```bash
+    #!/bin/bash
+    #SBATCH --nodes=2
+    #SBATCH --ntasks=4
+    #SBATCH --ntasks-per-node=2
+    #SBATCH --gres=gpu:2
+    #SBATCH --partition=ml
+    #SBATCH --mem=250G
+    #SBATCH --time=01:00:00
+    #SBATCH --output=run_horovod.out
+
+    module load modenv/ml
+    module load Horovod/0.19.5-fosscuda-2019b-TensorFlow-2.2.0-Python-3.7.4
+
+    srun python <your_program.py>
+    ```
+
+    Do not forget to specify the total number of tasks `--ntasks` and the number of tasks
+    per node `--ntasks-per-node`, which must match the number of GPUs per node.
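+
+    The pinning mentioned above amounts to a few lines at the top of the training script.
+    A minimal sketch for TensorFlow/Keras, to be adapted to your model and optimizer:
+
+    ```python
+    import horovod.tensorflow.keras as hvd
+    import tensorflow as tf
+
+    hvd.init()
+
+    # Pin each process to one GPU, selected by its local rank on the node.
+    gpus = tf.config.experimental.list_physical_devices("GPU")
+    if gpus:
+        tf.config.experimental.set_visible_devices(gpus[hvd.local_rank()], "GPU")
+
+    # Scale the learning rate by the number of processes and let Horovod
+    # average gradients across them.
+    opt = hvd.DistributedOptimizer(tf.keras.optimizers.SGD(0.01 * hvd.size()))
+    ```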
diff --git a/doc.zih.tu-dresden.de/wordlist.aspell b/doc.zih.tu-dresden.de/wordlist.aspell
index c54073a48..2cc8f1197 100644
--- a/doc.zih.tu-dresden.de/wordlist.aspell
+++ b/doc.zih.tu-dresden.de/wordlist.aspell
@@ -96,6 +96,7 @@ GitHub
 GitLab
 GitLab's
 glibc
+Gloo
 gnuplot
 gpu
 GPU
-- 
GitLab