Commit c85c6552 authored by Jan Frenzel

Removed some whitespace at end of lines. Corrected forbidden words where necessary.
parent 4f240fc6
3 merge requests: !392 Merge preview into contrib guide for browser users, !377 Merge preview into main, !370 Added patterns to check whether "todo" appears
@@ -42,7 +42,7 @@ marie@login$ # source $FOAM_CSH
module load OpenFOAM
source $FOAM_BASH
cd /scratch/ws/1/marie-example-workspace # work directory using workspace
srun pimpleFoam -parallel > "$OUTFILE"
```

## Ansys CFX

@@ -62,7 +62,7 @@ geometry and mesh generator cfx5pre, and the post-processor cfx5post.
module load ANSYS
cd /scratch/ws/1/marie-example-workspace # work directory using workspace
cfx-parallel.sh -double -def StaticMixer.def
```

## Ansys Fluent

...
@@ -212,11 +212,11 @@ for the partition `alpha` (queue at the dask terms) on the ZIH system:
```python
from dask_jobqueue import SLURMCluster

cluster = SLURMCluster(queue='alpha',
                       cores=8,
                       processes=2,
                       project='p_marie',
                       memory="8GB",
                       walltime="00:30:00")
```
@@ -235,15 +235,15 @@ from distributed import Client
from dask_jobqueue import SLURMCluster
from dask import delayed

cluster = SLURMCluster(queue='alpha',
                       cores=8,
                       processes=2,
                       project='p_marie',
                       memory="80GB",
                       walltime="00:30:00",
                       extra=['--resources gpu=1'])

cluster.scale(2) #scale it to 2 workers!
client = Client(cluster) #command will show you the number of workers (python objects corresponding to jobs)
```
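As an illustrative addition (not part of the original page), here is a minimal sketch of how a task could be pinned to the `gpu` resource announced above via `--resources gpu=1`. It assumes the `client` and worker setup from the previous block; the function `square` is a hypothetical placeholder:

```python
# Hypothetical usage sketch: relies on the `client` created above and on workers
# started with `--resources gpu=1`.
def square(x):
    return x ** 2

# The `resources` argument restricts this task to workers offering the "gpu" resource.
future = client.submit(square, 4, resources={'gpu': 1})
print(future.result())  # prints 16 once a matching worker has executed the task
```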
@@ -288,7 +288,7 @@ for the Monte-Carlo estimation of Pi.
uid = int( sp.check_output('id -u', shell=True).decode('utf-8').replace('\n','') )
portdash = 10001 + uid

#create a Slurm cluster, please specify your project
cluster = SLURMCluster(queue='alpha', cores=2, project='p_marie', memory="8GB", walltime="00:30:00", extra=['--resources gpu=1'], scheduler_options={"dashboard_address": f":{portdash}"})
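#Illustrative addition (not part of the original example): the address of the
#dashboard configured above can be printed from the cluster object.
print(cluster.dashboard_link)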
@@ -309,12 +309,12 @@ for the Monte-Carlo estimation of Pi.
def calc_pi_mc(size_in_bytes, chunksize_in_bytes=200e6):
    """Calculate PI using a Monte Carlo estimate."""

    size = int(size_in_bytes / 8)
    chunksize = int(chunksize_in_bytes / 8)

    xy = da.random.uniform(0, 1, size=(size // 2, 2), chunks=(chunksize // 2, 2))
    in_circle = ((xy ** 2).sum(axis=-1) < 1)
    pi = 4 * in_circle.mean()
@@ -327,11 +327,11 @@ for the Monte-Carlo estimation of Pi.
          f"\tErr: {abs(pi - np.pi) : 10.3e}\n"
          f"\tWorkers: {num_workers}"
          f"\t\tTime: {time_delta : 7.3f}s")

#let's loop over different volumes of double-precision random numbers and estimate it
for size in (1e9 * n for n in (1, 10, 100)):
    start = time()
    pi = calc_pi_mc(size).compute()
    elaps = time() - start

@@ -339,7 +339,7 @@ for the Monte-Carlo estimation of Pi.
    print_pi_stats(size, pi, time_delta=elaps, num_workers=len(cluster.scheduler.workers))

#Scaling the Cluster to twice its size and re-run the experiments
new_num_workers = 2 * len(cluster.scheduler.workers)
print(f"Scaling from {len(cluster.scheduler.workers)} to {new_num_workers} workers.")
@@ -349,11 +349,11 @@ for the Monte-Carlo estimation of Pi.
sleep(120)

client

#Re-run same experiments with doubled cluster
for size in (1e9 * n for n in (1, 10, 100)):
    start = time()
    pi = calc_pi_mc(size).compute()
    elaps = time() - start

...
@@ -183,7 +183,7 @@ DDP uses collective communications in the
[torch.distributed](https://pytorch.org/tutorials/intermediate/dist_tuto.html) package to
synchronize gradients and buffers.
-The tutorial can be found [here](https://pytorch.org/tutorials/intermediate/ddp_tutorial.html).
+Please also look at the [official tutorial](https://pytorch.org/tutorials/intermediate/ddp_tutorial.html).
To use distributed data parallelism on ZIH systems, please make sure the `--ntasks-per-node`
parameter is equal to the number of GPUs you use per node.
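For illustration only (a sketch under stated assumptions, not taken from the original page or the official tutorial): with one Slurm task per GPU as described above, each task could derive its rank and device from the Slurm environment roughly as follows. It assumes `MASTER_ADDR` and `MASTER_PORT` are exported in the job script.

```python
import os

import torch
import torch.distributed as dist
from torch.nn.parallel import DistributedDataParallel as DDP

# Hypothetical sketch: one Slurm task per GPU, so the local task ID selects the device.
# MASTER_ADDR and MASTER_PORT must already be set (e.g., exported in the sbatch script).
local_rank = int(os.environ["SLURM_LOCALID"])
rank = int(os.environ["SLURM_PROCID"])
world_size = int(os.environ["SLURM_NTASKS"])

torch.cuda.set_device(local_rank)
dist.init_process_group(backend="nccl", rank=rank, world_size=world_size)

# Wrap a toy model; DDP synchronizes gradients across all tasks during backward().
model = torch.nn.Linear(16, 1).to(f"cuda:{local_rank}")
ddp_model = DDP(model, device_ids=[local_rank])

dist.destroy_process_group()
```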
@@ -234,7 +234,7 @@ marie@compute$ module spider Horovod # Check available modules
Horovod/0.19.5-fosscuda-2019b-TensorFlow-2.2.0-Python-3.7.4
Horovod/0.21.1-TensorFlow-2.4.1
[...]
marie@compute$ module load Horovod/0.19.5-fosscuda-2019b-TensorFlow-2.2.0-Python-3.7.4
```

Or if you want to use Horovod on the partition `alpha`, you can load it with the dependencies:

...
@@ -53,7 +53,7 @@ Create a container from the image from the NGC catalog.
(For this example, the partition `alpha` is used):
```console
marie@login$ srun --partition=alpha --nodes=1 --ntasks-per-node=1 --ntasks=1 --gres=gpu:1 --time=08:00:00 --pty --mem=50000 bash
marie@compute$ cd /scratch/ws/<name_of_your_workspace>/containers #please create a Workspace

...
# Introduction

`perf` consists of two parts: the kernel space implementation and the userland tools. This wiki
-entry focusses on the latter. These tools are installed on taurus, and others and provides support
-for sampling applications and reading performance counters.
+entry focusses on the latter. These tools are installed on ZIH systems, and others and provides
+support for sampling applications and reading performance counters.
## Configuration

@@ -34,18 +34,18 @@ Run `perf stat <Your application>`. This will provide you with a general
overview on some counters.

```Bash
 Performance counter stats for 'ls':

       2,524235 task-clock                #    0,352 CPUs utilized
             15 context-switches          #    0,006 M/sec
              0 CPU-migrations            #    0,000 M/sec
            292 page-faults               #    0,116 M/sec
      6.431.241 cycles                    #    2,548 GHz
      3.537.620 stalled-cycles-frontend   #   55,01% frontend cycles idle
      2.634.293 stalled-cycles-backend    #   40,96% backend cycles idle
      6.157.440 instructions              #    0,96  insns per cycle
                                          #    0,57  stalled cycles per insn
      1.248.527 branches                  #  494,616 M/sec
         34.044 branch-misses             #    2,73% of all branches

    0,007167707 seconds time elapsed
```
@@ -142,10 +142,10 @@ If you added a callchain, it also gives you a callchain profile.\<br /> \*Discla
not an appropriate way to gain exact numbers. So this is merely a rough overview and not guaranteed
to be absolutely correct.\*
-### On Taurus
+### On ZIH systems

-On Taurus, users are not allowed to see the kernel functions. If you have multiple events defined,
-then the first thing you select in `perf report` is the type of event. Press right
+On ZIH systems, users are not allowed to see the kernel functions. If you have multiple events
+defined, then the first thing you select in `perf report` is the type of event. Press right
```Bash
Available samples
@@ -165,7 +165,7 @@ If you'd select cycles, you would get such a screen:
```Bash
Events: 96 cycles
+  49,13%  test_gcc_perf  test_gcc_perf      [.] main.omp_fn.0
+  34,48%  test_gcc_perf  test_gcc_perf      [.]
+   6,92%  test_gcc_perf  test_gcc_perf      [.] omp_get_thread_num@plt
+   5,20%  test_gcc_perf  libgomp.so.1.0.0   [.] omp_get_thread_num
+   2,25%  test_gcc_perf  test_gcc_perf      [.] main.omp_fn.1
...