Commit 5ba2c88a authored by Noah Löwer's avatar Noah Löwer

Added configuration as attachments and links

parent 71cef17f
#######################################################################
# Example configuration file for the GNU Compilers
#
# Defines: "model" => "mpi", "acc", "omp", "tgt", "tgtgpu" default "mpi"
# "label" => ext base label, default "nv"
#
# MPI-only Command:
# runhpc -c Example_gnu --reportable -T base --define model=mpi --ranks=40 small
#
# OpenACC Command:
# runhpc -c Example_gnu --reportable -T base --define model=acc --ranks=4 small
#
# OpenMP Command:
# runhpc -c Example_gnu --reportable -T base --define model=omp --ranks=1 --threads=40 small
#
# OpenMP Target Offload to Host Command:
# runhpc -c Example_gnu --reportable -T base --define model=tgt --ranks=1 --threads=40 small
#
# OpenMP Target Offload to NVIDIA GPU Command:
# runhpc -c Example_gnu --reportable -T base --define model=tgtnv --ranks=4 small
#
#######################################################################
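# Score-P instrumented variant (illustrative, not part of the original header;
# it exercises the %{tudprof} switch used further below):
# runhpc -c Example_gnu --reportable -T base --define model=mpi --define tudprof=scorep --ranks=40 small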
%ifndef %{label} # IF label is not set use gnu
% define label gnu
%endif
%ifndef %{model} # IF model is not set use mpi
% define model mpi
%endif
teeout = yes
makeflags=-j 24
# Tester Information
license_num = 37A
tester = Technische Universitaet Dresden
test_sponsor = Technische Universitaet Dresden
prepared_by = Holger Brunst
######################################################
# SUT Section
######################################################
#include: Example_SUT.inc
include: sut-taurus.inc
#[Software]
sw_compiler000 = C/C++/Fortran: Version 8.2.0 of
sw_compiler001 = GNU Compilers
sw_mpi_library = OpenMPI Version 3.1.3
sw_mpi_other = None
sw_other = None
#[General notes]
notes_000 = MPI startup command:
notes_005 = Slurm srun command was used to start MPI jobs.
#######################################################################
# End of SUT section
#######################################################################
#######################################################################
# The header section of the config file. Must appear
# before any instances of "section markers" (see below)
#
# ext = how the binaries you generated will be identified
# tune = specify "base" or "peak" or "all"
%ifndef %{tudprof}
label = %{label}_%{model}
%else
label = %{label}_%{model}_%{tudprof}
%endif
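# Resulting binary label, for illustration:
#   --define model=mpi                          -> label gnu_mpi
#   --define model=mpi --define tudprof=scorep  -> label gnu_mpi_scorep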
tune = base
output_format = text
use_submit_for_speed = 1
# Compiler Settings
default:
CC = mpicc
CXX = mpicxx
FC = mpif90
%if %{tudprof} eq 'scorep'
CC = scorep --mpp=mpi --instrument-filter=${SPEC}/scorep.filter mpicc
CXX = scorep --mpp=mpi --instrument-filter=${SPEC}/scorep.filter mpicxx
FC = scorep --mpp=mpi --instrument-filter=${SPEC}/scorep.filter mpif90
%endif
# Compiler Version Flags
CC_VERSION_OPTION = --version
CXX_VERSION_OPTION = --version
FC_VERSION_OPTION = --version
# enable non-official patches to this kit
#strict_rundir_verify = 0
# MPI options and binding environment, dependent upon Model being run
# Adjust to match your system
# OpenMP (CPU) Settings
%if %{model} eq 'omp'
preENV_OMP_PROC_BIND=true
preENV_OMP_PLACES=cores
%endif
#OpenMP Targeting Host Settings
%if %{model} eq 'tgt'
preENV_OMP_PROC_BIND=true
preENV_OMP_PLACES=0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39
%endif
#MPIRUN_OPTS = --bind-to none -q
MPIRUN_OPTS=
submit = timeout 2h srun ${MPIRUN_OPTS} -n $ranks -c $threads $command
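# With MPIRUN_OPTS empty, the OpenMP example above (--ranks=1 --threads=40)
# expands to roughly: timeout 2h srun -n 1 -c 40 <benchmark command>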
# MPI Workaround for mca issues in sph_exa
#preOMPI_MCA_topo=basic
# Score-P performance profiling
%if %{tudprof} eq 'scorep'
## only record calls to main and MPI functions (runtime filtering)
## runtime filtering was replaced by compile-time filtering (see above)
# preENV_SCOREP_FILTERING_FILE=/home/brunst/ws-hpc2020/kit91/scorep.filter
## set buffer memory size for profile/trace
preENV_SCOREP_TOTAL_MEMORY=64MB
## enable profile recording
preENV_SCOREP_ENABLE_PROFILING=true
## set to 'true' to enable detailed trace file recording
preENV_SCOREP_ENABLE_TRACING=false
## collect memory consumption per node
preENV_SCOREP_METRIC_RUSAGE=ru_maxrss
## uncomment to record cycle counter for scheduling analysis
preENV_SCOREP_METRIC_PAPI=PAPI_TOT_CYC
%endif
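## The compile-time filter file referenced above (${SPEC}/scorep.filter) is not
## included here; a minimal sketch that keeps only the program entry points
## (MPI calls are still recorded by Score-P's MPI adapter) could look like:
##   SCOREP_REGION_NAMES_BEGIN
##     EXCLUDE *
##     INCLUDE main MAIN__
##   SCOREP_REGION_NAMES_END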
#######################################################################
# Optimization
# Note that SPEC baseline rules require that all uses of a given compiler
# use the same flags in the same order. See the SPEChpc Run Rules
# for more details
# http://www.spec.org/hpc2021/Docs/runrules.html
#
# OPTIMIZE = flags applicable to all compilers
# FOPTIMIZE = flags applicable to the Fortran compiler
# COPTIMIZE = flags applicable to the C compiler
# CXXOPTIMIZE = flags applicable to the C++ compiler
#
# See your compiler manual for information on the flags available
# for your compiler
# Compiler flags applied to all models
default=base=default:
COPTIMIZE = -Ofast -march=native -lm # use -mcpu=native for ARM
CXXOPTIMIZE = -Ofast -march=native -std=c++14
FOPTIMIZE = -Ofast -march=native -fno-stack-protector
FPORTABILITY = -ffree-line-length-none
%if %{model} eq 'mpi'
pmodel=MPI
%endif
# OpenACC flags
%if %{model} eq 'acc'
pmodel=ACC
OPTIMIZE += -fopenacc -foffload=-lm
%endif
# OpenMP (CPU) flags
%if %{model} eq 'omp'
pmodel=OMP
OPTIMIZE += -fopenmp
%endif
# OpenMP Targeting host flags
%if %{model} eq 'tgt'
pmodel=TGT
OPTIMIZE += -fopenmp
%endif
# OpenMP Targeting Nvidia GPU flags
%if %{model} eq 'tgtnv'
pmodel=TGT
OPTIMIZE += -fopenmp -fopenmp-targets=nvptx64-nvidia-cuda
%endif
# No peak flags set, so make peak use the same flags as base
default=peak=default:
basepeak=1
#######################################################################
# Portability
#######################################################################
519.clvleaf_t,619.clvleaf_s,719.clvleaf_m,819.clvleaf_l=default=default:
# Not needed anymore?
#PORTABILITY += -DSPEC_GNU_FLUSH
# Invocation command line:
# runhpc -c nvhpc_alpha.cfg -ranks 8 --rebuild --define pmodel=acc --noreportable --tune=base --iterations=1 small
#######################################################################
# Example configuration file for the NVHPC Compilers
#
# Defines: "pmodel" => "mpi", "acc", "omp", "tgt", "tgtgpu" default "mpi"
# "label" => ext base label, default "nv"
#
# MPI-only Command:
# runhpc -c Example_gnu --reportable -T base --define pmodel=mpi --ranks=40 small
#
# OpenACC Command:
# runhpc -c Example_gnu --reportable -T base --define pmodel=acc --ranks=4 small
#
# OpenMP Command:
# runhpc -c Example_gnu --reportable -T base --define pmodel=omp --ranks=1 --threads=40 small
#
# OpenMP Target Offload to Host Command:
# runhpc -c Example_gnu --reportable -T base --define pmodel=tgt --ranks=1 --threads=40 small
#
# OpenMP Target Offload to NVIDIA GPU Command:
# runhpc -c Example_gnu --reportable -T base --define pmodel=tgtnv --ranks=4 small
#
#######################################################################
%ifndef %{label} # IF label is not set use nv
% define label nv
%endif
%ifndef %{pmodel} # IF pmodel is not set use mpi
% define pmodel mpi
%endif
teeout = yes
makeflags=-j 40
# Tester Information
license_num = 37A
test_sponsor = TU Dresden
tester = TU Dresden
prepared_by = Noah Trumpik (Noah.Trumpik@tu-dresden.de)
#######################################################################
# SUT Section
#######################################################################
# General SUT info
system_vendor = AMD
system_name = Alpha Centauri: AMD EPYC 7352 (AMD x86_64, NVIDIA A100-SXM4-40GB)
hw_avail = Jan-2019
sw_avail = Aug-2022
#[Node_Description: Hardware]
node_compute_syslbl = AMD Rome
node_compute_order = 1
node_compute_count = 34
node_compute_purpose = compute
node_compute_hw_vendor = AMD
node_compute_hw_model = AMD K17 (Zen2)
node_compute_hw_cpu_name = AMD EPYC 7352
node_compute_hw_ncpuorder = 2 chips
node_compute_hw_nchips = 2
node_compute_hw_ncores = 96
node_compute_hw_ncoresperchip = 48
node_compute_hw_nthreadspercore = 2
node_compute_hw_cpu_char = Up to 2.3 GHz
node_compute_hw_cpu_mhz = 2100
node_compute_hw_pcache = 32 KB I + 32 KB D on chip per core
node_compute_hw_scache = 512 KB I+D on chip per core
node_compute_hw_tcache000= 16384 KB I+D on chip per chip
node_compute_hw_ocache = None
node_compute_hw_memory = 1 TB
node_compute_hw_disk000= 3.5 TB
node_compute_hw_disk001 = NVMe SSD Controller SM981/PM981/PM983
node_compute_hw_adapter_ib_model = Mellanox ConnectX-6
node_compute_hw_adapter_ib_interconnect = EDR InfiniBand
node_compute_hw_adapter_ib_firmware = 20.28.2006
node_compute_hw_adapter_ib_driver = mlx5_core
node_compute_hw_adapter_ib_data_rate = 200 Gb/s
node_compute_hw_adapter_ib_count = 2
node_compute_hw_adapter_ib_slot_type = PCIe
node_compute_hw_adapter_ib_ports_used = 2
node_compute_hw_other = None
#[Node_Description: Accelerator]
node_compute_hw_accel_model = Tesla A100-SXM4-40GB
node_compute_hw_accel_count = 8
node_compute_hw_accel_vendor = NVIDIA Corporation
node_compute_sw_accel_driver = NVIDIA CUDA 470.57.02
node_compute_hw_accel_type = GPU
node_compute_hw_accel_connect = ASPEED Technology, Inc. (rev 04)
node_compute_hw_accel_ecc = Yes
node_compute_hw_accel_desc = none
#[Node_Description: Software]
node_compute_sw_os000 = CentOS Linux
node_compute_sw_os001 = 7
node_compute_sw_localfile = xfs
node_compute_sw_sharedfile000 = 4 PB Lustre parallel filesystem
node_compute_sw_sharedfile001 = over 4X EDR InfiniBand
node_compute_sw_state = Multi-user
node_compute_sw_other = None
#[Fileserver]
#[Interconnect]
interconnect_ib_syslbl = Mellanox InfiniBand
interconnect_ib_purpose = MPI Traffic and GPFS access
interconnect_ib_order = 1
interconnect_ib_hw_vendor = Mellanox
interconnect_ib_hw_topo = Non-blocking Fat-tree
#interconnect_ib_hw_switch_ib_count = 2
#interconnect_ib_hw_switch_ib_ports = 2
#interconnect_ib_hw_switch_ib_data_rate = 100 Gb/s
#interconnect_ib_hw_switch_ib_model = Mellanox Switch IB-2
#[Software]
sw_compiler000 = C/C++/Fortran: Version 21.7 of the
sw_compiler001 = NVHPC toolkit
sw_mpi_library = Open MPI Version 4.1.1
sw_mpi_other = None
system_class = Homogenous Cluster
sw_other = CUDA Driver Version: 11.4.2
#######################################################################
# End of SUT section
#######################################################################
#######################################################################
# The header section of the config file. Must appear
# before any instances of "section markers" (see below)
#
# ext = how the binaries you generated will be identified
# tune = specify "base" or "peak" or "all"
label = %{label}_%{pmodel}
tune = base
output_format = text
use_submit_for_speed = 1
# Compiler Settings
default:
CC = mpicc
CXX = mpicxx
FC = mpif90
# Compiler Version Flags
CC_VERSION_OPTION = --version
CXX_VERSION_OPTION = --version
FC_VERSION_OPTION = --version
# MPI options and binding environment, dependent upon Model being run
# Adjust to match your system
# OpenMP (CPU) Settings
%if %{pmodel} eq 'omp'
preENV_OMP_PLACES=cores
#preENV_OMP_PROC_BIND=true
#preENV_OMP_PLACES=0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39
#preENV_OMP_PLACES=0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24
%endif
#OpenMP Targeting Host Settings
%if %{pmodel} eq 'tgt'
#preENV_OMP_PROC_BIND=true
preENV_MPIR_CVAR_GPU_EAGER_DEVICE_MEM=0
preENV_MPICH_GPU_SUPPORT_ENABLED=1
preENV_MPICH_SMP_SINGLE_COPY_MODE=CMA
preENV_MPICH_GPU_EAGER_DEVICE_MEM=0
#preENV_OMP_PLACES=0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39
#preENV_OMP_PLACES=0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24
%endif
%ifdef %{ucx}
# if using OpenMPI with UCX support, these settings are needed with use of CUDA Aware MPI
# without these flags, LBM is known to hang when using OpenACC and OpenMP Target to GPUs
preENV_UCX_MEMTYPE_CACHE=n
preENV_UCX_TLS=self,shm,cuda_copy
%endif
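# These UCX settings are only activated when the macro is defined on the
# command line, e.g. (illustrative):
# runhpc -c nvhpc_alpha.cfg --define pmodel=acc --define ucx ...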
#MPIRUN_OPTS = --bind-to none -q
#submit = mpirun ${MPIRUN_OPTS} -n $ranks $command
submit = srun $command
#######################################################################
# Optimization
# Note that SPEC baseline rules require that all uses of a given compiler
# use the same flags in the same order. See the SPEChpc Run Rules
# for more details
# http://www.spec.org/hpc2021/Docs/runrules.html
#
# OPTIMIZE = flags applicable to all compilers
# FOPTIMIZE = flags applicable to the Fortran compiler
# COPTIMIZE = flags applicable to the C compiler
# CXXOPTIMIZE = flags applicable to the C++ compiler
#
# See your compiler manual for information on the flags available
# for your compiler
# Compiler flags applied to all models
default=base=default:
#OPTIMIZE = -w -Mfprelaxed -Mnouniform -Mstack_arrays -fast
OPTIMIZE = -w -O3 -Mfprelaxed -Mnouniform -Mstack_arrays
COPTIMIZE = -lm # use -mcpu=native for ARM
CXXOPTIMIZE = -std=c++11
CXXPORTABILITY = --c++17
#ARM
%if %{armOn} eq 'arm'
COPTIMIZE += -mcpu=native
#OPTIMIZE += -mcpu=a64fx
%endif
# SVE
%if %{sveOn} eq 'sve'
COPTIMIZE += -march=armv8-a+sve
%endif
%if %{pmodel} eq 'mpi'
pmodel=MPI
%endif
# OpenACC flags
%if %{pmodel} eq 'acc'
pmodel=ACC
# Use with PGI compiler only
# https://docs.nvidia.com/hpc-sdk/archive/21.7/
#OPTIMIZE += -acc=gpu
OPTIMIZE += -acc -ta=tesla -tp=zen #-Minfo=accel #-DSPEC_ACCEL_AWARE_MPI->hangs it forever
# 513.soma_t:
# PORTABILITY+=-DSPEC_NO_VAR_ARRAY_REDUCE
%endif
# OpenMP (CPU) flags
%if %{pmodel} eq 'omp'
pmodel=OMP
#OPTIMIZE += -qsmp=omp
OPTIMIZE += -fopenmp
#FOPTIMIZE +=
%endif
# OpenMP Targeting host flags
%if %{pmodel} eq 'tgt'
pmodel=TGT
# PGI
OPTIMIZE += -mp -acc=multicore
# Intel??
# OPTIMIZE += -qsmp=omp -qoffload
# -fopen-simd
# GCC (doesn't recognize its own flags)
#OPTIMIZE += -fopenmp
#OPTIMIZE += -fopenmp -mgomp
#OPTIMIZE += -fopenmp -msoft-stack -muniform-simt
#FOPTIMIZE += -homp
%endif
# OpenMP Targeting NVIDIA GPU flags
%if %{pmodel} eq 'tgtnv'
pmodel=TGT
# PGI
OPTIMIZE += -mp=gpu -acc
#FOPTIMIZE += -homp
# Note that NVHPC is in the process of adding OpenMP array
# reduction support so this option may be removed in future
513.soma_t:
PORTABILITY+=-DSPEC_NO_VAR_ARRAY_REDUCE
%endif
# No peak flags set, so make peak use the same flags as base
default=peak=default:
basepeak=1
#######################################################################
# Portability
#######################################################################
# The following section was added automatically, and contains settings that
# did not appear in the original configuration file, but were added to the
# raw file after the run.
default:
flagsurl000 = http://www.spec.org/hpc2021/flags/nv2021_flags.xml
interconnect_ib_hw_switch_ib_model000 = Mellanox IB EDR Switch IB-2
# Invocation command line:
# /lustre/scratch2/ws/0/notr584d-spec_nt/bin/runhpc --config nvhpc_ppc --define pmodel=acc --action run --nobuild --ranks=6 --reportable tiny
# output_root was not used for this run
#######################################################################
# Example configuration file for the NVHPC Compilers
#
# Defines: "pmodel" => "mpi", "acc", "omp", "tgt", "tgtnv" default "mpi"
# "label" => ext base label, default "nv"
#
# MPI-only Command:
# runhpc -c Example_gnu --reportable -T base --define pmodel=mpi --ranks=40 small
#
# OpenACC Command:
# runhpc -c Example_gnu --reportable -T base --define pmodel=acc --ranks=4 small
#
# OpenMP Command:
# runhpc -c Example_gnu --reportable -T base --define pmodel=omp --ranks=1 --threads=40 small
#
# OpenMP Target Offload to Host Command:
# runhpc -c Example_gnu --reportable -T base --define pmodel=tgt --ranks=1 --threads=40 small
#
# OpenMP Target Offload to NVIDIA GPU Command:
# runhpc -c Example_gnu --reportable -T base --define pmodel=tgtnv --ranks=4 small
#
#######################################################################
%ifndef %{label} # IF label is not set use default "nv"
% define label nv
%endif
%ifndef %{pmodel} # IF pmodel is not set use default mpi
% define pmodel mpi
%endif
teeout = yes
makeflags=-j 40
# Tester Information
license_num = 37A
test_sponsor = TU Dresden
tester = TU Dresden
#######################################################################
# SUT Section
#######################################################################
#include: Example_SUT.inc
# ----- Begin inclusion of 'Example_SUT.inc'
#######################################################################
# General SUT info
system_vendor = IBM
system_name = Taurus: IBM Power System AC922 (IBM Power9, Tesla V100-SXM2-32GB)
node_compute_sw_accel_driver = NVIDIA CUDA 440.64.00
node_compute_hw_adapter_ib_slot_type = None
node_compute_hw_adapter_ib_ports_used = 2
node_compute_hw_adapter_ib_model = Mellanox ConnectX-5
node_compute_hw_adapter_ib_interconnect = EDR InfiniBand
node_compute_hw_adapter_ib_firmware = 16.27.6008
node_compute_hw_adapter_ib_driver = mlx5_core
node_compute_hw_adapter_ib_data_rate = 100 Gb/s (4X EDR)
node_compute_hw_adapter_ib_count = 2
interconnect_ib_syslbl = Mellanox InfiniBand
interconnect_ib_purpose = MPI Traffic and GPFS access
interconnect_ib_order = 1
#interconnect_ib_hw_vendor = Mellanox
#interconnect_ib_hw_topo = Non-blocking Fat-tree
#interconnect_ib_hw_switch_ib_ports = 36
#interconnect_ib_hw_switch_ib_data_rate = 100 Gb/s
#interconnect_ib_hw_switch_ib_count = 1
#interconnect_ib_hw_model = Mellanox Switch IB-2
hw_avail = Nov-2018
sw_avail = Nov-2021
prepared_by = Noah Trumpik (Noah.Trumpik@tu-dresden.de)
#[Node_Description: Hardware]
node_compute_syslbl = IBM Power System AC922
node_compute_order = 1
node_compute_count = 30
node_compute_purpose = compute
node_compute_hw_vendor = IBM
node_compute_hw_model = IBM Power System AC922
node_compute_hw_cpu_name = IBM POWER9 2.2 (pvr 004e 1202)
node_compute_hw_ncpuorder = 2 chips
node_compute_hw_nchips = 2
node_compute_hw_ncores = 44
node_compute_hw_ncoresperchip = 22
node_compute_hw_nthreadspercore = 4
node_compute_hw_cpu_char = Up to 3.8 GHz
node_compute_hw_cpu_mhz = 2300
node_compute_hw_pcache = 32 KB I + 32 KB D on chip per core
node_compute_hw_scache = 512 KB I+D on chip per core
node_compute_hw_tcache000= 10240 KB I+D on chip per chip
node_compute_hw_ocache = None
node_compute_hw_memory = 256 GB (16 x 16 GB RDIMM-DDR4-2666)
node_compute_hw_disk000= 2 x 1 TB (ATA Rev BE35)
node_compute_hw_disk001 = NVMe SSD Controller 172Xa/172Xb
node_compute_hw_other = None
#[Node_Description: Accelerator]
node_compute_hw_accel_model = Tesla V100-SXM2-32GB
node_compute_hw_accel_count = 6
node_compute_hw_accel_vendor= NVIDIA Corporation
node_compute_hw_accel_type = GPU
node_compute_hw_accel_connect = NVLINK
node_compute_hw_accel_ecc = Yes
node_compute_hw_accel_desc = See Notes
#[Node_Description: Software]
node_compute_sw_os000 = Red Hat Enterprise Linux
node_compute_sw_os001 = 7.6
node_compute_sw_localfile = xfs
node_compute_sw_sharedfile = 4 PB Lustre parallel filesystem
node_compute_sw_state = Multi-user
node_compute_sw_other = None
#[Fileserver]
#[Interconnect]
#[Software]
sw_compiler000 = C/C++/Fortran: Version 21.5 of the
sw_compiler001 = NVHPC toolkit
sw_mpi_library = Open MPI Version 4.1.2
sw_mpi_other = None
system_class = Homogenous Cluster
sw_other = None
#[General notes]
notes_000 = MPI startup command:
notes_005 = srun command was used to launch jobs using 1 GPU/rank.
notes_010 =Detailed information from nvaccelinfo
notes_015 =
notes_020 =CUDA Driver Version: 11000
notes_025 =NVRM version: NVIDIA UNIX ppc64le Kernel Module 440.64.00 Wed Feb 26 16:01:28 UTC 2020
notes_030 =
notes_035 =Device Number: 0
notes_040 =Device Name: Tesla V100-SXM2-32GB
notes_045 =Device Revision Number: 7.0
notes_050 =Global Memory Size: 33822867456
notes_055 =Number of Multiprocessors: 80
notes_060 =Concurrent Copy and Execution: Yes
notes_065 =Total Constant Memory: 65536
notes_070 =Total Shared Memory per Block: 49152
notes_075 =Registers per Block: 65536
notes_080 =Warp Size: 32
notes_085 =Maximum Threads per Block: 1024
notes_090 =Maximum Block Dimensions: 1024, 1024, 64
notes_095 =Maximum Grid Dimensions: 2147483647 x 65535 x 65535
notes_100 =Maximum Memory Pitch: 2147483647B
notes_105 =Texture Alignment: 512B
notes_110 =Max Clock Rate: 1530 MHz
notes_115 =Execution Timeout: No
notes_120 =Integrated Device: No
notes_125 =Can Map Host Memory: Yes
notes_130 =Compute Mode: default
notes_135 =Concurrent Kernels: Yes
notes_140 =ECC Enabled: Yes
notes_145 =Memory Clock Rate: 877 MHz
notes_150 =Memory Bus Width: 4096 bits
notes_155 =L2 Cache Size: 6291456 bytes
notes_160 =Max Threads Per SMP: 2048
notes_165 =Async Engines: 4
notes_170 =Unified Addressing: Yes
notes_175 =Managed Memory: Yes
notes_180 =Concurrent Managed Memory: Yes
notes_185 =Preemption Supported: Yes
notes_190 =Cooperative Launch: Yes
notes_195 = Multi-Device: Yes
notes_200 =Default Target: cc70
notes_205 =
#######################################################################
# End of SUT section
#######################################################################
#######################################################################
# The header section of the config file. Must appear
# before any instances of "section markers" (see below)
#
# ext = how the binaries you generated will be identified
# tune = specify "base" or "peak" or "all"
label = %{label}_%{pmodel}
tune = base
output_format = text
use_submit_for_speed = 1
# Compiler Settings
default:
CC = mpicc
CXX = mpic++
FC = mpif90
# Compiler Version Flags
CC_VERSION_OPTION = --version
CXX_VERSION_OPTION = --version
FC_VERSION_OPTION = --version
# MPI options and binding environment, dependent upon Model being run
# Adjust to match your system
# OpenMP (CPU) Settings
%if %{pmodel} eq 'omp'
preENV_OMP_PLACES=cores
#preENV_OMP_PROC_BIND=true
#preENV_OMP_PLACES=0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39
#preENV_OMP_PLACES=0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24
%endif
#OpenMP Targeting Host Settings
%if %{pmodel} eq 'tgt'
#preENV_OMP_PROC_BIND=true
preENV_MPIR_CVAR_GPU_EAGER_DEVICE_MEM=0
preENV_MPICH_GPU_SUPPORT_ENABLED=1
preENV_MPICH_SMP_SINGLE_COPY_MODE=CMA
preENV_MPICH_GPU_EAGER_DEVICE_MEM=0
#preENV_OMP_PLACES=0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39
#preENV_OMP_PLACES=0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24
%endif
%ifdef %{ucx}
# if using OpenMPI with UCX support, these settings are needed with use of CUDA Aware MPI
# without these flags, LBM is known to hang when using OpenACC and OpenMP Target to GPUs
preENV_UCX_MEMTYPE_CACHE=n
preENV_UCX_TLS=self,shm,cuda_copy
%endif
#MPIRUN_OPTS = --bind-to none -q
# 1 GPU per RS, 7 cores per RS, 1 MPI task per RS, 6 RS per host
submit = srun ${MPIRUN_OPTS} $command
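# A hypothetical explicit Slurm mapping matching the comment above (assumption,
# adjust to the partition's actual CPU/GPU gres layout):
# MPIRUN_OPTS = --ntasks-per-node=6 --cpus-per-task=7 --gpus-per-task=1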
#######################################################################
# Optimization
# Note that SPEC baseline rules require that all uses of a given compiler
# use the same flags in the same order. See the SPEChpc Run Rules
# for more details
# http://www.spec.org/hpc2021/Docs/runrules.html
#
# OPTIMIZE = flags applicable to all compilers
# FOPTIMIZE = flags applicable to the Fortran compiler
# COPTIMIZE = flags applicable to the C compiler
# CXXOPTIMIZE = flags applicable to the C++ compiler
#
# See your compiler manual for information on the flags available
# for your compiler
# Compiler flags applied to all models
default=base=default:
OPTIMIZE = -O3
COPTIMIZE = -lm # use -mcpu=native for ARM
CXXOPTIMIZE = -std=c++11
#FOPTIMIZE = -ffree-line-length-none -fno-stack-protector
FOPTIMIZE =
%if %{pmodel} eq 'mpi'
pmodel=MPI
%endif
# OpenACC flags
%if %{pmodel} eq 'acc'
# Use with PGI compiler only
# https://docs.nvidia.com/hpc-sdk/archive/21.5/
pmodel=ACC
#OPTIMIZE += -acc=gpu
#OPTIMIZE += -acc -ta=tesla
OPTIMIZE += -acc -ta=tesla -DSPEC_ACCEL_AWARE_MPI #-Minfo=accel
%endif
# Note that NVHPC is in the process of adding OpenMP array
# reduction support so this option may be removed in future
# reduction not supported on taurusml due to old driver
513.soma_t:
PORTABILITY+=-DSPEC_NO_VAR_ARRAY_REDUCE
613.soma_s:
PORTABILITY+=-DSPEC_NO_VAR_ARRAY_REDUCE
# OpenMP (CPU) flags
%if %{pmodel} eq 'omp'
pmodel=OMP
#OPTIMIZE += -qsmp=omp
OPTIMIZE += -fopenmp
#FOPTIMIZE +=
%endif
# OpenMP Targeting host flags
%if %{pmodel} eq 'tgt'
pmodel=TGT
# PGI
OPTIMIZE += -mp -acc=multicore
# Intel??
# OPTIMIZE += -qsmp=omp -qoffload
# -fopen-simd
# GCC (doesn't recognize its own flags)
#OPTIMIZE += -fopenmp
#OPTIMIZE += -fopenmp -mgomp
#OPTIMIZE += -fopenmp -msoft-stack -muniform-simt
#FOPTIMIZE += -homp
%endif
# OpenMP Targeting NVIDIA GPU flags
%if %{pmodel} eq 'tgtnv'
pmodel=TGT
# PGI
OPTIMIZE += -mp=gpu -acc
#FOPTIMIZE += -homp
%endif
# No peak flags set, so make peak use the same flags as base
default=peak=default:
basepeak=1
#######################################################################
# Portability
#######################################################################
# The following section was added automatically, and contains settings that
# did not appear in the original configuration file, but were added to the
# raw file after the run.
default:
flagsurl000 = http://www.spec.org/hpc2021/flags/nv2021_flags.xml
interconnect_ib_hw_switch_ib_model000 = Mellanox IB EDR Switch IB-2
@@ -53,7 +53,13 @@ The behavior in terms of how to build, run and report the benchmark in a particular environment is
 controlled by a configuration file. There are a few examples included in the source code.
 Here you can apply compiler tuning and porting, specify the runtime environment and describe the
 system under test. SPEChpc2021 has been deployed on the partitions `haswell`, `ml` and
-`alpha`, configurations are available. No matter which one you choose as a starting point,
+`alpha`. Configurations are available, respectively:
+- [gnu-taurus.cfg](misc/spec_gnu-taurus.cfg)
+- [nvhpc-ppc.cfg](misc/spec_nvhpc-ppc.cfg)
+- [nvhpc-alpha.cfg](misc/spec_nvhpc-alpha.cfg)
+No matter which one you choose as a starting point,
 double-check the line that defines the submit command and make sure it says `srun [...]`, e.g.
 ``` bash
......