Skip to content
Snippets Groups Projects
Commit 8a11ea4e authored by Michael Hinton, committed by Morris Jette
Browse files

Fix bug where frequencies of GPUs in a cgroup were not set

Check for cgroup usage and change GPU indexes accordingly.
Fix formatting errors in docs.
bug 5520
parent d583c9ed
No related branches found
No related tags found
No related merge requests found
......@@ -594,7 +594,7 @@ If the specified numeric value is not possible, a value as close as
possible will be used. See below for definition of the values.
The \fIverbose\fR option causes current GPU frequency information to be logged.
Examples of use include "\-\-gpu\-freq=medium,memory=high" and
\-\-gpu\-freq=450".
"\-\-gpu\-freq=450".
Supported \fIvalue\fR definitions:
.RS
......
......@@ -687,7 +687,7 @@ If the specified numeric value is not possible, a value as close as
possible will be used. See below for definition of the values.
The \fIverbose\fR option causes current GPU frequency information to be logged.
Examples of use include "\-\-gpu\-freq=medium,memory=high" and
\-\-gpu\-freq=450".
"\-\-gpu\-freq=450".
Supported \fIvalue\fR definitions:
.RS
......
......@@ -867,7 +867,7 @@ If the specified numeric value is not possible, a value as close as
possible will be used. See below for definition of the values.
The \fIverbose\fR option causes current GPU frequency information to be logged.
Examples of use include "\-\-gpu\-freq=medium,memory=high" and
\-\-gpu\-freq=450".
"\-\-gpu\-freq=450".
Supported \fIvalue\fR definitions:
.RS
......
......@@ -1095,7 +1095,7 @@ The PSS value will be saved as RSS.
Kill steps that are being detected to use more memory than requested, every
time accounting information is gathered by JobAcctGather plugin.
This parameter will not kill a job directly, but only the step.
See \fBMemLimitEnforce\fB for that purpose. This parameter should be used
See \fBMemLimitEnforce\fR for that purpose. This parameter should be used
with caution as if jobs exceeds its memory allocation it may affect other
processes and/or machine health.
NOTE: It is recommended to limit memory by enabling task/cgroup
......@@ -1495,7 +1495,7 @@ a node cannot be shared with other users.
\fBMemLimitEnforce\fR
If set to yes then Slurm will terminate the job if it exceeds the value
requested using the \-\-mem\-per\-cpu option of salloc/sbatch/srun.
This is useful in combination with \fBJobAcctGaterParams=OverMemoryKill\fB.
This is useful in combination with \fBJobAcctGatherParams=OverMemoryKill\fR.
Used when jobs need to specify \-\-mem\-per\-cpu for scheduling and they should
be terminated if they exceed the estimated value.
The default value is 'no', which disables this enforcing mechanism.
......
......@@ -56,6 +56,7 @@
#include "src/common/env.h"
#include "src/common/gres.h"
#include "src/common/list.h"
#include "src/common/xcgroup_read_config.h"
#include "src/common/xstring.h"
#include "../common/gres_common.h"
......@@ -841,11 +842,16 @@ static void _reset_freq(bitstr_t *gpus, log_level_t log_lvl)
static void _set_freq(bitstr_t *gpus, char *gpu_freq, log_level_t log_lvl)
{
bool verbose_flag = false;
int gpu_len = bit_size(gpus);
int gpu_len = 0;
int i = -1, count = 0, count_set = 0;
unsigned int gpu_freq_num = 0, mem_freq_num = 0;
bool freq_set = false, freq_logged = false;
char *tmp = NULL;
slurm_cgroup_conf_t *cg_conf;
bool task_cgroup = false;
bool constrained_devices = false;
bool cgroups_active = false;
char *task_plugin_type = NULL;
/*
* Parse frequency information
......@@ -867,18 +873,48 @@ static void _set_freq(bitstr_t *gpus, char *gpu_freq, log_level_t log_lvl)
return;
}
// Check if GPUs are constrained by cgroups
slurm_mutex_lock(&xcgroup_config_read_mutex);
cg_conf = xcgroup_get_slurm_cgroup_conf();
if (cg_conf && cg_conf->constrain_devices)
constrained_devices = true;
slurm_mutex_unlock(&xcgroup_config_read_mutex);
// Check if task/cgroup plugin is loaded
task_plugin_type = slurm_get_task_plugin();
if (strstr(task_plugin_type, "cgroup"))
task_cgroup = true;
xfree(task_plugin_type);
// If both of these are true, then GPUs will be constrained
if (constrained_devices && task_cgroup) {
cgroups_active = true;
gpu_len = bit_set_count(gpus);
debug2("%s: cgroups are configured. Using LOCAL GPU IDs",
__func__);
} else {
gpu_len = bit_size(gpus);
debug2("%s: cgroups are NOT configured. Assuming GLOBAL GPU IDs",
__func__);
}
/*
* Set the frequency of each device allocated to the step
*/
for (i = 0; i < gpu_len; i++) {
char *sep = "";
nvmlDevice_t device;
if (!bit_test(gpus, i))
// Only check the global GPU bitstring if not using cgroups
if (!cgroups_active && !bit_test(gpus, i)) {
debug2("Passing over NVML device %u", i);
continue;
}
count++;
if (!_nvml_get_handle(i, &device))
continue;
debug2("Setting frequency of NVML device %u", i);
_nvml_get_nearest_freqs(&device, &mem_freq_num, &gpu_freq_num,
log_lvl);
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment