From 6d10ad4af0636071f88937b7a5e2943fbca16e13 Mon Sep 17 00:00:00 2001 From: Moe Jette <jette1@llnl.gov> Date: Wed, 21 Jul 2010 17:39:59 +0000 Subject: [PATCH] in gres logic, permit gres.conf to have CPUs defined for only some of the records for a particular gres type --- doc/man/man5/gres.conf.5 | 13 +++++++------ src/common/gres.c | 28 +++++++++++++++++++--------- 2 files changed, 26 insertions(+), 15 deletions(-) diff --git a/doc/man/man5/gres.conf.5 b/doc/man/man5/gres.conf.5 index 0c85634618d..3f18c696552 100644 --- a/doc/man/man5/gres.conf.5 +++ b/doc/man/man5/gres.conf.5 @@ -31,18 +31,19 @@ Specify the CPU index numbers for the specific CPUs which can use this resources. For example, it may be strongly preferable to use specific CPUs with specific devices (e.g. on a NUMA architecture). Multiple CPUs may be specified using a comma -delimited list. If not specified, then any CPU can be used with -the resources. If any CPU can be used with the resources, then -do not specify the \fBCPUs\fR option for improved speed in the -SLURM scheduling logic. +delimited list or a range may be specified using a "\-" separator +(e.g. "0,1,2,3" or "0\-3"). +If not specified, then any CPU can be used with the resources. +If any CPU can be used with the resources, then do not specify the +\fBCPUs\fR option for improved speed in the SLURM scheduling logic. .TP \fBFile\fR Fully qualified pathname of the device file associated with a resource. This field is generally required if enforcement of generic resource allocations is to be supported (i.e. prevents a users from making -use of resources allocated to a different user. If specified then -\fBCount\fR must be one. +use of resources allocated to a different user). If \fBFile\fR is specified then +\fBCount\fR must be either set to one or not set (the default value is one). 
.TP \fBName\fR diff --git a/src/common/gres.c b/src/common/gres.c index 3d4e91e2b32..a4fb9357a56 100644 --- a/src/common/gres.c +++ b/src/common/gres.c @@ -553,6 +553,7 @@ static int _parse_gres_config(void **dest, slurm_parser_enum_t type, p = xmalloc(sizeof(gres_slurmd_conf_t)); p->name = xstrdup(value); + p->cpu_cnt = gres_cpu_cnt; if (s_p_get_uint32(&p->count, "Count", tbl)) { if (p->count == 0) fatal("Invalid gres data for %s, Count=0", p->name); @@ -560,7 +561,6 @@ static int _parse_gres_config(void **dest, slurm_parser_enum_t type, p->count = 1; if (s_p_get_string(&p->cpus, "CPUs", tbl)) { bitstr_t *cpu_bitmap; /* Just use to validate config */ - p->cpu_cnt = gres_cpu_cnt; cpu_bitmap = bit_alloc(gres_cpu_cnt); if (cpu_bitmap == NULL) fatal("bit_alloc: malloc failure"); @@ -956,7 +956,7 @@ static uint32_t _get_tot_gres_cnt(uint32_t plugin_id, uint32_t *set_cnt) { ListIterator iter; gres_slurmd_conf_t *gres_slurmd_conf; - uint32_t gres_cnt = 0; + uint32_t gres_cnt = 0, cpu_set_cnt = 0, rec_cnt = 0; xassert(set_cnt); *set_cnt = 0; @@ -970,10 +970,13 @@ static uint32_t _get_tot_gres_cnt(uint32_t plugin_id, uint32_t *set_cnt) if (gres_slurmd_conf->plugin_id != plugin_id) continue; gres_cnt += gres_slurmd_conf->count; + rec_cnt++; if (gres_slurmd_conf->cpus) - (*set_cnt)++; + cpu_set_cnt++; } list_iterator_destroy(iter); + if (cpu_set_cnt) + *set_cnt = rec_cnt; return gres_cnt; } @@ -1006,7 +1009,7 @@ extern int _node_config_validate(char *node_name, char *orig_config, if (updated_config == false) return SLURM_SUCCESS; - if ((set_cnt != gres_data->topo_cnt) || 1) { + if ((set_cnt != 0) || (set_cnt != gres_data->topo_cnt)) { /* Rebuild GRES information when the node registers. * Do we want to do this for every node registration * since it is fairly high overhead? 
*/ @@ -1024,16 +1027,23 @@ extern int _node_config_validate(char *node_name, char *orig_config, gres_inx = i = 0; while ((gres_slurmd_conf = (gres_slurmd_conf_t *) list_next(iter))) { - if ((gres_slurmd_conf->cpus == 0) || - (gres_slurmd_conf->plugin_id != - *context_ptr->ops.plugin_id)) + if (gres_slurmd_conf->plugin_id != + *context_ptr->ops.plugin_id) continue; gres_data->cpus_bitmap[i] = bit_alloc(gres_slurmd_conf->cpu_cnt); if (gres_data->cpus_bitmap[i] == NULL) fatal("bit_alloc: malloc failure"); - bit_unfmt(gres_data->cpus_bitmap[i], - gres_slurmd_conf->cpus); + if (gres_slurmd_conf->cpus) { + bit_unfmt(gres_data->cpus_bitmap[i], + gres_slurmd_conf->cpus); + } else { + error("%s: has CPUs configured for only some " + "of the records on node %s", + context_ptr->gres_type, node_name); + bit_nset(gres_data->cpus_bitmap[i], 0, + (gres_slurmd_conf->cpu_cnt - 1)); + } gres_inx += gres_slurmd_conf->count; i++; } -- GitLab