diff --git a/src/common/gres.c b/src/common/gres.c index 21b8c705c0ebad7d8ecf8ebb48f1c9a045f90383..8de64c29ec4b0d2e7d71305ed1021ede939cbfee 100644 --- a/src/common/gres.c +++ b/src/common/gres.c @@ -97,12 +97,24 @@ typedef struct slurm_gres_context { } slurm_gres_context_t; static int gres_context_cnt = -1; +static uint32_t gres_cpu_cnt = 0; static bool gres_debug = false; static slurm_gres_context_t *gres_context = NULL; static char *gres_plugin_list = NULL; static pthread_mutex_t gres_context_lock = PTHREAD_MUTEX_INITIALIZER; static List gres_conf_list = NULL; +static void _destroy_gres_slurmd_conf(void *x); +static char * _get_gres_conf(void); +static int _load_gres_plugin(char *plugin_name, + slurm_gres_context_t *plugin_context); +static int _log_gres_slurmd_conf(void *x, void *arg); +static int _parse_gres_config(void **dest, slurm_parser_enum_t type, + const char *key, const char *value, + const char *line, char **leftover); +static int _strcmp(const char *s1, const char *s2); +static int _unload_gres_plugin(slurm_gres_context_t *plugin_context); + typedef struct gres_state { uint32_t plugin_id; void *gres_data; @@ -325,6 +337,7 @@ extern int gres_plugin_help_msg(char *msg, int msg_size) { int i, rc; char *tmp_msg; + char *header = "Valid gres options are:\n"; if (msg_size < 1) return EINVAL; @@ -333,6 +346,8 @@ extern int gres_plugin_help_msg(char *msg, int msg_size) tmp_msg = xmalloc(msg_size); rc = gres_plugin_init(); + if ((strlen(header) + 2) <= msg_size) + strcat(msg, header); slurm_mutex_lock(&gres_context_lock); for (i=0; ((i < gres_context_cnt) && (rc == SLURM_SUCCESS)); i++) { tmp_msg = (gres_context[i].ops.help_msg); @@ -340,9 +355,8 @@ extern int gres_plugin_help_msg(char *msg, int msg_size) continue; if ((strlen(msg) + strlen(tmp_msg) + 2) > msg_size) break; - if (msg[0]) - strcat(msg, "\n"); strcat(msg, tmp_msg); + strcat(msg, "\n"); } slurm_mutex_unlock(&gres_context_lock); @@ -442,8 +456,13 @@ static int _log_gres_slurmd_conf(void *x, void 
*arg) p = (gres_slurmd_conf_t *) x; xassert(p); - info("Gres Name:%s File:%s CPUs:%s Count:%u", - p->name, p->file, p->cpus, p->count); + if (p->cpus) { + info("Gres Name:%s Count:%u File:%s CPUs:%s CpuCnt:%u", + p->name, p->count, p->file, p->cpus, p->cpu_cnt); + } else { + info("Gres Name:%s Count:%u File:%s", + p->name, p->count, p->file); + } return 0; } @@ -472,8 +491,17 @@ static int _parse_gres_config(void **dest, slurm_parser_enum_t type, if (!s_p_get_uint32(&p->count, "Count", tbl)) p->count = 1; if (s_p_get_string(&p->cpus, "CPUs", tbl)) { -//FIXME: change to bitmap, size? change from cpuset/numa to slurmctld format -//bit_unfmt(bimap, p->cpus); + bitstr_t *cpu_bitmap; /* Just use to validate config */ + p->cpu_cnt = gres_cpu_cnt; + cpu_bitmap = bit_alloc(gres_cpu_cnt); + if (cpu_bitmap == NULL) + fatal("bit_alloc: malloc failure"); + i = bit_unfmt(cpu_bitmap, p->cpus); + if (i != 0) { + fatal("Invalid gres data for %s, CPUs=%s", + p->name, p->cpus); + } + FREE_NULL_BITMAP(cpu_bitmap); } if (s_p_get_string(&p->file, "File", tbl)) { struct stat config_stat; @@ -497,9 +525,10 @@ static int _parse_gres_config(void **dest, slurm_parser_enum_t type, } /* - * Load this node's gres configuration (i.e. how many resources it has) + * Load this node's configuration (how many resources it has, topology, etc.) 
+ * IN cpu_cnt - Number of CPUs configured on this node */ -extern int gres_plugin_node_config_load(void) +extern int gres_plugin_node_config_load(uint32_t cpu_cnt) { static s_p_options_t _gres_options[] = { {"Name", S_P_ARRAY, _parse_gres_config, NULL}, @@ -517,7 +546,7 @@ extern int gres_plugin_node_config_load(void) return SLURM_SUCCESS; slurm_mutex_lock(&gres_context_lock); - + gres_cpu_cnt = cpu_cnt; if (stat(gres_conf_file, &config_stat) < 0) fatal("can't stat gres.conf file %s: %m", gres_conf_file); tbl = s_p_hashtbl_create(_gres_options); @@ -572,6 +601,7 @@ extern int gres_plugin_node_config_pack(Buf buffer) pack32(magic, buffer); pack32(gres_slurmd_conf->plugin_id, buffer); pack32(gres_slurmd_conf->count, buffer); + pack32(gres_slurmd_conf->cpu_cnt, buffer); packstr(gres_slurmd_conf->cpus, buffer); } list_iterator_destroy(iter); @@ -589,7 +619,7 @@ extern int gres_plugin_node_config_pack(Buf buffer) extern int gres_plugin_node_config_unpack(Buf buffer, char* node_name) { int i, j, rc; - uint32_t count, magic, plugin_id, utmp32; + uint32_t count, cpu_cnt, magic, plugin_id, utmp32; uint16_t rec_cnt, version; char *tmp_cpus; gres_slurmd_conf_t *p; @@ -618,6 +648,7 @@ extern int gres_plugin_node_config_unpack(Buf buffer, char* node_name) goto unpack_error; safe_unpack32(&plugin_id, buffer); safe_unpack32(&count, buffer); + safe_unpack32(&cpu_cnt, buffer); safe_unpackstr_xmalloc(&tmp_cpus, &utmp32, buffer); for (j=0; j<gres_context_cnt; j++) { if (*(gres_context[j].ops.plugin_id) == plugin_id) { @@ -636,8 +667,9 @@ extern int gres_plugin_node_config_unpack(Buf buffer, char* node_name) } p = xmalloc(sizeof(gres_slurmd_conf_t)); p->count = count; - p->plugin_id = plugin_id; + p->cpu_cnt = cpu_cnt; p->cpus = tmp_cpus; + p->plugin_id = plugin_id; tmp_cpus = NULL; /* Nothing left to xfree */ list_append(gres_conf_list, p); } diff --git a/src/common/gres.h b/src/common/gres.h index 7cef2c1a31851322fd91dd2a22e9441efd7a503e..c1fdf02903cec0cdf0f94ab471a37fbc19c5f7f9 
100644 --- a/src/common/gres.h +++ b/src/common/gres.h @@ -47,10 +47,20 @@ /* Gres state information gathered by slurmd daemon */ typedef struct gres_slurmd_conf { + /* Count of gres available in this configuration record */ uint32_t count; + + /* Specific CPUs associated with this configuration record */ + uint16_t cpu_cnt; char *cpus; + + /* Device file associated with this configuration record */ char *file; + + /* Name of this gres type */ char *name; + + /* Gres ID number */ uint32_t plugin_id; } gres_slurmd_conf_t; @@ -142,8 +152,9 @@ extern int gres_plugin_help_msg(char *msg, int msg_size); */ /* * Load this node's configuration (how many resources it has, topology, etc.) + * IN cpu_cnt - Number of CPUs configured on this node */ -extern int gres_plugin_node_config_load(void); +extern int gres_plugin_node_config_load(uint32_t cpu_cnt); /* * Pack this node's gres configuration into a buffer diff --git a/src/common/proc_args.c b/src/common/proc_args.c index a6add5c7fec46fc4ffe04ee8e4c438cdb833a649..4c009361340706042c1fae9f0b52e84618f1cb72 100644 --- a/src/common/proc_args.c +++ b/src/common/proc_args.c @@ -91,7 +91,7 @@ void print_gres_help(void) gres_plugin_help_msg(help_msg, sizeof(help_msg)); if (help_msg[0]) - printf("%s\n", help_msg); + printf("%s", help_msg); else printf("No gres help is available\n"); } diff --git a/src/salloc/opt.c b/src/salloc/opt.c index 9680021627da8def07d425f774e4095fff339297..c63c99ff918e47f5c0b903eeb56c82d37f9d3973 100644 --- a/src/salloc/opt.c +++ b/src/salloc/opt.c @@ -1103,7 +1103,8 @@ void set_options(const int argc, char **argv) opt.time_min_str = xstrdup(optarg); break; case LONG_OPT_GRES: - if (!strcasecmp(optarg, "help")) { + if (!strcasecmp(optarg, "help") || + !strcasecmp(optarg, "list")) { print_gres_help(); exit(0); } diff --git a/src/sbatch/opt.c b/src/sbatch/opt.c index 356ac9db56f14e1c4d30f32068b0768dcf94852c..9622febec05fee9f0227a2e103ee7bb13c2f5ede 100644 --- a/src/sbatch/opt.c +++ b/src/sbatch/opt.c @@ 
-1589,7 +1589,8 @@ static void _set_options(int argc, char **argv) opt.time_min_str = xstrdup(optarg); break; case LONG_OPT_GRES: - if (!strcasecmp(optarg, "help")) { + if (!strcasecmp(optarg, "help") || + !strcasecmp(optarg, "list")) { print_gres_help(); exit(0); } diff --git a/src/slurmd/slurmd/slurmd.c b/src/slurmd/slurmd/slurmd.c index 40281415d67cc5e72347d09f0fe5b1b665043fdc..2efb2e790f846afcb33bebeba6c6a4e5bea1c360 100644 --- a/src/slurmd/slurmd/slurmd.c +++ b/src/slurmd/slurmd/slurmd.c @@ -890,7 +890,8 @@ _reconfigure(void) gres_plugin_reconfig(&did_change); if (did_change) { - (void) gres_plugin_node_config_load(); + uint32_t cpu_cnt = MAX(conf->conf_cpus, conf->block_map_size); + (void) gres_plugin_node_config_load(cpu_cnt); send_registration_msg(SLURM_SUCCESS, false); } @@ -1168,6 +1169,8 @@ _slurmd_init(void) slurm_ctl_conf_t *cf; struct stat stat_buf; char slurm_stepd_path[MAXPATHLEN]; + uint32_t cpu_cnt; + /* * Process commandline arguments first, since one option may be * an alternate location for the slurm config file. @@ -1180,9 +1183,9 @@ _slurmd_init(void) * */ _read_config(); - + cpu_cnt = MAX(conf->conf_cpus, conf->block_map_size); if ((gres_plugin_init() != SLURM_SUCCESS) || - (gres_plugin_node_config_load() != SLURM_SUCCESS)) + (gres_plugin_node_config_load(cpu_cnt) != SLURM_SUCCESS)) return SLURM_FAILURE; if (slurm_topo_init() != SLURM_SUCCESS) return SLURM_FAILURE; diff --git a/src/srun/opt.c b/src/srun/opt.c index e5d8412c14d93d25b056075ef643764c7b41f0cc..f950c95f33da4f594c12e944c261a4b6c0897beb 100644 --- a/src/srun/opt.c +++ b/src/srun/opt.c @@ -1435,7 +1435,8 @@ static void set_options(const int argc, char **argv) opt.time_min_str = xstrdup(optarg); break; case LONG_OPT_GRES: - if (!strcasecmp(optarg, "help")) { + if (!strcasecmp(optarg, "help") || + !strcasecmp(optarg, "list")) { print_gres_help(); exit(0); }