From 0cf417331d0655fbfdd0d5d120b90e64b5480a1c Mon Sep 17 00:00:00 2001 From: Moe Jette <jette1@llnl.gov> Date: Mon, 12 Jul 2010 20:27:25 +0000 Subject: [PATCH] various gres tweaks, modify help message, add cpu count to gres registration msg, validate cpu list in gres.conf --- src/common/gres.c | 54 ++++++++++++++++++++++++++++++-------- src/common/gres.h | 13 ++++++++- src/common/proc_args.c | 2 +- src/salloc/opt.c | 3 ++- src/sbatch/opt.c | 3 ++- src/slurmd/slurmd/slurmd.c | 9 ++++--- src/srun/opt.c | 3 ++- 7 files changed, 68 insertions(+), 19 deletions(-) diff --git a/src/common/gres.c b/src/common/gres.c index 21b8c705c0e..8de64c29ec4 100644 --- a/src/common/gres.c +++ b/src/common/gres.c @@ -97,12 +97,24 @@ typedef struct slurm_gres_context { } slurm_gres_context_t; static int gres_context_cnt = -1; +static uint32_t gres_cpu_cnt = 0; static bool gres_debug = false; static slurm_gres_context_t *gres_context = NULL; static char *gres_plugin_list = NULL; static pthread_mutex_t gres_context_lock = PTHREAD_MUTEX_INITIALIZER; static List gres_conf_list = NULL; +static void _destroy_gres_slurmd_conf(void *x); +static char * _get_gres_conf(void); +static int _load_gres_plugin(char *plugin_name, + slurm_gres_context_t *plugin_context); +static int _log_gres_slurmd_conf(void *x, void *arg); +static int _parse_gres_config(void **dest, slurm_parser_enum_t type, + const char *key, const char *value, + const char *line, char **leftover); +static int _strcmp(const char *s1, const char *s2); +static int _unload_gres_plugin(slurm_gres_context_t *plugin_context); + typedef struct gres_state { uint32_t plugin_id; void *gres_data; @@ -325,6 +337,7 @@ extern int gres_plugin_help_msg(char *msg, int msg_size) { int i, rc; char *tmp_msg; + char *header = "Valid gres options are:\n"; if (msg_size < 1) return EINVAL; @@ -333,6 +346,8 @@ extern int gres_plugin_help_msg(char *msg, int msg_size) tmp_msg = xmalloc(msg_size); rc = gres_plugin_init(); + if ((strlen(header) + 2) <= msg_size) 
+ strcat(msg, header); slurm_mutex_lock(&gres_context_lock); for (i=0; ((i < gres_context_cnt) && (rc == SLURM_SUCCESS)); i++) { tmp_msg = (gres_context[i].ops.help_msg); @@ -340,9 +355,8 @@ extern int gres_plugin_help_msg(char *msg, int msg_size) continue; if ((strlen(msg) + strlen(tmp_msg) + 2) > msg_size) break; - if (msg[0]) - strcat(msg, "\n"); strcat(msg, tmp_msg); + strcat(msg, "\n"); } slurm_mutex_unlock(&gres_context_lock); @@ -442,8 +456,13 @@ static int _log_gres_slurmd_conf(void *x, void *arg) p = (gres_slurmd_conf_t *) x; xassert(p); - info("Gres Name:%s File:%s CPUs:%s Count:%u", - p->name, p->file, p->cpus, p->count); + if (p->cpus) { + info("Gres Name:%s Count:%u File:%s CPUs:%s CpuCnt:%u", + p->name, p->count, p->file, p->cpus, p->cpu_cnt); + } else { + info("Gres Name:%s Count:%u File:%s", + p->name, p->count, p->file); + } return 0; } @@ -472,8 +491,17 @@ static int _parse_gres_config(void **dest, slurm_parser_enum_t type, if (!s_p_get_uint32(&p->count, "Count", tbl)) p->count = 1; if (s_p_get_string(&p->cpus, "CPUs", tbl)) { -//FIXME: change to bitmap, size? change from cpuset/numa to slurmctld format -//bit_unfmt(bimap, p->cpus); + bitstr_t *cpu_bitmap; /* Just use to validate config */ + p->cpu_cnt = gres_cpu_cnt; + cpu_bitmap = bit_alloc(gres_cpu_cnt); + if (cpu_bitmap == NULL) + fatal("bit_alloc: malloc failure"); + i = bit_unfmt(cpu_bitmap, p->cpus); + if (i != 0) { + fatal("Invalid gres data for %s, CPUs=%s", + p->name, p->cpus); + } + FREE_NULL_BITMAP(cpu_bitmap); } if (s_p_get_string(&p->file, "File", tbl)) { struct stat config_stat; @@ -497,9 +525,10 @@ static int _parse_gres_config(void **dest, slurm_parser_enum_t type, } /* - * Load this node's gres configuration (i.e. how many resources it has) + * Load this node's configuration (how many resources it has, topology, etc.) 
+ * IN cpu_cnt - Number of CPUs configured on this node */ -extern int gres_plugin_node_config_load(void) +extern int gres_plugin_node_config_load(uint32_t cpu_cnt) { static s_p_options_t _gres_options[] = { {"Name", S_P_ARRAY, _parse_gres_config, NULL}, @@ -517,7 +546,7 @@ extern int gres_plugin_node_config_load(void) return SLURM_SUCCESS; slurm_mutex_lock(&gres_context_lock); - + gres_cpu_cnt = cpu_cnt; if (stat(gres_conf_file, &config_stat) < 0) fatal("can't stat gres.conf file %s: %m", gres_conf_file); tbl = s_p_hashtbl_create(_gres_options); @@ -572,6 +601,7 @@ extern int gres_plugin_node_config_pack(Buf buffer) pack32(magic, buffer); pack32(gres_slurmd_conf->plugin_id, buffer); pack32(gres_slurmd_conf->count, buffer); + pack32(gres_slurmd_conf->cpu_cnt, buffer); packstr(gres_slurmd_conf->cpus, buffer); } list_iterator_destroy(iter); @@ -589,7 +619,7 @@ extern int gres_plugin_node_config_pack(Buf buffer) extern int gres_plugin_node_config_unpack(Buf buffer, char* node_name) { int i, j, rc; - uint32_t count, magic, plugin_id, utmp32; + uint32_t count, cpu_cnt, magic, plugin_id, utmp32; uint16_t rec_cnt, version; char *tmp_cpus; gres_slurmd_conf_t *p; @@ -618,6 +648,7 @@ extern int gres_plugin_node_config_unpack(Buf buffer, char* node_name) goto unpack_error; safe_unpack32(&plugin_id, buffer); safe_unpack32(&count, buffer); + safe_unpack32(&cpu_cnt, buffer); safe_unpackstr_xmalloc(&tmp_cpus, &utmp32, buffer); for (j=0; j<gres_context_cnt; j++) { if (*(gres_context[j].ops.plugin_id) == plugin_id) { @@ -636,8 +667,9 @@ extern int gres_plugin_node_config_unpack(Buf buffer, char* node_name) } p = xmalloc(sizeof(gres_slurmd_conf_t)); p->count = count; - p->plugin_id = plugin_id; + p->cpu_cnt = cpu_cnt; p->cpus = tmp_cpus; + p->plugin_id = plugin_id; tmp_cpus = NULL; /* Nothing left to xfree */ list_append(gres_conf_list, p); } diff --git a/src/common/gres.h b/src/common/gres.h index 7cef2c1a318..c1fdf02903c 100644 --- a/src/common/gres.h +++ b/src/common/gres.h @@ 
-47,10 +47,20 @@ /* Gres state information gathered by slurmd daemon */ typedef struct gres_slurmd_conf { + /* Count of gres available in this configuration record */ uint32_t count; + + /* Specific CPUs associated with this configuration record */ + uint16_t cpu_cnt; char *cpus; + + /* Device file associated with this configuration record */ char *file; + + /* Name of this gres type */ char *name; + + /* Gres ID number */ uint32_t plugin_id; } gres_slurmd_conf_t; @@ -142,8 +152,9 @@ extern int gres_plugin_help_msg(char *msg, int msg_size); */ /* * Load this node's configuration (how many resources it has, topology, etc.) + * IN cpu_cnt - Number of CPUs configured on this node */ -extern int gres_plugin_node_config_load(void); +extern int gres_plugin_node_config_load(uint32_t cpu_cnt); /* * Pack this node's gres configuration into a buffer diff --git a/src/common/proc_args.c b/src/common/proc_args.c index a6add5c7fec..4c009361340 100644 --- a/src/common/proc_args.c +++ b/src/common/proc_args.c @@ -91,7 +91,7 @@ void print_gres_help(void) gres_plugin_help_msg(help_msg, sizeof(help_msg)); if (help_msg[0]) - printf("%s\n", help_msg); + printf("%s", help_msg); else printf("No gres help is available\n"); } diff --git a/src/salloc/opt.c b/src/salloc/opt.c index 9680021627d..c63c99ff918 100644 --- a/src/salloc/opt.c +++ b/src/salloc/opt.c @@ -1103,7 +1103,8 @@ void set_options(const int argc, char **argv) opt.time_min_str = xstrdup(optarg); break; case LONG_OPT_GRES: - if (!strcasecmp(optarg, "help")) { + if (!strcasecmp(optarg, "help") || + !strcasecmp(optarg, "list")) { print_gres_help(); exit(0); } diff --git a/src/sbatch/opt.c b/src/sbatch/opt.c index 356ac9db56f..9622febec05 100644 --- a/src/sbatch/opt.c +++ b/src/sbatch/opt.c @@ -1589,7 +1589,8 @@ static void _set_options(int argc, char **argv) opt.time_min_str = xstrdup(optarg); break; case LONG_OPT_GRES: - if (!strcasecmp(optarg, "help")) { + if (!strcasecmp(optarg, "help") || + !strcasecmp(optarg, "list")) { 
print_gres_help(); exit(0); } diff --git a/src/slurmd/slurmd/slurmd.c b/src/slurmd/slurmd/slurmd.c index 40281415d67..2efb2e790f8 100644 --- a/src/slurmd/slurmd/slurmd.c +++ b/src/slurmd/slurmd/slurmd.c @@ -890,7 +890,8 @@ _reconfigure(void) gres_plugin_reconfig(&did_change); if (did_change) { - (void) gres_plugin_node_config_load(); + uint32_t cpu_cnt = MAX(conf->conf_cpus, conf->block_map_size); + (void) gres_plugin_node_config_load(cpu_cnt); send_registration_msg(SLURM_SUCCESS, false); } @@ -1168,6 +1169,8 @@ _slurmd_init(void) slurm_ctl_conf_t *cf; struct stat stat_buf; char slurm_stepd_path[MAXPATHLEN]; + uint32_t cpu_cnt; + /* * Process commandline arguments first, since one option may be * an alternate location for the slurm config file. @@ -1180,9 +1183,9 @@ _slurmd_init(void) * */ _read_config(); - + cpu_cnt = MAX(conf->conf_cpus, conf->block_map_size); if ((gres_plugin_init() != SLURM_SUCCESS) || - (gres_plugin_node_config_load() != SLURM_SUCCESS)) + (gres_plugin_node_config_load(cpu_cnt) != SLURM_SUCCESS)) return SLURM_FAILURE; if (slurm_topo_init() != SLURM_SUCCESS) return SLURM_FAILURE; diff --git a/src/srun/opt.c b/src/srun/opt.c index e5d8412c14d..f950c95f33d 100644 --- a/src/srun/opt.c +++ b/src/srun/opt.c @@ -1435,7 +1435,8 @@ static void set_options(const int argc, char **argv) opt.time_min_str = xstrdup(optarg); break; case LONG_OPT_GRES: - if (!strcasecmp(optarg, "help")) { + if (!strcasecmp(optarg, "help") || + !strcasecmp(optarg, "list")) { print_gres_help(); exit(0); } -- GitLab