From 0a798abfb501e0ab145b8478cca8633ad6e75ef1 Mon Sep 17 00:00:00 2001 From: Moe Jette <jette1@llnl.gov> Date: Mon, 26 Jul 2010 19:39:53 +0000 Subject: [PATCH] modify gres.conf file parsing to accept file name with numeric suffix range (e.g. "File=/dev/nvidia[0-3]") --- doc/man/man5/gres.conf.5 | 13 ++++-- src/common/gres.c | 85 ++++++++++++++++++++++++++++++++++------ 2 files changed, 81 insertions(+), 17 deletions(-) diff --git a/doc/man/man5/gres.conf.5 b/doc/man/man5/gres.conf.5 index 3f18c696552..2dcb7339db4 100644 --- a/doc/man/man5/gres.conf.5 +++ b/doc/man/man5/gres.conf.5 @@ -23,7 +23,8 @@ The overall configuration parameters available include: .TP \fBCount\fR Number of resources of this type available on this node. -The default value is one. +The default value is set to the number of \fBFile\fR values specified (if any), +otherwise the default value is one. .TP \fBCPUs\fR @@ -39,11 +40,15 @@ If any CPU can be used with the resources, then do not specify the .TP \fBFile\fR -Fully qualified pathname of the device file associated with a resource. +Fully qualified pathname of the device files associated with a resource. +The name can include a numeric range suffix to be interpreted by SLURM +(e.g. \fIFile=/dev/nvidia[0-3]\fR). This field is generally required if enforcement of generic resource allocations is to be supported (i.e. prevents a users from making -use of resources allocated to a different user. If \fBFile\fR is specified then -\fBCount\fR must be either set to one or not set (the default value is one). +use of resources allocated to a different user). +If \fBFile\fR is specified then \fBCount\fR must be either set to the number +of file names specified or not set (the default value is the number of files +specified). 
.TP \fBName\fR diff --git a/src/common/gres.c b/src/common/gres.c index 0e8fe24a6cd..73493480e25 100644 --- a/src/common/gres.c +++ b/src/common/gres.c @@ -58,6 +58,7 @@ # include <sys/types.h> # include <unistd.h> # include <stdint.h> +# include <stdlib.h> # include <string.h> #endif /* HAVE_CONFIG_H */ @@ -142,6 +143,7 @@ extern uint32_t _job_test(void *job_gres_data, void *node_gres_data, static int _load_gres_plugin(char *plugin_name, slurm_gres_context_t *plugin_context); static int _log_gres_slurmd_conf(void *x, void *arg); +static void _my_stat(char *file_name); static int _node_config_init(char *node_name, char *orig_config, slurm_gres_context_t *context_ptr, gres_state_t *gres_ptr); @@ -167,6 +169,7 @@ static int _step_state_validate(char *config, void **gres_data, slurm_gres_context_t *context_ptr); static int _strcmp(const char *s1, const char *s2); static int _unload_gres_plugin(slurm_gres_context_t *plugin_context); +static int _validate_file(char *path_name, char *gres_name); static void _validate_gres_node_cpus(gres_node_state_t *node_gres_ptr, int cpus_ctld); @@ -531,6 +534,60 @@ static int _log_gres_slurmd_conf(void *x, void *arg) return 0; } +static void _my_stat(char *file_name) +{ + struct stat config_stat; + + if (stat(file_name, &config_stat) < 0) + fatal("can't stat gres.conf file %s: %m", file_name); +} + +static int _validate_file(char *path_name, char *gres_name) +{ + char *file_name, *slash, *one_name, *root_path; + char *formatted_path = NULL; + hostlist_t hl; + int i, file_count = 0; + + i = strlen(path_name); + if ((i < 3) || (path_name[i-1] != ']')) { + _my_stat(path_name); + return 1; + } + + slash = strrchr(path_name, '/'); + if (slash) { + i = strlen(path_name); + formatted_path = xmalloc(i+1); + slash[0] = '\0'; + root_path = xstrdup(path_name); + xstrcat(root_path, "/"); + slash[0] = '/'; + file_name = slash + 1; + } else { + file_name = path_name; + root_path = NULL; + } + hl = hostlist_create(file_name); + if (hl == NULL) + 
fatal("can't parse File=%s", path_name); + while ((one_name = hostlist_shift(hl))) { + if (slash) { + sprintf(formatted_path, "%s/%s", root_path, one_name); + _my_stat(formatted_path); + } else { + _my_stat(one_name); + } + file_count++; + free(one_name); + } + hostlist_destroy(hl); + xfree(formatted_path); + xfree(root_path); + + return file_count; +} + /* * Build gres_slurmd_conf_t record based upon a line from the gres.conf file */ @@ -547,6 +604,7 @@ static int _parse_gres_config(void **dest, slurm_parser_enum_t type, int i; s_p_hashtbl_t *tbl; gres_slurmd_conf_t *p; + uint32_t tmp_u32; tbl = s_p_hashtbl_create(_gres_options); s_p_parse_line(tbl, *leftover, leftover); @@ -554,11 +612,6 @@ static int _parse_gres_config(void **dest, slurm_parser_enum_t type, p = xmalloc(sizeof(gres_slurmd_conf_t)); p->name = xstrdup(value); p->cpu_cnt = gres_cpu_cnt; - if (s_p_get_uint32(&p->count, "Count", tbl)) { - if (p->count == 0) - fatal("Invalid gres data for %s, Count=0", p->name); - } else - p->count = 1; if (s_p_get_string(&p->cpus, "CPUs", tbl)) { bitstr_t *cpu_bitmap; /* Just use to validate config */ cpu_bitmap = bit_alloc(gres_cpu_cnt); @@ -571,15 +624,21 @@ static int _parse_gres_config(void **dest, slurm_parser_enum_t type, } FREE_NULL_BITMAP(cpu_bitmap); } - if (s_p_get_string(&p->file, "File", tbl)) { - struct stat config_stat; - if (stat(p->file, &config_stat) < 0) - fatal("can't stat gres.conf file %s: %m", p->file); - if (p->count > 1) { - fatal("Invalid gres data for %s, Count=%u and File=...", - p->name, p->count); + + if (s_p_get_string(&p->file, "File", tbl)) + p->count = _validate_file(p->file, p->name); + + if (s_p_get_uint32(&tmp_u32, "Count", tbl)) { + if (tmp_u32 == 0) + fatal("Invalid gres data for %s, Count=0", p->name); + if (p->count && (p->count != tmp_u32)) { + fatal("Invalid gres data for %s, Count does not match " + "File value", p->name); } - } + p->count = tmp_u32; + } else if (p->count == 0) + p->count = 1; + s_p_hashtbl_destroy(tbl); for 
(i=0; i<gres_context_cnt; i++) { -- GitLab