diff --git a/doc/man/man5/gres.conf.5 b/doc/man/man5/gres.conf.5 index 345a10dffb40cdf837a252fa8cf5454eed34bc64..33ac1c42ff61e4864fec059681414b57aca5cd0e 100644 --- a/doc/man/man5/gres.conf.5 +++ b/doc/man/man5/gres.conf.5 @@ -95,6 +95,11 @@ line should apply to. The NodeName specification can use a Slurm hostlist specification as shown in the example below. +.TP +\fBType\fR +An arbitrary string identifying the type of device. +For example, a particular model of GPU. + .SH "EXAMPLES" .LP .br @@ -106,13 +111,13 @@ the example below. .br # Configure support for our four GPUs .br -Name=gpu File=/dev/nvidia0 CPUs=0,1 +Name=gpu Type=gtx560 File=/dev/nvidia0 CPUs=0,1 .br -Name=gpu File=/dev/nvidia1 CPUs=0,1 +Name=gpu Type=gtx560 File=/dev/nvidia1 CPUs=0,1 .br -Name=gpu File=/dev/nvidia2 CPUs=2,3 +Name=gpu Type=tesla File=/dev/nvidia2 CPUs=2,3 .br -Name=gpu File=/dev/nvidia3 CPUs=2,3 +Name=gpu Type=tesla File=/dev/nvidia3 CPUs=2,3 .br Name=bandwidth Count=20M @@ -134,7 +139,7 @@ NodeName=tux[16\-31] Name=gpu File=/dev/nvidia[0\-7] Copyright (C) 2010 The Regents of the University of California. Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). .br -Copyright (C) 2010\-2013 SchedMD LLC. +Copyright (C) 2010\-2014 SchedMD LLC. .LP This file is part of SLURM, a resource management program. For details, see <http://slurm.schedmd.com/>. 
diff --git a/src/common/gres.c b/src/common/gres.c index a586f7c21bae74ef17dedfe06bfc52a97967e9b3..e429f99d581a9124b2774f6ed8218161faf7716e 100644 --- a/src/common/gres.c +++ b/src/common/gres.c @@ -552,6 +552,7 @@ static void _destroy_gres_slurmd_conf(void *x) xfree(p->cpus); xfree(p->file); /* Only used by slurmd */ xfree(p->name); + xfree(p->type); xfree(p); } @@ -566,20 +567,22 @@ static int _log_gres_slurmd_conf(void *x, void *arg) xassert(p); if (!gres_debug) { - verbose("Gres Name=%s Count=%u", p->name, p->count); + verbose("Gres Name=%s Type=%s Count=%u", + p->name, p->type, p->count); return 0; } if (p->cpus) { - info("Gres Name=%s Count=%u ID=%u File=%s CPUs=%s CpuCnt=%u", - p->name, p->count, p->plugin_id, p->file, p->cpus, + info("Gres Name=%s Type=%s Count=%u ID=%u File=%s CPUs=%s " + "CpuCnt=%u", + p->name, p->type, p->count, p->plugin_id, p->file, p->cpus, p->cpu_cnt); } else if (p->file) { - info("Gres Name=%s Count=%u ID=%u File=%s", - p->name, p->count, p->plugin_id, p->file); + info("Gres Name=%s Type=%s Count=%u ID=%u File=%s", + p->name, p->type, p->count, p->plugin_id, p->file); } else { - info("Gres Name=%s Count=%u ID=%u", p->name, p->count, - p->plugin_id); + info("Gres Name=%s Type=%s Count=%u ID=%u", p->name, p->type, + p->count, p->plugin_id); } return 0; @@ -650,7 +653,8 @@ static int _parse_gres_config(void **dest, slurm_parser_enum_t type, {"Count", S_P_STRING}, /* Number of Gres available */ {"CPUs" , S_P_STRING}, /* CPUs to bind to Gres resource */ {"File", S_P_STRING}, /* Path to Gres device */ - {"Name", S_P_STRING}, /* Gres type name */ + {"Name", S_P_STRING}, /* Gres name */ + {"Type", S_P_STRING}, /* Gres type (e.g. 
model name) */ {NULL} }; int i; @@ -692,6 +696,8 @@ static int _parse_gres_config(void **dest, slurm_parser_enum_t type, p->has_file = 1; } + (void) s_p_get_string(&p->type, "Type", tbl); + if (s_p_get_string(&tmp_str, "Count", tbl)) { tmp_long = strtol(tmp_str, &last, 10); if ((tmp_long == LONG_MIN) || (tmp_long == LONG_MAX)) { @@ -744,7 +750,8 @@ static int _parse_gres_config2(void **dest, slurm_parser_enum_t type, {"Count", S_P_STRING}, /* Number of Gres available */ {"CPUs" , S_P_STRING}, /* CPUs to bind to Gres resource */ {"File", S_P_STRING}, /* Path to Gres device */ - {"Name", S_P_STRING}, /* Gres type name */ + {"Name", S_P_STRING}, /* Gres name */ + {"Type", S_P_STRING}, /* Gres type (e.g. model name) */ {NULL} }; s_p_hashtbl_t *tbl; @@ -968,7 +975,7 @@ extern int gres_plugin_node_config_pack(Buf buffer) { int rc; uint32_t magic = GRES_MAGIC; - uint16_t rec_cnt = 0, version= SLURM_PROTOCOL_VERSION; + uint16_t rec_cnt = 0, version = SLURM_PROTOCOL_VERSION; ListIterator iter; gres_slurmd_conf_t *gres_slurmd_conf; @@ -990,6 +997,7 @@ extern int gres_plugin_node_config_pack(Buf buffer) pack32(gres_slurmd_conf->plugin_id, buffer); packstr(gres_slurmd_conf->cpus, buffer); packstr(gres_slurmd_conf->name, buffer); + packstr(gres_slurmd_conf->type, buffer); } list_iterator_destroy(iter); } @@ -1009,7 +1017,7 @@ extern int gres_plugin_node_config_unpack(Buf buffer, char* node_name) uint32_t count, cpu_cnt, magic, plugin_id, utmp32; uint16_t rec_cnt, version; uint8_t has_file; - char *tmp_cpus, *tmp_name; + char *tmp_cpus, *tmp_name, *tmp_type; gres_slurmd_conf_t *p; rc = gres_plugin_init(); @@ -1024,67 +1032,150 @@ extern int gres_plugin_node_config_unpack(Buf buffer, char* node_name) return SLURM_SUCCESS; slurm_mutex_lock(&gres_context_lock); - for (i=0; i<rec_cnt; i++) { - safe_unpack32(&magic, buffer); - if (magic != GRES_MAGIC) - goto unpack_error; + if (version >= SLURM_14_11_PROTOCOL_VERSION) { + for (i = 0; i < rec_cnt; i++) { + safe_unpack32(&magic, 
buffer); + if (magic != GRES_MAGIC) + goto unpack_error; - safe_unpack32(&count, buffer); - safe_unpack32(&cpu_cnt, buffer); - safe_unpack8(&has_file, buffer); - safe_unpack32(&plugin_id, buffer); - safe_unpackstr_xmalloc(&tmp_cpus, &utmp32, buffer); - safe_unpackstr_xmalloc(&tmp_name, &utmp32, buffer); + safe_unpack32(&count, buffer); + safe_unpack32(&cpu_cnt, buffer); + safe_unpack8(&has_file, buffer); + safe_unpack32(&plugin_id, buffer); + safe_unpackstr_xmalloc(&tmp_cpus, &utmp32, buffer); + safe_unpackstr_xmalloc(&tmp_name, &utmp32, buffer); + safe_unpackstr_xmalloc(&tmp_type, &utmp32, buffer); - for (j=0; j<gres_context_cnt; j++) { - if (gres_context[j].plugin_id != plugin_id) - continue; - if (strcmp(gres_context[j].gres_name, tmp_name)) { - /* Should be caught in gres_plugin_init() */ - error("gres_plugin_node_config_unpack: gres/%s" - " duplicate plugin ID with %s, unable " - "to process", - tmp_name, gres_context[j].gres_name); + for (j = 0; j < gres_context_cnt; j++) { + if (gres_context[j].plugin_id != plugin_id) + continue; + if (strcmp(gres_context[j].gres_name, + tmp_name)) { + /* Should have been caught in + * gres_plugin_init() */ + error("gres_plugin_node_config_unpack: " + "gres/%s duplicate plugin ID with" + " %s, unable to process", + tmp_name, + gres_context[j].gres_name); + continue; + } + if (gres_context[j].has_file && + !has_file && count) { + error("gres_plugin_node_config_unpack: " + "gres/%s lacks File parameter " + "for node %s", + tmp_name, node_name); + has_file = 1; + } + if (has_file && (count > 1024)) { + /* Avoid over-subscribing memory with + * huge bitmaps */ + error("gres_plugin_node_config_unpack: " + "gres/%s has File plus very " + "large Count (%u) for node %s, " + "resetting value to 1024", + tmp_name, count, node_name); + count = 1024; + } + if (has_file) /* Don't clear if already set */ + gres_context[j].has_file = has_file; + break; + } + if (j >= gres_context_cnt) { + /* GresPlugins is inconsistently configured. 
+ * Not a fatal error. Skip this data. */ + error("gres_plugin_node_config_unpack: no " + "plugin configured to unpack data " + "type %s from node %s", + tmp_name, node_name); + xfree(tmp_cpus); + xfree(tmp_name); + xfree(tmp_type); continue; } - if (gres_context[j].has_file && !has_file && count) { - error("gres_plugin_node_config_unpack: gres/%s" - " lacks File parameter for node %s", + p = xmalloc(sizeof(gres_slurmd_conf_t)); + p->count = count; + p->cpu_cnt = cpu_cnt; + p->has_file = has_file; + p->cpus = tmp_cpus; + tmp_cpus = NULL; /* Nothing left to xfree */ + p->name = tmp_name; /* Preserve for accounting! */ + p->type = tmp_type; + tmp_type = NULL; /* Nothing left to xfree */ + p->plugin_id = plugin_id; + list_append(gres_conf_list, p); + } + } else { + for (i = 0; i < rec_cnt; i++) { + safe_unpack32(&magic, buffer); + if (magic != GRES_MAGIC) + goto unpack_error; + + safe_unpack32(&count, buffer); + safe_unpack32(&cpu_cnt, buffer); + safe_unpack8(&has_file, buffer); + safe_unpack32(&plugin_id, buffer); + safe_unpackstr_xmalloc(&tmp_cpus, &utmp32, buffer); + safe_unpackstr_xmalloc(&tmp_name, &utmp32, buffer); + + for (j = 0; j < gres_context_cnt; j++) { + if (gres_context[j].plugin_id != plugin_id) + continue; + if (strcmp(gres_context[j].gres_name, + tmp_name)) { + /* Should have been caught in + * gres_plugin_init() */ + error("gres_plugin_node_config_unpack: " + "gres/%s duplicate plugin ID with" + " %s, unable to process", + tmp_name, + gres_context[j].gres_name); + continue; + } + if (gres_context[j].has_file && + !has_file && count) { + error("gres_plugin_node_config_unpack: " + "gres/%s lacks File parameter " + "for node %s", + tmp_name, node_name); + has_file = 1; + } + if (has_file && (count > 1024)) { + /* Avoid over-subscribing memory with + * huge bitmaps */ + error("gres_plugin_node_config_unpack: " + "gres/%s has File plus very " + "large Count (%u) for node %s, " + "resetting value to 1024", + tmp_name, count, node_name); + count = 1024; + } + if (has_file) 
/* Don't clear if already set */ + gres_context[j].has_file = has_file; + break; + } + if (j >= gres_context_cnt) { + /* GresPlugins is inconsistently configured. + * Not a fatal error. Skip this data. */ + error("gres_plugin_node_config_unpack: no " + "plugin configured to unpack data " + "type %s from node %s", tmp_name, node_name); - has_file = 1; - } - if (has_file && (count > 1024)) { - /* Avoid over-subscribing memory with huge - * bitmaps */ - error("gres_plugin_node_config_unpack: gres/%s" - " has File plus very large Count (%u) " - "for node %s, resetting value to 1024", - tmp_name, count, node_name); - count = 1024; + xfree(tmp_cpus); + xfree(tmp_name); + continue; } - if (has_file) /* Don't clear if already set */ - gres_context[j].has_file = has_file; - break; - } - if (j >= gres_context_cnt) { - /* A sign that GresPlugins is inconsistently - * configured. Not a fatal error. Skip this data. */ - error("gres_plugin_node_config_unpack: no plugin " - "configured to unpack data type %s from node %s", - tmp_name, node_name); - xfree(tmp_cpus); - xfree(tmp_name); - continue; + p = xmalloc(sizeof(gres_slurmd_conf_t)); + p->count = count; + p->cpu_cnt = cpu_cnt; + p->has_file = has_file; + p->cpus = tmp_cpus; + tmp_cpus = NULL; /* Nothing left to xfree */ + p->name = tmp_name; /* Preserve for accounting! */ + p->plugin_id = plugin_id; + list_append(gres_conf_list, p); } - p = xmalloc(sizeof(gres_slurmd_conf_t)); - p->count = count; - p->cpu_cnt = cpu_cnt; - p->has_file = has_file; - p->cpus = tmp_cpus; - tmp_cpus = NULL; /* Nothing left to xfree */ - p->name = tmp_name; /* We need to preserve for accounting! 
*/ - p->plugin_id = plugin_id; - list_append(gres_conf_list, p); } slurm_mutex_unlock(&gres_context_lock); return rc; diff --git a/src/common/gres.h b/src/common/gres.h index e2d20de115bc3782e3ca1cc9151ef120bf7d185e..0a5b6fde6e9c1aca3ef37a06f9da7bc28d23c796 100644 --- a/src/common/gres.h +++ b/src/common/gres.h @@ -66,9 +66,12 @@ typedef struct gres_slurmd_conf { char *file; uint8_t has_file; /* non-zero if file is set, flag for RPC */ - /* Name of this gres type */ + /* Name of this gres */ char *name; + /* Type of this gres (e.g. model name) */ + char *type; + /* Gres ID number */ uint32_t plugin_id; } gres_slurmd_conf_t;