diff --git a/src/common/gres.c b/src/common/gres.c index a6049e6ac640bcba525ff2ba39fc61d1254e2c71..7484a5ce7b20208e5052595d1e6555f42b687f38 100644 --- a/src/common/gres.c +++ b/src/common/gres.c @@ -94,13 +94,18 @@ typedef struct slurm_gres_ops { char **new_config, void **gres_data, uint16_t fast_schedule ); - void (*node_config_delete) ( void *list_element ); + void (*node_config_delete) ( void *gres_data ); int (*pack_node_state) ( void *gres_data, Buf buffer ); int (*unpack_node_state) ( void **gres_data, Buf buffer ); - void (*node_state_log) ( void *list_element, + void (*node_state_log) ( void *gres_data, char *node_name ); + void (*job_config_delete) ( void *gres_data ); + int (*job_gres_validate) ( char *config, + void **gres_data); + void (*job_state_log) ( void *gres_data, + uint32_t job_id ); } slurm_gres_ops_t; typedef struct slurm_gres_context { @@ -117,10 +122,10 @@ static slurm_gres_context_t *gres_context = NULL; static char *gres_plugin_list = NULL; static pthread_mutex_t gres_context_lock = PTHREAD_MUTEX_INITIALIZER; -typedef struct gres_node_state { +typedef struct gres_state { uint32_t plugin_id; void *gres_data; -} gres_node_state_t; +} gres_state_t; /* Variant of strcmp that will accept NULL string pointers */ static int _strcmp(const char *s1, const char *s2) @@ -151,7 +156,10 @@ static int _load_gres_plugin(char *plugin_name, "node_config_delete", "pack_node_state", "unpack_node_state", - "node_state_log" + "node_state_log", + "job_config_delete", + "job_gres_validate", + "job_state_log" }; int n_syms = sizeof(syms) / sizeof(char *); @@ -525,15 +533,15 @@ unpack_error: goto fini; } -static void _gres_list_delete(void *list_element) +static void _gres_node_list_delete(void *list_element) { int i; - gres_node_state_t *gres_ptr; + gres_state_t *gres_ptr; if (gres_plugin_init() != SLURM_SUCCESS) return; - gres_ptr = (gres_node_state_t *) list_element; + gres_ptr = (gres_state_t *) list_element; slurm_mutex_lock(&gres_context_lock); for (i=0; 
i<gres_context_cnt; i++) { if (gres_ptr->plugin_id != *(gres_context[i].ops.plugin_id)) @@ -566,27 +574,27 @@ extern int gres_plugin_node_config_validate(char *node_name, { int i, rc; ListIterator gres_iter; - gres_node_state_t *gres_ptr; + gres_state_t *gres_ptr; rc = gres_plugin_init(); slurm_mutex_lock(&gres_context_lock); if ((gres_context_cnt > 0) && (*gres_list == NULL)) { - *gres_list = list_create(_gres_list_delete); + *gres_list = list_create(_gres_node_list_delete); if (*gres_list == NULL) fatal("list_create malloc failure"); } for (i=0; ((i < gres_context_cnt) && (rc == SLURM_SUCCESS)); i++) { - /* Find or create gres_node_state entry on the list */ + /* Find or create gres_state entry on the list */ gres_iter = list_iterator_create(*gres_list); - while ((gres_ptr = (gres_node_state_t *) list_next(gres_iter))){ + while ((gres_ptr = (gres_state_t *) list_next(gres_iter))) { if (gres_ptr->plugin_id == *(gres_context[i].ops.plugin_id)) break; } list_iterator_destroy(gres_iter); if (gres_ptr == NULL) { - gres_ptr = xmalloc(sizeof(gres_node_state_t)); + gres_ptr = xmalloc(sizeof(gres_state_t)); gres_ptr->plugin_id = *(gres_context[i].ops.plugin_id); list_append(*gres_list, gres_ptr); } @@ -618,20 +626,20 @@ extern int gres_plugin_node_reconfig(char *node_name, { int i, rc; ListIterator gres_iter; - gres_node_state_t *gres_ptr; + gres_state_t *gres_ptr; rc = gres_plugin_init(); slurm_mutex_lock(&gres_context_lock); if ((gres_context_cnt > 0) && (*gres_list == NULL)) { - *gres_list = list_create(_gres_list_delete); + *gres_list = list_create(_gres_node_list_delete); if (*gres_list == NULL) fatal("list_create malloc failure"); } for (i=0; ((i < gres_context_cnt) && (rc == SLURM_SUCCESS)); i++) { - /* Find gres_node_state entry on the list */ + /* Find gres_state entry on the list */ gres_iter = list_iterator_create(*gres_list); - while ((gres_ptr = (gres_node_state_t *) list_next(gres_iter))){ + while ((gres_ptr = (gres_state_t *) list_next(gres_iter))){ if 
(gres_ptr->plugin_id == *(gres_context[i].ops.plugin_id)) break; @@ -664,7 +672,7 @@ extern int gres_plugin_pack_node_state(List gres_list, Buf buffer, uint32_t magic = GRES_MAGIC; uint16_t rec_cnt = 0; ListIterator gres_iter; - gres_node_state_t *gres_ptr; + gres_state_t *gres_ptr; if (gres_list == NULL) { pack16(rec_cnt, buffer); @@ -681,7 +689,7 @@ extern int gres_plugin_pack_node_state(List gres_list, Buf buffer, slurm_mutex_lock(&gres_context_lock); gres_iter = list_iterator_create(gres_list); - while ((gres_ptr = (gres_node_state_t *) list_next(gres_iter))) { + while ((gres_ptr = (gres_state_t *) list_next(gres_iter))) { for (i=0; i<gres_context_cnt; i++) { if (gres_ptr->plugin_id != *(gres_context[i].ops.plugin_id)) @@ -736,7 +744,8 @@ extern int gres_plugin_unpack_node_state(List *gres_list, Buf buffer, int i, rc, rc2; uint32_t gres_size, magic, tail_offset, plugin_id; uint16_t rec_cnt; - gres_node_state_t *gres_ptr; + gres_state_t *gres_ptr; + void *gres_data; safe_unpack16(&rec_cnt, buffer); if (rec_cnt == 0) @@ -746,7 +755,7 @@ extern int gres_plugin_unpack_node_state(List *gres_list, Buf buffer, slurm_mutex_lock(&gres_context_lock); if ((gres_context_cnt > 0) && (*gres_list == NULL)) { - *gres_list = list_create(_gres_list_delete); + *gres_list = list_create(_gres_node_list_delete); if (*gres_list == NULL) fatal("list_create malloc failure"); } @@ -779,14 +788,14 @@ extern int gres_plugin_unpack_node_state(List *gres_list, Buf buffer, continue; } gres_context[i].unpacked_info = true; - gres_ptr = xmalloc(sizeof(gres_node_state_t)); - gres_ptr->plugin_id = *(gres_context[i].ops.plugin_id); rc2 = (*(gres_context[i].ops.unpack_node_state)) - (&gres_ptr->gres_data, buffer); + (&gres_data, buffer); if (rc2 != SLURM_SUCCESS) { rc = rc2; - xfree(gres_ptr); } else { + gres_ptr = xmalloc(sizeof(gres_state_t)); + gres_ptr->plugin_id = *(gres_context[i].ops.plugin_id); + gres_ptr->gres_data = gres_data; list_append(*gres_list, gres_ptr); } } @@ -800,15 +809,14 @@ 
fini: /* Insure that every gres plugin is called for unpack, even if no data error("gres_plugin_unpack_node_state: no info packed for %s " "by node %s", gres_context[i].gres_type, node_name); - gres_ptr = xmalloc(sizeof(gres_node_state_t)); - gres_ptr->plugin_id = *(gres_context[i].ops.plugin_id); - list_append(*gres_list, gres_ptr); rc2 = (*(gres_context[i].ops.unpack_node_state)) - (&gres_ptr->gres_data, NULL); + (&gres_data, NULL); if (rc2 != SLURM_SUCCESS) { rc = rc2; - xfree(gres_ptr); } else { + gres_ptr = xmalloc(sizeof(gres_state_t)); + gres_ptr->plugin_id = *(gres_context[i].ops.plugin_id); + gres_ptr->gres_data = gres_data; list_append(*gres_list, gres_ptr); } } @@ -833,7 +841,7 @@ extern void gres_plugin_node_state_log(List gres_list, char *node_name) #if _DEBUG int i; ListIterator gres_iter; - gres_node_state_t *gres_ptr; + gres_state_t *gres_ptr; if (gres_list == NULL) return; @@ -842,7 +850,7 @@ extern void gres_plugin_node_state_log(List gres_list, char *node_name) slurm_mutex_lock(&gres_context_lock); gres_iter = list_iterator_create(gres_list); - while ((gres_ptr = (gres_node_state_t *) list_next(gres_iter))) { + while ((gres_ptr = (gres_state_t *) list_next(gres_iter))) { for (i=0; i<gres_context_cnt; i++) { if (gres_ptr->plugin_id != *(gres_context[i].ops.plugin_id)) @@ -856,3 +864,113 @@ extern void gres_plugin_node_state_log(List gres_list, char *node_name) slurm_mutex_unlock(&gres_context_lock); #endif } + +static void _gres_job_list_delete(void *list_element) +{ + int i; + gres_state_t *gres_ptr; + + if (gres_plugin_init() != SLURM_SUCCESS) + return; + + gres_ptr = (gres_state_t *) list_element; + slurm_mutex_lock(&gres_context_lock); + for (i=0; i<gres_context_cnt; i++) { + if (gres_ptr->plugin_id != *(gres_context[i].ops.plugin_id)) + continue; + (*(gres_context[i].ops.job_config_delete))(gres_ptr->gres_data); + xfree(gres_ptr); + break; + } + slurm_mutex_unlock(&gres_context_lock); +} + +/* + * Given a job's requested gres configuration, 
validate it and build a gres list + * IN req_config - job request's gres input string + * OUT gres_list - List of Gres records for this job to track usage + * RET SLURM_SUCCESS or ESLURM_INVALID_GRES + */ +extern int gres_plugin_job_gres_validate(char *req_config, List *gres_list) +{ + char *tmp_str, *tok, *last; + int i, rc, rc2; + gres_state_t *gres_ptr; + void *gres_data; + + if ((req_config == NULL) || (req_config[0] == '\0')) { + *gres_list = NULL; + return SLURM_SUCCESS; + } + + if ((rc = gres_plugin_init()) != SLURM_SUCCESS) + return rc; + + slurm_mutex_lock(&gres_context_lock); + tmp_str = xstrdup(req_config); + if ((gres_context_cnt > 0) && (*gres_list == NULL)) { + *gres_list = list_create(_gres_job_list_delete); + if (*gres_list == NULL) + fatal("list_create malloc failure"); + } + + tok = strtok_r(tmp_str, ",", &last); + while (tok && (rc == SLURM_SUCCESS)) { + rc2 = SLURM_ERROR; + for (i=0; i<gres_context_cnt; i++) { + rc2 = (*(gres_context[i].ops.job_gres_validate)) + (tok, &gres_data); + if (rc2 != SLURM_SUCCESS) + continue; + gres_ptr = xmalloc(sizeof(gres_state_t)); + gres_ptr->plugin_id = *(gres_context[i].ops.plugin_id); + gres_ptr->gres_data = gres_data; + list_append(*gres_list, gres_ptr); + break; /* processed it */ + } + if (rc2 != SLURM_SUCCESS) { + info("Invalid gres job specification %s", tok); + rc = ESLURM_INVALID_GRES; + break; + } + tok = strtok_r(NULL, ",", &last); + } + slurm_mutex_unlock(&gres_context_lock); + + xfree(tmp_str); + return rc; +} + +/* + * Log a job's current gres state + * IN gres_list - generated by gres_plugin_job_gres_validate() + * IN job_id - job's ID + */ +extern void gres_plugin_job_state_log(List gres_list, uint32_t job_id) +{ +#if _DEBUG + int i; + ListIterator gres_iter; + gres_state_t *gres_ptr; + + if (gres_list == NULL) + return; + + (void) gres_plugin_init(); + + slurm_mutex_lock(&gres_context_lock); + gres_iter = list_iterator_create(gres_list); + while ((gres_ptr = (gres_state_t *) 
list_next(gres_iter))) { + for (i=0; i<gres_context_cnt; i++) { + if (gres_ptr->plugin_id != + *(gres_context[i].ops.plugin_id)) + continue; + (*(gres_context[i].ops.job_state_log)) + (gres_ptr->gres_data, job_id); + break; + } + } + list_iterator_destroy(gres_iter); + slurm_mutex_unlock(&gres_context_lock); +#endif +} diff --git a/src/common/gres.h b/src/common/gres.h index 8443e0812f13b8c75fc7341a07e9007f6403ef02..29a5b09717c10751c2f20f3fa69904d1abe53b74 100644 --- a/src/common/gres.h +++ b/src/common/gres.h @@ -155,7 +155,6 @@ extern int gres_plugin_pack_node_state(List gres_list, Buf buffer, extern int gres_plugin_unpack_node_state(List *gres_list, Buf buffer, char *node_name); - /* * Log a node's current gres state * IN gres_list - generated by gres_plugin_node_config_validate() @@ -163,4 +162,19 @@ extern int gres_plugin_unpack_node_state(List *gres_list, Buf buffer, */ extern void gres_plugin_node_state_log(List gres_list, char *node_name); +/* + * Given a job's requested gres configuration, validate it and build a gres list + * IN req_config - job request's gres input string + * OUT gres_list - List of Gres records for this job to track usage + * RET SLURM_SUCCESS or ESLURM_INVALID_GRES + */ +extern int gres_plugin_job_gres_validate(char *req_config, List *gres_list); + +/* + * Log a job's current gres state + * IN gres_list - generated by gres_plugin_job_gres_validate() + * IN job_id - job's ID + */ +extern void gres_plugin_job_state_log(List gres_list, uint32_t job_id); + #endif /* !_GRES_H */ diff --git a/src/plugins/gres/gpu/gres_gpu.c b/src/plugins/gres/gpu/gres_gpu.c index 3ad6f8bc8fe3c22a628ba5881b7f2719aaf4e5d2..691570f1cc2ed362c81b13cbcb5a235e5b95694e 100644 --- a/src/plugins/gres/gpu/gres_gpu.c +++ b/src/plugins/gres/gpu/gres_gpu.c @@ -110,10 +110,10 @@ typedef struct gpu_config { } gpu_config_t; static gpu_config_t gres_config; -/* Gres state as used by slurmctld. Includes data from gres_config loaded +/* Gres node state as used by slurmctld. 
Includes data from gres_config loaded * from slurmd, resources configured (may be more or less than actually found) * plus resource allocation information. */ -typedef struct gpu_status { +typedef struct gpu_node_state { /* Actual hardware found */ uint32_t gpu_cnt_found; @@ -126,7 +126,18 @@ typedef struct gpu_status { /* Resources currently allocated to jobs */ uint32_t gpu_cnt_alloc; bitstr_t *gpu_bit_alloc; -} gpu_status_t; +} gpu_node_state_t; + +/* Gres job state as used by slurmctld. */ +typedef struct gpu_job_state { + /* Count of resources needed */ + uint32_t gpu_cnt_alloc; + + /* If 0 then gpu_cnt_alloc is per node, + * if 1 then gpu_cnt_alloc is per CPU */ + uint8_t gpu_cnt_mult; +} gpu_job_state_t; + /* * This will be the output for "--gres=help" option. @@ -225,10 +236,10 @@ unpack_error: */ extern void node_config_delete(void *gres_data) { - gpu_status_t *gres_ptr; + gpu_node_state_t *gres_ptr; xassert(gres_data); - gres_ptr = (gpu_status_t *) gres_data; + gres_ptr = (gpu_node_state_t *) gres_data; if (gres_ptr->gpu_bit_alloc) bit_free(gres_ptr->gpu_bit_alloc); xfree(gres_ptr); @@ -252,15 +263,15 @@ extern int node_config_validate(char *node_name, char **reason_down) { int rc = SLURM_SUCCESS; - gpu_status_t *gres_ptr; + gpu_node_state_t *gres_ptr; char *node_gres_config, *tok, *last = NULL; int32_t gres_config_cnt = -1; bool updated_config = false; xassert(gres_data); - gres_ptr = (gpu_status_t *) *gres_data; + gres_ptr = (gpu_node_state_t *) *gres_data; if (gres_ptr == NULL) { - gres_ptr = xmalloc(sizeof(gpu_status_t)); + gres_ptr = xmalloc(sizeof(gpu_node_state_t)); *gres_data = gres_ptr; gres_ptr->gpu_cnt_found = gres_config.gpu_cnt; updated_config = true; @@ -364,12 +375,12 @@ extern int node_reconfig(char *node_name, char *orig_config, char **new_config, void **gres_data, uint16_t fast_schedule) { int rc = SLURM_SUCCESS; - gpu_status_t *gres_ptr; + gpu_node_state_t *gres_ptr; char *node_gres_config, *tok, *last = NULL; int32_t gres_config_cnt = 
0; xassert(gres_data); - gres_ptr = (gpu_status_t *) *gres_data; + gres_ptr = (gpu_node_state_t *) *gres_data; if (gres_ptr == NULL) { /* Assume that node has not yet registerd */ info("%s record is NULL for node %s", plugin_name, node_name); @@ -446,7 +457,7 @@ extern int node_reconfig(char *node_name, char *orig_config, char **new_config, extern int pack_node_state(void *gres_data, Buf buffer) { - gpu_status_t *gres_ptr = (gpu_status_t *) gres_data; + gpu_node_state_t *gres_ptr = (gpu_node_state_t *) gres_data; pack32(gres_ptr->gpu_cnt_avail, buffer); pack32(gres_ptr->gpu_cnt_alloc, buffer); @@ -457,9 +468,9 @@ extern int pack_node_state(void *gres_data, Buf buffer) extern int unpack_node_state(void **gres_data, Buf buffer) { - gpu_status_t *gres_ptr; + gpu_node_state_t *gres_ptr; - gres_ptr = xmalloc(sizeof(gpu_status_t)); + gres_ptr = xmalloc(sizeof(gpu_node_state_t)); gres_ptr->gpu_cnt_found = NO_VAL; if (buffer) { @@ -497,10 +508,10 @@ unpack_error: extern void node_state_log(void *gres_data, char *node_name) { - gpu_status_t *gres_ptr; + gpu_node_state_t *gres_ptr; xassert(gres_data); - gres_ptr = (gpu_status_t *) gres_data; + gres_ptr = (gpu_node_state_t *) gres_data; info("%s state for %s", plugin_name, node_name); info(" gpu_cnt found:%u configured:%u avail:%u alloc:%u", gres_ptr->gpu_cnt_found, gres_ptr->gpu_cnt_config, @@ -513,3 +524,50 @@ extern void node_state_log(void *gres_data, char *node_name) info(" gpu_bit_alloc:NULL"); } } + +extern void job_config_delete(void *gres_data) +{ + xfree(gres_data); +} + +extern int job_gres_validate(char *config, void **gres_data) +{ + char *last; + gpu_job_state_t *gres_ptr; + uint32_t cnt; + uint8_t mult = 0; + + if (!strcmp(config, "gpu")) { + cnt = 1; + } else if (!strncmp(config, "gpu:", 4)) { + cnt = strtol(config+4, &last, 10); + if (last[0] == '\0') + ; + else if ((last[0] == 'k') || (last[0] == 'K')) + cnt *= 1024; + else if (!strcasecmp(last, "*cpu")) + mult = 1; + } else + return SLURM_ERROR; + + 
gres_ptr = xmalloc(sizeof(gpu_job_state_t)); + gres_ptr->gpu_cnt_alloc = cnt; + gres_ptr->gpu_cnt_mult = mult; + *gres_data = gres_ptr; + return SLURM_SUCCESS; +} + +extern void job_state_log(void *gres_data, uint32_t job_id) +{ + gpu_job_state_t *gres_ptr; + char *mult; + + xassert(gres_data); + gres_ptr = (gpu_job_state_t *) gres_data; + info("%s state for job %u", plugin_name, job_id); + if (gres_ptr->gpu_cnt_mult) + mult = "cpu"; + else + mult = "node"; + info(" gpu_cnt %u per %s", gres_ptr->gpu_cnt_alloc, mult); +} diff --git a/src/plugins/gres/nic/gres_nic.c b/src/plugins/gres/nic/gres_nic.c index 256413dd77b1e11dacdfabe5e8f4bde03a6721b1..290c97194ec77167181ecd9cc9c5a7e187f4d6b0 100644 --- a/src/plugins/gres/nic/gres_nic.c +++ b/src/plugins/gres/nic/gres_nic.c @@ -113,7 +113,7 @@ static nic_config_t gres_config; /* Gres state as used by slurmctld. Includes data from gres_config loaded * from slurmd, resources configured (may be more or less than actually found) * plus resource allocation information. */ -typedef struct nic_status { +typedef struct nic_node_state { /* Actual hardware found */ uint32_t nic_cnt_found; @@ -126,7 +126,17 @@ typedef struct nic_status { /* Resources currently allocated to jobs */ uint32_t nic_cnt_alloc; bitstr_t *nic_bit_alloc; -} nic_status_t; +} nic_node_state_t; + +/* Gres job state as used by slurmctld. */ +typedef struct nic_job_state { + /* Count of resources needed */ + uint32_t nic_cnt_alloc; + + /* If 0 then nic_cnt_alloc is per node, + * if 1 then nic_cnt_alloc is per CPU */ + uint8_t nic_cnt_mult; +} nic_job_state_t; /* * This will be the output for "--gres=help" option. 
@@ -225,10 +235,10 @@ unpack_error: */ extern void node_config_delete(void *gres_data) { - nic_status_t *gres_ptr; + nic_node_state_t *gres_ptr; xassert(gres_data); - gres_ptr = (nic_status_t *) gres_data; + gres_ptr = (nic_node_state_t *) gres_data; if (gres_ptr->nic_bit_alloc) bit_free(gres_ptr->nic_bit_alloc); xfree(gres_ptr); @@ -252,15 +262,15 @@ extern int node_config_validate(char *node_name, char **reason_down) { int rc = SLURM_SUCCESS; - nic_status_t *gres_ptr; + nic_node_state_t *gres_ptr; char *node_gres_config, *tok, *last = NULL; int32_t gres_config_cnt = -1; bool updated_config = false; xassert(gres_data); - gres_ptr = (nic_status_t *) *gres_data; + gres_ptr = (nic_node_state_t *) *gres_data; if (gres_ptr == NULL) { - gres_ptr = xmalloc(sizeof(nic_status_t)); + gres_ptr = xmalloc(sizeof(nic_node_state_t)); *gres_data = gres_ptr; gres_ptr->nic_cnt_found = gres_config.nic_cnt; updated_config = true; @@ -364,12 +374,12 @@ extern int node_reconfig(char *node_name, char *orig_config, char **new_config, void **gres_data, uint16_t fast_schedule) { int rc = SLURM_SUCCESS; - nic_status_t *gres_ptr; + nic_node_state_t *gres_ptr; char *node_gres_config, *tok, *last = NULL; int32_t gres_config_cnt = 0; xassert(gres_data); - gres_ptr = (nic_status_t *) *gres_data; + gres_ptr = (nic_node_state_t *) *gres_data; if (gres_ptr == NULL) { /* Assume that node has not yet registerd */ info("%s record is NULL for node %s", plugin_name, node_name); @@ -446,7 +456,7 @@ extern int node_reconfig(char *node_name, char *orig_config, char **new_config, extern int pack_node_state(void *gres_data, Buf buffer) { - nic_status_t *gres_ptr = (nic_status_t *) gres_data; + nic_node_state_t *gres_ptr = (nic_node_state_t *) gres_data; pack32(gres_ptr->nic_cnt_avail, buffer); pack32(gres_ptr->nic_cnt_alloc, buffer); @@ -457,9 +467,9 @@ extern int pack_node_state(void *gres_data, Buf buffer) extern int unpack_node_state(void **gres_data, Buf buffer) { - nic_status_t *gres_ptr; + 
nic_node_state_t *gres_ptr; - gres_ptr = xmalloc(sizeof(nic_status_t)); + gres_ptr = xmalloc(sizeof(nic_node_state_t)); gres_ptr->nic_cnt_found = NO_VAL; if (buffer) { @@ -496,10 +506,10 @@ unpack_error: } extern void node_state_log(void *gres_data, char *node_name) { - nic_status_t *gres_ptr; + nic_node_state_t *gres_ptr; xassert(gres_data); - gres_ptr = (nic_status_t *) gres_data; + gres_ptr = (nic_node_state_t *) gres_data; info("%s state for %s", plugin_name, node_name); info(" nic_cnt found:%u configured:%u avail:%u alloc:%u", gres_ptr->nic_cnt_found, gres_ptr->nic_cnt_config, @@ -512,3 +522,50 @@ extern void node_state_log(void *gres_data, char *node_name) info(" nic_bit_alloc:NULL"); } } + +extern void job_config_delete(void *gres_data) +{ + xfree(gres_data); +} + +extern int job_gres_validate(char *config, void **gres_data) +{ + char *last; + nic_job_state_t *gres_ptr; + uint32_t cnt; + uint8_t mult = 0; + + if (!strcmp(config, "nic")) { + cnt = 1; + } else if (!strncmp(config, "nic:", 4)) { + cnt = strtol(config+4, &last, 10); + if (last[0] == '\0') + ; + else if ((last[0] == 'k') || (last[0] == 'K')) + cnt *= 1024; + else if (!strcasecmp(last, "*cpu")) + mult = 1; + } else + return SLURM_ERROR; + + gres_ptr = xmalloc(sizeof(nic_job_state_t)); + gres_ptr->nic_cnt_alloc = cnt; + gres_ptr->nic_cnt_mult = mult; + *gres_data = gres_ptr; + return SLURM_SUCCESS; +} + +extern void job_state_log(void *gres_data, uint32_t job_id) +{ + nic_job_state_t *gres_ptr; + char *mult; + + xassert(gres_data); + gres_ptr = (nic_job_state_t *) gres_data; + info("%s state for job %u", plugin_name, job_id); + if (gres_ptr->nic_cnt_mult) + mult = "cpu"; + else + mult = "node"; + info(" nic_cnt %u per %s", gres_ptr->nic_cnt_alloc, mult); +} diff --git a/src/slurmctld/job_mgr.c b/src/slurmctld/job_mgr.c index a69e7a76822e6b35fe86f99b385e4e59ed34d4d1..90b57895c81ac88474a739ebc669adde6298553b 100644 --- a/src/slurmctld/job_mgr.c +++ b/src/slurmctld/job_mgr.c @@ -3043,10 +3043,11 @@ 
static int _job_create(job_desc_msg_t * job_desc, int allocate, int will_run, error_code = ESLURM_INVALID_FEATURE; goto cleanup_fail; } - if (build_gres_list(job_ptr)) { + if (gres_plugin_job_gres_validate(job_ptr->gres, &job_ptr->gres_list)) { error_code = ESLURM_INVALID_GRES; goto cleanup_fail; } + gres_plugin_job_state_log(job_ptr->gres_list, job_ptr->job_id); if ((error_code = validate_job_resv(job_ptr))) goto cleanup_fail; diff --git a/src/slurmctld/job_scheduler.c b/src/slurmctld/job_scheduler.c index 7e48f9f9e0f2ea3d066e59051eea683fd8fee162..723d79c5224a74ff823ed6709eb15453366c979f 100644 --- a/src/slurmctld/job_scheduler.c +++ b/src/slurmctld/job_scheduler.c @@ -1660,15 +1660,3 @@ static int _valid_node_feature(char *feature) return rc; } - -/* - * build_gres_list - Translate a job's gres string into a gres_list - * IN job_ptr->gres - * OUT job_ptr->gres_list - * RET error code - */ -extern int build_gres_list(struct job_record *job_ptr) -{ - error("construction of a job's gres list TBD"); - return SLURM_SUCCESS; -} diff --git a/src/slurmctld/job_scheduler.h b/src/slurmctld/job_scheduler.h index be6e6e3c92bbfbed68890da04904a08cd0235b34..a1f39fd575be3e2c770d83a6603bed58c31271c7 100644 --- a/src/slurmctld/job_scheduler.h +++ b/src/slurmctld/job_scheduler.h @@ -58,14 +58,6 @@ struct job_queue { */ extern int build_feature_list(struct job_record *job_ptr); -/* - * build_gres_list - Translate a job's gres string into a gres_list - * IN job_ptr->gres - * OUT job_ptr->gres_list - * RET error code - */ -extern int build_gres_list(struct job_record *job_ptr); - /* * build_job_queue - build (non-priority ordered) list of pending jobs * OUT job_queue - pointer to job queue diff --git a/src/slurmctld/slurmctld.h b/src/slurmctld/slurmctld.h index af2f5ffc26c81bb5f40c22d8dae27e4e6fa988bc..988510c32bc6b9dfbf78c391c0cf111ef91d2819 100644 --- a/src/slurmctld/slurmctld.h +++ b/src/slurmctld/slurmctld.h @@ -393,6 +393,7 @@ struct job_record { uint32_t exit_code; /* exit code 
for job (status from * wait call) */ char *gres; /* generic resources */ + List gres_list; /* generic resource allocation details */ uint32_t group_id; /* group submitted under */ uint32_t job_id; /* job ID */ struct job_record *job_next; /* next entry with same hash index */