diff --git a/src/common/gres.c b/src/common/gres.c index b6cdbec92bbd0cb88223af33d204a185b56a05d3..eb648f58423115fbaf2afc3d1346c26b142c406f 100644 --- a/src/common/gres.c +++ b/src/common/gres.c @@ -1720,7 +1720,7 @@ static void _node_state_dealloc(gres_state_t *gres_ptr) break; } } - error("gres_plugin_node_state_dealloc: gres/%s topo_cnt!=0 " + error("gres_plugin_node_state_dealloc_all: gres/%s topo_cnt!=0 " "and topo_gres_cnt_alloc is NULL", gres_name); } else { for (i=0; i<gres_node_ptr->topo_cnt; i++) { @@ -1735,7 +1735,7 @@ static void _node_state_dealloc(gres_state_t *gres_ptr) * is reconfigured. * IN gres_list - node gres state information */ -extern void gres_plugin_node_state_dealloc(List gres_list) +extern void gres_plugin_node_state_dealloc_all(List gres_list) { ListIterator gres_iter; gres_state_t *gres_ptr; @@ -1754,120 +1754,6 @@ extern void gres_plugin_node_state_dealloc(List gres_list) slurm_mutex_unlock(&gres_context_lock); } -static int _node_state_realloc(void *job_gres_data, int node_offset, - void *node_gres_data, char *gres_name) -{ - gres_job_state_t *job_gres_ptr = (gres_job_state_t *) job_gres_data; - gres_node_state_t *node_gres_ptr = (gres_node_state_t *) node_gres_data; - int i, job_bit_size, node_bit_size; - - xassert(job_gres_ptr); - xassert(node_gres_ptr); - - if (node_offset >= job_gres_ptr->node_cnt) { - error("gres: %s job node offset is bad (%d >= %u)", - gres_name, node_offset, job_gres_ptr->node_cnt); - return EINVAL; - } - - node_gres_ptr->gres_cnt_alloc += job_gres_ptr->gres_cnt_alloc; - - if (node_gres_ptr->gres_bit_alloc && job_gres_ptr->gres_bit_alloc && - job_gres_ptr->gres_bit_alloc[node_offset]) { - job_bit_size = bit_size(job_gres_ptr-> - gres_bit_alloc[node_offset]); - node_bit_size = bit_size(node_gres_ptr->gres_bit_alloc); - if (job_bit_size > node_bit_size) { - error("gres/%s: job/node bit size mismatch (%d != %d)", - gres_name, job_bit_size, node_bit_size); - /* Node needs to register with more resources, expand - * node's bitmap now so we can merge the data */ - node_gres_ptr->gres_bit_alloc = - bit_realloc(node_gres_ptr->gres_bit_alloc, - job_bit_size); - if (node_gres_ptr->gres_bit_alloc == NULL) - fatal("bit_realloc: malloc failure"); - node_bit_size = job_bit_size; - } - if (job_bit_size < node_bit_size) { - error("gres/%s: job/node bit size mismatch (%d != %d)", - gres_name, job_bit_size, node_bit_size); - /* Update what we can */ - node_bit_size = MIN(job_bit_size, node_bit_size); - for (i=0; i<node_bit_size; i++) { - if (!bit_test(job_gres_ptr-> - gres_bit_alloc[node_offset], i)) - continue; - node_gres_ptr->gres_cnt_alloc++; - bit_set(node_gres_ptr->gres_bit_alloc, i); - } - } else { - bit_or(node_gres_ptr->gres_bit_alloc, - job_gres_ptr->gres_bit_alloc[node_offset]); - node_gres_ptr->gres_cnt_alloc = - bit_set_count(node_gres_ptr->gres_bit_alloc); - } - } - - return SLURM_SUCCESS; -} - -/* - * Allocate in this nodes record the resources previously allocated to this - * job. This function isused to synchronize state after slurmctld restarts - * or is reconfigured. - * IN job_gres_list - job gres state information - * IN node_offset - zero-origin index of this node in the job's allocation - * IN node_gres_list - node gres state information - * RET SLURM_SUCCESS or error code - */ -extern int gres_plugin_node_state_realloc(List job_gres_list, int node_offset, - List node_gres_list) -{ - ListIterator job_gres_iter, node_gres_iter; - gres_state_t *job_gres_ptr, *node_gres_ptr; - int i; - - if (job_gres_list == NULL) - return SLURM_SUCCESS; - if (node_gres_list == NULL) - return SLURM_ERROR; - - (void) gres_plugin_init(); - - slurm_mutex_lock(&gres_context_lock); - job_gres_iter = list_iterator_create(job_gres_list); - while ((job_gres_ptr = (gres_state_t *) list_next(job_gres_iter))) { - node_gres_iter = list_iterator_create(node_gres_list); - while ((node_gres_ptr = (gres_state_t *) - list_next(node_gres_iter))) { - if (job_gres_ptr->plugin_id == node_gres_ptr->plugin_id) - break; - } - list_iterator_destroy(node_gres_iter); - if (node_gres_ptr == NULL) { - error("Could not find plugin id %u to realloc job", - job_gres_ptr->plugin_id); - continue; - } - - for (i=0; i<gres_context_cnt; i++) { - if (job_gres_ptr->plugin_id != - gres_context[i].plugin_id) - continue; - _node_state_realloc(job_gres_ptr->gres_data, - node_offset, - node_gres_ptr->gres_data, - gres_context[i].gres_name); - break; - } - } - list_iterator_destroy(job_gres_iter); - slurm_mutex_unlock(&gres_context_lock); - - return SLURM_SUCCESS; -} - static void _node_state_log(void *gres_data, char *node_name, char *gres_name) { gres_node_state_t *gres_node_ptr; @@ -2146,12 +2032,15 @@ List gres_plugin_job_state_dup(List gres_list) * IN gres_list - generated by gres_plugin_job_config_validate() * IN/OUT buffer - location to write state to * IN job_id - job's ID + * IN details - if set then pack job step allocation details (only needed to + * save/restore job state, not needed in job credential for + * slurmd task binding) * * NOTE: A job's allocation to steps is not recorded here, but recovered with * the job step state information upon slurmctld restart. */ extern int gres_plugin_job_state_pack(List gres_list, Buf buffer, - uint32_t job_id) + uint32_t job_id, bool details) { int i, rc = SLURM_SUCCESS; uint32_t top_offset, tail_offset; @@ -2186,6 +2075,24 @@ extern int gres_plugin_job_state_pack(List gres_list, Buf buffer, } else { pack8((uint8_t) 0, buffer); } + if (details && gres_job_ptr->gres_bit_step_alloc) { + pack8((uint8_t) 1, buffer); + for (i=0; i<gres_job_ptr->node_cnt; i++) { + pack_bit_str(gres_job_ptr-> + gres_bit_step_alloc[i], buffer); + } + } else { + pack8((uint8_t) 0, buffer); + } + if (details && gres_job_ptr->gres_cnt_step_alloc) { + pack8((uint8_t) 1, buffer); + for (i=0; i<gres_job_ptr->node_cnt; i++) { + pack32(gres_job_ptr->gres_cnt_step_alloc[i], + buffer); + } + } else { + pack8((uint8_t) 0, buffer); + } rec_cnt++; } list_iterator_destroy(gres_iter); @@ -2211,7 +2118,7 @@ extern int gres_plugin_job_state_unpack(List *gres_list, Buf buffer, int i, rc; uint32_t magic, plugin_id; uint16_t rec_cnt; - uint8_t has_bitmap; + uint8_t has_more; gres_state_t *gres_ptr; gres_job_state_t *gres_job_ptr = NULL; @@ -2239,8 +2146,8 @@ extern int gres_plugin_job_state_unpack(List *gres_list, Buf buffer, gres_job_ptr = xmalloc(sizeof(gres_job_state_t)); safe_unpack32(&gres_job_ptr->gres_cnt_alloc, buffer); safe_unpack32(&gres_job_ptr->node_cnt, buffer); - safe_unpack8(&has_bitmap, buffer); - if (has_bitmap) { + safe_unpack8(&has_more, buffer); + if (has_more) { gres_job_ptr->gres_bit_alloc = xmalloc(sizeof(bitstr_t *) * gres_job_ptr->node_cnt); @@ -2249,6 +2156,26 @@ extern int gres_plugin_job_state_unpack(List *gres_list, Buf buffer, buffer); } } + safe_unpack8(&has_more, buffer); + if (has_more) { + gres_job_ptr->gres_bit_step_alloc = + xmalloc(sizeof(bitstr_t *) * + gres_job_ptr->node_cnt); + for (i=0; i<gres_job_ptr->node_cnt; i++) { + unpack_bit_str(&gres_job_ptr-> + gres_bit_step_alloc[i], buffer); + } + } + safe_unpack8(&has_more, buffer); + if (has_more) { + gres_job_ptr->gres_cnt_step_alloc = + xmalloc(sizeof(uint32_t) * + gres_job_ptr->node_cnt); + for (i=0; i<gres_job_ptr->node_cnt; i++) { + safe_unpack32(&gres_job_ptr-> + gres_cnt_step_alloc[i], buffer); + } + } for (i=0; i<gres_context_cnt; i++) { if (gres_context[i].plugin_id == plugin_id) break; diff --git a/src/common/gres.h b/src/common/gres.h index a4a91ad0b27148e4a0f06fb67745991e259608b1..5460da019b909df32cd34ebb1cde59f69bacd99b 100644 --- a/src/common/gres.h +++ b/src/common/gres.h @@ -252,19 +252,7 @@ extern List gres_plugin_node_state_dup(List gres_list); * is reconfigured. * IN gres_list - node gres state information */ -extern void gres_plugin_node_state_dealloc(List gres_list); - -/* - * Allocate in this nodes record the resources previously allocated to this - * job. This function isused to synchronize state after slurmctld restarts - * or is reconfigured. - * IN job_gres_list - job gres state information - * IN node_offset - zero-origin index of this node in the job's allocation - * IN node_gres_list - node gres state information - * RET SLURM_SUCCESS or error code - */ -extern int gres_plugin_node_state_realloc(List job_gres_list, int node_offset, - List node_gres_list); +extern void gres_plugin_node_state_dealloc_all(List gres_list); /* * Log a node's current gres state @@ -295,12 +283,15 @@ List gres_plugin_job_state_dup(List gres_list); * IN gres_list - generated by gres_plugin_job_config_validate() * IN/OUT buffer - location to write state to * IN job_id - job's ID + * IN details - if set then pack job step allocation details (only needed to + * save/restore job state, not needed in job credential for + * slurmd task binding) * * NOTE: A job's allocation to steps is not recorded here, but recovered with * the job step state information upon slurmctld restart. */ extern int gres_plugin_job_state_pack(List gres_list, Buf buffer, - uint32_t job_id); + uint32_t job_id, bool details); /* * Unpack a job's current gres status, called from slurmctld for save/restore diff --git a/src/common/slurm_cred.c b/src/common/slurm_cred.c index a382e5a7a866c2140458eb38a8560e65ca2e756b..324956376ae20190a90e5ce4995c1baccb8ab1a6 100644 --- a/src/common/slurm_cred.c +++ b/src/common/slurm_cred.c @@ -1645,7 +1645,8 @@ _pack_cred(slurm_cred_t *cred, Buf buffer) pack32(cred->stepid, buffer); pack32(cred_uid, buffer); - gres_plugin_job_state_pack(cred->job_gres_list, buffer, cred->jobid); + (void) gres_plugin_job_state_pack(cred->job_gres_list, buffer, + cred->jobid, false); gres_plugin_step_state_pack(cred->step_gres_list, buffer, cred->jobid, cred->stepid); pack32(cred->job_mem_limit, buffer); diff --git a/src/plugins/select/cons_res/select_cons_res.c b/src/plugins/select/cons_res/select_cons_res.c index 55a305a71d17544018daa7cb9a8f57e05e469745..5fb2364d8e467c7a69bc55a1d5d440214e332234 100644 --- a/src/plugins/select/cons_res/select_cons_res.c +++ b/src/plugins/select/cons_res/select_cons_res.c @@ -1701,8 +1701,8 @@ extern int select_p_node_init(struct node_record *node_ptr, int node_cnt) real_memory; } select_node_usage[i].node_state = NODE_CR_AVAILABLE; - gres_plugin_node_state_dealloc(select_node_record[i].node_ptr-> - gres_list); + gres_plugin_node_state_dealloc_all(select_node_record[i]. + node_ptr->gres_list); } _create_part_data(); diff --git a/src/plugins/select/linear/select_linear.c b/src/plugins/select/linear/select_linear.c index 5a8782a6da8ff7f8328bc29fd041bd65c421bd7a..89aad0fc0d5d03614059c17b090926a08c68fe1f 100644 --- a/src/plugins/select/linear/select_linear.c +++ b/src/plugins/select/linear/select_linear.c @@ -1848,7 +1848,7 @@ static void _init_node_cr(void) /* Clear existing node Gres allocations */ for (i = 0, node_ptr = node_record_table_ptr; i < node_record_count; i++, node_ptr++) { - gres_plugin_node_state_dealloc(node_ptr->gres_list); + gres_plugin_node_state_dealloc_all(node_ptr->gres_list); } /* record running and suspended jobs in node_cr_records */ @@ -1913,9 +1913,12 @@ static void _init_node_cr(void) } if (bit_test(job_ptr->node_bitmap, i)) { - gres_plugin_node_state_realloc( - job_ptr->gres_list, node_offset, - node_ptr->gres_list); + gres_plugin_job_alloc(job_ptr->gres_list, + node_ptr->gres_list, + job_resrcs_ptr->nhosts, + node_offset, + job_resrcs_ptr-> + cpus[node_offset]); } part_cr_ptr = cr_ptr->nodes[i].parts; diff --git a/src/slurmctld/job_mgr.c b/src/slurmctld/job_mgr.c index a9fb55abd7e3c41c51ed4bffbada529a31905c28..c47f449cd5a4d68109ec469ce4184495e8ccd5a2 100644 --- a/src/slurmctld/job_mgr.c +++ b/src/slurmctld/job_mgr.c @@ -787,7 +787,7 @@ static void _dump_job_state(struct job_record *dump_job_ptr, Buf buffer) dump_job_ptr->spank_job_env_size, buffer); (void) gres_plugin_job_state_pack(dump_job_ptr->gres_list, buffer, - dump_job_ptr->job_id); + dump_job_ptr->job_id, true); /* Dump job details, if available */ detail_ptr = dump_job_ptr->details;