diff --git a/src/common/select_job_res.c b/src/common/select_job_res.c
index ba073f51d2ba963a536ed717fde4394fdbc27c28..36b30d05111fefd44d0b7e9dc25f4084991156f3 100644
--- a/src/common/select_job_res.c
+++ b/src/common/select_job_res.c
@@ -109,6 +109,46 @@ extern int build_select_job_res(select_job_res_t select_job_res,
 	return SLURM_SUCCESS;
 }
 
+/* Rebuild cpu_array_cnt, cpu_array_value, and cpu_array_reps as a run-length
+ * encoding of cpus[]. RET SLURM_SUCCESS, or SLURM_ERROR if cpus is NULL */
+extern int build_select_job_res_cpu_array(select_job_res_t select_job_res_ptr)
+{
+	int i;
+	uint32_t last_cpu_cnt = (uint32_t) -1;	/* matches no uint16_t cpus[] */
+
+	if (select_job_res_ptr->nhosts == 0)
+		return SLURM_SUCCESS;	/* no work to do */
+	if (select_job_res_ptr->cpus == NULL) {
+		error("build_select_job_res_cpu_array cpus==NULL");
+		return SLURM_ERROR;
+	}
+
+	/* clear vestigial data and create new arrays of max size */
+	select_job_res_ptr->cpu_array_cnt = 0;
+	xfree(select_job_res_ptr->cpu_array_reps);
+	select_job_res_ptr->cpu_array_reps =
+		xmalloc(select_job_res_ptr->nhosts * sizeof(uint32_t));
+	xfree(select_job_res_ptr->cpu_array_value);
+	select_job_res_ptr->cpu_array_value =
+		xmalloc(select_job_res_ptr->nhosts * sizeof(uint16_t));
+
+	for (i=0; i<select_job_res_ptr->nhosts; i++) {
+		if (select_job_res_ptr->cpus[i] != last_cpu_cnt) {
+			last_cpu_cnt = select_job_res_ptr->cpus[i];
+			select_job_res_ptr->cpu_array_value[
+				select_job_res_ptr->cpu_array_cnt]
+				= last_cpu_cnt;
+			select_job_res_ptr->cpu_array_reps[
+				select_job_res_ptr->cpu_array_cnt] = 1;
+			select_job_res_ptr->cpu_array_cnt++;
+		} else {
+			select_job_res_ptr->cpu_array_reps[
+				select_job_res_ptr->cpu_array_cnt-1]++;
+		}
+	}
+	return SLURM_SUCCESS;
+}
+
 extern int valid_select_job_res(select_job_res_t select_job_res,
 				void *node_rec_table,
 				uint16_t fast_schedule)
@@ -181,13 +221,33 @@ extern select_job_res_t copy_select_job_res(select_job_res_t
 		new_layout->node_bitmap = bit_copy(select_job_res_ptr->
 						   node_bitmap);
 	}
+
+	new_layout->cpu_array_cnt =
select_job_res_ptr->cpu_array_cnt; + if (select_job_res_ptr->cpu_array_reps && + select_job_res_ptr->cpu_array_cnt) { + new_layout->cpu_array_reps = + xmalloc(sizeof(uint32_t) * + select_job_res_ptr->cpu_array_cnt); + memcpy(new_layout->cpu_array_reps, + select_job_res_ptr->cpu_array_reps, + (sizeof(uint32_t) * select_job_res_ptr->cpu_array_cnt)); + } + if (select_job_res_ptr->cpu_array_value && + select_job_res_ptr->cpu_array_cnt) { + new_layout->cpu_array_value = + xmalloc(sizeof(uint16_t) * + select_job_res_ptr->cpu_array_cnt); + memcpy(new_layout->cpu_array_value, + select_job_res_ptr->cpu_array_value, + (sizeof(uint16_t) * select_job_res_ptr->cpu_array_cnt)); + } + if (select_job_res_ptr->cpus) { new_layout->cpus = xmalloc(sizeof(uint16_t) * select_job_res_ptr->nhosts); memcpy(new_layout->cpus, select_job_res_ptr->cpus, (sizeof(uint16_t) * select_job_res_ptr->nhosts)); } - if (select_job_res_ptr->cpus_used) { new_layout->cpus_used = xmalloc(sizeof(uint16_t) * select_job_res_ptr->nhosts); @@ -248,6 +308,8 @@ extern void free_select_job_res(select_job_res_t *select_job_res_pptr) if (select_job_res_ptr->core_bitmap_used) bit_free(select_job_res_ptr->core_bitmap_used); xfree(select_job_res_ptr->cores_per_socket); + xfree(select_job_res_ptr->cpu_array_reps); + xfree(select_job_res_ptr->cpu_array_value); xfree(select_job_res_ptr->cpus); xfree(select_job_res_ptr->cpus_used); xfree(select_job_res_ptr->memory_allocated); @@ -350,6 +412,14 @@ extern void log_select_job_res(select_job_res_t select_job_res_ptr) bit_inx++; } } + for (node_inx=0; node_inx<select_job_res_ptr->cpu_array_cnt; + node_inx++) { + if (node_inx == 0) + info("--------------------"); + info("cpu_array_value[%d]:%u reps:%u", node_inx, + select_job_res_ptr->cpu_array_value[node_inx], + select_job_res_ptr->cpu_array_reps[node_inx]); + } info("===================="); } @@ -379,6 +449,18 @@ extern void pack_select_job_res(select_job_res_t select_job_res_ptr, pack32(select_job_res_ptr->nprocs, buffer); 
pack8(select_job_res_ptr->node_req, buffer); + if (select_job_res_ptr->cpu_array_cnt && + select_job_res_ptr->cpu_array_reps && + select_job_res_ptr->cpu_array_value) { + pack32(select_job_res_ptr->cpu_array_cnt, buffer); + pack32_array(select_job_res_ptr->cpu_array_reps, + select_job_res_ptr->cpu_array_cnt, buffer); + pack16_array(select_job_res_ptr->cpu_array_value, + select_job_res_ptr->cpu_array_cnt, buffer); + } else { + pack32((uint32_t) 0, buffer); + } + pack16_array(select_job_res_ptr->cpus, select_job_res_ptr->nhosts, buffer); if (select_job_res_ptr->cpus_used) { @@ -442,6 +524,18 @@ extern int unpack_select_job_res(select_job_res_t *select_job_res_pptr, safe_unpack32(&select_job_res->nprocs, buffer); safe_unpack8(&select_job_res->node_req, buffer); + safe_unpack32(&select_job_res->cpu_array_cnt, buffer); + if (select_job_res->cpu_array_cnt) { + safe_unpack32_array(&select_job_res->cpu_array_reps, + &tmp32, buffer); + if (tmp32 != select_job_res->cpu_array_cnt) + goto unpack_error; + safe_unpack16_array(&select_job_res->cpu_array_value, + &tmp32, buffer); + if (tmp32 != select_job_res->cpu_array_cnt) + goto unpack_error; + } + safe_unpack16_array(&select_job_res->cpus, &tmp32, buffer); if (tmp32 != select_job_res->nhosts) goto unpack_error; diff --git a/src/common/select_job_res.h b/src/common/select_job_res.h index a1fd9e547eb5f72f890d710e6f717c38b5b90f5b..4844ba370faeacf9ff9ee4747fdae50686ae7234 100644 --- a/src/common/select_job_res.h +++ b/src/common/select_job_res.h @@ -61,6 +61,10 @@ * cores_per_socket - Count of cores per socket on this node * cpus - Count of desired/allocated CPUs per node for job/step * cpus_used - For a job, count of CPUs per node used by job steps + * cpu_array_cnt - Count of elements in cpu_array_* below + * cpu_array_value - Count of allocated CPUs per node for job + * cpu_array_reps - Number of consecutive nodes on which cpu_array_value + * is duplicated. See NOTES below. 
* memory_allocated - MB per node reserved for the job or step * memory_used - MB per node of memory consumed by job steps * nhosts - Number of nodes in the allocation @@ -74,6 +78,13 @@ * and cores_per_socket apply to * sockets_per_node - Count of sockets on this node * + * NOTES: + * cpu_array_* contains the same information as "cpus", but in a more compact + * format. For example if cpus = {4, 4, 2, 2, 2, 2, 2, 2} then cpu_array_cnt=2 + * cpu_array_value = {4, 2} and cpu_array_reps = {2, 6}. We do not need to + * save/restore these values, but generate them by calling + * build_select_job_res_cpu_array() + * * Sample layout of core_bitmap: * | Node_0 | Node_1 | * | Sock_0 | Sock_1 | Sock_0 | Sock_1 | @@ -83,6 +94,9 @@ struct select_job_res { bitstr_t * core_bitmap; bitstr_t * core_bitmap_used; + uint32_t cpu_array_cnt; + uint16_t * cpu_array_value; + uint32_t * cpu_array_reps; uint16_t * cpus; uint16_t * cpus_used; uint16_t * cores_per_socket; @@ -115,6 +129,10 @@ extern int build_select_job_res(select_job_res_t select_job_res_ptr, void *node_rec_table, uint16_t fast_schedule); +/* Rebuild cpu_array_cnt, cpu_array_value, and cpu_array_reps based upon the + * values of cpus in an existing data structure */ +extern int build_select_job_res_cpu_array(select_job_res_t select_job_res_ptr); + /* Validate a select_job_res data structure originally built using * build_select_job_res() is still valid based upon slurmctld state. * NOTE: Reset the node_bitmap field before calling this function. 
diff --git a/src/slurmctld/controller.c b/src/slurmctld/controller.c index 544466bffee9cef43b7cbe341712e3d143451c4a..38129b2ed796dcbc021b6d02108e7986c5901f16 100644 --- a/src/slurmctld/controller.c +++ b/src/slurmctld/controller.c @@ -1045,11 +1045,12 @@ if (i) error("build_select_job_res failure"); else info("build_select_job_res success"); -select_res_ptr1->nprocs = 5; +select_res_ptr1->nprocs = 16; select_res_ptr1->node_req = 1; select_res_ptr1->cpus = xmalloc(sizeof(uint16_t) * 4); +set_select_job_res_bit(select_res_ptr1, 0, 1, 1); info("set_bit(0,1,1)"); set_select_job_res_bit(select_res_ptr1, 0, 2, 1); info("set_bit(0,2,1)"); -select_res_ptr1->cpus[0] = 1; +select_res_ptr1->cpus[0] = 2; set_select_job_res_bit(select_res_ptr1, 1, 0, 0); info("set_bit(1,0,0)"); select_res_ptr1->cpus[1] = 1; i = get_select_job_res_node(select_res_ptr1, 2); @@ -1069,6 +1070,12 @@ else //select_res_ptr1->cpus[3] = 2; set_select_job_res_node(select_res_ptr1, 3); info("set_node(3)"); select_res_ptr1->cpus[3] = 12; +i = build_select_job_res_cpu_array(select_res_ptr1); +if (i) + error("build_select_job_res_cpu_array failure"); +else + info("build_select_job_res_cpu_array success"); + i = get_select_job_res_bit(select_res_ptr1, 1, 0, 0); if (i == 1) info("get_bit(1,0,0):%d", i);