diff --git a/RELEASE_NOTES b/RELEASE_NOTES index f121af6e9c9aca63fc1a7da7aa8f197e0a48ae3c..68c6cae2154babbf36fbcac20fd50b642d94022a 100644 --- a/RELEASE_NOTES +++ b/RELEASE_NOTES @@ -126,7 +126,9 @@ job_state_reason WAIT_QOS_THRES /* required QOS threshold has been breached */ select_jobdata_type - SELECT_JOBDATA_PTR /* data-> select_jobinfo_t *jobinfo */ + SELECT_JOBDATA_PTR /* data-> select_jobinfo_t *jobinfo */ + SELECT_JOBDATA_BLOCK_PTR /* data-> bg_record_t *bg_record */ + SELECT_JOBDATA_DIM_CNT /* data-> uint16_t dim_cnt */ select_nodedata_type SELECT_NODEDATA_PTR /* data-> select_nodeinfo_t *nodeinfo */ diff --git a/slurm/slurm.h.in b/slurm/slurm.h.in index 5e26863cc9ef3b611ca306e83d14da30dcb55c02..f23f7691aa6f1251741c4944ea5625cfcdba144b 100644 --- a/slurm/slurm.h.in +++ b/slurm/slurm.h.in @@ -342,6 +342,7 @@ enum select_jobdata_type { SELECT_JOBDATA_PAGG_ID, /* data-> uint64_t job container ID */ SELECT_JOBDATA_PTR, /* data-> select_jobinfo_t *jobinfo */ SELECT_JOBDATA_BLOCK_PTR, /* data-> bg_record_t *bg_record */ + SELECT_JOBDATA_DIM_CNT, /* data-> uint16_t dim_cnt */ }; enum select_nodedata_type { diff --git a/src/plugins/select/bluegene/ba_bgq/block_allocator.c b/src/plugins/select/bluegene/ba_bgq/block_allocator.c index 4d9470c92c4ceb0891c45ea9755dbac59c939a8f..d87628b72fe3286c8be2562fc7b7d2f151af7056 100644 --- a/src/plugins/select/bluegene/ba_bgq/block_allocator.c +++ b/src/plugins/select/bluegene/ba_bgq/block_allocator.c @@ -920,12 +920,15 @@ extern ba_mp_t *ba_pick_sub_block_cnodes( ba_geo_table_t *geo_table = NULL; char *tmp_char = NULL, *tmp_char2 = NULL; uint32_t orig_node_count = *node_count; + int dim; xassert(ba_mp_geo_system); xassert(bg_record->ba_mp_list); xassert(jobinfo); xassert(!jobinfo->units_used); + jobinfo->dim_cnt = ba_mp_geo_system->dim_count; + while (!(geo_table = ba_mp_geo_system->geo_table_ptr[*node_count])) { debug2("ba_pick_sub_block_cnodes: No geometries of size %u ", *node_count); @@ -1020,6 +1023,9 @@ extern ba_mp_t 
*ba_pick_sub_block_cnodes( } jobinfo->ionode_str = ba_node_map_ranged_hostlist( jobinfo->units_used, ba_mp_geo_system); + for (dim = 0; dim < jobinfo->dim_cnt; dim++) + jobinfo->geometry[dim] = + geo_table->geometry[dim]; break; } diff --git a/src/plugins/select/bluegene/bg_dynamic_block.c b/src/plugins/select/bluegene/bg_dynamic_block.c index 911c7d219af421b31986c086fdeb722adbd4050d..a9f40a67fe2662642ede035f76504e71d1c9bcf8 100644 --- a/src/plugins/select/bluegene/bg_dynamic_block.c +++ b/src/plugins/select/bluegene/bg_dynamic_block.c @@ -294,7 +294,8 @@ extern List create_dynamic_block(List block_list, //debug("going to create %d", request->size); if (!new_ba_request(request)) { if (request->geometry[0] != (uint16_t)NO_VAL) { - char *geo = give_geo(request->geometry); + char *geo = give_geo(request->geometry, + SYSTEM_DIMENSIONS, 1); error("Problems with request for size %d geo %s", request->size, geo); xfree(geo); diff --git a/src/plugins/select/bluegene/bg_job_info.c b/src/plugins/select/bluegene/bg_job_info.c index 2976634ecbbf443b7f4dd43d8b5bf1f48b7d1ae3..67acf8515ce4f055d0e233818971b604dafdc8f8 100644 --- a/src/plugins/select/bluegene/bg_job_info.c +++ b/src/plugins/select/bluegene/bg_job_info.c @@ -60,7 +60,8 @@ extern select_jobinfo_t *alloc_select_jobinfo() { int i; select_jobinfo_t *jobinfo = xmalloc(sizeof(struct select_jobinfo)); - for (i=0; i<SYSTEM_DIMENSIONS; i++) { + jobinfo->dim_cnt = 0; /* This will be setup later */ + for (i=0; i<HIGHEST_DIMENSIONS; i++) { jobinfo->geometry[i] = (uint16_t) NO_VAL; jobinfo->conn_type[i] = (uint16_t) NO_VAL; } @@ -129,10 +130,16 @@ extern int set_select_jobinfo(select_jobinfo_t *jobinfo, return SLURM_ERROR; } + if (!jobinfo->dim_cnt) + jobinfo->dim_cnt = SYSTEM_DIMENSIONS; + switch (data_type) { + case SELECT_JOBDATA_DIM_CNT: + jobinfo->dim_cnt = *uint16; + break; case SELECT_JOBDATA_GEOMETRY: new_size = 1; - for (i=0; i<SYSTEM_DIMENSIONS; i++) { + for (i=0; i<jobinfo->dim_cnt; i++) { jobinfo->geometry[i] = 
uint16[i]; new_size *= uint16[i]; @@ -151,7 +158,7 @@ extern int set_select_jobinfo(select_jobinfo_t *jobinfo, jobinfo->rotate = *uint16; break; case SELECT_JOBDATA_CONN_TYPE: - for (i=0; i<SYSTEM_DIMENSIONS; i++) { + for (i=0; i<jobinfo->dim_cnt; i++) { jobinfo->conn_type[i] = uint16[i]; } break; @@ -169,6 +176,15 @@ extern int set_select_jobinfo(select_jobinfo_t *jobinfo, break; case SELECT_JOBDATA_IONODES: xfree(jobinfo->ionode_str); + if (tmp_char) { +#ifdef HAVE_BGQ + jobinfo->dim_cnt = 5; +#else + jobinfo->dim_cnt = SYSTEM_DIMENSIONS; +#endif + } else + jobinfo->dim_cnt = SYSTEM_DIMENSIONS; + jobinfo->ionode_str = xstrdup(tmp_char); break; case SELECT_JOBDATA_NODE_CNT: @@ -186,7 +202,7 @@ extern int set_select_jobinfo(select_jobinfo_t *jobinfo, || (jobinfo->cnode_cnt < bg_conf->mp_cnode_cnt)) jobinfo->conn_type[0] = SELECT_SMALL; else if (jobinfo->conn_type[0] == SELECT_SMALL) - for (i=0; i<SYSTEM_DIMENSIONS; i++) + for (i=0; i<jobinfo->dim_cnt; i++) jobinfo->conn_type[i] = SELECT_TORUS; break; case SELECT_JOBDATA_ALTERED: @@ -244,9 +260,15 @@ extern int get_select_jobinfo(select_jobinfo_t *jobinfo, return SLURM_ERROR; } + if (!jobinfo->dim_cnt) + jobinfo->dim_cnt = SYSTEM_DIMENSIONS; + switch (data_type) { + case SELECT_JOBDATA_DIM_CNT: + *uint16 = jobinfo->dim_cnt; + break; case SELECT_JOBDATA_GEOMETRY: - for (i=0; i<SYSTEM_DIMENSIONS; i++) { + for (i=0; i<jobinfo->dim_cnt; i++) { uint16[i] = jobinfo->geometry[i]; } break; @@ -257,7 +279,7 @@ extern int get_select_jobinfo(select_jobinfo_t *jobinfo, *uint16 = jobinfo->rotate; break; case SELECT_JOBDATA_CONN_TYPE: - for (i=0; i<SYSTEM_DIMENSIONS; i++) + for (i=0; i<jobinfo->dim_cnt; i++) uint16[i] = jobinfo->conn_type[i]; break; case SELECT_JOBDATA_BLOCK_ID: @@ -341,6 +363,7 @@ extern select_jobinfo_t *copy_select_jobinfo(select_jobinfo_t *jobinfo) error("copy_jobinfo: jobinfo magic bad"); else { rc = xmalloc(sizeof(struct select_jobinfo)); + rc->dim_cnt = jobinfo->dim_cnt; memcpy(rc->geometry, 
jobinfo->geometry, sizeof(rc->geometry)); memcpy(rc->conn_type, jobinfo->conn_type, sizeof(rc->conn_type)); @@ -378,6 +401,12 @@ extern int pack_select_jobinfo(select_jobinfo_t *jobinfo, Buf buffer, if (protocol_version >= SLURM_2_3_PROTOCOL_VERSION) { if (jobinfo) { + if (jobinfo->dim_cnt) + dims = jobinfo->dim_cnt; + else if (bg_recover != NOT_FROM_CONTROLLER) + xassert(0); + + pack16(dims, buffer); /* NOTE: If new elements are added here, make sure to * add equivalant pack of zeros below for NULL * pointer */ @@ -400,6 +429,7 @@ extern int pack_select_jobinfo(select_jobinfo_t *jobinfo, Buf buffer, packstr(jobinfo->ramdiskimage, buffer); pack_bit_fmt(jobinfo->units_used, buffer); } else { + pack16(dims, buffer); /* pack space for 3 positions for geo * then 1 for conn_type, reboot, and rotate */ @@ -526,7 +556,13 @@ extern int unpack_select_jobinfo(select_jobinfo_t **jobinfo_pptr, Buf buffer, *jobinfo_pptr = jobinfo; jobinfo->magic = JOBINFO_MAGIC; + if (protocol_version >= SLURM_2_3_PROTOCOL_VERSION) { + safe_unpack16(&jobinfo->dim_cnt, buffer); + + xassert(jobinfo->dim_cnt); + dims = jobinfo->dim_cnt; + for (i=0; i<dims; i++) { safe_unpack16(&(jobinfo->geometry[i]), buffer); safe_unpack16(&(jobinfo->conn_type[i]), buffer); @@ -557,8 +593,8 @@ extern int unpack_select_jobinfo(select_jobinfo_t **jobinfo_pptr, Buf buffer, bit_unfmt(jobinfo->units_used, bit_char); xfree(bit_char); } - } else if (protocol_version >= SLURM_2_2_PROTOCOL_VERSION) { + jobinfo->dim_cnt = dims; for (i=0; i<dims; i++) { safe_unpack16(&(jobinfo->geometry[i]), buffer); } @@ -584,6 +620,7 @@ extern int unpack_select_jobinfo(select_jobinfo_t **jobinfo_pptr, Buf buffer, safe_unpackstr_xmalloc(&(jobinfo->ramdiskimage), &uint32_tmp, buffer); } else { + jobinfo->dim_cnt = dims; for (i=0; i<dims; i++) { safe_unpack16(&(jobinfo->geometry[i]), buffer); } @@ -655,14 +692,14 @@ extern char *sprint_select_jobinfo(select_jobinfo_t *jobinfo, } if (jobinfo->geometry[0] == (uint16_t) NO_VAL) { - for (i=0; 
i<SYSTEM_DIMENSIONS; i++) { + for (i=0; i<jobinfo->dim_cnt; i++) { if (geo) xstrcat(geo, "x0"); else xstrcat(geo, "0"); } } else - geo = give_geo(jobinfo->geometry); + geo = give_geo(jobinfo->geometry, jobinfo->dim_cnt, 0); switch (mode) { case SELECT_PRINT_HEAD: @@ -786,7 +823,7 @@ extern char *xstrdup_select_jobinfo(select_jobinfo_t *jobinfo, int mode) xstrcat(geo, "0"); } } else - geo = give_geo(jobinfo->geometry); + geo = give_geo(jobinfo->geometry, jobinfo->dim_cnt, 1); switch (mode) { case SELECT_PRINT_HEAD: diff --git a/src/plugins/select/bluegene/bg_job_info.h b/src/plugins/select/bluegene/bg_job_info.h index 24606aa46d36e6be0264eb699df0315649a6c50f..4c2578a636f231c60f1c5275ab9c3e962d83713a 100644 --- a/src/plugins/select/bluegene/bg_job_info.h +++ b/src/plugins/select/bluegene/bg_job_info.h @@ -52,6 +52,14 @@ struct select_jobinfo { char *blrtsimage; /* BlrtsImage for this block */ uint32_t cnode_cnt; /* how many cnodes in block running job */ uint16_t conn_type[HIGHEST_DIMENSIONS]; /* see enum connection_type */ + uint16_t dim_cnt; /* how many dimensions this + * represents. In most cases this will + * be SYSTEM_DIMENSIONS, but in the + * case of a sub-block allocation + * this will be the number of + * dimensions a cnode represents. In + * Q that is 5 while a midplane is + * only representing 4. */ uint16_t geometry[HIGHEST_DIMENSIONS]; /* node count in various * dimensions, e.g. AXYZ */ char *ionode_str; /* for bg to tell which ionodes of a small diff --git a/src/plugins/select/bluegene/bg_node_info.c b/src/plugins/select/bluegene/bg_node_info.c index af49daadac47f88f00e61c1707fb7b2ed1762ea8..bedbfe16454b5a9818ef99202cd44e714eec2f84 100644 --- a/src/plugins/select/bluegene/bg_node_info.c +++ b/src/plugins/select/bluegene/bg_node_info.c @@ -131,13 +131,13 @@ unpack_error: * compiled for non-bluegene machines, and it didn't make since to * compile the whole file just for this one function. 
*/ -extern char *give_geo(uint16_t int_geo[SYSTEM_DIMENSIONS]) +extern char *give_geo(uint16_t *int_geo, int dims, bool with_sep) { char *geo = NULL; int i; - for (i=0; i<SYSTEM_DIMENSIONS; i++) { - if (geo) + for (i=0; i<dims; i++) { + if (geo && with_sep) xstrcat(geo, "x"); xstrfmtcat(geo, "%c", alpha_num[int_geo[i]]); } diff --git a/src/plugins/select/bluegene/bg_node_info.h b/src/plugins/select/bluegene/bg_node_info.h index 9e57cd18676d458e25b2b538cc697aadef72307b..449cb4b1e3faff042025087e27e4429db9bcbce8 100644 --- a/src/plugins/select/bluegene/bg_node_info.h +++ b/src/plugins/select/bluegene/bg_node_info.h @@ -59,7 +59,7 @@ struct select_nodeinfo { List subgrp_list; }; -extern char *give_geo(uint16_t int_geo[SYSTEM_DIMENSIONS]); +extern char *give_geo(uint16_t *int_geo, int dims, bool with_sep); extern int select_nodeinfo_pack(select_nodeinfo_t *nodeinfo, Buf buffer, uint16_t protocol_version); diff --git a/src/plugins/select/bluegene/select_bluegene.c b/src/plugins/select/bluegene/select_bluegene.c index 298965202af73727b8b730b646348f30242444ef..e5500e0d31a2daed1393f17a9779ced6b54a4086 100644 --- a/src/plugins/select/bluegene/select_bluegene.c +++ b/src/plugins/select/bluegene/select_bluegene.c @@ -1688,6 +1688,7 @@ extern bitstr_t *select_p_step_pick_nodes(struct job_record *job_ptr, fatal("bit_copy malloc failure"); bit_or(bg_record->mp_used_bitmap, picked_mps); step_jobinfo->ionode_str = xstrdup(jobinfo->ionode_str); + step_jobinfo->dim_cnt = jobinfo->dim_cnt; goto found_it; } else if ((ba_mp = ba_pick_sub_block_cnodes( bg_record, &node_count, @@ -1712,6 +1713,7 @@ found_it: } step_jobinfo->cnode_cnt = node_count; } + end_it: FREE_NULL_BITMAP(avail_mps);