diff --git a/RELEASE_NOTES b/RELEASE_NOTES index 68c6cae2154babbf36fbcac20fd50b642d94022a..34a33cdaa13ef1a13e2b3321be7480ebe65b4505 100644 --- a/RELEASE_NOTES +++ b/RELEASE_NOTES @@ -129,6 +129,7 @@ select_jobdata_type SELECT_JOBDATA_PTR /* data-> select_jobinfo_t *jobinfo */ SELECT_JOBDATA_BLOCK_PTR /* data-> bg_record_t *bg_record */ SELECT_JOBDATA_DIM_CNT /* data-> uint16_t dim_cnt */ + SELECT_JOBDATA_BLOCK_NODE_CNT /* data-> uint32_t block_cnode_cnt */ select_nodedata_type SELECT_NODEDATA_PTR /* data-> select_nodeinfo_t *nodeinfo */ diff --git a/slurm/slurm.h.in b/slurm/slurm.h.in index f23f7691aa6f1251741c4944ea5625cfcdba144b..1827b42992c2ab7c090d69e481413cbc3d42c5e7 100644 --- a/slurm/slurm.h.in +++ b/slurm/slurm.h.in @@ -331,7 +331,7 @@ enum select_jobdata_type { SELECT_JOBDATA_BLOCK_ID, /* data-> char *bg_block_id */ SELECT_JOBDATA_NODES, /* data-> char *nodes */ SELECT_JOBDATA_IONODES, /* data-> char *ionodes */ - SELECT_JOBDATA_NODE_CNT, /* data-> uint32_t node_cnt */ + SELECT_JOBDATA_NODE_CNT, /* data-> uint32_t cnode_cnt */ SELECT_JOBDATA_ALTERED, /* data-> uint16_t altered */ SELECT_JOBDATA_BLRTS_IMAGE,/* data-> char *blrtsimage */ SELECT_JOBDATA_LINUX_IMAGE,/* data-> char *linuximage */ @@ -343,6 +343,7 @@ enum select_jobdata_type { SELECT_JOBDATA_PTR, /* data-> select_jobinfo_t *jobinfo */ SELECT_JOBDATA_BLOCK_PTR, /* data-> bg_record_t *bg_record */ SELECT_JOBDATA_DIM_CNT, /* data-> uint16_t dim_cnt */ + SELECT_JOBDATA_BLOCK_NODE_CNT, /* data-> uint32_t block_cnode_cnt */ }; enum select_nodedata_type { diff --git a/src/common/env.c b/src/common/env.c index d355e412a789df5851f7f9e2027074aa43e15b25..100000b341417b264ed1d5d5eed4f2c23d1ded1f 100644 --- a/src/common/env.c +++ b/src/common/env.c @@ -90,6 +90,7 @@ static int _setup_particulars(uint32_t cluster_flags, int rc = SLURM_SUCCESS; if (cluster_flags & CLUSTER_FLAG_BG) { char *bg_part_id = NULL; + uint32_t node_cnt = 0; select_g_select_jobinfo_get(select_jobinfo, SELECT_JOBDATA_BLOCK_ID, &bg_part_id); @@ -108,6 +109,13 @@ static int _setup_particulars(uint32_t cluster_flags, bg_part_id); } } + select_g_select_jobinfo_get( + select_jobinfo, + SELECT_JOBDATA_BLOCK_NODE_CNT, + &node_cnt); + if (node_cnt) + setenvf(dest, "SLURM_BLOCK_NUM_NODES", + "%u", node_cnt); setenvf(dest, "MPIRUN_PARTITION", "%s", bg_part_id); setenvf(dest, "MPIRUN_NOFREE", "%d", 1); diff --git a/src/plugins/select/bluegene/bg_job_info.c b/src/plugins/select/bluegene/bg_job_info.c index 67acf8515ce4f055d0e233818971b604dafdc8f8..7d83fe0eca1387c7ebb7ca259f5b38f109f9300e 100644 --- a/src/plugins/select/bluegene/bg_job_info.c +++ b/src/plugins/select/bluegene/bg_job_info.c @@ -68,6 +68,7 @@ extern select_jobinfo_t *alloc_select_jobinfo() jobinfo->reboot = (uint16_t) NO_VAL; jobinfo->rotate = (uint16_t) NO_VAL; jobinfo->magic = JOBINFO_MAGIC; + jobinfo->block_cnode_cnt = NO_VAL; jobinfo->cnode_cnt = NO_VAL; /* This bitstr is created when used. */ @@ -158,18 +159,28 @@ extern int set_select_jobinfo(select_jobinfo_t *jobinfo, jobinfo->rotate = *uint16; break; case SELECT_JOBDATA_CONN_TYPE: - for (i=0; i<jobinfo->dim_cnt; i++) { + for (i=0; i<jobinfo->dim_cnt; i++) jobinfo->conn_type[i] = uint16[i]; - } - break; - case SELECT_JOBDATA_BLOCK_PTR: - jobinfo->bg_record = bg_record; break; case SELECT_JOBDATA_BLOCK_ID: /* we xfree() any preset value to avoid a memory leak */ xfree(jobinfo->bg_block_id); jobinfo->bg_block_id = xstrdup(tmp_char); break; + case SELECT_JOBDATA_BLOCK_NODE_CNT: + jobinfo->block_cnode_cnt = *uint32; + break; + case SELECT_JOBDATA_BLOCK_PTR: + jobinfo->bg_record = bg_record; + xfree(jobinfo->bg_block_id); + if (bg_record) { + jobinfo->bg_block_id = xstrdup(bg_record->bg_block_id); + jobinfo->block_cnode_cnt = bg_record->cnode_cnt; + } else { + jobinfo->bg_block_id = xstrdup("unassigned"); + jobinfo->block_cnode_cnt = 0; + } + break; case SELECT_JOBDATA_NODES: xfree(jobinfo->mp_str); jobinfo->mp_str = xstrdup(tmp_char); @@ -289,6 +300,9 @@ extern int get_select_jobinfo(select_jobinfo_t *jobinfo, else *tmp_char = xstrdup(jobinfo->bg_block_id); break; + case SELECT_JOBDATA_BLOCK_NODE_CNT: + *uint32 = jobinfo->block_cnode_cnt; + break; case SELECT_JOBDATA_BLOCK_PTR: *bg_record = jobinfo->bg_record; break; @@ -369,10 +383,12 @@ extern select_jobinfo_t *copy_select_jobinfo(select_jobinfo_t *jobinfo) sizeof(rc->conn_type)); rc->reboot = jobinfo->reboot; rc->rotate = jobinfo->rotate; + rc->bg_record = jobinfo->bg_record; rc->bg_block_id = xstrdup(jobinfo->bg_block_id); rc->magic = JOBINFO_MAGIC; rc->mp_str = xstrdup(jobinfo->mp_str); rc->ionode_str = xstrdup(jobinfo->ionode_str); + rc->block_cnode_cnt = jobinfo->block_cnode_cnt; rc->cnode_cnt = jobinfo->cnode_cnt; rc->altered = jobinfo->altered; rc->blrtsimage = xstrdup(jobinfo->blrtsimage); @@ -417,6 +433,7 @@ extern int pack_select_jobinfo(select_jobinfo_t *jobinfo, Buf buffer, pack16(jobinfo->reboot, buffer); pack16(jobinfo->rotate, buffer); + pack32(jobinfo->block_cnode_cnt, buffer); pack32(jobinfo->cnode_cnt, buffer); packstr(jobinfo->bg_block_id, buffer); @@ -436,7 +453,8 @@ extern int pack_select_jobinfo(select_jobinfo_t *jobinfo, Buf buffer, for (i=0; i<((dims*2)+2); i++) { pack16((uint16_t) 0, buffer); } - pack32((uint32_t) 0, buffer); //node_cnt + pack32((uint32_t) 0, buffer); //block_cnode_cnt + pack32((uint32_t) 0, buffer); //cnode_cnt packnull(buffer); //bg_block_id packnull(buffer); //nodes packnull(buffer); //ionodes @@ -571,6 +589,7 @@ extern int unpack_select_jobinfo(select_jobinfo_t **jobinfo_pptr, Buf buffer, safe_unpack16(&(jobinfo->reboot), buffer); safe_unpack16(&(jobinfo->rotate), buffer); + safe_unpack32(&(jobinfo->block_cnode_cnt), buffer); safe_unpack32(&(jobinfo->cnode_cnt), buffer); safe_unpackstr_xmalloc(&(jobinfo->bg_block_id), &uint32_tmp, diff --git a/src/plugins/select/bluegene/bg_job_info.h b/src/plugins/select/bluegene/bg_job_info.h index 4c2578a636f231c60f1c5275ab9c3e962d83713a..17fd6918f8864182feb81f2703ab2dd86acb0285 100644 --- a/src/plugins/select/bluegene/bg_job_info.h +++ b/src/plugins/select/bluegene/bg_job_info.h @@ -50,7 +50,10 @@ struct select_jobinfo { bg_record_t *bg_record; /* For internal use only DO NOT PACK */ char *bg_block_id; /* Blue Gene block ID */ char *blrtsimage; /* BlrtsImage for this block */ - uint32_t cnode_cnt; /* how many cnodes in block running job */ + uint32_t block_cnode_cnt; /* how many cnodes in the block + * This is used to say we are + * running a sub-block job. */ + uint32_t cnode_cnt; /* how many cnodes in job running on block */ uint16_t conn_type[HIGHEST_DIMENSIONS]; /* see enum connection_type */ uint16_t dim_cnt; /* how many dimensions this * represents in most cases this will diff --git a/src/plugins/select/bluegene/bg_job_place.c b/src/plugins/select/bluegene/bg_job_place.c index b32700ed4dca6d87fd3efca820343015c342489f..c6116f93e0aff4fa3004f7b46dbbb36911858259 100644 --- a/src/plugins/select/bluegene/bg_job_place.c +++ b/src/plugins/select/bluegene/bg_job_place.c @@ -1595,8 +1595,8 @@ extern int submit_job(struct job_record *job_ptr, bitstr_t *slurm_block_bitmap, starttime, bg_record->mp_str); set_select_jobinfo(job_ptr->select_jobinfo->data, - SELECT_JOBDATA_BLOCK_ID, - "unassigned"); + SELECT_JOBDATA_BLOCK_PTR, + NULL); set_select_jobinfo(job_ptr->select_jobinfo->data, SELECT_JOBDATA_NODE_CNT, &bg_record->cnode_cnt); @@ -1614,10 +1614,6 @@ extern int submit_job(struct job_record *job_ptr, bitstr_t *slurm_block_bitmap, SELECT_IS_MODE_RUN_NOW(local_mode)); if (SELECT_IS_MODE_RUN_NOW(local_mode)) { - set_select_jobinfo( - job_ptr->select_jobinfo->data, - SELECT_JOBDATA_BLOCK_ID, - bg_record->bg_block_id); /* Set this up to be the correct pointer since we probably are working off a @@ -1638,10 +1634,6 @@ extern int submit_job(struct job_record *job_ptr, bitstr_t *slurm_block_bitmap, last_bg_update = time(NULL); } } else { - set_select_jobinfo( - job_ptr->select_jobinfo->data, - SELECT_JOBDATA_BLOCK_ID, - "unassigned"); set_select_jobinfo( job_ptr->select_jobinfo->data, SELECT_JOBDATA_BLOCK_PTR, diff --git a/src/plugins/select/bluegene/bg_job_run.c b/src/plugins/select/bluegene/bg_job_run.c index a4c5c60157f4024319bc193e1c12bff39557f5f2..69dfd29cfa8200b1c0b7ef526a6e7ca14661728d 100644 --- a/src/plugins/select/bluegene/bg_job_run.c +++ b/src/plugins/select/bluegene/bg_job_run.c @@ -72,8 +72,9 @@ enum update_op {START_OP, TERM_OP, SYNC_OP}; typedef struct { char *bg_block_id; char *blrtsimage; /* BlrtsImage for this block */ - uint16_t conn_type[SYSTEM_DIMENSIONS]; /* needed to boot small - blocks into HTC mode or not */ + uint16_t conn_type[HIGHEST_DIMENSIONS]; /* needed to boot small + blocks into HTC + mode or not */ struct job_record *job_ptr; /* pointer to job running on * block or NULL if no job */ char *linuximage; /* LinuxImage for this block */