diff --git a/NEWS b/NEWS index 1cc962ef5dc4bbf32762bb4d8fcb35105df0910f..1555205438684a50bdf71361f1176104db91a208 100644 --- a/NEWS +++ b/NEWS @@ -6,6 +6,7 @@ documents those changes that are of interest to users and administrators. -- srun - suppress job step creation warning message when waiting on PrologSlurmctld. -- slurmrestd - fix incorrect return values in data_list_for_each() functions. + -- mpi/pmix - fix issue where HetJobs could fail to launch. * Changes in Slurm 20.02.3 ========================== diff --git a/src/plugins/mpi/pmix/mpi_pmix.c b/src/plugins/mpi/pmix/mpi_pmix.c index 7a65895d3171e676fe35416609b9eb20bbca9214..1bf287ff4413692fccf4dd985b0346654ed5dbfc 100644 --- a/src/plugins/mpi/pmix/mpi_pmix.c +++ b/src/plugins/mpi/pmix/mpi_pmix.c @@ -89,6 +89,8 @@ const uint32_t plugin_version = SLURM_VERSION_NUMBER; void *libpmix_plug = NULL; +char *process_mapping = NULL; + static void _libpmix_close(void *lib_plug) { xassert(lib_plug); @@ -204,7 +206,6 @@ extern mpi_plugin_client_state_t *p_mpi_hook_client_prelaunch( { static pthread_mutex_t setup_mutex = PTHREAD_MUTEX_INITIALIZER; static pthread_cond_t setup_cond = PTHREAD_COND_INITIALIZER; - static char *mapping = NULL; static bool setup_done = false; uint32_t nnodes, ntasks, **tids; uint16_t *task_cnt; @@ -215,7 +216,8 @@ extern mpi_plugin_client_state_t *p_mpi_hook_client_prelaunch( ntasks = job->step_layout->task_cnt; task_cnt = job->step_layout->tasks; tids = job->step_layout->tids; - mapping = pack_process_mapping(nnodes, ntasks, task_cnt, tids); + process_mapping = pack_process_mapping(nnodes, ntasks, + task_cnt, tids); slurm_mutex_lock(&setup_mutex); setup_done = true; slurm_cond_broadcast(&setup_cond); @@ -227,12 +229,11 @@ extern mpi_plugin_client_state_t *p_mpi_hook_client_prelaunch( slurm_mutex_unlock(&setup_mutex); } - if (NULL == mapping) { + if (!process_mapping) { PMIXP_ERROR("Cannot create process mapping"); return NULL; } - setenvf(env, PMIXP_SLURM_MAPPING_ENV, "%s", mapping); - xfree(mapping); + setenvf(env, PMIXP_SLURM_MAPPING_ENV, "%s", process_mapping); /* only return NULL on error */ return (void *)0xdeadbeef; @@ -240,5 +241,7 @@ extern mpi_plugin_client_state_t *p_mpi_hook_client_prelaunch( extern int p_mpi_hook_client_fini(void) { + xfree(process_mapping); + return SLURM_SUCCESS; }