From ba96978eb3d168d2cc4bec2e2a4e11040a3b1ed6 Mon Sep 17 00:00:00 2001 From: Danny Auble <da@llnl.gov> Date: Fri, 29 Feb 2008 17:31:55 +0000 Subject: [PATCH] svn merge -r13414:13423 https://eris.llnl.gov/svn/slurm/branches/slurm-1.2 --- NEWS | 2 ++ src/plugins/mpi/mvapich/mvapich.c | 13 +++++++++++-- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/NEWS b/NEWS index 177f780d36d..a1e8585776e 100644 --- a/NEWS +++ b/NEWS @@ -182,6 +182,8 @@ documents those changes that are of interest to users and admins. -- Bug fix for setting exit code in accounting for batch script. -- Add salloc option, --no-shell (for LSF). -- Added new options for sacct output + -- mvapich: Ensure MPIRUN_ID is unique for all job steps within a job. + (Fixes crashes when running multiple job steps within a job on one node) * Changes in SLURM 1.2.24 ========================= diff --git a/src/plugins/mpi/mvapich/mvapich.c b/src/plugins/mpi/mvapich/mvapich.c index 38f1f5d8eca..b2e5b868484 100644 --- a/src/plugins/mpi/mvapich/mvapich.c +++ b/src/plugins/mpi/mvapich/mvapich.c @@ -1366,6 +1366,15 @@ static void mvapich_state_destroy(mvapich_state_t *st) xfree(st); } +/* + * Create the MPIRUN_ID for a jobid/stepid pair (returned as an int). It is intended to differ between job steps of the same job; stepid is masked to its low 16 bits.
+ * Combine the least significant bits of the jobid and stepid + */ +int mpirun_id_create(const mpi_plugin_client_info_t *job) +{ + return (int) ((job->jobid << 16) | (job->stepid & 0xffff)); +} + extern mvapich_state_t *mvapich_thr_create(const mpi_plugin_client_info_t *job, char ***env) { @@ -1405,12 +1414,12 @@ extern mvapich_state_t *mvapich_thr_create(const mpi_plugin_client_info_t *job, */ env_array_overwrite_fmt(env, "MPIRUN_PORT", "%hu", port); env_array_overwrite_fmt(env, "MPIRUN_NPROCS", "%d", st->nprocs); - env_array_overwrite_fmt(env, "MPIRUN_ID", "%d", st->job->jobid); + env_array_overwrite_fmt(env, "MPIRUN_ID", "%d", mpirun_id_create(job)); if (st->connect_once) { env_array_overwrite_fmt(env, "MPIRUN_CONNECT_ONCE", "1"); } - verbose ("mvapich-0.9.[45] master listening on port %d", port); + verbose ("mvapich-0.9.[45] master listening on port %hu", port); return st; } -- GitLab