From ba96978eb3d168d2cc4bec2e2a4e11040a3b1ed6 Mon Sep 17 00:00:00 2001
From: Danny Auble <da@llnl.gov>
Date: Fri, 29 Feb 2008 17:31:55 +0000
Subject: [PATCH] svn merge -r13414:13423
 https://eris.llnl.gov/svn/slurm/branches/slurm-1.2

---
 NEWS                              |  2 ++
 src/plugins/mpi/mvapich/mvapich.c | 13 +++++++++++--
 2 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/NEWS b/NEWS
index 177f780d36d..a1e8585776e 100644
--- a/NEWS
+++ b/NEWS
@@ -182,6 +182,8 @@ documents those changes that are of interest to users and admins.
  -- Bug fix for setting exit code in accounting for batch script.
  -- Add salloc option, --no-shell (for LSF).
  -- Added new options for sacct output
+ -- mvapich: Ensure MPIRUN_ID is unique for all job steps within a job.
+    (Fixes crashes when running multiple job steps within a job on one node)
 
 * Changes in SLURM 1.2.24
 =========================
diff --git a/src/plugins/mpi/mvapich/mvapich.c b/src/plugins/mpi/mvapich/mvapich.c
index 38f1f5d8eca..b2e5b868484 100644
--- a/src/plugins/mpi/mvapich/mvapich.c
+++ b/src/plugins/mpi/mvapich/mvapich.c
@@ -1366,6 +1366,15 @@ static void mvapich_state_destroy(mvapich_state_t *st)
 	xfree(st);
 }
 
+/*
+ *  Derive the MPIRUN_ID for a job step: the jobid shifted left by 16 bits,
+ *  ORed with the low 16 bits of the stepid (higher stepid bits are discarded).
+ */
+int mpirun_id_create(const mpi_plugin_client_info_t *job)
+{
+	return (int) ((job->jobid << 16) | (job->stepid & 0xffff));
+}
+
 extern mvapich_state_t *mvapich_thr_create(const mpi_plugin_client_info_t *job,
 					   char ***env)
 {
@@ -1405,12 +1414,12 @@ extern mvapich_state_t *mvapich_thr_create(const mpi_plugin_client_info_t *job,
 	 */
 	env_array_overwrite_fmt(env, "MPIRUN_PORT",   "%hu", port);
 	env_array_overwrite_fmt(env, "MPIRUN_NPROCS", "%d", st->nprocs);
-	env_array_overwrite_fmt(env, "MPIRUN_ID",     "%d", st->job->jobid);
+	env_array_overwrite_fmt(env, "MPIRUN_ID",     "%d", mpirun_id_create(job));
 	if (st->connect_once) {
 		env_array_overwrite_fmt(env, "MPIRUN_CONNECT_ONCE", "1");
 	}
 
-	verbose ("mvapich-0.9.[45] master listening on port %d", port);
+	verbose ("mvapich-0.9.[45] master listening on port %hu", port);
 
 	return st;
 }
-- 
GitLab