From eeb9705024fbdab4fe109f4be3d55105a4f21235 Mon Sep 17 00:00:00 2001
From: Morris Jette <jette@schedmd.com>
Date: Thu, 16 Oct 2014 10:36:08 -0700
Subject: [PATCH] Cray PMI refinements

Refine commit 5f89223fe8457795bb2e66caebe39e5f90584e61 based upon
feedback from David Gloe:
* It's not only MPI jobs, but anything that uses PMI. That includes MPI,
  shmem, etc., so you may want to reword the error message.
* I added the terminated flag because if multiple tasks on a node exit,
you would get an error message from each of them. That reduces it to one
error message per node. Cray bug 810310 prompted that change.
* Since we're now relying on --kill-on-bad-exit, I think we should update
the Cray slurm.conf template to default to 1 (set KillOnBadExit=1 in
contribs/cray/slurm.conf.template).
bug 1171
---
 contribs/cray/slurm.conf.template |  2 +-
 src/plugins/task/cray/task_cray.c | 16 ++++++++++++++--
 2 files changed, 15 insertions(+), 3 deletions(-)

diff --git a/contribs/cray/slurm.conf.template b/contribs/cray/slurm.conf.template
index 7e42c302d66..43ff90c9df4 100644
--- a/contribs/cray/slurm.conf.template
+++ b/contribs/cray/slurm.conf.template
@@ -32,7 +32,7 @@ JobContainerType=job_container/cncu
 #JobFileAppend=0
 #JobRequeue=1
 #JobSubmitPlugins=1
-#KillOnBadExit=0
+#KillOnBadExit=1
 #LaunchType=launch/slurm
 #Licenses=foo*4,bar
 #MailProg=/bin/mail
diff --git a/src/plugins/task/cray/task_cray.c b/src/plugins/task/cray/task_cray.c
index 519720a6aaa..c201a885c1d 100644
--- a/src/plugins/task/cray/task_cray.c
+++ b/src/plugins/task/cray/task_cray.c
@@ -120,6 +120,8 @@ unsigned int numa_bitmask_weight(const struct bitmask *bmp);
 static int _get_numa_nodes(char *path, int *cnt, int **numa_array);
 static int _get_cpu_masks(int num_numa_nodes, int32_t *numa_array,
 			  cpu_set_t **cpuMasks);
+
+static int terminated = 0;
 #endif
 
 /*
@@ -386,8 +388,18 @@ extern int task_p_post_term (stepd_step_rec_t *job,
 		}
 
 		// Cancel the job step, since we didn't find the mpi_fini msg
-		error("step %u.%u task %u exited without calling mpi_fini()",
-		      job->jobid, job->stepid, task->gtid);
+		// srun only gets the error() messages by default, send one
+		// per compute node, but log all other events with info().
+		if (terminated) {
+			info("step %u.%u task %u exited without calling "
+			     "PMI_Finalize()",
+			     job->jobid, job->stepid, task->gtid);
+		} else {
+			error("step %u.%u task %u exited without calling "
+			      "PMI_Finalize()",
+			      job->jobid, job->stepid, task->gtid);
+			terminated = 1;
+		}
 		info("reset estatus from %d to %d", task->estatus, SIGKILL);
 		task->estatus = SIGKILL;
 	}
-- 
GitLab