From f091b17c502511da7e93f0af21806b068319be07 Mon Sep 17 00:00:00 2001
From: Moe Jette <jette1@llnl.gov>
Date: Wed, 16 May 2007 20:22:17 +0000
Subject: [PATCH] svn merge -r11442:11518
 https://eris.llnl.gov/svn/slurm/branches/slurm-1.1

---
 NEWS                              |  2 ++
 src/plugins/mpi/mvapich/mvapich.c | 44 +++++++++++++++++++++++--------
 2 files changed, 35 insertions(+), 11 deletions(-)

diff --git a/NEWS b/NEWS
index 5f1ce9f2f03..da3270d32e1 100644
--- a/NEWS
+++ b/NEWS
@@ -304,6 +304,8 @@ documents those changes that are of interest to users and admins.
 * Changes in SLURM 1.1.36
 =========================
  - Permit node state specification of DRAIN in slurm.conf.
+ - In jobcomp/script - fix bug that prevented UID and JOBID environment 
+   variables from being set.
 
 * Changes in SLURM 1.1.35
 =========================
diff --git a/src/plugins/mpi/mvapich/mvapich.c b/src/plugins/mpi/mvapich/mvapich.c
index dc2637f45b5..3d0e4325605 100644
--- a/src/plugins/mpi/mvapich/mvapich.c
+++ b/src/plugins/mpi/mvapich/mvapich.c
@@ -650,39 +650,61 @@ mvapich_print_abort_message (mvapich_state_t *st, int rank,
 {
 	slurm_step_layout_t *sl = st->job->step_layout;
 	char *host;
+	char *msgstr;
 
 	if (!mvapich_abort_sends_rank (st)) {
 		info ("mvapich: Received ABORT message from an MPI process.");
 		return;
 	}
 
+	if (msg && (msglen > 0)) {
+		/* 
+		 *  Strip a trailing newline from msg in place, if present (syslog adds its own)
+		 */
+		if (msg [msglen - 1] == '\n')
+			msg [msglen - 1] = '\0';
+
+		msgstr = msg;
+	} 
+	else {
+		msgstr = "";
+		msglen = 0;
+	}
+
 	host = slurm_step_layout_host_name(
 		sl, slurm_step_layout_host_id(sl, rank));
 
 	if (dest >= 0) {
 		const char *dsthost = slurm_step_layout_host_name (sl, dest);
 
-		if (msg [msglen - 1] == '\n')
-			msg [msglen - 1] = '\0';
-
 		info ("mvapich: %M: ABORT from MPI rank %d [on %s] dest rank %d [on %s]",
 		      rank, host, dest, dsthost);
 
 		/*
-		 *  If we got a message from MVAPICH, log it to syslog
+		 *  Log the abort event to syslog
 		 *   so that system administrators know about possible HW events.
 		 */
-		if (msglen > 0) {
-			openlog ("srun", 0, LOG_USER);
-			syslog (LOG_WARNING, 
-					"MVAPICH ABORT [jobid=%u.%u src=%d(%s) dst=%d(%s)]: %s",
-					st->job->jobid, st->job->stepid, rank, host, dest, dsthost, msg);
-			closelog();
-		}
+		openlog ("srun", 0, LOG_USER);
+		syslog (LOG_WARNING, 
+				"MVAPICH ABORT [jobid=%u.%u src=%d(%s) dst=%d(%s)]: %s",
+				st->job->jobid, st->job->stepid, 
+				rank, host, dest, dsthost, msgstr);
+		closelog();
 	}
 	else {
 		info ("mvapich: %M: ABORT from MPI rank %d [on %s]", 
 				rank, host);
+		/*
+		 *  dest is negative here: log the abort to syslog with dst shown as -1
+		 *   so that system administrators know about possible HW events.
+		 */
+		openlog ("srun", 0, LOG_USER);
+		syslog (LOG_WARNING, 
+				"MVAPICH ABORT [jobid=%u.%u src=%d(%s) dst=-1()]: %s",
+				st->job->jobid, st->job->stepid, 
+				rank, host, msgstr);
+		closelog();
+
 	}
 	return;
 }
-- 
GitLab