From 4d4c8c17a01669d599ff9bdb971f6f68f62e04c7 Mon Sep 17 00:00:00 2001
From: Artem Polyakov <artpol84@gmail.com>
Date: Wed, 11 Nov 2015 14:54:59 +0600
Subject: [PATCH] 1. Prepare to the new state machine 2. Fail with SIGKILL if
 state machine misbehaves.

---
 src/plugins/mpi/pmix/pmixp_coll.c   | 25 ++++++++++++++++++++++---
 src/plugins/mpi/pmix/pmixp_coll.h   |  2 --
 src/plugins/mpi/pmix/pmixp_server.c | 24 +++++++++++++++---------
 3 files changed, 37 insertions(+), 14 deletions(-)

diff --git a/src/plugins/mpi/pmix/pmixp_coll.c b/src/plugins/mpi/pmix/pmixp_coll.c
index 411ac66ee62..d31488473de 100644
--- a/src/plugins/mpi/pmix/pmixp_coll.c
+++ b/src/plugins/mpi/pmix/pmixp_coll.c
@@ -99,6 +99,26 @@ static int _pack_ranges(pmixp_coll_t *coll)
 	return SLURM_SUCCESS;
 }
 
+static void _fan_in_finished(pmixp_coll_t *coll)
+{
+	xassert(PMIXP_COLL_FAN_IN == coll->state);
+	coll->state = PMIXP_COLL_FAN_OUT;
+	memset(coll->ch_contribs, 0, sizeof(int) * coll->children_cnt);
+	coll->contrib_cntr = 0;
+	coll->contrib_local = 0;
+	set_buf_offset(coll->buf, coll->serv_offs);
+	if (SLURM_SUCCESS != _pack_ranges(coll)) {
+		PMIXP_ERROR("Cannot pack ranges to coll message header!");
+	}
+}
+
+static void _fan_out_finished(pmixp_coll_t *coll)
+{
+	xassert( PMIXP_COLL_FAN_OUT == coll->state /* || fan_out_in */);
+	coll->state = PMIXP_COLL_SYNC;
+	coll->seq++; /* move to the next collective */
+}
+
 static void _reset_coll(pmixp_coll_t *coll)
 {
 	switch (coll->state) {
@@ -535,8 +555,7 @@ static void _progress_fan_in(pmixp_coll_t *coll)
 	}
 
 	/* transit to the next state */
-	coll->state = PMIXP_COLL_FAN_OUT;
-	set_buf_offset(coll->buf, 0);
+	_fan_in_finished(coll);
 
 	/* if we are root - push data to PMIx here.
 	 * Originally there was a homogenuous solution: root nodename was in the hostlist.
@@ -577,7 +596,7 @@ void _progres_fan_out(pmixp_coll_t *coll, Buf buf)
 				pmixp_free_Buf, (void *)buf);
 	}
 	/* Prepare for the next collective operation */
-	_reset_coll(coll);
+	_fan_out_finished(coll);
 
 	PMIXP_DEBUG("%s:%d: collective is prepared for the next use",
 			pmixp_info_namespace(), pmixp_info_nodeid());
diff --git a/src/plugins/mpi/pmix/pmixp_coll.h b/src/plugins/mpi/pmix/pmixp_coll.h
index 2c05834e850..248cf4faaec 100644
--- a/src/plugins/mpi/pmix/pmixp_coll.h
+++ b/src/plugins/mpi/pmix/pmixp_coll.h
@@ -170,8 +170,6 @@ static inline int pmixp_coll_check_seq(pmixp_coll_t *coll, uint32_t seq,
 		 * want to discard this message */
 		return SLURM_ERROR;
 	}
-	PMIXP_ERROR("Bad collective seq. #%d from %s, current is %d", seq,
-			nodename, coll->seq);
 	/* maybe need more sophisticated handling in presence of
 	 * several steps. However maybe it's enough to just ignore */
 	/* slurm_kill_job_step(pmixp_info_jobid(), pmixp_info_stepid(), SIGKILL); */
diff --git a/src/plugins/mpi/pmix/pmixp_server.c b/src/plugins/mpi/pmix/pmixp_server.c
index 93bd06e9abd..c049d5b2b35 100644
--- a/src/plugins/mpi/pmix/pmixp_server.c
+++ b/src/plugins/mpi/pmix/pmixp_server.c
@@ -411,16 +411,22 @@ static void _process_server_request(recv_header_t *_hdr, void *payload)
 		coll = pmixp_state_coll_get(type, procs, nprocs);
 		xfree(procs);
 
-		PMIXP_DEBUG(
-				"FENCE collective message from node \"%s\", type = %s",
-				nodename,
-				(PMIXP_MSG_FAN_IN == hdr->type) ?
-						"fan-in" : "fan-out");
-
-		if (SLURM_SUCCESS
-				!= pmixp_coll_check_seq(coll, hdr->seq,
+		PMIXP_DEBUG("FENCE collective message from node \"%s\", type = %s",
+			    nodename,
+			    (PMIXP_MSG_FAN_IN == hdr->type) ? "fan-in" : "fan-out");
+
+		if (SLURM_SUCCESS != pmixp_coll_check_seq(coll, hdr->seq,
 						nodename)) {
-			/* stop processing discardig this message */
+			/* this is unexepable event: either something went
+			 * really wrong or the state machine is incorrect.
+			 * This will 100% lead to application hang.
+			 */
+			PMIXP_ERROR("Bad collective seq. #%d from %s, current is %d",
+				    hdr->seq, nodename, coll->seq);
+			pmixp_debug_hang(0); /* enable hang to debug this! */
+			slurm_kill_job_step(pmixp_info_jobid(), pmixp_info_stepid(),
+					    SIGKILL);
+
 			break;
 		}
 
-- 
GitLab