From aa01e5fc625c0b9b4a7acebbb33357d54101a8c8 Mon Sep 17 00:00:00 2001
From: Mark Grondona <mgrondona@llnl.gov>
Date: Thu, 12 Jun 2003 21:36:44 +0000
Subject: [PATCH]  o take neterr_mutex sooner to avoid hang in slurmd startup

---
 src/slurmd/elan_interconnect.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/slurmd/elan_interconnect.c b/src/slurmd/elan_interconnect.c
index 55bda24e659..6d7cad63dfc 100644
--- a/src/slurmd/elan_interconnect.c
+++ b/src/slurmd/elan_interconnect.c
@@ -107,6 +107,8 @@ int interconnect_node_init(void)
 	if (err)
 		error("pthread_attr_setdetachstate: %s", slurm_strerror(err));
 
+	slurm_mutex_lock(&neterr_mutex);
+
 	if ((err = pthread_create(&neterr_tid, &attr, neterr_thr, NULL)))
 		return SLURM_FAILURE;
 
@@ -114,7 +116,6 @@ int interconnect_node_init(void)
 	 *  Wait for successful startup of neterr thread before
 	 *   returning control to slurmd.
 	 */
-	slurm_mutex_lock(&neterr_mutex);
 	pthread_cond_wait(&neterr_cond, &neterr_mutex);
 	pthread_mutex_unlock(&neterr_mutex);
 
@@ -219,12 +220,11 @@ _wait_and_destroy_prg(qsw_jobinfo_t qsw_job)
 	int i = 0;
 	int sleeptime = 1;
 
-	debug("going to destory program description...");
+	debug("going to destroy program description...");
 
-	while((qsw_prgdestroy(qsw_job) < 0) && (errno != EEXIST_PRGDESTROY)) {
+	while((qsw_prgdestroy(qsw_job) < 0) && (errno == ECHILD_PRGDESTROY)) {
+		debug("qsw_prgdestroy: %m");
 		i++;
-		if (errno != ECHILD_PRGDESTROY)
-			error("qsw_prgdestroy: %m");
 		if (i == 1) {
 			debug("sending SIGTERM to remaining tasks");
 			qsw_prgsignal(qsw_job, SIGTERM);
-- 
GitLab