From 364fedac77dfe8798a902e57f574b7adc5f19026 Mon Sep 17 00:00:00 2001
From: Moe Jette <jette1@llnl.gov>
Date: Fri, 1 Jun 2007 18:08:21 +0000
Subject: [PATCH] svn merge -r11598:11609
 https://eris.llnl.gov/svn/slurm/branches/slurm-1.1

---
 NEWS                          |  2 ++
 src/plugins/sched/wiki2/msg.c | 53 ++++++++++++++++++-----------------
 2 files changed, 29 insertions(+), 26 deletions(-)

diff --git a/NEWS b/NEWS
index 9d3c5985aed..b5c61d3504e 100644
--- a/NEWS
+++ b/NEWS
@@ -5,6 +5,7 @@ documents those changes that are of interest to users and admins.
 ========================
  -- Add new sinfo field to sort by "%E" sorts by the time associated with a 
     node's state (from Prashanth Tamraparni, HP).
+ -- In sched/wiki: fix logic for restarting backup slurmctld.
 
 * Changes in SLURM 1.2.8
 ========================
@@ -318,6 +319,7 @@ documents those changes that are of interest to users and admins.
    issue in srun to fail job has failed instead of waiting around for threads
    that will never end.
  - Added fork handlers in the slurmstepd
+ - In sched/wiki2: fix logic for restarting backup slurmctld.
 
 * Changes in SLURM 1.1.36
 =========================
diff --git a/src/plugins/sched/wiki2/msg.c b/src/plugins/sched/wiki2/msg.c
index 843634da366..b07ab5c22eb 100644
--- a/src/plugins/sched/wiki2/msg.c
+++ b/src/plugins/sched/wiki2/msg.c
@@ -105,28 +105,28 @@ extern void term_msg_thread(void)
 	pthread_mutex_lock(&thread_flag_mutex);
 	if (thread_running) {
 		int fd;
-                slurm_addr addr;
+		slurm_addr addr;
 
 		thread_shutdown = true;
 
-                /* Open and close a connection to the wiki listening port.
-                 * Allows slurm_accept_msg_conn() to return in 
-                 * _msg_thread() so that it can check the thread_shutdown
-                 * flag.
-                 */
-                slurm_set_addr(&addr, sched_port, "localhost");
-                fd = slurm_open_stream(&addr);
-                if (fd != -1) {
-                        /* we don't care if the open failed */
-                        slurm_close_stream(fd);
-                }
-
-                debug2("waiting for sched/wiki2 thread to exit");
-                pthread_join(msg_thread_id, NULL);
-                msg_thread_id = 0;
-                thread_shutdown = false;
-                thread_running = false;
-                debug2("join of sched/wiki2 thread was successful");
+		/* Open and close a connection to the wiki listening port.
+		 * Allows slurm_accept_msg_conn() to return in
+		 * _msg_thread() so that it can check the thread_shutdown
+		 * flag.
+		 */
+		slurm_set_addr(&addr, sched_port, "localhost");
+		fd = slurm_open_stream(&addr);
+		if (fd != -1) {
+			/* a failed open is ignored; else just close it */
+			slurm_close_stream(fd);
+		}
+
+		debug2("waiting for sched/wiki thread to exit");
+		pthread_join(msg_thread_id, NULL);
+		msg_thread_id = 0;
+		thread_shutdown = false;
+		thread_running = false;
+		debug2("join of sched/wiki thread was successful");
 	}
 	pthread_mutex_unlock(&thread_flag_mutex);
 }
@@ -136,7 +136,7 @@ extern void term_msg_thread(void)
 \*****************************************************************************/
 static void *_msg_thread(void *no_data)
 {
-	slurm_fd sock_fd, new_fd;
+	slurm_fd sock_fd = -1, new_fd;
 	slurm_addr cli_addr;
 	char *msg;
 	slurm_ctl_conf_t *conf = slurm_conf_lock();
@@ -148,7 +148,7 @@ static void *_msg_thread(void *no_data)
 	/* If SchedulerPort is already taken, keep trying to open it
 	 * once per minute. Slurmctld will continue to function
 	 * during this interval even if nothing can be scheduled. */
-	for (i=0; ; i++) {
+	for (i=0; (!thread_shutdown); i++) {
 		if (i > 0)
 			sleep(60);
 		sock_fd = slurm_init_msg_engine_port(sched_port);
@@ -167,10 +167,10 @@ static void *_msg_thread(void *no_data)
 				error("wiki: slurm_accept_msg_conn %m");
 			continue;
 		}
-                if (thread_shutdown) {
-                        close(new_fd);
-                        break;
-                }
+		if (thread_shutdown) {
+			close(new_fd);
+			break;
+		}
 		/* It would be nice to create a pthread for each new 
 		 * RPC, but that leaks memory on some systems when 
 		 * done from a plugin.
@@ -182,7 +182,8 @@ static void *_msg_thread(void *no_data)
 		xfree(msg);
 		slurm_close_accepted_conn(new_fd);
 	}
-	(void) slurm_shutdown_msg_engine(sock_fd);
+	if (sock_fd > 0)
+		(void) slurm_shutdown_msg_engine(sock_fd);
 	pthread_exit((void *) 0);
 	return NULL;
 }
-- 
GitLab