From be5ec31faad5a9b2a59860f414f42836a5735133 Mon Sep 17 00:00:00 2001
From: Moe Jette <jette1@llnl.gov>
Date: Fri, 16 Mar 2007 15:58:10 +0000
Subject: [PATCH] svn merge -r11157:11172
 https://eris.llnl.gov/svn/slurm/branches/slurm-1.1

---
 NEWS                          |  7 ++++---
 src/plugins/sched/wiki2/msg.c | 15 ++++++++++++---
 2 files changed, 16 insertions(+), 6 deletions(-)

diff --git a/NEWS b/NEWS
index d80ceafe48b..2c38b6e392b 100644
--- a/NEWS
+++ b/NEWS
@@ -245,17 +245,18 @@ documents those changes that are of interest to users and admins.
 
 * Changes in SLURM 1.1.33
 =========================
- - sched/wiki - Do not wait for job completion before permitting 
+ - sched/wiki2 - Do not wait for job completion before permitting 
    additional jobs to be scheduled.
  - Add srun SLURM_EXCLUSIVE environment variable support, from 
    Gilles Civario (Bull).
- - sched/wiki - Report job's node sharing options.
+ - sched/wiki2 - Report job's node sharing options.
+ - sched/wiki2 - If SchedulerPort is in use, retry opening it indefinitely.
 
 * Changes in SLURM 1.1.32
 =========================
  - If a job's stdout/err file names are unusable (bad path), use the 
    default names.
- - sched/wiki - Fix logic to be compatible with select/cons_res plugin
+ - sched/wiki2 - Fix logic to be compatible with select/cons_res plugin
    for allocating individual processors within nodes.
  - Fix job end time calculation when changed from an initial value of 
    INFINITE.
diff --git a/src/plugins/sched/wiki2/msg.c b/src/plugins/sched/wiki2/msg.c
index 218f54e5d07..3f6ed229e39 100644
--- a/src/plugins/sched/wiki2/msg.c
+++ b/src/plugins/sched/wiki2/msg.c
@@ -128,12 +128,21 @@ static void *_msg_thread(void *no_data)
 	uint16_t sched_port;
 	char *msg;
 	slurm_ctl_conf_t *conf = slurm_conf_lock();
+	int i;
 
 	sched_port = conf->schedport;
 	slurm_conf_unlock();
-	if ((sock_fd = slurm_init_msg_engine_port(sched_port)) 
-			== SLURM_SOCKET_ERROR) {
-		fatal("wiki: slurm_init_msg_engine_port %u %m",
+
+	/* If SchedulerPort is already taken, keep trying to open it
+	 * once per minute. Slurmctld will continue to function
+	 * during this interval even if nothing can be scheduled. */
+	for (i=0; ; i++) {
+		if (i > 0)
+			sleep(60);
+		sock_fd = slurm_init_msg_engine_port(sched_port);
+		if (sock_fd != SLURM_SOCKET_ERROR)
+			break;
+		error("wiki: slurm_init_msg_engine_port %u %m",
 			sched_port);
 	}
 
-- 
GitLab