From cfc6ca6cb375a4ed47eda430f84608f4662ab4be Mon Sep 17 00:00:00 2001 From: Moe Jette <jette1@llnl.gov> Date: Fri, 1 Jun 2007 17:52:14 +0000 Subject: [PATCH] Add socket re-open logic to retry --- src/plugins/sched/wiki/msg.c | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/src/plugins/sched/wiki/msg.c b/src/plugins/sched/wiki/msg.c index 7c92b0d28d6..01e0c5fb43a 100644 --- a/src/plugins/sched/wiki/msg.c +++ b/src/plugins/sched/wiki/msg.c @@ -134,17 +134,27 @@ extern void term_msg_thread(void) \*****************************************************************************/ static void *_msg_thread(void *no_data) { - slurm_fd sock_fd, new_fd; + slurm_fd sock_fd = -1, new_fd; slurm_addr cli_addr; char *msg; slurm_ctl_conf_t *conf = slurm_conf_lock(); + int i; sched_port = conf->schedport; slurm_conf_unlock(); - if ((sock_fd = slurm_init_msg_engine_port(sched_port)) - == SLURM_SOCKET_ERROR) { - fatal("wiki: slurm_init_msg_engine_port %u %m", + + /* If SchedulerPort is already taken, keep trying to open it + * once per minute. Slurmctld will continue to function + * during this interval even if nothing can be scheduled. */ + for (i=0; (!thread_shutdown); i++) { + if (i > 0) + sleep(60); + sock_fd = slurm_init_msg_engine_port(sched_port); + if (sock_fd != SLURM_SOCKET_ERROR) + break; + error("wiki: slurm_init_msg_engine_port %u %m", sched_port); + error("wiki: Unable to communicate with Moab"); } /* Process incoming RPCs until told to shutdown */ @@ -170,7 +180,8 @@ static void *_msg_thread(void *no_data) xfree(msg); slurm_close_accepted_conn(new_fd); } - (void) slurm_shutdown_msg_engine(sock_fd); + if (sock_fd > 0) + (void) slurm_shutdown_msg_engine(sock_fd); pthread_exit((void *) 0); return NULL; } -- GitLab