From 5cffa4a1a5eaa1ba17ce69e13b3cbf970eb4902b Mon Sep 17 00:00:00 2001
From: Moe Jette <jette1@llnl.gov>
Date: Wed, 5 Nov 2003 21:56:29 +0000
Subject: [PATCH] Increase time between attempts to contact slurm controller
 when transitioning between primary and backup. The request has a brief
 window in which it can abort and we want to decrease the likelihood of that
 happening by retrying less frequently when we know control is transitioning.

---
 src/common/slurm_protocol_api.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/common/slurm_protocol_api.c b/src/common/slurm_protocol_api.c
index 1386b009fc1..f99eb2630a2 100644
--- a/src/common/slurm_protocol_api.c
+++ b/src/common/slurm_protocol_api.c
@@ -834,12 +834,13 @@ int slurm_send_recv_controller_msg(slurm_msg_t *req, slurm_msg_t *resp)
                         ESLURM_IN_STANDBY_MODE) &&
 	        (req->msg_type != MESSAGE_NODE_REGISTRATION_STATUS) &&
 	        (slurmctld_conf.backup_controller) &&
-                (difftime(time(NULL), start_time) < (slurmctld_conf.slurmctld_timeout +
+                (difftime(time(NULL), start_time) < 
+                        (slurmctld_conf.slurmctld_timeout +
                         slurmctld_conf.heartbeat_interval))) {
 		debug("Neither primary nor backup controller responding, "
 		      "sleep and retry");
 		slurm_free_return_code_msg(resp->data);
-		sleep(5);
+		sleep(30);
 		if ((fd = slurm_open_controller_conn()) < 0) 
                 	return SLURM_SOCKET_ERROR;
 	}
-- 
GitLab