From 5cffa4a1a5eaa1ba17ce69e13b3cbf970eb4902b Mon Sep 17 00:00:00 2001 From: Moe Jette <jette1@llnl.gov> Date: Wed, 5 Nov 2003 21:56:29 +0000 Subject: [PATCH] Increase time between attempts to contact slurm controller when transitioning between primary and backup. The request has a brief window in which it can abort and we want to decrease the likelihood of that happening by retrying less frequently when we know control is transitioning. --- src/common/slurm_protocol_api.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/common/slurm_protocol_api.c b/src/common/slurm_protocol_api.c index 1386b009fc1..f99eb2630a2 100644 --- a/src/common/slurm_protocol_api.c +++ b/src/common/slurm_protocol_api.c @@ -834,12 +834,13 @@ int slurm_send_recv_controller_msg(slurm_msg_t *req, slurm_msg_t *resp) ESLURM_IN_STANDBY_MODE) && (req->msg_type != MESSAGE_NODE_REGISTRATION_STATUS) && (slurmctld_conf.backup_controller) && - (difftime(time(NULL), start_time) < (slurmctld_conf.slurmctld_timeout + + (difftime(time(NULL), start_time) < + (slurmctld_conf.slurmctld_timeout + slurmctld_conf.heartbeat_interval))) { debug("Neither primary nor backup controller responding, " "sleep and retry"); slurm_free_return_code_msg(resp->data); - sleep(5); + sleep(30); if ((fd = slurm_open_controller_conn()) < 0) return SLURM_SOCKET_ERROR; } -- GitLab