From 0ec2af2735ee91b46fd5b59a9cd1318ef3411aca Mon Sep 17 00:00:00 2001
From: Morris Jette <jette@schedmd.com>
Date: Fri, 4 Apr 2014 16:19:45 -0700
Subject: [PATCH] Do msg retry logic at 0.1 sec intervals

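Retry at 0.1 second intervals rather than 1 second intervals. The loop
limits are multiplied by 10 so the overall retry window stays roughly
the same.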
---
 src/common/slurm_protocol_api.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/src/common/slurm_protocol_api.c b/src/common/slurm_protocol_api.c
index 26c2e39412a..294ed10fa9b 100644
--- a/src/common/slurm_protocol_api.c
+++ b/src/common/slurm_protocol_api.c
@@ -2182,6 +2182,7 @@ slurm_fd_t slurm_open_controller_conn(slurm_addr_t *addr)
 	slurm_ctl_conf_t *conf;
 	slurm_protocol_config_t *myproto = NULL;
 	int retry, have_backup = 0;
+	int msg_timeout = slurm_get_msg_timeout();
 
 	if (!working_cluster_rec) {
 		/* This means the addr wasn't set up already.
@@ -2198,9 +2199,10 @@ slurm_fd_t slurm_open_controller_conn(slurm_addr_t *addr)
 				myproto->primary_controller.sin_port;
 	}
 
-	for (retry=0; retry<slurm_get_msg_timeout(); retry++) {
+	msg_timeout *= 10;	/* Do iteration every 0.1 secs */
+	for (retry = 0; retry < msg_timeout; retry++) {
 		if (retry)
-			sleep(1);
+			usleep(100000);
 		if (working_cluster_rec) {
 			if (working_cluster_rec->control_addr.sin_port == 0) {
 				slurm_set_addr(
@@ -3681,11 +3683,12 @@ List slurm_send_addr_recv_msgs(slurm_msg_t *msg, char *name, int timeout)
 
 	if (conn_timeout == (uint16_t) NO_VAL)
 		conn_timeout = MIN(slurm_get_msg_timeout(), 10);
+	conn_timeout *= 10;     /* Do iteration every 0.1 secs */
 	/* This connect retry logic permits Slurm hierarchical communications
 	 * to better survive slurmd restarts */
 	for (i = 0; i <= conn_timeout; i++) {
 		if (i > 0)
-			sleep(1);
+			usleep(100000);
 		fd = slurm_open_msg_conn(&msg->address);
 		if ((fd >= 0) || (errno != ECONNREFUSED))
 			break;
-- 
GitLab