From 0ec2af2735ee91b46fd5b59a9cd1318ef3411aca Mon Sep 17 00:00:00 2001 From: Morris Jette <jette@schedmd.com> Date: Fri, 4 Apr 2014 16:19:45 -0700 Subject: [PATCH] Do msg retry logic at 0.1 sec intervals rather than 1 sec interval retries --- src/common/slurm_protocol_api.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/common/slurm_protocol_api.c b/src/common/slurm_protocol_api.c index 26c2e39412a..294ed10fa9b 100644 --- a/src/common/slurm_protocol_api.c +++ b/src/common/slurm_protocol_api.c @@ -2182,6 +2182,7 @@ slurm_fd_t slurm_open_controller_conn(slurm_addr_t *addr) slurm_ctl_conf_t *conf; slurm_protocol_config_t *myproto = NULL; int retry, have_backup = 0; + int msg_timeout = slurm_get_msg_timeout(); if (!working_cluster_rec) { /* This means the addr wasn't set up already. @@ -2198,9 +2199,10 @@ slurm_fd_t slurm_open_controller_conn(slurm_addr_t *addr) myproto->primary_controller.sin_port; } - for (retry=0; retry<slurm_get_msg_timeout(); retry++) { + msg_timeout *= 10; /* Do iteration every 0.1 secs */ + for (retry = 0; retry < msg_timeout; retry++) { if (retry) - sleep(1); + usleep(100000); if (working_cluster_rec) { if (working_cluster_rec->control_addr.sin_port == 0) { slurm_set_addr( @@ -3681,11 +3683,12 @@ List slurm_send_addr_recv_msgs(slurm_msg_t *msg, char *name, int timeout) if (conn_timeout == (uint16_t) NO_VAL) conn_timeout = MIN(slurm_get_msg_timeout(), 10); + conn_timeout *= 10; /* Do iteration every 0.1 secs */ /* This connect retry logic permits Slurm hierarchical communications * to better survive slurmd restarts */ for (i = 0; i <= conn_timeout; i++) { if (i > 0) - sleep(1); + usleep(100000); fd = slurm_open_msg_conn(&msg->address); if ((fd >= 0) || (errno != ECONNREFUSED)) break; -- GitLab