From dafc8c07a4aba195af1383b738ec6926000cb06d Mon Sep 17 00:00:00 2001
From: Moe Jette <jette1@llnl.gov>
Date: Thu, 19 Feb 2009 23:59:36 +0000
Subject: [PATCH] svn merge -r16594:16610
 https://eris.llnl.gov/svn/slurm/branches/slurm-1.3

---
 NEWS                                              | 2 ++
 src/common/slurm_protocol_socket_implementation.c | 4 ++--
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/NEWS b/NEWS
index a38e0f57bf7..405947e461f 100644
--- a/NEWS
+++ b/NEWS
@@ -181,6 +181,8 @@ documents those changes that are of interest to users and admins.
     STARTJOB request: "TASKLIST includes non-responsive nodes".
  -- Fix bug in select/linear when used with sched/gang that can result in a 
     job's required or excluded node specification being ignored.
+ -- Add logic to handle message connect timeouts (timed-out.patch from 
+    Chuck Clouston, Bull).
 
 * Changes in SLURM 1.3.13
 =========================
diff --git a/src/common/slurm_protocol_socket_implementation.c b/src/common/slurm_protocol_socket_implementation.c
index 98195539121..bf1f3190a98 100644
--- a/src/common/slurm_protocol_socket_implementation.c
+++ b/src/common/slurm_protocol_socket_implementation.c
@@ -77,7 +77,7 @@
 #include "src/common/xmalloc.h"
 #include "src/common/util-net.h"
 
-#define PORT_RETRIES    2
+#define PORT_RETRIES    3
 #define MIN_USER_PORT   (IPPORT_RESERVED + 1)
 #define MAX_USER_PORT   0xffff
 #define RANDOM_USER_PORT ((uint16_t) ((lrand48() % \
@@ -525,7 +525,7 @@ slurm_fd _slurm_open_stream(slurm_addr *addr, bool retry)
 				    sizeof(*addr));
 		if (rc >= 0)		    /* success */
 			break;
-		if ((errno != ECONNREFUSED) || 
+		if (((errno != ECONNREFUSED) && (errno != ETIMEDOUT)) ||
 		    (!retry) || (retry_cnt >= PORT_RETRIES)) {
 			slurm_seterrno(errno);
 			goto error;
-- 
GitLab