From dafc8c07a4aba195af1383b738ec6926000cb06d Mon Sep 17 00:00:00 2001 From: Moe Jette <jette1@llnl.gov> Date: Thu, 19 Feb 2009 23:59:36 +0000 Subject: [PATCH] svn merge -r16594:16610 https://eris.llnl.gov/svn/slurm/branches/slurm-1.3 --- NEWS | 2 ++ src/common/slurm_protocol_socket_implementation.c | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/NEWS b/NEWS index a38e0f57bf7..405947e461f 100644 --- a/NEWS +++ b/NEWS @@ -181,6 +181,8 @@ documents those changes that are of interest to users and admins. STARTJOB request: "TASKLIST includes non-responsive nodes". -- Fix bug in select/linear when used with sched/gang that can result in a job's required or excluded node specification being ignored. + -- Add logic to handle message connect timeouts (timed-out.patch from + Chuck Clouston, Bull). * Changes in SLURM 1.3.13 ========================= diff --git a/src/common/slurm_protocol_socket_implementation.c b/src/common/slurm_protocol_socket_implementation.c index 98195539121..bf1f3190a98 100644 --- a/src/common/slurm_protocol_socket_implementation.c +++ b/src/common/slurm_protocol_socket_implementation.c @@ -77,7 +77,7 @@ #include "src/common/xmalloc.h" #include "src/common/util-net.h" -#define PORT_RETRIES 2 +#define PORT_RETRIES 3 #define MIN_USER_PORT (IPPORT_RESERVED + 1) #define MAX_USER_PORT 0xffff #define RANDOM_USER_PORT ((uint16_t) ((lrand48() % \ @@ -525,7 +525,7 @@ slurm_fd _slurm_open_stream(slurm_addr *addr, bool retry) sizeof(*addr)); if (rc >= 0) /* success */ break; - if ((errno != ECONNREFUSED) || + if (((errno != ECONNREFUSED) && (errno != ETIMEDOUT)) || (!retry) || (retry_cnt >= PORT_RETRIES)) { slurm_seterrno(errno); goto error; -- GitLab