diff --git a/src/common/slurm_protocol_api.c b/src/common/slurm_protocol_api.c
index 0f7e688315a1ed96330f03218f06c9c3c958bd77..839433990e29d959aeb29b4fa38fd429b2783ba7 100644
--- a/src/common/slurm_protocol_api.c
+++ b/src/common/slurm_protocol_api.c
@@ -57,6 +57,7 @@
 
 /* #DEFINES */
 #define _DEBUG	0
+#define MAX_SHUTDOWN_RETRY	10
 #define SLURM_DEFAULT_TIMEOUT 2000
 
 /* STATIC VARIABLES */
@@ -782,16 +783,21 @@ _send_and_recv_msg(slurm_fd fd, slurm_msg_t *req,
 		   slurm_msg_t *resp, int timeout)
 {
 	int err = SLURM_SUCCESS;
+	int retry = 0;
 
 	if ( (slurm_send_node_msg(fd, req) < 0)
 	   || (slurm_receive_msg(fd, resp, timeout) < 0) )
 		err = errno;
 
-	/*
-	 *  Attempt to close an open connection
-	 */
-	if (slurm_shutdown_msg_conn(fd) < 0)
-		return SLURM_ERROR;
+	/*
+	 *  Close the connection, retrying on EINTR (bounded by MAX_SHUTDOWN_RETRY)
+	 */
+	while ( (slurm_shutdown_msg_conn(fd) < 0) && (errno == EINTR) ) {
+		if (retry++ >= MAX_SHUTDOWN_RETRY) {
+			err = errno;
+			break;
+		}
+	}
 
 	if (err)
 		slurm_seterrno_ret(err);
@@ -867,6 +873,7 @@ int slurm_send_recv_node_msg(slurm_msg_t *req, slurm_msg_t *resp, int timeout)
 int slurm_send_only_controller_msg(slurm_msg_t *req)
 {
 	int rc = SLURM_SUCCESS;
+	int retry = 0;
 	slurm_fd fd = -1;
 
 	/*
@@ -879,9 +886,14 @@ int slurm_send_only_controller_msg(slurm_msg_t *req)
 
 	rc = slurm_send_node_msg(fd, req);
 
-	if (slurm_shutdown_msg_conn(fd) < 0) {
-		rc = SLURM_SOCKET_ERROR;
-		goto cleanup;
+	/*
+	 *  Close the connection, retrying on EINTR (bounded by MAX_SHUTDOWN_RETRY)
+	 */
+	while ( (slurm_shutdown_msg_conn(fd) < 0) && (errno == EINTR) ) {
+		if (retry++ >= MAX_SHUTDOWN_RETRY) {
+			rc = SLURM_SOCKET_ERROR;
+			goto cleanup;
+		}
 	}
 
       cleanup:
@@ -899,6 +911,7 @@ int slurm_send_only_controller_msg(slurm_msg_t *req)
 int slurm_send_only_node_msg(slurm_msg_t *req)
 {
 	int rc = SLURM_SUCCESS;
+	int retry = 0;
 	slurm_fd fd = -1;
 
 	if ((fd = slurm_open_msg_conn(&req->address)) < 0)
@@ -906,8 +919,13 @@ int slurm_send_only_node_msg(slurm_msg_t *req)
 
 	rc = slurm_send_node_msg(fd, req);
 
-	if (slurm_shutdown_msg_conn(fd) < 0)
-		return SLURM_SOCKET_ERROR;
+	/*
+	 *  Close the connection, retrying on EINTR (bounded by MAX_SHUTDOWN_RETRY)
+	 */
+	while ( (slurm_shutdown_msg_conn(fd) < 0) && (errno == EINTR) ) {
+		if (retry++ >= MAX_SHUTDOWN_RETRY)
+			return SLURM_SOCKET_ERROR;
+	}
 
 	return rc;
 }
diff --git a/src/common/slurm_protocol_socket_implementation.c b/src/common/slurm_protocol_socket_implementation.c
index 8f71cb611254e9841ac6f46b7b971fd30ef6aaed..a01fc92ce15918024887aee27a80f183778b67af 100644
--- a/src/common/slurm_protocol_socket_implementation.c
+++ b/src/common/slurm_protocol_socket_implementation.c
@@ -374,7 +374,8 @@ slurm_fd _slurm_listen_stream(slurm_addr *addr)
 	return fd;
 
     error:
-	_slurm_close_stream(fd);
+	if ((_slurm_close_stream(fd) < 0) && (errno == EINTR))
+		_slurm_close_stream(fd);	/* try again */
 	return rc;
 
 }
@@ -408,7 +409,8 @@ slurm_fd _slurm_open_stream(slurm_addr *addr)
 	return fd;
 
     error:
-	_slurm_close_stream(fd);
+	if ((_slurm_close_stream(fd) < 0) && (errno == EINTR))
+		_slurm_close_stream(fd);	/* try again */
 	return rc;
 
 }