Skip to content
Snippets Groups Projects
Commit 128f5492 authored by Moe Jette
Browse files

Retry connect using different ports, but only for connects initiated
from slurm_open_stream (which is only called by slurmd/io.c to connect
stdin/out/err).
(gnats:253)
parent 7a3340a8
No related branches found
No related tags found
No related merge requests found
...@@ -537,12 +537,13 @@ slurm_fd slurm_accept_stream(slurm_fd open_fd, slurm_addr * slurm_address) ...@@ -537,12 +537,13 @@ slurm_fd slurm_accept_stream(slurm_fd open_fd, slurm_addr * slurm_address)
/* slurm_open_stream /* slurm_open_stream
* opens a client connection to stream server * opens a client connection to stream server
* IN slurm_address - slurm_addr of the connection destination * IN slurm_address - slurm_addr of the connection destination
* RET slurm_fd - file descriptor of the connection created * RET slurm_fd - file descriptor of the connection created
* NOTE: Retry with various ports as needed if connection is refused
*/ */
slurm_fd slurm_open_stream(slurm_addr * slurm_address) slurm_fd slurm_open_stream(slurm_addr * slurm_address)
{ {
return _slurm_open_stream(slurm_address); return _slurm_open_stream(slurm_address, true);
} }
/* slurm_write_stream /* slurm_write_stream
......
...@@ -61,6 +61,7 @@ ...@@ -61,6 +61,7 @@
#include <fcntl.h> #include <fcntl.h>
#include <stdarg.h> #include <stdarg.h>
#include "src/common/macros.h"
#include "src/common/pack.h" #include "src/common/pack.h"
#include "src/common/slurm_protocol_common.h" #include "src/common/slurm_protocol_common.h"
...@@ -196,9 +197,11 @@ slurm_fd _slurm_accept_stream ( slurm_fd open_fd , ...@@ -196,9 +197,11 @@ slurm_fd _slurm_accept_stream ( slurm_fd open_fd ,
/* _slurm_open_stream /* _slurm_open_stream
* opens a client connection to stream server * opens a client connection to stream server
* IN slurm_address - slurm_addr of the connection destination * IN slurm_address - slurm_addr of the connection destination
* IN retry - if true, retry as needed with various ports
* to avoid socket address collision
* RET slurm_fd - file descriptor of the connection created * RET slurm_fd - file descriptor of the connection created
*/ */
slurm_fd _slurm_open_stream ( slurm_addr * slurm_address ) ; slurm_fd _slurm_open_stream ( slurm_addr * slurm_address, bool retry ) ;
/* _slurm_get_stream_addr /* _slurm_get_stream_addr
* essentially an encapsulated get_sockname * essentially an encapsulated get_sockname
......
...@@ -62,7 +62,7 @@ ...@@ -62,7 +62,7 @@
#include "src/common/xmalloc.h" #include "src/common/xmalloc.h"
#include "src/common/util-net.h" #include "src/common/util-net.h"
#define PORT_RETRIES 0 #define PORT_RETRIES 2
#define MIN_USER_PORT (IPPORT_RESERVED + 1) #define MIN_USER_PORT (IPPORT_RESERVED + 1)
#define MAX_USER_PORT 0xffff #define MAX_USER_PORT 0xffff
#define RANDOM_USER_PORT ((uint16_t) ((lrand48() % \ #define RANDOM_USER_PORT ((uint16_t) ((lrand48() % \
...@@ -99,7 +99,7 @@ slurm_fd _slurm_init_msg_engine ( slurm_addr * slurm_address ) ...@@ -99,7 +99,7 @@ slurm_fd _slurm_init_msg_engine ( slurm_addr * slurm_address )
slurm_fd _slurm_open_msg_conn ( slurm_addr * slurm_address ) slurm_fd _slurm_open_msg_conn ( slurm_addr * slurm_address )
{ {
return _slurm_open_stream ( slurm_address ) ; return _slurm_open_stream ( slurm_address, false ) ;
} }
slurm_fd _slurm_accept_msg_conn (slurm_fd fd, slurm_addr *addr) slurm_fd _slurm_accept_msg_conn (slurm_fd fd, slurm_addr *addr)
...@@ -425,22 +425,24 @@ slurm_fd _slurm_accept_stream(slurm_fd fd, slurm_addr *addr) ...@@ -425,22 +425,24 @@ slurm_fd _slurm_accept_stream(slurm_fd fd, slurm_addr *addr)
return _slurm_accept(fd, (struct sockaddr *)addr, &len); return _slurm_accept(fd, (struct sockaddr *)addr, &len);
} }
slurm_fd _slurm_open_stream(slurm_addr *addr) slurm_fd _slurm_open_stream(slurm_addr *addr, bool retry)
{ {
int rc, retry; int retry_cnt;
slurm_fd fd; slurm_fd fd;
if ( (addr->sin_family == 0) || (addr->sin_port == 0) ) if ( (addr->sin_family == 0) || (addr->sin_port == 0) )
return SLURM_SOCKET_ERROR; return SLURM_SOCKET_ERROR;
for (retry=0; ; retry++) { for (retry_cnt=0; ; retry_cnt++) {
int rc;
if ((fd =_slurm_create_socket(SLURM_STREAM)) < 0) { if ((fd =_slurm_create_socket(SLURM_STREAM)) < 0) {
error("Error creating slurm stream socket: %m"); error("Error creating slurm stream socket: %m");
return fd; slurm_seterrno(errno);
return SLURM_SOCKET_ERROR;
} }
if (retry) { if (retry_cnt) {
if (retry == 1) if (retry_cnt == 1)
debug3("Error connecting, picking new stream port"); debug3("Error connecting, picking new stream port");
_sock_bind_wild(fd); _sock_bind_wild(fd);
} }
...@@ -448,8 +450,11 @@ slurm_fd _slurm_open_stream(slurm_addr *addr) ...@@ -448,8 +450,11 @@ slurm_fd _slurm_open_stream(slurm_addr *addr)
rc = _slurm_connect(fd, (struct sockaddr const *)addr, sizeof(*addr)); rc = _slurm_connect(fd, (struct sockaddr const *)addr, sizeof(*addr));
if (rc >= 0) /* success */ if (rc >= 0) /* success */
break; break;
if ((errno != ECONNREFUSED) || (retry >= PORT_RETRIES)) if ((errno != ECONNREFUSED) ||
(!retry) || (retry_cnt >= PORT_RETRIES)) {
slurm_seterrno(errno);
goto error; goto error;
}
if ((_slurm_close_stream(fd) < 0) && (errno == EINTR)) if ((_slurm_close_stream(fd) < 0) && (errno == EINTR))
_slurm_close_stream(fd); /* try again */ _slurm_close_stream(fd); /* try again */
...@@ -461,7 +466,7 @@ slurm_fd _slurm_open_stream(slurm_addr *addr) ...@@ -461,7 +466,7 @@ slurm_fd _slurm_open_stream(slurm_addr *addr)
debug2("Error connecting slurm stream socket: %m"); debug2("Error connecting slurm stream socket: %m");
if ((_slurm_close_stream(fd) < 0) && (errno == EINTR)) if ((_slurm_close_stream(fd) < 0) && (errno == EINTR))
_slurm_close_stream(fd); /* try again */ _slurm_close_stream(fd); /* try again */
return rc; return SLURM_SOCKET_ERROR;
} }
int _slurm_get_stream_addr(slurm_fd fd, slurm_addr *addr ) int _slurm_get_stream_addr(slurm_fd fd, slurm_addr *addr )
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment