Skip to content
Snippets Groups Projects
Commit f8342b44 authored by Moe Jette
Browse files
parent 0b2f2b9f
No related branches found
No related tags found
No related merge requests found
...@@ -95,8 +95,6 @@ documents those changes that are of interest to users and admins. ...@@ -95,8 +95,6 @@ documents those changes that are of interest to users and admins.
passthrough nodes to the allocation when creating a block. passthrough nodes to the allocation when creating a block.
- BLUEGENE - Fix deadlock issue with starting and failing jobs at the same - BLUEGENE - Fix deadlock issue with starting and failing jobs at the same
time time
- Make connect() non-blocking and poll to avoid possibly very long default
timeout.
* Changes in SLURM 1.1.17 * Changes in SLURM 1.1.17
========================= =========================
......
...@@ -565,28 +565,33 @@ extern int _slurm_getsockname (int __fd, struct sockaddr * __addr, ...@@ -565,28 +565,33 @@ extern int _slurm_getsockname (int __fd, struct sockaddr * __addr,
extern int _slurm_connect (int __fd, struct sockaddr const * __addr, extern int _slurm_connect (int __fd, struct sockaddr const * __addr,
socklen_t __len) socklen_t __len)
{ {
#if 1
return connect ( __fd , __addr , __len ) ;
#else
/* From "man connect": Note that for IP sockets the timeout /* From "man connect": Note that for IP sockets the timeout
* may be very long when syncookies are enabled on the server. * may be very long when syncookies are enabled on the server.
* *
* Timeouts in excess of 3 minutes have been observed, resulting * Timeouts in excess of 3 minutes have been observed, resulting
* in serious problems for slurmctld. Making the connect call * in serious problems for slurmctld. Making the connect call
* non-blocking and polling seems to fix the problem. */ * non-blocking and polling seems to fix the problem on Linux.
* It fails on AIX. */
int rc = -1, flags; int rc = -1, flags;
flags = fcntl(__fd, F_GETFL); flags = fcntl(__fd, F_GETFL);
fcntl(__fd, F_SETFL, flags | O_NONBLOCK); fcntl(__fd, F_SETFL, flags | O_NONBLOCK);
rc = connect ( __fd , __addr , __len ) ; rc = connect(__fd , __addr , __len);
if ((rc == -1) if ((rc == -1) && (errno == EINPROGRESS)) {
&& ((errno == EINPROGRESS) || (errno == EALREADY))) {
struct pollfd ufds; struct pollfd ufds;
ufds.fd = __fd; ufds.fd = __fd;
ufds.events = POLLOUT; ufds.events = POLLOUT;
ufds. revents = 0; ufds.revents = 0;
poll(&ufds, 1, 5000); /* 5 sec max wait */ poll(&ufds, 1, 5000); /* 5 sec max wait */
rc = connect ( __fd , __addr , __len ) ; if (ufds.revents == POLLOUT)
rc = connect(__fd , __addr , __len);
} }
fcntl(__fd, F_SETFL, flags); fcntl(__fd, F_SETFL, flags);
return rc; return rc;
#endif
} }
/* Put the address of the peer connected to socket FD into *ADDR /* Put the address of the peer connected to socket FD into *ADDR
......
0% Loading. Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment