From 777bf478837f89b22f0e87b741f03043704f1ca6 Mon Sep 17 00:00:00 2001 From: jette <jette@schedmd.com> Date: Thu, 17 Jan 2013 15:11:36 -0800 Subject: [PATCH] Replace socket shutdown call with linger sockopt The shutdown call was causing all pending I/O to be discarded. Linger waits for pending I/O to complete before the close call returns. --- src/common/net.c | 21 ++++++++++++++------- src/slurmd/slurmstepd/io.c | 13 +++---------- src/slurmd/slurmstepd/slurmstepd_job.h | 1 - 3 files changed, 17 insertions(+), 18 deletions(-) diff --git a/src/common/net.c b/src/common/net.c index d7ac4f5717f..ea46c59bdd2 100644 --- a/src/common/net.c +++ b/src/common/net.c @@ -178,8 +178,9 @@ int net_set_low_water(int sock, size_t size) /* set keep alive time on socket */ extern int net_set_keep_alive(int sock) { - int opt_val; + int opt_int; socklen_t opt_len; + struct linger opt_linger; static bool keep_alive_set = false; static int keep_alive_time = (uint16_t) NO_VAL; @@ -191,20 +192,26 @@ extern int net_set_keep_alive(int sock) if (keep_alive_time == (uint16_t) NO_VAL) return 0; + opt_len = sizeof(struct linger); + opt_linger.l_onoff = 1; + opt_linger.l_linger = keep_alive_time; + if (setsockopt(sock, SOL_SOCKET, SO_LINGER, &opt_linger, opt_len) < 0) + error("Unable to set linger socket option: %m"); + opt_len = sizeof(int); - opt_val = 1; - if (setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE, &opt_val, opt_len) < 0) { + opt_int = 1; + if (setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE, &opt_int, opt_len) < 0) { error("Unable to set keep alive socket option: %m"); return -1; } - opt_val = keep_alive_time; - if (setsockopt(sock, SOL_TCP, TCP_KEEPIDLE, &opt_val, opt_len) < 0) { + opt_int = keep_alive_time; + if (setsockopt(sock, SOL_TCP, TCP_KEEPIDLE, &opt_int, opt_len) < 0) { error("Unable to set keep alive socket time: %m"); return -1; } #if 0 - getsockopt(sock, SOL_TCP, TCP_KEEPIDLE, &opt_val, &opt_len); - info("keep_alive time is %d", opt_val); + getsockopt(sock, SOL_TCP, TCP_KEEPIDLE, &opt_int, &opt_len); + info("keep_alive time is %d", opt_int); #endif return 0; diff --git a/src/slurmd/slurmstepd/io.c b/src/slurmd/slurmstepd/io.c index 750189dbd2f..56681d0e889 100644 --- a/src/slurmd/slurmstepd/io.c +++ b/src/slurmd/slurmstepd/io.c @@ -1377,15 +1377,9 @@ io_close_task_fds(slurmd_job_t *job) int i; for (i = 0; i < job->node_tasks; i++) { - if (job->task[i]->socket_io) { - shutdown(job->task[i]->stdin_fd, SHUT_RDWR); - shutdown(job->task[i]->stdout_fd, SHUT_RDWR); - shutdown(job->task[i]->stderr_fd, SHUT_RDWR); - } else { - close(job->task[i]->stdin_fd); - close(job->task[i]->stdout_fd); - close(job->task[i]->stderr_fd); - } + close(job->task[i]->stdin_fd); + close(job->task[i]->stdout_fd); + close(job->task[i]->stderr_fd); } } @@ -1960,7 +1954,6 @@ user_managed_io_client_connect(int node_tasks, srun_info_t *srun, tasks[i]->from_stdout = -1; tasks[i]->stderr_fd = fd; tasks[i]->from_stderr = -1; - tasks[i]->socket_io = true; } return SLURM_SUCCESS; diff --git a/src/slurmd/slurmstepd/slurmstepd_job.h b/src/slurmd/slurmstepd/slurmstepd_job.h index c8b455f6832..8204690041d 100644 --- a/src/slurmd/slurmstepd/slurmstepd_job.h +++ b/src/slurmd/slurmstepd/slurmstepd_job.h @@ -105,7 +105,6 @@ typedef struct task_info { bool esent; /* true if exit status has been sent */ bool exited; /* true if task has exited */ int estatus; /* this task's exit status */ - bool socket_io; /* true if stdin/out/err_fd are sockets */ uint32_t argc; char **argv; -- GitLab