diff --git a/src/slurmctld/controller.c b/src/slurmctld/controller.c index 4debd7cc745839da94fbee14656f2f28678b3ba6..8ffbd3e5f2f78056b3d6644ae5de9ba2f517cf37 100644 --- a/src/slurmctld/controller.c +++ b/src/slurmctld/controller.c @@ -601,14 +601,13 @@ static void *_slurmctld_background(void *no_data) if (slurmctld_config.shutdown_time) { int i; /* wait for RPC's to complete */ - for (i=0; ((i<2) && slurmctld_config. - server_thread_count); i++) { - debug2("server_thread_count=%d", - slurmctld_config.server_thread_count); + for (i = 1; i < CONTROL_TIMEOUT; i++) { + if (slurmctld_config.server_thread_count == 0) + break; sleep(1); } if (slurmctld_config.server_thread_count) - info("shutdown server_thread_count %d", + info("shutdown server_thread_count=%d", slurmctld_config.server_thread_count); if (_report_locks_set() == 0) save_all_state(); @@ -883,7 +882,7 @@ static int _shutdown_backup_controller(void) req.msg_type = REQUEST_CONTROL; req.data = NULL; - if (slurm_send_recv_rc_msg(&req, &rc, 0) < 0) { + if (slurm_send_recv_rc_msg(&req, &rc, CONTROL_TIMEOUT) < 0) { error("shutdown_backup:send/recv: %m"); return SLURM_SOCKET_ERROR; } diff --git a/src/slurmctld/proc_req.c b/src/slurmctld/proc_req.c index 81b29152739d62b6100393991336dd3aa6a160f3..630e0ba5fdaa2e67bb0a8d0461bffae6f7ff5e29 100644 --- a/src/slurmctld/proc_req.c +++ b/src/slurmctld/proc_req.c @@ -1271,15 +1271,17 @@ static void _slurm_rpc_shutdown_controller(slurm_msg_t * msg) } if (msg->msg_type == REQUEST_CONTROL) { - /* wait for workload to dry up before sending reply */ - for (i = 0; ((i < 10) && (slurmctld_config. - server_thread_count > 1)); i++) { + /* Wait for workload to dry up before sending reply. + * One thread should remain, this one. */ + for (i = 1; i < CONTROL_TIMEOUT; i++) { + if (slurmctld_config.server_thread_count <= 1) + break; sleep(1); } if (slurmctld_config.server_thread_count > 1) - error("shutting down with server_thread_count=%d", + error("REQUEST_CONTROL reply with %d active threads", slurmctld_config.server_thread_count); - save_all_state(); + /* save_all_state(); performed by _slurmctld_background */ } slurm_send_rc_msg(msg, error_code); if ((error_code == SLURM_SUCCESS) && core_arg) diff --git a/src/slurmctld/slurmctld.h b/src/slurmctld/slurmctld.h index bb2078c611b2db46ed94a59d8e0badc31d60ff02..ee221cd2c63cd5d4a06da28852893159f43b7433 100644 --- a/src/slurmctld/slurmctld.h +++ b/src/slurmctld/slurmctld.h @@ -101,6 +101,9 @@ * Update the group uid_t access list as needed */ #define PERIODIC_GROUP_CHECK 600 +/* Seconds to wait for backup controller response to REQUEST_CONTROL RPC */ +#define CONTROL_TIMEOUT 4 + /* Default configuration configuration file values */ #define DEFAULT_FAST_SCHEDULE 1 #define DEFAULT_FIRST_JOB_ID 1