slurmctld: periodically reassert the primary controller

Have the background thread ask the backup controller to relinquish
control at intervals, and let callers of _shutdown_backup_controller()
choose how long to sleep after the request (SHUTDOWN_WAIT at startup,
0 from the background thread). node_name moves from a local in main()
to file scope so the background thread can read it.

The reassert interval is compared against slurmctld_conf.slurmctld_timeout,
the same field that gates the check, and the always-true test of the
node_name array address is dropped.

NOTE(review): this patch was recovered from a copy whose line breaks and
indentation were lost. Line splits and blank context lines were rebuilt
from the hunk line counts and the indentation is a best guess, so apply
with whitespace-insensitive matching and verify the context against the
tree. The index line is carried over unchanged; its post-image hash does
not describe the content below.

diff --git a/src/slurmctld/controller.c b/src/slurmctld/controller.c
index cff832d9f41f71fe26b313739e0863ba90261060..a0b71fb61c7414a1de2f6d73210b628d63c77199 100644
--- a/src/slurmctld/controller.c
+++ b/src/slurmctld/controller.c
@@ -80,6 +80,7 @@
 #define MIN_CHECKIN_TIME 3	/* Nodes have this number of seconds to
 				 * check-in before we ping them */
 #define MEM_LEAK_TEST 0	/* Running memory leak test if set */
+#define SHUTDOWN_WAIT 2	/* Time to wait for backup server shutdown */
 
 
 /* Log to stderr and syslog until becomes a daemon */
@@ -94,6 +95,7 @@ static int daemonize = DEFAULT_DAEMONIZE;
 static int debug_level = 0;
 static char *debug_logfile = NULL;
 static bool dump_core = false;
+static char node_name[MAX_NAME_LEN];
 static int recover = DEFAULT_RECOVER;
 static pthread_cond_t server_thread_cond = PTHREAD_COND_INITIALIZER;
 static pid_t slurmctld_pid;
@@ -117,7 +119,7 @@ static void _parse_commandline(int argc, char *argv[],
 inline static int _report_locks_set(void);
 static void * _service_connection(void *arg);
 static int _set_slurmctld_state_loc(void);
-static int _shutdown_backup_controller(void);
+static int _shutdown_backup_controller(int wait_time);
 static void * _slurmctld_background(void *no_data);
 static void * _slurmctld_rpc_mgr(void *no_data);
 static void * _slurmctld_signal_hand(void *no_data);
@@ -133,7 +135,6 @@ typedef struct connection_arg {
 int main(int argc, char *argv[])
 {
 	int error_code;
-	char node_name[MAX_NAME_LEN];
 	pthread_attr_t thread_attr_sig, thread_attr_rpc;
 
 	/*
@@ -225,7 +226,7 @@ int main(int argc, char *argv[])
 		} else if (slurmctld_conf.control_machine &&
 			 (strcmp(node_name, slurmctld_conf.control_machine)
 			  == 0)) {
-			(void) _shutdown_backup_controller();
+			(void) _shutdown_backup_controller(SHUTDOWN_WAIT);
 			/* Now recover the remaining state information */
 			if ((error_code = read_slurm_conf(recover))) {
 				error("read_slurm_conf reading %s: %m",
@@ -600,7 +601,9 @@ static void *_slurmctld_background(void *no_data)
 	static time_t last_ping_node_time;
 	static time_t last_ping_srun_time;
 	static time_t last_timelimit_time;
+	static time_t last_assert_primary_time;
 	time_t now;
+
 	/* Locks: Read job */
 	slurmctld_lock_t job_read_lock = {
 		NO_LOCK, READ_LOCK, NO_LOCK, NO_LOCK
@@ -620,7 +623,7 @@ static void *_slurmctld_background(void *no_data)
 	/* Let the dust settle before doing work */
 	now = time(NULL);
 	last_sched_time = last_checkpoint_time = last_group_time = now;
-	last_timelimit_time = now;
+	last_timelimit_time = last_assert_primary_time = now;
 	last_ping_node_time = now + (time_t)MIN_CHECKIN_TIME -
 			      (time_t)slurmctld_conf.heartbeat_interval;
 	last_ping_srun_time = now;
@@ -706,6 +709,25 @@ static void *_slurmctld_background(void *no_data)
 			save_all_state();
 		}
 
+		/* Reassert this machine as the primary controller.
+		 * A network or security problem could result in
+		 * the backup controller assuming control even
+		 * while the real primary controller is running.
+		 * Attempted only when a backup is configured, when
+		 * node_name differs from backup_controller, and once
+		 * slurmctld_timeout has elapsed since the last try.
+		 * wait_time of 0: send the request without sleeping. */
+		if (slurmctld_conf.slurmctld_timeout &&
+		    slurmctld_conf.backup_addr &&
+		    slurmctld_conf.backup_addr[0] &&
+		    (difftime(now, last_assert_primary_time) >=
+		     slurmctld_conf.slurmctld_timeout) &&
+		    slurmctld_conf.backup_controller &&
+		    strcmp(node_name, slurmctld_conf.backup_controller) != 0) {
+			last_assert_primary_time = now;
+			(void) _shutdown_backup_controller(0);
+		}
+
 	}
 
 	debug3("_slurmctld_background shutting down");
@@ -899,9 +921,11 @@ static void _usage(char *prog_name)
 /*
  * Tell the backup_controller to relinquish control, primary control_machine
  *	has resumed operation
+ * wait_time - How long to wait for backup controller to write state, seconds
+ *	(zero returns without sleeping)
  * RET 0 or an error code
  */
-static int _shutdown_backup_controller(void)
+static int _shutdown_backup_controller(int wait_time)
 {
 	int rc;
 	slurm_msg_t req;
@@ -921,7 +945,7 @@ static int _shutdown_backup_controller(void)
 
 	if (slurm_send_recv_rc_msg(&req, &rc, CONTROL_TIMEOUT) < 0) {
 		error("shutdown_backup:send/recv: %m");
-		return SLURM_SOCKET_ERROR;
+		return SLURM_ERROR;
 	}
 
 	if (rc) {
@@ -935,7 +959,8 @@ static int _shutdown_backup_controller(void)
 	 * not presently the case (it returns when no other work is pending,
 	 * so the state save should occur right away). We sleep for a while
 	 * here and give the backup controller time to shutdown */
-	sleep(2);
+	if (wait_time)
+		sleep(wait_time);
 
 	return SLURM_SUCCESS;
 }