diff --git a/src/common/assoc_mgr.c b/src/common/assoc_mgr.c index cbae3312acc9a842356eb2e51173bdcbde3dac11..15eb26e6eb46f8a71c61f2edbbda3bb1c4b28495 100644 --- a/src/common/assoc_mgr.c +++ b/src/common/assoc_mgr.c @@ -1197,6 +1197,7 @@ extern int assoc_mgr_fini(char *state_save_location) assoc_mgr_wckey_list = NULL; assoc_mgr_root_assoc = NULL; + running_cache = 0; assoc_mgr_unlock(&locks); diff --git a/src/slurmctld/backup.c b/src/slurmctld/backup.c index ef6f4353f99c70c5c3d2db684772625dcbdc3287..385e7be5d3c9d0689c8744043baaed6347e6096e 100644 --- a/src/slurmctld/backup.c +++ b/src/slurmctld/backup.c @@ -155,13 +155,6 @@ void run_backup(slurm_trigger_callbacks_t *callbacks) sleep(1); /* Give the primary slurmctld set-up time */ } - /* The job list needs to be freed before the wait, we also - * free it afterwards just to make sure. - */ - lock_slurmctld(config_write_lock); - job_fini(); - unlock_slurmctld(config_write_lock); - /* repeatedly ping ControlMachine */ while (slurmctld_config.shutdown_time == 0) { sleep(1); @@ -224,11 +217,18 @@ void run_backup(slurm_trigger_callbacks_t *callbacks) pthread_join(slurmctld_config.thread_id_sig, NULL); pthread_join(slurmctld_config.thread_id_rpc, NULL); + /* The job list needs to be freed before we run + * ctld_assoc_mgr_init, it should be empty here in the first place. + */ + lock_slurmctld(config_write_lock); + job_fini(); + init_job_conf(); + unlock_slurmctld(config_write_lock); + ctld_assoc_mgr_init(callbacks); /* clear old state and read new state */ lock_slurmctld(config_write_lock); - job_fini(); if (switch_restore(slurmctld_conf.state_save_location, true)) { error("failed to restore switch state"); abort(); diff --git a/src/slurmctld/controller.c b/src/slurmctld/controller.c index f00331cf675a49e35a82e28ccd7b017962eb9406..928638b70c75c90a3cd49a3d695b27455c6ccdfb 100644 --- a/src/slurmctld/controller.c +++ b/src/slurmctld/controller.c @@ -398,8 +398,6 @@ int main(int argc, char *argv[]) fatal( "failed to initialize preempt plugin" ); if (checkpoint_init(slurmctld_conf.checkpoint_type) != SLURM_SUCCESS ) fatal( "failed to initialize checkpoint plugin" ); - if (slurm_acct_storage_init(NULL) != SLURM_SUCCESS ) - fatal( "failed to initialize accounting_storage plugin"); if (jobacct_gather_init() != SLURM_SUCCESS ) fatal( "failed to initialize jobacct_gather plugin"); if (job_submit_plugin_init() != SLURM_SUCCESS ) @@ -419,10 +417,17 @@ int main(int argc, char *argv[]) slurm_sched_fini(); /* make sure shutdown */ primary = 0; run_backup(&callbacks); + info("starting it up"); + if (slurm_acct_storage_init(NULL) != SLURM_SUCCESS ) + fatal("failed to initialize " + "accounting_storage plugin"); } else if (_valid_controller()) { (void) _shutdown_backup_controller(SHUTDOWN_WAIT); trigger_primary_ctld_res_ctrl(); ctld_assoc_mgr_init(&callbacks); + if (slurm_acct_storage_init(NULL) != SLURM_SUCCESS ) + fatal("failed to initialize " + "accounting_storage plugin"); /* Now recover the remaining state information */ lock_slurmctld(config_write_lock); if (switch_restore(slurmctld_conf.state_save_location, @@ -1553,7 +1558,6 @@ static void *_slurmctld_background(void *no_data) last_node_acct = now; _accounting_cluster_ready(); } - if (last_proc_req_start == 0) { /* Stats will reset at midnight (aprox).