From 78fdfe9cefca77968f134b6bdd8edf81e42a7515 Mon Sep 17 00:00:00 2001 From: Moe Jette <jette1@llnl.gov> Date: Sat, 20 Sep 2003 20:49:51 +0000 Subject: [PATCH] Save all state when the last node associated with a job records its EPILOG_COMPLETE_MESSAGE. At this time the job is COMPLETED and all associated nodes are available. --- src/slurmctld/controller.c | 2 +- src/slurmctld/proc_req.c | 11 +++++++---- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/src/slurmctld/controller.c b/src/slurmctld/controller.c index 3ff37e8923e..15a17b96ffe 100644 --- a/src/slurmctld/controller.c +++ b/src/slurmctld/controller.c @@ -637,7 +637,7 @@ static void *_slurmctld_background(void *no_data) purge_old_job(); /* remove defunct job recs */ unlock_slurmctld(job_write_lock); if (schedule()) - last_checkpoint_time = 0; /* force save */ + last_checkpoint_time = 0; /* force state save */ } if (difftime(now, last_checkpoint_time) >= diff --git a/src/slurmctld/proc_req.c b/src/slurmctld/proc_req.c index 94a5adef868..81b29152739 100644 --- a/src/slurmctld/proc_req.c +++ b/src/slurmctld/proc_req.c @@ -59,6 +59,7 @@ #include "src/slurmctld/agent.h" #include "src/slurmctld/locks.h" #include "src/slurmctld/proc_req.h" +#include "src/slurmctld/read_config.h" #include "src/slurmctld/slurmctld.h" #define BUF_SIZE 1024 /* Temporary buffer size */ @@ -684,8 +685,11 @@ static void _slurm_rpc_epilog_complete(slurm_msg_t * msg) epilog_msg->job_id, epilog_msg->node_name, TIME_STR); - if (run_scheduler) - schedule(); /* has own locks */ + /* Functions below provide their own locking */ + if (run_scheduler) { + (void) schedule(); + save_all_state(); + } /* NOTE: RPC has no response */ } @@ -729,7 +733,6 @@ static void _slurm_rpc_job_step_kill(slurm_msg_t * msg) /* Below function provides its own locking */ (void) dump_all_job_state(); - } } else { error_code = job_step_signal(job_step_kill_msg->job_id, @@ -1085,7 +1088,6 @@ static void _slurm_rpc_node_registration(slurm_msg_t * msg) 
debug2("_slurm_rpc_node_registration complete for %s %s", node_reg_stat_msg->node_name, TIME_STR); slurm_send_rc_msg(msg, SLURM_SUCCESS); - schedule(); /* has own locks */ } } @@ -1277,6 +1279,7 @@ static void _slurm_rpc_shutdown_controller(slurm_msg_t * msg) if (slurmctld_config.server_thread_count > 1) error("shutting down with server_thread_count=%d", slurmctld_config.server_thread_count); + save_all_state(); } slurm_send_rc_msg(msg, error_code); if ((error_code == SLURM_SUCCESS) && core_arg) -- GitLab