Skip to content
Snippets Groups Projects
Commit fe8b04fb authored by Moe Jette
Browse files

Make the frequency with which we purge jobs (and resend the terminate job
RPC) a parameter, effectively raising the interval from 1 to 5 minutes.
parent 74b17867
No related branches found
No related tags found
No related merge requests found
...@@ -729,6 +729,7 @@ static void *_slurmctld_background(void *no_data) ...@@ -729,6 +729,7 @@ static void *_slurmctld_background(void *no_data)
static time_t last_group_time; static time_t last_group_time;
static time_t last_ping_node_time; static time_t last_ping_node_time;
static time_t last_ping_srun_time; static time_t last_ping_srun_time;
static time_t last_purge_job_time;
static time_t last_timelimit_time; static time_t last_timelimit_time;
static time_t last_assert_primary_time; static time_t last_assert_primary_time;
time_t now; time_t now;
...@@ -755,6 +756,7 @@ static void *_slurmctld_background(void *no_data) ...@@ -755,6 +756,7 @@ static void *_slurmctld_background(void *no_data)
/* Let the dust settle before doing work */ /* Let the dust settle before doing work */
now = time(NULL); now = time(NULL);
last_sched_time = last_checkpoint_time = last_group_time = now; last_sched_time = last_checkpoint_time = last_group_time = now;
last_purge_job_time = now;
last_timelimit_time = last_assert_primary_time = now; last_timelimit_time = last_assert_primary_time = now;
if (slurmctld_conf.slurmd_timeout) { if (slurmctld_conf.slurmd_timeout) {
/* We ping nodes that haven't responded in SlurmdTimeout/2, /* We ping nodes that haven't responded in SlurmdTimeout/2,
...@@ -830,12 +832,16 @@ static void *_slurmctld_background(void *no_data) ...@@ -830,12 +832,16 @@ static void *_slurmctld_background(void *no_data)
unlock_slurmctld(part_write_lock); unlock_slurmctld(part_write_lock);
} }
if (difftime(now, last_sched_time) >= PERIODIC_SCHEDULE) { if (difftime(now, last_purge_job_time) >= PURGE_JOB_INTERVAL) {
last_sched_time = now; last_purge_job_time = now;
debug2("Performing purge of old job records"); debug2("Performing purge of old job records");
lock_slurmctld(job_write_lock); lock_slurmctld(job_write_lock);
purge_old_job(); /* remove defunct job recs */ purge_old_job();
unlock_slurmctld(job_write_lock); unlock_slurmctld(job_write_lock);
}
if (difftime(now, last_sched_time) >= PERIODIC_SCHEDULE) {
last_sched_time = now;
if (schedule()) if (schedule())
last_checkpoint_time = 0; /* force state save */ last_checkpoint_time = 0; /* force state save */
} }
......
...@@ -108,6 +108,10 @@ ...@@ -108,6 +108,10 @@
/* Check for jobs reaching their time limit every PERIODIC_TIMEOUT seconds */ /* Check for jobs reaching their time limit every PERIODIC_TIMEOUT seconds */
#define PERIODIC_TIMEOUT 60 #define PERIODIC_TIMEOUT 60
/* Attempt to purge defunct job records and resend job kill requests
 * every PURGE_JOB_INTERVAL seconds (300 seconds = 5 minutes).
 * The background loop compares the time elapsed since the last purge
 * against this value before calling purge_old_job(). */
#define PURGE_JOB_INTERVAL 300
/* Pathname of group file record for checking update times */ /* Pathname of group file record for checking update times */
#define GROUP_FILE "/etc/group" #define GROUP_FILE "/etc/group"
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment