From fe8b04fb2fad3f0311a6acd264aa41d5e6b612a5 Mon Sep 17 00:00:00 2001
From: Moe Jette <jette1@llnl.gov>
Date: Thu, 18 Jan 2007 17:40:18 +0000
Subject: [PATCH] Make frequency with which we purge jobs (and resend terminate job RPC) a parameter, effectively raise the time from 1 to 5 mins.

---
 src/slurmctld/controller.c | 12 +++++++++---
 src/slurmctld/slurmctld.h  |  4 ++++
 2 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/src/slurmctld/controller.c b/src/slurmctld/controller.c
index 4fd41c1552a..885b0a58eb3 100644
--- a/src/slurmctld/controller.c
+++ b/src/slurmctld/controller.c
@@ -729,6 +729,7 @@ static void *_slurmctld_background(void *no_data)
 	static time_t last_group_time;
 	static time_t last_ping_node_time;
 	static time_t last_ping_srun_time;
+	static time_t last_purge_job_time;
 	static time_t last_timelimit_time;
 	static time_t last_assert_primary_time;
 	time_t now;
@@ -755,6 +756,7 @@ static void *_slurmctld_background(void *no_data)
 	/* Let the dust settle before doing work */
 	now = time(NULL);
 	last_sched_time = last_checkpoint_time = last_group_time = now;
+	last_purge_job_time = now;
 	last_timelimit_time = last_assert_primary_time = now;
 	if (slurmctld_conf.slurmd_timeout) {
 		/* We ping nodes that haven't responded in SlurmdTimeout/2,
@@ -830,12 +832,16 @@ static void *_slurmctld_background(void *no_data)
 			unlock_slurmctld(part_write_lock);
 		}
 
-		if (difftime(now, last_sched_time) >= PERIODIC_SCHEDULE) {
-			last_sched_time = now;
+		if (difftime(now, last_purge_job_time) >= PURGE_JOB_INTERVAL) {
+			last_purge_job_time = now;
 			debug2("Performing purge of old job records");
 			lock_slurmctld(job_write_lock);
-			purge_old_job(); /* remove defunct job recs */
+			purge_old_job();
 			unlock_slurmctld(job_write_lock);
+		}
+
+		if (difftime(now, last_sched_time) >= PERIODIC_SCHEDULE) {
+			last_sched_time = now;
 			if (schedule())
 				last_checkpoint_time = 0; /* force state save */
 		}
diff --git a/src/slurmctld/slurmctld.h b/src/slurmctld/slurmctld.h
index f38f32addb5..d0ae1c968c6 100644
--- a/src/slurmctld/slurmctld.h
+++ b/src/slurmctld/slurmctld.h
@@ -108,6 +108,10 @@
 /* Check for jobs reaching their time limit every PERIODIC_TIMEOUT seconds */
 #define PERIODIC_TIMEOUT 60
 
+/* Attempt to purge defunct job records and resend job kill requests
+ * every PURGE_JOB_INTERVAL seconds */
+#define PURGE_JOB_INTERVAL 300
+
 /* Pathname of group file record for checking update times */
 #define GROUP_FILE "/etc/group"
 
-- 
GitLab