From fe8b04fb2fad3f0311a6acd264aa41d5e6b612a5 Mon Sep 17 00:00:00 2001
From: Moe Jette <jette1@llnl.gov>
Date: Thu, 18 Jan 2007 17:40:18 +0000
Subject: [PATCH] Make the frequency with which we purge jobs (and resend the
 terminate job RPC) a parameter, effectively raising the interval from 1 to 5 mins.

---
 src/slurmctld/controller.c | 12 +++++++++---
 src/slurmctld/slurmctld.h  |  4 ++++
 2 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/src/slurmctld/controller.c b/src/slurmctld/controller.c
index 4fd41c1552a..885b0a58eb3 100644
--- a/src/slurmctld/controller.c
+++ b/src/slurmctld/controller.c
@@ -729,6 +729,7 @@ static void *_slurmctld_background(void *no_data)
 	static time_t last_group_time;
 	static time_t last_ping_node_time;
 	static time_t last_ping_srun_time;
+	static time_t last_purge_job_time;
 	static time_t last_timelimit_time;
 	static time_t last_assert_primary_time;
 	time_t now;
@@ -755,6 +756,7 @@ static void *_slurmctld_background(void *no_data)
 	/* Let the dust settle before doing work */
 	now = time(NULL);
 	last_sched_time = last_checkpoint_time = last_group_time = now;
+	last_purge_job_time = now;
 	last_timelimit_time = last_assert_primary_time = now;
 	if (slurmctld_conf.slurmd_timeout) {
 		/* We ping nodes that haven't responded in SlurmdTimeout/2,
@@ -830,12 +832,16 @@ static void *_slurmctld_background(void *no_data)
 			unlock_slurmctld(part_write_lock);
 		}
 
-		if (difftime(now, last_sched_time) >= PERIODIC_SCHEDULE) {
-			last_sched_time = now;
+		if (difftime(now, last_purge_job_time) >= PURGE_JOB_INTERVAL) {
+			last_purge_job_time = now;
 			debug2("Performing purge of old job records");
 			lock_slurmctld(job_write_lock);
-			purge_old_job();	/* remove defunct job recs */
+			purge_old_job();
 			unlock_slurmctld(job_write_lock);
+		}
+
+		if (difftime(now, last_sched_time) >= PERIODIC_SCHEDULE) {
+			last_sched_time = now;
 			if (schedule())
 				last_checkpoint_time = 0;  /* force state save */
 		}
diff --git a/src/slurmctld/slurmctld.h b/src/slurmctld/slurmctld.h
index f38f32addb5..d0ae1c968c6 100644
--- a/src/slurmctld/slurmctld.h
+++ b/src/slurmctld/slurmctld.h
@@ -108,6 +108,10 @@
 /* Check for jobs reaching their time limit every PERIODIC_TIMEOUT seconds */
 #define	PERIODIC_TIMEOUT	60
 
+/* Attempt to purge defunct job records and resend job kill requests
+ * at most once every PURGE_JOB_INTERVAL seconds (300 s = 5 minutes) */
+#define PURGE_JOB_INTERVAL 300
+
 /* Pathname of group file record for checking update times */
 #define GROUP_FILE	"/etc/group"
 
-- 
GitLab