From 919b1b9892711bcf726ee657134aee3764ec02cb Mon Sep 17 00:00:00 2001
From: Moe Jette <jette1@llnl.gov>
Date: Tue, 11 May 2004 17:41:43 +0000
Subject: [PATCH] Better deal with KillWait==0. Don't immediately kill a job
 after a timeout RPC is sent (possible race condition in background threads).

---
 src/slurmctld/job_mgr.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/slurmctld/job_mgr.c b/src/slurmctld/job_mgr.c
index f198bd71776..8314bf459f4 100644
--- a/src/slurmctld/job_mgr.c
+++ b/src/slurmctld/job_mgr.c
@@ -2110,6 +2110,7 @@ void job_time_limit(void)
 			continue;
 		}
 
+		/* Give srun command warning message about pending timeout */
 		if (job_ptr->end_time <= (now + 60))
 			srun_timeout (job_ptr->job_id, job_ptr->end_time);
 
@@ -2300,7 +2301,7 @@ static int _list_find_job_id(void *job_entry, void *key)
 static int _list_find_job_old(void *job_entry, void *key)
 {
 	time_t now      = time(NULL);
-	time_t kill_age = now - (slurmctld_conf.kill_wait * 2);
+	time_t kill_age = now - (slurmctld_conf.kill_wait + 20);
 	time_t min_age  = now - slurmctld_conf.min_job_age;
 	struct job_record *job_ptr = (struct job_record *)job_entry;
 
-- 
GitLab