From 919b1b9892711bcf726ee657134aee3764ec02cb Mon Sep 17 00:00:00 2001 From: Moe Jette <jette1@llnl.gov> Date: Tue, 11 May 2004 17:41:43 +0000 Subject: [PATCH] Better deal with KillWait==0. Don't immediately kill a job after a timeout RPC is sent (possible race condition in background threads). --- src/slurmctld/job_mgr.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/slurmctld/job_mgr.c b/src/slurmctld/job_mgr.c index f198bd71776..8314bf459f4 100644 --- a/src/slurmctld/job_mgr.c +++ b/src/slurmctld/job_mgr.c @@ -2110,6 +2110,7 @@ void job_time_limit(void) continue; } + /* Give srun command warning message about pending timeout */ if (job_ptr->end_time <= (now + 60)) srun_timeout (job_ptr->job_id, job_ptr->end_time); @@ -2300,7 +2301,7 @@ static int _list_find_job_id(void *job_entry, void *key) static int _list_find_job_old(void *job_entry, void *key) { time_t now = time(NULL); - time_t kill_age = now - (slurmctld_conf.kill_wait * 2); + time_t kill_age = now - (slurmctld_conf.kill_wait + 20); time_t min_age = now - slurmctld_conf.min_job_age; struct job_record *job_ptr = (struct job_record *)job_entry; -- GitLab