From bf9f245215f9437eec1f61e2721f5621be928706 Mon Sep 17 00:00:00 2001 From: Don Lipari <lipari1@llnl.gov> Date: Wed, 9 May 2012 08:55:57 -0700 Subject: [PATCH] Reset priority of system held jobs when dependency is satisfied The symptom is that SLURM schedules lower priority jobs to run when higher priority, dependent jobs have their dependencies satisfied. This happens because dependent jobs still have a priority of 1 when the job queue is sorted in the schedule() function. The proposed fix forces jobs to have their priority updated when their dependencies are satisfied. --- NEWS | 2 ++ src/slurmctld/job_mgr.c | 4 ++++ 2 files changed, 6 insertions(+) diff --git a/NEWS b/NEWS index 83ecd50dbd1..81690c2256c 100644 --- a/NEWS +++ b/NEWS @@ -41,6 +41,8 @@ documents those changes that are of interest to users and admins. batch jobs. -- Fix possible illegal memory reference in slurmctld for job step with relative option. Work by Matthieu Hautreux (CEA). + -- Reset priority of system held jobs when dependency is satisfied. Work by + Don Lipari, LLNL. * Changes in SLURM 2.3.4 ======================== diff --git a/src/slurmctld/job_mgr.c b/src/slurmctld/job_mgr.c index f2cdc0eaccb..3337b8349ff 100644 --- a/src/slurmctld/job_mgr.c +++ b/src/slurmctld/job_mgr.c @@ -8796,6 +8796,7 @@ extern bool job_independent(struct job_record *job_ptr, int will_run) struct job_details *detail_ptr = job_ptr->details; time_t now = time(NULL); int depend_rc; + bool independent = false; /* Test dependencies first so we can cancel jobs before dependent * job records get purged (e.g. afterok, afternotok) */ @@ -8836,11 +8837,14 @@ extern bool job_independent(struct job_record *job_ptr, int will_run) /* Job is eligible to start now */ if (job_ptr->state_reason == WAIT_DEPENDENCY) { job_ptr->state_reason = WAIT_NO_REASON; + independent = true; xfree(job_ptr->state_desc); } if ((detail_ptr && (detail_ptr->begin_time == 0) && (job_ptr->priority != 0))) { detail_ptr->begin_time = now; + if (independent) + _set_job_prio(job_ptr); } else if (job_ptr->state_reason == WAIT_TIME) { job_ptr->state_reason = WAIT_NO_REASON; xfree(job_ptr->state_desc); -- GitLab