diff --git a/NEWS b/NEWS
index f246b1b5e1a1b320d04f8bd8c34baf445cf62a97..891c439cd3ee9bf469d4ea2c72affafc8f2cc89a 100644
--- a/NEWS
+++ b/NEWS
@@ -76,6 +76,8 @@ documents those changes that are of interest to users and administrators.
     done.
  -- Testsuite - Fix test1.83 to handle gaps in node names properly.
  -- BlueGene - correctly scale node counts when enforcing MaxNodes limit.
+ -- Make sure no attempt is made to schedule a requeued job until all steps are
+    cleaned (Node Health Check completes for all steps on a Cray).
 
 * Changes in Slurm 16.05.4
 ==========================
diff --git a/src/slurmctld/job_scheduler.c b/src/slurmctld/job_scheduler.c
index 2047b4d3e42e77ee4dfceb90c70281793b2931e7..5b223043bdfc3d5280d06e9816928d5c5cf33f4d 100644
--- a/src/slurmctld/job_scheduler.c
+++ b/src/slurmctld/job_scheduler.c
@@ -195,6 +195,41 @@ static void _job_queue_rec_del(void *x)
 	xfree(x);
 }
 
+/*
+ * Return non-zero if any non-pending step of the job is still in a cleaning
+ * state. This can happen on a Cray if a job is requeued and the step NHC
+ * (Node Health Check) is still running after the requeued job is eligible
+ * to run again.
+ * IN job_ptr - job whose step list is examined
+ * RET value reported for SELECT_JOBDATA_CLEANING by the first step found
+ *     cleaning, otherwise 0 (also 0 if the job has no step list)
+ */
+static uint16_t _is_step_cleaning(struct job_record *job_ptr)
+{
+	ListIterator step_iterator;
+	struct step_record *step_ptr;
+	uint16_t cleaning = 0;
+
+	/* Caller tolerates a NULL step_list, so must we: no list, no steps */
+	if (!job_ptr->step_list)
+		return cleaning;
+
+	step_iterator = list_iterator_create(job_ptr->step_list);
+	while ((step_ptr = (struct step_record *) list_next (step_iterator))) {
+		/* Only check if not a pending step */
+		if (step_ptr->step_id != SLURM_PENDING_STEP) {
+			select_g_select_jobinfo_get(step_ptr->select_jobinfo,
+						    SELECT_JOBDATA_CLEANING,
+						    &cleaning);
+			if (cleaning)
+				break;
+		}
+	}
+	list_iterator_destroy(step_iterator);
+
+	return cleaning;
+}
+
 /* Job test for ability to run now, excludes partition specific tests */
 static bool _job_runnable_test1(struct job_record *job_ptr, bool sched_plugin)
 {
@@ -209,6 +244,8 @@ static bool _job_runnable_test1(struct job_record *job_ptr, bool sched_plugin)
 	select_g_select_jobinfo_get(job_ptr->select_jobinfo,
 				    SELECT_JOBDATA_CLEANING,
 				    &cleaning);
+	if (!cleaning)
+		cleaning = _is_step_cleaning(job_ptr);
 	if (cleaning ||
 	    (job_ptr->details && job_ptr->details->prolog_running) ||
 	    (job_ptr->step_list && list_count(job_ptr->step_list))) {
diff --git a/src/slurmctld/step_mgr.c b/src/slurmctld/step_mgr.c
index 795f70262cd44f0a197e24d4c93883ae16effa4d..69003d7d91e1121767f1aab77e3393879621fbed 100644
--- a/src/slurmctld/step_mgr.c
+++ b/src/slurmctld/step_mgr.c
@@ -1992,6 +1992,8 @@ static void _step_dealloc_lps(struct step_record *step_ptr)
 	if (step_ptr->step_layout == NULL)	/* batch step */
 		return;
 
+	if (job_resrcs_ptr == NULL)
+		return;
 	i_first = bit_ffs(job_resrcs_ptr->node_bitmap);
 	i_last  = bit_fls(job_resrcs_ptr->node_bitmap);
 	if (i_first == -1)	/* empty bitmap */