diff --git a/slurm/slurmdb.h b/slurm/slurmdb.h
index 3edead0baa0bb7b6e7cf5064f833b06e69bce0fe..76994e9b67c25a0ffafe496de391100082a26f26 100644
--- a/slurm/slurmdb.h
+++ b/slurm/slurmdb.h
@@ -907,6 +907,7 @@ typedef struct {
 	double usage_thres; /* percent of effective usage of an
			       association when breached will deny
			       pending and new jobs */
+	time_t blocked_until; /* internal use only, DON'T PACK */
 } slurmdb_qos_rec_t;
 
 typedef struct {
diff --git a/src/plugins/sched/backfill/backfill.c b/src/plugins/sched/backfill/backfill.c
index 8b6a100a970c9b6773518a927afc841485bf9e6c..43a9d60682731153913772d38330b871e6e17565 100644
--- a/src/plugins/sched/backfill/backfill.c
+++ b/src/plugins/sched/backfill/backfill.c
@@ -167,6 +167,7 @@ static int  _try_sched(struct job_record *job_ptr, bitstr_t **avail_bitmap,
 			uint32_t min_nodes, uint32_t max_nodes,
 			uint32_t req_nodes, bitstr_t *exc_core_bitmap);
 static int  _yield_locks(int usec);
+static int  _clear_qos_blocked_times(void *x, void *arg);
 
 /* Log resources to be allocated to a pending job */
 static void _dump_job_sched(struct job_record *job_ptr, time_t end_time,
@@ -264,6 +265,14 @@ static int _num_feature_count(struct job_record *job_ptr, bool *has_xor)
 	return rc;
 }
 
+static int _clear_qos_blocked_times(void *x, void *arg)
+{
+	slurmdb_qos_rec_t *qos_ptr = (slurmdb_qos_rec_t *) x;
+	qos_ptr->blocked_until = 0;
+
+	return 0;
+}
+
 /* Attempt to schedule a specific job on specific available nodes
  * IN job_ptr - job to schedule
  * IN/OUT avail_bitmap - nodes available/selected to use
@@ -879,7 +888,7 @@ static int _attempt_backfill(void)
 	DEF_TIMERS;
 	List job_queue;
 	job_queue_rec_t *job_queue_rec;
-	slurmdb_qos_rec_t *qos_ptr = NULL;
+	slurmdb_qos_rec_t *qos_ptr = NULL, *qos_part_ptr = NULL;
 	int bb, i, j, node_space_recs, mcs_select = 0;
 	struct job_record *job_ptr;
 	struct part_record *part_ptr, **bf_part_ptr = NULL;
@@ -1004,6 +1013,11 @@ static int _attempt_backfill(void)
 		uid = xmalloc(BF_MAX_USERS * sizeof(uint32_t));
 		njobs = xmalloc(BF_MAX_USERS * sizeof(uint16_t));
 	}
+	if (assoc_limit_stop) {
+		assoc_mgr_lock(&qos_read_lock);
+		list_for_each(part_list, _clear_qos_blocked_times, NULL);
+		assoc_mgr_unlock(&qos_read_lock);
+	}
 
 	sort_job_queue(job_queue);
 	while (1) {
@@ -1130,6 +1144,7 @@ static int _attempt_backfill(void)
 
 		assoc_mgr_lock(&qos_read_lock);
 		qos_ptr = (slurmdb_qos_rec_t *)job_ptr->qos_ptr;
+		qos_part_ptr = (slurmdb_qos_rec_t *)job_ptr->part_ptr->qos_ptr;
 		if (qos_ptr)
 			qos_flags = qos_ptr->flags;
 		if (part_policy_valid_qos(job_ptr->part_ptr, qos_ptr) !=
@@ -1361,6 +1376,27 @@ next_task:
 			time_limit = job_ptr->time_limit = job_ptr->time_min;
 
 		later_start = now;
+		if (qos_ptr && assoc_limit_stop) {
+			assoc_mgr_lock(&qos_read_lock);
+			if (qos_ptr->blocked_until > later_start) {
+				later_start = qos_ptr->blocked_until;
+				if (debug_flags & DEBUG_FLAG_BACKFILL)
+					info("QOS blocked_until move start_res to %ld",
+					     later_start);
+			}
+			assoc_mgr_unlock(&qos_read_lock);
+		}
+		if (qos_part_ptr && assoc_limit_stop) {
+			assoc_mgr_lock(&qos_read_lock);
+			if (qos_part_ptr->blocked_until > later_start) {
+				later_start = qos_part_ptr->blocked_until;
+				if (debug_flags & DEBUG_FLAG_BACKFILL)
+					info("Part QOS blocked_until move start_res to %ld",
+					     later_start);
+			}
+			assoc_mgr_unlock(&qos_read_lock);
+		}
+
 TRY_LATER:
 		if (slurmctld_config.shutdown_time ||
 		    (difftime(time(NULL), orig_sched_start) >=
@@ -1737,6 +1773,20 @@ next_task:
 					job_ptr->start_time = later_start;
 				else
 					job_ptr->start_time = now + 500;
+				if (job_ptr->qos_blocking_ptr &&
+				    (job_ptr->state_reason >=
+				     WAIT_QOS_GRP_CPU &&
+				     job_ptr->state_reason <=
+				     WAIT_QOS_GRP_WALL)) {
+					assoc_mgr_lock(&qos_read_lock);
+					qos_ptr = job_ptr->qos_blocking_ptr;
+					if (qos_ptr->blocked_until <
+					    job_ptr->start_time) {
+						qos_ptr->blocked_until =
+							job_ptr->start_time;
+					}
+					assoc_mgr_unlock(&qos_read_lock);
+				}
 			} else if (rc != SLURM_SUCCESS) {
 				if (debug_flags & DEBUG_FLAG_BACKFILL) {
 					info("backfill: planned start of job %u"
diff --git a/src/slurmctld/acct_policy.c b/src/slurmctld/acct_policy.c
index 88cb197886553883a4b155dd467d53734190c8e8..a2533b54b88c6cdca3544d7f07f229d34083e49c 100644
--- a/src/slurmctld/acct_policy.c
+++ b/src/slurmctld/acct_policy.c
@@ -2156,6 +2156,8 @@ static int _qos_job_runnable_post_select(struct job_record *job_ptr,
 	/* we don't need to check max_wall_pj here */
 
 end_it:
+	if (!rc)
+		job_ptr->qos_blocking_ptr = qos_ptr;
 	return rc;
 }
 
@@ -3005,6 +3007,8 @@ extern bool acct_policy_job_runnable_post_select(
 		job_ptr->state_reason = WAIT_NO_REASON;
 	}
 
+	job_ptr->qos_blocking_ptr = NULL;
+
 	/* clang needs this memset to avoid a warning */
 	memset(tres_run_mins, 0, sizeof(tres_run_mins));
 	memset(tres_usage_mins, 0, sizeof(tres_usage_mins));
diff --git a/src/slurmctld/job_mgr.c b/src/slurmctld/job_mgr.c
index 66cd4ebd540160f0e5a24110c8108db2d76d2ae9..a16b07ed988583a95cc5ccd62d41d089391da101 100644
--- a/src/slurmctld/job_mgr.c
+++ b/src/slurmctld/job_mgr.c
@@ -15124,6 +15124,10 @@ extern int job_hold_by_qos_id(uint32_t qos_id)
 	lock_slurmctld(job_write_lock);
 	job_iterator = list_iterator_create(job_list);
 	while ((job_ptr = (struct job_record *) list_next(job_iterator))) {
+		if (job_ptr->qos_blocking_ptr &&
+		    ((slurmdb_qos_rec_t *)job_ptr->qos_blocking_ptr)->id
+		    != qos_id)
+			job_ptr->qos_blocking_ptr = NULL;
 		if (job_ptr->qos_id != qos_id)
 			continue;
 
diff --git a/src/slurmctld/slurmctld.h b/src/slurmctld/slurmctld.h
index 738a20cb90e05bea538447d19487841c12a0acfb..63d3b09ab87079fa80555e2986321c21fd42209b 100644
--- a/src/slurmctld/slurmctld.h
+++ b/src/slurmctld/slurmctld.h
@@ -722,6 +722,7 @@ struct job_record {
 					 * void* because of interdependencies
 					 * in the header files, confirm the
 					 * value before use */
+	void *qos_blocking_ptr;		/* internal use only, DON'T PACK */
 	uint8_t reboot;		/* node reboot requested before start */
 	uint16_t restart_cnt;	/* count of restarts */
 	time_t resize_time;	/* time of latest size change */
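
Taken together, the patch makes the backfill scheduler remember, per QOS, the earliest start time planned for a job that was blocked by a QOS group limit (the new blocked_until field, recorded via qos_blocking_ptr), and, when assoc_limit_stop is set, it refuses to consider later jobs under the same job or partition QOS before that time, so lower-priority work cannot leapfrog the blocked job. The following is a minimal standalone sketch of that gating pattern, not Slurm code; the struct and helper names (qos_rec, gate_start_time, record_block) are simplified stand-ins for illustration.

/*
 * Sketch of the blocked_until gating idea: one QOS record is shared by
 * several jobs; once a job under that QOS is found blocked, later jobs
 * under the same QOS are not considered before its planned start time.
 */
#include <stdio.h>
#include <time.h>

struct qos_rec {
	const char *name;
	time_t blocked_until;	/* 0 = not currently blocking anything */
};

struct job {
	unsigned id;
	struct qos_rec *qos;		/* job QOS */
	struct qos_rec *part_qos;	/* partition QOS, may be NULL */
};

/* Push a job's earliest candidate start time past any QOS block,
 * mirroring the later_start adjustment added in _attempt_backfill(). */
static time_t gate_start_time(const struct job *job, time_t later_start)
{
	if (job->qos && job->qos->blocked_until > later_start)
		later_start = job->qos->blocked_until;
	if (job->part_qos && job->part_qos->blocked_until > later_start)
		later_start = job->part_qos->blocked_until;
	return later_start;
}

/* When a job is blocked by a QOS limit, remember its planned start time
 * so lower-priority jobs in the same QOS cannot be backfilled before it. */
static void record_block(struct qos_rec *qos, time_t planned_start)
{
	if (qos->blocked_until < planned_start)
		qos->blocked_until = planned_start;
}

int main(void)
{
	time_t now = time(NULL);
	struct qos_rec normal = { "normal", 0 };
	struct job j1 = { 101, &normal, NULL };
	struct job j2 = { 102, &normal, NULL };

	/* j1 hits a QOS group limit and can only start in one hour. */
	record_block(j1.qos, now + 3600);

	/* j2 shares the QOS, so its earliest backfill start is pushed out. */
	time_t start = gate_start_time(&j2, now);
	printf("job %u earliest start pushed %ld seconds into the future\n",
	       j2.id, (long)(start - now));
	return 0;
}

In the patch itself the same idea is split across acct_policy.c, which records which QOS blocked the job (qos_blocking_ptr) when the post-select check fails, and backfill.c, which clears the per-QOS times at the start of each backfill cycle and pushes later_start past blocked_until, holding the assoc_mgr QOS read lock around every access to the shared field; job_hold_by_qos_id() drops a stale qos_blocking_ptr when it no longer matches the QOS being modified.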