diff --git a/NEWS b/NEWS index be529ef12959e69cbd186445c0d4ae7763e3c6a6..19e8a866bf1c2eb0d4880ff14f871abea4dccf5a 100644 --- a/NEWS +++ b/NEWS @@ -59,6 +59,11 @@ documents those changes that are of interest to users and admins. -- Fix for accounting where your cluster isn't numbered in counting order (i.e. 1-9,0 instead of 0-9). The bug would cause 'sacct -N nodename' to not give correct results on these systems. + -- Fix to GRES allocation logic when resources are associated with specific + CPUs on a node. Patch from Steve Trofinoff, CSCS. + -- Fix bugs in sched/backfill with respect to QOS reservation support and job + time limits. Patch from Alejandro Lucero Palau (Barcelona Supercomputer + Center). * Changes in SLURM 2.3.0-2 ========================== diff --git a/doc/man/man1/sacctmgr.1 b/doc/man/man1/sacctmgr.1 index 9fe7558b4bd21732b3db014266a4d3ac5215954c..17cd414e236976c739bcc623d5ba938475223f25 100644 --- a/doc/man/man1/sacctmgr.1 +++ b/doc/man/man1/sacctmgr.1 @@ -914,12 +914,12 @@ any jobs submitted with this QOS that fall below the UsageThreshold will be held until their Fairshare Usage goes above the Threshold. .TP \fINoReserve\fP -If set, and using backfill, jobs using this QOS will all be considered -at the same level within this QOS meaning if a larger, higher priority -job is unable to run a smaller job will run if possible even if the -larger higher priority job will be delayed starting. -NOTE: This could cause starvation on these larger jobs, but if that is -ok, this flag most likely will increase utilization. +If this flag is set and backfill scheduling is used, jobs using this QOS will +not reserve resources in the backfill schedule's map of resources allocated +through time. This flag is intended for use with a QOS that may be preempted +by jobs associated with all other QOS (e.g. use with a "standby" QOS). If the +flag is used with a QOS which can not be preempted by all other QOS, it +could result in starvation of larger jobs. 
.TP \fIPartitionMaxNodes\fP If set jobs using this QOS will be able to diff --git a/src/common/gres.c b/src/common/gres.c index d48e3e2b8ccca9661713657c7bcbeec247d37065..7debdd3581d49952837d93a6a494d9eb5ea471e8 100644 --- a/src/common/gres.c +++ b/src/common/gres.c @@ -2653,7 +2653,12 @@ extern uint32_t gres_plugin_job_test(List job_gres_list, List node_gres_list, cpu_start_bit, cpu_end_bit, &topo_set, job_id, node_name, gres_context[i].gres_name); - cpu_cnt = MIN(tmp_cnt, cpu_cnt); + if (tmp_cnt != NO_VAL) { + if (cpu_cnt == NO_VAL) + cpu_cnt = tmp_cnt; + else + cpu_cnt = MIN(tmp_cnt, cpu_cnt); + } break; } if (cpu_cnt == 0) @@ -4062,7 +4067,12 @@ extern uint32_t gres_plugin_step_test(List step_gres_list, List job_gres_list, node_offset, ignore_alloc, gres_context[i].gres_name, job_id, step_id); - cpu_cnt = MIN(tmp_cnt, cpu_cnt); + if (tmp_cnt != NO_VAL) { + if (cpu_cnt == NO_VAL) + cpu_cnt = tmp_cnt; + else + cpu_cnt = MIN(tmp_cnt, cpu_cnt); + } break; } if (cpu_cnt == 0) diff --git a/src/plugins/sched/backfill/backfill.c b/src/plugins/sched/backfill/backfill.c index f1471eec6da4b8973a627c8b449971916fdfe436..efb17bc6bb6b2971e56cf65f4b5db75a8df0a263 100644 --- a/src/plugins/sched/backfill/backfill.c +++ b/src/plugins/sched/backfill/backfill.c @@ -567,6 +567,7 @@ static int _attempt_backfill(void) } comp_time_limit = time_limit; orig_time_limit = job_ptr->time_limit; + qos_ptr = job_ptr->qos_ptr; if (qos_ptr && (qos_ptr->flags & QOS_FLAG_NO_RESERVE)) time_limit = job_ptr->time_limit = 1; else if (job_ptr->time_min && (job_ptr->time_min < time_limit)) @@ -634,15 +635,17 @@ static int _attempt_backfill(void) bit_not(resv_bitmap); if ((time(NULL) - sched_start) >= this_sched_timeout) { + uint32_t save_time_limit = job_ptr->time_limit; + job_ptr->time_limit = orig_time_limit; debug("backfill: loop taking too long, yielding locks"); if (_yield_locks()) { debug("backfill: system state changed, " "breaking out"); rc = 1; break; - } else { - this_sched_timeout += 
sched_timeout; } + job_ptr->time_limit = save_time_limit; + this_sched_timeout += sched_timeout; } /* this is the time consuming operation */ debug2("backfill: entering _try_sched for job %u.", @@ -664,8 +667,11 @@ static int _attempt_backfill(void) } if (job_ptr->start_time <= now) { int rc = _start_job(job_ptr, resv_bitmap); - if (qos_ptr && (qos_ptr->flags & QOS_FLAG_NO_RESERVE)) + if (qos_ptr && (qos_ptr->flags & QOS_FLAG_NO_RESERVE)){ job_ptr->time_limit = orig_time_limit; + job_ptr->end_time = job_ptr->start_time + + (orig_time_limit * 60); + } else if ((rc == SLURM_SUCCESS) && job_ptr->time_min) { /* Set time limit as high as possible */ job_ptr->time_limit = comp_time_limit; @@ -724,7 +730,6 @@ static int _attempt_backfill(void) /* * Add reservation to scheduling table if appropriate */ - qos_ptr = job_ptr->qos_ptr; if (qos_ptr && (qos_ptr->flags & QOS_FLAG_NO_RESERVE)) continue; bit_not(avail_bitmap);