diff --git a/NEWS b/NEWS index a7ef18f9384346049520d9888637f4954f81d70c..5fd911fb9a822797f3ba66ae9be69c0536bf047b 100644 --- a/NEWS +++ b/NEWS @@ -18,6 +18,8 @@ documents those changes that are of interest to users and admins. -- switch/nrt - Don't allocate network resources unless job step has 2+ nodes. -- select/cons_res - Avoid extraneous "oversubscribe" error messages. -- Reorder get config logic to avoid deadlock. + -- Enforce QOS MaxCPUMins limit when job submission contains no user-specified + time limit. * Changes in Slurm 2.5.7 ======================== diff --git a/src/slurmctld/acct_policy.c b/src/slurmctld/acct_policy.c index 24516713cf3972b4a0a364a7502a905153533593..099e777868111379d24f04abbbbdfe13cb876efc 100644 --- a/src/slurmctld/acct_policy.c +++ b/src/slurmctld/acct_policy.c @@ -441,9 +441,8 @@ extern bool acct_policy_validate(job_desc_msg_t *job_desc, bool rc = true; uint32_t qos_max_cpus_limit = INFINITE; uint32_t qos_max_nodes_limit = INFINITE; + uint32_t qos_time_limit = INFINITE; uint32_t job_memory = 0; - uint64_t cpu_time_limit; - uint64_t job_cpu_time_limit; bool admin_set_memory_limit = false; assoc_mgr_lock_t locks = { READ_LOCK, NO_LOCK, READ_LOCK, NO_LOCK, NO_LOCK }; @@ -647,23 +646,14 @@ extern bool acct_policy_validate(job_desc_msg_t *job_desc, * if you can end up in PENDING QOSJobLimit, you need * to validate it if DenyOnLimit is set */ - if (strict_checking && (qos_ptr->max_cpu_mins_pj != INFINITE) - && (job_desc->time_limit != NO_VAL) - && (job_desc->min_cpus != NO_VAL)) { - cpu_time_limit = qos_ptr->max_cpu_mins_pj; - job_cpu_time_limit = (uint64_t)job_desc->time_limit - * (uint64_t)job_desc->min_cpus; - if (job_cpu_time_limit > cpu_time_limit) { - if (reason) - *reason = WAIT_QOS_JOB_LIMIT; - debug2("job submit for user %s(%u): " - "cpu time limit %"PRIu64" exceeds " - "qos max per-job %"PRIu64"", - user_name, job_desc->user_id, - job_cpu_time_limit, cpu_time_limit); - rc = false; - goto end_it; - } + if (((job_desc->min_cpus != 
NO_VAL) || + (job_desc->min_nodes != NO_VAL)) && + (qos_ptr->max_cpu_mins_pj != INFINITE)) { + uint32_t cpu_cnt = job_desc->min_nodes; + if ((job_desc->min_nodes == NO_VAL) || + (job_desc->min_cpus > job_desc->min_nodes)) + cpu_cnt = job_desc->min_cpus; + qos_time_limit = qos_ptr->max_cpu_mins_pj / cpu_cnt; } if ((acct_policy_limit_set->max_cpus == ADMIN_SET_LIMIT) @@ -770,33 +760,36 @@ extern bool acct_policy_validate(job_desc_msg_t *job_desc, || (qos_ptr->max_wall_pj == INFINITE) || (update_call && (job_desc->time_limit == NO_VAL))) { /* no need to check/set */ - } else { - time_limit = qos_ptr->max_wall_pj; + } else if (qos_time_limit > qos_ptr->max_wall_pj) { + qos_time_limit = qos_ptr->max_wall_pj; + } + + if (qos_time_limit != INFINITE) { if (job_desc->time_limit == NO_VAL) { if (part_ptr->max_time == INFINITE) - job_desc->time_limit = time_limit; - else + job_desc->time_limit = qos_time_limit; + else { job_desc->time_limit = - MIN(time_limit, + MIN(qos_time_limit, part_ptr->max_time); + } acct_policy_limit_set->time = 1; } else if (acct_policy_limit_set->time && - job_desc->time_limit > time_limit) { - job_desc->time_limit = time_limit; + job_desc->time_limit > qos_time_limit) { + job_desc->time_limit = qos_time_limit; } else if (strict_checking - && job_desc->time_limit > time_limit) { + && job_desc->time_limit > qos_time_limit) { if (reason) *reason = WAIT_QOS_JOB_LIMIT; debug2("job submit for user %s(%u): " "time limit %u exceeds qos max %u", user_name, job_desc->user_id, - job_desc->time_limit, time_limit); + job_desc->time_limit, qos_time_limit); rc = false; goto end_it; } } - } while (assoc_ptr) {