diff --git a/NEWS b/NEWS index 8037fc5729b4d40279b2b126448ed1c57221c35b..15cc2b40fa5502eadc4a61a6a8eae65c82d8af42 100644 --- a/NEWS +++ b/NEWS @@ -306,6 +306,8 @@ documents those changes that are of interest to users and admins. -- switch/nrt - Don't allocate network resources unless job step has 2+ nodes. -- select/cons_res - Avoid extraneous "oversubscribe" error messages. -- Reorder get config logic to avoid deadlock. + -- Enforce QOS MaxCPUsMin limit when job submission contains no user-specified + time limit. * Changes in Slurm 2.5.7 ======================== diff --git a/src/slurmctld/acct_policy.c b/src/slurmctld/acct_policy.c index 1a737ef71df2635970e6282ec09f38414d12f401..64ff7b42832ffe8d59305a6823a6fbd02b1c746a 100644 --- a/src/slurmctld/acct_policy.c +++ b/src/slurmctld/acct_policy.c @@ -393,9 +393,8 @@ extern bool acct_policy_validate(job_desc_msg_t *job_desc, bool rc = true; uint32_t qos_max_cpus_limit = INFINITE; uint32_t qos_max_nodes_limit = INFINITE; + uint32_t qos_time_limit = INFINITE; uint32_t job_memory = 0; - uint64_t cpu_time_limit; - uint64_t job_cpu_time_limit; bool admin_set_memory_limit = false; assoc_mgr_lock_t locks = { READ_LOCK, NO_LOCK, READ_LOCK, NO_LOCK, NO_LOCK }; @@ -602,23 +601,14 @@ extern bool acct_policy_validate(job_desc_msg_t *job_desc, * if you can end up in PENDING QOSJobLimit, you need * to validate it if DenyOnLimit is set */ - if (strict_checking && (qos_ptr->max_cpu_mins_pj != INFINITE) - && (job_desc->time_limit != NO_VAL) - && (job_desc->min_cpus != NO_VAL)) { - cpu_time_limit = qos_ptr->max_cpu_mins_pj; - job_cpu_time_limit = (uint64_t)job_desc->time_limit - * (uint64_t)job_desc->min_cpus; - if (job_cpu_time_limit > cpu_time_limit) { - if (reason) - *reason = WAIT_QOS_JOB_LIMIT; - debug2("job submit for user %s(%u): " - "cpu time limit %"PRIu64" exceeds " - "qos max per-job %"PRIu64"", - user_name, job_desc->user_id, - job_cpu_time_limit, cpu_time_limit); - rc = false; - goto end_it; - } + if (((job_desc->min_cpus != NO_VAL) || + (job_desc->min_nodes != NO_VAL)) && + (qos_ptr->max_cpu_mins_pj != INFINITE)) { + uint32_t cpu_cnt = job_desc->min_nodes; + if ((job_desc->min_nodes == NO_VAL) || + (job_desc->min_cpus > job_desc->min_nodes)) + cpu_cnt = job_desc->min_cpus; + qos_time_limit = qos_ptr->max_cpu_mins_pj / cpu_cnt; } if ((acct_policy_limit_set->max_cpus == ADMIN_SET_LIMIT) @@ -726,33 +716,36 @@ extern bool acct_policy_validate(job_desc_msg_t *job_desc, || (qos_ptr->max_wall_pj == INFINITE) || (update_call && (job_desc->time_limit == NO_VAL))) { /* no need to check/set */ - } else { - time_limit = qos_ptr->max_wall_pj; + } else if (qos_time_limit > qos_ptr->max_wall_pj) { + qos_time_limit = qos_ptr->max_wall_pj; + } + + if (qos_time_limit != INFINITE) { if (job_desc->time_limit == NO_VAL) { if (part_ptr->max_time == INFINITE) - job_desc->time_limit = time_limit; - else + job_desc->time_limit = qos_time_limit; + else { job_desc->time_limit = - MIN(time_limit, + MIN(qos_time_limit, part_ptr->max_time); + } acct_policy_limit_set->time = 1; } else if (acct_policy_limit_set->time && - job_desc->time_limit > time_limit) { - job_desc->time_limit = time_limit; + job_desc->time_limit > qos_time_limit) { + job_desc->time_limit = qos_time_limit; } else if (strict_checking - && job_desc->time_limit > time_limit) { + && job_desc->time_limit > qos_time_limit) { if (reason) *reason = WAIT_QOS_JOB_LIMIT; debug2("job submit for user %s(%u): " "time limit %u exceeds qos max %u", user_name, job_desc->user_id, - job_desc->time_limit, time_limit); + job_desc->time_limit, qos_time_limit); rc = false; goto end_it; } } - } while (assoc_ptr) {