diff --git a/NEWS b/NEWS index aa26ff193e6dec4d02efbfed25d0d813f21b6c11..895a35cb3330ba3192c3abfeae57c1fa0f404bdc 100644 --- a/NEWS +++ b/NEWS @@ -106,6 +106,10 @@ documents those changes that are of interest to users and admins. -- jobacct_gather - fix total values to not always == the max values. -- Fix for handling node registration messages from older versions without energy data. + -- BGQ - Allow user to request full dimensional mesh. + -- sdiag command - Correction to jobs started value reported. + -- Prevent slurmctld assert when invalid change to reservation with running + jobs is made. * Changes in SLURM 2.5.3 ======================== diff --git a/doc/html/accounting.shtml b/doc/html/accounting.shtml index 38218d74af1b06ba00bec0fd40b6018d52c4819f..a580a1ef11873014a1e855a14bb0bec5428cb354 100644 --- a/doc/html/accounting.shtml +++ b/doc/html/accounting.shtml @@ -646,17 +646,21 @@ in the <a href="resource_limits.html">Resource Limits</a> document.</p> this means that the parent association is used for fairshare. </li> -<li><b>GrpCPUMins=</b> A hard limit of cpu minutes to be used by jobs +<li><b>GrpCPUMins=</b> The total number of cpu minutes that can + possibly be used by past, present and future jobs running from this association and its children. If this limit is reached all jobs running in this group will be killed, and no new - jobs will be allowed to run. + jobs will be allowed to run. This usage is decayed (at a rate of + PriorityDecayHalfLife). It can also be reset (according to + PriorityUsageResetPeriod) in order to allow jobs to run against the + association tree again. </li> -<li><b>GrpCPURunMins=</b> Maximum number of CPU minutes all jobs - running with this association and its children can run at the same - time. This takes into consideration time limit of running jobs. If - the limit is reached no new jobs are started until other jobs finish - to allow time to free up. 
+<li><b>GrpCPURunMins=</b> Used to limit the combined total number of CPU + minutes used by all jobs running with this association and its + children. This takes into consideration time limit of + running jobs and consumes it, if the limit is reached no new jobs + are started until other jobs finish to allow time to free up. </li> <li><b>GrpCPUs=</b> The total count of cpus able to be used at any given diff --git a/doc/man/man1/sacctmgr.1 b/doc/man/man1/sacctmgr.1 index cfca365038deb11e4f2003c80ad6af2a5de86971..1df924a7af86b6b27850c02966b7a20a8b31d691 100644 --- a/doc/man/man1/sacctmgr.1 +++ b/doc/man/man1/sacctmgr.1 @@ -253,9 +253,8 @@ NOTE: This value is only meaningful for QOS PreemptMode=CANCEL) .TP \fIGrpCPUMins\fP=<max cpu minutes> -Maximum number of CPU minutes running jobs are able to be allocated in -aggregate for this association and all associations which are children -of this association. +The total number of cpu minutes that can possibly be used by past, +present and future jobs running from this association and its children. To clear a previously set value use the modify command with a new value of \-1. .P @@ -272,11 +271,11 @@ they are able to run inside the limit. .TP \fIGrpCPURunMins\fP=<max cpu run minutes> -Maximum number of CPU minutes all jobs -running with this association and all it's child associations can run -at the same time. This takes into consideration time limit of running -jobs. If the limit is reached no new jobs are started until other -jobs finish to allow time to free up. +Used to limit the combined total number of CPU minutes used by all +jobs running with this association and its children. This takes into +consideration time limit of running jobs and consumes it, if the limit +is reached no new jobs are started until other jobs finish to allow +time to free up. .TP \fIGrpCPUs\fP=<max cpus> @@ -570,17 +569,16 @@ parent association is used for fairshare. 
.TP \fIGrpCPUMins\fP -Maximum number of CPU minutes running jobs are able to be allocated in -aggregate for this association and all associations which are children -of this association. +The total number of cpu minutes that can possibly be used by past, +present and future jobs running from this association and its children. .TP \fIGrpCPURunMins\fP -Maximum number of CPU minutes all jobs -running with this association and all it's child associations can run -at the same time. This takes into consideration time limit of running -jobs. If the limit is reached no new jobs are started until other -jobs finish to allow time to free up. +Used to limit the combined total number of CPU minutes used by all +jobs running with this association and its children. This takes into +consideration time limit of running jobs and consumes it, if the limit +is reached no new jobs are started until other jobs finish to allow +time to free up. .TP \fIGrpCPUs\fP @@ -980,15 +978,15 @@ selected for preemption. .TP \fIGrpCPUMins\fP -Maximum number of CPU minutes running jobs are able to be allocated in -aggregate for this QOS. +The total number of cpu minutes that can possibly be used by past, +present and future jobs running from this QOS. .TP -\fIGrpCPURunMins\fP Maximum number of CPU minutes all jobs -running with this QOS can run at the same time. This takes into -consideration time limit of running jobs. If the limit is reached -no new jobs are started until other jobs finish to allow time to -free up. +\fIGrpCPURunMins\fP Used to limit the combined total number of CPU +minutes used by all jobs running with this QOS. This takes into +consideration time limit of running jobs and consumes it, if the limit +is reached no new jobs are started until other jobs finish to allow +time to free up. .TP \fIGrpCPUs\fP @@ -1104,8 +1102,8 @@ NOTE: This value is only meaningful for QOS PreemptMode=CANCEL. 
.TP \fIGrpCPUMins\fP -Maximum number of CPU minutes running jobs are able to be allocated in -aggregate for this QOS. +The total number of cpu minutes that can possibly be used by past, +present and future jobs running from this QOS. To clear a previously set value use the modify command with a new value of \-1. NOTE: This limit only applies when using the Priority Multifactor @@ -1462,18 +1460,20 @@ Anything included on this line will be the defaults for all associations on this cluster. These options are as follows... .TP \fIGrpCPUMins=\fP -Maximum number of CPU hours running jobs are able to -be allocated in aggregate for this association and all associations -which are children of this association. (NOTE: This limit is not -currently enforced in SLURM. You can still set this, but have to wait -for future versions of SLURM before it is enforced.) +The total number of cpu minutes that can possibly be used by past, +present and future jobs running from this association and its children. +.TP +\fIGrpCPURunMins=\fP +Used to limit the combined total number of CPU minutes used by all +jobs running with this association and its children. This takes into +consideration time limit of running jobs and consumes it, if the limit +is reached no new jobs are started until other jobs finish to allow +time to free up. .TP \fIGrpCPUs=\fP Maximum number of CPUs running jobs are able to be allocated in aggregate for this association and all associations which -are children of this association. (NOTE: This limit is not currently -enforced in SLURM. You can still set this, but have to wait for future -versions of SLURM before it is enforced.) +are children of this association. .TP \fIGrpJobs=\fP Maximum number of running jobs in aggregate for this @@ -1538,16 +1538,19 @@ A brief description of the account. \fIGrpCPUMins=\fP Maximum number of CPU hours running jobs are able to be allocated in aggregate for this association and all associations -which are children of this association. 
(NOTE: This limit is not -currently enforced in SLURM. You can still set this, but have to wait -for future versions of SLURM before it is enforced.) +which are children of this association. +.TP +\fIGrpCPURunMins=\fP +Used to limit the combined total number of CPU minutes used by all +jobs running with this association and its children. This takes into +consideration time limit of running jobs and consumes it, if the limit +is reached no new jobs are started until other jobs finish to allow +time to free up. .TP \fIGrpCPUs=\fP Maximum number of CPUs running jobs are able to be allocated in aggregate for this association and all associations which -are children of this association. (NOTE: This limit is not currently -enforced in SLURM. You can still set this, but have to wait for future -versions of SLURM before it is enforced.) +are children of this association. .TP \fIGrpJobs=\fP Maximum number of running jobs in aggregate for this diff --git a/src/plugins/select/bluegene/ba_bgq/block_allocator.c b/src/plugins/select/bluegene/ba_bgq/block_allocator.c index 299fa01dc4421e878b0d254892d19d57b89a9ffa..7201961c0333be31a6d1645126eddaf9310e6eea 100644 --- a/src/plugins/select/bluegene/ba_bgq/block_allocator.c +++ b/src/plugins/select/bluegene/ba_bgq/block_allocator.c @@ -848,11 +848,10 @@ extern char *set_bg_block(List results, select_ba_request_t* ba_request) small block allocations. */ for (dim=0; dim<cluster_dims; dim++) { - if (((ba_request->conn_type[dim] == SELECT_MESH) - || (ba_request->conn_type[dim] == SELECT_NAV)) - && ((ba_geo_table->geometry[dim] == 1) - || (ba_geo_table->geometry[dim] - == DIM_SIZE[dim]))) { + if ((ba_geo_table->geometry[dim] == 1) + || ((ba_geo_table->geometry[dim] == DIM_SIZE[dim]) + && (ba_request->conn_type[dim] + == SELECT_NAV))) { /* On a Q all single midplane blocks * must be a TORUS. 
* diff --git a/src/plugins/select/bluegene/bg_job_info.c b/src/plugins/select/bluegene/bg_job_info.c index cf98ccd73f7ef327879c9aafea6869ce7b1c67e7..511c4cc1a5c88d697111beca04afa5c5c7fa35d7 100644 --- a/src/plugins/select/bluegene/bg_job_info.c +++ b/src/plugins/select/bluegene/bg_job_info.c @@ -150,7 +150,7 @@ extern int set_select_jobinfo(select_jobinfo_t *jobinfo, /* Make sure the conn type is correct with the * new count */ if (new_size > 1) { - if (first_conn_type != (uint16_t)NO_VAL) + if (first_conn_type == (uint16_t)NO_VAL) jobinfo->conn_type[i] = SELECT_NAV; else if (first_conn_type >= SELECT_SMALL) jobinfo->conn_type[i] = diff --git a/src/plugins/select/cray/libalps/do_query.c b/src/plugins/select/cray/libalps/do_query.c index f19d175bd9e877b670475add6937023510b36983..0e108f53f06f65c34dd022ff977d86ad5222a31f 100644 --- a/src/plugins/select/cray/libalps/do_query.c +++ b/src/plugins/select/cray/libalps/do_query.c @@ -83,6 +83,8 @@ extern enum basil_version get_basil_version(void) else if ((strncmp(engine_version, "latest", 6) == 0) || (strncmp(engine_version, "5.1.0", 5) == 0)) bv = BV_5_1; + else if (strncmp(engine_version, "5.0.2", 5) == 0) + bv = BV_5_0; else if (strncmp(engine_version, "5.0.1", 5) == 0) bv = BV_5_0; else if (strncmp(engine_version, "5.0.0", 5) == 0) diff --git a/src/sdiag/sdiag.c b/src/sdiag/sdiag.c index f418b5ffc1a44cb09e1ae4fe9793da748eb12798..8939da651c76e7c63b6fbb55cace9d18a0984b5e 100644 --- a/src/sdiag/sdiag.c +++ b/src/sdiag/sdiag.c @@ -111,8 +111,7 @@ static int _print_info(void) printf("Server thread count: %d\n", buf->server_thread_count); printf("Agent queue size: %d\n\n", buf->agent_queue_size); printf("Jobs submitted: %d\n", buf->jobs_submitted); - printf("Jobs started: %d\n", - buf->jobs_started + buf->bf_last_backfilled_jobs); + printf("Jobs started: %d\n", buf->jobs_started); printf("Jobs completed: %d\n", buf->jobs_completed); printf("Jobs canceled: %d\n", buf->jobs_canceled); printf("Jobs failed: %d\n", 
buf->jobs_failed); diff --git a/src/slurmctld/reservation.c b/src/slurmctld/reservation.c index 202e2d2066b421d7c18cfa3ba4a9843ae782f481..b8e8896b9b56a086bbbd86ea0dfac905fbf8e207 100644 --- a/src/slurmctld/reservation.c +++ b/src/slurmctld/reservation.c @@ -136,6 +136,8 @@ static int _post_resv_delete(slurmctld_resv_t *resv_ptr); static int _post_resv_update(slurmctld_resv_t *resv_ptr, slurmctld_resv_t *old_resv_ptr); static int _resize_resv(slurmctld_resv_t *resv_ptr, uint32_t node_cnt); +static void _restore_resv(slurmctld_resv_t *dest_resv, + slurmctld_resv_t *src_resv); static bool _resv_overlap(time_t start_time, time_t end_time, uint16_t flags, bitstr_t *node_bitmap, slurmctld_resv_t *this_resv_ptr); @@ -249,6 +251,96 @@ static slurmctld_resv_t *_copy_resv(slurmctld_resv_t *resv_orig_ptr) return resv_copy_ptr; } +/* Move the contents of src_resv into dest_resv. + * NOTE: This is a destructive function with respect to the contents of + * src_resv. The data structure src_resv is suitable only for destruction + * after this function is called */ +static void _restore_resv(slurmctld_resv_t *dest_resv, + slurmctld_resv_t *src_resv) +{ + int i; + + xfree(dest_resv->accounts); + dest_resv->accounts = src_resv->accounts; + src_resv->accounts = NULL; + + for (i = 0; i < dest_resv->account_cnt; i++) + xfree(dest_resv->account_list[i]); + xfree(dest_resv->account_list); + dest_resv->account_cnt = src_resv->account_cnt; + src_resv->account_cnt = 0; + dest_resv->account_list = src_resv->account_list; + src_resv->account_list = NULL; + + dest_resv->account_not = src_resv->account_not; + + xfree(dest_resv->assoc_list); + dest_resv->assoc_list = src_resv->assoc_list; + src_resv->assoc_list = NULL; + + FREE_NULL_BITMAP(dest_resv->core_bitmap); + dest_resv->core_bitmap = src_resv->core_bitmap; + src_resv->core_bitmap = NULL; + + dest_resv->cpu_cnt = src_resv->cpu_cnt; + dest_resv->duration = src_resv->duration; + dest_resv->end_time = src_resv->end_time; + + 
xfree(dest_resv->features); + dest_resv->features = src_resv->features; + src_resv->features = NULL; + + dest_resv->flags = src_resv->flags; + dest_resv->full_nodes = src_resv->full_nodes; + dest_resv->job_pend_cnt = src_resv->job_pend_cnt; + dest_resv->job_run_cnt = src_resv->job_run_cnt; + + xfree(dest_resv->licenses); + dest_resv->licenses = src_resv->licenses; + src_resv->licenses = NULL; + + if (dest_resv->license_list) + list_destroy(dest_resv->license_list); + dest_resv->license_list = src_resv->license_list; + src_resv->license_list = NULL; + + dest_resv->magic = src_resv->magic; + dest_resv->maint_set_node = src_resv->maint_set_node; + + xfree(dest_resv->name); + dest_resv->name = src_resv->name; + src_resv->name = NULL; + + FREE_NULL_BITMAP(dest_resv->node_bitmap); + dest_resv->node_bitmap = src_resv->node_bitmap; + src_resv->node_bitmap = NULL; + + dest_resv->node_cnt = src_resv->node_cnt; + + xfree(dest_resv->node_list); + dest_resv->node_list = src_resv->node_list; + src_resv->node_list = NULL; + + xfree(dest_resv->partition); + dest_resv->partition = src_resv->partition; + src_resv->partition = NULL; + + dest_resv->part_ptr = src_resv->part_ptr; + dest_resv->resv_id = src_resv->resv_id; + dest_resv->start_time = src_resv->start_time; + dest_resv->start_time_first = src_resv->start_time_first; + dest_resv->start_time_prev = src_resv->start_time_prev; + + xfree(dest_resv->users); + dest_resv->users = src_resv->users; + src_resv->users = NULL; + + dest_resv->user_cnt = src_resv->user_cnt; + xfree(dest_resv->user_list); + dest_resv->user_list = src_resv->user_list; + src_resv->user_list = NULL; +} + static void _del_resv_rec(void *x) { int i; @@ -1818,11 +1910,10 @@ extern void resv_fini(void) extern int update_resv(resv_desc_msg_t *resv_desc_ptr) { time_t now = time(NULL); - slurmctld_resv_t *resv_backup, *resv_ptr, *resv_next; + slurmctld_resv_t *resv_backup, *resv_ptr; int error_code = SLURM_SUCCESS, i, rc; char start_time[32], end_time[32]; char 
*name1, *name2, *val1, *val2; - ListIterator iter; if (!resv_list) resv_list = list_create(_del_resv_rec); @@ -2133,17 +2224,8 @@ extern int update_resv(resv_desc_msg_t *resv_desc_ptr) update_failure: /* Restore backup reservation data */ - iter = list_iterator_create(resv_list); - while ((resv_next = (slurmctld_resv_t *) list_next(iter))) { - if (resv_next == resv_ptr) { - list_delete_item(iter); - break; - } - } - if (!resv_next) - error("reservation list broken"); - list_iterator_destroy(iter); - list_append(resv_list, resv_backup); + _restore_resv(resv_ptr, resv_backup); + _del_resv_rec(resv_backup); return error_code; }