diff --git a/NEWS b/NEWS
index 23910dac6d544c4e70a7e4b634bfaee00e0ee7b4..3cf831c3253636b8183a15c69318efd86b0529d6 100644
--- a/NEWS
+++ b/NEWS
@@ -63,6 +63,8 @@ documents those changes that are of interest to users and administrators.
  -- Avoid huge malloc if GRES configured with "Type" and huge "Count".
  -- Fix jobs from starting in overlapping reservations that won't finish
     before a "maint" reservation begins.
+ -- When a node is drained while in the mixed state, display its status as
+    draining in sinfo output.
 
 * Changes in Slurm 14.11.1
 ==========================
diff --git a/doc/man/man5/slurm.conf.5 b/doc/man/man5/slurm.conf.5
index b8463af670f745bdba566f997d919eaedb5d157c..06b1fb2f3ccd0b2d987f5cd6638a10ea24811c07 100644
--- a/doc/man/man5/slurm.conf.5
+++ b/doc/man/man5/slurm.conf.5
@@ -2401,6 +2401,9 @@ The following options are supported for \fBSelectType=select/cons_res\fR:
 \fBCR_ALLOCATE_FULL_SOCKET\fR
 Jobs are allocated whole sockets rather than individual cores.
 Must be used with \fBCR_Socket\fR or \fBCR_Socket_Memory\fR option.
+NOTE: This is also needed for more accurate accounting. Without it, the
+number of CPUs allocated to the job will not be rounded up to include
+cores that are allocated to the job but not used by it.
 .TP
 \fBCR_CPU\fR
 CPUs are consumable resources.
@@ -2474,17 +2477,21 @@ Sockets are consumable resources.
 On nodes with multiple cores, each core or thread is counted as a CPU to
 satisfy a job's resource requirement, but multiple jobs are not allocated
 resources on the same socket.
-The count of CPUs allocated to a job may be rounded up to account for every
-CPU on an allocated socket.
+NOTE: With this option, even though the entire socket is allocated to
+the job, the count of CPUs allocated to the job will not be rounded up
+to account for every CPU on an allocated socket unless the
+\fBCR_ALLOCATE_FULL_SOCKET\fR option is also set.
 .TP
 \fBCR_Socket_Memory\fR
 Memory and sockets are consumable resources.
 On nodes with multiple cores, each core or thread is counted as a CPU to
 satisfy a job's resource requirement, but multiple jobs are not allocated
 resources on the same socket.
-The count of CPUs allocated to a job may be rounded up to account for every
-CPU on an allocated socket.
 Setting a value for \fBDefMemPerCPU\fR is strongly recommended.
+NOTE: With this option, even though the entire socket is allocated to
+the job, the count of CPUs allocated to the job will not be rounded up
+to account for every CPU on an allocated socket unless the
+\fBCR_ALLOCATE_FULL_SOCKET\fR option is also set.
 .TP
 \fBCR_Memory\fR
 Memory is a consumable resource.
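To make the accounting difference described in the two NOTEs concrete, here is a minimal standalone sketch; it is not Slurm's allocation code, and billed_cpus() and its parameters are illustrative names only. With CR_Socket alone a job is charged only the CPUs it requested, even though whole sockets are set aside for it; CR_ALLOCATE_FULL_SOCKET rounds the charge up to whole sockets.

#include <stdio.h>

/* hypothetical helper: how many CPUs a job is charged for */
static unsigned int billed_cpus(unsigned int req_cpus,
				unsigned int cores_per_socket,
				int allocate_full_socket)
{
	unsigned int sockets;

	if (!allocate_full_socket)
		return req_cpus;	/* unused cores are not counted */

	/* round up to a whole number of sockets */
	sockets = (req_cpus + cores_per_socket - 1) / cores_per_socket;
	return sockets * cores_per_socket;
}

int main(void)
{
	printf("%u\n", billed_cpus(10, 8, 0));	/* 10 */
	printf("%u\n", billed_cpus(10, 8, 1));	/* 16 */
	return 0;
}

A 10-CPU request on 8-core sockets occupies two sockets either way; only with CR_ALLOCATE_FULL_SOCKET is the job accounted for all 16 cores.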
diff --git a/src/common/slurm_protocol_defs.c b/src/common/slurm_protocol_defs.c
index 12e4672d0991077aa69cfc08a3bee63c53da5d27..d984a0f14a90c7e35dff6e7ac0802acaa9c063c5 100644
--- a/src/common/slurm_protocol_defs.c
+++ b/src/common/slurm_protocol_defs.c
@@ -1753,7 +1753,9 @@ extern char *node_state_string(uint32_t inx)
 			return "MAINT";
 	}
 	if (drain_flag) {
-		if (comp_flag || (base == NODE_STATE_ALLOCATED)) {
+		if (comp_flag
+		    || (base == NODE_STATE_ALLOCATED)
+		    || (base == NODE_STATE_MIXED)) {
 			if (no_resp_flag)
 				return "DRAINING*";
 			return "DRAINING";
@@ -1761,10 +1763,6 @@ extern char *node_state_string(uint32_t inx)
 			if (no_resp_flag)
 				return "ERROR*";
 			return "ERROR";
-		} else if (base == NODE_STATE_MIXED) {
-			if (no_resp_flag)
-				return "MIXED*";
-			return "MIXED";
 		} else {
 			if (no_resp_flag)
 				return "DRAINED*";
@@ -1887,7 +1885,9 @@ extern char *node_state_string_compact(uint32_t inx)
 			return "MAINT";
 	}
 	if (drain_flag) {
-		if (comp_flag || (inx == NODE_STATE_ALLOCATED)) {
+		if (comp_flag
+		    || (inx == NODE_STATE_ALLOCATED)
+		    || (inx == NODE_STATE_MIXED)) {
 			if (no_resp_flag)
 				return "DRNG*";
 			return "DRNG";
@@ -1895,10 +1895,6 @@ extern char *node_state_string_compact(uint32_t inx)
 			if (no_resp_flag)
 				return "ERROR*";
 			return "ERROR";
-		} else if (inx == NODE_STATE_MIXED) {
-			if (no_resp_flag)
-				return "MIXED*";
-			return "MIXED";
 		} else {
 			if (no_resp_flag)
 				return "DRAIN*";
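The net effect of the node_state_string() and node_state_string_compact() changes: a drained node that still has work on it (allocated, mixed, or completing) now reports DRAINING instead of MIXED, and only an idle drained node reports DRAINED. A simplified sketch of that precedence follows; drain_state_string() and the flag and state values are illustrative, not the real slurm.h encoding.

#include <stdio.h>

enum base_state { STATE_IDLE, STATE_ALLOCATED, STATE_MIXED };
#define FLAG_DRAIN	0x0100
#define FLAG_COMPLETING	0x0200

static const char *drain_state_string(unsigned int state)
{
	enum base_state base = (enum base_state)(state & 0x00ff);

	if (!(state & FLAG_DRAIN))
		return "(not draining)";
	if ((state & FLAG_COMPLETING) ||
	    (base == STATE_ALLOCATED) || (base == STATE_MIXED))
		return "DRAINING";	/* node still has running jobs */
	return "DRAINED";		/* node is idle */
}

int main(void)
{
	printf("%s\n", drain_state_string(STATE_MIXED | FLAG_DRAIN)); /* DRAINING */
	printf("%s\n", drain_state_string(STATE_IDLE | FLAG_DRAIN));  /* DRAINED */
	return 0;
}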
diff --git a/src/plugins/priority/multifactor/fair_tree.c b/src/plugins/priority/multifactor/fair_tree.c
index f285c68b2cce4c808e7830ceab86f88fca094489..15ceb369ca6979d85289c3cbdfb79ebcdb627b7b 100644
--- a/src/plugins/priority/multifactor/fair_tree.c
+++ b/src/plugins/priority/multifactor/fair_tree.c
@@ -196,7 +196,8 @@ static void _calc_assoc_fs(slurmdb_assoc_rec_t *assoc)
 	}
 
 	/* If S is 0, the assoc is assigned the lowest possible LF value. If
-	 * U==0 && S!=0, assoc is assigned the highest possible value, infinity.
+	 * U==0 && S!=0, assoc is assigned the highest possible value,
+	 * infinity.
 	 * Checking for U==0 then setting level_fs=INFINITY is not the same
 	 * since you would still have to check for S==0 then set level_fs=0.
 	 *
@@ -207,35 +208,51 @@ static void _calc_assoc_fs(slurmdb_assoc_rec_t *assoc)
 	assoc->usage->level_fs = S / U;
 }
 
-
-static slurmdb_assoc_rec_t** _append_children_to_array(
+/* Append list of associations to array
+ * IN list - list of associations
+ * IN merged - array of associations to append to
+ * IN/OUT merged_size - number of associations in merged array
+ * RET - New array. Must be freed.
+ */
+static slurmdb_assoc_rec_t** _append_list_to_array(
 	List list, slurmdb_assoc_rec_t** merged,
-	size_t *child_count)
+	size_t *merged_size)
 {
 	ListIterator itr;
 	slurmdb_assoc_rec_t *next;
-	size_t i = *child_count;
-	*child_count += list_count(list);
+	size_t bytes;
+	size_t i = *merged_size;
+	*merged_size += list_count(list);
 
-	merged = xrealloc(merged, sizeof(slurmdb_assoc_rec_t*)
-			  * (*child_count + 1));
+	/* must be null-terminated, so add one extra slot */
+	bytes = sizeof(slurmdb_assoc_rec_t*) * (*merged_size + 1);
+	merged = xrealloc(merged, bytes);
 
 	itr = list_iterator_create(list);
 	while ((next = list_next(itr)))
 		merged[i++] = next;
 	list_iterator_destroy(itr);
 
+	/* null terminate the array */
+	merged[*merged_size] = NULL;
 	return merged;
 }
 
-
+/* Returns number of tied sibling accounts.
+ * IN assocs - array of siblings, sorted by level_fs
+ * IN begin_ndx - begin looking for ties at this index
+ * RET - number of sibling accounts with equal level_fs values
+ */
 static size_t _count_tied_accounts(slurmdb_assoc_rec_t** assocs,
-				   size_t i)
+				   size_t begin_ndx)
 {
 	slurmdb_assoc_rec_t* next_assoc;
-	slurmdb_assoc_rec_t* assoc = assocs[i];
+	slurmdb_assoc_rec_t* assoc = assocs[begin_ndx];
+	size_t i = begin_ndx;
 	size_t tied_accounts = 0;
 	while ((next_assoc = assocs[++i])) {
+		/* Users are sorted to the left of accounts, so no user we
+		 * encounter here will be equal to this account */
 		if (!next_assoc->user)
 			break;
 		if (assoc->usage->level_fs != next_assoc->usage->level_fs)
@@ -246,12 +263,20 @@ static size_t _count_tied_accounts(slurmdb_assoc_rec_t** assocs,
 }
 
 
+/* Copy the children of accounts [begin, end] into a single array.
+ * IN siblings - array of siblings, sorted by level_fs
+ * IN begin - index of first account to merge
+ * IN end - index of last account to merge
+ * IN assoc_level - depth in the tree (root is 0)
+ * RET - Array of the children. Must be freed.
+ */
 static slurmdb_assoc_rec_t** _merge_accounts(
 	slurmdb_assoc_rec_t** siblings,
 	size_t begin, size_t end, uint16_t assoc_level)
 {
 	size_t i;
-	size_t child_count = 0;
+	/* number of associations in merged array */
+	size_t merged_size = 0;
 	/* merged is a null terminated array */
 	slurmdb_assoc_rec_t** merged = (slurmdb_assoc_rec_t **)
 		xmalloc(sizeof(slurmdb_assoc_rec_t *));
@@ -260,6 +285,7 @@ static slurmdb_assoc_rec_t** _merge_accounts(
 	for (i = begin; i <= end; i++) {
 		List children = siblings[i]->usage->children_list;
 
+		/* the first account's debug was already printed */
 		if (priority_debug && i > begin)
 			_ft_debug(siblings[i], assoc_level, true);
 
@@ -267,8 +293,7 @@ static slurmdb_assoc_rec_t** _merge_accounts(
 			continue;
 		}
 
-		merged = _append_children_to_array(children, merged,
-						   &child_count);
+		merged = _append_list_to_array(children, merged, &merged_size);
 	}
 	return merged;
 }
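_append_list_to_array() keeps the array NULL-terminated at all times so that callers such as _count_tied_accounts() and _calc_tree_fs() can walk it without carrying a separate length. A standalone sketch of the same idiom, with a hypothetical append_to_array(), plain realloc() standing in for Slurm's xrealloc(), and char* standing in for slurmdb_assoc_rec_t*:

#include <stdlib.h>
#include <string.h>

static char **append_to_array(char **merged, size_t *merged_size,
			      char **items, size_t nitems)
{
	size_t i = *merged_size;

	*merged_size += nitems;
	/* must stay NULL-terminated, so allocate one extra slot */
	merged = realloc(merged, sizeof(char *) * (*merged_size + 1));
	if (!merged)
		abort();

	memcpy(merged + i, items, sizeof(char *) * nitems);
	merged[*merged_size] = NULL;	/* terminate the array */
	return merged;
}

int main(void)
{
	char *batch[] = { "a", "b" };
	char **arr = NULL;
	size_t n = 0, i;

	arr = append_to_array(arr, &n, batch, 2);	/* realloc(NULL) == malloc */
	for (i = 0; arr[i]; i++)	/* no separate length needed */
		;
	free(arr);
	return 0;
}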
@@ -278,62 +303,81 @@ static slurmdb_assoc_rec_t** _merge_accounts(
  * (level_fs). Once they are sorted, operate on each child in sorted order.
  * This portion of the tree is now sorted and users are given a fairshare value
  * based on the order they are operated on. The basic equation is
- * (rank / g_user_assoc_count), though ties are allowed. The rank is decremented
- * for each user that is encountered.
+ * (rank / g_user_assoc_count), though ties are allowed. The rank is
+ * decremented for each user that is encountered except when ties occur.
+ *
+ * Tie Handling Rules:
+ * 	1) Sibling users with the same level_fs receive the same rank
+ *	2) Sibling accounts with the same level_fs have their children lists
+ *	merged before sorting
+ *	3) A user with the same level_fs as a sibling account will receive
+ *	the same rank as the account's highest ranked user
+ *
+ * IN siblings - array of siblings
+ * IN assoc_level - depth in the tree (root is 0)
+ * IN/OUT rank - current user ranking, starting at g_user_assoc_count
+ * IN/OUT rnt - rank, no ties (what rank would be if no tie exists)
+ * IN account_tied - is this account tied with the previous user
  */
 static void _calc_tree_fs(slurmdb_assoc_rec_t** siblings,
-			  uint16_t assoc_level, uint32_t *rank, uint32_t *i,
-			  bool account_tied)
+			  uint16_t assoc_level, uint32_t *rank,
+			  uint32_t *rnt, bool account_tied)
 {
 	slurmdb_assoc_rec_t *assoc = NULL;
 	long double prev_level_fs = (long double) NO_VAL;
 	bool tied = false;
-	size_t ndx;
+	size_t i;
 
 	/* Calculate level_fs for each child */
-	for (ndx = 0; (assoc = siblings[ndx]); ndx++)
+	for (i = 0; (assoc = siblings[i]); i++)
 		_calc_assoc_fs(assoc);
 
 	/* Sort children by level_fs */
-	qsort(siblings, ndx, sizeof(slurmdb_assoc_rec_t *),
-	      _cmp_level_fs);
+	qsort(siblings, i, sizeof(slurmdb_assoc_rec_t *), _cmp_level_fs);
 
 	/* Iterate through children in sorted order. If it's a user, calculate
 	 * fs_factor, otherwise recurse. */
-	for (ndx = 0; (assoc = siblings[ndx]); ndx++) {
-		if (account_tied) {
+	for (i = 0; (assoc = siblings[i]); i++) {
+		/* tied is used while iterating across siblings.
+		 * account_tied preserves ties while recursing */
+		if (i == 0 && account_tied) {
+			/* The parent was tied so this level starts out tied */
 			tied = true;
-			account_tied = false;
 		} else {
 			tied = prev_level_fs == assoc->usage->level_fs;
 		}
 
 		if (priority_debug)
 			_ft_debug(assoc, assoc_level, tied);
+
+		/* If user, set their final fairshare factor and
+		 * handle ranking.
+		 * If account, merge any tied accounts then recurse with the
+		 * merged children array.
+		 */
 		if (assoc->user) {
 			if (!tied)
-				*rank = *i;
+				*rank = *rnt;
 
-			/* Set the final fairshare factor for this user */
 			assoc->usage->fs_factor =
 				*rank / (double) g_user_assoc_count;
-			(*i)--;
+
+			(*rnt)--;
 		} else {
 			slurmdb_assoc_rec_t** children;
-			size_t merge_count =
-				_count_tied_accounts(siblings, ndx);
+			size_t merge_count = _count_tied_accounts(siblings, i);
 
 			/* Merging does not affect child level_fs calculations
 			 * since the necessary information is stored on each
 			 * assoc's usage struct */
-			children = _merge_accounts(siblings, ndx,
-						   ndx + merge_count,
+			children = _merge_accounts(siblings, i,
						   i + merge_count,
 						   assoc_level);
 
-			_calc_tree_fs(children, assoc_level + 1, rank, i, tied);
+			_calc_tree_fs(children, assoc_level+1,
+				      rank, rnt, tied);
 
 			/* Skip over any merged accounts */
-			ndx += merge_count;
+			i += merge_count;
 
 			xfree(children);
 		}
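A worked example of the ranking rules in the comment above, as a self-contained sketch with illustrative values: four sibling users already sorted by level_fs (best first). rank is reused across ties while rnt always decrements, so the user after a tie lands on the position it would have held had there been no tie. The exact floating-point comparison mirrors the prev_level_fs check in the source.

#include <stdio.h>

int main(void)
{
	double level_fs[] = { 8.0, 5.0, 5.0, 2.0 };	/* sorted input */
	unsigned int nusers = 4;
	unsigned int rank = nusers, rnt = nusers, i;
	double prev = -1.0;

	for (i = 0; i < nusers; i++) {
		int tied = (i > 0) && (level_fs[i] == prev);

		if (!tied)
			rank = rnt;	/* otherwise keep the tied rank */
		printf("user %u: fs_factor = %.2f\n",
		       i, rank / (double) nusers);
		rnt--;
		prev = level_fs[i];
	}
	return 0;
}

This prints 1.00, 0.75, 0.75, 0.25: the middle users tie, and the last user drops straight to rank 1 of 4 rather than inheriting 0.50.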
@@ -348,21 +392,21 @@ static void _apply_priority_fs(void)
 {
 	slurmdb_assoc_rec_t** children = NULL;
 	uint32_t rank = g_user_assoc_count;
-	uint32_t i = rank;
+	uint32_t rnt = rank;
 	size_t child_count = 0;
 
 	if (priority_debug)
 		info("Fair Tree fairshare algorithm, starting at root:");
 
-	assoc_mgr_root_assoc->usage->level_fs = 1L;
+	assoc_mgr_root_assoc->usage->level_fs = (long double) NO_VAL;
 
 	/* _calc_tree_fs requires an array instead of List */
-	children = _append_children_to_array(
+	children = _append_list_to_array(
 		assoc_mgr_root_assoc->usage->children_list,
 		children, &child_count);
 
-	_calc_tree_fs(children, 0, &rank, &i, false);
+	_calc_tree_fs(children, 0, &rank, &rnt, false);
 
 	xfree(children);
 }
diff --git a/src/plugins/select/cons_res/dist_tasks.c b/src/plugins/select/cons_res/dist_tasks.c
index 4274f5a6964e9c492977b25bad2ab4443386bf09..56c265b852132d93e02a6a7ef0e35454c8649fbd 100644
--- a/src/plugins/select/cons_res/dist_tasks.c
+++ b/src/plugins/select/cons_res/dist_tasks.c
@@ -41,14 +41,6 @@
 #include "select_cons_res.h"
 #include "dist_tasks.h"
 
-#if (0)
-/* Using CR_SOCKET or CR_SOCKET_MEMORY will not allocate a socket to more
- * than one job at a time, but it also will not grant a job access to more
- * CPUs on the socket than requested. If ALLOCATE_FULL_SOCKET is defined,
- * then a job will be given access to every cores on each allocated socket.
- */
-#define ALLOCATE_FULL_SOCKET 1
-#endif
 
 /* Max boards supported for best-fit across boards */
 /* Larger board configurations may require new algorithm */
diff --git a/testsuite/expect/test24.4 b/testsuite/expect/test24.4
index 3ea66e729724cfa4d2495321165ff989de93f9de..7fb9c7ea466ee5b5540602efcb06f522dbb7a86c 100755
--- a/testsuite/expect/test24.4
+++ b/testsuite/expect/test24.4
@@ -78,7 +78,7 @@ expect {
 		exp_continue
 	}
 
-	"root|||0.000000|240||1.000000||1.000000|0|0|" {
+	"root|||0.000000|240||1.000000|||0|0|" {
 		incr matches
 		exp_continue
 	}
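For completeness, the calling pattern _apply_priority_fs() follows after this patch, as a runnable toy; the struct and the calc_tree_fs() body are stand-ins, not the Slurm originals. Both counters start at the total number of user associations, the children array is NULL-terminated, and the recursion is entered at level 0 with account_tied = false.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

typedef struct { const char *name; } toy_assoc_t;	/* stand-in type */

/* stand-in for _calc_tree_fs(): just walks the NULL-terminated array */
static void calc_tree_fs(toy_assoc_t **siblings, uint16_t assoc_level,
			 uint32_t *rank, uint32_t *rnt, bool account_tied)
{
	size_t i;

	(void) account_tied;
	for (i = 0; siblings[i]; i++)
		printf("level %u: %s (rank=%u, rnt=%u)\n",
		       (unsigned) assoc_level, siblings[i]->name,
		       (unsigned) *rank, (unsigned) *rnt);
}

int main(void)
{
	toy_assoc_t a = { "acct_a" }, u = { "user_x" };
	/* real code builds this with _append_list_to_array() */
	toy_assoc_t *children[] = { &a, &u, NULL };
	uint32_t user_assoc_count = 1;		/* g_user_assoc_count */
	uint32_t rank = user_assoc_count;	/* shared by tied users */
	uint32_t rnt = rank;			/* "rank, no ties" */

	calc_tree_fs(children, 0, &rank, &rnt, false);	/* root is not tied */
	return 0;
}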