diff --git a/contribs/perlapi/libslurm/perl/conf.c b/contribs/perlapi/libslurm/perl/conf.c index 45b3a91493f4c577304f64544d03685fd806cfdb..7e2c81f7bfd66259d1c3b18e340c900978039db5 100644 --- a/contribs/perlapi/libslurm/perl/conf.c +++ b/contribs/perlapi/libslurm/perl/conf.c @@ -143,7 +143,6 @@ slurm_ctl_conf_to_hv(slurm_ctl_conf_t *conf, HV *hv) STORE_FIELD(hv, conf, priority_calc_period, uint32_t); STORE_FIELD(hv, conf, priority_decay_hl, uint32_t); STORE_FIELD(hv, conf, priority_favor_small, uint16_t); - STORE_FIELD(hv, conf, priority_levels, uint16_t); STORE_FIELD(hv, conf, priority_max_age, uint32_t); if (conf->priority_params) STORE_FIELD(hv, conf, priority_params, charp); @@ -362,7 +361,6 @@ hv_to_slurm_ctl_conf(HV *hv, slurm_ctl_conf_t *conf) FETCH_FIELD(hv, conf, priority_calc_period, uint32_t, TRUE); FETCH_FIELD(hv, conf, priority_decay_hl, uint32_t, TRUE); FETCH_FIELD(hv, conf, priority_favor_small, uint16_t, TRUE); - FETCH_FIELD(hv, conf, priority_levels, uint16_t, TRUE); FETCH_FIELD(hv, conf, priority_max_age, uint32_t, TRUE); FETCH_FIELD(hv, conf, priority_params, charp, FALSE); FETCH_FIELD(hv, conf, priority_reset_period, uint16_t, TRUE); diff --git a/doc/html/Makefile.am b/doc/html/Makefile.am index 5aedec3d2662e9fa17a2f7311f5221daf91248af..cc928ef6184d454d5bcebe68afb9f802bd3b6b19 100644 --- a/doc/html/Makefile.am +++ b/doc/html/Makefile.am @@ -47,7 +47,7 @@ generated_html = \ job_submit_plugins.html \ jobcompplugins.html \ launch_plugins.html \ - level_based.html \ + fair_tree.html \ mail.html \ man_index.html \ maui.html \ diff --git a/doc/html/Makefile.in b/doc/html/Makefile.in index d0f27f5f59b62ae4d00672d15386518fc627454d..df710757d94643d5a7f6ab6c0c97b74e964fa510 100644 --- a/doc/html/Makefile.in +++ b/doc/html/Makefile.in @@ -462,7 +462,7 @@ generated_html = \ job_submit_plugins.html \ jobcompplugins.html \ launch_plugins.html \ - level_based.html \ + fair_tree.html \ mail.html \ man_index.html \ maui.html \ diff --git 
a/doc/html/fair_tree.shtml b/doc/html/fair_tree.shtml new file mode 100644 index 0000000000000000000000000000000000000000..bee01641ef70613ed19045c7f7ba478d48a5f5cc --- /dev/null +++ b/doc/html/fair_tree.shtml @@ -0,0 +1,207 @@ +<!--#include virtual="header.txt"--> + +<!-- + +Fair Tree contributed by Brigham Young University +Authors: Ryan Cox and Levi Morrison + + --> + +<h1>Fair Tree Fairshare Algorithm</h1> + +<h2>Contents</h2> +<ul> +<li><a href="#intro">Introduction</a></li> +<li><a href="#enduser">Overview for End Users</a></li> +<li><a href="#algorithm">Algorithm</a></li> +<li><a href="#fairshare">Level Fairshare Calculation</a></li> +<li><a href="#ties">Ties</a></li> +<li><a href="#sshare">sshare</a></li> +<li><a href="#config">Configuration</a></li> +<li><a href="#notes">Important notes</a></li> +</ul> + +<h2><a name="intro">Introduction</a></h2> + +<p>PriorityFlags=FAIR_TREE results in changes to several fairshare calculations. +Fair Tree prioritizes users such that if accounts A and B are siblings and A has +a higher fairshare factor than B, all children of A will have higher fairshare +factors than all children of B.</p> + +<p>Some of the benefits include:</p> +<ul> + <li> + All users from a higher priority account receive a higher fair + share factor than all users from a lower priority account. + </li> + <li> + Users are sorted and ranked to prevent errors due to precision + loss. Ties are allowed. + </li> + <li> + Account coordinators cannot accidentally harm the priority of + their users relative to users in other accounts. + </li> + <li> + Users are extremely unlikely to have exactly the same fairshare + factor as another user due to loss of precision in calculations. + </li> + <li> + New jobs are immediately assigned a priority. + </li> +</ul> + +<h2><a name="enduser">Overview for End Users</a></h2> + +<p>This section is intended for non-admin users who just want to know how their +fairshare factor is determined. 
Run <code>sshare -l</code> (lowercase "L") to +view the following columns: <code>FairShare, Level FS</code>. Note that +Level FS values are infinity if the association has no usage.</p> + +<p>If an account has a higher Level FS value than any other sibling user or +sibling account, all children of that account will have a higher FairShare value +than the children of the other account. This is true at every level of the +association tree.</p> + +<p>The FairShare value is obtained by using the Fair Tree +<a href="#algorithm">algorithm</a> to rank all users in the order that they +should be prioritized (descending). The FairShare value is the user's rank +divided by the total number of user associations. The highest ranked user +receives a 1.0 fairshare value.</p> + +<p>If you (UserA) have a lower FairShare value than another user (UserB) and +want to know why, find the first common ancestor account. At the level +below the common ancestor, compare the Level FS value of your ancestor to the +Level FS value of UserB's ancestor. Your ancestor has a lower Level FS value +than UserB's ancestor. For information on how Level FS value is +calculated, read the section about the <a href="#fairshare">Level FS +equation</a>.</p> + +<p>For example, assume the association tree contains UserA and UserB as +follows:</p> +<pre> +root => Acct1 => Acct12 => UserA +root => Acct1 => Acct16 => UserB +</pre> + +<p>Acct1 is the first common ancestor of UserA and UserB. Check the Level FS +values of Acct12 and Acct16. If UserB has a higher FairShare value than UserA, +Acct16 has a higher Level FS value than Acct12.</p> + +<p>The sections below contain more information about the algorithm, including +how the final fairshare factor and the Level FS values are calculated.</p> + +<h2><a name="algorithm">Algorithm</a></h2> + +<p>An equation is used to calculate a Level Fairshare value for each +association, only considering the shares and usage of itself and its siblings. 
+A <a href="http://www.math.ucsd.edu/~ebender/CombText/ch-9.pdf"> +rooted plane tree</a> <small>(PDF download)</small>, also known as a rooted +ordered tree, is logically created then sorted by Level Fairshare with the +highest values on the left. The tree is then visited in a depth-first +traversal. Users are ranked in pre-order as they are found. The ranking is +used to create the final fairshare factor for the user.</p> + +<p>The algorithm performs a single traversal of the tree since all the steps +can be combined. The basic idea is to set <i>rank</i> equal to the count of user +associations then start at root:</p> + +<ul><li>Calculate Level Fairshare for the subtree's children</li> +<li>Sort children of the subtree</li> +<li>Visit the children in descending order +<ul><li>If user, assign a final fairshare factor similar to + (rank-- / user_assoc_count)</li> +<li>If account, descend to account</li> +</ul></li></ul> + + +<h2><a name="fairshare">Level Fairshare Calculation</a></h2> + +<p>The Level Fairshare equation is described below. Under-served associations +will have a value greater than 1.0. Over-served associations will have a value +between 0.0 and 1.0. +</p> + +<pre> +LF = S / U +</pre> + +<dl> +<dt>LF</dt> +<dd> is the association's Level Fairshare</dd> +<dt> S</dt> +<dd> also known as Shares Norm, S is the association's assigned shares +normalized to the shares assigned to itself and its siblings: +<nobr><code>S = Sraw<sub>self</sub> / Sraw<sub>self+siblings</sub></code></nobr> +</dd> +<dt> U</dt> +<dd> also known as Effective Usage, U is the association's usage normalized to +the account's usage: +<nobr><code>U = Uraw<sub>self</sub> / Uraw<sub>self+siblings</sub></code></nobr> +</dd> +</dl> + +<p>U and S are in the range <nobr><code>0.0 .. 1.0</code></nobr>. LF is in the +range <nobr><code>0.0 .. 
infinity</code>.</nobr></p> + +<h2><a name="ties">Ties</a></h2> + +<p>Ties are handled as follows:</p> +<ul> +<li>Sibling users with the same Level Fairshare receive the same rank</li> +<li>A user with the same Level Fairshare as a sibling account will receive the +same rank as that account's highest ranked user</li> +<li>Sibling accounts with the same Level Fairshare have their children lists +merged before descending</li> +</ul> + + +<h2><a name="sshare">sshare</a></h2> + +<p>sshare was modified to show the Level Fairshare value as <code>Level FS</code> when +the <code>-l</code> (long) parameter is specified. The field shows the value for each +association, thus allowing users to see the results of the fairshare +calculation at each level.</p> + +<p>Note: Norm Usage is not used by Fair Tree but is still displayed.</p> + +<h2><a name="config">Configuration</a></h2> + +<p> The following slurm.conf (SLURM_CONFIG_FILE) parameters are used to +configure the Fair Tree algorithm. See slurm.conf(5) man page for more +details.</p> + +<dl> +<dt>PriorityFlags</dt> +<dd>Set to "FAIR_TREE".</dd> +<dt>PriorityType</dt> +<dd>Set this value to "priority/multifactor". +The default value for this variable is "priority/basic" +which enables simple FIFO scheduling.</dd> +<dt>PriorityCalcPeriod</dt> +<dd>PriorityCalcPeriod is the frequency in minutes that job half-life decay +and Fair Tree calculations are performed.</dd> +</dl> + +<h2><a name="notes">Important Notes</a></h2> +<ul> +<li>As the Fair Tree algorithm ranks all users, active or not, the +administrator must carefully consider how to apply other priority weights +in the priority/multifactor plugin. The <i>PriorityWeightFairshare</i> can be +usefully set to a much smaller value than usual, possibly as low as 1 or 2 times +the number of user associations. +</li> +<li>Fair Tree requires the <a href="accounting.html">Slurm Accounting +Database</a> to provide usage information and the assigned shares values. 
+</li> +<li><i>scontrol reconfigure</i> does not cause the Fair Tree algorithm to +run immediately, even if switching from a different algorithm. You may have to +wait until the next iteration as defined by <i>PriorityCalcPeriod</i>. +</li> +</ul> + + +<!-- -------------------------------------------------------------------- --> +<p style="text-align:center;">Last modified 15 Sep 2014</p> + +<!--#include virtual="footer.txt"--> diff --git a/doc/html/level_based.shtml b/doc/html/level_based.shtml deleted file mode 100644 index f55a587c12979c6c2c19df626a55e4873e141f8a..0000000000000000000000000000000000000000 --- a/doc/html/level_based.shtml +++ /dev/null @@ -1,255 +0,0 @@ -<!--#include virtual="header.txt"--> - -<!-- - -LEVEL_BASED contributed by Brigham Young University -Authors: Ryan Cox and Levi Morrison - - --> - -<h1>LEVEL_BASED Multifactor</h1> - -<h2>Contents</h2> -<ul> -<li><a href="#intro">Introduction</a></li> -<li><a href="#algorithm">Algorithm</a></li> -<li><a href="#fairshare">Fair share factor calculation</a></li> -<li><a href="#bitwise">Bitwise operations at each level</a></li> -<li><a href="#ranking">Ranking</a></li> -<li><a href="#sshare">sshare</a></li> -<li><a href="#config">Configuration</a></li> -<li><a href="#notes">Important notes</a></li> -</ul> - -<h2><a name="intro">Introduction</a></h2> - -<p>PriorityFlags=LEVEL_BASED results in changes to several fair share -calculations. The algorithm prioritizes users such that users in an under-served -account will always have a higher fair share factor than users in an over-served -account.</p> - -<p>Some of the benefits include:</p> -<ul> - <li> - All users from a higher priority account receive a higher fair - share factor than all users from a lower priority account. - </li> - <li> - Users are sorted and ranked to prevent errors due to precision - loss. - </li> - <li> - Account coordinators cannot accidentally harm the priority of - their users relative to users in other accounts. 
- </li> - <li> - Users are extremely unlikely to have exactly the same fair share - factor as another user due to loss of precision in calculations. - </li> - <li> - New jobs are immediately assigned a priority. - </li> -</ul> - -<h2><a name="algorithm">Algorithm</a></h2> - -<p>The algorithm uses a 64-bit unsigned integer, <i>priority_fs_raw</i>, to -store priority calculations on associations at each level. LEVEL_BASED -recursively descends through the association hierarchy starting at the root. -It will descend up to <i>PriorityLevels</i> levels below the root for -calculation purposes. Any child associations that are deeper than -<i>PriorityLevels</i> will have their fair share priority set to their -parent's value.</p> - -<p><i>priority_fs_raw</i> is subdivided into "buckets" to store the -fair share priority at each level. The following variables are used:</p> -<pre> -bucket_width_in_bits = 64 / priority_levels -unused_bucket_bits = 64 % priority_levels -bucket_max = UINT64_MAX >> (64 - bucket_width_in_bits) -</pre> - -<h2><a name="fairshare">Fair share factor calculation</a></h2> - -<p>As LEVEL_BASED descends through the association hierarchy, it calculates -fair share in a similar way to the traditional multifactor method except that -it does so at each level in the hierarchy and normalizes <i>S</i> beforehand. 
-<pre> -F = 2**(-U/S) -</pre> - -<dl> -<dt> F</dt> -<dd> is the association's fair share factor at the current level</dd> -<dt> U</dt> -<dd> also known as Usage Effective, U is the association's normalized usage at -the current level</dd> -<dt> S</dt> -<dd> is the association's normalized shares at the current level</dd> -</dl> - -<p>All values above are between zero and one.</p> - -<p>See the original <a href="priority_multifactor.html">multifactor plugin</a> -for more information on this calculation.</p> - - -<h2>Normalization of shares before fair share calculation</h2> - -<p>One major modification to the calculation of <i>F</i> is that LEVEL_BASED -normalizes the <i>S</i> value between 0.1 and 1.0 prior to the fair share -calculation. This is done because a low <i>S</i> value can result in an -<i>F</i> value that is approximately 0, even when <i>U</i> is small. An example -of this behavior can be visualized by comparing an unnormalized <i>S</i> value -between -<a href="http://www.wolframalpha.com/input/?i=2%5E-%28u%2Fs%29%2C+u+from+0+to+1%2C+s+from+0+to+1"> -0.0 and 1.0</a> to a normalized <i>S</i> value between -<a href="http://www.wolframalpha.com/input/?i=2%5E-%28u%2Fs%29%2C+u+from+0+to+1%2C+s+from+.1+to+1"> -0.1 and 1.0</a>.</p> - -<p>Assuming 150 associations at the same level and with equal Fairshare values -set in sacctmgr, <i>S</i> is 0.0067. When plugged into the equation, <i>F</i> -values are often indistinguishable from each other when <i>U</i> is high. 
-Compare the unnormalized -<a href="http://www.wolframalpha.com/input/?i=2%5E-%28u%2F0.0067%29%2C+u+from+0+to+1"> -S=0.0067</a> to a worst case -<a href="http://www.wolframalpha.com/input/?i=2%5E-%28u%2F.1%29%2C+u+from+0+to+1"> -S=0.1</a> when the value is normalized before <i>F</i> is calculated.</p> - -<p>The general formula used is a linear interpolation, which maps x in range -a..b to range A..B, represented by <code>interpolate</code> below.</p> - -<pre>interpolate(x, a, b, A, B) = (x - a) / (b - a) * (B - A) + A</pre> - - -<h2><a name="bitwise">Bitwise operations at each level</a></h2> - -<p>After the final <i>F</i> calculation for the given level (<i>assoc_level</i>) -, the value is normalized between 1 and <i>bucket_max</i> and stored in a -temporary variable called <i>level_fs</i>. It is then shifted as follows:</p> - -<pre>level_fs <<= ( - (priority_levels - assoc_level - 1) - * bucket_width_in_bits - + unused_bucket_bits - ) -</pre> - -<p>As the code recurses deeper into the tree, <i>assoc_level</i> is incremented from -its initial value of 0. The result is a steadily decreasing shift value. -<i>level_fs</i> is then bitwise OR'd with the parent's <i>priority_fs_raw</i> -and stored in its own <i>priority_fs_raw</i> field. This fences off the effect -of each level's calculation since there is no overlap.</p> - - -<h3><a name="example">Example</a></h3> - -<p>For example, consider a four-tiered structure with colleges, departments, -faculty, and users (faculty member himself/herself plus students). -<i>PriorityLevels=4</i> (<i>priority_levels</i> internally) should be set. 
-<i>priority_fs_raw</i> is divided into four 16-bit buckets:</p> -<pre> -bucket_width_in_bits = 16 -unused_bucket_bits = 0 -bucket_max = 65535 -</pre> - -<p>This results in the following example calculation:</p> - -<pre> - root - || - \/ -Life Sciences level_fs = 0xAAAA000000000000, priority_fs_raw = 0xAAAA000000000000 - || - \/ - Biology level_fs = 0x0000123400000000, priority_fs_raw = 0xAAAA123400000000 - || - \/ - Dr. Bob level_fs = 0x0000000077770000, priority_fs_raw = 0xAAAA123477770000 - || - \/ - Grad Student level_fs = 0x000000000000CCCC, priority_fs_raw = 0xAAAA12347777CCCC -</pre> - -<p>The final value for Grad Student is 0xAAAA12347777CCCC. This represents:</p> -<pre> - College | Dept | Faculty | User - AAAA 1234 7777 CCCC -</pre> - -<p>The <i>priority_fs_raw</i> values are visible as Fairshare Raw in the output -of <i>sshare -l</i>. - -<h2><a name="ranking">Ranking</a></h2> - -<p>After setting <i>priority_fs_raw</i> on all associations, all users are then -sorted by their <i>priority_fs_raw</i> to obtain a ranking. This rank is then -used to calculate the final fair share factor. Since this final number is stored -at the association level, new jobs can be immediately assigned a priority. Note -that all users, whether active or not, are included in calculations and -ranked.</p> - -<p>This step is desirable because the final fair share factor must be -normalized to a floating point number between 0.0 and 1.0 that is multipled by -PriorityWeightFairshare, an integer. Depending on the normalization and the -value of PriorityWeightFairshare, precision loss can result in poor fair share -results; users may end up with the same priorities despite widely varying -usage. LEVEL_BASED solves this by sorting users then using the normalized rank -as the fair share factor. 
Users with the same fair share factor as other users -before the ranking will have the same priority as each other afterwards.</p> - -<h2><a name="sshare">sshare</a></h2> - -<p>sshare was modified to show the <i>priority_fs_raw</i> value as <i>Fairshare -Raw</i> when the <i>-l</i> (long) parameter is specified. The field shows the -hexadecimal value of <i>priority_fs_raw</i> for each association, thus allowing -users to see the results of the fair share calculation at each level.</p> -<p>Note: Norm Usage is not used by LEVEL_BASED but is still displayed.</p> - -<h2><a name="config">Configuration</a></h2> - -<p> The following slurm.conf (SLURM_CONFIG_FILE) parameters are used to -configure the LEVEL_BASED prioritization mechanism. See slurm.conf(5) man -page for more details.</p> - -<dl> -<dt>PriorityFlags</dt> -<dd>Set to "LEVEL_BASED".</dd> -<dt>PriorityType</dt> -<dd>Set this value to "priority/multifactor". -The default value for this variable is "priority/basic" -which enables simple FIFO scheduling.</dd> -<dt>PriorityLevels</dt> -<dd>PriorityLevels determines how many levels of associations below the root -are considered for priority purposes. Associations deeper than this number -will inherit their parents' fair share priority. The number of levels must be -between 1 and 16.</dd> -<dt>PriorityCalcPeriod</dt> -<dd>PriorityCalcPeriod is the frequency in minutes that job half-life decay -and LEVEL_BASED calculations are performed.</dd> -</dl> - -<h2><a name="notes">Important Notes</a></h2> -<ul> -<li>As the LEVEL_BASED algorithm ranks all users, active or not, the -administrator must carefully consider how to apply other priority weights -in the priority/multifactor plugin. It is worth mentioning that the -<i>PriorityWeightFairshare</i> can be usefully set to a much smaller value than -usual, possibly as low as 1 or 2 times the number of user associations. 
-</li> -<li>LEVEL_BASED requires the <a href="accounting.html">Slurm Accounting -Database</a> to provide the assigned shares and the consumed, -computing resources described below. -</li> -<li><i>scontrol reconfigure</i> does not cause the LEVEL_BASED algorithm to -run immediately, even if switching from a different algorithm. You may have to -wait until the next iteration as defined by <i>PriorityCalcPeriod</i>. -</li> -</ul> - - -<!-- -------------------------------------------------------------------- --> -<p style="text-align:center;">Last modified 20 June 2014</p> - -<!--#include virtual="footer.txt"--> diff --git a/doc/man/man1/sshare.1 b/doc/man/man1/sshare.1 index 2dcb77791ee0d172693968923646b3930cb51bbb..e3a493dc03a8e2e4a6ee70686f18f9da52b4ef4b 100644 --- a/doc/man/man1/sshare.1 +++ b/doc/man/man1/sshare.1 @@ -121,8 +121,8 @@ This takes into consideration time limit of running jobs and consumes it, if the limit is reached no new jobs are started until other jobs finish to allow time to free up. -.SH "LEVEL_BASED MODIFICATIONS" -When PriorityFlags=LEVEL_BASED is set, calculations are done differently. +.SH "FAIR_TREE MODIFICATIONS" +When PriorityFlags=FAIR_TREE is set, calculations are done differently. As a result, the following fields are added or modified: .TP @@ -130,18 +130,20 @@ As a result, the following fields are added or modified: The shares assigned to the user or account normalized to the total number of assigned shares within the level. -.TP -\f3Norm Usage\fP -Norm Usage is not used by LEVEL_BASED but is displayed anyway. - .TP \f3Effectv Usage\fP Effectv Usage is the association's usage normalized with its parent. .TP -\f3FairShare Raw\fP (only appears with \fBsshare \-l\fR option) -This hexadecimal number is the final fair share factor value before the -algorithm sorts and ranks users. 
+\f3Level FS\fP (only appears with \fBsshare \-l\fR option) +This is the association's fairshare value compared to its siblings, calculated +as Norm Shares / Effectv Usage. If an association is over-served, the value is +between 0 and 1. If an association is under-served, the value is greater than 1. +Associations with no usage receive the highest possible value, infinity. + +.TP +More information about Fair Tree can be found in doc/html/fair_tree.html or +at http://slurm.schedmd.com/fair_tree.html .SH "EXAMPLES" .eo diff --git a/doc/man/man5/slurm.conf.5 b/doc/man/man5/slurm.conf.5 index 98691cefe325faffb260ba59ea241dd5eb5d458e..339c942a1adef5ed042590020c6e6cbe4ea37a26 100644 --- a/doc/man/man5/slurm.conf.5 +++ b/doc/man/man5/slurm.conf.5 @@ -1463,15 +1463,10 @@ or holds. If set, priorities will be recalculated not only for pending jobs, but also running and suspended jobs. .TP -\fBLEVEL_BASED\fR -If set, priority will be calculated at each level of accounts, with the -priority at each level outweighing the effect of priority of lower levels. For -example, consider a two-tiered system where each faculty member has an account -and the users in that account are the faculty member and his/her students. -Priority is first calculated at the account level. If account A has higher -priority than account B, all users in account A will have a higher priority -than users in account B. The number of levels is determined by -\fBPriorityLevels\fR. +\fBFAIR_TREE\fR +If set, priority will be calculated in such a way that if accounts A and B are +siblings and A has a higher fairshare factor than B, all children of A will have +higher fairshare factors than all children of B. .TP \fBDEPTH_OBLIVIOUS\fR If set, priority will be calculated based similar to the normal multifactor @@ -1490,13 +1485,6 @@ If set, priority will be calculated based on the ticket system. \fBPriorityParameters\fR Arbitrary string used by the PriorityType plugin. 
-.TP -\fBPriorityLevels\fR -When using PriorityFlags=LEVEL_BASED, PriorityLevels determines how many -levels of associations below the root are considered for priority purposes. -Associations deeper than this number will inherit their parents' fair share -priority. The number of levels must be between 1 and 16. - .TP \fBPriorityMaxAge\fR Specifies the job age which will be given the maximum age factor in computing diff --git a/slurm/slurm.h.in b/slurm/slurm.h.in index 049a8a5c38549cc6967c3b0b227deb251044b1e4..a9e7cd1ce81a217945e15bf3468026337eb453d9 100644 --- a/slurm/slurm.h.in +++ b/slurm/slurm.h.in @@ -779,7 +779,7 @@ enum ctx_keys { #define PRIORITY_FLAGS_CALCULATE_RUNNING 0x0010 /* Calculate priorities for * running jobs, not only the * pending jobs. */ -#define PRIORITY_FLAGS_LEVEL_BASED 0x0020 /* Prioritize by level in +#define PRIORITY_FLAGS_FAIR_TREE 0x0020 /* Prioritize by level in + * account hierarchy. */ /*****************************************************************************\ * SLURM HOSTLIST FUNCTIONS @@ -2273,8 +2273,6 @@ typedef struct slurm_ctl_conf { uint16_t priority_favor_small; /* favor small jobs over large */ uint16_t priority_flags; /* set some flags for priority configuration, * see PRIORITY_FLAGS_* above */ - uint16_t priority_levels; /* number of levels of interest to consider - * in LEVEL_BASED */ uint32_t priority_max_age; /* time when not to add any more * priority to a job if reached */ char *priority_params; /* priority plugin parameters */ diff --git a/src/api/config_info.c b/src/api/config_info.c index 49b16d86cdf4e4e7ba942d83978831caf4dd45a8..bcedb136a6a2e4b5252489a7c2dec23a9a410740 100644 --- a/src/api/config_info.c +++ b/src/api/config_info.c @@ -992,13 +992,6 @@ extern void *slurm_ctl_conf_2_key_pairs (slurm_ctl_conf_t* slurm_ctl_conf_ptr) priority_flags); list_append(ret_list, key_pair); - snprintf(tmp_str, sizeof(tmp_str), "%u", - slurm_ctl_conf_ptr->priority_levels); - key_pair = xmalloc(sizeof(config_key_pair_t)); - 
key_pair->name = xstrdup("PriorityLevels"); - key_pair->value = xstrdup(tmp_str); - list_append(ret_list, key_pair); - secs2time_str((time_t) slurm_ctl_conf_ptr->priority_max_age, tmp_str, sizeof(tmp_str)); key_pair = xmalloc(sizeof(config_key_pair_t)); diff --git a/src/common/assoc_mgr.c b/src/common/assoc_mgr.c index 5508f941a0ebe80e02fcb4890e19ea1a59fa0c22..c991c0148f3c2342db619b8b8f0025621f345f8c 100644 --- a/src/common/assoc_mgr.c +++ b/src/common/assoc_mgr.c @@ -55,6 +55,7 @@ slurmdb_association_rec_t *assoc_mgr_root_assoc = NULL; uint32_t g_qos_max_priority = 0; uint32_t g_qos_count = 0; +uint32_t g_user_assoc_count = 0; List assoc_mgr_association_list = NULL; List assoc_mgr_res_list = NULL; List assoc_mgr_qos_list = NULL; @@ -282,7 +283,7 @@ static void _delete_assoc_hash(void *assoc) } -static void _normalize_assoc_shares_level_based( +static void _normalize_assoc_shares_fair_tree( slurmdb_association_rec_t *assoc) { slurmdb_association_rec_t *fs_assoc = assoc; @@ -319,9 +320,12 @@ static void _normalize_assoc_shares_traditional( assoc2->usage->shares_norm = 1.0; while (assoc->usage->parent_assoc_ptr) { if (assoc->shares_raw != SLURMDB_FS_USE_PARENT) { - assoc2->usage->shares_norm *= - (double)assoc->shares_raw / - (double)assoc->usage->level_shares; + if (!assoc->usage->level_shares) + assoc2->usage->shares_norm = 0; + else + assoc2->usage->shares_norm *= + (double)assoc->shares_raw / + (double)assoc->usage->level_shares; debug3("assoc %u(%s %s) normalize = %f " "from %u(%s %s) %u / %u = %f", assoc2->id, assoc2->acct, assoc2->user, @@ -744,6 +748,7 @@ static int _set_assoc_parent_and_user(slurmdb_association_rec_t *assoc, if (assoc->user) { uid_t pw_uid; + g_user_assoc_count++; if (uid_from_string(assoc->user, &pw_uid) < 0) assoc->uid = NO_VAL; else @@ -861,6 +866,7 @@ static int _post_association_list(void) itr = list_iterator_create(assoc_mgr_association_list); //START_TIMER; + g_user_assoc_count = 0; while ((assoc = list_next(itr))) { 
_set_assoc_parent_and_user(assoc, reset); _add_assoc_hash(assoc); @@ -1721,6 +1727,8 @@ extern assoc_mgr_association_usage_t *create_assoc_mgr_association_usage() usage->usage_efctv = 0; usage->usage_norm = (long double)NO_VAL; usage->usage_raw = 0; + usage->level_fs = 0; + usage->fs_factor = 0; return usage; } @@ -2559,8 +2567,8 @@ extern List assoc_mgr_get_shares(void *db_conn, share->grp_cpu_mins = assoc->grp_cpu_mins; share->cpu_run_mins = assoc->usage->grp_used_cpu_run_secs / 60; - share->priority_fs_raw = assoc->usage->priority_fs_raw; - share->priority_fs_ranked = assoc->usage->priority_fs_ranked; + share->fs_factor = assoc->usage->fs_factor; + share->level_fs = assoc->usage->level_fs; if (assoc->user) { /* We only calculate user effective usage when @@ -2938,6 +2946,7 @@ extern int assoc_mgr_update_assocs(slurmdb_update_object_t *update) */ if (parents_changed) { int reset = 1; + g_user_assoc_count = 0; slurmdb_sort_hierarchical_assoc_list( assoc_mgr_association_list); @@ -4620,8 +4629,8 @@ extern void assoc_mgr_normalize_assoc_shares(slurmdb_association_rec_t *assoc) * global flags variable. 
assoc_mgr_init() would be the logical * place to set a global, but there is no great location for * resetting it when scontrol reconfigure is called */ - if (slurmctld_conf.priority_flags & PRIORITY_FLAGS_LEVEL_BASED) - _normalize_assoc_shares_level_based(assoc); + if (slurmctld_conf.priority_flags & PRIORITY_FLAGS_FAIR_TREE) + _normalize_assoc_shares_fair_tree(assoc); else _normalize_assoc_shares_traditional(assoc); } diff --git a/src/common/assoc_mgr.h b/src/common/assoc_mgr.h index 0a00a4b7f25437f375bc89ff649f2c02378337a2..b97011a133fb29f295ea945f1ddee948c2fcf0f1 100644 --- a/src/common/assoc_mgr.h +++ b/src/common/assoc_mgr.h @@ -107,7 +107,6 @@ typedef struct { struct assoc_mgr_association_usage { List children_list; /* list of children associations * (DON'T PACK) */ - uint32_t grp_used_cpus; /* count of active jobs in the group * (DON'T PACK) */ uint32_t grp_used_mem; /* count of active memory in the group @@ -118,7 +117,8 @@ struct assoc_mgr_association_usage { * running jobs (DON'T PACK) */ uint64_t grp_used_cpu_run_secs; /* count of running cpu secs * (DON'T PACK) */ - + double fs_factor; /* Fairshare factor. Not used by all algorithms + * (DON'T PACK) */ uint32_t level_shares; /* number of shares on this level of * the tree (DON'T PACK) */ @@ -143,7 +143,7 @@ struct assoc_mgr_association_usage { uint32_t used_submit_jobs; /* count of jobs pending or running * (DON'T PACK) */ - /* Currently LEVEL_BASED and TICKET_BASED systems are defining data on + /* Currently FAIR_TREE and TICKET_BASED systems are defining data on * this struct but instead we could keep a void pointer to system * specific data. 
This would allow subsystems to define whatever data * they need without having to modify this struct; it would also save @@ -154,10 +154,9 @@ struct assoc_mgr_association_usage { unsigned active_seqno; /* Sequence number for identifying * active associations (DON'T PACK) */ - uint64_t priority_fs_raw; /* (LEVEL_BASED) Priority used when - * sorting (DON'T PACK) */ - uint64_t priority_fs_ranked; /* (LEVEL_BASED) Priority after - * ranking (DON'T PACK) */ + long double level_fs; /* (FAIR_TREE) Result of fairshare equation + * compared to the association's siblings (DON'T + * PACK) */ bitstr_t *valid_qos; /* qos available for this association * derived from the qos_list. @@ -197,6 +196,7 @@ extern slurmdb_association_rec_t *assoc_mgr_root_assoc; extern uint32_t g_qos_max_priority; /* max priority in all qos's */ extern uint32_t g_qos_count; /* count used for generating qos bitstr's */ +extern uint32_t g_user_assoc_count; /* Number of associations which are users */ extern int assoc_mgr_init(void *db_conn, assoc_init_args_t *args, diff --git a/src/common/read_config.c b/src/common/read_config.c index 0282dfc748080b0cc7713118646a9a84d44fb578..f2473365a3a900d8acf59a9534762bfae9728687 100644 --- a/src/common/read_config.c +++ b/src/common/read_config.c @@ -264,7 +264,6 @@ s_p_options_t slurm_conf_options[] = { {"PriorityDecayHalfLife", S_P_STRING}, {"PriorityCalcPeriod", S_P_STRING}, {"PriorityFavorSmall", S_P_BOOLEAN}, - {"PriorityLevels", S_P_UINT16}, {"PriorityMaxAge", S_P_STRING}, {"PriorityParameters", S_P_STRING}, {"PriorityUsageResetPeriod", S_P_STRING}, @@ -3572,26 +3571,8 @@ _validate_and_set_defaults(slurm_ctl_conf_t *conf, s_p_hashtbl_t *hashtbl) conf->priority_flags |= PRIORITY_FLAGS_TICKET_BASED; else if (slurm_strcasestr(temp_str, "DEPTH_OBLIVIOUS")) conf->priority_flags |= PRIORITY_FLAGS_DEPTH_OBLIVIOUS; - else if (slurm_strcasestr(temp_str, "LEVEL_BASED")) { - conf->priority_flags |= PRIORITY_FLAGS_LEVEL_BASED; - - if 
(!s_p_get_uint16(&conf->priority_levels, - "PriorityLevels", hashtbl) - || conf->priority_levels > 16 - || conf->priority_levels == 0 - ) { - /* Anything higher than 16 makes the bucket - * width be smaller than 4 bits; this has too - * much precision loss. - */ - error( - "LEVEL_BASED system requires " - "PriorityLevels to exist and " - "be between 1 and 16 inclusive" - ); - return SLURM_ERROR; - } - } + else if (slurm_strcasestr(temp_str, "FAIR_TREE")) + conf->priority_flags |= PRIORITY_FLAGS_FAIR_TREE; xfree(temp_str); } diff --git a/src/common/slurm_priority.h b/src/common/slurm_priority.h index 8373221dc1072d4d56ae84584a7a23f89b850aa9..0c8978cd88863b4fb1eeb52dc0cff297f76da271 100644 --- a/src/common/slurm_priority.h +++ b/src/common/slurm_priority.h @@ -49,13 +49,6 @@ # include <inttypes.h> /* for uint16_t, uint32_t definitions */ #endif -/* Statistically normalize a value to another range. Cast as needed to avoid - * integer division -*/ -#define NORMALIZE_VALUE(x, from_min, from_max, to_min, to_max) ( \ - to_min + (x - from_min) * (to_max - to_min) / (from_max - from_min) \ -) - #include "src/slurmctld/slurmctld.h" #include "src/common/slurm_accounting_storage.h" diff --git a/src/common/slurm_protocol_api.c b/src/common/slurm_protocol_api.c index 88386bc2246d9cd1c1ba082f755b78ed6f321d57..490c03b05e9b6d206d3c28d24805f30283fedf75 100644 --- a/src/common/slurm_protocol_api.c +++ b/src/common/slurm_protocol_api.c @@ -574,25 +574,6 @@ bool slurm_get_priority_favor_small(void) return factor; } -/* slurm_get_priority_levels - * returns the number of priority levels of interest from slurmctld_conf object - * RET uint16_t - number of levels of interest - */ -uint16_t slurm_get_priority_levels(void) -{ - uint16_t levels = (uint16_t)NO_VAL; - slurm_ctl_conf_t *conf; - - if (slurmdbd_conf) { - } else { - conf = slurm_conf_lock(); - levels = conf->priority_levels; - slurm_conf_unlock(); - } - - return levels; -} - /* slurm_get_priority_max_age * returns the priority age 
max in seconds from slurmctld_conf object * RET uint32_t - age_max in secs. diff --git a/src/common/slurm_protocol_api.h b/src/common/slurm_protocol_api.h index d006459ed938084d79e37640cdaf1babb230e6ed..8a31f349121dbc6e9f3be6a51968e218dfb87cb8 100644 --- a/src/common/slurm_protocol_api.h +++ b/src/common/slurm_protocol_api.h @@ -283,12 +283,6 @@ uint32_t slurm_get_priority_calc_period(void); */ bool slurm_get_priority_favor_small(void); -/* slurm_get_priority_levels - * returns the number of priority levels of interest from slurmctld_conf object - * RET uint16_t - number of levels of interest - */ -uint16_t slurm_get_priority_levels(void); - /* slurm_get_priority_max_age * returns the priority age max in seconds from slurmctld_conf object * RET uint32_t - max_age in secs. diff --git a/src/common/slurm_protocol_defs.h b/src/common/slurm_protocol_defs.h index 3420bce7dbb54c7fc817f8dccd787aa56bf11b37..ea988271acf05bc54a3129e287af4a385ba5fa5c 100644 --- a/src/common/slurm_protocol_defs.h +++ b/src/common/slurm_protocol_defs.h @@ -466,8 +466,10 @@ typedef struct association_shares_object { double usage_efctv; /* effective, normalized usage */ double usage_norm; /* normalized usage */ uint64_t usage_raw; /* measure of resource usage */ - uint64_t priority_fs_raw; /* LEVEL_BASED priority_fs before sorting */ - uint64_t priority_fs_ranked; /* LEVEL_BASED priority_fs after sorting */ + double fs_factor; /* fairshare factor */ + double level_fs; /* fairshare factor at this level. 
stored on an + * assoc as a long double, but that is not + * needed for display in sshare */ uint16_t user; /* 1 if user association 0 if account * association */ } association_shares_object_t; diff --git a/src/common/slurm_protocol_pack.c b/src/common/slurm_protocol_pack.c index e5701dfb672761772202a5fd6c2ff5c6bb23d583..c40aaa55d8400f0db3babb4ce13a76e1bcc0288c 100644 --- a/src/common/slurm_protocol_pack.c +++ b/src/common/slurm_protocol_pack.c @@ -2020,11 +2020,8 @@ static void _pack_assoc_shares_object(void *in, Buf buffer, pack64(object->grp_cpu_mins, buffer); pack64(object->cpu_run_mins, buffer); - pack64(object->priority_fs_raw, buffer); - - pack64(object->grp_cpu_mins, buffer); - pack64(object->cpu_run_mins, buffer); - pack64(object->priority_fs_ranked, buffer); + packdouble(object->fs_factor, buffer); + packdouble(object->level_fs, buffer); pack16(object->user, buffer); } else { @@ -2060,11 +2057,8 @@ static int _unpack_assoc_shares_object(void **object, Buf buffer, safe_unpack64(&object_ptr->grp_cpu_mins, buffer); safe_unpack64(&object_ptr->cpu_run_mins, buffer); - safe_unpack64(&object_ptr->priority_fs_raw, buffer); - - safe_unpack64(&object_ptr->grp_cpu_mins, buffer); - safe_unpack64(&object_ptr->cpu_run_mins, buffer); - safe_unpack64(&object_ptr->priority_fs_ranked, buffer); + safe_unpackdouble(&object_ptr->fs_factor, buffer); + safe_unpackdouble(&object_ptr->level_fs, buffer); safe_unpack16(&object_ptr->user, buffer); } else { @@ -5327,7 +5321,6 @@ _pack_slurm_ctl_conf_msg(slurm_ctl_conf_info_msg_t * build_ptr, Buf buffer, pack32(build_ptr->priority_calc_period, buffer); pack16(build_ptr->priority_favor_small, buffer); pack16(build_ptr->priority_flags, buffer); - pack16(build_ptr->priority_levels, buffer); pack32(build_ptr->priority_max_age, buffer); packstr(build_ptr->priority_params, buffer); pack16(build_ptr->priority_reset_period, buffer); @@ -6139,7 +6132,6 @@ _unpack_slurm_ctl_conf_msg(slurm_ctl_conf_info_msg_t **build_buffer_ptr, 
safe_unpack32(&build_ptr->priority_calc_period, buffer); safe_unpack16(&build_ptr->priority_favor_small, buffer); safe_unpack16(&build_ptr->priority_flags, buffer); - safe_unpack16(&build_ptr->priority_levels, buffer); safe_unpack32(&build_ptr->priority_max_age, buffer); safe_unpackstr_xmalloc(&build_ptr->priority_params, &uint32_tmp, buffer); diff --git a/src/plugins/priority/multifactor/Makefile.am b/src/plugins/priority/multifactor/Makefile.am index 12f1877aa0eca60c7e9c57b2b4627d541cb188c1..53b86c4c82cd1dd8caf2aebaa3891d3e6456e605 100644 --- a/src/plugins/priority/multifactor/Makefile.am +++ b/src/plugins/priority/multifactor/Makefile.am @@ -9,7 +9,7 @@ AM_CPPFLAGS = -I$(top_srcdir) -I$(top_srcdir)/src/common pkglib_LTLIBRARIES = priority_multifactor.la # Null priority logging plugin. -priority_multifactor_la_SOURCES = priority_multifactor.c level_based.c level_based.h priority_multifactor.h +priority_multifactor_la_SOURCES = priority_multifactor.c fair_tree.c fair_tree.h priority_multifactor.h priority_multifactor_la_LDFLAGS = $(SO_LDFLAGS) $(PLUGIN_FLAGS) diff --git a/src/plugins/priority/multifactor/Makefile.in b/src/plugins/priority/multifactor/Makefile.in index 5bdf11a6628922cc901438c04eb0dcdfd06df4b6..adfa88d5c9dcd94802780f546888f2053701ac12 100644 --- a/src/plugins/priority/multifactor/Makefile.in +++ b/src/plugins/priority/multifactor/Makefile.in @@ -162,7 +162,7 @@ am__installdirs = "$(DESTDIR)$(pkglibdir)" LTLIBRARIES = $(pkglib_LTLIBRARIES) priority_multifactor_la_DEPENDENCIES = am_priority_multifactor_la_OBJECTS = priority_multifactor.lo \ - level_based.lo + fair_tree.lo priority_multifactor_la_OBJECTS = \ $(am_priority_multifactor_la_OBJECTS) AM_V_lt = $(am__v_lt_@AM_V@) @@ -476,7 +476,7 @@ AM_CPPFLAGS = -I$(top_srcdir) -I$(top_srcdir)/src/common pkglib_LTLIBRARIES = priority_multifactor.la # Null priority logging plugin. 
-priority_multifactor_la_SOURCES = priority_multifactor.c level_based.c level_based.h priority_multifactor.h +priority_multifactor_la_SOURCES = priority_multifactor.c fair_tree.c fair_tree.h priority_multifactor.h priority_multifactor_la_LDFLAGS = $(SO_LDFLAGS) $(PLUGIN_FLAGS) priority_multifactor_la_LIBADD = -lm all: all-am @@ -558,7 +558,7 @@ mostlyclean-compile: distclean-compile: -rm -f *.tab.c -@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/level_based.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fair_tree.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/priority_multifactor.Plo@am__quote@ .c.o: diff --git a/src/plugins/priority/multifactor/fair_tree.c b/src/plugins/priority/multifactor/fair_tree.c new file mode 100644 index 0000000000000000000000000000000000000000..2938bec0cd3066168da67a87e803411ac70b5ccc --- /dev/null +++ b/src/plugins/priority/multifactor/fair_tree.c @@ -0,0 +1,369 @@ +/*****************************************************************************\ + * fair_tree.c - Fair Tree fairshare algorithm for Slurm + ***************************************************************************** + * + * Copyright (C) 2014 Brigham Young University + * Authors: Ryan Cox <ryan_cox@byu.edu>, Levi Morrison <levi_morrison@byu.edu> + * + * This file is part of SLURM, a resource management program. + * For details, see <http://slurm.schedmd.com/>. + * Please also read the included file: DISCLAIMER. + * + * SLURM is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. 
+ * + * In addition, as a special exception, the copyright holders give permission + * to link the code of portions of this program with the OpenSSL library under + * certain conditions as described in each individual source file, and + * distribute linked combinations including the two. You must obey the GNU + * General Public License in all respects for all of the code used other than + * OpenSSL. If you modify file(s) with this exception, you may extend this + * exception to your version of the file(s), but you are not obligated to do + * so. If you do not wish to do so, delete this exception statement from your + * version. If you delete this exception statement from all source files in + * the program, then also delete it here. + * + * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along + * with SLURM; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+\*****************************************************************************/ + +#include <math.h> + +#include "fair_tree.h" + +static void _ft_decay_apply_new_usage(struct job_record *job, time_t *start); +static void _apply_priority_fs(void); +typedef int (*QsortCmpF) (void *a, void *b); + + +extern void fair_tree_init(void) { +} + + +/* Fair Tree code called from the decay thread loop */ +extern void fair_tree_decay(List jobs, time_t start) +{ + slurmctld_lock_t job_write_lock = + { NO_LOCK, WRITE_LOCK, READ_LOCK, READ_LOCK }; + assoc_mgr_lock_t locks = + { WRITE_LOCK, NO_LOCK, NO_LOCK, NO_LOCK, NO_LOCK }; + + /* apply decayed usage */ + lock_slurmctld(job_write_lock); + list_for_each(jobs, (ListForF) _ft_decay_apply_new_usage, &start); + unlock_slurmctld(job_write_lock); + + /* calculate fs factor for associations */ + assoc_mgr_lock(&locks); + _apply_priority_fs(); + assoc_mgr_unlock(&locks); + + /* assign job priorities */ + lock_slurmctld(job_write_lock); + list_for_each(jobs, (ListForF) decay_apply_weighted_factors, &start); + unlock_slurmctld(job_write_lock); +} + + +/* In Fair Tree, usage_efctv is the normalized usage within the account */ +static void _ft_set_assoc_usage_efctv( + slurmdb_association_rec_t *assoc) +{ + slurmdb_association_rec_t *parent = assoc->usage->fs_assoc_ptr; + + if (!parent || !parent->usage->usage_raw) { + assoc->usage->usage_efctv = 0L; + return; + } + + assoc->usage->usage_efctv = + assoc->usage->usage_raw / parent->usage->usage_raw; +} + + +/* Apply usage with decay factor. Call standard functions */ +static void _ft_decay_apply_new_usage(struct job_record *job, time_t *start) +{ + if (!decay_apply_new_usage(job, start)) + return; + + /* Priority 0 is reserved for held jobs. Also skip priority + * calculation for non-pending jobs. 
*/ + if ((job->priority == 0) || !IS_JOB_PENDING(job)) + return; + + set_priority_factors(*start, job); + last_job_update = time(NULL); +} + + +static void _ft_debug(slurmdb_association_rec_t *assoc, + uint16_t assoc_level, bool tied) +{ + int spaces; + char *name; + int tie_char_count = tied ? 1 : 0; + + spaces = (assoc_level + 1) * 4; + name = assoc->user ? assoc->user : assoc->acct; + + if (assoc->shares_raw == SLURMDB_FS_USE_PARENT) { + info("%*s%.*s%s (%s): parent", + spaces, + "", + tie_char_count, + "=", + name, + assoc->acct); + } else { + info("%*s%.*s%s (%s): %.20Lf", + spaces, + "", + tie_char_count, + "=", + name, + assoc->acct, + assoc->usage->level_fs); + } + +} + + +/* Sort so that higher level_fs values are first in the list */ +static int _cmp_level_fs(slurmdb_association_rec_t **x, + slurmdb_association_rec_t **y) +{ + /* We sort based on the following criteria: + * 1. level_fs value + * 2. Prioritize users over accounts (required for tie breakers when + * comparing users and accounts) + */ + slurmdb_association_rec_t *a = *x; + slurmdb_association_rec_t *b = *y; + + /* 1. level_fs value */ + if (a->usage->level_fs != b->usage->level_fs) + return a->usage->level_fs < b->usage->level_fs ? 1 : -1; + + /* 2. Prioritize users over accounts */ + + /* a and b are both users or both accounts */ + if (!a->user == !b->user) + return 0; + + /* -1 if a is user, 1 if b is user */ + return a->user ? -1 : 1; +} + + +/* Calculate LF = S / U for an association. + * + * U is usage_raw / parent's usage_raw. + * S is shares_raw / level_shares + * + * The range of values is 0.0 .. INFINITY. + * If LF > 1.0, the association is under-served. + * If LF < 1.0, the association is over-served.
+ */ +static void _calc_assoc_fs(slurmdb_association_rec_t *assoc) +{ + long double U; /* long double U != long W */ + long double S; + + _ft_set_assoc_usage_efctv(assoc); + + /* Fair Tree doesn't use usage_norm but we will set it anyway */ + set_assoc_usage_norm(assoc); + + U = assoc->usage->usage_efctv; + S = assoc->usage->shares_norm; + + /* Users marked as USE_PARENT are assigned the maximum level_fs so they + * rank highest in their account, subject to ties. + * Accounts marked as USE_PARENT do not use level_fs */ + if (assoc->shares_raw == SLURMDB_FS_USE_PARENT) { + if (assoc->user) + assoc->usage->level_fs = INFINITY; + else + assoc->usage->level_fs = (long double) NO_VAL; + return; + } + + /* If S is 0, the assoc is assigned the lowest possible LF value. If + * U==0 && S!=0, assoc is assigned the highest possible value, infinity. + * Checking for U==0 then setting level_fs=INFINITY is not the same + * since you would still have to check for S==0 then set level_fs=0. + * + * NOT A BUG: U can be 0. The result is infinity, a valid value. 
*/ + if (S == 0L) + assoc->usage->level_fs = 0L; + else + assoc->usage->level_fs = S / U; +} + + +static slurmdb_association_rec_t** _append_children_to_array(List list, + slurmdb_association_rec_t** merged, + size_t *child_count) +{ + ListIterator itr; + slurmdb_association_rec_t *next; + size_t i = *child_count; + *child_count += list_count(list); + + merged = xrealloc(merged, sizeof(slurmdb_association_rec_t*) + * (*child_count + 1)); + + itr = list_iterator_create(list); + while ((next = list_next(itr))) + merged[i++] = next; + list_iterator_destroy(itr); + + return merged; +} + + +static size_t _count_tied_accounts(slurmdb_association_rec_t** assocs, + size_t i) +{ + slurmdb_association_rec_t* next_assoc; + slurmdb_association_rec_t* assoc = assocs[i]; + size_t tied_accounts = 0; + while ((next_assoc = assocs[++i])) { + if (!next_assoc->user) + break; + if (assoc->usage->level_fs != next_assoc->usage->level_fs) + break; + tied_accounts++; + } + return tied_accounts; +} + + +static slurmdb_association_rec_t** _merge_accounts( + slurmdb_association_rec_t** siblings, + size_t begin, size_t end, uint16_t assoc_level) +{ + size_t i; + size_t child_count = 0; + /* merged is a null terminated array */ + slurmdb_association_rec_t** merged = (slurmdb_association_rec_t **) + xmalloc(sizeof(slurmdb_association_rec_t *)); + merged[0] = NULL; + + for (i = begin; i <= end; i++) { + List children = siblings[i]->usage->children_list; + + if (priority_debug && i > begin) + _ft_debug(siblings[i], assoc_level, true); + + if (!children || list_is_empty(children)) { + continue; + } + + merged = _append_children_to_array(children, merged, + &child_count); + } + return merged; +} + + +/* Calculate fairshare for each child then sort children by fairshare value + * (level_fs). Once they are sorted, operate on each child in sorted order. + * This portion of the tree is now sorted and users are given a fairshare value + * based on the order they are operated on. 
The basic equation is + * (rank / g_user_assoc_count), though ties are allowed. The rank is decremented + * for each user that is encountered. + */ +static void _calc_tree_fs(slurmdb_association_rec_t** siblings, + uint16_t assoc_level, uint32_t *rank, uint32_t *i, + bool account_tied) +{ + slurmdb_association_rec_t *assoc = NULL; + long double prev_level_fs = (long double) NO_VAL; + bool tied = false; + size_t ndx; + + /* Calculate level_fs for each child */ + for (ndx = 0; (assoc = siblings[ndx]); ndx++) + _calc_assoc_fs(assoc); + + /* Sort children by level_fs */ + qsort(siblings, ndx, sizeof(slurmdb_association_rec_t*), + (QsortCmpF) _cmp_level_fs); + + /* Iterate through children in sorted order. If it's a user, calculate + * fs_factor, otherwise recurse. */ + for (ndx = 0; (assoc = siblings[ndx]); ndx++) { + if (account_tied) { + tied = true; + account_tied = false; + } else { + tied = prev_level_fs == assoc->usage->level_fs; + } + + if (priority_debug) + _ft_debug(assoc, assoc_level, tied); + if (assoc->user) { + if (!tied) + *rank = *i; + + /* Set the final fairshare factor for this user */ + assoc->usage->fs_factor = + *rank / (double) g_user_assoc_count; + (*i)--; + } else { + slurmdb_association_rec_t** children; + size_t merge_count = + _count_tied_accounts(siblings, ndx); + + /* Merging does not affect child level_fs calculations + * since the necessary information is stored on each + * assoc's usage struct */ + children = _merge_accounts(siblings, ndx, + ndx + merge_count, assoc_level); + + _calc_tree_fs(children, assoc_level + 1, rank, i, tied); + + /* Skip over any merged accounts */ + ndx += merge_count; + + xfree(children); + } + prev_level_fs = assoc->usage->level_fs; + } + +} + + +/* Start fairshare calculations at root. Call assoc_mgr_lock before this. 
*/ +static void _apply_priority_fs(void) +{ + slurmdb_association_rec_t** children = NULL; + uint32_t rank = g_user_assoc_count; + uint32_t i = rank; + size_t child_count = 0; + + if (priority_debug) + info("Fair Tree fairshare algorithm, starting at root:"); + + assoc_mgr_root_assoc->usage->level_fs = 1L; + + /* _calc_tree_fs requires an array instead of List */ + children = _append_children_to_array( + assoc_mgr_root_assoc->usage->children_list, + children, + &child_count); + + _calc_tree_fs(children, 0, &rank, &i, false); + + xfree(children); +} diff --git a/src/plugins/priority/multifactor/level_based.h b/src/plugins/priority/multifactor/fair_tree.h similarity index 77% rename from src/plugins/priority/multifactor/level_based.h rename to src/plugins/priority/multifactor/fair_tree.h index 7664545e85a40eaf601e6a55937777b14a678c89..6c4c3f9cd9c784c5a5166fa960aba3af6d308372 100644 --- a/src/plugins/priority/multifactor/level_based.h +++ b/src/plugins/priority/multifactor/fair_tree.h @@ -1,5 +1,5 @@ /*****************************************************************************\ - * level_based.c - level_based slurm multifactor algorithm + * fair_tree.h - Fair Tree fairshare algorithm for Slurm ***************************************************************************** * * Copyright (C) 2014 Brigham Young University @@ -35,22 +35,14 @@ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
\*****************************************************************************/ -#ifndef _PRIORITY_MULTIFACTOR_LEVEL_BASED_H -#define _PRIORITY_MULTIFACTOR_LEVEL_BASED_H +#ifndef _PRIORITY_MULTIFACTOR_FAIR_TREE_H +#define _PRIORITY_MULTIFACTOR_FAIR_TREE_H #include "priority_multifactor.h" -extern void level_based_init(void); +extern void fair_tree_init(void); -/* LEVEL_BASED code called from the decay thread loop */ -extern void level_based_decay(List job_list, time_t start_time); - -/* Normalize the assoc's usage for use in usage_efctv: - * from: 0.0 to parent->usage->usage_raw - * to: 0.0 to 1.0 - * - * In LEVEL_BASED, usage_efctv is the normalized usage within the account - */ -extern double level_based_calc_assoc_usage(slurmdb_association_rec_t *assoc); +/* Fair Tree code called from the decay thread loop */ +extern void fair_tree_decay(List jobs, time_t start); #endif diff --git a/src/plugins/priority/multifactor/level_based.c b/src/plugins/priority/multifactor/level_based.c deleted file mode 100644 index 81c18c9005a53d3dbe4a2ea800468c8eade918c7..0000000000000000000000000000000000000000 --- a/src/plugins/priority/multifactor/level_based.c +++ /dev/null @@ -1,372 +0,0 @@ -/*****************************************************************************\ - * level_based.c - level_based slurm multifactor algorithm - ***************************************************************************** - * - * Copyright (C) 2014 Brigham Young University - * Authors: Ryan Cox <ryan_cox@byu.edu>, Levi Morrison <levi_morrison@byu.edu> - * - * This file is part of SLURM, a resource management program. - * For details, see <http://slurm.schedmd.com/>. - * Please also read the included file: DISCLAIMER. - * - * SLURM is free software; you can redistribute it and/or modify it under - * the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) - * any later version. 
- * - * In addition, as a special exception, the copyright holders give permission - * to link the code of portions of this program with the OpenSSL library under - * certain conditions as described in each individual source file, and - * distribute linked combinations including the two. You must obey the GNU - * General Public License in all respects for all of the code used other than - * OpenSSL. If you modify file(s) with this exception, you may extend this - * exception to your version of the file(s), but you are not obligated to do - * so. If you do not wish to do so, delete this exception statement from your - * version. If you delete this exception statement from all source files in - * the program, then also delete it here. - * - * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY - * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS - * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more - * details. - * - * You should have received a copy of the GNU General Public License along - * with SLURM; if not, write to the Free Software Foundation, Inc., - * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. -\*****************************************************************************/ - -#include <math.h> - -#include "level_based.h" - -/* How many levels to care about */ -static uint16_t priority_levels; - -/* How many bits available for each level */ -static uint32_t bucket_width_in_bits; - -/* Unused bucket bits (e.g. 
64 % priority_levels) */ -static uint32_t unused_bucket_bits; - -/* Maximum value that can be stored in a bucket */ -static uint64_t bucket_max; - -static uint64_t _level_based_calc_level_fs(slurmdb_association_rec_t *assoc, - uint16_t assoc_level); -static void _level_based_calc_children_fs(List children_list, - List users, - uint16_t assoc_level); -static void _level_based_decay_apply_new_usage(struct job_record *job_ptr, - time_t *start_time_ptr); -static void _level_based_apply_priority_fs(void); - - -extern void level_based_init(void) { - priority_levels = slurm_get_priority_levels(); - /* calculate how many bits per level. truncate if necessary */ - bucket_width_in_bits = 64 / priority_levels; - unused_bucket_bits = 64 % priority_levels; - bucket_max = UINT64_MAX >> (64 - bucket_width_in_bits); -} - - -/* LEVEL_BASED code called from the decay thread loop */ -extern void level_based_decay(List job_list, time_t start_time) -{ - slurmctld_lock_t job_write_lock = - { NO_LOCK, WRITE_LOCK, READ_LOCK, READ_LOCK }; - assoc_mgr_lock_t locks = { WRITE_LOCK, NO_LOCK, - NO_LOCK, NO_LOCK, NO_LOCK }; - - /* apply decayed usage */ - lock_slurmctld(job_write_lock); - list_for_each(job_list, - (ListForF) _level_based_decay_apply_new_usage, - &start_time); - unlock_slurmctld(job_write_lock); - - /* calculate priority for associations */ - assoc_mgr_lock(&locks); - _level_based_apply_priority_fs(); - assoc_mgr_unlock(&locks); - - /* assign job priorities */ - lock_slurmctld(job_write_lock); - list_for_each(job_list, - (ListForF) decay_apply_weighted_factors, - &start_time); - unlock_slurmctld(job_write_lock); -} - - -/* Normalize the assoc's usage for use in usage_efctv: - * from: 0.0 to parent->usage->usage_raw - * to: 0.0 to 1.0 - * - * In LEVEL_BASED, usage_efctv is the normalized usage within the account - */ -extern double level_based_calc_assoc_usage(slurmdb_association_rec_t *assoc) -{ - double norm = 0.0l; - slurmdb_association_rec_t *parent = 
assoc->usage->fs_assoc_ptr; - - if (parent && parent->usage->usage_raw) - norm = NORMALIZE_VALUE( - assoc->usage->usage_raw, - 0.0L, (long double) parent->usage->usage_raw, - 0.0L, 1.0L); - - return norm; -} - - -/* Apply usage with decay factor. Call standard functions */ -static void _level_based_decay_apply_new_usage( - struct job_record *job_ptr, - time_t *start_time_ptr) -{ - if (!decay_apply_new_usage(job_ptr, start_time_ptr)) - return; - /* - * Priority 0 is reserved for held jobs. Also skip priority - * calculation for non-pending jobs. - */ - if ((job_ptr->priority == 0) || !IS_JOB_PENDING(job_ptr)) - return; - - set_priority_factors(*start_time_ptr, job_ptr); - last_job_update = time(NULL); -} - - -static void _level_based_calc_children_fs_priority_debug( - uint64_t priority_fs_raw, - uint64_t level_fs_raw, - slurmdb_association_rec_t *assoc, - uint16_t assoc_level) -{ - int spaces; - char *name; - - if (!priority_debug) - return; - - spaces = (assoc_level + 1) * 4; - name = assoc->user ? assoc->user : assoc->acct; - - debug2("%*s0x%016"PRIX64" | 0x%016"PRIX64" (%s)", - spaces, - "", - priority_fs_raw, - level_fs_raw, - name); - if (assoc->user) - debug2("%*s%18s = 0x%016"PRIX64" (%s)", - spaces, - "", - "", - priority_fs_raw | level_fs_raw, - assoc->user); - -} - - -/* Calculate F=2**(-Ueff/S) at the current level. Shift the result based on - * depth in the association tree and the bucket size. 
- */ -static uint64_t _level_based_calc_level_fs(slurmdb_association_rec_t *assoc, - uint16_t assoc_level) -{ - uint64_t level_fs = 0; - long double level_ratio = 0.0L; - long double shares_adj = 0.0L; - - if (assoc->shares_raw == SLURMDB_FS_USE_PARENT) { - if(assoc->user) - level_fs = 1.0L; - else - return 0; - } else if (assoc->usage->shares_norm) { - - /* This function normalizes shares to be between 0.1 and 1.0; - * this range fares much better than 0.0 to 1.0 when used in - * the denominator of the fairshare calculation: - * 2**(-UsageEffective / Shares) - * - * Compare these two: - * http://www.wolframalpha.com/input/?i=2%5E-%28u%2Fs%29%2C+u+from+0+to+1%2C+s+from+.1+to+1 - * http://www.wolframalpha.com/input/?i=2%5E-%28u%2Fs%29%2C+u+from+0+to+1%2C+s+from+0+to+1 - */ - shares_adj = NORMALIZE_VALUE(assoc->usage->shares_norm, - 0.0l, 1.0l, - 0.1L, 1.0L); - level_ratio = assoc->usage->usage_efctv / shares_adj; - } - - /* reserve 0 for special casing */ - level_fs = NORMALIZE_VALUE(powl(2L, -level_ratio), - 0.0L, 1.0L, - 1, bucket_max); - - - level_fs <<= ((priority_levels - assoc_level - 1) - * bucket_width_in_bits - + unused_bucket_bits); - return level_fs; -} - - -/* Calculate and set priority_fs_raw at each level then recurse to children. - * Also, append users to user list while we are traversing. - * This function calls and is called by _level_based_calc_children_fs(). - */ -static void _level_based_calc_assoc_fs( - List users, - slurmdb_association_rec_t *assoc, - uint16_t assoc_level) -{ - const uint64_t priority_fs_raw = - assoc->usage->parent_assoc_ptr->usage->priority_fs_raw; - uint64_t level_fs = 0; - - /* Calculate the fairshare factor at this level, properly shifted - * - * If assoc_level >= priority_levels, the tree is deeper than - * priority_levels; you are done with priority calculations but still - * need to set the values on each child. 
- */ - if (assoc_level < priority_levels) - level_fs = _level_based_calc_level_fs(assoc, assoc_level); - - /* Bitwise OR the level fairshare factor with the parent's. For a - * user, this is the final fairshare factor that is used in sorting - * and ranking. - */ - assoc->usage->priority_fs_raw = priority_fs_raw | level_fs; - - /* Found a user, add to users list */ - if (assoc->user) - list_append(users, assoc); - - _level_based_calc_children_fs_priority_debug( - priority_fs_raw, level_fs, assoc, assoc_level); - - /* If USE_PARENT, set priority_fs_raw equal to parent then work on - * children */ - if (assoc->shares_raw == SLURMDB_FS_USE_PARENT) - _level_based_calc_children_fs( - assoc->usage->children_list, users, assoc_level); - else if (!assoc->user) - /* If this is an account, descend to child accounts */ - _level_based_calc_children_fs( - assoc->usage->children_list, - users, - assoc_level + 1); -} - - -/* Call _level_based_calc_assoc_fs() on each child, if any. This function will - * be called again by _level_based_calc_assoc_fs() for child accounts (not - * users), thus making it recursive. - */ -static void _level_based_calc_children_fs(List children_list, - List users, - uint16_t assoc_level) -{ - ListIterator itr = NULL; - slurmdb_association_rec_t *assoc = NULL; - - if (!children_list || !list_count(children_list)) - return; - - itr = list_iterator_create(children_list); - while ((assoc = list_next(itr))) - _level_based_calc_assoc_fs( - users, assoc, assoc_level); - list_iterator_destroy(itr); -} - - -/* Sort so that higher priority_fs_raw values are first in the list */ -static int _level_based_sort_priority_fs(slurmdb_association_rec_t **x, - slurmdb_association_rec_t **y) -{ - uint64_t a = (*x)->usage->priority_fs_raw; - uint64_t b = (*y)->usage->priority_fs_raw; - - if (a < b) - return 1; - else if (b < a) - return -1; - else - return 0; -} - - -/* Iterate through sorted list of users. 
Apply priorities based on their rank, - * allowing for duplicate rankings if priority_fs_raw is equal for users - * (i vs rank). - */ -static void _level_based_apply_rank(List users) -{ - ListIterator itr = list_iterator_create(users); - slurmdb_association_rec_t *assoc; - int count = list_count(users); - int i = count - 1; - int rank = count - 1; - /* priority_fs_raw can't be equal to 0 due to normalization in - * _level_based_calc_level_fs */ - uint64_t prev_priority_fs_raw = 0; - - while ((assoc = list_next(itr))) { - xassert(assoc->usage->priority_fs_raw != 0); - - /* If same as prev, rank stays the same. This allows for - * rankings like 7,6,5,5,5,2,1,0 */ - if(prev_priority_fs_raw != assoc->usage->priority_fs_raw) - rank = i; - assoc->usage->priority_fs_ranked = - NORMALIZE_VALUE(rank, 0.0, (long double) count, - 0, UINT64_MAX); - if (priority_debug) - info("Fairshare for user %s in acct %s: ranked " - "%d/%d (0x%016"PRIX64")", - assoc->user, assoc->acct, rank, count, - assoc->usage->priority_fs_ranked); - i--; - prev_priority_fs_raw = assoc->usage->priority_fs_raw; - } - - list_iterator_destroy(itr); -} - - -/* Calculate fairshare for associations, sort users by priority_fs_raw, then - * use the rank in the sorted list as a user's fs factor - * - * Call assoc_mgr_lock before this */ -static void _level_based_apply_priority_fs(void) -{ - List users = list_create(NULL); - - if (priority_debug) { - debug2("LEVEL_BASED Fairshare, starting at root:"); - debug2("%s | %s", "parent_fs", "current_fs"); - } - assoc_mgr_root_assoc->usage->priority_fs_raw = 0; - assoc_mgr_root_assoc->usage->priority_fs_ranked = 0; - - /* set priority_fs_raw on each assoc and add users to List users */ - _level_based_calc_children_fs( - assoc_mgr_root_assoc->usage->children_list, - users, - 0); - - /* sort users by priority_fs_raw */ - list_sort(users, (ListCmpF) _level_based_sort_priority_fs); - - /* set user ranking based on their position in the sorted list */ - 
_level_based_apply_rank(users); - - list_destroy(users); -} diff --git a/src/plugins/priority/multifactor/priority_multifactor.c b/src/plugins/priority/multifactor/priority_multifactor.c index 77ef0ce6732eee9f4061df187a376b61946d9882..b379d60a7b620f3898a063679296de142ee48e88 100644 --- a/src/plugins/priority/multifactor/priority_multifactor.c +++ b/src/plugins/priority/multifactor/priority_multifactor.c @@ -76,7 +76,7 @@ #include "src/common/xstring.h" #include "src/common/parse_time.h" -#include "level_based.h" +#include "fair_tree.h" #define SECS_PER_DAY (24 * 60 * 60) #define SECS_PER_WEEK (7 * SECS_PER_DAY) @@ -161,9 +161,8 @@ static double decay_factor = 1; /* The decay factor when decaying time. */ /* variables defined in prirority_multifactor.h */ bool priority_debug = 0; -extern void priority_p_set_assoc_usage(slurmdb_association_rec_t *assoc); -extern double priority_p_calc_fs_factor(long double usage_efctv, - long double shares_norm); +static void _priority_p_set_assoc_usage_debug(slurmdb_association_rec_t *assoc); +static void _set_assoc_usage_efctv(slurmdb_association_rec_t *assoc); /* * apply decay factor to all associations usage_raw @@ -426,9 +425,9 @@ static void _ticket_based_set_usage_efctv(slurmdb_association_rec_t *assoc) /* This should initially get the children list from assoc_mgr_root_assoc. * Since our algorithm goes from top down we calculate all the non-user * associations now. When a user submits a job, that norm_fairshare is - * calculated. Here we will set the usage_efctv to NO_VAL for users to not - * have to calculate a bunch of things that will never be used, except it will - * be calculated for LEVEL_BASED. + * calculated. Here we will set the usage_efctv to NO_VAL for users to not have + * to calculate a bunch of things that will never be used. (Fair Tree calls a + * different function.) * * NOTE: acct_mgr_association_lock must be locked before this is called. 
*/ @@ -443,11 +442,7 @@ static int _set_children_usage_efctv(List children_list) itr = list_iterator_create(children_list); while ((assoc = list_next(itr))) { if (assoc->user) { - /* LEVEL_BASED must calculate this for users */ - if(flags & PRIORITY_FLAGS_LEVEL_BASED) - priority_p_set_assoc_usage(assoc); - else - assoc->usage->usage_efctv = (long double)NO_VAL; + assoc->usage->usage_efctv = (long double)NO_VAL; continue; } priority_p_set_assoc_usage(assoc); @@ -559,16 +554,13 @@ static double _get_fairshare_priority(struct job_record *job_ptr) job_ptr->job_id, job_assoc->user, job_assoc->acct, priority_fs); } - } else if (flags & PRIORITY_FLAGS_LEVEL_BASED) { - priority_fs = NORMALIZE_VALUE( - job_assoc->usage->priority_fs_ranked, - 0, UINT64_MAX, - 0.0L, 1.0L); + } else if (flags & PRIORITY_FLAGS_FAIR_TREE) { + priority_fs = job_assoc->usage->fs_factor; if (priority_debug) { info("Fairhare priority of job %u for user %s in acct" - " %s is %f (0x%016"PRIX64")", + " %s is %f", job_ptr->job_id, job_assoc->user, job_assoc->acct, - priority_fs, job_assoc->usage->priority_fs_ranked); + priority_fs); } } else { priority_fs = priority_p_calc_fs_factor( @@ -1257,11 +1249,14 @@ static void *_decay_thread(void *no_data) } } - /* now calculate all the normalized usage here */ - assoc_mgr_lock(&locks); - _set_children_usage_efctv( - assoc_mgr_root_assoc->usage->children_list); - assoc_mgr_unlock(&locks); + /* Calculate all the normalized usage unless this is Fair Tree; + * it handles these calculations during its tree traversal */ + if (!(flags & PRIORITY_FLAGS_FAIR_TREE)) { + assoc_mgr_lock(&locks); + _set_children_usage_efctv( + assoc_mgr_root_assoc->usage->children_list); + assoc_mgr_unlock(&locks); + } if (!g_last_ran) goto get_usage; @@ -1289,7 +1284,7 @@ static void *_decay_thread(void *no_data) } if (!(flags & (PRIORITY_FLAGS_TICKET_BASED - | PRIORITY_FLAGS_LEVEL_BASED))) { + | PRIORITY_FLAGS_FAIR_TREE))) { lock_slurmctld(job_write_lock); list_for_each( job_list, @@ 
-1302,8 +1297,8 @@ static void *_decay_thread(void *no_data) get_usage: if (flags & PRIORITY_FLAGS_TICKET_BASED) _ticket_based_decay(job_list, start_time); - else if (flags & PRIORITY_FLAGS_LEVEL_BASED) - level_based_decay(job_list, start_time); + else if (flags & PRIORITY_FLAGS_FAIR_TREE) + fair_tree_decay(job_list, start_time); g_last_ran = start_time; @@ -1392,8 +1387,8 @@ static void _internal_setup(void) weight_qos = slurm_get_priority_weight_qos(); flags = slurmctld_conf.priority_flags; - if (flags & PRIORITY_FLAGS_LEVEL_BASED) { - level_based_init(); + if (flags & PRIORITY_FLAGS_FAIR_TREE) { + fair_tree_init(); } if (priority_debug) { @@ -1432,15 +1427,22 @@ static void _set_norm_shares(List children_list) } -static void _depth_oblivious_set_usage_efctv( - slurmdb_association_rec_t *assoc, - char *child, - char *child_str) +static void _depth_oblivious_set_usage_efctv(slurmdb_association_rec_t *assoc) { long double ratio_p, ratio_l, k, f, ratio_s; slurmdb_association_rec_t *parent_assoc = NULL; ListIterator sib_itr = NULL; slurmdb_association_rec_t *sibling = NULL; + char *child; + char *child_str; + + if (assoc->user) { + child = "user"; + child_str = assoc->user; + } else { + child = "account"; + child_str = assoc->acct; + } /* We want priority_fs = pow(2.0, -R); where R = ratio_p * ratio_l^k @@ -1544,9 +1546,14 @@ static void _set_usage_efctv(slurmdb_association_rec_t *assoc) uint32_t s_child = assoc->shares_raw; uint32_t s_all_siblings = assoc->usage->level_shares; - assoc->usage->usage_efctv = ua_child + - (ue_parent - ua_child) * - (s_child / (long double) s_all_siblings); + /* If no user in the account has shares, avoid division by zero by + * setting usage_efctv to the parent's usage_efctv */ + if (!s_all_siblings) + assoc->usage->usage_efctv = ue_parent; + else + assoc->usage->usage_efctv = ua_child + + (ue_parent - ua_child) * + (s_child / (long double) s_all_siblings); } @@ -1656,11 +1663,11 @@ extern void priority_p_reconfig(bool assoc_clear) 
prevflags = flags; _internal_setup(); - /* Since LEVEL_BASED uses a different shares calculation method, we + /* Since Fair Tree uses a different shares calculation method, we * must reassign shares at reconfigure if the algorithm was switched to - * or from LEVEL_BASED */ - if ((flags & PRIORITY_FLAGS_LEVEL_BASED) != - (prevflags & PRIORITY_FLAGS_LEVEL_BASED)) { + * or from Fair Tree */ + if ((flags & PRIORITY_FLAGS_FAIR_TREE) != + (prevflags & PRIORITY_FLAGS_FAIR_TREE)) { assoc_mgr_lock(&locks); _set_norm_shares(assoc_mgr_root_assoc->usage->children_list); assoc_mgr_unlock(&locks); @@ -1679,100 +1686,39 @@ extern void priority_p_reconfig(bool assoc_clear) return; } -extern void priority_p_set_assoc_usage(slurmdb_association_rec_t *assoc) -{ - char *child; - char *child_str; - xassert(assoc_mgr_root_assoc); - xassert(assoc); - xassert(assoc->usage); - xassert(assoc->usage->fs_assoc_ptr); - - if (assoc->user) { - child = "user"; - child_str = assoc->user; - } else { - child = "account"; - child_str = assoc->acct; +extern void set_assoc_usage_norm(slurmdb_association_rec_t *assoc) +{ + /* If root usage is 0, there is no usage anywhere. */ + if (!assoc_mgr_root_assoc->usage->usage_raw) { + assoc->usage->usage_norm = 0L; + return; } - if (assoc_mgr_root_assoc->usage->usage_raw) { - assoc->usage->usage_norm = assoc->usage->usage_raw - / assoc_mgr_root_assoc->usage->usage_raw; - } else { - /* This should only happen when no usage has occured - * at all so no big deal, the other usage should be 0 - * as well here. 
*/ - assoc->usage->usage_norm = 0; - } + assoc->usage->usage_norm = assoc->usage->usage_raw + / assoc_mgr_root_assoc->usage->usage_raw; + - if (priority_debug) { - info("Normalized usage for %s %s off %s(%s) %Lf / %Lf = %Lf", - child, child_str, - assoc->usage->parent_assoc_ptr->acct, - assoc->usage->fs_assoc_ptr->acct, - assoc->usage->usage_raw, - assoc_mgr_root_assoc->usage->usage_raw, - assoc->usage->usage_norm); - } /* This is needed in case someone changes the half-life on the * fly and now we have used more time than is available under * the new config */ - if (assoc->usage->usage_norm > 1.0) - assoc->usage->usage_norm = 1.0; + if (assoc->usage->usage_norm > 1L) + assoc->usage->usage_norm = 1L; +} - if (flags & PRIORITY_FLAGS_LEVEL_BASED) - assoc->usage->usage_efctv = - level_based_calc_assoc_usage(assoc); - else if (assoc->usage->fs_assoc_ptr == assoc_mgr_root_assoc) { - assoc->usage->usage_efctv = assoc->usage->usage_norm; - if (priority_debug) - info("Effective usage for %s %s off %s(%s) %Lf %Lf", - child, child_str, - assoc->usage->parent_assoc_ptr->acct, - assoc->usage->fs_assoc_ptr->acct, - assoc->usage->usage_efctv, - assoc->usage->usage_norm); - } else if (flags & PRIORITY_FLAGS_TICKET_BASED) { - _ticket_based_set_usage_efctv(assoc); - if (priority_debug) { - info("Effective usage for %s %s off %s(%s) = %Lf", - child, child_str, - assoc->usage->parent_assoc_ptr->acct, - assoc->usage->fs_assoc_ptr->acct, - assoc->usage->usage_efctv); - } - } else if (assoc->shares_raw == SLURMDB_FS_USE_PARENT) { - slurmdb_association_rec_t *parent_assoc = - assoc->usage->fs_assoc_ptr; - assoc->usage->usage_efctv = - parent_assoc->usage->usage_efctv; - if (priority_debug) { - info("Effective usage for %s %s off %s %Lf", - child, child_str, - parent_assoc->acct, - parent_assoc->usage->usage_efctv); - } - } else if (flags & PRIORITY_FLAGS_DEPTH_OBLIVIOUS) { - _depth_oblivious_set_usage_efctv(assoc, child, child_str); - } else { - _set_usage_efctv(assoc); - if 
(priority_debug) { - info("Effective usage for %s %s off %s(%s) " - "%Lf + ((%Lf - %Lf) * %d / %d) = %Lf", - child, child_str, - assoc->usage->parent_assoc_ptr->acct, - assoc->usage->fs_assoc_ptr->acct, - assoc->usage->usage_norm, - assoc->usage->fs_assoc_ptr->usage->usage_efctv, - assoc->usage->usage_norm, - assoc->shares_raw, - assoc->usage->level_shares, - assoc->usage->usage_efctv); - } - } +extern void priority_p_set_assoc_usage(slurmdb_association_rec_t *assoc) +{ + xassert(assoc_mgr_root_assoc); + xassert(assoc); + xassert(assoc->usage); + xassert(assoc->usage->fs_assoc_ptr); + + set_assoc_usage_norm(assoc); + _set_assoc_usage_efctv(assoc); + + if (priority_debug) + _priority_p_set_assoc_usage_debug(assoc); } @@ -2050,3 +1996,87 @@ extern void set_priority_factors(time_t start_time, struct job_record *job_ptr) else job_ptr->prio_factors->nice = NICE_OFFSET; } + + +/* Set usage_efctv based on algorithm-specific code. Fair Tree sets this + * elsewhere. + */ +static void _set_assoc_usage_efctv(slurmdb_association_rec_t *assoc) +{ + if (assoc->usage->fs_assoc_ptr == assoc_mgr_root_assoc) + assoc->usage->usage_efctv = assoc->usage->usage_norm; + else if (flags & PRIORITY_FLAGS_TICKET_BASED) + _ticket_based_set_usage_efctv(assoc); + else if (assoc->shares_raw == SLURMDB_FS_USE_PARENT) { + slurmdb_association_rec_t *parent_assoc = + assoc->usage->fs_assoc_ptr; + + assoc->usage->usage_efctv = + parent_assoc->usage->usage_efctv; + } else if (flags & PRIORITY_FLAGS_DEPTH_OBLIVIOUS) + _depth_oblivious_set_usage_efctv(assoc); + else + _set_usage_efctv(assoc); +} + + +static void _priority_p_set_assoc_usage_debug(slurmdb_association_rec_t *assoc) +{ + char *child; + char *child_str; + + if (assoc->user) { + child = "user"; + child_str = assoc->user; + } else { + child = "account"; + child_str = assoc->acct; + } + + info("Normalized usage for %s %s off %s(%s) %Lf / %Lf = %Lf", + child, child_str, + assoc->usage->parent_assoc_ptr->acct, + assoc->usage->fs_assoc_ptr->acct, 
+ assoc->usage->usage_raw, + assoc_mgr_root_assoc->usage->usage_raw, + assoc->usage->usage_norm); + + if (assoc->usage->fs_assoc_ptr == assoc_mgr_root_assoc) { + info("Effective usage for %s %s off %s(%s) %Lf %Lf", + child, child_str, + assoc->usage->parent_assoc_ptr->acct, + assoc->usage->fs_assoc_ptr->acct, + assoc->usage->usage_efctv, + assoc->usage->usage_norm); + } else if (flags & PRIORITY_FLAGS_TICKET_BASED) { + info("Effective usage for %s %s off %s(%s) = %Lf", + child, child_str, + assoc->usage->parent_assoc_ptr->acct, + assoc->usage->fs_assoc_ptr->acct, + assoc->usage->usage_efctv); + } else if (assoc->shares_raw == SLURMDB_FS_USE_PARENT) { + slurmdb_association_rec_t *parent_assoc = + assoc->usage->fs_assoc_ptr; + + info("Effective usage for %s %s off %s %Lf", + child, child_str, + parent_assoc->acct, + parent_assoc->usage->usage_efctv); + } else if (flags & PRIORITY_FLAGS_DEPTH_OBLIVIOUS) { + /* Unfortunately, this must be handled inside of + * _depth_oblivious_set_usage_efctv */ + } else { + info("Effective usage for %s %s off %s(%s) " + "%Lf + ((%Lf - %Lf) * %d / %d) = %Lf", + child, child_str, + assoc->usage->parent_assoc_ptr->acct, + assoc->usage->fs_assoc_ptr->acct, + assoc->usage->usage_norm, + assoc->usage->fs_assoc_ptr->usage->usage_efctv, + assoc->usage->usage_norm, + assoc->shares_raw, + assoc->usage->level_shares, + assoc->usage->usage_efctv); + } + +} diff --git a/src/plugins/priority/multifactor/priority_multifactor.h b/src/plugins/priority/multifactor/priority_multifactor.h index f421a797feabf32bd0e48af3970ee0a7f35c1f5c..43a448d30bfa0daba61993983623e19d799b2262 100644 --- a/src/plugins/priority/multifactor/priority_multifactor.h +++ b/src/plugins/priority/multifactor/priority_multifactor.h @@ -50,13 +50,14 @@ #include "src/common/assoc_mgr.h" #include "src/slurmctld/locks.h" - -extern bool decay_apply_new_usage(struct job_record *job_ptr, - time_t *start_time_ptr); - -extern void decay_apply_weighted_factors(struct job_record *job_ptr, - 
time_t *start_time_ptr); - +extern void priority_p_set_assoc_usage(slurmdb_association_rec_t *assoc); +extern double priority_p_calc_fs_factor( + long double usage_efctv, long double shares_norm); +extern bool decay_apply_new_usage( + struct job_record *job_ptr, time_t *start_time_ptr); +extern void decay_apply_weighted_factors( + struct job_record *job_ptr, time_t *start_time_ptr); +extern void set_assoc_usage_norm(slurmdb_association_rec_t *assoc); extern void set_priority_factors(time_t start_time, struct job_record *job_ptr); extern bool priority_debug; diff --git a/src/slurmctld/proc_req.c b/src/slurmctld/proc_req.c index b92fa96bcbeb848e998a5e7d9eb039a8e9203c3f..35fea9137e33b701abddd76b18b0e46852012a63 100644 --- a/src/slurmctld/proc_req.c +++ b/src/slurmctld/proc_req.c @@ -721,7 +721,6 @@ static void _fill_ctld_conf(slurm_ctl_conf_t * conf_ptr) conf_ptr->priority_calc_period = conf->priority_calc_period; conf_ptr->priority_favor_small= conf->priority_favor_small; conf_ptr->priority_flags = conf->priority_flags; - conf_ptr->priority_levels = conf->priority_levels; conf_ptr->priority_max_age = conf->priority_max_age; conf_ptr->priority_params = xstrdup(conf->priority_params); conf_ptr->priority_reset_period = conf->priority_reset_period; diff --git a/src/sshare/process.c b/src/sshare/process.c index 04e73b4de3099c6f78da86c96b3bdede6da6558d..6cd0b4539f0aabce061365c4641b5fc90b9ab691 100644 --- a/src/sshare/process.c +++ b/src/sshare/process.c @@ -73,19 +73,19 @@ extern int process(shares_response_msg_t *resp) PRINT_RAWU, PRINT_RUNMINS, PRINT_USER, - PRINT_FSRAW + PRINT_LEVELFS }; if (!resp) return SLURM_ERROR; format_list = list_create(slurm_destroy_char); - if (flags & PRIORITY_FLAGS_LEVEL_BASED) { + if (flags & PRIORITY_FLAGS_FAIR_TREE) { if (long_flag) { slurm_addto_char_list(format_list, "A,User,RawShares,NormShares," "RawUsage,NormUsage,EffUsage," - "FSFctr,FSRaw,GrpCPUMins," + "FSFctr,LevelFS,GrpCPUMins," "CPURunMins"); } else { 
slurm_addto_char_list(format_list, @@ -131,11 +131,11 @@ extern int process(shares_response_msg_t *resp) field->name = xstrdup("FairShare"); field->len = 10; field->print_routine = print_fields_double; - } else if (!strncasecmp("FSRaw", object, 4)) { - field->type = PRINT_FSRAW; - field->name = xstrdup("FairShare Raw"); - field->len = 16; - field->print_routine = print_fields_hex064; + } else if (!strncasecmp("LevelFS", object, 1)) { + field->type = PRINT_LEVELFS; + field->name = xstrdup("Level FS"); + field->len = 10; + field->print_routine = print_fields_double; } else if (!strncasecmp("ID", object, 1)) { field->type = PRINT_ID; field->name = xstrdup("ID"); @@ -243,21 +243,18 @@ extern int process(shares_response_msg_t *resp) (curr_inx == field_count)); break; case PRINT_FSFACTOR: - if (flags & PRIORITY_FLAGS_LEVEL_BASED) { + if (flags & PRIORITY_FLAGS_FAIR_TREE) { if(share->user) field->print_routine( field, - (double) NORMALIZE_VALUE( - share->priority_fs_ranked, - 0, UINT64_MAX, - 0.0l, 1.0l - ), + share->fs_factor, (curr_inx == field_count)); else print_fields_str( field, NULL, - (curr_inx == field_count) + (curr_inx == + field_count) ); } else @@ -270,9 +267,14 @@ extern int process(shares_response_msg_t *resp) shares_norm), (curr_inx == field_count)); break; - case PRINT_FSRAW: - field->print_routine(field, - (uint64_t) share->priority_fs_raw, + case PRINT_LEVELFS: + if (share->shares_raw == SLURMDB_FS_USE_PARENT) + print_fields_str(field, NULL, + (curr_inx == + field_count)); + else + field->print_routine(field, + (double) share->level_fs, (curr_inx == field_count)); break; case PRINT_ID: diff --git a/testsuite/expect/README b/testsuite/expect/README index 9368aeb0e0622fe631720c627d2ed36900c41576..807da765bfab36d6fcd24700a8f91f056c8345aa 100644 --- a/testsuite/expect/README +++ b/testsuite/expect/README @@ -641,7 +641,7 @@ test24.# Testing of sshare/priority multifactor plugin. 
test24.1 multifactor plugin algo test test24.2 sshare h, n, p, P, v, and V options. test24.3 multifactor plugin algo test for fairshare=parent -test24.4 Test of LEVEL_BASED multifactor +test24.4 Test of Fair Tree multifactor test25.# Testing of sprio command and options. ================================================ diff --git a/testsuite/expect/test24.3 b/testsuite/expect/test24.3 index cb72db64fb8e1206ce6da2b9a0a15697e99c3565..30d74e10f424434ea63d21429ed45658381352e1 100755 --- a/testsuite/expect/test24.3 +++ b/testsuite/expect/test24.3 @@ -131,7 +131,7 @@ expect { incr matches exp_continue } - "AccountG|User6|0|-nan|30|0.142857|-nan|nan|0|0|" { + "AccountG|User6|0|0.000000|30|0.142857|0.142857|0.000000|0|0|" { incr matches exp_continue } diff --git a/testsuite/expect/test24.4 b/testsuite/expect/test24.4 index 04e771038d775a05b259ef8202c52211ea4f1b6a..3ea66e729724cfa4d2495321165ff989de93f9de 100755 --- a/testsuite/expect/test24.4 +++ b/testsuite/expect/test24.4 @@ -1,6 +1,6 @@ #!/usr/bin/expect ############################################################################ -# Purpose: Test of LEVEL_BASED multifactor +# Purpose: Test of Fair Tree multifactor # # Output: "TEST: #.#" followed by "SUCCESS" if test was successful, OR # "FAILURE: ..." 
otherwise with an explanation of the failure, OR @@ -41,6 +41,7 @@ set test_id "24.4" set exit_code 0 set test_prog "test$test_id.prog" set matches 0 +set expected_matches 35 print_header $test_id @@ -77,91 +78,143 @@ expect { exp_continue } - "root|||0.000000|210||1.000000||0|0|0|" { + "root|||0.000000|240||1.000000||1.000000|0|0|" { incr matches exp_continue } - "AccountA||40|0.266667|45|0.214286|0.214286||A564000000000000|0|0|" { + "root|u1|10|0.048077|0|0.000000|0.000000|1.000000|inf|0|0|" { incr matches exp_continue } - "AccountB||30|0.750000|20|0.095238|0.444444||A564AC0700000000|0|0|" { + "root|u2|10|0.048077|10|0.041667|0.041667|0.666667|1.153846|0|0|" { incr matches exp_continue } - "AccountB|User1|1|1.000000|20|0.095238|1.000000|0.666667|A564AC0780000000|0|0|" { + "aA||40|0.192308|45|0.187500|0.187500||1.025641|0|0|" { incr matches exp_continue } - "AccountC||10|0.250000|25|0.119048|0.555556||A5644E4800000000|0|0|" { + "aAA||30|0.750000|20|0.083333|0.444444||1.687500|0|0|" { incr matches exp_continue } - "AccountC|User2|1|0.500000|25|0.119048|1.000000|0.500000|A5644E4848990000|0|0|" { + "aAA|uAA1|1|1.000000|20|0.083333|1.000000|0.619048|1.000000|0|0|" { incr matches exp_continue } - "AccountC|User3|1|0.500000|0|0.000000|0.000000|0.583333|A5644E48FFFF0000|0|0|" { + "aAB||10|0.250000|25|0.104167|0.555556||0.450000|0|0|" { incr matches exp_continue } - "AccountD||60|0.400000|25|0.119048|0.119048||D5F5000000000000|0|0|" { + "aAB|uAB1|1|0.500000|25|0.104167|1.000000|0.523810|0.500000|0|0|" { incr matches exp_continue } - "AccountE||25|0.416667|25|0.119048|1.000000||D5F53B7F00000000|0|0|" { + "aAB|uAB2|1|0.500000|0|0.000000|0.000000|0.571429|inf|0|0|" { incr matches exp_continue } - "AccountE|User4|1|1.000000|25|0.119048|1.000000|0.750000|D5F53B7F80000000|0|0|" { + "aB||60|0.288462|25|0.104167|0.104167||2.769231|0|0|" { incr matches exp_continue } - "AccountF||35|0.583333|0|0.000000|0.000000||D5F5FFFF00000000|0|0|" { + 
"aBA||25|0.416667|25|0.104167|1.000000||0.416667|0|0|" { incr matches exp_continue } - "AccountF|User5|1|1.000000|0|0.000000|0.000000|0.833333|D5F5FFFFFFFF0000|0|0|" { + "aBA|uBA1|1|1.000000|25|0.104167|1.000000|0.714286|1.000000|0|0|" { incr matches exp_continue } - "AccountG||0|0.000000|30|0.142857|0.142857||FFFF000000000000|0|0|" { + "aBB||35|0.583333|0|0.000000|0.000000||inf|0|0|" { incr matches exp_continue } - "AccountG|User6|0|0.000000|30|0.142857|1.000000|0.916667|FFFFFFFF00000000|0|0|" { + "aBB|uBB1|1|1.000000|0|0.000000|0.000000|0.761905|inf|0|0|" { incr matches exp_continue } - "AccountH||50|0.333333|110|0.523810|0.523810||6748000000000000|0|0|" { + "aC||0|0.000000|30|0.125000|0.125000||0.000000|0|0|" { incr matches exp_continue } - "AccountH|UHRA1|parent|0.333333|20|0.095238|0.181818|0.416667|6748FFFF00000000|0|0|" { + "aC|uC1|0|0.000000|30|0.125000|1.000000|0.047619|0.000000|0|0|" { incr matches exp_continue } - "AccountH|UHRA2|40|0.235294|20|0.095238|0.181818|0.166667|6748AAE000000000|0|0|" { + "aD||50|0.240385|110|0.458333|0.458333||0.524476|0|0|" { incr matches exp_continue } - "AccountH|UHRA3|50|0.294118|25|0.119048|0.227273|0.083333|6748A63400000000|0|0|" { + "aD|uD1|parent|0.240385|20|0.083333|0.181818|0.476190||0|0|" { incr matches exp_continue } - "AccountHTA||parent|0.333333|45|0.214286|0.409091||6748000000000000|0|0|" { + "aD|uD2|40|0.235294|20|0.083333|0.181818|0.238095|1.294118|0|0|" { incr matches exp_continue } - "AccountHTA|UHTAStd1|parent|0.333333|10|0.047619|0.090909|0.416667|6748FFFF00000000|0|0|" { + "aD|uD3|50|0.294118|25|0.104167|0.227273|0.333333|1.294118|0|0|" { incr matches exp_continue } - "AccountHTA|UHTAStd2|30|0.176471|10|0.047619|0.090909|0.250000|6748C8AD00000000|0|0|" { + "aDA||parent|0.240385|45|0.187500|0.409091|||0|0|" { incr matches exp_continue } - "AccountHTA|UHTAStd3|50|0.294118|25|0.119048|0.227273|0.083333|6748A63400000000|0|0|" { + "aDA|uDA1|parent|0.240385|10|0.041667|0.090909|0.476190||0|0|" { + incr matches + 
exp_continue + } + "aDA|uDA2|30|0.176471|10|0.041667|0.090909|0.380952|1.941176|0|0|" { + incr matches + exp_continue + } + "aDA|uDA3|50|0.294118|25|0.104167|0.227273|0.333333|1.294118|0|0|" { + incr matches + exp_continue + } + "aE||10|0.048077|0|0.000000|0.000000||inf|0|0|" { + incr matches + exp_continue + } + "aE|aE1|10|0.500000|0|0.000000|0.000000|1.000000|inf|0|0|" { + incr matches + exp_continue + } + "aE|aE2|10|0.500000|0|0.000000|0.000000|1.000000|inf|0|0|" { + incr matches + exp_continue + } + "aF||20|0.096154|0|0.000000|0.000000||inf|0|0|" { + incr matches + exp_continue + } + "aF|uF1|10|0.333333|0|0.000000|0.000000|1.000000|inf|0|0|" { + incr matches + exp_continue + } + "aF|uF2|20|0.666667|0|0.000000|0.000000|1.000000|inf|0|0|" { + incr matches + exp_continue + } + "aG||8|0.038462|20|0.083333|0.083333||0.461538|0|0|" { + incr matches + exp_continue + } + "aG|uG1|10|0.500000|10|0.041667|0.500000|0.190476|1.000000|0|0|" { + incr matches + exp_continue + } + "aGA||10|0.500000|10|0.041667|0.500000||1.000000|0|0|" { + incr matches + exp_continue + } + "aGA|uGA1|20|0.500000|4|0.016667|0.400000|0.190476|1.250000|0|0|" { + incr matches + exp_continue + } + "aGA|uGA2|20|0.500000|6|0.025000|0.600000|0.095238|0.833333|0|0|" { incr matches exp_continue } @@ -176,8 +229,15 @@ expect { } } -if {$matches != 22} { - send_user "\nFAILURE: we didn't get the correct priorities from the plugin ($matches != 13)\n" +if {$matches != $expected_matches} { + send_user "\nFAILURE: we didn't get the correct priorities from the plugin ($matches != $expected_matches)\n" + send_user "Some potentially useful information is below: +* Account aA, aB, and aC are general purpose algorithm tests with various shares, usage values, children counts, and depths +* Account aD is used to test USE_PARENT. Any account that is USE_PARENT should have its children effectively reparented to the parent's parent. 
Any user with USE_PARENT should be ranked highest in its account, subject to ties with any other association with level_fs==1.0. uD2 and uD3 do NOT have the same level_fs; sshare doesn't show enough digits to visually distinguish. +* Account aE and its children should have fairshare==1.0 as a result of tie handling code. +* Account aF is similar to aE except its shares are different; they should still be 1.0. +* Accounts aE and aF and their descendants and u1 (root) should have fairshare==1.0 or ranking is broken. +* Account aG is set up so that uGA1 and uG1 will have equivalent fs factors due to the tie handling code but uGA2 will not.\n" set exit_code 1 } diff --git a/testsuite/expect/test24.4.prog.c b/testsuite/expect/test24.4.prog.c index 19873a4e638f4f127fa8305e4151a8272b9287f5..2d4b3f4ec5f273583c4e782afd3424574b0d2058 100644 --- a/testsuite/expect/test24.4.prog.c +++ b/testsuite/expect/test24.4.prog.c @@ -1,7 +1,7 @@ /*****************************************************************************\ - * test24.3.prog.c - link and test algo of multifactor LEVEL_BASED. + * test24.4.prog.c - link and test algo of Fair Tree multifactor. * - * Usage: test24.3.prog + * Usage: test24.4.prog ***************************************************************************** * Modified by Brigham Young University * Ryan Cox <ryan_cox@byu.edu> and Levi Morrison <levi_morrison@byu.edu> @@ -97,280 +97,335 @@ int _setup_assoc_list(void) update.type = SLURMDB_ADD_ASSOC; update.objects = list_create(slurmdb_destroy_association_rec); - /* Just so we don't have to worry about lft's and rgt's we - * will just append these on in order. - * Note: the commented out lfts and rgts as of 10-29-10 are - * correct. By doing an append they go on - * sorted in hierarchy order. The sort that happens inside - * the internal slurm code will sort alpha automatically, (You can - * test this by putting AccountF before AccountE. 
- */ - - /* First only add the accounts */ /* root association */ assoc = xmalloc(sizeof(slurmdb_association_rec_t)); assoc->usage = create_assoc_mgr_association_usage(); assoc->id = 1; - /* assoc->lft = 1; */ - /* assoc->rgt = 28; */ assoc->acct = xstrdup("root"); list_append(update.objects, assoc); - /* sub of root id 1 */ assoc = xmalloc(sizeof(slurmdb_association_rec_t)); assoc->usage = create_assoc_mgr_association_usage(); assoc->id = 2; assoc->parent_id = 1; assoc->shares_raw = 40; - /* assoc->lft = 2; */ - /* assoc->rgt = 13; */ - assoc->acct = xstrdup("AccountA"); + assoc->acct = xstrdup("aA"); list_append(update.objects, assoc); - /* sub of AccountA id 2 */ assoc = xmalloc(sizeof(slurmdb_association_rec_t)); assoc->usage = create_assoc_mgr_association_usage(); assoc->id = 21; - /* assoc->lft = 3; */ - /* assoc->rgt = 6; */ assoc->parent_id = 2; assoc->shares_raw = 30; - assoc->acct = xstrdup("AccountB"); + assoc->acct = xstrdup("aAA"); list_append(update.objects, assoc); - /* sub of AccountB id 21 */ assoc = xmalloc(sizeof(slurmdb_association_rec_t)); assoc->usage = create_assoc_mgr_association_usage(); assoc->id = 211; - /* assoc->lft = 4; */ - /* assoc->rgt = 5; */ assoc->parent_id = 21; assoc->shares_raw = 1; assoc->usage->usage_raw = 20; - assoc->acct = xstrdup("AccountB"); - assoc->user = xstrdup("User1"); + assoc->acct = xstrdup("aAA"); + assoc->user = xstrdup("uAA1"); list_append(update.objects, assoc); - /* sub of AccountA id 2 */ assoc = xmalloc(sizeof(slurmdb_association_rec_t)); assoc->usage = create_assoc_mgr_association_usage(); assoc->id = 22; - /* assoc->lft = 7; */ - /* assoc->rgt = 12; */ assoc->parent_id = 2; assoc->shares_raw = 10; - assoc->acct = xstrdup("AccountC"); + assoc->acct = xstrdup("aAB"); list_append(update.objects, assoc); - /* sub of AccountC id 22 */ assoc = xmalloc(sizeof(slurmdb_association_rec_t)); assoc->usage = create_assoc_mgr_association_usage(); assoc->id = 221; - /* assoc->lft = 8; */ - /* assoc->rgt = 9; */ 
assoc->parent_id = 22; assoc->shares_raw = 1; assoc->usage->usage_raw = 25; - assoc->acct = xstrdup("AccountC"); - assoc->user = xstrdup("User2"); + assoc->acct = xstrdup("aAB"); + assoc->user = xstrdup("uAB1"); list_append(update.objects, assoc); assoc = xmalloc(sizeof(slurmdb_association_rec_t)); assoc->usage = create_assoc_mgr_association_usage(); assoc->id = 222; - /* assoc->lft = 10; */ - /* assoc->rgt = 11; */ assoc->parent_id = 22; assoc->shares_raw = 1; assoc->usage->usage_raw = 0; - assoc->acct = xstrdup("AccountC"); - assoc->user = xstrdup("User3"); + assoc->acct = xstrdup("aAB"); + assoc->user = xstrdup("uAB2"); list_append(update.objects, assoc); - /* sub of root id 1 */ assoc = xmalloc(sizeof(slurmdb_association_rec_t)); assoc->usage = create_assoc_mgr_association_usage(); assoc->id = 3; - /* assoc->lft = 14; */ - /* assoc->rgt = 23; */ assoc->parent_id = 1; assoc->shares_raw = 60; - assoc->acct = xstrdup("AccountD"); + assoc->acct = xstrdup("aB"); list_append(update.objects, assoc); - /* sub of AccountD id 3 */ assoc = xmalloc(sizeof(slurmdb_association_rec_t)); assoc->usage = create_assoc_mgr_association_usage(); assoc->id = 31; - /* assoc->lft = 19; */ - /* assoc->rgt = 22; */ assoc->parent_id = 3; assoc->shares_raw = 25; - assoc->acct = xstrdup("AccountE"); + assoc->acct = xstrdup("aBA"); list_append(update.objects, assoc); - /* sub of AccountE id 31 */ assoc = xmalloc(sizeof(slurmdb_association_rec_t)); assoc->usage = create_assoc_mgr_association_usage(); assoc->id = 311; - /* assoc->lft = 20; */ - /* assoc->rgt = 21; */ assoc->parent_id = 31; assoc->shares_raw = 1; assoc->usage->usage_raw = 25; - assoc->acct = xstrdup("AccountE"); - assoc->user = xstrdup("User4"); + assoc->acct = xstrdup("aBA"); + assoc->user = xstrdup("uBA1"); list_append(update.objects, assoc); - /* sub of AccountD id 3 */ assoc = xmalloc(sizeof(slurmdb_association_rec_t)); assoc->usage = create_assoc_mgr_association_usage(); assoc->id = 32; - /* assoc->lft = 15; */ - /* 
assoc->rgt = 18; */ assoc->parent_id = 3; assoc->shares_raw = 35; - assoc->acct = xstrdup("AccountF"); + assoc->acct = xstrdup("aBB"); list_append(update.objects, assoc); - /* sub of AccountF id 32 */ assoc = xmalloc(sizeof(slurmdb_association_rec_t)); assoc->usage = create_assoc_mgr_association_usage(); assoc->id = 321; - /* assoc->lft = 16; */ - /* assoc->rgt = 17; */ assoc->parent_id = 32; assoc->shares_raw = 1; assoc->usage->usage_raw = 0; - assoc->acct = xstrdup("AccountF"); - assoc->user = xstrdup("User5"); + assoc->acct = xstrdup("aBB"); + assoc->user = xstrdup("uBB1"); list_append(update.objects, assoc); - /* sub of root id 1 */ assoc = xmalloc(sizeof(slurmdb_association_rec_t)); assoc->usage = create_assoc_mgr_association_usage(); assoc->id = 4; - /* assoc->lft = 24; */ - /* assoc->rgt = 27; */ assoc->parent_id = 1; assoc->shares_raw = 0; - assoc->acct = xstrdup("AccountG"); + assoc->usage->usage_raw = 30; + assoc->acct = xstrdup("aC"); list_append(update.objects, assoc); - /* sub of AccountG id 4 */ assoc = xmalloc(sizeof(slurmdb_association_rec_t)); assoc->usage = create_assoc_mgr_association_usage(); assoc->id = 41; - /* assoc->lft = 25; */ - /* assoc->rgt = 26; */ assoc->parent_id = 4; assoc->shares_raw = 0; assoc->usage->usage_raw = 30; - assoc->acct = xstrdup("AccountG"); - assoc->user = xstrdup("User6"); + assoc->acct = xstrdup("aC"); + assoc->user = xstrdup("uC1"); list_append(update.objects, assoc); /* Check for proper handling of Fairshare=parent */ - /* sub of root id 1 */ assoc = xmalloc(sizeof(slurmdb_association_rec_t)); assoc->usage = create_assoc_mgr_association_usage(); assoc->id = 5; - /* assoc->lft = ; */ - /* assoc->rgt = ; */ assoc->parent_id = 1; assoc->shares_raw = 50; - assoc->acct = xstrdup("AccountH"); + assoc->acct = xstrdup("aD"); list_append(update.objects, assoc); - /* sub of AccountH id 5 */ assoc = xmalloc(sizeof(slurmdb_association_rec_t)); assoc->usage = create_assoc_mgr_association_usage(); assoc->id = 51; - /* assoc->lft 
= ; */ - /* assoc->rgt = ; */ assoc->parent_id = 5; assoc->shares_raw = SLURMDB_FS_USE_PARENT; assoc->usage->usage_raw = 35; - assoc->acct = xstrdup("AccountHTA"); + assoc->acct = xstrdup("aDA"); list_append(update.objects, assoc); - /* sub of AccountHTA id 51 */ assoc = xmalloc(sizeof(slurmdb_association_rec_t)); assoc->usage = create_assoc_mgr_association_usage(); assoc->id = 511; - /* assoc->lft = ; */ - /* assoc->rgt = ; */ assoc->parent_id = 51; assoc->shares_raw = SLURMDB_FS_USE_PARENT; assoc->usage->usage_raw = 10; - assoc->acct = xstrdup("AccountHTA"); - assoc->user = xstrdup("UHTAStd1"); + assoc->acct = xstrdup("aDA"); + assoc->user = xstrdup("uDA1"); list_append(update.objects, assoc); - /* sub of AccountHTA id 51 */ assoc = xmalloc(sizeof(slurmdb_association_rec_t)); assoc->usage = create_assoc_mgr_association_usage(); assoc->id = 512; - /* assoc->lft = ; */ - /* assoc->rgt = ; */ assoc->parent_id = 51; assoc->shares_raw = 30; assoc->usage->usage_raw = 10; - assoc->acct = xstrdup("AccountHTA"); - assoc->user = xstrdup("UHTAStd2"); + assoc->acct = xstrdup("aDA"); + assoc->user = xstrdup("uDA2"); list_append(update.objects, assoc); - /* sub of AccountHTA id 51 */ assoc = xmalloc(sizeof(slurmdb_association_rec_t)); assoc->usage = create_assoc_mgr_association_usage(); assoc->id = 513; - /* assoc->lft = ; */ - /* assoc->rgt = ; */ assoc->parent_id = 51; assoc->shares_raw = 50; assoc->usage->usage_raw = 25; - assoc->acct = xstrdup("AccountHTA"); - assoc->user = xstrdup("UHTAStd3"); + assoc->acct = xstrdup("aDA"); + assoc->user = xstrdup("uDA3"); list_append(update.objects, assoc); - /* sub of AccountH id 5 */ assoc = xmalloc(sizeof(slurmdb_association_rec_t)); assoc->usage = create_assoc_mgr_association_usage(); assoc->id = 52; - /* assoc->lft = ; */ - /* assoc->rgt = ; */ assoc->parent_id = 5; assoc->shares_raw = SLURMDB_FS_USE_PARENT; assoc->usage->usage_raw = 20; - assoc->acct = xstrdup("AccountH"); - assoc->user = xstrdup("UHRA1"); + assoc->acct = 
xstrdup("aD"); + assoc->user = xstrdup("uD1"); list_append(update.objects, assoc); - /* sub of AccountH id 5 */ assoc = xmalloc(sizeof(slurmdb_association_rec_t)); assoc->usage = create_assoc_mgr_association_usage(); assoc->id = 53; - /* assoc->lft = ; */ - /* assoc->rgt = ; */ assoc->parent_id = 5; assoc->shares_raw = 40; assoc->usage->usage_raw = 20; - assoc->acct = xstrdup("AccountH"); - assoc->user = xstrdup("UHRA2"); + assoc->acct = xstrdup("aD"); + assoc->user = xstrdup("uD2"); list_append(update.objects, assoc); - /* sub of AccountH id 5 */ assoc = xmalloc(sizeof(slurmdb_association_rec_t)); assoc->usage = create_assoc_mgr_association_usage(); assoc->id = 54; - /* assoc->lft = ; */ - /* assoc->rgt = ; */ assoc->parent_id = 5; assoc->shares_raw = 50; assoc->usage->usage_raw = 25; - assoc->acct = xstrdup("AccountH"); - assoc->user = xstrdup("UHRA3"); + assoc->acct = xstrdup("aD"); + assoc->user = xstrdup("uD3"); + list_append(update.objects, assoc); + + /* Check for proper tie handling */ + + assoc = xmalloc(sizeof(slurmdb_association_rec_t)); + assoc->usage = create_assoc_mgr_association_usage(); + assoc->id = 6; + assoc->parent_id = 1; + assoc->shares_raw = 10; + assoc->usage->usage_raw = 0; + assoc->acct = xstrdup("aE"); + list_append(update.objects, assoc); + + assoc = xmalloc(sizeof(slurmdb_association_rec_t)); + assoc->usage = create_assoc_mgr_association_usage(); + assoc->id = 61; + assoc->parent_id = 6; + assoc->shares_raw = 10; + assoc->usage->usage_raw = 0; + assoc->acct = xstrdup("aE"); + assoc->user = xstrdup("aE1"); + list_append(update.objects, assoc); + + assoc = xmalloc(sizeof(slurmdb_association_rec_t)); + assoc->usage = create_assoc_mgr_association_usage(); + assoc->id = 62; + assoc->parent_id = 6; + assoc->shares_raw = 10; + assoc->usage->usage_raw = 0; + assoc->acct = xstrdup("aE"); + assoc->user = xstrdup("aE2"); + list_append(update.objects, assoc); + + assoc = xmalloc(sizeof(slurmdb_association_rec_t)); + assoc->usage = 
create_assoc_mgr_association_usage(); + assoc->id = 7; + assoc->parent_id = 1; + assoc->shares_raw = 10; + assoc->usage->usage_raw = 0; + assoc->acct = xstrdup("root"); + assoc->user = xstrdup("u1"); + list_append(update.objects, assoc); + + assoc = xmalloc(sizeof(slurmdb_association_rec_t)); + assoc->usage = create_assoc_mgr_association_usage(); + assoc->id = 8; + assoc->parent_id = 1; + assoc->shares_raw = 20; + assoc->usage->usage_raw = 0; + assoc->acct = xstrdup("aF"); + list_append(update.objects, assoc); + + assoc = xmalloc(sizeof(slurmdb_association_rec_t)); + assoc->usage = create_assoc_mgr_association_usage(); + assoc->id = 81; + assoc->parent_id = 8; + assoc->shares_raw = 10; + assoc->usage->usage_raw = 0; + assoc->acct = xstrdup("aF"); + assoc->user = xstrdup("uF1"); + list_append(update.objects, assoc); + + assoc = xmalloc(sizeof(slurmdb_association_rec_t)); + assoc->usage = create_assoc_mgr_association_usage(); + assoc->id = 82; + assoc->parent_id = 8; + assoc->shares_raw = 20; + assoc->usage->usage_raw = 0; + assoc->acct = xstrdup("aF"); + assoc->user = xstrdup("uF2"); + list_append(update.objects, assoc); + + assoc = xmalloc(sizeof(slurmdb_association_rec_t)); + assoc->usage = create_assoc_mgr_association_usage(); + assoc->id = 9; + assoc->parent_id = 1; + assoc->shares_raw = 8; + assoc->usage->usage_raw = 20; + assoc->acct = xstrdup("aG"); + list_append(update.objects, assoc); + + assoc = xmalloc(sizeof(slurmdb_association_rec_t)); + assoc->usage = create_assoc_mgr_association_usage(); + assoc->id = 91; + assoc->parent_id = 9; + assoc->shares_raw = 10; + assoc->usage->usage_raw = 10; + assoc->acct = xstrdup("aG"); + assoc->user = xstrdup("uG1"); + list_append(update.objects, assoc); + + assoc = xmalloc(sizeof(slurmdb_association_rec_t)); + assoc->usage = create_assoc_mgr_association_usage(); + assoc->id = 92; + assoc->parent_id = 9; + assoc->shares_raw = 10; + assoc->usage->usage_raw = 10; + assoc->acct = xstrdup("aGA"); + 
list_append(update.objects, assoc); + + assoc = xmalloc(sizeof(slurmdb_association_rec_t)); + assoc->usage = create_assoc_mgr_association_usage(); + assoc->id = 921; + assoc->parent_id = 92; + assoc->shares_raw = 20; + assoc->usage->usage_raw = 4; + assoc->acct = xstrdup("aGA"); + assoc->user = xstrdup("uGA1"); + list_append(update.objects, assoc); + + assoc = xmalloc(sizeof(slurmdb_association_rec_t)); + assoc->usage = create_assoc_mgr_association_usage(); + assoc->id = 921; + assoc->parent_id = 92; + assoc->shares_raw = 20; + assoc->usage->usage_raw = 6; + assoc->acct = xstrdup("aGA"); + assoc->user = xstrdup("uGA2"); + list_append(update.objects, assoc); + + assoc = xmalloc(sizeof(slurmdb_association_rec_t)); + assoc->usage = create_assoc_mgr_association_usage(); + assoc->id = 1001; + assoc->parent_id = 1; + assoc->shares_raw = 10; + assoc->usage->usage_raw = 10; + assoc->acct = xstrdup("root"); + assoc->user = xstrdup("u2"); list_append(update.objects, assoc); if (assoc_mgr_update_assocs(&update)) @@ -398,8 +453,7 @@ int main (int argc, char **argv) /* force priority type to be multifactor */ xfree(conf->priority_type); conf->priority_type = xstrdup("priority/multifactor"); - conf->priority_flags = PRIORITY_FLAGS_LEVEL_BASED; - conf->priority_levels = 4; + conf->priority_flags = PRIORITY_FLAGS_FAIR_TREE; /* force accounting type to be slurmdbd (It doesn't really talk * to any database, but needs this to work with fairshare * calculation). */