Skip to content
Snippets Groups Projects
Commit 2bf6f976 authored by Moe Jette's avatar Moe Jette
Browse files
parent 6e6865a1
No related branches found
No related tags found
No related merge requests found
...@@ -76,6 +76,11 @@ documents those changes that are of interest to users and admins. ...@@ -76,6 +76,11 @@ documents those changes that are of interest to users and admins.
-- Fix bug in tracking memory allocated on a node for select/cons_res plugin. -- Fix bug in tracking memory allocated on a node for select/cons_res plugin.
-- Fixed a race condition when writing labelled output with a file per task -- Fixed a race condition when writing labelled output with a file per task
or per node, which potentially closed a file before all data was written. or per node, which potentially closed a file before all data was written.
-- BLUEGENE - Fix, for if a job comes in spanning both less than and
over 1 midplane in size we check the connection type appropriately.
-- Make sched/backfill properly schedule jobs with constraints having node
counts. NOTE: Backfill of jobs with constraints having exclusive OR
operators are not fully supported.
* Changes in SLURM 2.0.1 * Changes in SLURM 2.0.1
======================== ========================
......
...@@ -262,9 +262,10 @@ this command as user root!</b></p> ...@@ -262,9 +262,10 @@ this command as user root!</b></p>
There are significant limitations in the current backfill scheduler plugin. There are significant limitations in the current backfill scheduler plugin.
It was designed to perform backfill node scheduling for a homogeneous cluster. It was designed to perform backfill node scheduling for a homogeneous cluster.
It does not manage scheduling on individual processors (or other consumable It does not manage scheduling on individual processors (or other consumable
resources). It also does not update the required or excluded node list of resources). It does not update the required or excluded node list of
individual jobs. These are the current limitations. You can use the individual jobs. It does support jobs with constraints/features unless
scontrol show command to check if these conditions apply.</p> the exclusive OR operator is used in the constraint expression.
You can use the scontrol show command to check if these conditions apply.</p>
<ul> <ul>
<li>Partition: State=UP</li> <li>Partition: State=UP</li>
<li>Partition: RootOnly=NO</li> <li>Partition: RootOnly=NO</li>
...@@ -1203,6 +1204,6 @@ set a different nodeaddr that is known by your other nodes.</p> ...@@ -1203,6 +1204,6 @@ set a different nodeaddr that is known by your other nodes.</p>
<p class="footer"><a href="#top">top</a></p> <p class="footer"><a href="#top">top</a></p>
<p style="text-align:center;">Last modified 7 May 2009</p> <p style="text-align:center;">Last modified 12 June 2009</p>
<!--#include virtual="footer.txt"--> <!--#include virtual="footer.txt"-->
...@@ -119,6 +119,11 @@ Job_priority = ...@@ -119,6 +119,11 @@ Job_priority =
<a name=fairshare> <a name=fairshare>
<h2>Fair-share Factor</h2></a> <h2>Fair-share Factor</h2></a>
<b>Note:</b> Computing the fair-share factor requires the installation
and operation of the <a href="accounting.html">SLURM Accounting
Database</a> to provide the assigned shares and the consumed,
computing resources described below.
<P> The fair-share component to a job's priority influences the order in which a user's queued jobs are scheduled to run based on the portion of the computing resources they have been allocated and the resources their jobs have already consumed. The fair-share factor does not involve a fixed allotment, whereby a user's access to a machine is cut off once that allotment is reached.</P> <P> The fair-share component to a job's priority influences the order in which a user's queued jobs are scheduled to run based on the portion of the computing resources they have been allocated and the resources their jobs have already consumed. The fair-share factor does not involve a fixed allotment, whereby a user's access to a machine is cut off once that allotment is reached.</P>
<P> Instead, the fair-share factor serves to prioritize queued jobs such that those jobs charging accounts that are under-serviced are scheduled first, while jobs charging accounts that are over-serviced are scheduled when the machine would otherwise go idle.</P> <P> Instead, the fair-share factor serves to prioritize queued jobs such that those jobs charging accounts that are under-serviced are scheduled first, while jobs charging accounts that are over-serviced are scheduled when the machine would otherwise go idle.</P>
...@@ -531,7 +536,7 @@ PriorityWeightQOS=0 # don't use the qos factor ...@@ -531,7 +536,7 @@ PriorityWeightQOS=0 # don't use the qos factor
</PRE> </PRE>
<!--------------------------------------------------------------------------> <!-------------------------------------------------------------------------->
<p style="text-align:center;">Last modified 08 April 2009</p> <p style="text-align:center;">Last modified 12 June 2009</p>
<!--#include virtual="footer.txt"--> <!--#include virtual="footer.txt"-->
...@@ -120,7 +120,11 @@ static void _attempt_backfill(void); ...@@ -120,7 +120,11 @@ static void _attempt_backfill(void);
static void _diff_tv_str(struct timeval *tv1,struct timeval *tv2, static void _diff_tv_str(struct timeval *tv1,struct timeval *tv2,
char *tv_str, int len_tv_str); char *tv_str, int len_tv_str);
static bool _more_work(void); static bool _more_work(void);
static int _start_job(struct job_record *job_ptr, bitstr_t *avail_bitmap); static int _num_feature_count(struct job_record *job_ptr);
static int _start_job(struct job_record *job_ptr, bitstr_t *avail_bitmap);
static int _try_sched(struct job_record *job_ptr, bitstr_t *avail_bitmap,
uint32_t min_nodes, uint32_t max_nodes,
uint32_t req_nodes);
#if __DEBUG #if __DEBUG
/* Log resource allocate table */ /* Log resource allocate table */
...@@ -161,6 +165,110 @@ static void _diff_tv_str(struct timeval *tv1,struct timeval *tv2, ...@@ -161,6 +165,110 @@ static void _diff_tv_str(struct timeval *tv1,struct timeval *tv2,
snprintf(tv_str, len_tv_str, "usec=%ld", delta_t); snprintf(tv_str, len_tv_str, "usec=%ld", delta_t);
} }
/* Return the number of features in a job's constraint expression
 * that carry an explicit node-count specification (0 if none) */
static int _num_feature_count(struct job_record *job_ptr)
{
	struct job_details *detail_ptr = job_ptr->details;
	ListIterator feat_iter;
	struct feature_record *feat_ptr;
	int count = 0;

	if (detail_ptr->feature_list == NULL)	/* no constraints */
		return 0;

	feat_iter = list_iterator_create(detail_ptr->feature_list);
	while ((feat_ptr = (struct feature_record *) list_next(feat_iter))) {
		if (feat_ptr->count)
			count++;
	}
	list_iterator_destroy(feat_iter);

	return count;
}
/* Attempt to schedule a specific job on specific available nodes
 * IN job_ptr - job to schedule
 * IN/OUT avail_bitmap - nodes available/selected to use
 * IN min_nodes - minimum node count for the job
 * IN max_nodes - maximum node count for the job
 * IN req_nodes - requested (desired) node count for the job
 * RET SLURM_SUCCESS on success, otherwise an error code
 */
static int _try_sched(struct job_record *job_ptr, bitstr_t *avail_bitmap,
		      uint32_t min_nodes, uint32_t max_nodes,
		      uint32_t req_nodes)
{
	int rc = SLURM_SUCCESS;
	int feat_cnt = _num_feature_count(job_ptr);

	if (feat_cnt) {
		/* Ideally schedule the job feature by feature,
		 * but I don't want to add that complexity here
		 * right now, so clear the feature counts and try
		 * to schedule. This will work if there is only
		 * one feature count. It should work fairly well
		 * in cases where there are multiple feature
		 * counts. */
		struct job_details *detail_ptr = job_ptr->details;
		ListIterator feat_iter;
		struct feature_record *feat_ptr;
		int i = 0, list_size;
		uint16_t *feat_cnt_orig = NULL, high_cnt = 0;

		/* Clear the feature counts, saving the originals so
		 * they can be restored below */
		list_size = list_count(detail_ptr->feature_list);
		feat_cnt_orig = xmalloc(sizeof(uint16_t) * list_size);
		feat_iter = list_iterator_create(detail_ptr->feature_list);
		while ((feat_ptr =
			(struct feature_record *) list_next(feat_iter))) {
			high_cnt = MAX(high_cnt, feat_ptr->count);
			feat_cnt_orig[i++] = feat_ptr->count;
			feat_ptr->count = 0;
		}
		list_iterator_destroy(feat_iter);

		/* Filter out nodes lacking the required features, then
		 * test with the highest single feature count as the
		 * minimum node requirement */
		if ((job_req_node_filter(job_ptr, avail_bitmap) !=
		     SLURM_SUCCESS) ||
		    (bit_set_count(avail_bitmap) < high_cnt)) {
			rc = ESLURM_NODES_BUSY;
		} else {
			rc = select_g_job_test(job_ptr, avail_bitmap,
					       high_cnt, max_nodes, req_nodes,
					       SELECT_MODE_WILL_RUN);
		}

		/* Restore the feature counts */
		i = 0;
		feat_iter = list_iterator_create(detail_ptr->feature_list);
		while ((feat_ptr =
			(struct feature_record *) list_next(feat_iter))) {
			feat_ptr->count = feat_cnt_orig[i++];
		}
		list_iterator_destroy(feat_iter);
		xfree(feat_cnt_orig);
	} else {
		/* Try to schedule the job. First on dedicated nodes
		 * then on shared nodes (if so configured). */
		uint16_t orig_shared;
		bitstr_t *tmp_bitmap;

		orig_shared = job_ptr->details->shared;
		job_ptr->details->shared = 0;
		/* Save a pristine copy since select_g_job_test() may
		 * modify avail_bitmap */
		tmp_bitmap = bit_copy(avail_bitmap);
		rc = select_g_job_test(job_ptr, avail_bitmap, min_nodes,
				       max_nodes, req_nodes,
				       SELECT_MODE_WILL_RUN);
		job_ptr->details->shared = orig_shared;
		if ((rc != SLURM_SUCCESS) && (orig_shared != 0)) {
			/* Retry with sharing permitted. Restore the saved
			 * node set by copying bits back into the caller's
			 * bitmap rather than freeing avail_bitmap and
			 * swapping pointers: reassigning the local
			 * parameter copy would leave the caller holding a
			 * dangling pointer to freed memory. */
			bit_nclear(avail_bitmap, 0,
				   bit_size(avail_bitmap) - 1);
			bit_or(avail_bitmap, tmp_bitmap);
			rc = select_g_job_test(job_ptr, avail_bitmap,
					       min_nodes, max_nodes,
					       req_nodes,
					       SELECT_MODE_WILL_RUN);
		}
		FREE_NULL_BITMAP(tmp_bitmap);
	}

	return rc;
}
/* Terminate backfill_agent */ /* Terminate backfill_agent */
extern void stop_backfill_agent(void) extern void stop_backfill_agent(void)
{ {
...@@ -230,8 +338,7 @@ static void _attempt_backfill(void) ...@@ -230,8 +338,7 @@ static void _attempt_backfill(void)
struct part_record *part_ptr; struct part_record *part_ptr;
uint32_t end_time, end_reserve, time_limit; uint32_t end_time, end_reserve, time_limit;
uint32_t min_nodes, max_nodes, req_nodes; uint32_t min_nodes, max_nodes, req_nodes;
uint16_t orig_shared; bitstr_t *avail_bitmap = NULL, *resv_bitmap = NULL;
bitstr_t *avail_bitmap = NULL, *tmp_bitmap;
time_t now = time(NULL), start_res; time_t now = time(NULL), start_res;
node_space_map_t node_space[MAX_BACKFILL_JOB_CNT + 2]; node_space_map_t node_space[MAX_BACKFILL_JOB_CNT + 2];
...@@ -334,8 +441,12 @@ static void _attempt_backfill(void) ...@@ -334,8 +441,12 @@ static void _attempt_backfill(void)
if ((j = node_space[j].next) == 0) if ((j = node_space[j].next) == 0)
break; break;
} }
if (job_req_node_filter(job_ptr, avail_bitmap))
continue; /* problem with features */ /* Identify nodes which are definitely off limits */
FREE_NULL_BITMAP(resv_bitmap);
resv_bitmap = bit_copy(avail_bitmap);
bit_not(resv_bitmap);
if (job_ptr->details->exc_node_bitmap) { if (job_ptr->details->exc_node_bitmap) {
bit_not(job_ptr->details->exc_node_bitmap); bit_not(job_ptr->details->exc_node_bitmap);
bit_and(avail_bitmap, bit_and(avail_bitmap,
...@@ -348,36 +459,24 @@ static void _attempt_backfill(void) ...@@ -348,36 +459,24 @@ static void _attempt_backfill(void)
continue; /* required nodes missing */ continue; /* required nodes missing */
if (bit_set_count(avail_bitmap) < min_nodes) if (bit_set_count(avail_bitmap) < min_nodes)
continue; /* insufficient nodes remain */ continue; /* insufficient nodes remain */
if (job_req_node_filter(job_ptr, avail_bitmap))
continue; /* nodes lack features */
/* Try to schedule the job. First on dedicated nodes j = _try_sched(job_ptr, avail_bitmap,
* then on shared nodes (if so configured). */ min_nodes, max_nodes, req_nodes);
orig_shared = job_ptr->details->shared;
job_ptr->details->shared = 0;
tmp_bitmap = bit_copy(avail_bitmap);
j = select_g_job_test(job_ptr, avail_bitmap, min_nodes,
max_nodes, req_nodes,
SELECT_MODE_WILL_RUN);
job_ptr->details->shared = orig_shared;
if ((j != SLURM_SUCCESS) && (orig_shared != 0)) {
FREE_NULL_BITMAP(avail_bitmap);
avail_bitmap= tmp_bitmap;
j = select_g_job_test(job_ptr, avail_bitmap, min_nodes,
max_nodes, req_nodes,
SELECT_MODE_WILL_RUN);
} else
FREE_NULL_BITMAP(tmp_bitmap);
if (j != SLURM_SUCCESS) if (j != SLURM_SUCCESS)
continue; /* not runable */ continue; /* not runable */
job_ptr->start_time = MAX(job_ptr->start_time, start_res); job_ptr->start_time = MAX(job_ptr->start_time, start_res);
if (job_ptr->start_time <= now) { if (job_ptr->start_time <= now) {
int rc = _start_job(job_ptr, avail_bitmap); int rc = _start_job(job_ptr, resv_bitmap);
if(rc == ESLURM_ACCOUNTING_POLICY) if (rc == ESLURM_ACCOUNTING_POLICY)
continue;
else if (rc != SLURM_SUCCESS)
/* Planned to start job, but something bad
* happened. Reserve nodes where this should
* apparently run and try more jobs. */
continue; continue;
else if(rc != SLURM_SUCCESS)
/* Planned to start job, but something
* bad happened */
break;
} }
if (job_ptr->start_time > (now + BACKFILL_WINDOW)) { if (job_ptr->start_time > (now + BACKFILL_WINDOW)) {
/* Starts too far in the future to worry about */ /* Starts too far in the future to worry about */
...@@ -401,6 +500,7 @@ static void _attempt_backfill(void) ...@@ -401,6 +500,7 @@ static void _attempt_backfill(void)
#endif #endif
} }
FREE_NULL_BITMAP(avail_bitmap); FREE_NULL_BITMAP(avail_bitmap);
FREE_NULL_BITMAP(resv_bitmap);
for (i=0; ; ) { for (i=0; ; ) {
bit_free(node_space[i].avail_bitmap); bit_free(node_space[i].avail_bitmap);
...@@ -410,16 +510,18 @@ static void _attempt_backfill(void) ...@@ -410,16 +510,18 @@ static void _attempt_backfill(void)
xfree(job_queue); xfree(job_queue);
} }
static int _start_job(struct job_record *job_ptr, bitstr_t *avail_bitmap) /* Try to start the job on any non-reserved nodes */
static int _start_job(struct job_record *job_ptr, bitstr_t *resv_bitmap)
{ {
int rc; int rc;
bitstr_t *orig_exc_nodes = NULL; bitstr_t *orig_exc_nodes = NULL;
static uint32_t fail_jobid = 0; static uint32_t fail_jobid = 0;
if (job_ptr->details->exc_node_bitmap) if (job_ptr->details->exc_node_bitmap) {
orig_exc_nodes = job_ptr->details->exc_node_bitmap; orig_exc_nodes = job_ptr->details->exc_node_bitmap;
job_ptr->details->exc_node_bitmap = bit_copy(avail_bitmap); bit_or(job_ptr->details->exc_node_bitmap, resv_bitmap);
bit_not(job_ptr->details->exc_node_bitmap); } else
job_ptr->details->exc_node_bitmap = bit_copy(resv_bitmap);
rc = select_nodes(job_ptr, false, NULL); rc = select_nodes(job_ptr, false, NULL);
bit_free(job_ptr->details->exc_node_bitmap); bit_free(job_ptr->details->exc_node_bitmap);
...@@ -437,13 +539,15 @@ static int _start_job(struct job_record *job_ptr, bitstr_t *avail_bitmap) ...@@ -437,13 +539,15 @@ static int _start_job(struct job_record *job_ptr, bitstr_t *avail_bitmap)
#if __DEBUG #if __DEBUG
info("backfill: Jobs backfilled: %d", backfilled_jobs); info("backfill: Jobs backfilled: %d", backfilled_jobs);
#endif #endif
} else if ((job_ptr->job_id != fail_jobid) } else if ((job_ptr->job_id != fail_jobid) &&
&& (rc != ESLURM_ACCOUNTING_POLICY)) { (rc != ESLURM_ACCOUNTING_POLICY)) {
char *node_list = bitmap2node_name(avail_bitmap); char *node_list;
bit_not(resv_bitmap);
node_list = bitmap2node_name(resv_bitmap);
/* This happens when a job has sharing disabled and /* This happens when a job has sharing disabled and
* a selected node is still completing some job, * a selected node is still completing some job,
* which should be a temporary situation. */ * which should be a temporary situation. */
verbose("backfill: Failed to start JobId=%u on %s: %s", verbose("backfill: Failed to start JobId=%u in %s: %s",
job_ptr->job_id, node_list, slurm_strerror(rc)); job_ptr->job_id, node_list, slurm_strerror(rc));
xfree(node_list); xfree(node_list);
fail_jobid = job_ptr->job_id; fail_jobid = job_ptr->job_id;
......
...@@ -478,7 +478,16 @@ static bg_record_t *_find_matching_block(List block_list, ...@@ -478,7 +478,16 @@ static bg_record_t *_find_matching_block(List block_list,
continue; continue;
} }
goto good_conn_type; goto good_conn_type;
} } else if(bg_record->conn_type >= SELECT_SMALL) {
/* since we already checked to see if
the cpus were good this means we are
looking for a block in a range that
includes small and regular blocks.
So we can just continue on.
*/
goto good_conn_type;
}
#endif #endif
debug("bg block %s conn-type not usable asking for %s " debug("bg block %s conn-type not usable asking for %s "
"bg_record is %s", "bg_record is %s",
......
...@@ -1297,8 +1297,9 @@ scontrol [<OPTION>] [<COMMAND>] \n\ ...@@ -1297,8 +1297,9 @@ scontrol [<OPTION>] [<COMMAND>] \n\
!! Repeat the last command entered. \n\ !! Repeat the last command entered. \n\
\n\ \n\
<ENTITY> may be \"config\", \"daemons\", \"job\", \"node\", \"partition\"\n\ <ENTITY> may be \"config\", \"daemons\", \"job\", \"node\", \"partition\"\n\
\"reservation\", \"hostlist\", \"hostnames\", \"slurmd\", \"topology\"\n\ \"reservation\", \"hostlist\", \"hostnames\", \"slurmd\", \n\
(for BlueGene only: \"block\", \"subbp\" or \"step\"). \n\ \"topology\", or \"step\" \n\
(also for BlueGene only: \"block\" or \"subbp\"). \n\
\n\ \n\
<ID> may be a configuration parameter name, job id, node name, partition \n\ <ID> may be a configuration parameter name, job id, node name, partition \n\
name, reservation name, job step id, or hostlist or pathname to a \n\ name, reservation name, job step id, or hostlist or pathname to a \n\
......
...@@ -108,8 +108,11 @@ static int _pick_best_nodes(struct node_set *node_set_ptr, ...@@ -108,8 +108,11 @@ static int _pick_best_nodes(struct node_set *node_set_ptr,
struct part_record *part_ptr, struct part_record *part_ptr,
uint32_t min_nodes, uint32_t max_nodes, uint32_t min_nodes, uint32_t max_nodes,
uint32_t req_nodes, bool test_only); uint32_t req_nodes, bool test_only);
static void _reset_feature_counts(struct job_details *details_ptr);
static bool _valid_feature_counts(struct job_details *details_ptr);
static bitstr_t *_valid_features(struct job_details *detail_ptr, static bitstr_t *_valid_features(struct job_details *detail_ptr,
struct config_record *config_ptr); struct config_record *config_ptr,
bool update_count);
/* /*
...@@ -438,9 +441,10 @@ _get_req_features(struct node_set *node_set_ptr, int node_set_size, ...@@ -438,9 +441,10 @@ _get_req_features(struct node_set *node_set_ptr, int node_set_size,
#if 0 #if 0
{ {
char *tmp_str = bitmap2node_name(feature_bitmap); char *tmp_str = bitmap2node_name(feature_bitmap);
info("job %u needs %u nodes with feature %s, using %s", info("job %u needs %u nodes with feature %s, "
job_ptr->job_id, feat_ptr->count, "using %s, error_code=%d",
feat_ptr->name, tmp_str); job_ptr->job_id, feat_ptr->count,
feat_ptr->name, tmp_str, error_code);
xfree(tmp_str); xfree(tmp_str);
} }
#endif #endif
...@@ -1145,11 +1149,48 @@ extern int select_nodes(struct job_record *job_ptr, bool test_only, ...@@ -1145,11 +1149,48 @@ extern int select_nodes(struct job_record *job_ptr, bool test_only,
return error_code; return error_code;
} }
/* Clear tmp_cnt for all features of given job */
static void _reset_feature_counts(struct job_details *details_ptr)
{
	ListIterator iter;
	struct feature_record *feat;

	if (details_ptr->feature_list == NULL)	/* no constraints */
		return;

	iter = list_iterator_create(details_ptr->feature_list);
	while ((feat = (struct feature_record *) list_next(iter)))
		feat->tmp_cnt = 0;
	list_iterator_destroy(iter);
}
/* Verify that tmp_cnt >= count for all features of given job
 * RET true if every feature's allocated-node counter satisfies its
 * requested count (trivially true if the job has no constraints) */
static bool _valid_feature_counts(struct job_details *details_ptr)
{
	ListIterator iter;
	struct feature_record *feat;
	bool satisfied = true;

	if (details_ptr->feature_list == NULL)	/* no constraints */
		return true;

	iter = list_iterator_create(details_ptr->feature_list);
	/* stop scanning as soon as one feature falls short */
	while (satisfied &&
	       (feat = (struct feature_record *) list_next(iter))) {
		if (feat->tmp_cnt < feat->count)
			satisfied = false;
	}
	list_iterator_destroy(iter);

	return satisfied;
}
/* /*
* job_req_node_filter - job request node filter. * job_req_node_filter - job request node filter.
* clear from a bitmap the nodes which can not be used for a job * clear from a bitmap the nodes which can not be used for a job
* test memory size, required features, processor count, etc. * test memory size, required features, processor count, etc.
* NOTE: Does not support exclusive OR of features or feature counts. * NOTE: Does not support exclusive OR of features.
* It just matches first element of XOR and ignores count. * It just matches first element of XOR and ignores count.
* IN job_ptr - pointer to node to be scheduled * IN job_ptr - pointer to node to be scheduled
* IN/OUT bitmap - set of nodes being considered for use * IN/OUT bitmap - set of nodes being considered for use
...@@ -1171,13 +1212,14 @@ extern int job_req_node_filter(struct job_record *job_ptr, ...@@ -1171,13 +1212,14 @@ extern int job_req_node_filter(struct job_record *job_ptr,
return EINVAL; return EINVAL;
} }
_reset_feature_counts(detail_ptr);
mc_ptr = detail_ptr->mc_ptr; mc_ptr = detail_ptr->mc_ptr;
for (i=0; i< node_record_count; i++) { for (i=0; i< node_record_count; i++) {
if (!bit_test(avail_bitmap, i)) if (!bit_test(avail_bitmap, i))
continue; continue;
node_ptr = node_record_table_ptr + i; node_ptr = node_record_table_ptr + i;
config_ptr = node_ptr->config_ptr; config_ptr = node_ptr->config_ptr;
feature_bitmap = _valid_features(detail_ptr, config_ptr); feature_bitmap = _valid_features(detail_ptr, config_ptr, true);
if ((feature_bitmap == NULL) || if ((feature_bitmap == NULL) ||
(!bit_test(feature_bitmap, 0))) { (!bit_test(feature_bitmap, 0))) {
bit_clear(avail_bitmap, i); bit_clear(avail_bitmap, i);
...@@ -1228,6 +1270,10 @@ extern int job_req_node_filter(struct job_record *job_ptr, ...@@ -1228,6 +1270,10 @@ extern int job_req_node_filter(struct job_record *job_ptr,
} }
} }
FREE_NULL_BITMAP(feature_bitmap); FREE_NULL_BITMAP(feature_bitmap);
if (!_valid_feature_counts(detail_ptr))
return EINVAL;
return SLURM_SUCCESS; return SLURM_SUCCESS;
} }
...@@ -1354,7 +1400,8 @@ static int _build_node_list(struct job_record *job_ptr, ...@@ -1354,7 +1400,8 @@ static int _build_node_list(struct job_record *job_ptr,
continue; continue;
} }
tmp_feature = _valid_features(job_ptr->details, config_ptr); tmp_feature = _valid_features(job_ptr->details, config_ptr,
false);
if (tmp_feature == NULL) { if (tmp_feature == NULL) {
FREE_NULL_BITMAP(node_set_ptr[node_set_inx].my_bitmap); FREE_NULL_BITMAP(node_set_ptr[node_set_inx].my_bitmap);
continue; continue;
...@@ -1594,6 +1641,8 @@ extern void build_node_details(struct job_record *job_ptr) ...@@ -1594,6 +1641,8 @@ extern void build_node_details(struct job_record *job_ptr)
* the available nodes * the available nodes
* IN details_ptr - job requirement details, includes requested features * IN details_ptr - job requirement details, includes requested features
* IN config_ptr - node's configuration record * IN config_ptr - node's configuration record
* IN update_count - if set, then increment tmp_cnt (temporary counter)
* for matched features
* RET NULL if request is not satisfied, otherwise a bitmap indicating * RET NULL if request is not satisfied, otherwise a bitmap indicating
* which mutually exclusive features are satisfied. For example * which mutually exclusive features are satisfied. For example
* _valid_features("[fs1|fs2|fs3|fs4]", "fs3") returns a bitmap with * _valid_features("[fs1|fs2|fs3|fs4]", "fs3") returns a bitmap with
...@@ -1604,34 +1653,44 @@ extern void build_node_details(struct job_record *job_ptr) ...@@ -1604,34 +1653,44 @@ extern void build_node_details(struct job_record *job_ptr)
* mutually exclusive feature list. * mutually exclusive feature list.
*/ */
static bitstr_t *_valid_features(struct job_details *details_ptr, static bitstr_t *_valid_features(struct job_details *details_ptr,
struct config_record *config_ptr) struct config_record *config_ptr,
bool update_count)
{ {
bitstr_t *result_bits = (bitstr_t *) NULL; bitstr_t *result_bits = (bitstr_t *) NULL;
ListIterator feat_iter; ListIterator feat_iter;
struct feature_record *feat_ptr; struct feature_record *feat_ptr;
int found, last_op, position = 0, result; bool found, test_names, result;
int save_op = FEATURE_OP_AND, save_result=1; int last_op, position = 0;
int save_op = FEATURE_OP_AND, save_result = 1;
if (details_ptr->feature_list == NULL) {/* no constraints */ if (details_ptr->feature_list == NULL) { /* no constraints */
result_bits = bit_alloc(MAX_FEATURES); result_bits = bit_alloc(MAX_FEATURES);
bit_set(result_bits, 0); bit_set(result_bits, 0);
return result_bits; return result_bits;
} }
result = 1; /* assume good for now */ result = true; /* assume good for now */
last_op = FEATURE_OP_AND; last_op = FEATURE_OP_AND;
feat_iter = list_iterator_create(details_ptr->feature_list); feat_iter = list_iterator_create(details_ptr->feature_list);
while ((feat_ptr = (struct feature_record *) list_next(feat_iter))) { while ((feat_ptr = (struct feature_record *) list_next(feat_iter))) {
found = 0; test_names = false;
if (feat_ptr->count) found = false;
found = 1; if (feat_ptr->count) {
else if (config_ptr->feature_array) { found = true;
if (update_count)
test_names = true;
} else
test_names = true;
if (test_names && config_ptr->feature_array) {
int i; int i;
for (i=0; config_ptr->feature_array[i]; i++) { for (i=0; config_ptr->feature_array[i]; i++) {
if (strcmp(feat_ptr->name, if (strcmp(feat_ptr->name,
config_ptr->feature_array[i])) config_ptr->feature_array[i]))
continue; continue;
found = 1; found = true;
if (update_count && feat_ptr->count)
feat_ptr->tmp_cnt++;
break; break;
} }
} }
......
...@@ -381,6 +381,7 @@ struct feature_record { ...@@ -381,6 +381,7 @@ struct feature_record {
char *name; /* name of feature */ char *name; /* name of feature */
uint16_t count; /* count of nodes with this feature */ uint16_t count; /* count of nodes with this feature */
uint8_t op_code; /* separator, see FEATURE_OP_ above */ uint8_t op_code; /* separator, see FEATURE_OP_ above */
uint16_t tmp_cnt; /* temporary, allocated node counter */
}; };
/* job_details - specification of a job's constraints, /* job_details - specification of a job's constraints,
......
...@@ -624,8 +624,8 @@ proc test_assoc_enforced { } { ...@@ -624,8 +624,8 @@ proc test_assoc_enforced { } {
set assoc_enforced 0 set assoc_enforced 0
spawn $scontrol show config spawn $scontrol show config
expect { expect {
-re "AccountingStorageEnforce *= ($number)" { -re "AccountingStorageEnforce *= associations" {
set assoc_enforced $expect_out(1,string) set assoc_enforced 1
exp_continue exp_continue
} }
eof { eof {
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment