diff --git a/NEWS b/NEWS
index d99ffcffab2123dd904b8fc63887ba117880a340..c1a345ad3b3182f7837f67da11a14d497084314a 100644
--- a/NEWS
+++ b/NEWS
@@ -27,6 +27,8 @@ documents those changes that are of interest to users and admins.
     02_Cray-BASIL-node-attributes-and-coordinates.diff
  -- Preserve node's NodeHostName field when reordering for topology.
     03_node-reordering-NodeHostName.diff
+ -- Set Cray node order based upon ALPS_NIDORDER configuration.
+    03_Cray-BASIL-node-ranking.diff
 
 * Changes in SLURM 2.3.0.pre1
 =============================
diff --git a/src/plugins/select/cray/basil_interface.c b/src/plugins/select/cray/basil_interface.c
index 17e3fef9cf7822d4135501adc7b4c2a41d55b0ad..0ec5c7fd279e714e9809b7de100ae0d61bdd38e9 100644
--- a/src/plugins/select/cray/basil_interface.c
+++ b/src/plugins/select/cray/basil_interface.c
@@ -74,6 +74,61 @@ static struct node_record *find_node_by_basil_id(uint32_t node_id)
 	return find_node_record(nid);
 }
 
+/**
+ * basil_node_ranking - Set node_rank of each node from the BASIL inventory.
+ * @node_array: node records whose node_rank field is rewritten
+ * @node_cnt:   number of entries in @node_array
+ *
+ * Returns SLURM_SUCCESS. A missing inventory, or one without batch nodes,
+ * is reported through fatal() rather than through the return value.
+ */
+extern int basil_node_ranking(struct node_record *node_array, int node_cnt)
+{
+	enum basil_version version = get_basil_version();
+	struct basil_inventory *inv;
+	struct basil_node *node;
+	int rank_count = 0, i;
+
+	inv = get_full_inventory(version);
+	if (inv == NULL) {
+		/* FIXME: should retry here if the condition is transient */
+		fatal("failed to get BASIL %s ranking", bv_names_long[version]);
+	} else if (!inv->batch_total) {
+		fatal("system has no usable batch compute nodes");
+	}
+
+	debug("BASIL %s RANKING INVENTORY: %d/%d batch nodes",
+	      bv_names_long[version], inv->batch_avail, inv->batch_total);
+
+	/*
+	 * Every entry of node_array starts out with a 'NO_VAL' rank, which will
+	 * translate as a very high value, (unsigned)-2, to put nodes missing
+	 * from the inventory last in the ranking. The loop below then ranks
+	 * each inventory node that has a node record, without filtering on
+	 * node role or state.
+	 * The rest of the code must ensure NO_VAL-ranked nodes are never chosen.
+	 */
+	for (i = 0; i < node_cnt; i++)
+		node_array[i].node_rank = NO_VAL;
+
+	for (node = inv->f->node_head; node; node = node->next) {
+		struct node_record *node_ptr;
+
+		node_ptr = find_node_by_basil_id(node->node_id);
+		if (node_ptr == NULL) {
+			error("nid%05u (%s node in state %s) not in slurm.conf",
+			      node->node_id, nam_noderole[node->role],
+			      nam_nodestate[node->state]);
+			continue;
+		}
+		/* Ranks count down from nodes_total in list order. */
+		node_ptr->node_rank = inv->nodes_total - rank_count++;
+	}
+	free_inv(inv);
+
+	return SLURM_SUCCESS;
+}
+
 /**
  * basil_inventory - Periodic node-state query via ALPS XML-RPC.
  * This should be run immediately before each scheduling cycle.
diff --git a/src/plugins/select/cray/basil_interface.h b/src/plugins/select/cray/basil_interface.h
index ed55be2ce2f40657fe80511431b410e434a0e32b..e74e411377b48529d2d35fa68d5359b2ee0cc0e7 100644
--- a/src/plugins/select/cray/basil_interface.h
+++ b/src/plugins/select/cray/basil_interface.h
@@ -20,6 +20,7 @@
 #include "src/common/node_select.h"
 #include "src/slurmctld/slurmctld.h"
 
+extern int basil_node_ranking(struct node_record *node_array, int node_cnt);
 extern int basil_inventory(void);
 extern int basil_geometry(struct node_record *node_ptr_array, int node_cnt);
 extern int do_basil_reserve(struct job_record *job_ptr);
diff --git a/src/plugins/select/cray/select_cray.c b/src/plugins/select/cray/select_cray.c
index c60e6e07197f1bf9c09970d05649ee6fcb0bc6f8..ea713927c07bfad1eb48c2d6e69bf6dc00cbed72 100644
--- a/src/plugins/select/cray/select_cray.c
+++ b/src/plugins/select/cray/select_cray.c
@@ -185,7 +185,9 @@ extern int select_p_job_init(List job_list)
  */
 extern bool select_p_node_ranking(struct node_record *node_ptr, int node_cnt)
 {
-	return false; /* FIXME - to be filled in */
+	if (basil_node_ranking(node_ptr, node_cnt) < 0)
+		fatal("can not resolve node ranking: ALPS problem?");
+	return true;
 }
 
 extern int select_p_node_init(struct node_record *node_ptr, int node_cnt)