From 04bfa3c16a6f878d5afbe2e8ee142be2def3272c Mon Sep 17 00:00:00 2001 From: Moe Jette <jette1@llnl.gov> Date: Sat, 29 Jan 2011 05:12:35 +0000 Subject: [PATCH] -- Set Cray node order based upon ALPS_NIDORDER configuration. 03_Cray-BASIL-node-ranking.diff select/cray: perform node ranking This supplies the select function-pointer to request a reordering of nodes based on the current Cray node ordering. The Cray node ordering is set internally via the ALPS_NIDORDER configuration variable that controls the way ALPS considers nodes. This ordering in turn determines the order of nodes as they appear subsequently in the Inventory output. The present patch exploits this fact and uses an auto-incrementing number to reflect the node ranking (counting is reversed since the parser returns the nodes in stack/LIFO order). The node ranking is performed on slurmctld (re-)configuration, hence the tests are more stringent: exit if Inventory fails (this condition is extremely rare) and if no nodes are powered up (also a condition that can be cured by restarting slurmctld only when the system is ready). --- NEWS | 2 ++ src/plugins/select/cray/basil_interface.c | 43 +++++++++++++++++++++++ src/plugins/select/cray/basil_interface.h | 1 + src/plugins/select/cray/select_cray.c | 4 ++- 4 files changed, 49 insertions(+), 1 deletion(-) diff --git a/NEWS b/NEWS index d99ffcffab2..c1a345ad3b3 100644 --- a/NEWS +++ b/NEWS @@ -27,6 +27,8 @@ documents those changes that are of interest to users and admins. 02_Cray-BASIL-node-attributes-and-coordinates.diff -- Preserve node's NodeHostName field when reordering for topology. 03_node-reordering-NodeHostName.diff + -- Set Cray node order based upon ALPS_NIDORDER configuration. 
+ 03_Cray-BASIL-node-ranking.diff * Changes in SLURM 2.3.0.pre1 ============================= diff --git a/src/plugins/select/cray/basil_interface.c b/src/plugins/select/cray/basil_interface.c index 17e3fef9cf7..0ec5c7fd279 100644 --- a/src/plugins/select/cray/basil_interface.c +++ b/src/plugins/select/cray/basil_interface.c @@ -74,6 +74,49 @@ static struct node_record *find_node_by_basil_id(uint32_t node_id) return find_node_record(nid); } +extern int basil_node_ranking(struct node_record *node_array, int node_cnt) +{ + enum basil_version version = get_basil_version(); + struct basil_inventory *inv; + struct basil_node *node; + int rank_count = 0, i; + + inv = get_full_inventory(version); + if (inv == NULL) + /* FIXME: should retry here if the condition is transient */ + fatal("failed to get BASIL %s ranking", bv_names_long[version]); + else if (!inv->batch_total) + fatal("system has no usable batch compute nodes"); + + debug("BASIL %s RANKING INVENTORY: %d/%d batch nodes", + bv_names_long[version], inv->batch_avail, inv->batch_total); + + /* + * Node ranking is based on a subset of the inventory: only nodes in + * batch allocation mode which are up and not allocated. Assign a + * 'NO_VAL' rank to all other nodes, which will translate as a very + * high value, (unsigned)-2, to put those nodes last in the ranking. + * The rest of the code must ensure that those nodes are never chosen. + */ + for (i = 0; i < node_cnt; i++) + node_array[i].node_rank = NO_VAL; + + for (node = inv->f->node_head; node; node = node->next) { + struct node_record *node_ptr; + + node_ptr = find_node_by_basil_id(node->node_id); + if (node_ptr == NULL) + error("nid%05u (%s node in state %s) not in slurm.conf", + node->node_id, nam_noderole[node->role], + nam_nodestate[node->state]); + else + node_ptr->node_rank = inv->nodes_total - rank_count++; + } + free_inv(inv); + + return SLURM_SUCCESS; +} + /** * basil_inventory - Periodic node-state query via ALPS XML-RPC. 
* This should be run immediately before each scheduling cycle. diff --git a/src/plugins/select/cray/basil_interface.h b/src/plugins/select/cray/basil_interface.h index ed55be2ce2f..e74e411377b 100644 --- a/src/plugins/select/cray/basil_interface.h +++ b/src/plugins/select/cray/basil_interface.h @@ -20,6 +20,7 @@ #include "src/common/node_select.h" #include "src/slurmctld/slurmctld.h" +extern int basil_node_ranking(struct node_record *node_array, int node_cnt); extern int basil_inventory(void); extern int basil_geometry(struct node_record *node_ptr_array, int node_cnt); extern int do_basil_reserve(struct job_record *job_ptr); diff --git a/src/plugins/select/cray/select_cray.c b/src/plugins/select/cray/select_cray.c index c60e6e07197..ea713927c07 100644 --- a/src/plugins/select/cray/select_cray.c +++ b/src/plugins/select/cray/select_cray.c @@ -185,7 +185,9 @@ extern int select_p_job_init(List job_list) */ extern bool select_p_node_ranking(struct node_record *node_ptr, int node_cnt) { - return false; /* FIXME - to be filled in */ + if (basil_node_ranking(node_ptr, node_cnt) < 0) + fatal("can not resolve node coordinates: ALPS problem?"); + return true; } extern int select_p_node_init(struct node_record *node_ptr, int node_cnt) -- GitLab