From b41b51f19bdb8de7af5d4056f4d89fa2830a983d Mon Sep 17 00:00:00 2001
From: Moe Jette <jette1@llnl.gov>
Date: Fri, 14 Jan 2011 04:32:05 +0000
Subject: [PATCH] Checkin new select and topology plugins to control node
 reordering functions. Based upon work by Gerrit plus bug fixes and
 documentation that I've added.

---
 doc/html/selectplugins.shtml                  | 16 +++-
 doc/html/topology_plugin.shtml                | 12 ++-
 src/common/node_select.c                      | 16 ++++
 src/common/node_select.h                      | 10 +++
 src/common/slurm_topology.c                   | 15 ++++
 src/common/slurm_topology.h                   |  7 ++
 src/plugins/select/bgq/select_bgq.cc          |  5 ++
 .../select/bluegene/plugin/select_bluegene.c  |  5 ++
 src/plugins/select/cons_res/select_cons_res.c |  8 +-
 src/plugins/select/cray/other_select.c        |  1 +
 src/plugins/select/cray/select_cray.c         | 10 ++-
 src/plugins/select/linear/select_linear.c     |  7 +-
 src/plugins/topology/3d_torus/hilbert_slurm.c | 65 +--------------
 .../topology/3d_torus/topology_3d_torus.c     | 22 +++--
 .../topology/node_rank/topology_node_rank.c   | 76 +++--------------
 src/plugins/topology/none/topology_none.c     | 10 ++-
 src/plugins/topology/tree/topology_tree.c     |  9 +-
 src/slurmctld/read_config.c                   | 83 +++++++++++++++++++
 18 files changed, 233 insertions(+), 144 deletions(-)

diff --git a/doc/html/selectplugins.shtml b/doc/html/selectplugins.shtml
index 8daac3ef65e..b99e0cf68e0 100644
--- a/doc/html/selectplugins.shtml
+++ b/doc/html/selectplugins.shtml
@@ -185,6 +185,18 @@ but actually registered with 2GB of memory.</p>
 of the node in reference to the entire system.<br><br>
 <p style="margin-left:.2in"><b>Returns</b>: SLURM_SUCCESS if successful, otherwise SLURM_ERROR</p>
 
+<p class="commandline">bool select_p_node_ranking(struct node_record *node_ptr, int node_cnt)</p>
+<p style="margin-left:.2in"><b>Description</b>: This function is called by the slurmctld
+daemon at start time to set node rank information for reordering the nodes to
+optimize application performance. </p>
+<p style="margin-left:.2in"><b>Arguments</b>:<br>
+<span class="commandline"> node_ptr</span>&nbsp;&nbsp;&nbsp;(input/output) pointer
+to the node data structure. Each node's node rank field may be set.<br>
+<span class="commandline"> node_cnt</span>&nbsp;&nbsp;&nbsp;(input) number
+of nodes configured on the system.</p>
+<p style="margin-left:.2in"><b>Returns</b>: true if node rank information has
+been set.</p>
+
 <p class="commandline">int select_p_update_node_state (int index, uint16_t state);</p>
 <p style="margin-left:.2in"><b>Description</b>: push a change of state
 into the plugin the index should be the index from the slurmctld of
@@ -390,7 +402,7 @@ the plugin should return SLURM_ERROR.</p>
 <p class="footer"><a href="#top">top</a></p>
 
 <h2>Versioning</h2>
-<p> This document describes version 1 of the SLURM node selection API. Future
+<p> This document describes version 100 of the SLURM node selection API. Future
 releases of SLURM may revise this API. A node selection plugin conveys its ability
 to implement a particular API version using the mechanism outlined for SLURM plugins.
 In addition, the credential is transmitted along with the version number of the
@@ -399,6 +411,6 @@ to maintain data format compatibility across different versions of the plugin.</
 
 <p class="footer"><a href="#top">top</a></p>
 
-<p style="text-align:center;">Last modified 5 October 2009</p>
+<p style="text-align:center;">Last modified 13 January 2011</p>
 
 <!--#include virtual="footer.txt"-->
diff --git a/doc/html/topology_plugin.shtml b/doc/html/topology_plugin.shtml
index 0cf8ce1a725..3cd51fd5854 100644
--- a/doc/html/topology_plugin.shtml
+++ b/doc/html/topology_plugin.shtml
@@ -7,7 +7,7 @@
 defines them.
 It is intended as a resource to programmers wishing to write their own
 SLURM topology plugin.
-This is version 100 of the API.</p>
+This is version 101 of the API.</p>
 
 <p>SLURM topology plugins are SLURM plugins that implement
 convey system topology information so that SLURM is able to
@@ -64,6 +64,12 @@ Functions which are not implemented should be stubbed.</p>
 <p style="margin-left:.2in"><b>Returns</b>: SLURM_SUCCESS or
 SLURM_ERROR on failure.</p>
 
+<p class="commandline">bool topo_generate_node_ranking(void)</p>
+<p style="margin-left:.2in"><b>Description</b>: Determine if this plugin will
+reorder the node records based upon each node's node rank field.</p>
+<p style="margin-left:.2in"><b>Returns</b>: true if node reordering is supported,
+false otherwise.</p>
+
 <p class="commandline">int topo_get_node_addr(char* node_name, char** paddr, char** ppatt);</p>
 <p style="margin-left:.2in"><b>Description</b>: Get Topology address of a given node.</p>
 <p style="margin-left:.2in"><b>Arguments</b>:<br>
@@ -80,10 +86,10 @@ in the hierarchy is separated by a period. The final element will always be
 SLURM_ERROR on failure.</p>
 
 <h2>Versioning</h2>
-<p> This document describes version 100 of the SLURM topology API.
+<p> This document describes version 101 of the SLURM topology API.
 Future releases of SLURM may revise this API.</p>
 <p class="footer"><a href="#top">top</a></p>
 
-<p style="text-align:center;">Last modified 27 August 2009</p>
+<p style="text-align:center;">Last modified 13 January 2011</p>
 
 <!--#include virtual="footer.txt"-->
diff --git a/src/common/node_select.c b/src/common/node_select.c
index bc2de083a0d..67358fdd45a 100644
--- a/src/common/node_select.c
+++ b/src/common/node_select.c
@@ -79,6 +79,7 @@ static int _select_get_ops(char *select_type,
 		"select_p_state_save",
 		"select_p_state_restore",
 		"select_p_job_init",
+		"select_p_node_ranking",
 		"select_p_node_init",
 		"select_p_block_init",
 		"select_p_job_test",
@@ -456,6 +457,21 @@ extern int select_g_job_init(List job_list)
 		(job_list);
 }
 
+/*
+ * Assign a 'node_rank' value to each of the node_ptr entries.
+ * IN/OUT node_ptr - current node data, node_rank fields may be set
+ * IN node_cnt - number of node entries
+ * Return true if node ranking was performed, false otherwise (or on error).
+ */
+extern bool select_g_node_ranking(struct node_record *node_ptr, int node_cnt)
+{
+	if (slurm_select_init(0) < 0)
+		return false;	/* plugin init failed: no ranking done */
+
+	return (*(select_context[select_context_default].ops.node_ranking))
+		(node_ptr, node_cnt);
+}
+
 /*
  * Note re/initialization of node record data structure
  * IN node_ptr - current node data
diff --git a/src/common/node_select.h b/src/common/node_select.h
index 85dc3556266..92d846eb3d4 100644
--- a/src/common/node_select.h
+++ b/src/common/node_select.h
@@ -69,6 +69,8 @@ typedef struct slurm_select_ops {
 	int		(*state_save)		(char *dir_name);
 	int		(*state_restore)	(char *dir_name);
 	int		(*job_init)		(List job_list);
+	bool		(*node_ranking)		(struct node_record *node_ptr,
+						 int node_cnt);
 	int		(*node_init)		(struct node_record *node_ptr,
 						 int node_cnt);
 	int		(*block_init)		(List block_list);
@@ -194,6 +196,14 @@ extern int select_g_state_restore(char *dir_name);
  */
 extern int select_g_job_init(List job_list);
 
+/*
+ * Assign a 'node_rank' value to each of the node_ptr entries.
+ * IN/OUT node_ptr - current node data, node_rank fields may be set
+ * IN node_cnt - number of node entries
+ * Return true if node ranking was performed, false if not.
+ */
+extern bool select_g_node_ranking(struct node_record *node_ptr, int node_cnt);
+
 /*
  * Note re/initialization of node record data structure
  * IN node_ptr - current node data
diff --git a/src/common/slurm_topology.c b/src/common/slurm_topology.c
index a3a54ec497d..932707cc5df 100644
--- a/src/common/slurm_topology.c
+++ b/src/common/slurm_topology.c
@@ -55,6 +55,7 @@ int switch_record_cnt = 0;
 /* ************************************************************************ */
 typedef struct slurm_topo_ops {
 	int		(*build_config)		( void );
+	bool		(*node_ranking)		( void );
 	int		(*get_node_addr)	( char* node_name,
 						  char** addr,
 						  char** pattern );
@@ -87,6 +88,7 @@ slurm_topo_get_ops( slurm_topo_context_t *c )
 	 */
 	static const char *syms[] = {
 		"topo_build_config",
+		"topo_generate_node_ranking",
 		"topo_get_node_addr",
 	};
 	int n_syms = sizeof( syms ) / sizeof( char * );
@@ -268,6 +270,19 @@ slurm_topo_build_config( void )
 	return rc;
 }
 
+/* *********************************************************************** */
+/*  TAG(                      slurm_topo_generate_node_ranking          )  */
+/* NOTE: This operation is only supported by those topology plugins for    */
+/*       which the node ordering between slurmd and slurmctld is invariant */
+/* *********************************************************************** */
+extern bool
+slurm_topo_generate_node_ranking( void )
+{
+	if ( slurm_topo_init() < 0 )
+		return false;	/* plugin init failed: no ranking done */
+
+	return (*(g_topo_context->ops.node_ranking))();
+}
 
 /* *********************************************************************** */
 /*  TAG(                      slurm_topo_get_node_addr                  )  */
diff --git a/src/common/slurm_topology.h b/src/common/slurm_topology.h
index 7b144c8f593..dfda3e4b4b9 100644
--- a/src/common/slurm_topology.h
+++ b/src/common/slurm_topology.h
@@ -89,6 +89,13 @@ extern int slurm_topo_fini(void);
  */
 extern int slurm_topo_build_config( void );
 
+/*
+ * slurm_topo_generate_node_ranking  -  populate node_rank fields
+ * NOTE: This operation is only supported by those topology plugins for
+ *       which the node ordering between slurmd and slurmctld is invariant.
+ */
+extern bool slurm_topo_generate_node_ranking( void );
+
 /*
  * slurm_topo_get_node_addr - build node address and the associated pattern
  *      based on the topology information
diff --git a/src/plugins/select/bgq/select_bgq.cc b/src/plugins/select/bgq/select_bgq.cc
index d017bc01195..50d4ac6d690 100644
--- a/src/plugins/select/bgq/select_bgq.cc
+++ b/src/plugins/select/bgq/select_bgq.cc
@@ -154,6 +154,11 @@ extern int select_p_job_init(List job_list)
 #endif
 }
 
+extern bool select_p_node_ranking(struct node_record *node_ptr, int node_cnt)
+{
+	return false;
+}
+
 /* All initialization is performed by init() */
 extern int select_p_node_init(struct node_record *node_ptr, int node_cnt)
 {
diff --git a/src/plugins/select/bluegene/plugin/select_bluegene.c b/src/plugins/select/bluegene/plugin/select_bluegene.c
index fb4aadc6ac2..f72967454cc 100644
--- a/src/plugins/select/bluegene/plugin/select_bluegene.c
+++ b/src/plugins/select/bluegene/plugin/select_bluegene.c
@@ -462,6 +462,11 @@ extern int select_p_job_init(List job_list)
 #endif
 }
 
+extern bool select_p_node_ranking(struct node_record *node_ptr, int node_cnt)
+{
+	return false;
+}
+
 /* All initialization is performed by init() */
 extern int select_p_node_init(struct node_record *node_ptr, int node_cnt)
 {
diff --git a/src/plugins/select/cons_res/select_cons_res.c b/src/plugins/select/cons_res/select_cons_res.c
index c3f23c5d31a..b6a0527fe71 100644
--- a/src/plugins/select/cons_res/select_cons_res.c
+++ b/src/plugins/select/cons_res/select_cons_res.c
@@ -168,7 +168,7 @@ bitstr_t *idle_node_bitmap;
 const char plugin_name[] = "Consumable Resources (CR) Node Selection plugin";
 const char plugin_type[] = "select/cons_res";
 const uint32_t plugin_id      = 101;
-const uint32_t plugin_version = 91;
+const uint32_t plugin_version = 100;
 const uint32_t pstate_version = 7;	/* version control on saved state */
 
 uint16_t cr_type = CR_CPU; /* cr_type is overwritten in init() */
@@ -1676,6 +1676,12 @@ extern int select_p_job_init(List job_list)
 	return SLURM_SUCCESS;
 }
 
+/* This plugin does not generate a node ranking. */
+extern bool select_p_node_ranking(struct node_record *node_ptr, int node_cnt)
+{
+	return false;
+}
+
 /* This is Part 1 of a 4-part procedure which can be found in
  * src/slurmctld/read_config.c. The whole story goes like this:
  *
diff --git a/src/plugins/select/cray/other_select.c b/src/plugins/select/cray/other_select.c
index 33f79feb90d..29d1c10071a 100644
--- a/src/plugins/select/cray/other_select.c
+++ b/src/plugins/select/cray/other_select.c
@@ -78,6 +78,7 @@ static slurm_select_ops_t *_other_select_get_ops(slurm_select_context_t *c)
 		"select_p_state_save",
 		"select_p_state_restore",
 		"select_p_job_init",
+		"select_p_node_ranking",
 		"select_p_node_init",
 		"select_p_block_init",
 		"select_p_job_test",
diff --git a/src/plugins/select/cray/select_cray.c b/src/plugins/select/cray/select_cray.c
index 5eff8fbacbc..b8ceabc0669 100644
--- a/src/plugins/select/cray/select_cray.c
+++ b/src/plugins/select/cray/select_cray.c
@@ -126,7 +126,7 @@ struct select_nodeinfo {
 const char plugin_name[]	= "Cray node selection plugin";
 const char plugin_type[]	= "select/cray";
 uint32_t plugin_id	        = 104;
-const uint32_t plugin_version	= 1;
+const uint32_t plugin_version	= 100;
 
 
 /*
@@ -179,6 +179,14 @@ extern int select_p_job_init(List job_list)
 	return other_job_init(job_list);
 }
 
+/*
+ * select_p_node_ranking - generate node ranking for Cray nodes
+ */
+extern bool select_p_node_ranking(struct node_record *node_ptr, int node_cnt)
+{
+	return false;		/* FIXME - to be filled in */
+}
+
 extern int select_p_node_init(struct node_record *node_ptr, int node_cnt)
 {
 	return other_node_init(node_ptr, node_cnt);
diff --git a/src/plugins/select/linear/select_linear.c b/src/plugins/select/linear/select_linear.c
index d4d31875d21..412b8ba3f7a 100644
--- a/src/plugins/select/linear/select_linear.c
+++ b/src/plugins/select/linear/select_linear.c
@@ -191,7 +191,7 @@ extern int select_p_select_nodeinfo_free(select_nodeinfo_t *nodeinfo);
 const char plugin_name[]       	= "Linear node selection plugin";
 const char plugin_type[]       	= "select/linear";
 const uint32_t plugin_id	= 102;
-const uint32_t plugin_version	= 90;
+const uint32_t plugin_version	= 100;
 
 static struct node_record *select_node_ptr = NULL;
 static int select_node_cnt = 0;
@@ -2352,6 +2352,11 @@ extern int select_p_job_init(List job_list)
 	return SLURM_SUCCESS;
 }
 
+extern bool select_p_node_ranking(struct node_record *node_ptr, int node_cnt)
+{
+	return false;
+}
+
 extern int select_p_node_init(struct node_record *node_ptr, int node_cnt)
 {
 	if (node_ptr == NULL) {
diff --git a/src/plugins/topology/3d_torus/hilbert_slurm.c b/src/plugins/topology/3d_torus/hilbert_slurm.c
index 1ecf8ba1021..7e68c4c0160 100644
--- a/src/plugins/topology/3d_torus/hilbert_slurm.c
+++ b/src/plugins/topology/3d_torus/hilbert_slurm.c
@@ -61,10 +61,9 @@ static int _coord(char coord)
  * be called once, immediately after reading the slurm.conf file. */
 extern void nodes_to_hilbert_curve(void)
 {
-	int coord_inx, i, j, k, max_coord = 0, min_inx;
-	uint32_t min_val;
+	int coord_inx, i, j, k, max_coord = 0;
 	int *coords;
-	struct node_record *node_ptr, *node_ptr2;
+	struct node_record *node_ptr;
 	coord_t hilbert[3];
 	int dims = 3;
 #ifdef HAVE_SUN_CONST
@@ -127,64 +126,4 @@ extern void nodes_to_hilbert_curve(void)
 			((hilbert[1]>>0 & 1) <<  1) +
 			((hilbert[2]>>0 & 1) <<  0);
 	}
-
-	/* Now we need to sort the node records. We only need to move a few
-	 * fields since the others were all initialized to identical values.
-	 * The fields needing to be copied are those set by the function
-	 * _build_single_nodeline_info() in src/common/read_conf.c */
-	for (i=0; i<node_record_count; i++) {
-		min_val = node_record_table_ptr[i].node_rank;
-		min_inx = i;
-		for (j=(i+1); j<node_record_count; j++) {
-			if (node_record_table_ptr[j].node_rank < min_val) {
-				min_val = node_record_table_ptr[j].node_rank;
-				min_inx = j;
-			}
-		}
-		if (min_inx != i) {	/* swap records */
-			char *tmp_str;
-			uint16_t tmp_uint16;
-			uint32_t tmp_uint32;
-
-			node_ptr =  node_record_table_ptr + i;
-			node_ptr2 = node_record_table_ptr + min_inx;
-
-			tmp_str = node_ptr->name;
-			node_ptr->name  = node_ptr2->name;
-			node_ptr2->name = tmp_str;
-
-			tmp_str = node_ptr->comm_name;
-			node_ptr->comm_name  = node_ptr2->comm_name;
-			node_ptr2->comm_name = tmp_str;
-
-			tmp_uint32 = node_ptr->node_rank;
-			node_ptr->node_rank  = node_ptr2->node_rank;
-			node_ptr2->node_rank = tmp_uint32;
-
-			tmp_str = node_ptr->features;
-			node_ptr->features  = node_ptr2->features;
-			node_ptr2->features = tmp_str;
-
-			tmp_uint16 = node_ptr->port;
-			node_ptr->port  = node_ptr2->port;
-			node_ptr2->port = tmp_uint16;
-
-			tmp_str = node_ptr->reason;
-			node_ptr->reason  = node_ptr2->reason;
-			node_ptr2->reason = tmp_str;
-
-			tmp_uint32 = node_ptr->weight;
-			node_ptr->weight  = node_ptr2->weight;
-			node_ptr2->weight = tmp_uint32;
-		}
-	}
-
-#if _DEBUG
-	/* Log the results */
-	for (i=0, node_ptr=node_record_table_ptr; i<node_record_count;
-	     i++, node_ptr++) {
-		info("%s: %u", node_ptr->name, node_ptr->node_rank);
-	}
-#endif
 }
-
diff --git a/src/plugins/topology/3d_torus/topology_3d_torus.c b/src/plugins/topology/3d_torus/topology_3d_torus.c
index c2d37a8a603..5220be4630a 100644
--- a/src/plugins/topology/3d_torus/topology_3d_torus.c
+++ b/src/plugins/topology/3d_torus/topology_3d_torus.c
@@ -79,7 +79,7 @@
  */
 const char plugin_name[]        = "topology 3d_torus plugin";
 const char plugin_type[]        = "topology/3d_torus";
-const uint32_t plugin_version   = 100;
+const uint32_t plugin_version   = 101;
 
 extern void nodes_to_hilbert_curve(void);
 
@@ -107,19 +107,32 @@ extern int fini(void)
  *	after a system startup or reconfiguration.
  */
 extern int topo_build_config(void)
-{	static bool first_run = true;
+{
+	return SLURM_SUCCESS;
+}
+
+/*
+ * topo_generate_node_ranking  -  populate node_rank fields
+ * Returns true only on the first call, when the node_rank fields have been
+ * set; returns false on later calls and when built with HAVE_BG.
+ */
+extern bool topo_generate_node_ranking(void)
+{
+#ifdef HAVE_BG
+	return false;
+#else
+	static bool first_run = true;
 
 	/* We can only re-order the nodes once at slurmctld startup.
 	 * After that time, many bitmaps are created based upon the
 	 * index of each node name in the array. */
 	if (!first_run)
-		return SLURM_SUCCESS;
+		return false;
 	first_run = false;
 
-#ifndef HAVE_BG
 	nodes_to_hilbert_curve();
+	return true;
 #endif
-	return SLURM_SUCCESS;
 }
 
 /*
diff --git a/src/plugins/topology/node_rank/topology_node_rank.c b/src/plugins/topology/node_rank/topology_node_rank.c
index 3c4802948a8..7abaaca27a0 100644
--- a/src/plugins/topology/node_rank/topology_node_rank.c
+++ b/src/plugins/topology/node_rank/topology_node_rank.c
@@ -84,7 +84,7 @@
  */
 const char plugin_name[]        = "topology node_rank plugin";
 const char plugin_type[]        = "topology/node_rank";
-const uint32_t plugin_version   = 100;
+const uint32_t plugin_version   = 101;
 
 /*
  * init() is called when the plugin is loaded, before any other functions
@@ -110,79 +110,25 @@ extern int fini(void)
  *	after a system startup or reconfiguration.
  */
 extern int topo_build_config(void)
+{
+	return SLURM_SUCCESS;
+}
+
+/*
+ * topo_generate_node_ranking  -  request a node_rank sort (first call only)
+ */
+extern bool topo_generate_node_ranking(void)
 {
 	static bool first_run = true;
-	struct node_record *node_ptr, *node_ptr2;
-	int i, j, min_inx;
-	uint32_t min_val;
 
 	/* We can only re-order the nodes once at slurmctld startup.
 	 * After that time, many bitmaps are created based upon the
 	 * index of each node name in the array. */
 	if (!first_run)
-		return SLURM_SUCCESS;
+		return false;
 	first_run = false;
 
-	/* Now we need to sort the node records. We only need to move a few
-	 * fields since the others were all initialized to identical values.
-	 * The fields needing to be copied are those set by the function
-	 * _build_single_nodeline_info() in src/common/read_conf.c */
-	for (i=0; i<node_record_count; i++) {
-		min_val = node_record_table_ptr[i].node_rank;
-		min_inx = i;
-		for (j=(i+1); j<node_record_count; j++) {
-			if (node_record_table_ptr[j].node_rank < min_val) {
-				min_val = node_record_table_ptr[j].node_rank;
-				min_inx = j;
-			}
-		}
-		if (min_inx != i) {	/* swap records */
-			char *tmp_str;
-			uint16_t tmp_uint16;
-			uint32_t tmp_uint32;
-
-			node_ptr =  node_record_table_ptr + i;
-			node_ptr2 = node_record_table_ptr + min_inx;
-
-			tmp_str = node_ptr->name;
-			node_ptr->name  = node_ptr2->name;
-			node_ptr2->name = tmp_str;
-
-			tmp_str = node_ptr->comm_name;
-			node_ptr->comm_name  = node_ptr2->comm_name;
-			node_ptr2->comm_name = tmp_str;
-
-			tmp_uint32 = node_ptr->node_rank;
-			node_ptr->node_rank  = node_ptr2->node_rank;
-			node_ptr2->node_rank = tmp_uint32;
-
-			tmp_str = node_ptr->features;
-			node_ptr->features  = node_ptr2->features;
-			node_ptr2->features = tmp_str;
-
-			tmp_uint16 = node_ptr->port;
-			node_ptr->port  = node_ptr2->port;
-			node_ptr2->port = tmp_uint16;
-
-			tmp_str = node_ptr->reason;
-			node_ptr->reason  = node_ptr2->reason;
-			node_ptr2->reason = tmp_str;
-
-			tmp_uint32 = node_ptr->weight;
-			node_ptr->weight  = node_ptr2->weight;
-			node_ptr2->weight = tmp_uint32;
-		}
-	}
-
-#if _DEBUG
-	/* Log the results */
-	for (i=0, node_ptr=node_record_table_ptr; i<node_record_count;
-	     i++, node_ptr++) {
-		info("%s: %u", node_ptr->name, node_ptr->node_rank);
-	}
-#endif
-
-	return SLURM_SUCCESS;
+	return true;
 }
 
 /*
diff --git a/src/plugins/topology/none/topology_none.c b/src/plugins/topology/none/topology_none.c
index 20b0cb51739..806ac300809 100644
--- a/src/plugins/topology/none/topology_none.c
+++ b/src/plugins/topology/none/topology_none.c
@@ -77,7 +77,7 @@
  */
 const char plugin_name[]        = "topology NONE plugin";
 const char plugin_type[]        = "topology/none";
-const uint32_t plugin_version   = 100;
+const uint32_t plugin_version   = 101;
 
 /*
  * init() is called when the plugin is loaded, before any other functions
@@ -107,6 +107,14 @@ extern int topo_build_config(void)
 	return SLURM_SUCCESS;
 }
 
+/*
+ * topo_generate_node_ranking  -  this plugin does not set any node_rank fields
+ */
+extern bool topo_generate_node_ranking(void)
+{
+	return false;
+}
+
 /*
  * topo_get_node_addr - build node address and the associated pattern
  *      based on the topology information
diff --git a/src/plugins/topology/tree/topology_tree.c b/src/plugins/topology/tree/topology_tree.c
index 07ea2bab125..12939a16bce 100644
--- a/src/plugins/topology/tree/topology_tree.c
+++ b/src/plugins/topology/tree/topology_tree.c
@@ -81,7 +81,7 @@
  */
 const char plugin_name[]        = "topology tree plugin";
 const char plugin_type[]        = "topology/tree";
-const uint32_t plugin_version   = 100;
+const uint32_t plugin_version   = 101;
 
 typedef struct slurm_conf_switches {
 	uint32_t link_speed;	/* link speed, arbitrary units */
@@ -139,6 +139,13 @@ extern int topo_build_config(void)
 	return SLURM_SUCCESS;
 }
 
+/*
+ * topo_generate_node_ranking  -  this plugin does not set any node_rank fields
+ */
+extern bool topo_generate_node_ranking(void)
+{
+	return false;
+}
 
 /*
  * topo_get_node_addr - build node address and the associated pattern
diff --git a/src/slurmctld/read_config.c b/src/slurmctld/read_config.c
index 5e891aabba4..4c3e25a3e71 100644
--- a/src/slurmctld/read_config.c
+++ b/src/slurmctld/read_config.c
@@ -121,6 +121,77 @@ static int  _update_preempt(uint16_t old_enable_preempt);
 static void _validate_node_proc_count(void);
 #endif
 
+/*
+ * _reorder_node_record_table - order node table in ascending order of node_rank
+ * The ranking may be generated by the topology and/or select plugin.
+ */
+static void _reorder_node_record_table(void)
+{
+	struct node_record *node_ptr, *node_ptr2;
+	int i, j, min_inx;
+	uint32_t min_val;
+
+	/* Now we need to sort the node records. We only need to move a few
+	 * fields since the others were all initialized to identical values.
+	 * The fields needing to be copied are those set by the function
+	 * _build_single_nodeline_info() in src/common/read_conf.c */
+	for (i = 0; i < node_record_count; i++) {
+		min_val = node_record_table_ptr[i].node_rank;
+		min_inx = i;
+		for (j = i + 1; j < node_record_count; j++) {
+			if (node_record_table_ptr[j].node_rank < min_val) {
+				min_val = node_record_table_ptr[j].node_rank;
+				min_inx = j;
+			}
+		}
+		if (min_inx != i) {	/* swap records */
+			char *tmp_str;
+			uint16_t tmp_uint16;
+			uint32_t tmp_uint32;
+
+			node_ptr =  node_record_table_ptr + i;
+			node_ptr2 = node_record_table_ptr + min_inx;
+
+			tmp_str = node_ptr->name;
+			node_ptr->name  = node_ptr2->name;
+			node_ptr2->name = tmp_str;
+
+			tmp_str = node_ptr->comm_name;
+			node_ptr->comm_name  = node_ptr2->comm_name;
+			node_ptr2->comm_name = tmp_str;
+
+			tmp_uint32 = node_ptr->node_rank;
+			node_ptr->node_rank  = node_ptr2->node_rank;
+			node_ptr2->node_rank = tmp_uint32;
+
+			tmp_str = node_ptr->features;
+			node_ptr->features  = node_ptr2->features;
+			node_ptr2->features = tmp_str;
+
+			tmp_uint16 = node_ptr->port;
+			node_ptr->port  = node_ptr2->port;
+			node_ptr2->port = tmp_uint16;
+
+			tmp_str = node_ptr->reason;
+			node_ptr->reason  = node_ptr2->reason;
+			node_ptr2->reason = tmp_str;
+
+			tmp_uint32 = node_ptr->weight;
+			node_ptr->weight  = node_ptr2->weight;
+			node_ptr2->weight = tmp_uint32;
+		}
+	}
+
+#if _DEBUG
+	/* Log the results */
+	for (i=0, node_ptr = node_record_table_ptr; i < node_record_count;
+	     i++, node_ptr++) {
+		info("%s: %u", node_ptr->name, node_ptr->node_rank);
+	}
+#endif
+}
+
+
 /*
  * _build_bitmaps_pre_select - recover some state for jobs and nodes prior to
  *	calling the select_* functions
@@ -633,6 +704,7 @@ int read_slurm_conf(int recover, bool reconfig)
 	int error_code, i, rc, load_job_ret = SLURM_SUCCESS;
 	int old_node_record_count = 0;
 	struct node_record *old_node_table_ptr = NULL, *node_ptr;
+	bool do_reorder_nodes = false;
 	List old_part_list = NULL;
 	char *old_def_part_name = NULL;
 	char *old_auth_type       = xstrdup(slurmctld_conf.authtype);
@@ -713,6 +785,17 @@ int read_slurm_conf(int recover, bool reconfig)
 		return EINVAL;
 	}
 
+	/*
+	 * Node reordering needs to be done by the topology and/or select
+	 * plugin. Reordering the table must be done before hashing the
+	 * nodes, and before any position-relative bitmaps are created.
+	 */
+	do_reorder_nodes |= slurm_topo_generate_node_ranking();
+	do_reorder_nodes |= select_g_node_ranking(node_record_table_ptr,
+						  node_record_count);
+	if (do_reorder_nodes)
+		_reorder_node_record_table();
+
 	rehash_node();
 	rehash_jobs();
 	set_slurmd_addr();
-- 
GitLab