From 7cc4eafbed0b37a37f1403980f10c1804dd1ddf8 Mon Sep 17 00:00:00 2001
From: Danny Auble <da@schedmd.com>
Date: Wed, 13 Apr 2016 16:16:12 -0700
Subject: [PATCH] Make it so the tree_width a message starts with goes down the
 tree with the message.  This was needed for sbcast or srun --bcast since the
 tree width is different than the default.

Bug 2512
---
 src/common/forward.c                        | 11 ++++--
 src/common/slurm_protocol_api.c             | 16 +++++---
 src/common/slurm_protocol_defs.h            |  1 +
 src/common/slurm_protocol_pack.c            | 43 ++++++++++++++++++++-
 src/common/slurm_route.c                    | 23 ++++++-----
 src/common/slurm_route.h                    |  6 ++-
 src/plugins/route/default/route_default.c   |  4 +-
 src/plugins/route/topology/route_topology.c |  8 ++--
 testsuite/expect/test33.1.prog.c            |  4 +-
 9 files changed, 89 insertions(+), 27 deletions(-)

diff --git a/src/common/forward.c b/src/common/forward.c
index 026805c7661..80000d47e04 100644
--- a/src/common/forward.c
+++ b/src/common/forward.c
@@ -242,8 +242,11 @@ void *_forward_thread(void *arg)
 			if (message_timeout < 0)
 				message_timeout =
 					slurm_get_msg_timeout() * 1000;
+			if (!fwd_msg->header.forward.tree_width)
+				fwd_msg->header.forward.tree_width =
+					slurm_get_tree_width();
 			steps = (fwd_msg->header.forward.cnt+1) /
-				slurm_get_tree_width();
+					fwd_msg->header.forward.tree_width;
 			fwd_msg->timeout = (message_timeout*steps);
 			/* info("got %d * %d = %d", message_timeout, */
 			/*      steps, fwd_msg->timeout); */
@@ -649,7 +652,8 @@ extern int forward_msg(forward_struct_t *forward_struct, header_t *header)
 	hl = hostlist_create(header->forward.nodelist);
 	hostlist_uniq(hl);
 
-	if ( route_g_split_hostlist(hl, &sp_hl, &hl_count) ) {
+	if (route_g_split_hostlist(
+		    hl, &sp_hl, &hl_count, header->forward.tree_width)) {
 		error("unable to split forward hostlist");
 		hostlist_destroy(hl);
 		return SLURM_ERROR;
@@ -693,7 +697,8 @@ extern List start_msg_tree(hostlist_t hl, slurm_msg_t *msg, int timeout)
 	hostlist_uniq(hl);
 	host_count = hostlist_count(hl);
 
-	if ( route_g_split_hostlist(hl, &sp_hl, &hl_count) ) {
+	if (route_g_split_hostlist(hl, &sp_hl, &hl_count,
+				   msg->forward.tree_width)) {
 		error("unable to split forward hostlist");
 		return NULL;
 	}
diff --git a/src/common/slurm_protocol_api.c b/src/common/slurm_protocol_api.c
index 2276fe3d41b..2c20bfaec87 100644
--- a/src/common/slurm_protocol_api.c
+++ b/src/common/slurm_protocol_api.c
@@ -3690,6 +3690,10 @@ int slurm_send_node_msg(slurm_fd_t fd, slurm_msg_t * msg)
 		forward_init(&msg->forward, NULL);
 		msg->ret_list = NULL;
 	}
+
+	if (!msg->forward.tree_width)
+		msg->forward.tree_width = slurm_get_tree_width();
+
 	forward_wait(msg);
 
 	if (difftime(time(NULL), start_time) >= 60) {
@@ -4071,7 +4075,6 @@ _send_and_recv_msgs(slurm_fd_t fd, slurm_msg_t *req, int timeout)
 	int retry = 0;
 	List ret_list = NULL;
 	int steps = 0;
-	int width;
 
 	if (!req->forward.timeout) {
 		if (!timeout)
@@ -4086,11 +4089,14 @@ _send_and_recv_msgs(slurm_fd_t fd, slurm_msg_t *req, int timeout)
 			 * (timeout+message_timeout sec per step)
 			 * to let the child timeout */
 			if (message_timeout < 0)
-				message_timeout = slurm_get_msg_timeout() * 1000;
+				message_timeout =
+					slurm_get_msg_timeout() * 1000;
 			steps = req->forward.cnt + 1;
-			width = slurm_get_tree_width();
-			if (width)
-				steps /= width;
+			if (!req->forward.tree_width)
+				req->forward.tree_width =
+					slurm_get_tree_width();
+			if (req->forward.tree_width)
+				steps /= req->forward.tree_width;
 			timeout = (message_timeout * steps);
 			steps++;
 
diff --git a/src/common/slurm_protocol_defs.h b/src/common/slurm_protocol_defs.h
index 5d17734d8ed..a03ad8ab7e8 100644
--- a/src/common/slurm_protocol_defs.h
+++ b/src/common/slurm_protocol_defs.h
@@ -398,6 +398,7 @@ typedef struct forward {
 	char      *nodelist;	/* ranged string of who to forward the
 				 * message to */
 	uint32_t   timeout;	/* original timeout increments */
+	uint16_t   tree_width;  /* what the treewidth should be */
 } forward_t;
 
 /*core api protocol message structures */
diff --git a/src/common/slurm_protocol_pack.c b/src/common/slurm_protocol_pack.c
index a98dfdb28b0..9d85f31bbc8 100644
--- a/src/common/slurm_protocol_pack.c
+++ b/src/common/slurm_protocol_pack.c
@@ -886,7 +886,25 @@ pack_header(header_t * header, Buf buffer)
 {
 	pack16((uint16_t)header->version, buffer);
 
-	if (header->version >= SLURM_15_08_PROTOCOL_VERSION) {
+	if (header->version >= SLURM_16_05_PROTOCOL_VERSION) {
+		pack16((uint16_t)header->flags, buffer);
+		pack16((uint16_t)header->msg_index, buffer);
+		pack16((uint16_t)header->msg_type, buffer);
+		pack32((uint32_t)header->body_length, buffer);
+		pack16((uint16_t)header->forward.cnt, buffer);
+		if (header->forward.cnt > 0) {
+			packstr(header->forward.nodelist, buffer);
+			pack32((uint32_t)header->forward.timeout, buffer);
+			pack16(header->forward.tree_width, buffer);
+		}
+		pack16((uint16_t)header->ret_cnt, buffer);
+		if (header->ret_cnt > 0) {
+			_pack_ret_list(header->ret_list,
+				       header->ret_cnt, buffer,
+				       header->version);
+		}
+		slurm_pack_slurm_addr(&header->orig_addr, buffer);
+	} else if (header->version >= SLURM_15_08_PROTOCOL_VERSION) {
 		pack16((uint16_t)header->flags, buffer);
 		pack16((uint16_t)header->msg_index, buffer);
 		pack16((uint16_t)header->msg_type, buffer);
@@ -940,6 +958,29 @@ unpack_header(header_t * header, Buf buffer)
 	safe_unpack16(&header->version, buffer);
 
 	if (header->version >= SLURM_15_08_PROTOCOL_VERSION) {
+		safe_unpack16(&header->flags, buffer);
+		safe_unpack16(&header->msg_index, buffer);
+		safe_unpack16(&header->msg_type, buffer);
+		safe_unpack32(&header->body_length, buffer);
+		safe_unpack16(&header->forward.cnt, buffer);
+		if (header->forward.cnt > 0) {
+			safe_unpackstr_xmalloc(&header->forward.nodelist,
+					       &uint32_tmp, buffer);
+			safe_unpack32(&header->forward.timeout, buffer);
+			safe_unpack16(&header->forward.tree_width, buffer);
+		}
+
+		safe_unpack16(&header->ret_cnt, buffer);
+		if (header->ret_cnt > 0) {
+			if (_unpack_ret_list(&(header->ret_list),
+					     header->ret_cnt, buffer,
+					     header->version))
+				goto unpack_error;
+		} else {
+			header->ret_list = NULL;
+		}
+		slurm_unpack_slurm_addr_no_alloc(&header->orig_addr, buffer);
+	} else if (header->version >= SLURM_15_08_PROTOCOL_VERSION) {
 		safe_unpack16(&header->flags, buffer);
 		safe_unpack16(&header->msg_index, buffer);
 		safe_unpack16(&header->msg_type, buffer);
diff --git a/src/common/slurm_route.c b/src/common/slurm_route.c
index 9918e115e5b..8d473c82d3e 100644
--- a/src/common/slurm_route.c
+++ b/src/common/slurm_route.c
@@ -63,7 +63,7 @@ strong_alias(route_split_hostlist_treewidth,
 typedef struct slurm_route_ops {
 	int  (*split_hostlist)    (hostlist_t hl,
 				   hostlist_t** sp_hl,
-				   int* count);
+				   int* count, uint16_t tree_width);
 	int  (*reconfigure)       (void);
 	slurm_addr_t* (*next_collector) (bool* is_collector);
 	slurm_addr_t* (*next_collector_backup) (void);
@@ -84,7 +84,7 @@ static plugin_context_t	*g_context = NULL;
 static pthread_mutex_t g_context_lock = PTHREAD_MUTEX_INITIALIZER;
 static bool init_run = false;
 static uint32_t debug_flags = 0;
-static uint16_t tree_width;
+static uint16_t g_tree_width;
 static bool this_is_collector = false; /* this node is a collector node */
 static slurm_addr_t *msg_collect_node = NULL; /* address of node to aggregate
 						 messages from this node */
@@ -155,7 +155,7 @@ static void _set_collectors(char *this_node_name)
 	backup_port = parent_port;
 	slurm_conf_unlock();
 	while (1) {
-		if ( route_g_split_hostlist(nodes, &hll, &hl_count) ) {
+		if (route_g_split_hostlist(nodes, &hll, &hl_count, 0)) {
 			error("unable to split forward hostlist");
 			goto clean; /* collector addrs remains null */
 		}
@@ -298,7 +298,7 @@ extern int route_init(char *node_name)
 		goto done;
 	}
 
-	tree_width = slurm_get_tree_width();
+	g_tree_width = slurm_get_tree_width();
 	debug_flags = slurm_get_debug_flags();
 
 	init_run = true;
@@ -351,7 +351,7 @@ extern int route_fini(void)
  */
 extern int route_g_split_hostlist(hostlist_t hl,
 				  hostlist_t** sp_hl,
-				  int* count)
+				  int* count, uint16_t tree_width)
 {
 	int rc;
 	int j, nnodes, nnodex;
@@ -366,11 +366,13 @@ extern int route_g_split_hostlist(hostlist_t hl,
 		 * split_hostlise call.  */
 		nnodes = hostlist_count(hl);
 		buf = hostlist_ranged_string_xmalloc(hl);
-		info("ROUTE: split_hostlist: hl=%s",buf);
+		info("ROUTE: split_hostlist: hl=%s tree_width %u",
+		     buf, tree_width);
 		xfree(buf);
 	}
 
-	rc = (*(ops.split_hostlist))(hl, sp_hl, count);
+	rc = (*(ops.split_hostlist))(hl, sp_hl, count,
+				     tree_width ? tree_width : g_tree_width);
 	if (debug_flags & DEBUG_FLAG_ROUTE) {
 		/* Sanity check to make sure all nodes in msg list are in
 		 * a child list */
@@ -400,7 +402,7 @@ extern int route_g_reconfigure(void)
 	if (route_init(NULL) != SLURM_SUCCESS)
 		return SLURM_ERROR;
 	debug_flags = slurm_get_debug_flags();
-	tree_width = slurm_get_tree_width();
+	g_tree_width = slurm_get_tree_width();
 
 	return (*(ops.reconfigure))();
 }
@@ -464,7 +466,7 @@ extern slurm_addr_t* route_g_next_collector_backup(void)
  */
 extern int route_split_hostlist_treewidth(hostlist_t hl,
 					  hostlist_t** sp_hl,
-					  int* count)
+					  int* count, uint16_t tree_width)
 {
 	int host_count;
 	int *span = NULL;
@@ -473,6 +475,9 @@ extern int route_split_hostlist_treewidth(hostlist_t hl,
 	int nhl = 0;
 	int j;
 
+	if (!tree_width)
+		tree_width = g_tree_width;
+
 	host_count = hostlist_count(hl);
 	span = set_span(host_count, tree_width);
 	*sp_hl = (hostlist_t*) xmalloc(tree_width * sizeof(hostlist_t));
diff --git a/src/common/slurm_route.h b/src/common/slurm_route.h
index fc85a217207..5a6d230a3a6 100644
--- a/src/common/slurm_route.h
+++ b/src/common/slurm_route.h
@@ -72,6 +72,7 @@ extern int route_fini(void);
  *                                as similar code replaced in forward.c
  * OUT: sp_hl    - hostlist_t** - the array of hostlists that will be malloced
  * OUT: count    - int*         - the count of created hostlists
+ * IN: tree_width- int          - Max width of each branch on the tree.
  * RET: SLURM_SUCCESS - int
  *
  * Note: created hostlist will have to be freed independently using
@@ -80,7 +81,7 @@ extern int route_fini(void);
  */
 extern int route_g_split_hostlist(hostlist_t hl,
 				  hostlist_t** sp_hl,
-				  int* count);
+				  int* count, uint16_t tree_width);
 
 /*
  * route_g_reconfigure - reset during reconfigure
@@ -123,6 +124,7 @@ extern slurm_addr_t* route_g_next_collector_backup ( void );
  *                                as similar code replaced in forward.c
  * OUT: sp_hl    - hostlist_t** - the array of hostlists that will be malloced
  * OUT: count    - int*         - the count of created hostlists
+ * IN: tree_width- int          - Max width of each branch on the tree.
  * RET: SLURM_SUCCESS - int
  *
  * Note: created hostlist will have to be freed independently using
@@ -131,7 +133,7 @@ extern slurm_addr_t* route_g_next_collector_backup ( void );
  */
 extern int route_split_hostlist_treewidth(hostlist_t hl,
 					  hostlist_t** sp_hl,
-					  int* count);
+					  int* count, uint16_t tree_width);
 
 /*
  * route_next_collector - return address of next collector
diff --git a/src/plugins/route/default/route_default.c b/src/plugins/route/default/route_default.c
index b0f196f1c90..09005763f0c 100644
--- a/src/plugins/route/default/route_default.c
+++ b/src/plugins/route/default/route_default.c
@@ -120,9 +120,9 @@ extern int fini(void)
  */
 extern int route_p_split_hostlist(hostlist_t hl,
 				  hostlist_t** sp_hl,
-				  int* count)
+				  int* count, uint16_t tree_width)
 {
-	return route_split_hostlist_treewidth(hl, sp_hl, count);
+	return route_split_hostlist_treewidth(hl, sp_hl, count, tree_width);
 }
 
 /*
diff --git a/src/plugins/route/topology/route_topology.c b/src/plugins/route/topology/route_topology.c
index 8c932b8b336..5c35da237a9 100644
--- a/src/plugins/route/topology/route_topology.c
+++ b/src/plugins/route/topology/route_topology.c
@@ -145,7 +145,7 @@ extern int fini(void)
  */
 extern int route_p_split_hostlist(hostlist_t hl,
 				  hostlist_t** sp_hl,
-				  int* count)
+				  int* count, uint16_t tree_width)
 {
 	int i, j, k, hl_ndx, msg_count, sw_count, lst_count;
 	char  *buf;
@@ -210,13 +210,15 @@ extern int route_p_split_hostlist(hostlist_t hl,
 		}
 		FREE_NULL_BITMAP(nodes_bitmap);
 		xfree(*sp_hl);
-		return route_split_hostlist_treewidth(hl, sp_hl, count);
+		return route_split_hostlist_treewidth(
+			hl, sp_hl, count, tree_width);
 	}
 	if (switch_record_table[j].level == 0) {
 		/* This is a leaf switch. Construct list based on TreeWidth */
 		FREE_NULL_BITMAP(nodes_bitmap);
 		xfree(*sp_hl);
-		return route_split_hostlist_treewidth(hl, sp_hl, count);
+		return route_split_hostlist_treewidth(
+			hl, sp_hl, count, tree_width);
 	}
 	/* loop through children, construction a hostlist for each child switch
 	 * with nodes in the message list */
diff --git a/testsuite/expect/test33.1.prog.c b/testsuite/expect/test33.1.prog.c
index 1f81a3ece3a..bac10e1bfda 100644
--- a/testsuite/expect/test33.1.prog.c
+++ b/testsuite/expect/test33.1.prog.c
@@ -185,7 +185,7 @@ int _measure_api(char* measure_case)
 	nodes = measure_case;
 	hl = hostlist_create(nodes);
 	START_TIMER;
-	if (route_g_split_hostlist(hl, &sp_hl, &hl_count) ) {
+	if (route_g_split_hostlist(hl, &sp_hl, &hl_count, 0)) {
 		hostlist_destroy(hl);
 		fatal("unable to split forward hostlist");
 	}
@@ -230,7 +230,7 @@ int _run_test(char** testcase, int lines)
 	char *nodes;
 	nodes = testcase[0];
 	hostlist_t hl = hostlist_create(nodes);
-	if (route_g_split_hostlist(hl, &hll, &hl_count)) {
+	if (route_g_split_hostlist(hl, &hll, &hl_count, 0)) {
 		info("Unable to split forward hostlist");
 		_print_test(testcase,lines);
 		rc = SLURM_FAILURE;
-- 
GitLab