From 7cc4eafbed0b37a37f1403980f10c1804dd1ddf8 Mon Sep 17 00:00:00 2001 From: Danny Auble <da@schedmd.com> Date: Wed, 13 Apr 2016 16:16:12 -0700 Subject: [PATCH] Make it so the tree_width a message starts with goes down the tree with the message. This was needed for sbcast or srun --bcast since the tree width is different than the default. Bug 2512 --- src/common/forward.c | 11 ++++-- src/common/slurm_protocol_api.c | 16 +++++--- src/common/slurm_protocol_defs.h | 1 + src/common/slurm_protocol_pack.c | 43 ++++++++++++++++++++- src/common/slurm_route.c | 23 ++++++----- src/common/slurm_route.h | 6 ++- src/plugins/route/default/route_default.c | 4 +- src/plugins/route/topology/route_topology.c | 8 ++-- testsuite/expect/test33.1.prog.c | 4 +- 9 files changed, 89 insertions(+), 27 deletions(-) diff --git a/src/common/forward.c b/src/common/forward.c index 026805c7661..80000d47e04 100644 --- a/src/common/forward.c +++ b/src/common/forward.c @@ -242,8 +242,11 @@ void *_forward_thread(void *arg) if (message_timeout < 0) message_timeout = slurm_get_msg_timeout() * 1000; + if (!fwd_msg->header.forward.tree_width) + fwd_msg->header.forward.tree_width = + slurm_get_tree_width(); steps = (fwd_msg->header.forward.cnt+1) / - slurm_get_tree_width(); + fwd_msg->header.forward.tree_width; fwd_msg->timeout = (message_timeout*steps); /* info("got %d * %d = %d", message_timeout, */ /* steps, fwd_msg->timeout); */ @@ -649,7 +652,8 @@ extern int forward_msg(forward_struct_t *forward_struct, header_t *header) hl = hostlist_create(header->forward.nodelist); hostlist_uniq(hl); - if ( route_g_split_hostlist(hl, &sp_hl, &hl_count) ) { + if (route_g_split_hostlist( + hl, &sp_hl, &hl_count, header->forward.tree_width)) { error("unable to split forward hostlist"); hostlist_destroy(hl); return SLURM_ERROR; @@ -693,7 +697,8 @@ extern List start_msg_tree(hostlist_t hl, slurm_msg_t *msg, int timeout) hostlist_uniq(hl); host_count = hostlist_count(hl); - if ( route_g_split_hostlist(hl, &sp_hl, &hl_count) ) { + if (route_g_split_hostlist(hl, &sp_hl, &hl_count, + msg->forward.tree_width)) { error("unable to split forward hostlist"); return NULL; } diff --git a/src/common/slurm_protocol_api.c b/src/common/slurm_protocol_api.c index 2276fe3d41b..2c20bfaec87 100644 --- a/src/common/slurm_protocol_api.c +++ b/src/common/slurm_protocol_api.c @@ -3690,6 +3690,10 @@ int slurm_send_node_msg(slurm_fd_t fd, slurm_msg_t * msg) forward_init(&msg->forward, NULL); msg->ret_list = NULL; } + + if (!msg->forward.tree_width) + msg->forward.tree_width = slurm_get_tree_width(); + forward_wait(msg); if (difftime(time(NULL), start_time) >= 60) { @@ -4071,7 +4075,6 @@ _send_and_recv_msgs(slurm_fd_t fd, slurm_msg_t *req, int timeout) int retry = 0; List ret_list = NULL; int steps = 0; - int width; if (!req->forward.timeout) { if (!timeout) @@ -4086,11 +4089,14 @@ _send_and_recv_msgs(slurm_fd_t fd, slurm_msg_t *req, int timeout) * (timeout+message_timeout sec per step) * to let the child timeout */ if (message_timeout < 0) - message_timeout = slurm_get_msg_timeout() * 1000; + message_timeout = + slurm_get_msg_timeout() * 1000; steps = req->forward.cnt + 1; - width = slurm_get_tree_width(); - if (width) - steps /= width; + if (!req->forward.tree_width) + req->forward.tree_width = + slurm_get_tree_width(); + if (req->forward.tree_width) + steps /= req->forward.tree_width; timeout = (message_timeout * steps); steps++; diff --git a/src/common/slurm_protocol_defs.h b/src/common/slurm_protocol_defs.h index 5d17734d8ed..a03ad8ab7e8 100644 --- a/src/common/slurm_protocol_defs.h +++ b/src/common/slurm_protocol_defs.h @@ -398,6 +398,7 @@ typedef struct forward { char *nodelist; /* ranged string of who to forward the * message to */ uint32_t timeout; /* original timeout increments */ + uint16_t tree_width; /* what the treewidth should be */ } forward_t; /*core api protocol message structures */ diff --git a/src/common/slurm_protocol_pack.c b/src/common/slurm_protocol_pack.c index a98dfdb28b0..9d85f31bbc8 100644 --- a/src/common/slurm_protocol_pack.c +++ b/src/common/slurm_protocol_pack.c @@ -886,7 +886,25 @@ pack_header(header_t * header, Buf buffer) { pack16((uint16_t)header->version, buffer); - if (header->version >= SLURM_15_08_PROTOCOL_VERSION) { + if (header->version >= SLURM_16_05_PROTOCOL_VERSION) { + pack16((uint16_t)header->flags, buffer); + pack16((uint16_t)header->msg_index, buffer); + pack16((uint16_t)header->msg_type, buffer); + pack32((uint32_t)header->body_length, buffer); + pack16((uint16_t)header->forward.cnt, buffer); + if (header->forward.cnt > 0) { + packstr(header->forward.nodelist, buffer); + pack32((uint32_t)header->forward.timeout, buffer); + pack16(header->forward.tree_width, buffer); + } + pack16((uint16_t)header->ret_cnt, buffer); + if (header->ret_cnt > 0) { + _pack_ret_list(header->ret_list, + header->ret_cnt, buffer, + header->version); + } + slurm_pack_slurm_addr(&header->orig_addr, buffer); + } else if (header->version >= SLURM_15_08_PROTOCOL_VERSION) { pack16((uint16_t)header->flags, buffer); pack16((uint16_t)header->msg_index, buffer); pack16((uint16_t)header->msg_type, buffer); @@ -940,6 +958,29 @@ unpack_header(header_t * header, Buf buffer) safe_unpack16(&header->version, buffer); if (header->version >= SLURM_15_08_PROTOCOL_VERSION) { + safe_unpack16(&header->flags, buffer); + safe_unpack16(&header->msg_index, buffer); + safe_unpack16(&header->msg_type, buffer); + safe_unpack32(&header->body_length, buffer); + safe_unpack16(&header->forward.cnt, buffer); + if (header->forward.cnt > 0) { + safe_unpackstr_xmalloc(&header->forward.nodelist, + &uint32_tmp, buffer); + safe_unpack32(&header->forward.timeout, buffer); + safe_unpack16(&header->forward.tree_width, buffer); + } + + safe_unpack16(&header->ret_cnt, buffer); + if (header->ret_cnt > 0) { + if (_unpack_ret_list(&(header->ret_list), + header->ret_cnt, buffer, + header->version)) + goto unpack_error; + } else { + header->ret_list = NULL; + } + slurm_unpack_slurm_addr_no_alloc(&header->orig_addr, buffer); + } else if (header->version >= SLURM_15_08_PROTOCOL_VERSION) { safe_unpack16(&header->flags, buffer); safe_unpack16(&header->msg_index, buffer); safe_unpack16(&header->msg_type, buffer); diff --git a/src/common/slurm_route.c b/src/common/slurm_route.c index 9918e115e5b..8d473c82d3e 100644 --- a/src/common/slurm_route.c +++ b/src/common/slurm_route.c @@ -63,7 +63,7 @@ strong_alias(route_split_hostlist_treewidth, typedef struct slurm_route_ops { int (*split_hostlist) (hostlist_t hl, hostlist_t** sp_hl, - int* count); + int* count, uint16_t tree_width); int (*reconfigure) (void); slurm_addr_t* (*next_collector) (bool* is_collector); slurm_addr_t* (*next_collector_backup) (void); @@ -84,7 +84,7 @@ static plugin_context_t *g_context = NULL; static pthread_mutex_t g_context_lock = PTHREAD_MUTEX_INITIALIZER; static bool init_run = false; static uint32_t debug_flags = 0; -static uint16_t tree_width; +static uint16_t g_tree_width; static bool this_is_collector = false; /* this node is a collector node */ static slurm_addr_t *msg_collect_node = NULL; /* address of node to aggregate messages from this node */ @@ -155,7 +155,7 @@ static void _set_collectors(char *this_node_name) backup_port = parent_port; slurm_conf_unlock(); while (1) { - if ( route_g_split_hostlist(nodes, &hll, &hl_count) ) { + if (route_g_split_hostlist(nodes, &hll, &hl_count, 0)) { error("unable to split forward hostlist"); goto clean; /* collector addrs remains null */ } @@ -298,7 +298,7 @@ extern int route_init(char *node_name) goto done; } - tree_width = slurm_get_tree_width(); + g_tree_width = slurm_get_tree_width(); debug_flags = slurm_get_debug_flags(); init_run = true; @@ -351,7 +351,7 @@ extern int route_fini(void) */ extern int route_g_split_hostlist(hostlist_t hl, hostlist_t** sp_hl, - int* count) + int* count, uint16_t tree_width) { int rc; int j, nnodes, nnodex; @@ -366,11 +366,13 @@ extern int route_g_split_hostlist(hostlist_t hl, * split_hostlise call. */ nnodes = hostlist_count(hl); buf = hostlist_ranged_string_xmalloc(hl); - info("ROUTE: split_hostlist: hl=%s",buf); + info("ROUTE: split_hostlist: hl=%s tree_width %u", + buf, tree_width); xfree(buf); } - rc = (*(ops.split_hostlist))(hl, sp_hl, count); + rc = (*(ops.split_hostlist))(hl, sp_hl, count, + tree_width ? tree_width : g_tree_width); if (debug_flags & DEBUG_FLAG_ROUTE) { /* Sanity check to make sure all nodes in msg list are in * a child list */ @@ -400,7 +402,7 @@ extern int route_g_reconfigure(void) if (route_init(NULL) != SLURM_SUCCESS) return SLURM_ERROR; debug_flags = slurm_get_debug_flags(); - tree_width = slurm_get_tree_width(); + g_tree_width = slurm_get_tree_width(); return (*(ops.reconfigure))(); } @@ -464,7 +466,7 @@ extern slurm_addr_t* route_g_next_collector_backup(void) */ extern int route_split_hostlist_treewidth(hostlist_t hl, hostlist_t** sp_hl, - int* count) + int* count, uint16_t tree_width) { int host_count; int *span = NULL; @@ -473,6 +475,9 @@ extern int route_split_hostlist_treewidth(hostlist_t hl, int nhl = 0; int j; + if (!tree_width) + tree_width = g_tree_width; + host_count = hostlist_count(hl); span = set_span(host_count, tree_width); *sp_hl = (hostlist_t*) xmalloc(tree_width * sizeof(hostlist_t)); diff --git a/src/common/slurm_route.h b/src/common/slurm_route.h index fc85a217207..5a6d230a3a6 100644 --- a/src/common/slurm_route.h +++ b/src/common/slurm_route.h @@ -72,6 +72,7 @@ extern int route_fini(void); * as similar code replaced in forward.c * OUT: sp_hl - hostlist_t** - the array of hostlists that will be malloced * OUT: count - int* - the count of created hostlists + * IN: tree_width- int - Max width of each branch on the tree. * RET: SLURM_SUCCESS - int * * Note: created hostlist will have to be freed independently using @@ -80,7 +81,7 @@ extern int route_fini(void); */ extern int route_g_split_hostlist(hostlist_t hl, hostlist_t** sp_hl, - int* count); + int* count, uint16_t tree_width); /* * route_g_reconfigure - reset during reconfigure @@ -123,6 +124,7 @@ extern slurm_addr_t* route_g_next_collector_backup ( void ); * as similar code replaced in forward.c * OUT: sp_hl - hostlist_t** - the array of hostlists that will be malloced * OUT: count - int* - the count of created hostlists + * IN: tree_width- int - Max width of each branch on the tree. * RET: SLURM_SUCCESS - int * * Note: created hostlist will have to be freed independently using @@ -131,7 +133,7 @@ extern slurm_addr_t* route_g_next_collector_backup ( void ); */ extern int route_split_hostlist_treewidth(hostlist_t hl, hostlist_t** sp_hl, - int* count); + int* count, uint16_t tree_width); /* * route_next_collector - return address of next collector diff --git a/src/plugins/route/default/route_default.c b/src/plugins/route/default/route_default.c index b0f196f1c90..09005763f0c 100644 --- a/src/plugins/route/default/route_default.c +++ b/src/plugins/route/default/route_default.c @@ -120,9 +120,9 @@ extern int fini(void) */ extern int route_p_split_hostlist(hostlist_t hl, hostlist_t** sp_hl, - int* count) + int* count, uint16_t tree_width) { - return route_split_hostlist_treewidth(hl, sp_hl, count); + return route_split_hostlist_treewidth(hl, sp_hl, count, tree_width); } /* diff --git a/src/plugins/route/topology/route_topology.c b/src/plugins/route/topology/route_topology.c index 8c932b8b336..5c35da237a9 100644 --- a/src/plugins/route/topology/route_topology.c +++ b/src/plugins/route/topology/route_topology.c @@ -145,7 +145,7 @@ extern int fini(void) */ extern int route_p_split_hostlist(hostlist_t hl, hostlist_t** sp_hl, - int* count) + int* count, uint16_t tree_width) { int i, j, k, hl_ndx, msg_count, sw_count, lst_count; char *buf; @@ -210,13 +210,15 @@ extern int route_p_split_hostlist(hostlist_t hl, } FREE_NULL_BITMAP(nodes_bitmap); xfree(*sp_hl); - return route_split_hostlist_treewidth(hl, sp_hl, count); + return route_split_hostlist_treewidth( + hl, sp_hl, count, tree_width); } if (switch_record_table[j].level == 0) { /* This is a leaf switch. Construct list based on TreeWidth */ FREE_NULL_BITMAP(nodes_bitmap); xfree(*sp_hl); - return route_split_hostlist_treewidth(hl, sp_hl, count); + return route_split_hostlist_treewidth( + hl, sp_hl, count, tree_width); } /* loop through children, construction a hostlist for each child switch * with nodes in the message list */ diff --git a/testsuite/expect/test33.1.prog.c b/testsuite/expect/test33.1.prog.c index 1f81a3ece3a..bac10e1bfda 100644 --- a/testsuite/expect/test33.1.prog.c +++ b/testsuite/expect/test33.1.prog.c @@ -185,7 +185,7 @@ int _measure_api(char* measure_case) nodes = measure_case; hl = hostlist_create(nodes); START_TIMER; - if (route_g_split_hostlist(hl, &sp_hl, &hl_count) ) { + if (route_g_split_hostlist(hl, &sp_hl, &hl_count, 0)) { hostlist_destroy(hl); fatal("unable to split forward hostlist"); } @@ -230,7 +230,7 @@ int _run_test(char** testcase, int lines) char *nodes; nodes = testcase[0]; hostlist_t hl = hostlist_create(nodes); - if (route_g_split_hostlist(hl, &hll, &hl_count)) { + if (route_g_split_hostlist(hl, &hll, &hl_count, 0)) { info("Unable to split forward hostlist"); _print_test(testcase,lines); rc = SLURM_FAILURE; -- GitLab