From 897c4b27bc1e0d9f6a72024895dd21e3e74df84a Mon Sep 17 00:00:00 2001
From: Bill Brophy <bill.brophy@bull.com>
Date: Tue, 8 Mar 2016 16:46:29 -0500
Subject: [PATCH] Fix route/topology plugin to prevent segfault in sbcast.

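route_p_split_hostlist() was not thread-safe. When several threads
called it before the topology configuration had been built, they could
run the one-time initialization concurrently, causing one of several
segfaults depending on where in the initialization code each thread was.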

Bug 2495.
---
 NEWS                                        | 1 +
 src/common/slurm_route.c                    | 3 ++-
 src/plugins/route/topology/route_topology.c | 3 +++
 src/sbcast/sbcast.c                         | 2 ++
 4 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/NEWS b/NEWS
index fa06c291978..7a10ad2ee7e 100644
--- a/NEWS
+++ b/NEWS
@@ -32,6 +32,7 @@ documents those changes that are of interest to users and administrators.
  -- Fix issue where steps weren't always getting the gres/tres involved.
  -- Fixed double read lock on getting job's gres/tres.
  -- Fix display for RoutePlugin parameter to display the correct value.
+ -- Fix route/topology plugin to prevent segfault in sbcast when in use.
 
 * Changes in Slurm 15.08.8
 ==========================
diff --git a/src/common/slurm_route.c b/src/common/slurm_route.c
index f7d30297b8d..309a6a79958 100644
--- a/src/common/slurm_route.c
+++ b/src/common/slurm_route.c
@@ -302,7 +302,8 @@ extern int route_init(char *node_name)
 	debug_flags = slurm_get_debug_flags();
 
 	init_run = true;
-	_set_collectors(node_name);
+	if (node_name)
+		_set_collectors(node_name);
 
 done:
 	slurm_mutex_unlock(&g_context_lock);
diff --git a/src/plugins/route/topology/route_topology.c b/src/plugins/route/topology/route_topology.c
index afad694bb04..8350fd74b36 100644
--- a/src/plugins/route/topology/route_topology.c
+++ b/src/plugins/route/topology/route_topology.c
@@ -96,6 +96,7 @@ const uint32_t plugin_version   = SLURM_VERSION_NUMBER;
 
 /* Global data */
 static uint64_t debug_flags = 0;
+static pthread_mutex_t route_lock = PTHREAD_MUTEX_INITIALIZER;
 
 /*****************************************************************************\
  *  Functions required of all plugins
@@ -152,6 +153,7 @@ extern int route_p_split_hostlist(hostlist_t hl,
 	bitstr_t *fwd_bitmap = NULL;		/* nodes in forward list */
 
 	msg_count = hostlist_count(hl);
+	slurm_mutex_lock(&route_lock);
 	if (switch_record_cnt == 0) {
 		/* configs have not already been processed */
 		slurm_conf_init(NULL);
@@ -167,6 +169,7 @@ extern int route_p_split_hostlist(hostlist_t hl,
 			fatal("ROUTE: Failed to build topology config");
 		}
 	}
+	slurm_mutex_unlock(&route_lock);
 	*sp_hl = (hostlist_t*) xmalloc(switch_record_cnt * sizeof(hostlist_t));
 	/* create bitmap of nodes to send message too */
 	if (hostlist2bitmap (hl, false, &nodes_bitmap) != SLURM_SUCCESS) {
diff --git a/src/sbcast/sbcast.c b/src/sbcast/sbcast.c
index fba14278194..cec1751dd01 100644
--- a/src/sbcast/sbcast.c
+++ b/src/sbcast/sbcast.c
@@ -60,6 +60,7 @@
 #include "src/common/slurm_cred.h"
 #include "src/common/slurm_protocol_api.h"
 #include "src/common/slurm_protocol_interface.h"
+#include "src/common/slurm_route.h"
 #include "src/common/slurm_time.h"
 #include "src/common/uid.h"
 #include "src/common/xmalloc.h"
@@ -83,6 +84,7 @@ int main(int argc, char *argv[])
 	return 1;
 #endif
 	slurm_conf_init(NULL);
+	route_init(NULL);
 	parse_command_line(argc, argv);
 	if (params.verbose) {
 		opts.stderr_level += params.verbose;
-- 
GitLab