From 897c4b27bc1e0d9f6a72024895dd21e3e74df84a Mon Sep 17 00:00:00 2001 From: Bill Brophy <bill.brophy@bull.com> Date: Tue, 8 Mar 2016 16:46:29 -0500 Subject: [PATCH] Fix route/topology plugin to prevent segfault in sbcast. route_p_split_hostlist was not thread-safe, and would cause one of several segfaults depending on where in the initialization code each thread was. Bug 2495. --- NEWS | 1 + src/common/slurm_route.c | 3 ++- src/plugins/route/topology/route_topology.c | 3 +++ src/sbcast/sbcast.c | 2 ++ 4 files changed, 8 insertions(+), 1 deletion(-) diff --git a/NEWS b/NEWS index fa06c291978..7a10ad2ee7e 100644 --- a/NEWS +++ b/NEWS @@ -32,6 +32,7 @@ documents those changes that are of interest to users and administrators. -- Fix issue where steps weren't always getting the gres/tres involved. -- Fixed double read lock on getting job's gres/tres. -- Fix display for RoutePlugin parameter to display the correct value. + -- Fix route/topology plugin to prevent segfault in sbcast when in use. * Changes in Slurm 15.08.8 ========================== diff --git a/src/common/slurm_route.c b/src/common/slurm_route.c index f7d30297b8d..309a6a79958 100644 --- a/src/common/slurm_route.c +++ b/src/common/slurm_route.c @@ -302,7 +302,8 @@ extern int route_init(char *node_name) debug_flags = slurm_get_debug_flags(); init_run = true; - _set_collectors(node_name); + if (node_name) + _set_collectors(node_name); done: slurm_mutex_unlock(&g_context_lock); diff --git a/src/plugins/route/topology/route_topology.c b/src/plugins/route/topology/route_topology.c index afad694bb04..8350fd74b36 100644 --- a/src/plugins/route/topology/route_topology.c +++ b/src/plugins/route/topology/route_topology.c @@ -96,6 +96,7 @@ const uint32_t plugin_version = SLURM_VERSION_NUMBER; /* Global data */ static uint64_t debug_flags = 0; +static pthread_mutex_t route_lock = PTHREAD_MUTEX_INITIALIZER; /*****************************************************************************\ * Functions required of all plugins @@ -152,6 +153,7 @@ extern int route_p_split_hostlist(hostlist_t hl, bitstr_t *fwd_bitmap = NULL; /* nodes in forward list */ msg_count = hostlist_count(hl); + slurm_mutex_lock(&route_lock); if (switch_record_cnt == 0) { /* configs have not already been processed */ slurm_conf_init(NULL); @@ -167,6 +169,7 @@ extern int route_p_split_hostlist(hostlist_t hl, fatal("ROUTE: Failed to build topology config"); } } + slurm_mutex_unlock(&route_lock); *sp_hl = (hostlist_t*) xmalloc(switch_record_cnt * sizeof(hostlist_t)); /* create bitmap of nodes to send message too */ if (hostlist2bitmap (hl, false, &nodes_bitmap) != SLURM_SUCCESS) { diff --git a/src/sbcast/sbcast.c b/src/sbcast/sbcast.c index fba14278194..cec1751dd01 100644 --- a/src/sbcast/sbcast.c +++ b/src/sbcast/sbcast.c @@ -60,6 +60,7 @@ #include "src/common/slurm_cred.h" #include "src/common/slurm_protocol_api.h" #include "src/common/slurm_protocol_interface.h" +#include "src/common/slurm_route.h" #include "src/common/slurm_time.h" #include "src/common/uid.h" #include "src/common/xmalloc.h" @@ -83,6 +84,7 @@ int main(int argc, char *argv[]) return 1; #endif slurm_conf_init(NULL); + route_init(NULL); parse_command_line(argc, argv); if (params.verbose) { opts.stderr_level += params.verbose; -- GitLab