From 056f9ce259780476267cd0173e0fcbc554e05556 Mon Sep 17 00:00:00 2001
From: Artem Polyakov <artpol84@gmail.com>
Date: Tue, 8 Aug 2017 13:21:36 +0700
Subject: [PATCH] mpi/pmix: Fix the case where UCX fails to connect

There were segmentation faults caused by a double free of the pending
list when the UCX component tried to connect multiple times.

Signed-off-by: Artem Polyakov <artpol84@gmail.com>
---
 src/plugins/mpi/pmix/pmixp_dconn_ucx.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/src/plugins/mpi/pmix/pmixp_dconn_ucx.c b/src/plugins/mpi/pmix/pmixp_dconn_ucx.c
index 3c150705630..783f845cd53 100644
--- a/src/plugins/mpi/pmix/pmixp_dconn_ucx.c
+++ b/src/plugins/mpi/pmix/pmixp_dconn_ucx.c
@@ -637,8 +637,7 @@ static void _ucx_fini(void *_priv)
 		slurm_mutex_unlock(&_ucx_worker_lock);
 	} else {
 		slurm_mutex_lock(&_ucx_worker_lock);
-		pmixp_rlist_init(&priv->pending, &_free_list,
-				 PMIXP_UCX_LIST_PREALLOC);
+		pmixp_rlist_fini(&priv->pending);
 		slurm_mutex_unlock(&_ucx_worker_lock);
 	}
 	xfree(priv);
@@ -664,8 +663,9 @@ static int _ucx_connect(void *_priv, void *ep_data, size_t ep_len,
 	if (status != UCS_OK) {
 		PMIXP_ERROR("ucp_ep_create failed: %s",
 			    ucs_status_string(status));
-		rc = SLURM_ERROR;
-		goto exit;
+		xfree(priv->ucx_addr);
+		slurm_mutex_unlock(&_ucx_worker_lock);
+		return SLURM_ERROR;
 	}
 	priv->connected = true;
 
@@ -673,7 +673,6 @@ static int _ucx_connect(void *_priv, void *ep_data, size_t ep_len,
 	if (init_msg) {
 		pmixp_rlist_push(&priv->pending, init_msg);
 	}
-exit:
 	slurm_mutex_unlock(&_ucx_worker_lock);
 
 	/* we need to send data while being unlocked */
-- 
GitLab