Skip to content
Snippets Groups Projects
Commit 056f9ce2 authored by Artem Polyakov, committed by Danny Auble
Browse files

mpi/pmix: Fix the case where UCX fails to connect


Segmentation faults occurred because of a double free of the pending
list when the UCX component tried to connect multiple times.

Signed-off-by: Artem Polyakov <artpol84@gmail.com>
parent 86a02ce5
No related branches found
No related tags found
No related merge requests found
...@@ -637,8 +637,7 @@ static void _ucx_fini(void *_priv) ...@@ -637,8 +637,7 @@ static void _ucx_fini(void *_priv)
slurm_mutex_unlock(&_ucx_worker_lock); slurm_mutex_unlock(&_ucx_worker_lock);
} else { } else {
slurm_mutex_lock(&_ucx_worker_lock); slurm_mutex_lock(&_ucx_worker_lock);
pmixp_rlist_init(&priv->pending, &_free_list, pmixp_rlist_fini(&priv->pending);
PMIXP_UCX_LIST_PREALLOC);
slurm_mutex_unlock(&_ucx_worker_lock); slurm_mutex_unlock(&_ucx_worker_lock);
} }
xfree(priv); xfree(priv);
...@@ -664,8 +663,9 @@ static int _ucx_connect(void *_priv, void *ep_data, size_t ep_len, ...@@ -664,8 +663,9 @@ static int _ucx_connect(void *_priv, void *ep_data, size_t ep_len,
if (status != UCS_OK) { if (status != UCS_OK) {
PMIXP_ERROR("ucp_ep_create failed: %s", PMIXP_ERROR("ucp_ep_create failed: %s",
ucs_status_string(status)); ucs_status_string(status));
rc = SLURM_ERROR; xfree(priv->ucx_addr);
goto exit; slurm_mutex_unlock(&_ucx_worker_lock);
return SLURM_ERROR;
} }
priv->connected = true; priv->connected = true;
...@@ -673,7 +673,6 @@ static int _ucx_connect(void *_priv, void *ep_data, size_t ep_len, ...@@ -673,7 +673,6 @@ static int _ucx_connect(void *_priv, void *ep_data, size_t ep_len,
if (init_msg) { if (init_msg) {
pmixp_rlist_push(&priv->pending, init_msg); pmixp_rlist_push(&priv->pending, init_msg);
} }
exit:
slurm_mutex_unlock(&_ucx_worker_lock); slurm_mutex_unlock(&_ucx_worker_lock);
/* we need to send data while being unlocked */ /* we need to send data while being unlocked */
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment