Skip to content
Snippets Groups Projects
Commit e00cf339 authored by Christopher J. Morrone
Browse files

Really put pthreads in detached state.

Don't wait until the end to join threads in parallel_debug mode.
parent 438ece9b
No related branches found
No related tags found
No related merge requests found
...@@ -51,9 +51,10 @@ extern char **environ; ...@@ -51,9 +51,10 @@ extern char **environ;
static pthread_mutex_t active_mutex = PTHREAD_MUTEX_INITIALIZER; static pthread_mutex_t active_mutex = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t active_cond = PTHREAD_COND_INITIALIZER; static pthread_cond_t active_cond = PTHREAD_COND_INITIALIZER;
static int active = 0; static int active = 0;
static int joinable = 0;
static int fail_launch_cnt = 0; static int fail_launch_cnt = 0;
typedef enum {DSH_NEW, DSH_ACTIVE, DSH_DONE, DSH_FAILED} state_t; typedef enum {DSH_NEW, DSH_ACTIVE, DSH_DONE, DSH_FAILED, DSH_JOINED} state_t;
typedef struct task_info { typedef struct task_info {
slurm_msg_t *req; slurm_msg_t *req;
...@@ -208,23 +209,33 @@ static int _check_pending_threads(thd_t *thd, int count) ...@@ -208,23 +209,33 @@ static int _check_pending_threads(thd_t *thd, int count)
static void _set_attr_detached (pthread_attr_t *attr) static void _set_attr_detached (pthread_attr_t *attr)
{ {
int err; int err;
if (!opt.parallel_debug) if (opt.parallel_debug) {
return; return;
}
if ((err = pthread_attr_setdetachstate(attr, PTHREAD_CREATE_DETACHED))) if ((err = pthread_attr_setdetachstate(attr, PTHREAD_CREATE_DETACHED)))
error ("pthread_attr_setdetachstate: %s", slurm_strerror(err)); error ("pthread_attr_setdetachstate: %s", slurm_strerror(err));
return; return;
} }
/*
* Need to join with all attached threads if running
* under parallel debugger
*/
static void _join_attached_threads (int nthreads, thd_t *th) static void _join_attached_threads (int nthreads, thd_t *th)
{ {
int i; int i;
void *retval; void *retval;
if (!opt.parallel_debug) if (opt.parallel_debug) {
return; for (i = 0; i < nthreads; i++) {
for (i = 0; i < nthreads; i++) { if (th[i].thread != (pthread_t) NULL
if (th[i].thread != (pthread_t) NULL) && th[i].state == DSH_DONE) {
pthread_join (th[i].thread, &retval); pthread_join (th[i].thread, &retval);
th[i].state = DSH_JOINED;
}
}
} }
return; return;
} }
...@@ -302,7 +313,8 @@ static void _p_launch(slurm_msg_t *req, srun_job_t *job) ...@@ -302,7 +313,8 @@ static void _p_launch(slurm_msg_t *req, srun_job_t *job)
pthread_mutex_lock(&active_mutex); pthread_mutex_lock(&active_mutex);
while (active >= opt.max_threads || rc < 0) while (active >= opt.max_threads || rc < 0)
rc = _wait_on_active(thd, job); rc = _wait_on_active(thd, job);
if (joinable >= (opt.max_threads/2))
_join_attached_threads(job->nhosts, thd);
active++; active++;
pthread_mutex_unlock(&active_mutex); pthread_mutex_unlock(&active_mutex);
...@@ -320,10 +332,6 @@ static void _p_launch(slurm_msg_t *req, srun_job_t *job) ...@@ -320,10 +332,6 @@ static void _p_launch(slurm_msg_t *req, srun_job_t *job)
_wait_on_active(thd, job); _wait_on_active(thd, job);
pthread_mutex_unlock(&active_mutex); pthread_mutex_unlock(&active_mutex);
/*
* Need to join with all attached threads if running
* under parallel debugger
*/
_join_attached_threads (job->nhosts, thd); _join_attached_threads (job->nhosts, thd);
/* /*
...@@ -454,10 +462,11 @@ static void * _p_launch_task(void *arg) ...@@ -454,10 +462,11 @@ static void * _p_launch_task(void *arg)
} else } else
_update_contacted_node(job, nodeid); _update_contacted_node(job, nodeid);
pthread_mutex_lock(&active_mutex); pthread_mutex_lock(&active_mutex);
th->state = DSH_DONE; th->state = DSH_DONE;
active--; active--;
if (opt.parallel_debug)
joinable++;
fail_launch_cnt += failure; fail_launch_cnt += failure;
pthread_cond_signal(&active_cond); pthread_cond_signal(&active_cond);
pthread_mutex_unlock(&active_mutex); pthread_mutex_unlock(&active_mutex);
......
0% Loading. Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment