Skip to content
Snippets Groups Projects
Commit 36e4ed9b authored by Mark Grondona's avatar Mark Grondona
Browse files

o Apply fixes needed for TotalView (proposed by Dong Ahn <anh1@llnl.gov>)

  - handle negative return code w/ errno == EINTR from sigwait()
  - don't create launch threads detached when running under parallel
    debugger
parent ab11fb6b
No related branches found
No related tags found
No related merge requests found
...@@ -204,6 +204,29 @@ static int _check_pending_threads(thd_t *thd, int count) ...@@ -204,6 +204,29 @@ static int _check_pending_threads(thd_t *thd, int count)
return 0; return 0;
} }
/*
 * Launch threads are created detached, except when running under a
 * parallel debugger: detached threads seem to confuse TotalView
 * specifically, so in that case the detach state of `attr' is left
 * untouched and the threads must later be reaped with
 * _join_attached_threads().
 *
 * A failure to set the detach state is logged but is not fatal.
 */
static void _set_attr_detached (pthread_attr_t *attr)
{
	int err;

	/* Under a parallel debugger, do NOT request detached threads */
	if (opt.parallel_debug)
		return;

	if ((err = pthread_attr_setdetachstate(attr, PTHREAD_CREATE_DETACHED)))
		error ("pthread_attr_setdetachstate: %s", slurm_strerror(err));
}

/*
 * Join with the first `nthreads' launch threads in `th'.
 *
 * Threads are only left attached when running under a parallel debugger
 * (see _set_attr_detached()); in every other case they were created
 * detached, and joining a detached thread is undefined behavior, so this
 * function is then a no-op. Thread return values are not used.
 */
static void _join_attached_threads (int nthreads, thd_t *th)
{
	int i;

	/* Nothing to join unless the threads were created attached */
	if (!opt.parallel_debug)
		return;

	for (i = 0; i < nthreads; i++)
		pthread_join (th[i].thread, NULL);
}
static void _spawn_launch_thr(thd_t *th) static void _spawn_launch_thr(thd_t *th)
{ {
...@@ -211,9 +234,7 @@ static void _spawn_launch_thr(thd_t *th) ...@@ -211,9 +234,7 @@ static void _spawn_launch_thr(thd_t *th)
int err = 0; int err = 0;
slurm_attr_init (&attr); slurm_attr_init (&attr);
err = pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED); _set_attr_detached (&attr);
if (err)
error ("pthread_attr_setdetachstate: %s", slurm_strerror(err));
err = pthread_create(&th->thread, &attr, _p_launch_task, (void *)th); err = pthread_create(&th->thread, &attr, _p_launch_task, (void *)th);
if (err) { if (err) {
...@@ -297,6 +318,12 @@ static void _p_launch(slurm_msg_t *req, job_t *job) ...@@ -297,6 +318,12 @@ static void _p_launch(slurm_msg_t *req, job_t *job)
_wait_on_active(thd, job); _wait_on_active(thd, job);
pthread_mutex_unlock(&active_mutex); pthread_mutex_unlock(&active_mutex);
/*
* Need to join with all attached threads if running
* under parallel debugger
*/
_join_attached_threads (job->nhosts, thd);
/* /*
* xsignal_restore_mask(&set); * xsignal_restore_mask(&set);
* xsignal(SIGALRM, oldh); * xsignal(SIGALRM, oldh);
......
...@@ -234,7 +234,12 @@ _sig_thr(void *arg) ...@@ -234,7 +234,12 @@ _sig_thr(void *arg)
xsignal_sigset_create(srun_sigarray, &set); xsignal_sigset_create(srun_sigarray, &set);
sigwait(&set, &signo); if (sigwait(&set, &signo) < 0) {
if (errno != EINTR)
error ("sigwait: %m");
continue;
}
debug2("recvd signal %d", signo); debug2("recvd signal %d", signo);
switch (signo) { switch (signo) {
case SIGINT: case SIGINT:
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment