From d61a5159e884b448eab652adfc1eccd14ac4fca2 Mon Sep 17 00:00:00 2001 From: Mark Grondona <mgrondona@llnl.gov> Date: Thu, 10 Jul 2003 21:38:12 +0000 Subject: [PATCH] o Handle forwarding of SIGUSR1,2 o Block SIGALRM by default o Only allow one thread to enter fwd_signal() at a time. --- src/srun/allocate.c | 19 +++++-------------- src/srun/launch.c | 6 ++++++ src/srun/signals.c | 11 ++++++++++- src/srun/srun.c | 8 +++++--- 4 files changed, 26 insertions(+), 18 deletions(-) diff --git a/src/srun/allocate.c b/src/srun/allocate.c index 4079d200b1d..81081aac615 100644 --- a/src/srun/allocate.c +++ b/src/srun/allocate.c @@ -68,22 +68,15 @@ allocate_nodes(void) resource_allocation_response_msg_t *resp = NULL; job_desc_msg_t *j = job_desc_msg_create(); - /* - * Save old signal mask for this thread - */ - if ((rc = pthread_sigmask(SIG_BLOCK, NULL, &oset)) != 0) { - error("pthread_sigmask: %s", slurm_strerror(rc)); - return NULL; - } + oquitf = xsignal(SIGQUIT, _intr_handler); + ointf = xsignal(SIGINT, _intr_handler); + otermf = xsignal(SIGTERM, _intr_handler); + xsignal_save_mask(&oset); xsignal_unblock(SIGQUIT); xsignal_unblock(SIGINT); xsignal_unblock(SIGTERM); - oquitf = xsignal(SIGQUIT, _intr_handler); - ointf = xsignal(SIGINT, _intr_handler); - otermf = xsignal(SIGTERM, _intr_handler); - while ((rc = slurm_allocate_resources(j, &resp) < 0) && _retry()) { if (destroy_job) goto done; @@ -96,9 +89,7 @@ allocate_nodes(void) } done: - if ((rc = pthread_sigmask(SIG_BLOCK, &oset, NULL)) != 0) - error("Unable to restore signal mask: %s", slurm_strerror(rc)); - + xsignal_restore_mask(&oset); xsignal(SIGINT, ointf); xsignal(SIGTERM, otermf); xsignal(SIGQUIT, oquitf); diff --git a/src/srun/launch.c b/src/srun/launch.c index baa035d313e..1bb6ca21687 100644 --- a/src/srun/launch.c +++ b/src/srun/launch.c @@ -240,6 +240,7 @@ static int _check_pending_threads(thd_t *thd, int count) for (i = 0; i < count; i++) { if ((thd[i].state == DSH_ACTIVE) && ((now - thd[i].tstart) >= 2) ) + verbose("sending SIGALRM to thread %d", thd[i].thread); pthread_kill(thd[i].thread, SIGALRM); } @@ -305,8 +306,12 @@ static void _p_launch(slurm_msg_t *req, job_t *job) thd_t *thd; int rc = 0; SigFunc *oldh; + sigset_t set; oldh = xsignal(SIGALRM, (SigFunc *) _alrm_handler); + xsignal_save_mask(&set); + xsignal_unblock(SIGALRM); + /* * Set job timeout to maximum launch time + current time */ @@ -341,6 +346,7 @@ static void _p_launch(slurm_msg_t *req, job_t *job) _wait_on_active(thd, job); pthread_mutex_unlock(&active_mutex); + xsignal_restore_mask(&set); xsignal(SIGALRM, oldh); xfree(thd); diff --git a/src/srun/signals.c b/src/srun/signals.c index 7dfc0951e78..cd2aca04849 100644 --- a/src/srun/signals.c +++ b/src/srun/signals.c @@ -102,6 +102,9 @@ sig_setup_sigmask(void) sigaddset(&sigset, SIGTSTP); sigaddset(&sigset, SIGSTOP); sigaddset(&sigset, SIGCONT); + sigaddset(&sigset, SIGALRM); + sigaddset(&sigset, SIGUSR1); + sigaddset(&sigset, SIGUSR2); if ((err = pthread_sigmask(SIG_BLOCK, &sigset, NULL)) != 0) { error("pthread_sigmask: %s", slurm_strerror(err)); @@ -137,6 +140,9 @@ fwd_signal(job_t *job, int signo) int i; slurm_msg_t *req; kill_tasks_msg_t msg; + static pthread_mutex_t sig_mutex = PTHREAD_MUTEX_INITIALIZER; + + slurm_mutex_lock(&sig_mutex); if (signo == SIGKILL || signo == SIGINT || signo == SIGTERM) { slurm_mutex_lock(&job->state_mutex); @@ -172,6 +178,7 @@ fwd_signal(job_t *job, int signo) debug2("All tasks have been signalled"); xfree(req); + slurm_mutex_unlock(&sig_mutex); } @@ -219,7 +226,8 @@ _sig_thr_setup(sigset_t *set) sigaddset(set, SIGQUIT); sigaddset(set, SIGTSTP); sigaddset(set, SIGSTOP); - sigaddset(set, SIGCONT); + sigaddset(set, SIGUSR1); + sigaddset(set, SIGUSR2); } /* simple signal handling thread */ @@ -256,6 +264,7 @@ _sig_thr(void *arg) job_force_termination(job); break; default: + fwd_signal(job, signo); break; } } diff --git a/src/srun/srun.c b/src/srun/srun.c index 4036c0572cd..32c8f3b9a61 100644 --- a/src/srun/srun.c +++ b/src/srun/srun.c @@ -226,15 +226,17 @@ srun(int ac, char **av) /* Tell slurmctld that job is done */ job_destroy(job, 0); + unblock_all_signals(); + /* kill msg server thread */ - /*pthread_kill(job->jtid, SIGHUP);*/ + pthread_kill(job->jtid, SIGTERM); /* kill signal thread */ - /*pthread_kill(job->sigid, SIGHUP);*/ + pthread_kill(job->sigid, SIGKILL); log_fini(); - return job_rc(job); + exit(job_rc(job)); } -- GitLab