From d61a5159e884b448eab652adfc1eccd14ac4fca2 Mon Sep 17 00:00:00 2001
From: Mark Grondona <mgrondona@llnl.gov>
Date: Thu, 10 Jul 2003 21:38:12 +0000
Subject: [PATCH] srun: forward SIGUSR1 and SIGUSR2, block SIGALRM by default,
 and allow only one thread in fwd_signal() at a time

---
 src/srun/allocate.c | 19 +++++--------------
 src/srun/launch.c   |  6 ++++++
 src/srun/signals.c  | 11 ++++++++++-
 src/srun/srun.c     |  8 +++++---
 4 files changed, 26 insertions(+), 18 deletions(-)

diff --git a/src/srun/allocate.c b/src/srun/allocate.c
index 4079d200b1d..81081aac615 100644
--- a/src/srun/allocate.c
+++ b/src/srun/allocate.c
@@ -68,22 +68,15 @@ allocate_nodes(void)
 	resource_allocation_response_msg_t *resp = NULL;
 	job_desc_msg_t *j = job_desc_msg_create();
 
-	/* 
-	 *  Save old signal mask for this thread
-	 */
-	if ((rc = pthread_sigmask(SIG_BLOCK, NULL, &oset)) != 0) {
-		error("pthread_sigmask: %s", slurm_strerror(rc));
-		return NULL;
-	}
+	oquitf = xsignal(SIGQUIT, _intr_handler);
+	ointf  = xsignal(SIGINT,  _intr_handler);
+	otermf = xsignal(SIGTERM, _intr_handler);
 
+	xsignal_save_mask(&oset);
 	xsignal_unblock(SIGQUIT);
 	xsignal_unblock(SIGINT);
 	xsignal_unblock(SIGTERM);
 
-	oquitf = xsignal(SIGQUIT, _intr_handler);
-	ointf  = xsignal(SIGINT,  _intr_handler);
-	otermf = xsignal(SIGTERM, _intr_handler);
-
 	while ((rc = slurm_allocate_resources(j, &resp) < 0) && _retry()) {
 		if (destroy_job)
 			goto done;
@@ -96,9 +89,7 @@ allocate_nodes(void)
 	}
 
     done:
-	if ((rc = pthread_sigmask(SIG_BLOCK, &oset, NULL)) != 0) 
-		error("Unable to restore signal mask: %s", slurm_strerror(rc));
-
+	xsignal_restore_mask(&oset);
 	xsignal(SIGINT,  ointf);
 	xsignal(SIGTERM, otermf);
 	xsignal(SIGQUIT, oquitf);
diff --git a/src/srun/launch.c b/src/srun/launch.c
index baa035d313e..1bb6ca21687 100644
--- a/src/srun/launch.c
+++ b/src/srun/launch.c
@@ -240,6 +240,10 @@ static int _check_pending_threads(thd_t *thd, int count)
 	for (i = 0; i < count; i++) {
 		if ((thd[i].state == DSH_ACTIVE) 
-		    && ((now - thd[i].tstart) >= 2) ) 
+		    && ((now - thd[i].tstart) >= 2) ) {
+			/* Signal only threads active for >= 2 seconds */
+			verbose("sending SIGALRM to thread %lu",
+				(unsigned long) thd[i].thread);
 			pthread_kill(thd[i].thread, SIGALRM);
+		}
 	}
 
@@ -305,8 +306,12 @@ static void _p_launch(slurm_msg_t *req, job_t *job)
 	thd_t *thd;
 	int rc = 0;
 	SigFunc *oldh;
+	sigset_t set;
 
 	oldh = xsignal(SIGALRM, (SigFunc *) _alrm_handler);
+	xsignal_save_mask(&set);
+	xsignal_unblock(SIGALRM);
+
 	/*
 	 * Set job timeout to maximum launch time + current time
 	 */
@@ -341,6 +346,7 @@ static void _p_launch(slurm_msg_t *req, job_t *job)
 		_wait_on_active(thd, job);
 	pthread_mutex_unlock(&active_mutex);
 
+	xsignal_restore_mask(&set);
 	xsignal(SIGALRM, oldh);
 
 	xfree(thd);
diff --git a/src/srun/signals.c b/src/srun/signals.c
index 7dfc0951e78..cd2aca04849 100644
--- a/src/srun/signals.c
+++ b/src/srun/signals.c
@@ -102,6 +102,9 @@ sig_setup_sigmask(void)
 	sigaddset(&sigset, SIGTSTP);
 	sigaddset(&sigset, SIGSTOP);
 	sigaddset(&sigset, SIGCONT);
+	sigaddset(&sigset, SIGALRM);
+	sigaddset(&sigset, SIGUSR1);
+	sigaddset(&sigset, SIGUSR2);
 
 	if ((err = pthread_sigmask(SIG_BLOCK, &sigset, NULL)) != 0) {
 		error("pthread_sigmask: %s", slurm_strerror(err));
@@ -137,6 +140,9 @@ fwd_signal(job_t *job, int signo)
 	int i;
 	slurm_msg_t *req;
 	kill_tasks_msg_t msg;
+	static pthread_mutex_t sig_mutex = PTHREAD_MUTEX_INITIALIZER;
+
+	slurm_mutex_lock(&sig_mutex);
 
 	if (signo == SIGKILL || signo == SIGINT || signo == SIGTERM) {
 		slurm_mutex_lock(&job->state_mutex);
@@ -172,6 +178,7 @@ fwd_signal(job_t *job, int signo)
 
 	debug2("All tasks have been signalled");
 	xfree(req);
+	slurm_mutex_unlock(&sig_mutex);
 }
 
 
@@ -219,7 +226,8 @@ _sig_thr_setup(sigset_t *set)
 	sigaddset(set, SIGQUIT);
 	sigaddset(set, SIGTSTP);
 	sigaddset(set, SIGSTOP);
-	sigaddset(set, SIGCONT);
+	sigaddset(set, SIGUSR1);
+	sigaddset(set, SIGUSR2);
 }
 
 /* simple signal handling thread */
@@ -256,6 +264,7 @@ _sig_thr(void *arg)
 			job_force_termination(job);
 			break;
 		  default:
+			fwd_signal(job, signo);
 			break;
 		}
 	}
diff --git a/src/srun/srun.c b/src/srun/srun.c
index 4036c0572cd..32c8f3b9a61 100644
--- a/src/srun/srun.c
+++ b/src/srun/srun.c
@@ -226,15 +226,17 @@ srun(int ac, char **av)
 	/* Tell slurmctld that job is done */
 	job_destroy(job, 0);
 
+	unblock_all_signals();
+
 	/* kill msg server thread */
-	/*pthread_kill(job->jtid,  SIGHUP);*/
+	pthread_kill(job->jtid,  SIGTERM);
 
 	/* kill signal thread */
-	/*pthread_kill(job->sigid, SIGHUP);*/
+	pthread_kill(job->sigid, SIGKILL);
 
 	log_fini();
 
-	return job_rc(job);
+	exit(job_rc(job));
 }
 
 
-- 
GitLab