From c1ec148c12a97f8b11029d380da2264938a7b2ac Mon Sep 17 00:00:00 2001
From: Mark Grondona <mgrondona@llnl.gov>
Date: Fri, 13 Jun 2003 20:54:35 +0000
Subject: [PATCH]  o install signal handlers just before allocating nodes to
 avoid race    where signal is recieved after having allocated nodes but
 before    real signal handlers are installed.

---
 src/srun/allocate.c | 41 ++++++++++++++++++++---------------------
 1 file changed, 20 insertions(+), 21 deletions(-)

diff --git a/src/srun/allocate.c b/src/srun/allocate.c
index 985054d8c70..b060ed1fab7 100644
--- a/src/srun/allocate.c
+++ b/src/srun/allocate.c
@@ -51,21 +51,32 @@
  */
 static void  _wait_for_resources(resource_allocation_response_msg_t **rp);
 static bool  _retry();
-static bool  _destroy_job(bool set);
 static void  _intr_handler(int signo);
 
 static job_step_create_request_msg_t * _step_req_create(job_t *j);
 static void _step_req_destroy(job_step_create_request_msg_t *r);
 
+static sig_atomic_t destroy_job = 0;
 
 resource_allocation_response_msg_t *
 allocate_nodes(void)
 {
 	int rc = 0;
+	SigFunc *oquitf, *ointf, *otermf;
 	resource_allocation_response_msg_t *resp = NULL;
 	job_desc_msg_t *j = job_desc_msg_create();
 
-	while ((rc = slurm_allocate_resources(j, &resp) < 0) && _retry()) {;}
+	xsignal_unblock(SIGINT);
+	oquitf = xsignal(SIGINT,   _intr_handler);
+	xsignal_unblock(SIGQUIT);
+	ointf  = xsignal(SIGQUIT,  _intr_handler);
+	xsignal_unblock(SIGTERM);
+	otermf = xsignal(SIGTERM,  _intr_handler);
+
+	while ((rc = slurm_allocate_resources(j, &resp) < 0) && _retry()) {
+		if (destroy_job)
+			goto done;
+	} 
 
 	if ((rc == 0) && (resp->node_list == NULL)) {
 		if (resp->error_code)
@@ -73,6 +84,11 @@ allocate_nodes(void)
 		_wait_for_resources(&resp);
 	}
 
+    done:
+	xsignal(SIGINT,  ointf);
+	xsignal(SIGTERM, otermf);
+	xsignal(SIGQUIT, oquitf);
+
 	job_desc_msg_destroy(j);
 
 	return resp;
@@ -124,16 +140,12 @@ existing_allocation(void)
 static void
 _wait_for_resources(resource_allocation_response_msg_t **resp)
 {
-	SigFunc *old_handler;
 	old_job_alloc_msg_t old_job;
 	resource_allocation_response_msg_t *r = *resp;
 	int sleep_time = MIN_ALLOC_WAIT;
 
 	info ("job %u queued and waiting for resources", r->job_id);
 
-	xsignal_unblock(SIGINT);
-	old_handler = xsignal(SIGINT,  _intr_handler);
-
 	old_job.job_id = r->job_id;
 	old_job.uid = (uint32_t) getuid();
 	slurm_free_resource_allocation_response_msg(r);
@@ -151,7 +163,7 @@ _wait_for_resources(resource_allocation_response_msg_t **resp)
 			exit (1);
 		}
 
-		if (_destroy_job(0)) {
+		if (destroy_job) {
 			verbose("cancelling job %u", old_job.job_id);
 			slurm_complete_job(old_job.job_id, 0, 0);
 #ifdef HAVE_TOTALVIEW
@@ -162,8 +174,6 @@ _wait_for_resources(resource_allocation_response_msg_t **resp)
 
 	}
 	info ("job %u has been allocated resources", (*resp)->job_id);
-
-	xsignal(SIGINT, SIG_IGN);
 }
 
 
@@ -191,24 +201,13 @@ _retry()
 	return true;
 }
 
-/* Returns true if user requested immediate destruction of pending job
- * _destroy_job() will return false until it has been called once with
- * parameter `set' equal to TRUE.
- */
-static bool
-_destroy_job(bool set)
-{
-	static bool destroy = false; 
-	return (set ? (destroy = true) : destroy);
-}
-
 /*
  * SIGINT handler while waiting for resources to become available.
  */
 static void
 _intr_handler(int signo)
 {
-	_destroy_job(true);
+	destroy_job = 1;
 }
 
 
-- 
GitLab