From c1ec148c12a97f8b11029d380da2264938a7b2ac Mon Sep 17 00:00:00 2001 From: Mark Grondona <mgrondona@llnl.gov> Date: Fri, 13 Jun 2003 20:54:35 +0000 Subject: [PATCH] o install signal handlers just before allocating nodes to avoid race where signal is received after having allocated nodes but before real signal handlers are installed. --- src/srun/allocate.c | 41 ++++++++++++++++++++--------------------- 1 file changed, 20 insertions(+), 21 deletions(-) diff --git a/src/srun/allocate.c b/src/srun/allocate.c index 985054d8c70..b060ed1fab7 100644 --- a/src/srun/allocate.c +++ b/src/srun/allocate.c @@ -51,21 +51,32 @@ */ static void _wait_for_resources(resource_allocation_response_msg_t **rp); static bool _retry(); -static bool _destroy_job(bool set); static void _intr_handler(int signo); static job_step_create_request_msg_t * _step_req_create(job_t *j); static void _step_req_destroy(job_step_create_request_msg_t *r); +static sig_atomic_t destroy_job = 0; resource_allocation_response_msg_t * allocate_nodes(void) { int rc = 0; + SigFunc *oquitf, *ointf, *otermf; resource_allocation_response_msg_t *resp = NULL; job_desc_msg_t *j = job_desc_msg_create(); - while ((rc = slurm_allocate_resources(j, &resp) < 0) && _retry()) {;} + xsignal_unblock(SIGINT); + oquitf = xsignal(SIGINT, _intr_handler); + xsignal_unblock(SIGQUIT); + ointf = xsignal(SIGQUIT, _intr_handler); + xsignal_unblock(SIGTERM); + otermf = xsignal(SIGTERM, _intr_handler); + + while ((rc = slurm_allocate_resources(j, &resp) < 0) && _retry()) { + if (destroy_job) + goto done; + } if ((rc == 0) && (resp->node_list == NULL)) { if (resp->error_code) @@ -73,6 +84,11 @@ allocate_nodes(void) _wait_for_resources(&resp); } + done: + xsignal(SIGINT, ointf); + xsignal(SIGTERM, otermf); + xsignal(SIGQUIT, oquitf); + job_desc_msg_destroy(j); return resp; @@ -124,16 +140,12 @@ existing_allocation(void) static void _wait_for_resources(resource_allocation_response_msg_t **resp) { - SigFunc *old_handler; old_job_alloc_msg_t 
old_job; resource_allocation_response_msg_t *r = *resp; int sleep_time = MIN_ALLOC_WAIT; info ("job %u queued and waiting for resources", r->job_id); - xsignal_unblock(SIGINT); - old_handler = xsignal(SIGINT, _intr_handler); - old_job.job_id = r->job_id; old_job.uid = (uint32_t) getuid(); slurm_free_resource_allocation_response_msg(r); @@ -151,7 +163,7 @@ _wait_for_resources(resource_allocation_response_msg_t **resp) exit (1); } - if (_destroy_job(0)) { + if (destroy_job) { verbose("cancelling job %u", old_job.job_id); slurm_complete_job(old_job.job_id, 0, 0); #ifdef HAVE_TOTALVIEW @@ -162,8 +174,6 @@ _wait_for_resources(resource_allocation_response_msg_t **resp) } info ("job %u has been allocated resources", (*resp)->job_id); - - xsignal(SIGINT, SIG_IGN); } @@ -191,24 +201,13 @@ _retry() return true; } -/* Returns true if user requested immediate destruction of pending job - * _destroy_job() will return false until it has been called once with - * parameter `set' equal to TRUE. - */ -static bool -_destroy_job(bool set) -{ - static bool destroy = false; - return (set ? (destroy = true) : destroy); -} - /* * SIGINT handler while waiting for resources to become available. */ static void _intr_handler(int signo) { - _destroy_job(true); + destroy_job = 1; } -- GitLab