From 813ec4c57d68f5ef71b77db4276e68507995ef48 Mon Sep 17 00:00:00 2001 From: Morris Jette <jette@schedmd.com> Date: Sat, 31 Dec 2016 13:13:52 -0700 Subject: [PATCH] Revise salloc abort logic Revision to 06fbdd751c4d4a7343c8f78ea63d8a52869f5fb0 Previous logic was causing tests 1.87 and 15.19 to fail sometimes due to a race condition by adding a "Job allocation revoked" message to salloc output even if no allocation occurred. bug 3351 --- src/salloc/salloc.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/salloc/salloc.c b/src/salloc/salloc.c index 45bea337e25..35d4e6663e5 100644 --- a/src/salloc/salloc.c +++ b/src/salloc/salloc.c @@ -109,6 +109,7 @@ pthread_mutex_t allocation_state_lock = PTHREAD_MUTEX_INITIALIZER; static bool exit_flag = false; static bool suspend_flag = false; static bool allocation_interrupted = false; +static bool allocation_revoked = false; static uint32_t pending_job_id = 0; static time_t last_timeout = 0; static struct termios saved_tty_attributes; @@ -891,7 +892,7 @@ static void _job_complete_handler(srun_job_complete_msg_t *comp) } else { info("Job allocation %u has been revoked.", comp->job_id); - allocation_interrupted = true; + allocation_revoked = true; } } allocation_state = REVOKED; @@ -1059,7 +1060,7 @@ static int _wait_bluegene_block_ready(resource_allocation_response_msg_t *alloc) is_ready = 1; break; } - if (allocation_interrupted) + if (allocation_interrupted || allocation_revoked) break; } if (is_ready) @@ -1164,7 +1165,7 @@ static int _wait_nodes_ready(resource_allocation_response_msg_t *alloc) is_ready = 1; break; } - if (allocation_interrupted) + if (allocation_interrupted || allocation_revoked) break; } if (is_ready) { -- GitLab