diff --git a/NEWS b/NEWS index 07caefd58070204b15340a594d6eaf3522541a2e..c99c0c1f40df2f0f1bacba142a719176ceb72612 100644 --- a/NEWS +++ b/NEWS @@ -60,6 +60,8 @@ documents those changes that are of interest to users and administrators. -- Update gang scheduling data structures when job changes in size. -- Associations - prevent hash table corruption if uid initially unset for a user, which can cause slurmctld to crash if that user is deleted. + -- Avoid possibly aborting srun that gets simultaneous SIGSTOP+SIGCONT while + creating the job step. * Changes in Slurm 15.08.8 ========================== diff --git a/src/api/step_ctx.c b/src/api/step_ctx.c index 0302c6652f647eaff7173b7c78db929347e7e725..fe3be8b3e9651362dd87ed8c5a9f8595ffb73426 100644 --- a/src/api/step_ctx.c +++ b/src/api/step_ctx.c @@ -77,7 +77,8 @@ static int destroy_step = 0; static void _signal_while_allocating(int signo) { debug("Got signal %d", signo); - if (signo == SIGCONT) + /* NOTE: Near simultaneous SIGSTOP+SIGCONT can result in signo == 0 */ + if ((signo == SIGCONT) || (signo == 0)) return; destroy_step = signo; diff --git a/src/srun/libsrun/allocate.c b/src/srun/libsrun/allocate.c index d74df0b5834a2a99bc85f787d05a598d55b50e82..c861c9532d47e90aad57442c69bcff9a3e331e44 100644 --- a/src/srun/libsrun/allocate.c +++ b/src/srun/libsrun/allocate.c @@ -118,7 +118,8 @@ static void *_safe_signal_while_allocating(void *in_data) int signo = *(int *)in_data; debug("Got signal %d", signo); - if (signo == SIGCONT) + /* NOTE: Near simultaneous SIGSTOP+SIGCONT can result in signo == 0 */ + if ((signo == SIGCONT) || (signo == 0)) return NULL; destroy_job = 1;