From 6b7ee386fbc92bf0a1c4f84093cabc3ec77c393f Mon Sep 17 00:00:00 2001
From: Morris Jette <jette@schedmd.com>
Date: Fri, 4 Aug 2017 15:48:48 -0600
Subject: [PATCH] Fix job abort on step launch failure

Logic recently introduced would cancel an entire job allocation
if a step launch failed, even if the srun command did not create
the allocation (running under salloc or sbatch).
---
 src/srun/libsrun/srun_job.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/srun/libsrun/srun_job.c b/src/srun/libsrun/srun_job.c
index 1bcb86027ab..b8a56108103 100644
--- a/src/srun/libsrun/srun_job.c
+++ b/src/srun/libsrun/srun_job.c
@@ -826,7 +826,8 @@ extern void create_srun_job(void **p_job, bool *got_alloc,
 			}
 
 			if (_create_job_step(job, false, srun_job_list) < 0) {
-				slurm_complete_job(my_job_id, 1);
+				if (*got_alloc)
+					slurm_complete_job(my_job_id, 1);
 				exit(error_exit);
 			}
 		} else {
-- 
GitLab