From 6b7ee386fbc92bf0a1c4f84093cabc3ec77c393f Mon Sep 17 00:00:00 2001
From: Morris Jette <jette@schedmd.com>
Date: Fri, 4 Aug 2017 15:48:48 -0600
Subject: [PATCH] Fix job abort on step launch failure

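Recently introduced logic cancelled the entire job allocation whenever
a step launch failed, even when the srun command had not created the
allocation itself (i.e. it was running under salloc or sbatch). Only
complete the job on step launch failure if srun created the allocation.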
---
 src/srun/libsrun/srun_job.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/srun/libsrun/srun_job.c b/src/srun/libsrun/srun_job.c
index 1bcb86027ab..b8a56108103 100644
--- a/src/srun/libsrun/srun_job.c
+++ b/src/srun/libsrun/srun_job.c
@@ -826,7 +826,8 @@ extern void create_srun_job(void **p_job, bool *got_alloc,
 		}
 
 		if (_create_job_step(job, false, srun_job_list) < 0) {
-			slurm_complete_job(my_job_id, 1);
+			if (*got_alloc)
+				slurm_complete_job(my_job_id, 1);
 			exit(error_exit);
 		}
 	} else {
-- 
GitLab