From f2b1c35fb899bb9fad5187e95d01ed4463290afe Mon Sep 17 00:00:00 2001
From: Danny Auble <da@schedmd.com>
Date: Thu, 14 Jul 2016 16:55:50 -0700
Subject: [PATCH] Move the code from commit 30f4f81c1f4a7b8 above code that
 could call delete_step_records, which would delete the steps without the
 killing flag set.

---
 src/slurmctld/node_scheduler.c | 31 ++++++++++++++++---------------
 1 file changed, 16 insertions(+), 15 deletions(-)

diff --git a/src/slurmctld/node_scheduler.c b/src/slurmctld/node_scheduler.c
index 91a50e6fb99..03fc0ae07cb 100644
--- a/src/slurmctld/node_scheduler.c
+++ b/src/slurmctld/node_scheduler.c
@@ -3726,6 +3726,22 @@ extern void re_kill_job(struct job_record *job_ptr)
 					    job_ptr->spank_job_env);
 	kill_job->spank_job_env_size = job_ptr->spank_job_env_size;
 
+	/* On a Cray system this will start the NHC early so it is
+	 * able to gather any information it can from the apparently
+	 * unkillable processes.
+	 * NOTE: do not use list_for_each here. It holds the list
+	 * lock while processing the entire list, and that same lock
+	 * may need to be taken again inside select_g_step_finish,
+	 * which could potentially call post_job_step, which calls
+	 * delete_step_record, which locks the list to create a
+	 * list_iterator on the same list. That could cause
+	 * deadlock :).
+	 */
+	step_iterator = list_iterator_create(job_ptr->step_list);
+	while ((step_ptr = list_next(step_iterator)))
+		select_g_step_finish(step_ptr, true);
+	list_iterator_destroy(step_iterator);
+
 #ifdef HAVE_FRONT_END
 	if (job_ptr->batch_host &&
 	    (front_end_ptr = find_front_end_record(job_ptr->batch_host))) {
@@ -3819,21 +3835,6 @@ extern void re_kill_job(struct job_record *job_ptr)
 		      job_ptr->job_id, host_str);
 	}
 #endif
-	/* On a Cray system this will start the NHC early so it is
-	 * able to gather any information it can from the apparent
-	 * unkillable processes.
-	 * NOTE: do not do a list_for_each here, that will hold on the list
-	 * lock while processing the entire list which could
-	 * potentially be needed to lock again in
-	 * select_g_step_finish which could potentially call
-	 * post_job_step which calls delete_step_record which locks
-	 * the list to create a list_iterator on the same list and
-	 * could cause deadlock :).
-	 */
-	step_iterator = list_iterator_create(job_ptr->step_list);
-	while ((step_ptr = list_next(step_iterator)))
-		select_g_step_finish(step_ptr, true);
-	list_iterator_destroy(step_iterator);
 
 	xfree(host_str);
 	last_job_id = job_ptr->job_id;
-- 
GitLab