From b4dc9eea92c63040a28a25f1bc5e3618b94f5871 Mon Sep 17 00:00:00 2001
From: Danny Auble <da@schedmd.com>
Date: Wed, 13 Jul 2016 12:12:54 -0700
Subject: [PATCH] Continuation of last commit.

We have decided to go back to the way 15.08 called NHC, instead of calling
it before sending a SIGKILL to the step's tasks.  With this patch we
only start the NHC early when we have to resend the SIGKILL for unkillable
processes.  This will hopefully get us the backtrace of the unkillable
processes, which was the reason we did it this way in the first place :).
---
 src/slurmctld/job_mgr.c        | 19 -------------------
 src/slurmctld/node_scheduler.c | 13 +++++++++++++
 src/slurmctld/step_mgr.c       | 10 +---------
 3 files changed, 14 insertions(+), 28 deletions(-)

diff --git a/src/slurmctld/job_mgr.c b/src/slurmctld/job_mgr.c
index f369be3db60..e9d6f371efd 100644
--- a/src/slurmctld/job_mgr.c
+++ b/src/slurmctld/job_mgr.c
@@ -4358,8 +4358,6 @@ extern int job_fail(uint32_t job_id, uint32_t job_state)
 static int _job_signal(struct job_record *job_ptr, uint16_t signal,
 		       uint16_t flags, uid_t uid, bool preempt)
 {
-	ListIterator step_iterator;
-	struct step_record *step_ptr;
 	uint16_t job_term_state;
 	char jbuf[JBUFSIZ];
 	time_t now = time(NULL);
@@ -4402,15 +4400,6 @@ static int _job_signal(struct job_record *job_ptr, uint16_t signal,
 		return SLURM_SUCCESS;
 	}
 
-	if ((IS_JOB_RUNNING(job_ptr) || IS_JOB_SUSPENDED(job_ptr)) &&
-	    (signal == SIGKILL)) {
-		step_iterator = list_iterator_create(job_ptr->step_list);
-		while ((step_ptr =
-		       (struct step_record *) list_next(step_iterator)))
-			select_g_step_finish(step_ptr, true);
-		list_iterator_destroy(step_iterator);
-	}
-
 	if (preempt)
 		job_term_state = JOB_PREEMPTED;
 	else
@@ -7777,16 +7766,8 @@ extern int job_update_tres_cnt(struct job_record *job_ptr, int node_inx)
 /* Terminate a job that has exhausted its time limit */
 static void _job_timed_out(struct job_record *job_ptr)
 {
-	ListIterator step_iterator;
-	struct step_record *step_ptr;
-
 	xassert(job_ptr);
 
-	step_iterator = list_iterator_create(job_ptr->step_list);
-	while ((step_ptr = (struct step_record *) list_next(step_iterator)))
-		select_g_step_finish(step_ptr, true);
-	list_iterator_destroy(step_iterator);
-
 	srun_timeout(job_ptr);
 	if (job_ptr->details) {
 		time_t now      = time(NULL);
diff --git a/src/slurmctld/node_scheduler.c b/src/slurmctld/node_scheduler.c
index 238939322fd..a64311995a1 100644
--- a/src/slurmctld/node_scheduler.c
+++ b/src/slurmctld/node_scheduler.c
@@ -3676,6 +3676,13 @@ static bitstr_t *_valid_features(struct job_record *job_ptr,
 	return result_bits;
 }
 
+static int _kill_step(struct step_record *step_ptr, void *arg)
+{
+	select_g_step_finish(step_ptr, true);
+	/* arg is unused and the call's result is dropped; always succeed. */
+	return SLURM_SUCCESS;
+}
+
 /*
  * re_kill_job - for a given job, deallocate its nodes for a second time,
  *	basically a cleanup for failed deallocate() calls
@@ -3812,6 +3819,12 @@ extern void re_kill_job(struct job_record *job_ptr)
 		      job_ptr->job_id, host_str);
 	}
 #endif
+	/* On a Cray system this will start the NHC early so it is
+	 * able to gather any information it can from the apparently
+	 * unkillable processes.
+	 */
+	list_for_each(job_ptr->step_list, (ListForF)_kill_step, NULL);
+
 	xfree(host_str);
 	last_job_id = job_ptr->job_id;
 	hostlist_destroy(kill_hostlist);
diff --git a/src/slurmctld/step_mgr.c b/src/slurmctld/step_mgr.c
index 98b3a8d2fd3..65a14b7fb12 100644
--- a/src/slurmctld/step_mgr.c
+++ b/src/slurmctld/step_mgr.c
@@ -556,11 +556,6 @@ int job_step_signal(uint32_t job_id, uint32_t step_id,
 	} else if ((signal == SIGKILL) || notify_slurmd)
 		signal_step_tasks(step_ptr, signal, REQUEST_SIGNAL_TASKS);
 
-	/* This has to be done last or we have a race condition with the
-	 * step_ptr not being around after this is called.  */
-	if (signal == SIGKILL)
-		select_g_step_finish(step_ptr, true);
-
 	return SLURM_SUCCESS;
 }
 
@@ -736,7 +731,7 @@ int job_step_complete(uint32_t job_id, uint32_t step_id, uid_t uid,
 		return ESLURM_INVALID_JOB_ID;
 
 	if (step_ptr->step_id == SLURM_EXTERN_CONT)
-		return select_g_step_finish(step_ptr, true);
+		return select_g_step_finish(step_ptr, false);
 
 	/* If the job is already cleaning we have already been here
 	 * before, so just return. */
@@ -4220,7 +4215,6 @@ static void _signal_step_timelimit(struct job_record *job_ptr,
 
 	if (notify_srun) {	/* Handle termination from srun, not slurmd */
 		srun_step_timeout(step_ptr, now);
-		(void) select_g_step_finish(step_ptr, true);
 		return;
 	}
 
@@ -4262,8 +4256,6 @@ static void _signal_step_timelimit(struct job_record *job_ptr,
 	}
 #endif
 
-	(void) select_g_step_finish(step_ptr, true);
-
 	if (agent_args->node_count == 0) {
 		hostlist_destroy(agent_args->hostlist);
 		xfree(agent_args);
-- 
GitLab