From 4ce626789dbfff156254345362abb54ebda92784 Mon Sep 17 00:00:00 2001
From: Morris Jette <jette@schedmd.com>
Date: Thu, 26 May 2016 16:16:51 -0700
Subject: [PATCH] Fix for tracking a node's allocated CPUs with gang
 scheduling.

This bug was introduced by commit 21c52d2f61e8086209d0c4d18f4700c07588ead9
which fixed a different problem tracking resources associated with suspended
jobs. There are subtle differences between jobs that are suspended by a
user/administrator and jobs suspended by gang scheduling which resulted in
undercounting allocated CPUs when a job suspended by gang scheduling
was active at the same time as a slurmctld reconfiguration request.
See bug 2353 (the original bug, related to commit
21c52d2f61e8086209d0c4d18f4700c07588ead9) and bug 2765.
---
 NEWS                                          |  1 +
 src/plugins/select/cons_res/select_cons_res.c | 14 ++++++++++----
 2 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/NEWS b/NEWS
index 201d7376544..a383bc966a3 100644
--- a/NEWS
+++ b/NEWS
@@ -41,6 +41,7 @@ documents those changes that are of interest to users and administrators.
  -- Make it so --mail-type=NONE didn't throw an invalid error.
  -- If no default account is given for a user when creating (only a list of
     accounts) no default account is printed, previously NULL was printed.
+ -- Fix for tracking a node's allocated CPUs with gang scheduling.
 
 * Changes in Slurm 15.08.11
 ===========================
diff --git a/src/plugins/select/cons_res/select_cons_res.c b/src/plugins/select/cons_res/select_cons_res.c
index 3c8714acad1..89aca1ab0e4 100644
--- a/src/plugins/select/cons_res/select_cons_res.c
+++ b/src/plugins/select/cons_res/select_cons_res.c
@@ -2535,9 +2535,12 @@ extern int select_p_select_nodeinfo_set(struct job_record *job_ptr)
 
 	if (IS_JOB_RUNNING(job_ptr))
 		rc = _add_job_to_res(job_ptr, 0);
-	else if (IS_JOB_SUSPENDED(job_ptr))
-		rc = _add_job_to_res(job_ptr, 1);
-	else
+	else if (IS_JOB_SUSPENDED(job_ptr)) {
+		if (job_ptr->priority == 0)
+			rc = _add_job_to_res(job_ptr, 1);
+		else	/* Gang schedule suspend */
+			rc = _add_job_to_res(job_ptr, 0);
+	} else
 		return SLURM_SUCCESS;
 	gres_plugin_job_state_log(job_ptr->gres_list, job_ptr->job_id);
 
@@ -2743,7 +2746,10 @@ extern int select_p_reconfigure(void)
 			_add_job_to_res(job_ptr, 0);
 		} else if (IS_JOB_SUSPENDED(job_ptr)) {
 			/* add the job in a suspended state */
-			_add_job_to_res(job_ptr, 1);
+			if (job_ptr->priority == 0)
+				rc = _add_job_to_res(job_ptr, 1);
+			else	/* Gang schedule suspend */
+				rc = _add_job_to_res(job_ptr, 0);
 		} else if (_job_cleaning(job_ptr)) {
 			cleaning_job_cnt++;
 			run_time = (int) difftime(now, job_ptr->end_time);
-- 
GitLab