From dffde6fe304d1bb82a5c5a7e46b26f55b7df812b Mon Sep 17 00:00:00 2001
From: Moe Jette <jette1@llnl.gov>
Date: Thu, 12 Jun 2008 19:53:16 +0000
Subject: [PATCH] We are not saving a pending job's task distribution, so after
 restarting slurmctld select/cons_res was over-allocating resources based
 upon an uninitialized distribution value. Since we can't save the value
 without changing the state save file format, we'll just set it to the
 default value for now. This will result in an incorrect task distribution
 for jobs that had a task distribution that was not the default and were
 pending when the slurmctld daemon restarted, but at least resources
 will not be over-allocated.

---
 NEWS                                          |  8 ++++++++
 src/plugins/select/cons_res/select_cons_res.c | 10 ++++++----
 src/slurmctld/job_mgr.c                       |  2 ++
 3 files changed, 16 insertions(+), 4 deletions(-)

diff --git a/NEWS b/NEWS
index f2a5db43446..c491753aea8 100644
--- a/NEWS
+++ b/NEWS
@@ -14,6 +14,14 @@ documents those changes that are of interest to users and admins.
  -- For Moab, sbatch --get-user-env option processed by slurmd daemon
     rather than the sbatch command itself to permit faster response
     for Moab.
+ -- We are not saving a pending job's task distribution, so after restarting
+    slurmctld select/cons_res was over-allocating resources based upon an
+    uninitialized distribution value. Since we can't save the value without
+    changing the state save file format, we'll just set it to the default
+    value for now. This will result in an incorrect task distribution for
+    jobs that had a task distribution that was not the default and were
+    pending when the slurmctld daemon restarted, but at least resources
+    will not be over-allocated.
 
 * Changes in SLURM 1.3.3
 ========================
diff --git a/src/plugins/select/cons_res/select_cons_res.c b/src/plugins/select/cons_res/select_cons_res.c
index 50d9f7a00a6..2afd1034e1e 100644
--- a/src/plugins/select/cons_res/select_cons_res.c
+++ b/src/plugins/select/cons_res/select_cons_res.c
@@ -2640,11 +2640,13 @@ static int _job_test(struct job_record *job_ptr, bitstr_t *bitmap,
 			error_code = cr_plane_dist(job, mc_ptr->plane_size, cr_type); 
 			break;
 		case SLURM_DIST_ARBITRARY:
-		default:
 			error_code = compute_c_b_task_dist(job);
-			if (error_code != SLURM_SUCCESS) {
-				error(" Error in compute_c_b_task_dist");
-			}
+			if (error_code != SLURM_SUCCESS)
+				error("Error in compute_c_b_task_dist");
+			break;
+		default:
+			error("select/cons_res: invalid dist_type");
+			error_code = SLURM_ERROR;
 			break;
 		}
 	}
diff --git a/src/slurmctld/job_mgr.c b/src/slurmctld/job_mgr.c
index 135484cb5d1..b42d529967b 100644
--- a/src/slurmctld/job_mgr.c
+++ b/src/slurmctld/job_mgr.c
@@ -893,6 +893,8 @@ static int _load_job_details(struct job_record *job_ptr, Buf buffer)
 	job_ptr->details->acctg_freq = acctg_freq;
 	job_ptr->details->contiguous = contiguous;
 	job_ptr->details->cpus_per_task = cpus_per_task;
+	/* FIXME: Need to save/restore actual task_dist value */
+	job_ptr->details->task_dist = SLURM_DIST_CYCLIC;
 	job_ptr->details->ntasks_per_node = ntasks_per_node;
 	job_ptr->details->job_min_procs = job_min_procs;
 	job_ptr->details->job_min_memory = job_min_memory;
-- 
GitLab