From 04fbf26a2254c8505a7111a66867acdef984ca6c Mon Sep 17 00:00:00 2001
From: Morris Jette <jette@schedmd.com>
Date: Wed, 16 Jan 2013 11:53:18 -0800
Subject: [PATCH] Fix for scheduling batch jobs in multiple partitions

Without this change, a high-priority batch job may not start at submit
time. In addition, a pending job with multiple partitions may be
cancelled when the scheduler runs if any of its partitions cannot be
used by the job.
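
For reference, a minimal standalone sketch (not Slurm source; the
struct and the stub schedule() below are illustrative stand-ins) of the
scheduling-depth logic this patch adds to proc_req.c: when a batch job
requests several partitions, the scheduler is asked to consider one
attempt per partition rather than a single attempt overall.

    /* sched_count_sketch.c: simplified illustration only */
    #include <stdio.h>

    struct job {
        unsigned int job_id;
        int part_list_cnt;   /* partitions requested; 0 if only one */
    };

    /* Stand-in for Slurm's schedule(); just reports the depth used. */
    static void schedule(int job_limit)
    {
        printf("schedule() called with job_limit=%d\n", job_limit);
    }

    int main(void)
    {
        struct job single = { .job_id = 100, .part_list_cnt = 0 };
        struct job multi  = { .job_id = 101, .part_list_cnt = 3 };
        struct job *jobs[] = { &single, &multi };

        for (int i = 0; i < 2; i++) {
            int sched_count = 1;           /* default: one attempt */
            if (jobs[i]->part_list_cnt)    /* multi-partition job */
                sched_count = jobs[i]->part_list_cnt;
            printf("JobId=%u: ", jobs[i]->job_id);
            schedule(sched_count);         /* one attempt per partition */
        }
        return 0;
    }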
---
 src/slurmctld/job_scheduler.c | 6 ++++++
 src/slurmctld/proc_req.c      | 5 ++++-
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/src/slurmctld/job_scheduler.c b/src/slurmctld/job_scheduler.c
index 03fd0d7cc15..cf740c0422a 100644
--- a/src/slurmctld/job_scheduler.c
+++ b/src/slurmctld/job_scheduler.c
@@ -1016,6 +1016,12 @@ next_part:			part_ptr = (struct part_record *)
 				launch_job(job_ptr);
 			rebuild_job_part_list(job_ptr);
 			job_cnt++;
+		} else if ((error_code ==
+			    ESLURM_REQUESTED_NODE_CONFIG_UNAVAILABLE) &&
+			   job_ptr->part_ptr_list) {
+			debug("JobId=%u non-runnable in partition %s: %s",
+			      job_ptr->job_id, job_ptr->part_ptr->name,
+			      slurm_strerror(error_code));
 		} else if ((error_code !=
 			    ESLURM_REQUESTED_PART_CONFIG_UNAVAILABLE) &&
 			   (error_code != ESLURM_NODE_NOT_AVAIL)      &&
diff --git a/src/slurmctld/proc_req.c b/src/slurmctld/proc_req.c
index 1d3b711177a..410d88299ca 100644
--- a/src/slurmctld/proc_req.c
+++ b/src/slurmctld/proc_req.c
@@ -2695,6 +2695,9 @@ static void _slurm_rpc_submit_batch_job(slurm_msg_t * msg)
 		     slurm_strerror(error_code));
 		slurm_send_rc_msg(msg, error_code);
 	} else {
+		int sched_count = 1;	/* Job count to attempt to schedule */
+		if (job_ptr->part_ptr_list)
+			sched_count = list_count(job_ptr->part_ptr_list);
 		info("_slurm_rpc_submit_batch_job JobId=%u %s",
 		     job_ptr->job_id, TIME_STR);
 		/* send job_ID */
@@ -2709,7 +2712,7 @@ static void _slurm_rpc_submit_batch_job(slurm_msg_t * msg)
 		 * We also run schedule() even if this job could not start,
 		 * say due to a higher priority job, since the locks are
 		 * released above and we might start some other job here. */
-		schedule(1);		/* has own locks */
+		schedule(sched_count);	/* has own locks */
 		schedule_job_save();	/* has own locks */
 		schedule_node_save();	/* has own locks */
 	}
-- 
GitLab