From 0fc4539e7a0e23f1eaab5c692ce33418c9f7768e Mon Sep 17 00:00:00 2001
From: Morris Jette <jette@schedmd.com>
Date: Thu, 4 Apr 2013 12:24:13 -0700
Subject: [PATCH] Prevent assert failure in scheduling logic

This fixes a bug introduced in commit f1cf6d2df1abbcf2703e5ba81bc4792487a691d8.
Without this change, a job with a partition-related reason for not running
(e.g. MaxNodes) plus some other reason (e.g. dependency) could, after the
second reason (e.g. the dependency) was satisfied, have select_nodes()
executed for an allocation request and return SUCCESS without actually
allocating resources, since select_nodes() would interpret the request as a
test of whether the job could ever run (i.e. will_run).
Bug 263
---
 src/slurmctld/job_mgr.c        | 12 ++++--------
 src/slurmctld/job_scheduler.c  | 15 ++++++++++-----
 src/slurmctld/node_scheduler.c | 14 +-------------
 3 files changed, 15 insertions(+), 26 deletions(-)

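Note (not part of the commit): a minimal standalone sketch of the call pattern
this patch introduces. The names job_limits_check(), select_nodes(), and
WAIT_NO_REASON come from the patch, but the types and stub bodies below are
simplified assumptions, not the real slurmctld code. The point is that the
caller now downgrades the request to a test-only call when a partition limit
blocks the job, so a SUCCESS return from a real allocation request always
means resources were actually allocated.

	#include <stdbool.h>
	#include <stdio.h>

	enum job_state_reason { WAIT_NO_REASON = 0, WAIT_PART_NODE_LIMIT };

	struct job_record { enum job_state_reason state_reason; };

	/* Stub: report a partition-limit problem, if any (hypothetical body). */
	static enum job_state_reason job_limits_check(struct job_record **job_pp)
	{
		return (*job_pp)->state_reason;
	}

	/* Stub: when test_only is set, only answer "could this job ever run?"
	 * and never allocate resources (hypothetical body). */
	static int select_nodes(struct job_record *job_ptr, bool test_only)
	{
		(void) job_ptr;
		if (test_only)
			printf("will_run test only, nothing allocated\n");
		else
			printf("resources allocated\n");
		return 0;	/* SLURM_SUCCESS */
	}

	/* Pattern from the patch: the caller decides up front whether a
	 * partition limit forces a test-only call, instead of select_nodes()
	 * silently reinterpreting an allocation request as a will_run test. */
	static int select_nodes_checked(struct job_record *job_ptr, bool test_only)
	{
		if (job_limits_check(&job_ptr) != WAIT_NO_REASON)
			test_only = true;
		return select_nodes(job_ptr, test_only);
	}

	int main(void)
	{
		struct job_record job = { .state_reason = WAIT_PART_NODE_LIMIT };
		return select_nodes_checked(&job, false);
	}
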
diff --git a/src/slurmctld/job_mgr.c b/src/slurmctld/job_mgr.c
index c79055a2077..b9f21108199 100644
--- a/src/slurmctld/job_mgr.c
+++ b/src/slurmctld/job_mgr.c
@@ -2727,6 +2727,8 @@ static int _select_nodes_parts(struct job_record *job_ptr, bool test_only,
 		}
 		list_iterator_destroy(iter);
 	} else {
+		if (job_limits_check(&job_ptr) != WAIT_NO_REASON)
+			test_only = true;
 		rc = select_nodes(job_ptr, test_only, select_node_bitmap);
 	}
 
@@ -2851,14 +2853,8 @@ extern int job_allocate(job_desc_msg_t * job_specs, int immediate,
 	test_only = will_run || (allocate == 0);
 
 	no_alloc = test_only || too_fragmented ||
-		   (!top_prio) || (!independent);
-	if (!no_alloc && !avail_front_end()) {
-		debug("sched: job_allocate() returning, no front end nodes "
-		       "are available");
-		error_code = ESLURM_NODES_BUSY;
-	} else {
-		error_code = _select_nodes_parts(job_ptr, no_alloc, NULL);
-	}
+		   (!top_prio) || (!independent) || !avail_front_end();
+	error_code = _select_nodes_parts(job_ptr, no_alloc, NULL);
 	if (!test_only) {
 		last_job_update = now;
 		slurm_sched_schedule();	/* work for external scheduler */
diff --git a/src/slurmctld/job_scheduler.c b/src/slurmctld/job_scheduler.c
index cf9c95d2cf4..4052a61f9de 100644
--- a/src/slurmctld/job_scheduler.c
+++ b/src/slurmctld/job_scheduler.c
@@ -200,13 +200,16 @@ static bool _job_runnable_test1(struct job_record *job_ptr, bool clear_start)
 /* Job and partition tests for ability to run now */
 static bool _job_runnable_test2(struct job_record *job_ptr)
 {
-	if (!part_policy_job_runnable_state(job_ptr)) {
-		job_ptr->state_reason = job_limits_check(&job_ptr);
-		xfree(job_ptr->state_desc);
+	int reason;
 
-		if (job_ptr->state_reason  != WAIT_NO_REASON)
-			return false;
+	reason = job_limits_check(&job_ptr);
+	if ((reason != job_ptr->state_reason) &&
+	    (part_policy_job_runnable_state(job_ptr))) {
+		job_ptr->state_reason = reason;
+		xfree(job_ptr->state_desc);
 	}
+	if (reason != WAIT_NO_REASON)
+		return false;
 	return true;
 }
 
@@ -498,6 +501,8 @@ next_part:		part_ptr = (struct part_record *)
 				continue;
 			}
 		}
+		if (job_limits_check(&job_ptr) != WAIT_NO_REASON)
+			continue;
 
 		/* Test for valid account, QOS and required nodes on each pass */
 		if (job_ptr->state_reason == FAIL_ACCOUNT) {
diff --git a/src/slurmctld/node_scheduler.c b/src/slurmctld/node_scheduler.c
index f7135bdcdb1..dec0a15982c 100644
--- a/src/slurmctld/node_scheduler.c
+++ b/src/slurmctld/node_scheduler.c
@@ -1426,7 +1426,6 @@ extern int select_nodes(struct job_record *job_ptr, bool test_only,
 	struct node_set *node_set_ptr = NULL;
 	struct part_record *part_ptr = NULL;
 	uint32_t min_nodes, max_nodes, req_nodes;
-	enum job_state_reason fail_reason;
 	time_t now = time(NULL);
 	bool configuring = false;
 	List preemptee_job_list = NULL;
@@ -1458,13 +1457,6 @@ extern int select_nodes(struct job_record *job_ptr, bool test_only,
 		return ESLURM_JOB_HELD;
 	}
 
-	/* Confirm that partition is up and has compatible nodes limits.
-	 * If not, we still continue to determine if the job can ever run.
-	 * For example, the geometry might be incompatible with BlueGene. */
-	fail_reason = job_limits_check(&job_ptr);
-	if (fail_reason != WAIT_NO_REASON)
-		test_only = true;
-
 	/* build sets of usable nodes based upon their configuration */
 	error_code = _build_node_list(job_ptr, &node_set_ptr, &node_set_size);
 	if (error_code)
@@ -1552,7 +1544,7 @@ extern int select_nodes(struct job_record *job_ptr, bool test_only,
 			}
 		}
 	}
-	if (error_code || fail_reason) {
+	if (error_code) {
 		/* Fatal errors for job here */
 		if (error_code == ESLURM_REQUESTED_PART_CONFIG_UNAVAILABLE) {
 			/* Too many nodes requested */
@@ -1561,10 +1553,6 @@ extern int select_nodes(struct job_record *job_ptr, bool test_only,
 			job_ptr->state_reason = WAIT_PART_NODE_LIMIT;
 			xfree(job_ptr->state_desc);
 			last_job_update = now;
-		} else if (fail_reason) {
-			job_ptr->state_reason = fail_reason;
-			xfree(job_ptr->state_desc);
-			last_job_update = now;
 
 		/* Non-fatal errors for job below */
 		} else if (error_code == ESLURM_NODE_NOT_AVAIL) {
-- 
GitLab