From 98c9ebe5bb66e42b8a1004353b126423fdca35e1 Mon Sep 17 00:00:00 2001
From: Moe Jette <jette1@llnl.gov>
Date: Fri, 7 Mar 2003 21:19:50 +0000
Subject: [PATCH] Add error_code for warning on allocation response msg.
 Slurmctld now supports a MinNodes parameter for each partition; no job
 smaller than that size is initiated there. Partition Min/MaxNodes limits
 are now enforced at job schedule time rather than only at submit time: a
 job that exceeds them is accepted and left queued, and the submit or
 allocate response reports the condition as a warning in its error_code
 field.

---
 src/slurmctld/controller.c     |  30 +++++----
 src/slurmctld/job_mgr.c        |  62 ++++++++++--------
 src/slurmctld/job_scheduler.c  |   4 ++
 src/slurmctld/node_scheduler.c | 114 +++++++++++++++------------------
 src/slurmctld/slurmctld.h      |   4 +-
 5 files changed, 114 insertions(+), 100 deletions(-)

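Note for reviewers: with this change, a submit or allocate request that
violates the target partition's MinNodes/MaxNodes limits, or that names a
down partition, is still accepted and queued; the response message carries
ESLURM_REQUESTED_PART_CONFIG_UNAVAILABLE in its new error_code field as a
warning rather than a fatal error.  The sketch below shows one way a client
might interpret that field.  It uses only the type, field, and error-code
names that appear in this patch; the include paths and the surrounding
client API are assumptions for illustration, not part of the change.

#include <stdio.h>
#include <slurm/slurm.h>	/* assumed client header location */
#include <slurm/slurm_errno.h>	/* assumed: SLURM_SUCCESS, ESLURM_* codes */

/* Illustrative sketch only: decide whether an allocation response is a
 * real allocation, a queued-with-warning result, or a hard error.
 * Returns 0 if the caller may proceed (allocated or queued), -1 on error. */
static int check_alloc_response(resource_allocation_response_msg_t *alloc)
{
	if (alloc->error_code == SLURM_SUCCESS)
		return 0;	/* nodes allocated; node_list etc. are valid */

	if (alloc->error_code == ESLURM_REQUESTED_PART_CONFIG_UNAVAILABLE) {
		/* Warning only: the job was accepted and remains queued, but
		 * can not run under the partition's current limits or state */
		fprintf(stderr, "job %u queued: %s\n", alloc->job_id,
			slurm_strerror(alloc->error_code));
		return 0;
	}

	fprintf(stderr, "allocation failed: %s\n",
		slurm_strerror(alloc->error_code));
	return -1;
}

The batch submit response gains the same error_code field, so a submitting
client can apply the same check to it.
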
diff --git a/src/slurmctld/controller.c b/src/slurmctld/controller.c
index 4ea3897a1d9..a891603213d 100644
--- a/src/slurmctld/controller.c
+++ b/src/slurmctld/controller.c
@@ -1351,7 +1351,8 @@ static void _slurm_rpc_submit_batch_job(slurm_msg_t * msg)
 	}
 
 	/* return result */
-	if (error_code) {
+	if ((error_code != SLURM_SUCCESS) &&
+	    (error_code != ESLURM_REQUESTED_PART_CONFIG_UNAVAILABLE)) {
 		info("_slurm_rpc_submit_batch_job time=%ld, error=%s",
 		     (long) (clock() - start_time),
 		     slurm_strerror(error_code));
@@ -1361,7 +1362,8 @@ static void _slurm_rpc_submit_batch_job(slurm_msg_t * msg)
 		   "_slurm_rpc_submit_batch_job success for id=%u, time=%ld", 
 		   job_id, (long) (clock() - start_time));
 		/* send job_ID */
-		submit_msg.job_id = job_id;
+		submit_msg.job_id     = job_id;
+		submit_msg.error_code = error_code;
 		response_msg.msg_type = RESPONSE_SUBMIT_BATCH_JOB;
 		response_msg.data = &submit_msg;
 		slurm_send_node_msg(msg->conn_fd, &response_msg);
@@ -1382,14 +1384,14 @@ static void _slurm_rpc_allocate_resources(slurm_msg_t * msg)
 	char *node_list_ptr = NULL;
 	uint16_t num_cpu_groups = 0;
 	uint32_t *cpus_per_node = NULL, *cpu_count_reps = NULL;
-	uint32_t job_id;
+	uint32_t job_id = 0;
 	resource_allocation_response_msg_t alloc_msg;
 	/* Locks: Write job, write node, read partition */
 	slurmctld_lock_t job_write_lock = { 
 		NO_LOCK, WRITE_LOCK, WRITE_LOCK, READ_LOCK };
 	uid_t uid;
-	uint16_t node_cnt;
-	slurm_addr *node_addr;
+	uint16_t node_cnt = 0;
+	slurm_addr *node_addr = NULL;
 
 	start_time = clock();
 	debug("Processing RPC: REQUEST_RESOURCE_ALLOCATION");
@@ -1403,6 +1405,7 @@ static void _slurm_rpc_allocate_resources(slurm_msg_t * msg)
 		error("Security violation, RESOURCE_ALLOCATE from uid %u",
 		      (unsigned int) uid);
 	}
+
 	if (error_code == SLURM_SUCCESS) {
 		int immediate = job_desc_msg->immediate;
 		lock_slurmctld(job_write_lock);
@@ -1415,7 +1418,8 @@ static void _slurm_rpc_allocate_resources(slurm_msg_t * msg)
 	}
 
 	/* return result */
-	if (error_code) {
+	if ((error_code != SLURM_SUCCESS) &&
+	    (error_code != ESLURM_REQUESTED_PART_CONFIG_UNAVAILABLE)) {
 		info("_slurm_rpc_allocate_resources time=%ld, error=%s ", 
 		     (long) (clock() - start_time), 
 		     slurm_strerror(error_code));
@@ -1426,14 +1430,14 @@ static void _slurm_rpc_allocate_resources(slurm_msg_t * msg)
 		   node_list_ptr, job_id, (long) (clock() - start_time));
 
 		/* send job_ID  and node_name_ptr */
-
-		alloc_msg.job_id = job_id;
-		alloc_msg.node_list = node_list_ptr;
-		alloc_msg.num_cpu_groups = num_cpu_groups;
-		alloc_msg.cpus_per_node = cpus_per_node;
 		alloc_msg.cpu_count_reps = cpu_count_reps;
-		alloc_msg.node_cnt = node_cnt;
-		alloc_msg.node_addr = node_addr;
+		alloc_msg.cpus_per_node  = cpus_per_node;
+		alloc_msg.error_code     = error_code;
+		alloc_msg.job_id         = job_id;
+		alloc_msg.node_addr      = node_addr;
+		alloc_msg.node_cnt       = node_cnt;
+		alloc_msg.node_list      = node_list_ptr;
+		alloc_msg.num_cpu_groups = num_cpu_groups;
 		response_msg.msg_type = RESPONSE_RESOURCE_ALLOCATION;
 		response_msg.data = &alloc_msg;
 
diff --git a/src/slurmctld/job_mgr.c b/src/slurmctld/job_mgr.c
index eec48e8b6ab..eaa144d5c6d 100644
--- a/src/slurmctld/job_mgr.c
+++ b/src/slurmctld/job_mgr.c
@@ -1110,12 +1110,15 @@ int job_allocate(job_desc_msg_t * job_specs, uint32_t * new_job_id,
 
 	no_alloc = test_only || (!top_prio);
 	error_code = select_nodes(job_ptr, no_alloc);
-	if (error_code == ESLURM_NODES_BUSY) {
+	if ((error_code == ESLURM_NODES_BUSY) ||
+	    (error_code == ESLURM_REQUESTED_PART_CONFIG_UNAVAILABLE)) {
+		/* Not fatal error, but job can't be scheduled right now */
 		if (immediate) {
 			job_ptr->job_state = JOB_FAILED;
 			job_ptr->end_time = 0;
 		} else		/* job remains queued */
-			error_code = 0;
+			if (error_code == ESLURM_NODES_BUSY) 
+				error_code = SLURM_SUCCESS;
 		return error_code;
 	}
 
@@ -1305,7 +1308,7 @@ static int _job_create(job_desc_msg_t * job_desc, uint32_t * new_job_id,
 		       int allocate, int will_run,
 		       struct job_record **job_rec_ptr, uid_t submit_uid)
 {
-	int error_code, i;
+	int error_code = SLURM_SUCCESS, i;
 	struct part_record *part_ptr;
 	bitstr_t *req_bitmap = NULL, *exc_bitmap = NULL;
 
@@ -1413,14 +1416,10 @@ static int _job_create(job_desc_msg_t * job_desc, uint32_t * new_job_id,
 		error_code = ESLURM_TOO_MANY_REQUESTED_CPUS;
 		goto cleanup;
 	}
-	if ((job_desc->min_nodes > part_ptr->total_nodes) ||
-	    (job_desc->min_nodes > part_ptr->max_nodes)) {
-		if (part_ptr->total_nodes > part_ptr->max_nodes)
-			i = part_ptr->max_nodes;
-		else
-			i = part_ptr->total_nodes;
+	if (job_desc->min_nodes > part_ptr->total_nodes) {
 		info("Job requested too many nodes (%d) of partition %s(%d)", 
-		     job_desc->min_nodes, part_ptr->name, i);
+		     job_desc->min_nodes, part_ptr->name, 
+		     part_ptr->total_nodes);
 		error_code = ESLURM_TOO_MANY_REQUESTED_NODES;
 		goto cleanup;
 	}
@@ -1430,15 +1429,13 @@ static int _job_create(job_desc_msg_t * job_desc, uint32_t * new_job_id,
 		error_code = ESLURM_TOO_MANY_REQUESTED_NODES;
 		goto cleanup;
 	}
-	if (job_desc->max_nodes > part_ptr->max_nodes) 
-		job_desc->max_nodes = part_ptr->max_nodes;
 
 
 	if ((error_code =_validate_job_create_req(job_desc)))
 		goto cleanup;
 
 	if (will_run) {
-		error_code = 0;
+		error_code = SLURM_SUCCESS;
 		goto cleanup;
 	}
 
@@ -1462,15 +1459,26 @@ static int _job_create(job_desc_msg_t * job_desc, uint32_t * new_job_id,
 		(*job_rec_ptr)->batch_flag = 1;
 	} else
 		(*job_rec_ptr)->batch_flag = 0;
-
-	if (part_ptr->shared == SHARED_FORCE)	/* shared=force */
-		(*job_rec_ptr)->details->shared = 1;
-	else if (((*job_rec_ptr)->details->shared != 1) || 
-	         (part_ptr->shared == SHARED_NO))	/* can't share */
-		(*job_rec_ptr)->details->shared = 0;
-
 	*new_job_id = (*job_rec_ptr)->job_id;
-	return SLURM_SUCCESS;
+
+	/* Insure that requested partition is valid right now, 
+	 * otherwise leave job queued and provide warning code */
+	if (job_desc->min_nodes > part_ptr->max_nodes) {
+		info("Job %u requested too many nodes (%d) of partition %s(%d)",
+		     *new_job_id, job_desc->min_nodes, part_ptr->name, 
+		     part_ptr->max_nodes);
+		error_code = ESLURM_REQUESTED_PART_CONFIG_UNAVAILABLE;
+	} else if ((job_desc->max_nodes != 0) &&    /* 0 == no max_nodes limit */
+		   (job_desc->max_nodes < part_ptr->min_nodes)) {
+		info("Job %u requested too few nodes (%d) of partition %s(%d)",
+		     *new_job_id, job_desc->max_nodes, 
+		     part_ptr->name, part_ptr->min_nodes);
+		error_code = ESLURM_REQUESTED_PART_CONFIG_UNAVAILABLE;
+	} else if (part_ptr->state_up == 0) {
+		info("Job %u requested down partition %s", 
+		     *new_job_id, part_ptr->name);
+		error_code = ESLURM_REQUESTED_PART_CONFIG_UNAVAILABLE;
+	}
 
       cleanup:
 	FREE_NULL_BITMAP(req_bitmap);
@@ -1802,7 +1810,7 @@ _copy_job_desc_to_job_record(job_desc_msg_t * job_desc,
 
 	strncpy(job_ptr->partition, part_ptr->name, MAX_NAME_LEN);
 	job_ptr->part_ptr = part_ptr;
-	if (job_desc->job_id != NO_VAL)
+	if (job_desc->job_id != NO_VAL)		/* already confirmed unique */
 		job_ptr->job_id = job_desc->job_id;
 	else
 		_set_job_id(job_ptr);
@@ -1817,11 +1825,12 @@ _copy_job_desc_to_job_record(job_desc_msg_t * job_desc,
 	job_ptr->time_limit = job_desc->time_limit;
 	job_ptr->alloc_sid  = job_desc->alloc_sid;
 	job_ptr->alloc_node = xstrdup(job_desc->alloc_node);
-	if ((job_desc->priority !=
-	     NO_VAL) /* also check submit UID is root */ )
+
+	if (job_desc->priority != NO_VAL) /* already confirmed submit_uid==0 */
 		job_ptr->priority = job_desc->priority;
 	else
 		_set_job_prio(job_ptr);
+
 	if (job_desc->kill_on_node_fail != (uint16_t) NO_VAL)
 		job_ptr->kill_on_node_fail = job_desc->kill_on_node_fail;
 
@@ -1967,7 +1976,7 @@ static void _job_timed_out(struct job_record *job_ptr)
 
 /* _validate_job_desc - validate that a job descriptor for job submit or 
  *	allocate has valid data, set values to defaults as required 
- * IN job_desc_msg - pointer to job descriptor
+ * IN/OUT job_desc_msg - pointer to job descriptor, modified as needed
  * IN allocate - if clear job to be queued, if set allocate for user now 
  * IN submit_uid - who request originated
  */
@@ -2020,6 +2029,9 @@ static int _validate_job_desc(job_desc_msg_t * job_desc_msg, int allocate,
 			_purge_job_record(job_desc_msg->job_id);
 	}
 
+	if (submit_uid != 0)		/* only root can set job priority */
+		job_desc_msg->priority = NO_VAL;
+
 	if (job_desc_msg->num_procs == NO_VAL)
 		job_desc_msg->num_procs = 1;	/* default cpu count of 1 */
 	if (job_desc_msg->min_nodes == NO_VAL)
diff --git a/src/slurmctld/job_scheduler.c b/src/slurmctld/job_scheduler.c
index 70a00bf7fc2..9de3d73e1e9 100644
--- a/src/slurmctld/job_scheduler.c
+++ b/src/slurmctld/job_scheduler.c
@@ -143,6 +143,10 @@ int schedule(void)
 				 sizeof(struct part_record *));
 			failed_parts[failed_part_cnt++] =
 			    job_ptr->part_ptr;
+		} else if (error_code == 
+			   ESLURM_REQUESTED_PART_CONFIG_UNAVAILABLE) {
+			debug2("job %u not runnable with present config",
+			       job_ptr->job_id);
 		} else if (error_code == SLURM_SUCCESS) {	
 			/* job initiated */
 			last_job_update = time(NULL);
diff --git a/src/slurmctld/node_scheduler.c b/src/slurmctld/node_scheduler.c
index afb039bdb6e..1f03c1aac87 100644
--- a/src/slurmctld/node_scheduler.c
+++ b/src/slurmctld/node_scheduler.c
@@ -76,8 +76,7 @@ static int _pick_best_nodes(struct node_set *node_set_ptr,
 			    int node_set_size, bitstr_t ** req_bitmap,
 			    uint32_t req_cpus, 
 			    uint32_t min_nodes, uint32_t max_nodes,
-			    int contiguous, int shared,
-			    uint32_t node_lim);
+			    int contiguous, int shared);
 static int _valid_features(char *requested, char *available);
 
 
@@ -489,8 +488,6 @@ _enough_nodes(int avail_nodes, int rem_nodes, int min_nodes, int max_nodes)
  * IN max_nodes - maximum count of nodes required by the job (0==no limit)
  * IN contiguous - 1 if allocated nodes must be contiguous, 0 otherwise
  * IN shared - set to 1 if nodes may be shared, 0 otherwise
- * IN node_lim - maximum number of nodes permitted for job, 
- *	INFIITE for no limit (partition limit)
  * RET 0 on success, EAGAIN if request can not be satisfied now, EINVAL if
  *	request can never be satisfied (insufficient contiguous nodes)
  * NOTE: the caller must xfree memory pointed to by req_bitmap
@@ -511,7 +508,7 @@ static int
 _pick_best_nodes(struct node_set *node_set_ptr, int node_set_size,
 		 bitstr_t ** req_bitmap, uint32_t req_cpus,
 		 uint32_t min_nodes, uint32_t max_nodes,
-		 int contiguous, int shared, uint32_t node_lim)
+		 int contiguous, int shared)
 {
 	int error_code = SLURM_SUCCESS, i, j, pick_code;
 	int total_nodes = 0, total_cpus = 0;	/* total resources configured 
@@ -526,14 +523,6 @@ _pick_best_nodes(struct node_set *node_set_ptr, int node_set_size,
 		info("_pick_best_nodes: empty node set for selection");
 		return EINVAL;
 	}
-	if (node_lim != INFINITE) {
-		if (min_nodes > node_lim) {
-			info("_pick_best_nodes: exceed partition node limit");
-			return EINVAL;
-		}
-		if (max_nodes > node_lim)
-			max_nodes = node_lim;
-	}
 
 	if (*req_bitmap) {	/* specific nodes required */
 		/* we have already confirmed that all of these nodes have a
@@ -542,8 +531,8 @@ _pick_best_nodes(struct node_set *node_set_ptr, int node_set_size,
 			total_nodes = bit_set_count(*req_bitmap);
 		if (req_cpus != 0)
 			total_cpus = count_cpus(*req_bitmap);
-		if (total_nodes > node_lim) {
-			info("_pick_best_nodes: exceed partition node limit");
+		if (total_nodes > max_nodes) {
+			info("_pick_best_nodes: required nodes exceed limit");
 			return EINVAL;
 		}
 		if ((min_nodes <= total_nodes) && 
@@ -588,23 +577,16 @@ _pick_best_nodes(struct node_set *node_set_ptr, int node_set_size,
 			if ((*req_bitmap) &&
 			    (!bit_super_set(*req_bitmap, avail_bitmap)))
 				continue;
-			if ((avail_nodes < min_nodes) ||
-			    (avail_cpus < req_cpus) ||
-			    ((max_nodes > min_nodes) && 
+			if ((avail_nodes  < min_nodes) ||
+			    (avail_cpus   < req_cpus) ||
+			    ((max_nodes   > min_nodes) && 
 			     (avail_nodes < max_nodes)))
-				continue;
-			pick_code =
-			    _pick_best_quadrics(avail_bitmap, *req_bitmap,
-						min_nodes, max_nodes, 
-						req_cpus, contiguous);
-			if ((pick_code == 0) && (node_lim != INFINITE) && 
-			    (bit_set_count(avail_bitmap) > node_lim)) {
-				info("_pick_best_nodes: %u nodes, max is %u", 
-				     bit_set_count(avail_bitmap), node_lim);
-				error_code = EINVAL;
-				break;
-			}
-			if (pick_code == 0) {
+				continue;	/* Keep accumulating nodes */
+			pick_code = _pick_best_quadrics(avail_bitmap, 
+							*req_bitmap, min_nodes,
+							max_nodes, req_cpus, 
+							contiguous);
+			if (pick_code == SLURM_SUCCESS) {
 				FREE_NULL_BITMAP(total_bitmap);
 				FREE_NULL_BITMAP(*req_bitmap);
 				*req_bitmap = avail_bitmap;
@@ -613,13 +595,13 @@ _pick_best_nodes(struct node_set *node_set_ptr, int node_set_size,
 		}
 
 		/* try to get max_nodes now for this feature */
-		if ((max_nodes > min_nodes) && 
+		if ((max_nodes > min_nodes) && (avail_nodes >= min_nodes) &&
 		    (avail_nodes < max_nodes)) {
 			pick_code =
 			    _pick_best_quadrics(avail_bitmap, *req_bitmap,
 						min_nodes, max_nodes, 
 						req_cpus, contiguous);
-			if (pick_code == 0) {
+			if (pick_code == SLURM_SUCCESS) {
 				FREE_NULL_BITMAP(total_bitmap);
 				FREE_NULL_BITMAP(*req_bitmap);
 				*req_bitmap = avail_bitmap;
@@ -629,22 +611,15 @@ _pick_best_nodes(struct node_set *node_set_ptr, int node_set_size,
 
 		/* determine if job could possibly run (if all configured 
 		 * nodes available) */
-		if ((error_code == 0) && (!runable) &&
+		if ((error_code == SLURM_SUCCESS) && (!runable) &&
 		    (total_nodes >= min_nodes) && (total_cpus >= req_cpus) &&
 		    ((*req_bitmap == NULL) ||
-		     (bit_super_set(*req_bitmap, total_bitmap))) && 
-		    ((node_lim == INFINITE) || (min_nodes <= node_lim))) {
-			pick_code =
-			    _pick_best_quadrics(total_bitmap, *req_bitmap,
-						min_nodes, 0,
-						req_cpus, contiguous);
-			if ((pick_code == 0) && (node_lim != INFINITE) &&
-			    (bit_set_count(total_bitmap) > node_lim)) {
-				info("_pick_best_nodes: %u nodes, max is %u", 
-				     bit_set_count(avail_bitmap), node_lim);
-				error_code = EINVAL;
-			}
-			if (pick_code == 0)
+		     (bit_super_set(*req_bitmap, total_bitmap)))) {
+			pick_code = _pick_best_quadrics(total_bitmap, 
+							*req_bitmap, min_nodes,
+							max_nodes, req_cpus, 
+							contiguous);
+			if (pick_code == SLURM_SUCCESS)
 				runable = true;
 		}
 		FREE_NULL_BITMAP(avail_bitmap);
@@ -706,10 +681,11 @@ _add_node_set_info(struct node_set *node_set_ptr,
  */
 int select_nodes(struct job_record *job_ptr, bool test_only)
 {
-	int error_code = SLURM_SUCCESS, i, node_set_size = 0;
+	int error_code = SLURM_SUCCESS, i, shared, node_set_size = 0;
 	bitstr_t *req_bitmap = NULL;
 	struct node_set *node_set_ptr = NULL;
 	struct part_record *part_ptr = job_ptr->part_ptr;
+	uint32_t min_nodes, max_nodes;
 
 	if (job_ptr == NULL)
 		fatal ("select_nodes: job_ptr == NULL");
@@ -718,17 +694,22 @@ int select_nodes(struct job_record *job_ptr, bool test_only)
 	/* insure that partition exists and is up */
 	if (part_ptr == NULL) {
 		part_ptr = find_part_record(job_ptr->partition);
+		if (part_ptr == NULL)
+			fatal("Invalid partition name %s for job %u",
+			      job_ptr->partition, job_ptr->job_id);
 		job_ptr->part_ptr = part_ptr;
 		error("partition pointer reset for job %u, part %s",
 		      job_ptr->job_id, job_ptr->partition);
 	}
-	if (part_ptr == NULL)
-		fatal("Invalid partition name %s for job %u",
-		      job_ptr->partition, job_ptr->job_id);
-	if (part_ptr->state_up == 0)
-		return ESLURM_NODES_BUSY;
 
-	/* get sets of nodes from the configuration list */
+	/* Confirm that partition is up and has compatible node limits */
+	if ((part_ptr->state_up == 0) ||
+	    ((job_ptr->details->max_nodes != 0) &&	/* 0 == no node limit */
+	     (job_ptr->details->max_nodes < part_ptr->min_nodes)) ||
+	    (job_ptr->details->min_nodes > part_ptr->max_nodes))
+		return ESLURM_REQUESTED_PART_CONFIG_UNAVAILABLE;
+
+	/* build sets of usable nodes based upon their configuration */
 	error_code = _build_node_list(job_ptr, &node_set_ptr, &node_set_size);
 	if (error_code)
 		return error_code;
@@ -746,14 +727,25 @@ int select_nodes(struct job_record *job_ptr, bool test_only)
 	}
 
 	/* pick the nodes providing a best-fit */
+	min_nodes = MAX(job_ptr->details->min_nodes, part_ptr->min_nodes);
+	if (part_ptr->max_nodes == INFINITE)
+		max_nodes = job_ptr->details->max_nodes;
+	else if (job_ptr->details->max_nodes == 0)
+		max_nodes = part_ptr->max_nodes;
+	else
+		max_nodes = MIN(job_ptr->details->max_nodes, 
+				part_ptr->max_nodes);
+	if (part_ptr->shared == SHARED_FORCE)	/* shared=force */
+		shared = 1;
+	else if (part_ptr->shared == SHARED_NO)	/* can't share */
+		shared = 0;
+	else
+		shared = job_ptr->details->shared;
+
 	error_code = _pick_best_nodes(node_set_ptr, node_set_size,
-				      &req_bitmap,
-				      job_ptr->details->num_procs,
-				      job_ptr->details->min_nodes,
-				      job_ptr->details->max_nodes,
-				      job_ptr->details->contiguous,
-				      job_ptr->details->shared,
-				      part_ptr->max_nodes);
+				      &req_bitmap, job_ptr->details->num_procs,
+				      min_nodes, max_nodes,
+				      job_ptr->details->contiguous, shared);
 	if (error_code == EAGAIN) {
 		error_code = ESLURM_NODES_BUSY;
 		goto cleanup;
diff --git a/src/slurmctld/slurmctld.h b/src/slurmctld/slurmctld.h
index a500d61c847..d3cd32ff2b9 100644
--- a/src/slurmctld/slurmctld.h
+++ b/src/slurmctld/slurmctld.h
@@ -68,9 +68,11 @@
 		_X	= NULL; 	\
 	} while (0)
 #define IS_JOB_FINISHED(_X)		\
-	((_X->job_state & (~JOB_COMPLETING)) > JOB_RUNNING)
+	((_X->job_state & (~JOB_COMPLETING)) >  JOB_RUNNING)
 #define IS_JOB_PENDING(_X)		\
 	((_X->job_state & (~JOB_COMPLETING)) == JOB_PENDING)
+#define MAX(x,y) (((x)>(y))?(x):(y))
+#define MIN(x,y) (((x)<(y))?(x):(y))
 
 /*****************************************************************************\
  *  GENERAL CONFIGURATION parameters and data structures
-- 
GitLab