From edf3880c66292dd7a5376c44dc7204897644069b Mon Sep 17 00:00:00 2001
From: Nathan Yee <nyee32@schedmd.com>
Date: Tue, 5 Jan 2016 16:52:31 -0800
Subject: [PATCH] Reject a multi-partition job at submission time if it
 violates a partition limit
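
EnforcePartLimits is changed from a boolean into a three-way option:
"NO" accepts a job and leaves it queued even if it exceeds partition
limits, "ANY" (or "YES") rejects a job at submission time unless at
least one of the requested partitions can satisfy its limits, and
"ALL" rejects a job unless every requested partition can satisfy them.

A minimal slurm.conf sketch of the new setting (the partition names,
node ranges, limits and job script below are illustrative only, not
taken from this patch):

    # Reject a multi-partition job unless every requested partition
    # can satisfy its node and time limits.
    EnforcePartLimits=ALL

    PartitionName=debug Nodes=tux[0-3]  MaxNodes=2  MaxTime=30
    PartitionName=batch Nodes=tux[4-31] MaxNodes=16 MaxTime=1440

With this configuration, "sbatch -N4 -p debug,batch job.sh" is rejected
because the debug partition cannot satisfy the node count, while
EnforcePartLimits=ANY would accept it since the batch partition can.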

---
 doc/man/man5/slurm.conf.5        |  11 ++-
 slurm/slurm.h.in                 |   6 ++
 src/api/config_info.c            |   3 +-
 src/common/read_config.c         |  15 +++-
 src/common/slurm_protocol_defs.c |  42 +++++++++
 src/common/slurm_protocol_defs.h |   4 +
 src/slurmctld/job_mgr.c          | 142 +++++++++++++++++++++++--------
 7 files changed, 179 insertions(+), 44 deletions(-)

diff --git a/doc/man/man5/slurm.conf.5 b/doc/man/man5/slurm.conf.5
index 00466d57c46..b16386edd44 100644
--- a/doc/man/man5/slurm.conf.5
+++ b/doc/man/man5/slurm.conf.5
@@ -770,10 +770,13 @@ May not exceed 65533.
 
 .TP
 \fBEnforcePartLimits\fR
-If set to "YES" then jobs which exceed a partition's size and/or time limits
-will be rejected at submission time. If set to "NO" then the job will be
-accepted and remain queued until the partition limits are altered.
-The default value is "NO".
+If set to "ALL" then jobs which exceed a partition's size and/or
+time limits will be rejected at submission time. If job is submitted to
+multiple partitions, the job must satisfy the limits on all the requested
+paritions. If set to "NO" then the job will be accepted and remain queued
+until the partition limits are altered(Time and Node Limits).
+If set to "ANY" or "YES" a job must satisfy any of the requested partitions
+to be submitted. The default value is "NO".
 NOTE: If set, then a job's QOS can not be used to exceed partition limits.
 
 .TP
diff --git a/slurm/slurm.h.in b/slurm/slurm.h.in
index 236da4121e8..03aa004dfe2 100644
--- a/slurm/slurm.h.in
+++ b/slurm/slurm.h.in
@@ -557,6 +557,12 @@ enum job_acct_types {
 #define PARTITION_DRAIN		(PARTITION_SCHED)
 #define PARTITION_INACTIVE	0x00
 
+/* Partition enforce flags for jobs */
+#define PARTITION_ENFORCE_NONE 0
+#define PARTITION_ENFORCE_ALL  1 /* job limit must be valid for ALL
+				  * partitions */
+#define PARTITION_ENFORCE_ANY  2 /* job limit must be valid for ANY
+				  * partition */
 enum connection_type {
 	SELECT_MESH, 		/* nodes wired in mesh */
 	SELECT_TORUS, 		/* nodes wired in torus */
diff --git a/src/api/config_info.c b/src/api/config_info.c
index 6a459477b01..320c4ce72d6 100644
--- a/src/api/config_info.c
+++ b/src/api/config_info.c
@@ -569,7 +569,8 @@ extern void *slurm_ctl_conf_2_key_pairs (slurm_ctl_conf_t* slurm_ctl_conf_ptr)
 	list_append(ret_list, key_pair);
 	key_pair->name = xstrdup("EnforcePartLimits");
 	key_pair->value = xstrdup(
-		slurm_ctl_conf_ptr->enforce_part_limits ? "Yes" : "No");
+		parse_part_enforce_type_2str(
+			slurm_ctl_conf_ptr->enforce_part_limits));
 
 	key_pair = xmalloc(sizeof(config_key_pair_t));
 	key_pair->name = xstrdup("Epilog");
diff --git a/src/common/read_config.c b/src/common/read_config.c
index a41555074d0..231e717ba69 100644
--- a/src/common/read_config.c
+++ b/src/common/read_config.c
@@ -208,7 +208,7 @@ s_p_options_t slurm_conf_options[] = {
 	{"DefMemPerNode", S_P_UINT32},
 	{"DisableRootJobs", S_P_BOOLEAN},
 	{"EioTimeout", S_P_UINT16},
-	{"EnforcePartLimits", S_P_BOOLEAN},
+	{"EnforcePartLimits", S_P_STRING},
 	{"Epilog", S_P_STRING},
 	{"EpilogMsgTime", S_P_UINT32},
 	{"EpilogSlurmctld", S_P_STRING},
@@ -3153,9 +3153,18 @@ _validate_and_set_defaults(slurm_ctl_conf_t *conf, s_p_hashtbl_t *hashtbl)
 			     "DisableRootJobs", hashtbl))
 		conf->disable_root_jobs = DEFAULT_DISABLE_ROOT_JOBS;
 
-	if (!s_p_get_boolean((bool *) &conf->enforce_part_limits,
-			     "EnforcePartLimits", hashtbl))
+	if (s_p_get_string(&temp_str, "EnforcePartLimits", hashtbl)) {
+		uint16_t enforce_param;
+		if (parse_part_enforce_type(temp_str, &enforce_param) < 0) {
+			error("Bad EnforcePartLimits: %s", temp_str);
+			xfree(temp_str);
+			return SLURM_ERROR;
+		}
+		conf->enforce_part_limits = enforce_param;
+		xfree(temp_str);	/* s_p_get_string() returns a copy */
+	} else {
+		conf->enforce_part_limits = DEFAULT_ENFORCE_PART_LIMITS;
+	}
 
 	s_p_get_string(&conf->epilog, "Epilog", hashtbl);
 
diff --git a/src/common/slurm_protocol_defs.c b/src/common/slurm_protocol_defs.c
index e9120444695..f99ddfda88d 100644
--- a/src/common/slurm_protocol_defs.c
+++ b/src/common/slurm_protocol_defs.c
@@ -4449,3 +4449,45 @@ extern void slurm_free_assoc_mgr_info_request_msg(
 	FREE_NULL_LIST(msg->user_list);
 	xfree(msg);
 }
+
+extern int parse_part_enforce_type(char *enforce_part_type, uint16_t *param)
+{
+	int rc = SLURM_SUCCESS;
+
+	char *value = xstrdup(enforce_part_type);
+
+	if (!strcasecmp(value, "yes")
+	    || !strcasecmp(value, "up")
+	    || !strcasecmp(value, "true")
+	    || !strcasecmp(value, "1") || !strcasecmp(value, "any")) {
+		*param = PARTITION_ENFORCE_ANY;
+	} else if (!strcasecmp(value, "no")
+		   || !strcasecmp(value, "down")
+		   || !strcasecmp(value, "false")
+		   || !strcasecmp(value, "0")) {
+		*param = PARTITION_ENFORCE_NONE;
+	} else if (!strcasecmp(value, "all")) {
+		*param = PARTITION_ENFORCE_ALL;
+	} else {
+		error("Bad EnforcePartLimits: %s\n", value);
+		rc = SLURM_FAILURE;
+	}
+
+	xfree(value);
+	return rc;
+}
+
+extern char *parse_part_enforce_type_2str(uint16_t type)
+{
+	static char type_str[16];
+
+	strcpy(type_str, "UNKNOWN");	/* fallback for unexpected values */
+	if (type == PARTITION_ENFORCE_NONE)
+		strcpy(type_str, "NO");
+	else if (type == PARTITION_ENFORCE_ANY)
+		strcpy(type_str, "ANY");
+	else if (type == PARTITION_ENFORCE_ALL)
+		strcpy(type_str, "ALL");
+
+	return type_str;
+}
diff --git a/src/common/slurm_protocol_defs.h b/src/common/slurm_protocol_defs.h
index f5f7c71eb80..deea37cd876 100644
--- a/src/common/slurm_protocol_defs.h
+++ b/src/common/slurm_protocol_defs.h
@@ -1427,6 +1427,10 @@ extern char *reservation_flags_string(uint32_t flags);
 extern char *   slurm_bb_flags2str(uint32_t bb_flags);
 extern uint32_t slurm_bb_str2flags(char *bb_str);
 
+/* Functions to convert EnforcePartLimits flags between strings and numbers */
+extern int parse_part_enforce_type(char *enforce_part_type, uint16_t *param);
+extern char *parse_part_enforce_type_2str(uint16_t type);
+
 /* Given a protocol opcode return its string
  * description mapping the slurm_msg_type_t
  * to its name.
diff --git a/src/slurmctld/job_mgr.c b/src/slurmctld/job_mgr.c
index f2707ae9dc2..47f1376348c 100644
--- a/src/slurmctld/job_mgr.c
+++ b/src/slurmctld/job_mgr.c
@@ -5094,7 +5094,7 @@ static int _part_access_check(struct part_record *part_ptr,
 			      uid_t submit_uid, slurmdb_qos_rec_t *qos_ptr,
 			      char *acct)
 {
-	uint32_t total_nodes;
+	uint32_t total_nodes, min_nodes_tmp, max_nodes_tmp;
 	size_t resv_name_leng = 0;
 	int rc = SLURM_SUCCESS;
 
@@ -5150,15 +5150,27 @@ static int _part_access_check(struct part_record *part_ptr,
 	}
 
 	if ((part_ptr->state_up & PARTITION_SCHED) &&
-	    (job_desc->min_cpus != NO_VAL) &&
-	    (job_desc->min_cpus >  part_ptr->total_cpus)) {
-		info("_part_access_check: Job requested too many cpus (%u) of "
-		     "partition %s(%u)",
-		     job_desc->min_cpus, part_ptr->name,
-		     part_ptr->total_cpus);
-		return ESLURM_TOO_MANY_REQUESTED_CPUS;
-	}
-
+	    (job_desc->min_cpus != NO_VAL)) {
+		if (job_desc->min_cpus > part_ptr->total_cpus) {
+			info("_part_access_check: Job requested too many "
+			     "cpus (%u) of partition %s(%u)",
+			     job_desc->min_cpus, part_ptr->name,
+			     part_ptr->total_cpus);
+			return ESLURM_TOO_MANY_REQUESTED_CPUS;
+		} else if ((part_ptr->max_cpus_per_node != INFINITE) &&
+			   (job_desc->min_cpus >
+			    (part_ptr->max_cpus_per_node *
+			     part_ptr->total_nodes))) {
+			info("_part_access_check: Job requested too many "
+			     "cpus (%u) of partition %s(%u)",
+			     job_desc->min_cpus, part_ptr->name,
+			     (part_ptr->max_cpus_per_node *
+			      part_ptr->total_nodes));
+			return ESLURM_TOO_MANY_REQUESTED_CPUS;
+		}
+	}
+
+	/* Check against the partition's total node count */
 	total_nodes = part_ptr->total_nodes;
 	select_g_alter_node_cnt(SELECT_APPLY_NODE_MAX_OFFSET, &total_nodes);
 	if ((part_ptr->state_up & PARTITION_SCHED) &&
@@ -5176,6 +5188,36 @@ static int _part_access_check(struct part_record *part_ptr,
 		return ESLURM_REQUESTED_NODES_NOT_IN_PARTITION;
 	}
 
+	/* Check against min/max node limits in the partition */
+	min_nodes_tmp = part_ptr->min_nodes;
+	max_nodes_tmp = part_ptr->max_nodes;
+	if ((part_ptr->state_up & PARTITION_SCHED) &&
+	    (job_desc->min_nodes != NO_VAL) &&
+	    (job_desc->min_nodes < min_nodes_tmp)) {
+		info("_part_access_check: Job requested for nodes (%u) "
+		     "smaller than partition %s(%u) min nodes",
+		     job_desc->min_nodes, part_ptr->name, min_nodes_tmp);
+		return  ESLURM_INVALID_NODE_COUNT;
+	}
+
+	if ((part_ptr->state_up & PARTITION_SCHED) &&
+	    (job_desc->max_nodes != NO_VAL) &&
+	    (job_desc->max_nodes > max_nodes_tmp)) {
+		info("_part_access_check: Job requested node count (%u) "
+		     "greater than partition %s(%u) MaxNodes",
+		     job_desc->max_nodes, part_ptr->name, max_nodes_tmp);
+		return ESLURM_INVALID_NODE_COUNT;
+	}
+
+	if ((part_ptr->state_up & PARTITION_SCHED) &&
+	    (job_desc->time_limit != NO_VAL) &&
+	    (job_desc->time_limit > part_ptr->max_time)) {
+		info("_part_access_check: Job time limit (%u) exceeds limit of "
+		     "partition %s(%u)",
+		     job_desc->time_limit, part_ptr->name, part_ptr->max_time);
+		return ESLURM_INVALID_TIME_LIMIT;
+	}
+
 	if (slurmctld_conf.enforce_part_limits) {
 		if ((rc = part_policy_valid_acct(part_ptr, acct))
 		    != SLURM_SUCCESS)
@@ -5304,11 +5346,11 @@ static int _valid_job_part(job_desc_msg_t * job_desc,
 	slurmdb_assoc_rec_t assoc_rec;
 	uint32_t min_nodes_orig = INFINITE, max_nodes_orig = 1;
 	uint32_t max_time = 0;
+	bool any_check = false;
 
 	/* Change partition pointer(s) to alternates as needed */
 	if (part_ptr_list) {
 		int fail_rc = SLURM_SUCCESS;
-		bool rebuild_name_list = false;
 		ListIterator iter = list_iterator_create(part_ptr_list);
 
 		while ((part_ptr_tmp = (struct part_record *)list_next(iter))) {
@@ -5339,12 +5381,21 @@ static int _valid_job_part(job_desc_msg_t * job_desc,
 							qos_ptr, assoc_ptr ?
 							assoc_ptr->acct : NULL);
 
 			if (rc != SLURM_SUCCESS) {
 				fail_rc = rc;
-				list_remove(iter);
-				rebuild_name_list = true;
-				continue;
+				if (slurmctld_conf.enforce_part_limits ==
+				    PARTITION_ENFORCE_ALL) {
+					break;
+				}
 			}
+			else {
+				any_check = true;
+			}
+
+			/* Set to success since we found a usable partition */
+			if (any_check && slurmctld_conf.enforce_part_limits ==
+			    PARTITION_ENFORCE_ANY)
+				fail_rc = SLURM_SUCCESS;
 
 			min_nodes_orig = MIN(min_nodes_orig,
 					     part_ptr_tmp->min_nodes_orig);
@@ -5353,28 +5407,23 @@ static int _valid_job_part(job_desc_msg_t * job_desc,
 			max_time = MAX(max_time, part_ptr_tmp->max_time);
 		}
 		list_iterator_destroy(iter);
-		if (list_is_empty(part_ptr_list)) {
-			if (fail_rc != SLURM_SUCCESS)
+
+		if (list_is_empty(part_ptr_list) ||
+		    (slurmctld_conf.enforce_part_limits &&
+		     (fail_rc != SLURM_SUCCESS))) {
+			if (slurmctld_conf.enforce_part_limits ==
+			    PARTITION_ENFORCE_ALL)
 				rc = fail_rc;
-			else
+			else if (slurmctld_conf.enforce_part_limits ==
+				 PARTITION_ENFORCE_ANY && !any_check)
+				rc = fail_rc;
+			else {
 				rc = ESLURM_PARTITION_NOT_AVAIL;
-			goto fini;
-		}
-		rc = SLURM_SUCCESS;	/* At least some partition usable */
-		if (rebuild_name_list) {
-			*part_pptr = part_ptr = NULL;
-			xfree(job_desc->partition);
-			iter = list_iterator_create(part_ptr_list);
-			while ((part_ptr_tmp = list_next(iter))) {
-				if (job_desc->partition)
-					xstrcat(job_desc->partition, ",");
-				else
-					*part_pptr = part_ptr = part_ptr_tmp;
-				xstrcat(job_desc->partition,
-					part_ptr_tmp->name);
 			}
-			list_iterator_destroy(iter);
+			goto fini;
 		}
+		rc = SLURM_SUCCESS;	/* At least some partition
+					 * usable */
 	} else {
 		min_nodes_orig = part_ptr->min_nodes_orig;
 		max_nodes_orig = part_ptr->max_nodes_orig;
@@ -5382,11 +5431,18 @@ static int _valid_job_part(job_desc_msg_t * job_desc,
 		rc = _part_access_check(part_ptr, job_desc, req_bitmap,
 					submit_uid, qos_ptr,
 					assoc_ptr ? assoc_ptr->acct : NULL);
-		if (rc != SLURM_SUCCESS)
+
+		if (rc != SLURM_SUCCESS && slurmctld_conf.enforce_part_limits)
 			goto fini;
+
+		/* EnforcePartLimits=NO: accept the job anyway */
+		rc = SLURM_SUCCESS;
 	}
 
 	/* Validate job limits against partition limits */
+
+	/* When multiple partitions are requested, check against the
+	 * partition with the highest limits */
 	if (job_desc->min_nodes == NO_VAL) {
 		/* Avoid setting the job request to 0 nodes if the
 		   user didn't ask for 0.
@@ -6223,7 +6279,7 @@ static int _test_job_desc_fields(job_desc_msg_t * job_desc)
 	    _test_strlen(job_desc->licenses, "licenses", 1024)		||
 	    _test_strlen(job_desc->linuximage, "linuximage", 1024)	||
 	    _test_strlen(job_desc->mail_user, "mail_user", 1024)	||
-	    _test_strlen(job_desc->mcs_label, "mcs_label", 1024) 	||
+	    _test_strlen(job_desc->mcs_label, "mcs_label", 1024)	||
 	    _test_strlen(job_desc->mem_bind, "mem_bind", 1024)		||
 	    _test_strlen(job_desc->mloaderimage, "mloaderimage", 1024)	||
 	    _test_strlen(job_desc->name, "name", 1024)			||
@@ -7716,8 +7772,9 @@ static int _validate_job_desc(job_desc_msg_t * job_desc_msg, int allocate,
 			job_desc_msg->pn_min_memory =
 					slurmctld_conf.def_mem_per_cpu;
 		}
-	} else if (!_validate_min_mem_partition(job_desc_msg, part_ptr, part_list))
+	} else if (!_validate_min_mem_partition(job_desc_msg, part_ptr, part_list)) {
 		return ESLURM_INVALID_TASK_MEMORY;
+	}
 	if (job_desc_msg->pn_min_memory == MEM_PER_CPU) {
 		/* Map --mem-per-cpu=0 to --mem=0 for simpler logic */
 		job_desc_msg->pn_min_memory = 0;
@@ -7760,8 +7817,20 @@ _validate_min_mem_partition(job_desc_msg_t *job_desc_msg,
 	cc = false;
 	iter = list_iterator_create(part_list);
 	while ((part = list_next(iter))) {
-		if ((cc = _valid_pn_min_mem(job_desc_msg, part)))
+		if (!(cc = _valid_pn_min_mem(job_desc_msg, part))) {
+			printf("mem is bad\n");
+			if (slurmctld_conf.enforce_part_limits ==
+			    PARTITION_ENFORCE_ALL) {
+				break;
+			} else if (slurmctld_conf.enforce_part_limits ==
+				   PARTITION_ENFORCE_ANY) {
+				info("%s: Job requested for (%u)MB is invalid"
+				     " for partition %s",
+				     __func__, job_desc_msg->pn_min_memory, part->name);
+			}
+		} else {
 			break;
+		}
 	}
 	list_iterator_destroy(iter);
 
-- 
GitLab