From ab3054151f9b48c8f173c0e0d25dab6c7c342065 Mon Sep 17 00:00:00 2001
From: Moe Jette <jette1@llnl.gov>
Date: Fri, 22 Mar 2002 00:29:36 +0000
Subject: [PATCH] Scheduler functioning with all bells and whistles: NodeList,
 CPU count, Node count, Contiguous, constraints, weights, partitions, etc. -
 Jette

---
 src/slurmctld/node_mgr.c       |  25 ++-
 src/slurmctld/node_scheduler.c | 393 +++++++++++++++++++--------------
 src/slurmctld/partition_mgr.c  |   2 +-
 src/slurmctld/read_config.c    |  12 +-
 4 files changed, 245 insertions(+), 187 deletions(-)

diff --git a/src/slurmctld/node_mgr.c b/src/slurmctld/node_mgr.c
index c48df1ea170..3e22526f16e 100644
--- a/src/slurmctld/node_mgr.c
+++ b/src/slurmctld/node_mgr.c
@@ -24,14 +24,15 @@
 #define NO_VAL	 	(-99)
 #define SEPCHARS 	" \n\t"
 
-List 	Config_List = NULL;		/* List of Config_Record entries */
-int	Node_Record_Count = 0;		/* Count of records in the Node Record Table */
+List 	Config_List = NULL;			/* List of Config_Record entries */
+int	Node_Record_Count = 0;			/* Count of records in the Node Record Table */
 struct Node_Record *Node_Record_Table_Ptr = NULL; /* Location of the node records */
 char 	*Node_State_String[] = {"DOWN", "UNKNOWN", "IDLE", "STAGE_IN", "BUSY", "STAGE_OUT", "DRAINED", "DRAINING", "END"};
-int	*Hash_Table = NULL;		/* Table of hashed indicies into Node_Record */
+int	*Hash_Table = NULL;			/* Table of hashed indices into Node_Record */
 struct 	Config_Record Default_Config_Record;
 struct 	Node_Record   Default_Node_Record;
-time_t 	Last_Node_Update =(time_t)NULL;	/* Time of last update to Node Records */
+time_t 	Last_BitMap_Update =(time_t)NULL;	/* Time of last node creation or deletion */
+time_t 	Last_Node_Update =(time_t)NULL;		/* Time of last update to Node Records */
 
 unsigned *Up_NodeBitMap  = NULL;		/* Bitmap of nodes are UP */
 unsigned *Idle_NodeBitMap = NULL;	/* Bitmap of nodes are IDLE */
@@ -301,7 +302,7 @@ int BitMap2NodeName(unsigned *BitMap, char **Node_List) {
 	    } /* if */
 	} /* if need more memory */
 	if (BitMapValue(BitMap, i) == 0) continue;
-	Split_Node_Name((Node_Record_Table_Ptr+i)->Name, Prefix, Suffix, &Index, &Digits);
+	Split_Node_Name(Node_Record_Table_Ptr[i].Name, Prefix, Suffix, &Index, &Digits);
 	if ((Index == (Last_Index+1)) && 		/* Next in sequence */
 	    (strcmp(Last_Prefix, Prefix) == 0) &&
 	    (strcmp(Last_Suffix, Suffix) == 0)) {
@@ -330,7 +331,7 @@ int BitMap2NodeName(unsigned *BitMap, char **Node_List) {
 	} /* if */
 	if (Index == NO_VAL) {
 	    if (strlen(Node_List[0]) > 0) strcat(Node_List[0],",");
-	    strcat(Node_List[0], (Node_Record_Table_Ptr+i)->Name);
+	    strcat(Node_List[0], Node_Record_Table_Ptr[i].Name);
 	} else {
 	    strcpy(Last_Prefix, Prefix);
 	    strcpy(Last_Suffix, Suffix);
@@ -490,6 +491,7 @@ struct Node_Record *Create_Node_Record(int *Error_Code, struct Config_Record *Co
     Node_Record_Point->CPUs 		= Config_Point->CPUs;
     Node_Record_Point->RealMemory  	= Config_Point->RealMemory;
     Node_Record_Point->TmpDisk  	= Config_Point->TmpDisk;
+    Last_BitMap_Update = time(NULL);
     return Node_Record_Point;
 } /* Create_Node_Record */
 
@@ -532,6 +534,7 @@ int Delete_Node_Record(char *name) {
     } /* if */
     strcpy(Node_Record_Point->Name, "");
     Node_Record_Point->NodeState = STATE_DOWN;
+    Last_BitMap_Update = time(NULL);
     return 0;
 } /* Delete_Node_Record */
 
@@ -542,8 +545,8 @@ void Dump_Hash() {
 
     if (Hash_Table ==  NULL) return;
     for (i=0; i<Node_Record_Count; i++) {
-	if (strlen((Node_Record_Table_Ptr+Hash_Table[i])->Name) == 0) continue;
-	printf("Hash:%d:%s\n", i, (Node_Record_Table_Ptr+Hash_Table[i])->Name);
+	if (strlen(Node_Record_Table_Ptr[Hash_Table[i]].Name) == 0) continue;
+	printf("Hash:%d:%s\n", i, Node_Record_Table_Ptr[Hash_Table[i]].Name);
     } /* for */
 } /* Dump_Hash */
 
@@ -826,7 +829,7 @@ struct Node_Record *Find_Node_Record(char *name) {
 
     /* Revert to sequential search */
     for (i=0; i<Node_Record_Count; i++) {
-	if (strcmp(name, (Node_Record_Table_Ptr+i)->Name) != 0) continue;
+	if (strcmp(name, Node_Record_Table_Ptr[i].Name) != 0) continue;
 	return (Node_Record_Table_Ptr+i);
     } /* for */
 
@@ -1218,8 +1221,8 @@ void Rehash() {
     memset(Hash_Table, 0, (sizeof(int) * Node_Record_Count));
 
     for (i=0; i<Node_Record_Count; i++) {
-	if (strlen((Node_Record_Table_Ptr+i)->Name) == 0) continue;
-	inx = Hash_Index((Node_Record_Table_Ptr+i)->Name);
+	if (strlen(Node_Record_Table_Ptr[i].Name) == 0) continue;
+	inx = Hash_Index(Node_Record_Table_Ptr[i].Name);
 	Hash_Table[inx] = i;
     } /* for */
 
diff --git a/src/slurmctld/node_scheduler.c b/src/slurmctld/node_scheduler.c
index 2e8e322fad4..2d7b1f93c17 100644
--- a/src/slurmctld/node_scheduler.c
+++ b/src/slurmctld/node_scheduler.c
@@ -1,5 +1,5 @@
 /* 
- * node_scheduler.c - Allocated nodes to jobs 
+ * node_scheduler.c - Select and allocate nodes to jobs 
  * See slurm.h for documentation on external functions and data structures
  *
  * NOTE: DEBUG_MODULE mode test with execution line
@@ -38,9 +38,10 @@ int Match_Feature(char *Seek, char *Available);
 int Parse_Job_Specs(char *Job_Specs, char **Req_Features, char **Req_Node_List, char **Job_Name,
 	char **Req_Group, char **Req_Partition, int *Contiguous, int *Req_CPUs, 
 	int *Req_Nodes, int *Min_CPUs, int *Min_Memory, int *Min_TmpDisk, int *Key, int *Shared);
-int Pick_Best_CPUs(unsigned *BitMap, int Req_Nodes, int Req_CPUs, int Consecutive);
-int Pick_Best_Nodes(struct Node_Set *Node_Set_Ptr, int Node_Set_Size, 
-	unsigned **Req_BitMap, int Req_CPUs, int  Req_Nodes, int Contiguous, int Shared);
+int Pick_Best_CPUs(unsigned *BitMap, unsigned *Req_BitMap, int Req_Nodes, int Req_CPUs, 
+	int Consecutive);
+int Pick_Best_Nodes(struct Node_Set *Node_Set_Ptr, int Node_Set_Size, unsigned **Req_BitMap, 
+	int Req_CPUs, int  Req_Nodes, int Contiguous, int Shared, int Max_Nodes);
 int ValidFeatures(char *Requested, char *Available);
 
 #if DEBUG_MODULE
@@ -114,6 +115,20 @@ main(int argc, char * argv[]) {
 #endif
 
 
+/* For a given bitmap, change the state of specified nodes to STAGE_IN */
+/* This is a simple prototype for testing */
+void Allocate_Nodes(unsigned *BitMap) {
+    int i;
+
+    for (i=0; i<Node_Record_Count; i++) {
+	if (BitMapValue(BitMap, i) == 0) continue;
+	Node_Record_Table_Ptr[i].NodeState = STATE_STAGE_IN;
+	BitMapClear(Idle_NodeBitMap, i);
+    } /* for */
+    return;
+} /* Allocate_Nodes */
+
+
 /* 
  * Count_CPUs - Report how many CPUs are associated with the identified nodes 
  * Input: BitMap - A node bitmap
@@ -125,25 +140,12 @@ int Count_CPUs(unsigned *BitMap) {
     sum = 0;
     for (i=0; i<Node_Record_Count; i++) {
 	if (BitMapValue(BitMap, i) != 1) continue;
-	sum += (Node_Record_Table_Ptr+i)->CPUs;
+	sum += Node_Record_Table_Ptr[i].CPUs;
     } /* for */
     return sum;
 } /* Count_CPUs */
 
 
-/* For a given bitmap, change the state of specified nodes to STAGE_IN */
-/* This is a simple prototype for testing */
-void Allocate_Nodes(unsigned *BitMap) {
-    int i;
-
-    for (i=0; i<Node_Record_Count; i++) {
-	if (BitMapValue(BitMap, i) == 0) continue;
-	(Node_Record_Table_Ptr+i)->NodeState = STATE_STAGE_IN;
-	BitMapClear(Idle_NodeBitMap, i);
-    } /* for */
-    return;
-} /* Allocate_Nodes */
-
 /* 
  * Is_Key_Valid - Determine if supplied key is valid
  * Input: Key - A SLURM key acquired by user root
@@ -354,16 +356,26 @@ cleanup:
 /*
  * Pick_Best_CPUs - Identify the nodes which best fit the Req_Nodes and Req_CPUs counts
  * Input: BitMap - The bit map to search
+ *        Req_BitMap - The bit map of nodes that MUST be selected, if not NULL these 
+ *                     have already been confirmed to be in the input BitMap
  *        Req_Nodes - Number of nodes required
  *        Req_CPUs - Number of CPUs required
  *        Consecutive - Nodes must be consecutive is 1, otherwise 0
  * Output: BitMap - Nodes NOT required to satisfy the request are cleared, other left set
  *         Returns zero on success, EINVAL otherwise
+ * NOTE: BitMap must be a superset of Req_BitMap at function call time
  */
-int Pick_Best_CPUs(unsigned *BitMap, int Req_Nodes, int Req_CPUs, int Consecutive) {
+int Pick_Best_CPUs(unsigned *BitMap, unsigned *Req_BitMap, int Req_Nodes, int Req_CPUs, 
+	int Consecutive) {
     int bit, size, word, i, index, Error_Code, Sufficient;
-    int *Consec_Nodes, *Consec_CPUs, *Consec_Start, Consec_Index, Consec_Size;
-    int Rem_CPUs, Rem_Nodes, Best_Fit_Nodes, Best_Fit_CPUs, Best_Fit_Location;
+    int *Consec_Nodes;	/* How many nodes we can add from this consecutive set of nodes */
+    int *Consec_CPUs;	/* How many CPUs we can add from this consecutive set of nodes */
+    int *Consec_Start;	/* Where this consecutive set starts (index) */
+    int *Consec_End;	/* Where this consecutive set ends (index) */
+    int *Consec_Req;	/* Are nodes from this set required (in Req_BitMap) */
+    int Consec_Index, Consec_Size;
+    int Rem_CPUs, Rem_Nodes;	/* Remaining resources required */
+    int Best_Fit_Nodes, Best_Fit_CPUs, Best_Fit_Req, Best_Fit_Location;
     unsigned mask;
 
     if (BitMap == NULL) {
@@ -377,11 +389,14 @@ int Pick_Best_CPUs(unsigned *BitMap, int Req_Nodes, int Req_CPUs, int Consecutiv
 
     Error_Code = EINVAL;	/* Default is no fit */
     Consec_Index = 0;
-    Consec_Size = 1000;
+    Consec_Size  = 50;		/* Start allocation for 50 sets of consecutive nodes */
     Consec_CPUs  = malloc(sizeof(int)*Consec_Size);
     Consec_Nodes = malloc(sizeof(int)*Consec_Size);
     Consec_Start = malloc(sizeof(int)*Consec_Size);
-    if ((Consec_CPUs == NULL) || (Consec_Nodes == NULL) || (Consec_Start == NULL)) {
+    Consec_End   = malloc(sizeof(int)*Consec_Size);
+    Consec_Req   = malloc(sizeof(int)*Consec_Size);
+    if ((Consec_CPUs  == NULL) || (Consec_Nodes == NULL) || 
+	(Consec_Start == NULL) || (Consec_End   == NULL) || (Consec_Req   == NULL)) {
 #if DEBUG_SYSTEM
 	fprintf(stderr, "Pick_Best_CPUs: unable to allocate memory\n");
 #else
@@ -390,29 +405,43 @@ int Pick_Best_CPUs(unsigned *BitMap, int Req_Nodes, int Req_CPUs, int Consecutiv
 	goto cleanup;
     } /* if */
 
-    Consec_CPUs[Consec_Index] = 0;
-    Consec_Nodes[Consec_Index] = 0;
+    Consec_CPUs[Consec_Index] = Consec_Nodes[Consec_Index] = 0;
+    Consec_Req[Consec_Index]  = -1;	/* No required nodes here by default */
     size = (Node_Record_Count + (sizeof(unsigned)*8) - 1) / 8;	/* Bytes */
     size /= sizeof(unsigned);			/* Count of unsigned's */
     index = -1;
+    Rem_CPUs  = Req_CPUs;
+    Rem_Nodes = Req_Nodes;
     for (word=0; word<size; word++) {
 	for (bit=0; bit<(sizeof(unsigned)*8); bit++) {
 	    mask = (0x1 << ((sizeof(unsigned)*8)-1-bit));
 	    index++;
+	    if (Req_BitMap && (Req_BitMap[word] & mask) && (Consec_Req[Consec_Index] == -1))
+		Consec_Req[Consec_Index] = index;	/* First required node in set */
 	    if (BitMap[word] & mask) {
-		if ((Consec_Nodes[Consec_Index]++) == 0) 
-			Consec_Start[Consec_Index] = (word * sizeof(unsigned) * 8 + bit);
-		Consec_CPUs[Consec_Index] += Node_Record_Table_Ptr[index].CPUs;
-		BitMap[word] &= (~mask);
-	    } else if ((Consec_Index == 0) && (Consec_Nodes[0] == 0)) { 
+		if (Consec_Nodes[Consec_Index] == 0) Consec_Start[Consec_Index] = index;
+		i = Node_Record_Table_Ptr[index].CPUs;
+		if (Req_BitMap && (Req_BitMap[word] & mask)) {
+		    Rem_CPUs -= i;	/* Reduce count of additional resources required */
+		    Rem_Nodes--;	/* Reduce count of additional resources required */
+		} else {
+		    BitMap[word] &= (~mask);
+		    Consec_CPUs[Consec_Index] += i;
+		    Consec_Nodes[Consec_Index]++;
+		} /* else */
+	    } else if (Consec_Nodes[Consec_Index] == 0) { 
 		continue;
 	    } else {
+		Consec_End[Consec_Index] = index - 1;
 		if (++Consec_Index >= Consec_Size) {
 		    Consec_Size *= 2;
 		    Consec_CPUs  = realloc(Consec_CPUs , sizeof(int)*Consec_Size);
 		    Consec_Nodes = realloc(Consec_Nodes, sizeof(int)*Consec_Size);
 		    Consec_Start = realloc(Consec_Start, sizeof(int)*Consec_Size);
-		    if ((Consec_CPUs == NULL) || (Consec_Nodes == NULL) || (Consec_Start == NULL)) {
+		    Consec_End   = realloc(Consec_End,   sizeof(int)*Consec_Size);
+		    Consec_Req   = realloc(Consec_Req,   sizeof(int)*Consec_Size);
+		    if ((Consec_CPUs  == NULL) || (Consec_Nodes == NULL) || 
+		        (Consec_Start == NULL) || (Consec_End   == NULL) || (Consec_Req   == NULL)) {
 #if DEBUG_SYSTEM
 			fprintf(stderr, "Pick_Best_CPUs: unable to allocate memory\n");
 #else
@@ -423,46 +452,80 @@ int Pick_Best_CPUs(unsigned *BitMap, int Req_Nodes, int Req_CPUs, int Consecutiv
 		} /* if */
 		Consec_CPUs[Consec_Index]  = 0;
 		Consec_Nodes[Consec_Index] = 0;
+		Consec_Req[Consec_Index]   = -1;
 	    } /* else */
 	} /* for (bit */
     } /* for (word */
-    Consec_Index++;
+    if (Consec_Nodes[Consec_Index] != 0) Consec_End[Consec_Index] = index;
+
+#if DEBUG_SYSTEM > 1
+    for (i=0; i<Consec_Index; i++) {
+	printf("Start=%s, End=%s, Nodes=%d, CPUs=%d", 
+		Node_Record_Table_Ptr[Consec_Start[i]].Name, 
+		Node_Record_Table_Ptr[Consec_End[i]].Name, 
+		Consec_Nodes[i], Consec_CPUs[i]);
+	if (Consec_Req[i] != -1) 
+	    printf(", Req=%s\n", Node_Record_Table_Ptr[Consec_Req[i]].Name);
+	else
+	    printf("\n");
+    } /* for */
+#endif
+
 
-    Rem_CPUs  = Req_CPUs;
-    Rem_Nodes = Req_Nodes;
     while (1) {
 	Best_Fit_CPUs = Best_Fit_Nodes = 0;
+	Best_Fit_Req = -1;
 	for (i=0; i<Consec_Index; i++) {
 	    if (Consec_Nodes[i] == 0) continue;
-	    Sufficient = ((Best_Fit_Nodes >= Rem_Nodes) && (Best_Fit_CPUs >= Rem_CPUs));
+	    Sufficient = ((Consec_Nodes[i] >= Rem_Nodes) && (Consec_CPUs[i] >= Rem_CPUs));
 	    if ((Best_Fit_Nodes == 0) || 					/* First possibility */
+		((Best_Fit_Req == -1) && (Consec_Req[i] != -1)) ||		/* Required nodes */
 	        (Sufficient && (Consec_CPUs[i] < Best_Fit_CPUs)) ||		/* Less waste option */
 	        ((Sufficient == 0) && (Consec_CPUs[i] > Best_Fit_CPUs))) {	/* Larger option */ 
 		Best_Fit_CPUs  = Consec_CPUs[i];
 		Best_Fit_Nodes = Consec_Nodes[i];
 		Best_Fit_Location = i;
+		Best_Fit_Req = Consec_Req[i];
 	    } /* if */
 	} /* for */
-	if (Consecutive && ((Best_Fit_Nodes < Req_Nodes) || (Best_Fit_CPUs < Req_CPUs))) 
+	if (Consecutive && ((Best_Fit_Nodes < Rem_Nodes) || (Best_Fit_CPUs < Rem_CPUs))) 
 		break; /* No hole large enough */
-	for (i=0; i<Best_Fit_Nodes; i++) {
-	    BitMapSet(BitMap, i+Consec_Start[Best_Fit_Location]);
-	    Rem_Nodes--;
-	    Rem_CPUs -= Node_Record_Table_Ptr[i].CPUs;
-	    if ((Rem_Nodes <= 0) && (Rem_CPUs <= 0)) break;
-	} /* for */
-	Consec_CPUs[Best_Fit_Location]  = 0;
-	Consec_Nodes[Best_Fit_Location] = 0;
+	if (Best_Fit_Req != -1) {	/* Work out from required nodes */
+	    for (i=Best_Fit_Req; i<=Consec_End[Best_Fit_Location]; i++) {
+		if ((Rem_Nodes <= 0) && (Rem_CPUs <= 0)) break;
+		BitMapSet(BitMap, i);
+		Rem_Nodes--;
+		Rem_CPUs -= Node_Record_Table_Ptr[i].CPUs;
+	    } /* for */
+	    for (i=(Best_Fit_Req-1); i>=Consec_Start[Best_Fit_Location]; i--) {
+		if ((Rem_Nodes <= 0) && (Rem_CPUs <= 0)) break;
+		if (BitMapValue(BitMap, i) == 1) continue;
+		BitMapSet(BitMap, i);
+		Rem_Nodes--;
+		Rem_CPUs -= Node_Record_Table_Ptr[i].CPUs;
+	    } /* for */
+	} else {
+	    for (i=Consec_Start[Best_Fit_Location]; i<=Consec_End[Best_Fit_Location]; i++) {
+		if ((Rem_Nodes <= 0) && (Rem_CPUs <= 0)) break;
+		BitMapSet(BitMap, i);
+		Rem_Nodes--;
+		Rem_CPUs -= Node_Record_Table_Ptr[i].CPUs;
+	    } /* for */
+	} /* else */
 	if ((Rem_Nodes <= 0) && (Rem_CPUs <= 0)) {
 	    Error_Code = 0;
 	    break;
 	} /* if */
+	Consec_CPUs[Best_Fit_Location]  = 0;
+	Consec_Nodes[Best_Fit_Location] = 0;
     } /* while */
 
 cleanup:
     if (Consec_CPUs ) free(Consec_CPUs);
     if (Consec_Nodes) free(Consec_Nodes);
     if (Consec_Start) free(Consec_Start);
+    if (Consec_End  ) free(Consec_End);
+    if (Consec_Req  ) free(Consec_Req);
     return Error_Code;
 } /* Pick_Best_CPUs */
 
@@ -477,13 +540,14 @@ cleanup:
  *        Req_Nodes - Count of nodes required by the job
  *        Contiguous - Set to 1 if allocated nodes must be contiguous, 0 otherwise
  *        Shared - Set to 1 if nodes may be shared, 0 otherwise
+ *        Max_Nodes - Maximum number of nodes permitted for job, -1 for none (partition limit)
  * Output: Req_BitMap - Pointer to bitmap of selected nodes
  *         Returns 0 on success, EAGAIN if request can not be satisfied now, 
  *		EINVAL if request can never be satisfied (insufficient contiguous nodes)
  * NOTE: The caller must free memory pointed to by Req_BitMap
  */
-int Pick_Best_Nodes(struct Node_Set *Node_Set_Ptr, int Node_Set_Size, 
-	unsigned **Req_BitMap, int Req_CPUs, int  Req_Nodes, int Contiguous, int Shared) {
+int Pick_Best_Nodes(struct Node_Set *Node_Set_Ptr, int Node_Set_Size, unsigned **Req_BitMap, 
+	int Req_CPUs, int  Req_Nodes, int Contiguous, int Shared, int Max_Nodes) {
     int Error_Code, i, j, size;
     int Total_Nodes, Total_CPUs;	/* Total resources configured in partition */
     int Avail_Nodes, Avail_CPUs;	/* Resources available for use now */
@@ -491,38 +555,25 @@ int Pick_Best_Nodes(struct Node_Set *Node_Set_Ptr, int Node_Set_Size,
     int Max_Feature, Min_Feature;
     int *CPUs_Per_Node;
     int Avail_Set, Total_Set, Runable;
-    int Min_CPUs_Per_Node, Max_CPUs_Per_Node, My_CPU_Count, My_Node_Count;
 
     if (Node_Set_Size == 0) return EINVAL;
+    if ((Max_Nodes != -1) && (Req_Nodes > Max_Nodes)) return EINVAL; 
     Error_Code = 0;
+    Avail_BitMap = Total_BitMap = NULL;
     Avail_Nodes = Avail_CPUs = 0;
+    Total_Nodes = Total_CPUs = 0;
     if (Req_BitMap[0]) {	/* Specific nodes required */
 	/* NOTE: We have already confirmed that all of these nodes have a usable */
 	/*       configuration and are in the proper partition */
-	if (Req_Nodes != NO_VAL) Total_Nodes=BitMapCount(Req_BitMap[0]);
-	if (Req_CPUs  != NO_VAL) Total_CPUs=Count_CPUs(Req_BitMap[0]);
-	if (((Req_Nodes == NO_VAL) || (Req_Nodes <= Total_Nodes)) && 
-	    ((Req_CPUs  == NO_VAL) || (Req_CPUs  <= Total_CPUs ))) { 
+	if (Req_Nodes != 0) Total_Nodes=BitMapCount(Req_BitMap[0]);
+	if (Req_CPUs  != 0) Total_CPUs=Count_CPUs(Req_BitMap[0]);
+	if (Total_Nodes > Max_Nodes) return EINVAL;
+	if ((Req_Nodes <= Total_Nodes) && (Req_CPUs  <= Total_CPUs)) { 
 	    if (BitMapIsSuper(Req_BitMap[0], Up_NodeBitMap) != 1) return EAGAIN;
 	    if ((Shared != 1) && (BitMapIsSuper(Req_BitMap[0], Idle_NodeBitMap) != 1)) return EAGAIN;
-	    return 0;
+	    return 0;		/* User can have selected nodes, we're done! */
 	} /* if */
-    } else {			/* Any nodes usable */
-	size = (Node_Record_Count + (sizeof(unsigned)*8) - 1) / 8;	/* Bytes */
-	Avail_BitMap  = malloc(size);
-	Total_BitMap  = malloc(size);
-	if ((Avail_BitMap == NULL) || (Total_BitMap == NULL)){
-#if DEBUG_SYSTEM
-	    fprintf(stderr, "BitMapCopy: unable to allocate memory\n");
-#else
-	    syslog(LOG_ALERT, "BitMapCopy: unable to allocate memory\n");
-#endif
-	    if (Avail_BitMap) free(Avail_BitMap);
-	    if (Total_BitMap) free(Total_BitMap);
-	    return EAGAIN;
-	} /* if */
-	Total_Nodes = Total_CPUs = 0;
-    } /* else */
+    } /* if */
 
     /* Identify how many feature sets we have (e.g. "[FS1|FS2|FS3|FS4]" */
     Max_Feature = Min_Feature = Node_Set_Ptr[0].Feature;
@@ -531,33 +582,25 @@ int Pick_Best_Nodes(struct Node_Set *Node_Set_Ptr, int Node_Set_Size,
 	if (Node_Set_Ptr[i].Feature < Min_Feature) Min_Feature = Node_Set_Ptr[i].Feature;
     } /* for */
 
-    if (Req_BitMap[0]) {
-	if ((Req_CPUs  != NO_VAL) && (Req_CPUs  != 0)) Req_CPUs  -= Total_CPUs;
-	if ((Req_Nodes != NO_VAL) && (Req_Nodes != 0)) Req_Nodes -= Total_Nodes;
-	if (Contiguous) printf("WARNING: Incomplete NodeList with Contiguous not yet supported\n");
-	Contiguous = 0;
-    } /* if */
-
     Runable = 0;	/* Assume not runable until otherwise demonstrated */
-    Min_CPUs_Per_Node = Max_CPUs_Per_Node = -1;
     for (j=Min_Feature; j<=Max_Feature; j++) {
 	Avail_Set = Total_Set = 0;
 	for (i=0; i<Node_Set_Size; i++) {
 	    if (Node_Set_Ptr[i].Feature != j) continue;
-	    if ((Runable == 0) && Total_Set)
-		BitMapOR(Total_BitMap, Node_Set_Ptr[i].My_BitMap);
-	    else if (Runable == 0) {
-		Total_BitMap = BitMapCopy(Node_Set_Ptr[i].My_BitMap);
-		Total_Set = 1;
-	    } /* else */
-	    if (Min_CPUs_Per_Node == -1) 
-		Min_CPUs_Per_Node = Max_CPUs_Per_Node = Node_Set_Ptr[i].CPUs_Per_Node;
-	    else if (Min_CPUs_Per_Node > Node_Set_Ptr[i].CPUs_Per_Node) 
-		Min_CPUs_Per_Node = Node_Set_Ptr[i].CPUs_Per_Node;
-	    else if (Max_CPUs_Per_Node < Node_Set_Ptr[i].CPUs_Per_Node) 
-		Max_CPUs_Per_Node = Node_Set_Ptr[i].CPUs_Per_Node;
-	    Total_Nodes += Node_Set_Ptr[i].Nodes;
-	    Total_CPUs += (Node_Set_Ptr[i].Nodes * Node_Set_Ptr[i].CPUs_Per_Node);
+	    if (Runable == 0) {
+		if (Total_Set)
+		    BitMapOR(Total_BitMap, Node_Set_Ptr[i].My_BitMap);
+		else {
+		    Total_BitMap = BitMapCopy(Node_Set_Ptr[i].My_BitMap);
+		    if (Total_BitMap == NULL) {		/* No memory */
+			if (Avail_BitMap) free(Avail_BitMap);
+			return EAGAIN;
+		    } /* if */
+		    Total_Set = 1;
+		} /* else */
+		Total_Nodes += Node_Set_Ptr[i].Nodes;
+		Total_CPUs += (Node_Set_Ptr[i].Nodes * Node_Set_Ptr[i].CPUs_Per_Node);
+	    } /* if */
 	    BitMapAND(Node_Set_Ptr[i].My_BitMap, Up_NodeBitMap);
 	    if (Shared != 1) BitMapAND(Node_Set_Ptr[i].My_BitMap, Idle_NodeBitMap);
 	    Node_Set_Ptr[i].Nodes = BitMapCount(Node_Set_Ptr[i].My_BitMap);
@@ -565,62 +608,54 @@ int Pick_Best_Nodes(struct Node_Set *Node_Set_Ptr, int Node_Set_Size,
 		BitMapOR(Avail_BitMap, Node_Set_Ptr[i].My_BitMap);
 	    else {
 		Avail_BitMap = BitMapCopy(Node_Set_Ptr[i].My_BitMap);
+		if (Avail_BitMap == NULL) {		/* No memory */
+		    if (Total_BitMap) free(Total_BitMap);
+		    return EAGAIN;
+		} /* if */
 		Avail_Set = 1;
 	    } /* else */
 	    Avail_Nodes += Node_Set_Ptr[i].Nodes;
 	    Avail_CPUs += (Node_Set_Ptr[i].Nodes * Node_Set_Ptr[i].CPUs_Per_Node);
-	    /* Reset node and CPU counts required, remove CPU count if possible */
-	    My_Node_Count = (Req_CPUs + Min_CPUs_Per_Node - 1) / Min_CPUs_Per_Node;
-	    if (Req_CPUs != NO_VAL) {
-		if ((Min_CPUs_Per_Node == Max_CPUs_Per_Node) || (My_Node_Count <= Req_Nodes)) {
-		    My_CPU_Count = 0;	/* FAST: Only need node count */
-		    if (My_Node_Count < Req_Nodes) My_Node_Count = Req_Nodes;
-		} else {		/* SLOW: Need to check both node and CPU counts */
-		    My_CPU_Count  = Req_CPUs;
-		    My_Node_Count = Req_Nodes;
-		} /* else */
-	    } else {
-		My_CPU_Count = 0;
-		if (Req_Nodes == NO_VAL)
-		    My_Node_Count = 0;
-		else
-		    My_Node_Count = Req_Nodes;
-	    } /* else */
-	    if (Avail_Nodes < My_Node_Count) continue;
-	    if (Avail_CPUs  < My_CPU_Count ) continue;
-	    if (My_CPU_Count) 
-		Error_Code = Pick_Best_CPUs(Avail_BitMap, My_Node_Count, My_CPU_Count, Contiguous);
-	    else
-		Error_Code = BitMapFit(Avail_BitMap, My_Node_Count, Contiguous);
+	    if ((Req_BitMap[0]) && (BitMapIsSuper(Req_BitMap[0],Avail_BitMap) == 0)) continue;
+	    if (Avail_Nodes < Req_Nodes) continue;
+	    if (Avail_CPUs  < Req_CPUs ) continue;
+	    Error_Code = Pick_Best_CPUs(Avail_BitMap, Req_BitMap[0], Req_Nodes, Req_CPUs, Contiguous);
+	    if ((Error_Code == 0) && (Max_Nodes != -1) && 
+		(BitMapCount(Avail_BitMap) > Max_Nodes)) {
+		Error_Code = EINVAL;
+		break;
+	    } /* if */
 	    if (Error_Code == 0) {
-		free(Total_BitMap);
-		if (Req_BitMap[0]) {
-		    BitMapOR(Req_BitMap[0], Avail_BitMap);
-		    free(Avail_BitMap);
-		} else
-		    Req_BitMap[0] = Avail_BitMap;
+		if (Total_BitMap)  free(Total_BitMap);
+		if (Req_BitMap[0]) free(Req_BitMap[0]);
+		Req_BitMap[0] = Avail_BitMap;
 		return 0;
 	    } /* if */
 	} /* for (i */
-	if ((Runable == 0) && (Total_Nodes > Req_Nodes) && (Total_CPUs > Req_CPUs)) {	
+	if ((Error_Code == 0) && (Runable == 0) && 
+	    (Total_Nodes > Req_Nodes) && (Total_CPUs > Req_CPUs) && 
+	    ((Req_BitMap[0] == NULL) || (BitMapIsSuper(Req_BitMap[0],Avail_BitMap) == 1)) &&
+	     ((Max_Nodes == -1) || (Req_Nodes <= Max_Nodes))) {	
 	    /* Determine if job could possibly run (if configured nodes all available) */
-	    if (Req_CPUs) 
-		Error_Code = Pick_Best_CPUs(Avail_BitMap, My_Node_Count, Req_CPUs, Contiguous);
-	    else
-		Error_Code = BitMapFit(Total_BitMap, Req_Nodes, Contiguous);
+	    Error_Code = Pick_Best_CPUs(Avail_BitMap, Req_BitMap[0], Req_Nodes, Req_CPUs, Contiguous);
+	    if ((Error_Code == 0) && (Max_Nodes != -1) && 
+		    (BitMapCount(Avail_BitMap) > Max_Nodes)) Error_Code = EINVAL;
 	    if (Error_Code == 0) Runable=1;
 	} /* if */
+	if (Avail_BitMap) free(Avail_BitMap);
+	if (Total_BitMap) free(Total_BitMap);
+	Avail_BitMap = Total_BitMap = NULL;
+	if (Error_Code != 0) break;
     } /* for (j */
 
-    if (Avail_BitMap) free(Avail_BitMap);
-    if (Total_BitMap) free(Total_BitMap);
-    if (Runable == 0) return EINVAL;
-    return EAGAIN;
+    if (Runable == 0)    Error_Code=EINVAL;
+    if (Error_Code == 0) Error_Code=EAGAIN;
+    return Error_Code;
 } /* Pick_Best_Nodes */
 
 
 /*
- * Select_Nodes - Allocate nodes to a job with the given specifications
+ * Select_Nodes - Select and allocate nodes to a job with the given specifications
  * Input: Job_Specs - Job specifications
  *        Node_List - Pointer to node list returned
  * Output: Node_List - List of allocated nodes
@@ -640,13 +675,14 @@ int Select_Nodes(char *Job_Specs, char **Node_List) {
     struct Node_Set *Node_Set_Ptr;
     int Node_Set_Index, Node_Set_Size;
 
-    Req_Features = Req_Node_List = Req_Group = Req_Partition = NULL;
+    Req_Features = Req_Node_List = Job_Name = Req_Group = Req_Partition = NULL;
     Req_BitMap = Scratch_BitMap = NULL;
     Contiguous = Req_CPUs = Req_Nodes = Min_CPUs = Min_Memory = Min_TmpDisk = NO_VAL;
     Key = Shared = NO_VAL;
     Node_Set_Ptr = NULL;
     Config_Record_Iterator = NULL;
     Node_List[0] = NULL;
+    Config_Record_Iterator = (ListIterator)NULL;
 
     /* Setup and basic parsing */
     Error_Code = Parse_Job_Specs(Job_Specs, &Req_Features, &Req_Node_List, &Job_Name, &Req_Group, 
@@ -674,6 +710,9 @@ int Select_Nodes(char *Job_Specs, char **Node_List) {
 	Error_Code =  EINVAL;
 	goto cleanup;
     } /* if */
+    if (Contiguous == NO_VAL) Contiguous=0;	/* Default not contiguous */
+    if (Req_CPUs   == NO_VAL) Req_CPUs=0;	/* Default no CPU count requirements */
+    if (Req_Nodes  == NO_VAL) Req_Nodes=0;	/* Default no node count requirements */
 
 
     /* Find selected partition */
@@ -728,28 +767,6 @@ int Select_Nodes(char *Job_Specs, char **Node_List) {
 
 
     /* Check if select partition has sufficient resources to satisfy request */
-    if ((Req_CPUs != NO_VAL) && (Req_CPUs > Part_Ptr->TotalCPUs)) {
-#if DEBUG_SYSTEM
-	fprintf(stderr, "Select_Nodes: Too many CPUs (%d) requested of partition %s(%d)\n", 
-		Req_CPUs, Part_Ptr->Name, Part_Ptr->TotalCPUs);
-#else
-	syslog(LOG_NOTICE, "Select_Nodes: Too many CPUs (%d) requested of partition %s(%d)\n", 
-		Req_CPUs, Part_Ptr->Name, Part_Ptr->TotalCPUs);
-#endif
-	Error_Code = EINVAL;
-	goto cleanup;
-    } /* if */
-    if ((Req_Nodes != NO_VAL) && (Req_Nodes > Part_Ptr->TotalNodes)) {
-#if DEBUG_SYSTEM
-	fprintf(stderr, "Select_Nodes: Too many nodes (%d) requested of partition %s(%d)\n", 
-		Req_Nodes, Part_Ptr->Name, Part_Ptr->TotalNodes);
-#else
-	syslog(LOG_NOTICE, "Select_Nodes: Too many nodes (%d) requested of partition %s(%d)\n", 
-		Req_Nodes, Part_Ptr->Name, Part_Ptr->TotalNodes);
-#endif
-	Error_Code = EINVAL;
-	goto cleanup;
-    } /* if */
     if (Req_Node_List) { /* Insure that selected nodes are in this partition */
 	Error_Code = NodeName2BitMap(Req_Node_List, &Req_BitMap);
 	if (Error_Code == EINVAL) goto cleanup;
@@ -769,6 +786,36 @@ int Select_Nodes(char *Job_Specs, char **Node_List) {
 	    Error_Code = EINVAL;
 	    goto cleanup;
 	} /* if */
+	i = Count_CPUs(Req_BitMap);
+	if (i > Req_CPUs) Req_CPUs=i;
+	i = BitMapCount(Req_BitMap);
+	if (i > Req_Nodes) Req_Nodes=i;
+    } /* if */
+    if (Req_CPUs > Part_Ptr->TotalCPUs) {
+#if DEBUG_SYSTEM
+	fprintf(stderr, "Select_Nodes: Too many CPUs (%d) requested of partition %s(%d)\n", 
+		Req_CPUs, Part_Ptr->Name, Part_Ptr->TotalCPUs);
+#else
+	syslog(LOG_NOTICE, "Select_Nodes: Too many CPUs (%d) requested of partition %s(%d)\n", 
+		Req_CPUs, Part_Ptr->Name, Part_Ptr->TotalCPUs);
+#endif
+	Error_Code = EINVAL;
+	goto cleanup;
+    } /* if */
+    if ((Req_Nodes > Part_Ptr->TotalNodes) || (Req_Nodes > Part_Ptr->MaxNodes)) {
+	if (Part_Ptr->TotalNodes > Part_Ptr->MaxNodes)
+	    i = Part_Ptr->MaxNodes;
+	else
+	    i = Part_Ptr->TotalNodes;
+#if DEBUG_SYSTEM
+	fprintf(stderr, "Select_Nodes: Too many nodes (%d) requested of partition %s(%d)\n", 
+		Req_Nodes, Part_Ptr->Name, i);
+#else
+	syslog(LOG_NOTICE, "Select_Nodes: Too many nodes (%d) requested of partition %s(%d)\n", 
+		Req_Nodes, Part_Ptr->Name, i);
+#endif
+	Error_Code = EINVAL;
+	goto cleanup;
     } /* if */
     if (Part_Ptr->Shared == 2)				/* Shared=FORCE */
 	Shared = 1;
@@ -810,16 +857,15 @@ int Select_Nodes(char *Job_Specs, char **Node_List) {
 
 	/* Since nodes can register with more resources than defined in the configuration,    */
 	/* we want to use those higher values for scheduling, but only as needed */
-	if (((Min_CPUs    != NO_VAL) && (Min_CPUs    > Config_Record_Point->CPUs))  ||
-	    ((Min_Memory  != NO_VAL) && (Min_Memory  > Config_Record_Point->RealMemory)) ||
-	    ((Min_TmpDisk != NO_VAL) && (Min_TmpDisk > Config_Record_Point->TmpDisk))) 
+	if ((Min_CPUs    > Config_Record_Point->CPUs)  ||
+	    (Min_Memory  > Config_Record_Point->RealMemory) ||
+	    (Min_TmpDisk > Config_Record_Point->TmpDisk) )
 	    Check_Node_Config = 1;
 	else
 	    Check_Node_Config = 0;
 	Node_Set_Ptr[Node_Set_Index].My_BitMap = BitMapCopy(Config_Record_Point->NodeBitMap);
 	if (Node_Set_Ptr[Node_Set_Index].My_BitMap == NULL) {
 	    Error_Code = EAGAIN;  /* No memory */
-	    list_iterator_destroy(Config_Record_Iterator);
 	    goto cleanup;
 	} /* if */
 	BitMapAND(Node_Set_Ptr[Node_Set_Index].My_BitMap, Part_Ptr->NodeBitMap);
@@ -829,12 +875,12 @@ int Select_Nodes(char *Job_Specs, char **Node_List) {
 	if (Check_Node_Config && (Node_Set_Ptr[Node_Set_Index].Nodes != 0)) {
 	    for (i=0; i<Node_Record_Count; i++) {
 		if (BitMapValue(Node_Set_Ptr[Node_Set_Index].My_BitMap, i) == 0) continue;
-		if (((Min_CPUs    != NO_VAL) && (Min_CPUs    > Node_Record_Table_Ptr[i].CPUs))       ||
-		    ((Min_Memory  != NO_VAL) && (Min_Memory  > Node_Record_Table_Ptr[i].RealMemory)) ||
-		    ((Min_TmpDisk != NO_VAL) && (Min_TmpDisk > Node_Record_Table_Ptr[i].TmpDisk))) 
-		    BitMapClear(Node_Set_Ptr[Node_Set_Index].My_BitMap, i);
+		if ((Min_CPUs    <= Node_Record_Table_Ptr[i].CPUs)       &&
+		    (Min_Memory  <= Node_Record_Table_Ptr[i].RealMemory) &&
+		    (Min_TmpDisk <= Node_Record_Table_Ptr[i].TmpDisk)) continue;
+		BitMapClear(Node_Set_Ptr[Node_Set_Index].My_BitMap, i);
+		if ((--Node_Set_Ptr[Node_Set_Index].Nodes) == 0) break;
 	    } /* for */
-	    Node_Set_Ptr[Node_Set_Index].Nodes = BitMapCount(Node_Set_Ptr[Node_Set_Index].My_BitMap);
 	} /* if */
 	if (Node_Set_Ptr[Node_Set_Index].Nodes == 0) {
 	    free(Node_Set_Ptr[Node_Set_Index].My_BitMap);
@@ -844,8 +890,13 @@ int Select_Nodes(char *Job_Specs, char **Node_List) {
 	if (Req_BitMap) {
 	    if (Scratch_BitMap) 
 		BitMapOR(Scratch_BitMap, Node_Set_Ptr[Node_Set_Index].My_BitMap);
-	    else
+	    else {
 		Scratch_BitMap = BitMapCopy(Node_Set_Ptr[Node_Set_Index].My_BitMap);
+		if (Scratch_BitMap == NULL) { /* No memory */
+		    Error_Code = EAGAIN; 
+		    goto cleanup;
+		} /* if */
+	    } /* else */
 	} /* if */
 	Node_Set_Ptr[Node_Set_Index].CPUs_Per_Node = Config_Record_Point->CPUs;
 	Node_Set_Ptr[Node_Set_Index].Weight = Config_Record_Point->Weight;
@@ -863,13 +914,11 @@ int Select_Nodes(char *Job_Specs, char **Node_List) {
 #else
 	    syslog(LOG_ALERT, "Select_Nodes: Unable to allocate memory\n");
 #endif
-	    list_iterator_destroy(Config_Record_Iterator);
 	    Error_Code = EAGAIN;   /* No memory */
 	    goto cleanup;
 	} /* if */
 	Node_Set_Ptr[Node_Set_Size++].My_BitMap = NULL;
     } /* while */
-    list_iterator_destroy(Config_Record_Iterator);
     if (Node_Set_Index == 0) {
 #if DEBUG_SYSTEM
 	fprintf(stderr, "Select_Nodes: No node configurations satisfy requirements %d:%d:%d:%s\n", 
@@ -901,12 +950,17 @@ int Select_Nodes(char *Job_Specs, char **Node_List) {
 
 
     /* Pick the nodes providing a best-fit */
-    if (Contiguous == NO_VAL) Contiguous=0;	/* Default not contiguous */
-    if (Req_CPUs   == NO_VAL) Req_CPUs=0;	/* Default no CPU count requirements */
-    if (Req_Nodes  == NO_VAL) Req_Nodes=0;	/* Default no node count requirements */
     Error_Code = Pick_Best_Nodes(Node_Set_Ptr, Node_Set_Size, 
-	&Req_BitMap, Req_CPUs, Req_Nodes, Contiguous, Shared);
-    if (Error_Code) goto cleanup;
+	&Req_BitMap, Req_CPUs, Req_Nodes, Contiguous, Shared, Part_Ptr->MaxNodes);
+    if (Error_Code == EAGAIN) goto cleanup;
+    if (Error_Code == EINVAL) {
+#if DEBUG_SYSTEM
+	fprintf(stderr, "Select_Nodes: No nodes can satisfy job request\n");
+#else
+	syslog(LOG_NOTICE, "Select_Nodes: No nodes can satisfy job request\n");
+#endif
+	goto cleanup;
+    } /* if */
 
     /* Mark the selected nodes as STATE_STAGE_IN */
     Allocate_Nodes(Req_BitMap);
@@ -928,6 +982,7 @@ cleanup:
 	} /* for */
 	free(Node_Set_Ptr);
     } /* if */
+    if (Config_Record_Iterator) list_iterator_destroy(Config_Record_Iterator);
     return Error_Code;
 } /* Select_Nodes */
 
diff --git a/src/slurmctld/partition_mgr.c b/src/slurmctld/partition_mgr.c
index db8f58e6de0..196c70e8c7f 100644
--- a/src/slurmctld/partition_mgr.c
+++ b/src/slurmctld/partition_mgr.c
@@ -272,7 +272,7 @@ int Build_Part_BitMap(struct Part_Record *Part_Record_Point) {
     /* Unlink nodes removed from the partition */
     for (i=0; i<Node_Record_Count; i++) {
 	if (BitMapValue(Old_BitMap, i) == 0) continue;
-	(Node_Record_Table_Ptr+i)->Partition_Ptr = NULL;
+	Node_Record_Table_Ptr[i].Partition_Ptr = NULL;
     } /* for */
 
     if(My_Node_List) free(My_Node_List);
diff --git a/src/slurmctld/read_config.c b/src/slurmctld/read_config.c
index cba33101221..f0f83e1dd1b 100644
--- a/src/slurmctld/read_config.c
+++ b/src/slurmctld/read_config.c
@@ -66,8 +66,8 @@ main(int argc, char * argv[]) {
     printf("\n");
 
     for (i=0; i<Node_Record_Count; i++) {
-	if (strlen((Node_Record_Table_Ptr+i)->Name) == 0) continue;
-	printf("NodeName=%s ",      (Node_Record_Table_Ptr+i)->Name);
+	if (strlen(Node_Record_Table_Ptr[i].Name) == 0) continue;
+	printf("NodeName=%s ",      Node_Record_Table_Ptr[i].Name);
 	printf("NodeState=%s ",     Node_State_String[Node_Record_Table_Ptr[i].NodeState]);
 	printf("LastResponse=%ld ", (long)Node_Record_Table_Ptr[i].LastResponse);
 
@@ -206,10 +206,10 @@ int Build_BitMaps() {
 
     /* Scan all nodes and identify which are UP and IDLE and their configuration */
     for (i=0; i<Node_Record_Count; i++) {
-	if (strlen((Node_Record_Table_Ptr+i)->Name) == 0) continue;	/* Defunct */
-	if ((Node_Record_Table_Ptr+i)->NodeState == STATE_IDLE) BitMapSet(Idle_NodeBitMap, i);
-	if ((Node_Record_Table_Ptr+i)->NodeState != STATE_DOWN) BitMapSet(Up_NodeBitMap, i);
-	if ((Node_Record_Table_Ptr+i)->Config_Ptr) 
+	if (strlen(Node_Record_Table_Ptr[i].Name) == 0) continue;	/* Defunct */
+	if (Node_Record_Table_Ptr[i].NodeState == STATE_IDLE) BitMapSet(Idle_NodeBitMap, i);
+	if (Node_Record_Table_Ptr[i].NodeState != STATE_DOWN) BitMapSet(Up_NodeBitMap, i);
+	if (Node_Record_Table_Ptr[i].Config_Ptr) 
 		BitMapSet(Node_Record_Table_Ptr[i].Config_Ptr->NodeBitMap, i);
     } /* for */
 
-- 
GitLab