From 3d4a589978a7cd688f353e847fa2512c8637fc30 Mon Sep 17 00:00:00 2001
From: Moe Jette <jette1@llnl.gov>
Date: Wed, 20 Mar 2002 22:23:17 +0000
Subject: [PATCH] Node selection for scheduling is complete, except that it
 does not support a NodeList with the Contiguous option when the CPU or node
 count specification exceeds the count in the list (additional selected nodes
 may not be contiguous with those in the NodeList). - Jette

---
 src/slurmctld/bits_bytes.c     |  11 +-
 src/slurmctld/node_scheduler.c | 210 ++++++++++++++++++++++++++++++---
 src/slurmctld/read_config.c    |  20 ++--
 3 files changed, 207 insertions(+), 34 deletions(-)

diff --git a/src/slurmctld/bits_bytes.c b/src/slurmctld/bits_bytes.c
index 7f6672ed22c..1ed882af2ba 100644
--- a/src/slurmctld/bits_bytes.c
+++ b/src/slurmctld/bits_bytes.c
@@ -242,7 +242,7 @@ int BitMapConsecutive(unsigned *BitMap, int *Position) {
  * NOTE:  The returned value MUST BE FREED by the calling routine
  */
 unsigned *BitMapCopy(unsigned *BitMap) {
-    int i, size;
+    int size;
     unsigned *Output;
 
     if (BitMap == NULL) {
@@ -265,10 +265,7 @@ unsigned *BitMapCopy(unsigned *BitMap) {
 	return NULL;
     } /* if */
 
-    size /= sizeof(unsigned);			/* Count of unsigned's */
-    for (i=0; i<size; i++) {
-	Output[i] = BitMap[i];
-    } /* for (i */
+    (void) memcpy(Output, BitMap, size);
     return Output;
 } /* BitMapCopy */
 
@@ -362,7 +359,7 @@ void BitMapFill(unsigned *BitMap) {
 
 
 /*
- * BitMapFit - Identify the nodes which best fit the Request count
+ * BitMapFit - Identify the nodes which best fit the Req_Nodes count
  * Input: BitMap - The bit map to search
  *        Req_Nodes - Number of nodes required
  *        Consecutive - Nodes must be consecutive is 1, otherwise 0
@@ -408,6 +405,8 @@ int BitMapFit(unsigned *BitMap, int Req_Nodes, int Consecutive) {
 		if ((Consec_Count[Consec_Index]++) == 0) 
 			Consec_Start[Consec_Index] = (word * sizeof(unsigned) * 8 + bit);
 		BitMap[word] &= (~mask);
+	    } else if ((Consec_Index == 0) && (Consec_Count[0] == 0)) { 
+		continue;
 	    } else {
 		if (++Consec_Index >= Consec_Size) {
 		    Consec_Size *= 2;
diff --git a/src/slurmctld/node_scheduler.c b/src/slurmctld/node_scheduler.c
index 9b7f3dd8ad2..2e8e322fad4 100644
--- a/src/slurmctld/node_scheduler.c
+++ b/src/slurmctld/node_scheduler.c
@@ -38,6 +38,7 @@ int Match_Feature(char *Seek, char *Available);
 int Parse_Job_Specs(char *Job_Specs, char **Req_Features, char **Req_Node_List, char **Job_Name,
 	char **Req_Group, char **Req_Partition, int *Contiguous, int *Req_CPUs, 
 	int *Req_Nodes, int *Min_CPUs, int *Min_Memory, int *Min_TmpDisk, int *Key, int *Shared);
+int Pick_Best_CPUs(unsigned *BitMap, int Req_Nodes, int Req_CPUs, int Consecutive);
 int Pick_Best_Nodes(struct Node_Set *Node_Set_Ptr, int Node_Set_Size, 
 	unsigned **Req_BitMap, int Req_CPUs, int  Req_Nodes, int Contiguous, int Shared);
 int ValidFeatures(char *Requested, char *Available);
@@ -350,6 +351,122 @@ cleanup:
 } /* Parse_Job_Specs */
 
 
+/*
+ * Pick_Best_CPUs - Identify the nodes which best fit the Req_Nodes and Req_CPUs counts
+ * Input: BitMap - The bit map to search
+ *        Req_Nodes - Number of nodes required
+ *        Req_CPUs - Number of CPUs required
+ *        Consecutive - If 1, allocated nodes must be consecutive; 0 otherwise
+ * Output: BitMap - Nodes NOT required to satisfy the request are cleared, other left set
+ *         Returns zero on success, EINVAL otherwise
+ */
+int Pick_Best_CPUs(unsigned *BitMap, int Req_Nodes, int Req_CPUs, int Consecutive) {
+    int bit, size, word, i, index, Error_Code, Sufficient;
+    int *Consec_Nodes, *Consec_CPUs, *Consec_Start, Consec_Index, Consec_Size;
+    int Rem_CPUs, Rem_Nodes, Best_Fit_Nodes, Best_Fit_CPUs, Best_Fit_Location = 0;	/* Init: avoid uninitialized read when no set fits */
+    unsigned mask;
+
+    if (BitMap == NULL) {
+#if DEBUG_SYSTEM
+	fprintf(stderr, "Pick_Best_CPUs: BitMap pointer is NULL\n");
+#else
+	syslog(LOG_ALERT, "Pick_Best_CPUs: BitMap pointer is NULL\n");
+#endif
+	return EINVAL;
+    } /* if */
+
+    Error_Code = EINVAL;	/* Default is no fit */
+    Consec_Index = 0;
+    Consec_Size = 1000;
+    Consec_CPUs  = malloc(sizeof(int)*Consec_Size);
+    Consec_Nodes = malloc(sizeof(int)*Consec_Size);
+    Consec_Start = malloc(sizeof(int)*Consec_Size);
+    if ((Consec_CPUs == NULL) || (Consec_Nodes == NULL) || (Consec_Start == NULL)) {
+#if DEBUG_SYSTEM
+	fprintf(stderr, "Pick_Best_CPUs: unable to allocate memory\n");
+#else
+	syslog(LOG_ALERT, "Pick_Best_CPUs: unable to allocate memory\n");
+#endif
+	goto cleanup;
+    } /* if */
+
+    Consec_CPUs[Consec_Index] = 0;
+    Consec_Nodes[Consec_Index] = 0;
+    size = (Node_Record_Count + (sizeof(unsigned)*8) - 1) / 8;	/* Bytes */
+    size /= sizeof(unsigned);			/* Count of unsigned's */
+    index = -1;
+    for (word=0; word<size; word++) {
+	for (bit=0; bit<(sizeof(unsigned)*8); bit++) {
+	    mask = (0x1 << ((sizeof(unsigned)*8)-1-bit));
+	    index++;
+	    if (BitMap[word] & mask) {
+		if ((Consec_Nodes[Consec_Index]++) == 0) 
+			Consec_Start[Consec_Index] = (word * sizeof(unsigned) * 8 + bit);
+		Consec_CPUs[Consec_Index] += Node_Record_Table_Ptr[index].CPUs;
+		BitMap[word] &= (~mask);
+	    } else if ((Consec_Index == 0) && (Consec_Nodes[0] == 0)) { 
+		continue;
+	    } else {
+		if (++Consec_Index >= Consec_Size) {
+		    Consec_Size *= 2;
+		    Consec_CPUs  = realloc(Consec_CPUs , sizeof(int)*Consec_Size);	/* NOTE: old block leaks if realloc fails */
+		    Consec_Nodes = realloc(Consec_Nodes, sizeof(int)*Consec_Size);
+		    Consec_Start = realloc(Consec_Start, sizeof(int)*Consec_Size);
+		    if ((Consec_CPUs == NULL) || (Consec_Nodes == NULL) || (Consec_Start == NULL)) {
+#if DEBUG_SYSTEM
+			fprintf(stderr, "Pick_Best_CPUs: unable to allocate memory\n");
+#else
+			syslog(LOG_ALERT, "Pick_Best_CPUs: unable to allocate memory\n");
+#endif
+			goto cleanup;
+		    } /* if */
+		} /* if */
+		Consec_CPUs[Consec_Index]  = 0;
+		Consec_Nodes[Consec_Index] = 0;
+	    } /* else */
+	} /* for (bit */
+    } /* for (word */
+    Consec_Index++;
+
+    Rem_CPUs  = Req_CPUs;
+    Rem_Nodes = Req_Nodes;
+    while (1) {
+	Best_Fit_CPUs = Best_Fit_Nodes = 0;
+	for (i=0; i<Consec_Index; i++) {
+	    if (Consec_Nodes[i] == 0) continue;
+	    Sufficient = ((Best_Fit_Nodes >= Rem_Nodes) && (Best_Fit_CPUs >= Rem_CPUs));
+	    if ((Best_Fit_Nodes == 0) || 					/* First possibility */
+	        (Sufficient && (Consec_CPUs[i] < Best_Fit_CPUs)) ||		/* Less waste option */
+	        ((Sufficient == 0) && (Consec_CPUs[i] > Best_Fit_CPUs))) {	/* Larger option */ 
+		Best_Fit_CPUs  = Consec_CPUs[i];
+		Best_Fit_Nodes = Consec_Nodes[i];
+		Best_Fit_Location = i;
+	    } /* if */
+	} /* for */
+	if ((Best_Fit_Nodes == 0) || (Consecutive && ((Best_Fit_Nodes < Req_Nodes) || (Best_Fit_CPUs < Req_CPUs)))) 
+		break; /* Nothing usable left, or no hole large enough for Contiguous */
+	for (i=0; i<Best_Fit_Nodes; i++) {
+	    BitMapSet(BitMap, i+Consec_Start[Best_Fit_Location]);
+	    Rem_Nodes--;
+	    Rem_CPUs -= Node_Record_Table_Ptr[i+Consec_Start[Best_Fit_Location]].CPUs;	/* CPUs of the node actually selected */
+	    if ((Rem_Nodes <= 0) && (Rem_CPUs <= 0)) break;
+	} /* for */
+	Consec_CPUs[Best_Fit_Location]  = 0;
+	Consec_Nodes[Best_Fit_Location] = 0;
+	if ((Rem_Nodes <= 0) && (Rem_CPUs <= 0)) {
+	    Error_Code = 0;
+	    break;
+	} /* if */
+    } /* while */
+
+cleanup:
+    if (Consec_CPUs ) free(Consec_CPUs);
+    if (Consec_Nodes) free(Consec_Nodes);
+    if (Consec_Start) free(Consec_Start);
+    return Error_Code;
+} /* Pick_Best_CPUs */
+
+
 /*
  * Pick_Best_Nodes - From nodes satisfying partition and configuration specifications, 
  *	select the "best" for use
@@ -373,6 +490,8 @@ int Pick_Best_Nodes(struct Node_Set *Node_Set_Ptr, int Node_Set_Size,
     unsigned *Avail_BitMap, *Total_BitMap;
     int Max_Feature, Min_Feature;
     int *CPUs_Per_Node;
+    int Avail_Set, Total_Set, Runable;
+    int Min_CPUs_Per_Node, Max_CPUs_Per_Node, My_CPU_Count, My_Node_Count;
 
     if (Node_Set_Size == 0) return EINVAL;
     Error_Code = 0;
@@ -402,8 +521,6 @@ int Pick_Best_Nodes(struct Node_Set *Node_Set_Ptr, int Node_Set_Size,
 	    if (Total_BitMap) free(Total_BitMap);
 	    return EAGAIN;
 	} /* if */
-	memset(Total_BitMap, 0, size);
-	memset(Avail_BitMap, 0, size);
 	Total_Nodes = Total_CPUs = 0;
     } /* else */
 
@@ -414,36 +531,90 @@ int Pick_Best_Nodes(struct Node_Set *Node_Set_Ptr, int Node_Set_Size,
 	if (Node_Set_Ptr[i].Feature < Min_Feature) Min_Feature = Node_Set_Ptr[i].Feature;
     } /* for */
 
-if (Req_CPUs != NO_VAL)	{ printf("CPU requirement for job not yet supported\n"); return EINVAL; }
-if (Req_BitMap[0])	{ printf("Incomplete job NodeList not yet supported\n");return EINVAL; }
-if (Contiguous!= NO_VAL){ printf("Contiguous node allocation for job not yet supported\n"); return EINVAL; }
-printf("More work to be done in node selection\n");
+    if (Req_BitMap[0]) {
+	if ((Req_CPUs  != NO_VAL) && (Req_CPUs  != 0)) Req_CPUs  -= Total_CPUs;
+	if ((Req_Nodes != NO_VAL) && (Req_Nodes != 0)) Req_Nodes -= Total_Nodes;
+	if (Contiguous) printf("WARNING: Incomplete NodeList with Contiguous not yet supported\n");
+	Contiguous = 0;
+    } /* if */
 
+    Runable = 0;	/* Assume not runable until otherwise demonstrated */
+    Min_CPUs_Per_Node = Max_CPUs_Per_Node = -1;
     for (j=Min_Feature; j<=Max_Feature; j++) {
+	Avail_Set = Total_Set = 0;
 	for (i=0; i<Node_Set_Size; i++) {
 	    if (Node_Set_Ptr[i].Feature != j) continue;
-	    BitMapOR(Total_BitMap, Node_Set_Ptr[i].My_BitMap);
+	    if ((Runable == 0) && Total_Set)
+		BitMapOR(Total_BitMap, Node_Set_Ptr[i].My_BitMap);
+	    else if (Runable == 0) {
+		Total_BitMap = BitMapCopy(Node_Set_Ptr[i].My_BitMap);
+		Total_Set = 1;
+	    } /* else */
+	    if (Min_CPUs_Per_Node == -1) 
+		Min_CPUs_Per_Node = Max_CPUs_Per_Node = Node_Set_Ptr[i].CPUs_Per_Node;
+	    else if (Min_CPUs_Per_Node > Node_Set_Ptr[i].CPUs_Per_Node) 
+		Min_CPUs_Per_Node = Node_Set_Ptr[i].CPUs_Per_Node;
+	    else if (Max_CPUs_Per_Node < Node_Set_Ptr[i].CPUs_Per_Node) 
+		Max_CPUs_Per_Node = Node_Set_Ptr[i].CPUs_Per_Node;
 	    Total_Nodes += Node_Set_Ptr[i].Nodes;
 	    Total_CPUs += (Node_Set_Ptr[i].Nodes * Node_Set_Ptr[i].CPUs_Per_Node);
 	    BitMapAND(Node_Set_Ptr[i].My_BitMap, Up_NodeBitMap);
 	    if (Shared != 1) BitMapAND(Node_Set_Ptr[i].My_BitMap, Idle_NodeBitMap);
 	    Node_Set_Ptr[i].Nodes = BitMapCount(Node_Set_Ptr[i].My_BitMap);
-	    BitMapOR(Avail_BitMap, Node_Set_Ptr[i].My_BitMap);
+	    if (Avail_Set)
+		BitMapOR(Avail_BitMap, Node_Set_Ptr[i].My_BitMap);
+	    else {
+		Avail_BitMap = BitMapCopy(Node_Set_Ptr[i].My_BitMap);
+		Avail_Set = 1;
+	    } /* else */
 	    Avail_Nodes += Node_Set_Ptr[i].Nodes;
 	    Avail_CPUs += (Node_Set_Ptr[i].Nodes * Node_Set_Ptr[i].CPUs_Per_Node);
-	    if (Req_Nodes != NO_VAL) {
-		Error_Code = BitMapFit(Avail_BitMap, Req_Nodes, Contiguous);
-		if (Error_Code == 0) {
+	    /* Reset node and CPU counts required, remove CPU count if possible */
+	    My_Node_Count = (Req_CPUs + Min_CPUs_Per_Node - 1) / Min_CPUs_Per_Node;
+	    if (Req_CPUs != NO_VAL) {
+		if ((Min_CPUs_Per_Node == Max_CPUs_Per_Node) || (My_Node_Count <= Req_Nodes)) {
+		    My_CPU_Count = 0;	/* FAST: Only need node count */
+		    if (My_Node_Count < Req_Nodes) My_Node_Count = Req_Nodes;
+		} else {		/* SLOW: Need to check both node and CPU counts */
+		    My_CPU_Count  = Req_CPUs;
+		    My_Node_Count = Req_Nodes;
+		} /* else */
+	    } else {
+		My_CPU_Count = 0;
+		if (Req_Nodes == NO_VAL)
+		    My_Node_Count = 0;
+		else
+		    My_Node_Count = Req_Nodes;
+	    } /* else */
+	    if (Avail_Nodes < My_Node_Count) continue;
+	    if (Avail_CPUs  < My_CPU_Count ) continue;
+	    if (My_CPU_Count) 
+		Error_Code = Pick_Best_CPUs(Avail_BitMap, My_Node_Count, My_CPU_Count, Contiguous);
+	    else
+		Error_Code = BitMapFit(Avail_BitMap, My_Node_Count, Contiguous);
+	    if (Error_Code == 0) {
+		free(Total_BitMap);
+		if (Req_BitMap[0]) {
+		    BitMapOR(Req_BitMap[0], Avail_BitMap);
+		    free(Avail_BitMap);
+		} else
 		    Req_BitMap[0] = Avail_BitMap;
-		    free(Total_BitMap);
-		    return 0;
-		} /* if */
+		return 0;
 	    } /* if */
 	} /* for (i */
-	memset(Total_BitMap, 0, size);
-	memset(Avail_BitMap, 0, size);
+	if ((Runable == 0) && (Total_Nodes > Req_Nodes) && (Total_CPUs > Req_CPUs)) {	
+	    /* Determine if job could possibly run (if configured nodes all available) */
+	    if (Req_CPUs) 
+		Error_Code = Pick_Best_CPUs(Avail_BitMap, My_Node_Count, Req_CPUs, Contiguous);
+	    else
+		Error_Code = BitMapFit(Total_BitMap, Req_Nodes, Contiguous);
+	    if (Error_Code == 0) Runable=1;
+	} /* if */
     } /* for (j */
 
+    if (Avail_BitMap) free(Avail_BitMap);
+    if (Total_BitMap) free(Total_BitMap);
+    if (Runable == 0) return EINVAL;
     return EAGAIN;
 } /* Pick_Best_Nodes */
 
@@ -599,9 +770,9 @@ int Select_Nodes(char *Job_Specs, char **Node_List) {
 	    goto cleanup;
 	} /* if */
     } /* if */
-    if (Part_Ptr->Shared == 2)
+    if (Part_Ptr->Shared == 2)				/* Shared=FORCE */
 	Shared = 1;
-    else if ((Shared != 1) || (Part_Ptr->Shared == 0)) 
+    else if ((Shared != 1) || (Part_Ptr->Shared == 0)) 	/* User or partition want no sharing */
 	Shared = 0;
 
 
@@ -730,6 +901,9 @@ int Select_Nodes(char *Job_Specs, char **Node_List) {
 
 
     /* Pick the nodes providing a best-fit */
+    if (Contiguous == NO_VAL) Contiguous=0;	/* Default not contiguous */
+    if (Req_CPUs   == NO_VAL) Req_CPUs=0;	/* Default no CPU count requirements */
+    if (Req_Nodes  == NO_VAL) Req_Nodes=0;	/* Default no node count requirements */
     Error_Code = Pick_Best_Nodes(Node_Set_Ptr, Node_Set_Size, 
 	&Req_BitMap, Req_CPUs, Req_Nodes, Contiguous, Shared);
     if (Error_Code) goto cleanup;
diff --git a/src/slurmctld/read_config.c b/src/slurmctld/read_config.c
index ca6c32edae2..cba33101221 100644
--- a/src/slurmctld/read_config.c
+++ b/src/slurmctld/read_config.c
@@ -68,14 +68,14 @@ main(int argc, char * argv[]) {
     for (i=0; i<Node_Record_Count; i++) {
 	if (strlen((Node_Record_Table_Ptr+i)->Name) == 0) continue;
 	printf("NodeName=%s ",      (Node_Record_Table_Ptr+i)->Name);
-	printf("NodeState=%s ",     Node_State_String[(Node_Record_Table_Ptr+i)->NodeState]);
-	printf("LastResponse=%ld ", (long)(Node_Record_Table_Ptr+i)->LastResponse);
-
-	printf("Weight=%d ",        (Node_Record_Table_Ptr+i)->Config_Ptr->Weight);
-	printf("Feature=%s\n",      (Node_Record_Table_Ptr+i)->Config_Ptr->Feature);
-	printf("CPUs=%d ",          (Node_Record_Table_Ptr+i)->CPUs);
-	printf("RealMemory=%d ",    (Node_Record_Table_Ptr+i)->RealMemory);
-	printf("TmpDisk=%d ",       (Node_Record_Table_Ptr+i)->TmpDisk);
+	printf("NodeState=%s ",     Node_State_String[Node_Record_Table_Ptr[i].NodeState]);
+	printf("LastResponse=%ld ", (long)Node_Record_Table_Ptr[i].LastResponse);
+
+	printf("CPUs=%d ",          Node_Record_Table_Ptr[i].CPUs);
+	printf("RealMemory=%d ",    Node_Record_Table_Ptr[i].RealMemory);
+	printf("TmpDisk=%d ",       Node_Record_Table_Ptr[i].TmpDisk);
+	printf("Weight=%d ",        Node_Record_Table_Ptr[i].Config_Ptr->Weight);
+	printf("Feature=%s\n",      Node_Record_Table_Ptr[i].Config_Ptr->Feature);
     } /* for */
     BitMap = BitMapPrint(Up_NodeBitMap);
     printf("\nUp_NodeBitMap  =%s\n", BitMap);
@@ -210,7 +210,7 @@ int Build_BitMaps() {
 	if ((Node_Record_Table_Ptr+i)->NodeState == STATE_IDLE) BitMapSet(Idle_NodeBitMap, i);
 	if ((Node_Record_Table_Ptr+i)->NodeState != STATE_DOWN) BitMapSet(Up_NodeBitMap, i);
 	if ((Node_Record_Table_Ptr+i)->Config_Ptr) 
-		BitMapSet((Node_Record_Table_Ptr+i)->Config_Ptr->NodeBitMap, i);
+		BitMapSet(Node_Record_Table_Ptr[i].Config_Ptr->NodeBitMap, i);
     } /* for */
 
     /* Scan partition table and identify nodes in each */
@@ -251,7 +251,7 @@ int Build_BitMaps() {
 	/* Check for each node in the partition */
 	if ((Part_Record_Point->Nodes == NULL) || 
 	    (strlen(Part_Record_Point->Nodes) == 0)) continue;
-	My_Node_List = malloc(strlen(Part_Record_Point->Nodes)+1);
+	My_Node_List = (char *)malloc(strlen(Part_Record_Point->Nodes)+1);
 	if (My_Node_List == NULL) {
 #if DEBUG_SYSTEM
 	    fprintf(stderr, "Build_BitMaps: unable to allocate memory\n");
-- 
GitLab