From 880587c0aa2addc5c38a483f29f5874f6469631f Mon Sep 17 00:00:00 2001
From: Moe Jette <jette1@llnl.gov>
Date: Mon, 19 Jul 2010 23:38:13 +0000
Subject: [PATCH] improve topology support for the gres job_test logic

---
 src/common/gres.c | 86 ++++++++++++++++++++++++++++++++---------------
 1 file changed, 59 insertions(+), 27 deletions(-)

diff --git a/src/common/gres.c b/src/common/gres.c
index ccfc563f88d..3d4e91e2b32 100644
--- a/src/common/gres.c
+++ b/src/common/gres.c
@@ -2234,48 +2234,80 @@ static uint32_t _job_test(void *job_gres_data, void *node_gres_data,
 			  bool use_total_gres, bitstr_t *cpu_bitmap,
 			  int cpu_start_bit, int cpu_end_bit)
 {
-	int i, j, cpus_ctld, cpu_cnt, tot_cpu_cnt = 0, gres_avail;
+	int i, j, cpus_ctld, gres_avail, top_inx;
 	gres_job_state_t  *job_gres_ptr  = (gres_job_state_t *)  job_gres_data;
 	gres_node_state_t *node_gres_ptr = (gres_node_state_t *) node_gres_data;
-	bitstr_t *new_gres_bitmap = NULL;
+	uint32_t *cpus_avail = NULL, cpu_cnt = 0;
+	bitstr_t *alloc_cpu_bitmap = NULL;
 
-	if (!use_total_gres && cpu_bitmap && node_gres_ptr->topo_cnt) {
+	if (job_gres_ptr->gres_cnt_alloc && node_gres_ptr->topo_cnt) {
 		/* Need to determine which specific CPUs can be used */
-		new_gres_bitmap = bit_alloc(node_gres_ptr->gres_cnt_avail);
-		if (new_gres_bitmap == NULL)
-			fatal("bit_alloc: malloc failure");
-		cpus_ctld = cpu_end_bit - cpu_start_bit + 1;
-		_validate_gres_node_cpus(node_gres_ptr, cpus_ctld);
-		if (cpus_ctld < 1) {
-			error("gres_plugin_job_test: cpus on node < 1");
-			return (uint32_t) 0;
+		if (cpu_bitmap) {
+			cpus_ctld = cpu_end_bit - cpu_start_bit + 1;
+			if (cpus_ctld < 1) {
+				error("gres_plugin_job_test: cpus on node < 1");
+				return (uint32_t) 0;
+			}
+			_validate_gres_node_cpus(node_gres_ptr, cpus_ctld);
+		} else {
+			cpus_ctld = bit_size(node_gres_ptr->cpus_bitmap[0]);
 		}
+		cpus_avail = xmalloc(sizeof(uint32_t) * node_gres_ptr->topo_cnt);
+		alloc_cpu_bitmap = bit_alloc(cpus_ctld);
+		if (alloc_cpu_bitmap == NULL)
+			fatal("bit_alloc: malloc failure");
 		for (i=0; i<node_gres_ptr->topo_cnt; i++) {
-			cpu_cnt = 0;
+			if (!use_total_gres &&
+			    bit_test(node_gres_ptr->gres_bit_alloc, i)) {
+				continue;	/* gres already allocated */
+			}
 			for (j=0; j<cpus_ctld; j++) {
-				if (bit_test(node_gres_ptr->cpus_bitmap[i],j)&&
-				    bit_test(cpu_bitmap, cpu_start_bit+j))
-					cpu_cnt++;
+				if (cpu_bitmap && 
+				    !bit_test(cpu_bitmap, cpu_start_bit+j))
+					continue;
+				if (bit_test(node_gres_ptr->cpus_bitmap[i],j) &&
+				    !bit_test(alloc_cpu_bitmap, j)) {
+					bit_set(alloc_cpu_bitmap, j);
+					cpus_avail[i]++;
+				}
 			}
-			if (cpu_cnt == 0)
-				continue;
-			tot_cpu_cnt += cpu_cnt;
-			for (j=0; j<node_gres_ptr->gres_cnt_avail; j++) {
-				if (!bit_test(node_gres_ptr->gres_bit_alloc, j))
-					bit_set(new_gres_bitmap, j);
+		}
+
+		/* Pick the gres with the most CPUs available */
+		bit_nclear(alloc_cpu_bitmap, 0, (cpus_ctld - 1));
+		for (i=0; i<job_gres_ptr->gres_cnt_alloc; i++) {
+			top_inx = -1;
+			for (j=0; j<node_gres_ptr->topo_cnt; j++) {
+				if (top_inx == -1) {
+					if (cpus_avail[j])
+						top_inx = j;
+				} else if (cpus_avail[j] > cpus_avail[top_inx])
+					top_inx = j;
+			}
+			if ((top_inx < 0) || (cpus_avail[top_inx] == 0)) {
+				cpu_cnt = 0;
+				break;
+			}
+			cpu_cnt += cpus_avail[top_inx];
+			cpus_avail[top_inx] = 0;
+			bit_or(alloc_cpu_bitmap,
+			       node_gres_ptr->cpus_bitmap[top_inx]);
+		}
+		if (cpu_bitmap && (cpu_cnt > 0)) {
+			for (i=0; i<cpus_ctld; i++) {
+				if (!bit_test(alloc_cpu_bitmap, i))
+					bit_clear(cpu_bitmap, cpu_start_bit+i);
 			}
 		}
-		gres_avail = bit_set_count(new_gres_bitmap);
-		FREE_NULL_BITMAP(new_gres_bitmap);
-		if (job_gres_ptr->gres_cnt_alloc > gres_avail)
-			return (uint32_t) 0;
-		return tot_cpu_cnt;
+		FREE_NULL_BITMAP(alloc_cpu_bitmap);
+		xfree(cpus_avail);
+		return cpu_cnt;
 	} else {
 		gres_avail = node_gres_ptr->gres_cnt_avail;
 		if (!use_total_gres)
 			gres_avail -= node_gres_ptr->gres_cnt_alloc;
 		if (job_gres_ptr->gres_cnt_alloc > gres_avail)
-			return (uint32_t) 0;
+			return (uint32_t) 0;	/* insufficient, gres to use */
 		return NO_VAL;
 	}
 }
-- 
GitLab