From d9e07f7acfd3d184f22d6c2bef58e7fb9f8a3462 Mon Sep 17 00:00:00 2001
From: Alejandro Lucero Palau <alejandro.lucero@bsc.es>
Date: Thu, 25 Oct 2012 15:20:51 -0700
Subject: [PATCH] Changes for core based reservation topology support

---
 src/plugins/select/cons_res/select_cons_res.c | 157 ++++++++++++++----
 src/slurmctld/reservation.c                   |  36 ++--
 testsuite/expect/inc3.11.1                    |   1 -
 3 files changed, 150 insertions(+), 44 deletions(-)

diff --git a/src/plugins/select/cons_res/select_cons_res.c b/src/plugins/select/cons_res/select_cons_res.c
index 736ffef606d..8b7e0291078 100644
--- a/src/plugins/select/cons_res/select_cons_res.c
+++ b/src/plugins/select/cons_res/select_cons_res.c
@@ -2514,8 +2514,9 @@ bitstr_t *_make_core_bitmap_filtered(bitstr_t *node_map, int filter)
 		return core_map;
 
 	nodes = bit_size(node_map);
-	for (n = 0, c = 0; n < nodes; n++) {
+	for (n = 0; n < nodes; n++) {
 		if (bit_test(node_map, n)) {
+			c = cr_get_coremap_offset(n);
 			coff = cr_get_coremap_offset(n+1);
 			while (c < coff) {
 				bit_set(core_map, c++);
@@ -2663,6 +2664,22 @@ bitstr_t *sequential_pick(bitstr_t *avail_bitmap, uint32_t node_cnt,
 	return sp_avail_bitmap;
 }
 
+/* Count the cores of "node" whose bits are clear in core_bitmap. */
+static int _get_avail_core_in_node(bitstr_t *core_bitmap, int node)
+{
+	int first_core = cr_get_coremap_offset(node);
+	int end_core = first_core + cr_node_num_cores[node];
+	int free_cores = 0;
+	int c;
+
+	for (c = first_core; c < end_core; c++) {
+		if (bit_test(core_bitmap, c))
+			continue;	/* bit set: not counted */
+		free_cores++;
+	}
+	return free_cores;
+}
+
 /*
  * select_p_resv_test - Identify the nodes which "best" satisfy a reservation
  *	request. "best" is defined as either single set of consecutive nodes
@@ -2677,6 +2694,7 @@ extern bitstr_t * select_p_resv_test(bitstr_t *avail_bitmap, uint32_t node_cnt,
 				     uint32_t core_cnt, bitstr_t **core_bitmap)
 {
 	bitstr_t **switches_bitmap;		/* nodes on this switch */
+	bitstr_t **switches_core_bitmap;	/* cores on this switch */
 	int       *switches_cpu_cnt;		/* total CPUs on switch */
 	int       *switches_node_cnt;		/* total nodes on switch */
 	int       *switches_required;		/* set if has required node */
@@ -2689,6 +2707,7 @@ extern bitstr_t * select_p_resv_test(bitstr_t *avail_bitmap, uint32_t node_cnt,
 	int best_fit_nodes;
 	int best_fit_location = 0, best_fit_sufficient;
 	bool sufficient;
+	int cores_per_node;
 
 	xassert(avail_bitmap);
 
@@ -2701,38 +2720,63 @@ extern bitstr_t * select_p_resv_test(bitstr_t *avail_bitmap, uint32_t node_cnt,
 	if (bit_set_count(avail_bitmap) < node_cnt)
 		return avail_nodes_bitmap;
 
+	if (*core_bitmap == NULL)
+		*core_bitmap = _make_core_bitmap_filtered(avail_bitmap, 0);
+	
 	rem_nodes = node_cnt;
 	rem_cores = core_cnt;
 
+	/* TODO: allowing asymmetric cluster */
+	cores_per_node = core_cnt / MAX(node_cnt, 1);
+
 	/* Construct a set of switch array entries,
 	 * use the same indexes as switch_record_table in slurmctld */
 	switches_bitmap   = xmalloc(sizeof(bitstr_t *) * switch_record_cnt);
+	switches_core_bitmap = xmalloc(sizeof(bitstr_t *) * switch_record_cnt);
 	switches_cpu_cnt  = xmalloc(sizeof(int)        * switch_record_cnt);
 	switches_node_cnt = xmalloc(sizeof(int)        * switch_record_cnt);
 	switches_required = xmalloc(sizeof(int)        * switch_record_cnt);
 
 	for (i=0; i<switch_record_cnt; i++) {
-		bitstr_t *switch_bitmap_copy = xmalloc(sizeof(bitstr_t *));
-		int node_cnt;
+		char str[100];
 		switches_bitmap[i] = bit_copy(switch_record_table[i].
 						  node_bitmap);
 		bit_and(switches_bitmap[i], avail_bitmap);
 		switches_node_cnt[i] = bit_set_count(switches_bitmap[i]);
-		switch_bitmap_copy = bit_copy(switches_bitmap[i]);
-		node_cnt = switches_node_cnt[i];
-		debug2("switch %d looking cores in %d nodes",
-		       i, switches_node_cnt[i]);
-		while (node_cnt--) {
-			int node_inx;
-			node_inx = bit_ffs(switch_bitmap_copy);
-			switches_cpu_cnt[i] += cr_node_num_cores[node_inx];
-			bit_nclear(switch_bitmap_copy, node_inx, node_inx);
-		}
-
-		debug2("switch %d with %d nodes and %d cores",
+
+		switches_core_bitmap[i] =
+			_make_core_bitmap_filtered(switches_bitmap[i], 1);
+
+		if (*core_bitmap) {
+			bit_not(*core_bitmap);
+			bit_and(switches_core_bitmap[i], *core_bitmap);
+			bit_not(*core_bitmap);
+		}
+		bit_fmt(str, sizeof(str), switches_core_bitmap[i]);
+		debug2("Switch %d can use cores: %s", i, str);
+
+		switches_cpu_cnt[i] = bit_set_count(switches_core_bitmap[i]);
+
+		debug2("switch %d looking cores in %d nodes (%d)",
 		       i, switches_node_cnt[i], switches_cpu_cnt[i]);
 	}
 
+	/* Let's check nodes with less avail cores than needed */
+
+	for (j=0; j<switch_record_cnt; j++) {
+		first = bit_ffs(switches_bitmap[j]);
+		last  = bit_fls(switches_bitmap[j]);
+		for (i=first; ((i<=last) && (first>=0)); i++) {
+			int c;
+			if (!bit_test(switches_bitmap[j], i))
+				continue;
+
+			c = _get_avail_core_in_node(*core_bitmap, i);
+			if (c < cores_per_node)
+				switches_node_cnt[j] -= c;
+		}
+	}
+
 #if SELECT_DEBUG
 	/* Don't compile this, it slows things down too much */
 	for (i=0; i<switch_record_cnt; i++) {
@@ -2781,6 +2825,7 @@ extern bitstr_t * select_p_resv_test(bitstr_t *avail_bitmap, uint32_t node_cnt,
 	/* Select resources from these leafs on a best-fit basis */
 	avail_nodes_bitmap = bit_alloc(node_record_count);
 	while (rem_nodes > 0) {
+		int avail_cores_in_node;
 		best_fit_nodes = best_fit_sufficient = 0;
 		for (j=0; j<switch_record_cnt; j++) {
 			if (switches_node_cnt[j] == 0)
@@ -2810,7 +2855,7 @@ extern bitstr_t * select_p_resv_test(bitstr_t *avail_bitmap, uint32_t node_cnt,
 		for (i=first; ((i<=last) && (first>=0)); i++) {
 			if (!bit_test(switches_bitmap[best_fit_location], i)){
 				continue;
-            }
+			}
 
 			bit_clear(switches_bitmap[best_fit_location], i);
 			switches_node_cnt[best_fit_location]--;
@@ -2821,9 +2866,26 @@ extern bitstr_t * select_p_resv_test(bitstr_t *avail_bitmap, uint32_t node_cnt,
 				continue;
 			}
 
+			if (*core_bitmap) {
+				int coff;
+				avail_cores_in_node = 0;
+				coff = cr_get_coremap_offset(i);
+				debug2("Testing node %d, core offset %d",
+				       i, coff);
+				for (j=0; j<cr_node_num_cores[i]; j++){
+					if (!bit_test(*core_bitmap, coff + j))
+						avail_cores_in_node++;
+				}
+				if (avail_cores_in_node < cores_per_node)
+					continue;
+				
+				debug2("Using node %d with %d cores available",
+				       i, avail_cores_in_node);
+			}
+
 			bit_set(avail_nodes_bitmap, i);
 			if (core_cnt)
-			    rem_cores -= cr_node_num_cores[i];
+			    rem_cores -= cores_per_node;
 			if (--rem_nodes <= 0)
 				break;
 		}
@@ -2832,31 +2894,44 @@ extern bitstr_t * select_p_resv_test(bitstr_t *avail_bitmap, uint32_t node_cnt,
 	if (rem_nodes > 0)	/* insufficient resources */
 		FREE_NULL_BITMAP(avail_nodes_bitmap);
 
-fini:	for (i=0; i<switch_record_cnt; i++)
+fini:	for (i=0; i<switch_record_cnt; i++) {
 		FREE_NULL_BITMAP(switches_bitmap[i]);
+		FREE_NULL_BITMAP(switches_core_bitmap[i]);
+	}
 
 	xfree(switches_bitmap);
+	xfree(switches_core_bitmap);
 	xfree(switches_cpu_cnt);
 	xfree(switches_node_cnt);
 	xfree(switches_required);
 
-	if (core_cnt) { /* Reservation is using partial nodes */
+	if (avail_nodes_bitmap && core_cnt) { 
+		/* Reservation is using partial nodes */
 		//char str[100];
-		int cores_per_node;
+		bitstr_t *exc_core_bitmap = NULL;
 
 		sp_avail_bitmap = bit_alloc(bit_size(avail_bitmap));
 		if (sp_avail_bitmap == NULL)
 			fatal ("memory allocation failure");
 
-		*core_bitmap = _make_core_bitmap_filtered(avail_bitmap, 0);
+		if (*core_bitmap) {
+			exc_core_bitmap = bit_alloc(bit_size(*core_bitmap));
+			if (!exc_core_bitmap)
+				fatal("bit_alloc: malloc failure");
+		} else {
+			error("select_p_resv_test: core_bitmap is NULL");
+			FREE_NULL_BITMAP(sp_avail_bitmap);
+			return NULL;
+		}
 
 		cores_per_node = core_cnt / MAX(node_cnt, 1);
 
 		while (core_cnt) {
 			uint32_t inx, coff;
 			int i;
+			int avail_cores_in_node;
 
-			inx = bit_ffs(avail_bitmap);
+			inx = bit_ffs(avail_nodes_bitmap);
 			if ((inx < 0) || (inx > bit_size(avail_bitmap)))
 				break;
 
@@ -2864,19 +2939,43 @@ fini:	for (i=0; i<switch_record_cnt; i++)
 			       "core_cnt: %d", inx, cores_per_node, core_cnt);
 			coff = cr_get_coremap_offset(inx);
 
-			for (i = 0; i < cores_per_node; i++){
-				/* TODO: checking cores_per_nodes is lower
-				 * than real cores per node */
-				bit_set(*core_bitmap, coff++);
-				core_cnt--;
+			/* Clear this node from the initial available bitmap */
+			bit_clear(avail_nodes_bitmap, inx);
+
+			if (cr_node_num_cores[inx] < cores_per_node)
+				continue;
+
+			avail_cores_in_node = 0;
+			for (i = 0; i < cr_node_num_cores[inx]; i++) {
+				if (!bit_test(exc_core_bitmap, coff + i)) {
+					avail_cores_in_node++;
+				}
+			}
+
+			debug2("Node %d has %d available cores", inx,
+			       avail_cores_in_node);
+
+			if (avail_cores_in_node < cores_per_node)
+				continue;
+
+			avail_cores_in_node = 0;
+			for (i = 0; i < cr_node_num_cores[inx]; i++) {
+				if (!bit_test(exc_core_bitmap, coff + i)) {
+					bit_set(*core_bitmap, coff + i);
+					core_cnt--;
+					avail_cores_in_node++;
+				}
+
+				if ((avail_cores_in_node == cores_per_node) ||
+				    (core_cnt == 0))
+					break;
 			}
 
 			/* Add this node to the final node bitmap */
 			bit_set(sp_avail_bitmap, inx);
 
-			/* Clear this node from the initial available bitmap */
-			bit_clear(avail_bitmap, inx);
 		}
+		FREE_NULL_BITMAP(exc_core_bitmap);
 
 		//bit_fmt(str, (sizeof(str) - 1), *core_bitmap);
 		//info("sequential pick using coremap: %s", str);
diff --git a/src/slurmctld/reservation.c b/src/slurmctld/reservation.c
index 8a73084b696..ccfcf961c21 100644
--- a/src/slurmctld/reservation.c
+++ b/src/slurmctld/reservation.c
@@ -1783,6 +1783,10 @@ extern int update_resv(resv_desc_msg_t *resv_desc_ptr)
 	if (!resv_ptr)
 		return ESLURM_RESERVATION_INVALID;
 
+	/* TODO: core based reservation updates */
+	if (resv_ptr->full_nodes == 0)
+		return ESLURM_RESERVATION_NOT_USABLE;
+
 	/* Make backup to restore state in case of failure */
 	resv_backup = _copy_resv(resv_ptr);
 
@@ -2462,6 +2466,7 @@ static void _validate_all_reservations(void)
 static void _validate_node_choice(slurmctld_resv_t *resv_ptr)
 {
 	bitstr_t *tmp_bitmap = NULL;
+	bitstr_t *core_bitmap = NULL;
 	int i;
 	resv_desc_msg_t resv_desc;
 
@@ -2482,7 +2487,8 @@ static void _validate_node_choice(slurmctld_resv_t *resv_ptr)
 	resv_desc.features   = resv_ptr->features;
 	resv_desc.node_cnt   = xmalloc(sizeof(uint32_t) * 2);
 	resv_desc.node_cnt[0]= resv_ptr->node_cnt - i;
-	i = _select_nodes(&resv_desc, &resv_ptr->part_ptr, &tmp_bitmap, NULL);
+	i = _select_nodes(&resv_desc, &resv_ptr->part_ptr, &tmp_bitmap,
+			  &core_bitmap);
 	xfree(resv_desc.node_cnt);
 	xfree(resv_desc.node_list);
 	xfree(resv_desc.partition);
@@ -2490,6 +2496,8 @@ static void _validate_node_choice(slurmctld_resv_t *resv_ptr)
 		bit_and(resv_ptr->node_bitmap, avail_node_bitmap);
 		bit_or(resv_ptr->node_bitmap, tmp_bitmap);
 		FREE_NULL_BITMAP(tmp_bitmap);
+		FREE_NULL_BITMAP(resv_ptr->core_bitmap);
+		resv_ptr->core_bitmap = core_bitmap;
 		xfree(resv_ptr->node_list);
 		resv_ptr->node_list = bitmap2node_name(resv_ptr->node_bitmap);
 		info("modified reservation %s due to unusable nodes, "
@@ -2691,6 +2699,7 @@ extern int validate_job_resv(struct job_record *job_ptr)
 static int  _resize_resv(slurmctld_resv_t *resv_ptr, uint32_t node_cnt)
 {
 	bitstr_t *tmp1_bitmap = NULL, *tmp2_bitmap = NULL;
+	bitstr_t *core_bitmap = NULL;
 	int delta_node_cnt, i;
 	resv_desc_msg_t resv_desc;
 
@@ -2746,13 +2755,16 @@ static int  _resize_resv(slurmctld_resv_t *resv_ptr, uint32_t node_cnt)
 	resv_desc.flags      = resv_ptr->flags;
 	resv_desc.node_cnt   = xmalloc(sizeof(uint32_t) * 2);
 	resv_desc.node_cnt[0]= 0 - delta_node_cnt;
-	i = _select_nodes(&resv_desc, &resv_ptr->part_ptr, &tmp1_bitmap, NULL);
+	i = _select_nodes(&resv_desc, &resv_ptr->part_ptr, &tmp1_bitmap,
+			  &core_bitmap);
 	xfree(resv_desc.node_cnt);
 	xfree(resv_desc.node_list);
 	xfree(resv_desc.partition);
 	if (i == SLURM_SUCCESS) {
 		bit_or(resv_ptr->node_bitmap, tmp1_bitmap);
 		FREE_NULL_BITMAP(tmp1_bitmap);
+		FREE_NULL_BITMAP(resv_ptr->core_bitmap);
+		resv_ptr->core_bitmap = core_bitmap;
 		xfree(resv_ptr->node_list);
 		resv_ptr->node_list = bitmap2node_name(resv_ptr->node_bitmap);
 		resv_ptr->node_cnt = node_cnt;
@@ -3515,7 +3527,8 @@ extern int job_test_resv(struct job_record *job_ptr, time_t *when,
 			    (res2_ptr == resv_ptr) ||
 			    (res2_ptr->node_bitmap == NULL) ||
 			    (res2_ptr->start_time >= job_end_time) ||
-			    (res2_ptr->end_time   <= job_start_time))
+			    (res2_ptr->end_time   <= job_start_time) ||
+			    (!res2_ptr->full_nodes))
 				continue;
 			bit_not(res2_ptr->node_bitmap);
 			bit_and(*node_bitmap, res2_ptr->node_bitmap);
@@ -3564,10 +3577,10 @@ extern int job_test_resv(struct job_record *job_ptr, time_t *when,
 			    (resv_ptr->end_time   <= job_start_time))
 				continue;
 			if (job_ptr->details->req_node_bitmap &&
-			     bit_overlap(job_ptr->details->req_node_bitmap,
-					 resv_ptr->node_bitmap) &&
-			     ((resv_ptr->cpu_cnt == 0) ||
-			      (!job_ptr->details->shared))) {
+			    bit_overlap(job_ptr->details->req_node_bitmap,
+					resv_ptr->node_bitmap) &&
+			    ((resv_ptr->cpu_cnt == 0) ||
+			    (!job_ptr->details->shared))) {
 				*when = resv_ptr->end_time;
 				rc = ESLURM_NODES_BUSY;
 				break;
@@ -3596,13 +3609,8 @@ extern int job_test_resv(struct job_record *job_ptr, time_t *when,
 					*exc_core_bitmap = 
 						bit_copy(resv_ptr->core_bitmap);
 				} else {
-					char str[100];
-					bit_and(*exc_core_bitmap,
-						resv_ptr->core_bitmap);
-					bit_fmt(str, (sizeof(str) - 1), 
-							*exc_core_bitmap);
-					debug2("New exclude core bitmap %s", 
-						str);
+					bit_or(*exc_core_bitmap,
+					       resv_ptr->core_bitmap);
 				}
 			}
 		}
diff --git a/testsuite/expect/inc3.11.1 b/testsuite/expect/inc3.11.1
index 7d8e7d90093..c62798eb6bb 100644
--- a/testsuite/expect/inc3.11.1
+++ b/testsuite/expect/inc3.11.1
@@ -59,7 +59,6 @@ proc inc3_11_1 {} {
 	{StartTime=now   Duration=5   Nodes=$def_node   User=$user_name  Flags=badtype,ignore_jobs}
 	{StartTime=now+10minutes   EndTime=now   Nodes=$def_node   User=$user_name Flags=ignore_jobs}
 	{StartTime=now   Duration=5   Nodes=$def_node   User=$user_name Licenses=DUMMY_FOR_TESTING Flags=ignore_jobs}
-	#{StartTime=now   Duration=5   NodeCnt=2 CoreCnt=1  User=$user_name}
 	{StartTime=now   Duration=5   NodeCnt=1 CoreCnt=$core_res_num  User=$user_name}
 "
 	#{StartTime=now   Duration=5   Nodes=$def_node   Account=badaccountname}
-- 
GitLab