From 2a89b6bcc25619defd4cf3f4076ec3dd5a969cbf Mon Sep 17 00:00:00 2001
From: Moe Jette <jette1@llnl.gov>
Date: Thu, 15 Jul 2010 18:02:54 +0000
Subject: [PATCH] get topology working for gres job_test

---
 src/common/gres.c                         | 154 +++++++++++++++++++---
 src/common/gres.h                         |   8 +-
 src/plugins/select/cons_res/job_test.c    |  21 +--
 src/plugins/select/linear/select_linear.c |   3 +-
 4 files changed, 156 insertions(+), 30 deletions(-)

diff --git a/src/common/gres.c b/src/common/gres.c
index 350eb5992b6..f9e2e731163 100644
--- a/src/common/gres.c
+++ b/src/common/gres.c
@@ -120,6 +120,7 @@ static List gres_conf_list = NULL;
 /* Local functions */
 static gres_node_state_t *
 		_build_gres_node_state(void);
+static bitstr_t *_cpu_bitmap_rebuild(bitstr_t *old_cpu_bitmap, int new_size);
 static void	_destroy_gres_slurmd_conf(void *x);
 static uint32_t	_get_gres_cnt(char *orig_config, char *gres_name,
 			      char *gres_name_colon, int gres_name_colon_len);
@@ -133,6 +134,9 @@ static int	_job_state_unpack(void **gres_data, Buf buffer,
 				  char *gres_name);
 static int	_job_state_validate(char *config, void **gres_data,
 				    slurm_gres_context_t *gres_name);
+static uint32_t	_job_test(void *job_gres_data, void *node_gres_data,
+			  bool use_total_gres, bitstr_t *cpu_bitmap,
+			  int cpu_start_bit, int cpu_end_bit);
 static int	_load_gres_plugin(char *plugin_name,
 				  slurm_gres_context_t *plugin_context);
 static int	_log_gres_slurmd_conf(void *x, void *arg);
@@ -159,6 +163,8 @@ static void	_set_gres_cnt(char *orig_config, char **new_config,
 			      char *gres_name_colon, int gres_name_colon_len);
 static int	_strcmp(const char *s1, const char *s2);
 static int	_unload_gres_plugin(slurm_gres_context_t *plugin_context);
+static void	_validate_gres_node_cpus(gres_node_state_t *node_gres_ptr,
+					 int cpus_ctld);
 
 
 /* Variant of strcmp that will accept NULL string pointers */
@@ -800,7 +806,8 @@ static void _set_gres_cnt(char *orig_config, char **new_config,
 			  uint32_t new_cnt, char *gres_name,
 			  char *gres_name_colon, int gres_name_colon_len)
 {
-	char *new_configured_res = NULL, *node_gres_config, *last_tok, *tok;
+	char *new_configured_res = NULL, *node_gres_config;
+	char *last_tok = NULL, *tok;
 
 	if (*new_config)
 		node_gres_config = xstrdup(*new_config);
@@ -2163,40 +2170,148 @@ unpack_error:
 	goto fini;
 }
 
+/* If CPU bitmap from slurmd differs in size from that in slurmctld,
+ * then modify bitmap from slurmd so we can use bit_and, bit_or, etc. */
+static bitstr_t *_cpu_bitmap_rebuild(bitstr_t *old_cpu_bitmap, int new_size)
+{
+	int i, j, old_size, ratio;
+	bitstr_t *new_cpu_bitmap;
+
+	new_cpu_bitmap = bit_alloc(new_size);
+	if (new_cpu_bitmap == NULL)
+		fatal("bit_alloc: malloc failure");
+	old_size = bit_size(old_cpu_bitmap);
+	if (old_size > new_size) {
+		ratio = old_size / new_size;
+		for (i=0; i<new_size; i++) {
+			for (j=0; j<ratio; j++) {
+				if (bit_test(old_cpu_bitmap, i*ratio+j)) {
+					bit_set(new_cpu_bitmap, i);
+					break;
+				}
+			}
+		}
+	} else {
+		ratio = new_size / old_size;
+		for (i=0; i<old_size; i++) {
+			if (!bit_test(old_cpu_bitmap, i))
+				continue;
+			for (j=0; j<ratio; j++) {
+				bit_set(new_cpu_bitmap, i*ratio+j);
+			}
+		}
+	}
+
+	return new_cpu_bitmap;
+}
+
+static void _validate_gres_node_cpus(gres_node_state_t *node_gres_ptr,
+				     int cpus_ctld)
+{
+	int i, cpus_slurmd;
+	bitstr_t *new_cpu_bitmap;
+
+	if (node_gres_ptr->topo_cnt == 0)
+		return;
+	cpus_slurmd = bit_size(node_gres_ptr->cpus_bitmap[0]);
+	if (cpus_slurmd == cpus_ctld)
+		return;
+
+	debug("Gres CPU count mismatch (%d != %d)", cpus_slurmd, cpus_ctld);
+	for (i=0; i<node_gres_ptr->topo_cnt; i++) {
+		if (i != 0)
+			cpus_slurmd = bit_size(node_gres_ptr->cpus_bitmap[i]);
+		if (cpus_slurmd == cpus_ctld)	/* should never happen here */
+			continue;
+		new_cpu_bitmap = _cpu_bitmap_rebuild(node_gres_ptr->
+						     cpus_bitmap[i], cpus_ctld);
+		FREE_NULL_BITMAP(node_gres_ptr->cpus_bitmap[i]);
+		node_gres_ptr->cpus_bitmap[i] = new_cpu_bitmap;
+	}
+}
+
 static uint32_t _job_test(void *job_gres_data, void *node_gres_data,
-			  bool use_total_gres)
+			  bool use_total_gres, bitstr_t *cpu_bitmap,
+			  int cpu_start_bit, int cpu_end_bit)
 {
-	uint32_t gres_avail;
+	int i, j, cpus_ctld, cpu_cnt, tot_cpu_cnt = 0, gres_avail;
 	gres_job_state_t  *job_gres_ptr  = (gres_job_state_t *)  job_gres_data;
 	gres_node_state_t *node_gres_ptr = (gres_node_state_t *) node_gres_data;
+	bitstr_t *new_gres_bitmap = NULL;
 
-	gres_avail = node_gres_ptr->gres_cnt_avail;
-	if (!use_total_gres)
-		gres_avail -= node_gres_ptr->gres_cnt_alloc;
-
-	if (job_gres_ptr->gres_cnt_mult == 0) {
-		/* per node gres limit */
-		if (job_gres_ptr->gres_cnt_alloc > gres_avail)
+	if (!use_total_gres && cpu_bitmap && node_gres_ptr->topo_cnt) {
+		/* Need to determine which specific CPUs can be used */
+		new_gres_bitmap = bit_alloc(node_gres_ptr->gres_cnt_avail);
+		if (new_gres_bitmap == NULL)
+			fatal("bit_alloc: malloc failure");
+		cpus_ctld = cpu_end_bit - cpu_start_bit + 1;
+		_validate_gres_node_cpus(node_gres_ptr, cpus_ctld);
+		if (cpus_ctld < 1) {
+			error("gres_plugin_job_test: cpus on node < 1");
 			return (uint32_t) 0;
-		return NO_VAL;
+		}
+		for (i=0; i<node_gres_ptr->topo_cnt; i++) {
+			cpu_cnt = 0;
+			for (j=0; j<cpus_ctld; j++) {
+				if (bit_test(node_gres_ptr->cpus_bitmap[i],j)&&
+				    bit_test(cpu_bitmap, cpu_start_bit+j))
+					cpu_cnt++;
+			}
+			if (cpu_cnt == 0)
+				continue;
+			tot_cpu_cnt += cpu_cnt;
+			for (j=0; j<node_gres_ptr->gres_cnt_avail; j++) {
+				if (!bit_test(node_gres_ptr->
+					      gres_block_bitmap[i], j) &&
+				    !bit_test(node_gres_ptr->gres_bit_alloc, j))
+					bit_set(new_gres_bitmap, j);
+			}
+		}
+		gres_avail = bit_set_count(new_gres_bitmap);
+		FREE_NULL_BITMAP(new_gres_bitmap);
+		if (job_gres_ptr->gres_cnt_mult == 0) {
+			/* per node gres limit */
+			if (job_gres_ptr->gres_cnt_alloc > gres_avail)
+				return (uint32_t) 0;
+			return tot_cpu_cnt;
+		} else {
+			/* per CPU gres limit */
+			return (uint32_t) (gres_avail / 
+					   job_gres_ptr->gres_cnt_alloc);
+		}
 	} else {
-		/* per CPU gres limit */
-		return (uint32_t) (gres_avail / job_gres_ptr->gres_cnt_alloc);
+		gres_avail = node_gres_ptr->gres_cnt_avail;
+		if (!use_total_gres)
+			gres_avail -= node_gres_ptr->gres_cnt_alloc;
+
+		if (job_gres_ptr->gres_cnt_mult == 0) {
+			/* per node gres limit */
+			if (job_gres_ptr->gres_cnt_alloc > gres_avail)
+				return (uint32_t) 0;
+			return NO_VAL;
+		} else {
+			/* per CPU gres limit */
+			return (uint32_t) (gres_avail / 
+					   job_gres_ptr->gres_cnt_alloc);
+		}
 	}
 }
 
 /*
  * Determine how many CPUs on the node can be used by this job
- * IN job_gres_list - job's gres_list built by gres_plugin_job_state_validate()
- * IN node_gres_list - node's gres_list built by
- *                     gres_plugin_node_config_validate()
+ * IN job_gres_list  - job's gres_list built by gres_plugin_job_state_validate()
+ * IN node_gres_list - node's gres_list built by gres_plugin_node_config_validate()
  * IN use_total_gres - if set then consider all gres resources as available,
  *		       and none are commited to running jobs
+ * IN cpu_bitmap     - Identification of available CPUs (NULL if no restriction)
+ * IN cpu_start_bit  - index into cpu_bitmap for this node's first CPU
+ * IN cpu_end_bit    - index into cpu_bitmap for this node's last CPU
  * RET: NO_VAL    - All CPUs on node are available
  *      otherwise - Specific CPU count
  */
-extern uint32_t gres_plugin_job_test(List job_gres_list, List node_gres_list,
-				     bool use_total_gres)
+extern uint32_t gres_plugin_job_test(List job_gres_list, List node_gres_list, 
+				     bool use_total_gres, bitstr_t *cpu_bitmap,
+				     int cpu_start_bit, int cpu_end_bit)
 {
 	int i;
 	uint32_t cpu_cnt, tmp_cnt;
@@ -2233,7 +2348,8 @@ extern uint32_t gres_plugin_job_test(List job_gres_list, List node_gres_list,
 				continue;
 			tmp_cnt = _job_test(job_gres_ptr->gres_data,
 					    node_gres_ptr->gres_data,
-					    use_total_gres);
+					    use_total_gres, cpu_bitmap,
+					    cpu_start_bit, cpu_end_bit);
 			cpu_cnt = MIN(tmp_cnt, cpu_cnt);
 			break;
 		}
diff --git a/src/common/gres.h b/src/common/gres.h
index a115369dfa4..00f811b76d7 100644
--- a/src/common/gres.h
+++ b/src/common/gres.h
@@ -311,15 +311,19 @@ extern int gres_plugin_job_state_unpack(List *gres_list, Buf buffer,
 
 /*
  * Determine how many CPUs on the node can be used by this job
- * IN job_gres_list - job's gres_list built by gres_plugin_job_state_validate()
+ * IN job_gres_list  - job's gres_list built by gres_plugin_job_state_validate()
  * IN node_gres_list - node's gres_list built by gres_plugin_node_config_validate()
  * IN use_total_gres - if set then consider all gres resources as available,
  *		       and none are commited to running jobs
+ * IN cpu_bitmap     - Identification of available CPUs (NULL if no restriction)
+ * IN cpu_start_bit  - index into cpu_bitmap for this node's first CPU
+ * IN cpu_end_bit    - index into cpu_bitmap for this node's last CPU
  * RET: NO_VAL    - All CPUs on node are available
  *      otherwise - Specific CPU count
  */
 extern uint32_t gres_plugin_job_test(List job_gres_list, List node_gres_list, 
-				     bool use_total_gres);
+				     bool use_total_gres, bitstr_t *cpu_bitmap,
+				     int cpu_start_bit, int cpu_end_bit);
 
 /*
  * Allocate resource to a job and update node and job gres information
diff --git a/src/plugins/select/cons_res/job_test.c b/src/plugins/select/cons_res/job_test.c
index 28751d7aabd..881daca80e1 100644
--- a/src/plugins/select/cons_res/job_test.c
+++ b/src/plugins/select/cons_res/job_test.c
@@ -617,6 +617,7 @@ uint16_t _can_job_run_on_node(struct job_record *job_ptr, bitstr_t *core_map,
 {
 	uint16_t cpus;
 	uint32_t avail_mem, req_mem, gres_cpus;
+	int core_start_bit, core_end_bit;
 	struct node_record *node_ptr;
 	List gres_list;
 
@@ -627,13 +628,17 @@ uint16_t _can_job_run_on_node(struct job_record *job_ptr, bitstr_t *core_map,
 	else
 		cpus = _allocate_cores(job_ptr, core_map, node_i, 1);
 
+	core_start_bit = cr_get_coremap_offset(node_i);
+	core_end_bit   = cr_get_coremap_offset(node_i+1) - 1;
 	node_ptr = select_node_record[node_i].node_ptr;
 	if (node_usage[node_i].gres_list)
 		gres_list = node_usage[node_i].gres_list;
 	else
 		gres_list = node_ptr->gres_list;
 	gres_cpus = gres_plugin_job_test(job_ptr->gres_list,
-					 node_ptr->gres_list, test_only);
+					 node_ptr->gres_list, test_only,
+					 core_map, core_start_bit,
+					 core_end_bit);
 	if (gres_cpus < cpus)
 		cpus = gres_cpus;
 
@@ -660,10 +665,8 @@ uint16_t _can_job_run_on_node(struct job_record *job_ptr, bitstr_t *core_map,
 		if (req_mem > avail_mem)
 			cpus = 0;
 	}
-	if (cpus == 0) {
-		bit_nclear(core_map, cr_get_coremap_offset(node_i),
-			   (cr_get_coremap_offset(node_i+1))-1);
-	}
+	if (cpus == 0)
+		bit_nclear(core_map, core_start_bit, core_end_bit);
 
 	debug3("cons_res: _can_job_run_on_node: %u cpus on %s(%d), mem %u/%u",
 		cpus, select_node_record[node_i].node_ptr->name,
@@ -731,7 +734,7 @@ static int _verify_node_state(struct part_res_record *cr_part_ptr,
 			      enum node_cr_state job_node_req)
 {
 	struct node_record *node_ptr;
-	uint32_t i, free_mem, min_mem, size;
+	uint32_t i, free_mem, gres_cpus, min_mem, size;
 	List gres_list;
 
 	min_mem = job_ptr->details->pn_min_memory & (~MEM_PER_CPU);
@@ -759,8 +762,10 @@ static int _verify_node_state(struct part_res_record *cr_part_ptr,
 			gres_list = node_usage[i].gres_list;
 		else
 			gres_list = node_ptr->gres_list;
-		if (gres_plugin_job_test(job_ptr->gres_list, 
-					 node_ptr->gres_list, true) == 0) {
+		gres_cpus = gres_plugin_job_test(job_ptr->gres_list, 
+						 node_ptr->gres_list, true,
+						 NULL, 0, 0);
+		if (gres_cpus == 0) {
 			info("cons_res: _vns: node %s lacks gres",
 			     node_ptr->name);
 			goto clear_bit;
diff --git a/src/plugins/select/linear/select_linear.c b/src/plugins/select/linear/select_linear.c
index 2509e597a8a..5a8782a6da8 100644
--- a/src/plugins/select/linear/select_linear.c
+++ b/src/plugins/select/linear/select_linear.c
@@ -648,7 +648,8 @@ static int _job_count_bitmap(struct cr_record *cr_ptr,
 		else
 			gres_list = node_ptr->gres_list;
 		gres_cpus = gres_plugin_job_test(job_ptr->gres_list, 
-						 gres_list, use_total_gres);
+						 gres_list, use_total_gres,
+						 NULL, 0, 0);
 		if ((gres_cpus != NO_VAL) && (gres_cpus < cpu_cnt)) {
 			bit_clear(jobmap, i);
 			continue;
-- 
GitLab