From 903b5654f27f76565bba1b2ef4ff33de4033be90 Mon Sep 17 00:00:00 2001
From: Morris Jette <jette@schedmd.com>
Date: Tue, 15 Jan 2013 15:19:27 -0800
Subject: [PATCH] Correct gres logic to handle difference in core/cpu count

gres_plugin_job_test() was returning a count of cores available
to a job, but the select plugins were treating this as a CPU count.
This change converts the core count into a CPU count as needed in
the select plugins and updates the comments describing the return
value of gres_plugin_job_test().
---
 src/common/gres.c                         |  4 +--
 src/common/gres.h                         |  4 +--
 src/plugins/select/cons_res/job_test.c    | 36 ++++++++++++++--------
 src/plugins/select/linear/select_linear.c | 16 +++++++---
 src/plugins/select/serial/job_test.c      | 37 +++++++++++++++--------
 5 files changed, 63 insertions(+), 34 deletions(-)

diff --git a/src/common/gres.c b/src/common/gres.c
index a1155575cce..fa321506b4f 100644
--- a/src/common/gres.c
+++ b/src/common/gres.c
@@ -2681,8 +2681,8 @@ extern uint32_t _job_test(void *job_gres_data, void *node_gres_data,
  * IN cpu_end_bit    - index into cpu_bitmap for this node's last CPU
  * IN job_id         - job's ID (for logging)
  * IN node_name      - name of the node (for logging)
- * RET: NO_VAL    - All CPUs on node are available
- *      otherwise - Specific CPU count
+ * RET: NO_VAL    - All cores on node are available
+ *      otherwise - Count of available cores
  */
 extern uint32_t gres_plugin_job_test(List job_gres_list, List node_gres_list,
 				     bool use_total_gres, bitstr_t *cpu_bitmap,
diff --git a/src/common/gres.h b/src/common/gres.h
index 525998ef29b..65d2470c8ac 100644
--- a/src/common/gres.h
+++ b/src/common/gres.h
@@ -374,8 +374,8 @@ extern int gres_plugin_job_state_unpack(List *gres_list, Buf buffer,
  * IN cpu_end_bit    - index into cpu_bitmap for this node's last CPU
  * IN job_id         - job's ID (for logging)
  * IN node_name      - name of the node (for logging)
- * RET: NO_VAL    - All CPUs on node are available
- *      otherwise - Specific CPU count
+ * RET: NO_VAL    - All cores on node are available
+ *      otherwise - Count of available cores
  */
 extern uint32_t gres_plugin_job_test(List job_gres_list, List node_gres_list,
 				     bool use_total_gres, bitstr_t *cpu_bitmap,
diff --git a/src/plugins/select/cons_res/job_test.c b/src/plugins/select/cons_res/job_test.c
index a31d3520aab..1ce0972a458 100644
--- a/src/plugins/select/cons_res/job_test.c
+++ b/src/plugins/select/cons_res/job_test.c
@@ -588,7 +588,7 @@ uint16_t _can_job_run_on_node(struct job_record *job_ptr, bitstr_t *core_map,
 			      bool test_only)
 {
 	uint16_t cpus;
-	uint32_t avail_mem, req_mem, gres_cpus;
+	uint32_t avail_mem, req_mem, gres_cores, gres_cpus, cpus_per_core;
 	int core_start_bit, core_end_bit, cpu_alloc_size;
 	struct node_record *node_ptr = node_record_table_ptr + node_i;
 	List gres_list;
@@ -614,6 +614,8 @@ uint16_t _can_job_run_on_node(struct job_record *job_ptr, bitstr_t *core_map,
 	}
 	core_start_bit = cr_get_coremap_offset(node_i);
 	core_end_bit   = cr_get_coremap_offset(node_i+1) - 1;
+	cpus_per_core  = select_node_record[node_i].cpus /
+			 (core_end_bit - core_start_bit + 1);
 	node_ptr = select_node_record[node_i].node_ptr;
 
 	if (cr_type & CR_MEMORY) {
@@ -645,11 +647,14 @@ uint16_t _can_job_run_on_node(struct job_record *job_ptr, bitstr_t *core_map,
 		gres_list = node_usage[node_i].gres_list;
 	else
 		gres_list = node_ptr->gres_list;
-	gres_cpus = gres_plugin_job_test(job_ptr->gres_list,
-					 gres_list, test_only,
-					 core_map, core_start_bit,
-					 core_end_bit, job_ptr->job_id,
-					 node_ptr->name);
+	gres_cores = gres_plugin_job_test(job_ptr->gres_list,
+					  gres_list, test_only,
+					  core_map, core_start_bit,
+					  core_end_bit, job_ptr->job_id,
+					  node_ptr->name);
+	gres_cpus = gres_cores;
+	if (gres_cpus != NO_VAL)
+		gres_cpus *= cpus_per_core;
 	if ((gres_cpus < job_ptr->details->ntasks_per_node) ||
 	    ((job_ptr->details->cpus_per_task > 1) &&
 	     (gres_cpus < job_ptr->details->cpus_per_task)))
@@ -729,7 +734,8 @@ static int _verify_node_state(struct part_res_record *cr_part_ptr,
 			      enum node_cr_state job_node_req)
 {
 	struct node_record *node_ptr;
-	uint32_t i, free_mem, gres_cpus, min_mem, size;
+	uint32_t i, free_mem, gres_cpus, gres_cores, min_mem, size;
+	int core_start_bit, core_end_bit, cpus_per_core;
 	List gres_list;
 
 	if (job_ptr->details->pn_min_memory & MEM_PER_CPU) {
@@ -748,7 +754,10 @@ static int _verify_node_state(struct part_res_record *cr_part_ptr,
 		if (!bit_test(bitmap, i))
 			continue;
 		node_ptr = select_node_record[i].node_ptr;
-
+		core_start_bit = cr_get_coremap_offset(i);
+		core_end_bit   = cr_get_coremap_offset(i+1) - 1;
+		cpus_per_core  = select_node_record[i].cpus /
+				 (core_end_bit - core_start_bit + 1);
 		/* node-level memory check */
 		if ((job_ptr->details->pn_min_memory) &&
 		    (cr_type & CR_MEMORY)) {
@@ -771,10 +780,13 @@ static int _verify_node_state(struct part_res_record *cr_part_ptr,
 			gres_list = node_usage[i].gres_list;
 		else
 			gres_list = node_ptr->gres_list;
-		gres_cpus = gres_plugin_job_test(job_ptr->gres_list, 
-						 gres_list, true,
-						 NULL, 0, 0, job_ptr->job_id,
-						 node_ptr->name);
+		gres_cores = gres_plugin_job_test(job_ptr->gres_list,
+						  gres_list, true,
+						  NULL, 0, 0, job_ptr->job_id,
+						  node_ptr->name);
+		gres_cpus = gres_cores;
+		if (gres_cpus != NO_VAL)
+			gres_cpus *= cpus_per_core;
 		if (gres_cpus == 0) {
 			debug3("cons_res: _vns: node %s lacks gres",
 			       node_ptr->name);
diff --git a/src/plugins/select/linear/select_linear.c b/src/plugins/select/linear/select_linear.c
index 116c923f76a..f353f68a5a5 100644
--- a/src/plugins/select/linear/select_linear.c
+++ b/src/plugins/select/linear/select_linear.c
@@ -635,7 +635,8 @@ static int _job_count_bitmap(struct cr_record *cr_ptr,
 	struct node_record *node_ptr;
 	uint32_t job_memory_cpu = 0, job_memory_node = 0;
 	uint32_t alloc_mem = 0, job_mem = 0, avail_mem = 0;
-	uint32_t cpu_cnt, gres_cpus;
+	uint32_t cpu_cnt, gres_cpus, gres_cores;
+	int core_start_bit, core_end_bit, cpus_per_core;
 	List gres_list;
 	bool use_total_gres = true;
 
@@ -675,11 +676,16 @@ static int _job_count_bitmap(struct cr_record *cr_ptr,
 			gres_list = cr_ptr->nodes[i].gres_list;
 		else
 			gres_list = node_ptr->gres_list;
-		gres_cpus = gres_plugin_job_test(job_ptr->gres_list,
-						 gres_list, use_total_gres,
-						 NULL, 0, 0, job_ptr->job_id,
-						 node_ptr->name);
+		core_start_bit = cr_get_coremap_offset(i);
+		core_end_bit   = cr_get_coremap_offset(i+1) - 1;
+		cpus_per_core  = cpu_cnt / (core_end_bit - core_start_bit + 1);
+		gres_cores = gres_plugin_job_test(job_ptr->gres_list,
+						  gres_list, use_total_gres,
+						  NULL, 0, 0, job_ptr->job_id,
+						  node_ptr->name);
+		gres_cpus = gres_cores;
 		if (gres_cpus != NO_VAL) {
+			gres_cpus *= cpus_per_core;
 			if ((gres_cpus < cpu_cnt) ||
 			    (gres_cpus < job_ptr->details->ntasks_per_node) ||
 			    ((job_ptr->details->cpus_per_task > 1) &&
diff --git a/src/plugins/select/serial/job_test.c b/src/plugins/select/serial/job_test.c
index 997360e1923..78cf7e41e48 100644
--- a/src/plugins/select/serial/job_test.c
+++ b/src/plugins/select/serial/job_test.c
@@ -101,7 +101,7 @@ uint16_t _can_job_run_on_node(struct job_record *job_ptr, bitstr_t *core_map,
 			      bool test_only)
 {
 	uint16_t cpus;
-	uint32_t avail_mem, req_mem, gres_cpus;
+	uint32_t avail_mem, req_mem, gres_cpus, gres_cores, cpus_per_core;
 	int core_start_bit, core_end_bit;
 	struct node_record *node_ptr = node_record_table_ptr + node_i;
 	List gres_list;
@@ -117,7 +117,8 @@ uint16_t _can_job_run_on_node(struct job_record *job_ptr, bitstr_t *core_map,
 	core_start_bit = cr_get_coremap_offset(node_i);
 	core_end_bit   = cr_get_coremap_offset(node_i + 1) - 1;
 	node_ptr = select_node_record[node_i].node_ptr;
-
+	cpus_per_core  = select_node_record[node_i].cpus /
+			 (core_end_bit - core_start_bit + 1);
 	if ((cr_type & CR_MEMORY) && cpus) {
 		req_mem   = job_ptr->details->pn_min_memory & ~MEM_PER_CPU;
 		avail_mem = select_node_record[node_i].real_memory;
@@ -131,11 +132,14 @@ uint16_t _can_job_run_on_node(struct job_record *job_ptr, bitstr_t *core_map,
 		gres_list = node_usage[node_i].gres_list;
 	else
 		gres_list = node_ptr->gres_list;
-	gres_cpus = gres_plugin_job_test(job_ptr->gres_list,
-					 gres_list, test_only,
-					 core_map, core_start_bit,
-					 core_end_bit, job_ptr->job_id,
-					 node_ptr->name);
+	gres_cores = gres_plugin_job_test(job_ptr->gres_list,
+					  gres_list, test_only,
+					  core_map, core_start_bit,
+					  core_end_bit, job_ptr->job_id,
+					  node_ptr->name);
+	gres_cpus = gres_cores;
+	if (gres_cpus != NO_VAL)
+		gres_cpus *= cpus_per_core;
 	if ((gres_cpus < job_ptr->details->ntasks_per_node) ||
 	    ((job_ptr->details->cpus_per_task > 1) &&
 	     (gres_cpus < job_ptr->details->cpus_per_task)))
@@ -215,8 +219,9 @@ static int _verify_node_state(struct part_res_record *cr_part_ptr,
 			      enum node_cr_state job_node_req)
 {
 	struct node_record *node_ptr;
-	uint32_t i, free_mem, gres_cpus, min_mem;
+	uint32_t i, free_mem, gres_cpus, gres_cores, min_mem;
 	int i_first, i_last;
+	int core_start_bit, core_end_bit, cpus_per_core;
 	List gres_list;
 
 	if (job_ptr->details->pn_min_memory & MEM_PER_CPU)
@@ -232,7 +237,10 @@ static int _verify_node_state(struct part_res_record *cr_part_ptr,
 		if (!bit_test(bitmap, i))
 			continue;
 		node_ptr = select_node_record[i].node_ptr;
-
+		core_start_bit = cr_get_coremap_offset(i);
+		core_end_bit   = cr_get_coremap_offset(i+1) - 1;
+		cpus_per_core  = select_node_record[i].cpus /
+				 (core_end_bit - core_start_bit + 1);
 		/* node-level memory check */
 		if ((job_ptr->details->pn_min_memory) &&
 		    (cr_type & CR_MEMORY)) {
@@ -251,10 +259,13 @@ static int _verify_node_state(struct part_res_record *cr_part_ptr,
 			gres_list = node_usage[i].gres_list;
 		else
 			gres_list = node_ptr->gres_list;
-		gres_cpus = gres_plugin_job_test(job_ptr->gres_list, 
-						 gres_list, true,
-						 NULL, 0, 0, job_ptr->job_id,
-						 node_ptr->name);
+		gres_cores = gres_plugin_job_test(job_ptr->gres_list,
+						  gres_list, true,
+						  NULL, 0, 0, job_ptr->job_id,
+						  node_ptr->name);
+		gres_cpus = gres_cores;
+		if (gres_cpus != NO_VAL)
+			gres_cpus *= cpus_per_core;
 		if (gres_cpus == 0) {
 			debug3("select/serial: node %s lacks gres",
 			       node_ptr->name);
-- 
GitLab