From ade1be4ff829fea748d6031eae227ac70f21e25f Mon Sep 17 00:00:00 2001
From: Morris Jette <jette@schedmd.com>
Date: Sun, 25 Sep 2016 19:01:08 -0600
Subject: [PATCH] Fix CPU mapping for heterogeneous NUMA

This patch finally resolves absolute/relative CPU mapping for nodes
where the NUMA (or sockets) have different core counts (e.g. KNL SNC4).
---
 src/slurmd/common/xcpuinfo.c | 21 ++++++++++++++-------
 1 file changed, 14 insertions(+), 7 deletions(-)

diff --git a/src/slurmd/common/xcpuinfo.c b/src/slurmd/common/xcpuinfo.c
index 8231d3e8242..e9d6803edeb 100644
--- a/src/slurmd/common/xcpuinfo.c
+++ b/src/slurmd/common/xcpuinfo.c
@@ -205,11 +205,12 @@ get_cpuinfo(uint16_t *p_cpus, uint16_t *p_boards,
 	unsigned idx[LAST_OBJ];
 	int nobj[LAST_OBJ];
 	bitstr_t *used_socket = NULL;
+	int *cores_per_socket;
 	int actual_cpus;
 	int macid;
 	int absid;
 	int actual_boards = 1, depth, sock_cnt, tot_socks = 0;
-	int i, used_sock_idx;
+	int i, used_core_idx, used_sock_idx;
 
 	debug2("hwloc_topology_init");
 	if (hwloc_topology_init(&topology)) {
@@ -274,11 +275,13 @@ get_cpuinfo(uint16_t *p_cpus, uint16_t *p_boards,
 	nobj[SOCKET] = 0;
 	depth = hwloc_get_type_depth(topology, objtype[SOCKET]);
 	used_socket = bit_alloc(_MAX_SOCKET_INX);
+	cores_per_socket = xmalloc(sizeof(int) * _MAX_SOCKET_INX);
 	sock_cnt = hwloc_get_nbobjs_by_depth(topology, depth);
 	for (i = 0; i < sock_cnt; i++) {
 		obj = hwloc_get_obj_by_depth(topology, depth, i);
 		if (obj->type == objtype[SOCKET]) {
-			if (_core_child_count(topology, obj) > 0) {
+			cores_per_socket[i] = _core_child_count(topology, obj);
+			if (cores_per_socket[i] > 0) {
 				nobj[SOCKET]++;
 				bit_set(used_socket, tot_socks);
 			}
@@ -354,12 +357,16 @@ get_cpuinfo(uint16_t *p_cpus, uint16_t *p_boards,
 		}
 		/* create map with hwloc */
 		used_sock_idx = -1;
+		used_core_idx = -1;
 		for (idx[SOCKET] = 0; (used_sock_idx + 1) < nobj[SOCKET];
 		     idx[SOCKET]++) {
 			if (!bit_test(used_socket, idx[SOCKET]))
 				continue;
 			used_sock_idx++;
-			for (idx[CORE]=0; idx[CORE]<nobj[CORE]; ++idx[CORE]) {
+			for (idx[CORE] = 0;
+			     idx[CORE] < cores_per_socket[idx[SOCKET]];
+			     idx[CORE]++) {
+				used_core_idx++;
 				for (idx[PU]=0; idx[PU]<nobj[PU]; ++idx[PU]) {
 					/* get hwloc_obj by indexes */
 					obj=hwloc_get_obj_below_array_by_type(
@@ -367,9 +374,7 @@ get_cpuinfo(uint16_t *p_cpus, uint16_t *p_boards,
 					if (!obj)
 						continue;
 					macid = obj->os_index;
-					absid = used_sock_idx * nobj[CORE] * nobj[PU]
-					      + idx[CORE] * nobj[PU]
-					      + idx[PU];
+					absid = used_core_idx * nobj[PU] + idx[PU];
 
 					if ((macid >= actual_cpus) ||
 					    (absid >= actual_cpus)) {
@@ -377,7 +382,8 @@ get_cpuinfo(uint16_t *p_cpus, uint16_t *p_boards,
 						 * out of range */
 						continue;
 					}
-					debug4("CPU map[%d]=>%d", absid, macid);
+					debug4("CPU map[%d]=>%d S:C:T %d:%d:%d", absid, macid,
+					       used_sock_idx, idx[CORE], idx[PU]);
 					(*p_block_map)[absid]     = macid;
 					(*p_block_map_inv)[macid] = absid;
 				}
@@ -385,6 +391,7 @@ get_cpuinfo(uint16_t *p_cpus, uint16_t *p_boards,
 		}
 	}
 	FREE_NULL_BITMAP(used_socket);
+	xfree(cores_per_socket);
 	hwloc_topology_destroy(topology);
 
 	/* update output parameters */
-- 
GitLab