Skip to content
Snippets Groups Projects
Commit ade1be4f authored by Morris Jette
Browse files

Fix CPU mapping for heterogeneous NUMA

This patch finally resolves absolute/relative CPU mapping for nodes
whose NUMA domains (or sockets) have different core counts (e.g. KNL in SNC4 mode).
parent 11332b5b
No related branches found
No related tags found
No related merge requests found
...@@ -205,11 +205,12 @@ get_cpuinfo(uint16_t *p_cpus, uint16_t *p_boards, ...@@ -205,11 +205,12 @@ get_cpuinfo(uint16_t *p_cpus, uint16_t *p_boards,
unsigned idx[LAST_OBJ]; unsigned idx[LAST_OBJ];
int nobj[LAST_OBJ]; int nobj[LAST_OBJ];
bitstr_t *used_socket = NULL; bitstr_t *used_socket = NULL;
int *cores_per_socket;
int actual_cpus; int actual_cpus;
int macid; int macid;
int absid; int absid;
int actual_boards = 1, depth, sock_cnt, tot_socks = 0; int actual_boards = 1, depth, sock_cnt, tot_socks = 0;
int i, used_sock_idx; int i, used_core_idx, used_sock_idx;
debug2("hwloc_topology_init"); debug2("hwloc_topology_init");
if (hwloc_topology_init(&topology)) { if (hwloc_topology_init(&topology)) {
...@@ -274,11 +275,13 @@ get_cpuinfo(uint16_t *p_cpus, uint16_t *p_boards, ...@@ -274,11 +275,13 @@ get_cpuinfo(uint16_t *p_cpus, uint16_t *p_boards,
nobj[SOCKET] = 0; nobj[SOCKET] = 0;
depth = hwloc_get_type_depth(topology, objtype[SOCKET]); depth = hwloc_get_type_depth(topology, objtype[SOCKET]);
used_socket = bit_alloc(_MAX_SOCKET_INX); used_socket = bit_alloc(_MAX_SOCKET_INX);
cores_per_socket = xmalloc(sizeof(int) * _MAX_SOCKET_INX);
sock_cnt = hwloc_get_nbobjs_by_depth(topology, depth); sock_cnt = hwloc_get_nbobjs_by_depth(topology, depth);
for (i = 0; i < sock_cnt; i++) { for (i = 0; i < sock_cnt; i++) {
obj = hwloc_get_obj_by_depth(topology, depth, i); obj = hwloc_get_obj_by_depth(topology, depth, i);
if (obj->type == objtype[SOCKET]) { if (obj->type == objtype[SOCKET]) {
if (_core_child_count(topology, obj) > 0) { cores_per_socket[i] = _core_child_count(topology, obj);
if (cores_per_socket[i] > 0) {
nobj[SOCKET]++; nobj[SOCKET]++;
bit_set(used_socket, tot_socks); bit_set(used_socket, tot_socks);
} }
...@@ -354,12 +357,16 @@ get_cpuinfo(uint16_t *p_cpus, uint16_t *p_boards, ...@@ -354,12 +357,16 @@ get_cpuinfo(uint16_t *p_cpus, uint16_t *p_boards,
} }
/* create map with hwloc */ /* create map with hwloc */
used_sock_idx = -1; used_sock_idx = -1;
used_core_idx = -1;
for (idx[SOCKET] = 0; (used_sock_idx + 1) < nobj[SOCKET]; for (idx[SOCKET] = 0; (used_sock_idx + 1) < nobj[SOCKET];
idx[SOCKET]++) { idx[SOCKET]++) {
if (!bit_test(used_socket, idx[SOCKET])) if (!bit_test(used_socket, idx[SOCKET]))
continue; continue;
used_sock_idx++; used_sock_idx++;
for (idx[CORE]=0; idx[CORE]<nobj[CORE]; ++idx[CORE]) { for (idx[CORE] = 0;
idx[CORE] < cores_per_socket[idx[SOCKET]];
idx[CORE]++) {
used_core_idx++;
for (idx[PU]=0; idx[PU]<nobj[PU]; ++idx[PU]) { for (idx[PU]=0; idx[PU]<nobj[PU]; ++idx[PU]) {
/* get hwloc_obj by indexes */ /* get hwloc_obj by indexes */
obj=hwloc_get_obj_below_array_by_type( obj=hwloc_get_obj_below_array_by_type(
...@@ -367,9 +374,7 @@ get_cpuinfo(uint16_t *p_cpus, uint16_t *p_boards, ...@@ -367,9 +374,7 @@ get_cpuinfo(uint16_t *p_cpus, uint16_t *p_boards,
if (!obj) if (!obj)
continue; continue;
macid = obj->os_index; macid = obj->os_index;
absid = used_sock_idx * nobj[CORE] * nobj[PU] absid = used_core_idx * nobj[PU] + idx[PU];
+ idx[CORE] * nobj[PU]
+ idx[PU];
if ((macid >= actual_cpus) || if ((macid >= actual_cpus) ||
(absid >= actual_cpus)) { (absid >= actual_cpus)) {
...@@ -377,7 +382,8 @@ get_cpuinfo(uint16_t *p_cpus, uint16_t *p_boards, ...@@ -377,7 +382,8 @@ get_cpuinfo(uint16_t *p_cpus, uint16_t *p_boards,
* out of range */ * out of range */
continue; continue;
} }
debug4("CPU map[%d]=>%d", absid, macid); debug4("CPU map[%d]=>%d S:C:T %d:%d:%d", absid, macid,
used_sock_idx, idx[CORE], idx[PU]);
(*p_block_map)[absid] = macid; (*p_block_map)[absid] = macid;
(*p_block_map_inv)[macid] = absid; (*p_block_map_inv)[macid] = absid;
} }
...@@ -385,6 +391,7 @@ get_cpuinfo(uint16_t *p_cpus, uint16_t *p_boards, ...@@ -385,6 +391,7 @@ get_cpuinfo(uint16_t *p_cpus, uint16_t *p_boards,
} }
} }
FREE_NULL_BITMAP(used_socket); FREE_NULL_BITMAP(used_socket);
xfree(cores_per_socket);
hwloc_topology_destroy(topology); hwloc_topology_destroy(topology);
/* update output parameters */ /* update output parameters */
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment