diff --git a/src/plugins/task/cray/task_cray.c b/src/plugins/task/cray/task_cray.c index 0d70f4efd91d00a6724b8ba9564645e05a9673ff..fa60798ffd848ec8b218d96fbc9f978b098975ec 100644 --- a/src/plugins/task/cray/task_cray.c +++ b/src/plugins/task/cray/task_cray.c @@ -52,6 +52,7 @@ #include <sys/param.h> #include <errno.h> #include "limits.h" +#include <sched.h> #ifdef HAVE_NUMA #include <numa.h> @@ -117,7 +118,8 @@ unsigned int numa_bitmask_weight(const struct bitmask *bmp); #ifdef HAVE_NATIVE_CRAY static int _get_numa_nodes(char *path, int *cnt, int **numa_array); -static int _get_cpu_masks(char *path, cpu_set_t **cpuMasks); +static int _get_cpu_masks(int num_numa_nodes, int32_t *numa_array, + cpu_set_t **cpuMasks); #endif /* @@ -450,7 +452,7 @@ extern int task_p_post_step (stepd_step_rec_t *job) return SLURM_ERROR; } - rc = _get_cpu_masks(path, &cpuMasks); + rc = _get_cpu_masks(cnt, numa_nodes, &cpuMasks); if (rc < 0) { error("(%s: %d: %s) get_cpu_masks failed. Return code: %d", THIS_FILE, __LINE__, __FUNCTION__, rc); @@ -465,7 +467,7 @@ extern int task_p_post_step (stepd_step_rec_t *job) rc = alpsc_compact_mem(&err_msg, cnt, numa_nodes, cpuMasks, NULL); xfree(numa_nodes); - CPU_FREE(cpuMasks); + xfree(cpuMasks); if (rc != 1) { if (err_msg) { @@ -608,79 +610,134 @@ static int _get_numa_nodes(char *path, int *cnt, int32_t **numa_array) { * RETURN * 0 on success and -1 on failure. */ -static int _get_cpu_masks(char *path, cpu_set_t **cpuMasks) { - struct bitmask *bm; - int i, rc, cnt; - char buffer[PATH_MAX]; - FILE *f = NULL; - char *lin = NULL; - int lsz; - size_t sz; - - rc = snprintf(buffer, sizeof(buffer), "%s/%s", path, "cpus"); - if (rc < 0) { - error("(%s: %d: %s) snprintf failed. Return code: %d", - THIS_FILE, __LINE__, __FUNCTION__, rc); +#define NUM_INTS_TO_HOLD_ALL_CPUS \ + (numa_all_cpus_ptr->size / (sizeof(unsigned long) * 8)) +static int _get_cpu_masks(int num_numa_nodes, int32_t *numa_array, + cpu_set_t **cpuMasks) { + + struct bitmask **remaining_numa_node_cpus = NULL, *collective; + unsigned long **numa_node_cpus = NULL; + int i, j, at_least_one_cpu = 0, rc = 0; + cpu_set_t *cpusetptr; + + if (numa_available()) { + error("(%s: %d: %s) Libnuma not available", THIS_FILE, + __LINE__, __FUNCTION__); return -1; } - f = fopen(buffer, "r"); - if (f == NULL ) { - error("Failed to open file %s: %m\n", buffer); - return -1; + /* + * numa_node_cpus: The CPUs available to the NUMA node. + * numa_all_cpus_ptr: all CPUs on which the calling task may execute. + * remaining_numa_node_cpus: Bitwise-AND of the above two to get all of + * the CPUs that the task can run on in this + * NUMA node. + * collective: Collects all of the CPUs as a precaution. + */ + remaining_numa_node_cpus = xmalloc(num_numa_nodes * + sizeof(struct bitmask *)); + collective = numa_allocate_cpumask(); + numa_node_cpus = xmalloc(num_numa_nodes * sizeof(unsigned long*)); + for (i = 0; i < num_numa_nodes; i++) { + remaining_numa_node_cpus[i] = numa_allocate_cpumask(); + numa_node_cpus[i] = xmalloc(sizeof(unsigned long) * + NUM_INTS_TO_HOLD_ALL_CPUS); + rc = numa_node_to_cpus(numa_array[i], numa_node_cpus[i], + NUM_INTS_TO_HOLD_ALL_CPUS); + if (rc) { + error("(%s: %d: %s) numa_node_to_cpus. Return code: %d", + THIS_FILE, __LINE__, __FUNCTION__, rc); + } + for (j = 0; j < NUM_INTS_TO_HOLD_ALL_CPUS; j++) { + (remaining_numa_node_cpus[i]->maskp[j]) = + (numa_node_cpus[i][j]) & + (numa_all_cpus_ptr->maskp[j]); + collective->maskp[j] |= + (remaining_numa_node_cpus[i]->maskp[j]); + } } - lsz = getline(&lin, &sz, f); - if (lsz > 0) { - if (lin[strlen(lin) - 1] == '\n') { - lin[strlen(lin) - 1] = '\0'; - } - bm = numa_parse_cpustring(lin); - if (bm == NULL ) { - error("(%s: %d: %s) Error numa_parse_nodestring", - THIS_FILE, __LINE__, __FUNCTION__); - free(lin); - return -1; + /* + * Ensure that we have not masked off all of the CPUs. + * If we have, just re-enable them all. Better to clear them all than + * none of them. + */ + for (j=0; j < collective->size; j++) { + if (numa_bitmask_isbitset(collective, j)) { + at_least_one_cpu = 1; } - } else { - error("(%s: %d: %s) Reading %s failed.", THIS_FILE, __LINE__, - __FUNCTION__, buffer); - return -1; } - free(lin); - cnt = numa_bitmask_weight(bm); - if (cnt == 0) { - error("(%s: %d: %s)Error no CPUs found.", THIS_FILE, __LINE__, - __FUNCTION__); - return -1; + if (!at_least_one_cpu) { + for (i = 0; i < num_numa_nodes; i++) { + for (j = 0; j < + (remaining_numa_node_cpus[i]->size / + (sizeof(unsigned long) * 8)); + j++) { + (remaining_numa_node_cpus[i]->maskp[j]) = + (numa_all_cpus_ptr->maskp[j]); + } + } + } if (debug_flags & DEBUG_FLAG_TASK) { - info("Bitmask size: %lu\nSizeof(*(bm->maskp)):%zd\n" - "Bitmask %#lx\nBitmask weight(number of bits set): %u\n", - bm->size, sizeof(*(bm->maskp)), *(bm->maskp), cnt); - } + for (i =0; i < num_numa_nodes; i++) { + for (j = 0; j < NUM_INTS_TO_HOLD_ALL_CPUS; j++) { + info("%6lx", numa_node_cpus[i][j]); + } + info("|"); + } + info("\t Bitmask: Allowed CPUs for NUMA Node\n"); - *cpuMasks = CPU_ALLOC(cnt); + for (i =0; i < num_numa_nodes; i++) { + for (j = 0; j < NUM_INTS_TO_HOLD_ALL_CPUS; j++) { + info("%6lx", numa_all_cpus_ptr->maskp[j]); + } + info("|"); + } + info("\t Bitmask: Allowed CPUs for for CPUSET\n"); - if (*cpuMasks == NULL ) { - error("(%s: %d: %s)Error out of memory.\n", THIS_FILE, __LINE__, - __FUNCTION__); - return -1; + for (i =0; i < num_numa_nodes; i++) { + for (j = 0; j < NUM_INTS_TO_HOLD_ALL_CPUS; j++) { + info("%6lx", + remaining_numa_node_cpus[i]->maskp[j]); + } + info("|"); + } + info("\t Bitmask: Allowed CPUs between CPUSet and NUMA Node\n"); } - for (i = 0; i < bm->size; i++) { - if (*(bm->maskp) & ((long unsigned) 1 << i)) { - if (debug_flags & DEBUG_FLAG_TASK) { - info("(%s: %d: %s)CPU %d is present.\n", - THIS_FILE, __LINE__, __FUNCTION__, i); + + // Convert bitmasks to cpu_set_t types + cpusetptr = xmalloc(num_numa_nodes * sizeof(cpu_set_t)); + + for (i=0; i < num_numa_nodes; i++) { + CPU_ZERO(&cpusetptr[i]); + for (j=0; j < remaining_numa_node_cpus[i]->size; j++) { + if (numa_bitmask_isbitset(remaining_numa_node_cpus[i], + j)) { + CPU_SET(j, &cpusetptr[i]); } - CPU_SET(i, *cpuMasks); } + if (debug_flags & DEBUG_FLAG_TASK) { + info("CPU_COUNT() of set: %d\n", + CPU_COUNT(&cpusetptr[i])); + } + } + + *cpuMasks = cpusetptr; + + // Freeing Everything + numa_free_cpumask(collective); + for (i =0; i < num_numa_nodes; i++) { + xfree(numa_node_cpus[i]); + numa_free_cpumask(remaining_numa_node_cpus[i]); } + xfree(numa_node_cpus); + xfree(numa_node_cpus); + xfree(remaining_numa_node_cpus); - numa_free_cpumask(bm); return 0; } #endif