diff --git a/NEWS b/NEWS
index abc98b76e7279709f70672bcf9404e28194a14fb..d6d99b06dde8166ab5df03d2d72721f171a8eea9 100644
--- a/NEWS
+++ b/NEWS
@@ -69,6 +69,7 @@ documents those changes that are of interest to users and administrators.
     modified while the job is pending.
  -- Fix check of per-user qos limits on the initial run by a user.
  -- task/cgroup: Fix for task layout logic when there are disabled resources.
+ -- task/cgroup: Fix for task binding anomaly.
 
 * Changes in Slurm 15.08.8
 ==========================
diff --git a/src/plugins/task/cgroup/task_cgroup_cpuset.c b/src/plugins/task/cgroup/task_cgroup_cpuset.c
index 512ec75c8763a87b060229f2934e0b4ea6419914..c2ae9806d37869e53421aa7f3d4c049af5ea04a0 100644
--- a/src/plugins/task/cgroup/task_cgroup_cpuset.c
+++ b/src/plugins/task/cgroup/task_cgroup_cpuset.c
@@ -636,6 +636,24 @@ static void _add_hwloc_cpuset(
 	}
 }
 
+static int _hwloc_bit_count(hwloc_bitmap_t cpuset)
+{
+	int i_first, i_last, i, cnt = 0;
+
+	if (!cpuset)
+		return cnt;
+	i_first = hwloc_bitmap_first(cpuset);
+	if (i_first < 0)
+		return cnt;
+	cnt = 1;	/* For bit set at i_first */
+	i_last = hwloc_bitmap_last(cpuset);
+	for (i = i_first + 1; i <= i_last; i++) {
+		if (hwloc_bitmap_isset(cpuset, i))
+			cnt++;
+	}
+	return cnt;
+}
+
 static int _task_cgroup_cpuset_dist_cyclic(
 	hwloc_topology_t topology, hwloc_obj_type_t hwtype,
 	hwloc_obj_type_t req_hwtype, stepd_step_rec_t *job, int bind_verbose,
@@ -648,7 +666,7 @@ static int _task_cgroup_cpuset_dist_cyclic(
 	uint32_t *t_ix;		/* thread index by core by socket */
 	uint32_t npus, ncores, nsockets;
 	uint32_t taskid = job->envtp->localid;
-	int spec_thread_cnt = 0;
+	int spec_thread_cnt = 0, pu_cnt;
 	bitstr_t *spec_threads = NULL;
 
 	uint32_t obj_idxs[3], nthreads, cps,
@@ -736,6 +754,7 @@ static int _task_cgroup_cpuset_dist_cyclic(
 				topology, HWLOC_OBJ_SOCKET, s_ix,
 				hwtype, c_ixc[s_ix]);
 			if (obj != NULL) {
+				pu_cnt = _hwloc_bit_count(obj->allowed_cpuset);
 				if (hwloc_compare_types(hwtype,
 							HWLOC_OBJ_PU) >= 0) {
 					/* granularity is thread */
@@ -768,10 +787,15 @@ static int _task_cgroup_cpuset_dist_cyclic(
 						if (c_ixc[s_ix] == cps)
 							s_ix++;
 					}
+				} else if (pu_cnt < 1) {
+					/* No CPUs available on this core */
+					c_ixc[s_ix]++;
+					if (c_ixc[s_ix] == cps)
+						s_ix++;
 				} else {
 					/* granularity is core or larger */
 					c_ixc[s_ix]++;
-					j++;
+					j += pu_cnt;
 					if (i == ntskip)
 						_add_hwloc_cpuset(hwtype,
 							req_hwtype, obj, taskid,
@@ -785,7 +809,7 @@ static int _task_cgroup_cpuset_dist_cyclic(
 		/* if it succeeds, switch to the next task, starting
 		 * with the next available socket, otherwise, loop back
 		 * from the first socket trying to find available slots. */
-		if (j == npdist) {
+		if (j >= npdist) {
 			i++;
 			j = 0;
 			s_ix++; // no validity check, handled by the while
@@ -829,7 +853,7 @@ static int _task_cgroup_cpuset_dist_block(
 	uint32_t taskid = job->envtp->localid;
 	int hwdepth;
 	uint32_t npus, ncores, nsockets;
-	int spec_thread_cnt = 0;
+	int spec_thread_cnt = 0, pu_cnt;
 	bitstr_t *spec_threads = NULL;
 
 	uint32_t core_idx;
@@ -867,6 +891,10 @@ static int _task_cgroup_cpuset_dist_block(
 				hwtype, thread_idx[core_idx]);
 			if (obj != NULL) {
 				thread_idx[core_idx]++;
+				pu_cnt = _hwloc_bit_count(
+					obj->allowed_cpuset);
+				if (pu_cnt < 1)	/* No avail CPUs */
+					continue;
 				j++;
 				if (i == ntskip)
 					_add_hwloc_cpuset(hwtype,