diff --git a/NEWS b/NEWS index 91907c4864c1983b0525ff89473a5e6830db7790..5f8139e83e2ae14ec584a0bf6f41d0c5c0e25cad 100644 --- a/NEWS +++ b/NEWS @@ -240,6 +240,10 @@ documents those changes that are of interest to users and administrators. -- jobcomp/elasticsearch - fix printf format for a value on 32-bit builds. -- task/cgroup - Change error message if CPU binding can not take place to better identify the root cause of the problem. + -- Fix issue where task/cgroup would not always honor --cpu_bind=threads. + -- Fix race condition with getgrouplist() in slurmd that can lead to + user accounts being granted access to incorrect group memberships during + job launch. * Changes in Slurm 16.05.6 ========================== diff --git a/src/plugins/task/cgroup/task_cgroup_cpuset.c b/src/plugins/task/cgroup/task_cgroup_cpuset.c index 551e5032cf9f2756cc1ed33d52a52e50c34dc0ba..1b355a431c11302b7fc0ca537050529484b2ad54 100644 --- a/src/plugins/task/cgroup/task_cgroup_cpuset.c +++ b/src/plugins/task/cgroup/task_cgroup_cpuset.c @@ -1529,14 +1529,24 @@ extern int task_cgroup_cpuset_set_task_affinity(stepd_step_rec_t *job) (job->job_core_spec != CORE_SPEC_THREAD)) { spec_threads = job->job_core_spec & (~CORE_SPEC_THREAD); } - if (npus >= (jnpus + spec_threads) || bind_type & CPU_BIND_TO_THREADS) { + + /* Set this to PU but realise it could be overridden later if we can + * fill up a core. 
+ */ + if (npus >= (jnpus + spec_threads)) { hwtype = HWLOC_OBJ_PU; nobj = npus; } - if (ncores >= jnpus || bind_type & CPU_BIND_TO_CORES) { + + /* Force to bind to Threads */ + if (bind_type & CPU_BIND_TO_THREADS) { + hwtype = HWLOC_OBJ_PU; + nobj = npus; + } else if (ncores >= jnpus || bind_type & CPU_BIND_TO_CORES) { hwtype = HWLOC_OBJ_CORE; nobj = ncores; } + if (nsockets >= jntasks && bind_type & CPU_BIND_TO_SOCKETS) { hwtype = socket_or_node; diff --git a/src/slurmd/slurmd/req.c b/src/slurmd/slurmd/req.c index f2e70c83f3bac5d43b9621fef870ae44c9cff544..ac1b6601dd9433da1200b09a4b3d2e93d02a46b9 100644 --- a/src/slurmd/slurmd/req.c +++ b/src/slurmd/slurmd/req.c @@ -6064,14 +6064,16 @@ _gids_cache_register(char *user, gid_t gid, gids_t *gids) debug2("Cached group access list for %s/%d", user, gid); } -static gids_t * -_gids_cache_lookup(char *user, gid_t gid) +/* how many groups to use by default to avoid repeated calls to getgrouplist */ +#define NGROUPS_START 64 + +static gids_t *_gids_cache_lookup(char *user, gid_t gid) { size_t idx; gids_cache_t *p; bool found_but_old = false; time_t now = 0; - int ngroups = 0; + int ngroups = NGROUPS_START; gid_t *groups; gids_t *ret_gids = NULL; @@ -6100,10 +6102,11 @@ _gids_cache_lookup(char *user, gid_t gid) } /* Cache lookup failed or cached value was too old, fetch new * value and insert it into cache. */ - getgrouplist(user, gid, NULL, &ngroups); groups = xmalloc(ngroups * sizeof(gid_t)); - if (getgrouplist(user, gid, groups, &ngroups) == -1) - error("getgrouplist failed"); + while (getgrouplist(user, gid, groups, &ngroups) == -1) { + /* group list larger than array, resize array to fit */ + groups = xrealloc(groups, ngroups * sizeof(gid_t)); + } if (found_but_old) { xfree(p->gids->gids); p->gids->gids = groups;