Commit 2a09d91d authored by Janne Blomqvist, committed by Brian Christiansen

Remove cpuset & devices cgroups after steps finish

Bugs 2681 and 2703
parent dd14342d
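
The commit adds the same teardown sequence to both task_cgroup_cpuset_fini() and task_cgroup_devices_fini(). As an illustration only, the shared pattern could be factored into a single hypothetical helper (no such helper exists in this commit); the sketch below assumes the xcgroup_* calls exactly as they appear in the hunks that follow:

/* Illustrative sketch, not part of the commit: the lock/migrate/delete
 * pattern that both fini functions repeat below. */
static void cleanup_subsystem_cgroups(xcgroup_ns_t *ns, xcgroup_t *user_cg,
				      xcgroup_t *job_cg, xcgroup_t *step_cg,
				      const char *subsys)
{
	xcgroup_t root_cg;

	/* Open and lock the subsystem's root cgroup so a step that is being
	 * started concurrently cannot race with the removal. */
	if (xcgroup_create(ns, &root_cg, "", 0, 0) != XCGROUP_SUCCESS) {
		error("task/cgroup: unable to create root %s : %m", subsys);
		return;
	}
	if (xcgroup_lock(&root_cg) == XCGROUP_SUCCESS) {
		/* slurmstepd itself sits in the step cgroup, and a cgroup
		 * with attached tasks cannot be removed, so move the daemon
		 * to the root cgroup first. */
		xcgroup_move_process(&root_cg, getpid());
		/* Delete innermost first; the job/user directories are left
		 * in place if other steps or jobs still use them. */
		if (xcgroup_delete(step_cg) != XCGROUP_SUCCESS)
			debug2("task/cgroup: unable to remove step %s : %m",
			       subsys);
		if (xcgroup_delete(job_cg) != XCGROUP_SUCCESS)
			debug2("task/cgroup: not removing job %s : %m",
			       subsys);
		if (xcgroup_delete(user_cg) != XCGROUP_SUCCESS)
			debug2("task/cgroup: not removing user %s : %m",
			       subsys);
		xcgroup_unlock(&root_cg);
	} else
		error("task/cgroup: unable to lock root %s : %m", subsys);
	xcgroup_destroy(&root_cg);
}
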
...@@ -6,6 +6,8 @@ documents those changes that are of interest to users and administrators.
 -- Add new RPC (REQUEST_EVENT_LOG) so that slurmd and slurmstepd can log events
    through the slurmctld daemon.
 -- Remove sbatch --bb option. That option was never supported.
 -- Automatically clean up task/cgroup cpuset and devices cgroups after steps
    are done.
* Changes in Slurm 17.02.0pre1
==============================
...
...@@ -1056,6 +1056,32 @@ extern int task_cgroup_cpuset_init(slurm_cgroup_conf_t *slurm_cgroup_conf)
extern int task_cgroup_cpuset_fini(slurm_cgroup_conf_t *slurm_cgroup_conf)
{
	xcgroup_t cpuset_cg;

	/* Similarly to task_cgroup_memory_fini(), we must lock the
	 * root cgroup so we don't race with another job step that is
	 * being started. */
	if (xcgroup_create(&cpuset_ns, &cpuset_cg, "", 0, 0)
	    == XCGROUP_SUCCESS) {
		if (xcgroup_lock(&cpuset_cg) == XCGROUP_SUCCESS) {
			/* First move slurmstepd to the root cpuset cg
			 * so we can remove the step/job/user cpuset
			 * cg's. */
			xcgroup_move_process(&cpuset_cg, getpid());
			if (xcgroup_delete(&step_cpuset_cg) != XCGROUP_SUCCESS)
				debug2("task/cgroup: unable to remove step "
				       "cpuset : %m");
			if (xcgroup_delete(&job_cpuset_cg) != XCGROUP_SUCCESS)
				debug2("task/cgroup: not removing "
				       "job cpuset : %m");
			if (xcgroup_delete(&user_cpuset_cg) != XCGROUP_SUCCESS)
				debug2("task/cgroup: not removing "
				       "user cpuset : %m");
			xcgroup_unlock(&cpuset_cg);
		} else
			error("task/cgroup: unable to lock root cpuset : %m");
		xcgroup_destroy(&cpuset_cg);
	} else
		error("task/cgroup: unable to create root cpuset : %m");
	if (user_cgroup_path[0] != '\0')
		xcgroup_destroy(&user_cpuset_cg);
...
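
Note on the ordering in the hunk above: a cgroup directory cannot be removed while tasks are still attached to it, and slurmstepd itself runs inside the step cgroup, so the daemon is moved to the root cgroup before the step, job, and user cgroups are deleted innermost-first. The job and user deletions are expected to fail (and only log at debug2 level) while other steps or jobs of the same user still occupy those directories, and the root-cgroup lock held around the whole sequence keeps a concurrently starting step from racing with the removal.
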
...@@ -113,6 +113,34 @@ error:
extern int task_cgroup_devices_fini(slurm_cgroup_conf_t *slurm_cgroup_conf)
{
	xcgroup_t devices_cg;

	/* Similarly to task_cgroup_{memory,cpuset}_fini(), we must lock the
	 * root cgroup so we don't race with another job step that is
	 * being started. */
	if (xcgroup_create(&devices_ns, &devices_cg, "", 0, 0)
	    == XCGROUP_SUCCESS) {
		if (xcgroup_lock(&devices_cg) == XCGROUP_SUCCESS) {
			/* First move slurmstepd to the root devices cg
			 * so we can remove the step/job/user devices
			 * cg's. */
			xcgroup_move_process(&devices_cg, getpid());
			if (xcgroup_delete(&step_devices_cg)
			    != XCGROUP_SUCCESS)
				debug2("task/cgroup: unable to remove step "
				       "devices : %m");
			if (xcgroup_delete(&job_devices_cg) != XCGROUP_SUCCESS)
				debug2("task/cgroup: not removing "
				       "job devices : %m");
			if (xcgroup_delete(&user_devices_cg)
			    != XCGROUP_SUCCESS)
				debug2("task/cgroup: not removing "
				       "user devices : %m");
			xcgroup_unlock(&devices_cg);
		} else
			error("task/cgroup: unable to lock root devices : %m");
		xcgroup_destroy(&devices_cg);
	} else
		error("task/cgroup: unable to create root devices : %m");
	if ( user_cgroup_path[0] != '\0' )
		xcgroup_destroy(&user_devices_cg);
...