From 2a09d91d97a33fbece2c3342f0bc2ca68d3ce625 Mon Sep 17 00:00:00 2001
From: Janne Blomqvist <janne.blomqvist@aalto.fi>
Date: Fri, 29 Jul 2016 13:39:45 -0600
Subject: [PATCH] Remove cpuset & devices cgroups after steps finish

Bugs 2681 and 2703
---
 NEWS                                          |  2 ++
 src/plugins/task/cgroup/task_cgroup_cpuset.c  | 26 +++++++++++++++++
 src/plugins/task/cgroup/task_cgroup_devices.c | 28 +++++++++++++++++++
 3 files changed, 56 insertions(+)

diff --git a/NEWS b/NEWS
index bb4989ccc8e..5b728bd1ae5 100644
--- a/NEWS
+++ b/NEWS
@@ -6,6 +6,8 @@ documents those changes that are of interest to users and administrators.
  -- Add new RPC (REQUEST_EVENT_LOG) so that slurmd and slurmstepd can log events
     through the slurmctld daemon.
  -- Remove sbatch --bb option. That option was never supported.
+ -- Automatically clean up task/cgroup cpuset and devices cgroups after steps
+    are done.
 
 * Changes in Slurm 17.02.0pre1
 ==============================
diff --git a/src/plugins/task/cgroup/task_cgroup_cpuset.c b/src/plugins/task/cgroup/task_cgroup_cpuset.c
index 0bcbb38ceac..5fa99f516b7 100644
--- a/src/plugins/task/cgroup/task_cgroup_cpuset.c
+++ b/src/plugins/task/cgroup/task_cgroup_cpuset.c
@@ -1056,6 +1056,32 @@ extern int task_cgroup_cpuset_init(slurm_cgroup_conf_t *slurm_cgroup_conf)
 
 extern int task_cgroup_cpuset_fini(slurm_cgroup_conf_t *slurm_cgroup_conf)
 {
+	xcgroup_t cpuset_cg;
+
+	/* Similarly to task_cgroup_memory_fini(), we must lock the
+	 * root cgroup so we don't race with another job step that is
+	 * being started.  */
+        if (xcgroup_create(&cpuset_ns, &cpuset_cg,"",0,0) == XCGROUP_SUCCESS) {
+                if (xcgroup_lock(&cpuset_cg) == XCGROUP_SUCCESS) {
+			/* First move slurmstepd to the root cpuset cg
+			 * so we can remove the step/job/user cpuset
+			 * cg's.  */
+			xcgroup_move_process(&cpuset_cg, getpid());
+                        if (xcgroup_delete(&step_cpuset_cg) != XCGROUP_SUCCESS)
+                                debug2("task/cgroup: unable to remove step "
+                                       "cpuset : %m");
+                        if (xcgroup_delete(&job_cpuset_cg) != XCGROUP_SUCCESS)
+                                debug2("task/cgroup: not removing "
+                                       "job cpuset : %m");
+                        if (xcgroup_delete(&user_cpuset_cg) != XCGROUP_SUCCESS)
+                                debug2("task/cgroup: not removing "
+                                       "user cpuset : %m");
+                        xcgroup_unlock(&cpuset_cg);
+                } else
+                        error("task/cgroup: unable to lock root cpuset : %m");
+                xcgroup_destroy(&cpuset_cg);
+        } else
+                error("task/cgroup: unable to create root cpuset : %m");
 
 	if (user_cgroup_path[0] != '\0')
 		xcgroup_destroy(&user_cpuset_cg);
diff --git a/src/plugins/task/cgroup/task_cgroup_devices.c b/src/plugins/task/cgroup/task_cgroup_devices.c
index 2a5c00e9e54..583721449ea 100644
--- a/src/plugins/task/cgroup/task_cgroup_devices.c
+++ b/src/plugins/task/cgroup/task_cgroup_devices.c
@@ -113,6 +113,34 @@ error:
 
 extern int task_cgroup_devices_fini(slurm_cgroup_conf_t *slurm_cgroup_conf)
 {
+	xcgroup_t devices_cg;
+
+	/* Similarly to task_cgroup_{memory,cpuset}_fini(), we must lock the
+	 * root cgroup so we don't race with another job step that is
+	 * being started.  */
+        if (xcgroup_create(&devices_ns, &devices_cg,"",0,0)
+	    == XCGROUP_SUCCESS) {
+                if (xcgroup_lock(&devices_cg) == XCGROUP_SUCCESS) {
+			/* First move slurmstepd to the root devices cg
+			 * so we can remove the step/job/user devices
+			 * cg's.  */
+			xcgroup_move_process(&devices_cg, getpid());
+                        if (xcgroup_delete(&step_devices_cg) != XCGROUP_SUCCESS)
+                                debug2("task/cgroup: unable to remove step "
+                                       "devices : %m");
+                        if (xcgroup_delete(&job_devices_cg) != XCGROUP_SUCCESS)
+                                debug2("task/cgroup: not removing "
+                                       "job devices : %m");
+                        if (xcgroup_delete(&user_devices_cg)
+			    != XCGROUP_SUCCESS)
+                                debug2("task/cgroup: not removing "
+                                       "user devices : %m");
+                        xcgroup_unlock(&devices_cg);
+                } else
+                        error("task/cgroup: unable to lock root devices : %m");
+                xcgroup_destroy(&devices_cg);
+        } else
+                error("task/cgroup: unable to create root devices : %m");
 
 	if ( user_cgroup_path[0] != '\0' )
 		xcgroup_destroy(&user_devices_cg);
-- 
GitLab