From 5df2ad71c05b9609a663d07dbe5a9d09661a804c Mon Sep 17 00:00:00 2001
From: Matthieu Hautreux <matthieu.hautreux@cea.fr>
Date: Thu, 13 Oct 2011 14:48:39 +0200
Subject: [PATCH] cgroup: ensure that the plugins' cg subsystems use a default
 slurm root cg

In order to distinguish between slurm-related and system-related cgroups,
ensure that all slurm-related cgroup directories are created under a
single directory. This directory is named "slurm", or "slurm_<nodename>"
when multiple slurmd daemons are used (MULTIPLE_SLURMD).
---
 etc/cgroup.release_common.example             | 18 ++++++-
 src/common/xcgroup_read_config.c              |  8 +++
 src/common/xcgroup_read_config.h              |  2 +
 .../proctrack/cgroup/proctrack_cgroup.c       | 30 ++++++++++-
 src/plugins/task/cgroup/Makefile.am           |  2 +-
 src/plugins/task/cgroup/task_cgroup.c         | 44 ++++++++++++++--
 src/plugins/task/cgroup/task_cgroup.h         | 50 +++++++++++++++++++
 src/plugins/task/cgroup/task_cgroup_cpuset.c  | 20 ++++++--
 src/plugins/task/cgroup/task_cgroup_devices.c | 24 ++++++---
 src/plugins/task/cgroup/task_cgroup_memory.c  | 21 ++++++--
 10 files changed, 197 insertions(+), 22 deletions(-)
 create mode 100644 src/plugins/task/cgroup/task_cgroup.h

diff --git a/etc/cgroup.release_common.example b/etc/cgroup.release_common.example
index 6c6c4d03a25..8badda3d0e2 100644
--- a/etc/cgroup.release_common.example
+++ b/etc/cgroup.release_common.example
@@ -16,7 +16,6 @@
 base_path=/cgroup
 progname=$(basename $0)
 subsystem=${progname##*_}
-orphancg=${base_path}/${subsystem}/orphan
 
 if [[ $# -eq 0 ]]
 then
@@ -24,6 +23,23 @@ then
     exit 1
 fi
 
+# build orphan cg path
+if [[ $# -eq 1 ]]
+then
+    rmcg=${base_path}/${subsystem}$1
+else
+    rmcg=${base_path}/${subsystem}$2
+fi
+slurmcg=${rmcg%/uid_*}
+if [[ ${slurmcg} == ${rmcg} ]]
+then
+    # not a slurm job pattern, perhaps the slurmcg, just remove 
+    # the dir with a lock and exit
+    flock -x ${base_path}/${subsystem} -c "rmdir ${rmcg}"
+    exit $?
+fi
+orphancg=${slurmcg}/orphan
+
 # make sure orphan cgroup is existing
 if [[ ! -d ${orphancg} ]]
 then
diff --git a/src/common/xcgroup_read_config.c b/src/common/xcgroup_read_config.c
index 6f711408452..8a4d192dc87 100644
--- a/src/common/xcgroup_read_config.c
+++ b/src/common/xcgroup_read_config.c
@@ -75,6 +75,7 @@ static void _clear_slurm_cgroup_conf(slurm_cgroup_conf_t *slurm_cgroup_conf)
 		slurm_cgroup_conf->cgroup_automount = false ;
 		xfree(slurm_cgroup_conf->cgroup_subsystems);
 		xfree(slurm_cgroup_conf->cgroup_release_agent);
+		xfree(slurm_cgroup_conf->cgroup_prepend);
 		slurm_cgroup_conf->constrain_cores = false ;
 		slurm_cgroup_conf->task_affinity = false ;
 		slurm_cgroup_conf->constrain_ram_space = false ;
@@ -147,6 +148,13 @@ extern int read_slurm_cgroup_conf(slurm_cgroup_conf_t *slurm_cgroup_conf)
 			slurm_cgroup_conf->cgroup_release_agent =
 				xstrdup("/etc/slurm/cgroup");
 
+		/* cgroup prepend directory */
+#ifndef MULTIPLE_SLURMD
+		slurm_cgroup_conf->cgroup_prepend = xstrdup("/slurm");
+#else
+		slurm_cgroup_conf->cgroup_prepend = xstrdup("/slurm_%n");
+#endif
+
 		/* Cores constraints related conf items */
 		if (!s_p_get_boolean(&slurm_cgroup_conf->constrain_cores,
 				     "ConstrainCores", tbl))
diff --git a/src/common/xcgroup_read_config.h b/src/common/xcgroup_read_config.h
index 7693b0e2f3b..dd82e0e0e17 100644
--- a/src/common/xcgroup_read_config.h
+++ b/src/common/xcgroup_read_config.h
@@ -58,6 +58,8 @@ typedef struct slurm_cgroup_conf {
 	char *    cgroup_subsystems;
 	char *    cgroup_release_agent;
 
+	char *    cgroup_prepend;
+
 	bool      constrain_cores;
 	bool      task_affinity;
 
diff --git a/src/plugins/proctrack/cgroup/proctrack_cgroup.c b/src/plugins/proctrack/cgroup/proctrack_cgroup.c
index 798379a9419..6b782fbf872 100644
--- a/src/plugins/proctrack/cgroup/proctrack_cgroup.c
+++ b/src/plugins/proctrack/cgroup/proctrack_cgroup.c
@@ -54,6 +54,7 @@
 
 #include "src/common/xcgroup_read_config.h"
 #include "src/common/xcgroup.h"
+#include "src/common/xstring.h"
 #include "src/common/xcpuinfo.h"
 
 #include <sys/types.h>
@@ -158,15 +159,42 @@ int _slurm_cgroup_init(void)
 
 int _slurm_cgroup_create(slurmd_job_t *job, uint64_t id, uid_t uid, gid_t gid)
 {
+	/* done here because libslurm (src/common/xcgroup.c) has no conf */
+	xcgroup_t slurm_cg;
+	char* pre = (char*) xstrdup(slurm_cgroup_conf.cgroup_prepend);
+#ifdef MULTIPLE_SLURMD
+	if ( conf->node_name != NULL )
+		xstrsubstitute(pre,"%n", conf->node_name);
+	else {
+		xfree(pre);
+		pre = (char*) xstrdup("/slurm");
+	}
+#endif
+
+	/* create slurm cgroup in the freezer ns (it could already exist) */
+	if (xcgroup_create(&freezer_ns, &slurm_cg,pre,
+			   getuid(), getgid()) != XCGROUP_SUCCESS) {
+		xfree(pre);	/* do not leak the prefix on early return */
+		return SLURM_ERROR;
+	}
+	if (xcgroup_instanciate(&slurm_cg) != XCGROUP_SUCCESS) {
+		xcgroup_destroy(&slurm_cg);
+		xfree(pre);	/* do not leak the prefix on early return */
+		return SLURM_ERROR;
+	}
+	xcgroup_destroy(&slurm_cg);
+
 	/* build user cgroup relative path if not set (should not be) */
 	if (*user_cgroup_path == '\0') {
 		if (snprintf(user_cgroup_path, PATH_MAX,
-			      "/uid_%u", uid) >= PATH_MAX) {
+			     "%s/uid_%u", pre, uid) >= PATH_MAX) {
 			error("unable to build uid %u cgroup relative "
 			      "path : %m", uid);
+			xfree(pre);
 			return SLURM_ERROR;
 		}
 	}
+	xfree(pre);
 
 	/* build job cgroup relative path if no set (should not be) */
 	if (*job_cgroup_path == '\0') {
diff --git a/src/plugins/task/cgroup/Makefile.am b/src/plugins/task/cgroup/Makefile.am
index f7cc3e07272..1813b9a4f3e 100644
--- a/src/plugins/task/cgroup/Makefile.am
+++ b/src/plugins/task/cgroup/Makefile.am
@@ -9,7 +9,7 @@ INCLUDES = -I$(top_srcdir) -I$(top_srcdir)/src/common
 pkglib_LTLIBRARIES = task_cgroup.la
 
 # cgroup task plugin.
-task_cgroup_la_SOURCES = 	task_cgroup.c \
+task_cgroup_la_SOURCES = 	task_cgroup.h task_cgroup.c \
 				task_cgroup_cpuset.h task_cgroup_cpuset.c \
 				task_cgroup_memory.h task_cgroup_memory.c \
 				task_cgroup_devices.h task_cgroup_devices.c
diff --git a/src/plugins/task/cgroup/task_cgroup.c b/src/plugins/task/cgroup/task_cgroup.c
index 0fbb3b90731..14f783f89c2 100644
--- a/src/plugins/task/cgroup/task_cgroup.c
+++ b/src/plugins/task/cgroup/task_cgroup.c
@@ -45,10 +45,11 @@
 #include "slurm/slurm_errno.h"
 #include "src/common/slurm_xlator.h"
 #include "src/slurmd/slurmstepd/slurmstepd_job.h"
-
-#include "src/common/xcgroup_read_config.h"
+#include "src/slurmd/slurmd/slurmd.h"
 #include "src/common/xcgroup.h"
+#include "src/common/xstring.h"
 
+#include "task_cgroup.h"
 #include "task_cgroup_cpuset.h"
 #include "task_cgroup_memory.h"
 #include "task_cgroup_devices.h"
@@ -88,7 +89,6 @@ static bool use_cpuset  = false;
 static bool use_memory  = false;
 static bool use_devices = false;
 
-static slurm_cgroup_conf_t slurm_cgroup_conf;
 
 /*
  * init() is called when the plugin is loaded, before any other functions
@@ -278,3 +278,41 @@ extern int task_post_step (slurmd_job_t *job)
 	fini();
 	return SLURM_SUCCESS;
 }
+
+/*
+ * Create (or reuse) the slurm root cgroup inside namespace ns.
+ * Done here because libslurm (src/common/xcgroup.c) cannot see the conf.
+ * Returns the xstrdup'ed relative path of that cgroup, which the caller
+ * must xfree(), or NULL if the cgroup could not be created.
+ */
+extern char* task_cgroup_create_slurm_cg (xcgroup_ns_t* ns)
+{
+	xcgroup_t slurm_cg;
+	char* pre = xstrdup(slurm_cgroup_conf.cgroup_prepend);
+#ifdef MULTIPLE_SLURMD
+	/* expand the node name pattern, or fall back to the plain name */
+	if ( conf->node_name != NULL )
+		xstrsubstitute(pre,"%n", conf->node_name);
+	else {
+		xfree(pre);
+		pre = xstrdup("/slurm");
+	}
+#endif
+
+	/* create slurm cgroup in the ns (it could already exist) */
+	if (xcgroup_create(ns,&slurm_cg,pre,
+			   getuid(), getgid()) != XCGROUP_SUCCESS) {
+		xfree(pre);
+		return NULL;
+	}
+	if (xcgroup_instanciate(&slurm_cg) != XCGROUP_SUCCESS) {
+		error("unable to build slurm cgroup for ns %s: %m",
+		      ns->subsystems);
+		xfree(pre);
+		pre = NULL;
+	} else
+		debug3("slurm cgroup %s successfully created for ns %s",
+		       pre, ns->subsystems);
+	xcgroup_destroy(&slurm_cg);
+	return pre;
+}
diff --git a/src/plugins/task/cgroup/task_cgroup.h b/src/plugins/task/cgroup/task_cgroup.h
new file mode 100644
index 00000000000..aca72304311
--- /dev/null
+++ b/src/plugins/task/cgroup/task_cgroup.h
@@ -0,0 +1,50 @@
+/*****************************************************************************\
+ *  task_cgroup.h - cgroup common primitives for task/cgroup
+ *****************************************************************************
+ *  Copyright (C) 2009 CEA/DAM/DIF
+ *  Written by Matthieu Hautreux <matthieu.hautreux@cea.fr>
+ *
+ *  This file is part of SLURM, a resource management program.
+ *  For details, see <http://www.schedmd.com/slurmdocs/>.
+ *  Please also read the included file: DISCLAIMER.
+ *
+ *  SLURM is free software; you can redistribute it and/or modify it under
+ *  the terms of the GNU General Public License as published by the Free
+ *  Software Foundation; either version 2 of the License, or (at your option)
+ *  any later version.
+ *
+ *  In addition, as a special exception, the copyright holders give permission
+ *  to link the code of portions of this program with the OpenSSL library under
+ *  certain conditions as described in each individual source file, and
+ *  distribute linked combinations including the two. You must obey the GNU
+ *  General Public License in all respects for all of the code used other than
+ *  OpenSSL. If you modify file(s) with this exception, you may extend this
+ *  exception to your version of the file(s), but you are not obligated to do
+ *  so. If you do not wish to do so, delete this exception statement from your
+ *  version.  If you delete this exception statement from all source files in
+ *  the program, then also delete it here.
+ *
+ *  SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
+ *  WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ *  FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
+ *  details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with SLURM; if not, write to the Free Software Foundation, Inc.,
+ *  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA.
+\*****************************************************************************/
+
+#if HAVE_CONFIG_H
+#   include "config.h"
+#endif
+
+#ifndef _TASK_CGROUP_H_
+#define _TASK_CGROUP_H_
+#include "src/common/xcgroup.h"
+#include "src/common/xcgroup_read_config.h"
+
+/* NOTE(review): static in a header gives each including .c its own copy */
+static slurm_cgroup_conf_t slurm_cgroup_conf;
+/* Create slurm's root cg in ns; returns its path (caller xfree's) or NULL */
+extern char* task_cgroup_create_slurm_cg (xcgroup_ns_t* ns);
+#endif
diff --git a/src/plugins/task/cgroup/task_cgroup_cpuset.c b/src/plugins/task/cgroup/task_cgroup_cpuset.c
index 59f46512d39..48b94140f88 100644
--- a/src/plugins/task/cgroup/task_cgroup_cpuset.c
+++ b/src/plugins/task/cgroup/task_cgroup_cpuset.c
@@ -52,6 +52,8 @@
 #include "src/common/xcgroup.h"
 #include "src/common/xcpuinfo.h"
 
+#include "task_cgroup.h"
+
 #ifdef HAVE_HWLOC
 #include <hwloc.h>
 #include <hwloc/glibc-sched.h>
@@ -165,15 +167,25 @@ extern int task_cgroup_cpuset_create(slurmd_job_t *job)
 	char* cpus = NULL;
 	size_t cpus_size;
 
+	char* slurm_cgpath ;
+
+	/* create slurm root cg in this cg namespace */
+	slurm_cgpath = task_cgroup_create_slurm_cg(&cpuset_ns);
+	if ( slurm_cgpath == NULL ) {
+		return SLURM_ERROR;
+	}
+
 	/* build user cgroup relative path if not set (should not be) */
 	if (*user_cgroup_path == '\0') {
-		if (snprintf(user_cgroup_path,PATH_MAX,
-			      "/uid_%u",uid) >= PATH_MAX) {
-			error("task/cgroup: unable to build uid %u cpuset "
-			      "cg relative path : %m",uid);
+		if (snprintf(user_cgroup_path, PATH_MAX,
+			     "%s/uid_%u", slurm_cgpath, uid) >= PATH_MAX) {
+			error("unable to build uid %u cgroup relative "
+			      "path : %m", uid);
+			xfree(slurm_cgpath);
 			return SLURM_ERROR;
 		}
 	}
+	xfree(slurm_cgpath);
 
 	/* build job cgroup relative path if no set (should not be) */
 	if (*job_cgroup_path == '\0') {
diff --git a/src/plugins/task/cgroup/task_cgroup_devices.c b/src/plugins/task/cgroup/task_cgroup_devices.c
index 9053bec0e91..8aab2012b05 100644
--- a/src/plugins/task/cgroup/task_cgroup_devices.c
+++ b/src/plugins/task/cgroup/task_cgroup_devices.c
@@ -56,11 +56,12 @@
 #include "src/common/gres.h"
 #include "src/common/list.h"
 
+#include "task_cgroup.h"
+
 #ifndef PATH_MAX
 #define PATH_MAX 256
 #endif
 
-
 static char user_cgroup_path[PATH_MAX];
 static char job_cgroup_path[PATH_MAX];
 static char jobstep_cgroup_path[PATH_MAX];
@@ -179,16 +180,25 @@ extern int task_cgroup_devices_create(slurmd_job_t *job)
 	List job_gres_list = job->job_gres_list;
 	List step_gres_list = job->step_gres_list;
 
-	
+	char* slurm_cgpath ;
+
+	/* create slurm root cg in this cg namespace */
+	slurm_cgpath = task_cgroup_create_slurm_cg(&devices_ns);
+	if ( slurm_cgpath == NULL ) {
+		return SLURM_ERROR;
+	}
+
 	/* build user cgroup relative path if not set (should not be) */
-	if ( *user_cgroup_path == '\0' ) {
-		if ( snprintf(user_cgroup_path,PATH_MAX,
-			      "/uid_%u", uid) >= PATH_MAX ) {
-		error("task/cgroup: unable to build uid %u devices "
-		      "cg relative path : %m", uid);
+	if (*user_cgroup_path == '\0') {
+		if (snprintf(user_cgroup_path, PATH_MAX,
+			     "%s/uid_%u", slurm_cgpath, uid) >= PATH_MAX) {
+			error("unable to build uid %u cgroup relative "
+			      "path : %m", uid);
+			xfree(slurm_cgpath);
 			return SLURM_ERROR;
 		}
 	}
+	xfree(slurm_cgpath);
 
 	/* build job cgroup relative path if no set (should not be) */
 	if ( *job_cgroup_path == '\0' ) {
diff --git a/src/plugins/task/cgroup/task_cgroup_memory.c b/src/plugins/task/cgroup/task_cgroup_memory.c
index 70de4c76f2d..5c09740361c 100644
--- a/src/plugins/task/cgroup/task_cgroup_memory.c
+++ b/src/plugins/task/cgroup/task_cgroup_memory.c
@@ -50,6 +50,8 @@
 #include "src/common/xcgroup_read_config.h"
 #include "src/common/xcgroup.h"
 
+#include "task_cgroup.h"
+
 #ifndef PATH_MAX
 #define PATH_MAX 256
 #endif
@@ -67,7 +69,6 @@ static xcgroup_t step_memory_cg;
 static int allowed_ram_space;
 static int allowed_swap_space;
 
-
 extern int task_cgroup_memory_init(slurm_cgroup_conf_t *slurm_cgroup_conf)
 {
 	char release_agent_path[PATH_MAX];
@@ -185,15 +186,25 @@ extern int task_cgroup_memory_create(slurmd_job_t *job)
 	pid_t pid;
 	uint64_t ml,mlb,mls;
 
+	char* slurm_cgpath ;
+
+	/* create slurm root cg in this cg namespace */
+	slurm_cgpath = task_cgroup_create_slurm_cg(&memory_ns);
+	if ( slurm_cgpath == NULL ) {
+		return SLURM_ERROR;
+	}
+
 	/* build user cgroup relative path if not set (should not be) */
 	if (*user_cgroup_path == '\0') {
-		if (snprintf(user_cgroup_path,PATH_MAX,
-			      "/uid_%u",uid) >= PATH_MAX) {
-			error("task/cgroup: unable to build uid %u memory "
-			      "cg relative path : %m",uid);
+		if (snprintf(user_cgroup_path, PATH_MAX,
+			     "%s/uid_%u", slurm_cgpath, uid) >= PATH_MAX) {
+			error("unable to build uid %u cgroup relative "
+			      "path : %m", uid);
+			xfree(slurm_cgpath);
 			return SLURM_ERROR;
 		}
 	}
+	xfree(slurm_cgpath);
 
 	/* build job cgroup relative path if no set (should not be) */
 	if (*job_cgroup_path == '\0') {
-- 
GitLab