From ed19e5bc10ccf3a60baaf625e611da33c0caba62 Mon Sep 17 00:00:00 2001
From: Don Lipari <lipari1@llnl.gov>
Date: Tue, 29 Mar 2011 22:33:25 +0000
Subject: [PATCH] merged
 https://eris.llnl.gov/svn/slurm/branches/cgroups_Matthieu/ + -- Added
 proctrack/cgroup and task/cgroup plugins written by Matthieu +    Hautreux,
 CEA.

---
 Makefile.am                                   |    4 +-
 Makefile.in                                   |    4 +-
 NEWS                                          |    2 +
 configure                                     |    5 +-
 configure.ac                                  |    3 +-
 doc/html/configurator.html.in                 |    7 +-
 doc/html/taskplugins.shtml                    |   15 +-
 doc/man/man5/cgroup.conf.5                    |  213 +++-
 doc/man/man5/slurm.conf.5                     |   19 +-
 etc/cgroup.conf.example                       |   69 +-
 etc/cgroup.release_agent                      |    2 -
 etc/cgroup.release_common.example             |  128 ++
 slurm.spec                                    |   16 +-
 src/common/Makefile.am                        |    3 +
 src/common/Makefile.in                        |   57 +-
 src/common/xcgroup.c                          | 1112 +++++++++++++++++
 src/common/xcgroup.h                          |  319 +++++
 .../xcgroup_read_config.c}                    |   79 +-
 .../xcgroup_read_config.h}                    |   49 +-
 src/common/xcpuinfo.c                         |  982 +++++++++++++++
 src/common/xcpuinfo.h                         |  166 +++
 src/plugins/proctrack/cgroup/Changelog        |   20 -
 src/plugins/proctrack/cgroup/Makefile.am      |   11 +-
 src/plugins/proctrack/cgroup/Makefile.in      |   25 +-
 .../proctrack/cgroup/proctrack_cgroup.c       |  626 ++++------
 src/plugins/proctrack/cgroup/xcgroup.c        |  985 ---------------
 src/plugins/proctrack/cgroup/xcgroup.h        |  237 ----
 src/plugins/proctrack/cgroup/xcpuinfo.c       |  312 -----
 src/plugins/task/Makefile.am                  |    2 +-
 src/plugins/task/Makefile.in                  |    2 +-
 src/plugins/task/affinity/task_affinity.c     |    8 +
 src/plugins/task/cgroup/Makefile.am           |   16 +
 src/plugins/task/cgroup/Makefile.in           |  669 ++++++++++
 src/plugins/task/cgroup/task_cgroup.c         |  279 +++++
 src/plugins/task/cgroup/task_cgroup_cpuset.c  |  681 ++++++++++
 .../cgroup/task_cgroup_cpuset.h}              |   86 +-
 src/plugins/task/cgroup/task_cgroup_memory.c  |  348 ++++++
 src/plugins/task/cgroup/task_cgroup_memory.h  |   58 +
 src/plugins/task/none/task_none.c             |    8 +
 src/slurmd/common/task_plugin.c               |   15 +
 src/slurmd/common/task_plugin.h               |    7 +
 src/slurmd/slurmd/get_mach_stat.c             |  570 +--------
 src/slurmd/slurmd/get_mach_stat.h             |    5 -
 src/slurmd/slurmd/slurmd.c                    |    5 +-
 src/slurmd/slurmstepd/mgr.c                   |    5 +
 45 files changed, 5410 insertions(+), 2824 deletions(-)
 delete mode 100644 etc/cgroup.release_agent
 create mode 100644 etc/cgroup.release_common.example
 create mode 100644 src/common/xcgroup.c
 create mode 100644 src/common/xcgroup.h
 rename src/{plugins/proctrack/cgroup/read_config.c => common/xcgroup_read_config.c} (79%)
 rename src/{plugins/proctrack/cgroup/read_config.h => common/xcgroup_read_config.h} (85%)
 create mode 100644 src/common/xcpuinfo.c
 create mode 100644 src/common/xcpuinfo.h
 delete mode 100644 src/plugins/proctrack/cgroup/Changelog
 delete mode 100644 src/plugins/proctrack/cgroup/xcgroup.c
 delete mode 100644 src/plugins/proctrack/cgroup/xcgroup.h
 delete mode 100644 src/plugins/proctrack/cgroup/xcpuinfo.c
 create mode 100644 src/plugins/task/cgroup/Makefile.am
 create mode 100644 src/plugins/task/cgroup/Makefile.in
 create mode 100644 src/plugins/task/cgroup/task_cgroup.c
 create mode 100644 src/plugins/task/cgroup/task_cgroup_cpuset.c
 rename src/plugins/{proctrack/cgroup/xcpuinfo.h => task/cgroup/task_cgroup_cpuset.h} (61%)
 create mode 100644 src/plugins/task/cgroup/task_cgroup_memory.c
 create mode 100644 src/plugins/task/cgroup/task_cgroup_memory.h

diff --git a/Makefile.am b/Makefile.am
index 4048bc6df8d..58771d8d7a1 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -10,8 +10,8 @@ EXTRA_DIST =			\
 	etc/slurm.epilog.clean	\
 	etc/init.d.slurm	\
 	etc/init.d.slurmdbd	\
-	etc/cgroup.conf.example	\
-	etc/cgroup.release_agent \
+	etc/cgroup.conf.example \
+	etc/cgroup.release_common.example \
 	autogen.sh		\
 	slurm.spec		\
 	README			\
diff --git a/Makefile.in b/Makefile.in
index a5e4f187b8f..9e0384e2afc 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -372,8 +372,8 @@ EXTRA_DIST = \
 	etc/slurm.epilog.clean	\
 	etc/init.d.slurm	\
 	etc/init.d.slurmdbd	\
-	etc/cgroup.conf.example	\
-	etc/cgroup.release_agent \
+	etc/cgroup.conf.example \
+	etc/cgroup.release_common.example \
 	autogen.sh		\
 	slurm.spec		\
 	README			\
diff --git a/NEWS b/NEWS
index b269c7c1362..55b64fef6a7 100644
--- a/NEWS
+++ b/NEWS
@@ -41,6 +41,8 @@ documents those changes that are of interest to users and admins.
     jobs with more than 65,536 tasks. Patch from Hongjia Cao, NUDT.
  -- Set slurmd's soft process CPU limit equal to it's hard limit and notify the
     user if the limit is not infinite.
+ -- Added proctrack/cgroup and task/cgroup plugins written by Matthieu
+    Hautreux, CEA.
 
 * Changes in SLURM 2.3.0.pre3
 =============================
diff --git a/configure b/configure
index 0b18fd90183..53dc9fe97d0 100755
--- a/configure
+++ b/configure
@@ -21175,7 +21175,7 @@ fi
 
 
 
-ac_config_files="$ac_config_files Makefile config.xml auxdir/Makefile contribs/Makefile contribs/cray/Makefile contribs/pam/Makefile contribs/perlapi/Makefile contribs/perlapi/libslurm/Makefile contribs/perlapi/libslurm/perl/Makefile.PL contribs/perlapi/libslurmdb/Makefile contribs/perlapi/libslurmdb/perl/Makefile.PL contribs/torque/Makefile contribs/phpext/Makefile contribs/phpext/slurm_php/config.m4 contribs/sjobexit/Makefile contribs/slurmdb-direct/Makefile src/Makefile src/api/Makefile src/common/Makefile src/db_api/Makefile src/database/Makefile src/sacct/Makefile src/sacctmgr/Makefile src/sreport/Makefile src/sstat/Makefile src/sshare/Makefile src/salloc/Makefile src/sbatch/Makefile src/sattach/Makefile src/sprio/Makefile src/srun/Makefile src/srun_cr/Makefile src/slurmd/Makefile src/slurmd/common/Makefile src/slurmd/slurmd/Makefile src/slurmd/slurmstepd/Makefile src/slurmdbd/Makefile src/slurmctld/Makefile src/sbcast/Makefile src/scontrol/Makefile src/scancel/Makefile src/squeue/Makefile src/sinfo/Makefile src/smap/Makefile src/strigger/Makefile src/sview/Makefile src/plugins/Makefile src/plugins/accounting_storage/Makefile src/plugins/accounting_storage/common/Makefile src/plugins/accounting_storage/filetxt/Makefile src/plugins/accounting_storage/mysql/Makefile src/plugins/accounting_storage/pgsql/Makefile src/plugins/accounting_storage/none/Makefile src/plugins/accounting_storage/slurmdbd/Makefile src/plugins/auth/Makefile src/plugins/auth/authd/Makefile src/plugins/auth/munge/Makefile src/plugins/auth/none/Makefile src/plugins/checkpoint/Makefile src/plugins/checkpoint/aix/Makefile src/plugins/checkpoint/none/Makefile src/plugins/checkpoint/ompi/Makefile src/plugins/checkpoint/blcr/Makefile src/plugins/checkpoint/blcr/cr_checkpoint.sh src/plugins/checkpoint/blcr/cr_restart.sh src/plugins/crypto/Makefile src/plugins/crypto/munge/Makefile src/plugins/crypto/openssl/Makefile src/plugins/gres/Makefile src/plugins/gres/gpu/Makefile 
src/plugins/gres/nic/Makefile src/plugins/jobacct_gather/Makefile src/plugins/jobacct_gather/linux/Makefile src/plugins/jobacct_gather/aix/Makefile src/plugins/jobacct_gather/none/Makefile src/plugins/jobcomp/Makefile src/plugins/jobcomp/filetxt/Makefile src/plugins/jobcomp/none/Makefile src/plugins/jobcomp/script/Makefile src/plugins/jobcomp/mysql/Makefile src/plugins/jobcomp/pgsql/Makefile src/plugins/job_submit/Makefile src/plugins/job_submit/cnode/Makefile src/plugins/job_submit/defaults/Makefile src/plugins/job_submit/logging/Makefile src/plugins/job_submit/lua/Makefile src/plugins/job_submit/partition/Makefile src/plugins/preempt/Makefile src/plugins/preempt/none/Makefile src/plugins/preempt/partition_prio/Makefile src/plugins/preempt/qos/Makefile src/plugins/priority/Makefile src/plugins/priority/basic/Makefile src/plugins/priority/multifactor/Makefile src/plugins/proctrack/Makefile src/plugins/proctrack/aix/Makefile src/plugins/proctrack/pgid/Makefile src/plugins/proctrack/linuxproc/Makefile src/plugins/proctrack/rms/Makefile src/plugins/proctrack/sgi_job/Makefile src/plugins/proctrack/cgroup/Makefile src/plugins/proctrack/lua/Makefile src/plugins/sched/Makefile src/plugins/sched/backfill/Makefile src/plugins/sched/builtin/Makefile src/plugins/sched/hold/Makefile src/plugins/sched/wiki/Makefile src/plugins/sched/wiki2/Makefile src/plugins/select/Makefile src/plugins/select/bluegene/Makefile src/plugins/select/bluegene/ba/Makefile src/plugins/select/bluegene/ba_bgq/Makefile src/plugins/select/bluegene/bl/Makefile src/plugins/select/bluegene/bl_bgq/Makefile src/plugins/select/bluegene/sfree/Makefile src/plugins/select/cons_res/Makefile src/plugins/select/cray/Makefile src/plugins/select/cray/libalps/Makefile src/plugins/select/cray/libemulate/Makefile src/plugins/select/linear/Makefile src/plugins/switch/Makefile src/plugins/switch/elan/Makefile src/plugins/switch/none/Makefile src/plugins/switch/federation/Makefile src/plugins/mpi/Makefile 
src/plugins/mpi/mpich1_p4/Makefile src/plugins/mpi/mpich1_shmem/Makefile src/plugins/mpi/mpichgm/Makefile src/plugins/mpi/mpichmx/Makefile src/plugins/mpi/mvapich/Makefile src/plugins/mpi/lam/Makefile src/plugins/mpi/none/Makefile src/plugins/mpi/openmpi/Makefile src/plugins/task/Makefile src/plugins/task/affinity/Makefile src/plugins/task/none/Makefile src/plugins/topology/Makefile src/plugins/topology/3d_torus/Makefile src/plugins/topology/node_rank/Makefile src/plugins/topology/none/Makefile src/plugins/topology/tree/Makefile doc/Makefile doc/man/Makefile doc/html/Makefile doc/html/configurator.html testsuite/Makefile testsuite/expect/Makefile testsuite/slurm_unit/Makefile testsuite/slurm_unit/api/Makefile testsuite/slurm_unit/api/manual/Makefile testsuite/slurm_unit/common/Makefile"
+ac_config_files="$ac_config_files Makefile config.xml auxdir/Makefile contribs/Makefile contribs/cray/Makefile contribs/pam/Makefile contribs/perlapi/Makefile contribs/perlapi/libslurm/Makefile contribs/perlapi/libslurm/perl/Makefile.PL contribs/perlapi/libslurmdb/Makefile contribs/perlapi/libslurmdb/perl/Makefile.PL contribs/torque/Makefile contribs/phpext/Makefile contribs/phpext/slurm_php/config.m4 contribs/sjobexit/Makefile contribs/slurmdb-direct/Makefile src/Makefile src/api/Makefile src/common/Makefile src/db_api/Makefile src/database/Makefile src/sacct/Makefile src/sacctmgr/Makefile src/sreport/Makefile src/sstat/Makefile src/sshare/Makefile src/salloc/Makefile src/sbatch/Makefile src/sattach/Makefile src/sprio/Makefile src/srun/Makefile src/srun_cr/Makefile src/slurmd/Makefile src/slurmd/common/Makefile src/slurmd/slurmd/Makefile src/slurmd/slurmstepd/Makefile src/slurmdbd/Makefile src/slurmctld/Makefile src/sbcast/Makefile src/scontrol/Makefile src/scancel/Makefile src/squeue/Makefile src/sinfo/Makefile src/smap/Makefile src/strigger/Makefile src/sview/Makefile src/plugins/Makefile src/plugins/accounting_storage/Makefile src/plugins/accounting_storage/common/Makefile src/plugins/accounting_storage/filetxt/Makefile src/plugins/accounting_storage/mysql/Makefile src/plugins/accounting_storage/pgsql/Makefile src/plugins/accounting_storage/none/Makefile src/plugins/accounting_storage/slurmdbd/Makefile src/plugins/auth/Makefile src/plugins/auth/authd/Makefile src/plugins/auth/munge/Makefile src/plugins/auth/none/Makefile src/plugins/checkpoint/Makefile src/plugins/checkpoint/aix/Makefile src/plugins/checkpoint/none/Makefile src/plugins/checkpoint/ompi/Makefile src/plugins/checkpoint/blcr/Makefile src/plugins/checkpoint/blcr/cr_checkpoint.sh src/plugins/checkpoint/blcr/cr_restart.sh src/plugins/crypto/Makefile src/plugins/crypto/munge/Makefile src/plugins/crypto/openssl/Makefile src/plugins/gres/Makefile src/plugins/gres/gpu/Makefile 
src/plugins/gres/nic/Makefile src/plugins/jobacct_gather/Makefile src/plugins/jobacct_gather/linux/Makefile src/plugins/jobacct_gather/aix/Makefile src/plugins/jobacct_gather/none/Makefile src/plugins/jobcomp/Makefile src/plugins/jobcomp/filetxt/Makefile src/plugins/jobcomp/none/Makefile src/plugins/jobcomp/script/Makefile src/plugins/jobcomp/mysql/Makefile src/plugins/jobcomp/pgsql/Makefile src/plugins/job_submit/Makefile src/plugins/job_submit/cnode/Makefile src/plugins/job_submit/defaults/Makefile src/plugins/job_submit/logging/Makefile src/plugins/job_submit/lua/Makefile src/plugins/job_submit/partition/Makefile src/plugins/preempt/Makefile src/plugins/preempt/none/Makefile src/plugins/preempt/partition_prio/Makefile src/plugins/preempt/qos/Makefile src/plugins/priority/Makefile src/plugins/priority/basic/Makefile src/plugins/priority/multifactor/Makefile src/plugins/proctrack/Makefile src/plugins/proctrack/aix/Makefile src/plugins/proctrack/cgroup/Makefile src/plugins/proctrack/pgid/Makefile src/plugins/proctrack/linuxproc/Makefile src/plugins/proctrack/rms/Makefile src/plugins/proctrack/sgi_job/Makefile src/plugins/proctrack/lua/Makefile src/plugins/sched/Makefile src/plugins/sched/backfill/Makefile src/plugins/sched/builtin/Makefile src/plugins/sched/hold/Makefile src/plugins/sched/wiki/Makefile src/plugins/sched/wiki2/Makefile src/plugins/select/Makefile src/plugins/select/bluegene/Makefile src/plugins/select/bluegene/ba/Makefile src/plugins/select/bluegene/ba_bgq/Makefile src/plugins/select/bluegene/bl/Makefile src/plugins/select/bluegene/bl_bgq/Makefile src/plugins/select/bluegene/sfree/Makefile src/plugins/select/cons_res/Makefile src/plugins/select/cray/Makefile src/plugins/select/cray/libalps/Makefile src/plugins/select/cray/libemulate/Makefile src/plugins/select/linear/Makefile src/plugins/switch/Makefile src/plugins/switch/elan/Makefile src/plugins/switch/none/Makefile src/plugins/switch/federation/Makefile src/plugins/mpi/Makefile 
src/plugins/mpi/mpich1_p4/Makefile src/plugins/mpi/mpich1_shmem/Makefile src/plugins/mpi/mpichgm/Makefile src/plugins/mpi/mpichmx/Makefile src/plugins/mpi/mvapich/Makefile src/plugins/mpi/lam/Makefile src/plugins/mpi/none/Makefile src/plugins/mpi/openmpi/Makefile src/plugins/task/Makefile src/plugins/task/affinity/Makefile src/plugins/task/cgroup/Makefile src/plugins/task/none/Makefile src/plugins/topology/Makefile src/plugins/topology/3d_torus/Makefile src/plugins/topology/node_rank/Makefile src/plugins/topology/none/Makefile src/plugins/topology/tree/Makefile doc/Makefile doc/man/Makefile doc/html/Makefile doc/html/configurator.html testsuite/Makefile testsuite/expect/Makefile testsuite/slurm_unit/Makefile testsuite/slurm_unit/api/Makefile testsuite/slurm_unit/api/manual/Makefile testsuite/slurm_unit/common/Makefile"
 
 
 cat >confcache <<\_ACEOF
@@ -22479,11 +22479,11 @@ do
     "src/plugins/priority/multifactor/Makefile") CONFIG_FILES="$CONFIG_FILES src/plugins/priority/multifactor/Makefile" ;;
     "src/plugins/proctrack/Makefile") CONFIG_FILES="$CONFIG_FILES src/plugins/proctrack/Makefile" ;;
     "src/plugins/proctrack/aix/Makefile") CONFIG_FILES="$CONFIG_FILES src/plugins/proctrack/aix/Makefile" ;;
+    "src/plugins/proctrack/cgroup/Makefile") CONFIG_FILES="$CONFIG_FILES src/plugins/proctrack/cgroup/Makefile" ;;
     "src/plugins/proctrack/pgid/Makefile") CONFIG_FILES="$CONFIG_FILES src/plugins/proctrack/pgid/Makefile" ;;
     "src/plugins/proctrack/linuxproc/Makefile") CONFIG_FILES="$CONFIG_FILES src/plugins/proctrack/linuxproc/Makefile" ;;
     "src/plugins/proctrack/rms/Makefile") CONFIG_FILES="$CONFIG_FILES src/plugins/proctrack/rms/Makefile" ;;
     "src/plugins/proctrack/sgi_job/Makefile") CONFIG_FILES="$CONFIG_FILES src/plugins/proctrack/sgi_job/Makefile" ;;
-    "src/plugins/proctrack/cgroup/Makefile") CONFIG_FILES="$CONFIG_FILES src/plugins/proctrack/cgroup/Makefile" ;;
     "src/plugins/proctrack/lua/Makefile") CONFIG_FILES="$CONFIG_FILES src/plugins/proctrack/lua/Makefile" ;;
     "src/plugins/sched/Makefile") CONFIG_FILES="$CONFIG_FILES src/plugins/sched/Makefile" ;;
     "src/plugins/sched/backfill/Makefile") CONFIG_FILES="$CONFIG_FILES src/plugins/sched/backfill/Makefile" ;;
@@ -22518,6 +22518,7 @@ do
     "src/plugins/mpi/openmpi/Makefile") CONFIG_FILES="$CONFIG_FILES src/plugins/mpi/openmpi/Makefile" ;;
     "src/plugins/task/Makefile") CONFIG_FILES="$CONFIG_FILES src/plugins/task/Makefile" ;;
     "src/plugins/task/affinity/Makefile") CONFIG_FILES="$CONFIG_FILES src/plugins/task/affinity/Makefile" ;;
+    "src/plugins/task/cgroup/Makefile") CONFIG_FILES="$CONFIG_FILES src/plugins/task/cgroup/Makefile" ;;
     "src/plugins/task/none/Makefile") CONFIG_FILES="$CONFIG_FILES src/plugins/task/none/Makefile" ;;
     "src/plugins/topology/Makefile") CONFIG_FILES="$CONFIG_FILES src/plugins/topology/Makefile" ;;
     "src/plugins/topology/3d_torus/Makefile") CONFIG_FILES="$CONFIG_FILES src/plugins/topology/3d_torus/Makefile" ;;
diff --git a/configure.ac b/configure.ac
index 2c2c9add43f..ea038fdca14 100644
--- a/configure.ac
+++ b/configure.ac
@@ -455,11 +455,11 @@ AC_CONFIG_FILES([Makefile
 		 src/plugins/priority/multifactor/Makefile
 		 src/plugins/proctrack/Makefile
 		 src/plugins/proctrack/aix/Makefile
+		 src/plugins/proctrack/cgroup/Makefile
 		 src/plugins/proctrack/pgid/Makefile
 		 src/plugins/proctrack/linuxproc/Makefile
 		 src/plugins/proctrack/rms/Makefile
 		 src/plugins/proctrack/sgi_job/Makefile
-		 src/plugins/proctrack/cgroup/Makefile
 		 src/plugins/proctrack/lua/Makefile
 		 src/plugins/sched/Makefile
 		 src/plugins/sched/backfill/Makefile
@@ -494,6 +494,7 @@ AC_CONFIG_FILES([Makefile
 		 src/plugins/mpi/openmpi/Makefile
 		 src/plugins/task/Makefile
 		 src/plugins/task/affinity/Makefile
+		 src/plugins/task/cgroup/Makefile
 		 src/plugins/task/none/Makefile
 		 src/plugins/topology/Makefile
 		 src/plugins/topology/3d_torus/Makefile
diff --git a/doc/html/configurator.html.in b/doc/html/configurator.html.in
index 6f4f14d199e..0a4e4d16af1 100644
--- a/doc/html/configurator.html.in
+++ b/doc/html/configurator.html.in
@@ -585,7 +585,7 @@ with a job step.<BR>
 Select one value for <B>ProctrackType</B>:<BR>
 <input type="radio" name="proctrack_type" value="aix"> <B>AIX</B>: Use AIX kernel
 extension, recommended for AIX systems<BR>
-<input type="radio" name="proctrack_type" value="cgroup"> <B>Cgroup</B>: Use
+<input type="radio" name="proctrack_type" value="cgroup"> <B>Cgroup</B>:  Use
 Linux <i>cgroups</i> to create a job container and track processes.
 Build a <i>cgroup.conf</i> file as well<BR>
 <input type="radio" name="proctrack_type" value="pgid" checked> <B>Pgid</B>: Use Unix
@@ -662,7 +662,10 @@ Select one value for <B>TaskPlugin</B>:<BR>
 <input type="radio" name="task_plugin" value="none" checked> <B>None</B>: No task launch actions<BR>
 <input type="radio" name="task_plugin" value="affinity"> <B>Affinity</B>:
 CPU affinity support
-(see srun man pages for the --cpu_bind, --mem_bind, and -E options)
+(see srun man pages for the --cpu_bind, --mem_bind, and -E options)<BR>
+<input type="radio" name="task_plugin" value="cgroup"> <B>Cgroup</B>:
+Enforcement of allocated resource constraints using Linux Control Groups
+(see cgroup.conf man page)
 <DL><DL>
 <DT><B>TaskPluginParam</B> (As used by <I>TaskPlugin=Affinity</I> only):
 <DT><input type="radio" name="task_plugin_param" value="Cpusets">
diff --git a/doc/html/taskplugins.shtml b/doc/html/taskplugins.shtml
index de4735ac48b..6ee20a20587 100644
--- a/doc/html/taskplugins.shtml
+++ b/doc/html/taskplugins.shtml
@@ -175,8 +175,21 @@ data structure definition.</p>
 On failure, the plugin should return SLURM_ERROR and set the errno to an
 appropriate value to indicate the reason for failure.</p>
 
+<p class="commandline">int task_post_step (slurmd_job_t *job);</p>
+<p style="margin-left:.2in"><b>Description</b>: task_post_step() is called
+after termination of all the tasks of the job step.
+Executed by the <b>slurmstepd</b> program as user root.</p>
+<p style="margin-left:.2in"><b>Arguments</b>:
+<span class="commandline">job</span>&nbsp;&nbsp;&nbsp;(input)
+pointer to the job which has terminated.
+See <b>src/slurmd/slurmstepd/slurmstepd_job.h</b> for the
+data structure definition.</p>
+<p style="margin-left:.2in"><b>Returns</b>: SLURM_SUCCESS if successful.
+On failure, the plugin should return SLURM_ERROR and set the errno to an
+appropriate value to indicate the reason for failure.</p>
+
 <h2>Versioning</h2>
-<p> This document describes version 1 of the SLURM Task Plugin API.
+<p> This document describes version 2 of the SLURM Task Plugin API.
 Future releases of SLURM may revise this API.</p>
 <p class="footer"><a href="#top">top</a></p>
 
diff --git a/doc/man/man5/cgroup.conf.5 b/doc/man/man5/cgroup.conf.5
index 99833308289..ae35cc2033b 100644
--- a/doc/man/man5/cgroup.conf.5
+++ b/doc/man/man5/cgroup.conf.5
@@ -1,13 +1,14 @@
-.TH "cgroup.conf" "5" "February 2010" "cgroup.conf 2.2" "Slurm configuration file"
+.TH "cgroup.conf" "5" "December 2010" "cgroup.conf 2.2" \
+"Slurm cgroup configuration file"
 
 .SH "NAME"
 cgroup.conf \- Slurm configuration file for the cgroup support
 
 .SH "DESCRIPTION"
-\fB/etc/cgroup.conf\fP is an ASCII file which defines parameters used by 
-Slurm's proctrack/cgroup plugin in support of Linux cgroups being used as a
-job container. The file will always be located in the same directory as the 
-\fBslurm.conf\fP file.
+
+\fB/etc/slurm/cgroup.conf\fP is an ASCII file which defines parameters used by 
+Slurm's Linux cgroup related plugins. The file will always be located in 
+the same directory as the \fBslurm.conf\fP file.
 .LP
 Parameter names are case insensitive.
 Any text following a "#" in the configuration file is treated
@@ -17,17 +18,89 @@ Changes to the configuration file take effect upon restart of
 SLURM daemons, daemon receipt of the SIGHUP signal, or execution
 of the command "scontrol reconfigure" unless otherwise noted.
 .LP
-Slurm cgroup proctrack plugin creates a hierarchical set of
+Two cgroup plugins are currently available in SLURM. The first
+one is a proctrack plugin, the second one a task plugin.
+
+.LP
+The following cgroup.conf parameters are defined to control the general behavior
+of Slurm cgroup plugins.
+
+.TP
+\fBCgroupAutomount\fR=<yes|no>
+Slurm cgroup plugins require a valid and functional cgroup subsystem to be mounted
+under /cgroup/<subsystem_name>.
+When launched, plugins check their subsystem availability. If not available, 
+the plugin launch fails unless CgroupAutomount is set to yes. In that case, the 
+plugin will first try to mount the required subsystems.
+
+.TP
+\fBCgroupReleaseAgentDir\fR=<path_to_release_agent_directory>
+Used to tune the cgroup system behavior. This parameter identifies the location 
+of the directory containing Slurm cgroup release_agent files. A release_agent file
+is required for each mounted subsystem. The release_agent file name must have the
+following format: release_<subsystem_name>.  For instance, the release_agent file
+for the cpuset subsystem must be named release_cpuset.  See also CLEANUP OF
+CGROUPS below.
+
+.SH "PROCTRACK/CGROUP PLUGIN"
+
+Slurm \fBproctrack/cgroup\fP plugin is used to track processes using the
+freezer control group subsystem. It creates a hierarchical set of
 directories for each step, putting the step tasks into the leaf.
+.LP
 This directory structure is like the following:
 .br 
-/dev/cgroup/slurm/uid_%uid/job_%jobid/step_%stepid
+/cgroup/freezer/uid_%uid/job_%jobid/step_%stepid
+.LP
+Slurm cgroup proctrack plugin is enabled with the following parameter
+in slurm.conf:
+.br 
+ProctrackType=proctrack/cgroup
+
+.LP
+No particular cgroup.conf parameter is defined to control the behavior
+of this particular plugin.
+
+
+.SH "TASK/CGROUP PLUGIN"
+
+.LP
+The Slurm \fBtask/cgroup\fP plugin is used to enforce allocated resource
+constraints, thus preventing tasks from using unallocated resources. It currently
+only uses the cpuset subsystem but could use the memory and devices subsystems
+in the near future too.
+
+.LP
+It creates a hierarchical set of directories for each task and subsystem.
+The directory structure is like the following:
+.br 
+/cgroup/%subsys/uid_%uid/job_%jobid/step_%stepid/task_%taskid
+
 .LP
-Slurm cgroup proctrack plugin can constrain cores, RAM and swap space for 
-jobs and set a variety of default job and job step parameters for cgroups.
+Slurm cgroup task plugin is enabled with the following parameter
+in slurm.conf:
+.br
+TaskPlugin=task/cgroup
+
 .LP
-The configuration parameters available include:
+The following cgroup.conf parameters are defined to control the behavior
+of this particular plugin:
 
+.TP
+\fBConstrainCores\fR=<yes|no>
+If configured to "yes" then constrain allowed cores to the subset of 
+allocated resources. It uses the cpuset subsystem.
+The default value is "no".
+.TP
+\fBTaskAffinity\fR=<yes|no>
+If configured to "yes" then set a default task affinity to bind each step 
+task to a subset of the allocated cores using \fBsched_setaffinity\fP.
+The default value is "no".
+
+.LP
+The following cgroup.conf parameters may be used to control the behavior
+of this particular plugin in a future version, once memory and devices support
+has been added:
 
 .TP
 \fBAllowedRAMSpace\fR=<number>
@@ -45,51 +118,23 @@ If the limit is exceeded, the job steps will be killed and a warning message
 will be written to standard error.
 Also see \fBConstrainSwapSpace\fR.
 
-.TP
-\fBCgroupAutomount\fR=<yes|no>
-Slurm cgroup plugins require a valid and functional cgroup system mounted on 
-/dev/cgroup. When launched, plugins check cgroup availability. If cgroup is 
-not available, the plugin launch fails unless CgroupAutomount is set to yes. 
-In that case, the plugin will first try to mount the cgroup system.
-
-.TP
-\fBCgroupMountOptions\fR=<options>
-Used to tune the cgroup system behavior.
-
-.TP
-\fBCgroupReleaseAgent\fR=<path_to_program>
-Used to tune the cgroup system behavior.
-
-.TP
-\fBConstrainCores\fR=<yes|no>
-If configured to "yes" then constrain allowed cores to the subset of 
-allocated resources.
-The default value is "no".
-
 .TP
 \fBConstrainRAMSpace\fR=<yes|no>
-If configured to "yes" then constraing the job's RAM usage.
+If configured to "yes" then constrain the job's RAM usage.
 The default value is "no".
 Also see \fBAllowedRAMSpace\fR.
 
 .TP
 \fBConstrainSwapSpace\fR=<yes|no>
-If configured to "yes" then constraing the job's swap space usage.
+If configured to "yes" then constrain the job's swap space usage.
 The default value is "no".
 Also see \fBAllowedSwapSpace\fR.
 
 .TP
-\fBJobCgroupParams\fR=<options>
-Used to tune job cgroup. The format of the parameter is the following:
-"a=b c=d e=f" where a,c,d corresponds to files under the cgroup 
-directory and b,d,f the values to write in these files.
-
-.TP
-\fBJobStepCgroupParams\fR=<options>
-Used to tune job step cgroup. The format of the parameter is the following:
-"a=b c=d e=f" where a,c,d corresponds to files under the cgroup 
-directory and b,d,f the values to write in these files.
-
+\fBConstrainDevices\fR=<yes|no>
+If configured to "yes" then constrain the job's allowed devices based on GRES
+allocated resources. It uses the devices subsystem for that.
+The default value is "no".
 
 .SH "EXAMPLE"
 .LP
@@ -102,41 +147,75 @@ directory and b,d,f the values to write in these files.
 .br
 CgroupAutomount=yes
 .br
-CgroupMountOptions="memory,cpuset"
-.br
-CgroupReleaseAgent="/etc/slurm/cgroup.release_agent"
+CgroupReleaseAgentDir="/etc/slurm/cgroup"
 .br
-JobCgroupParams="memory.swappiness=30"
-.br
-JobStepCgroupParams=""
+ConstrainCores=yes
 .br
 #
+
+.SH "NOTES"
+.LP
+Only one instance of a cgroup subsystem is valid at a time in the kernel.
+If you try to mount another cgroup hierarchy that uses the same subsystem
+(for example cpuset), it will fail.
+However, you can mount another cgroup hierarchy for a different
+subsystem.
+
+.SH CLEANUP OF CGROUPS
+.LP
+To allow cgroups to be removed automatically when they are no longer in use
+the notify_on_release flag is set in each cgroup when the cgroup is
+instantiated. The release_agent file for each subsystem is set up when the
+subsystem is mounted.  The name of each release_agent file is 
+release_<subsystem name>. The directory is specified via the 
+CgroupReleaseAgentDir parameter in cgroup.conf. A simple release agent 
+mechanism to remove slurm cgroups when they become empty may be set up by 
+creating the release agent files for each required subsystem as symbolic 
+links to a common release agent script, as shown in the example below:
+
+[sulu] (slurm) etc> cat cgroup.conf | grep CgroupReleaseAgentDir
 .br
-# Constrain RAM at 100% of allocation and 
+CgroupReleaseAgentDir="/etc/slurm/cgroup"
 .br
-# Total space (RAM + swap) at 110% of allocation
+
+[sulu] (slurm) etc> ls -al /etc/slurm/cgroup
 .br
-#
+total 12
 .br
-ConstrainRAMSpace=yes
+drwxr-xr-x 2 root root 4096 2010-04-23 14:55 .
 .br
-AllowedRAMSpace=100
+drwxr-xr-x 4 root root 4096 2010-07-22 14:48 ..
 .br
-ConstrainSwapSpace=yes
+-rwxrwxrwx 1 root root  234 2010-04-23 14:52 release_common
 .br
-AllowedSwapSpace=10
+lrwxrwxrwx 1 root root   32 2010-04-23 11:04 release_cpuset -> /etc/slurm/cgroup/release_common
 .br
-ConstrainCores=yes
+lrwxrwxrwx 1 root root   32 2010-04-23 11:03 release_freezer -> /etc/slurm/cgroup/release_common
 
-.SH "NOTES"
-Proctrack/cgroup is not compatible with the task/affinity plugin configured
-to use cpusets, although this may be addressed in the future.
-.LP
-Only one instance of a cgroup subsystem is valid at a time in the kernel.
-If you try to mount another cgroup hierarchy that uses the same cpuset 
-subsystem it will fail.
-However you can mount another cgroup hierarchy for a different cpuset 
-subsystem.
+[sulu] (slurm) etc> cat /etc/slurm/cgroup/release_common
+.br
+#!/bin/bash
+.br
+base_path=/cgroup
+.br
+progname=$(basename $0)
+.br
+subsystem=${progname##*_}
+.br
+.br
+rmcg=${base_path}/${subsystem}$@
+.br
+uidcg=${rmcg%/job*}
+.br
+if [[ -d ${base_path}/${subsystem} ]]
+.br
+then
+.br
+     flock -x ${uidcg} -c "rmdir ${rmcg}"
+.br
+fi
+.br
+[sulu] (slurm) etc>
 
 .SH "COPYING"
 Copyright (C) 2010 Lawrence Livermore National Security.
diff --git a/doc/man/man5/slurm.conf.5 b/doc/man/man5/slurm.conf.5
index 157016b6205..06a00f6e3d2 100644
--- a/doc/man/man5/slurm.conf.5
+++ b/doc/man/man5/slurm.conf.5
@@ -1080,7 +1080,7 @@ Acceptable values at present include:
 which uses an AIX kernel extension and is the default for AIX systems
 .TP
 \fBproctrack/cgroup\fR
-which uses linux cgroups to constrain and track processes. 
+which uses linux cgroups to constrain and track processes.
 NOTE: see "man cgroup.conf" for configuration details
 .TP
 \fBproctrack/linuxproc\fR
@@ -1785,12 +1785,23 @@ See \fBTaskProlog\fR for execution order details.
 Identifies the type of task launch plugin, typically used to provide
 resource management within a node (e.g. pinning tasks to specific
 processors).
-Acceptable values include
-"task/none" for systems requiring no special handling and
+Acceptable values include:
+.RS
+.TP 20
+\fBtask/none\fR
+for systems requiring no special handling
+.TP
+\fBtask/cgroup\fR
+to enable resource containment using Linux control groups (cgroups).
+NOTE: see "man cgroup.conf" for configuration details
+.TP
+\fBtask/affinity\fR
 "task/affinity" to enable the \-\-cpu_bind and/or \-\-mem_bind
 srun options.
+
+.RE
 The default value is "task/none".
-If you "task/affinity" and encounter problems, it may be due to
+If you use "task/affinity" and encounter problems, it may be due to
 the variety of system calls used to implement task affinity on
 different operating systems.
 If that is the case, you may want to use Portable Linux
diff --git a/etc/cgroup.conf.example b/etc/cgroup.conf.example
index 615faa6c98c..816c1828391 100644
--- a/etc/cgroup.conf.example
+++ b/etc/cgroup.conf.example
@@ -2,70 +2,11 @@
 #
 # Slurm cgroup support configuration file
 #
-###
-
-#--
-# Slurm cgroup plugins require a valid and functional 
-# cgroup system mounted on /dev/cgroup
-# When launched, plugins check cgroup availability
-# If cgroup is not available, the plugin launch fails
-# unless CgroupAutomount is set to yes. In that case,
-# the plugin will first try to mount the cgroup system.
-# CgroupMountOptions and CgroupReleaseAgent can be used to
-# tune the cgroup system behavior
+# See man slurm.conf and man cgroup.conf for further
+# information on cgroup configuration parameters
 #--
 CgroupAutomount=yes
-CgroupMountOptions="memory,cpuset"
-CgroupReleaseAgent="/etc/slurm/cgroup.release_agent"
-
-#--
-# Slurm cgroup proctrack plugin creates a hierarchical set of
-# directories for each step, putting the step tasks into the leaf
-#
-# This directory structure is like the following : 
-#     /dev/cgroup/slurm/uid_%uid/job_%jobid/step_%stepid
-#
-# job cgroup and jobstep cgroup can be tuned using the two next 
-# parameters. The format of the parameter is the following :
-#
-# "a=b c=d e=f" where a,c,d corresponds to files under the cgroup 
-# directory and b,d,f the values to write in these files
-#--
-JobCgroupParams="memory.swappiness=30"
-JobStepCgroupParams=""
+CgroupReleaseAgent="/etc/slurm/cgroup"
 
-#--
-# Slurm cgroup proctrack plugin can constrain memory usage at the job
-# level. The constraints correspond to the amount of RAM space allowed
-# to the whole job as well as the amount of additional swap space.
-#
-# The amount of space of these two notions are expressed in percent of
-# the memory limit set to the job on the execution node.
-#
-# Thus, the following configuration :
-#     ConstrainRAMSpace=no
-#     ConstrainSwapSpace=yes
-# will request that no constraint are set for the jobs,
-#     ConstrainRAMSpace=yes
-#     AllowedRAMSpace=100
-#     ConstrainSwapSpace=yes
-#     AllowedSwapSpace=10
-# will request to constrain RAM and Swap space letting the job use
-# as many RAM space than memory asked in slurm but not more than
-# 110% of this limit in both RAM+Swap space
-#
-# Warning: setting ConstrainSwapSpace to yes automatically set 
-# ConstrainRAMSpace to yes and the corresponding limit to 100%
-# of the memory limit + the configured percent of Swap space
-#--
-ConstrainRAMSpace=yes
-AllowedRAMSpace=100
-ConstrainSwapSpace=yes
-AllowedSwapSpace=10
-
-#--
-# Slurm cgroup proctrack plugin can constrain allowed cores to 
-# the subset of allocated resources.
-# To do that, you just have to set to yes the following parameter
-#--
-ConstrainCores=yes
+ConstrainCores=no
+ConstrainRAMSpace=no
diff --git a/etc/cgroup.release_agent b/etc/cgroup.release_agent
deleted file mode 100644
index cd8679c9e75..00000000000
--- a/etc/cgroup.release_agent
+++ /dev/null
@@ -1,2 +0,0 @@
-#!/bin/bash
-rmdir /dev/cgroup/$@
diff --git a/etc/cgroup.release_common.example b/etc/cgroup.release_common.example
new file mode 100644
index 00000000000..6c6c4d03a25
--- /dev/null
+++ b/etc/cgroup.release_common.example
@@ -0,0 +1,128 @@
+#!/bin/bash
+#
+# Generic release agent for SLURM cgroup usage
+#
+# Manage cgroup hierarchy like :
+#
+# /cgroup/subsystem/uid_%/job_%/step_%/task_%
+#
+# Automatically sync uid_% cgroups to be coherent
+# with remaining job children when one of them is removed
+# by a call to this release agent.
+# The synchronisation is made in a flock on the root cgroup
+# to ensure coherency of the cgroups contents.
+# The subsystem is the part of the script name after the last '_'.
+
+base_path=/cgroup
+progname=$(basename $0)
+subsystem=${progname##*_}
+orphancg=${base_path}/${subsystem}/orphan
+
+if [[ $# -eq 0 ]]
+then
+    echo "Usage: $(basename $0) [sync] cgroup"
+    exit 1
+fi
+
+# make sure orphan cgroup is existing
+if [[ ! -d ${orphancg} ]]
+then
+    mkdir ${orphancg}
+    case ${subsystem} in 
+	cpuset)
+	    cat ${base_path}/${subsystem}/cpuset.cpus > ${orphancg}/cpuset.cpus
+	    cat ${base_path}/${subsystem}/cpuset.mems > ${orphancg}/cpuset.mems
+	    ;;
+	*)
+	    ;;
+    esac
+fi
+    
+# kernel call (a single argument : the cgroup path relative to the subsystem)
+if [[ $# -eq 1 ]]
+then
+
+    rmcg=${base_path}/${subsystem}$@
+
+    # try to extract the uid cgroup from the input one
+    # ( extract /uid_% from /uid_%/job_*...)
+    uidcg=${rmcg%/job_*}
+    if [[ ${uidcg} == ${rmcg} ]]
+    then
+	# not a slurm job pattern, perhaps the uidcg, just remove
+	# the dir with a lock and exit
+	flock -x ${base_path}/${subsystem} -c "rmdir ${rmcg}"
+	exit $?
+    fi
+
+    if [[ -d ${base_path}/${subsystem} ]]
+    then
+	flock -x ${base_path}/${subsystem} -c "$0 sync $@"
+    fi
+
+    exit $?
+
+# sync subcall (called using flock by the kernel hook to be sure
+# that no one is manipulating the hierarchy, i.e. PAM, SLURM, ...)
+elif [[ $# -eq 2 ]] && [[ $1 == "sync" ]]
+then
+
+    shift
+    rmcg=${base_path}/${subsystem}$@
+    uidcg=${rmcg%/job_*}
+
+    # remove this cgroup
+    if [[ -d ${rmcg} ]]
+    then
+        case ${subsystem} in
+            memory)
+		# help to correctly remove lazy cleaning memcg
+		# but still not perfect
+                sleep 1
+                ;;
+            *)
+		;;
+        esac
+	rmdir ${rmcg}
+    fi
+    if [[ ${uidcg} == ${rmcg} ]]
+    then
+	## not a slurm job pattern : exit now, do not sync
+	exit 0
+    fi
+
+    # sync the user cgroup based on targeted subsystem
+    # and the remaining jobs
+    if [[ -d ${uidcg} ]]
+    then
+	case ${subsystem} in 
+	    cpuset)
+		cpus=$(cat ${uidcg}/job_*/cpuset.cpus 2>/dev/null)
+		if [[ -n ${cpus} ]]
+		then
+		    cpus=$(scontrol show hostnames $(echo ${cpus} | tr ' ' ','))
+		    cpus=$(echo ${cpus} | tr ' ' ',')
+		    echo ${cpus} > ${uidcg}/cpuset.cpus
+		else
+		    # first move the remaining processes to
+		    # a cgroup reserved for orphaned processes
+		    for t in $(cat ${uidcg}/tasks)
+		    do
+			echo $t > ${orphancg}/tasks
+		    done
+		    # then remove the remaining cpus from the cgroup
+		    echo "" > ${uidcg}/cpuset.cpus
+		fi
+		;;
+	    *)
+		;;
+	esac
+    fi
+
+# error
+else
+    echo "Usage: $(basename $0) [sync] cgroup"
+    exit 1
+fi
+
+exit 0
diff --git a/slurm.spec b/slurm.spec
index 4e61d4a5a5c..39646aeefa6 100644
--- a/slurm.spec
+++ b/slurm.spec
@@ -406,9 +406,12 @@ DESTDIR="$RPM_BUILD_ROOT" make install-contrib
       install -D -m755 etc/init.d.slurmdbd $RPM_BUILD_ROOT/etc/init.d/slurmdbd
    fi
 %endif
-install -D -m644 etc/cgroup.conf.example ${RPM_BUILD_ROOT}%{_sysconfdir}/cgroup.conf.example
-install -D -m755 etc/cgroup.release_agent ${RPM_BUILD_ROOT}%{_sysconfdir}/cgroup.release_agent
 install -D -m644 etc/slurm.conf.example ${RPM_BUILD_ROOT}%{_sysconfdir}/slurm.conf.example
+install -D -m644 etc/cgroup.conf.example ${RPM_BUILD_ROOT}%{_sysconfdir}/cgroup.conf.example
+install -D -m755 etc/cgroup.release_common.example ${RPM_BUILD_ROOT}%{_sysconfdir}/cgroup.release_common.example
+install -D -m755 etc/cgroup.release_common.example ${RPM_BUILD_ROOT}%{_sysconfdir}/cgroup/release_freezer
+install -D -m755 etc/cgroup.release_common.example ${RPM_BUILD_ROOT}%{_sysconfdir}/cgroup/release_cpuset
+install -D -m755 etc/cgroup.release_common.example ${RPM_BUILD_ROOT}%{_sysconfdir}/cgroup/release_memory
 install -D -m644 etc/slurmdbd.conf.example ${RPM_BUILD_ROOT}%{_sysconfdir}/slurmdbd.conf.example
 install -D -m755 etc/slurm.epilog.clean ${RPM_BUILD_ROOT}%{_sysconfdir}/slurm.epilog.clean
 install -D -m755 contribs/sjstat ${RPM_BUILD_ROOT}%{_bindir}/sjstat
@@ -499,6 +502,8 @@ test -f $RPM_BUILD_ROOT/%{_libdir}/slurm/crypto_openssl.so           &&
    echo %{_libdir}/slurm/crypto_openssl.so           >> $LIST
 test -f $RPM_BUILD_ROOT/%{_libdir}/slurm/task_affinity.so            &&
    echo %{_libdir}/slurm/task_affinity.so            >> $LIST
+test -f $RPM_BUILD_ROOT/%{_libdir}/slurm/task_cgroup.so              &&
+   echo %{_libdir}/slurm/task_cgroup.so              >> $LIST
 
 LIST=./pam.files
 touch $LIST
@@ -550,9 +555,12 @@ rm -rf $RPM_BUILD_ROOT
 %{_mandir}/man8/spank*
 %dir %{_sysconfdir}
 %dir %{_libdir}/slurm/src
-%config %{_sysconfdir}/cgroup.conf.example
-%config %{_sysconfdir}/cgroup.release_agent
 %config %{_sysconfdir}/slurm.conf.example
+%config %{_sysconfdir}/cgroup.conf.example
+%config %{_sysconfdir}/cgroup.release_common.example
+%config (noreplace) %{_sysconfdir}/cgroup/release_freezer
+%config (noreplace) %{_sysconfdir}/cgroup/release_cpuset
+%config (noreplace) %{_sysconfdir}/cgroup/release_memory
 %config %{_sysconfdir}/slurm.epilog.clean
 %exclude %{_mandir}/man1/sjobexit*
 %if %{slurm_with blcr}
diff --git a/src/common/Makefile.am b/src/common/Makefile.am
index 56f413c6ad1..6d989b0eeb1 100644
--- a/src/common/Makefile.am
+++ b/src/common/Makefile.am
@@ -32,6 +32,9 @@ noinst_LTLIBRARIES = 			\
 	libspank.la
 
 libcommon_la_SOURCES = 			\
+ 	xcgroup_read_config.c xcgroup_read_config.h		\
+ 	xcgroup.c xcgroup.h 					\
+ 	xcpuinfo.c xcpuinfo.h 					\
 	assoc_mgr.c assoc_mgr.h 	\
 	xmalloc.c xmalloc.h 		\
 	xassert.c xassert.h		\
diff --git a/src/common/Makefile.in b/src/common/Makefile.in
index 173536ee49a..81e1425de4c 100644
--- a/src/common/Makefile.in
+++ b/src/common/Makefile.in
@@ -94,11 +94,13 @@ CONFIG_CLEAN_FILES =
 CONFIG_CLEAN_VPATH_FILES =
 LTLIBRARIES = $(noinst_LTLIBRARIES)
 libcommon_la_DEPENDENCIES =
-am__libcommon_la_SOURCES_DIST = assoc_mgr.c assoc_mgr.h xmalloc.c \
-	xmalloc.h xassert.c xassert.h xstring.c xstring.h xsignal.c \
-	xsignal.h forward.c forward.h strlcpy.c strlcpy.h list.c \
-	list.h net.c net.h log.c log.h cbuf.c cbuf.h safeopen.c \
-	safeopen.h bitstring.c bitstring.h mpi.c mpi.h pack.c pack.h \
+am__libcommon_la_SOURCES_DIST = xcgroup_read_config.c \
+	xcgroup_read_config.h xcgroup.c xcgroup.h xcpuinfo.c \
+	xcpuinfo.h assoc_mgr.c assoc_mgr.h xmalloc.c xmalloc.h \
+	xassert.c xassert.h xstring.c xstring.h xsignal.c xsignal.h \
+	forward.c forward.h strlcpy.c strlcpy.h list.c list.h net.c \
+	net.h log.c log.h cbuf.c cbuf.h safeopen.c safeopen.h \
+	bitstring.c bitstring.h mpi.c mpi.h pack.c pack.h \
 	parse_config.c parse_config.h parse_spec.c parse_spec.h \
 	plugin.c plugin.h plugrack.c plugrack.h print_fields.c \
 	print_fields.h read_config.c read_config.h node_select.c \
@@ -130,25 +132,26 @@ am__libcommon_la_SOURCES_DIST = assoc_mgr.c assoc_mgr.h xmalloc.c \
 	proc_args.c proc_args.h slurm_strcasestr.c slurm_strcasestr.h \
 	node_conf.h node_conf.c gres.h gres.c
 @HAVE_UNSETENV_FALSE@am__objects_1 = unsetenv.lo
-am_libcommon_la_OBJECTS = assoc_mgr.lo xmalloc.lo xassert.lo \
-	xstring.lo xsignal.lo forward.lo strlcpy.lo list.lo net.lo \
-	log.lo cbuf.lo safeopen.lo bitstring.lo mpi.lo pack.lo \
-	parse_config.lo parse_spec.lo plugin.lo plugrack.lo \
-	print_fields.lo read_config.lo node_select.lo env.lo fd.lo \
-	slurm_cred.lo slurm_errno.lo slurm_priority.lo \
-	slurm_protocol_api.lo slurm_protocol_pack.lo \
-	slurm_protocol_util.lo slurm_protocol_socket_implementation.lo \
-	slurm_protocol_defs.lo slurm_rlimits_info.lo slurmdb_defs.lo \
-	slurmdb_pack.lo slurmdbd_defs.lo working_cluster.lo uid.lo \
-	util-net.lo slurm_auth.lo jobacct_common.lo \
-	slurm_accounting_storage.lo slurm_jobacct_gather.lo \
-	slurm_jobcomp.lo slurm_topology.lo switch.lo arg_desc.lo \
-	malloc.lo getopt.lo getopt1.lo $(am__objects_1) \
-	slurm_selecttype_info.lo slurm_resource_info.lo hostlist.lo \
-	slurm_step_layout.lo checkpoint.lo job_resources.lo \
-	parse_time.lo job_options.lo global_defaults.lo timers.lo \
-	stepd_api.lo write_labelled_message.lo proc_args.lo \
-	slurm_strcasestr.lo node_conf.lo gres.lo
+am_libcommon_la_OBJECTS = xcgroup_read_config.lo xcgroup.lo \
+	xcpuinfo.lo assoc_mgr.lo xmalloc.lo xassert.lo xstring.lo \
+	xsignal.lo forward.lo strlcpy.lo list.lo net.lo log.lo cbuf.lo \
+	safeopen.lo bitstring.lo mpi.lo pack.lo parse_config.lo \
+	parse_spec.lo plugin.lo plugrack.lo print_fields.lo \
+	read_config.lo node_select.lo env.lo fd.lo slurm_cred.lo \
+	slurm_errno.lo slurm_priority.lo slurm_protocol_api.lo \
+	slurm_protocol_pack.lo slurm_protocol_util.lo \
+	slurm_protocol_socket_implementation.lo slurm_protocol_defs.lo \
+	slurm_rlimits_info.lo slurmdb_defs.lo slurmdb_pack.lo \
+	slurmdbd_defs.lo working_cluster.lo uid.lo util-net.lo \
+	slurm_auth.lo jobacct_common.lo slurm_accounting_storage.lo \
+	slurm_jobacct_gather.lo slurm_jobcomp.lo slurm_topology.lo \
+	switch.lo arg_desc.lo malloc.lo getopt.lo getopt1.lo \
+	$(am__objects_1) slurm_selecttype_info.lo \
+	slurm_resource_info.lo hostlist.lo slurm_step_layout.lo \
+	checkpoint.lo job_resources.lo parse_time.lo job_options.lo \
+	global_defaults.lo timers.lo stepd_api.lo \
+	write_labelled_message.lo proc_args.lo slurm_strcasestr.lo \
+	node_conf.lo gres.lo
 am__EXTRA_libcommon_la_SOURCES_DIST = unsetenv.c unsetenv.h
 libcommon_la_OBJECTS = $(am_libcommon_la_OBJECTS)
 libcommon_la_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) \
@@ -406,6 +409,9 @@ noinst_LTLIBRARIES = \
 	libspank.la
 
 libcommon_la_SOURCES = \
+ 	xcgroup_read_config.c xcgroup_read_config.h		\
+ 	xcgroup.c xcgroup.h 					\
+ 	xcpuinfo.c xcpuinfo.h 					\
 	assoc_mgr.c assoc_mgr.h 	\
 	xmalloc.c xmalloc.h 		\
 	xassert.c xassert.h		\
@@ -652,6 +658,9 @@ distclean-compile:
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/working_cluster.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/write_labelled_message.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/xassert.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/xcgroup.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/xcgroup_read_config.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/xcpuinfo.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/xmalloc.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/xsignal.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/xstring.Plo@am__quote@
diff --git a/src/common/xcgroup.c b/src/common/xcgroup.c
new file mode 100644
index 00000000000..c9c2d66117e
--- /dev/null
+++ b/src/common/xcgroup.c
@@ -0,0 +1,1112 @@
+/*****************************************************************************\
+ *  xcgroup.c - cgroup related primitives
+ *****************************************************************************
+ *  Copyright (C) 2009 CEA/DAM/DIF
+ *  Written by Matthieu Hautreux <matthieu.hautreux@cea.fr>
+ *
+ *  This file is part of SLURM, a resource management program.
+ *  For details, see <https://computing.llnl.gov/linux/slurm/>.
+ *  Please also read the included file: DISCLAIMER.
+ *
+ *  SLURM is free software; you can redistribute it and/or modify it under
+ *  the terms of the GNU General Public License as published by the Free
+ *  Software Foundation; either version 2 of the License, or (at your option)
+ *  any later version.
+ *
+ *  In addition, as a special exception, the copyright holders give permission
+ *  to link the code of portions of this program with the OpenSSL library under
+ *  certain conditions as described in each individual source file, and
+ *  distribute linked combinations including the two. You must obey the GNU
+ *  General Public License in all respects for all of the code used other than
+ *  OpenSSL. If you modify file(s) with this exception, you may extend this
+ *  exception to your version of the file(s), but you are not obligated to do
+ *  so. If you do not wish to do so, delete this exception statement from your
+ *  version.  If you delete this exception statement from all source files in
+ *  the program, then also delete it here.
+ *
+ *  SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
+ *  WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ *  FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
+ *  details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with SLURM; if not, write to the Free Software Foundation, Inc.,
+ *  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA.
+\*****************************************************************************/
+
+#if HAVE_CONFIG_H
+#   include "config.h"
+#endif
+
+#if HAVE_STDINT_H
+#  include <stdint.h>
+#endif
+#if HAVE_INTTYPES_H
+#  include <inttypes.h>
+#endif
+
+#include <sys/file.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <string.h>
+#include <strings.h>
+#include <dirent.h>
+
+#include <slurm/slurm.h>
+#include <slurm/slurm_errno.h>
+#include "src/common/log.h"
+#include "src/common/xmalloc.h"
+#include "src/common/xstring.h"
+#include "src/slurmd/slurmstepd/slurmstepd_job.h"
+
+#include "xcgroup.h"
+
+#ifndef PATH_MAX
+#define PATH_MAX 256
+#endif
+
+/* internal file helpers (prototypes only, used by the primitives below) */
+size_t _file_getsize(int fd);
+int _file_read_uint32s(char* file_path, uint32_t** pvalues, int* pnb);
+int _file_write_uint32s(char* file_path, uint32_t* values, int nb);
+int _file_read_uint64s(char* file_path, uint64_t** pvalues, int* pnb);
+int _file_write_uint64s(char* file_path, uint64_t* values, int nb);
+int _file_read_content(char* file_path, char** content, size_t *csize);
+int _file_write_content(char* file_path, char* content, size_t csize);
+
+
+/*
+ * -----------------------------------------------------------------------------
+ * xcgroup_ns primitives xcgroup_ns primitives xcgroup_ns primitives
+ * xcgroup_ns primitives xcgroup_ns primitives xcgroup_ns primitives
+ * xcgroup_ns primitives xcgroup_ns primitives xcgroup_ns primitives
+ * -----------------------------------------------------------------------------
+ */
+
+/*
+ * fill a cgroup namespace descriptor for tasks containment
+ * (every string argument is duplicated into cgns using xstrdup)
+ *
+ * returned values:
+ *  - XCGROUP_SUCCESS (no failure path in the current code)
+ */
+int xcgroup_ns_create(xcgroup_ns_t* cgns, char* mnt_point, char* mnt_args,
+		      char* subsys, char* notify_prog)
+{
+	cgns->notify_prog = xstrdup(notify_prog);
+	cgns->subsystems = xstrdup(subsys);
+	cgns->mnt_args = xstrdup(mnt_args);
+	cgns->mnt_point = xstrdup(mnt_point);
+	return XCGROUP_SUCCESS;
+}
+
+/*
+ * release the strings held by a cgroup namespace descriptor
+ * (the descriptor itself is left to the caller)
+ *
+ * returned values:
+ *  - XCGROUP_SUCCESS (no failure path in the current code)
+ */
+int xcgroup_ns_destroy(xcgroup_ns_t* cgns)
+{
+	xfree(cgns->notify_prog);
+	xfree(cgns->subsystems);
+	xfree(cgns->mnt_args);
+	xfree(cgns->mnt_point);
+
+	return XCGROUP_SUCCESS;
+}
+
+/*
+ * mount a cgroup namespace, creating its mount point (and the missing
+ * parent directories) first, then set the release agent when one is defined
+ *
+ * returned values:
+ *  - XCGROUP_ERROR   : directory creation, cmd line build or mount failed
+ *  - XCGROUP_SUCCESS : mounted (release agent setup is best effort)
+ */
+int xcgroup_ns_mount(xcgroup_ns_t* cgns)
+{
+	int fstatus;
+	char mount_cmd[1024];
+	const char* mnt_args;
+	const char* sep;
+	char* mnt_point;
+	char* p;
+
+	xcgroup_t cg;
+
+	mode_t cmask;
+	mode_t omask;
+
+	cmask = S_IWGRP | S_IWOTH;
+	omask = umask(cmask);
+
+	fstatus = mkdir(cgns->mnt_point, 0755);
+	if (fstatus && errno != EEXIST) {
+		if (cgns->mnt_point[0] != '/') {
+			debug("unable to create cgroup ns directory '%s'"
+			      " : do not start with '/'", cgns->mnt_point);
+			umask(omask);
+			return XCGROUP_ERROR;
+		}
+		/* create the intermediate directories one by one */
+		mnt_point = xstrdup(cgns->mnt_point);
+		p = mnt_point;
+		while ((p = index(p+1, '/')) != NULL) {
+			*p = '\0';
+			/* errno is only meaningful when mkdir() fails, a
+			 * stale value must not abort a successful creation */
+			if (mkdir(mnt_point, 0755) && errno != EEXIST) {
+				debug("unable to create cgroup ns required "
+				      "directory '%s'", mnt_point);
+				xfree(mnt_point);
+				umask(omask);
+				return XCGROUP_ERROR;
+			}
+			*p='/';
+		}
+		xfree(mnt_point);
+		fstatus = mkdir(cgns->mnt_point, 0755);
+	}
+
+	if (fstatus && errno != EEXIST) {
+		debug("unable to create cgroup ns directory '%s'"
+		      " : %m", cgns->mnt_point);
+		umask(omask);
+		return XCGROUP_ERROR;
+	}
+	umask(omask);
+
+	/* options are "subsystems[,mnt_args]" : no space after the comma
+	 * (it would split the -o argument) and no NULL given to %s */
+	mnt_args = cgns->mnt_args ? cgns->mnt_args : "";
+	sep = (mnt_args[0] != '\0') ? "," : "";
+
+	if (snprintf(mount_cmd, 1024, "/bin/mount -o %s%s%s -t cgroup none %s",
+		     cgns->subsystems, sep, mnt_args,
+		     cgns->mnt_point) >= 1024) {
+		debug2("unable to build cgroup ns mount cmd line");
+		return XCGROUP_ERROR;
+	}
+	debug3("cgroup mount cmd line is '%s'", mount_cmd);
+
+	if (system(mount_cmd))
+		return XCGROUP_ERROR;
+
+	/* we then set the release_agent if necessary */
+	if (cgns->notify_prog) {
+		if (xcgroup_create(cgns, &cg, "/", 0, 0) == XCGROUP_ERROR)
+			return XCGROUP_SUCCESS;
+		xcgroup_set_param(&cg, "release_agent", cgns->notify_prog);
+		/* free the strings duplicated by xcgroup_create() */
+		xcgroup_destroy(&cg);
+	}
+	return XCGROUP_SUCCESS;
+}
+
+/*
+ * umount a cgroup namespace by running /bin/umount on its mount point
+ *
+ * returned values:
+ *  - XCGROUP_ERROR   : cmd line too long or non zero system() status
+ *  - XCGROUP_SUCCESS
+ */
+int xcgroup_ns_umount(xcgroup_ns_t* cgns)
+{
+	char cmdline[1024];
+	int len;
+
+	/* a truncated command must never be executed */
+	len = snprintf(cmdline, 1024, "/bin/umount %s", cgns->mnt_point);
+	if (len >= 1024) {
+		debug2("unable to build cgroup ns umount cmd line");
+		return XCGROUP_ERROR;
+	}
+
+	debug3("cgroup ns umount cmd line is '%s'", cmdline);
+
+	/* any non zero status is reported as an error */
+	if (system(cmdline) != 0)
+		return XCGROUP_ERROR;
+
+	return XCGROUP_SUCCESS;
+}
+
+/*
+ * check that a cgroup namespace is ready to be used, i.e. that the
+ * "release_agent" parameter of its root cgroup can be read
+ *
+ * returned values:
+ *  - 0 : not available
+ *  - 1 : ready to be used
+ */
+int xcgroup_ns_is_available(xcgroup_ns_t* cgns)
+{
+	int available = 0;
+	char* content;
+	size_t csize;
+	xcgroup_t root_cg;
+
+	if (xcgroup_create(cgns, &root_cg, "/", 0, 0) == XCGROUP_ERROR)
+		return 0;
+
+	/* the content itself is not used, only the read status matters */
+	if (xcgroup_get_param(&root_cg, "release_agent",
+			      &content, &csize) == XCGROUP_SUCCESS) {
+		xfree(content);
+		available = 1;
+	}
+
+	xcgroup_destroy(&root_cg);
+
+	return available;
+}
+
+/*
+ * Look for the cgroup in a specific cgroup namespace that owns
+ * a particular pid, using the lines of /proc/<pid>/cgroup
+ *
+ * returned values:
+ *  - XCGROUP_ERROR   : no line matches the ns subsystems (or load failed)
+ *  - XCGROUP_SUCCESS : cg was filled by xcgroup_load()
+ */
+int xcgroup_ns_find_by_pid(xcgroup_ns_t* cgns, xcgroup_t* cg, pid_t pid)
+{
+	int fstatus = XCGROUP_ERROR;
+	char file_path[PATH_MAX];
+	char* buf;
+	size_t fsize;
+	char* line;
+	char* eol;
+	char* entry;
+	char* subsys;
+
+	/* build pid cgroup meta filepath */
+	if (snprintf(file_path, PATH_MAX, "/proc/%u/cgroup", pid) >= PATH_MAX) {
+		debug2("unable to build cgroup meta filepath for pid=%u : %m",
+		       pid);
+		return XCGROUP_ERROR;
+	}
+
+	/*
+	 * read file content
+	 * multiple lines of the form :
+	 * num_mask:subsystems:relative_path
+	 */
+	fstatus = _file_read_content(file_path, &buf, &fsize);
+	if (fstatus != XCGROUP_SUCCESS)
+		return fstatus;
+
+	fstatus = XCGROUP_ERROR;
+	line = buf;
+	while ((eol = index(line, '\n')) != NULL) {
+		*eol = '\0';
+		/* get subsystems entry */
+		subsys = index(line, ':');
+		/* the next search must start after the terminator written
+		 * above : starting on it would see an empty string and
+		 * stop the scan after the first line of the file */
+		line = eol + 1;
+		if (subsys == NULL)
+			continue;
+		subsys++;
+		/* get relative path entry */
+		entry = index(subsys, ':');
+		if (entry == NULL)
+			continue;
+		*entry = '\0';
+		entry++;
+		/* check subsystem versus ns one */
+		if (strcmp(cgns->subsystems, subsys) != 0) {
+			debug("skipping cgroup subsys %s(%s)",
+			      subsys, cgns->subsystems);
+			continue;
+		}
+		fstatus = xcgroup_load(cgns, cg, entry);
+		break;
+	}
+	xfree(buf);
+
+	return fstatus;
+}
+
+
+/*
+ * -----------------------------------------------------------------------------
+ * xcgroup primitives xcgroup primitives xcgroup primitives xcgroup primitives
+ * xcgroup primitives xcgroup primitives xcgroup primitives xcgroup primitives
+ * xcgroup primitives xcgroup primitives xcgroup primitives xcgroup primitives
+ * -----------------------------------------------------------------------------
+ */
+
+int xcgroup_create(xcgroup_ns_t* cgns, xcgroup_t* cg,
+		   char* uri, uid_t uid,  gid_t gid)
+{
+	/* fill the xcgroup descriptor of 'uri' in 'cgns'; nothing is done
+	 * on the filesystem here, see xcgroup_instanciate() for that */
+	char abs_path[PATH_MAX];
+	int len;
+
+	len = snprintf(abs_path, PATH_MAX, "%s%s", cgns->mnt_point, uri);
+	if (len >= PATH_MAX) {
+		debug2("unable to build cgroup '%s' absolute path in ns '%s' "
+		       ": %m", uri, cgns->subsystems);
+		return XCGROUP_ERROR;
+	}
+
+	/* name and path are private copies, freed by xcgroup_destroy() */
+	cg->uid = uid;
+	cg->gid = gid;
+	cg->ns = cgns;
+	cg->path = xstrdup(abs_path);
+	cg->name = xstrdup(uri);
+	return XCGROUP_SUCCESS;
+}
+
+int xcgroup_destroy(xcgroup_t* cg)
+{
+	xfree(cg->path);	/* strings duplicated by xcgroup_create/load */
+	xfree(cg->name);
+	cg->gid = -1;
+	cg->uid = -1;
+	cg->ns = NULL;		/* only forgotten, never freed here */
+	return XCGROUP_SUCCESS;
+}
+
+int xcgroup_lock(xcgroup_t* cg)
+{
+	/* take an exclusive flock() on the cgroup path, keeping the
+	 * descriptor in cg->fd for the matching xcgroup_unlock() call */
+	cg->fd = open(cg->path, O_RDONLY);
+	if (cg->fd < 0) {
+		debug2("xcgroup_lock: error from open of cgroup '%s' : %m",
+		       cg->path);
+		return XCGROUP_ERROR;
+	}
+
+	if (flock(cg->fd,  LOCK_EX) >= 0)
+		return XCGROUP_SUCCESS;
+
+	/* locking failed : do not keep the descriptor opened */
+	debug2("xcgroup_lock: error locking cgroup '%s' : %m",
+	       cg->path);
+	close(cg->fd);
+	return XCGROUP_ERROR;
+}
+
+int xcgroup_unlock(xcgroup_t* cg)
+{
+	/* release the lock taken by xcgroup_lock(); the descriptor is
+	 * closed even when the unlock itself fails */
+	int fstatus = XCGROUP_SUCCESS;
+
+	if (flock(cg->fd,  LOCK_UN) < 0) {
+		debug2("xcgroup_unlock: error unlocking cgroup '%s' : %m",
+		       cg->path);
+		fstatus = XCGROUP_ERROR;
+	}
+	close(cg->fd);
+	return fstatus;
+}
+
+int xcgroup_instanciate(xcgroup_t* cg)
+{
+	/*
+	 * make the cgroup described by 'cg' exist on the filesystem :
+	 *  - create its directory (an already existing one is accepted)
+	 *  - chown it to cg->uid:cg->gid
+	 *  - set notify_on_release to 1 when the namespace has a notify
+	 *    program, to 0 otherwise
+	 *
+	 * returned values:
+	 *  - XCGROUP_ERROR   : directory creation or chown failed
+	 *  - XCGROUP_SUCCESS : otherwise (the notify_on_release write
+	 *                      status is not checked)
+	 */
+	const int fail_if_exists = 0;
+	const int want_notify = 1;
+
+	xcgroup_ns_t* ns = cg->ns;
+	char* path = cg->path;
+	uid_t owner = cg->uid;
+	gid_t group = cg->gid;
+
+	mode_t saved_mask;
+	int mkdir_rc;
+
+	/* save current mask and apply working one */
+	saved_mask = umask(S_IWGRP | S_IWOTH);
+
+	/* build cgroup */
+	mkdir_rc = mkdir(path, 0755);
+	if (mkdir_rc != 0 && (fail_if_exists || errno != EEXIST)) {
+		debug2("unable to create cgroup '%s' : %m",
+		       path);
+		umask(saved_mask);
+		return XCGROUP_ERROR;
+	}
+	umask(saved_mask);
+
+	/* change cgroup ownership as requested */
+	if (chown(path, owner, group) != 0) {
+		debug2("unable to chown %d:%d cgroup '%s' : %m",
+		       owner, group, path);
+		return XCGROUP_ERROR;
+	}
+
+	/* from here, failures do not change the returned status */
+	/* set notify on release flag */
+	if (want_notify && ns->notify_prog != NULL) {
+		xcgroup_set_params(cg, "notify_on_release=1");
+	}
+	else {
+		xcgroup_set_params(cg, "notify_on_release=0");
+	}
+
+	return XCGROUP_SUCCESS;
+}
+
+int xcgroup_load(xcgroup_ns_t* cgns, xcgroup_t* cg, char* uri)
+{
+	/* fill the xcgroup descriptor of an existing cgroup : unlike
+	 * xcgroup_create(), the entry must be stat()-able and its owner
+	 * and group are read from the filesystem */
+	char abs_path[PATH_MAX];
+	struct stat st;
+	int len;
+
+	len = snprintf(abs_path, PATH_MAX, "%s%s", cgns->mnt_point, uri);
+	if (len >= PATH_MAX) {
+		debug2("unable to build cgroup '%s' absolute path in ns '%s' "
+		       ": %m", uri, cgns->subsystems);
+		return XCGROUP_ERROR;
+	}
+
+	if (stat(abs_path, &st) != 0) {
+		debug2("unable to get cgroup '%s' entry '%s' properties"
+		       ": %m", cgns->mnt_point, abs_path);
+		return XCGROUP_ERROR;
+	}
+
+	/* name and path are private copies of the computed strings */
+	cg->uid = st.st_uid;
+	cg->gid = st.st_gid;
+	cg->ns = cgns;
+	cg->path = xstrdup(abs_path);
+	cg->name = xstrdup(uri);
+	return XCGROUP_SUCCESS;
+}
+
+int xcgroup_delete(xcgroup_t* cg)
+{
+	/* remove the cgroup directory; any rmdir() failure is an error */
+	int rc = rmdir(cg->path);
+
+	return (rc != 0) ? XCGROUP_ERROR : XCGROUP_SUCCESS;
+}
+
+int xcgroup_add_pids(xcgroup_t* cg, pid_t* pids, int npids)
+{
+	/* write the 'npids' given pids into the "tasks" file of the cgroup */
+	char tasks_file[PATH_MAX];
+	int len, rc;
+
+	len = snprintf(tasks_file, PATH_MAX, "%s/tasks", cg->path);
+	if (len >= PATH_MAX) {
+		debug2("unable to add pids to '%s' : %m", cg->path);
+		return XCGROUP_ERROR;
+	}
+
+	rc = _file_write_uint32s(tasks_file, (uint32_t*)pids, npids);
+	if (rc != XCGROUP_SUCCESS)
+		debug2("unable to add pids to '%s'", cg->path);
+	return rc;
+}
+
+int
+xcgroup_get_pids(xcgroup_t* cg, pid_t **pids, int *npids)
+{
+	/* read the content of the cgroup "tasks" file into *pids / *npids;
+	 * NULL output pointers are rejected with SLURM_ERROR */
+	char tasks_file[PATH_MAX];
+	int len, rc;
+
+	if (!pids || !npids)
+		return SLURM_ERROR;
+
+	len = snprintf(tasks_file, PATH_MAX, "%s/tasks", cg->path);
+	if (len >= PATH_MAX) {
+		debug2("unable to get pids of '%s' : %m", cg->path);
+		return XCGROUP_ERROR;
+	}
+	rc = _file_read_uint32s(tasks_file, (uint32_t**)pids, npids);
+	if (rc != XCGROUP_SUCCESS)
+		debug2("unable to get pids of '%s'", cg->path);
+	return rc;
+}
+
+int xcgroup_set_params(xcgroup_t* cg, char* parameters)
+{
+	/* write a space separated list of "name=value" entries, each value
+	 * going to the file 'name' of the cgroup; the returned status is
+	 * the one of the last write attempted (XCGROUP_ERROR if none) */
+	int fstatus = XCGROUP_ERROR;
+	char file_path[PATH_MAX];
+	char* cpath = cg->path;
+	char* work;
+	char* entry;
+	char* following;
+	char* val;
+
+	work = (char*) xstrdup(parameters);
+
+	for (entry = work; entry != NULL && *entry != '\0';
+	     entry = following) {
+		/* cut the current entry and locate the next one */
+		following = index(entry, ' ');
+		if (following != NULL) {
+			*following++ = '\0';
+			while (*following == ' ')
+				following++;
+		}
+
+		val = index(entry, '=');
+		if (val == NULL) {
+			debug2("bad paramters format for entry '%s'", entry);
+			continue;
+		}
+		*val++ = '\0';
+		if (snprintf(file_path, PATH_MAX, "%s/%s", cpath, entry)
+		    >= PATH_MAX) {
+			debug2("unable to build filepath for '%s' and"
+			       " parameter '%s' : %m", cpath, entry);
+			continue;
+		}
+		fstatus = _file_write_content(file_path, val, strlen(val));
+		if (fstatus != XCGROUP_SUCCESS)
+			debug2("unable to set parameter '%s' to "
+			       "'%s' for '%s'", entry, val, cpath);
+		else
+			debug3("parameter '%s' set to '%s' for '%s'",
+			       entry, val, cpath);
+	}
+
+	xfree(work);
+	return fstatus;
+}
+
+int xcgroup_set_param(xcgroup_t* cg, char* param, char* content)
+{
+	/* write the string 'content' to the file 'param' of the cgroup */
+	char param_file[PATH_MAX];
+	char* cpath = cg->path;
+	int len, rc;
+
+	len = snprintf(param_file, PATH_MAX, "%s/%s", cpath, param);
+	if (len >= PATH_MAX) {
+		debug2("unable to build filepath for '%s' and"
+		       " parameter '%s' : %m", cpath, param);
+		return XCGROUP_ERROR;
+	}
+	rc = _file_write_content(param_file, content, strlen(content));
+	if (rc == XCGROUP_SUCCESS)
+		debug3("parameter '%s' set to '%s' for '%s'",
+		       param, content, cpath);
+	else
+		debug2("unable to set parameter '%s' to "
+		       "'%s' for '%s'", param, content, cpath);
+	return rc;
+}
+
+int xcgroup_get_param(xcgroup_t* cg, char* param, char **content, size_t *csize)
+{
+	/* read the whole file 'param' of the cgroup into *content / *csize */
+	char param_file[PATH_MAX];
+	char* cpath = cg->path;
+	int len, rc;
+
+	len = snprintf(param_file, PATH_MAX, "%s/%s", cpath, param);
+	if (len >= PATH_MAX) {
+		debug2("unable to build filepath for '%s' and"
+		       " parameter '%s' : %m", cpath, param);
+		return XCGROUP_ERROR;
+	}
+	rc = _file_read_content(param_file, content, csize);
+	if (rc != XCGROUP_SUCCESS)
+		debug2("unable to get parameter '%s' for '%s'", param, cpath);
+	return rc;
+}
+
+int xcgroup_set_uint32_param(xcgroup_t* cg, char* param, uint32_t value)
+{
+	/* write the unsigned 32 bits 'value' to the file 'param' */
+	char param_file[PATH_MAX];
+	char* cpath = cg->path;
+	int len, rc;
+
+	len = snprintf(param_file, PATH_MAX, "%s/%s", cpath, param);
+	if (len >= PATH_MAX) {
+		debug2("unable to build filepath for '%s' and"
+		       " parameter '%s' : %m", cpath, param);
+		return XCGROUP_ERROR;
+	}
+	rc = _file_write_uint32s(param_file, &value, 1);
+	if (rc == XCGROUP_SUCCESS)
+		debug3("parameter '%s' set to '%u' for '%s'",
+		       param, value, cpath);
+	else
+		debug2("unable to set parameter '%s' to "
+		       "'%u' for '%s'", param, value, cpath);
+	return rc;
+}
+
+int xcgroup_get_uint32_param(xcgroup_t* cg, char* param, uint32_t* value)
+{
+	/* read the first uint32 value of the file 'param' into *value;
+	 * an empty file is an error and leaves *value untouched */
+	int fstatus = XCGROUP_ERROR;
+	char file_path[PATH_MAX];
+	char* cpath = cg->path;
+	uint32_t* values = NULL;
+	int vnb = 0;
+
+	if (snprintf(file_path, PATH_MAX, "%s/%s", cpath, param) >= PATH_MAX) {
+		debug2("unable to build filepath for '%s' and"
+		       " parameter '%s' : %m", cpath, param);
+		return fstatus;
+	}
+	fstatus = _file_read_uint32s(file_path, &values, &vnb);
+	if (fstatus != XCGROUP_SUCCESS) {
+		debug2("unable to get parameter '%s' for '%s'", param, cpath);
+		return fstatus;
+	}
+	if (vnb < 1) {
+		debug2("empty parameter '%s' for '%s'", param, cpath);
+		fstatus = XCGROUP_ERROR;
+	}
+	else
+		*value = values[0];
+	xfree(values);
+	return fstatus;
+}
+
+int xcgroup_set_uint64_param(xcgroup_t* cg, char* param, uint64_t value)
+{
+	/* write the uint64 'value' to the file 'param'; it is logged through
+	 * "%llu" with a cast since "%lu" is not 64 bits wide everywhere */
+	int fstatus = XCGROUP_ERROR;
+	char file_path[PATH_MAX];
+	char* cpath = cg->path;
+
+	if (snprintf(file_path, PATH_MAX, "%s/%s", cpath, param) >= PATH_MAX) {
+		debug2("unable to build filepath for '%s' and"
+		       " parameter '%s' : %m", cpath, param);
+		return fstatus;
+	}
+	fstatus = _file_write_uint64s(file_path, &value, 1);
+	if (fstatus != XCGROUP_SUCCESS)
+		debug2("unable to set parameter '%s' to '%llu' for '%s'",
+		       param, (unsigned long long) value, cpath);
+	else
+		debug3("parameter '%s' set to '%llu' for '%s'",
+		       param, (unsigned long long) value, cpath);
+	return fstatus;
+}
+
+int xcgroup_get_uint64_param(xcgroup_t* cg, char* param, uint64_t* value)
+{
+	int fstatus = XCGROUP_ERROR;
+	char file_path[PATH_MAX];
+	char* cpath = cg->path;
+	uint64_t* values = NULL;
+	int vnb = 0;
+	/* store in '*value' the first number found in <cg->path>/<param> */
+	if (snprintf(file_path, PATH_MAX, "%s/%s", cpath, param) >= PATH_MAX) {
+		debug2("unable to build filepath for '%s' and"
+		       " parameter '%s' : %m", cpath, param);
+	}
+	else {
+		fstatus = _file_read_uint64s(file_path, &values, &vnb);
+		if (fstatus != XCGROUP_SUCCESS)
+			debug2("unable to get parameter '%s' for '%s'",
+			       param, cpath);
+		else if (vnb < 1) {
+			debug2("empty parameter '%s' for '%s'",
+			       param, cpath);
+			fstatus = XCGROUP_ERROR; /* '*value' was not set */
+		}
+		else {
+			*value = values[0];
+			xfree(values);
+		}
+	}
+	return fstatus;
+}
+
+
+/*
+ * -----------------------------------------------------------------------------
+ * internal primitives internal primitives internal primitives
+ * internal primitives internal primitives internal primitives
+ * internal primitives internal primitives internal primitives
+ * -----------------------------------------------------------------------------
+ */
+
+size_t _file_getsize(int fd)
+{
+	int nread;
+	size_t count = 0;
+	off_t saved_pos;
+	char byte;
+
+	/* remember where the caller was, then go back to the start */
+	saved_pos = lseek(fd, 0, SEEK_CUR);
+	if (saved_pos < 0)
+		return -1;
+	lseek(fd, 0, SEEK_SET);
+
+	/* count bytes one at a time until EOF, retrying on EINTR */
+	for (;;) {
+		nread = read(fd, (void*)&byte, 1);
+		if (nread > 0)
+			count++;
+		else if (nread == 0 || errno != EINTR)
+			break;
+	}
+
+	/* put the file offset back where it was */
+	lseek(fd, saved_pos, SEEK_SET);
+
+	/* a read error is reported as (size_t)-1 */
+	if (nread < 0)
+		return -1;
+	return count;
+}
+
+int _file_write_uint64s(char* file_path, uint64_t* values, int nb)
+{
+	int fstatus;
+	int rc;
+	int fd;
+	char tstr[256];
+	uint64_t value;
+	int i;
+	/* write 'nb' values to 'file_path', one write() call per value */
+	/* open file for writing */
+	fd = open(file_path, O_WRONLY, 0700);
+	if (fd < 0) {
+		debug2("unable to open '%s' for writing : %m", file_path);
+		return XCGROUP_ERROR;
+	}
+
+	/* add one value per write, terminating NUL included */
+	fstatus = XCGROUP_SUCCESS;
+	for (i=0 ; i < nb ; i++) {
+
+		value = values[i];
+
+		rc = snprintf(tstr, sizeof(tstr), "%llu",
+			      (long long unsigned int)value);
+		if (rc < 0) {
+			debug2("unable to build %llu string value, skipping",
+			       (long long unsigned int)value);
+			fstatus = XCGROUP_ERROR;
+			continue;
+		}
+		/* retry only when write() itself failed with EINTR */
+		do {
+			rc = write(fd, tstr, strlen(tstr)+1);
+		}
+		while (rc < 0 && errno == EINTR);
+		if (rc < 1) {
+			debug2("unable to add value '%s' to file '%s' : %m",
+			       tstr, file_path);
+			fstatus = XCGROUP_ERROR;
+		}
+
+	}
+
+	/* close file */
+	close(fd);
+
+	return fstatus;
+}
+
+int _file_read_uint64s(char* file_path, uint64_t** pvalues, int* pnb)
+{
+	int rc;
+	int fd;
+
+	size_t fsize;
+	char* buf;
+	char* p;
+
+	uint64_t* pa=NULL;
+	int i;
+
+	/* check input pointers : both results are mandatory */
+	if (pvalues == NULL || pnb == NULL)
+		return XCGROUP_ERROR;
+
+	/* open file for reading */
+	fd = open(file_path, O_RDONLY, 0700);
+	if (fd < 0) {
+		debug2("unable to open '%s' for reading : %m", file_path);
+		return XCGROUP_ERROR;
+	}
+
+	/* get file size */
+	fsize=_file_getsize(fd);
+	if (fsize == -1) {
+		close(fd);
+		return XCGROUP_ERROR;
+	}
+
+	/* read file contents */
+	buf = (char*) xmalloc((fsize+1)*sizeof(char));
+	do {
+		rc = read(fd, buf, fsize);
+	}
+	while (rc < 0 && errno == EINTR);
+	close(fd);
+	buf[fsize]='\0';
+
+	/* count values (one per '\n' terminated line) */
+	i=0;
+	if (rc > 0) {
+		p = buf;
+		while (index(p, '\n') != NULL) {
+			i++;
+			p = index(p, '\n') + 1;
+		}
+	}
+
+	/* build uint64_t list */
+	if (i > 0) {
+		pa = (uint64_t*) xmalloc(sizeof(uint64_t) * i);
+		p = buf;
+		i = 0;
+		while (index(p, '\n') != NULL) {
+			long long unsigned int ll_tmp = 0; /* 0 if unparsable */
+			sscanf(p, "%llu", &ll_tmp);
+			pa[i++] = ll_tmp;
+			p = index(p, '\n') + 1;
+		}
+	}
+
+	/* free buffer */
+	xfree(buf);
+
+	/* set output values ('*pvalues' is NULL when '*pnb' is 0) */
+	*pvalues = pa;
+	*pnb = i;
+
+	return XCGROUP_SUCCESS;
+}
+
+int _file_write_uint32s(char* file_path, uint32_t* values, int nb)
+{
+	int result = XCGROUP_SUCCESS;
+	char text[256];
+	int written;
+	int idx;
+	int fd;
+
+	/* the file must already exist : no O_CREAT here */
+	fd = open(file_path, O_WRONLY, 0700);
+	if (fd < 0) {
+		debug2("unable to open '%s' for writing : %m", file_path);
+		return XCGROUP_ERROR;
+	}
+
+	/*
+	 * each value is rendered as decimal text and handed to its own
+	 * write() call, terminating NUL included
+	 */
+	for (idx = 0; idx < nb; idx++) {
+
+		/* render the value, give up on this one if that fails */
+		if (snprintf(text, sizeof(text), "%u", values[idx]) < 0) {
+			debug2("unable to build %u string value, skipping",
+			       values[idx]);
+			result = XCGROUP_ERROR;
+			continue;
+		}
+
+		/* an interrupted write() is simply issued again */
+		while ((written = write(fd, text, strlen(text)+1)) < 0) {
+			if (errno != EINTR)
+				break;
+		}
+
+		/* anything short of one byte written is a failure */
+		if (written < 1) {
+			debug2("unable to add value '%s' to file '%s' : %m",
+			       text, file_path);
+			result = XCGROUP_ERROR;
+		}
+	}
+
+	/* close file */
+	close(fd);
+	return result;
+}
+
+int _file_read_uint32s(char* file_path, uint32_t** pvalues, int* pnb)
+{
+	uint32_t* list = NULL;
+	size_t len;
+	char* data;
+	char* cur;
+	char* eol;
+
+	int count = 0;
+	int nread;
+	int fd;
+
+	/* both output pointers are mandatory */
+	if (pvalues == NULL || pnb == NULL)
+		return XCGROUP_ERROR;
+
+	/* open file for reading */
+	fd = open(file_path, O_RDONLY, 0700);
+	if (fd < 0) {
+		debug2("unable to open '%s' for reading : %m", file_path);
+		return XCGROUP_ERROR;
+	}
+
+	/* the size of the content gives the size of the buffer */
+	len = _file_getsize(fd);
+	if (len == -1) {
+		close(fd);
+		return XCGROUP_ERROR;
+	}
+
+	/* load the content in one read, an interrupted read is redone */
+	data = (char*) xmalloc((len+1)*sizeof(char));
+	for (;;) {
+		nread = read(fd, data, len);
+		if (nread >= 0 || errno != EINTR)
+			break;
+	}
+	close(fd);
+	data[len] = '\0';
+
+	/* first pass : count the '\n' terminated lines */
+	if (nread > 0) {
+		cur = data;
+		while ((eol = index(cur, '\n')) != NULL) {
+			count++;
+			cur = eol + 1;
+		}
+	}
+
+	/* second pass : convert the beginning of each line */
+	if (count > 0) {
+		list = (uint32_t*) xmalloc(sizeof(uint32_t) * count);
+		count = 0;
+		cur = data;
+		while ((eol = index(cur, '\n')) != NULL) {
+			sscanf(cur, "%u", list + count);
+			count++;
+			cur = eol + 1;
+		}
+	}
+
+	/* the text buffer is no longer needed */
+	xfree(data);
+
+	/* hand the array (NULL when empty) and its length back */
+	*pvalues = list;
+	*pnb = count;
+
+	return XCGROUP_SUCCESS;
+}
+
+int _file_write_content(char* file_path, char* content, size_t csize)
+{
+	int fstatus;
+	int rc;
+	int fd;
+	/* write the 'csize' bytes of 'content' to 'file_path' in one call */
+	/* open file for writing */
+	fd = open(file_path, O_WRONLY, 0700);
+	if (fd < 0) {
+		debug2("unable to open '%s' for writing : %m", file_path);
+		return XCGROUP_ERROR;
+	}
+
+	/* write content, retrying only if write() failed with EINTR */
+	do {
+		rc = write(fd, content, csize);
+	}
+	while (rc < 0 && errno == EINTR);
+
+	/* check written size : a failed (-1) or short write is an error */
+	if (rc < 0 || (size_t)rc < csize) {
+		debug2("unable to write %lu bytes to file '%s' : %m",
+		       (unsigned long)csize, file_path);
+		fstatus = XCGROUP_ERROR;
+	}
+	else
+		fstatus = XCGROUP_SUCCESS;
+
+	/* close file */
+	close(fd);
+
+	return fstatus;
+}
+
+int _file_read_content(char* file_path, char** content, size_t *csize)
+{
+	int fstatus = XCGROUP_ERROR;
+	int rc;
+	int fd;
+	size_t fsize;
+	char* buf;
+
+	/* read the whole content of 'file_path' into a new buffer */
+
+	/* check input pointers */
+	if (content == NULL || csize == NULL)
+		return fstatus;
+
+	/* open file for reading */
+	fd = open(file_path, O_RDONLY, 0700);
+	if (fd < 0) {
+		debug2("unable to open '%s' for reading : %m", file_path);
+		return fstatus;
+	}
+
+	/* get file size */
+	fsize=_file_getsize(fd);
+	if (fsize == -1) {
+		close(fd);
+		return fstatus;
+	}
+
+	/* read file contents */
+	buf = (char*) xmalloc((fsize+1)*sizeof(char));
+	buf[fsize]='\0';
+	do {
+		rc = read(fd, buf, fsize);
+	}
+	while (rc < 0 && errno == EINTR);
+
+	/* set output values, or release the buffer nobody will receive */
+	if (rc >= 0) {
+		*content = buf;
+		*csize = rc;
+		fstatus = XCGROUP_SUCCESS;
+	}
+	else
+		xfree(buf);
+
+	/* close file */
+	close(fd);
+	return fstatus;
+}
diff --git a/src/common/xcgroup.h b/src/common/xcgroup.h
new file mode 100644
index 00000000000..c67fa8ecee5
--- /dev/null
+++ b/src/common/xcgroup.h
@@ -0,0 +1,319 @@
+/*****************************************************************************\
+ *  xcgroup.h - cgroup related primitives headers
+ *****************************************************************************
+ *  Copyright (C) 2009 CEA/DAM/DIF
+ *  Written by Matthieu Hautreux <matthieu.hautreux@cea.fr>
+ *
+ *  This file is part of SLURM, a resource management program.
+ *  For details, see <https://computing.llnl.gov/linux/slurm/>.
+ *  Please also read the included file: DISCLAIMER.
+ *
+ *  SLURM is free software; you can redistribute it and/or modify it under
+ *  the terms of the GNU General Public License as published by the Free
+ *  Software Foundation; either version 2 of the License, or (at your option)
+ *  any later version.
+ *
+ *  In addition, as a special exception, the copyright holders give permission
+ *  to link the code of portions of this program with the OpenSSL library under
+ *  certain conditions as described in each individual source file, and
+ *  distribute linked combinations including the two. You must obey the GNU
+ *  General Public License in all respects for all of the code used other than
+ *  OpenSSL. If you modify file(s) with this exception, you may extend this
+ *  exception to your version of the file(s), but you are not obligated to do
+ *  so. If you do not wish to do so, delete this exception statement from your
+ *  version.  If you delete this exception statement from all source files in
+ *  the program, then also delete it here.
+ *
+ *  SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
+ *  WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ *  FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
+ *  details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with SLURM; if not, write to the Free Software Foundation, Inc.,
+ *  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA.
+\*****************************************************************************/
+
+#if HAVE_CONFIG_H
+#   include "config.h"
+#endif
+
+#ifndef _XCGROUP_H_
+#define _XCGROUP_H_
+
+#include <sys/types.h>
+#include <dirent.h>
+
+#define XCGROUP_ERROR    1
+#define XCGROUP_SUCCESS  0
+
+#ifndef CGROUP_BASEDIR
+#define CGROUP_BASEDIR "/cgroup"
+#endif
+
+typedef struct xcgroup_ns {
+	/* description of a cgroup namespace (mount point and settings) */
+	char* mnt_point;  /* mount point to use for the associated cgroup */
+	char* mnt_args;   /* mount args to use in addition */
+
+	char* subsystems; /* list of comma separated subsystems to provide */
+
+	char* notify_prog;/* prog to use with notify on release action */
+
+} xcgroup_ns_t;
+
+typedef struct xcgroup {
+	/* description of one cgroup inside an xcgroup namespace */
+	xcgroup_ns_t* ns; /* xcgroup namespace of this xcgroup */
+	char* name;       /* name of the xcgroup relative to the ns */
+	char* path;       /* absolute path of the xcgroup in the ns */
+	uid_t uid;        /* uid of the owner */
+	gid_t gid;        /* gid of the owner */
+	int   fd;         /* used for locking */
+
+} xcgroup_t;
+
+/*
+ * create a cgroup namespace for tasks containment
+ *
+ * returned values:
+ *  - XCGROUP_ERROR
+ *  - XCGROUP_SUCCESS
+ */
+int xcgroup_ns_create(xcgroup_ns_t* cgns,
+		      char* mnt_point,char* mnt_args,
+		      char* subsys,char* notify_prog);
+
+/*
+ * destroy a cgroup namespace
+ *
+ * returned values:
+ *  - XCGROUP_ERROR
+ *  - XCGROUP_SUCCESS
+ */
+int xcgroup_ns_destroy(xcgroup_ns_t* cgns);
+
+/*
+ * mount a cgroup namespace
+ *
+ * returned values:
+ *  - XCGROUP_ERROR
+ *  - XCGROUP_SUCCESS
+ */
+int xcgroup_ns_mount(xcgroup_ns_t* cgns);
+
+/*
+ * umount a cgroup namespace
+ *
+ * returned values:
+ *  - XCGROUP_ERROR
+ *  - XCGROUP_SUCCESS
+ */
+int xcgroup_ns_umount(xcgroup_ns_t* cgns);
+
+/*
+ * test if cgroup namespace is currently available (mounted)
+ *
+ * returned values:
+ *  - 0 if not available
+ *  - 1 if available
+ */
+int xcgroup_ns_is_available();
+
+/*
+ * load a cgroup from a cgroup namespace given a pid
+ *
+ * returned values:
+ *  - XCGROUP_ERROR
+ *  - XCGROUP_SUCCESS
+ */
+int xcgroup_ns_find_by_pid(xcgroup_ns_t* cgns,xcgroup_t* cg,pid_t pid);
+
+/*
+ * create a cgroup structure
+ *
+ * returned values:
+ *  - XCGROUP_ERROR
+ *  - XCGROUP_SUCCESS
+ */
+int xcgroup_create(xcgroup_ns_t* cgns,xcgroup_t* cg,
+		   char* uri,uid_t uid, gid_t gid);
+
+/*
+ * destroy a cgroup internal structure
+ *
+ * returned values:
+ *  - XCGROUP_ERROR
+ *  - XCGROUP_SUCCESS
+ */
+int xcgroup_destroy(xcgroup_t* cg);
+
+/*
+ * lock a cgroup (must have been instantiated)
+ * (system level using flock)
+ *
+ * returned values:
+ *  - XCGROUP_ERROR
+ *  - XCGROUP_SUCCESS
+ */
+int xcgroup_lock(xcgroup_t* cg);
+
+/*
+ * unlock a cgroup
+ *
+ * returned values:
+ *  - XCGROUP_ERROR
+ *  - XCGROUP_SUCCESS
+ */
+int xcgroup_unlock(xcgroup_t* cg);
+
+/*
+ * instantiate a cgroup in a cgroup namespace (mkdir)
+ *
+ * returned values:
+ *  - XCGROUP_ERROR
+ *  - XCGROUP_SUCCESS
+ */
+int xcgroup_instanciate(xcgroup_t* cg);
+
+/*
+ * load a cgroup from a cgroup namespace into a structure
+ *
+ * returned values:
+ *  - XCGROUP_ERROR
+ *  - XCGROUP_SUCCESS
+ */
+int xcgroup_load(xcgroup_ns_t* cgns,xcgroup_t* cg,
+		 char* uri);
+
+/*
+ * delete a cgroup instance in a cgroup namespace (rmdir)
+ *
+ * returned values:
+ *  - XCGROUP_ERROR
+ *  - XCGROUP_SUCCESS
+ */
+int xcgroup_delete(xcgroup_t* cg);
+
+/*
+ * add a list of pids to a cgroup
+ *
+ * returned values:
+ *  - XCGROUP_ERROR
+ *  - XCGROUP_SUCCESS
+ */
+int xcgroup_add_pids(xcgroup_t* cg,pid_t* pids,int npids);
+
+/*
+ * extract the pids list of a cgroup
+ *
+ * pids array must be freed using xfree(...)
+ *
+ * returned values:
+ *  - XCGROUP_ERROR
+ *  - XCGROUP_SUCCESS
+ */
+int xcgroup_get_pids(xcgroup_t* cg, pid_t **pids, int *npids);
+
+/*
+ * set cgroup parameters using string of the form :
+ * parameters="param=value[ param=value]*"
+ *
+ * param must correspond to a file of the cgroup that
+ * will be written with the value content
+ *
+ * i.e. xcgroup_set_params(&cg,"memory.swappiness=10");
+ *
+ * returned values:
+ *  - XCGROUP_ERROR
+ *  - XCGROUP_SUCCESS
+ */
+int xcgroup_set_params(xcgroup_t* cg,char* parameters);
+
+/*
+ * set a cgroup parameter
+ *
+ * param must correspond to a file of the cgroup that
+ * will be written with the value content
+ *
+ * e.g. xcgroup_set_param(&cg,"memory.swappiness","10");
+ *
+ * returned values:
+ *  - XCGROUP_ERROR
+ *  - XCGROUP_SUCCESS
+ */
+int xcgroup_set_param(xcgroup_t* cg,char* parameter,char* content);
+
+/*
+ * get a cgroup parameter
+ *
+ * param must correspond to a file of the cgroup that
+ * will be read for its content
+ *
+ * i.e. xcgroup_get_param(&cg,"memory.swappiness",&value,&size);
+ *
+ * on success, content must be freed using xfree
+ *
+ * returned values:
+ *  - XCGROUP_ERROR
+ *  - XCGROUP_SUCCESS
+ */
+int xcgroup_get_param(xcgroup_t* cg,char* param,char **content,size_t *csize);
+
+/*
+ * set a cgroup parameter in the form of a uint32_t
+ *
+ * param must correspond to a file of the cgroup that
+ * will be written with the uint32_t value
+ *
+ * e.g. xcgroup_set_uint32_param(&cg,"memory.swappiness",value);
+ *
+ * returned values:
+ *  - XCGROUP_ERROR
+ *  - XCGROUP_SUCCESS
+ */
+int xcgroup_set_uint32_param(xcgroup_t* cg,char* parameter,uint32_t value);
+
+/*
+ * get a cgroup parameter in the form of a uint32_t
+ *
+ * param must correspond to a file of the cgroup that
+ * will be read for its content
+ *
+ * i.e. xcgroup_get_uint32_param(&cg,"memory.swappiness",&value);
+ *
+ * returned values:
+ *  - XCGROUP_ERROR
+ *  - XCGROUP_SUCCESS
+ */
+int xcgroup_get_uint32_param(xcgroup_t* cg,char* param,uint32_t* value);
+
+/*
+ * set a cgroup parameter in the form of a uint64_t
+ *
+ * param must correspond to a file of the cgroup that
+ * will be written with the uint64_t value
+ *
+ * e.g. xcgroup_set_uint64_param(&cg,"memory.swappiness",value);
+ *
+ * returned values:
+ *  - XCGROUP_ERROR
+ *  - XCGROUP_SUCCESS
+ */
+int xcgroup_set_uint64_param(xcgroup_t* cg,char* parameter,uint64_t value);
+
+/*
+ * get a cgroup parameter in the form of a uint64_t
+ *
+ * param must correspond to a file of the cgroup that
+ * will be read for its content
+ *
+ * i.e. xcgroup_get_uint64_param(&cg,"memory.swappiness",&value);
+ *
+ * returned values:
+ *  - XCGROUP_ERROR
+ *  - XCGROUP_SUCCESS
+ */
+int xcgroup_get_uint64_param(xcgroup_t* cg,char* param,uint64_t* value);
+
+#endif
diff --git a/src/plugins/proctrack/cgroup/read_config.c b/src/common/xcgroup_read_config.c
similarity index 79%
rename from src/plugins/proctrack/cgroup/read_config.c
rename to src/common/xcgroup_read_config.c
index 4e4296f8652..913dc8eec52 100644
--- a/src/plugins/proctrack/cgroup/read_config.c
+++ b/src/common/xcgroup_read_config.c
@@ -1,5 +1,5 @@
 /*****************************************************************************\
- *  read_config.c - functions for reading cgroup.conf
+ *  xcgroup_read_config.c - functions for reading cgroup.conf
  *****************************************************************************
  *  Copyright (C) 2009 CEA/DAM/DIF
  *  Written by Matthieu Hautreux <matthieu.hautreux@cea.fr>
@@ -52,58 +52,54 @@
 #include "src/common/xmalloc.h"
 #include "src/common/xstring.h"
 
-#include "read_config.h"
+#include "xcgroup_read_config.h"
 
 slurm_cgroup_conf_t *slurm_cgroup_conf = NULL;
 
 /* Local functions */
-static void _clear_slurm_cgroup_conf(void);
+static void _clear_slurm_cgroup_conf(slurm_cgroup_conf_t *slurm_cgroup_conf);
 static char * _get_conf_path(void);
 
 /*
  * free_slurm_cgroup_conf - free storage associated with the global variable
  *	slurm_cgroup_conf
  */
-extern void free_slurm_cgroup_conf(void)
+extern void free_slurm_cgroup_conf(slurm_cgroup_conf_t *slurm_cgroup_conf)
 {
-	_clear_slurm_cgroup_conf();
-	xfree(slurm_cgroup_conf);
+	_clear_slurm_cgroup_conf(slurm_cgroup_conf);
 }
 
-static void _clear_slurm_cgroup_conf(void)
+static void _clear_slurm_cgroup_conf(slurm_cgroup_conf_t *slurm_cgroup_conf)
 {
 	if (slurm_cgroup_conf) {
 		slurm_cgroup_conf->cgroup_automount = false ;
-		xfree(slurm_cgroup_conf->cgroup_mount_opts);
+		xfree(slurm_cgroup_conf->cgroup_subsystems);
 		xfree(slurm_cgroup_conf->cgroup_release_agent);
-		xfree(slurm_cgroup_conf->user_cgroup_params);
-		xfree(slurm_cgroup_conf->job_cgroup_params);
-		xfree(slurm_cgroup_conf->jobstep_cgroup_params);
+		slurm_cgroup_conf->constrain_cores = false ;
+		slurm_cgroup_conf->task_affinity = false ;
 		slurm_cgroup_conf->constrain_ram_space = false ;
 		slurm_cgroup_conf->allowed_ram_space = 100 ;
 		slurm_cgroup_conf->constrain_swap_space = false ;
 		slurm_cgroup_conf->allowed_swap_space = 0 ;
-		slurm_cgroup_conf->constrain_cores = false ;
 		slurm_cgroup_conf->memlimit_enforcement = 0 ;
 		slurm_cgroup_conf->memlimit_threshold = 100 ;
+		slurm_cgroup_conf->constrain_devices = false ;
 	}
 }
 
 /*
  * read_slurm_cgroup_conf - load the Slurm cgroup configuration from the
- *	cgroup.conf file. Store result into global variable slurm_cgroup_conf.
- *	This function can be called more than once.
+ *	cgroup.conf file.
  * RET SLURM_SUCCESS if no error, otherwise an error code
  */
-extern int read_slurm_cgroup_conf(void)
+extern int read_slurm_cgroup_conf(slurm_cgroup_conf_t *slurm_cgroup_conf)
 {
 	s_p_options_t options[] = {
 		{"CgroupAutomount", S_P_BOOLEAN},
-		{"CgroupMountOptions", S_P_STRING},
-		{"CgroupReleaseAgent", S_P_STRING},
-		{"UserCgroupParams", S_P_STRING},
-		{"JobCgroupParams", S_P_STRING},
-		{"JobStepCgroupParams", S_P_STRING},
+		{"CgroupSubsystems", S_P_STRING},
+		{"CgroupReleaseAgentDir", S_P_STRING},
+		{"ConstrainCores", S_P_BOOLEAN},
+		{"TaskAffinity", S_P_BOOLEAN},
 		{"ConstrainRAMSpace", S_P_BOOLEAN},
 		{"AllowedRAMSpace", S_P_UINT32},
 		{"ConstrainSwapSpace", S_P_BOOLEAN},
@@ -111,6 +107,7 @@ extern int read_slurm_cgroup_conf(void)
 		{"ConstrainCores", S_P_BOOLEAN},
 		{"MemoryLimitEnforcement", S_P_BOOLEAN},
 		{"MemoryLimitThreshold", S_P_UINT32},
+		{"ConstrainDevices", S_P_BOOLEAN},
 		{NULL} };
 	s_p_hashtbl_t *tbl = NULL;
 	char *conf_path = NULL;
@@ -118,11 +115,11 @@ extern int read_slurm_cgroup_conf(void)
 
 	/* Set initial values */
 	if (slurm_cgroup_conf == NULL) {
-		slurm_cgroup_conf = xmalloc(sizeof(slurm_cgroup_conf_t));
+		return SLURM_ERROR;
 	}
-	_clear_slurm_cgroup_conf();
+	_clear_slurm_cgroup_conf(slurm_cgroup_conf);
 
-	/* Get the slurmdbd.conf path and validate the file */
+	/* Get the cgroup.conf path and validate the file */
 	conf_path = _get_conf_path();
 	if ((conf_path == NULL) || (stat(conf_path, &buf) == -1)) {
 		info("No cgroup.conf file (%s)", conf_path);
@@ -138,23 +135,23 @@ extern int read_slurm_cgroup_conf(void)
 
 		/* cgroup initialisation parameters */
 		if (!s_p_get_boolean(&slurm_cgroup_conf->cgroup_automount,
-				     "CgroupAutomount", tbl))
+				   "CgroupAutomount", tbl))
 			slurm_cgroup_conf->cgroup_automount = false;
-		s_p_get_string(&slurm_cgroup_conf->cgroup_mount_opts,
-			       "CgroupMountOptions", tbl);
+		s_p_get_string(&slurm_cgroup_conf->cgroup_subsystems,
+			       "CgroupSubsystems", tbl);
 		s_p_get_string(&slurm_cgroup_conf->cgroup_release_agent,
-			       "CgroupReleaseAgent", tbl);
-		if ( ! slurm_cgroup_conf->cgroup_release_agent )
+			       "CgroupReleaseAgentDir", tbl);
+		if (! slurm_cgroup_conf->cgroup_release_agent)
 			slurm_cgroup_conf->cgroup_release_agent =
-				xstrdup("memory,cpuset");
+				xstrdup("/etc/slurm/cgroup");
 
-		/* job and jobsteps cgroup parameters */
-		s_p_get_string(&slurm_cgroup_conf->user_cgroup_params,
-			       "UserCgroupParams", tbl);
-		s_p_get_string(&slurm_cgroup_conf->job_cgroup_params,
-			       "JobCgroupParams", tbl);
-		s_p_get_string(&slurm_cgroup_conf->jobstep_cgroup_params,
-			       "JobStepCgroupParams", tbl);
+		/* Cores constraints related conf items */
+		if (!s_p_get_boolean(&slurm_cgroup_conf->constrain_cores,
+				     "ConstrainCores", tbl))
+			slurm_cgroup_conf->constrain_cores = false;
+		if (!s_p_get_boolean(&slurm_cgroup_conf->task_affinity,
+				     "TaskAffinity", tbl))
+			slurm_cgroup_conf->task_affinity = false;
 
 		/* RAM and Swap constraints related conf items */
 		if (!s_p_get_boolean(&slurm_cgroup_conf->constrain_ram_space,
@@ -170,11 +167,6 @@ extern int read_slurm_cgroup_conf(void)
 				    "AllowedSwapSpace", tbl))
 			slurm_cgroup_conf->allowed_swap_space = 0;
 
-		/* Cores constraints */
-		if (!s_p_get_boolean(&slurm_cgroup_conf->constrain_cores,
-				     "ConstrainCores", tbl))
-			slurm_cgroup_conf->constrain_cores = false;
-
 		/* Memory limits */
 		if (!s_p_get_boolean(&slurm_cgroup_conf->memlimit_enforcement,
 				     "MemoryLimitEnforcement", tbl))
@@ -183,6 +175,11 @@ extern int read_slurm_cgroup_conf(void)
 				    "MemoryLimitThreshold", tbl))
 			slurm_cgroup_conf->memlimit_threshold = 0;
 
+		/* Devices constraint related conf items */
+		if (!s_p_get_boolean(&slurm_cgroup_conf->constrain_devices,
+				     "ConstrainDevices", tbl))
+			slurm_cgroup_conf->constrain_devices = false;
+
 		s_p_hashtbl_destroy(tbl);
 	}
 
diff --git a/src/plugins/proctrack/cgroup/read_config.h b/src/common/xcgroup_read_config.h
similarity index 85%
rename from src/plugins/proctrack/cgroup/read_config.h
rename to src/common/xcgroup_read_config.h
index b1619ae95c1..59d97164b1e 100644
--- a/src/plugins/proctrack/cgroup/read_config.h
+++ b/src/common/xcgroup_read_config.h
@@ -1,34 +1,34 @@
 /*****************************************************************************\
- *  read_config.h - functions and declarations for reading cgroup.conf
+ *  xcgroup_read_config.h - functions and declarations for reading cgroup.conf
  *****************************************************************************
  *  Copyright (C) 2009 CEA/DAM/DIF
  *  Written by Matthieu Hautreux <matthieu.hautreux@cea.fr>
- *  
+ *
  *  This file is part of SLURM, a resource management program.
  *  For details, see <https://computing.llnl.gov/linux/slurm/>.
  *  Please also read the included file: DISCLAIMER.
- *  
+ *
  *  SLURM is free software; you can redistribute it and/or modify it under
  *  the terms of the GNU General Public License as published by the Free
  *  Software Foundation; either version 2 of the License, or (at your option)
  *  any later version.
  *
- *  In addition, as a special exception, the copyright holders give permission 
- *  to link the code of portions of this program with the OpenSSL library under 
- *  certain conditions as described in each individual source file, and 
- *  distribute linked combinations including the two. You must obey the GNU 
- *  General Public License in all respects for all of the code used other than 
- *  OpenSSL. If you modify file(s) with this exception, you may extend this 
- *  exception to your version of the file(s), but you are not obligated to do 
+ *  In addition, as a special exception, the copyright holders give permission
+ *  to link the code of portions of this program with the OpenSSL library under
+ *  certain conditions as described in each individual source file, and
+ *  distribute linked combinations including the two. You must obey the GNU
+ *  General Public License in all respects for all of the code used other than
+ *  OpenSSL. If you modify file(s) with this exception, you may extend this
+ *  exception to your version of the file(s), but you are not obligated to do
  *  so. If you do not wish to do so, delete this exception statement from your
- *  version.  If you delete this exception statement from all source files in 
+ *  version.  If you delete this exception statement from all source files in
  *  the program, then also delete it here.
- *  
+ *
  *  SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
  *  WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
  *  FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
  *  details.
- *  
+ *
  *  You should have received a copy of the GNU General Public License along
  *  with SLURM; if not, write to the Free Software Foundation, Inc.,
  *  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA.
@@ -55,12 +55,11 @@
 typedef struct slurm_cgroup_conf {
 
 	bool      cgroup_automount;
-	char *    cgroup_mount_opts;
+	char *    cgroup_subsystems;
 	char *    cgroup_release_agent;
 
-	char *    user_cgroup_params;
-	char *    job_cgroup_params;
-	char *    jobstep_cgroup_params;
+	bool      constrain_cores;
+	bool      task_affinity;
 
 	bool      constrain_ram_space;
 	uint32_t  allowed_ram_space;
@@ -68,27 +67,25 @@ typedef struct slurm_cgroup_conf {
 	bool      constrain_swap_space;
 	uint32_t  allowed_swap_space;
 
-	bool      constrain_cores;
-
 	bool      memlimit_enforcement;
 	uint32_t  memlimit_threshold;
 
-} slurm_cgroup_conf_t;
+	bool      constrain_devices;
 
-extern slurm_cgroup_conf_t *slurm_cgroup_conf;
+} slurm_cgroup_conf_t;
 
 /*
- * read_slurm_cgroup_conf - load the Slurm cgroup configuration from the 
- *      cgroup.conf  file. 
+ * read_slurm_cgroup_conf - load the Slurm cgroup configuration from the
+ *      cgroup.conf  file.
  *      This function can be called more than once if so desired.
  * RET SLURM_SUCCESS if no error, otherwise an error code
  */
-extern int read_slurm_cgroup_conf(void);
+extern int read_slurm_cgroup_conf(slurm_cgroup_conf_t *slurm_cgroup_conf);
 
 /*
- * free_slurm_cgroup_conf - free storage associated with the global variable 
+ * free_slurm_cgroup_conf - free storage associated with the given
  *	slurm_cgroup_conf
  */
-extern void free_slurm_cgroup_conf(void);
+extern void free_slurm_cgroup_conf(slurm_cgroup_conf_t *slurm_cgroup_conf);
 
 #endif /* !_DBD_READ_CONFIG_H */
diff --git a/src/common/xcpuinfo.c b/src/common/xcpuinfo.c
new file mode 100644
index 00000000000..ed23d6c54eb
--- /dev/null
+++ b/src/common/xcpuinfo.c
@@ -0,0 +1,982 @@
+/*****************************************************************************\
+ *  xcpuinfo.c - cpuinfo related primitives
+ *****************************************************************************
+ *  Copyright (C) 2009 CEA/DAM/DIF
+ *  Written by Matthieu Hautreux <matthieu.hautreux@cea.fr>
+ *
+ *  This file is part of SLURM, a resource management program.
+ *  For details, see <https://computing.llnl.gov/linux/slurm/>.
+ *  Please also read the included file: DISCLAIMER.
+ *
+ *  SLURM is free software; you can redistribute it and/or modify it under
+ *  the terms of the GNU General Public License as published by the Free
+ *  Software Foundation; either version 2 of the License, or (at your option)
+ *  any later version.
+ *
+ *  In addition, as a special exception, the copyright holders give permission
+ *  to link the code of portions of this program with the OpenSSL library under
+ *  certain conditions as described in each individual source file, and
+ *  distribute linked combinations including the two. You must obey the GNU
+ *  General Public License in all respects for all of the code used other than
+ *  OpenSSL. If you modify file(s) with this exception, you may extend this
+ *  exception to your version of the file(s), but you are not obligated to do
+ *  so. If you do not wish to do so, delete this exception statement from your
+ *  version.  If you delete this exception statement from all source files in
+ *  the program, then also delete it here.
+ *
+ *  SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
+ *  WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ *  FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
+ *  details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with SLURM; if not, write to the Free Software Foundation, Inc.,
+ *  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA.
+\*****************************************************************************/
+
+#if HAVE_CONFIG_H
+#   include "config.h"
+#endif
+
+#if HAVE_STDINT_H
+#  include <stdint.h>
+#endif
+#if HAVE_INTTYPES_H
+#  include <inttypes.h>
+#endif
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <ctype.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <string.h>
+#include <strings.h>
+
+#include <slurm/slurm.h>
+#include <slurm/slurm_errno.h>
+#include "src/common/log.h"
+#include "src/common/xmalloc.h"
+#include "src/common/xstring.h"
+#include "src/slurmd/slurmd/get_mach_stat.h"
+
+#include "xcpuinfo.h"
+
+static char* _cpuinfo_path = "/proc/cpuinfo";	/* file parsed by get_cpuinfo */
+
+static int _compute_block_map(uint16_t numproc,
+			      uint16_t **block_map, uint16_t **block_map_inv);
+static int _chk_cpuinfo_str(char *buffer, char *keyword, char **valptr);
+static int _chk_cpuinfo_uint32(char *buffer, char *keyword, uint32_t *val);
+
+static int _ranges_conv(char* lrange, char** prange, int mode);
+static int _range_to_map(char* range, uint16_t *map, uint16_t map_size,
+			 int add_threads);
+static int _map_to_range(uint16_t *map, uint16_t map_size, char** prange);
+/* cached state shared by the xcpuinfo_* functions below */
+bool     initialized = false;	/* true once xcpuinfo_init() has succeeded */
+uint16_t procs, sockets, cores, threads=1;	/* filled by xcpuinfo_init() */
+uint16_t block_map_size;	/* number of entries in each block map */
+uint16_t *block_map, *block_map_inv;	/* abstract->machine map and inverse */
+
+/*
+ * get_procs - Return the count of procs on this system
+ * Input: procs - buffer for the CPU count
+ * Output: procs - filled in with CPU count, "1" if error
+ *         return code - 0 if no error, otherwise EINVAL
+ */
+extern int
+get_procs(uint16_t *procs)
+{
+#ifdef LPAR_INFO_FORMAT2
+	/* AIX 5.3 only */
+	lpar_info_format2_t info;
+
+	*procs = 1;	/* value left in place if the query below fails */
+	if (lpar_get_info(LPAR_INFO_FORMAT2, &info, sizeof(info)) != 0) {
+		error("lpar_get_info() failed");
+		return EINVAL;
+	}
+
+	*procs = (uint16_t) info.online_vcpus;
+#else /* !LPAR_INFO_FORMAT2 */
+
+#  ifdef _SC_NPROCESSORS_ONLN
+	int my_proc_tally;
+
+	*procs = 1;	/* value left in place if the query below fails */
+	my_proc_tally = (int)sysconf(_SC_NPROCESSORS_ONLN);
+	if (my_proc_tally < 1) {
+		error ("get_procs: error running sysconf(_SC_NPROCESSORS_ONLN)");
+		return EINVAL;
+	}
+
+	*procs = (uint16_t) my_proc_tally; /* NOTE: wraps above 65535 CPUs */
+#  else
+#    ifdef HAVE_SYSCTLBYNAME
+	int ncpu;
+	size_t len = sizeof(ncpu);
+
+	*procs = 1;	/* value left in place if the query below fails */
+	if (sysctlbyname("hw.ncpus", &ncpu, &len, NULL, 0) == -1) {
+		error("get_procs: error running sysctl(HW_NCPU)");
+		return EINVAL;
+	}
+	*procs = (uint16_t) ncpu;
+#    else /* !HAVE_SYSCTLBYNAME */
+	*procs = 1;	/* no query method compiled in: report one CPU */
+#    endif /* HAVE_SYSCTLBYNAME */
+#  endif /* _SC_NPROCESSORS_ONLN */
+#endif /* LPAR_INFO_FORMAT2 */
+
+	return 0;
+}
+
+/*
+ * get_cpuinfo - Return detailed cpuinfo on this system
+ * Input:  numproc - number of processors on the system
+ * Output: p_sockets - number of physical processor sockets
+ *         p_cores - number of physical CPU cores per socket
+ *         p_threads - number of hardware execution threads per core
+ *         block_map - abstract->physical block distribution map
+ *         block_map_inv - physical->abstract block distribution map (inverse)
+ *         return code - 0 if no error, otherwise errno
+ * NOTE: User must xfree block_map and block_map_inv (block_map_size entries)
+ */
+typedef struct cpuinfo {
+	uint16_t seen;		/* set to 1 once this entry has been filled */
+	uint32_t cpuid;		/* CPU ID ("processor") */
+	uint32_t physid;	/* "physical id" token */
+	uint16_t physcnt;	/* counter kept while matching "physical id"s */
+	uint32_t coreid;	/* "core id" token */
+	uint16_t corecnt;	/* counter kept while matching "core id"s */
+	uint16_t siblings;	/* "siblings" count */
+	uint16_t cores;		/* "cpu cores" count */
+} cpuinfo_t;
+static cpuinfo_t *cpuinfo = NULL; /* array of CPU information for get_cpuinfo */
+				  /* Note: file static for qsort/_compare_cpus*/
+extern int
+get_cpuinfo(uint16_t numproc,
+		uint16_t *p_sockets, uint16_t *p_cores, uint16_t *p_threads,
+		uint16_t *block_map_size,
+		uint16_t **block_map, uint16_t **block_map_inv)
+{
+	int retval;
+	uint16_t numcpu	   = 0;		/* number of cpus seen */
+	uint16_t numphys   = 0;		/* number of unique "physical id"s */
+	uint16_t numcores  = 0;		/* number of unique "cores id"s */
+
+	uint16_t maxsibs   = 0;		/* maximum value of "siblings" */
+	uint16_t maxcores  = 0;		/* maximum value of "cores" */
+	uint16_t minsibs   = 0xffff;	/* minimum value of "siblings" */
+	uint16_t mincores  = 0xffff;	/* minimum value of "cores" */
+
+	uint32_t maxcpuid  = 0;		/* maximum CPU ID ("processor") */
+	uint32_t maxphysid = 0;		/* maximum "physical id" */
+	uint32_t maxcoreid = 0;		/* maximum "core id" */
+	uint32_t mincpuid  = 0xffffffff;/* minimum CPU ID ("processor") */
+	uint32_t minphysid = 0xffffffff;/* minimum "physical id" */
+	uint32_t mincoreid = 0xffffffff;/* minimum "core id" */
+	int i;
+#if defined (__sun)
+#if defined (_LP64)
+	int64_t curcpu, val, sockets, cores, threads;
+#else
+	int32_t curcpu, val, sockets, cores, threads;
+#endif
+	int32_t chip_id, core_id, ncore_per_chip, ncpu_per_chip;
+#else
+	FILE *cpu_info_file;
+	char buffer[128];
+	uint16_t curcpu, sockets, cores, threads;
+#endif
+
+	*p_sockets = numproc;		/* initially all single core/thread */
+	*p_cores   = 1;
+	*p_threads = 1;
+	*block_map_size = 0;
+	*block_map      = NULL;
+	*block_map_inv  = NULL;
+
+#if defined (__sun)
+	kstat_ctl_t   *kc;
+	kstat_t       *ksp;
+	kstat_named_t *knp;
+
+	kc = kstat_open();
+	if (kc == NULL) {
+		error ("get speed: kstat error %d", errno);
+		return errno;
+	}
+#else
+	cpu_info_file = fopen(_cpuinfo_path, "r");
+	if (cpu_info_file == NULL) {
+		error ("get_cpuinfo: error %d opening %s",
+			errno, _cpuinfo_path);
+		return errno;
+	}
+#endif
+
+	/* Note: assumes all processor IDs are within [0:numproc-1] */
+	/*       treats physical/core IDs as tokens, not indices */
+	if (cpuinfo)
+		memset(cpuinfo, 0, numproc * sizeof(cpuinfo_t));
+	else
+		cpuinfo = xmalloc(numproc * sizeof(cpuinfo_t));
+
+#if defined (__sun)
+	ksp = kstat_lookup(kc, "cpu_info", -1, NULL);
+	for (; ksp != NULL; ksp = ksp->ks_next) {
+		if (strcmp(ksp->ks_module, "cpu_info"))
+			continue;
+
+		numcpu++;
+		kstat_read(kc, ksp, NULL);
+
+		knp = kstat_data_lookup(ksp, "chip_id");
+		chip_id = knp->value.l;
+		knp = kstat_data_lookup(ksp, "core_id");
+		core_id = knp->value.l;
+		knp = kstat_data_lookup(ksp, "ncore_per_chip");
+		ncore_per_chip = knp->value.l;
+		knp = kstat_data_lookup(ksp, "ncpu_per_chip");
+		ncpu_per_chip = knp->value.l;
+
+		if (chip_id >= numproc) {
+			debug("cpuid is %d (> %d), ignored", chip_id, numproc);
+			continue;
+		}
+
+		cpuinfo[chip_id].seen = 1;
+		cpuinfo[chip_id].cpuid = chip_id;
+
+		maxcpuid = MAX(maxcpuid, chip_id);
+		mincpuid = MIN(mincpuid, chip_id);
+
+		for (i = 0; i < numproc; i++) {
+			if ((cpuinfo[i].coreid == core_id) &&
+			    (cpuinfo[i].corecnt))
+				break;
+		}
+
+		if (i == numproc) {
+			numcores++;
+		} else {
+			cpuinfo[i].corecnt++;
+		}
+
+		if (chip_id < numproc) {
+			cpuinfo[chip_id].corecnt++;
+			cpuinfo[chip_id].coreid = core_id;
+		}
+
+		maxcoreid = MAX(maxcoreid, core_id);
+		mincoreid = MIN(mincoreid, core_id);
+
+		if (ncore_per_chip > numproc) {
+			debug("cores is %u (> %d), ignored",
+			      ncore_per_chip, numproc);
+			continue;
+		}
+
+		if (chip_id < numproc)
+			cpuinfo[chip_id].cores = ncore_per_chip;
+
+		maxcores = MAX(maxcores, ncore_per_chip);
+		mincores = MIN(mincores, ncore_per_chip);
+	}
+#else
+
+	curcpu = 0;
+	while (fgets(buffer, sizeof(buffer), cpu_info_file) != NULL) {
+		uint32_t val;
+		if (_chk_cpuinfo_uint32(buffer, "processor", &val)) {
+			numcpu++;
+			curcpu = val;
+			if (val >= numproc) {	/* out of bounds, ignore */
+				debug("cpuid is %u (> %d), ignored",
+					val, numproc);
+				continue;
+			}
+			cpuinfo[val].seen = 1;
+			cpuinfo[val].cpuid = val;
+			maxcpuid = MAX(maxcpuid, val);
+			mincpuid = MIN(mincpuid, val);
+		} else if (_chk_cpuinfo_uint32(buffer, "physical id", &val)) {
+			/* see if the ID has already been seen */
+			for (i=0; i<numproc; i++) {
+				if ((cpuinfo[i].physid == val)
+				&&  (cpuinfo[i].physcnt))
+					break;
+			}
+
+			if (i == numproc) {		/* new ID... */
+				numphys++;		/* ...increment total */
+			} else {			/* existing ID... */
+				cpuinfo[i].physcnt++;	/* ...update ID cnt */
+			}
+
+			if (curcpu < numproc) {
+				cpuinfo[curcpu].physcnt++;
+				cpuinfo[curcpu].physid = val;
+			}
+
+			maxphysid = MAX(maxphysid, val);
+			minphysid = MIN(minphysid, val);
+		} else if (_chk_cpuinfo_uint32(buffer, "core id", &val)) {
+			/* see if the ID has already been seen */
+			for (i = 0; i < numproc; i++) {
+				if ((cpuinfo[i].coreid == val)
+				&&  (cpuinfo[i].corecnt))
+					break;
+			}
+
+			if (i == numproc) {		/* new ID... */
+				numcores++;		/* ...increment total */
+			} else {			/* existing ID... */
+				cpuinfo[i].corecnt++;	/* ...update ID cnt */
+			}
+
+			if (curcpu < numproc) {
+				cpuinfo[curcpu].corecnt++;
+				cpuinfo[curcpu].coreid = val;
+			}
+
+			maxcoreid = MAX(maxcoreid, val);
+			mincoreid = MIN(mincoreid, val);
+		} else if (_chk_cpuinfo_uint32(buffer, "siblings", &val)) {
+			/* Note: this value is a count, not an index */
+			if (val > numproc) {	/* out of bounds, ignore */
+				debug("siblings is %u (> %d), ignored",
+					val, numproc);
+				continue;
+			}
+			if (curcpu < numproc)
+				cpuinfo[curcpu].siblings = val;
+			maxsibs = MAX(maxsibs, val);
+			minsibs = MIN(minsibs, val);
+		} else if (_chk_cpuinfo_uint32(buffer, "cpu cores", &val)) {
+			/* Note: this value is a count, not an index */
+			if (val > numproc) {	/* out of bounds, ignore */
+				debug("cores is %u (> %d), ignored",
+					val, numproc);
+				continue;
+			}
+			if (curcpu < numproc)
+				cpuinfo[curcpu].cores = val;
+			maxcores = MAX(maxcores, val);
+			mincores = MIN(mincores, val);
+		}
+	}
+
+	fclose(cpu_info_file);
+#endif
+
+	/*** Sanity check ***/
+	if (minsibs == 0) minsibs = 1;		/* guarantee non-zero */
+	if (maxsibs == 0) {
+		minsibs = 1;
+		maxsibs = 1;
+	}
+	if (maxcores == 0) {			/* no core data */
+		mincores = 0;
+		maxcores = 0;
+	}
+
+	/*** Compute Sockets/Cores/Threads ***/
+	if ((minsibs == maxsibs) &&		/* homogeneous system */
+	    (mincores == maxcores)) {
+		sockets = numphys; 		/* unique "physical id" */
+		if (sockets <= 1) {		/* verify single socket */
+			sockets = numcpu / maxsibs; /* maximum "siblings" */
+		}
+		if (sockets == 0)
+			sockets = 1;		/* guarantee non-zero */
+
+		cores = numcores / sockets;	/* unique "core id" */
+		cores = MAX(maxcores, cores);	/* maximum "cpu cores" */
+
+		if (cores == 0) {
+			cores = numcpu / sockets;	/* assume multi-core */
+			if (cores > 1) {
+				debug3("Warning: cpuinfo missing 'core id' or "
+					"'cpu cores' but assuming multi-core");
+			}
+		}
+		if (cores == 0)
+			cores = 1;	/* guarantee non-zero */
+
+		threads = numcpu / (sockets * cores); /* solve for threads */
+		if (threads == 0)
+			threads = 1;	/* guarantee non-zero */
+	} else {				/* heterogeneous system */
+		sockets = numcpu;
+		cores   = 1;			/* one core per socket */
+		threads = 1;			/* one thread per core */
+	}
+
+	*p_sockets = sockets;		/* update output parameters */
+	*p_cores   = cores;
+	*p_threads = threads;
+
+#if DEBUG_DETAIL
+	/*** Display raw data ***/
+	debug3("");
+	debug3("numcpu:     %u", numcpu);
+	debug3("numphys:    %u", numphys);
+	debug3("numcores:   %u", numcores);
+
+	debug3("cores:      %u->%u", mincores, maxcores);
+	debug3("sibs:       %u->%u", minsibs,  maxsibs);
+
+	debug3("cpuid:      %u->%u", mincpuid,  maxcpuid);
+	debug3("physid:     %u->%u", minphysid, maxphysid);
+	debug3("coreid:     %u->%u", mincoreid, maxcoreid);
+
+	for (i = 0; i <= maxcpuid; i++) {
+		debug3("CPU %d:", i);
+		debug3(" seen:     %u", cpuinfo[i].seen);
+		debug3(" physid:   %u", cpuinfo[i].physid);
+		debug3(" physcnt:  %u", cpuinfo[i].physcnt);
+		debug3(" siblings: %u", cpuinfo[i].siblings);
+		debug3(" cores:    %u", cpuinfo[i].cores);
+		debug3(" coreid:   %u", cpuinfo[i].coreid);
+		debug3(" corecnt:  %u", cpuinfo[i].corecnt);
+		debug3("");
+	}
+
+	debug3("");
+	debug3("Sockets:          %u", sockets);
+	debug3("Cores per socket: %u", cores);
+	debug3("Threads per core: %u", threads);
+#endif
+	/* cpuinfo[] holds numproc entries only: never build maps beyond them */
+	*block_map_size = MIN(numcpu, numproc);
+	retval = _compute_block_map(*block_map_size, block_map, block_map_inv);
+
+	xfree(cpuinfo);		/* done with raw cpuinfo data */
+
+	return retval;
+}
+
+/* _chk_cpuinfo_str
+ *	check whether a line of cpuinfo data (buffer) starts with a keyword.
+ *	If so, return the text following the first ':' in *valptr.
+ * Input:  buffer - single line of cpuinfo data
+ *	   keyword - keyword to check for (prefix match)
+ * Output: valptr - text after the first ':', or NULL if the line has none
+ *         return code - true if keyword found, false if not found
+ */
+static int _chk_cpuinfo_str(char *buffer, char *keyword, char **valptr)
+{
+	char *ptr;
+	if (strncmp(buffer, keyword, strlen(keyword)))
+		return false;
+
+	ptr = strstr(buffer, ":");
+	if (ptr != NULL)
+		ptr++;		/* step over the ':' itself */
+	*valptr = ptr;		/* stays NULL when no ':' was found */
+	return true;
+}
+
+/* _chk_cpuinfo_uint32
+ *	check a line of cpuinfo data (buffer) for a keyword.  If it
+ *	exists, return the uint32 value for that keyword in *val.
+ * Input:  buffer - single line of cpuinfo data
+ *	   keyword - keyword to check for
+ * Output: val - uint32 value corresponding to keyword (untouched if false)
+ *         return code - true if keyword and value found, false otherwise
+ */
+static int _chk_cpuinfo_uint32(char *buffer, char *keyword, uint32_t *val)
+{
+	char *valptr = NULL;
+
+	/* a matching line without ':' yields a NULL valptr: nothing to parse */
+	if (!_chk_cpuinfo_str(buffer, keyword, &valptr) || (valptr == NULL))
+		return false;
+	*val = strtoul(valptr, (char **)NULL, 10);
+	return true;
+}
+
+/*
+ * _compute_block_map - Compute abstract->machine block mapping (and inverse)
+ *   allows computation of CPU ID masks for an abstract block distribution
+ *   of logical processors which can then be mapped to the IDs used in the
+ *   actual machine processor ID ordering (which can be BIOS/OS dependent)
+ * Input:  numproc - number of processors on the system
+ *	   cpuinfo - array of cpuinfo (file static for qsort/_compare_cpus)
+ * Output: block_map, block_map_inv - abstract->physical block distribution map
+ *         return code - 0 if no error, otherwise errno
+ * NOTE: User must xfree block_map and block_map_inv
+ *
+ * For example, given a system with 8 logical processors arranged as:
+ *
+ *	Sockets:          4
+ *	Cores per socket: 2
+ *	Threads per core: 1
+ *
+ * and a logical CPU ID assignment of:
+ *
+ *	Machine logical CPU ID assignment:
+ *	Logical CPU ID:        0  1  2  3  4  5  6  7
+ *	Physical Socket ID:    0  1  3  2  0  1  3  2
+ *
+ * The block_map would be:
+ *
+ *	Abstract -> Machine logical CPU ID block mapping:
+ *	Input: (Abstract ID)   0  1  2  3  4  5  6  7
+ *	Output: (Machine ID)   0  4  1  5  3  7  2  6  <--- block_map[]
+ *	Physical Socket ID:    0  0  1  1  2  2  3  3
+ *
+ * and its inverse would be:
+ *
+ *	Machine -> Abstract logical CPU ID block mapping: (inverse)
+ *	Input: (Machine ID)    0  1  2  3  4  5  6  7
+ *	Output: (Abstract ID)  0  2  6  4  1  3  7  5  <--- block_map_inv[]
+ *	Physical Socket ID:    0  1  3  2  0  1  3  2
+ */
+
+/* three-way comparison helpers (-1, 0, 1) used by _compare_cpus(), the
+ * physical cpu comparison handed to libc qsort()
+ */
+static int _icmp16(uint16_t a, uint16_t b)
+{
+    	if (a < b) {
+		return -1;	/* a orders before b */
+	} else if (a == b) {
+		return 0;
+	} else {
+		return 1;	/* a orders after b */
+	}
+}
+static int _icmp32(uint32_t a, uint32_t b)	/* 32-bit twin of _icmp16() */
+{
+	if (a < b) {
+		return -1;	/* a orders before b */
+	} else if (a == b) {
+		return 0;
+	} else {
+		return 1;	/* a orders after b */
+	}
+}
+
+static int _compare_cpus(const void *a1, const void *b1) {
+	const cpuinfo_t *lhs = &cpuinfo[*(uint16_t *) a1];
+	const cpuinfo_t *rhs = &cpuinfo[*(uint16_t *) b1];
+	int order;
+
+	/* entries that were seen sort to the front (inverted comparison) */
+	if ((order = -1 * _icmp16(lhs->seen, rhs->seen)) != 0)
+		return order;
+
+	/* key 1: physid */
+	if ((order = _icmp32(lhs->physid, rhs->physid)) != 0)
+		return order;
+
+	/* key 2: coreid */
+	if ((order = _icmp32(lhs->coreid, rhs->coreid)) != 0)
+		return order;
+
+	/* key 3: cpu id, the final tie-breaker */
+	return _icmp32(lhs->cpuid, rhs->cpuid);
+}
+
+static int _compute_block_map(uint16_t numproc,
+			      uint16_t **block_map, uint16_t **block_map_inv)
+{
+	uint16_t i;
+	/* Compute abstract->machine block mapping (and inverse); both maps
+	 * are xmalloc'ed here and must be xfree'd by the caller */
+	if (block_map) {
+		/* identity map, then ordered by _compare_cpus() */
+		*block_map = xmalloc(numproc * sizeof(uint16_t));
+		for (i = 0; i < numproc; i++)
+			(*block_map)[i] = i;
+		qsort(*block_map, numproc, sizeof(uint16_t), &_compare_cpus);
+	}
+	/* the inverse is derived from *block_map, so it needs both outputs */
+	if (block_map && block_map_inv) {
+		*block_map_inv = xmalloc(numproc * sizeof(uint16_t));
+		for (i = 0; i < numproc; i++)
+			(*block_map_inv)[(*block_map)[i]] = i;
+	}
+
+#if DEBUG_DETAIL
+	/* Display the mapping tables */
+
+	debug3("\nMachine logical CPU ID assignment:");
+	debug3("Logical CPU ID:      ");
+	for (i = 0; i < numproc; i++) {
+		debug3("%3d", i);
+	}
+	debug3("");
+	debug3("Physical Socket ID:  ");
+	for (i = 0; i < numproc; i++) {
+		debug3("%3u", cpuinfo[i].physid);
+	}
+	debug3("");
+
+	if (block_map) {
+		debug3("\nAbstract -> Machine logical CPU ID block mapping:");
+		debug3("Input: (Abstract ID) ");
+		for (i = 0; i < numproc; i++) {
+			debug3("%3d", i);
+		}
+		debug3("");
+		debug3("Output: (Machine ID) ");
+		for (i = 0; i < numproc; i++) {
+			debug3("%3u", (*block_map)[i]);
+		}
+		debug3("");
+		debug3("Physical Socket ID:  ");
+		for (i = 0; i < numproc; i++) {
+			uint16_t id = (*block_map)[i];
+			debug3("%3u", cpuinfo[id].physid);
+		}
+		debug3("");
+	}
+
+	if (block_map_inv) {
+		debug3("\nMachine -> Abstract logical CPU ID block mapping: "
+			"(inverse)");
+		debug3("Input: (Machine ID)  ");
+		for (i = 0; i < numproc; i++) {
+			debug3("%3d", i);
+		}
+		debug3("");
+		debug3("Output: (Abstract ID)");
+		for (i = 0; i < numproc; i++) {
+			debug3("%3u", (*block_map_inv)[i]);
+		}
+		debug3("");
+		debug3("Physical Socket ID:  ");
+		for (i = 0; i < numproc; i++) {
+			debug3("%3u", cpuinfo[i].physid);
+		}
+		debug3("");
+	}
+#endif
+	return 0;
+}
+
+int _ranges_conv(char* lrange,char** prange,int mode);
+
+/* for testing purpose */
+/* uint16_t procs=8, sockets=2, cores=2, threads=2; */
+/* uint16_t block_map_size=8; */
+/* uint16_t block_map[] = { 0, 4, 2, 6, 1, 5, 3, 7 }; */
+/* uint16_t block_map_inv[] = { 0, 4, 2, 6, 1, 5, 3, 7 }; */
+/* xcpuinfo_abs_to_mac("0,2,4,6",&mach); */
+/* xcpuinfo_mac_to_abs(mach,&abs); */
+
+int
+xcpuinfo_init()
+{
+	int rc = XCPUINFO_SUCCESS;
+	/* probe the machine only when no valid cached data is held */
+	if (!initialized) {
+		if (get_procs(&procs) != 0)
+			rc = XCPUINFO_ERROR;
+		else if (get_cpuinfo(procs, &sockets, &cores, &threads,
+				     &block_map_size, &block_map,
+				     &block_map_inv) != 0)
+			rc = XCPUINFO_ERROR;
+		else
+			initialized = true;
+	}
+	return rc;
+}
+
+int
+xcpuinfo_fini()
+{
+	/* only tear down state that xcpuinfo_init() actually built */
+	if (initialized) {
+		xfree(block_map);
+		xfree(block_map_inv);
+		block_map_size = 0;
+		threads = cores = sockets = procs = 0;
+		initialized = false;
+	}
+
+	return XCPUINFO_SUCCESS;
+}
+
+int
+xcpuinfo_abs_to_mac(char* lrange,char** prange)
+{
+	return _ranges_conv(lrange,prange,0);	/* mode 0: abstract to machine */
+}
+
+int
+xcpuinfo_mac_to_abs(char* lrange,char** prange)
+{
+	return _ranges_conv(lrange,prange,1);	/* mode 1: machine to abstract */
+}
+
+int
+xcpuinfo_abs_to_map(char* lrange,uint16_t **map,uint16_t *map_size)
+{	/* NOTE: *map is allocated first and is not freed here on failure */
+	*map_size = block_map_size;	/* size comes from the cached block map */
+	*map = (uint16_t*) xmalloc(block_map_size*sizeof(uint16_t));
+	/* abstract range does not already include the hyperthreads */
+	return _range_to_map(lrange,*map,*map_size,1);
+}
+
+int
+xcpuinfo_map_to_mac(uint16_t *map,uint16_t map_size,char** range)
+{
+	return _map_to_range(map,map_size,range); /* no block map conversion */
+}
+
+int
+xcpuinfo_mac_to_map(char* lrange,uint16_t **map,uint16_t *map_size)
+{	/* NOTE: *map is allocated first and is not freed here on failure */
+	*map_size = block_map_size;	/* size comes from the cached block map */
+	*map = (uint16_t*) xmalloc(block_map_size*sizeof(uint16_t));
+	/* machine range already includes the hyperthreads */
+	return _range_to_map(lrange,*map,*map_size,0);
+}
+
+int
+xcpuinfo_absmap_to_macmap(uint16_t *amap,uint16_t amap_size,
+			  uint16_t **bmap,uint16_t *bmap_size)
+{
+	int i;
+	uint16_t *cmap = block_map;	/* abstract id -> machine id */
+	uint16_t *map_out;
+
+	/*
+	 * abstract to machine conversion using block map: the flag of
+	 * abstract entry i is stored at machine index cmap[i]. Like
+	 * xcpuinfo_macmap_to_absmap(), this relies on the block map being
+	 * loaded and on amap_size not exceeding block_map_size.
+	 */
+	*bmap_size = amap_size;
+	map_out = (uint16_t*) xmalloc(amap_size*sizeof(uint16_t));
+	for (i = 0; i < amap_size; i++)
+		map_out[cmap[i]] = amap[i] ? 1 : 0;
+	*bmap = map_out;
+	return XCPUINFO_SUCCESS;
+}
+
+int
+xcpuinfo_macmap_to_absmap(uint16_t *amap,uint16_t amap_size,
+			  uint16_t **bmap,uint16_t *bmap_size)
+{
+	uint16_t *inv = block_map_inv;	/* machine id -> abstract id */
+	uint16_t *result;
+	int pos;
+
+	/*
+	 * machine to abstract conversion using inverted block map:
+	 * the flag of machine entry pos lands at abstract index inv[pos]
+	 */
+	result = (uint16_t*) xmalloc(amap_size*sizeof(uint16_t));
+	for (pos = 0; pos < amap_size; pos++)
+		result[inv[pos]] = amap[pos] ? 1 : 0;
+	*bmap_size = amap_size;
+	*bmap = result;
+	return XCPUINFO_SUCCESS;
+}
+
+/*
+ * set to 1 each element of the already allocated map of size
+ * map_size if it is present in the input range
+ * if add_threads does not equal 0, the input range is treated
+ * as a core range, and it will be mapped to an array of uint16_t
+ * that will include all the hyperthreads associated to the cores.
+ * returns XCPUINFO_ERROR on a NULL or badly formatted range
+ */
+static int
+_range_to_map(char* range,uint16_t *map,uint16_t map_size,int add_threads)
+{
+	int bad_nb=0;
+	int num_fl=0;
+	int con_fl=0;
+	int last=0;
+
+	char *dup, *p;
+	char *s=NULL;
+	uint16_t start=0,end=0,i;
+
+	/* a NULL range holds nothing to parse: report it as malformed */
+	if ( range == NULL )
+		return XCPUINFO_ERROR;
+
+	/* duplicate input range */
+	dup = xstrdup(range);
+	p = dup;
+	while ( ! last ) {
+		/* cast keeps isdigit() defined for chars with a negative value */
+		if ( isdigit((unsigned char) *p) ) {
+			if ( !num_fl ) {
+				num_fl++;
+				s=p;
+			}
+		}
+		else if ( *p == '-' ) {
+			if ( s && num_fl ) {
+				*p = '\0';
+				start = (uint16_t) atoi(s);
+				con_fl=1;
+				num_fl=0;
+				s=NULL;
+			}
+		}
+		else if ( *p == ',' || *p == '\0') {
+			if ( *p == '\0' )
+				last = 1;
+			if ( s && num_fl ) {
+				*p = '\0';
+				end = (uint16_t) atoi(s);
+				if ( !con_fl )
+					start = end ;
+				con_fl=2;
+				num_fl=0;
+				s=NULL;
+			}
+		}
+		else {
+			bad_nb++;
+			break;
+		}
+		if ( con_fl == 2 ) {
+			if ( add_threads ) {
+				start = start * threads;
+				end = (end+1)*threads - 1 ;
+			}
+			for( i = start ; i <= end && i < map_size ; i++) {
+				map[i]=1;
+			}
+			con_fl=0;
+		}
+		p++;
+	}
+
+	xfree(dup);
+
+	/* bad format for input range */
+	return ( bad_nb > 0 ) ? XCPUINFO_ERROR : XCPUINFO_SUCCESS;
+}
+
+
+/*
+ * allocate and build a range of ids (e.g. "0-1,4") using an input map
+ * having printable element set to 1; XCPUINFO_ERROR if no element is set
+ */
+static int
+_map_to_range(uint16_t *map,uint16_t map_size,char** prange)
+{
+	size_t len;
+	int num_fl=0;
+	int con_fl=0;
+
+	char id[16];	/* "65535-65535," needs 13 bytes with its NUL */
+	char *str;
+
+	uint16_t start=0,end=0,i;
+
+	str = xstrdup("");
+	for ( i = 0 ; i < map_size ; i++ ) {
+		/* grow the current run of set entries or flush a finished one */
+		if ( map[i] ) {
+			num_fl=1;
+			end=i;
+			if ( !con_fl ) {
+				start=end;
+				con_fl=1;
+			}
+		}
+		else if ( num_fl ) {
+			if ( start < end ) {
+				snprintf(id,sizeof(id),"%u-%u,",start,end);
+				xstrcat(str,id);
+			}
+			else {
+				snprintf(id,sizeof(id),"%u,",start);
+				xstrcat(str,id);
+			}
+			con_fl = num_fl = 0;
+		}
+	}
+	if ( num_fl ) {	/* the last run reaches the end of the map */
+		if ( start < end ) {
+			snprintf(id,sizeof(id),"%u-%u,",start,end);
+			xstrcat(str,id);
+		}
+		else {
+			snprintf(id,sizeof(id),"%u,",start);
+			xstrcat(str,id);
+		}
+	}
+
+	len = strlen(str);
+	if ( len > 0 ) {
+		str[len-1]='\0';	/* drop the trailing ',' */
+	}
+	else {
+		xfree(str);
+		return XCPUINFO_ERROR;
+	}
+
+	if ( prange != NULL )
+		*prange = str;
+	else
+		xfree(str);
+
+	return XCPUINFO_SUCCESS;
+}
+
+/*
+ * translate a range of core ids from one numbering to the other:
+ * mode 0 converts the abstract representation to the machine one,
+ * any other mode converts the machine representation to the abstract one
+ */
+static int
+_ranges_conv(char* lrange,char** prange,int mode)
+{
+	int rc;
+	int idx;
+	uint16_t *lookup;	/* block map matching the requested direction */
+	uint16_t *in_map;
+	uint16_t *out_map;
+
+	/* make sure the block maps are loaded before using them */
+	if ( xcpuinfo_init() != XCPUINFO_SUCCESS )
+		return XCPUINFO_ERROR;
+
+	/*
+	 * machine to abstract goes through the inverse block map,
+	 * abstract to machine through the direct one
+	 */
+	lookup = mode ? block_map_inv : block_map;
+
+	/* scratch maps, one for each side of the conversion */
+	in_map = (uint16_t*) xmalloc(block_map_size*sizeof(uint16_t));
+	out_map = (uint16_t*) xmalloc(block_map_size*sizeof(uint16_t));
+
+	/*
+	 * parse the input range; an abstract range (mode 0) is handed
+	 * over with add_threads set, a machine range is not
+	 */
+	rc = _range_to_map(lrange,in_map,block_map_size,!mode);
+	if ( rc == 0 ) {
+		/*
+		 * relocate every selected id through the lookup map
+		 * (see src/slurmd/slurmd/get_mach_stat.c)
+		 */
+		for ( idx = 0 ; idx < block_map_size ; idx++ ) {
+			if ( in_map[idx] )
+				out_map[lookup[idx]] = 1;
+		}
+
+		/* turn the converted map back into a range string */
+		rc = _map_to_range(out_map,block_map_size,prange);
+	}
+
+	xfree(in_map);
+	xfree(out_map);
+	return rc;
+}
diff --git a/src/common/xcpuinfo.h b/src/common/xcpuinfo.h
new file mode 100644
index 00000000000..45300650c6e
--- /dev/null
+++ b/src/common/xcpuinfo.h
@@ -0,0 +1,166 @@
+/*****************************************************************************\
+ *  xcpuinfo.h - cpuinfo related primitives headers
+ *****************************************************************************
+ *  Copyright (C) 2009 CEA/DAM/DIF
+ *  Written by Matthieu Hautreux <matthieu.hautreux@cea.fr>
+ *
+ *  This file is part of SLURM, a resource management program.
+ *  For details, see <https://computing.llnl.gov/linux/slurm/>.
+ *  Please also read the included file: DISCLAIMER.
+ *
+ *  SLURM is free software; you can redistribute it and/or modify it under
+ *  the terms of the GNU General Public License as published by the Free
+ *  Software Foundation; either version 2 of the License, or (at your option)
+ *  any later version.
+ *
+ *  In addition, as a special exception, the copyright holders give permission
+ *  to link the code of portions of this program with the OpenSSL library under
+ *  certain conditions as described in each individual source file, and
+ *  distribute linked combinations including the two. You must obey the GNU
+ *  General Public License in all respects for all of the code used other than
+ *  OpenSSL. If you modify file(s) with this exception, you may extend this
+ *  exception to your version of the file(s), but you are not obligated to do
+ *  so. If you do not wish to do so, delete this exception statement from your
+ *  version.  If you delete this exception statement from all source files in
+ *  the program, then also delete it here.
+ *
+ *  SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
+ *  WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ *  FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
+ *  details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with SLURM; if not, write to the Free Software Foundation, Inc.,
+ *  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA.
+\*****************************************************************************/
+
+#if HAVE_CONFIG_H
+#   include "config.h"
+#endif
+
+#ifndef _XCPUINFO_H_
+#define _XCPUINFO_H_
+
+#define XCPUINFO_ERROR    1
+#define XCPUINFO_SUCCESS  0
+
+extern int get_procs(uint16_t *procs);
+extern int get_cpuinfo(uint16_t numproc,
+		       uint16_t *sockets, uint16_t *cores, uint16_t *threads,
+		       uint16_t *block_map_size,
+		       uint16_t **block_map, uint16_t **block_map_inv);
+
+/*
+ * Initialize xcpuinfo internal data
+ *
+ * returned values:
+ *  - XCPUINFO_ERROR
+ *  - XCPUINFO_SUCCESS
+ */
+int xcpuinfo_init();
+
+/*
+ * Destroy xcpuinfo internal data
+ *
+ * returned values:
+ *  - XCPUINFO_ERROR
+ *  - XCPUINFO_SUCCESS
+ */
+int xcpuinfo_fini();
+
+/*
+ * Use xcpuinfo internal data to convert an abstract range
+ * of cores (slurm internal format) into the machine one
+ *
+ * range is of the form 0-1,4-5
+ *
+ * on success, the output range must be freed using xfree
+ *
+ * returned values:
+ *  - XCPUINFO_ERROR
+ *  - XCPUINFO_SUCCESS
+ */
+int xcpuinfo_abs_to_mac(char* lrange,char** prange);
+
+/*
+ * Use xcpuinfo internal data to convert a machine range
+ * of cores into an abstract one (slurm internal format)
+ *
+ * range is of the form 0-1,4-5
+ *
+ * on success, the output range must be freed using xfree
+ *
+ * returned values:
+ *  - XCPUINFO_ERROR
+ *  - XCPUINFO_SUCCESS
+ */
+int xcpuinfo_mac_to_abs(char* lrange,char** prange);
+
+/*
+ * Use xcpuinfo internal data to convert an abstract range
+ * of cores (slurm internal format) into the equivalent 
+ * map of cores
+ *
+ * range is of the form 0-1,4-5
+ *
+ * on success, the output map must be freed using xfree
+ *
+ * returned values:
+ *  - XCPUINFO_ERROR
+ *  - XCPUINFO_SUCCESS
+ */
+int xcpuinfo_abs_to_map(char* lrange,uint16_t **map,uint16_t *map_size);
+
+/*
+ * Use xcpuinfo internal data to convert a machine range
+ * of cores into the equivalent map of cores
+ *
+ * range is of the form 0-1,4-5
+ *
+ * on success, the output map must be freed using xfree
+ *
+ * returned values:
+ *  - XCPUINFO_ERROR
+ *  - XCPUINFO_SUCCESS
+ */
+int xcpuinfo_mac_to_map(char* lrange,uint16_t **map,uint16_t *map_size);
+
+/*
+ * Use xcpuinfo internal data to convert a machine map
+ * of cores into the equivalent machine range of cores
+ *
+ * on success, the output range must be freed using xfree
+ *
+ * returned values:
+ *  - XCPUINFO_ERROR
+ *  - XCPUINFO_SUCCESS
+ */
+int xcpuinfo_map_to_mac(uint16_t *map,uint16_t map_size,char** range);
+
+/*
+ * Use xcpuinfo internal data to convert an abstract map of cores
+ * into the equivalent machine map of cores
+ *
+ * on success, the output map must be freed using xfree
+ *
+ * returned values:
+ *  - XCPUINFO_ERROR
+ *  - XCPUINFO_SUCCESS
+ */
+int xcpuinfo_absmap_to_macmap(uint16_t *amap,uint16_t amap_size,
+			      uint16_t **bmap,uint16_t *bmap_size);
+
+/*
+ * Use xcpuinfo internal data to convert a machine map of cores
+ * into the equivalent abstract map of cores
+ *
+ * on success, the output map must be freed using xfree
+ *
+ * returned values:
+ *  - XCPUINFO_ERROR
+ *  - XCPUINFO_SUCCESS
+ */
+int xcpuinfo_macmap_to_absmap(uint16_t *amap,uint16_t amap_size,
+			      uint16_t **bmap,uint16_t *bmap_size);
+
+#endif
diff --git a/src/plugins/proctrack/cgroup/Changelog b/src/plugins/proctrack/cgroup/Changelog
deleted file mode 100644
index c3ebd4eacef..00000000000
--- a/src/plugins/proctrack/cgroup/Changelog
+++ /dev/null
@@ -1,20 +0,0 @@
-* Thu Jan 07 2010 Matthieu Hautreux <matthieu.hautreux@cea.fr>
-- release 0.2
-- fix a bug in memory limits calculation based on cgroup.conf 
-  configuration parameters ( (a / b * c) becomes (a * (float) (b/c)) 
-  which is better due to a, b and c being uint32_t values and roundness 
-  issues)
-- add new operations xcgroup_get_memlimit and xcgroup_get_memswlimit
-- add a workaround to cope with a slurm-2.1.0 and previous versions
-  limitation. job_mem field of slurmd_job_t corresponds to job steps
-  limits and not to the job mem limit. Two distinct fields should be
-  available in the future. In the meantime, we use job_mem value of
-  each launched step and extend the amount of allowed memory (both
-  ram and swap) if the the new amount is higher that the previous
-  one
-	
-* Tue Dec 01 2009 Matthieu Hautreux <matthieu.hautreux@cea.fr>
-- initial release (0.1) of proctrack/cgroup plugin
-- include a patch for jobacct_gather proper behavior when used with 
-  proctrack/cgroup (skip POSIX threads reported by the cgroup during
-  accounting)
diff --git a/src/plugins/proctrack/cgroup/Makefile.am b/src/plugins/proctrack/cgroup/Makefile.am
index 4f2cebae6f9..75a043e1db7 100644
--- a/src/plugins/proctrack/cgroup/Makefile.am
+++ b/src/plugins/proctrack/cgroup/Makefile.am
@@ -2,19 +2,12 @@
 
 AUTOMAKE_OPTIONS = foreign
 
-PLUGIN_FLAGS = -module -avoid-version --export-dynamic 
+PLUGIN_FLAGS = -module -avoid-version --export-dynamic
 
 INCLUDES = -I$(top_srcdir) -I$(top_srcdir)/src/common
 
 pkglib_LTLIBRARIES = proctrack_cgroup.la
 
 # Process group ID process tracking plugin.
-proctrack_cgroup_la_SOURCES = \
-	proctrack_cgroup.c \
-	xcgroup.c xcgroup.h \
-	xcpuinfo.c xcpuinfo.h \
-	read_config.c read_config.h \
-	$(top_builddir)/src/slurmd/slurmd/get_mach_stat.c \
-	$(top_builddir)/src/slurmd/slurmd/get_mach_stat.h
-
+proctrack_cgroup_la_SOURCES = proctrack_cgroup.c
 proctrack_cgroup_la_LDFLAGS = $(SO_LDFLAGS) $(PLUGIN_FLAGS)
diff --git a/src/plugins/proctrack/cgroup/Makefile.in b/src/plugins/proctrack/cgroup/Makefile.in
index 11ec924e552..fab187dfbe3 100644
--- a/src/plugins/proctrack/cgroup/Makefile.in
+++ b/src/plugins/proctrack/cgroup/Makefile.in
@@ -105,8 +105,7 @@ am__base_list = \
 am__installdirs = "$(DESTDIR)$(pkglibdir)"
 LTLIBRARIES = $(pkglib_LTLIBRARIES)
 proctrack_cgroup_la_LIBADD =
-am_proctrack_cgroup_la_OBJECTS = proctrack_cgroup.lo xcgroup.lo \
-	xcpuinfo.lo read_config.lo get_mach_stat.lo
+am_proctrack_cgroup_la_OBJECTS = proctrack_cgroup.lo
 proctrack_cgroup_la_OBJECTS = $(am_proctrack_cgroup_la_OBJECTS)
 proctrack_cgroup_la_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) \
 	$(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
@@ -324,19 +323,12 @@ top_build_prefix = @top_build_prefix@
 top_builddir = @top_builddir@
 top_srcdir = @top_srcdir@
 AUTOMAKE_OPTIONS = foreign
-PLUGIN_FLAGS = -module -avoid-version --export-dynamic 
+PLUGIN_FLAGS = -module -avoid-version --export-dynamic
 INCLUDES = -I$(top_srcdir) -I$(top_srcdir)/src/common
 pkglib_LTLIBRARIES = proctrack_cgroup.la
 
 # Process group ID process tracking plugin.
-proctrack_cgroup_la_SOURCES = \
-	proctrack_cgroup.c \
-	xcgroup.c xcgroup.h \
-	xcpuinfo.c xcpuinfo.h \
-	read_config.c read_config.h \
-	$(top_builddir)/src/slurmd/slurmd/get_mach_stat.c \
-	$(top_builddir)/src/slurmd/slurmd/get_mach_stat.h
-
+proctrack_cgroup_la_SOURCES = proctrack_cgroup.c
 proctrack_cgroup_la_LDFLAGS = $(SO_LDFLAGS) $(PLUGIN_FLAGS)
 all: all-am
 
@@ -412,11 +404,7 @@ mostlyclean-compile:
 distclean-compile:
 	-rm -f *.tab.c
 
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/get_mach_stat.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/proctrack_cgroup.Plo@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/read_config.Plo@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/xcgroup.Plo@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/xcpuinfo.Plo@am__quote@
 
 .c.o:
 @am__fastdepCC_TRUE@	$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
@@ -439,13 +427,6 @@ distclean-compile:
 @AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
 @am__fastdepCC_FALSE@	$(LTCOMPILE) -c -o $@ $<
 
-get_mach_stat.lo: $(top_builddir)/src/slurmd/slurmd/get_mach_stat.c
-@am__fastdepCC_TRUE@	$(LIBTOOL)  --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT get_mach_stat.lo -MD -MP -MF $(DEPDIR)/get_mach_stat.Tpo -c -o get_mach_stat.lo `test -f '$(top_builddir)/src/slurmd/slurmd/get_mach_stat.c' || echo '$(srcdir)/'`$(top_builddir)/src/slurmd/slurmd/get_mach_stat.c
-@am__fastdepCC_TRUE@	$(am__mv) $(DEPDIR)/get_mach_stat.Tpo $(DEPDIR)/get_mach_stat.Plo
-@AMDEP_TRUE@@am__fastdepCC_FALSE@	source='$(top_builddir)/src/slurmd/slurmd/get_mach_stat.c' object='get_mach_stat.lo' libtool=yes @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCC_FALSE@	$(LIBTOOL)  --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o get_mach_stat.lo `test -f '$(top_builddir)/src/slurmd/slurmd/get_mach_stat.c' || echo '$(srcdir)/'`$(top_builddir)/src/slurmd/slurmd/get_mach_stat.c
-
 mostlyclean-libtool:
 	-rm -f *.lo
 
diff --git a/src/plugins/proctrack/cgroup/proctrack_cgroup.c b/src/plugins/proctrack/cgroup/proctrack_cgroup.c
index b0da92ea47e..81cf12de42e 100644
--- a/src/plugins/proctrack/cgroup/proctrack_cgroup.c
+++ b/src/plugins/proctrack/cgroup/proctrack_cgroup.c
@@ -35,14 +35,14 @@
 \*****************************************************************************/
 
 #if HAVE_CONFIG_H
-#   include "config.h"
+#include "config.h"
 #endif
 
 #if HAVE_STDINT_H
-#  include <stdint.h>
+#include <stdint.h>
 #endif
 #if HAVE_INTTYPES_H
-#  include <inttypes.h>
+#include <inttypes.h>
 #endif
 
 #include <slurm/slurm.h>
@@ -52,15 +52,15 @@
 
 #include "src/slurmd/slurmstepd/slurmstepd_job.h"
 
+#include "src/common/xcgroup_read_config.h"
+#include "src/common/xcgroup.h"
+#include "src/common/xcpuinfo.h"
+
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <fcntl.h>
 #include <stdlib.h>
 
-#include "read_config.h"
-#include "xcgroup.h"
-#include "xcpuinfo.h"
-
 /*
  * These variables are required by the generic plugin interface.  If they
  * are not found in the plugin, the plugin loader will ignore it.
@@ -90,7 +90,8 @@
  * minimum version for their plugins as the job completion logging API
  * matures.
  */
-const char plugin_name[]      = "Process tracking via linux cgroup";
+const char plugin_name[]      = "Process tracking via linux "
+	"cgroup freezer subsystem";
 const char plugin_type[]      = "proctrack/cgroup";
 const uint32_t plugin_version = 10;
 
@@ -98,390 +99,274 @@ const uint32_t plugin_version = 10;
 #define PATH_MAX 256
 #endif
 
-#define CGROUP_SLURMDIR CGROUP_BASEDIR "/slurm"
+static slurm_cgroup_conf_t slurm_cgroup_conf;
+
+static char user_cgroup_path[PATH_MAX];
+static char job_cgroup_path[PATH_MAX];
+static char jobstep_cgroup_path[PATH_MAX];
+static char release_agent_path[PATH_MAX];
+
+static xcgroup_ns_t freezer_ns;
 
-char user_cgroup_path[PATH_MAX];
-char job_cgroup_path[PATH_MAX];
-char jobstep_cgroup_path[PATH_MAX];
+static xcgroup_t user_freezer_cg;
+static xcgroup_t job_freezer_cg;
+static xcgroup_t step_freezer_cg;
 
 int _slurm_cgroup_init()
 {
-	int fstatus;
-	xcgroup_opts_t opts;
-
-	/* initialize job/jobstep cgroup path */
+	/* initialize user/job/jobstep cgroup relative paths
+	 * and release agent path */
 	user_cgroup_path[0]='\0';
 	job_cgroup_path[0]='\0';
 	jobstep_cgroup_path[0]='\0';
+	release_agent_path[0]='\0';
+
+	/* build freezer release agent path */
+	if (snprintf(release_agent_path, PATH_MAX, "%s/release_freezer",
+		      slurm_cgroup_conf.cgroup_release_agent) >= PATH_MAX) {
+		error("unable to build cgroup freezer release agent path");
+		return SLURM_ERROR;
+	}
 
-	/* we first check that cgroup is mounted */
-	if ( ! xcgroup_is_available() ) {
-		if ( slurm_cgroup_conf->cgroup_automount ) {
-			if ( xcgroup_mount(slurm_cgroup_conf->
-					   cgroup_mount_opts) ) {
-				error("unable to mount cgroup");
+	/* initialize freezer cgroup namespace */
+	if (xcgroup_ns_create(&freezer_ns, CGROUP_BASEDIR "/freezer", "",
+			       "freezer", release_agent_path)
+	     != XCGROUP_SUCCESS) {
+		error("unable to create freezer cgroup namespace");
+		return SLURM_ERROR;
+	}
+
+	/* check that freezer cgroup namespace is available */
+	if (! xcgroup_ns_is_available(&freezer_ns)) {
+		if (slurm_cgroup_conf.cgroup_automount) {
+			if (xcgroup_ns_mount(&freezer_ns)) {
+				error("unable to mount freezer cgroup"
+				      " namespace");
 				return SLURM_ERROR;
 			}
-			info("cgroup system is now mounted");
-			/* we then set the release_agent if necessary */
-			if ( slurm_cgroup_conf->cgroup_release_agent ) {
-				xcgroup_set_release_agent(slurm_cgroup_conf->
-							  cgroup_release_agent);
-			}
+			info("cgroup namespace '%s' is now mounted", "freezer");
 		}
 		else {
-			error("cgroup is not mounted. aborting");
+			error("cgroup namespace '%s' not mounted. aborting",
+			      "freezer");
 			return SLURM_ERROR;
 		}
 	}
 
-	/* create a non releasable root cgroup for slurm usage */
-	opts.uid=getuid();
-	opts.gid=getgid();
-	opts.create_only=0;
-	opts.notify=0;
-	fstatus = xcgroup_create(CGROUP_SLURMDIR,&opts);
-	if ( fstatus != SLURM_SUCCESS ) {
-		error("unable to create SLURM cgroup directory '%s'. aborting",
-		      CGROUP_SLURMDIR);
-		return SLURM_ERROR;
-	}
-
 	return SLURM_SUCCESS;
 }
 
-int _slurm_cgroup_create(slurmd_job_t *job,uint32_t id,uid_t uid,gid_t gid)
+int _slurm_cgroup_create(slurmd_job_t *job, uint32_t id, uid_t uid, gid_t gid)
 {
-	int fstatus;
-
-	xcgroup_opts_t opts;
-	uint32_t cur_memlimit,cur_memswlimit;
-
-	/* build user cgroup path if no set (should not be) */
-	if ( *user_cgroup_path == '\0' ) {
-		if ( snprintf(user_cgroup_path,PATH_MAX,CGROUP_SLURMDIR
-			      "/uid_%u",uid) >= PATH_MAX ) {
-			error("unable to build uid %u cgroup filepath : %m",
-			      uid);
+	/* build user cgroup relative path if not set (should not be) */
+	if (*user_cgroup_path == '\0') {
+		if (snprintf(user_cgroup_path, PATH_MAX,
+			      "/uid_%u", uid) >= PATH_MAX) {
+			error("unable to build uid %u cgroup relative "
+			      "path : %m", uid);
 			return SLURM_ERROR;
 		}
 	}
 
-	/* build job cgroup path if no set (should not be) */
-	if ( *job_cgroup_path == '\0' ) {
-		if ( snprintf(job_cgroup_path,PATH_MAX,"%s/job_%u",
-			      user_cgroup_path,job->jobid) >= PATH_MAX ) {
-			error("unable to build job %u cgroup filepath : %m",
-			      job->jobid);
+	/* build job cgroup relative path if not set (should not be) */
+	if (*job_cgroup_path == '\0') {
+		if (snprintf(job_cgroup_path, PATH_MAX, "%s/job_%u",
+			      user_cgroup_path, job->jobid) >= PATH_MAX) {
+			error("unable to build job %u cgroup relative "
+			      "path : %m", job->jobid);
 			return SLURM_ERROR;
 		}
 	}
 
-	/* build job step cgroup path (should not be) */
-	if ( *jobstep_cgroup_path == '\0' ) {
-		if ( snprintf(jobstep_cgroup_path,PATH_MAX,"%s/step_%u",
-			      job_cgroup_path,job->stepid) >= PATH_MAX ) {
-			error("unable to build job step %u cgroup filepath "
-			      ": %m",job->stepid);
-			return SLURM_ERROR;
+	/* build job step cgroup relative path if not set (should not be) */
+	if (*jobstep_cgroup_path == '\0') {
+		if (job->stepid == NO_VAL) {
+			if (snprintf(jobstep_cgroup_path, PATH_MAX,
+				     "%s/step_batch", job_cgroup_path)
+			    >= PATH_MAX) {
+				error("proctrack/cgroup unable to build job step"
+				      " %u.batch freezer cg relative path: %m",
+				      job->jobid);
+				return SLURM_ERROR;
+			}
+		} else {
+			if (snprintf(jobstep_cgroup_path, PATH_MAX, "%s/step_%u",
+				     job_cgroup_path, job->stepid) >= PATH_MAX) {
+				error("proctrack/cgroup unable to build job step"
+				      " %u.%u freezer cg relative path: %m",
+				      job->jobid, job->stepid);
+				return SLURM_ERROR;
+			}
 		}
 	}
 
-	/* create user cgroup (it could already exist) */
-	opts.uid=getuid();
-	opts.gid=getgid();
-	opts.create_only=0;
-	opts.notify=1;
-	if ( xcgroup_create(user_cgroup_path,&opts)
-	     != SLURM_SUCCESS )
+	/* create user cgroup in the freezer ns (it could already exist) */
+	if (xcgroup_create(&freezer_ns, &user_freezer_cg,
+			    user_cgroup_path,
+			    getuid(), getgid()) != XCGROUP_SUCCESS) {
 		return SLURM_ERROR;
-	if ( slurm_cgroup_conf->user_cgroup_params )
-		xcgroup_set_params(user_cgroup_path,
-				   slurm_cgroup_conf->user_cgroup_params);
-
-	/*
-	 * if memory constraints have to be added to uid cgroup
-	 * use_hierachy=1 must be set here, but this would result
-	 * in impossibility to configure some job memory parameters
-	 * differently, so skip this stage for now
-	 */
+	}
+	if (xcgroup_instanciate(&user_freezer_cg) != XCGROUP_SUCCESS) {
+		xcgroup_destroy(&user_freezer_cg);
 
-	/* create job cgroup (it could already exist) */
-	opts.uid=getuid();
-	opts.gid=getgid();
-	opts.create_only=0;
-	opts.notify=1;
-	if ( xcgroup_create(job_cgroup_path,&opts)
-	     != SLURM_SUCCESS )
 		return SLURM_ERROR;
-
-	/* job cgroup parameters must be set before any sub cgroups
-	   are created */
-	xcgroup_set_mem_use_hierarchy(job_cgroup_path,1);
-	if ( slurm_cgroup_conf->job_cgroup_params )
-		xcgroup_set_params(job_cgroup_path,
-				   slurm_cgroup_conf->job_cgroup_params);
-
-	/*
-	 *  Warning: OOM Killer must be disabled for slurmstepd
-	 *  or it would be destroyed if the application use
-	 *  more memory than permitted
-	 *
-	 *  If an env value is already set for slurmstepd
-	 *  OOM killer behavior, keep it, otherwise set the
-	 *  -17 value, wich means do not let OOM killer kill it
-	 *
-	 *  FYI, setting "export SLURMSTEPD_OOM_ADJ=-17"
-	 *  in /etc/sysconfig/slurm would be the same
-	 */
-	setenv("SLURMSTEPD_OOM_ADJ","-17",0);
-
-	/*
-	 * FIXME!
-	 * Warning, with slurm-2.1.0 job_mem more corresponds to the
-	 * missing field jobstep_mem and thus must not be
-	 * trusted to set the job mem limit constraint
-	 * Due to the lack of jobstep_mem field in slurm-2.1.0
-	 * we only allow to extend the amount of allowed memory
-	 * as a step requiring less than the max allowed amount
-	 * for the job could otherwise reduce the allowed amount of other
-	 * already running steps
-	 * Thus, as a long as a step comes with a value that is higher
-	 * than the current value, we use it as it means that the
-	 * job is at least authorized to use this amount
-	 * In the future, a jobstep_mem field should be added
-	 * to avoid this workaround and be more deterministic
-	 *
-	 * Unfortunately with this workaround comes a collateral problem !
-	 * As we propose to alter already fixed limits for both mem and
-	 * mem+swap, we have to respect a certain order while doing the
-	 * modification to respect the kernel cgroup implementation
-	 * requirements : when sets, memory limit must be lower or equal
-	 * to memory+swap limit
-	 *
-	 * Notes : a limit value of -1 means that the limit was not
-	 * previously set
-	 * Notes : this whole part should be much more simpler when
-	 * the jobstep_mem field will be added
-	 *
-	 */
-
-	/*
-	 * Get current limits for both mem and mem+swap
-	 */
-	xcgroup_get_memlimit(job_cgroup_path,&cur_memlimit);
-	xcgroup_get_memswlimit(job_cgroup_path,&cur_memswlimit);
-
-	/*
-	 * set memory constraints according to cgroup conf
-	 */
-	if ( slurm_cgroup_conf->constrain_ram_space &&
-	     cur_memlimit == -1 ) {
-		uint32_t limit;
-		limit = (uint32_t) job->job_mem ;
-		limit = (uint32_t) limit *
-			( slurm_cgroup_conf->allowed_ram_space / 100.0 ) ;
-		xcgroup_set_memlimit(job_cgroup_path,limit);
 	}
-	if ( slurm_cgroup_conf->constrain_swap_space ) {
-		uint32_t limit,memlimit,swaplimit;
-		memlimit = (uint32_t) job->job_mem ;
-		swaplimit = memlimit ;
-		memlimit = (uint32_t) memlimit *
-			( slurm_cgroup_conf->allowed_ram_space / 100.0 ) ;
-		swaplimit = (uint32_t) swaplimit *
-			( slurm_cgroup_conf->allowed_swap_space / 100.0 ) ;
-		limit = memlimit + swaplimit ;
-		/*
-		 * if memlimit was not set in the previous block,
-		 * we have to set it here or it will not be possible
-		 * to set mem+swap limit as the mem limit value could be
-		 * higher.
-		 * FIXME!
-		 * However, due to the restriction mentioned in the previous
-		 * block (job_mem...) if a step already set it, we will
-		 * have to skip this as if the new amount is bigger
-		 * we will not be allowed by the kernel to set it as
-		 * the mem+swap value will certainly be lower. In such
-		 * scenario, we will have to set memlimit after mem+swap limit
-		 * to still be clean regarding to cgroup kernel implementation
-		 * ( memlimit must be lower or equal to mem+swap limit when
-		 * set ). See stage 2 below...
-		 */
-		if ( !slurm_cgroup_conf->constrain_ram_space &&
-		     cur_memlimit == -1 )
-			xcgroup_set_memlimit(job_cgroup_path,limit);
-		/*
-		 * FIXME!
-		 * for the reason why we do this, see the previous block too
-		 */
 
-		if ( cur_memswlimit == -1 || cur_memswlimit < limit )
-			xcgroup_set_memswlimit(job_cgroup_path,limit);
-		else
-			debug3("keeping previously set mem+swap limit of %uMB"
-			       " for '%s'",cur_memswlimit,job_cgroup_path);
-		/*
-		 * FIXME!
-		 * stage 2
-		 */
-		if ( !slurm_cgroup_conf->constrain_ram_space &&
-		     cur_memlimit != -1 ) {
-			/*
-			 * FIXME!
-			 * for the reason why we do this, see the previous
-			 * block
-			 */
-			if ( cur_memlimit == -1 || cur_memlimit < limit )
-				xcgroup_set_memlimit(job_cgroup_path,limit);
-			else
-				debug3("keeping previously set mem limit of "
-				       "%uMB for '%s'",cur_memlimit,
-				       job_cgroup_path);
-		}
+	/* create job cgroup in the freezer ns (it could already exist) */
+	if (xcgroup_create(&freezer_ns, &job_freezer_cg,
+			    job_cgroup_path,
+			    getuid(), getgid()) != XCGROUP_SUCCESS) {
+		xcgroup_destroy(&user_freezer_cg);
+		return SLURM_ERROR;
 	}
-	/*
-	 * FIXME!
-	 * yet an other stage 2 due to jobstep_mem lack...
-	 * only used when ram_space constraint is enforced
-	 */
-	if ( slurm_cgroup_conf->constrain_ram_space &&
-	     cur_memlimit != -1 ) {
-		uint32_t limit;
-		limit = (uint32_t) job->job_mem ;
-		limit = (uint32_t) limit *
-			( slurm_cgroup_conf->allowed_ram_space / 100.0 ) ;
-		if ( cur_memlimit == -1 || cur_memlimit < limit )
-			xcgroup_set_memlimit(job_cgroup_path,limit);
-		else
-			debug3("keeping previously set mem limit of "
-			       "%uMB for '%s'",cur_memlimit,job_cgroup_path);
+	if (xcgroup_instanciate(&job_freezer_cg) != XCGROUP_SUCCESS) {
+		xcgroup_destroy(&user_freezer_cg);
+		xcgroup_destroy(&job_freezer_cg);
+		return SLURM_ERROR;
 	}
 
-	/* set cores constraints if required by conf */
-	if ( slurm_cgroup_conf->constrain_cores &&
-	     job->job_alloc_cores ) {
-		/*
-		 * abstract mapping of cores in slurm must
-		 * first be mapped into the machine one
-		 */
-		char* mach;
-		if ( xcpuinfo_abs_to_mac(job->job_alloc_cores,&mach) !=
-		     XCPUINFO_SUCCESS ) {
-			error("unable to convert abstract slurm allocated "
-			      "cores '%s' into a valid machine map",
-			      job->job_alloc_cores);
-		}
-		else {
-			debug3("allocated cores conversion done : "
-			       "%s (abstract) -> %s (machine)",
-			       job->job_alloc_cores,mach);
-			xcgroup_set_cpuset_cpus(job_cgroup_path,
-						mach);
-			xfree(mach);
-		}
-	}
-	else if ( ! job->job_alloc_cores ) {
-		error("job_alloc_cores not defined for this job! ancestor's conf"
-		      " will be used instead");
+	/* create step cgroup in the freezer ns (it should not exist) */
+	if (xcgroup_create(&freezer_ns, &step_freezer_cg,
+			    jobstep_cgroup_path,
+			    getuid(), getgid()) != XCGROUP_SUCCESS) {
+		xcgroup_destroy(&user_freezer_cg);
+		xcgroup_destroy(&job_freezer_cg);
+		return SLURM_ERROR;
 	}
-
-	/* create the step sub cgroup  (it sould not already exists) */
-	opts.uid=uid;
-	opts.gid=gid;
-	opts.create_only=1;
-	opts.notify=1;
-	fstatus = xcgroup_create(jobstep_cgroup_path,&opts);
-	if ( fstatus != XCGROUP_SUCCESS ) {
-		rmdir(job_cgroup_path);
-		return fstatus;
+	if (xcgroup_instanciate(&step_freezer_cg) != XCGROUP_SUCCESS) {
+		xcgroup_destroy(&user_freezer_cg);
+		xcgroup_destroy(&job_freezer_cg);
+		xcgroup_destroy(&step_freezer_cg);
+		return SLURM_ERROR;
 	}
 
-	/* set jobstep cgroup parameters */
-	if ( slurm_cgroup_conf->jobstep_cgroup_params )
-		xcgroup_set_params(jobstep_cgroup_path,
-				   slurm_cgroup_conf->jobstep_cgroup_params);
-
-	return fstatus;
+	return SLURM_SUCCESS;
 }
 
 int _slurm_cgroup_destroy(void)
 {
-	if ( jobstep_cgroup_path[0] != '\0' )
-		xcgroup_destroy(jobstep_cgroup_path);
+	if (jobstep_cgroup_path[0] != '\0') {
+		xcgroup_delete(&step_freezer_cg);
+		xcgroup_destroy(&step_freezer_cg);
+	}
 
-	if ( job_cgroup_path[0] != '\0' )
-		xcgroup_destroy(job_cgroup_path);
+	if (job_cgroup_path[0] != '\0') {
+		xcgroup_delete(&job_freezer_cg);
+		xcgroup_destroy(&job_freezer_cg);
+	}
 
-	if ( user_cgroup_path[0] != '\0' )
-		xcgroup_destroy(user_cgroup_path);
+	if (user_cgroup_path[0] != '\0') {
+		xcgroup_delete(&user_freezer_cg);
+		xcgroup_destroy(&user_freezer_cg);
+	}
 
 	return SLURM_SUCCESS;
 }
 
-int _slurm_cgroup_add_pids(uint32_t id,pid_t* pids,int npids)
+int _slurm_cgroup_add_pids(uint32_t id, pid_t* pids, int npids)
+{
+	if (*jobstep_cgroup_path == '\0')
+		return SLURM_ERROR;
+
+	return xcgroup_add_pids(&step_freezer_cg, pids, npids);
+}
+
+int _slurm_cgroup_stick_stepd(uint32_t id, pid_t pid)
 {
-	if ( *jobstep_cgroup_path == '\0' )
+	if (*job_cgroup_path == '\0')
 		return SLURM_ERROR;
 
-	return xcgroup_add_pids(jobstep_cgroup_path,pids,npids);
+	return xcgroup_add_pids(&job_freezer_cg, &pid, 1);
 }
 
 int
 _slurm_cgroup_get_pids(uint32_t id, pid_t **pids, int *npids)
 {
-	if ( *jobstep_cgroup_path == '\0' )
+	if (*jobstep_cgroup_path == '\0')
 		return SLURM_ERROR;
 
-	return xcgroup_get_pids(jobstep_cgroup_path,pids,npids);
+	return xcgroup_get_pids(&step_freezer_cg, pids, npids);
 }
 
-int _slurm_cgroup_set_memlimit(uint32_t id,uint32_t memlimit)
+int _slurm_cgroup_suspend(uint32_t id)
 {
-	if ( *jobstep_cgroup_path == '\0' )
+	if (*jobstep_cgroup_path == '\0')
 		return SLURM_ERROR;
 
-	return xcgroup_set_memlimit(jobstep_cgroup_path,memlimit);
+	return xcgroup_set_param(&step_freezer_cg,
+				 "freezer.state", "FROZEN");
 }
 
-int _slurm_cgroup_set_memswlimit(uint32_t id,uint32_t memlimit)
+int _slurm_cgroup_resume(uint32_t id)
 {
-	if ( *jobstep_cgroup_path == '\0' )
+	if (*jobstep_cgroup_path == '\0')
 		return SLURM_ERROR;
 
-	return xcgroup_set_memswlimit(jobstep_cgroup_path,memlimit);
+	return xcgroup_set_param(&step_freezer_cg,
+				 "freezer.state", "THAWED");
 }
 
-int
-_slurm_cgroup_find_by_pid(uint32_t* pcont_id, pid_t pid)
+bool
+_slurm_cgroup_has_pid(pid_t pid)
 {
-	int fstatus;
-	int rc;
-	uint32_t cont_id;
-	char cpath[PATH_MAX];
-	char* token;
+	bool fstatus;
+	xcgroup_t cg;
 
-	fstatus = xcgroup_find_by_pid(cpath,pid);
-	if (  fstatus != SLURM_SUCCESS )
-		return fstatus;
+	fstatus = xcgroup_ns_find_by_pid(&freezer_ns, &cg, pid);
+	if ( fstatus != XCGROUP_SUCCESS)
+		return false;
 
-	token = rindex(cpath,'/');
-	if ( token == NULL ) {
-		debug3("pid %u cgroup '%s' does not match %s cgroup pattern",
-		      pid,cpath,plugin_type);
-		return SLURM_ERROR;
+	if (strcmp(cg.path, step_freezer_cg.path)) {
+		fstatus = false;
+	}
+	else {
+		fstatus = true;
 	}
 
-	rc = sscanf(token,"/%u",&cont_id);
-	if ( rc == 1 ) {
-		if ( pcont_id != NULL )
-			*pcont_id=cont_id;
-		fstatus = SLURM_SUCCESS;
+	xcgroup_destroy(&cg);
+	return fstatus;
+}
+
+int
+_slurm_cgroup_is_pid_a_slurm_task(uint32_t id, pid_t pid)
+{
+	int fstatus = -1;
+	int fd;
+	pid_t ppid;
+	char file_path[PATH_MAX], buf[2048];
+
+	if (snprintf(file_path, PATH_MAX, "/proc/%ld/stat",
+		      (long)pid) >= PATH_MAX) {
+		debug2("unable to build pid '%d' stat file: %m ", pid);
+		return fstatus;
 	}
-	else {
-		fstatus = SLURM_ERROR;
+
+	if ((fd = open(file_path, O_RDONLY)) < 0) {
+		debug2("unable to open '%s' : %m ", file_path);
+		return fstatus;
+	}
+	if (read(fd, buf, 2048) <= 0) {
+		debug2("unable to read '%s' : %m ", file_path);
+		close(fd);
+		return fstatus;
+	}
+	close(fd);
+
+	if (sscanf(buf, "%*d %*s %*s %d", &ppid) != 1) {
+		debug2("unable to get ppid of pid '%d', %m", pid);
+		return fstatus;
 	}
 
+	/*
+	 * assume that any child of slurmstepd is a slurm task:
+	 * slurm tasks get all signals, whereas inherited
+	 * processes only get SIGKILL
+	 */
+	if (ppid == (long) id)
+		fstatus = 1;
+	else
+		fstatus = 0;
+
 	return fstatus;
 }
 
@@ -489,59 +374,58 @@ _slurm_cgroup_find_by_pid(uint32_t* pcont_id, pid_t pid)
  * init() is called when the plugin is loaded, before any other functions
  * are called.  Put global initialization here.
  */
-extern int init ( void )
+extern int init (void)
 {
 	/* read cgroup configuration */
-	if ( read_slurm_cgroup_conf() )
+	if (read_slurm_cgroup_conf(&slurm_cgroup_conf))
 		return SLURM_ERROR;
 
 	/* initialize cpuinfo internal data */
-	if ( xcpuinfo_init() != XCPUINFO_SUCCESS ) {
-		free_slurm_cgroup_conf();
+	if (xcpuinfo_init() != XCPUINFO_SUCCESS) {
+		free_slurm_cgroup_conf(&slurm_cgroup_conf);
 		return SLURM_ERROR;
 	}
 
 	/* initialize cgroup internal data */
-	if ( _slurm_cgroup_init() != SLURM_SUCCESS ) {
+	if (_slurm_cgroup_init() != SLURM_SUCCESS) {
 		xcpuinfo_fini();
-		free_slurm_cgroup_conf();
+		free_slurm_cgroup_conf(&slurm_cgroup_conf);
 		return SLURM_ERROR;
 	}
 
 	return SLURM_SUCCESS;
 }
 
-extern int fini ( void )
+extern int fini (void)
 {
 	_slurm_cgroup_destroy();
 	xcpuinfo_fini();
-	free_slurm_cgroup_conf();
+	free_slurm_cgroup_conf(&slurm_cgroup_conf);
 	return SLURM_SUCCESS;
 }
 
 /*
  * Uses slurmd job-step manager's pid as the unique container id.
  */
-extern int slurm_container_plugin_create ( slurmd_job_t *job )
+extern int slurm_container_plugin_create (slurmd_job_t *job)
 {
 	int fstatus;
 
 	/* create a new cgroup for that container */
-	fstatus = _slurm_cgroup_create(job,(uint32_t)job->jmgr_pid,
-				       job->uid,job->gid);
-	if ( fstatus )
+	fstatus = _slurm_cgroup_create(job, (uint32_t)job->jmgr_pid,
+				       job->uid, job->gid);
+	if (fstatus)
 		return SLURM_ERROR;
 
-	/* set the cgroup paths to adhoc env variables */
-	env_array_overwrite(&job->env,"SLURM_JOB_CGROUP",
-			    job_cgroup_path);
-	env_array_overwrite(&job->env,"SLURM_STEP_CGROUP",
-			    jobstep_cgroup_path);
-
-	/* add slurmstepd pid to this newly created container */
-	fstatus = _slurm_cgroup_add_pids((uint32_t)job->jmgr_pid,
-					 &(job->jmgr_pid),1);
-	if ( fstatus ) {
+	/* attach the slurmstepd pid to the newly created job container
+	 * (Note: we do not put it in the step container because that
+	 * container could be used to suspend/resume tasks using freezer
+	 * properties, so slurmstepd must be kept outside of
+	 * that one)
+	 */
+	fstatus = _slurm_cgroup_stick_stepd((uint32_t)job->jmgr_pid,
+					    job->jmgr_pid);
+	if (fstatus) {
 		_slurm_cgroup_destroy();
 		return SLURM_ERROR;
 	}
@@ -554,38 +438,67 @@ extern int slurm_container_plugin_create ( slurmd_job_t *job )
 	return SLURM_SUCCESS;
 }
 
-extern int slurm_container_plugin_add ( slurmd_job_t *job, pid_t pid )
+extern int slurm_container_plugin_add (slurmd_job_t *job, pid_t pid)
 {
-	return _slurm_cgroup_add_pids(job->cont_id,&pid,1);
+	return _slurm_cgroup_add_pids(job->cont_id, &pid, 1);
 }
 
-extern int slurm_container_plugin_signal ( uint32_t id, int signal )
+extern int slurm_container_plugin_signal (uint32_t id, int signal)
 {
 	pid_t* pids = NULL;
 	int npids;
 	int i;
+	int slurm_task;
+
+	/* get all the pids associated with the step */
+	if (_slurm_cgroup_get_pids(id, &pids, &npids) !=
+	     SLURM_SUCCESS) {
+		debug3("unable to get pids list for cont_id=%u", id);
+		/* this could mean that all the processes have already */
+		/* exited the container, so return success */
+		return SLURM_SUCCESS;
+	}
 
-	if ( _slurm_cgroup_get_pids(id,&pids,&npids) !=
-	     SLURM_SUCCESS ) {
-		error("unable to get pids list for cont_id=%u",id);
-		return SLURM_ERROR;
+	/* directly manage SIGSTOP using cgroup freezer subsystem */
+	if (signal == SIGSTOP) {
+		xfree(pids);
+		return _slurm_cgroup_suspend(id);
+	}
+
+	/* start by resuming in case of SIGKILL */
+	if (signal == SIGKILL) {
+		_slurm_cgroup_resume(id);
 	}
 
-	for ( i = 0 ; i<npids ; i++ ) {
-		/* do not kill slurmstepd */
-		if ( pids[i] != id ) {
-			debug2("killing process %d with signal %d",
-			       pids[i],signal);
-			kill(pids[i],signal);
+	for (i = 0 ; i<npids ; i++) {
+		/* do not kill slurmstepd (it should not be part of the
+		 * list, but this check is kept as a safeguard)
+		 */
+		if (pids[i] == id)
+			continue;
+
+		/* only signal slurm tasks unless signal is SIGKILL */
+		slurm_task = _slurm_cgroup_is_pid_a_slurm_task(id, pids[i]);
+		if (slurm_task == 1 || signal == SIGKILL) {
+			debug2("killing process %d (%s) with signal %d", pids[i],
+			       (slurm_task==1)?"slurm_task":"inherited_task",
+			       signal);
+			kill(pids[i], signal);
 		}
 	}
 
 	xfree(pids);
 
+	/* resume tasks after signaling slurm tasks with SIGCONT to be sure */
+	/* that SIGTSTP received at suspend time is removed */
+	if (signal == SIGCONT) {
+		return _slurm_cgroup_resume(id);
+	}
+
 	return SLURM_SUCCESS;
 }
 
-extern int slurm_container_plugin_destroy ( uint32_t id )
+extern int slurm_container_plugin_destroy (uint32_t id)
 {
 	_slurm_cgroup_destroy();
 	return SLURM_SUCCESS;
@@ -594,24 +507,13 @@ extern int slurm_container_plugin_destroy ( uint32_t id )
 extern uint32_t slurm_container_plugin_find(pid_t pid)
 {
 	uint32_t cont_id=-1;
-	_slurm_cgroup_find_by_pid(&cont_id,pid);
+	/* not provided for now */
 	return cont_id;
 }
 
 extern bool slurm_container_plugin_has_pid(uint32_t cont_id, pid_t pid)
 {
-	int fstatus;
-	uint32_t lid;
-
-	fstatus = _slurm_cgroup_find_by_pid(&lid,pid);
-	if ( fstatus != SLURM_SUCCESS )
-		return false;
-
-	if ( lid == cont_id )
-		return true;
-	else
-		return false;
-
+	return _slurm_cgroup_has_pid(pid);
 }
 
 extern int slurm_container_plugin_wait(uint32_t cont_id)
@@ -637,8 +539,8 @@ extern int slurm_container_plugin_wait(uint32_t cont_id)
 	return SLURM_SUCCESS;
 }
 
-extern int slurm_container_plugin_get_pids(
-	uint32_t cont_id, pid_t **pids, int *npids)
+extern int slurm_container_plugin_get_pids(uint32_t cont_id,
+					   pid_t **pids, int *npids)
 {
-	return _slurm_cgroup_get_pids(cont_id,pids,npids);
+	return _slurm_cgroup_get_pids(cont_id, pids, npids);
 }
diff --git a/src/plugins/proctrack/cgroup/xcgroup.c b/src/plugins/proctrack/cgroup/xcgroup.c
deleted file mode 100644
index d238cf7d373..00000000000
--- a/src/plugins/proctrack/cgroup/xcgroup.c
+++ /dev/null
@@ -1,985 +0,0 @@
-/*****************************************************************************\
- *  xcgroup.c - cgroup related primitives
- *****************************************************************************
- *  Copyright (C) 2009 CEA/DAM/DIF
- *  Written by Matthieu Hautreux <matthieu.hautreux@cea.fr>
- *
- *  This file is part of SLURM, a resource management program.
- *  For details, see <https://computing.llnl.gov/linux/slurm/>.
- *  Please also read the included file: DISCLAIMER.
- *
- *  SLURM is free software; you can redistribute it and/or modify it under
- *  the terms of the GNU General Public License as published by the Free
- *  Software Foundation; either version 2 of the License, or (at your option)
- *  any later version.
- *
- *  In addition, as a special exception, the copyright holders give permission
- *  to link the code of portions of this program with the OpenSSL library under
- *  certain conditions as described in each individual source file, and
- *  distribute linked combinations including the two. You must obey the GNU
- *  General Public License in all respects for all of the code used other than
- *  OpenSSL. If you modify file(s) with this exception, you may extend this
- *  exception to your version of the file(s), but you are not obligated to do
- *  so. If you do not wish to do so, delete this exception statement from your
- *  version.  If you delete this exception statement from all source files in
- *  the program, then also delete it here.
- *
- *  SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
- *  WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
- *  FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
- *  details.
- *
- *  You should have received a copy of the GNU General Public License along
- *  with SLURM; if not, write to the Free Software Foundation, Inc.,
- *  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA.
-\*****************************************************************************/
-
-#if HAVE_CONFIG_H
-#   include "config.h"
-#endif
-
-#if HAVE_STDINT_H
-#  include <stdint.h>
-#endif
-#if HAVE_INTTYPES_H
-#  include <inttypes.h>
-#endif
-
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include <stdlib.h>
-#include <string.h>
-#include <strings.h>
-
-#include <slurm/slurm.h>
-#include <slurm/slurm_errno.h>
-#include "src/common/log.h"
-#include "src/common/xmalloc.h"
-#include "src/common/xstring.h"
-#include "src/slurmd/slurmstepd/slurmstepd_job.h"
-
-#include "xcgroup.h"
-
-#ifndef PATH_MAX
-#define PATH_MAX 256
-#endif
-
-/* internal functions */
-size_t _file_getsize(int fd);
-int _file_read_uint32s(char* file_path,uint32_t** pvalues,int* pnb);
-int _file_write_uint32s(char* file_path,uint32_t* values,int nb);
-int _file_read_uint64s(char* file_path,uint64_t** pvalues,int* pnb);
-int _file_write_uint64s(char* file_path,uint64_t* values,int nb);
-int _file_read_content(char* file_path,char** content,size_t *csize);
-int _file_write_content(char* file_path,char* content,size_t csize);
-int _xcgroup_cpuset_init(char* file_path);
-
-/* xcgroup primitives */
-int xcgroup_is_available(void)
-{
-	char* value;
-	size_t s;
-
-	if ( xcgroup_get_param(CGROUP_BASEDIR,"release_agent",
-			       &value,&s) != XCGROUP_SUCCESS )
-		return 0;
-	else {
-		xfree(value);
-		return 1;
-	}
-
-}
-
-int xcgroup_set_release_agent(char* agent)
-{
-	int fstatus;
-	char* rag;
-	char* value;
-	size_t s;
-
-	if ( agent == NULL )
-		return XCGROUP_ERROR;
-
-	rag = (char*) xstrdup("release_agent=");
-	fstatus = xcgroup_get_param(CGROUP_BASEDIR,"release_agent",
-				    &value,&s);
-	if (  fstatus == XCGROUP_SUCCESS ) {
-		if ( strcmp(value,agent) != 0 ) {
-			xstrcat(rag,agent);
-			fstatus = xcgroup_set_params(CGROUP_BASEDIR,rag);
-		}
-		xfree(value);
-	}
-
-	xfree(rag);
-	return fstatus;
-}
-
-int xcgroup_mount(char* mount_opts)
-{
-	char* mount_cmd_fmt;
-	char mount_cmd[1024];
-
-	mode_t cmask;
-	mode_t omask;
-
-	cmask = S_IWGRP | S_IWOTH;
-	omask = umask(cmask);
-
-	if ( mkdir(CGROUP_BASEDIR,0755) && errno != EEXIST) {
-		debug("unable to create cgroup directory '%s'"
-		      " : %m",CGROUP_BASEDIR);
-		umask(omask);
-		return XCGROUP_ERROR;
-	}
-	umask(omask);
-
-	if ( mount_opts == NULL ||
-	     strlen(mount_opts) == 0 ) {
-		mount_cmd_fmt="/bin/mount -t cgroup none " CGROUP_BASEDIR;
-	}
-	else
-		mount_cmd_fmt="/bin/mount -o %s -t cgroup none " CGROUP_BASEDIR;
-
-	if ( snprintf(mount_cmd,1024,mount_cmd_fmt,
-		      mount_opts) >= 1024 ) {
-		debug2("unable to build mount cmd line");
-		return XCGROUP_ERROR;
-	}
-	else
-		debug3("cgroup mount cmd line is '%s'",mount_cmd);
-
-	if ( system(mount_cmd) )
-		return XCGROUP_ERROR;
-	else
-		return XCGROUP_SUCCESS;
-
-}
-
-int xcgroup_create(char* file_path,xcgroup_opts_t* opts)
-{
-	int fstatus;
-	uid_t uid;
-	gid_t gid;
-	int create_only;
-	int notify;
-
-	mode_t cmask;
-	mode_t omask;
-
-	uid=opts->uid;
-	gid=opts->gid;
-	create_only=opts->create_only;
-	notify=opts->notify;
-
-	fstatus = XCGROUP_ERROR;
-
-	/* save current mask and apply working one */
-	cmask = S_IWGRP | S_IWOTH;
-	omask = umask(cmask);
-
-	/* build cgroup */
-	if ( mkdir(file_path,0755) ) {
-		if ( create_only || errno != EEXIST ) {
-			debug2("unable to create cgroup '%s' : %m",
-			       file_path);
-			umask(omask);
-			return fstatus;
-		}
-	}
-	umask(omask);
-
-	/* initialize cpuset support (if enabled in cgroup ) */
-	if ( _xcgroup_cpuset_init(file_path) != XCGROUP_SUCCESS ) {
-		debug2("unable to initialize cpuset cgroup component");
-		rmdir(file_path);
-		return fstatus;
-	}
-
-	/* change cgroup ownership as requested */
-	if ( chown(file_path,uid,gid) ) {
-		debug2("unable to chown %d:%d cgroup '%s' : %m",
-		       uid,gid,file_path);
-		return fstatus;
-	}
-
-	/* following operations failure might not result in a general
-	 * failure so set output status to success */
-	fstatus = XCGROUP_SUCCESS;
-
-	/* set notify on release flag */
-	if ( notify == 1 )
-		xcgroup_set_params(file_path,"notify_on_release=1");
-	else if ( notify == 0 )
-		xcgroup_set_params(file_path,"notify_on_release=0");
-
-	return fstatus;
-}
-
-int xcgroup_destroy(char* file_path)
-{
-
-	/*
-	 * nothing to be done here, notify_on_release was set
-	 * so hope that all will works perfectly...
-	 *
-	 * with memory cgroup some pages can still be accounted
-	 * to the cgroup but no more processes are present, this results
-	 * in a directory not being removed until the pages are accounted
-	 * to an other cgroup...
-	 * echoing 1 into memory.force_empty can purge this memory but
-	 * as slurmstepd is still present in the cgroup and use pages,
-	 * this is not sufficient as it could leave some other pages too..
-	 * we should have a way to ask the cgroup to force_empty
-	 * on last process exit but I did not find any for now
-	 */
-	//xcgroup_set_params(file_path,"memory.force_empty=1");
-
-	return XCGROUP_SUCCESS;
-}
-
-int xcgroup_add_pids(char* cpath,pid_t* pids,int npids)
-{
-	int fstatus;
-	char file_path[PATH_MAX];
-
-	fstatus = XCGROUP_ERROR;
-
-	if ( snprintf(file_path,PATH_MAX,"%s/tasks",
-		      cpath) >= PATH_MAX ) {
-		debug2("unable to add pids to '%s' : %m",cpath);
-		return fstatus;
-	}
-
-	fstatus = _file_write_uint32s(file_path,(uint32_t*)pids,npids);
-	if ( fstatus != XCGROUP_SUCCESS )
-		debug2("unable to add pids to '%s'",cpath);
-	return fstatus;
-}
-
-int
-xcgroup_get_pids(char* cpath, pid_t **pids, int *npids)
-{
-	int fstatus;
-	char file_path[PATH_MAX];
-
-	fstatus = XCGROUP_ERROR;
-
-	if ( pids == NULL || npids == NULL )
-		return SLURM_ERROR;
-
-	if ( snprintf(file_path,PATH_MAX,"%s/tasks",
-		      cpath) >= PATH_MAX ) {
-		debug2("unable to get pids of '%s' : %m",cpath);
-		return fstatus;
-	}
-
-	fstatus = _file_read_uint32s(file_path,(uint32_t**)pids,npids);
-	if ( fstatus != XCGROUP_SUCCESS )
-		debug2("unable to get pids of '%s'",cpath);
-	return fstatus;
-}
-
-int
-xcgroup_find_by_pid(char* cpath, pid_t pid)
-{
-	int fstatus = SLURM_ERROR;
-	char file_path[PATH_MAX];
-	char* buf;
-	size_t fsize;
-	char* p;
-	char* e;
-	char* entry;
-
-	/* build pid cgroup meta filepath */
-	if ( snprintf(file_path,PATH_MAX,"/proc/%u/cgroup",
-		      pid) >= PATH_MAX ) {
-		debug2("unable to build cgroup meta filepath for pid=%u : %m",
-		       pid);
-		return XCGROUP_ERROR;
-	}
-
-	/* read file content */
-	fstatus = _file_read_content(file_path,&buf,&fsize);
-	if ( fstatus == XCGROUP_SUCCESS ) {
-		fstatus = XCGROUP_ERROR;
-		p = buf;
-		if ( index(p,'\n') != NULL ) {
-			e = index(p,'\n');
-			*e='\0';
-			entry = rindex(p,':');
-			if ( entry != NULL ) {
-				entry++;
-				snprintf(cpath,PATH_MAX,"%s%s",
-					 CGROUP_BASEDIR,entry);
-				fstatus = XCGROUP_SUCCESS;
-			}
-		}
-		xfree(buf);
-	}
-
-	return fstatus;
-}
-
-int xcgroup_set_memlimit(char* cpath,uint32_t memlimit)
-{
-	int fstatus;
-	char file_path[PATH_MAX];
-	uint64_t ml;
-
-	fstatus = XCGROUP_ERROR;
-
-	if ( snprintf(file_path,PATH_MAX,"%s/memory.limit_in_bytes",
-		      cpath) >= PATH_MAX ) {
-		debug2("unable to set memory limit of '%s' : %m",cpath);
-		return fstatus;
-	}
-
-	ml = (uint64_t) memlimit * 1024 * 1024;
-	fstatus = _file_write_uint64s(file_path,&ml,1);
-	if ( fstatus != XCGROUP_SUCCESS )
-		debug2("unable to set memory limit of '%s' : %m",cpath);
-	else
-		debug3("memory limit set to %uMB for '%s'",memlimit,cpath);
-
-	return fstatus;
-}
-
-int xcgroup_get_memlimit(char* cpath,uint32_t* memlimit)
-{
-	int fstatus;
-	char file_path[PATH_MAX];
-	uint64_t* ml;
-	int i;
-
-	fstatus = XCGROUP_ERROR;
-
-	if ( snprintf(file_path,PATH_MAX,"%s/memory.limit_in_bytes",
-		      cpath) >= PATH_MAX ) {
-		debug2("unable to get memory limit of '%s' : %m",cpath);
-		return fstatus;
-	}
-
-	fstatus = _file_read_uint64s(file_path,&ml,&i);
-	if ( fstatus != XCGROUP_SUCCESS ||
-	     i == 0 )
-		debug2("unable to get memory limit of '%s' : %m",cpath);
-	else {
-		if ( *ml == 0 ) {
-			*memlimit = 0;
-		}
-		else {
-			/* convert into MB */
-			*ml /= 1024 * 1024;
-			/* memlimit is stored into a uint32_t */
-			/* so cap the memlimit value to the max value */
-			/* of an uint32_t */
-			*memlimit = -1 ;
-			if ( *ml < *memlimit ) {
-				*memlimit = *ml;
-			}
-		}
-		debug3("memory limit of '%s' is %uMB",cpath,*memlimit);
-		xfree(ml);
-	}
-
-	return fstatus;
-}
-
-int xcgroup_set_memswlimit(char* cpath,uint32_t memlimit)
-{
-	int fstatus;
-	char file_path[PATH_MAX];
-	uint64_t ml;
-
-	fstatus = XCGROUP_ERROR;
-
-	if ( snprintf(file_path,PATH_MAX,"%s/memory.memsw.limit_in_bytes",
-		      cpath) >= PATH_MAX ) {
-		debug2("unable to set memsw limit of '%s' : %m",cpath);
-		return fstatus;
-	}
-
-	ml = (uint64_t) memlimit * 1024 * 1024;
-	fstatus = _file_write_uint64s(file_path,&ml,1);
-	if ( fstatus != XCGROUP_SUCCESS )
-		debug2("unable to set memsw limit of '%s' : %m",cpath);
-	else
-		debug3("mem+swap limit set to %uMB for '%s'",memlimit,cpath);
-
-	return fstatus;
-}
-
-int xcgroup_get_memswlimit(char* cpath,uint32_t* memlimit)
-{
-	int fstatus;
-	char file_path[PATH_MAX];
-	uint64_t *ml;
-	int i;
-
-	fstatus = XCGROUP_ERROR;
-
-	if ( snprintf(file_path,PATH_MAX,"%s/memory.memsw.limit_in_bytes",
-		      cpath) >= PATH_MAX ) {
-		debug2("unable to get memsw limit of '%s' : %m",cpath);
-		return fstatus;
-	}
-
-	fstatus = _file_read_uint64s(file_path,&ml,&i);
-	if ( fstatus != XCGROUP_SUCCESS ||
-	     i ==0 )
-		debug2("unable to get memsw limit of '%s' : %m",cpath);
-	else {
-		if ( *ml == 0 ) {
-			*memlimit = 0;
-		}
-		else {
-			/* convert into MB */
-			*ml /= 1024 * 1024;
-			/* memlimit is stored into a uint32_t */
-			/* so cap the memlimit value to the max value */
-			/* of an uint32_t */
-			*memlimit = -1 ;
-			if ( *ml < *memlimit ) {
-				*memlimit = *ml;
-			}
-		}
-		debug3("mem+swap limit of '%s' is %uMB",cpath,*memlimit);
-		xfree(ml);
-	}
-
-	return fstatus;
-}
-
-int xcgroup_set_mem_use_hierarchy(char* cpath,int flag)
-{
-	if ( flag )
-		return xcgroup_set_params(cpath,"memory.use_hierarchy=1");
-	else
-		return xcgroup_set_params(cpath,"memory.use_hierarchy=0");
-}
-
-int xcgroup_set_cpuset_cpus(char* cpath,char* range)
-{
-	int fstatus;
-	char file_path[PATH_MAX];
-
-	fstatus = XCGROUP_ERROR;
-
-	if ( snprintf(file_path,PATH_MAX,"%s/cpuset.cpus",
-		      cpath) >= PATH_MAX ) {
-		debug2("unable to set cpuset.cpus to '%s' for '%s' : %m",
-		       range,cpath);
-		return fstatus;
-	}
-
-	fstatus = _file_write_content(file_path,range,strlen(range));
-	if ( fstatus != XCGROUP_SUCCESS )
-		debug2("unable to set cpuset.cpus to '%s' for '%s' : %m",
-		       range,cpath);
-	else
-		debug3("cpuset.cpus set to '%s' for '%s'",range,cpath);
-
-	return fstatus;
-}
-
-int xcgroup_set_params(char* cpath,char* parameters)
-{
-	int fstatus;
-	char file_path[PATH_MAX];
-	char* params;
-	char* value;
-	char* p;
-	char* next;
-
-	fstatus = XCGROUP_ERROR;
-
-	params = (char*) xstrdup(parameters);
-
-	p = params;
-	while ( p != NULL && *p != '\0' ) {
-		next = index(p,' ');
-		if ( next ) {
-			*next='\0';
-			next++;
-			while ( *next == ' ' )
-				next++;
-		}
-		value = index(p,'=');
-		if ( value != NULL ) {
-			*value='\0';
-			value++;
-			if ( snprintf(file_path,PATH_MAX,"%s/%s",cpath,p)
-			     >= PATH_MAX ) {
-				debug2("unable to build filepath for '%s' and"
-				       " parameter '%s' : %m",cpath,p);
-				goto next_loop;
-			}
-			fstatus = _file_write_content(file_path,value,
-						      strlen(value));
-			if ( fstatus != XCGROUP_SUCCESS )
-				debug2("unable to set parameter '%s' to "
-				       "'%s' for '%s'",p,value,cpath);
-			else
-				debug3("parameter '%s' set to '%s' for '%s'",
-				       p,value,cpath);
-		}
-		else
-			debug2("bad parameters format for entry '%s'",p);
-	next_loop:
-		p = next;
-	}
-
-	xfree(params);
-
-	return fstatus;
-}
-
-int xcgroup_get_param(char* cpath,char* parameter,char **content,size_t *csize)
-{
-	int fstatus;
-	char file_path[PATH_MAX];
-
-	fstatus = XCGROUP_ERROR;
-
-	if ( snprintf(file_path,PATH_MAX,"%s/%s",cpath,parameter)
-	     >= PATH_MAX ) {
-		debug2("unable to build filepath for '%s' and"
-		       " parameter '%s' : %m",cpath,parameter);
-	}
-	else {
-		fstatus = _file_read_content(file_path,content,csize);
-		if ( fstatus != XCGROUP_SUCCESS )
-			debug2("unable to get parameter '%s'", parameter);
-	}
-
-	return fstatus;
-}
-
-
-size_t _file_getsize(int fd)
-{
-	int rc;
-	size_t fsize;
-	off_t offset;
-	char c;
-
-	/* store current position and rewind */
-	offset = lseek(fd,0,SEEK_CUR);
-	if ( offset < 0 )
-		return -1;
-	lseek(fd,0,SEEK_SET);
-
-	/* get file size */
-	fsize=0;
-	do {
-		rc = read(fd,(void*)&c,1);
-		if ( rc > 0 )
-			fsize++;
-	}
-	while ( (rc < 0 && errno == EINTR) || rc > 0 );
-
-	/* restore position */
-	lseek(fd,offset,SEEK_SET);
-
-	if ( rc < 0 )
-		return -1;
-	else
-		return fsize;
-}
-
-int
-_file_write_uint64s(char* file_path,uint64_t* values,int nb)
-{
-	int fstatus;
-	int rc;
-	int fd;
-	char tstr[256];
-	uint64_t value;
-	int i;
-
-	/* open file for writing */
-	fd = open(file_path, O_WRONLY, 0700);
-	if (fd < 0) {
-		debug2("unable to open '%s' for writing : %m",
-		       file_path);
-		return XCGROUP_ERROR;
-	}
-
-	/* add one value per line */
-	fstatus = XCGROUP_SUCCESS;
-	for ( i=0 ; i < nb ; i++ ) {
-
-		value = values[i];
-
-		rc = snprintf(tstr, sizeof(tstr), "%"PRIu64"", value);
-		if ( rc < 0 ) {
-			debug2("unable to build %"PRIu64" string value, "
-			       "skipping", value);
-			fstatus = XCGROUP_ERROR;
-			continue;
-		}
-
-		do {
-			rc = write(fd, tstr, strlen(tstr)+1);
-		}
-		while ( rc != 0 && errno == EINTR);
-		if (rc < 1) {
-			debug2("unable to add value '%s' to file '%s' : %m",
-			       tstr,file_path);
-			fstatus = XCGROUP_ERROR;
-		}
-
-	}
-
-	/* close file */
-	close(fd);
-
-	return fstatus;
-}
-
-
-int
-_file_read_uint64s(char* file_path,uint64_t** pvalues,int* pnb)
-{
-	int rc;
-	int fd;
-
-	size_t fsize;
-	char* buf;
-	char* p;
-
-	uint64_t* pa=NULL;
-	int i;
-
-	/* check input pointers */
-	if ( pvalues == NULL || pnb == NULL )
-		return XCGROUP_ERROR;
-
-	/* open file for reading */
-	fd = open(file_path, O_RDONLY, 0700);
-	if (fd < 0) {
-		debug2("unable to open '%s' for reading : %m",
-		       file_path);
-		return XCGROUP_ERROR;
-	}
-
-	/* get file size */
-	fsize=_file_getsize(fd);
-	if ( fsize == -1 ) {
-		close(fd);
-		return XCGROUP_ERROR;
-	}
-
-	/* read file contents */
-	buf = (char*) xmalloc((fsize+1)*sizeof(char));
-	do {
-		rc = read(fd,buf,fsize);
-	}
-	while ( rc < 0 && errno == EINTR );
-	close(fd);
-	buf[fsize]='\0';
-
-	/* count values (splitted by \n) */
-	i=0;
-	if ( rc > 0 ) {
-		p = buf;
-		while ( index(p,'\n') != NULL ) {
-			i++;
-			p = index(p,'\n') + 1;
-		}
-	}
-
-	/* build uint32_t list */
-	if ( i > 0 ) {
-		pa = (uint64_t*) xmalloc(sizeof(uint64_t) * i);
-		p = buf;
-		i = 0;
-		while ( index(p,'\n') != NULL ) {
-			long long unsigned int ll_tmp;
-			sscanf(p,"%llu", &ll_tmp);
-			pa[i++] = ll_tmp;
-			p = index(p,'\n') + 1;
-		}
-	}
-
-	/* free buffer */
-	xfree(buf);
-
-	/* set output values */
-	*pvalues = pa;
-	*pnb = i;
-
-	return XCGROUP_SUCCESS;
-}
-
-int
-_file_write_uint32s(char* file_path,uint32_t* values,int nb)
-{
-	int fstatus;
-	int rc;
-	int fd;
-	char tstr[256];
-	uint32_t value;
-	int i;
-
-	/* open file for writing */
-	fd = open(file_path, O_WRONLY, 0700);
-	if (fd < 0) {
-		debug2("unable to open '%s' for writing : %m",
-		       file_path);
-		return XCGROUP_ERROR;
-	}
-
-	/* add one value per line */
-	fstatus = XCGROUP_SUCCESS;
-	for ( i=0 ; i < nb ; i++ ) {
-
-		value = values[i];
-
-		rc = snprintf(tstr, sizeof(tstr), "%u",value);
-		if ( rc < 0 ) {
-			debug2("unable to build %u string value, skipping",
-			       value);
-			fstatus = XCGROUP_ERROR;
-			continue;
-		}
-
-		do {
-			rc = write(fd, tstr, strlen(tstr)+1);
-		}
-		while ( rc != 0 && errno == EINTR);
-		if (rc < 1) {
-			debug2("unable to add value '%s' to file '%s' : %m",
-			       tstr,file_path);
-			fstatus = XCGROUP_ERROR;
-		}
-
-	}
-
-	/* close file */
-	close(fd);
-
-	return fstatus;
-}
-
-
-int
-_file_read_uint32s(char* file_path,uint32_t** pvalues,int* pnb)
-{
-	int rc;
-	int fd;
-
-	size_t fsize;
-	char* buf;
-	char* p;
-
-	uint32_t* pa=NULL;
-	int i;
-
-	/* check input pointers */
-	if ( pvalues == NULL || pnb == NULL )
-		return XCGROUP_ERROR;
-
-	/* open file for reading */
-	fd = open(file_path, O_RDONLY, 0700);
-	if (fd < 0) {
-		debug2("unable to open '%s' for reading : %m",
-		       file_path);
-		return XCGROUP_ERROR;
-	}
-
-	/* get file size */
-	fsize=_file_getsize(fd);
-	if ( fsize == -1 ) {
-		close(fd);
-		return XCGROUP_ERROR;
-	}
-
-	/* read file contents */
-	buf = (char*) xmalloc((fsize+1)*sizeof(char));
-	do {
-		rc = read(fd,buf,fsize);
-	}
-	while ( rc < 0 && errno == EINTR );
-	close(fd);
-	buf[fsize]='\0';
-
-	/* count values (splitted by \n) */
-	i=0;
-	if ( rc > 0 ) {
-		p = buf;
-		while ( index(p,'\n') != NULL ) {
-			i++;
-			p = index(p,'\n') + 1;
-		}
-	}
-
-	/* build uint32_t list */
-	if ( i > 0 ) {
-		pa = (uint32_t*) xmalloc(sizeof(uint32_t) * i);
-		p = buf;
-		i = 0;
-		while ( index(p,'\n') != NULL ) {
-			sscanf(p,"%u",pa+i);
-			p = index(p,'\n') + 1;
-			i++;
-		}
-	}
-
-	/* free buffer */
-	xfree(buf);
-
-	/* set output values */
-	*pvalues = pa;
-	*pnb = i;
-
-	return XCGROUP_SUCCESS;
-}
-
-int
-_file_write_content(char* file_path, char* content,size_t csize)
-{
-	int fstatus;
-	int rc;
-	int fd;
-
-	/* open file for writing */
-	fd = open(file_path, O_WRONLY, 0700);
-	if (fd < 0) {
-		debug2("unable to open '%s' for writing : %m",
-		       file_path);
-		return XCGROUP_ERROR;
-	}
-
-	/* write content */
-	do {
-		rc = write(fd,content,csize);
-	}
-	while ( rc != 0 && errno == EINTR);
-
-	/* check read size */
-	if (rc < csize) {
-		debug2("unable to write %zd bytes to file '%s' : %m",
-		       csize, file_path);
-		fstatus = XCGROUP_ERROR;
-	}
-	else
-		fstatus = XCGROUP_SUCCESS;
-
-	/* close file */
-	close(fd);
-
-	return fstatus;
-}
-
-int
-_file_read_content(char* file_path,char** content,size_t *csize)
-{
-	int fstatus;
-	int rc;
-	int fd;
-
-	size_t fsize;
-	char* buf;
-
-	fstatus = XCGROUP_ERROR;
-
-	/* check input pointers */
-	if ( content == NULL || csize == NULL )
-		return fstatus;
-
-	/* open file for reading */
-	fd = open(file_path, O_RDONLY, 0700);
-	if (fd < 0) {
-		debug2("unable to open '%s' for reading : %m",
-		       file_path);
-		return fstatus;
-	}
-
-	/* get file size */
-	fsize=_file_getsize(fd);
-	if ( fsize == -1 ) {
-		close(fd);
-		return fstatus;
-	}
-
-	/* read file contents */
-	buf = (char*) xmalloc((fsize+1)*sizeof(char));
-	buf[fsize]='\0';
-	do {
-		rc = read(fd,buf,fsize);
-	}
-	while ( rc < 0 && errno == EINTR );
-
-	/* set output values */
-	if ( rc >= 0 ) {
-		*content = buf;
-		*csize = rc;
-		fstatus = XCGROUP_SUCCESS;
-	}
-
-	/* close file */
-	close(fd);
-
-	return fstatus;
-}
-
-
-int _xcgroup_cpuset_init(char* file_path)
-{
-	int fstatus;
-	char path[PATH_MAX];
-
-	char* cpuset_metafiles[] = {
-		"cpuset.cpus",
-		"cpuset.mems"
-	};
-	char* cpuset_meta;
-	char* cpuset_conf;
-	size_t csize;
-
-	int i;
-
-	fstatus = XCGROUP_ERROR;
-
-	/* when cgroups are configured with cpuset, at least
-	 * cpuset.cpus and cpuset.mems must be set or the cgroup
-	 * will not be available at all.
-	 * we duplicate the ancestor configuration in the init step */
-	for ( i = 0 ; i < 2 ; i++ ) {
-
-		cpuset_meta = cpuset_metafiles[i];
-
-		/* try to read ancestor configuration */
-		if ( snprintf(path,PATH_MAX,"%s/../%s",
-			      file_path,cpuset_meta) >= PATH_MAX ) {
-			debug2("unable to get ancestor %s for cgroup '%s' : %m",
-			       cpuset_meta,file_path);
-			return fstatus;
-		}
-		if ( _file_read_content(path,&cpuset_conf,&csize) !=
-		     XCGROUP_SUCCESS ) {
-			debug3("assuming no cpuset support for '%s'",path);
-			return XCGROUP_SUCCESS;
-		}
-
-		/* duplicate ancestor conf in current cgroup */
-		if ( snprintf(path,PATH_MAX,"%s/%s",
-			      file_path,cpuset_meta) >= PATH_MAX ) {
-			debug2("unable to set %s for cgroup '%s' : %m",
-			       cpuset_meta,file_path);
-			return fstatus;
-		}
-		if ( _file_write_content(path,cpuset_conf,csize) !=
-		     XCGROUP_SUCCESS ) {
-			debug2("unable to write %s configuration (%s) of '%s'",
-			       cpuset_meta,cpuset_conf,file_path);
-			return fstatus;
-		}
-
-	}
-
-	return XCGROUP_SUCCESS;
-}
diff --git a/src/plugins/proctrack/cgroup/xcgroup.h b/src/plugins/proctrack/cgroup/xcgroup.h
deleted file mode 100644
index 7f886b7b21f..00000000000
--- a/src/plugins/proctrack/cgroup/xcgroup.h
+++ /dev/null
@@ -1,237 +0,0 @@
-/*****************************************************************************\
- *  cgroup.h - cgroup related primitives headers
- *****************************************************************************
- *  Copyright (C) 2009 CEA/DAM/DIF
- *  Written by Matthieu Hautreux <matthieu.hautreux@cea.fr>
- *  
- *  This file is part of SLURM, a resource management program.
- *  For details, see <https://computing.llnl.gov/linux/slurm/>.
- *  Please also read the included file: DISCLAIMER.
- *  
- *  SLURM is free software; you can redistribute it and/or modify it under
- *  the terms of the GNU General Public License as published by the Free
- *  Software Foundation; either version 2 of the License, or (at your option)
- *  any later version.
- *
- *  In addition, as a special exception, the copyright holders give permission 
- *  to link the code of portions of this program with the OpenSSL library under 
- *  certain conditions as described in each individual source file, and 
- *  distribute linked combinations including the two. You must obey the GNU 
- *  General Public License in all respects for all of the code used other than 
- *  OpenSSL. If you modify file(s) with this exception, you may extend this 
- *  exception to your version of the file(s), but you are not obligated to do 
- *  so. If you do not wish to do so, delete this exception statement from your
- *  version.  If you delete this exception statement from all source files in 
- *  the program, then also delete it here.
- *  
- *  SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
- *  WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
- *  FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
- *  details.
- *  
- *  You should have received a copy of the GNU General Public License along
- *  with SLURM; if not, write to the Free Software Foundation, Inc.,
- *  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA.
-\*****************************************************************************/
-
-#if HAVE_CONFIG_H
-#   include "config.h"
-#endif
-
-#ifndef _XCGROUP_H_
-#define _XCGROUP_H_
-
-#include <sys/types.h>
-
-#define XCGROUP_ERROR    1
-#define XCGROUP_SUCCESS  0
-
-#ifndef CGROUP_BASEDIR
-#define CGROUP_BASEDIR "/dev/cgroup"
-#endif
-
-typedef struct xcgroup_opts {
-
-	uid_t uid;        /* uid of the owner */
-	gid_t gid;        /* gid of the owner */
-
-	int create_only;  /* do nothing if the cgroup already exists */
-	int notify;       /* notify_on_release flag value (0/1) */
-
-} xcgroup_opts_t;
-
-/*
- * test if cgroup system is currently available (mounted)
- *
- * returned values:
- *  - 0 if not available
- *  - 1 if available
- */
-int xcgroup_is_available();
-
-/*
- * mount the cgroup system using given options
- *
- * returned values:
- *  - XCGROUP_ERROR
- *  - XCGROUP_SUCCESS
- */
-int xcgroup_mount(char* mount_opts);
-
-/*
- * set cgroup system release agent
- *
- * returned values:
- *  - XCGROUP_ERROR
- *  - XCGROUP_SUCCESS
- */
-int xcgroup_set_release_agent(char* agent);
-
-/*
- * create a cgroup according to input properties
- *
- * returned values:
- *  - XCGROUP_ERROR
- *  - XCGROUP_SUCCESS
- */
-int xcgroup_create(char* cpath, xcgroup_opts_t* opts);
-
-/*
- * destroy a cgroup (do nothing for now)
- *
- * returned values:
- *  - XCGROUP_ERROR
- *  - XCGROUP_SUCCESS
- */
-int xcgroup_destroy(char* cpath);
-
-/*
- * add a list of pids to a cgroup
- *
- * returned values:
- *  - XCGROUP_ERROR
- *  - XCGROUP_SUCCESS
- */
-int xcgroup_add_pids(char* cpath,pid_t* pids,int npids);
-
-/*
- * extract the pids list of a cgroup
- *
- * pids array must be freed using xfree(...)
- *
- * returned values:
- *  - XCGROUP_ERROR
- *  - XCGROUP_SUCCESS
- */
-int xcgroup_get_pids(char* cpath, pid_t **pids, int *npids);
-
-/*
- * return the cpath containing the input pid
- *
- * returned values:
- *  - XCGROUP_ERROR
- *  - XCGROUP_SUCCESS
- */
-int xcgroup_find_by_pid(char* cpath, pid_t pid);
-
-/*
- * set cgroup memory limit to the value ot memlimit
- *
- * memlimit must be expressed in MB
- *
- * returned values:
- *  - XCGROUP_ERROR
- *  - XCGROUP_SUCCESS
- */
-int xcgroup_set_memlimit(char* cpath,uint32_t memlimit);
-
-/*
- * get cgroup memory limit
- *
- * memlimit will be expressed in MB
- *
- * returned values:
- *  - XCGROUP_ERROR
- *  - XCGROUP_SUCCESS
- */
-int xcgroup_get_memlimit(char* cpath,uint32_t* memlimit);
-
-/*
- * set cgroup mem+swap limit to the value ot memlimit
- *
- * memlimit must be expressed in MB
- *
- * returned values:
- *  - XCGROUP_ERROR
- *  - XCGROUP_SUCCESS
- */
-int xcgroup_set_memswlimit(char* cpath,uint32_t memlimit);
-
-/*
- * get cgroup mem+swap limit
- *
- * memlimit will be expressed in MB
- *
- * returned values:
- *  - XCGROUP_ERROR
- *  - XCGROUP_SUCCESS
- */
-int xcgroup_get_memswlimit(char* cpath,uint32_t* memlimit);
-
-/*
- * toggle memory use hierarchy behavior using flag value
- *
- * flag values are 0/1 to disable/enable the feature
- *
- * returned values:
- *  - XCGROUP_ERROR
- *  - XCGROUP_SUCCESS
- */
-int xcgroup_set_mem_use_hierarchy(char* cpath,int flag);
-
-/*
- * set cgroup cpuset cpus configuration
- *
- * range is the ranges of cores to constrain the cgroup to
- * i.e. 0-1,4-5
- *
- * returned values:
- *  - XCGROUP_ERROR
- *  - XCGROUP_SUCCESS
- */
-int xcgroup_set_cpuset_cpus(char* cpath,char* range);
-
-/* 
- * set cgroup parameters using string of the form :
- * parameteres="param=value[ param=value]*"
- *
- * param must correspond to a file of the cgroup that
- * will be written with the value content
- *
- * i.e. xcgroup_set_params("/dev/cgroup/slurm",
- *                         "memory.swappiness=10");
- *
- * returned values:
- *  - XCGROUP_ERROR
- *  - XCGROUP_SUCCESS
- */
-int xcgroup_set_params(char* cpath,char* parameters);
-
-/* 
- * get a cgroup parameter
- *
- * param must correspond to a file of the cgroup that
- * will be read for its content
- *
- * i.e. xcgroup_get_param("/dev/cgroup/slurm",
- *                         "memory.swappiness",&value,&size);
- *
- * on success, content must be free using xfree
- *
- * returned values:
- *  - XCGROUP_ERROR
- *  - XCGROUP_SUCCESS
- */
-int xcgroup_get_param(char* cpath,char* param,char **content,size_t *csize);
-
-#endif
diff --git a/src/plugins/proctrack/cgroup/xcpuinfo.c b/src/plugins/proctrack/cgroup/xcpuinfo.c
deleted file mode 100644
index 73fc02cf84a..00000000000
--- a/src/plugins/proctrack/cgroup/xcpuinfo.c
+++ /dev/null
@@ -1,312 +0,0 @@
-/*****************************************************************************\
- *  xcpuinfo.c - cpuinfo related primitives
- *****************************************************************************
- *  Copyright (C) 2009 CEA/DAM/DIF
- *  Written by Matthieu Hautreux <matthieu.hautreux@cea.fr>
- *  
- *  This file is part of SLURM, a resource management program.
- *  For details, see <https://computing.llnl.gov/linux/slurm/>.
- *  Please also read the included file: DISCLAIMER.
- *  
- *  SLURM is free software; you can redistribute it and/or modify it under
- *  the terms of the GNU General Public License as published by the Free
- *  Software Foundation; either version 2 of the License, or (at your option)
- *  any later version.
- *
- *  In addition, as a special exception, the copyright holders give permission 
- *  to link the code of portions of this program with the OpenSSL library under 
- *  certain conditions as described in each individual source file, and 
- *  distribute linked combinations including the two. You must obey the GNU 
- *  General Public License in all respects for all of the code used other than 
- *  OpenSSL. If you modify file(s) with this exception, you may extend this 
- *  exception to your version of the file(s), but you are not obligated to do 
- *  so. If you do not wish to do so, delete this exception statement from your
- *  version.  If you delete this exception statement from all source files in 
- *  the program, then also delete it here.
- *  
- *  SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
- *  WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
- *  FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
- *  details.
- *  
- *  You should have received a copy of the GNU General Public License along
- *  with SLURM; if not, write to the Free Software Foundation, Inc.,
- *  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA.
-\*****************************************************************************/
-
-#if HAVE_CONFIG_H
-#   include "config.h"
-#endif
-
-#if HAVE_STDINT_H
-#  include <stdint.h>
-#endif
-#if HAVE_INTTYPES_H
-#  include <inttypes.h>
-#endif
-
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <ctype.h>
-#include <fcntl.h>
-#include <stdlib.h>
-#include <string.h>
-#include <strings.h>
-
-#include <slurm/slurm.h>
-#include <slurm/slurm_errno.h>
-#include "src/common/log.h"
-#include "src/common/xmalloc.h"
-#include "src/common/xstring.h"
-#include "src/slurmd/slurmd/get_mach_stat.h"
-
-#include "xcpuinfo.h"
-
-bool     initialized = false;      
-uint16_t procs, sockets, cores, threads;
-uint16_t block_map_size;
-uint16_t *block_map, *block_map_inv;
-
-int _ranges_conv(char* lrange,char** prange,int mode);
-
-/* for testing purpose */
-/* uint16_t block_map_size=8; */
-/* uint16_t block_map[] = { 0, 4, 1, 5, 3, 7, 2, 6}; */
-/* uint16_t block_map_inv[] = { 0, 2, 6, 4, 1, 3, 7, 5}; */
-/* xcpuinfo_abs_to_mac("0,2,4,6",&mach); */
-/* xcpuinfo_mac_to_abs(mach,&abs); */
-
-int
-xcpuinfo_init()
-{
-	if ( initialized )
-		return XCPUINFO_SUCCESS;
-
-	if ( get_procs(&procs) )
-		return XCPUINFO_ERROR;
-	
-	if ( get_cpuinfo(procs,&sockets,&cores,&threads,
-			 &block_map_size,&block_map,&block_map_inv) )
-		return XCPUINFO_ERROR;
-
-	initialized = true ;
-
-	return XCPUINFO_SUCCESS;
-}
-
-int
-xcpuinfo_fini()
-{
-	if ( ! initialized )
-		return XCPUINFO_SUCCESS;
-
-	initialized = false ;
-	procs = sockets = cores = threads = 0;
-	block_map_size = 0;
-	xfree(block_map);
-	xfree(block_map_inv);
-
-	return XCPUINFO_SUCCESS;
-}
-
-int
-xcpuinfo_abs_to_mac(char* lrange,char** prange)
-{
-	return _ranges_conv(lrange,prange,0);
-}
-
-int
-xcpuinfo_mac_to_abs(char* lrange,char** prange)
-{
-	return _ranges_conv(lrange,prange,1);
-}
-
-
-/* 
- * set to 1 each element of already allocated map of size 
- * map_size if they are present in the input range
- */
-int
-_range_to_map(char* range,uint16_t *map,uint16_t map_size)
-{
-	int bad_nb=0;
-	int num_fl=0;
-	int con_fl=0;
-	int last=0;
-
-	char *dup;
-	char *p;
-	char *s=NULL;
-
-	uint16_t start=0,end=0,i;
-
-	/* duplicate input range */
-	dup = xstrdup(range);
-	p = dup;
-	while ( ! last ) {
-		if ( isdigit(*p) ) {
-			if ( !num_fl ) {
-				num_fl++;
-				s=p;
-			}
-		}
-		else if ( *p == '-' ) {
-			if ( s && num_fl ) {
-				*p = '\0';
-				start = (uint16_t) atoi(s);
-				con_fl=1;
-				num_fl=0;
-				s=NULL;
-			}
-		}
-		else if ( *p == ',' || *p == '\0') {
-			if ( *p == '\0' )
-				last = 1;
-			if ( s && num_fl ) {
-				*p = '\0';
-				end = (uint16_t) atoi(s);
-				if ( !con_fl )
-					start = end ;
-				con_fl=2;
-				num_fl=0;
-				s=NULL;
-			}
-		}
-		else {
-			bad_nb++;
-			break;
-		}
-		if ( con_fl == 2 ) {
-			for( i = start ; i <= end && i < map_size ; i++) {
-				map[i]=1;
-			}
-			con_fl=0;
-		}
-		p++;
-	}
-
-	xfree(dup);
-
-	if ( bad_nb > 0 ) {
-		/* bad format for input range */
-		return XCPUINFO_ERROR;
-	}
-
-	return XCPUINFO_SUCCESS;
-}
-
-
-/*
- * allocate and build a range of ids using an input map
- * having printable element set to 1
- */
-int
-_map_to_range(uint16_t *map,uint16_t map_size,char** prange)
-{
-	size_t len;
-	int num_fl=0;
-	int con_fl=0;
-
-	char id[12];
-	char *str;
-
-	uint16_t start=0,end=0,i;
-
-	str = xstrdup("");
-	for ( i = 0 ; i < map_size ; i++ ) {
-
-		if ( map[i] ) {
-			num_fl=1;
-			end=i;
-			if ( !con_fl ) {
-				start=end;
-				con_fl=1;
-			}
-		}
-		else if ( num_fl ) {
-			if ( start < end ) {
-				sprintf(id,"%u-%u,",start,end);
-				xstrcat(str,id);
-			}
-			else {
-				sprintf(id,"%u,",start);
-				xstrcat(str,id);
-			}
-			con_fl = num_fl = 0;
-		}
-	}
-	if ( num_fl ) {
-		if ( start < end ) {
-			sprintf(id,"%u-%u,",start,end);
-			xstrcat(str,id);
-		}
-		else {
-			sprintf(id,"%u,",start);
-			xstrcat(str,id);
-		}
-	}
-
-	len = strlen(str);
-	if ( len > 0 ) {
-		str[len-1]='\0';
-	}
-
-	if ( prange != NULL )
-		*prange = str;
-	else
-		xfree(str);
-
-	return XCPUINFO_SUCCESS;
-}
-
-/*
- * convert a range into an other one according to 
- * a modus operandi being 0 or 1 for abstract to machine
- * or machine to abstract representation of cores
- */
-int
-_ranges_conv(char* lrange,char** prange,int mode)
-{
-	int fstatus;
-	int i;
-	uint16_t *amap;
-	uint16_t *map;
-	uint16_t *map_out;
-
-	/* init internal data if not already done */
-	if ( xcpuinfo_init() != XCPUINFO_SUCCESS )
-		return XCPUINFO_ERROR;
-
-	if ( mode ) {
-		/* machine to abstract conversion */
-		amap = block_map_inv;
-	}
-	else {
-		/* abstract to machine conversion */
-		amap = block_map;
-	}
-
-	/* allocate map for local work */
-	map = (uint16_t*) xmalloc(block_map_size*sizeof(uint16_t));
-	map_out = (uint16_t*) xmalloc(block_map_size*sizeof(uint16_t));
-
-	/* extract the input map */
-	fstatus = _range_to_map(lrange,map,block_map_size);
-	if ( fstatus ) {
-		goto exit;
-	}
-
-	/* do the conversion (see src/slurmd/slurmd/get_mach_stat.c) */
-	for( i = 0 ; i < block_map_size ; i++) {
-		if ( map[i] )
-			map_out[amap[i]]=1;
-	}
-
-	/* build the ouput range */
-	fstatus = _map_to_range(map_out,block_map_size,prange);
-
-exit:
-	xfree(map);
-	xfree(map_out);
-	return fstatus;
-}
diff --git a/src/plugins/task/Makefile.am b/src/plugins/task/Makefile.am
index 05e6cd8c9d1..58d5b9ef2cf 100644
--- a/src/plugins/task/Makefile.am
+++ b/src/plugins/task/Makefile.am
@@ -1,3 +1,3 @@
 # Makefile for task plugins
 
-SUBDIRS = affinity none
+SUBDIRS = affinity none cgroup
diff --git a/src/plugins/task/Makefile.in b/src/plugins/task/Makefile.in
index 360d3e621ed..89bb299dd35 100644
--- a/src/plugins/task/Makefile.in
+++ b/src/plugins/task/Makefile.in
@@ -317,7 +317,7 @@ target_vendor = @target_vendor@
 top_build_prefix = @top_build_prefix@
 top_builddir = @top_builddir@
 top_srcdir = @top_srcdir@
-SUBDIRS = affinity none
+SUBDIRS = affinity none cgroup
 all: all-recursive
 
 .SUFFIXES:
diff --git a/src/plugins/task/affinity/task_affinity.c b/src/plugins/task/affinity/task_affinity.c
index a7f15d2ccc6..7bb5115b3f6 100644
--- a/src/plugins/task/affinity/task_affinity.c
+++ b/src/plugins/task/affinity/task_affinity.c
@@ -434,3 +434,11 @@ extern int task_post_term (slurmd_job_t *job)
 	return SLURM_SUCCESS;
 }
 
+/*
+ * task_post_step() is called after termination of the step
+ * (all the tasks)
+ */
+extern int task_post_step (slurmd_job_t *job)
+{
+	return SLURM_SUCCESS;
+}
diff --git a/src/plugins/task/cgroup/Makefile.am b/src/plugins/task/cgroup/Makefile.am
new file mode 100644
index 00000000000..df048a98a9a
--- /dev/null
+++ b/src/plugins/task/cgroup/Makefile.am
@@ -0,0 +1,16 @@
+# Makefile for task/cgroup plugin
+
+AUTOMAKE_OPTIONS = foreign
+
+PLUGIN_FLAGS = -module -avoid-version --export-dynamic
+
+INCLUDES = -I$(top_srcdir) -I$(top_srcdir)/src/common
+
+pkglib_LTLIBRARIES = task_cgroup.la
+
+# cgroup task plugin.
+task_cgroup_la_SOURCES = 	task_cgroup.c \
+				task_cgroup_cpuset.h task_cgroup_cpuset.c \
+				task_cgroup_memory.h task_cgroup_memory.c
+task_cgroup_la_CPPFLAGS = $(HWLOC_CPPFLAGS)
+task_cgroup_la_LDFLAGS = $(SO_LDFLAGS) $(PLUGIN_FLAGS) $(HWLOC_LDFLAGS) $(HWLOC_LIBS)
diff --git a/src/plugins/task/cgroup/Makefile.in b/src/plugins/task/cgroup/Makefile.in
new file mode 100644
index 00000000000..8e1ad36421d
--- /dev/null
+++ b/src/plugins/task/cgroup/Makefile.in
@@ -0,0 +1,669 @@
+# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# @configure_input@
+
+# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009  Free Software Foundation,
+# Inc.
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+
+# Makefile for task/cgroup plugin
+
+VPATH = @srcdir@
+pkgdatadir = $(datadir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkglibexecdir = $(libexecdir)/@PACKAGE@
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = $(program_transform_name)
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+build_triplet = @build@
+host_triplet = @host@
+target_triplet = @target@
+subdir = src/plugins/task/cgroup
+DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \
+	$(top_srcdir)/auxdir/libtool.m4 \
+	$(top_srcdir)/auxdir/ltoptions.m4 \
+	$(top_srcdir)/auxdir/ltsugar.m4 \
+	$(top_srcdir)/auxdir/ltversion.m4 \
+	$(top_srcdir)/auxdir/lt~obsolete.m4 \
+	$(top_srcdir)/auxdir/slurm.m4 \
+	$(top_srcdir)/auxdir/x_ac__system_configuration.m4 \
+	$(top_srcdir)/auxdir/x_ac_affinity.m4 \
+	$(top_srcdir)/auxdir/x_ac_aix.m4 \
+	$(top_srcdir)/auxdir/x_ac_blcr.m4 \
+	$(top_srcdir)/auxdir/x_ac_bluegene.m4 \
+	$(top_srcdir)/auxdir/x_ac_cflags.m4 \
+	$(top_srcdir)/auxdir/x_ac_cray.m4 \
+	$(top_srcdir)/auxdir/x_ac_databases.m4 \
+	$(top_srcdir)/auxdir/x_ac_debug.m4 \
+	$(top_srcdir)/auxdir/x_ac_elan.m4 \
+	$(top_srcdir)/auxdir/x_ac_env.m4 \
+	$(top_srcdir)/auxdir/x_ac_federation.m4 \
+	$(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \
+	$(top_srcdir)/auxdir/x_ac_hwloc.m4 \
+	$(top_srcdir)/auxdir/x_ac_iso.m4 \
+	$(top_srcdir)/auxdir/x_ac_lua.m4 \
+	$(top_srcdir)/auxdir/x_ac_munge.m4 \
+	$(top_srcdir)/auxdir/x_ac_ncurses.m4 \
+	$(top_srcdir)/auxdir/x_ac_pam.m4 \
+	$(top_srcdir)/auxdir/x_ac_printf_null.m4 \
+	$(top_srcdir)/auxdir/x_ac_ptrace.m4 \
+	$(top_srcdir)/auxdir/x_ac_readline.m4 \
+	$(top_srcdir)/auxdir/x_ac_setpgrp.m4 \
+	$(top_srcdir)/auxdir/x_ac_setproctitle.m4 \
+	$(top_srcdir)/auxdir/x_ac_sgi_job.m4 \
+	$(top_srcdir)/auxdir/x_ac_slurm_ssl.m4 \
+	$(top_srcdir)/auxdir/x_ac_sun_const.m4 \
+	$(top_srcdir)/auxdir/x_ac_xcpu.m4 $(top_srcdir)/configure.ac
+am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
+	$(ACLOCAL_M4)
+mkinstalldirs = $(install_sh) -d
+CONFIG_HEADER = $(top_builddir)/config.h $(top_builddir)/slurm/slurm.h
+CONFIG_CLEAN_FILES =
+CONFIG_CLEAN_VPATH_FILES =
+am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`;
+am__vpath_adj = case $$p in \
+    $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \
+    *) f=$$p;; \
+  esac;
+am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`;
+am__install_max = 40
+am__nobase_strip_setup = \
+  srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'`
+am__nobase_strip = \
+  for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||"
+am__nobase_list = $(am__nobase_strip_setup); \
+  for p in $$list; do echo "$$p $$p"; done | \
+  sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \
+  $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \
+    if (++n[$$2] == $(am__install_max)) \
+      { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \
+    END { for (dir in files) print dir, files[dir] }'
+am__base_list = \
+  sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
+  sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
+am__installdirs = "$(DESTDIR)$(pkglibdir)"
+LTLIBRARIES = $(pkglib_LTLIBRARIES)
+task_cgroup_la_LIBADD =
+am_task_cgroup_la_OBJECTS = task_cgroup_la-task_cgroup.lo \
+	task_cgroup_la-task_cgroup_cpuset.lo \
+	task_cgroup_la-task_cgroup_memory.lo
+task_cgroup_la_OBJECTS = $(am_task_cgroup_la_OBJECTS)
+task_cgroup_la_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) \
+	$(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
+	$(task_cgroup_la_LDFLAGS) $(LDFLAGS) -o $@
+DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir) -I$(top_builddir)/slurm
+depcomp = $(SHELL) $(top_srcdir)/auxdir/depcomp
+am__depfiles_maybe = depfiles
+am__mv = mv -f
+COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
+	$(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+LTCOMPILE = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
+	--mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
+	$(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+CCLD = $(CC)
+LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
+	--mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) \
+	$(LDFLAGS) -o $@
+SOURCES = $(task_cgroup_la_SOURCES)
+DIST_SOURCES = $(task_cgroup_la_SOURCES)
+ETAGS = etags
+CTAGS = ctags
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+ACLOCAL = @ACLOCAL@
+AMTAR = @AMTAR@
+AR = @AR@
+AUTHD_CFLAGS = @AUTHD_CFLAGS@
+AUTHD_LIBS = @AUTHD_LIBS@
+AUTOCONF = @AUTOCONF@
+AUTOHEADER = @AUTOHEADER@
+AUTOMAKE = @AUTOMAKE@
+AWK = @AWK@
+BGL_LOADED = @BGL_LOADED@
+BGQ_LOADED = @BGQ_LOADED@
+BG_INCLUDES = @BG_INCLUDES@
+BG_LDFLAGS = @BG_LDFLAGS@
+BG_L_P_LOADED = @BG_L_P_LOADED@
+BLCR_CPPFLAGS = @BLCR_CPPFLAGS@
+BLCR_HOME = @BLCR_HOME@
+BLCR_LDFLAGS = @BLCR_LDFLAGS@
+BLCR_LIBS = @BLCR_LIBS@
+BLUEGENE_LOADED = @BLUEGENE_LOADED@
+CC = @CC@
+CCDEPMODE = @CCDEPMODE@
+CFLAGS = @CFLAGS@
+CMD_LDFLAGS = @CMD_LDFLAGS@
+CPP = @CPP@
+CPPFLAGS = @CPPFLAGS@
+CXX = @CXX@
+CXXCPP = @CXXCPP@
+CXXDEPMODE = @CXXDEPMODE@
+CXXFLAGS = @CXXFLAGS@
+CYGPATH_W = @CYGPATH_W@
+DEFS = @DEFS@
+DEPDIR = @DEPDIR@
+DSYMUTIL = @DSYMUTIL@
+DUMPBIN = @DUMPBIN@
+ECHO_C = @ECHO_C@
+ECHO_N = @ECHO_N@
+ECHO_T = @ECHO_T@
+EGREP = @EGREP@
+ELAN_LIBS = @ELAN_LIBS@
+EXEEXT = @EXEEXT@
+FEDERATION_LDFLAGS = @FEDERATION_LDFLAGS@
+FGREP = @FGREP@
+GREP = @GREP@
+GTK_CFLAGS = @GTK_CFLAGS@
+GTK_LIBS = @GTK_LIBS@
+HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@
+HAVEPGCONFIG = @HAVEPGCONFIG@
+HAVE_AIX = @HAVE_AIX@
+HAVE_ELAN = @HAVE_ELAN@
+HAVE_FEDERATION = @HAVE_FEDERATION@
+HAVE_OPENSSL = @HAVE_OPENSSL@
+HAVE_SOME_CURSES = @HAVE_SOME_CURSES@
+HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@
+HWLOC_LDFLAGS = @HWLOC_LDFLAGS@
+HWLOC_LIBS = @HWLOC_LIBS@
+INSTALL = @INSTALL@
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+LD = @LD@
+LDFLAGS = @LDFLAGS@
+LIBOBJS = @LIBOBJS@
+LIBS = @LIBS@
+LIBTOOL = @LIBTOOL@
+LIB_LDFLAGS = @LIB_LDFLAGS@
+LIPO = @LIPO@
+LN_S = @LN_S@
+LTLIBOBJS = @LTLIBOBJS@
+MAINT = @MAINT@
+MAKEINFO = @MAKEINFO@
+MKDIR_P = @MKDIR_P@
+MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@
+MUNGE_LDFLAGS = @MUNGE_LDFLAGS@
+MUNGE_LIBS = @MUNGE_LIBS@
+MYSQL_CFLAGS = @MYSQL_CFLAGS@
+MYSQL_LIBS = @MYSQL_LIBS@
+NCURSES = @NCURSES@
+NM = @NM@
+NMEDIT = @NMEDIT@
+NUMA_LIBS = @NUMA_LIBS@
+OBJDUMP = @OBJDUMP@
+OBJEXT = @OBJEXT@
+OTOOL = @OTOOL@
+OTOOL64 = @OTOOL64@
+PACKAGE = @PACKAGE@
+PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
+PACKAGE_NAME = @PACKAGE_NAME@
+PACKAGE_STRING = @PACKAGE_STRING@
+PACKAGE_TARNAME = @PACKAGE_TARNAME@
+PACKAGE_URL = @PACKAGE_URL@
+PACKAGE_VERSION = @PACKAGE_VERSION@
+PAM_DIR = @PAM_DIR@
+PAM_LIBS = @PAM_LIBS@
+PATH_SEPARATOR = @PATH_SEPARATOR@
+PGSQL_CFLAGS = @PGSQL_CFLAGS@
+PGSQL_LIBS = @PGSQL_LIBS@
+PKG_CONFIG = @PKG_CONFIG@
+PKG_CONFIG_LIBDIR = @PKG_CONFIG_LIBDIR@
+PKG_CONFIG_PATH = @PKG_CONFIG_PATH@
+PROCTRACKDIR = @PROCTRACKDIR@
+PROJECT = @PROJECT@
+PTHREAD_CC = @PTHREAD_CC@
+PTHREAD_CFLAGS = @PTHREAD_CFLAGS@
+PTHREAD_LIBS = @PTHREAD_LIBS@
+RANLIB = @RANLIB@
+READLINE_LIBS = @READLINE_LIBS@
+RELEASE = @RELEASE@
+SED = @SED@
+SEMAPHORE_LIBS = @SEMAPHORE_LIBS@
+SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@
+SET_MAKE = @SET_MAKE@
+SHELL = @SHELL@
+SLURMCTLD_PORT = @SLURMCTLD_PORT@
+SLURMCTLD_PORT_COUNT = @SLURMCTLD_PORT_COUNT@
+SLURMDBD_PORT = @SLURMDBD_PORT@
+SLURMD_PORT = @SLURMD_PORT@
+SLURM_API_AGE = @SLURM_API_AGE@
+SLURM_API_CURRENT = @SLURM_API_CURRENT@
+SLURM_API_MAJOR = @SLURM_API_MAJOR@
+SLURM_API_REVISION = @SLURM_API_REVISION@
+SLURM_API_VERSION = @SLURM_API_VERSION@
+SLURM_MAJOR = @SLURM_MAJOR@
+SLURM_MICRO = @SLURM_MICRO@
+SLURM_MINOR = @SLURM_MINOR@
+SLURM_PREFIX = @SLURM_PREFIX@
+SLURM_VERSION_NUMBER = @SLURM_VERSION_NUMBER@
+SLURM_VERSION_STRING = @SLURM_VERSION_STRING@
+SO_LDFLAGS = @SO_LDFLAGS@
+SSL_CPPFLAGS = @SSL_CPPFLAGS@
+SSL_LDFLAGS = @SSL_LDFLAGS@
+SSL_LIBS = @SSL_LIBS@
+STRIP = @STRIP@
+UTIL_LIBS = @UTIL_LIBS@
+VERSION = @VERSION@
+abs_builddir = @abs_builddir@
+abs_srcdir = @abs_srcdir@
+abs_top_builddir = @abs_top_builddir@
+abs_top_srcdir = @abs_top_srcdir@
+ac_ct_CC = @ac_ct_CC@
+ac_ct_CXX = @ac_ct_CXX@
+ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
+am__include = @am__include@
+am__leading_dot = @am__leading_dot@
+am__quote = @am__quote@
+am__tar = @am__tar@
+am__untar = @am__untar@
+bindir = @bindir@
+build = @build@
+build_alias = @build_alias@
+build_cpu = @build_cpu@
+build_os = @build_os@
+build_vendor = @build_vendor@
+builddir = @builddir@
+datadir = @datadir@
+datarootdir = @datarootdir@
+docdir = @docdir@
+dvidir = @dvidir@
+exec_prefix = @exec_prefix@
+host = @host@
+host_alias = @host_alias@
+host_cpu = @host_cpu@
+host_os = @host_os@
+host_vendor = @host_vendor@
+htmldir = @htmldir@
+includedir = @includedir@
+infodir = @infodir@
+install_sh = @install_sh@
+libdir = @libdir@
+libexecdir = @libexecdir@
+localedir = @localedir@
+localstatedir = @localstatedir@
+lt_ECHO = @lt_ECHO@
+lua_CFLAGS = @lua_CFLAGS@
+lua_LIBS = @lua_LIBS@
+mandir = @mandir@
+mkdir_p = @mkdir_p@
+oldincludedir = @oldincludedir@
+pdfdir = @pdfdir@
+prefix = @prefix@
+program_transform_name = @program_transform_name@
+psdir = @psdir@
+sbindir = @sbindir@
+sharedstatedir = @sharedstatedir@
+srcdir = @srcdir@
+sysconfdir = @sysconfdir@
+target = @target@
+target_alias = @target_alias@
+target_cpu = @target_cpu@
+target_os = @target_os@
+target_vendor = @target_vendor@
+top_build_prefix = @top_build_prefix@
+top_builddir = @top_builddir@
+top_srcdir = @top_srcdir@
+AUTOMAKE_OPTIONS = foreign
+PLUGIN_FLAGS = -module -avoid-version --export-dynamic
+INCLUDES = -I$(top_srcdir) -I$(top_srcdir)/src/common
+pkglib_LTLIBRARIES = task_cgroup.la
+
+# cgroup task plugin.
+task_cgroup_la_SOURCES = task_cgroup.c \
+				task_cgroup_cpuset.h task_cgroup_cpuset.c \
+				task_cgroup_memory.h task_cgroup_memory.c
+
+task_cgroup_la_CPPFLAGS = $(HWLOC_CPPFLAGS)
+task_cgroup_la_LDFLAGS = $(SO_LDFLAGS) $(PLUGIN_FLAGS) $(HWLOC_LDFLAGS) $(HWLOC_LIBS)
+all: all-am
+
+.SUFFIXES:
+.SUFFIXES: .c .lo .o .obj
+$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am  $(am__configure_deps)
+	@for dep in $?; do \
+	  case '$(am__configure_deps)' in \
+	    *$$dep*) \
+	      ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
+	        && { if test -f $@; then exit 0; else break; fi; }; \
+	      exit 1;; \
+	  esac; \
+	done; \
+	echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign src/plugins/task/cgroup/Makefile'; \
+	$(am__cd) $(top_srcdir) && \
+	  $(AUTOMAKE) --foreign src/plugins/task/cgroup/Makefile
+.PRECIOUS: Makefile
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+	@case '$?' in \
+	  *config.status*) \
+	    cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
+	  *) \
+	    echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
+	    cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
+	esac;
+
+$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+
+$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(am__aclocal_m4_deps):
+install-pkglibLTLIBRARIES: $(pkglib_LTLIBRARIES)
+	@$(NORMAL_INSTALL)
+	test -z "$(pkglibdir)" || $(MKDIR_P) "$(DESTDIR)$(pkglibdir)"
+	@list='$(pkglib_LTLIBRARIES)'; test -n "$(pkglibdir)" || list=; \
+	list2=; for p in $$list; do \
+	  if test -f $$p; then \
+	    list2="$$list2 $$p"; \
+	  else :; fi; \
+	done; \
+	test -z "$$list2" || { \
+	  echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 '$(DESTDIR)$(pkglibdir)'"; \
+	  $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 "$(DESTDIR)$(pkglibdir)"; \
+	}
+
+uninstall-pkglibLTLIBRARIES:
+	@$(NORMAL_UNINSTALL)
+	@list='$(pkglib_LTLIBRARIES)'; test -n "$(pkglibdir)" || list=; \
+	for p in $$list; do \
+	  $(am__strip_dir) \
+	  echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f '$(DESTDIR)$(pkglibdir)/$$f'"; \
+	  $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f "$(DESTDIR)$(pkglibdir)/$$f"; \
+	done
+
+clean-pkglibLTLIBRARIES:
+	-test -z "$(pkglib_LTLIBRARIES)" || rm -f $(pkglib_LTLIBRARIES)
+	@list='$(pkglib_LTLIBRARIES)'; for p in $$list; do \
+	  dir="`echo $$p | sed -e 's|/[^/]*$$||'`"; \
+	  test "$$dir" != "$$p" || dir=.; \
+	  echo "rm -f \"$${dir}/so_locations\""; \
+	  rm -f "$${dir}/so_locations"; \
+	done
+task_cgroup.la: $(task_cgroup_la_OBJECTS) $(task_cgroup_la_DEPENDENCIES) 
+	$(task_cgroup_la_LINK) -rpath $(pkglibdir) $(task_cgroup_la_OBJECTS) $(task_cgroup_la_LIBADD) $(LIBS)
+
+mostlyclean-compile:
+	-rm -f *.$(OBJEXT)
+
+distclean-compile:
+	-rm -f *.tab.c
+
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/task_cgroup_la-task_cgroup.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/task_cgroup_la-task_cgroup_cpuset.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/task_cgroup_la-task_cgroup_memory.Plo@am__quote@
+
+.c.o:
+@am__fastdepCC_TRUE@	$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
+@am__fastdepCC_TRUE@	$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(COMPILE) -c $<
+
+.c.obj:
+@am__fastdepCC_TRUE@	$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'`
+@am__fastdepCC_TRUE@	$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(COMPILE) -c `$(CYGPATH_W) '$<'`
+
+.c.lo:
+@am__fastdepCC_TRUE@	$(LTCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
+@am__fastdepCC_TRUE@	$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(LTCOMPILE) -c -o $@ $<
+
+task_cgroup_la-task_cgroup.lo: task_cgroup.c
+@am__fastdepCC_TRUE@	$(LIBTOOL)  --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(task_cgroup_la_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT task_cgroup_la-task_cgroup.lo -MD -MP -MF $(DEPDIR)/task_cgroup_la-task_cgroup.Tpo -c -o task_cgroup_la-task_cgroup.lo `test -f 'task_cgroup.c' || echo '$(srcdir)/'`task_cgroup.c
+@am__fastdepCC_TRUE@	$(am__mv) $(DEPDIR)/task_cgroup_la-task_cgroup.Tpo $(DEPDIR)/task_cgroup_la-task_cgroup.Plo
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	source='task_cgroup.c' object='task_cgroup_la-task_cgroup.lo' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(LIBTOOL)  --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(task_cgroup_la_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o task_cgroup_la-task_cgroup.lo `test -f 'task_cgroup.c' || echo '$(srcdir)/'`task_cgroup.c
+
+task_cgroup_la-task_cgroup_cpuset.lo: task_cgroup_cpuset.c
+@am__fastdepCC_TRUE@	$(LIBTOOL)  --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(task_cgroup_la_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT task_cgroup_la-task_cgroup_cpuset.lo -MD -MP -MF $(DEPDIR)/task_cgroup_la-task_cgroup_cpuset.Tpo -c -o task_cgroup_la-task_cgroup_cpuset.lo `test -f 'task_cgroup_cpuset.c' || echo '$(srcdir)/'`task_cgroup_cpuset.c
+@am__fastdepCC_TRUE@	$(am__mv) $(DEPDIR)/task_cgroup_la-task_cgroup_cpuset.Tpo $(DEPDIR)/task_cgroup_la-task_cgroup_cpuset.Plo
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	source='task_cgroup_cpuset.c' object='task_cgroup_la-task_cgroup_cpuset.lo' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(LIBTOOL)  --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(task_cgroup_la_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o task_cgroup_la-task_cgroup_cpuset.lo `test -f 'task_cgroup_cpuset.c' || echo '$(srcdir)/'`task_cgroup_cpuset.c
+
+task_cgroup_la-task_cgroup_memory.lo: task_cgroup_memory.c
+@am__fastdepCC_TRUE@	$(LIBTOOL)  --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(task_cgroup_la_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT task_cgroup_la-task_cgroup_memory.lo -MD -MP -MF $(DEPDIR)/task_cgroup_la-task_cgroup_memory.Tpo -c -o task_cgroup_la-task_cgroup_memory.lo `test -f 'task_cgroup_memory.c' || echo '$(srcdir)/'`task_cgroup_memory.c
+@am__fastdepCC_TRUE@	$(am__mv) $(DEPDIR)/task_cgroup_la-task_cgroup_memory.Tpo $(DEPDIR)/task_cgroup_la-task_cgroup_memory.Plo
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	source='task_cgroup_memory.c' object='task_cgroup_la-task_cgroup_memory.lo' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(LIBTOOL)  --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(task_cgroup_la_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o task_cgroup_la-task_cgroup_memory.lo `test -f 'task_cgroup_memory.c' || echo '$(srcdir)/'`task_cgroup_memory.c
+
+mostlyclean-libtool:
+	-rm -f *.lo
+
+clean-libtool:
+	-rm -rf .libs _libs
+
+ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)
+	list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+	unique=`for i in $$list; do \
+	    if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+	  done | \
+	  $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+	      END { if (nonempty) { for (i in files) print i; }; }'`; \
+	mkid -fID $$unique
+tags: TAGS
+
+TAGS:  $(HEADERS) $(SOURCES)  $(TAGS_DEPENDENCIES) \
+		$(TAGS_FILES) $(LISP)
+	set x; \
+	here=`pwd`; \
+	list='$(SOURCES) $(HEADERS)  $(LISP) $(TAGS_FILES)'; \
+	unique=`for i in $$list; do \
+	    if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+	  done | \
+	  $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+	      END { if (nonempty) { for (i in files) print i; }; }'`; \
+	shift; \
+	if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
+	  test -n "$$unique" || unique=$$empty_fix; \
+	  if test $$# -gt 0; then \
+	    $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+	      "$$@" $$unique; \
+	  else \
+	    $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+	      $$unique; \
+	  fi; \
+	fi
+ctags: CTAGS
+CTAGS:  $(HEADERS) $(SOURCES)  $(TAGS_DEPENDENCIES) \
+		$(TAGS_FILES) $(LISP)
+	list='$(SOURCES) $(HEADERS)  $(LISP) $(TAGS_FILES)'; \
+	unique=`for i in $$list; do \
+	    if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+	  done | \
+	  $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+	      END { if (nonempty) { for (i in files) print i; }; }'`; \
+	test -z "$(CTAGS_ARGS)$$unique" \
+	  || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
+	     $$unique
+
+GTAGS:
+	here=`$(am__cd) $(top_builddir) && pwd` \
+	  && $(am__cd) $(top_srcdir) \
+	  && gtags -i $(GTAGS_ARGS) "$$here"
+
+distclean-tags:
+	-rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
+
+distdir: $(DISTFILES)
+	@srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+	topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+	list='$(DISTFILES)'; \
+	  dist_files=`for file in $$list; do echo $$file; done | \
+	  sed -e "s|^$$srcdirstrip/||;t" \
+	      -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
+	case $$dist_files in \
+	  */*) $(MKDIR_P) `echo "$$dist_files" | \
+			   sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
+			   sort -u` ;; \
+	esac; \
+	for file in $$dist_files; do \
+	  if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+	  if test -d $$d/$$file; then \
+	    dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
+	    if test -d "$(distdir)/$$file"; then \
+	      find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+	    fi; \
+	    if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+	      cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
+	      find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+	    fi; \
+	    cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
+	  else \
+	    test -f "$(distdir)/$$file" \
+	    || cp -p $$d/$$file "$(distdir)/$$file" \
+	    || exit 1; \
+	  fi; \
+	done
+check-am: all-am
+check: check-am
+all-am: Makefile $(LTLIBRARIES)
+installdirs:
+	for dir in "$(DESTDIR)$(pkglibdir)"; do \
+	  test -z "$$dir" || $(MKDIR_P) "$$dir"; \
+	done
+install: install-am
+install-exec: install-exec-am
+install-data: install-data-am
+uninstall: uninstall-am
+
+install-am: all-am
+	@$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-am
+install-strip:
+	$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+	  install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+	  `test -z '$(STRIP)' || \
+	    echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+mostlyclean-generic:
+
+clean-generic:
+
+distclean-generic:
+	-test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
+	-test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
+
+maintainer-clean-generic:
+	@echo "This command is intended for maintainers to use"
+	@echo "it deletes files that may require special tools to rebuild."
+clean: clean-am
+
+clean-am: clean-generic clean-libtool clean-pkglibLTLIBRARIES \
+	mostlyclean-am
+
+distclean: distclean-am
+	-rm -rf ./$(DEPDIR)
+	-rm -f Makefile
+distclean-am: clean-am distclean-compile distclean-generic \
+	distclean-tags
+
+dvi: dvi-am
+
+dvi-am:
+
+html: html-am
+
+html-am:
+
+info: info-am
+
+info-am:
+
+install-data-am:
+
+install-dvi: install-dvi-am
+
+install-dvi-am:
+
+install-exec-am: install-pkglibLTLIBRARIES
+
+install-html: install-html-am
+
+install-html-am:
+
+install-info: install-info-am
+
+install-info-am:
+
+install-man:
+
+install-pdf: install-pdf-am
+
+install-pdf-am:
+
+install-ps: install-ps-am
+
+install-ps-am:
+
+installcheck-am:
+
+maintainer-clean: maintainer-clean-am
+	-rm -rf ./$(DEPDIR)
+	-rm -f Makefile
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-am
+
+mostlyclean-am: mostlyclean-compile mostlyclean-generic \
+	mostlyclean-libtool
+
+pdf: pdf-am
+
+pdf-am:
+
+ps: ps-am
+
+ps-am:
+
+uninstall-am: uninstall-pkglibLTLIBRARIES
+
+.MAKE: install-am install-strip
+
+.PHONY: CTAGS GTAGS all all-am check check-am clean clean-generic \
+	clean-libtool clean-pkglibLTLIBRARIES ctags distclean \
+	distclean-compile distclean-generic distclean-libtool \
+	distclean-tags distdir dvi dvi-am html html-am info info-am \
+	install install-am install-data install-data-am install-dvi \
+	install-dvi-am install-exec install-exec-am install-html \
+	install-html-am install-info install-info-am install-man \
+	install-pdf install-pdf-am install-pkglibLTLIBRARIES \
+	install-ps install-ps-am install-strip installcheck \
+	installcheck-am installdirs maintainer-clean \
+	maintainer-clean-generic mostlyclean mostlyclean-compile \
+	mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \
+	tags uninstall uninstall-am uninstall-pkglibLTLIBRARIES
+
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/src/plugins/task/cgroup/task_cgroup.c b/src/plugins/task/cgroup/task_cgroup.c
new file mode 100644
index 00000000000..ad54cfdde50
--- /dev/null
+++ b/src/plugins/task/cgroup/task_cgroup.c
@@ -0,0 +1,279 @@
+/*****************************************************************************\
+ *  task_cgroup.c - Library for task pre-launch and post_termination functions
+ *	            for containment using linux cgroup subsystems
+ *****************************************************************************
+ *  Copyright (C) 2009 CEA/DAM/DIF
+ *  Written by Matthieu Hautreux <matthieu.hautreux@cea.fr>
+ *
+ *  This file is part of SLURM, a resource management program.
+ *  For details, see <https://computing.llnl.gov/linux/slurm/>.
+ *  Please also read the included file: DISCLAIMER.
+ *
+ *  SLURM is free software; you can redistribute it and/or modify it under
+ *  the terms of the GNU General Public License as published by the Free
+ *  Software Foundation; either version 2 of the License, or (at your option)
+ *  any later version.
+ *
+ *  In addition, as a special exception, the copyright holders give permission
+ *  to link the code of portions of this program with the OpenSSL library under
+ *  certain conditions as described in each individual source file, and
+ *  distribute linked combinations including the two. You must obey the GNU
+ *  General Public License in all respects for all of the code used other than
+ *  OpenSSL. If you modify file(s) with this exception, you may extend this
+ *  exception to your version of the file(s), but you are not obligated to do
+ *  so. If you do not wish to do so, delete this exception statement from your
+ *  version.  If you delete this exception statement from all source files in
+ *  the program, then also delete it here.
+ *
+ *  SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
+ *  WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ *  FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
+ *  details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with SLURM; if not, write to the Free Software Foundation, Inc.,
+ *  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA.
+\*****************************************************************************/
+
+#if     HAVE_CONFIG_H
+#  include "config.h"
+#endif
+
+#include <signal.h>
+#include <sys/types.h>
+
+#include <slurm/slurm_errno.h>
+#include "src/common/slurm_xlator.h"
+#include "src/slurmd/slurmstepd/slurmstepd_job.h"
+
+#include "src/common/xcgroup_read_config.h"
+#include "src/common/xcgroup.h"
+
+#include "task_cgroup_cpuset.h"
+#include "task_cgroup_memory.h"
+//#include "task_cgroup_devices.h"
+
+/*
+ * These variables are required by the generic plugin interface.  If they
+ * are not found in the plugin, the plugin loader will ignore it.
+ *
+ * plugin_name - a string giving a human-readable description of the
+ * plugin.  There is no maximum length, but the symbol must refer to
+ * a valid string.
+ *
+ * plugin_type - a string suggesting the type of the plugin or its
+ * applicability to a particular form of data or method of data handling.
+ * If the low-level plugin API is used, the contents of this string are
+ * unimportant and may be anything.  SLURM uses the higher-level plugin
+ * interface which requires this string to be of the form
+ *
+ *      <application>/<method>
+ *
+ * where <application> is a description of the intended application of
+ * the plugin (e.g., "task" for task control) and <method> is a description
+ * of how this plugin satisfies that application.  SLURM will only load
+ * a task plugin if the plugin_type string has a prefix of "task/".
+ *
+ * plugin_version - an unsigned 32-bit integer giving the version number
+ * of the plugin.  If major and minor revisions are desired, the major
+ * version number may be multiplied by a suitable magnitude constant such
+ * as 100 or 1000.  Various SLURM versions will likely require a certain
+ * minimum versions for their plugins as this API matures.
+ */
+const char plugin_name[]        = "Tasks containment using linux cgroup";
+const char plugin_type[]        = "task/cgroup";
+const uint32_t plugin_version   = 100;
+/* per-subsystem switches, raised by init() from the cgroup configuration */
+static bool use_cpuset  = false;
+static bool use_memory  = false;
+static bool use_devices = false;
+/* cgroup configuration, loaded by init() and released by fini() */
+static slurm_cgroup_conf_t slurm_cgroup_conf;
+
+/* init() is called when the plugin is loaded, before any other function: it
+ * reads the cgroup configuration and starts the subsystems that it enables */
+extern int init (void)
+{
+	if (read_slurm_cgroup_conf(&slurm_cgroup_conf))
+		return SLURM_ERROR;
+	/* a use_* flag is raised only once its subsystem is really started */
+	if (slurm_cgroup_conf.constrain_cores) {
+		if (task_cgroup_cpuset_init(&slurm_cgroup_conf))
+			goto fail;
+		use_cpuset = true;
+		debug("%s: now constraining jobs allocated cores",
+		      plugin_type);
+	}
+	if (slurm_cgroup_conf.constrain_ram_space ||
+	    slurm_cgroup_conf.constrain_swap_space) {
+		if (task_cgroup_memory_init(&slurm_cgroup_conf))
+			goto fail;
+		use_memory = true;
+		debug("%s: now constraining jobs allocated memory",
+		      plugin_type);
+	}
+	if (slurm_cgroup_conf.constrain_devices) {
+		use_devices = true;
+		/* here we should initialize the devices subsystem */
+		debug("%s: now constraining jobs allocated devices",
+		      plugin_type);
+	}
+	verbose("%s: loaded", plugin_type);
+	return SLURM_SUCCESS;
+fail:	/* a subsystem did not start: release what was set up so far */
+	if (use_cpuset)
+		task_cgroup_cpuset_fini(&slurm_cgroup_conf);
+	use_cpuset = false;
+	free_slurm_cgroup_conf(&slurm_cgroup_conf);
+	return SLURM_ERROR;
+}
+
+/*
+ * fini() is called when the plugin is removed, and by task_post_step().
+ *	Each enabled subsystem is shut down and its use_* flag cleared, so
+ *	that a second call does not tear the same subsystem down again.
+ */
+extern int fini (void)
+{
+	if (use_cpuset) {
+		task_cgroup_cpuset_fini(&slurm_cgroup_conf);
+		use_cpuset = false;
+	}
+	if (use_memory) {
+		task_cgroup_memory_fini(&slurm_cgroup_conf);
+		use_memory = false;
+	}
+	/* nothing to release yet for the devices subsystem */
+	use_devices = false;
+
+	/* unload configuration (done on every call) */
+	free_slurm_cgroup_conf(&slurm_cgroup_conf);
+	return SLURM_SUCCESS;
+}
+
+/*
+ * task_slurmd_batch_request() - unused hook: always returns SLURM_SUCCESS
+ */
+extern int task_slurmd_batch_request (uint32_t job_id,
+				      batch_job_launch_msg_t *req)
+{
+	return SLURM_SUCCESS;
+}
+
+/*
+ * task_slurmd_launch_request() - unused hook: always returns SLURM_SUCCESS
+ */
+extern int task_slurmd_launch_request (uint32_t job_id,
+				       launch_tasks_request_msg_t *req,
+				       uint32_t node_id)
+{
+	return SLURM_SUCCESS;
+}
+
+/*
+ * task_slurmd_reserve_resources() - unused hook: always returns SLURM_SUCCESS
+ */
+extern int task_slurmd_reserve_resources (uint32_t job_id,
+					  launch_tasks_request_msg_t *req,
+					  uint32_t node_id)
+{
+	return SLURM_SUCCESS;
+}
+
+/*
+ * task_slurmd_suspend_job() - unused hook: always returns SLURM_SUCCESS
+ */
+extern int task_slurmd_suspend_job (uint32_t job_id)
+{
+	return SLURM_SUCCESS;
+}
+
+/*
+ * task_slurmd_resume_job() - unused hook: always returns SLURM_SUCCESS
+ */
+extern int task_slurmd_resume_job (uint32_t job_id)
+{
+	return SLURM_SUCCESS;
+}
+
+/*
+ * task_slurmd_release_resources() - unused hook: always returns SLURM_SUCCESS
+ */
+extern int task_slurmd_release_resources (uint32_t job_id)
+{
+	return SLURM_SUCCESS;
+}
+
+/*
+ * task_pre_setuid() is called before setting the UID for the
+ * user to launch his jobs, so it still runs with root privileges.
+ * It creates the containers of every enabled subsystem.
+ * The outcome of those creations is not checked: SLURM_SUCCESS is
+ * always returned.
+ */
+extern int task_pre_setuid (slurmd_job_t *job)
+{
+	/* we create the cpuset container as we are still root */
+	if (use_cpuset)
+		(void) task_cgroup_cpuset_create(job);
+
+	/* we create the memory container as we are still root */
+	if (use_memory)
+		(void) task_cgroup_memory_create(job);
+
+	/*
+	 * use_devices: here we should create the devices container as
+	 * we are root, but there is nothing to call for it yet
+	 */
+
+	return SLURM_SUCCESS;
+}
+
+/*
+ * task_pre_launch() is called prior to exec of application task.
+ *	It is followed by TaskProlog program (from slurm.conf) and
+ *	--task-prolog (from srun command line).
+ *	The task is handed to each enabled subsystem; what those calls
+ *	return is ignored and SLURM_SUCCESS is always returned.
+ */
+extern int task_pre_launch (slurmd_job_t *job)
+{
+	bool bind_task = use_cpuset && slurm_cgroup_conf.task_affinity;
+
+	/* attach the task ? not necessary but in case of future mods */
+	if (use_cpuset)
+		(void) task_cgroup_cpuset_attach_task(job);
+
+	/* set affinity if requested */
+	if (bind_task)
+		(void) task_cgroup_cpuset_set_task_affinity(job);
+
+	/* attach the task ? not necessary but in case of future mods */
+	if (use_memory)
+		(void) task_cgroup_memory_attach_task(job);
+
+	/* use_devices: nothing to do for the devices subsystem, which */
+	/* has no implementation to call yet */
+
+	return SLURM_SUCCESS;
+}
+
+/*
+ * task_post_term() is called after termination of application task.
+ *	It is preceded by --task-epilog (from srun command line) and
+ *	followed by TaskEpilog program (from slurm.conf). Nothing to do here.
+ */
+extern int task_post_term (slurmd_job_t *job)
+{
+	return SLURM_SUCCESS;
+}
+
+/*
+ * task_post_step() is called after termination of the step
+ * (all the tasks). It runs fini() and always returns SLURM_SUCCESS.
+ */
+extern int task_post_step (slurmd_job_t *job)
+{
+	fini();
+	return SLURM_SUCCESS;
+}
diff --git a/src/plugins/task/cgroup/task_cgroup_cpuset.c b/src/plugins/task/cgroup/task_cgroup_cpuset.c
new file mode 100644
index 00000000000..93efe61eebe
--- /dev/null
+++ b/src/plugins/task/cgroup/task_cgroup_cpuset.c
@@ -0,0 +1,681 @@
+/***************************************************************************** \
+ *  task_cgroup_cpuset.c - cpuset cgroup subsystem for task/cgroup
+ *****************************************************************************
+ *  Copyright (C) 2009 CEA/DAM/DIF
+ *  Written by Matthieu Hautreux <matthieu.hautreux@cea.fr>
+ *
+ *  This file is part of SLURM, a resource management program.
+ *  For details, see <https://computing.llnl.gov/linux/slurm/>.
+ *  Please also read the included file: DISCLAIMER.
+ *
+ *  SLURM is free software; you can redistribute it and/or modify it under
+ *  the terms of the GNU General Public License as published by the Free
+ *  Software Foundation; either version 2 of the License, or (at your option)
+ *  any later version.
+ *
+ *  In addition, as a special exception, the copyright holders give permission
+ *  to link the code of portions of this program with the OpenSSL library under
+ *  certain conditions as described in each individual source file, and
+ *  distribute linked combinations including the two. You must obey the GNU
+ *  General Public License in all respects for all of the code used other than
+ *  OpenSSL. If you modify file(s) with this exception, you may extend this
+ *  exception to your version of the file(s), but you are not obligated to do
+ *  so. If you do not wish to do so, delete this exception statement from your
+ *  version.  If you delete this exception statement from all source files in
+ *  the program, then also delete it here.
+ *
+ *  SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
+ *  WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ *  FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
+ *  details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with SLURM; if not, write to the Free Software Foundation, Inc.,
+ *  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA.
+\*****************************************************************************/
+
+#if HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#define _GNU_SOURCE
+#include <sched.h>
+
+#include <sys/types.h>
+#include <slurm/slurm_errno.h>
+#include <slurm/slurm.h>
+#include "src/slurmd/slurmstepd/slurmstepd_job.h"
+#include "src/slurmd/slurmd/slurmd.h"
+
+#include "src/common/xstring.h"
+#include "src/common/xcgroup_read_config.h"
+#include "src/common/xcgroup.h"
+#include "src/common/xcpuinfo.h"
+
+#ifdef HAVE_HWLOC
+#include <hwloc.h>
+#include <hwloc/glibc-sched.h>
+#endif
+
+#ifndef PATH_MAX
+#define PATH_MAX 256
+#endif
+/* relative cgroup paths, filled in by task_cgroup_cpuset_create() */
+static char user_cgroup_path[PATH_MAX];
+static char job_cgroup_path[PATH_MAX];
+static char jobstep_cgroup_path[PATH_MAX];
+/* cpuset namespace, set up by task_cgroup_cpuset_init() */
+static xcgroup_ns_t cpuset_ns;
+/* user, job and step cgroups set up by task_cgroup_cpuset_create() */
+static xcgroup_t user_cpuset_cg;
+static xcgroup_t job_cpuset_cg;
+static xcgroup_t step_cpuset_cg;
+/* give cg the cpuset.cpus and cpuset.mems settings of its ancestor */
+static int _xcgroup_cpuset_init(xcgroup_t* cg);
+
+/*
+ * Set up what the cpuset subsystem needs: the cpuinfo data and a cpuset
+ * cgroup namespace, mounted on demand when the configuration allows it.
+ * Everything is released again on failure. Returns SLURM_SUCCESS/SLURM_ERROR.
+ */
+extern int task_cgroup_cpuset_init(slurm_cgroup_conf_t *slurm_cgroup_conf)
+{
+	char agent[PATH_MAX];
+	int len;
+
+	/* initialize cpuinfo internal data */
+	if (xcpuinfo_init() != XCPUINFO_SUCCESS)
+		return SLURM_ERROR;
+
+	/* no user/job/jobstep cgroup relative path is known yet */
+	*user_cgroup_path = *job_cgroup_path = *jobstep_cgroup_path = '\0';
+
+	/* release agent path: <cgroup_release_agent>/release_cpuset */
+	*agent = '\0';
+	len = snprintf(agent, PATH_MAX, "%s/release_cpuset",
+		       slurm_cgroup_conf->cgroup_release_agent);
+	if (len >= PATH_MAX) {
+		error("task/cgroup: unable to build cpuset release agent path");
+		goto error;
+	}
+	if (xcgroup_ns_create(&cpuset_ns, CGROUP_BASEDIR "/cpuset", "",
+			      "cpuset", agent) != XCGROUP_SUCCESS) {
+		error("task/cgroup: unable to create cpuset namespace");
+		goto error;
+	}
+
+	/* an available namespace needs nothing more */
+	if (xcgroup_ns_is_available(&cpuset_ns))
+		return SLURM_SUCCESS;
+
+	/* otherwise it has to be mounted, which must be allowed */
+	if (!slurm_cgroup_conf->cgroup_automount) {
+		error("task/cgroup: cpuset namespace not mounted. aborting");
+		goto clean;
+	}
+	if (xcgroup_ns_mount(&cpuset_ns)) {
+		error("task/cgroup: unable to mount cpuset namespace");
+		goto clean;
+	}
+	info("task/cgroup: cpuset namespace is now mounted");
+	return SLURM_SUCCESS;
+
+clean:
+	xcgroup_ns_destroy(&cpuset_ns);
+error:
+	xcpuinfo_fini();
+	return SLURM_ERROR;
+}
+
+/*
+ * Release the cpuset subsystem: cgroup handles, namespace and cpuinfo data.
+ */
+extern int task_cgroup_cpuset_fini(slurm_cgroup_conf_t *slurm_cgroup_conf)
+{
+	/* a cgroup handle is only released if its relative path was built */
+	if (*user_cgroup_path)
+		xcgroup_destroy(&user_cpuset_cg);
+	if (*job_cgroup_path)
+		xcgroup_destroy(&job_cpuset_cg);
+	if (*jobstep_cgroup_path)
+		xcgroup_destroy(&step_cpuset_cg);
+	*user_cgroup_path = *job_cgroup_path = *jobstep_cgroup_path = '\0';
+
+	xcgroup_ns_destroy(&cpuset_ns);
+	xcpuinfo_fini();
+
+	return SLURM_SUCCESS;
+}
+
+/*
+ * Create and configure the user, job and step cpuset cgroups of a step, then
+ * attach the calling process (slurmstepd) to the step cgroup. The root cpuset
+ * cgroup is locked for the whole setup. Returns SLURM_SUCCESS or SLURM_ERROR.
+ */
+extern int task_cgroup_cpuset_create(slurmd_job_t *job)
+{
+	int rc, fstatus = SLURM_ERROR;
+	xcgroup_t cpuset_cg;
+	pid_t pid = getpid();
+	uint32_t jobid = job->jobid;
+	uint32_t stepid = job->stepid;
+	uid_t uid = job->uid;
+	gid_t gid = job->gid;
+	char *user_alloc_cores = NULL, *job_alloc_cores = NULL;
+	char *step_alloc_cores = NULL, *cpus = NULL;
+	size_t cpus_size;
+
+	/* build user cgroup relative path if not set (should not be) */
+	if (*user_cgroup_path == '\0') {
+		if (snprintf(user_cgroup_path,PATH_MAX,
+			      "/uid_%u",uid) >= PATH_MAX) {
+			error("task/cgroup: unable to build uid %u cpuset "
+			      "cg relative path : %m",uid);
+			return SLURM_ERROR;
+		}
+	}
+
+	/* build job cgroup relative path if not set (should not be) */
+	if (*job_cgroup_path == '\0') {
+		if (snprintf(job_cgroup_path,PATH_MAX,"%s/job_%u",
+			      user_cgroup_path,jobid) >= PATH_MAX) {
+			error("task/cgroup: unable to build job %u cpuset "
+			      "cg relative path : %m",jobid);
+			return SLURM_ERROR;
+		}
+	}
+
+	/* build job step cgroup relative path (should not be) */
+	if (*jobstep_cgroup_path == '\0') {
+		if (stepid == NO_VAL) {
+			if (snprintf(jobstep_cgroup_path, PATH_MAX,
+				     "%s/step_batch", job_cgroup_path)
+			    >= PATH_MAX) {
+				error("task/cgroup: unable to build job step"
+				      " %u.batch cpuset cg relative path: %m",
+				      jobid);
+				return SLURM_ERROR;
+			}
+		} else {
+			if (snprintf(jobstep_cgroup_path, PATH_MAX, "%s/step_%u",
+				     job_cgroup_path, stepid) >= PATH_MAX) {
+				error("task/cgroup: unable to build job step"
+				      " %u.%u cpuset cg relative path: %m",
+				      jobid, stepid);
+				return SLURM_ERROR;
+			}
+		}
+	}
+
+	/*
+	 * create cpuset root cg and lock it
+	 *
+	 * we will keep the lock until the end to avoid the effect of a release
+	 * agent that would remove an existing cgroup hierarchy while we are
+	 * setting it up. As soon as the step cgroup is created, we can release
+	 * the lock.
+	 * Indeed, consecutive slurm steps could result in cg being removed
+	 * between the next EEXIST instantiation and the first addition of
+	 * a task. The release_agent will have to lock the root cpuset cgroup
+	 * to avoid this scenario.
+	 */
+	if (xcgroup_create(&cpuset_ns,&cpuset_cg,"",0,0) != XCGROUP_SUCCESS) {
+		error("task/cgroup: unable to create root cpuset xcgroup");
+		return SLURM_ERROR;
+	}
+	if (xcgroup_lock(&cpuset_cg) != XCGROUP_SUCCESS) {
+		xcgroup_destroy(&cpuset_cg);
+		error("task/cgroup: unable to lock root cpuset cg");
+		return SLURM_ERROR;
+	}
+
+	/*
+	 * build job and job steps allocated cores lists
+	 */
+	debug("task/cgroup: job abstract cores are '%s'",
+	      job->job_alloc_cores);
+	debug("task/cgroup: step abstract cores are '%s'",
+	      job->step_alloc_cores);
+	if (xcpuinfo_abs_to_mac(job->job_alloc_cores,
+				 &job_alloc_cores) != XCPUINFO_SUCCESS) {
+		error("task/cgroup: unable to build job physical cores");
+		goto error;
+	}
+	if (xcpuinfo_abs_to_mac(job->step_alloc_cores,
+				 &step_alloc_cores) != XCPUINFO_SUCCESS) {
+		error("task/cgroup: unable to build step physical cores");
+		goto error;
+	}
+	debug("task/cgroup: job physical cores are '%s'",
+	      job_alloc_cores);
+	debug("task/cgroup: step physical cores are '%s'",
+	      step_alloc_cores);
+
+	/*
+	 * create user cgroup in the cpuset ns (it could already exist)
+	 */
+	if (xcgroup_create(&cpuset_ns,&user_cpuset_cg,
+			    user_cgroup_path,
+			    getuid(),getgid()) != XCGROUP_SUCCESS) {
+		goto error;
+	}
+	if (xcgroup_instanciate(&user_cpuset_cg) != XCGROUP_SUCCESS) {
+		xcgroup_destroy(&user_cpuset_cg);
+		goto error;
+	}
+
+	/*
+	 * check that user's cpuset cgroup is consistent and add the job cores
+	 */
+	rc = xcgroup_get_param(&user_cpuset_cg,"cpuset.cpus",&cpus,&cpus_size);
+	if (rc != XCGROUP_SUCCESS || cpus_size == 1) {
+		/* initialize the cpusets as they were not set yet */
+		if (_xcgroup_cpuset_init(&user_cpuset_cg) !=
+		     XCGROUP_SUCCESS) {
+			xcgroup_delete(&user_cpuset_cg);
+			xcgroup_destroy(&user_cpuset_cg);
+			goto error;
+		}
+	}
+	user_alloc_cores = xstrdup(job_alloc_cores);
+	if (cpus != NULL && cpus_size > 1) {
+		cpus[cpus_size-1]='\0';
+		xstrcat(user_alloc_cores,",");
+		xstrcat(user_alloc_cores,cpus);
+	}
+	xcgroup_set_param(&user_cpuset_cg,"cpuset.cpus",user_alloc_cores);
+
+	/*
+	 * create job cgroup in the cpuset ns (it could already exist)
+	 */
+	if (xcgroup_create(&cpuset_ns,&job_cpuset_cg,
+			    job_cgroup_path,
+			    getuid(),getgid()) != XCGROUP_SUCCESS) {
+		xcgroup_destroy(&user_cpuset_cg);
+		goto error;
+	}
+	if (xcgroup_instanciate(&job_cpuset_cg) != XCGROUP_SUCCESS) {
+		xcgroup_destroy(&user_cpuset_cg);
+		xcgroup_destroy(&job_cpuset_cg);
+		goto error;
+	}
+	if (_xcgroup_cpuset_init(&job_cpuset_cg) != XCGROUP_SUCCESS) {
+		xcgroup_destroy(&user_cpuset_cg);
+		xcgroup_destroy(&job_cpuset_cg);
+		goto error;
+	}
+	xcgroup_set_param(&job_cpuset_cg,"cpuset.cpus",job_alloc_cores);
+
+	/*
+	 * create step cgroup in the cpuset ns (it should not exist)
+	 * use job's user uid/gid to enable tasks cgroups creation by
+	 * the user inside the step cgroup owned by root
+	 */
+	if (xcgroup_create(&cpuset_ns,&step_cpuset_cg,
+			    jobstep_cgroup_path,
+			    uid,gid) != XCGROUP_SUCCESS) {
+		/* do not delete user/job cgroup as */
+		/* they can exist for other steps */
+		xcgroup_destroy(&user_cpuset_cg);
+		xcgroup_destroy(&job_cpuset_cg);
+		goto error;
+	}
+	if (xcgroup_instanciate(&step_cpuset_cg) != XCGROUP_SUCCESS) {
+		xcgroup_destroy(&user_cpuset_cg);
+		xcgroup_destroy(&job_cpuset_cg);
+		xcgroup_destroy(&step_cpuset_cg);
+		goto error;
+	}
+	if (_xcgroup_cpuset_init(&step_cpuset_cg) != XCGROUP_SUCCESS) {
+		xcgroup_destroy(&user_cpuset_cg);
+		xcgroup_destroy(&job_cpuset_cg);
+		xcgroup_delete(&step_cpuset_cg);
+		xcgroup_destroy(&step_cpuset_cg);
+		goto error;
+	}
+	xcgroup_set_param(&step_cpuset_cg,"cpuset.cpus",step_alloc_cores);
+
+	/* attach the slurmstepd to the step cpuset cgroup */
+	rc = xcgroup_add_pids(&step_cpuset_cg,&pid,1);
+	if (rc != XCGROUP_SUCCESS) {
+		error("task/cgroup: unable to add slurmstepd to cpuset cg '%s'",
+		      step_cpuset_cg.path);
+		fstatus = SLURM_ERROR;
+	} else
+		fstatus = SLURM_SUCCESS;
+
+error:
+	xcgroup_unlock(&cpuset_cg);
+	xcgroup_destroy(&cpuset_cg);
+
+	/* cpus is released here so that the early exits do not leak it */
+	xfree(cpus);
+	xfree(user_alloc_cores);
+	xfree(job_alloc_cores);
+	xfree(step_alloc_cores);
+
+	return fstatus;
+}
+
+/*
+ * Attach a task to the step cpuset cgroup. Nothing has to be done:
+ * tasks are automatically attached as slurmstepd is in the step cg.
+ * job is not used and SLURM_SUCCESS is always returned.
+ */
+extern int task_cgroup_cpuset_attach_task(slurmd_job_t *job)
+{
+	return SLURM_SUCCESS;
+}
+
+/*
+ * Bind a task to a set of PUs chosen from its local id and the step layout.
+ * Affinity is set using sched_setaffinity (not the cpuset cgroup) to not
+ * force the user to play with the cgroup hierarchy to modify it. Returns
+ * SLURM_SUCCESS if the mask was applied, 0 if no binding was requested and
+ * SLURM_ERROR otherwise (no hwloc support, affinity disabled or failure).
+ */
+extern int task_cgroup_cpuset_set_task_affinity(slurmd_job_t *job)
+{
+	int fstatus = SLURM_ERROR;
+
+#ifndef HAVE_HWLOC
+
+	error("task/cgroup: plugin not compiled with hwloc support, "
+	      "skipping affinity.");
+	return fstatus;
+
+#else
+	uint32_t i, nobj;
+	uint32_t nldoms, nsockets, ncores, npus;
+
+	uint32_t pfirst,plast;
+	uint32_t taskid = job->envtp->localid;
+	uint32_t jntasks = job->node_tasks;
+	uint32_t jnpus = jntasks * job->cpus_per_task;
+	pid_t    pid = job->envtp->task_pid;
+
+	cpu_bind_type_t bind_type;
+	int verbose = 0;	/* stays off unless CPU_BIND_VERBOSE is set */
+
+	hwloc_topology_t topology;
+	hwloc_cpuset_t cpuset,ct;
+	hwloc_obj_t obj;
+	struct hwloc_obj *pobj;
+	hwloc_obj_type_t hwtype, req_hwtype;
+	int hwdepth;
+
+	size_t tssize;
+	cpu_set_t ts;
+
+	bind_type = job->cpu_bind_type ;
+	if (conf->task_plugin_param & CPU_BIND_VERBOSE ||
+	    bind_type & CPU_BIND_VERBOSE)
+		verbose = 1 ;
+
+	if (bind_type & CPU_BIND_NONE) {
+		if (verbose)
+			info("task/cgroup: task[%u] is requesting no affinity",
+			     taskid);
+		return 0;
+	} else if (bind_type & CPU_BIND_TO_THREADS) {
+		if (verbose)
+			info("task/cgroup: task[%u] is requesting "
+			     "thread level binding",taskid);
+		req_hwtype = HWLOC_OBJ_PU;
+	} else if (bind_type & CPU_BIND_TO_CORES) {
+		if (verbose)
+			info("task/cgroup: task[%u] is requesting "
+			     "core level binding",taskid);
+		req_hwtype = HWLOC_OBJ_CORE;
+	} else if (bind_type & CPU_BIND_TO_SOCKETS) {
+		if (verbose)
+			info("task/cgroup: task[%u] is requesting "
+			     "socket level binding",taskid);
+		req_hwtype = HWLOC_OBJ_SOCKET;
+	} else if (bind_type & CPU_BIND_TO_LDOMS) {
+		if (verbose)
+			info("task/cgroup: task[%u] is requesting "
+			     "ldom level binding",taskid);
+		req_hwtype = HWLOC_OBJ_NODE;
+	} else {
+		if (verbose)
+			info("task/cgroup: task[%u] using core level binding"
+			     " by default",taskid);
+		req_hwtype = HWLOC_OBJ_CORE;
+	}
+
+	/* Allocate and initialize hwloc objects */
+	hwloc_topology_init(&topology);
+	cpuset = hwloc_cpuset_alloc() ;
+
+	/*
+	 * Perform the topology detection. It will only get allowed PUs.
+	 * Detect in the same time the granularity to use for binding.
+	 * The granularity can be relaxed from threads to cores if enough
+	 * cores are available as with hyperthread support, ntasks-per-core
+	 * param can let us have access to more threads per core for each
+	 * task
+	 * Revert back to machine granularity if no finer-grained granularity
+	 * matching the request is found. This will result in no affinity
+	 * applied.
+	 * The detected granularity will be used to find where to best place
+	 * the task, then the cpu_bind option will be used to relax the
+	 * affinity constraint and use more PUs. (i.e. use a core granularity
+	 * to dispatch the tasks across the sockets and then provide access
+	 * to each task to the cores of its socket.)
+	 */
+	hwloc_topology_load(topology);
+	npus = (uint32_t) hwloc_get_nbobjs_by_type(topology,
+						   HWLOC_OBJ_PU);
+	ncores = (uint32_t) hwloc_get_nbobjs_by_type(topology,
+						     HWLOC_OBJ_CORE);
+	nsockets = (uint32_t) hwloc_get_nbobjs_by_type(topology,
+						       HWLOC_OBJ_SOCKET);
+	nldoms = (uint32_t) hwloc_get_nbobjs_by_type(topology,
+						     HWLOC_OBJ_NODE);
+	hwtype = HWLOC_OBJ_MACHINE;
+	nobj = 1;
+	if (npus >= jnpus || bind_type & CPU_BIND_TO_THREADS) {
+		hwtype = HWLOC_OBJ_PU;
+		nobj = npus;
+	}
+	if (ncores >= jnpus || bind_type & CPU_BIND_TO_CORES) {
+		hwtype = HWLOC_OBJ_CORE;
+		nobj = ncores;
+	}
+	if (nsockets >= jntasks &&
+	     bind_type & CPU_BIND_TO_SOCKETS) {
+		hwtype = HWLOC_OBJ_SOCKET;
+		nobj = nsockets;
+	}
+	/*
+	 * HWLOC returns all the NUMA nodes available regardless of the
+	 * number of underlying sockets available (regardless of the allowed
+	 * resources). So there is no guarantee that each ldom will be populated
+	 * with usable sockets. So add a simple check that at least ensure that
+	 * we have as many sockets as ldoms before moving to ldoms granularity
+	 */
+	if (nldoms >= jntasks &&
+	     nsockets >= nldoms &&
+	     bind_type & CPU_BIND_TO_LDOMS) {
+		hwtype = HWLOC_OBJ_NODE;
+		nobj = nldoms;
+	}
+
+	/*
+	 * Perform a block binding on the detected object respecting the
+	 * granularity.
+	 * If not enough objects to do the job, revert to no affinity mode
+	 */
+	if (hwloc_compare_types(hwtype,HWLOC_OBJ_MACHINE) == 0) {
+
+		info("task/cgroup: task[%u] disabling affinity because of %s "
+		     "granularity",taskid,hwloc_obj_type_string(hwtype));
+
+	} else if (hwloc_compare_types(hwtype,HWLOC_OBJ_CORE) >= 0 &&
+		    jnpus > nobj) {
+
+		info("task/cgroup: task[%u] not enough %s objects, disabling "
+		     "affinity",taskid,hwloc_obj_type_string(hwtype));
+
+	} else {
+
+		if (verbose) {
+			info("task/cgroup: task[%u] using %s granularity",
+			     taskid,hwloc_obj_type_string(hwtype));
+		}
+		if (hwloc_compare_types(hwtype,HWLOC_OBJ_CORE) >= 0) {
+			/* cores or threads granularity */
+			pfirst = taskid *  job->cpus_per_task ;
+			plast = pfirst + job->cpus_per_task - 1;
+		} else {
+			/* sockets or ldoms granularity */
+			pfirst = taskid;
+			plast = pfirst;
+		}
+
+		hwdepth = hwloc_get_type_depth(topology,hwtype);
+		for (i = pfirst; i <= plast && i < nobj ; i++) {
+			obj = hwloc_get_obj_by_depth(topology,hwdepth,(int)i);
+
+			/* if requested binding overlap the granularity */
+			/* use the ancestor cpuset instead of the object one */
+			if (hwloc_compare_types(hwtype,req_hwtype) > 0) {
+
+				/* Get the parent object of req_hwtype or the */
+				/* one just above if not found (meaning of >0)*/
+				/* (useful for ldoms binding with !NUMA nodes)*/
+				pobj = obj->parent;
+				while (pobj != NULL &&
+					hwloc_compare_types(pobj->type,
+							    req_hwtype) > 0)
+					pobj = pobj->parent;
+
+				if (pobj != NULL) {
+					if (verbose)
+						info("task/cgroup: task[%u] "
+						     "higher level %s found",
+						     taskid,
+						     hwloc_obj_type_string(
+							     pobj->type));
+					ct = hwloc_cpuset_dup(pobj->
+							      allowed_cpuset);
+					hwloc_cpuset_or(cpuset,cpuset,ct);
+					hwloc_cpuset_free(ct);
+				} else {
+					/* should not be executed */
+					if (verbose)
+						info("task/cgroup: task[%u] "
+						     "no higher level found",
+						     taskid);
+					ct = hwloc_cpuset_dup(obj->
+							      allowed_cpuset);
+					hwloc_cpuset_or(cpuset,cpuset,ct);
+					hwloc_cpuset_free(ct);
+				}
+
+			} else {
+				ct = hwloc_cpuset_dup(obj->allowed_cpuset);
+				hwloc_cpuset_or(cpuset,cpuset,ct);
+				hwloc_cpuset_free(ct);
+			}
+		}
+
+		char *str;
+		hwloc_cpuset_asprintf(&str,cpuset);
+		tssize = sizeof(cpu_set_t);
+		if (hwloc_cpuset_to_glibc_sched_affinity(topology,cpuset,
+							  &ts,tssize) == 0) {
+			fstatus = SLURM_SUCCESS;
+			if (sched_setaffinity(pid,tssize,&ts)) {
+				error("task/cgroup: task[%u] unable to set "
+				      "taskset '%s'",taskid,str);
+				fstatus = SLURM_ERROR;
+			} else if (verbose) {
+				info("task/cgroup: task[%u] taskset '%s' is set"
+				     ,taskid,str);
+			}
+		} else {
+			error("task/cgroup: task[%u] unable to build "
+			      "taskset '%s'",taskid,str);
+			fstatus = SLURM_ERROR;
+		}
+		free(str);
+
+	}
+
+	/* Destroy hwloc objects */
+	hwloc_cpuset_free(cpuset);
+	hwloc_topology_destroy(topology);
+
+	return fstatus;
+#endif
+
+}
+
+
+/* when cgroups are configured with cpuset, at least
+ * cpuset.cpus and cpuset.mems must be set or the cgroup
+ * will not be available at all.
+ * we duplicate the ancestor configuration in the init step.
+ * returns XCGROUP_SUCCESS once both files are copied, else XCGROUP_ERROR */
+static int _xcgroup_cpuset_init(xcgroup_t* cg)
+{
+	int fstatus = XCGROUP_ERROR, i;
+	char* cpuset_metafiles[] = {
+		"cpuset.cpus",
+		"cpuset.mems"
+	};
+	char* cpuset_meta;
+	char* cpuset_conf;
+	size_t csize;
+	xcgroup_t acg;
+	char* acg_name;
+	char* p;
+
+	/* the ancestor name is the cg name cut at its last '/' */
+	acg_name = (char*) xstrdup(cg->name);
+	p = rindex(acg_name,'/');
+	if (p == NULL) {
+		debug2("task/cgroup: unable to get ancestor path for "
+		       "cpuset cg '%s' : %m",cg->path);
+		goto out;
+	}
+	*p = '\0';
+	if (xcgroup_load(cg->ns,&acg,acg_name) != XCGROUP_SUCCESS) {
+		debug2("task/cgroup: unable to load ancestor for "
+		       "cpuset cg '%s' : %m",cg->path);
+		goto out;
+	}
+
+	/* inherits ancestor params */
+	for (i = 0 ; i < 2 ; i++) {
+		cpuset_meta = cpuset_metafiles[i];
+		if (xcgroup_get_param(&acg,cpuset_meta,
+				       &cpuset_conf,&csize)
+		     != XCGROUP_SUCCESS) {
+			debug2("task/cgroup: assuming no cpuset cg "
+			       "support for '%s'",acg.path);
+			goto out_acg;
+		}
+		/* overwrite the last byte read to terminate the value */
+		if (csize > 0)
+			cpuset_conf[csize-1]='\0';
+		if (xcgroup_set_param(cg,cpuset_meta,cpuset_conf)
+		     != XCGROUP_SUCCESS) {
+			debug2("task/cgroup: unable to write %s configuration "
+			       "(%s) for cpuset cg '%s'",cpuset_meta,
+			       cpuset_conf,cg->path);
+			xfree(cpuset_conf);
+			goto out_acg;
+		}
+		xfree(cpuset_conf);
+	}
+	fstatus = XCGROUP_SUCCESS;
+out_acg:
+	xcgroup_destroy(&acg);
+out:
+	/* the copy of the name is released on every path */
+	xfree(acg_name);
+	return fstatus;
+}
diff --git a/src/plugins/proctrack/cgroup/xcpuinfo.h b/src/plugins/task/cgroup/task_cgroup_cpuset.h
similarity index 61%
rename from src/plugins/proctrack/cgroup/xcpuinfo.h
rename to src/plugins/task/cgroup/task_cgroup_cpuset.h
index f8cf8af5a33..8b12bbc5386 100644
--- a/src/plugins/proctrack/cgroup/xcpuinfo.h
+++ b/src/plugins/task/cgroup/task_cgroup_cpuset.h
@@ -1,34 +1,34 @@
 /*****************************************************************************\
- *  xcpuinfo.h - cpuinfo related primitives headers
+ *  task_cgroup_cpuset.h - cpuset cgroup subsystem primitives for task/cgroup
  *****************************************************************************
  *  Copyright (C) 2009 CEA/DAM/DIF
  *  Written by Matthieu Hautreux <matthieu.hautreux@cea.fr>
- *  
+ *
  *  This file is part of SLURM, a resource management program.
  *  For details, see <https://computing.llnl.gov/linux/slurm/>.
  *  Please also read the included file: DISCLAIMER.
- *  
+ *
  *  SLURM is free software; you can redistribute it and/or modify it under
  *  the terms of the GNU General Public License as published by the Free
  *  Software Foundation; either version 2 of the License, or (at your option)
  *  any later version.
  *
- *  In addition, as a special exception, the copyright holders give permission 
- *  to link the code of portions of this program with the OpenSSL library under 
- *  certain conditions as described in each individual source file, and 
- *  distribute linked combinations including the two. You must obey the GNU 
- *  General Public License in all respects for all of the code used other than 
- *  OpenSSL. If you modify file(s) with this exception, you may extend this 
- *  exception to your version of the file(s), but you are not obligated to do 
+ *  In addition, as a special exception, the copyright holders give permission
+ *  to link the code of portions of this program with the OpenSSL library under
+ *  certain conditions as described in each individual source file, and
+ *  distribute linked combinations including the two. You must obey the GNU
+ *  General Public License in all respects for all of the code used other than
+ *  OpenSSL. If you modify file(s) with this exception, you may extend this
+ *  exception to your version of the file(s), but you are not obligated to do
  *  so. If you do not wish to do so, delete this exception statement from your
- *  version.  If you delete this exception statement from all source files in 
+ *  version.  If you delete this exception statement from all source files in
  *  the program, then also delete it here.
- *  
+ *
  *  SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
  *  WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
  *  FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
  *  details.
- *  
+ *
  *  You should have received a copy of the GNU General Public License along
  *  with SLURM; if not, write to the Free Software Foundation, Inc.,
  *  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA.
@@ -38,56 +38,24 @@
 #   include "config.h"
 #endif
 
-#ifndef _XCPUINFO_H_
-#define _XCPUINFO_H_
+#ifndef _TASK_CGROUP_CPUSET_H_
+#define _TASK_CGROUP_CPUSET_H_
 
-#define XCPUINFO_ERROR    1
-#define XCPUINFO_SUCCESS  0
+#include "src/common/xcgroup_read_config.h"
 
-/*
- * Initialize xcpuinfo internal data
- *
- * returned values:
- *  - XCPUINFO_ERROR
- *  - XCPUINFO_SUCCESS
- */
-int xcpuinfo_init();
+/* initialize cpuset subsystem of task/cgroup */
+extern int task_cgroup_cpuset_init(slurm_cgroup_conf_t *slurm_cgroup_conf);
 
-/*
- * Destroy xcpuinfo internal data
- *
- * returned values:
- *  - XCPUINFO_ERROR
- *  - XCPUINFO_SUCCESS
- */
-int xcpuinfo_fini();
+/* release cpuset subsystem resources */
+extern int task_cgroup_cpuset_fini(slurm_cgroup_conf_t *slurm_cgroup_conf);
 
-/*
- * Use xcpuinfo internal data to convert an abstract range
- * of cores (slurm internal format) into the machine one
- *
- * range is of the form 0-1,4-5
- *
- * on success, the output range must be freed using xfree
- *
- * returned values:
- *  - XCPUINFO_ERROR
- *  - XCPUINFO_SUCCESS
- */
-int xcpuinfo_abs_to_mac(char* lrange,char** prange);
+/* create user/job/jobstep cpuset cgroups */
+extern int task_cgroup_cpuset_create(slurmd_job_t *job);
 
-/*
- * Use xcpuinfo internal data to convert a machine range
- * of cores into an abstract one (slurm internal format)
- *
- * range is of the form 0-1,4-5
- *
- * on success, the output range must be freed using xfree
- *
- * returned values:
- *  - XCPUINFO_ERROR
- *  - XCPUINFO_SUCCESS
- */
-int xcpuinfo_mac_to_abs(char* lrange,char** prange);
+/* create a task cgroup and attach the task to it */
+extern int task_cgroup_cpuset_attach_task(slurmd_job_t *job);
+
+/* set a task affinity based on its local id and job information */
+extern int task_cgroup_cpuset_set_task_affinity(slurmd_job_t *job);
 
 #endif
diff --git a/src/plugins/task/cgroup/task_cgroup_memory.c b/src/plugins/task/cgroup/task_cgroup_memory.c
new file mode 100644
index 00000000000..4b8c194c2c5
--- /dev/null
+++ b/src/plugins/task/cgroup/task_cgroup_memory.c
@@ -0,0 +1,348 @@
+/*****************************************************************************\
+ *  task_cgroup_memory.c - memory cgroup subsystem for task/cgroup
+ *****************************************************************************
+ *  Copyright (C) 2009 CEA/DAM/DIF
+ *  Written by Matthieu Hautreux <matthieu.hautreux@cea.fr>
+ *
+ *  This file is part of SLURM, a resource management program.
+ *  For details, see <https://computing.llnl.gov/linux/slurm/>.
+ *  Please also read the included file: DISCLAIMER.
+ *
+ *  SLURM is free software; you can redistribute it and/or modify it under
+ *  the terms of the GNU General Public License as published by the Free
+ *  Software Foundation; either version 2 of the License, or (at your option)
+ *  any later version.
+ *
+ *  In addition, as a special exception, the copyright holders give permission
+ *  to link the code of portions of this program with the OpenSSL library under
+ *  certain conditions as described in each individual source file, and
+ *  distribute linked combinations including the two. You must obey the GNU
+ *  General Public License in all respects for all of the code used other than
+ *  OpenSSL. If you modify file(s) with this exception, you may extend this
+ *  exception to your version of the file(s), but you are not obligated to do
+ *  so. If you do not wish to do so, delete this exception statement from your
+ *  version.  If you delete this exception statement from all source files in
+ *  the program, then also delete it here.
+ *
+ *  SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
+ *  WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ *  FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
+ *  details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with SLURM; if not, write to the Free Software Foundation, Inc.,
+ *  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA.
+\*****************************************************************************/
+
+#if HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <sys/types.h>
+#include <stdlib.h>		/* getenv     */
+#include <slurm/slurm_errno.h>
+#include <slurm/slurm.h>
+#include "src/slurmd/slurmstepd/slurmstepd_job.h"
+#include "src/slurmd/slurmd/slurmd.h"
+
+#include "src/common/xstring.h"
+#include "src/common/xcgroup_read_config.h"
+#include "src/common/xcgroup.h"
+
+#ifndef PATH_MAX
+#define PATH_MAX 256
+#endif
+
+static char user_cgroup_path[PATH_MAX];
+static char job_cgroup_path[PATH_MAX];
+static char jobstep_cgroup_path[PATH_MAX];
+
+static xcgroup_ns_t memory_ns;
+
+static xcgroup_t user_memory_cg;
+static xcgroup_t job_memory_cg;
+static xcgroup_t step_memory_cg;
+
+static int allowed_ram_space;
+static int allowed_swap_space;
+
+
+extern int task_cgroup_memory_init(slurm_cgroup_conf_t *slurm_cgroup_conf)
+{
+	char release_agent_path[PATH_MAX];
+
+	/* initialize user/job/jobstep cgroup relative paths */
+	user_cgroup_path[0]='\0';
+	job_cgroup_path[0]='\0';
+	jobstep_cgroup_path[0]='\0';
+
+	/* initialize memory cgroup namespace */
+	release_agent_path[0]='\0';
+	if (snprintf(release_agent_path,PATH_MAX,"%s/release_memory",
+		      slurm_cgroup_conf->cgroup_release_agent) >= PATH_MAX) {
+		error("task/cgroup: unable to build memory release agent path");
+		goto error;
+	}
+	if (xcgroup_ns_create(&memory_ns,CGROUP_BASEDIR "/memory","",
+			       "memory",release_agent_path) !=
+	     XCGROUP_SUCCESS) {
+		error("task/cgroup: unable to create memory namespace");
+		goto error;
+	}
+
+	/* check that memory cgroup namespace is available */
+	if (! xcgroup_ns_is_available(&memory_ns)) {
+		if (slurm_cgroup_conf->cgroup_automount) {
+			if (xcgroup_ns_mount(&memory_ns)) {
+				error("task/cgroup: unable to mount memory "
+				      "namespace");
+				goto clean;
+			}
+			info("task/cgroup: memory namespace is now mounted");
+		} else {
+			error("task/cgroup: memory namespace not mounted. "
+			      "aborting");
+			goto clean;
+		}
+	}
+
+	allowed_ram_space = slurm_cgroup_conf->allowed_ram_space;
+	allowed_swap_space = slurm_cgroup_conf->allowed_swap_space;
+
+        /*
+         *  Warning: OOM Killer must be disabled for slurmstepd
+         *  or it would be killed if the application uses
+         *  more memory than permitted
+         *
+         *  If an env value is already set for slurmstepd
+         *  OOM killer behavior, keep it, otherwise set the
+         *  -17 value, which means do not let OOM killer kill it
+         *
+         *  FYI, setting "export SLURMSTEPD_OOM_ADJ=-17"
+         *  in /etc/sysconfig/slurm would be the same
+         */
+        setenv("SLURMSTEPD_OOM_ADJ","-17",0);
+
+	return SLURM_SUCCESS;
+
+clean:
+	xcgroup_ns_destroy(&memory_ns);
+
+error:
+	return SLURM_ERROR;
+}
+
+extern int task_cgroup_memory_fini(slurm_cgroup_conf_t *slurm_cgroup_conf)
+{
+	xcgroup_t memory_cg;
+
+	if (user_cgroup_path[0] == '\0' ||
+	     job_cgroup_path[0] == '\0' ||
+	     jobstep_cgroup_path[0] == '\0')
+		return SLURM_SUCCESS;
+
+	/*
+	 * Move the slurmstepd back to the root memory cg and force empty
+	 * the step cgroup to move its allocated pages to its parent.
+	 * The release_agent will asynchronously be called for the step
+	 * cgroup. It will do the necessary cleanup.
+	 * It would be good if this force_empty mech could be done directly
+	 * by the memcg implementation at the end of the last task managed
+	 * by a cgroup. It is too difficult and near impossible to handle
+	 * that cleanup correctly with current memcg.
+	 */
+	if (xcgroup_create(&memory_ns,&memory_cg,"",0,0) == XCGROUP_SUCCESS) {
+		xcgroup_set_uint32_param(&memory_cg,"tasks",getpid());
+		xcgroup_destroy(&memory_cg);
+		xcgroup_set_param(&step_memory_cg,"memory.force_empty","1");
+	}
+
+	xcgroup_destroy(&user_memory_cg);
+	xcgroup_destroy(&job_memory_cg);
+	xcgroup_destroy(&step_memory_cg);
+
+	user_cgroup_path[0]='\0';
+	job_cgroup_path[0]='\0';
+	jobstep_cgroup_path[0]='\0';
+
+	xcgroup_ns_destroy(&memory_ns);
+
+	return SLURM_SUCCESS;
+}
+
+extern int task_cgroup_memory_create(slurmd_job_t *job)
+{
+	/* Build the uid/job/step memory cgroups, apply the job and step */
+	/* limits, then attach slurmstepd; RET SLURM_SUCCESS or SLURM_ERROR */
+	int rc;
+	int fstatus = SLURM_ERROR;
+	xcgroup_t memory_cg;
+	uint32_t jobid = job->jobid;
+	uint32_t stepid = job->stepid;
+	uid_t uid = job->uid;
+	gid_t gid = job->gid;
+	pid_t pid;
+	uint64_t ml,mlb,mls;
+
+	/* build user cgroup relative path if not set (should not be) */
+	if (*user_cgroup_path == '\0') {
+		if (snprintf(user_cgroup_path,PATH_MAX,
+			      "/uid_%u",uid) >= PATH_MAX) {
+			error("task/cgroup: unable to build uid %u memory "
+			      "cg relative path : %m",uid);
+			return SLURM_ERROR;
+		}
+	}
+
+	/* build job cgroup relative path if not set (should not be) */
+	if (*job_cgroup_path == '\0') {
+		if (snprintf(job_cgroup_path,PATH_MAX,"%s/job_%u",
+			      user_cgroup_path,jobid) >= PATH_MAX) {
+			error("task/cgroup: unable to build job %u memory "
+			      "cg relative path : %m",jobid);
+			return SLURM_ERROR;
+		}
+	}
+
+	/* build job step cgroup relative path if not set (should not be) */
+	if (*jobstep_cgroup_path == '\0') {
+		if (snprintf(jobstep_cgroup_path,PATH_MAX,"%s/step_%u",
+			      job_cgroup_path,stepid) >= PATH_MAX) {
+			error("task/cgroup: unable to build job step %u memory "
+			      "cg relative path : %m",stepid);
+			return SLURM_ERROR;
+		}
+	}
+
+	/*
+	 * create memory root cg and lock it
+	 *
+	 * we will keep the lock until the end to avoid the effect of a release
+	 * agent that would remove an existing cgroup hierarchy while we are
+	 * setting it up. As soon as the step cgroup is created, we can release
+	 * the lock.
+	 * Indeed, consecutive slurm steps could result in cg being removed
+	 * between the next EEXIST instantiation and the first addition of
+	 * a task. The release_agent will have to lock the root memory cgroup
+	 * to avoid this scenario.
+	 */
+	if (xcgroup_create(&memory_ns,&memory_cg,"",0,0) != XCGROUP_SUCCESS) {
+		error("task/cgroup: unable to create root memory xcgroup");
+		return SLURM_ERROR;
+	}
+	if (xcgroup_lock(&memory_cg) != XCGROUP_SUCCESS) {
+		xcgroup_destroy(&memory_cg);
+		error("task/cgroup: unable to lock root memory cg");
+		return SLURM_ERROR;
+	}
+
+	/*
+	 * Create user cgroup in the memory ns (it could already exist)
+	 * Ask for hierarchical memory accounting starting from the user
+	 * container in order to track the memory consumption up to the
+	 * user.
+	 * We do not set any limits at this level for now. It could be
+	 * interesting to do it in the future but memcg cleanup mech
+	 * are not working well so it will be really difficult to manage
+	 * addition/removal of memory amounts at this level. (kernel 2.6.34)
+	 */
+	if (xcgroup_create(&memory_ns,&user_memory_cg,
+			    user_cgroup_path,
+			    getuid(),getgid()) != XCGROUP_SUCCESS) {
+		goto error;
+	}
+	if (xcgroup_instanciate(&user_memory_cg) != XCGROUP_SUCCESS) {
+		xcgroup_destroy(&user_memory_cg);
+		goto error;
+	}
+	xcgroup_set_param(&user_memory_cg,"memory.use_hierarchy","1");
+
+	/*
+	 * Create job cgroup in the memory ns (it could already exist)
+	 * and set the associated memory limits.
+	 * Ask for hierarchical memory accounting starting from the job
+	 * container in order to guarantee that a job will stay on track
+	 * regardless of the consumption of each step.
+	 */
+	ml = (uint64_t) job->job_mem;
+	ml = ml * 1024 * 1024 ;
+	mlb = (uint64_t) (ml * (allowed_ram_space / 100.0)) ;
+	mls = (uint64_t) mlb + (ml * (allowed_swap_space / 100.0)) ;
+	if (xcgroup_create(&memory_ns,&job_memory_cg,
+			    job_cgroup_path,
+			    getuid(),getgid()) != XCGROUP_SUCCESS) {
+		xcgroup_destroy(&user_memory_cg);
+		goto error;
+	}
+	if (xcgroup_instanciate(&job_memory_cg) != XCGROUP_SUCCESS) {
+		xcgroup_destroy(&user_memory_cg);
+		xcgroup_destroy(&job_memory_cg);
+		goto error;
+	}
+	xcgroup_set_param(&job_memory_cg,"memory.use_hierarchy","1");
+	xcgroup_set_uint64_param(&job_memory_cg,
+				 "memory.limit_in_bytes",mlb);
+	xcgroup_set_uint64_param(&job_memory_cg,
+				 "memory.memsw.limit_in_bytes",mls);
+	debug("task/cgroup: job mem.limit=%luMB memsw.limit=%luMB",
+	      (unsigned long)(mlb/(1024*1024)),(unsigned long)(mls/(1024*1024)));
+
+	/*
+	 * Create step cgroup in the memory ns (it should not exist)
+	 * and set the associated memory limits.
+	 */
+	ml = (uint64_t) job->step_mem;
+	ml = ml * 1024 * 1024 ;
+	mlb = (uint64_t) (ml * (allowed_ram_space / 100.0)) ;
+	mls = (uint64_t) mlb + (ml * (allowed_swap_space / 100.0)) ;
+	if (xcgroup_create(&memory_ns,&step_memory_cg,
+			    jobstep_cgroup_path,
+			    uid,gid) != XCGROUP_SUCCESS) {
+		/* do not delete user/job cgroup as */
+		/* they can exist for other steps */
+		xcgroup_destroy(&user_memory_cg);
+		xcgroup_destroy(&job_memory_cg);
+		goto error;
+	}
+	if (xcgroup_instanciate(&step_memory_cg) != XCGROUP_SUCCESS) {
+		xcgroup_destroy(&user_memory_cg);
+		xcgroup_destroy(&job_memory_cg);
+		xcgroup_destroy(&step_memory_cg);
+		goto error;
+	}
+	xcgroup_set_uint64_param(&step_memory_cg,
+				 "memory.limit_in_bytes",mlb);
+	xcgroup_set_uint64_param(&step_memory_cg,
+				 "memory.memsw.limit_in_bytes",mls);
+	debug("task/cgroup: step mem.limit=%luMB memsw.limit=%luMB",
+	      (unsigned long)(mlb/(1024*1024)),(unsigned long)(mls/(1024*1024)));
+
+	/*
+	 * Attach the slurmstepd to the step memory cgroup
+	 */
+	pid = getpid();
+	rc = xcgroup_add_pids(&step_memory_cg,&pid,1);
+	if (rc != XCGROUP_SUCCESS) {
+		error("task/cgroup: unable to add slurmstepd to memory cg '%s'",
+		      step_memory_cg.path);
+		fstatus = SLURM_ERROR;
+	} else
+		fstatus = SLURM_SUCCESS;
+
+error:
+	/* common exit: always release the root memory cg lock taken above */
+	xcgroup_unlock(&memory_cg);
+	xcgroup_destroy(&memory_cg);
+	return fstatus;
+}
+
+extern int task_cgroup_memory_attach_task(slurmd_job_t *job)
+{
+	/*
+	 * Nothing to do here: tasks are automatically attached
+	 * as slurmstepd is in the step cg, so always report success.
+	 */
+
+	return SLURM_SUCCESS;
+}
+
diff --git a/src/plugins/task/cgroup/task_cgroup_memory.h b/src/plugins/task/cgroup/task_cgroup_memory.h
new file mode 100644
index 00000000000..4bc5855973f
--- /dev/null
+++ b/src/plugins/task/cgroup/task_cgroup_memory.h
@@ -0,0 +1,58 @@
+/*****************************************************************************\
+ *  task_cgroup_memory.h - memory cgroup subsystem primitives for task/cgroup
+ *****************************************************************************
+ *  Copyright (C) 2009 CEA/DAM/DIF
+ *  Written by Matthieu Hautreux <matthieu.hautreux@cea.fr>
+ *
+ *  This file is part of SLURM, a resource management program.
+ *  For details, see <https://computing.llnl.gov/linux/slurm/>.
+ *  Please also read the included file: DISCLAIMER.
+ *
+ *  SLURM is free software; you can redistribute it and/or modify it under
+ *  the terms of the GNU General Public License as published by the Free
+ *  Software Foundation; either version 2 of the License, or (at your option)
+ *  any later version.
+ *
+ *  In addition, as a special exception, the copyright holders give permission
+ *  to link the code of portions of this program with the OpenSSL library under
+ *  certain conditions as described in each individual source file, and
+ *  distribute linked combinations including the two. You must obey the GNU
+ *  General Public License in all respects for all of the code used other than
+ *  OpenSSL. If you modify file(s) with this exception, you may extend this
+ *  exception to your version of the file(s), but you are not obligated to do
+ *  so. If you do not wish to do so, delete this exception statement from your
+ *  version.  If you delete this exception statement from all source files in
+ *  the program, then also delete it here.
+ *
+ *  SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
+ *  WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ *  FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
+ *  details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with SLURM; if not, write to the Free Software Foundation, Inc.,
+ *  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA.
+\*****************************************************************************/
+
+#if HAVE_CONFIG_H
+#   include "config.h"
+#endif
+
+#ifndef _TASK_CGROUP_MEMORY_H_
+#define _TASK_CGROUP_MEMORY_H_
+
+#include "src/common/xcgroup_read_config.h"
+
+/* initialize memory subsystem of task/cgroup */
+extern int task_cgroup_memory_init(slurm_cgroup_conf_t *slurm_cgroup_conf);
+
+/* release memory subsystem resources */
+extern int task_cgroup_memory_fini(slurm_cgroup_conf_t *slurm_cgroup_conf);
+
+/* create user/job/jobstep memory cgroups */
+extern int task_cgroup_memory_create(slurmd_job_t *job);
+
+/* attach a task to the step memory cgroup (no-op: inherited from slurmstepd) */
+extern int task_cgroup_memory_attach_task(slurmd_job_t *job);
+
+#endif
diff --git a/src/plugins/task/none/task_none.c b/src/plugins/task/none/task_none.c
index e252e37f258..afef76c8f98 100644
--- a/src/plugins/task/none/task_none.c
+++ b/src/plugins/task/none/task_none.c
@@ -193,3 +193,11 @@ extern int task_post_term (slurmd_job_t *job)
 	return SLURM_SUCCESS;
 }
 
+/*
+ * task_post_step() is called after termination of the step
+ * (all the tasks)
+ */
+extern int task_post_step (slurmd_job_t *job)
+{
+	return SLURM_SUCCESS;
+}
diff --git a/src/slurmd/common/task_plugin.c b/src/slurmd/common/task_plugin.c
index 14319ebf10c..b1205682813 100644
--- a/src/slurmd/common/task_plugin.c
+++ b/src/slurmd/common/task_plugin.c
@@ -63,6 +63,7 @@ typedef struct slurmd_task_ops {
 	int	(*pre_setuid)		    (slurmd_job_t *job);
 	int	(*pre_launch)		    (slurmd_job_t *job);
 	int	(*post_term)		    (slurmd_job_t *job);
+	int	(*post_step)		    (slurmd_job_t *job);
 } slurmd_task_ops_t;
 
 
@@ -93,6 +94,7 @@ _slurmd_task_get_ops(slurmd_task_context_t *c)
 		"task_pre_setuid",
 		"task_pre_launch",
 		"task_post_term",
+		"task_post_step",
 	};
 	int n_syms = sizeof( syms ) / sizeof( char * );
 
@@ -363,3 +365,16 @@ extern int post_term(slurmd_job_t *job)
 
 	return (*(g_task_context->ops.post_term))(job);
 }
+
+/*
+ * Note that a step has terminated.
+ * Calls slurmd_task_init() first, then the task context's post_step operation.
+ * RET - SLURM_ERROR if slurmd_task_init() fails, else the operation's result
+ */
+extern int post_step(slurmd_job_t *job)
+{
+	if (slurmd_task_init())
+		return SLURM_ERROR;
+
+	return (*(g_task_context->ops.post_step))(job);
+}
diff --git a/src/slurmd/common/task_plugin.h b/src/slurmd/common/task_plugin.h
index ee87917c641..7ce8f5c9fcc 100644
--- a/src/slurmd/common/task_plugin.h
+++ b/src/slurmd/common/task_plugin.h
@@ -130,4 +130,11 @@ extern int pre_launch(slurmd_job_t *job);
  */
 extern int post_term(slurmd_job_t *job);
 
+/*
+ * Note that a step has terminated.
+ *
+ * RET - slurm error code
+ */
+extern int post_step(slurmd_job_t *job);
+
 #endif /* _SLURMD_TASK_PLUGIN_H_ */
diff --git a/src/slurmd/slurmd/get_mach_stat.c b/src/slurmd/slurmd/get_mach_stat.c
index 13fc93e71e4..47de3a4ae77 100644
--- a/src/slurmd/slurmd/get_mach_stat.c
+++ b/src/slurmd/slurmd/get_mach_stat.c
@@ -97,13 +97,6 @@
 #include "src/slurmctld/slurmctld.h"
 #include "src/slurmd/slurmd/get_mach_stat.h"
 
-static char* _cpuinfo_path = "/proc/cpuinfo";
-
-static int _compute_block_map(uint16_t numproc,
-			      uint16_t **block_map, uint16_t **block_map_inv);
-static int _chk_cpuinfo_str(char *buffer, char *keyword, char **valptr);
-static int _chk_cpuinfo_uint32(char *buffer, char *keyword, uint32_t *val);
-
 /* #define DEBUG_DETAIL	1 */	/* enable detailed debugging within SLURM */
 
 #if DEBUG_MODULE
@@ -132,6 +125,7 @@ main(int argc, char * argv[])
 	uint16_t testnumproc = 0;
 	uint32_t up_time = 0;
 	int days, hours, mins, secs;
+	char* _cpuinfo_path = "/proc/cpuinfo";
 
 	if (argc > 1) {
 	    	_cpuinfo_path = argv[1];
@@ -207,60 +201,6 @@ gethostname_short (char *name, size_t len)
 }
 #endif
 
-
-/*
- * get_procs - Return the count of procs on this system
- * Input: procs - buffer for the CPU count
- * Output: procs - filled in with CPU count, "1" if error
- *         return code - 0 if no error, otherwise errno
- */
-extern int
-get_procs(uint16_t *procs)
-{
-#ifdef LPAR_INFO_FORMAT2
-	/* AIX 5.3 only */
-	lpar_info_format2_t info;
-
-	*procs = 1;
-	if (lpar_get_info(LPAR_INFO_FORMAT2, &info, sizeof(info)) != 0) {
-		error("lpar_get_info() failed");
-		return EINVAL;
-	}
-
-	*procs = (uint16_t) info.online_vcpus;
-#else /* !LPAR_INFO_FORMAT2 */
-
-#  ifdef _SC_NPROCESSORS_ONLN
-	int my_proc_tally;
-
-	*procs = 1;
-	my_proc_tally = (int)sysconf(_SC_NPROCESSORS_ONLN);
-	if (my_proc_tally < 1) {
-		error ("get_procs: error running sysconf(_SC_NPROCESSORS_ONLN)");
-		return EINVAL;
-	}
-
-	*procs = (uint16_t) my_proc_tally;
-#  else
-#    ifdef HAVE_SYSCTLBYNAME
-	int ncpu;
-	size_t len = sizeof(ncpu);
-
-	*procs = 1;
-	if (sysctlbyname("hw.ncpus", &ncpu, &len, NULL, 0) == -1) {
-		error("get_procs: error running sysctl(HW_NCPU)");
-		return EINVAL;
-	}
-	*procs = (uint16_t) ncpu;
-#    else /* !HAVE_SYSCTLBYNAME */
-	*procs = 1;
-#    endif /* HAVE_SYSCTLBYNAME */
-#  endif /* _SC_NPROCESSORS_ONLN */
-#endif /* LPAR_INFO_FORMAT2 */
-
-	return 0;
-}
-
 #ifdef USE_OS_NAME
 /*
  * get_os_name - Return the operating system name and version
@@ -445,6 +385,7 @@ extern int get_up_time(uint32_t *up_time)
 	return 0;
 }
 
+#ifdef USE_CPU_SPEED
 /* _chk_cpuinfo_str
  *	check a line of cpuinfo data (buffer) for a keyword.  If it
  *	exists, return the string value for that keyword in *valptr.
@@ -466,25 +407,6 @@ static int _chk_cpuinfo_str(char *buffer, char *keyword, char **valptr)
 	return true;
 }
 
-/* _chk_cpuinfo_uint32
- *	check a line of cpuinfo data (buffer) for a keyword.  If it
- *	exists, return the uint16 value for that keyword in *valptr.
- * Input:  buffer - single line of cpuinfo data
- *	   keyword - keyword to check for
- * Output: valptr - uint32 value corresponding to keyword
- *         return code - true if keyword found, false if not found
- */
-static int _chk_cpuinfo_uint32(char *buffer, char *keyword, uint32_t *val)
-{
-	char *valptr;
-	if (_chk_cpuinfo_str(buffer, keyword, &valptr)) {
-		*val = strtoul(valptr, (char **)NULL, 10);
-		return true;
-	} else {
-		return false;
-	}
-}
-#ifdef USE_CPU_SPEED
 /* _chk_cpuinfo_float
  *	check a line of cpuinfo data (buffer) for a keyword.  If it
  *	exists, return the float value for that keyword in *valptr.
@@ -532,6 +454,7 @@ get_speed(float *speed)
 #else
 	FILE *cpu_info_file;
 	char buffer[128];
+	char* _cpuinfo_path = "/proc/cpuinfo";
 
 	*speed = 1.0;
 	cpu_info_file = fopen(_cpuinfo_path, "r");
@@ -551,490 +474,3 @@ get_speed(float *speed)
 
 #endif
 
-/*
- * get_cpuinfo - Return detailed cpuinfo on this system
- * Input:  numproc - number of processors on the system
- * Output: p_sockets - number of physical processor sockets
- *         p_cores - total number of physical CPU cores
- *         p_threads - total number of hardware execution threads
- *         block_map - asbtract->physical block distribution map
- *         block_map_inv - physical->abstract block distribution map (inverse)
- *         return code - 0 if no error, otherwise errno
- * NOTE: User must xfree block_map and block_map_inv
- */
-typedef struct cpuinfo {
-	uint16_t seen;
-	uint32_t cpuid;
-	uint32_t physid;
-	uint16_t physcnt;
-	uint32_t coreid;
-	uint16_t corecnt;
-	uint16_t siblings;
-	uint16_t cores;
-} cpuinfo_t;
-static cpuinfo_t *cpuinfo = NULL; /* array of CPU information for get_cpuinfo */
-				  /* Note: file static for qsort/_compare_cpus*/
-extern int
-get_cpuinfo(uint16_t numproc,
-		uint16_t *p_sockets, uint16_t *p_cores, uint16_t *p_threads,
-		uint16_t *block_map_size,
-		uint16_t **block_map, uint16_t **block_map_inv)
-{
-	int retval;
-	uint16_t numcpu	   = 0;		/* number of cpus seen */
-	uint16_t numphys   = 0;		/* number of unique "physical id"s */
-	uint16_t numcores  = 0;		/* number of unique "cores id"s */
-
-	uint16_t maxsibs   = 0;		/* maximum value of "siblings" */
-	uint16_t maxcores  = 0;		/* maximum value of "cores" */
-	uint16_t minsibs   = 0xffff;	/* minimum value of "siblings" */
-	uint16_t mincores  = 0xffff;	/* minimum value of "cores" */
-
-	uint32_t maxcpuid  = 0;		/* maximum CPU ID ("processor") */
-	uint32_t maxphysid = 0;		/* maximum "physical id" */
-	uint32_t maxcoreid = 0;		/* maximum "core id" */
-	uint32_t mincpuid  = 0xffffffff;/* minimum CPU ID ("processor") */
-	uint32_t minphysid = 0xffffffff;/* minimum "physical id" */
-	uint32_t mincoreid = 0xffffffff;/* minimum "core id" */
-	int i;
-#if defined (__sun)
-#if defined (_LP64)
-	int64_t curcpu, val, sockets, cores, threads;
-#else
-	int32_t curcpu, val, sockets, cores, threads;
-#endif
-	int32_t chip_id, core_id, ncore_per_chip, ncpu_per_chip;
-#else
-	FILE *cpu_info_file;
-	char buffer[128];
-	uint16_t curcpu, sockets, cores, threads;
-#endif
-
-	*p_sockets = numproc;		/* initially all single core/thread */
-	*p_cores   = 1;
-	*p_threads = 1;
-	*block_map_size = 0;
-	*block_map      = NULL;
-	*block_map_inv  = NULL;
-
-#if defined (__sun)
-	kstat_ctl_t   *kc;
-	kstat_t       *ksp;
-	kstat_named_t *knp;
-
-	kc = kstat_open();
-	if (kc == NULL) {
-		error ("get speed: kstat error %d", errno);
-		return errno;
-	}
-#else
-	cpu_info_file = fopen(_cpuinfo_path, "r");
-	if (cpu_info_file == NULL) {
-		error ("get_cpuinfo: error %d opening %s",
-			errno, _cpuinfo_path);
-		return errno;
-	}
-#endif
-
-	/* Note: assumes all processor IDs are within [0:numproc-1] */
-	/*       treats physical/core IDs as tokens, not indices */
-	if (cpuinfo)
-		memset(cpuinfo, 0, numproc * sizeof(cpuinfo_t));
-	else
-		cpuinfo = xmalloc(numproc * sizeof(cpuinfo_t));
-
-#if defined (__sun)
-	ksp = kstat_lookup(kc, "cpu_info", -1, NULL);
-	for (; ksp != NULL; ksp = ksp->ks_next) {
-		if (strcmp(ksp->ks_module, "cpu_info"))
-			continue;
-
-		numcpu++;
-		kstat_read(kc, ksp, NULL);
-
-		knp = kstat_data_lookup(ksp, "chip_id");
-		chip_id = knp->value.l;
-		knp = kstat_data_lookup(ksp, "core_id");
-		core_id = knp->value.l;
-		knp = kstat_data_lookup(ksp, "ncore_per_chip");
-		ncore_per_chip = knp->value.l;
-		knp = kstat_data_lookup(ksp, "ncpu_per_chip");
-		ncpu_per_chip = knp->value.l;
-
-		if (chip_id >= numproc) {
-			debug("cpuid is %ld (> %d), ignored", curcpu, numproc);
-			continue;
-		}
-
-		cpuinfo[chip_id].seen = 1;
-		cpuinfo[chip_id].cpuid = chip_id;
-
-		maxcpuid = MAX(maxcpuid, chip_id);
-		mincpuid = MIN(mincpuid, chip_id);
-
-		for (i = 0; i < numproc; i++) {
-			if ((cpuinfo[i].coreid == core_id) &&
-			    (cpuinfo[i].corecnt))
-				break;
-		}
-
-		if (i == numproc) {
-			numcores++;
-		} else {
-			cpuinfo[i].corecnt++;
-		}
-
-		if (chip_id < numproc) {
-			cpuinfo[chip_id].corecnt++;
-			cpuinfo[chip_id].coreid = core_id;
-		}
-
-		maxcoreid = MAX(maxcoreid, core_id);
-		mincoreid = MIN(mincoreid, core_id);
-
-		if (ncore_per_chip > numproc) {
-			debug("cores is %u (> %d), ignored",
-			      ncore_per_chip, numproc);
-				continue;
-		}
-
-		if (chip_id < numproc)
-			cpuinfo[chip_id].cores = ncore_per_chip;
-
-		maxcores = MAX(maxcores, ncore_per_chip);
-		mincores = MIN(mincores, ncore_per_chip);
-	}
-#else
-
-	curcpu = 0;
-	while (fgets(buffer, sizeof(buffer), cpu_info_file) != NULL) {
-		uint32_t val;
-		if (_chk_cpuinfo_uint32(buffer, "processor", &val)) {
-			numcpu++;
-			curcpu = val;
-		    	if (val >= numproc) {	/* out of bounds, ignore */
-				debug("cpuid is %u (> %d), ignored",
-					val, numproc);
-				continue;
-			}
-			cpuinfo[val].seen = 1;
-			cpuinfo[val].cpuid = val;
-			maxcpuid = MAX(maxcpuid, val);
-			mincpuid = MIN(mincpuid, val);
-		} else if (_chk_cpuinfo_uint32(buffer, "physical id", &val)) {
-			/* see if the ID has already been seen */
-			for (i=0; i<numproc; i++) {
-				if ((cpuinfo[i].physid == val)
-				&&  (cpuinfo[i].physcnt))
-					break;
-			}
-
-			if (i == numproc) {		/* new ID... */
-				numphys++;		/* ...increment total */
-			} else {			/* existing ID... */
-				cpuinfo[i].physcnt++;	/* ...update ID cnt */
-			}
-
-			if (curcpu < numproc) {
-				cpuinfo[curcpu].physcnt++;
-				cpuinfo[curcpu].physid = val;
-			}
-
-			maxphysid = MAX(maxphysid, val);
-			minphysid = MIN(minphysid, val);
-		} else if (_chk_cpuinfo_uint32(buffer, "core id", &val)) {
-			/* see if the ID has already been seen */
-			for (i = 0; i < numproc; i++) {
-				if ((cpuinfo[i].coreid == val)
-				&&  (cpuinfo[i].corecnt))
-					break;
-			}
-
-			if (i == numproc) {		/* new ID... */
-				numcores++;		/* ...increment total */
-			} else {			/* existing ID... */
-				cpuinfo[i].corecnt++;	/* ...update ID cnt */
-			}
-
-			if (curcpu < numproc) {
-				cpuinfo[curcpu].corecnt++;
-				cpuinfo[curcpu].coreid = val;
-			}
-
-			maxcoreid = MAX(maxcoreid, val);
-			mincoreid = MIN(mincoreid, val);
-		} else if (_chk_cpuinfo_uint32(buffer, "siblings", &val)) {
-			/* Note: this value is a count, not an index */
-		    	if (val > numproc) {	/* out of bounds, ignore */
-				debug("siblings is %u (> %d), ignored",
-					val, numproc);
-				continue;
-			}
-			if (curcpu < numproc)
-				cpuinfo[curcpu].siblings = val;
-			maxsibs = MAX(maxsibs, val);
-			minsibs = MIN(minsibs, val);
-		} else if (_chk_cpuinfo_uint32(buffer, "cpu cores", &val)) {
-			/* Note: this value is a count, not an index */
-		    	if (val > numproc) {	/* out of bounds, ignore */
-				debug("cores is %u (> %d), ignored",
-					val, numproc);
-				continue;
-			}
-			if (curcpu < numproc)
-				cpuinfo[curcpu].cores = val;
-			maxcores = MAX(maxcores, val);
-			mincores = MIN(mincores, val);
-		}
-	}
-
-	fclose(cpu_info_file);
-#endif
-
-	/*** Sanity check ***/
-	if (minsibs == 0) minsibs = 1;		/* guaranteee non-zero */
-	if (maxsibs == 0) {
-	    	minsibs = 1;
-	    	maxsibs = 1;
-	}
-	if (maxcores == 0) {			/* no core data */
-	    	mincores = 0;
-	    	maxcores = 0;
-	}
-
-	/*** Compute Sockets/Cores/Threads ***/
-	if ((minsibs == maxsibs) &&		/* homogeneous system */
-	    (mincores == maxcores)) {
-		sockets = numphys; 		/* unique "physical id" */
-		if (sockets <= 1) {		/* verify single socket */
-			sockets = numcpu / maxsibs; /* maximum "siblings" */
-		}
-		if (sockets == 0)
-			sockets = 1;		/* guarantee non-zero */
-
-		cores = numcores / sockets;	/* unique "core id" */
-		cores = MAX(maxcores, cores);	/* maximum "cpu cores" */
-
-		if (cores == 0) {
-			cores = numcpu / sockets;	/* assume multi-core */
-			if (cores > 1) {
-				debug3("Warning: cpuinfo missing 'core id' or "
-					"'cpu cores' but assuming multi-core");
-			}
-		}
-		if (cores == 0)
-			cores = 1;	/* guarantee non-zero */
-
-		threads = numcpu / (sockets * cores); /* solve for threads */
-		if (threads == 0)
-			threads = 1;	/* guarantee non-zero */
-	} else {				/* heterogeneous system */
-		sockets = numcpu;
-		cores   = 1;			/* one core per socket */
-		threads = 1;			/* one core per core */
-	}
-
-	*p_sockets = sockets;		/* update output parameters */
-	*p_cores   = cores;
-	*p_threads = threads;
-
-#if DEBUG_DETAIL
-	/*** Display raw data ***/
-	debug3("");
-	debug3("numcpu:     %u", numcpu);
-	debug3("numphys:    %u", numphys);
-	debug3("numcores:   %u", numcores);
-
-	debug3("cores:      %u->%u", mincores, maxcores);
-	debug3("sibs:       %u->%u", minsibs,  maxsibs);
-
-	debug3("cpuid:      %u->%u", mincpuid,  maxcpuid);
-	debug3("physid:     %u->%u", minphysid, maxphysid);
-	debug3("coreid:     %u->%u", mincoreid, maxcoreid);
-
-	for (i = 0; i <= maxcpuid; i++) {
-		debug3("CPU %d:", i);
-		debug3(" seen:     %u", cpuinfo[i].seen);
-		debug3(" physid:   %u", cpuinfo[i].physid);
-		debug3(" physcnt:  %u", cpuinfo[i].physcnt);
-		debug3(" siblings: %u", cpuinfo[i].siblings);
-		debug3(" cores:    %u", cpuinfo[i].cores);
-		debug3(" coreid:   %u", cpuinfo[i].coreid);
-		debug3(" corecnt:  %u", cpuinfo[i].corecnt);
-		debug3("");
-	}
-
-	debug3("");
-	debug3("Sockets:          %u", sockets);
-	debug3("Cores per socket: %u", cores);
-	debug3("Threads per core: %u", threads);
-#endif
-
-	*block_map_size = numcpu;
-	retval = _compute_block_map(*block_map_size, block_map, block_map_inv);
-
-	xfree(cpuinfo);		/* done with raw cpuinfo data */
-
-	return retval;
-}
-
-/*
- * _compute_block_map - Compute abstract->machine block mapping (and inverse)
- *   allows computation of CPU ID masks for an abstract block distribution
- *   of logical processors which can then be mapped the IDs used in the
- *   actual machine processor ID ordering (which can be BIOS/OS dependendent)
- * Input:  numproc - number of processors on the system
- *	   cpu - array of cpuinfo (file static for qsort/_compare_cpus)
- * Output: block_map, block_map_inv - asbtract->physical block distribution map
- *         return code - 0 if no error, otherwise errno
- * NOTE: User must free block_map and block_map_inv
- *
- * For example, given a system with 8 logical processors arranged as:
- *
- *	Sockets:          4
- *	Cores per socket: 2
- *	Threads per core: 1
- *
- * and a logical CPU ID assignment of:
- *
- *	Machine logical CPU ID assignment:
- *	Logical CPU ID:        0  1  2  3  4  5  6  7
- *	Physical Socket ID:    0  1  3  2  0  1  3  2
- *
- * The block_map would be:
- *
- *	Abstract -> Machine logical CPU ID block mapping:
- *	Input: (Abstract ID)   0  1  2  3  4  5  6  7
- *	Output: (Machine ID)   0  4  1  5  3  7  2  6  <--- block_map[]
- *	Physical Socket ID:    0  0  1  1  2  2  3  3
- *
- * and it's inverse would be:
- *
- *	Machine -> Abstract logical CPU ID block mapping: (inverse)
- *	Input: (Machine ID)    0  1  2  3  4  5  6  7
- *	Output: (Abstract ID)  0  2  6  4  1  3  7  5  <--- block_map_inv[]
- *	Physical Socket ID:    0  1  3  2  0  1  3  2
- */
-
-/* physical cpu comparison with void * arguments to allow use with
- * libc qsort()
- */
-static int _icmp16(uint16_t a, uint16_t b)
-{
-    	if (a < b) {
-		return -1;
-	} else if (a == b) {
-		return 0;
-	} else {
-		return 1;
-	}
-}
-static int _icmp32(uint32_t a, uint32_t b)
-{
-	if (a < b) {
-		return -1;
-	} else if (a == b) {
-		return 0;
-	} else {
-		return 1;
-	}
-}
-
-static int _compare_cpus(const void *a1, const void *b1) {
-	uint16_t *a = (uint16_t *) a1;
-	uint16_t *b = (uint16_t *) b1;
-	int cmp;
-
-	cmp = -1 * _icmp16(cpuinfo[*a].seen,cpuinfo[*b].seen); /* seen to front */
-	if (cmp != 0)
-		return cmp;
-
-	cmp = _icmp32(cpuinfo[*a].physid, cpuinfo[*b].physid); /* key 1: physid */
-	if (cmp != 0)
-		return cmp;
-
-	cmp = _icmp32(cpuinfo[*a].coreid, cpuinfo[*b].coreid); /* key 2: coreid */
-	if (cmp != 0)
-		return cmp;
-
-	cmp = _icmp32(cpuinfo[*a].cpuid, cpuinfo[*b].cpuid);   /* key 3: cpu id */
-	return cmp;
-}
-
-static int _compute_block_map(uint16_t numproc,
-			      uint16_t **block_map, uint16_t **block_map_inv)
-{
-	uint16_t i;
-	/* Compute abstract->machine block mapping (and inverse) */
-	if (block_map) {
-		*block_map = xmalloc(numproc * sizeof(uint16_t));
-		for (i = 0; i < numproc; i++) {
-			(*block_map)[i] = i;
-		}
-		qsort(*block_map, numproc, sizeof(uint16_t), &_compare_cpus);
-	}
-	if (block_map_inv) {
-		*block_map_inv = xmalloc(numproc * sizeof(uint16_t));
-		for (i = 0; i < numproc; i++) {
-			uint16_t idx = (*block_map)[i];
-			(*block_map_inv)[idx] = i;
-		}
-	}
-
-#if DEBUG_DETAIL
-	/* Display the mapping tables */
-
-	debug3("\nMachine logical CPU ID assignment:");
-	debug3("Logical CPU ID:      ");
-	for (i = 0; i < numproc; i++) {
-		debug3("%3d", i);
-	}
-	debug3("");
-	debug3("Physical Socket ID:  ");
-	for (i = 0; i < numproc; i++) {
-		debug3("%3u", cpuinfo[i].physid);
-	}
-	debug3("");
-
-	if (block_map) {
-		debug3("\nAbstract -> Machine logical CPU ID block mapping:");
-		debug3("Input: (Abstract ID) ");
-		for (i = 0; i < numproc; i++) {
-			debug3("%3d", i);
-		}
-		debug3("");
-		debug3("Output: (Machine ID) ");
-		for (i = 0; i < numproc; i++) {
-			debug3("%3u", (*block_map)[i]);
-		}
-		debug3("");
-		debug3("Physical Socket ID:  ");
-		for (i = 0; i < numproc; i++) {
-			uint16_t id = (*block_map)[i];
-			debug3("%3u", cpuinfo[id].physid);
-		}
-		debug3("");
-	}
-
-	if (block_map_inv) {
-		debug3("\nMachine -> Abstract logical CPU ID block mapping: "
-			"(inverse)");
-		debug3("Input: (Machine ID)  ");
-		for (i = 0; i < numproc; i++) {
-			debug3("%3d", i);
-		}
-		debug3("");
-		debug3("Output: (Abstract ID)");
-		for (i = 0; i < numproc; i++) {
-			debug3("%3u", (*block_map_inv)[i]);
-		}
-		debug3("");
-		debug3("Physical Socket ID:  ");
-		for (i = 0; i < numproc; i++) {
-			debug3("%3u", cpuinfo[i].physid);
-		}
-		debug3("");
-	}
-#endif
-	return 0;
-}
diff --git a/src/slurmd/slurmd/get_mach_stat.h b/src/slurmd/slurmd/get_mach_stat.h
index aca63cddec7..625f686e057 100644
--- a/src/slurmd/slurmd/get_mach_stat.h
+++ b/src/slurmd/slurmd/get_mach_stat.h
@@ -51,11 +51,6 @@
 #  include <inttypes.h>
 #endif  /*  HAVE_CONFIG_H */
 
-extern int get_procs(uint16_t *procs);
-extern int get_cpuinfo(uint16_t numproc,
-		       uint16_t *sockets, uint16_t *cores, uint16_t *threads,
-		       uint16_t *block_map_size,
-		       uint16_t **block_map, uint16_t **block_map_inv);
 extern int get_mach_name(char *node_name);
 extern int get_memory(uint32_t *real_memory);
 extern int get_tmp_disk(uint32_t *tmp_disk, char *tmp_fs);
diff --git a/src/slurmd/slurmd/slurmd.c b/src/slurmd/slurmd/slurmd.c
index ba15c3b598e..3b788e72312 100644
--- a/src/slurmd/slurmd/slurmd.c
+++ b/src/slurmd/slurmd/slurmd.c
@@ -85,6 +85,7 @@
 #include "src/common/stepd_api.h"
 #include "src/common/switch.h"
 #include "src/slurmd/common/task_plugin.h"
+#include "src/common/xcpuinfo.h"
 #include "src/common/xmalloc.h"
 #include "src/common/xstring.h"
 #include "src/common/xsignal.h"
@@ -767,7 +768,7 @@ _read_config(bool reconfig)
 
 	_update_logging();
 	_update_nice();
-		
+
 	get_procs(&conf->actual_cpus);
 	get_cpuinfo(conf->actual_cpus,
 		    &conf->actual_sockets,
@@ -776,7 +777,7 @@ _read_config(bool reconfig)
 		    &conf->block_map_size,
 		    &conf->block_map, &conf->block_map_inv);
 
-	if (((cf->fast_schedule == 0) && !cr_flag && !gang_flag) || 
+	if (((cf->fast_schedule == 0) && !cr_flag && !gang_flag) ||
 	    ((cf->fast_schedule == 1) &&
 	     (conf->actual_cpus < conf->conf_cpus))) {
 		conf->cpus    = conf->actual_cpus;
diff --git a/src/slurmd/slurmstepd/mgr.c b/src/slurmd/slurmstepd/mgr.c
index 11ecd35fe80..ef76ff03295 100644
--- a/src/slurmd/slurmstepd/mgr.c
+++ b/src/slurmd/slurmstepd/mgr.c
@@ -1006,6 +1006,11 @@ job_manager(slurmd_job_t *job)
 	if (!job->batch && !job->user_managed_io && io_initialized)
 		_wait_for_io(job);
 
+	/*
+	 * Warn task plugin that the user's step has terminated
+	 */
+	post_step(job);
+
 	debug2("Before call to spank_fini()");
 	if (spank_fini (job)  < 0) {
 		error ("spank_fini failed");
-- 
GitLab