From 53706b41b930fe6374add3ad097a627254c45bc1 Mon Sep 17 00:00:00 2001
From: David Bigagli <david@schedmd.com>
Date: Fri, 27 Jun 2014 11:43:35 -0700
Subject: [PATCH] Ignore transient cgroup errors.

---
 NEWS                                            | 2 ++
 src/plugins/proctrack/cgroup/proctrack_cgroup.c | 5 ++++-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/NEWS b/NEWS
index 99481b0e643..0fa36a005b6 100644
--- a/NEWS
+++ b/NEWS
@@ -29,6 +29,8 @@ documents those changes that are of interest to users and admins.
     you could get multiple '*' wckeys.
  -- Fix bug which could report to squeue the wrong partition for a running job
     that is submitted to multiple partitions.
+ -- If rmdir() of a cgroup fails, ignore the error to prevent slurmstepd from
+    looping forever.
 
 * Changes in Slurm 14.03.4
 ==========================
diff --git a/src/plugins/proctrack/cgroup/proctrack_cgroup.c b/src/plugins/proctrack/cgroup/proctrack_cgroup.c
index d3608d3dd93..e92386937d6 100644
--- a/src/plugins/proctrack/cgroup/proctrack_cgroup.c
+++ b/src/plugins/proctrack/cgroup/proctrack_cgroup.c
@@ -287,8 +287,11 @@ int _slurm_cgroup_destroy(void)
 		if (xcgroup_delete(&step_freezer_cg) != XCGROUP_SUCCESS) {
 			error("_slurm_cgroup_destroy: problem deleting step "
 			      "cgroup path %s: %m", step_freezer_cg.path);
+			/* Ignore the error since the cgroup will
+			 * be eventually cleaned up by the release
+			 * script.
+			 */
 			xcgroup_unlock(&freezer_cg);
-			return SLURM_ERROR;
 		}
 		xcgroup_destroy(&step_freezer_cg);
 	}
-- 
GitLab