From 53706b41b930fe6374add3ad097a627254c45bc1 Mon Sep 17 00:00:00 2001 From: David Bigagli <david@schedmd.com> Date: Fri, 27 Jun 2014 11:43:35 -0700 Subject: [PATCH] Ignore transient cgroup errors. --- NEWS | 2 ++ src/plugins/proctrack/cgroup/proctrack_cgroup.c | 5 ++++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/NEWS b/NEWS index 99481b0e643..0fa36a005b6 100644 --- a/NEWS +++ b/NEWS @@ -29,6 +29,8 @@ documents those changes that are of interest to users and admins. you could get multiple '*' wckeys. -- Fix bug which could report to squeue the wrong partition for a running job that is submitted to multiple partitions. + -- If rmdir() of a cgroup fails, ignore the error to prevent slurmstepd from + looping forever. * Changes in Slurm 14.03.4 ========================== diff --git a/src/plugins/proctrack/cgroup/proctrack_cgroup.c b/src/plugins/proctrack/cgroup/proctrack_cgroup.c index d3608d3dd93..e92386937d6 100644 --- a/src/plugins/proctrack/cgroup/proctrack_cgroup.c +++ b/src/plugins/proctrack/cgroup/proctrack_cgroup.c @@ -287,8 +287,11 @@ int _slurm_cgroup_destroy(void) if (xcgroup_delete(&step_freezer_cg) != XCGROUP_SUCCESS) { error("_slurm_cgroup_destroy: problem deleting step " "cgroup path %s: %m", step_freezer_cg.path); + /* Ignore the error since the cgroup will + * be eventually cleaned up by the release + * script. + */ xcgroup_unlock(&freezer_cg); - return SLURM_ERROR; } xcgroup_destroy(&step_freezer_cg); } -- GitLab