Commit d835060d authored by Morris Jette

Merge pull request #13 from grondo/2.3-step-memcg-fixes

task/cgroup: minor job step memcg fixes
parents b720f7f1 a93afcd1
@@ -748,6 +748,52 @@ int xcgroup_get_uint64_param(xcgroup_t* cg, char* param, uint64_t* value)
 	return fstatus;
 }
 
+static int cgroup_move_process_by_task (xcgroup_t *cg, pid_t pid)
+{
+	DIR *dir;
+	struct dirent *entry;
+	char path [PATH_MAX];
+
+	if (snprintf (path, PATH_MAX, "/proc/%d/task", (int) pid) >= PATH_MAX) {
+		error ("xcgroup: move_process_by_task: path overflow!");
+		return XCGROUP_ERROR;
+	}
+
+	dir = opendir (path);
+	if (!dir) {
+		error ("xcgroup: opendir(%s): %m", path);
+		return XCGROUP_ERROR;
+	}
+
+	while ((entry = readdir (dir))) {
+		if (entry->d_name[0] != '.')
+			xcgroup_set_param (cg, "tasks", entry->d_name);
+	}
+	closedir (dir);
+
+	return XCGROUP_SUCCESS;
+}
+
+static int cgroup_procs_writable (xcgroup_t *cg)
+{
+	struct stat st;
+	char *path = NULL;
+	int rc = 0;
+
+	xstrfmtcat (path, "%s/%s", cg->path, "cgroup.procs");
+	if ((stat (path, &st) >= 0) && (st.st_mode & S_IWUSR))
+		rc = 1;
+	xfree (path);
+
+	return (rc);
+}
+
+int xcgroup_move_process (xcgroup_t *cg, pid_t pid)
+{
+	if (!cgroup_procs_writable (cg))
+		return cgroup_move_process_by_task (cg, pid);
+
+	return xcgroup_set_uint32_param (cg, "cgroup.procs", pid);
+}
+
 /*
  * -----------------------------------------------------------------------------
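The per-task fallback above walks /proc/<pid>/task and writes each thread ID into the cgroup's "tasks" file one at a time. Below is a minimal standalone sketch of that directory walk (not Slurm code; it only prints the thread IDs of the current process, assuming a Linux /proc filesystem):

#include <dirent.h>
#include <limits.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	char path[PATH_MAX];
	DIR *dir;
	struct dirent *entry;

	/* Each entry under /proc/<pid>/task is one thread ID (TID). */
	snprintf(path, sizeof(path), "/proc/%d/task", (int) getpid());
	dir = opendir(path);
	if (!dir) {
		perror("opendir");
		return 1;
	}
	while ((entry = readdir(dir))) {
		if (entry->d_name[0] != '.')	/* skip "." and ".." */
			printf("tid: %s\n", entry->d_name);
	}
	closedir(dir);
	return 0;
}

Because threads are written one by one, a thread created after the walk starts can be missed; that is the race the header comment below warns about.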
@@ -314,4 +314,18 @@ int xcgroup_set_uint64_param(xcgroup_t* cg,char* parameter,uint64_t value);
  */
 int xcgroup_get_uint64_param(xcgroup_t* cg,char* param,uint64_t* value);
 
+/*
+ * Move process 'pid' (and all of its threads) to cgroup 'cg'.
+ *
+ * This call ensures that pid and all of its threads are moved to the
+ * cgroup cg. If the cgroup.procs file is not writable, the threads
+ * must be moved individually, so this call can be racy.
+ *
+ * Returns:
+ *  - XCGROUP_ERROR
+ *  - XCGROUP_SUCCESS
+ */
+int xcgroup_move_process(xcgroup_t *cg, pid_t pid);
+
 #endif
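The writability probe exists because cgroup.procs was not always writable on kernels of this era, so a single-write attach of the whole process could fail and the per-thread path is the only option. A standalone sketch of the same S_IWUSR check follows; the mount path is an assumption for illustration (the real code builds the path from cg->path):

#include <stdio.h>
#include <sys/stat.h>

int main(void)
{
	/* Assumed cgroup mount path, for illustration only. */
	const char *path = "/sys/fs/cgroup/memory/cgroup.procs";
	struct stat st;

	if (stat(path, &st) == 0 && (st.st_mode & S_IWUSR))
		printf("writable: one write to cgroup.procs moves the whole process\n");
	else
		printf("not writable: fall back to per-thread moves via \"tasks\"\n");
	return 0;
}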
@@ -175,19 +175,22 @@ extern int task_cgroup_memory_fini(slurm_cgroup_conf_t *slurm_cgroup_conf)
 		return SLURM_SUCCESS;
 
 	/*
-	 * Move the slurmstepd back to the root memory cg and force empty
+	 * Move the slurmstepd back to the root memory cg and remove[*]
 	 * the step cgroup to move its allocated pages to its parent.
-	 * The release_agent will asynchroneously be called for the step
-	 * cgroup. It will do the necessary cleanup.
-	 * It should be good if this force_empty mech could be done directly
-	 * by the memcg implementation at the end of the last task managed
-	 * by a cgroup. It is too difficult and near impossible to handle
-	 * that cleanup correctly with current memcg.
+	 *
+	 * [*] Calling rmdir(2) on an empty cgroup moves all resident charged
+	 *  pages to the parent (i.e. the job cgroup). (If force_empty were
+	 *  used instead, only clean pages would be flushed.) This keeps
+	 *  resident pagecache pages associated with the job. It is expected
+	 *  that the job epilog will then optionally force_empty the
+	 *  job cgroup (to flush pagecache), and then rmdir(2) the cgroup
+	 *  or wait for a release notification from the kernel.
 	 */
 	if (xcgroup_create(&memory_ns,&memory_cg,"",0,0) == XCGROUP_SUCCESS) {
-		xcgroup_set_uint32_param(&memory_cg,"tasks",getpid());
+		xcgroup_move_process(&memory_cg, getpid());
 		xcgroup_destroy(&memory_cg);
-		xcgroup_set_param(&step_memory_cg,"memory.force_empty","1");
+		if (xcgroup_delete(&step_memory_cg) != XCGROUP_SUCCESS)
+			error ("cgroup: rmdir step memcg failed: %m");
 	}
 	xcgroup_destroy(&user_memory_cg);
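The cleanup sequence the revised comment describes, an optional force_empty followed by rmdir(2), can be sketched as a standalone program. The cgroup path below is a hypothetical example; real paths depend on the site's cgroup mount point and Slurm configuration:

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	/* Hypothetical job memcg path, for illustration only. */
	const char *cg = "/sys/fs/cgroup/memory/slurm/uid_1000/job_42";
	char path[4096];
	int fd;

	/* Optional: flush clean pagecache charged to this cgroup. */
	snprintf(path, sizeof(path), "%s/memory.force_empty", cg);
	fd = open(path, O_WRONLY);
	if (fd >= 0) {
		if (write(fd, "1", 1) != 1)
			perror("write memory.force_empty");
		close(fd);
	}

	/* rmdir(2) on the now-empty cgroup reparents remaining charges. */
	if (rmdir(cg) < 0)
		perror("rmdir");
	return 0;
}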