Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
S
Slurm
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package Registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Service Desk
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Terms and privacy
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
tud-zih-energy
Slurm
Commits
d835060d
Commit
d835060d
authored
13 years ago
by
Morris Jette
Browse files
Options
Downloads
Plain Diff
Merge pull request #13 from grondo/2.3-step-memcg-fixes
task/cgroup: minor job step memcg fixes
parents
b720f7f1
a93afcd1
No related branches found
No related tags found
No related merge requests found
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
src/common/xcgroup.c
+46
-0
46 additions, 0 deletions
src/common/xcgroup.c
src/common/xcgroup.h
+14
-0
14 additions, 0 deletions
src/common/xcgroup.h
src/plugins/task/cgroup/task_cgroup_memory.c
+12
-9
12 additions, 9 deletions
src/plugins/task/cgroup/task_cgroup_memory.c
with
72 additions
and
9 deletions
src/common/xcgroup.c
+
46
−
0
View file @
d835060d
...
...
@@ -748,6 +748,52 @@ int xcgroup_get_uint64_param(xcgroup_t* cg, char* param, uint64_t* value)
return
fstatus
;
}
/*
 * Move every thread of process 'pid' into cgroup 'cg' by writing each
 * entry found under /proc/<pid>/task to the cgroup "tasks" file.
 *
 * Threads are moved one at a time while the directory is being scanned,
 * so this operation can race with thread creation and exit. The result
 * of each individual write is ignored.
 *
 * returns:
 *  - XCGROUP_ERROR if the task directory path cannot be built or opened
 *  - XCGROUP_SUCCESS otherwise
 */
static int cgroup_move_process_by_task (xcgroup_t *cg, pid_t pid)
{
	DIR *dir;
	struct dirent *entry;
	char path[PATH_MAX];
	int len;

	/* snprintf() returns a negative value on error: reject that too */
	len = snprintf(path, sizeof(path), "/proc/%d/task", (int) pid);
	if (len < 0 || len >= (int) sizeof(path)) {
		error("xcgroup: move_process_by_task: path overflow!");
		return XCGROUP_ERROR;
	}

	dir = opendir(path);
	if (!dir) {
		error("xcgroup: opendir(%s): %m", path);
		return XCGROUP_ERROR;
	}

	while ((entry = readdir(dir))) {
		/* Skip ".", ".." and any other dot entry */
		if (entry->d_name[0] == '.')
			continue;
		/* Best effort: a failed write for one thread is not reported */
		(void) xcgroup_set_param(cg, "tasks", entry->d_name);
	}

	closedir(dir);
	return XCGROUP_SUCCESS;
}
/*
 * Report whether the "cgroup.procs" file of cgroup 'cg' can be stat'ed
 * and has the owner-write permission bit set.
 *
 * returns:
 *  - 1 if stat(2) succeeds on the file and S_IWUSR is set in its mode
 *  - 0 otherwise
 */
static int cgroup_procs_writable (xcgroup_t *cg)
{
	struct stat sb;
	char *procs_file = NULL;
	int writable;

	xstrfmtcat(procs_file, "%s/%s", cg->path, "cgroup.procs");

	if (stat(procs_file, &sb) < 0)
		writable = 0;
	else
		writable = (sb.st_mode & S_IWUSR) ? 1 : 0;

	xfree(procs_file);
	return writable;
}
/*
 * Move process 'pid' into cgroup 'cg'.
 *
 * When the cgroup.procs file is writable the pid is written there;
 * otherwise each thread of the process is moved individually through
 * the tasks file.
 */
int xcgroup_move_process (xcgroup_t *cg, pid_t pid)
{
	int rc;

	if (cgroup_procs_writable(cg))
		rc = xcgroup_set_uint32_param(cg, "cgroup.procs", pid);
	else
		rc = cgroup_move_process_by_task(cg, pid);

	return rc;
}
/*
* -----------------------------------------------------------------------------
...
...
This diff is collapsed.
Click to expand it.
src/common/xcgroup.h
+
14
−
0
View file @
d835060d
...
...
@@ -314,4 +314,18 @@ int xcgroup_set_uint64_param(xcgroup_t* cg,char* parameter,uint64_t value);
*/
int
xcgroup_get_uint64_param
(
xcgroup_t
*
cg
,
char
*
param
,
uint64_t
*
value
);
/*
* Move process 'pid' (and all its threads) to cgroup 'cg'
*
* This call ensures that pid and all its threads are moved to the
* cgroup cg. If the cgroup.procs file is not writable, then threads
* must be moved individually and this call can be racy.
*
* returns:
* - XCGROUP_ERROR
* - XCGROUP_SUCCESS
*/
int
xcgroup_move_process
(
xcgroup_t
*
cg
,
pid_t
pid
);
#endif
This diff is collapsed.
Click to expand it.
src/plugins/task/cgroup/task_cgroup_memory.c
+
12
−
9
View file @
d835060d
...
...
@@ -175,19 +175,22 @@ extern int task_cgroup_memory_fini(slurm_cgroup_conf_t *slurm_cgroup_conf)
return
SLURM_SUCCESS
;
/*
* Move the slurmstepd back to the root memory cg and
force empty
* Move the slurmstepd back to the root memory cg and
remove[*]
* the step cgroup to move its allocated pages to its parent.
* The release_agent will asynchronously be called for the step
* cgroup. It will do the necessary cleanup.
* It should be good if this force_empty mech could be done directly
* by the memcg implementation at the end of the last task managed
* by a cgroup. It is too difficult and near impossible to handle
* that cleanup correctly with current memcg.
*
* [*] Calling rmdir(2) on an empty cgroup moves all resident charged
* pages to the parent (i.e. the job cgroup). (If force_empty were
* used instead, only clean pages would be flushed). This keeps
* resident pagecache pages associated with the job. It is expected
* that the job epilog will then optionally force_empty the
* job cgroup (to flush pagecache), and then rmdir(2) the cgroup
* or wait for release notification from kernel.
*/
if
(
xcgroup_create
(
&
memory_ns
,
&
memory_cg
,
""
,
0
,
0
)
==
XCGROUP_SUCCESS
)
{
xcgroup_
set_uint32_param
(
&
memory_cg
,
"tasks"
,
getpid
());
xcgroup_
move_process
(
&
memory_cg
,
getpid
());
xcgroup_destroy
(
&
memory_cg
);
xcgroup_set_param
(
&
step_memory_cg
,
"memory.force_empty"
,
"1"
);
if
(
xcgroup_delete
(
&
step_memory_cg
)
!=
XCGROUP_SUCCESS
)
error
(
"cgroup: rmdir step memcg failed: %m"
);
}
xcgroup_destroy
(
&
user_memory_cg
);
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment