Skip to content
Snippets Groups Projects
Commit aaedd86c authored by Christopher J. Morrone's avatar Christopher J. Morrone
Browse files

Rename proctrack container functions. All are now named slurm_container_*.

parent 48687a1e
No related branches found
No related tags found
No related merge requests found
This file describes changes in recent versions of SLURM. It primarily
documents those changes that are of interest to users and admins.
* Changes in SLURM 0.6.0-pre2
=============================
* Changes in SLURM 0.6.0-pre1
=============================
-- Added bgl/partition_allocator/smap changes from 0.5.7.
......
......@@ -110,7 +110,7 @@ extern int fini ( void )
* Uses job step process group id as a unique identifier. Job id
* and step id are not unique by themselves.
*/
extern uint32_t slurm_create_container ( slurmd_job_t *job )
extern uint32_t slurm_container_create ( slurmd_job_t *job )
{
int pgid = (int) job->pgid;
int i;
......@@ -128,13 +128,13 @@ extern uint32_t slurm_create_container ( slurmd_job_t *job )
}
extern int slurm_add_container ( uint32_t id )
extern int slurm_container_add ( uint32_t id, pid_t pid )
{
error("slurm_add_container not supported");
return SLURM_ERROR;
debug("slurm_container_add not supported");
return SLURM_SUCCESS;
}
extern int slurm_signal_container ( uint32_t id, int signal )
extern int slurm_container_signal ( uint32_t id, int signal )
{
int jobid = (int) id;
if (!id) /* no container ID */
......@@ -143,7 +143,7 @@ extern int slurm_signal_container ( uint32_t id, int signal )
return proctrack_job_kill(&jobid, &signal);
}
extern int slurm_destroy_container ( uint32_t id )
extern int slurm_container_destroy ( uint32_t id )
{
int jobid = (int) id;
......@@ -158,7 +158,7 @@ extern int slurm_destroy_container ( uint32_t id )
}
extern uint32_t
slurm_find_container(pid_t pid)
slurm_container_find(pid_t pid)
{
int local_pid = (int) pid;
int cont_id = proctrack_get_job_id(&local_pid);
......
......@@ -92,27 +92,27 @@ extern int fini ( void )
/*
* Uses slurmd job-step manager's pid as the unique container id.
*/
extern uint32_t slurm_create_container ( slurmd_job_t *job )
extern uint32_t slurm_container_create ( slurmd_job_t *job )
{
return (uint32_t) job->jmgr_pid;
}
extern int slurm_add_container ( uint32_t id )
extern int slurm_container_add ( uint32_t id, pid_t pid )
{
return SLURM_SUCCESS;
}
extern int slurm_signal_container ( uint32_t id, int signal )
extern int slurm_container_signal ( uint32_t id, int signal )
{
return kill_proc_tree_not_top((pid_t)id, signal);
}
extern int slurm_destroy_container ( uint32_t id )
extern int slurm_container_destroy ( uint32_t id )
{
return SLURM_SUCCESS;
}
extern uint32_t slurm_find_container(pid_t pid)
extern uint32_t slurm_container_find(pid_t pid)
{
return (uint32_t) find_ancestor(pid, "slurmd");
}
......
......@@ -94,17 +94,17 @@ extern int fini ( void )
/*
* Uses job step process group id.
*/
extern uint32_t slurm_create_container ( slurmd_job_t *job )
extern uint32_t slurm_container_create ( slurmd_job_t *job )
{
return (uint32_t) job->pgid;
}
extern int slurm_add_container ( uint32_t id )
extern int slurm_container_add ( uint32_t id, pid_t pid )
{
return SLURM_SUCCESS;
}
extern int slurm_signal_container ( uint32_t id, int signal )
extern int slurm_container_signal ( uint32_t id, int signal )
{
pid_t pid = (pid_t) id;
......@@ -119,13 +119,13 @@ extern int slurm_signal_container ( uint32_t id, int signal )
return (int)killpg(pid, signal);
}
extern int slurm_destroy_container ( uint32_t id )
extern int slurm_container_destroy ( uint32_t id )
{
return SLURM_SUCCESS;
}
extern uint32_t
slurm_find_container(pid_t pid)
slurm_container_find(pid_t pid)
{
pid_t rc = getpgid(pid);
......
......@@ -696,9 +696,9 @@ _fork_all_tasks(slurmd_job_t *job)
* will wait for our signal before calling exec.
*/
shm_update_step_pgid(job->jobid, job->stepid, job->pgid);
cont_id = slurm_create_container(job);
cont_id = slurm_container_create(job);
if (cont_id == 0) {
error("slurm_create_container: %m");
error("slurm_container_create: %m");
exit(3);
}
shm_update_step_cont_id(job->jobid, job->stepid, cont_id);
......@@ -876,12 +876,12 @@ _kill_running_tasks(slurmd_job_t *job)
return;
if (s->cont_id) {
slurm_signal_container(s->cont_id, SIGKILL);
slurm_container_signal(s->cont_id, SIGKILL);
/* Try destroying the container up to 30 times */
while (slurm_destroy_container(s->cont_id) != SLURM_SUCCESS
while (slurm_container_destroy(s->cont_id) != SLURM_SUCCESS
&& limit < 30) {
slurm_signal_container(s->cont_id, SIGKILL);
slurm_container_signal(s->cont_id, SIGKILL);
sleep(1);
limit++;
}
......
......@@ -38,7 +38,7 @@
/* ************************************************************************ */
typedef struct slurm_proctrack_ops {
uint32_t (*create) ( slurmd_job_t *job );
int (*add) ( uint32_t id );
int (*add) ( uint32_t id, pid_t pid );
int (*signal) ( uint32_t id, int signal );
int (*destroy) ( uint32_t id );
uint32_t (*find_cont) ( pid_t pid );
......@@ -70,11 +70,11 @@ _proctrack_get_ops( slurm_proctrack_context_t *c )
* Must be synchronized with slurm_proctrack_ops_t above.
*/
static const char *syms[] = {
"slurm_create_container",
"slurm_add_container",
"slurm_signal_container",
"slurm_destroy_container",
"slurm_find_container"
"slurm_container_create",
"slurm_container_add",
"slurm_container_signal",
"slurm_container_destroy",
"slurm_container_find"
};
int n_syms = sizeof( syms ) / sizeof( char * );
......@@ -223,7 +223,7 @@ slurm_proctrack_fini( void )
* Returns container ID or zero on error
*/
extern uint32_t
slurm_create_container(slurmd_job_t *job)
slurm_container_create(slurmd_job_t *job)
{
if ( slurm_proctrack_init() < 0 )
return 0;
......@@ -232,30 +232,31 @@ slurm_create_container(slurmd_job_t *job)
}
/*
* Add this process to the specified container
* cont_id IN - container ID as returned by slurm_create_container()
* Add a process to the specified container
* cont_id IN - container ID as returned by slurm_container_create()
* pid IN - process ID to be added to the container
*
* Returns a SLURM errno.
*/
extern int
slurm_add_container(uint32_t cont_id)
slurm_container_add(uint32_t cont_id, pid_t pid)
{
if ( slurm_proctrack_init() < 0 )
return SLURM_ERROR;
return (*(g_proctrack_context->ops.add))( cont_id );
return (*(g_proctrack_context->ops.add))( cont_id , pid );
}
/*
* Signal all processes within a container
* cont_id IN - container ID as returned by slurm_create_container()
* cont_id IN - container ID as returned by slurm_container_create()
* signal IN - signal to send, if zero then perform error checking
* but do not send signal
*
* Returns a SLURM errno.
*/
extern int
slurm_signal_container(uint32_t cont_id, int signal)
slurm_container_signal(uint32_t cont_id, int signal)
{
if ( slurm_proctrack_init() < 0 )
return SLURM_ERROR;
......@@ -265,12 +266,12 @@ slurm_signal_container(uint32_t cont_id, int signal)
/*
* Destroy a container; any processes within the container are not affected
* cont_id IN - container ID as returned by slurm_create_container()
* cont_id IN - container ID as returned by slurm_container_create()
*
* Returns a SLURM errno.
*/
extern int
slurm_destroy_container(uint32_t cont_id)
slurm_container_destroy(uint32_t cont_id)
{
if ( slurm_proctrack_init() < 0 )
return SLURM_ERROR;
......@@ -284,7 +285,7 @@ slurm_destroy_container(uint32_t cont_id)
* Returns a SLURM errno.
*/
extern uint32_t
slurm_find_container(pid_t pid)
slurm_container_find(pid_t pid)
{
if ( slurm_proctrack_init() < 0 )
return SLURM_ERROR;
......
......@@ -55,41 +55,42 @@ extern int slurm_proctrack_fini(void);
*
* Returns container ID or zero on error
*/
extern uint32_t slurm_create_container(slurmd_job_t *job);
extern uint32_t slurm_container_create(slurmd_job_t *job);
/*
* Add this process to the specified container
* cont_id IN - container ID as returned by slurm_create_container()
* Add a process to the specified container
* cont_id IN - container ID as returned by slurm_container_create()
* pid IN - process ID to be added to the container
*
* Returns a SLURM errno.
*/
extern int slurm_add_container(uint32_t cont_id);
extern int slurm_container_add(uint32_t cont_id, pid_t pid);
/*
* Signal all processes within a container
* cont_id IN - container ID as returned by slurm_create_container()
* cont_id IN - container ID as returned by slurm_container_create()
* signal IN - signal to send, if zero then perform error checking
* but do not send signal
*
* Returns a SLURM errno.
*/
extern int slurm_signal_container(uint32_t cont_id, int signal);
extern int slurm_container_signal(uint32_t cont_id, int signal);
/*
* Destroy a container; any processes within the container are not affected
* cont_id IN - container ID as returned by slurm_create_container()
* cont_id IN - container ID as returned by slurm_container_create()
*
* Returns a SLURM errno.
*/
extern int slurm_destroy_container(uint32_t cont_id);
extern int slurm_container_destroy(uint32_t cont_id);
/*
* Get container ID for the given process ID
*
* Returns a SLURM errno.
*/
extern uint32_t slurm_find_container(pid_t pid);
extern uint32_t slurm_container_find(pid_t pid);
/* Wait for all processes within a container to exit */
/* Add process to a container */
......
......@@ -760,11 +760,11 @@ _rpc_kill_tasks(slurm_msg_t *msg, slurm_addr *cli_addr)
* Assume step termination request.
* Send SIGCONT just in case the processes are stopped.
*/
slurm_signal_container(step->cont_id, SIGCONT);
if (slurm_signal_container(step->cont_id, req->signal) < 0)
slurm_container_signal(step->cont_id, SIGCONT);
if (slurm_container_signal(step->cont_id, req->signal) < 0)
rc = errno;
} else if (req->signal == 0) {
if (slurm_signal_container(step->cont_id, req->signal) < 0)
if (slurm_container_signal(step->cont_id, req->signal) < 0)
rc = errno;
/* SIGMIGRATE and SIGSOUND are used to initiate job checkpoint on AIX.
* These signals are not sent to the entire process group, but just a
......@@ -839,10 +839,10 @@ static void _rpc_pid2jid(slurm_msg_t *msg, slurm_addr *cli)
slurm_msg_t resp_msg;
job_id_response_msg_t resp;
bool found = false;
uint32_t my_cont = slurm_find_container(req->job_pid);
uint32_t my_cont = slurm_container_find(req->job_pid);
if (my_cont == 0) {
verbose("slurm_find_container(%u): process not found",
verbose("slurm_container_find(%u): process not found",
(uint32_t) req->job_pid);
} else {
List steps = shm_get_steps();
......@@ -1012,7 +1012,7 @@ _kill_all_active_steps(uint32_t jobid, int sig, bool batch)
debug2("signal %d to job %u (cont_id:%u)",
sig, jobid, s->cont_id);
if (slurm_signal_container(s->cont_id, sig) < 0)
if (slurm_container_signal(s->cont_id, sig) < 0)
error("kill jid %d cont_id %u: %m",
s->jobid, s->cont_id);
}
......
......@@ -880,7 +880,7 @@ _shm_clear_stale_entries(void)
if ((s->state == SLURMD_JOB_UNUSED) /* unused */
|| (s->cont_id == 0) /* empty */
|| (slurm_signal_container(s->cont_id, 0) == 0)) /* active */
|| (slurm_container_signal(s->cont_id, 0) == 0)) /* active */
continue;
while (t && !active_tasks) {
......@@ -1135,7 +1135,7 @@ static bool
_valid_slurmd_cont_id(uint32_t cont_id)
{
/* Check if container has processes */
if (slurm_signal_container(cont_id, 0) != 0)
if (slurm_container_signal(cont_id, 0) != 0)
return false;
return true;
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment