diff --git a/src/plugins/proctrack/aix/proctrack_aix.c b/src/plugins/proctrack/aix/proctrack_aix.c
index b019c8e096d32e398a043028f8e2763ac22336a7..8051b554d2374d34a4923ecdb8fe8a439dc42a39 100644
--- a/src/plugins/proctrack/aix/proctrack_aix.c
+++ b/src/plugins/proctrack/aix/proctrack_aix.c
@@ -61,6 +61,8 @@ extern int proctrack_job_kill(int *jobid, int *signal); /* signal a job */
 extern int proctrack_get_job_id(int *pid_ptr);	/* return jobid for given pid */
 extern int proctrack_dump_records(void);	/* dump records */
 extern uint32_t proctrack_version(void);	/* proctrack version */
+extern int proctrack_get_pids(uint32_t job_id, int pid_array_len,
+			      int32_t *pid_array_ptr);
 
 /*
  * These variables are required by the generic plugin interface.  If they
@@ -101,7 +103,7 @@ const uint32_t plugin_version = 90;
  */
 extern int init ( void )
 {
-	uint32_t required_version = 2;
+	uint32_t required_version = 3;
 
 	if (proctrack_version() < required_version) {
 		error("proctrack AIX kernel extension must be >= %u",
@@ -161,7 +163,6 @@ extern int slurm_container_destroy ( uint32_t id )
 	if (proctrack_job_unreg(&jobid) == 0)
 		return SLURM_SUCCESS;
 
-	error("proctrack_job_unreg(%d): %m", jobid);
 	return SLURM_ERROR;
 }
 
@@ -175,3 +176,95 @@ slurm_container_find(pid_t pid)
 	return (uint32_t) cont_id;
 }
 
+extern bool
+slurm_container_has_pid(uint32_t cont_id, pid_t pid)
+{
+	int local_pid = (int) pid;
+	int found_cont_id = proctrack_get_job_id(&local_pid);
+
+	if (found_cont_id == -1 || (uint32_t)found_cont_id != cont_id)
+		return false;
+
+	return true;
+}
+
+extern int
+slurm_container_wait(uint32_t cont_id)
+{
+	int jobid = (int) cont_id;
+	int delay = 1;
+
+	if (cont_id == 0 || cont_id == 1) {
+		errno = EINVAL;
+		return SLURM_ERROR;
+	}
+
+	/* Spin until the container is successfully destroyed */
+	while (proctrack_job_unreg(&jobid) != 0) {
+		sleep(delay);
+		if (delay < 120) {
+			delay *= 2;
+		} else {
+			int i;
+			pid_t *pids = NULL;
+			int npids = 0;
+			error("Container %u is still not empty", cont_id);
+
+			slurm_container_get_pids(cont_id, &pids, &npids);
+			if (npids > 0) {
+				for (i = 0; i < npids; i++) {
+					verbose("  Container %u has pid %d",
+						cont_id, pids[i]);
+				}
+				xfree(pids);
+			}
+		}
+	}
+
+	return SLURM_SUCCESS;
+}
+
+extern int
+slurm_container_get_pids(uint32_t cont_id, pid_t **pids, int *npids)
+{
+	int32_t *p;
+	int np;
+	int len = 64;
+
+	p = (int32_t *)xmalloc(len * sizeof(int32_t));
+	while ((np = proctrack_get_pids(cont_id, len, p)) > len) {
+		/* array is too short, double its length */
+		len *= 2;
+		xrealloc(p, len * sizeof(int32_t));
+	}
+
+	if (np == -1) {
+		error("proctrack_get_pids(AIX) for container %u failed: %m",
+		      cont_id);
+		xfree(p);
+		*pids = NULL;
+		*npids = 0;
+		return SLURM_ERROR;
+	}
+
+	if (sizeof(uint32_t) == sizeof(pid_t)) {
+		debug3("slurm_container_get_pids: No need to copy pids array");
+		*npids = np;
+		*pids = (pid_t *)p;
+	} else {
+		/* need to cast every individual pid in the array */
+		pid_t *p_copy;
+		int i;
+
+		debug3("slurm_container_get_pids: Must copy pids array");
+		p_copy = (pid_t *)xmalloc(np * sizeof(pid_t));
+		for (i = 0; i < np; i++) {
+			p_copy[i] = (pid_t)p[i];
+		}
+		xfree(p);
+
+		*npids = np;
+		*pids = p_copy;
+	}
+	return SLURM_SUCCESS;
+}
diff --git a/src/plugins/proctrack/linuxproc/kill_tree.c b/src/plugins/proctrack/linuxproc/kill_tree.c
index 11acf81763d1350b36612e07b1561e66c9da1406..317fe6dc714109b13520b511495ee36b972649bc 100644
--- a/src/plugins/proctrack/linuxproc/kill_tree.c
+++ b/src/plugins/proctrack/linuxproc/kill_tree.c
@@ -52,6 +52,7 @@
 #include <string.h>
 #include <limits.h>
 
+#include "slurm/slurm.h" #include "src/common/xmalloc.h" #include "src/common/xstring.h" #include "src/common/log.h" @@ -335,3 +336,54 @@ extern pid_t find_ancestor(pid_t process, char *process_name) return pid; } + +/* The returned "pids" array does NOT include the slurmstepd */ +extern int proctrack_linuxproc_get_pids(pid_t top, pid_t **pids, int *npids) +{ + xppid_t **hashtbl; + xpid_t *list, *ptr; + pid_t *p; + int i; + int len = 32; + + if ((hashtbl = _build_hashtbl()) == NULL) + return SLURM_ERROR; + + list = _get_list(top, NULL, hashtbl); + if (list == NULL) { + *pids = NULL; + *npids = 0; + _destroy_hashtbl(hashtbl); + return SLURM_ERROR; + } + + p = (pid_t *)xmalloc(sizeof(pid_t) * len); + ptr = list; + i = 0; + while(ptr != NULL) { + if (ptr->is_usercmd) { /* don't include the slurmstepd */ + if (i >= len-1) { + len *= 2; + xrealloc(p, len); + } + p[i] = ptr->pid; + i++; + } + ptr = ptr->next; + } + + if (i == 0) { + xfree(p); + *pids = NULL; + *npids = 0; + _destroy_hashtbl(hashtbl); + _destroy_list(list); + return SLURM_ERROR; + } else { + *pids = p; + *npids = i; + _destroy_hashtbl(hashtbl); + _destroy_list(list); + return SLURM_SUCCESS; + } +} diff --git a/src/plugins/proctrack/linuxproc/kill_tree.h b/src/plugins/proctrack/linuxproc/kill_tree.h index bb76a1889b3c89b8dcc7fd4cc318dfc2433397c6..9dfc3a4ae316a910de222d3bc6fb9d002f5a59df 100644 --- a/src/plugins/proctrack/linuxproc/kill_tree.h +++ b/src/plugins/proctrack/linuxproc/kill_tree.h @@ -49,4 +49,6 @@ extern pid_t find_ancestor(pid_t process, char *process_name); * then kill all that subtree. */ +extern int proctrack_linuxproc_get_pids(pid_t top, pid_t **pids, int *npids); + #endif /* _HAVE_KILL_TREE_H */ diff --git a/src/plugins/proctrack/linuxproc/proctrack_linuxproc.c b/src/plugins/proctrack/linuxproc/proctrack_linuxproc.c index 71ead731487496054df36588fdc19f7d3f1d2b0e..1f81a817a894f97b7350549c0f2d2bf5abcb8b2f 100644 --- a/src/plugins/proctrack/linuxproc/proctrack_linuxproc.c +++ b/src/plugins/proctrack/linuxproc/proctrack_linuxproc.c @@ -130,3 +130,43 @@ extern uint32_t slurm_container_find(pid_t pid) return (uint32_t) find_ancestor(pid, "slurmstepd"); } +extern bool slurm_container_has_pid(uint32_t cont_id, pid_t pid) +{ + uint32_t cont; + + cont = (uint32_t) find_ancestor(pid, "slurmstepd"); + if (cont == cont_id) + return true; + + return false; +} + +extern int +slurm_container_wait(uint32_t cont_id) +{ + int delay = 1; + + if (cont_id == 0 || cont_id == 1) { + errno = EINVAL; + return SLURM_ERROR; + } + + /* Spin until the container is successfully destroyed */ + while (slurm_container_destroy(cont_id) != SLURM_SUCCESS) { + slurm_container_signal(cont_id, SIGKILL); + sleep(delay); + if (delay < 120) { + delay *= 2; + } else { + error("Unable to destroy container %u", cont_id); + } + } + + return SLURM_SUCCESS; +} + +extern int +slurm_container_get_pids(uint32_t cont_id, pid_t **pids, int *npids) +{ + return proctrack_linuxproc_get_pids((pid_t)cont_id, pids, npids); +} diff --git a/src/plugins/proctrack/pgid/proctrack_pgid.c b/src/plugins/proctrack/pgid/proctrack_pgid.c index 14d811ee6c872e69b2f89062ed5ddb37e831bd30..420245d2528858a13ddc91e18d73798c6b0e8694 100644 --- a/src/plugins/proctrack/pgid/proctrack_pgid.c +++ b/src/plugins/proctrack/pgid/proctrack_pgid.c @@ -148,3 +148,44 @@ extern uint32_t slurm_container_find(pid_t pid) return (uint32_t) rc; } +extern bool slurm_container_has_pid(uint32_t cont_id, pid_t pid) +{ + pid_t pgid = getpgid(pid); + + if (pgid == -1 || (uint32_t)pgid != cont_id) + return false; 
+
+	return true;
+}
+
+extern int
+slurm_container_wait(uint32_t cont_id)
+{
+	pid_t pgid = (pid_t)cont_id;
+	int delay = 1;
+
+	if (cont_id == 0 || cont_id == 1) {
+		errno = EINVAL;
+		return SLURM_ERROR;
+	}
+
+	/* Spin until the process group is gone. */
+	while (killpg(pgid, 0) == 0) {
+		slurm_container_signal(cont_id, SIGKILL);
+		sleep(delay);
+		if (delay < 120) {
+			delay *= 2;
+		} else {
+			error("Unable to destroy container %u", cont_id);
+		}
+	}
+
+	return SLURM_SUCCESS;
+}
+
+extern int
+slurm_container_get_pids(uint32_t cont_id, pid_t **pids, int *npids)
+{
+	error("proctrack/pgid does not implement slurm_container_get_pids");
+	return SLURM_ERROR;
+}
diff --git a/src/plugins/proctrack/rms/proctrack_rms.c b/src/plugins/proctrack/rms/proctrack_rms.c
index 429c781b47b7695f16c9bc4a41693fec2fd4806b..5dd24c885cfd2d219c2387a2b06645a892203ffd 100644
--- a/src/plugins/proctrack/rms/proctrack_rms.c
+++ b/src/plugins/proctrack/rms/proctrack_rms.c
@@ -176,6 +176,7 @@ extern int slurm_container_destroy (uint32_t id)
 	return SLURM_ERROR;
 }
 
+
 extern uint32_t slurm_container_find (pid_t pid)
 {
 	int prgid = 0;
@@ -185,6 +186,79 @@ extern uint32_t slurm_container_find (pid_t pid)
 	return (uint32_t) prgid;
 }
 
+extern bool slurm_container_has_pid (uint32_t cont_id, pid_t pid)
+{
+	int prgid = 0;
+
+	if (rms_getprgid ((int) pid, &prgid) < 0)
+		return false;
+	if ((uint32_t)prgid != cont_id)
+		return false;
+
+	return true;
+}
+
+extern int
+slurm_container_wait(uint32_t cont_id)
+{
+	int delay = 1;
+
+	if (cont_id == 0 || cont_id == 1) {
+		errno = EINVAL;
+		return SLURM_ERROR;
+	}
+
+	/* Spin until the container is empty */
+	while (slurm_container_signal(cont_id, 0) != -1) {
+		slurm_container_signal(cont_id, SIGKILL);
+		sleep(delay);
+		if (delay < 120) {
+			delay *= 2;
+		} else {
+			error("Unable to destroy container %u", cont_id);
+		}
+	}
+
+	return SLURM_SUCCESS;
+}
+
+/*
+ * This module assumes that the slurmstepd (running as root) is always the
+ * last process in the rms program description.  We do not include
+ * the slurmstepd in the list of pids that we return.
+ */
+extern int
+slurm_container_get_pids(uint32_t cont_id, pid_t **pids, int *npids)
+{
+	pid_t *p;
+	int np;
+	int len = 32;
+
+	p = xmalloc(len * sizeof(pid_t));
+	while (rms_prginfo((int)cont_id, len, p, &np) == -1) {
+		if (errno == EINVAL) {
+			/* array is too short, double its length */
+			len *= 2;
+			xrealloc(p, len * sizeof(pid_t));
+		} else {
+			xfree(p);
+			*pids = NULL;
+			*npids = 0;
+			return SLURM_ERROR;
+		}
+	}
+
+	/* Don't include the last pid (slurmstepd) in the list */
+	if (np > 0) {
+		p[np-1] = 0;
+		np--;
+	}
+
+	*npids = np;
+	*pids = p;
+
+	return SLURM_SUCCESS;
+}
 
 static void
 _close_all_fd_except(int fd)
diff --git a/src/plugins/proctrack/sgi_job/proctrack_sgi_job.c b/src/plugins/proctrack/sgi_job/proctrack_sgi_job.c
index 9a418b557254dfd9a65616865e0c78b8ffa41096..f5934f57c652840ce2f6b6a171eb7a2251166588 100644
--- a/src/plugins/proctrack/sgi_job/proctrack_sgi_job.c
+++ b/src/plugins/proctrack/sgi_job/proctrack_sgi_job.c
@@ -76,6 +76,8 @@ typedef jid_t (*waitjid_f) (jid_t jid, int *status, int options);
 typedef int   (*killjid_f) (jid_t jid, int sig);
 typedef jid_t (*detachpid_f) (pid_t pid);
 typedef jid_t (*attachpid_f) (pid_t pid, jid_t jid_requested);
+typedef int   (*getpidlist_f)(jid_t jid, pid_t *pid, int bufsize);
+typedef int   (*getpidcnt_f) (jid_t jid);
 
 /*
  * Handle to libjob.so
@@ -86,12 +88,14 @@ static void *libjob_handle = NULL;
  * libjob operations we'll need in this plugin
  */
 static struct job_operations {
-	create_f     create;
-	getjid_f     getjid;
-	waitjid_f    waitjid;
-	killjid_f    killjid;
-	detachpid_f  detachpid;
-	attachpid_f  attachpid;
+	create_f      create;
+	getjid_f      getjid;
+	waitjid_f     waitjid;
+	killjid_f     killjid;
+	detachpid_f   detachpid;
+	attachpid_f   attachpid;
+	getpidlist_f  getpidlist;
+	getpidcnt_f   getpidcnt;
 } job_ops;
 
@@ -117,6 +121,8 @@ int init (void)
 	job_ops.killjid   = dlsym (libjob_handle, "job_killjid");
 	job_ops.detachpid = dlsym (libjob_handle, "job_detachpid");
 	job_ops.attachpid = dlsym (libjob_handle, "job_attachpid");
+	job_ops.getpidlist = dlsym (libjob_handle, "job_getpidlist");
+	job_ops.getpidcnt  = dlsym (libjob_handle, "job_getpidcnt");
 
 	if (!job_ops.create)
 		error ("Unable to resolve job_create in libjob.so");
@@ -130,6 +136,10 @@ int init (void)
 		error ("Unable to resolve job_detachpid in libjob.so");
 	if (!job_ops.attachpid)
 		error ("Unable to resolve job_attachpid in libjob.so");
+	if (!job_ops.getpidlist)
+		error ("Unable to resolve job_getpidlist in libjob.so");
+	if (!job_ops.getpidcnt)
+		error ("Unable to resolve job_getpidcnt in libjob.so");
 
 	info ("successfully loaded libjob.so");
 	return SLURM_SUCCESS;
@@ -171,6 +181,15 @@ int _job_attachpid (pid_t pid, jid_t jid)
 	return ((*job_ops.attachpid) (pid, jid));
 }
 
+int _job_getpidlist (jid_t jid, pid_t *pid, int bufsize)
+{
+	return ((*job_ops.getpidlist) (jid, pid, bufsize));
+}
+
+int _job_getpidcnt (jid_t jid)
+{
+	return ((*job_ops.getpidcnt) (jid));
+}
 
 int slurm_container_create (slurmd_job_t *job)
 {
@@ -227,8 +246,59 @@ uint32_t slurm_container_find (pid_t pid)
 	jid_t jid;
 
 	if ((jid = _job_getjid (pid)) == (jid_t) -1)
-		return ((uint32_t) 0); /* XXX: What to return on error? */
+		return ((uint32_t) 0);
 
 	return ((uint32_t) jid);
 }
 
+bool slurm_container_has_pid (uint32_t cont_id, pid_t pid)
+{
+	jid_t jid;
+
+	if ((jid = _job_getjid (pid)) == (jid_t) -1)
+		return false;
+	if ((uint32_t)jid != cont_id)
+		return false;
+
+	return true;
+}
+
+int slurm_container_wait (uint32_t id)
+{
+	if (_job_waitjid ((jid_t) id, NULL, 0) == (jid_t)-1)
+		return SLURM_ERROR;
+
+	return SLURM_SUCCESS;
+}
+
+int slurm_container_get_pids(uint32_t cont_id, pid_t **pids, int *npids)
+{
+	int pidcnt, bufsize;
+	pid_t *p;
+
+	pidcnt = _job_getpidcnt((jid_t)cont_id);
+	if (pidcnt > 0) {
+		/*
+		 * FIXME - The "+ 128" is a rough attempt to allow for
+		 * the fact that _job_getpidcnt() followed by _job_getpidlist()
+		 * is not atomic.
+		 */
+		bufsize = sizeof(pid_t) * (pidcnt + 128);
+		p = (pid_t *)xmalloc(bufsize);
+		pidcnt = _job_getpidlist((jid_t)cont_id, p, bufsize);
+		if (pidcnt == -1) {
+			error("job_getpidlist() failed: %m");
+			*pids = NULL;
+			*npids = 0;
+			xfree(p);
+			return SLURM_ERROR;
+		}
+		*pids = p;
+		*npids = pidcnt;
+	} else {
+		*pids = NULL;
+		*npids = 0;
+	}
+
+	return SLURM_SUCCESS;
+}
diff --git a/src/slurmd/common/proctrack.c b/src/slurmd/common/proctrack.c
index 8a8bba4316d5e83f2449542d633d078fe1e49073..24185ddd0a98e04a99d96dc5ef87715cb4da3f87 100644
--- a/src/slurmd/common/proctrack.c
+++ b/src/slurmd/common/proctrack.c
@@ -53,6 +53,10 @@ typedef struct slurm_proctrack_ops {
 	int      (*signal)    ( uint32_t id, int signal );
 	int      (*destroy)   ( uint32_t id );
 	uint32_t (*find_cont) ( pid_t pid );
+	bool     (*has_pid)   ( uint32_t id, pid_t pid );
+	int      (*wait)      ( uint32_t id );
+	int      (*get_pids)  ( uint32_t id, pid_t **pids,
+				int *npids);
 } slurm_proctrack_ops_t;
 
 
@@ -85,7 +89,10 @@ _proctrack_get_ops( slurm_proctrack_context_t *c )
 		"slurm_container_add",
 		"slurm_container_signal",
 		"slurm_container_destroy",
-		"slurm_container_find"
+		"slurm_container_find",
+		"slurm_container_has_pid",
+		"slurm_container_wait",
+		"slurm_container_get_pids"
 	};
 	int n_syms = sizeof( syms ) / sizeof( char * );
 
@@ -296,9 +303,9 @@ slurm_container_destroy(uint32_t cont_id)
 }
 
 /*
- * Get container ID for give process ID
+ * Get container ID for given process ID
  *
- * Returns a SLURM errno.
+ * Returns zero if no container found for the given pid.
  */
 extern uint32_t
 slurm_container_find(pid_t pid)
@@ -309,3 +316,55 @@ slurm_container_find(pid_t pid)
 
 	return (*(g_proctrack_context->ops.find_cont))( pid );
 }
+
+/*
+ * Return "true" if the container "cont_id" contains the process with
+ * ID "pid".
+ */
+extern bool
+slurm_container_has_pid(uint32_t cont_id, pid_t pid)
+{
+	if ( slurm_proctrack_init() < 0 )
+		return false;
+
+	return (*(g_proctrack_context->ops.has_pid))( cont_id, pid );
+}
+
+/*
+ * Wait for all processes within a container to exit.
+ *
+ * When slurm_container_wait returns SLURM_SUCCESS, the container is considered
+ * destroyed.  There is no need to call slurm_container_destroy after
+ * a successful call to slurm_container_wait, and in fact it will trigger
+ * undefined behavior.
+ *
+ * Return SLURM_SUCCESS or SLURM_ERROR.
+ */
+extern int
+slurm_container_wait(uint32_t cont_id)
+{
+	if ( slurm_proctrack_init() < 0 )
+		return SLURM_ERROR;
+
+	return (*(g_proctrack_context->ops.wait))( cont_id );
+}
+
+/*
+ * Get all process IDs within a container.
+ *
+ * IN cont_id - Container ID.
+ * OUT pids - a pointer to an xmalloc'ed array of process ids, of
+ *            length "npids".  If not NULL, caller must free array with xfree().
+ * OUT npids - number of process IDs in the returned "pids" array.
+ *
+ * Return SLURM_SUCCESS if container exists (npids may be zero, and
+ * pids NULL), return SLURM_ERROR if container does not exist, or
+ * plugin does not implement the call.
+ */
+extern int
+slurm_container_get_pids(uint32_t cont_id, pid_t **pids, int *npids)
+{
+	if ( slurm_proctrack_init() < 0 )
+		return SLURM_ERROR;
+
+	return (*(g_proctrack_context->ops.get_pids))(cont_id, pids, npids);
+}
diff --git a/src/slurmd/common/proctrack.h b/src/slurmd/common/proctrack.h
index df83395c6bb24748393da27ffa47a3c534a1b42d..f7bfd8528d9b1859e63fc04ab354af6c7fb9dcd7 100644
--- a/src/slurmd/common/proctrack.h
+++ b/src/slurmd/common/proctrack.h
@@ -40,6 +40,7 @@
 #include <slurm/slurm.h>
 
 #include "src/slurmd/slurmstepd/slurmstepd_job.h"
+#include <stdbool.h>
 
 /*
  * Initialize the process tracking plugin.
@@ -103,16 +104,44 @@ extern int slurm_container_signal(uint32_t cont_id, int signal);
 extern int slurm_container_destroy(uint32_t cont_id);
 
 /*
- * Get container ID for give process ID
+ * Get container ID for given process ID
  *
  * Returns zero if no container found for the given pid.
  */
 extern uint32_t slurm_container_find(pid_t pid);
 
-/* Wait for all processes within a container to exit */
-/* Add process to a container */
-/* Get process IDs within a container */
-/* Get container ID for give process ID */
+/*
+ * Return "true" if the container "cont_id" contains the process with
+ * ID "pid".
+ */
+extern bool slurm_container_has_pid(uint32_t cont_id, pid_t pid);
+
+/*
+ * Wait for all processes within a container to exit.
+ *
+ * When slurm_container_wait returns SLURM_SUCCESS, the container is considered
+ * destroyed.  There is no need to call slurm_container_destroy after
+ * a successful call to slurm_container_wait, and in fact it will trigger
+ * undefined behavior.
+ *
+ * Return SLURM_SUCCESS or SLURM_ERROR.
+ */
+extern int slurm_container_wait(uint32_t cont_id);
+
+/*
+ * Get all process IDs within a container.
+ *
+ * IN cont_id - Container ID.
+ * OUT pids - a pointer to an xmalloc'ed array of process ids, of
+ *            length "npids".  If not NULL, caller must free array with xfree().
+ * OUT npids - number of process IDs in the returned "pids" array.
+ *
+ * Return SLURM_SUCCESS if container exists (npids may be zero, and
+ * pids NULL), return SLURM_ERROR if container does not exist, or
+ * plugin does not implement the call.
+ */
+extern int slurm_container_get_pids(uint32_t cont_id, pid_t **pids, int *npids);
+
 /* Collect accounting information for all processes within a container */
 
 #endif /*__PROC_TRACK_H__*/
diff --git a/src/slurmd/slurmstepd/mgr.c b/src/slurmd/slurmstepd/mgr.c
index 5d61ec331dd5b470b77ec57a6a8c48ce2638706b..84594d2d75699983c384125f4af9c1f5ced4d57f 100644
--- a/src/slurmd/slurmstepd/mgr.c
+++ b/src/slurmd/slurmstepd/mgr.c
@@ -165,7 +165,6 @@ static void _wait_for_io(slurmd_job_t *job);
 static int  _send_exit_msg(slurmd_job_t *job, uint32_t *tid, int n,
 			   int status);
 static int  _send_pending_exit_msgs(slurmd_job_t *job);
-static void _kill_running_tasks(slurmd_job_t *job);
 static void _wait_for_all_tasks(slurmd_job_t *job);
 static int  _wait_for_any_task(slurmd_job_t *job, bool waitflag);
 
@@ -712,13 +711,23 @@ job_manager(slurmd_job_t *job)
 
     fail2:
 	/*
-	 * First call interconnect_postfini() - In at least one case,
-	 * this will clean up any straggling processes. If this call
-	 * is moved behind wait_for_io(), we may block waiting for IO
-	 * on a hung process.
+	 * First call interconnect_postfini() - In at least one case,
+	 * this will clean up any straggling processes.  If this call
+	 * is moved behind wait_for_io(), we may block waiting for IO
+	 * on a hung process.
+	 *
+	 * Make sure all processes in session are dead for interactive
+	 * jobs.  On systems with an IBM Federation switch, all processes
+	 * must be terminated before the switch window can be released by
+	 * interconnect_postfini().  For batch jobs, we let spawned processes
+	 * continue by convention (although this could go either way). The
+	 * Epilog program could be used to terminate any "orphan" processes.
 	 */
 	if (!job->batch) {
-		_kill_running_tasks(job);
+		if (job->cont_id != 0) {
+			slurm_container_signal(job->cont_id, SIGKILL);
+			slurm_container_wait(job->cont_id);
+		}
 		if (interconnect_postfini(job->switch_job, job->jmgr_pid,
 				job->jobid, job->stepid) < 0)
 			error("interconnect_postfini: %m");
@@ -1143,41 +1152,6 @@ _wait_for_all_tasks(slurmd_job_t *job)
 	}
 }
 
-/*
- * Make sure all processes in session are dead for interactive jobs.  On
- * systems with an IBM Federation switch, all processes must be terminated
- * before the switch window can be released by interconnect_postfini().
- * For batch jobs, we let spawned processes continue by convention
- * (although this could go either way). The Epilog program could be used
- * to terminate any "orphan" processes.
- */
-static void
-_kill_running_tasks(slurmd_job_t *job)
-{
-	int delay = 1;
-
-	if (job->batch)
-		return;
-
-	if (job->cont_id) {
-		slurm_container_signal(job->cont_id, SIGKILL);
-
-		/* Spin until the container is successfully destroyed */
-		while (slurm_container_destroy(job->cont_id) != SLURM_SUCCESS) {
-			slurm_container_signal(job->cont_id, SIGKILL);
-			sleep(delay);
-			if (delay < 120) {
-				delay *= 2;
-			} else {
-				error("Unable to destroy container, job %u.%u",
-				      job->jobid, job->stepid);
-			}
-		}
-	}
-
-	return;
-}
-
 /*
  * Wait for IO
  */
diff --git a/src/slurmd/slurmstepd/req.c b/src/slurmd/slurmstepd/req.c
index 31e4314bb970a6554d6b3b80b1f8b90ce399da82..e8d55946c83b12e28ee353b831d9c7b2ec09bc70 100644
--- a/src/slurmd/slurmstepd/req.c
+++ b/src/slurmd/slurmstepd/req.c
@@ -867,12 +867,7 @@ _handle_pid_in_container(int fd, slurmd_job_t *job)
 
 	safe_read(fd, &pid, sizeof(pid_t));
 
-	/*
-	 * FIXME - we should add a new call in the proctrack API
-	 * that simply returns "true" if a pid is in the step
-	 */
-	if (job->cont_id == slurm_container_find(pid))
-		rc = true;
+	rc = slurm_container_has_pid(job->cont_id, pid);
 
 	/* Send the return code */
 	safe_write(fd, &rc, sizeof(bool));
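
Reviewer note (not part of the patch): the fragment below is a minimal, hypothetical sketch of how a slurmd component is expected to drive the new proctrack calls added here. The helper name drain_container() is invented for illustration; info(), error(), and xfree() are the existing helpers from src/common/log.h and src/common/xmalloc.h.

#include <sys/types.h>
#include <signal.h>

#include "src/common/log.h"
#include "src/common/xmalloc.h"
#include "src/slurmd/common/proctrack.h"

/* Hypothetical helper: report, then reap, everything in a container */
static void drain_container(uint32_t cont_id)
{
	pid_t *pids = NULL;
	int i, npids = 0;

	/* List the pids still inside the container.  The plugin
	 * xmalloc's the array; the caller must xfree() it. */
	if (slurm_container_get_pids(cont_id, &pids, &npids)
	    == SLURM_SUCCESS) {
		for (i = 0; i < npids; i++)
			info("container %u holds pid %d",
			     cont_id, (int) pids[i]);
		xfree(pids);
	}

	/* Kill the container, then block until it is empty.  Once
	 * slurm_container_wait() returns SLURM_SUCCESS the container
	 * is already destroyed; calling slurm_container_destroy()
	 * afterwards is undefined behavior per the header comment. */
	slurm_container_signal(cont_id, SIGKILL);
	if (slurm_container_wait(cont_id) != SLURM_SUCCESS)
		error("container %u could not be drained", cont_id);
}

This mirrors what job_manager() in mgr.c now does for interactive jobs: SIGKILL followed by slurm_container_wait() replaces the old _kill_running_tasks() spin loop.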