From 1cb0007c2df5b12e4157282ae15f44bcdda2fa22 Mon Sep 17 00:00:00 2001 From: "Christopher J. Morrone" <morrone2@llnl.gov> Date: Fri, 16 Dec 2005 19:47:57 +0000 Subject: [PATCH] svn merge -r6839:6846 https://eris.llnl.gov/svn/slurm/branches/slurm-0-6-branch --- NEWS | 1 + src/plugins/proctrack/linuxproc/kill_tree.c | 159 +++++++++--------- src/plugins/proctrack/linuxproc/kill_tree.h | 1 - .../proctrack/linuxproc/proctrack_linuxproc.c | 2 +- src/plugins/proctrack/rms/proctrack_rms.c | 11 ++ src/plugins/switch/elan/qsw.c | 12 ++ src/slurmd/slurmstepd/mgr.c | 44 ++++- src/slurmd/slurmstepd/pdebug.c | 19 ++- 8 files changed, 161 insertions(+), 88 deletions(-) diff --git a/NEWS b/NEWS index 4d59acaa052..ca9af4de580 100644 --- a/NEWS +++ b/NEWS @@ -102,6 +102,7 @@ documents those changes that are of interest to users and admins. -- Add job_id to maui scheduler plugin start job status message. -- Fix for srun's handling of null characters in stdout or stderr. -- Update job accounting for larger systems (Andy Riebs, uptodate.patch). + -- Fixes for proctrack/linuxproc and mpich-gm support (Takao Hatazaki, HP). 
* Changes in SLURM 0.6.9 ======================== diff --git a/src/plugins/proctrack/linuxproc/kill_tree.c b/src/plugins/proctrack/linuxproc/kill_tree.c index 2d60024a3e3..9bd2303aca6 100644 --- a/src/plugins/proctrack/linuxproc/kill_tree.c +++ b/src/plugins/proctrack/linuxproc/kill_tree.c @@ -39,12 +39,15 @@ #include <strings.h> #include <unistd.h> #include <string.h> +#include <limits.h> #include "src/common/xmalloc.h" #include "src/common/log.h" typedef struct xpid_s { pid_t pid; + int is_usercmd; + char *cmd; struct xpid_s *next; } xpid_t; @@ -54,33 +57,36 @@ typedef struct xppid_s { struct xppid_s *next; } xppid_t; -#define MAX_NAME_LEN 64 #define HASH_LEN 64 #define GET_HASH_IDX(ppid) ((ppid)%HASH_LEN) -static xpid_t *_alloc_pid(pid_t pid, xpid_t *next) +static xpid_t *_alloc_pid(pid_t pid, int is_usercmd, char *cmd, xpid_t *next) { xpid_t *new; new = (xpid_t *)xmalloc(sizeof(*new)); new->pid = pid; + new->is_usercmd = is_usercmd; + new->cmd = xstrdup(cmd); new->next = next; return new; } -static xppid_t *_alloc_ppid(pid_t ppid, pid_t pid, xppid_t *next) +static xppid_t *_alloc_ppid(pid_t ppid, pid_t pid, int is_usercmd, char *cmd, + xppid_t *next) { xppid_t *new; new = xmalloc(sizeof(*new)); new->ppid = ppid; - new->list = _alloc_pid(pid, NULL); + new->list = _alloc_pid(pid, is_usercmd, cmd, NULL); new->next = next; return new; } -static void _push_to_hashtbl(pid_t ppid, pid_t pid, xppid_t **hashtbl) +static void _push_to_hashtbl(pid_t ppid, pid_t pid, + int is_usercmd, char *cmd, xppid_t **hashtbl) { int idx; xppid_t *ppids, *newppid; @@ -90,21 +96,45 @@ static void _push_to_hashtbl(pid_t ppid, pid_t pid, xppid_t **hashtbl) ppids = hashtbl[idx]; while (ppids) { if (ppids->ppid == ppid) { - newpid = _alloc_pid(pid, ppids->list); + newpid = _alloc_pid(pid, is_usercmd, cmd, ppids->list); ppids->list = newpid; return; } ppids = ppids->next; } - newppid = _alloc_ppid(ppid, pid, hashtbl[idx]); + newppid = _alloc_ppid(ppid, pid, is_usercmd, cmd, hashtbl[idx]); 
hashtbl[idx] = newppid; } +static int get_myname(char *s) +{ + char path[PATH_MAX], rbuf[1024]; + int fd; + + sprintf(path, "/proc/%ld/stat", (long)getpid()); + if ((fd = open(path, O_RDONLY)) < 0) { + error("Cannot open /proc/getpid()/stat"); + return -1; + } + if (read(fd, rbuf, 1024) <= 0) { + error("Cannot read /proc/getpid()/stat"); + close(fd); + return -1; + } + close(fd); + if (sscanf(rbuf, "%*ld %s ", s) != 1) { + error("Cannot get the command name from /proc/getpid()/stat"); + return -1; + } + return 0; +} + static xppid_t **_build_hashtbl() { DIR *dir; struct dirent *de; - char path[MAX_NAME_LEN], *endptr, *num, rbuf[1024]; + char path[PATH_MAX], *endptr, *num, rbuf[1024]; + char myname[1024], cmd[1024]; int fd; long pid, ppid; xppid_t **hashtbl; @@ -113,6 +143,8 @@ static xppid_t **_build_hashtbl() error("opendir(/proc): %m"); return NULL; } + if (get_myname(myname) < 0) return NULL; + debug3("Myname in build_hashtbl: %s", myname); hashtbl = (xppid_t **)xmalloc(HASH_LEN * sizeof(xppid_t *)); @@ -121,7 +153,7 @@ static xppid_t **_build_hashtbl() strtol(num, &endptr, 10); if (endptr == NULL || *endptr != 0) continue; - snprintf(path, MAX_NAME_LEN, "/proc/%s/stat", num); + sprintf(path, "/proc/%s/stat", num); if ((fd = open(path, O_RDONLY)) < 0) { continue; } @@ -129,35 +161,44 @@ static xppid_t **_build_hashtbl() close(fd); continue; } - if (sscanf(rbuf, "%ld %*s %*s %ld", &pid, &ppid) != 2) { + if (sscanf(rbuf, "%ld %s %*s %ld", &pid, cmd, &ppid) != 3) { close(fd); continue; } close(fd); - _push_to_hashtbl((pid_t)ppid, (pid_t)pid, hashtbl); + + /* Record cmd for debugging purpose */ + _push_to_hashtbl((pid_t)ppid, (pid_t)pid, + strcmp(myname, cmd), cmd, hashtbl); } closedir(dir); return hashtbl; } +static void _destroy_list(xpid_t *list) +{ + xpid_t *tmp; + + while (list) { + tmp = list->next; + xfree(list->cmd); + xfree(list); + list = tmp; + } +} + static void _destroy_hashtbl(xppid_t **hashtbl) { int i; - xppid_t *ppid, *tmp2; - xpid_t *list, *tmp; + 
xppid_t *ppid, *tmp; for (i=0; i<HASH_LEN; i++) { ppid = hashtbl[i]; while (ppid) { - list = ppid->list; - while (list) { - tmp = list->next; - xfree(list); - list = tmp; - } - tmp2 = ppid->next; + _destroy_list(ppid->list); + tmp = ppid->next; xfree(ppid); - ppid = tmp2; + ppid = tmp; } } xfree(hashtbl); @@ -174,7 +215,10 @@ static xpid_t *_get_list(int top, xpid_t *list, xppid_t **hashtbl) if (ppid->ppid == top) { children = ppid->list; while (children) { - list = _alloc_pid(children->pid, list); + list = _alloc_pid(children->pid, + children->is_usercmd, + children->cmd, + list); children = children->next; } children = ppid->list; @@ -189,25 +233,23 @@ static xpid_t *_get_list(int top, xpid_t *list, xppid_t **hashtbl) return list; } -static void _destroy_list(xpid_t *list) -{ - xpid_t *tmp; - - while (list) { - tmp = list->next; - xfree(list); - list = tmp; - } -} - static int _kill_proclist(xpid_t *list, int sig) { - int rc = -1; + int rc, rc0; + rc = 0; while (list) { if (list->pid > 1) { - verbose("Sending %d to %d", sig, list->pid); - rc &= kill(list->pid, sig); + if (! list->is_usercmd) { + debug2("%ld %s is not a user command. 
" + "Skipped sending signal %d", + (long)list->pid, list->cmd, sig); + } else { + verbose("Sending %d to %d %s", + sig, list->pid, list->cmd); + rc0 = kill(list->pid, sig); + if (rc0) rc = errno; /* save the last error */ + } } list = list->next; } @@ -230,7 +272,7 @@ extern int kill_proc_tree(pid_t top, int sig) if ((hashtbl = _build_hashtbl()) == NULL) return -1; - list = _get_list(top, _alloc_pid(top, NULL), hashtbl); + list = _get_list(top, NULL, hashtbl); rc = _kill_proclist(list, sig); _destroy_hashtbl(hashtbl); _destroy_list(list); @@ -238,50 +280,13 @@ extern int kill_proc_tree(pid_t top, int sig) } -static int _kill_proclist_exclude(xpid_t *list, pid_t exclude, int sig) -{ - int rc = -1; - - while (list) { - if (list->pid > 1 && list->pid != exclude) { - verbose("Sending %d to %d", sig, list->pid); - rc &= kill(list->pid, sig); - } - list = list->next; - } - - return rc; -} - - -/* - * Send signal "sig" to every process in the tree EXCEPT for the top. - */ -extern int kill_proc_tree_not_top(pid_t top, int sig) -{ - xpid_t *list; - int rc; - xppid_t **hashtbl; - - if ((hashtbl = _build_hashtbl()) == NULL) - return -1; - - list = _get_list(top, _alloc_pid(top, NULL), hashtbl); - rc = _kill_proclist_exclude(list, top, sig); - _destroy_hashtbl(hashtbl); - _destroy_list(list); - - return rc; -} - - /* * Return the pid of the process named "process_name" * which is the ancestor of "process". 
*/ extern pid_t find_ancestor(pid_t process, char *process_name) { - char path[MAX_NAME_LEN], rbuf[1024]; + char path[PATH_MAX], rbuf[1024]; int fd; long pid, ppid; @@ -291,7 +296,7 @@ extern pid_t find_ancestor(pid_t process, char *process_name) return 0; } - snprintf(path, MAX_NAME_LEN, "/proc/%d/stat", ppid); + sprintf(path, "/proc/%d/stat", ppid); if ((fd = open(path, O_RDONLY)) < 0) { return 0; } @@ -304,7 +309,7 @@ extern pid_t find_ancestor(pid_t process, char *process_name) return 0; } - snprintf(path, MAX_NAME_LEN, "/proc/%d/cmdline", pid); + sprintf(path, "/proc/%d/cmdline", pid); if ((fd = open(path, O_RDONLY)) < 0) { continue; } diff --git a/src/plugins/proctrack/linuxproc/kill_tree.h b/src/plugins/proctrack/linuxproc/kill_tree.h index e185dd0441f..7e34ce6aaa4 100644 --- a/src/plugins/proctrack/linuxproc/kill_tree.h +++ b/src/plugins/proctrack/linuxproc/kill_tree.h @@ -31,7 +31,6 @@ #include <sys/types.h> extern int kill_proc_tree(pid_t top, int sig); -extern int kill_proc_tree_not_top(pid_t top, int sig); extern pid_t find_ancestor(pid_t process, char *process_name); /* * Some of processes may not be in the same process group diff --git a/src/plugins/proctrack/linuxproc/proctrack_linuxproc.c b/src/plugins/proctrack/linuxproc/proctrack_linuxproc.c index 1941c6e5d83..37c541e3485 100644 --- a/src/plugins/proctrack/linuxproc/proctrack_linuxproc.c +++ b/src/plugins/proctrack/linuxproc/proctrack_linuxproc.c @@ -105,7 +105,7 @@ extern int slurm_container_add ( slurmd_job_t *job, pid_t pid ) extern int slurm_container_signal ( uint32_t id, int signal ) { - return kill_proc_tree_not_top((pid_t)id, signal); + return kill_proc_tree((pid_t)id, signal); } extern int slurm_container_destroy ( uint32_t id ) diff --git a/src/plugins/proctrack/rms/proctrack_rms.c b/src/plugins/proctrack/rms/proctrack_rms.c index ed91fa1692a..4d1f945fc77 100644 --- a/src/plugins/proctrack/rms/proctrack_rms.c +++ b/src/plugins/proctrack/rms/proctrack_rms.c @@ -218,10 +218,21 @@ 
_prg_destructor_fork() } else if (pid > 0) { /* parent */ close(fdpair[0]); + waitpid(pid, (int *)NULL, 0); return fdpair[1]; } /****************************************/ + /* fork again so the destructor process + * will not be a child of the slurmd + */ + pid = fork(); + if (pid < 0) { + error("_prg_destructor_fork: second fork failed"); + } else if (pid > 0) { + exit(0); + } + /* child */ close(fdpair[1]); diff --git a/src/plugins/switch/elan/qsw.c b/src/plugins/switch/elan/qsw.c index 3ad42944ad6..cb213882ef9 100644 --- a/src/plugins/switch/elan/qsw.c +++ b/src/plugins/switch/elan/qsw.c @@ -1021,10 +1021,22 @@ _prg_destructor_fork() } else if (pid > 0) { /* parent */ close(fdpair[0]); + waitpid(pid, (int *)NULL, 0); return fdpair[1]; } /****************************************/ + /* + * fork again so the destructor process + * will not be a child of the slurmd + */ + pid = fork(); + if (pid < 0) { + error("switch/elan: second fork failed"); + } else if (pid > 0) { + exit(0); + } + /* child */ close(fdpair[1]); diff --git a/src/slurmd/slurmstepd/mgr.c b/src/slurmd/slurmstepd/mgr.c index 5eb46b3da07..a8884bd2e0e 100644 --- a/src/slurmd/slurmstepd/mgr.c +++ b/src/slurmd/slurmstepd/mgr.c @@ -725,6 +725,9 @@ _send_pending_exit_msgs(slurmd_job_t *job) * * If waitflag is false, do repeated non-blocking waits until * there are no more processes to reap (waitpid returns 0). + * + * Returns the number of tasks for which a wait3() was successfully + * performed, or -1 if there are no child tasks. */ static int _wait_for_any_task(slurmd_job_t *job, bool waitflag) @@ -738,8 +741,19 @@ _wait_for_any_task(slurmd_job_t *job, bool waitflag) do { pid = wait3(&status, waitflag ? 
0 : WNOHANG, &rusage); - if (pid <= 0) - continue; + if (pid == -1) { + if (errno == ECHILD) { + debug("No child processes"); + completed = -1; + break; + } else if (errno == EINTR) { + debug("wait3 was interrupted"); + continue; + } else { + debug("Unknown errno %d", errno); + continue; + } + } /* See if the pid matches that of one of the tasks */ for (i = 0; i < job->ntasks; i++) { @@ -785,12 +799,30 @@ _wait_for_any_task(slurmd_job_t *job, bool waitflag) static void _wait_for_all_tasks(slurmd_job_t *job) { + int tasks_left = 0; int i; - for (i = 0; i < job->ntasks; ) { - i += _wait_for_any_task(job, true); - if (i < job->ntasks) - i += _wait_for_any_task(job, false); + for (i = 0; i < job->ntasks; i++) { + if (job->task[i]->state < SLURMD_TASK_COMPLETE) { + tasks_left++; + } + } + if (tasks_left < job->ntasks) + verbose("Only %d of %d requested tasks successfully launched", + tasks_left, job->ntasks); + + for (i = 0; i < tasks_left; ) { + int rc; + rc = _wait_for_any_task(job, true); + if (rc == -1) /* Got ECHILD */ + break; + i += rc; + if (i < job->ntasks) { + rc = _wait_for_any_task(job, false); + if (rc == -1) /* Got ECHILD */ + break; + i += rc; + } while (_send_pending_exit_msgs(job)) {;} } diff --git a/src/slurmd/slurmstepd/pdebug.c b/src/slurmd/slurmstepd/pdebug.c index e84ac80ed98..a7560d5b309 100644 --- a/src/slurmd/slurmstepd/pdebug.c +++ b/src/slurmd/slurmstepd/pdebug.c @@ -43,16 +43,29 @@ pdebug_trace_process(slurmd_job_t *job, pid_t pid) int status; waitpid(pid, &status, WUNTRACED); if (!WIFSTOPPED(status)) { - debug("pdebug_trace_process WIFSTOPPED false" + int i; + error("pdebug_trace_process WIFSTOPPED false" " for pid %lu", pid); if (WIFEXITED(status)) { - debug("Process %lu exited \"normally\"" + error("Process %lu exited \"normally\"" " with return code %d", pid, WEXITSTATUS(status)); } else if (WIFSIGNALED(status)) { - debug("Process %lu kill by signal %d", + error("Process %lu killed by signal %d", pid, WTERMSIG(status)); } + + /* + * Mark 
this process as complete since it died + * prematurely. + */ + for (i = 0; i < job->ntasks; i++) { + if (job->task[i]->pid == pid) { + job->task[i]->state = + SLURMD_TASK_COMPLETE; + } + } + return SLURM_ERROR; } if ((pid > (pid_t) 0) && (kill(pid, SIGSTOP) < 0)) { -- GitLab