From 591d89340889ef90e8b6f29b31858b0fd5ff86c4 Mon Sep 17 00:00:00 2001 From: "Mark A. Grondona" <mgrondona@llnl.gov> Date: Fri, 7 Oct 2011 15:16:39 -0700 Subject: [PATCH] slurmstepd: Move wait-for-parent code into fork_all_tasks Move the code that waits for the parent's signal before exec(2) out of exec_task() and into fork_all_tasks() directly. This consolidates all the code that handles the fork-and-wait in slurmstepd/mgr.c, and allows the exec_wait_child_wait_for_parent() function to be used in place of explicit read(). --- src/slurmd/slurmstepd/mgr.c | 10 +++++++++- src/slurmd/slurmstepd/task.c | 14 +------------- src/slurmd/slurmstepd/task.h | 2 +- 3 files changed, 11 insertions(+), 15 deletions(-) diff --git a/src/slurmd/slurmstepd/mgr.c b/src/slurmd/slurmstepd/mgr.c index 6dab4d8f2f0..55e9d549872 100644 --- a/src/slurmd/slurmstepd/mgr.c +++ b/src/slurmd/slurmstepd/mgr.c @@ -1319,7 +1319,15 @@ _fork_all_tasks(slurmd_job_t *job) */ prepare_tty (job, job->task[i]); - exec_task(job, i, ei->childfd); + /* + * Block until parent notifies us that it is ok to + * proceed. This allows the parent to place all + * children in any process groups or containers + * before they make a call to exec(2). + */ + exec_wait_child_wait_for_parent (ei); + + exec_task(job, i); } /* diff --git a/src/slurmd/slurmstepd/task.c b/src/slurmd/slurmstepd/task.c index 56fe04cd120..925e67e4ab0 100644 --- a/src/slurmd/slurmstepd/task.c +++ b/src/slurmd/slurmstepd/task.c @@ -329,27 +329,15 @@ _setup_mpi(slurmd_job_t *job, int ltaskid) * Current process is running as the user when this is called. 
*/ void -exec_task(slurmd_job_t *job, int i, int waitfd) +exec_task(slurmd_job_t *job, int i) { - char c; uint32_t *gtids; /* pointer to arrary of ranks */ int fd, j; - int rc; slurmd_task_info_t *task = job->task[i]; if (i == 0) _make_tmpdir(job); - /* - * Stall exec until all tasks have joined the same process group - */ - if ((rc = read (waitfd, &c, sizeof (c))) != 1) { - error ("_exec_task read failed, fd = %d, rc=%d: %m", waitfd, rc); - log_fini(); - exit(1); - } - close(waitfd); - gtids = xmalloc(job->node_tasks * sizeof(uint32_t)); for (j = 0; j < job->node_tasks; j++) gtids[j] = job->task[j]->gtid; diff --git a/src/slurmd/slurmstepd/task.h b/src/slurmd/slurmstepd/task.h index d067df52dca..78c0b6058cd 100644 --- a/src/slurmd/slurmstepd/task.h +++ b/src/slurmd/slurmstepd/task.h @@ -52,6 +52,6 @@ #include "src/slurmd/slurmstepd/slurmstepd_job.h" -void exec_task(slurmd_job_t *job, int i, int waitfd); +void exec_task(slurmd_job_t *job, int i); #endif /* !_TASK_H */ -- GitLab