From ac77a3f33fee7b7da2f257e7d672a40e9157bc2f Mon Sep 17 00:00:00 2001
From: Moe Jette <jette1@llnl.gov>
Date: Wed, 16 Feb 2005 02:07:31 +0000
Subject: [PATCH] Fix race condition in pgid set for spawned tasks.

---
 NEWS              |  5 +++++
 src/slurmd/smgr.c | 45 ++++++++++++++++++++++++++++++++++++++-------
 2 files changed, 43 insertions(+), 7 deletions(-)

diff --git a/NEWS b/NEWS
index c0398d927eb..431b7f3492f 100644
--- a/NEWS
+++ b/NEWS
@@ -1,6 +1,11 @@
 This file describes changes in recent versions of SLURM. It primarily
 documents those changes that are of interest to users and admins. 
 
+* Changes in SLURM 0.4.4
+========================
+ -- Fix race condition in slurmd setting pgid of spawned tasks for 
+    process tracking.
+
 * Changes in SLURM 0.4.3
 ========================
  -- Turn off some RPM build checks (bug in RPM, see slurm.spec.in)
diff --git a/src/slurmd/smgr.c b/src/slurmd/smgr.c
index f5061545497..fb70c70d7aa 100644
--- a/src/slurmd/smgr.c
+++ b/src/slurmd/smgr.c
@@ -75,7 +75,7 @@ static int smgr_sigarray[] = {
  */
 static void  _session_mgr(slurmd_job_t *job);
 static int   _exec_all_tasks(slurmd_job_t *job);
-static void  _exec_task(slurmd_job_t *job, int i);
+static void  _exec_task(slurmd_job_t *job, int i, int fd);
 static int   _become_user(slurmd_job_t *job);
 static void  _make_tmpdir(slurmd_job_t *job);
 static int   _child_exited(void);
@@ -273,6 +273,8 @@ _exec_all_tasks(slurmd_job_t *job)
 {
 	int i;
 	int fd = job->fdpair[1];
+	int cpipe[2];
+	uint8_t tid;
 
 	xassert(job != NULL);
 	xassert(fd >= 0);
@@ -285,6 +287,9 @@ _exec_all_tasks(slurmd_job_t *job)
 	if (xsignal_block(smgr_sigarray) < 0)
 		return error ("Unable to block signals");
 
+	if (pipe (cpipe) < 0)
+		return error ("Unable to open child pipe: %m");
+
 	for (i = 0; i < job->ntasks; i++) {
 		pid_t pid = fork();
 
@@ -292,7 +297,7 @@ _exec_all_tasks(slurmd_job_t *job)
 			error("fork: %m");
 			return SLURM_ERROR;
 		} else if (pid == 0)  /* child */
-			_exec_task(job, i);
+			_exec_task(job, i, cpipe[1]);
 
 		/* Parent continues: 
 		 */
@@ -310,6 +315,12 @@ _exec_all_tasks(slurmd_job_t *job)
 
 		job->task[i]->pid = pid;
 
+		/*
+		 * For task 0, wait until it has created a new pgrp.
+		 */
+		if (i == 0)
+			read (cpipe[0], &tid, sizeof (tid));
+
 		/*
 		 * Prepare process for attach by parallel debugger 
 		 * (if specified and able)
@@ -317,18 +328,30 @@ _exec_all_tasks(slurmd_job_t *job)
 		_pdebug_trace_process(job, pid);
 	}
 
-   again:
-	for (i = 1; i < job->ntasks; i++)
-		if (getpgid (job->task[i]->pid) != job->task[0]->pid) 
-			goto again;
+	/*
+	 * Wait for all tasks to finish joining new process group
+	 */
+	for (i = 1; i < job->ntasks; i++)  
+		read (cpipe[0], &tid, sizeof (tid));
+
+	close (cpipe[0]);
+	close (cpipe[1]);
 
 	return SLURM_SUCCESS;
 }
 
 
 static void
-_exec_task(slurmd_job_t *job, int i)
+_exec_task(slurmd_job_t *job, int i, int fd)
 {
+	/*
+	 * Fit taskid into single byte for writing back to
+	 *  initiating slurmd. Doesn't matter if the values
+	 *  are truncated, we just need the number of bytes to
+	 *  be right.
+	 */
+	uint8_t tid = i;
+
 	if (xsignal_unblock(smgr_sigarray) < 0) {
 		error("unable to unblock signals");
 		exit(1);
@@ -341,6 +364,14 @@ _exec_task(slurmd_job_t *job, int i)
 		error ("Unable to put task %d into pgrp %ld: %m",
 			i, job->task[0]->pid);
 
+	/*
+	 * Notify slurmd that pgid has been set for this task
+	 */
+	if (write (fd, &tid, sizeof (tid)) < 0)
+		error ("Unable to notify slurmd that task %d started\n", i);
+
+	close (fd);
+
 	if (!job->batch) {
 		if (interconnect_attach(job->switch_job, &job->env,
 				job->nodeid, (uint32_t) i, job->nnodes,
-- 
GitLab