From cd0d28a83a787bd766b92c746e4fc537d3ccf45a Mon Sep 17 00:00:00 2001
From: Mark Grondona <mgrondona@llnl.gov>
Date: Wed, 22 Jan 2003 00:39:45 +0000
Subject: [PATCH] New list of job states in job.h; only terminate IO
 thread if IO is done or job is in FORCETERM state

---
 src/srun/io.c       |  5 +----
 src/srun/job.c      |  2 +-
 src/srun/job.h      | 19 +++++++++++--------
 src/srun/msg.c      | 15 +++------------
 src/srun/reattach.c |  7 +------
 src/srun/signals.c  |  8 ++------
 src/srun/srun.c     |  7 +------
 7 files changed, 20 insertions(+), 43 deletions(-)

diff --git a/src/srun/io.c b/src/srun/io.c
index e7774200ac7..901df8f0e9d 100644
--- a/src/srun/io.c
+++ b/src/srun/io.c
@@ -418,10 +418,7 @@ static char *_host_state_name(host_state_t state_inx)
 
 static bool _job_io_done(job_t *job)
 {
-	if (job->state == SRUN_JOB_DETACHED)
-		return true;
-	else
-		return false;
+	return (job->state >= SRUN_JOB_FORCETERM);
 }
 
 void report_task_status(job_t *job)
diff --git a/src/srun/job.c b/src/srun/job.c
index f55de1377da..17fd3c11293 100644
--- a/src/srun/job.c
+++ b/src/srun/job.c
@@ -303,7 +303,7 @@ job_force_termination(job_t *job)
 		update_job_state(job, SRUN_JOB_DETACHED); 	
 	} else {
 		info ("forcing job termination");
-		update_job_state(job, SRUN_JOB_OVERDONE);
+		update_job_state(job, SRUN_JOB_FORCETERM);
 	}
 
 	pthread_kill(job->ioid,  SIGHUP);
diff --git a/src/srun/job.h b/src/srun/job.h
index 2cfe4e400d8..59ba5a92f6d 100644
--- a/src/srun/job.h
+++ b/src/srun/job.h
@@ -17,14 +17,17 @@
 #include "src/srun/fname.h"
 
 typedef enum {
-	SRUN_JOB_INIT = 0,
-	SRUN_JOB_LAUNCHING,
-	SRUN_JOB_STARTING,
-	SRUN_JOB_RUNNING,
-	SRUN_JOB_FAILED,
-	SRUN_JOB_TERMINATING,
-	SRUN_JOB_OVERDONE,
-	SRUN_JOB_DETACHED
+	SRUN_JOB_INIT = 0,         /* Job's initial state                   */
+	SRUN_JOB_LAUNCHING,        /* Launch thread is running              */
+	SRUN_JOB_STARTING,         /* Launch thread is complete             */
+	SRUN_JOB_RUNNING,          /* Launch thread complete                */
+	SRUN_JOB_TERMINATING,      /* Once first task terminates            */
+	SRUN_JOB_TERMINATED,       /* All tasks terminated (may have IO)    */
+	SRUN_JOB_WAITING_ON_IO,    /* All tasks terminated; waiting for IO  */
+	SRUN_JOB_FORCETERM,        /* Forced termination of IO thread       */
+	SRUN_JOB_DONE,             /* tasks and IO complete                 */
+	SRUN_JOB_DETACHED,         /* Detached IO from job (Not used now)   */
+	SRUN_JOB_FAILED,           /* Job failed for some reason            */
 } job_state_t;
 
 typedef enum {
diff --git a/src/srun/msg.c b/src/srun/msg.c
index 08c99323e09..c8a20b94281 100644
--- a/src/srun/msg.c
+++ b/src/srun/msg.c
@@ -171,11 +171,7 @@ void MPIR_Breakpoint(void)
 
 static bool _job_msg_done(job_t *job)
 {
-	if ((job->state == SRUN_JOB_DETACHED) ||
-	    (job->state == SRUN_JOB_OVERDONE))
-		return true;
-	else
-		return false;
+	return (job->state >= SRUN_JOB_TERMINATED);
 }
 
 static void
@@ -233,7 +229,7 @@ update_failed_tasks(job_t *job, uint32_t nodeid)
 
 	if (tasks_exited == opt.nprocs) {
 		debug2("all tasks exited");
-		update_job_state(job, SRUN_JOB_OVERDONE);
+		update_job_state(job, SRUN_JOB_TERMINATED);
 	}
 		
 }
@@ -336,11 +332,6 @@ _reattach_handler(job_t *job, slurm_msg_t *msg)
 
 	update_running_tasks(job, resp->srun_node_id);
 
-	/* 
-	   if (job->stepid == NO_VAL)
-	   update_job_state(job, SRUN_JOB_OVERDONE);
-	 */
-
 }
 
 static void 
@@ -380,7 +371,7 @@ _exit_handler(job_t *job, slurm_msg_t *exit_msg)
 		tasks_exited++;
 		if (tasks_exited == opt.nprocs) {
 			debug2("All tasks exited");
-			update_job_state(job, SRUN_JOB_OVERDONE);
+			update_job_state(job, SRUN_JOB_TERMINATED);
 		}
 	}
 }
diff --git a/src/srun/reattach.c b/src/srun/reattach.c
index d7aa9a17c34..86490eda973 100644
--- a/src/srun/reattach.c
+++ b/src/srun/reattach.c
@@ -512,11 +512,6 @@ int reattach()
 
 static bool _job_all_done(job_t *job)
 {
-	if ((job->state == SRUN_JOB_DETACHED ) ||
-	    (job->state == SRUN_JOB_FAILED   ) ||
-	    (job->state == SRUN_JOB_OVERDONE))
-		return true;
-	else
-		return false;
+	return (job->state >= SRUN_JOB_TERMINATED); /* also DETACHED, FAILED */
 }
 
diff --git a/src/srun/signals.c b/src/srun/signals.c
index bdeb26e40f0..606f9b90f99 100644
--- a/src/srun/signals.c
+++ b/src/srun/signals.c
@@ -81,11 +81,7 @@ static void * _p_signal_task(void *);
 
 static bool _job_sig_done(job_t *job)
 {
-	if ((job->state == SRUN_JOB_DETACHED) ||
-	    (job->state == SRUN_JOB_OVERDONE))
-		return true;
-	else
-		return false;
+	return (job->state >= SRUN_JOB_DONE);
 }
 
 int 
@@ -182,7 +178,7 @@ _handle_intr(job_t *job, time_t *last_intr, time_t *last_intr_sent)
 	} else  { /* second Ctrl-C in half as many seconds */
 
 		/* terminate job */
-		if (job->state != SRUN_JOB_OVERDONE) {
+		if (job->state < SRUN_JOB_FORCETERM) {
 
 			info("sending Ctrl-C to job");
 			*last_intr = time(NULL);
diff --git a/src/srun/srun.c b/src/srun/srun.c
index 38cb0265246..eed374c29d4 100644
--- a/src/srun/srun.c
+++ b/src/srun/srun.c
@@ -352,12 +352,7 @@ _sig_kill_alloc(int signum)
 
 static bool _job_all_done(job_t *job)
 {
-	if ((job->state == SRUN_JOB_DETACHED ) ||
-	    (job->state == SRUN_JOB_FAILED   ) ||
-	    (job->state == SRUN_JOB_OVERDONE))
-		return true;
-	else
-		return false;
+	return (job->state >= SRUN_JOB_TERMINATED);
 }
 
 
-- 
GitLab