From 7bd1849e46b36ceb61ebd5f79d436930359d7214 Mon Sep 17 00:00:00 2001
From: Moe Jette <jette1@llnl.gov>
Date: Thu, 26 Apr 2007 20:26:34 +0000
Subject: [PATCH] Fix bug which prevented srun from reporting to slurmctld the
 job's exit code. Applies only to non-batch jobs, and the bad exit code was
 only reported by "scontrol show jobs" (the exit code in accounting data was
 correct).

---
 src/srun/srun.c     |  4 ++--
 src/srun/srun_job.c | 29 +++++++++++++----------------
 src/srun/srun_job.h |  3 ++-
 3 files changed, 17 insertions(+), 19 deletions(-)

diff --git a/src/srun/srun.c b/src/srun/srun.c
index e2239510f7e..60bdd19c92d 100644
--- a/src/srun/srun.c
+++ b/src/srun/srun.c
@@ -421,6 +421,7 @@ int srun(int ac, char **av)
 	
 	/* have to check if job was cancelled here just to make sure 
 	   state didn't change when we were waiting for the message thread */
+	exitcode = set_job_rc(job);
 	if (job->state == SRUN_JOB_CANCELLED) {
 		info("Cancelling job");
 		srun_job_destroy(job, NO_VAL);
@@ -428,7 +429,7 @@ int srun(int ac, char **av)
 		info("Terminating job");
 		srun_job_destroy(job, job->rc);
 	} else 
-		srun_job_destroy(job, 0);
+		srun_job_destroy(job, job->rc);
 		
 	/* wait for launch thread */
 	if (pthread_join(job->lid, NULL) < 0)
@@ -455,7 +456,6 @@ int srun(int ac, char **av)
 	/* 
 	 *  Let exit() clean up remaining threads.
 	 */
-	exitcode = job_rc(job);
 	log_fini();
 	exit(exitcode);
 }
diff --git a/src/srun/srun_job.c b/src/srun/srun_job.c
index b6e7fc2adde..1d28c43346f 100644
--- a/src/srun/srun_job.c
+++ b/src/srun/srun_job.c
@@ -556,32 +556,29 @@ job_force_termination(srun_job_t *job)
 
 
 int
-job_rc(srun_job_t *job)
+set_job_rc(srun_job_t *job)
 {
-	int i;
-	int rc = 0;
-
-	if (job->rc >= 0) return(job->rc);
+	int i, rc = 0, task_failed = 0;
 
 	/*
-	 *  return (1) if any tasks failed launch
+	 *  return code set to at least one if any tasks failed launch
 	 */
 	for (i = 0; i < opt.nprocs; i++) {
-		if (job->task_state[i] == SRUN_TASK_FAILED) 
-			return (job->rc = 1);
-	}
-
-	for (i = 0; i < opt.nprocs; i++) {
+		if (job->task_state[i] == SRUN_TASK_FAILED)
+			task_failed = 1; 
 		if (job->rc < job->tstatus[i])
 			job->rc = job->tstatus[i];
 	}
+	if (task_failed && (job->rc <= 0)) {
+		job->rc = 1;
+		return 1;
+	}
 
 	if ((rc = WEXITSTATUS(job->rc)))
-		job->rc = rc;
-	else if (WIFSIGNALED(job->rc))
-		job->rc = 128 + WTERMSIG(job->rc);
-
-	return(job->rc);
+		return rc;
+	if (WIFSIGNALED(job->rc))
+		return (128 + WTERMSIG(job->rc));
+	return job->rc;
 }
 
 
diff --git a/src/srun/srun_job.h b/src/srun/srun_job.h
index 69c3d728283..d2afa64ffce 100644
--- a/src/srun/srun_job.h
+++ b/src/srun/srun_job.h
@@ -211,9 +211,10 @@ void    report_task_status(srun_job_t *job);
 void    report_job_status(srun_job_t *job);
 
 /*
+ * Sets job->rc to highest task exit value.
  * Returns job return code (for srun exit status)
  */
-int    job_rc(srun_job_t *job);
+int    set_job_rc(srun_job_t *job);
 
 void   fwd_signal(srun_job_t *job, int signal, int max_threads);
 int    job_active_tasks_on_host(srun_job_t *job, int hostid);
-- 
GitLab