From 7bd1849e46b36ceb61ebd5f79d436930359d7214 Mon Sep 17 00:00:00 2001 From: Moe Jette <jette1@llnl.gov> Date: Thu, 26 Apr 2007 20:26:34 +0000 Subject: [PATCH] Fix bug which prevented srun from reporting to slurmctld the job's exit code. Applies only to non-batch jobs, and the bad exit code was only reported by "scontrol show jobs" (the exit code in accounting data was correct). --- src/srun/srun.c | 4 ++-- src/srun/srun_job.c | 29 +++++++++++++---------------- src/srun/srun_job.h | 3 ++- 3 files changed, 17 insertions(+), 19 deletions(-) diff --git a/src/srun/srun.c b/src/srun/srun.c index e2239510f7e..60bdd19c92d 100644 --- a/src/srun/srun.c +++ b/src/srun/srun.c @@ -421,6 +421,7 @@ int srun(int ac, char **av) /* have to check if job was cancelled here just to make sure state didn't change when we were waiting for the message thread */ + exitcode = set_job_rc(job); if (job->state == SRUN_JOB_CANCELLED) { info("Cancelling job"); srun_job_destroy(job, NO_VAL); @@ -428,7 +429,7 @@ int srun(int ac, char **av) info("Terminating job"); srun_job_destroy(job, job->rc); } else - srun_job_destroy(job, 0); + srun_job_destroy(job, job->rc); /* wait for launch thread */ if (pthread_join(job->lid, NULL) < 0) @@ -455,7 +456,6 @@ int srun(int ac, char **av) /* * Let exit() clean up remaining threads. 
*/ - exitcode = job_rc(job); log_fini(); exit(exitcode); } diff --git a/src/srun/srun_job.c b/src/srun/srun_job.c index b6e7fc2adde..1d28c43346f 100644 --- a/src/srun/srun_job.c +++ b/src/srun/srun_job.c @@ -556,32 +556,29 @@ job_force_termination(srun_job_t *job) int -job_rc(srun_job_t *job) +set_job_rc(srun_job_t *job) { - int i; - int rc = 0; - - if (job->rc >= 0) return(job->rc); + int i, rc = 0, task_failed = 0; /* - * return (1) if any tasks failed launch + * return code set to at least one if any tasks failed launch */ for (i = 0; i < opt.nprocs; i++) { - if (job->task_state[i] == SRUN_TASK_FAILED) - return (job->rc = 1); - } - - for (i = 0; i < opt.nprocs; i++) { + if (job->task_state[i] == SRUN_TASK_FAILED) + task_failed = 1; if (job->rc < job->tstatus[i]) job->rc = job->tstatus[i]; } + if (task_failed && (job->rc <= 0)) { + job->rc = 1; + return 1; + } if ((rc = WEXITSTATUS(job->rc))) - job->rc = rc; - else if (WIFSIGNALED(job->rc)) - job->rc = 128 + WTERMSIG(job->rc); - - return(job->rc); + return rc; + if (WIFSIGNALED(job->rc)) + return (128 + WTERMSIG(job->rc)); + return job->rc; } diff --git a/src/srun/srun_job.h b/src/srun/srun_job.h index 69c3d728283..d2afa64ffce 100644 --- a/src/srun/srun_job.h +++ b/src/srun/srun_job.h @@ -211,9 +211,10 @@ void report_task_status(srun_job_t *job); void report_job_status(srun_job_t *job); /* + * Sets job->rc to highest task exit value. * Returns job return code (for srun exit status) */ -int job_rc(srun_job_t *job); +int set_job_rc(srun_job_t *job); void fwd_signal(srun_job_t *job, int signal, int max_threads); int job_active_tasks_on_host(srun_job_t *job, int hostid); -- GitLab