From 2ce565f34ebb4fc303939888c130f1431575b8fe Mon Sep 17 00:00:00 2001 From: Moe Jette <jette1@llnl.gov> Date: Wed, 26 Oct 2005 21:20:11 +0000 Subject: [PATCH] svn merge -r6446:6455 https://eris.llnl.gov/svn/slurm/branches/slurm-0-6-branch --- NEWS | 1 + src/srun/msg.c | 32 +++++++++++++++++++++++++++++++- src/srun/srun_job.h | 1 + 3 files changed, 33 insertions(+), 1 deletion(-) diff --git a/NEWS b/NEWS index 1b3640b0b4b..89340f5d9df 100644 --- a/NEWS +++ b/NEWS @@ -42,6 +42,7 @@ documents those changes that are of interest to users and admins. -- Fix problem in switch/elan error handling that could hang a slurmd step manager process. -- Build on AIX with -bmaxdata:0x70000000 for memory limit more than 256MB. + -- Restore srun's return code support. * Changes in SLURM 0.6.6 ======================== diff --git a/src/srun/msg.c b/src/srun/msg.c index c1ecadbe566..14edd64d73e 100644 --- a/src/srun/msg.c +++ b/src/srun/msg.c @@ -537,6 +537,21 @@ _die_if_signaled(srun_job_t *job, int status) } } +static void +_update_task_exitcode(srun_job_t *job, int taskid) +{ + pipe_enum_t pipe_enum = PIPE_TASK_EXITCODE; + + if(message_thread) { + write(job->forked_msg->par_msg->msg_pipe[1], + &pipe_enum, sizeof(int)); + write(job->forked_msg->par_msg->msg_pipe[1], + &taskid, sizeof(int)); + write(job->forked_msg->par_msg->msg_pipe[1], + &job->tstatus[taskid], sizeof(int)); + } +} + static void _exit_handler(srun_job_t *job, slurm_msg_t *exit_msg) { @@ -564,6 +579,7 @@ _exit_handler(srun_job_t *job, slurm_msg_t *exit_msg) slurm_mutex_lock(&job->task_mutex); job->tstatus[taskid] = status; + _update_task_exitcode(job, taskid); if (status) job->task_state[taskid] = SRUN_TASK_ABNORMAL_EXIT; else { @@ -877,7 +893,7 @@ par_thr(void *arg) //slurm_uid = (uid_t) slurm_get_slurm_user_id(); close(msg_par->msg_pipe[0]); // close read end of pipe close(par_msg->msg_pipe[1]); // close write end of pipe - while(read(par_msg->msg_pipe[0],&c,sizeof(int))>0) { + while(read(par_msg->msg_pipe[0], &c, sizeof(int)) == sizeof(int)) { // getting info from msg thread if(type == PIPE_NONE) { debug2("got type %d\n",c); @@ -886,8 +902,10 @@ par_thr(void *arg) } if(type == PIPE_JOB_STATE) { + debug("PIPE_JOB_STATE, c = %d", c); update_job_state(job, c); } else if(type == PIPE_TASK_STATE) { + debug("PIPE_TASK_STATE"); if(tid == -1) { tid = c; continue; @@ -902,6 +920,18 @@ par_thr(void *arg) update_job_state(job, SRUN_JOB_TERMINATED); } tid = -1; + } else if(type == PIPE_TASK_EXITCODE) { + debug("PIPE_TASK_EXITCODE"); + if(tid == -1) { + debug(" setting tid"); + tid = c; + continue; + } + slurm_mutex_lock(&job->task_mutex); + debug(" setting task %d exitcode %d", tid, c); + job->tstatus[tid] = c; + slurm_mutex_unlock(&job->task_mutex); + tid = -1; } else if(type == PIPE_HOST_STATE) { if(tid == -1) { tid = c; diff --git a/src/srun/srun_job.h b/src/srun/srun_job.h index 802476d7d81..bd0ee56e866 100644 --- a/src/srun/srun_job.h +++ b/src/srun/srun_job.h @@ -49,6 +49,7 @@ typedef enum { PIPE_NONE = 0, PIPE_JOB_STATE, PIPE_TASK_STATE, + PIPE_TASK_EXITCODE, PIPE_HOST_STATE, PIPE_SIGNALED, PIPE_MPIR_PROCTABLE_SIZE, -- GitLab