Skip to content
Snippets Groups Projects
Commit 2ce565f3 authored by Moe Jette
Browse files
parent 191f65f5
No related branches found
No related tags found
No related merge requests found
...@@ -42,6 +42,7 @@ documents those changes that are of interest to users and admins. ...@@ -42,6 +42,7 @@ documents those changes that are of interest to users and admins.
-- Fix problem in switch/elan error handling that could hang a slurmd -- Fix problem in switch/elan error handling that could hang a slurmd
step manager process. step manager process.
-- Build on AIX with -bmaxdata:0x70000000 for memory limit more than 256MB. -- Build on AIX with -bmaxdata:0x70000000 for memory limit more than 256MB.
-- Restore srun's return code support.
* Changes in SLURM 0.6.6 * Changes in SLURM 0.6.6
======================== ========================
......
...@@ -537,6 +537,21 @@ _die_if_signaled(srun_job_t *job, int status) ...@@ -537,6 +537,21 @@ _die_if_signaled(srun_job_t *job, int status)
} }
} }
/*
 * _update_task_exitcode - send a task's recorded exit status down the
 *	message pipe.
 * IN job    - srun job; job->tstatus[taskid] supplies the status that is sent
 * IN taskid - index into job->tstatus of the task being reported
 *
 * Nothing is sent unless message_thread is set.  The record consists of
 * three ints, in order: PIPE_TASK_EXITCODE, the task id, the task status.
 */
static void
_update_task_exitcode(srun_job_t *job, int taskid)
{
	int record[3];
	ssize_t sent;

	if (!message_thread)
		return;

	/* Convert the enum explicitly so exactly sizeof(int) bytes go out
	 * for the type field, whatever size the compiler gives pipe_enum_t. */
	record[0] = (int) PIPE_TASK_EXITCODE;
	record[1] = taskid;
	record[2] = job->tstatus[taskid];

	/* Use one write() for the whole record: a pipe write this small is
	 * atomic, so another writer on the same pipe cannot slip data in
	 * between the three fields.  The byte stream is the same as three
	 * consecutive int-sized writes would produce. */
	sent = write(job->forked_msg->par_msg->msg_pipe[1],
		     record, sizeof(record));
	if (sent != (ssize_t) sizeof(record))
		debug("_update_task_exitcode: unable to send exit code "
		      "of task %d", taskid);
}
static void static void
_exit_handler(srun_job_t *job, slurm_msg_t *exit_msg) _exit_handler(srun_job_t *job, slurm_msg_t *exit_msg)
{ {
...@@ -564,6 +579,7 @@ _exit_handler(srun_job_t *job, slurm_msg_t *exit_msg) ...@@ -564,6 +579,7 @@ _exit_handler(srun_job_t *job, slurm_msg_t *exit_msg)
slurm_mutex_lock(&job->task_mutex); slurm_mutex_lock(&job->task_mutex);
job->tstatus[taskid] = status; job->tstatus[taskid] = status;
_update_task_exitcode(job, taskid);
if (status) if (status)
job->task_state[taskid] = SRUN_TASK_ABNORMAL_EXIT; job->task_state[taskid] = SRUN_TASK_ABNORMAL_EXIT;
else { else {
...@@ -877,7 +893,7 @@ par_thr(void *arg) ...@@ -877,7 +893,7 @@ par_thr(void *arg)
//slurm_uid = (uid_t) slurm_get_slurm_user_id(); //slurm_uid = (uid_t) slurm_get_slurm_user_id();
close(msg_par->msg_pipe[0]); // close read end of pipe close(msg_par->msg_pipe[0]); // close read end of pipe
close(par_msg->msg_pipe[1]); // close write end of pipe close(par_msg->msg_pipe[1]); // close write end of pipe
while(read(par_msg->msg_pipe[0],&c,sizeof(int))>0) { while(read(par_msg->msg_pipe[0], &c, sizeof(int)) == sizeof(int)) {
// getting info from msg thread // getting info from msg thread
if(type == PIPE_NONE) { if(type == PIPE_NONE) {
debug2("got type %d\n",c); debug2("got type %d\n",c);
...@@ -886,8 +902,10 @@ par_thr(void *arg) ...@@ -886,8 +902,10 @@ par_thr(void *arg)
} }
if(type == PIPE_JOB_STATE) { if(type == PIPE_JOB_STATE) {
debug("PIPE_JOB_STATE, c = %d", c);
update_job_state(job, c); update_job_state(job, c);
} else if(type == PIPE_TASK_STATE) { } else if(type == PIPE_TASK_STATE) {
debug("PIPE_TASK_STATE");
if(tid == -1) { if(tid == -1) {
tid = c; tid = c;
continue; continue;
...@@ -902,6 +920,18 @@ par_thr(void *arg) ...@@ -902,6 +920,18 @@ par_thr(void *arg)
update_job_state(job, SRUN_JOB_TERMINATED); update_job_state(job, SRUN_JOB_TERMINATED);
} }
tid = -1; tid = -1;
} else if(type == PIPE_TASK_EXITCODE) {
debug("PIPE_TASK_EXITCODE");
if(tid == -1) {
debug(" setting tid");
tid = c;
continue;
}
slurm_mutex_lock(&job->task_mutex);
debug(" setting task %d exitcode %d", tid, c);
job->tstatus[tid] = c;
slurm_mutex_unlock(&job->task_mutex);
tid = -1;
} else if(type == PIPE_HOST_STATE) { } else if(type == PIPE_HOST_STATE) {
if(tid == -1) { if(tid == -1) {
tid = c; tid = c;
......
...@@ -49,6 +49,7 @@ typedef enum { ...@@ -49,6 +49,7 @@ typedef enum {
PIPE_NONE = 0, PIPE_NONE = 0,
PIPE_JOB_STATE, PIPE_JOB_STATE,
PIPE_TASK_STATE, PIPE_TASK_STATE,
PIPE_TASK_EXITCODE,
PIPE_HOST_STATE, PIPE_HOST_STATE,
PIPE_SIGNALED, PIPE_SIGNALED,
PIPE_MPIR_PROCTABLE_SIZE, PIPE_MPIR_PROCTABLE_SIZE,
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment