diff --git a/src/srun/launch.c b/src/srun/launch.c index c313311ba0daa0d5fe5da73aa89ada35e299ff2a..3f7c31fca54adc2196e5d71466f5611bc61edda8 100644 --- a/src/srun/launch.c +++ b/src/srun/launch.c @@ -478,14 +478,15 @@ static void * _p_launch_task(void *arg) again: if (_send_msg_rc(req) < 0) { /* Has timeout */ - if (errno != EINTR) - verbose("first launch error on %s: %m", - job->step_layout->host[nodeid]); - - if ((errno != ETIMEDOUT) - && (job->state == SRUN_JOB_LAUNCHING) - && (errno != ESLURMD_INVALID_JOB_CREDENTIAL) - && retry-- ) { + if ((job->state == SRUN_JOB_LAUNCHING) + && (errno != ETIMEDOUT) + && (errno != ESLURMD_INVALID_JOB_CREDENTIAL) + && (errno != ESLURMD_CREDENTIAL_REPLAYED) + && (retry--) ) { + if (errno != EINTR) { + verbose("launch retry on %s: %m", + job->step_layout->host[nodeid]); + } sleep(1); goto again; } @@ -494,7 +495,7 @@ static void * _p_launch_task(void *arg) verbose("launch on %s canceled", job->step_layout->host[nodeid]); else - error("second launch error on %s: %m", + error("launch error on %s: %m", job->step_layout->host[nodeid]); _update_failed_node(job, nodeid);