Skip to content
Snippets Groups Projects
Commit a1b97b75 authored by Moe Jette's avatar Moe Jette
Browse files

Don't repeatedly send launch request if we get ESLURMD_CREDENTIAL_REPLAYED

return code, just report an error and abort.
parent 2d5c86f3
No related branches found
No related tags found
No related merge requests found
......@@ -478,14 +478,15 @@ static void * _p_launch_task(void *arg)
again:
if (_send_msg_rc(req) < 0) { /* Has timeout */
if (errno != EINTR)
verbose("first launch error on %s: %m",
job->step_layout->host[nodeid]);
if ((errno != ETIMEDOUT)
&& (job->state == SRUN_JOB_LAUNCHING)
&& (errno != ESLURMD_INVALID_JOB_CREDENTIAL)
&& retry-- ) {
if ((job->state == SRUN_JOB_LAUNCHING)
&& (errno != ETIMEDOUT)
&& (errno != ESLURMD_INVALID_JOB_CREDENTIAL)
&& (errno != ESLURMD_CREDENTIAL_REPLAYED)
&& (retry--) ) {
if (errno != EINTR) {
verbose("launch retry on %s: %m",
job->step_layout->host[nodeid]);
}
sleep(1);
goto again;
}
......@@ -494,7 +495,7 @@ static void * _p_launch_task(void *arg)
verbose("launch on %s canceled",
job->step_layout->host[nodeid]);
else
error("second launch error on %s: %m",
error("launch error on %s: %m",
job->step_layout->host[nodeid]);
_update_failed_node(job, nodeid);
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment