Skip to content
Snippets Groups Projects
Commit 6f20c615 authored by Moe Jette
Browse files

Fix checkpoint completion counting: we need one reply per node, plus one from POE.

parent ac4f63fb
No related branches found
No related tags found
No related merge requests found
...@@ -256,7 +256,9 @@ extern int slurm_ckpt_comp ( struct step_record * step_ptr, time_t event_time, ...@@ -256,7 +256,9 @@ extern int slurm_ckpt_comp ( struct step_record * step_ptr, time_t event_time,
check_ptr->error_msg = xstrdup(error_msg); check_ptr->error_msg = xstrdup(error_msg);
} }
if (++check_ptr->reply_cnt == check_ptr->node_cnt) { /* We need a reply from each compute node,
* plus POE itself */
if (check_ptr->reply_cnt++ == check_ptr->node_cnt) {
info("Checkpoint complete for job %u.%u", info("Checkpoint complete for job %u.%u",
step_ptr->job_ptr->job_id, step_ptr->step_id); step_ptr->job_ptr->job_id, step_ptr->step_id);
check_ptr->time_stamp = time(NULL); check_ptr->time_stamp = time(NULL);
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment