Skip to content
Snippets Groups Projects
Commit a5ffb720 authored by Moe Jette's avatar Moe Jette
Browse files

add logging for trigger program failure

parent cf06a597
No related branches found
No related tags found
No related merge requests found
...@@ -1006,6 +1006,8 @@ extern void trigger_process(void) ...@@ -1006,6 +1006,8 @@ extern void trigger_process(void)
slurmctld_lock_t job_node_read_lock = slurmctld_lock_t job_node_read_lock =
{ NO_LOCK, READ_LOCK, READ_LOCK, NO_LOCK }; { NO_LOCK, READ_LOCK, READ_LOCK, NO_LOCK };
bool state_change = false; bool state_change = false;
pid_t rc;
int prog_stat;
lock_slurmctld(job_node_read_lock); lock_slurmctld(job_node_read_lock);
slurm_mutex_lock(&trigger_mutex); slurm_mutex_lock(&trigger_mutex);
...@@ -1036,22 +1038,22 @@ extern void trigger_process(void) ...@@ -1036,22 +1038,22 @@ extern void trigger_process(void)
} else if ((trig_in->state == 2) && } else if ((trig_in->state == 2) &&
(difftime(now, trig_in->trig_time) > (difftime(now, trig_in->trig_time) >
MAX_PROG_TIME)) { MAX_PROG_TIME)) {
bool purge;
if (trig_in->group_id != 0) { if (trig_in->group_id != 0) {
pid_t rc;
killpg(trig_in->group_id, SIGKILL); killpg(trig_in->group_id, SIGKILL);
rc = waitpid(trig_in->group_id, NULL, WNOHANG); rc = waitpid(trig_in->group_id, &prog_stat,
if ((rc == trig_in->group_id) WNOHANG);
|| ((rc == -1) && (errno == ECHILD))) if ((rc > 0) && prog_stat) {
purge = true; info("trigger uid=%u exit_status=%u:%u",
else trig_in->user_id,
purge = false; WIFEXITED(prog_stat),
} else /* No PID to wait for */ WTERMSIG(prog_stat));
purge = true; }
if ((rc == trig_in->group_id) ||
if (purge) { ((rc == -1) && (errno == ECHILD)))
trig_in->group_id = 0;
}
if (trig_in->group_id == 0) {
#if _DEBUG #if _DEBUG
info("purging trigger[%u]", trig_in->trig_id); info("purging trigger[%u]", trig_in->trig_id);
#endif #endif
...@@ -1061,7 +1063,15 @@ extern void trigger_process(void) ...@@ -1061,7 +1063,15 @@ extern void trigger_process(void)
} else if (trig_in->state == 2) { } else if (trig_in->state == 2) {
/* Eliminate zombie processes right away. /* Eliminate zombie processes right away.
* Purge trigger entry above MAX_PROG_TIME later */ * Purge trigger entry above MAX_PROG_TIME later */
waitpid(trig_in->group_id, NULL, WNOHANG); rc = waitpid(trig_in->group_id, &prog_stat, WNOHANG);
if ((rc > 0) && prog_stat) {
info("trigger uid=%u exit_status=%u:%u",
trig_in->user_id,
WIFEXITED(prog_stat), WTERMSIG(prog_stat));
}
if ((rc == trig_in->group_id) ||
((rc == -1) && (errno == ECHILD)))
trig_in->group_id = 0;
} }
} }
list_iterator_destroy(trig_iter); list_iterator_destroy(trig_iter);
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment