Skip to content
Snippets Groups Projects
Commit 1bb3b045 authored by Mark Grondona's avatar Mark Grondona
Browse files

o Send the session manager SIGXCPU on job timelimit instead of the tasks,

   then send the tasks SIGTERM. The session manager (smgr) handles SIGXCPU
   by printing a message that the job exceeded its timelimit.
parent 13bc1671
No related branches found
No related tags found
No related merge requests found
......@@ -433,6 +433,24 @@ _rpc_kill_tasks(slurm_msg_t *msg, slurm_addr *cli_addr)
slurm_send_rc_msg(msg, rc);
}
/*
 * Send signal `signum' to the session manager of every job step returned
 * by shm_get_steps() whose jobid matches `jobid'.
 *
 * Delivery is best-effort: the result of kill() is not checked and
 * nothing is returned to the caller.
 */
static void
_kill_running_session_mgrs(uint32_t jobid, int signum)
{
	List         steps = shm_get_steps();
	ListIterator i     = list_iterator_create(steps);
	job_step_t  *s     = NULL;

	while ((s = list_next(i))) {
		/*
		 * Only signal a positive session id.  kill(0, sig) would
		 * signal every process in the caller's own process group
		 * and kill(-1, sig) every process we may signal, so a step
		 * without a valid sid must be skipped.
		 * NOTE(review): assumes sid is 0 until a session manager
		 * has been recorded for the step -- confirm.
		 */
		if ((s->jobid == jobid) && (s->sid > 0))
			kill(s->sid, signum);
	}
	list_destroy(steps);
}
/* For the specified job_id: send SIGXCPU to the session managers and
 * SIGTERM to the tasks, reply to slurmctld, sleep(configured kill_wait),
 * then send SIGKILL */
static void
......@@ -451,7 +469,13 @@ _rpc_timelimit(slurm_msg_t *msg, slurm_addr *cli_addr)
return;
}
step_cnt = _kill_all_active_steps(req->job_id, SIGXCPU);
/*
* Send SIGXCPU to warn session managers of job steps for this
* job that the job is about to be terminated
*/
_kill_running_session_mgrs(req->job_id, SIGXCPU);
step_cnt = _kill_all_active_steps(req->job_id, SIGTERM);
info("Timeout for job=%u, step_cnt=%d, kill_wait=%u",
req->job_id, step_cnt, conf->cf.kill_wait);
......
......@@ -281,6 +281,12 @@ _exec_task(slurmd_job_t *job, int i)
exit(errno);
}
/*
 * Flag raised by _xcpu_handler().  It is written from signal context, so
 * it must be a volatile sig_atomic_t for the update to be reliably
 * visible to the interrupted code.
 */
static volatile sig_atomic_t timelimit_exceeded = 0;

/*
 * Signal handler: record that the signal arrived and do nothing else.
 * The handler only stores to a volatile sig_atomic_t.
 *
 * signum - number of the delivered signal (unused)
 */
static void
_xcpu_handler(int signum)
{
	(void) signum;
	timelimit_exceeded = 1;
}
/* wait for N tasks to exit, reporting exit status back to slurmd mgr
......@@ -295,12 +301,16 @@ _wait_for_all_tasks(slurmd_job_t *job)
int id = 0;
int fd = job->fdpair[1];
xsignal(SIGXCPU, _xcpu_handler);
while (waiting > 0) {
int status = 0;
pid_t pid;
if ((pid = waitpid(0, &status, 0)) < (pid_t) 0) {
if (errno != EINTR)
if ((errno == EINTR) && (timelimit_exceeded))
error("job exceeded timelimit");
else
error("waitpid: %m");
continue;
}
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment