diff --git a/src/slurmd/common/stepd_api.c b/src/slurmd/common/stepd_api.c
index 684312c727978993651ccbf2033f0d81aebb992c..efde9787689a3ed67ad4e0d33399c7dfff0ad7bf 100644
--- a/src/slurmd/common/stepd_api.c
+++ b/src/slurmd/common/stepd_api.c
@@ -192,7 +192,6 @@ stepd_signal(int fd, int signal)
 
 	/* Receive the return code */
 	safe_read(fd, &rc, sizeof(int));
-
 	return rc;
 rwfail:
 	return -1;
@@ -204,7 +203,7 @@ rwfail:
 int
 stepd_signal_task_local(int fd, int signal, int ltaskid)
 {
-	int req = REQUEST_SIGNAL_PROCESS_GROUP;
+	int req = REQUEST_SIGNAL_TASK_LOCAL;
 	int rc;
 
 	safe_write(fd, &req, sizeof(int));
@@ -557,3 +556,28 @@ stepd_resume(int fd)
 rwfail:
 	return -1;
 }
+
+/*
+ * Terminate the job step.
+ *
+ * Returns SLURM_SUCCESS if successful.  On error returns SLURM_ERROR
+ * and sets errno.
+ */
+int
+stepd_terminate(int fd)
+{
+	int req = REQUEST_STEP_TERMINATE;
+	int rc;
+	int errnum = 0;
+
+	safe_write(fd, &req, sizeof(int));
+
+	/* Receive the return code and errno */
+	safe_read(fd, &rc, sizeof(int));
+	safe_read(fd, &errnum, sizeof(int));
+
+	errno = errnum;
+	return rc;
+rwfail:
+	return -1;
+}
diff --git a/src/slurmd/common/stepd_api.h b/src/slurmd/common/stepd_api.h
index c425f3774603d8c605d692564fde088fa5636250..92e062cf4bee7cf84dff5a6ae8ff79343d15fee8 100644
--- a/src/slurmd/common/stepd_api.h
+++ b/src/slurmd/common/stepd_api.h
@@ -53,7 +53,8 @@ typedef enum {
 	REQUEST_PID_IN_CONTAINER,
 	REQUEST_DAEMON_PID,
 	REQUEST_STEP_SUSPEND,
-	REQUEST_STEP_RESUME
+	REQUEST_STEP_RESUME,
+	REQUEST_STEP_TERMINATE
 } step_msg_t;
 
 typedef enum {
diff --git a/src/slurmd/slurmd/req.c b/src/slurmd/slurmd/req.c
index 41ed3cccc40c4553b930190d62717ca4ea203ec8..15775f0729bf702b6279492d213bb456e6cac37e 100644
--- a/src/slurmd/slurmd/req.c
+++ b/src/slurmd/slurmd/req.c
@@ -76,6 +76,7 @@ static char ** _build_env(uint32_t jobid, uid_t uid, char *bg_part_id);
 static bool _slurm_authorized_user(uid_t uid);
 static bool _job_still_running(uint32_t job_id);
 static int _kill_all_active_steps(uint32_t jobid, int sig, bool batch);
+static int _terminate_all_steps(uint32_t jobid, bool batch);
 static void _rpc_launch_tasks(slurm_msg_t *, slurm_addr *);
 static void _rpc_spawn_task(slurm_msg_t *, slurm_addr *);
 static void _rpc_batch_job(slurm_msg_t *, slurm_addr *);
@@ -988,7 +989,7 @@ _rpc_terminate_tasks(slurm_msg_t *msg, slurm_addr *cli_addr)
 		goto done3;
 	}
 
-	rc = stepd_signal_container(fd, req->signal);
+	rc = stepd_terminate(fd);
 	if (rc == -1)
 		rc = ESLURMD_JOB_NOTRUNNING;
 
@@ -1268,6 +1269,56 @@ _kill_all_active_steps(uint32_t jobid, int sig, bool batch)
 	return step_cnt;
 }
 
+/*
+ * _terminate_all_steps - terminates the containers of all steps of a job
+ * jobid IN - id of job to terminate
+ * batch IN - if true terminate the batch script, otherwise skip it
+ * RET count of terminated job steps (plus batch script, if applicable)
+ */
+static int
+_terminate_all_steps(uint32_t jobid, bool batch)
+{
+	List steps;
+	ListIterator i;
+	step_loc_t *stepd;
+	int step_cnt = 0;
+	int fd;
+
+	steps = stepd_available(conf->spooldir, conf->node_name);
+	i = list_iterator_create(steps);
+	while ((stepd = list_next(i))) {
+		if (stepd->jobid != jobid) {
+			/* multiple jobs expected on shared nodes */
+			debug3("Step from other job: jobid=%u (this jobid=%u)",
+			       stepd->jobid, jobid);
+			continue;
+		}
+
+		if ((stepd->stepid == SLURM_BATCH_SCRIPT) && (!batch))
+			continue;
+
+		step_cnt++;
+
+		fd = stepd_connect(stepd->directory, stepd->nodename,
+				   stepd->jobid, stepd->stepid);
+		if (fd == -1) {
+			debug3("Unable to connect to step %u.%u",
+			       stepd->jobid, stepd->stepid);
+			continue;
+		}
+
+		debug2("terminate job step %u.%u", jobid, stepd->stepid);
+		if (stepd_terminate(fd) < 0)
+			debug("kill jobid=%u failed: %m", jobid);
+		close(fd);
+	}
+	list_iterator_destroy(i);
+	list_destroy(steps);
+	if (step_cnt == 0)
+		debug2("No steps in job %u to terminate", jobid);
+	return step_cnt;
+}
+
 static bool
 _job_still_running(uint32_t job_id)
 {
@@ -1616,8 +1667,16 @@ _rpc_terminate_job(slurm_msg_t *msg, slurm_addr *cli)
 	 * so send SIGCONT first.
 	 */
 	_kill_all_active_steps(req->job_id, SIGCONT, true);
-
-	nsteps = _kill_all_active_steps(req->job_id, SIGTERM, true);
+	if (errno == ESLURMD_STEP_SUSPENDED) {
+		/*
+		 * If the job step is currently suspended, we don't
+		 * bother with a "nice" termination.
+		 */
+		debug2("Job is currently suspended, terminating");
+		nsteps = _terminate_all_steps(req->job_id, true);
+	} else {
+		nsteps = _kill_all_active_steps(req->job_id, SIGTERM, true);
+	}
 
 	/*
 	 * If there are currently no active job steps and no
@@ -1654,7 +1713,7 @@ _rpc_terminate_job(slurm_msg_t *msg, slurm_addr *cli)
 	 */
 	delay = MAX(conf->cf.kill_wait, 5);
 	if ( !_pause_for_job_completion (req->job_id, delay)
-	     && _kill_all_active_steps(req->job_id, SIGKILL, true) ) {
+	     && _terminate_all_steps(req->job_id, true) ) {
 		/*
 		 * Block until all user processes are complete.
 		 */
@@ -1762,7 +1821,7 @@ _pause_for_job_completion (uint32_t job_id, int max_time)
 	while ( ((sec++ < max_time) || (max_time == 0))
 		&& (rc = _job_still_running (job_id))) {
 		if ((max_time == 0) && (sec > 1))
-			_terminate_all_steps(job_id, true);
 		sleep (1);
 	}
 	/*
diff --git a/src/slurmd/slurmstepd/req.c b/src/slurmd/slurmstepd/req.c
index 350e49a5e6cda59aa08a6e0a85cf787331063aa5..0108cb27a158473f844fd6c5790d9e0f5a7b5b03 100644
--- a/src/slurmd/slurmstepd/req.c
+++ b/src/slurmd/slurmstepd/req.c
@@ -59,6 +59,7 @@ static int _handle_pid_in_container(int fd, slurmd_job_t *job);
 static int _handle_daemon_pid(int fd, slurmd_job_t *job);
 static int _handle_suspend(int fd, slurmd_job_t *job, uid_t uid);
 static int _handle_resume(int fd, slurmd_job_t *job, uid_t uid);
+static int _handle_terminate(int fd, slurmd_job_t *job, uid_t uid);
 static bool _msg_socket_readable(eio_obj_t *obj);
 static int _msg_socket_accept(eio_obj_t *obj, List objs);
 
@@ -414,6 +415,10 @@ _handle_request(int fd, slurmd_job_t *job, uid_t uid, gid_t gid)
 		debug("Handling REQUEST_STEP_RESUME");
 		rc = _handle_resume(fd, job, uid);
 		break;
+	case REQUEST_STEP_TERMINATE:
+		debug("Handling REQUEST_STEP_TERMINATE");
+		rc = _handle_terminate(fd, job, uid);
+		break;
 	default:
 		error("Unrecognized request: %d", req);
 		rc = SLURM_FAILURE;
@@ -645,6 +650,66 @@
 rwfail:
 	return SLURM_FAILURE;
 }
 
+static int
+_handle_terminate(int fd, slurmd_job_t *job, uid_t uid)
+{
+	int rc = SLURM_SUCCESS;
+	int errnum = 0;
+
+	debug("_handle_terminate for job %u.%u",
+	      job->jobid, job->stepid);
+	debug3("  uid = %d", uid);
+	if (uid != job->uid && !_slurm_authorized_user(uid)) {
+		debug("terminate req from uid %ld for job %u.%u "
+		      "owned by uid %ld",
+		      (long)uid, job->jobid, job->stepid, (long)job->uid);
+		rc = -1;
+		errnum = EPERM;
+		goto done;
+	}
+
+	/*
+	 * Sanity checks
+	 */
+	if (job->cont_id == 0) {
+		debug ("step %u.%u invalid container [cont_id:%u]",
+		       job->jobid, job->stepid, job->cont_id);
+		rc = -1;
+		errnum = ESLURMD_JOB_NOTRUNNING;
+		goto done;
+	}
+
+	/*
+	 * Signal the container with SIGKILL
+	 */
+	pthread_mutex_lock(&suspend_mutex);
+	if (suspended) {
+		debug("Terminating suspended job step %u.%u",
+		      job->jobid, job->stepid);
+	}
+
+	if (slurm_container_signal(job->cont_id, SIGKILL) < 0) {
+		rc = -1;
+		errnum = errno;
+		verbose("Error sending SIGKILL signal to %u.%u: %s",
+			job->jobid, job->stepid,
+			slurm_strerror(errnum));
+	} else {
+		verbose("Sent SIGKILL signal to %u.%u",
+			job->jobid, job->stepid);
+	}
+	pthread_mutex_unlock(&suspend_mutex);
+
+done:
+	/* Send the return code and errnum */
+	safe_write(fd, &rc, sizeof(int));
+	safe_write(fd, &errnum, sizeof(int));
+	return SLURM_SUCCESS;
+rwfail:
+	return SLURM_FAILURE;
+}
+
 static int
 _handle_attach(int fd, slurmd_job_t *job, uid_t uid)
 {
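
Note (illustration only, not part of the patch): the REQUEST_STEP_TERMINATE exchange added above is a plain two-int reply. stepd_terminate() writes the request code, then reads a return code followed by an errno value, and _handle_terminate() answers with those two ints after sending SIGKILL to the step's container. The self-contained sketch below reproduces just that framing over a socketpair using ordinary read()/write() in place of slurmd's safe_read()/safe_write() macros; FAKE_REQUEST_STEP_TERMINATE, demo_terminate(), demo_handler(), and the fork/socketpair harness are all invented for the example and are not SLURM code.

/*
 * Minimal sketch of the two-int request/reply framing used by
 * stepd_terminate() and _handle_terminate().  Everything here is a
 * stand-in: a socketpair instead of the slurmstepd domain socket,
 * plain read()/write() instead of safe_read()/safe_write(), and a
 * fake request constant.
 */
#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>

enum { FAKE_REQUEST_STEP_TERMINATE = 42 };	/* stand-in for the real enum value */

/* Client side: send the request, then read rc and errnum (like stepd_terminate) */
static int demo_terminate(int fd)
{
	int req = FAKE_REQUEST_STEP_TERMINATE;
	int rc, errnum;

	if (write(fd, &req, sizeof(int)) != (ssize_t) sizeof(int))
		return -1;
	if (read(fd, &rc, sizeof(int)) != (ssize_t) sizeof(int))
		return -1;
	if (read(fd, &errnum, sizeof(int)) != (ssize_t) sizeof(int))
		return -1;
	errno = errnum;	/* propagate the remote errno, as the patch does */
	return rc;
}

/* Server side: read the request, reply with rc and errnum (like _handle_terminate) */
static void demo_handler(int fd)
{
	int req, rc = 0, errnum = 0;

	if (read(fd, &req, sizeof(int)) != (ssize_t) sizeof(int))
		return;
	if (req != FAKE_REQUEST_STEP_TERMINATE) {
		rc = -1;
		errnum = EINVAL;
	}
	(void) write(fd, &rc, sizeof(int));
	(void) write(fd, &errnum, sizeof(int));
}

int main(void)
{
	int sv[2];

	if (socketpair(AF_UNIX, SOCK_STREAM, 0, sv) < 0)
		return 1;
	if (fork() == 0) {		/* child plays the slurmstepd role */
		close(sv[0]);
		demo_handler(sv[1]);
		_exit(0);
	}
	close(sv[1]);
	if (demo_terminate(sv[0]) < 0)
		printf("terminate failed: %s\n", strerror(errno));
	else
		printf("terminate succeeded\n");
	wait(NULL);
	return 0;
}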