Skip to content
Snippets Groups Projects
Commit 3e6748e1 authored by jette's avatar jette
Browse files

Enable SIGINT to srun with pending job step

The logic added to wake pending job steps as soon as resources become
available lacked signal handling logic. This adds signal handling logic.
Fix for bug 339
parent ba58d59c
No related branches found
No related tags found
No related merge requests found
...@@ -64,10 +64,24 @@ ...@@ -64,10 +64,24 @@
#include "src/common/slurm_protocol_api.h" #include "src/common/slurm_protocol_api.h"
#include "src/common/slurm_protocol_defs.h" #include "src/common/slurm_protocol_defs.h"
#include "src/common/xmalloc.h" #include "src/common/xmalloc.h"
#include "src/common/xsignal.h"
#include "src/common/xstring.h" #include "src/common/xstring.h"
#include "src/common/slurm_cred.h"
#include "src/api/step_ctx.h" #include "src/api/step_ctx.h"
/*
 * Zero-terminated list of signals that are unblocked and routed to
 * _signal_while_allocating() while waiting on a pending job step.
 */
int step_signals[] = {
	SIGINT, SIGQUIT, SIGCONT, SIGTERM, SIGHUP,
	SIGALRM, SIGUSR1, SIGUSR2, SIGPIPE, 0 };

/*
 * Set to 1 by _signal_while_allocating() when a signal other than SIGCONT
 * arrives. Declared volatile sig_atomic_t because it is written from an
 * asynchronous signal handler and read from normal code; a plain int gives
 * no guarantee that the store is atomic or visible to the waiting loop.
 */
static volatile sig_atomic_t destroy_step = 0;

/*
 * Signal handler installed while a job step request is pending.
 * signo IN - number of the signal that was delivered
 *
 * SIGCONT is logged and otherwise ignored; any other signal flags the
 * pending step for cancellation via destroy_step.
 */
static void _signal_while_allocating(int signo)
{
	/* NOTE(review): debug() is presumably not async-signal-safe (logging
	 * typically locks and formats); kept to preserve existing behavior,
	 * confirm whether it should be moved out of the handler. */
	debug("Got signal %d", signo);
	if (signo == SIGCONT)
		return;
	destroy_step = 1;
}
static void static void
_job_fake_cred(struct slurm_step_ctx_struct *ctx) _job_fake_cred(struct slurm_step_ctx_struct *ctx)
{ {
...@@ -203,7 +217,7 @@ slurm_step_ctx_create_timeout (const slurm_step_ctx_params_t *step_params, ...@@ -203,7 +217,7 @@ slurm_step_ctx_create_timeout (const slurm_step_ctx_params_t *step_params,
struct slurm_step_ctx_struct *ctx = NULL; struct slurm_step_ctx_struct *ctx = NULL;
job_step_create_request_msg_t *step_req = NULL; job_step_create_request_msg_t *step_req = NULL;
job_step_create_response_msg_t *step_resp = NULL; job_step_create_response_msg_t *step_resp = NULL;
int rc, time_left = timeout; int i, rc, time_left = timeout;
int sock = -1; int sock = -1;
short port = 0; short port = 0;
int errnum = 0; int errnum = 0;
...@@ -232,12 +246,22 @@ slurm_step_ctx_create_timeout (const slurm_step_ctx_params_t *step_params, ...@@ -232,12 +246,22 @@ slurm_step_ctx_create_timeout (const slurm_step_ctx_params_t *step_params,
struct pollfd fds; struct pollfd fds;
fds.fd = sock; fds.fd = sock;
fds.events = POLLIN; fds.events = POLLIN;
xsignal_unblock(step_signals);
for (i = 0; step_signals[i]; i++)
xsignal(step_signals[i], _signal_while_allocating);
while ((rc = poll(&fds, 1, time_left)) <= 0) { while ((rc = poll(&fds, 1, time_left)) <= 0) {
if (destroy_step)
break;
if ((errno == EINTR) || (errno == EAGAIN)) if ((errno == EINTR) || (errno == EAGAIN))
continue; continue;
break; break;
} }
rc = slurm_job_step_create(step_req, &step_resp); xsignal_block(step_signals);
if (destroy_step) {
info("Cancelled pending job step");
errno = ESLURM_ALREADY_DONE;
} else
rc = slurm_job_step_create(step_req, &step_resp);
} }
if ((rc < 0) || (step_resp == NULL)) { if ((rc < 0) || (step_resp == NULL)) {
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment