Skip to content
Snippets Groups Projects
Commit a63e879e authored by Moe Jette's avatar Moe Jette
Browse files

scancel now defined to accept signal number argument.

slurm_cancel_job RPC now takes signal argument.
parent 1bb0dae4
No related branches found
No related tags found
No related merge requests found
.TH "Slurm API" "3" "October 2002" "Morris Jette" "Slurm job cancel calls" .TH "Slurm API" "3" "January 2003" "Morris Jette" "Slurm job cancel calls"
.SH "NAME" .SH "NAME"
slurm_cancel_job, slurm_cancel_job_step \- Slurm job cancel calls slurm_kill_job, slurm_kill_job_step \- Slurm job signal calls
.SH "SYNTAX" .SH "SYNTAX"
.LP .LP
#include <slurm.h> #include <slurm.h>
.LP .LP
int \fBslurm_cancel_job\fR ( int \fBslurm_kill_job\fR (
.br .br
uint32_t \fIjob_id\fP uint32_t \fIjob_id\fP,
.br
uint16_t \fIsignal\fP
.br .br
); );
.LP .LP
int \fBslurm_cancel_job_step\fR ( int \fBslurm_kill_job_step\fR (
.br .br
uint32_t \fIjob_id\fP, uint32_t \fIjob_id\fP,
.br .br
uint32_t \fIjob_step_id\fP uint32_t \fIjob_step_id\fP,
.br
uint16_t \fIsignal\fP
.br .br
); );
.SH "ARGUMENTS" .SH "ARGUMENTS"
...@@ -26,17 +30,18 @@ Slurm job id number. ...@@ -26,17 +30,18 @@ Slurm job id number.
.TP .TP
\fIjob_step_id\fP \fIjob_step_id\fP
Slurm job step id number. Slurm job step id number.
.TP
\fIsignal\fP
Signal to be sent to the job or job step.
.SH "DESCRIPTION" .SH "DESCRIPTION"
.LP .LP
\fBslurm_cancel_job\fR Request the cancellation of a running or pending job. This function \fBslurm_kill_job\fR Request that a signal be sent to a job and all of its job steps. If the job is pending, it will be terminated immediately. This function
may only be successfully executed by the job's owner or user root. may only be successfully executed by the job's owner or user root.
.LP .LP
\fBslurm_cancel_job_step\fR Request the cancellation of a running job step. This function \fBslurm_kill_job_step\fR Request that a signal be sent to a specific job step. This function may only be successfully executed by the job's owner or user root.
may only be successfully executed by the job's owner or user root.
.SH "RETURN VALUE" .SH "RETURN VALUE"
.LP .LP
On success, zero is returned. On error, -1 is returned, and Slurm error code is set On success, zero is returned. On error, -1 is returned, and Slurm error code is set appropriately.
appropriately.
.SH "ERRORS" .SH "ERRORS"
.LP .LP
\fBSLURM_PROTOCOL_VERSION_ERROR\fR Protocol version has changed, re-link your code. \fBSLURM_PROTOCOL_VERSION_ERROR\fR Protocol version has changed, re-link your code.
......
...@@ -36,31 +36,33 @@ ...@@ -36,31 +36,33 @@
#include "src/common/slurm_protocol_api.h" #include "src/common/slurm_protocol_api.h"
/* /*
* slurm_cancel_job - cancel an existing job and all of its steps * slurm_kill_job - send the specified signal to all steps of an existing job
* IN job_id - the job's id * IN job_id - the job's id
* IN signal - signal number
* RET 0 on success or slurm error code * RET 0 on success or slurm error code
*/ */
int int
slurm_cancel_job ( uint32_t job_id ) slurm_kill_job ( uint32_t job_id, uint16_t signal )
{ {
return slurm_cancel_job_step ( job_id, NO_VAL); return slurm_kill_job_step ( job_id, NO_VAL, signal );
} }
/* /*
* slurm_cancel_job_step - cancel a specific job step * slurm_kill_job_step - send the specified signal to an existing job step
* IN job_id - the job's id * IN job_id - the job's id
* IN step_id - the job step's id * IN step_id - the job step's id
* IN signal - signal number
* RET 0 on success or slurm error code * RET 0 on success or slurm error code
*/ */
int int
slurm_cancel_job_step ( uint32_t job_id, uint32_t step_id ) slurm_kill_job_step ( uint32_t job_id, uint32_t step_id, uint16_t signal )
{ {
int msg_size ; int msg_size ;
int rc ; int rc ;
slurm_fd sockfd ; slurm_fd sockfd ;
slurm_msg_t request_msg ; slurm_msg_t request_msg ;
slurm_msg_t response_msg ; slurm_msg_t response_msg ;
job_step_id_msg_t job_step_id_msg ; job_step_kill_msg_t job_step_kill_msg ;
return_code_msg_t * slurm_rc_msg ; return_code_msg_t * slurm_rc_msg ;
/* init message connection for message communication with controller */ /* init message connection for message communication with controller */
...@@ -71,10 +73,11 @@ slurm_cancel_job_step ( uint32_t job_id, uint32_t step_id ) ...@@ -71,10 +73,11 @@ slurm_cancel_job_step ( uint32_t job_id, uint32_t step_id )
} }
/* send request message */ /* send request message */
job_step_id_msg . job_id = job_id ; job_step_kill_msg . job_id = job_id ;
job_step_id_msg . job_step_id = step_id ; job_step_kill_msg . job_step_id = step_id ;
job_step_kill_msg . signal = signal ;
request_msg . msg_type = REQUEST_CANCEL_JOB_STEP ; request_msg . msg_type = REQUEST_CANCEL_JOB_STEP ;
request_msg . data = &job_step_id_msg ; request_msg . data = &job_step_kill_msg ;
if ( ( rc = slurm_send_controller_msg ( sockfd , & request_msg ) ) if ( ( rc = slurm_send_controller_msg ( sockfd , & request_msg ) )
== SLURM_SOCKET_ERROR ) { == SLURM_SOCKET_ERROR ) {
slurm_seterrno ( SLURM_COMMUNICATIONS_SEND_ERROR ); slurm_seterrno ( SLURM_COMMUNICATIONS_SEND_ERROR );
......
...@@ -487,23 +487,26 @@ extern int slurm_job_will_run (job_desc_msg_t * job_desc_msg , ...@@ -487,23 +487,26 @@ extern int slurm_job_will_run (job_desc_msg_t * job_desc_msg ,
/*****************************************************************************\ /*****************************************************************************\
* JOB/STEP CANCELATION FUNCTIONS * JOB/STEP SIGNALING FUNCTIONS
\*****************************************************************************/ \*****************************************************************************/
/* /*
* slurm_cancel_job - cancel an existing job and all of its steps * slurm_kill_job - send the specified signal to all steps of an existing job
* IN job_id - the job's id * IN job_id - the job's id
* IN signal - signal number
* RET 0 on success or slurm error code * RET 0 on success or slurm error code
*/ */
extern int slurm_cancel_job (uint32_t job_id); extern int slurm_kill_job (uint32_t job_id, uint16_t signal);
/* /*
* slurm_cancel_job_step - cancel a specific job step * slurm_kill_job_step - send the specified signal to an existing job step
* IN job_id - the job's id * IN job_id - the job's id
* IN step_id - the job step's id * IN step_id - the job step's id
* IN signal - signal number
* RET 0 on success or slurm error code * RET 0 on success or slurm error code
*/ */
extern int slurm_cancel_job_step (uint32_t job_id, uint32_t step_id); extern int slurm_kill_job_step (uint32_t job_id, uint32_t step_id,
uint16_t signal);
/*****************************************************************************\ /*****************************************************************************\
......
...@@ -85,6 +85,11 @@ void slurm_free_batch_resp_msg(batch_launch_response_msg_t * msg) ...@@ -85,6 +85,11 @@ void slurm_free_batch_resp_msg(batch_launch_response_msg_t * msg)
FREE_IF_SET(msg); FREE_IF_SET(msg);
} }
/*
 * slurm_free_job_step_kill_msg - release a job step signal message
 * IN msg - message to release; handed directly to FREE_IF_SET
 * NOTE(review): FREE_IF_SET presumably skips a NULL pointer - confirm
 *	against the macro definition
 */
void slurm_free_job_step_kill_msg(job_step_kill_msg_t * msg)
{
FREE_IF_SET(msg);
}
void slurm_free_job_desc_msg(job_desc_msg_t * msg) void slurm_free_job_desc_msg(job_desc_msg_t * msg)
{ {
int i; int i;
......
...@@ -194,6 +194,12 @@ typedef struct job_step_id { ...@@ -194,6 +194,12 @@ typedef struct job_step_id {
uint32_t job_step_id; uint32_t job_step_id;
} job_step_id_t; } job_step_id_t;
/* Payload of a REQUEST_CANCEL_JOB_STEP message: identifies the job or job
 * step to be signaled and the signal to deliver */
typedef struct job_step_kill_msg {
uint32_t job_id;	/* id of the target job */
uint32_t job_step_id;	/* id of the target step; slurm_kill_job passes
			 * NO_VAL here to address the job as a whole */
uint16_t signal;	/* signal number to send */
} job_step_kill_msg_t;
typedef struct job_id_msg { typedef struct job_id_msg {
uint32_t job_id; uint32_t job_id;
} job_id_msg_t; } job_id_msg_t;
...@@ -412,6 +418,8 @@ void inline slurm_free_update_job_time_msg(job_time_msg_t * msg); ...@@ -412,6 +418,8 @@ void inline slurm_free_update_job_time_msg(job_time_msg_t * msg);
void inline slurm_free_batch_resp_msg(batch_launch_response_msg_t * msg); void inline slurm_free_batch_resp_msg(batch_launch_response_msg_t * msg);
void inline slurm_free_job_step_kill_msg(job_step_kill_msg_t * msg);
extern char *job_dist_string(uint16_t inx); extern char *job_dist_string(uint16_t inx);
extern char *job_state_string(enum job_states inx); extern char *job_state_string(enum job_states inx);
extern char *job_state_string_compact(enum job_states inx); extern char *job_state_string_compact(enum job_states inx);
......
...@@ -203,6 +203,10 @@ static void _pack_batch_job_resp_msg(batch_launch_response_msg_t * msg, ...@@ -203,6 +203,10 @@ static void _pack_batch_job_resp_msg(batch_launch_response_msg_t * msg,
static int _unpack_batch_job_resp_msg(batch_launch_response_msg_t ** msg, static int _unpack_batch_job_resp_msg(batch_launch_response_msg_t ** msg,
Buf buffer); Buf buffer);
static void _pack_job_step_kill_msg(job_step_kill_msg_t * msg, Buf buffer);
static int _unpack_job_step_kill_msg(job_step_kill_msg_t ** msg_ptr,
Buf buffer);
static void _pack_buffer_msg(slurm_msg_t * msg, Buf buffer); static void _pack_buffer_msg(slurm_msg_t * msg, Buf buffer);
/* pack_header /* pack_header
...@@ -411,9 +415,12 @@ pack_msg(slurm_msg_t const *msg, Buf buffer) ...@@ -411,9 +415,12 @@ pack_msg(slurm_msg_t const *msg, Buf buffer)
break; break;
/******** job_step_id_t Messages ********/ /******** job_step_id_t Messages ********/
case REQUEST_JOB_INFO: case REQUEST_JOB_INFO:
case REQUEST_CANCEL_JOB_STEP:
_pack_job_step_id_msg((job_step_id_t *) msg->data, buffer); _pack_job_step_id_msg((job_step_id_t *) msg->data, buffer);
break; break;
case REQUEST_CANCEL_JOB_STEP:
_pack_job_step_kill_msg((job_step_kill_msg_t *)
msg->data, buffer);
break;
case REQUEST_COMPLETE_JOB_STEP: case REQUEST_COMPLETE_JOB_STEP:
_pack_complete_job_step_msg((complete_job_step_msg_t *) _pack_complete_job_step_msg((complete_job_step_msg_t *)
msg->data, buffer); msg->data, buffer);
...@@ -619,10 +626,13 @@ unpack_msg(slurm_msg_t * msg, Buf buffer) ...@@ -619,10 +626,13 @@ unpack_msg(slurm_msg_t * msg, Buf buffer)
break; break;
/******** job_step_id_t Messages ********/ /******** job_step_id_t Messages ********/
case REQUEST_JOB_INFO: case REQUEST_JOB_INFO:
case REQUEST_CANCEL_JOB_STEP:
rc = _unpack_job_step_id_msg((job_step_id_t **) rc = _unpack_job_step_id_msg((job_step_id_t **)
& (msg->data), buffer); & (msg->data), buffer);
break; break;
case REQUEST_CANCEL_JOB_STEP:
rc = _unpack_job_step_kill_msg((job_step_kill_msg_t **)
& (msg->data), buffer);
break;
case REQUEST_COMPLETE_JOB_STEP: case REQUEST_COMPLETE_JOB_STEP:
rc = _unpack_complete_job_step_msg((complete_job_step_msg_t rc = _unpack_complete_job_step_msg((complete_job_step_msg_t
**) & (msg->data), **) & (msg->data),
...@@ -2212,6 +2222,45 @@ _unpack_job_step_id_msg(job_step_id_t ** msg_ptr, Buf buffer) ...@@ -2212,6 +2222,45 @@ _unpack_job_step_id_msg(job_step_id_t ** msg_ptr, Buf buffer)
return SLURM_ERROR; return SLURM_ERROR;
} }
/* _pack_job_step_kill_msg
 * packs a slurm job step signal message
 * IN msg - pointer to the job step signal message
 * IN/OUT buffer - destination of the pack, contains pointers that are
 *			automatically updated
 * NOTE: fields are written as job_id (32 bit), job_step_id (32 bit),
 *	signal (16 bit); _unpack_job_step_kill_msg reads them in the same
 *	order, so the two routines must be changed together
 */
static void
_pack_job_step_kill_msg(job_step_kill_msg_t * msg, Buf buffer)
{
pack32(msg->job_id, buffer);
pack32(msg->job_step_id, buffer);
pack16(msg->signal, buffer);
}
/* _unpack_job_step_kill_msg
 * unpacks a slurm job step signal message
 * OUT msg_ptr - set to a newly xmalloc'ed job step signal message on
 *			success, or to NULL on failure
 * IN/OUT buffer - source of the unpack, contains pointers that are
 *			automatically updated
 * RET SLURM_SUCCESS, or SLURM_ERROR if the message could not be unpacked
 * NOTE: fields are read in the order written by _pack_job_step_kill_msg
 */
static int
_unpack_job_step_kill_msg(job_step_kill_msg_t ** msg_ptr, Buf buffer)
{
job_step_kill_msg_t *msg;
msg = xmalloc(sizeof(job_step_kill_msg_t));
/* publish the message before unpacking; the error path resets it */
*msg_ptr = msg;
/* NOTE(review): no explicit goto appears here, so the safe_unpack*
 * macros presumably branch to unpack_error on failure - confirm */
safe_unpack32(&msg->job_id, buffer);
safe_unpack32(&msg->job_step_id, buffer);
safe_unpack16(&msg->signal, buffer);
return SLURM_SUCCESS;
unpack_error:
/* release the partially filled message so the caller never sees it */
FREE_NULL(msg);
*msg_ptr = NULL;
return SLURM_ERROR;
}
static void static void
_pack_complete_job_step_msg(complete_job_step_msg_t * msg, Buf buffer) _pack_complete_job_step_msg(complete_job_step_msg_t * msg, Buf buffer)
{ {
......
...@@ -31,6 +31,7 @@ ...@@ -31,6 +31,7 @@
#endif #endif
#include <pwd.h> #include <pwd.h>
#include <signal.h>
#include <stdlib.h> #include <stdlib.h>
#include <string.h> /* strcpy, strncasecmp */ #include <string.h> /* strcpy, strncasecmp */
#include <sys/types.h> #include <sys/types.h>
...@@ -48,6 +49,8 @@ ...@@ -48,6 +49,8 @@
#define __DEBUG 0 #define __DEBUG 0
/* SIZE - number of elements in array a; only valid for true arrays, not
 * for pointers. The argument is parenthesized so that an expression
 * argument is indexed as a whole. */
#define SIZE(a) (sizeof(a) / sizeof((a)[0]))
/*---[ popt definitions ]------------------------------------------------*/ /*---[ popt definitions ]------------------------------------------------*/
/* generic OPT_ definitions -- mainly for use with env vars /* generic OPT_ definitions -- mainly for use with env vars
...@@ -65,6 +68,7 @@ ...@@ -65,6 +68,7 @@
#define OPT_USER 0x07 #define OPT_USER 0x07
#define OPT_VERBOSE 0x08 #define OPT_VERBOSE 0x08
#define OPT_VERSION 0x09 #define OPT_VERSION 0x09
#define OPT_SIGNAL 0x0a
#ifndef POPT_TABLEEND #ifndef POPT_TABLEEND
...@@ -78,7 +82,9 @@ struct poptOption options[] = { ...@@ -78,7 +82,9 @@ struct poptOption options[] = {
"name of job", "name"}, "name of job", "name"},
{"partition", 'p', POPT_ARG_STRING, NULL, OPT_PARTITION, {"partition", 'p', POPT_ARG_STRING, NULL, OPT_PARTITION,
"name of job's partition", "name"}, "name of job's partition", "name"},
{"state", 's', POPT_ARG_STRING, NULL, OPT_STATE, {"signal", 's', POPT_ARG_STRING, NULL, OPT_SIGNAL,
"signal name or number", "name | integer"},
{"state", 't', POPT_ARG_STRING, NULL, OPT_STATE,
"name of job's state", "PENDING | RUNNING"}, "name of job's state", "PENDING | RUNNING"},
{"user", 'u', POPT_ARG_STRING, NULL, OPT_USER, {"user", 'u', POPT_ARG_STRING, NULL, OPT_USER,
"name of job's owner", "name"}, "name of job's owner", "name"},
...@@ -90,6 +96,22 @@ struct poptOption options[] = { ...@@ -90,6 +96,22 @@ struct poptOption options[] = {
POPT_TABLEEND POPT_TABLEEND
}; };
/* Signal names recognized by xlate_signal_name, written without the "SIG"
 * prefix, each paired with its <signal.h> number. Names are matched
 * case-insensitively by the lookup. */
struct signv {
char *name;	/* signal name without the "SIG" prefix */
uint16_t val;	/* corresponding signal number */
} sys_signame[ ] = {
{ "HUP", SIGHUP },
{ "INT", SIGINT },
{ "QUIT", SIGQUIT },
{ "KILL", SIGKILL },
{ "ALRM", SIGALRM },
{ "TERM", SIGTERM },
{ "USR1", SIGUSR1 },
{ "USR2", SIGUSR2 },
{ "STOP", SIGSTOP },
{ "CONT", SIGCONT }
};
/*---[ end popt definitions ]---------------------------------------------*/ /*---[ end popt definitions ]---------------------------------------------*/
/* forward declarations of static functions /* forward declarations of static functions
...@@ -119,6 +141,10 @@ static void print_version (void); ...@@ -119,6 +141,10 @@ static void print_version (void);
*/ */
static enum job_states xlate_state_name(const char *state_name); static enum job_states xlate_state_name(const char *state_name);
/* translate a signal name or number to a signal number
*/
static uint16_t xlate_signal_name(const char *signal_name);
/* list known options and their settings /* list known options and their settings
*/ */
#if __DEBUG #if __DEBUG
...@@ -165,12 +191,40 @@ static enum job_states xlate_state_name(const char *state_name) ...@@ -165,12 +191,40 @@ static enum job_states xlate_state_name(const char *state_name)
xstrcat(state_names, ","); xstrcat(state_names, ",");
xstrcat(state_names, job_state_string(i)); xstrcat(state_names, job_state_string(i));
} }
fprintf (stderr, "Valid job states include: %s", state_names); fprintf (stderr, "Valid job states include: %s\n", state_names);
xfree (state_names); xfree (state_names);
exit (1); exit (1);
} }
/*
 * xlate_signal_name - translate a signal name or number to a signal number
 * IN signal_name - either a decimal signal number (e.g. "9") or one of the
 *	names listed in sys_signame (e.g. "KILL"), matched without regard
 *	to case
 * RET the signal number; if the value is not recognized, the list of valid
 *	names is printed to stderr and the program exits with status 1
 */
static uint16_t xlate_signal_name(const char *signal_name)
{
	long sig_num;
	char *end_ptr, *sig_names = NULL;
	size_t i;

	/* Treat the argument as a number only if the whole string parsed and
	 * the value fits in uint16_t. A partial parse ("9x"), an empty
	 * string, or an out-of-range value falls through to the name lookup
	 * and is reported as invalid instead of being silently accepted. */
	sig_num = strtol(signal_name, &end_ptr, 10);
	if ((end_ptr != signal_name) && (*end_ptr == '\0') &&
	    (sig_num >= 0) && (sig_num <= 0xffff))
		return (uint16_t) sig_num;

	for (i = 0; i < SIZE(sys_signame); i++) {
		if (strcasecmp(sys_signame[i].name, signal_name) == 0)
			return sys_signame[i].val;
	}

	/* No match: only now build the comma separated list of valid names,
	 * so the success path never touches (or frees) the list */
	for (i = 0; i < SIZE(sys_signame); i++) {
		if (i == 0)
			sig_names = xstrdup(sys_signame[i].name);
		else {
			xstrcat(sig_names, ",");
			xstrcat(sig_names, sys_signame[i].name);
		}
	}
	fprintf (stderr, "Invalid job signal: %s\n", signal_name);
	fprintf (stderr, "Valid signals include: %s\n", sig_names);
	xfree(sig_names);
	exit(1);
}
static void print_version (void) static void print_version (void)
{ {
printf("%s %s\n", PACKAGE, VERSION); printf("%s %s\n", PACKAGE, VERSION);
...@@ -185,6 +239,7 @@ static void opt_default() ...@@ -185,6 +239,7 @@ static void opt_default()
opt.job_cnt = 0; opt.job_cnt = 0;
opt.job_name = NULL; opt.job_name = NULL;
opt.partition = NULL; opt.partition = NULL;
opt.signal = SIGKILL;
opt.state = JOB_END; opt.state = JOB_END;
opt.user_name = NULL; opt.user_name = NULL;
opt.user_id = 0; opt.user_id = 0;
...@@ -279,6 +334,10 @@ static void opt_args(int ac, char **av) ...@@ -279,6 +334,10 @@ static void opt_args(int ac, char **av)
opt.partition = xstrdup(arg); opt.partition = xstrdup(arg);
break; break;
case OPT_SIGNAL:
opt.signal = xlate_signal_name(arg);
break;
case OPT_STATE: case OPT_STATE:
opt.state = xlate_state_name(arg); opt.state = xlate_state_name(arg);
break; break;
...@@ -415,6 +474,7 @@ opt_list(void) ...@@ -415,6 +474,7 @@ opt_list(void)
info("interactive : %s", tf_(opt.interactive)); info("interactive : %s", tf_(opt.interactive));
info("job_name : %s", opt.job_name); info("job_name : %s", opt.job_name);
info("partition : %s", opt.partition); info("partition : %s", opt.partition);
info("signal : %u", opt.signal);
info("state : %s", job_state_string(opt.state)); info("state : %s", job_state_string(opt.state));
info("user_id : %u", opt.user_id); info("user_id : %u", opt.user_id);
info("user_name : %s", opt.user_name); info("user_name : %s", opt.user_name);
......
...@@ -25,7 +25,7 @@ ...@@ -25,7 +25,7 @@
\*****************************************************************************/ \*****************************************************************************/
#if HAVE_CONFIG_H #if HAVE_CONFIG_H
# include <config.h> # include "config.h"
#endif #endif
#include <unistd.h> #include <unistd.h>
...@@ -51,8 +51,9 @@ ...@@ -51,8 +51,9 @@
#define MAX_CANCEL_RETRY 10 #define MAX_CANCEL_RETRY 10
static void cancel_jobs (void); static void cancel_jobs (void);
static void cancel_job_id (uint32_t job_id); static void _cancel_job_id (uint32_t job_id, uint16_t signal);
static void cancel_step_id (uint32_t job_id, uint32_t step_id); static void _cancel_step_id (uint32_t job_id, uint32_t step_id,
uint16_t signal);
static int confirmation (int i); static int confirmation (int i);
static void filter_job_records (void); static void filter_job_records (void);
static void load_job_records (void); static void load_job_records (void);
...@@ -173,10 +174,12 @@ cancel_jobs (void) ...@@ -173,10 +174,12 @@ cancel_jobs (void)
if (opt.interactive && (confirmation(i) == 0)) if (opt.interactive && (confirmation(i) == 0))
break; break;
if (opt.step_id[j] == NO_VAL) if (opt.step_id[j] == NO_VAL)
cancel_job_id (opt.job_id[j]); _cancel_job_id (opt.job_id[j],
opt.signal);
else else
cancel_step_id (opt.job_id[j], _cancel_step_id (opt.job_id[j],
opt.step_id[j]); opt.step_id[j],
opt.signal);
break; break;
} }
if (i >= job_buffer_ptr->record_count) if (i >= job_buffer_ptr->record_count)
...@@ -187,10 +190,12 @@ cancel_jobs (void) ...@@ -187,10 +190,12 @@ cancel_jobs (void)
} else if (opt.job_cnt) { /* delete specific jobs */ } else if (opt.job_cnt) { /* delete specific jobs */
for (j = 0; j < opt.job_cnt; j++ ) { for (j = 0; j < opt.job_cnt; j++ ) {
if (opt.step_id[j] == NO_VAL) if (opt.step_id[j] == NO_VAL)
cancel_job_id (opt.job_id[j]); _cancel_job_id (opt.job_id[j],
opt.signal);
else else
cancel_step_id (opt.job_id[j], _cancel_step_id (opt.job_id[j],
opt.step_id[j]); opt.step_id[j],
opt.signal);
} }
} else { /* delete all jobs per filtering */ } else { /* delete all jobs per filtering */
...@@ -200,19 +205,19 @@ cancel_jobs (void) ...@@ -200,19 +205,19 @@ cancel_jobs (void)
continue; continue;
if (opt.interactive && (confirmation(i) == 0)) if (opt.interactive && (confirmation(i) == 0))
continue; continue;
cancel_job_id (job_ptr[i].job_id); _cancel_job_id (job_ptr[i].job_id, opt.signal);
} }
} }
} }
static void static void
cancel_job_id (uint32_t job_id) _cancel_job_id (uint32_t job_id, uint16_t signal)
{ {
int error_code, i; int error_code, i;
for (i=0; i<MAX_CANCEL_RETRY; i++) { for (i=0; i<MAX_CANCEL_RETRY; i++) {
verbose("cancelling job %u", job_id); verbose("Killing job %u", job_id);
error_code = slurm_cancel_job (job_id); error_code = slurm_kill_job (job_id, signal);
if ((error_code == 0) || if ((error_code == 0) ||
(errno != ESLURM_TRANSITION_STATE_NO_UPDATE)) (errno != ESLURM_TRANSITION_STATE_NO_UPDATE))
break; break;
...@@ -220,19 +225,19 @@ cancel_job_id (uint32_t job_id) ...@@ -220,19 +225,19 @@ cancel_job_id (uint32_t job_id)
sleep ( 5 + i ); sleep ( 5 + i );
} }
if (error_code) { if (error_code) {
fprintf (stderr, "Cancel job error on job id %u: %s\n", fprintf (stderr, "Kill job error on job id %u: %s\n",
job_id, slurm_strerror(slurm_get_errno())); job_id, slurm_strerror(slurm_get_errno()));
} }
} }
static void static void
cancel_step_id (uint32_t job_id, uint32_t step_id) _cancel_step_id (uint32_t job_id, uint32_t step_id, uint16_t signal)
{ {
int error_code, i; int error_code, i;
for (i=0; i<MAX_CANCEL_RETRY; i++) { for (i=0; i<MAX_CANCEL_RETRY; i++) {
verbose("cancelling steo %u.%u", job_id, step_id); verbose("Killing step %u.%u", job_id, step_id);
error_code = slurm_cancel_job_step (job_id, step_id); error_code = slurm_kill_job_step (job_id, step_id, signal);
if ((error_code == 0) || if ((error_code == 0) ||
(errno != ESLURM_TRANSITION_STATE_NO_UPDATE)) (errno != ESLURM_TRANSITION_STATE_NO_UPDATE))
break; break;
...@@ -240,7 +245,7 @@ cancel_step_id (uint32_t job_id, uint32_t step_id) ...@@ -240,7 +245,7 @@ cancel_step_id (uint32_t job_id, uint32_t step_id)
sleep ( 5 + i ); sleep ( 5 + i );
} }
if (error_code) { if (error_code) {
fprintf (stderr, "Cancel job error on job id %u.%u: %s\n", fprintf (stderr, "Kill job error on job id %u.%u: %s\n",
job_id, step_id, slurm_strerror(slurm_get_errno())); job_id, step_id, slurm_strerror(slurm_get_errno()));
} }
} }
......
...@@ -28,7 +28,7 @@ ...@@ -28,7 +28,7 @@
#define _HAVE_SCANCEL_H #define _HAVE_SCANCEL_H
#if HAVE_CONFIG_H #if HAVE_CONFIG_H
#include <config.h> #include "config.h"
#endif #endif
/* /*
...@@ -53,7 +53,8 @@ typedef struct scancel_options { ...@@ -53,7 +53,8 @@ typedef struct scancel_options {
bool interactive; /* --interactive, -i */ bool interactive; /* --interactive, -i */
char *job_name; /* --name=n, -nn */ char *job_name; /* --name=n, -nn */
char *partition; /* --partition=n, -pn */ char *partition; /* --partition=n, -pn */
enum job_states state; /* --state=n, -sn */ uint16_t signal; /* --signal=n, -sn */
enum job_states state; /* --state=n, -tn */
uid_t user_id; /* --user=n, -un */ uid_t user_id; /* --user=n, -un */
char *user_name; /* --user=n, -un */ char *user_name; /* --user=n, -un */
int verbose; /* --verbose, -v */ int verbose; /* --verbose, -v */
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment