Skip to content
Snippets Groups Projects
Commit f40dc7d6 authored by Moe Jette's avatar Moe Jette
Browse files

Greatly expanded functionality, can filter on job name, user name, job

state, or job_id. Reads job table to perform filtering and interactive
responses.
parent 5b068fb8
No related branches found
No related tags found
No related merge requests found
......@@ -2,13 +2,15 @@
AUTOMAKE_OPTIONS = foreign
INCLUDES = -I$(top_srcdir) -I$(top_srcdir)/src/api
INCLUDES = -I$(top_srcdir) -I$(top_srcdir)/src/api $(POPT_INCLUDES)
bin_PROGRAMS = scancel
LDADD = \
$(top_builddir)/src/common/libcommon.la \
$(top_builddir)/src/api/libslurm.la
$(top_builddir)/src/api/libslurm.la \
$(POPT_LIBS)
scancel_SOURCES = scancel.c
noinst_HEADERS = scancel.h
scancel_SOURCES = scancel.c opt.c
/*****************************************************************************\
* opt.c - options processing for scancel
*****************************************************************************
* Copyright (C) 2002 The Regents of the University of California.
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
* Written by Mark Grondona <grondona1@llnl.gov>, et. al.
* UCRL-CODE-2002-040.
*
* This file is part of SLURM, a resource management program.
* For details, see <http://www.llnl.gov/linux/slurm/>.
*
* SLURM is free software; you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*
* SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License along
* with SLURM; if not, write to the Free Software Foundation, Inc.,
* 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
\*****************************************************************************/
#if HAVE_POPT_H
#include <popt.h>
#else
#include <src/popt/popt.h>
#endif
#include <pwd.h>
#include <stdlib.h>
#include <string.h> /* strcpy, strncasecmp */
#include <sys/types.h>
#include <unistd.h>
#ifdef HAVE_STRINGS_H
# include <strings.h>
#endif
#include <src/common/log.h>
#include <src/common/slurm_protocol_defs.h>
#include <src/common/xmalloc.h>
#include <src/common/xstring.h>
#include <src/scancel/scancel.h>
#define __DEBUG 0
/*---[ popt definitions ]------------------------------------------------*/
/* generic OPT_ definitions -- mainly for use with env vars
* (not to be confused with POPT_* definitions)
*/
#define OPT_NONE 0x00
#define OPT_INT 0x01
#define OPT_STRING 0x02
/* specific options, used with popt (and env var processing) */
#define OPT_INTERACTIVE 0x03
#define OPT_NAME 0x04
#define OPT_PARTITION 0x05
#define OPT_STATE 0x06
#define OPT_USER 0x07
#define OPT_VERBOSE 0x08
#define OPT_VERSION 0x09
#ifndef POPT_TABLEEND
# define POPT_TABLEEND { NULL, '\0', 0, 0, 0, NULL, NULL }
#endif
/*
 * Command line option table handed to poptGetContext() by opt_args().
 * Field order in each entry:
 *   long name, short name, argument type, storage pointer,
 *   value returned by poptGetNextOpt(), help text, argument description
 */
struct poptOption options[] = {
	{ "interactive", 'i', POPT_ARG_NONE, &opt.interactive,
	  OPT_INTERACTIVE,
	  "confirm each job cancelation", NULL },

	{ "name", 'n', POPT_ARG_STRING, NULL,
	  OPT_NAME,
	  "name of job", "name" },

	{ "partition", 'p', POPT_ARG_STRING, NULL,
	  OPT_PARTITION,
	  "name of job's partition", "name" },

	{ "state", 's', POPT_ARG_STRING, NULL,
	  OPT_STATE,
	  "name of job's state", "PENDING | RUNNING" },

	{ "user", 'u', POPT_ARG_STRING, NULL,
	  OPT_USER,
	  "name of job's owner", "name" },

	{ "verbose", 'v', POPT_ARG_NONE, NULL,
	  OPT_VERBOSE,
	  "verbose operation (multiple -v's increase verbosity)", NULL },

	{ "Version", 'V', POPT_ARG_NONE, NULL,
	  OPT_VERSION,
	  "report the current version", NULL },

	POPT_AUTOHELP
	POPT_TABLEEND
};
/*---[ end popt definitions ]---------------------------------------------*/
/* forward declarations of static functions
*
*/
/*
* fill in default options
*/
static void opt_default(void);
/* set options based upon env vars
*/
static void opt_env(void);
/* set options based upon commandline args
*/
static void opt_args(int, char **);
/* verify options sanity
*/
static bool opt_verify(void);
static void print_version (void);
/* translate job state name to number
*/
static enum job_states xlate_state_name(const char *state_name);
/* list known options and their settings
*/
#if __DEBUG
static void opt_list(void);
#endif
static void xlate_job_step_ids(const char **rest);
/*---[ end forward declarations of static functions ]---------------------*/
/*
 * initialize_and_process_args - fill in the global option set
 *
 * Sources are applied in a fixed order so that each later one can
 * replace what an earlier one stored: built-in defaults, then
 * environment variables, then the command line.
 *
 * IN argc, argv - command line as received by main()
 * RET always 1
 */
int initialize_and_process_args(int argc, char *argv[])
{
	opt_default();		/* built-in defaults */
	opt_env();		/* SCANCEL_* environment variables */
	opt_args(argc, argv);	/* command line */

#if __DEBUG
	opt_list();
#endif

	return 1;
}
/*
 * xlate_state_name - translate a job state name to its enum value
 * IN state_name - name to look up; compared without regard to case against
 *	both job_state_string() and job_state_string_compact() of every
 *	state below JOB_END
 * RET the matching state
 * NOTE: does not return on failure; the rejected name and the list of
 *	accepted names are written to stderr and the program exits with
 *	status 1
 */
static enum job_states xlate_state_name(const char *state_name)
{
	enum job_states i;
	char *state_names;

	for (i = 0; i < JOB_END; i++) {
		if ((strcasecmp(state_name, job_state_string(i)) == 0) ||
		    (strcasecmp(state_name,
				job_state_string_compact(i)) == 0)) {
			return i;
		}
	}

	/* terminate each diagnostic so the two do not share one line */
	fprintf (stderr, "Invalid job state specified: %s\n", state_name);

	/* build "NAME0,NAME1,..." for the hint */
	state_names = xstrdup(job_state_string(0));
	for (i = 1; i < JOB_END; i++) {
		xstrcat(state_names, ",");
		xstrcat(state_names, job_state_string(i));
	}
	fprintf (stderr, "Valid job states include: %s\n", state_names);
	xfree (state_names);
	exit (1);
}
/* print_version - write "<package> <version>" and a newline to stdout */
static void print_version (void)
{
	fprintf(stdout, "%s %s\n", PACKAGE, VERSION);
}
/*
 * opt_default(): called first by initialize_and_process_args to put every
 *	option into its "nothing requested" state
 */
static void opt_default()
{
	/* job selection filters: none given.  JOB_END is the value that
	 * opt_verify() treats as "no state filter" */
	opt.job_name  = NULL;
	opt.partition = NULL;
	opt.state     = JOB_END;
	opt.user_name = NULL;
	opt.user_id   = 0;

	/* explicit job id list: empty */
	opt.job_cnt = 0;

	/* behaviour switches: off */
	opt.interactive = false;
	opt.verbose     = false;
}
/*
 * env_flag_value(): interpret the text of a boolean environment variable
 * IN val - contents of the variable
 * RET 1 for "true" or "T", 0 for "false" or "F" (any letter case),
 *	-1 for anything else
 */
static int env_flag_value(const char *val)
{
	if ((strcasecmp(val, "true") == 0) || (strcasecmp(val, "T") == 0))
		return 1;
	if ((strcasecmp(val, "false") == 0) || (strcasecmp(val, "F") == 0))
		return 0;
	return -1;
}

/*
 * opt_env(): used by initialize_and_process_args to set options via
 *	environment variables.  Recognized variables are
 *	SCANCEL_INTERACTIVE, SCANCEL_NAME, SCANCEL_PARTITION, SCANCEL_STATE,
 *	SCANCEL_USER and SCANCEL_VERBOSE.  To support another variable, add
 *	a getenv() test for it here.
 *
 * An unrecognized boolean value is logged and the option is left
 * unchanged.  An unrecognized SCANCEL_STATE terminates the program
 * (see xlate_state_name).
 */
static void opt_env()
{
	char *val;
	int flag;

	if ( (val=getenv("SCANCEL_INTERACTIVE")) ) {
		flag = env_flag_value(val);
		if (flag < 0)
			error ("Unrecognized SCANCEL_INTERACTIVE value: %s",
				val);
		else
			opt.interactive = flag ? true : false;
	}

	if ( (val=getenv("SCANCEL_NAME")) ) {
		opt.job_name = xstrdup(val);
	}

	if ( (val=getenv("SCANCEL_PARTITION")) ) {
		opt.partition = xstrdup(val);
	}

	if ( (val=getenv("SCANCEL_STATE")) ) {
		/* same translation the --state command line option uses */
		opt.state = xlate_state_name(val);
	}

	if ( (val=getenv("SCANCEL_USER")) ) {
		opt.user_name = xstrdup(val);
	}

	if ( (val=getenv("SCANCEL_VERBOSE")) ) {
		flag = env_flag_value(val);
		if (flag < 0)
			error ("Unrecognized SCANCEL_VERBOSE value: %s",
				val);
		else
			opt.verbose = flag;
	}
}
/*
 * opt_args() : set options via commandline args and popt
 * IN ac, av - command line as received by main()
 *
 * Options are stored into the global "opt", the remaining positional
 * arguments are converted by xlate_job_step_ids(), and the result is
 * checked by opt_verify().  Exits with status 1 on a popt parse error
 * or when opt_verify() fails; exits with status 0 after printing the
 * version for the Version option.
 */
static void opt_args(int ac, char **av)
{
	poptContext optctx;
	const char **rest;
	const char *arg;
	int rc;

	optctx = poptGetContext("scancel", ac, (const char **) av, options,
				POPT_CONTEXT_POSIXMEHARDER);
	poptSetOtherOptionHelp(optctx, "[OPTIONS] [job_id.step_id]");
	poptReadDefaultConfig(optctx, 0);

	/* a positive return is the OPT_* value of the option just parsed */
	for (;;) {
		rc = poptGetNextOpt(optctx);
		if (rc <= 0)
			break;

		arg = poptGetOptArg(optctx);

		if (rc == OPT_NAME) {
			opt.job_name = xstrdup(arg);
		} else if (rc == OPT_PARTITION) {
			opt.partition = xstrdup(arg);
		} else if (rc == OPT_STATE) {
			opt.state = xlate_state_name(arg);
		} else if (rc == OPT_USER) {
			opt.user_name = xstrdup(arg);
		} else if (rc == OPT_VERBOSE) {
			opt.verbose++;
		} else if (rc == OPT_VERSION) {
			print_version();
			exit(0);
		}
		/* any other value needs no action here */
	}

	/* values below -1 are popt error codes */
	if (rc < -1) {
		const char *bad_opt =
			poptBadOption(optctx, POPT_BADOPTION_NOALIAS);

		error("bad argument %s: %s", bad_opt, poptStrerror(rc));
		error("Try \"scancel --help\" for more information\n");
		exit(1);
	}

	/* whatever is left over is the job_id[.step_id] list */
	rest = poptGetArgs(optctx);
	xlate_job_step_ids(rest);

	if (!opt_verify()) {
		poptPrintUsage(optctx, stderr, 0);
		exit(1);
	}

	poptFreeContext(optctx);
}
/*
 * xlate_job_step_ids - convert the positional "job_id[.step_id]" arguments
 *	into opt.job_id[] and opt.step_id[], and set opt.job_cnt
 * IN rest - NULL-terminated array of argument strings, or NULL for none
 *
 * A job id must be a positive decimal number and a step id a non-negative
 * one; both must fit in 32 bits.  An argument without ".step_id" gets a
 * step id of 0.  Any malformed argument is logged and the program exits
 * with status 1.
 */
static void
xlate_job_step_ids(const char **rest)
{
	int i;
	long tmp_l;
	char *next_str, *step_str;

	opt.job_cnt = 0;
	if (rest != NULL) {
		while (rest[opt.job_cnt] != NULL)
			opt.job_cnt++;
	}

	opt.job_id  = xmalloc(opt.job_cnt * sizeof(uint32_t));
	opt.step_id = xmalloc(opt.job_cnt * sizeof(uint32_t));

	for (i = 0; i < opt.job_cnt; i++) {
		tmp_l = strtol(rest[i], &next_str, 10);
		/* the round trip through uint32_t rejects values that
		 * would otherwise be silently truncated to another id */
		if ((tmp_l <= 0) || ((long) (uint32_t) tmp_l != tmp_l)) {
			error ("Invalid job_id %s", rest[i]);
			exit (1);
		}
		opt.job_id[i] = (uint32_t) tmp_l;

		/* do not depend on the allocator for the "no step" value */
		opt.step_id[i] = 0;

		if (next_str[0] == '.') {
			step_str = &next_str[1];
			tmp_l = strtol(step_str, &next_str, 10);
			/* next_str == step_str: nothing followed the '.' */
			if ((next_str == step_str) || (tmp_l < 0) ||
			    ((long) (uint32_t) tmp_l != tmp_l)) {
				error ("Invalid job id %s", rest[i]);
				exit (1);
			}
			opt.step_id[i] = (uint32_t) tmp_l;
		}

		/* anything left after the number(s) is garbage */
		if (next_str[0] != '\0') {
			error ("Invalid job ID %s", rest[i]);
			exit (1);
		}
	}
}
/*
 * opt_verify : perform some post option processing verification
 *
 * Resolves opt.user_name to opt.user_id, refuses a user id that is
 * neither the real nor the effective uid of the caller, and requires at
 * least one job selection criterion.
 *
 * RET true if the options are usable, false if the user name is unknown
 *	or no job specification was given
 * NOTE: exits with status 1 (does not return) when the caller asks for
 *	another user's jobs
 */
static bool
opt_verify(void)
{
	struct passwd *passwd_ptr;

	if (opt.user_name) {	/* translate to user_id */
		passwd_ptr = getpwnam (opt.user_name);
		if (passwd_ptr == NULL) {
			error ("Invalid user name: %s", opt.user_name);
			return false;
		}
		opt.user_id = passwd_ptr->pw_uid;
	}

	if (opt.user_id) {
		if ((opt.user_id != getuid()) &&
		    (opt.user_id != geteuid())) {
			error ("You are not authorized to delete jobs of user %u",
				opt.user_id);
			exit (1);
		}
	}

	/* any one criterion is enough of a job specification */
	if ((opt.job_name  != NULL)    ||
	    (opt.partition != NULL)    ||
	    (opt.state     != JOB_END) ||
	    (opt.user_name != NULL)    ||
	    (opt.job_cnt   != 0))
		return true;

	return false;		/* no job specification */
}
#if __DEBUG

/* text for a boolean; fully parenthesized so it is safe in any expression */
#define tf_(b) (((b) == true) ? "true" : "false")
/* text for a string option that may still be NULL (never pass NULL to %s) */
#define str_(s) (((s) != NULL) ? (s) : "(null)")

/*
 * opt_list - log the current value of every option (debug builds only)
 */
static void
opt_list(void)
{
	int i;

	info("interactive : %s", tf_(opt.interactive));
	info("job_name : %s", str_(opt.job_name));
	info("partition : %s", str_(opt.partition));
	info("state : %s", job_state_string(opt.state));
	info("user_id : %u", opt.user_id);
	info("user_name : %s", str_(opt.user_name));
	info("verbose : %d", opt.verbose);

	for (i = 0; i < opt.job_cnt; i++) {
		info("job_steps : %u.%u ", opt.job_id[i], opt.step_id[i]);
	}
}

#endif /* __DEBUG */
......@@ -46,118 +46,214 @@
#include <src/common/log.h>
#include <src/common/slurm_protocol_api.h>
#include <src/common/xmalloc.h>
#include <src/scancel/scancel.h>
#define MAX_CANCEL_RETRY 10
/*
 * Only the post-change declarations are kept.  The previous
 * three-argument confirmation() prototype conflicted with the static
 * one-argument version below, and job_cancel() is replaced by
 * cancel_jobs().
 */
void usage (char * command);

static void cancel_jobs (void);
static void cancel_job_id (uint32_t job_id);
static void cancel_step_id (uint32_t job_id, uint32_t step_id);
static int confirmation (int i);
static void filter_job_records (void);
static void load_job_records (void);

/* job table filled in by load_job_records(); NULL until then */
static job_info_msg_t * job_buffer_ptr = NULL;
int
main (int argc, char *argv[])
{
int interactive = 0, pos;
log_options_t opts = LOG_OPTS_STDERR_ONLY ;
log_options_t log_opts = LOG_OPTS_STDERR_ONLY ;
if (argc < 2) {
usage (argv[0]);
exit (1);
log_init (argv[0], log_opts, SYSLOG_FACILITY_DAEMON, NULL);
initialize_and_process_args(argc, argv);
if (opt.verbose) {
log_opts.stderr_level =+opt.verbose;
log_init (argv[0], log_opts, SYSLOG_FACILITY_DAEMON, NULL);
}
log_init (argv[0], opts, SYSLOG_FACILITY_DAEMON, NULL);
for (pos = 1; pos < argc; pos++) {
if (argv[pos][0] != '-')
break;
else if (strncmp (argv[pos], "-help", 2) == 0)
usage (argv[0]);
else if (strcmp (argv[pos], "-i") == 0)
interactive = 1;
else if (strcmp (argv[pos], "-v") == 0)
printf ("Version %s\n", VERSION);
else {
fprintf (stderr, "Invalid option %s\n", argv[pos]);
exit (1);
}
if ((opt.interactive) ||
(opt.job_name) ||
(opt.partition) ||
(opt.state != JOB_END) ||
(opt.user_name)) {
load_job_records ();
filter_job_records ();
}
for ( ; pos<argc; pos++) {
job_cancel (argv[pos], interactive);
}
cancel_jobs ();
exit (0);
}
/* job_cancel - process request to cancel a specific job or job step */
void
job_cancel (char *name, int interactive)
/* load_job_records - load all job information for filtering and verification */
static void
load_job_records (void)
{
int error_code = 0, i;
long tmp_l;
uint32_t job_id, step_id;
char *next_str;
tmp_l = strtol(name, &next_str, 10);
if (tmp_l <= 0) {
fprintf (stderr, "Invalid job_id %s\n", name);
int error_code;
error_code = slurm_load_jobs ((time_t) NULL, &job_buffer_ptr);
if (error_code) {
slurm_perror ("slurm_load_jobs error: ");
exit (1);
}
job_id = tmp_l;
/* cancelling individual job step */
if (next_str[0] == '.') {
tmp_l = strtol(&next_str[1], NULL, 10);
if (tmp_l < 0) {
fprintf (stderr, "Invalid step_id %s\n", name);
exit (1);
}
/*
 * filter_job_records - filtering job information per user specification
 *
 * Walks the table in job_buffer_ptr and rejects a record by setting its
 * job_id to 0.  A record is kept only if it is PENDING or RUNNING and it
 * matches every filter that was given: job name, partition, state, user,
 * and (when job ids were listed) one of the listed job ids.
 */
static void
filter_job_records (void)
{
	int i, j;
	job_info_t *job_ptr = NULL;

	job_ptr = job_buffer_ptr->job_array ;
	for (i = 0; i < job_buffer_ptr->record_count; i++) {
		if (job_ptr[i].job_id == 0)	/* already rejected */
			continue;

		if ((job_ptr[i].job_state != JOB_PENDING) &&
		    (job_ptr[i].job_state != JOB_RUNNING)) {
			job_ptr[i].job_id = 0;
			continue;
		}

		if ((opt.job_name != NULL) &&
		    (strcmp(job_ptr[i].name,opt.job_name) != 0)) {
			job_ptr[i].job_id = 0;
			continue;
		}

		if ((opt.partition != NULL) &&
		    (strcmp(job_ptr[i].partition,opt.partition) != 0)) {
			job_ptr[i].job_id = 0;
			continue;
		}

		if ((opt.state != JOB_END) &&
		    (job_ptr[i].job_state != opt.state)) {
			job_ptr[i].job_id = 0;
			continue;
		}

		if ((opt.user_name != NULL) &&
		    (job_ptr[i].user_id != opt.user_id)) {
			job_ptr[i].job_id = 0;
			continue;
		}

		/* with no explicit job list, every survivor is kept */
		if (opt.job_cnt == 0)
			continue;

		for (j = 0; j < opt.job_cnt; j++) {
			if (job_ptr[i].job_id == opt.job_id[j])
				break;
		}
		if (j >= opt.job_cnt) {		/* not found */
			job_ptr[i].job_id = 0;
			continue;
		}
	}
}
/* cancelling entire job, no job step */
else {
if (interactive && (confirmation (job_id, 0, 0) == 0 ))
return;
for (i=0; i<MAX_CANCEL_RETRY; i++) {
error_code = slurm_cancel_job (job_id);
if ((error_code == 0) ||
(errno != ESLURM_TRANSITION_STATE_NO_UPDATE))
/* cancel_jobs - filter then cancel jobs or job steps per request */
static void
cancel_jobs (void)
{
int i, j;
job_info_t *job_ptr = NULL;
if (opt.job_cnt && opt.interactive) { /* delete jobs with interactive */
job_ptr = job_buffer_ptr->job_array ;
for (j = 0; j < opt.job_cnt; j++ ) {
for (i = 0; i < job_buffer_ptr->record_count; i++) {
if (job_ptr[i].job_id != opt.job_id[j])
continue;
if (opt.interactive && (confirmation(i) == 0))
break;
if (opt.step_id[j] == 0)
cancel_job_id (opt.job_id[j]);
else
cancel_step_id (opt.job_id[j],
opt.step_id[j]);
break;
printf ("Job is in transistional state, retrying\n");
sleep ( 5 + i );
}
if (i >= job_buffer_ptr->record_count)
fprintf (stderr, "Job %u not found",
opt.job_id[j]);
}
} else if (opt.job_cnt) { /* delete specific jobs */
for (j = 0; j < opt.job_cnt; j++ ) {
if (opt.step_id[j] == 0)
cancel_job_id (opt.job_id[j]);
else
cancel_step_id (opt.job_id[j],
opt.step_id[j]);
}
} else { /* delete all jobs per filtering */
job_ptr = job_buffer_ptr->job_array ;
for (i = 0; i < job_buffer_ptr->record_count; i++) {
if (job_ptr[i].job_id == 0)
continue;
if (opt.interactive && (confirmation(i) == 0))
continue;
cancel_job_id (job_ptr[i].job_id);
}
}
}
/*
 * cancel_job_id - cancel one job, retrying while it is in transition
 * IN job_id - id of the job to cancel
 *
 * The request is repeated up to MAX_CANCEL_RETRY times, with a growing
 * pause between attempts, for as long as it fails with
 * ESLURM_TRANSITION_STATE_NO_UPDATE.  A final failure is reported on
 * stderr and the function returns, so that the caller can go on to the
 * remaining jobs.
 */
static void
cancel_job_id (uint32_t job_id)
{
	int error_code, i;

	for (i=0; i<MAX_CANCEL_RETRY; i++) {
		error_code = slurm_cancel_job (job_id);
		if ((error_code == 0) ||
		    (errno != ESLURM_TRANSITION_STATE_NO_UPDATE))
			break;
		printf ("Job is in transistional state, retrying\n");
		sleep ( 5 + i );
	}

	if (error_code) {
		fprintf (stderr, "Cancel job error on job id %u: %s\n",
			 job_id, slurm_strerror(slurm_get_errno()));
	}
}
/*
 * cancel_step_id - cancel one job step, retrying while it is in transition
 * IN job_id  - id of the job owning the step
 * IN step_id - id of the step to cancel
 *
 * The request is repeated up to MAX_CANCEL_RETRY times, with a growing
 * pause between attempts, for as long as it fails with
 * ESLURM_TRANSITION_STATE_NO_UPDATE.  A final failure is reported on
 * stderr.
 */
static void
cancel_step_id (uint32_t job_id, uint32_t step_id)
{
	int rc = 0;
	int attempt = 0;

	while (attempt < MAX_CANCEL_RETRY) {
		rc = slurm_cancel_job_step (job_id, step_id);
		if (rc == 0)
			break;		/* cancelled */
		if (errno != ESLURM_TRANSITION_STATE_NO_UPDATE)
			break;		/* failure that a retry won't cure */

		printf ("Job is in transistional state, retrying\n");
		sleep ( 5 + attempt );
		attempt++;
	}

	if (rc != 0) {
		fprintf (stderr, "Cancel job error on job id %u.%u: %s\n",
			 job_id, step_id,
			 slurm_strerror(slurm_get_errno()));
	}
}
/* confirmation - Confirm job cancel request interactively */
int
confirmation (uint32_t job_id, int has_step, uint32_t step_id)
static int
confirmation (int i)
{
char in_line[128];
job_info_t *job_ptr = NULL;
job_ptr = job_buffer_ptr->job_array ;
while (1) {
if (has_step)
printf ("Cancel job step %u.%u [y/n]? ", job_id, step_id);
else
printf ("Cancel job %u [y/n]? ", job_id);
printf ("Cancel job_id=%u name=%s partition=%s [y/n]? ",
job_ptr[i].job_id, job_ptr[i].name, job_ptr[i].partition);
fgets (in_line, sizeof (in_line), stdin);
if ((in_line[0] == 'y') || (in_line[0] == 'Y'))
......@@ -167,12 +263,3 @@ confirmation (uint32_t job_id, int has_step, uint32_t step_id)
}
}
/*
 * usage - print message describing command line options for scancel
 * IN command - program name to show in the message
 */
void
usage (char *command)
{
	fprintf (stdout,
		 "Usage: %s [-i] [-v] job_id[.step_id] [job_id[.step_id] ...]\n",
		 command);
}
/*****************************************************************************\
* scancel.h - definitions for scancel data structures and functions
*****************************************************************************
* Copyright (C) 2002 The Regents of the University of California.
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
* Written by Moe Jette<jette1@llnl.gov>, et. al.
* UCRL-CODE-2002-040.
*
* This file is part of SLURM, a resource management program.
* For details, see <http://www.llnl.gov/linux/slurm/>.
*
* SLURM is free software; you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*
* SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License along
* with SLURM; if not, write to the Free Software Foundation, Inc.,
* 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
\*****************************************************************************/
#ifndef _HAVE_SCANCEL_H
#define _HAVE_SCANCEL_H
#if HAVE_CONFIG_H
#include <config.h>
#endif
/*
#ifndef _GNU_SOURCE
# define _GNU_SOURCE
#endif
#include <getopt.h>
*/
#if HAVE_POPT_H
#include <popt.h>
#else
#include <src/popt/popt.h>
#endif
#include <src/common/macros.h> /* true and false */
#include <src/common/slurm_protocol_defs.h>
/*
 * scancel_options - every user-selectable setting for one scancel run.
 * Filled in by initialize_and_process_args() and read through the
 * global "opt" below.
 */
typedef struct scancel_options {
bool interactive; /* --interactive, -i */
char *job_name; /* --name=n, -nn */
char *partition; /* --partition=n, -pn */
enum job_states state; /* --state=n, -sn */
uid_t user_id; /* --user=n, -un */
char *user_name; /* --user=n, -un */
int verbose; /* --verbose, -v */
uint16_t job_cnt; /* count of job_id's specified */
uint32_t *job_id; /* list of job_id's */
uint32_t *step_id; /* list of job step id's */
} opt_t;
/*
 * The one option set shared by the scancel source files.
 * NOTE(review): this is an object definition, not an extern declaration,
 * so each source file that includes this header gets its own tentative
 * definition of "opt" -- confirm the toolchain merges them, or move the
 * definition into a single .c file and declare it extern here.
 */
opt_t opt;
/* process options:
* 1. set defaults
* 2. update options with env vars
* 3. update options with commandline args
* 4. perform some verification that options are reasonable
*
* Returns an int status to the caller.
*/
int initialize_and_process_args(int argc, char *argv[]);
#endif /* _HAVE_SCANCEL_H */
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment