From f40dc7d6163ca2bb71b8dff4abffed5bfa165f61 Mon Sep 17 00:00:00 2001
From: Moe Jette <jette1@llnl.gov>
Date: Tue, 12 Nov 2002 00:49:24 +0000
Subject: [PATCH] Greatly expanded functionality, can filter on job name, user
 name, job state, or job_id. Reads job table to perform filtering and
 interactive responses.

---
 src/scancel/Makefile.am |   8 +-
 src/scancel/opt.c       | 430 ++++++++++++++++++++++++++++++++++++++++
 src/scancel/scancel.c   | 249 +++++++++++++++--------
 src/scancel/scancel.h   |  76 +++++++
 4 files changed, 679 insertions(+), 84 deletions(-)
 create mode 100644 src/scancel/opt.c
 create mode 100644 src/scancel/scancel.h

diff --git a/src/scancel/Makefile.am b/src/scancel/Makefile.am
index df6d8ec7903..5f314639e69 100644
--- a/src/scancel/Makefile.am
+++ b/src/scancel/Makefile.am
@@ -2,13 +2,15 @@
 
 AUTOMAKE_OPTIONS = foreign
 
-INCLUDES = -I$(top_srcdir) -I$(top_srcdir)/src/api
+INCLUDES = -I$(top_srcdir) -I$(top_srcdir)/src/api $(POPT_INCLUDES)
 
 bin_PROGRAMS = scancel
 
 LDADD = \
 	$(top_builddir)/src/common/libcommon.la \
-	$(top_builddir)/src/api/libslurm.la
+	$(top_builddir)/src/api/libslurm.la \
+	$(POPT_LIBS)
 
-scancel_SOURCES = scancel.c
+noinst_HEADERS = scancel.h
+scancel_SOURCES = scancel.c opt.c
 
diff --git a/src/scancel/opt.c b/src/scancel/opt.c
new file mode 100644
index 00000000000..a55a565d229
--- /dev/null
+++ b/src/scancel/opt.c
@@ -0,0 +1,430 @@
+/*****************************************************************************\
+ *  opt.c - options processing for scancel
+ *****************************************************************************
+ *  Copyright (C) 2002 The Regents of the University of California.
+ *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+ *  Written by Mark Grondona <grondona1@llnl.gov>, et. al.
+ *  UCRL-CODE-2002-040.
+ *
+ *  This file is part of SLURM, a resource management program.
+ *  For details, see <http://www.llnl.gov/linux/slurm/>.
+ *
+ *  SLURM is free software; you can redistribute it and/or modify it under
+ *  the terms of the GNU General Public License as published by the Free
+ *  Software Foundation; either version 2 of the License, or (at your option)
+ *  any later version.
+ *
+ *  SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
+ *  WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ *  FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
+ *  details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with SLURM; if not, write to the Free Software Foundation, Inc.,
+ *  59 Temple Place, Suite 330, Boston, MA  02111-1307  USA.
+\*****************************************************************************/
+
+#if HAVE_POPT_H
+#include <popt.h>
+#else
+#include <src/popt/popt.h>
+#endif
+
+#include <pwd.h>
+#include <stdlib.h>
+#include <string.h>		/* strcpy, strncasecmp */
+#include <sys/types.h>
+#include <unistd.h>
+
+#ifdef HAVE_STRINGS_H
+#  include <strings.h>
+#endif
+
+#include <src/common/log.h>
+#include <src/common/slurm_protocol_defs.h>
+#include <src/common/xmalloc.h>
+#include <src/common/xstring.h>
+#include <src/scancel/scancel.h>
+
+#define __DEBUG 0
+
+opt_t opt;	/* sole definition; scancel.h declares it extern */
+
+/*---[ popt definitions ]------------------------------------------------*/
+
+/* generic OPT_ definitions -- mainly for use with env vars
+ * (not to be confused with POPT_* definitions)
+ */
+#define OPT_NONE	0x00
+#define OPT_INT		0x01
+#define OPT_STRING	0x02
+
+/* specific options, used with popt (and env var processing) */
+#define OPT_INTERACTIVE	0x03
+#define OPT_NAME	0x04
+#define OPT_PARTITION	0x05
+#define OPT_STATE	0x06
+#define OPT_USER	0x07
+#define OPT_VERBOSE	0x08
+#define OPT_VERSION	0x09
+
+
+#ifndef POPT_TABLEEND
+#  define POPT_TABLEEND { NULL, '\0', 0, 0, 0, NULL, NULL }
+#endif
+
+struct poptOption options[] = {
+	{"interactive", 'i', POPT_ARG_NONE, &opt.interactive, OPT_INTERACTIVE,
+	 "confirm each job cancelation", },
+	{"name", 'n', POPT_ARG_STRING, NULL, OPT_NAME,
+	 "name of job", "name"},
+	{"partition", 'p', POPT_ARG_STRING, NULL, OPT_PARTITION,
+	 "name of job's partition", "name"},
+	{"state", 's', POPT_ARG_STRING, NULL, OPT_STATE,
+	 "name of job's state", "PENDING | RUNNING"},
+	{"user", 'u', POPT_ARG_STRING, NULL, OPT_USER,
+	 "name of job's owner", "name"},
+	{"verbose", 'v', 0, 0, OPT_VERBOSE,
+	 "verbose operation (multiple -v's increase verbosity)", },
+	{"Version", 'V', POPT_ARG_NONE, NULL, OPT_VERSION,
+	 "report the current version", },
+	POPT_AUTOHELP
+	POPT_TABLEEND
+};
+
+/*---[ end popt definitions ]---------------------------------------------*/
+
+/* forward declarations of static functions
+ *
+ */
+
+/*
+ * fill in default options
+ */
+static void opt_default(void);
+
+/* set options based upon env vars
+ */
+static void opt_env(void);
+
+/* set options based upon commandline args
+ */
+static void opt_args(int, char **);
+
+/* verify options sanity
+ */
+static bool opt_verify(void);
+
+static void print_version (void);
+
+/* translate job state name to number
+ */
+static enum job_states xlate_state_name(const char *state_name);
+
+/* list known options and their settings
+ */
+#if __DEBUG
+static void opt_list(void);
+#endif
+
+static void xlate_job_step_ids(const char **rest);
+
+/*---[ end forward declarations of static functions ]---------------------*/
+
+int initialize_and_process_args(int argc, char *argv[])
+{
+	/* initialize option defaults */
+	opt_default();
+
+	/* initialize options with env vars */
+	opt_env();
+
+	/* initialize options with argv */
+	opt_args(argc, argv);
+
+#if __DEBUG
+	opt_list();
+#endif
+	return 1;
+
+}
+
+static enum job_states xlate_state_name(const char *state_name)
+{
+	enum job_states i;
+	char *state_names;
+
+	for (i=0; i<JOB_END; i++) {
+		if ((strcasecmp(state_name, job_state_string(i)) == 0) ||
+		    (strcasecmp(state_name, job_state_string_compact(i)) == 0)) {
+			return i;
+		}
+	}
+
+	fprintf (stderr, "Invalid job state specified: %s\n", state_name);
+	state_names = xstrdup(job_state_string(0));
+	for (i=1; i<JOB_END; i++) {
+		xstrcat(state_names, ",");
+		xstrcat(state_names, job_state_string(i));
+	}
+	fprintf (stderr, "Valid job states include: %s\n", state_names);
+	xfree (state_names);
+	exit (1);
+}
+
+
+static void print_version (void)
+{
+	printf("%s %s\n", PACKAGE, VERSION);
+}
+
+/*
+ * opt_default(): used by initialize_and_process_args to set defaults
+ */
+static void opt_default()
+{
+	opt.interactive	= false;
+	opt.job_cnt	= 0;
+	opt.job_name	= NULL;
+	opt.partition	= NULL;
+	opt.state	= JOB_END;
+	opt.user_name	= NULL;
+	opt.user_id	= 0;
+	opt.verbose	= false;
+}
+
+/*
+ * opt_env(): used by initialize_and_process_args to set options via
+ *            environment variables. See comments above for how to
+ *            extend scancel to process different vars
+ */
+static void opt_env()
+{
+	char *val;
+
+	if ( (val=getenv("SCANCEL_INTERACTIVE")) ) {
+		if (strcasecmp(val, "true") == 0)
+			opt.interactive = true;
+		else if (strcasecmp(val, "T") == 0)
+			opt.interactive = true;
+		else if (strcasecmp(val, "false") == 0)
+			opt.interactive = false;
+		else if (strcasecmp(val, "F") == 0)
+			opt.interactive = false;
+		else
+			error ("Unrecognized SCANCEL_INTERACTIVE value: %s",
+				val);
+	}
+
+	if ( (val=getenv("SCANCEL_NAME")) ) {
+		opt.job_name = xstrdup(val);
+	}
+
+	if ( (val=getenv("SCANCEL_PARTITION")) ) {
+		opt.partition = xstrdup(val);
+	}
+
+	if ( (val=getenv("SCANCEL_STATE")) ) {
+		/* xlate_state_name() lists the valid names and exits
+		 * if val does not name a known job state */
+		opt.state = xlate_state_name(val);
+	}
+
+	if ( (val=getenv("SCANCEL_USER")) ) {
+		opt.user_name = xstrdup(val);
+	}
+
+	if ( (val=getenv("SCANCEL_VERBOSE")) ) {
+		if (strcasecmp(val, "true") == 0)
+			opt.verbose = true;
+		else if (strcasecmp(val, "T") == 0)
+			opt.verbose = true;
+		else if (strcasecmp(val, "false") == 0)
+			opt.verbose = false;
+		else if (strcasecmp(val, "F") == 0)
+			opt.verbose = false;
+		else
+			error ("Unrecognized SCANCEL_VERBOSE value: %s",
+				val);
+	}
+}
+
+/*
+ * opt_args() : set options via commandline args and popt
+ */
+static void opt_args(int ac, char **av)
+{
+	int rc;
+	const char **rest;
+	const char *arg;
+	poptContext optctx;
+
+	optctx = poptGetContext("scancel", ac, (const char **) av, options,
+				POPT_CONTEXT_POSIXMEHARDER);
+
+	poptSetOtherOptionHelp(optctx, "[OPTIONS] [job_id.step_id]");
+
+	poptReadDefaultConfig(optctx, 0);
+
+	/* walk every option: string-valued ones are copied into opt,
+	 * --state is translated to its enum job_states value
+	 */
+	while ((rc = poptGetNextOpt(optctx)) > 0) {
+		arg = poptGetOptArg(optctx);
+
+		switch (rc) {
+		case OPT_NAME:
+			opt.job_name = xstrdup(arg);
+			break;
+
+		case OPT_PARTITION:
+			opt.partition = xstrdup(arg);
+			break;
+
+		case OPT_STATE:
+			opt.state = xlate_state_name(arg);
+			break;
+
+		case OPT_USER:
+			opt.user_name = xstrdup(arg);
+			break;
+
+		case OPT_VERBOSE:
+			opt.verbose++;
+			break;
+
+		case OPT_VERSION:
+			print_version();
+			exit(0);
+			break;
+
+		default:
+			break;
+			/* do nothing */
+		}
+	}
+
+	if (rc < -1) {
+		const char *bad_opt;
+		bad_opt = poptBadOption(optctx, POPT_BADOPTION_NOALIAS);
+		error("bad argument %s: %s", bad_opt, poptStrerror(rc));
+		error("Try \"scancel --help\" for more information\n");
+		exit(1);
+	}
+
+	rest = poptGetArgs(optctx);
+	xlate_job_step_ids(rest);
+
+	if (!opt_verify()) {
+		poptPrintUsage(optctx, stderr, 0);
+		exit(1);
+	}
+
+	poptFreeContext(optctx);
+
+}
+
+static void
+xlate_job_step_ids(const char **rest)
+{
+	int i;
+	long tmp_l;
+	char *next_str;
+
+	opt.job_cnt = 0;
+
+	if (rest != NULL) {
+		while (rest[opt.job_cnt] != NULL)
+			opt.job_cnt++;
+	}
+
+	opt.job_id = xmalloc(opt.job_cnt * sizeof(uint32_t));
+	opt.step_id = xmalloc(opt.job_cnt * sizeof(uint32_t));
+
+	for (i=0; i<opt.job_cnt; i++) {
+		tmp_l = strtol(rest[i], &next_str, 10);
+		if (tmp_l <= 0) {
+			error ("Invalid job_id %s", rest[i]);
+			exit (1);
+		}
+		opt.job_id[i] = tmp_l;
+
+		if (next_str[0] == '.') {
+			tmp_l = strtol(&next_str[1], &next_str, 10);
+			if (tmp_l < 0) {
+				error ("Invalid step_id %s", rest[i]);
+				exit (1);
+			}
+			opt.step_id[i] = tmp_l;
+		}
+
+		if (next_str[0] != '\0') {
+			error ("Invalid job ID %s", rest[i]);
+			exit (1);
+		}
+	}
+}
+
+
+/*
+ * opt_verify : perform some post option processing verification
+ *
+ */
+static bool
+opt_verify(void)
+{
+	bool verified = true;
+	struct passwd *passwd_ptr;
+
+	if (opt.user_name) {	/* translate to user_id */
+		passwd_ptr = getpwnam (opt.user_name);
+		if (passwd_ptr == NULL) {
+			error ("Invalid user name: %s", opt.user_name);
+			return false;
+		} else {
+			opt.user_id = passwd_ptr->pw_uid;
+		}
+	}
+
+	if ((opt.user_id) &&
+	    (opt.user_id != getuid()) &&
+	    (opt.user_id != geteuid())) {
+		error ("You are not authorized to delete jobs of user %u",
+			opt.user_id);
+		exit (1);
+	}
+
+	if ((opt.job_name == NULL) &&
+	    (opt.partition == NULL) &&
+	    (opt.state == JOB_END) &&
+	    (opt.user_name == NULL) &&
+	    (opt.job_cnt == 0))
+		verified = false;	/* no job specification */
+
+	return verified;
+}
+
+#if __DEBUG
+
+#define tf_(b) ((b) ? "true" : "false")
+
+static void
+opt_list(void)
+{
+	int i;
+
+	info("interactive : %s", tf_(opt.interactive));
+	info("job_name : %s", opt.job_name);
+	info("partition : %s", opt.partition);
+	info("state : %s", job_state_string(opt.state));
+	info("user_id : %u", opt.user_id);
+	info("user_name : %s", opt.user_name);
+	info("verbose : %d", opt.verbose);
+
+	for (i=0; i<opt.job_cnt; i++) {
+		info("job_steps : %u.%u ", opt.job_id[i], opt.step_id[i]);
+	}
+}
+
+#endif	/* __DEBUG */
+
diff --git a/src/scancel/scancel.c b/src/scancel/scancel.c
index a12ddb2c866..346819f4578 100644
--- a/src/scancel/scancel.c
+++ b/src/scancel/scancel.c
@@ -46,118 +46,214 @@
 #include <src/common/log.h>
 #include <src/common/slurm_protocol_api.h>
 #include <src/common/xmalloc.h>
+#include <src/scancel/scancel.h>
 
 #define MAX_CANCEL_RETRY 10
 
-int confirmation (uint32_t job_id, int has_step, uint32_t step_id);
-void job_cancel (char *name, int interactive);
-void usage (char * command);
+static void cancel_jobs (void);
+static void cancel_job_id (uint32_t job_id);
+static void cancel_step_id (uint32_t job_id, uint32_t step_id);
+static int  confirmation (int i);
+static void filter_job_records (void);
+static void load_job_records (void);
+
+static job_info_msg_t * job_buffer_ptr = NULL;
 
 int
 main (int argc, char *argv[])
 {
-	int interactive = 0, pos;
-	log_options_t opts = LOG_OPTS_STDERR_ONLY ;
+	log_options_t log_opts = LOG_OPTS_STDERR_ONLY ;
 
-	if (argc < 2) {
-		usage (argv[0]);
-		exit (1);
+	log_init (argv[0], log_opts, SYSLOG_FACILITY_DAEMON, NULL);
+	initialize_and_process_args(argc, argv);
+	if (opt.verbose) {
+		log_opts.stderr_level += opt.verbose;
+		log_init (argv[0], log_opts, SYSLOG_FACILITY_DAEMON, NULL);
 	}
 
-	log_init (argv[0], opts, SYSLOG_FACILITY_DAEMON, NULL);
-
-	for (pos = 1; pos < argc; pos++) {
-		if (argv[pos][0] != '-')
-			break;
-		else if (strncmp (argv[pos], "-help", 2) == 0)
-			usage (argv[0]);
-		else if (strcmp (argv[pos], "-i") == 0)
-			interactive = 1;
-		else if (strcmp (argv[pos], "-v") == 0)
-			printf ("Version %s\n", VERSION);
-		else {
-			fprintf (stderr, "Invalid option %s\n", argv[pos]);
-			exit (1);
-		}
+	if ((opt.interactive) ||
+	    (opt.job_name) ||
+	    (opt.partition) ||
+	    (opt.state != JOB_END) ||
+	    (opt.user_name)) {
+		load_job_records ();
+		filter_job_records ();
 	}
 
-	for ( ; pos<argc; pos++) {
-		job_cancel (argv[pos], interactive);
-	}
+	cancel_jobs ();
 
 	exit (0);
 }
 
-/* job_cancel - process request to cancel a specific job or job step */
-void
-job_cancel (char *name, int interactive)
+
+/* load_job_records - load all job information for filtering and verification */
+static void
+load_job_records (void)
 {
-	int error_code = 0, i;
-	long tmp_l;
-	uint32_t job_id, step_id;
-	char *next_str;
-
-	tmp_l = strtol(name, &next_str, 10);
-	if (tmp_l <= 0) {
-		fprintf (stderr, "Invalid job_id %s\n", name);
+	int error_code;
+
+	error_code = slurm_load_jobs ((time_t) NULL, &job_buffer_ptr);
+
+	if (error_code) {
+		slurm_perror ("slurm_load_jobs error: ");
 		exit (1);
 	}
-	job_id = tmp_l;
-
-	/* cancelling individual job step */
-	if (next_str[0] == '.') {
-		tmp_l = strtol(&next_str[1], NULL, 10);
-		if (tmp_l < 0) {
-			fprintf (stderr, "Invalid step_id %s\n", name);
-			exit (1);
+}
+
+
+/* filter_job_records - filtering job information per user specification */
+static void
+filter_job_records (void)
+{
+	int i, j;
+	job_info_t *job_ptr = NULL;
+
+	job_ptr = job_buffer_ptr->job_array ;
+	for (i = 0; i < job_buffer_ptr->record_count; i++) {
+		if (job_ptr[i].job_id == 0)
+			continue;
+
+		if ((job_ptr[i].job_state != JOB_PENDING) &&
+		    (job_ptr[i].job_state != JOB_RUNNING)) {
+			job_ptr[i].job_id = 0;
+			continue;
+		}
+
+		if ((opt.job_name != NULL) &&
+		    (strcmp(job_ptr[i].name,opt.job_name) != 0)) {
+			job_ptr[i].job_id = 0;
+			continue;
+		}
+
+		if ((opt.partition != NULL) &&
+		    (strcmp(job_ptr[i].partition,opt.partition) != 0)) {
+			job_ptr[i].job_id = 0;
+			continue;
+		}
+
+		if ((opt.state != JOB_END) &&
+		    (job_ptr[i].job_state != opt.state)) {
+			job_ptr[i].job_id = 0;
+			continue;
 		}
-		step_id = tmp_l;
 
-		if (interactive && (confirmation (job_id, 1, step_id) == 0 ))
-			return;
+		if ((opt.user_name != NULL) &&
+		    (job_ptr[i].user_id != opt.user_id)) {
+			job_ptr[i].job_id = 0;
+			continue;
+		}
 
-		for (i=0; i<MAX_CANCEL_RETRY; i++) {
-			error_code = slurm_cancel_job_step (job_id, step_id);
-			if ((error_code == 0) ||
-			    (errno != ESLURM_TRANSITION_STATE_NO_UPDATE))
+		if (opt.job_cnt == 0)
+			continue;
+		for (j = 0; j < opt.job_cnt; j++) {
+			if (job_ptr[i].job_id == opt.job_id[j])
 				break;
-			printf ("Job is in transistional state, retrying\n");
-			sleep ( 5 + i );
+		}
+		if (j >= opt.job_cnt) { /* not found */
+			job_ptr[i].job_id = 0;
+			continue;
 		}
 	}
+}
 
-	/* cancelling entire job, no job step */
-	else {
-		if (interactive && (confirmation (job_id, 0, 0) == 0 ))
-			return;
-		for (i=0; i<MAX_CANCEL_RETRY; i++) {
-			error_code = slurm_cancel_job (job_id);
-			if ((error_code == 0) ||
-			    (errno != ESLURM_TRANSITION_STATE_NO_UPDATE))
+/* cancel_jobs - filter then cancel jobs or job steps per request */
+static void
+cancel_jobs (void)
+{
+	int i, j;
+	job_info_t *job_ptr = NULL;
+
+	if (opt.job_cnt && opt.interactive) {	/* delete jobs with interactive */
+		job_ptr = job_buffer_ptr->job_array ;
+		for (j = 0; j < opt.job_cnt; j++ ) {
+			for (i = 0; i < job_buffer_ptr->record_count; i++) {
+				if (job_ptr[i].job_id != opt.job_id[j])
+					continue;
+				if (opt.interactive && (confirmation(i) == 0))
+					break;
+				if (opt.step_id[j] == 0)
+					cancel_job_id (opt.job_id[j]);
+				else
+					cancel_step_id (opt.job_id[j],
+							opt.step_id[j]);
 				break;
-			printf ("Job is in transistional state, retrying\n");
-			sleep ( 5 + i );
+			}
+			if (i >= job_buffer_ptr->record_count)
+				fprintf (stderr, "Job %u not found\n",
+					opt.job_id[j]);
+		}
+
+	} else if (opt.job_cnt) {	/* delete specific jobs */
+		for (j = 0; j < opt.job_cnt; j++ ) {
+			if (opt.step_id[j] == 0)
+				cancel_job_id (opt.job_id[j]);
+			else
+				cancel_step_id (opt.job_id[j],
+						opt.step_id[j]);
+		}
+
+	} else {		/* delete all jobs per filtering */
+		job_ptr = job_buffer_ptr->job_array ;
+		for (i = 0; i < job_buffer_ptr->record_count; i++) {
+			if (job_ptr[i].job_id == 0)
+				continue;
+			if (opt.interactive && (confirmation(i) == 0))
+				continue;
+			cancel_job_id (job_ptr[i].job_id);
 		}
 	}
+}
 
+static void
+cancel_job_id (uint32_t job_id)
+{
+	int error_code, i;
+
+	for (i=0; i<MAX_CANCEL_RETRY; i++) {
+		error_code = slurm_cancel_job (job_id);
+		if ((error_code == 0) ||
+		    (errno != ESLURM_TRANSITION_STATE_NO_UPDATE))
+			break;
+		printf ("Job is in transistional state, retrying\n");
+		sleep ( 5 + i );
+	}
 	if (error_code) {
-		slurm_perror ("Cancel job error: ");
-		exit (1);
+		fprintf (stderr, "Cancel job error on job id %u: %s\n",
+			job_id, slurm_strerror(slurm_get_errno()));
+	}
+}
+
+static void
+cancel_step_id (uint32_t job_id, uint32_t step_id)
+{
+	int error_code, i;
+
+	for (i=0; i<MAX_CANCEL_RETRY; i++) {
+		error_code = slurm_cancel_job_step (job_id, step_id);
+		if ((error_code == 0) ||
+		    (errno != ESLURM_TRANSITION_STATE_NO_UPDATE))
+			break;
+		printf ("Job is in transistional state, retrying\n");
+		sleep ( 5 + i );
+	}
+	if (error_code) {
+		fprintf (stderr, "Cancel job error on job id %u.%u: %s\n",
+			job_id, step_id, slurm_strerror(slurm_get_errno()));
 	}
 }
 
 
 /* confirmation - Confirm job cancel request interactively */
-int
-confirmation (uint32_t job_id, int has_step, uint32_t step_id)
+static int
+confirmation (int i)
 {
 	char in_line[128];
+	job_info_t *job_ptr = NULL;
 
+	job_ptr = job_buffer_ptr->job_array ;
 	while (1) {
-		if (has_step)
-			printf ("Cancel job step %u.%u [y/n]? ", job_id, step_id);
-		else
-			printf ("Cancel job %u [y/n]? ", job_id);
+		printf ("Cancel job_id=%u name=%s partition=%s [y/n]? ",
+			job_ptr[i].job_id, job_ptr[i].name, job_ptr[i].partition);
 		fgets (in_line, sizeof (in_line), stdin);
 
 		if ((in_line[0] == 'y') || (in_line[0] == 'Y'))
@@ -167,12 +263,3 @@ confirmation (uint32_t job_id, int has_step, uint32_t step_id)
 	}
 }
 
-
-/* usage - print message describing command lone options for scancel */
-void
-usage (char *command)
-{
-	printf ("Usage: %s [-i] [-v] job_id[.step_id] [job_id[.step_id] ...]\n", command);
-}
-
-
diff --git a/src/scancel/scancel.h b/src/scancel/scancel.h
new file mode 100644
index 00000000000..c94a16e05c5
--- /dev/null
+++ b/src/scancel/scancel.h
@@ -0,0 +1,76 @@
+/*****************************************************************************\
+ *  scancel.h - definitions for scancel data structures and functions
+ *****************************************************************************
+ *  Copyright (C) 2002 The Regents of the University of California.
+ *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+ *  Written by Moe Jette<jette1@llnl.gov>, et. al.
+ *  UCRL-CODE-2002-040.
+ *
+ *  This file is part of SLURM, a resource management program.
+ *  For details, see <http://www.llnl.gov/linux/slurm/>.
+ *
+ *  SLURM is free software; you can redistribute it and/or modify it under
+ *  the terms of the GNU General Public License as published by the Free
+ *  Software Foundation; either version 2 of the License, or (at your option)
+ *  any later version.
+ *
+ *  SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
+ *  WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ *  FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
+ *  details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with SLURM; if not, write to the Free Software Foundation, Inc.,
+ *  59 Temple Place, Suite 330, Boston, MA  02111-1307  USA.
+\*****************************************************************************/
+
+#ifndef _HAVE_SCANCEL_H
+#define _HAVE_SCANCEL_H
+
+#if HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+/*
+#ifndef _GNU_SOURCE
+#  define _GNU_SOURCE
+#endif
+
+#include <getopt.h>
+*/
+
+#if HAVE_POPT_H
+#include <popt.h>
+#else
+#include <src/popt/popt.h>
+#endif
+
+#include <src/common/macros.h> /* true and false */
+#include <src/common/slurm_protocol_defs.h>
+
+typedef struct scancel_options {
+
+	bool interactive;	/* --interactive, -i		*/
+	char *job_name;		/* --name=n, -nn		*/
+	char *partition;	/* --partition=n, -pn		*/
+	enum job_states state;	/* --state=n, -sn		*/
+	uid_t user_id;		/* --user=n, -un		*/
+	char *user_name;	/* --user=n, -un		*/
+	int verbose;		/* --verbose, -v		*/
+
+	uint16_t job_cnt;	/* count of job_id's specified	*/
+	uint32_t *job_id;	/* list of job_id's		*/
+	uint32_t *step_id;	/* list of job step id's	*/
+} opt_t;
+
+extern opt_t opt;	/* defined once, in opt.c */
+
+/* process options:
+ * 1. set defaults
+ * 2. update options with env vars
+ * 3. update options with commandline args
+ * 4. perform some verification that options are reasonable
+ */
+int initialize_and_process_args(int argc, char *argv[]);
+
+#endif	/* _HAVE_SCANCEL_H */
-- 
GitLab