diff --git a/doc/man/man1/srun.1 b/doc/man/man1/srun.1
index 415ab7ec6acccdb1e74bf9f072649571993558f8..acee59e7c1a900758fcb4b9d28f8fecc63692c51 100644
--- a/doc/man/man1/srun.1
+++ b/doc/man/man1/srun.1
@@ -246,6 +246,12 @@ is run as root, submit the job with \fIuser\fR's access permissions.
 If \fBsrun\fR is run as root, and the \fB\-\-gid\fR option is used,
 submit the job with \fIgroup\fR's group access permissions. \fIgroup\fR
 may be the group name or the numerical group ID.
+.TP
+\fB\-\-core\fR=\fItype\fR
+Adjust corefile format for parallel job. If possible, srun will set
+up the environment for the job such that a corefile format other than
+full core dumps is enabled. If run with type = "list", srun will
+print a list of supported corefile format types to stdout and exit.
 .PP
 Allocate options:
 .TP
diff --git a/src/srun/Makefile.am b/src/srun/Makefile.am
index 35a19e276a652ac4fc95b02a76ff7424b36100a7..29046b57f2cb712baaac050710fd19158a82ab96 100644
--- a/src/srun/Makefile.am
+++ b/src/srun/Makefile.am
@@ -33,7 +33,9 @@ srun_SOURCES = \
 	sigstr.c \
 	sigstr.h \
 	allocate.c \
-	allocate.h
+	allocate.h \
+	core-format.c \
+	core-format.h
 
 convenience_libs = \
 	$(top_builddir)/src/common/libcommon.la \
diff --git a/src/srun/core-format.c b/src/srun/core-format.c
new file mode 100644
index 0000000000000000000000000000000000000000..c342b060ba8a9c1740290a69be0771d2dd2259cc
--- /dev/null
+++ b/src/srun/core-format.c
@@ -0,0 +1,175 @@
+/*****************************************************************************\
+ *  src/srun/core-format.c - Change corefile characteristics for job
+ *  $Id$
+ *****************************************************************************
+ *  Copyright (C) 2002 The Regents of the University of California.
+ *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+ *  Written by Mark Grondona <mgrondona@llnl.gov>.
+ *  UCRL-CODE-2002-040.
+ *
+ *  This file is part of SLURM, a resource management program.
+ *  For details, see <http://www.llnl.gov/linux/slurm/>.
+ *
+ *  SLURM is free software; you can redistribute it and/or modify it under
+ *  the terms of the GNU General Public License as published by the Free
+ *  Software Foundation; either version 2 of the License, or (at your option)
+ *  any later version.
+ *
+ *  SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
+ *  WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ *  FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+ *  details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with SLURM; if not, write to the Free Software Foundation, Inc.,
+ *  59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
+\*****************************************************************************/
+
+#if HAVE_CONFIG_H
+#  include "config.h"
+#endif
+
+#include <stdio.h>
+#include <unistd.h>
+#include <string.h>
+#include <strings.h>	/* strcasecmp() */
+#include <stdlib.h>
+
+#include "src/srun/core-format.h"
+#include "src/srun/env.h"
+#include "src/common/log.h"
+
+#define CORE_NORMAL 0
+#define CORE_LIGHT  1 /* Default lightweight corefile from liblwcf */
+#define CORE_LCB    2 /* PTOOLS Lightweight Corefile Browser (LCB) compliant*/
+#define CORE_LIST   3 /* List core format types to stdout and exit */
+
+
+/*
+ * One entry of the corefile format table: the numeric type, the name
+ * matched against core=%s, and a one-line description of the format.
+ */
+struct core_format_info {
+	core_format_t type;
+	const char *name;
+	const char *descr;
+};
+
+/*
+ * Supported types for core=%s
+ * The table ends with the CORE_INVALID entry, whose name is NULL.
+ */
+static const struct core_format_info core_types[] = {
+	{ CORE_NORMAL,
+	  "normal",
+	  "Default full corefile (do nothing)"
+	},
+	{ CORE_LIGHT,
+	  "light",
+	  "liblwcf default lightweight corefile format"
+	},
+	{ CORE_LCB,
+	  "lcb",
+	  "liblwcf Lightweight Corefile Browser compliant"
+	},
+	{ CORE_LIST,
+	  "list",
+	  "list valid core format types"
+	},
+	{ CORE_INVALID,
+	  NULL,
+	  "Invalid format"
+	}
+};
+
+/*
+ * Look up a core format by name (case-insensitive, whole-string match).
+ * Returns the matching table entry, or the terminating CORE_INVALID
+ * entry if `name' is NULL or matches no entry. Never returns NULL.
+ */
+static const struct core_format_info *
+_find_core_format_info (const char *name)
+{
+	const struct core_format_info *ci;
+
+	for (ci = core_types; ci->name != NULL; ci++) {
+		if ((name != NULL) && (strcasecmp (ci->name, name) == 0))
+			break;
+	}
+
+	return (ci);
+}
+
+/*
+ * Print the name and description of every selectable core format
+ * (every table entry except the "list" pseudo-type) using info().
+ */
+static void _print_valid_core_types (void)
+{
+	const struct core_format_info *ci;
+
+	info ("Valid corefile format types:");
+	for (ci = core_types; ci->name != NULL; ci++) {
+		if (ci->type != CORE_LIST)
+			info (" %-8s -- %s", ci->name, ci->descr);
+	}
+}
+
+/*
+ * Convert the string `str' to a core format type.
+ * Returns CORE_INVALID if `str' does not name a known format. If `str'
+ * is "list", the valid formats are printed with info() and the process
+ * exits with status 0.
+ */
+core_format_t core_format_type (const char *str)
+{
+	const struct core_format_info *ci = _find_core_format_info (str);
+
+	if (ci->type == CORE_LIST) {
+		_print_valid_core_types ();
+		exit (0);
+	}
+
+	return (ci->type);
+}
+
+/*
+ * Return the name of core format `type'. Never returns NULL, so the
+ * result can be passed to a "%s" conversion: CORE_INVALID and unknown
+ * types yield "invalid".
+ */
+const char * core_format_name (core_format_t type)
+{
+	const struct core_format_info *ci;
+
+	for (ci = core_types; ci->name != NULL; ci++) {
+		if (ci->type == type)
+			return (ci->name);
+	}
+
+	return ("invalid");
+}
+
+/*
+ * Set up the job environment for core format `type' using setenvf().
+ * CORE_NORMAL, CORE_INVALID and unrecognized types leave the environment
+ * untouched. Always returns 0.
+ */
+int core_format_enable (core_format_t type)
+{
+	switch (type) {
+	case CORE_LCB:
+		setenvf ("LWCF_CORE_FORMAT=LCB");
+		/* fallthrough: the LCB format is also a liblwcf format */
+	case CORE_LIGHT:
+		setenvf ("LD_PRELOAD=liblwcf-preload.so");
+		break;
+	case CORE_NORMAL:
+	case CORE_INVALID:
+	default:
+		break;
+	}
+
+	return (0);
+}
+
diff --git a/src/srun/core-format.h b/src/srun/core-format.h
new file mode 100644
index 0000000000000000000000000000000000000000..dc569e588333911b6910d1c5c9000eb5efb3779b
--- /dev/null
+++ b/src/srun/core-format.h
@@ -0,0 +1,42 @@
+/*****************************************************************************\
+ *  src/srun/core-format.h - Change corefile characteristics for job
+ *  $Id$
+ *****************************************************************************
+ *  Copyright (C) 2002 The Regents of the University of California.
+ *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+ *  Written by Mark Grondona <mgrondona@llnl.gov>.
+ *  UCRL-CODE-2002-040.
+ *
+ *  This file is part of SLURM, a resource management program.
+ *  For details, see <http://www.llnl.gov/linux/slurm/>.
+ *
+ *  SLURM is free software; you can redistribute it and/or modify it under
+ *  the terms of the GNU General Public License as published by the Free
+ *  Software Foundation; either version 2 of the License, or (at your option)
+ *  any later version.
+ *
+ *  SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
+ *  WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ *  FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+ *  details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with SLURM; if not, write to the Free Software Foundation, Inc.,
+ *  59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
+\*****************************************************************************/
+
+#ifndef _HAVE_CORE_FORMAT_H
+#define _HAVE_CORE_FORMAT_H
+
+typedef int core_format_t;
+
+#define CORE_INVALID (-1)	/* returned when a name matches no format */
+#define CORE_DEFAULT 0		/* value opt.core_type is initialized to */
+
+core_format_t core_format_type (const char *type);
+
+const char * core_format_name (core_format_t type);
+
+int core_format_enable (core_format_t type);
+
+#endif /* !_HAVE_CORE_FORMAT_H */
diff --git a/src/srun/opt.c b/src/srun/opt.c
index 164e98a8e2a1b74020f736bee9283274cd6ffe5a..dca280a635c64c4bdbef06025bb21fd9164cae4c 100644
--- a/src/srun/opt.c
+++ b/src/srun/opt.c
@@ -88,6 +88,7 @@
 #define OPT_DISTRIB     0x04
 #define OPT_NODES       0x05
 #define OPT_OVERCOMMIT  0x06
+#define OPT_CORE        0x07
 
 /* generic getopt_long flags, integers and *not* valid characters */
 #define LONG_OPT_HELP    0x100
@@ -103,6 +104,7 @@
 #define LONG_OPT_UID     0x10a
 #define LONG_OPT_GID     0x10b
 #define LONG_OPT_MPI     0x10c
+#define LONG_OPT_CORE    0x10e
 
 /*---- forward declarations of static functions ----*/
 
@@ -400,7 +402,7 @@ static void _opt_default()
 	opt.ifname = NULL;
 	opt.efname = NULL;
 
-	opt.core_format = "normal";
+	opt.core_type = CORE_DEFAULT;
 
 	opt.labelio = false;
 	opt.unbuffered = false;
@@ -476,6 +478,7 @@ struct env_vars {
 env_vars_t env_vars[] = {
   {"SLURMD_DEBUG",        OPT_INT,       &opt.slurmd_debug,  NULL           },
   {"SLURM_CPUS_PER_TASK", OPT_INT,       &opt.cpus_per_task, &opt.cpus_set  },
+  {"SLURM_CORE_FORMAT",   OPT_CORE,      NULL,               NULL           },
   {"SLURM_DEBUG",         OPT_DEBUG,     NULL,               NULL           },
   {"SLURM_DISTRIBUTION",  OPT_DISTRIB,   NULL,               NULL           },
   {"SLURM_IMMEDIATE",     OPT_INT,       &opt.immediate,     NULL           },
@@ -567,6 +570,14 @@ _process_env_var(env_vars_t *e, const char *val)
 	    case OPT_OVERCOMMIT:
 		opt.overcommit = true;
 		break;
+
+	    case OPT_CORE:
+		opt.core_type = core_format_type (val);
+		/* report a bad value the same way --core does */
+		if (opt.core_type == CORE_INVALID)
+			error ("SLURM_CORE_FORMAT=\"%s\" Invalid -- "
+			       "ignoring.\n", val);
+		break;
 
 	    default:
 		/* do nothing */
@@ -641,6 +652,7 @@ static void _opt_args(int argc, char **argv)
 		{"quit-on-interrupt", no_argument,      0, 'q'},
 		{"quiet",            no_argument,       0, 'Q'},
 		{"contiguous",       no_argument,       0, LONG_OPT_CONT},
+		{"core",             required_argument, 0, LONG_OPT_CORE},
 		{"mincpus",          required_argument, 0, LONG_OPT_MINCPU},
 		{"mem",              required_argument, 0, LONG_OPT_MEM},
 		{"mpi",              required_argument, 0, LONG_OPT_MPI},
@@ -826,6 +838,12 @@ static void _opt_args(int argc, char **argv)
 		case LONG_OPT_CONT:
 			opt.contiguous = true;
 			break;
+		case LONG_OPT_CORE:
+			opt.core_type = core_format_type (optarg);
+			if (opt.core_type == CORE_INVALID)
+				error ("--core=\"%s\" Invalid -- ignoring.\n",
+				       optarg);
+			break;
 		case LONG_OPT_MINCPU:
 			opt.mincpus = _get_int(optarg, "mincpus");
 			break;
@@ -1020,6 +1038,8 @@ static bool _opt_verify(void)
 		verified = false;
 	}
 
+	core_format_enable (opt.core_type);
+
 	/* massage the numbers */
 	if (opt.nodes_set && !opt.nprocs_set) {
 		/* 1 proc / node default */
@@ -1258,7 +1278,7 @@ static void _opt_list()
 	     opt.partition == NULL ? "default" : opt.partition);
 	info("job name       : `%s'", opt.job_name);
 	info("distribution   : %s", format_distribution_t(opt.distribution));
-	info("core format    : %s", opt.core_format);
+	info("core format    : %s", core_format_name (opt.core_type));
 	info("verbose        : %d", _verbose);
 	info("slurmd_debug   : %d", opt.slurmd_debug);
 	info("immediate      : %s", tf_(opt.immediate));
@@ -1295,9 +1315,9 @@ Usage: srun [-N nnodes] [-n ntasks] [-i in] [-i in] [-e err] [-e err]\n\
             [-D path] [--immediate] [--overcommit] [--no-kill]\n\
             [--share] [--label] [--unbuffered] [-m dist] [-J jobname]\n\
             [--jobid=id] [--batch] [--verbose] [--slurmd_debug=#]\n\
-            [-T threads] [-W sec] [--attach] [--join] [--contiguous]\n\
-            [--mincpus=n] [--mem=MB] [--tmp=MB] [-C list] [--mpi=type]\n\
-            [-w hosts...] [-x hosts...] [--usage] [OPTIONS...] \n\
+            [--core=type] [-T threads] [-W sec] [--attach] [--join] \n\
+            [--contiguous] [--mincpus=n] [--mem=MB] [--tmp=MB] [-C list]\n\
+            [--mpi=type] [-w hosts...] [-x hosts...] \n\
             executable [args...]\n");
 }
 
@@ -1337,13 +1357,16 @@ Parallel run options:\n\
   -v, --verbose               verbose mode (multiple -v's increase verbosity)\n\
   -Q, --quiet                 quiet mode (suppress informational messages)\n\
   -d, --slurmd-debug=level    slurmd debug level\n\
+      --core=type             change default corefile format type\n\
+                              (type=\"list\" to list valid formats)\n\
 \n\
 Allocate only:\n\
   -A, --allocate              allocate resources and spawn a shell\n\
 \n\
 Attach to running job:\n\
   -a, --attach=jobid          attach to running job with specified id\n\
-  -j, --join                  when used with --attach, allow\n\
+  -j, --join                  when used with --attach, allow forwarding of\n\
+                              signals and stdin.\n\
 \n\
 Constraint options:\n\
   --mincpus=n                 minimum number of cpus per node\n\
diff --git a/src/srun/opt.h b/src/srun/opt.h
index 96108bc08553fdbb69ac3fb1df8be070dc205cbf..1f037ed21611b6b0d89b3370a08be82c7f08ba69 100644
--- a/src/srun/opt.h
+++ b/src/srun/opt.h
@@ -35,6 +35,7 @@
 #include <unistd.h>
 
 #include "src/common/macros.h" /* true and false     */
+#include "src/srun/core-format.h"
 
 #define MAX_THREADS	64
 #define MAX_USERNAME	9
@@ -111,7 +112,7 @@ typedef struct srun_options {
 	char *efname;		/* --error, -e filename         */
 
 	int  slurmd_debug;	/* --slurmd-debug, -D           */
-	char *core_format;	/* --corefile-format=, -C type  */
+	core_format_t core_type;/* --core=                      */
 	char *attach;		/* --attach=id      -a id       */
 	bool join;		/* --join,          -j          */
 