diff --git a/src/salloc/opt.c b/src/salloc/opt.c index b6a2e4e119419bb45221857fcaf3abb1d042dd24..a5a3058ab15476d78f56c2b17c1d2ca4d70e1bc3 100644 --- a/src/salloc/opt.c +++ b/src/salloc/opt.c @@ -160,6 +160,7 @@ /*---- global variables, defined in opt.h ----*/ opt_t opt; +int error_exit = 1; /*---- forward declarations of static functions ----*/ diff --git a/src/salloc/opt.h b/src/salloc/opt.h index 16786e42e1fdf73ba2774fed49bcd622118eb31a..73217c9354cf35a0d1cd090b513664021f650bea 100644 --- a/src/salloc/opt.h +++ b/src/salloc/opt.h @@ -164,6 +164,7 @@ typedef struct salloc_options { } opt_t; extern opt_t opt; +extern int error_exit; /* process options: * 1. set defaults diff --git a/src/salloc/salloc.c b/src/salloc/salloc.c index 09f6ec2f44cf372869f581ee9bacdb6a23680310..2a0a8af2fe11fa991e0abdb5b2b41a8e7acebb33 100644 --- a/src/salloc/salloc.c +++ b/src/salloc/salloc.c @@ -87,7 +87,6 @@ extern int spank_unset_job_env(const char *name); char **command_argv; int command_argc; pid_t command_pid = -1; -int error_exit = 1; enum possible_allocation_states allocation_state = NOT_GRANTED; pthread_mutex_t allocation_state_lock = PTHREAD_MUTEX_INITIALIZER; diff --git a/src/salloc/salloc.h b/src/salloc/salloc.h index 1e80a2c71fd6951616069990f9e2ed5be9e19849..fdd262aaf7b49575670d744e14e722759af9adcf 100644 --- a/src/salloc/salloc.h +++ b/src/salloc/salloc.h @@ -47,7 +47,6 @@ extern char **command_argv; extern int command_argc; extern pid_t command_pid; -extern int error_exit; enum possible_allocation_states {NOT_GRANTED, GRANTED, REVOKED}; extern enum possible_allocation_states allocation_state; diff --git a/src/srun/allocate.c b/src/srun/allocate.c index 18fbcaf0acebb4b5058eaf69b7c30327387238d2..784cbdb4f9c2dcc5085dc9199c26221584df9c11 100644 --- a/src/srun/allocate.c +++ b/src/srun/allocate.c @@ -437,7 +437,7 @@ relinquish: slurm_free_resource_allocation_response_msg(resp); if(!destroy_job) slurm_complete_job(resp->job_id, 1); - exit(1); + exit(error_exit); return NULL; } @@ -475,7 +475,7 @@ existing_allocation(void) old_job_id); info ("Check SLURM_JOB_ID environment variable " "for expired or invalid job."); - exit(1); + exit(error_exit); } return resp; @@ -495,10 +495,14 @@ slurmctld_msg_init(void) slurmctld_fd = -1; slurmctld_comm_addr.port = 0; - if ((slurmctld_fd = slurm_init_msg_engine_port(0)) < 0) - fatal("slurm_init_msg_engine_port error %m"); - if (slurm_get_stream_addr(slurmctld_fd, &slurm_address) < 0) - fatal("slurm_get_stream_addr error %m"); + if ((slurmctld_fd = slurm_init_msg_engine_port(0)) < 0) { + error("slurm_init_msg_engine_port error %m"); + exit(error_exit); + } + if (slurm_get_stream_addr(slurmctld_fd, &slurm_address) < 0) { + error("slurm_get_stream_addr error %m"); + exit(error_exit); + } fd_set_nonblocking(slurmctld_fd); /* hostname is not set, so slurm_get_addr fails slurm_get_addr(&slurm_address, &port, hostname, sizeof(hostname)); */ diff --git a/src/srun/multi_prog.c b/src/srun/multi_prog.c index 4c3e6fbcac54362883d1c1de2575446117016b2f..693d5d580006f9eedc626a486ba31f344d2371a2 100644 --- a/src/srun/multi_prog.c +++ b/src/srun/multi_prog.c @@ -61,6 +61,7 @@ #include "src/common/xmalloc.h" #include "src/common/xstring.h" #include "src/srun/debugger.h" +#include "src/srun/opt.h" /* Given a program name, translate it to a fully qualified pathname * as needed based upon the PATH environment variable */ @@ -226,12 +227,14 @@ mpir_init(int num_tasks) { MPIR_proctable_size = num_tasks; MPIR_proctable = xmalloc(sizeof(MPIR_PROCDESC) * num_tasks); - if (MPIR_proctable == NULL) - fatal("Unable to initialize MPIR_proctable: %m"); + if (MPIR_proctable == NULL) { + error("Unable to initialize MPIR_proctable: %m"); + exit(error_exit); + } } extern void -mpir_cleanup() +mpir_cleanup(void) { int i; @@ -249,9 +252,11 @@ mpir_set_executable_names(const char *executable_name) for (i = 0; i < MPIR_proctable_size; i++) { MPIR_proctable[i].executable_name = xstrdup(executable_name); - if (MPIR_proctable[i].executable_name == NULL) - fatal("Unable to set MPI_proctable executable_name:" + if (MPIR_proctable[i].executable_name == NULL) { + error("Unable to set MPI_proctable executable_name:" " %m"); + exit(error_exit); + } } } diff --git a/src/srun/opt.c b/src/srun/opt.c index 09854dc6154a14886cd2e440c9e89c6c8ff67a86..c9f42b61c29333585b6b8a98e006ef2a098ff45a 100644 --- a/src/srun/opt.c +++ b/src/srun/opt.c @@ -185,6 +185,7 @@ /*---- global variables, defined in opt.h ----*/ int _verbose; opt_t opt; +int error_exit = 1; /*---- forward declarations of static functions ----*/ @@ -233,7 +234,7 @@ int initialize_and_process_args(int argc, char *argv[]) _opt_args(argc, argv); if (!_opt_verify()) - exit(1); + exit(error_exit); if (_verbose > 3) _opt_list(); @@ -305,8 +306,10 @@ static void _opt_default() opt.gid = getgid(); - if ((getcwd(buf, MAXPATHLEN)) == NULL) - fatal("getcwd failed: %m"); + if ((getcwd(buf, MAXPATHLEN)) == NULL) { + error("getcwd failed: %m"); + exit(error_exit); + } opt.cwd = xstrdup(buf); opt.cwd_set = false; @@ -587,13 +590,13 @@ _process_env_var(env_vars_t *e, const char *val) case OPT_CPU_BIND: if (slurm_verify_cpu_bind(val, &opt.cpu_bind, &opt.cpu_bind_type)) - exit(1); + exit(error_exit); break; case OPT_MEM_BIND: if (slurm_verify_mem_bind(val, &opt.mem_bind, &opt.mem_bind_type)) - exit(1); + exit(error_exit); break; case OPT_NODES: @@ -659,16 +662,18 @@ _process_env_var(env_vars_t *e, const char *val) case OPT_MPI: if (mpi_hook_client_init((char *)val) == SLURM_ERROR) { - fatal("\"%s=%s\" -- invalid MPI type, " + error("\"%s=%s\" -- invalid MPI type, " "--mpi=list for acceptable types.", e->var, val); + exit(error_exit); } break; case OPT_SIGNAL: if (get_signal_opts(optarg, &opt.warn_signal, &opt.warn_time)) { - fatal("Invalid signal specification: %s", optarg); + error("Invalid signal specification: %s", optarg); + exit(error_exit); } break; @@ -693,7 +698,7 @@ _get_int(const char *arg, const char *what, bool positive) if ((*p != '\0') || (result < 0L) || (positive && (result <= 0L))) { error ("Invalid numeric value \"%s\" for %s.", arg, what); - exit(1); + exit(error_exit); } else if (result > INT_MAX) { error ("Numeric argument (%ld) to big for %s.", result, what); } else if (result < INT_MIN) { @@ -818,12 +823,13 @@ static void set_options(const int argc, char **argv) }; char *opt_string = "+aA:bB:c:C:d:D:e:Eg:Hi:IjJ:kKlL:m:n:N:" "o:Op:P:qQr:Rst:T:uU:vVw:W:x:XZ"; + char *tmp_str; struct option *optz = spank_option_table_create (long_options); if (!optz) { - error ("Unable to create option table"); - exit (1); + error("Unable to create option table"); + exit(error_exit); } if(opt.progname == NULL) @@ -838,7 +844,7 @@ static void set_options(const int argc, char **argv) case (int)'?': fprintf(stderr, "Try \"srun --help\" for more information\n"); - exit(1); + exit(error_exit); break; case (int)'A': case (int)'U': /* backwards compatibility */ @@ -848,11 +854,11 @@ static void set_options(const int argc, char **argv) case (int)'a': error("Please use the \"sattach\" command instead of " "\"srun -a/--attach\"."); - exit(1); + exit(error_exit); case (int)'b': error("Please use the \"sbatch\" command instead of " "\"srun -b/--batch\"."); - exit(1); + exit(error_exit); case (int)'B': opt.extra_set = verify_socket_core_thread_count( optarg, @@ -868,7 +874,7 @@ static void set_options(const int argc, char **argv) if (opt.extra_set == false) { error("invalid resource allocation -B `%s'", optarg); - exit(1); + exit(error_exit); } break; case (int)'c': @@ -891,8 +897,9 @@ static void set_options(const int argc, char **argv) break; case (int)'e': if (opt.pty) { - fatal("--error incompatable with --pty " + error("--error incompatable with --pty " "option"); + exit(error_exit); } xfree(opt.efname); if (strncasecmp(optarg, "none", (size_t) 4) == 0) @@ -905,15 +912,16 @@ static void set_options(const int argc, char **argv) break; case (int)'g': if (verify_geometry(optarg, opt.geometry)) - exit(1); + exit(error_exit); break; case (int)'H': opt.hold = true; break; case (int)'i': if (opt.pty) { - fatal("--input incompatable with " + error("--input incompatable with " "--pty option"); + exit(error_exit); } xfree(opt.ifname); if (strncasecmp(optarg, "none", (size_t) 4) == 0) @@ -954,7 +962,7 @@ static void set_options(const int argc, char **argv) if (opt.distribution == SLURM_DIST_UNKNOWN) { error("distribution type `%s' " "is not recognized", optarg); - exit(1); + exit(error_exit); } break; case (int)'n': @@ -972,12 +980,15 @@ static void set_options(const int argc, char **argv) if (opt.nodes_set == false) { error("invalid resource allocation -N `%s'", optarg); - exit(1); + exit(error_exit); } break; case (int)'o': - if (opt.pty) - fatal("--output incompatable with --pty option"); + if (opt.pty) { + error("--output incompatable with --pty " + "option"); + exit(error_exit); + } xfree(opt.ofname); if (strncasecmp(optarg, "none", (size_t) 4) == 0) opt.ofname = xstrdup("/dev/null"); @@ -1041,7 +1052,7 @@ static void set_options(const int argc, char **argv) xfree(opt.exc_nodes); opt.exc_nodes = xstrdup(optarg); if (!_valid_node_list(&opt.exc_nodes)) - exit(1); + exit(error_exit); break; case (int)'X': opt.disable_status = true; @@ -1062,12 +1073,12 @@ static void set_options(const int argc, char **argv) case LONG_OPT_CPU_BIND: if (slurm_verify_cpu_bind(optarg, &opt.cpu_bind, &opt.cpu_bind_type)) - exit(1); + exit(error_exit); break; case LONG_OPT_MEM_BIND: if (slurm_verify_mem_bind(optarg, &opt.mem_bind, &opt.mem_bind_type)) - exit(1); + exit(error_exit); break; case LONG_OPT_CORE: opt.core_type = core_format_type (optarg); @@ -1094,7 +1105,7 @@ static void set_options(const int argc, char **argv) if (opt.job_min_memory < 0) { error("invalid memory constraint %s", optarg); - exit(1); + exit(error_exit); } break; case LONG_OPT_MEM_PER_CPU: @@ -1102,15 +1113,16 @@ static void set_options(const int argc, char **argv) if (opt.mem_per_cpu < 0) { error("invalid memory constraint %s", optarg); - exit(1); + exit(error_exit); } break; case LONG_OPT_MPI: if (mpi_hook_client_init((char *)optarg) == SLURM_ERROR) { - fatal("\"--mpi=%s\" -- long invalid MPI type, " + error("\"--mpi=%s\" -- long invalid MPI type, " "--mpi=list for acceptable types.", optarg); + exit(error_exit); } break; case LONG_OPT_RESV_PORTS: @@ -1123,7 +1135,7 @@ static void set_options(const int argc, char **argv) opt.job_min_tmp_disk = str_to_bytes(optarg); if (opt.job_min_tmp_disk < 0) { error("invalid tmp value %s", optarg); - exit(1); + exit(error_exit); } break; case LONG_OPT_JOBID: @@ -1143,16 +1155,24 @@ static void set_options(const int argc, char **argv) _get_int(optarg, "max-exit-timeout", true); break; case LONG_OPT_UID: - if (opt.euid != (uid_t) -1) - fatal ("duplicate --uid option"); - if (uid_from_string (optarg, &opt.euid) < 0) - fatal ("--uid=\"%s\" invalid", optarg); + if (opt.euid != (uid_t) -1) { + error("duplicate --uid option"); + exit(error_exit); + } + if (uid_from_string (optarg, &opt.euid) < 0) { + error("--uid=\"%s\" invalid", optarg); + exit(error_exit); + } break; case LONG_OPT_GID: - if (opt.egid != (gid_t) -1) - fatal ("duplicate --gid option"); - if (gid_from_string (optarg, &opt.egid) < 0) - fatal ("--gid=\"%s\" invalid", optarg); + if (opt.egid != (gid_t) -1) { + error("duplicate --gid option"); + exit(error_exit); + } + if (gid_from_string (optarg, &opt.egid) < 0) { + error("--gid=\"%s\" invalid", optarg); + exit(error_exit); + } break; case LONG_OPT_DEBUG_TS: opt.debugger_test = true; @@ -1200,14 +1220,17 @@ static void set_options(const int argc, char **argv) case LONG_OPT_BEGIN: opt.begin = parse_time(optarg, 0); if (opt.begin == 0) { - fatal("Invalid time specification %s", + error("Invalid time specification %s", optarg); + exit(error_exit); } break; case LONG_OPT_MAIL_TYPE: opt.mail_type |= parse_mail_type(optarg); - if (opt.mail_type == 0) - fatal("--mail-type=%s invalid", optarg); + if (opt.mail_type == 0) { + error("--mail-type=%s invalid", optarg); + exit(error_exit); + } break; case LONG_OPT_MAIL_USER: xfree(opt.mail_user); @@ -1229,7 +1252,7 @@ static void set_options(const int argc, char **argv) if (abs(opt.nice) > NICE_OFFSET) { error("Invalid nice value, must be between " "-%d and %d", NICE_OFFSET, NICE_OFFSET); - exit(1); + exit(error_exit); } if (opt.nice < 0) { uid_t my_uid = getuid(); @@ -1279,7 +1302,7 @@ static void set_options(const int argc, char **argv) &opt.min_threads_per_core, &opt.max_threads_per_core, &opt.cpu_bind_type)) { - exit(1); + exit(error_exit); } break; case LONG_OPT_NTASKSPERNODE: @@ -1322,13 +1345,21 @@ static void set_options(const int argc, char **argv) opt.pty = true; opt.unbuffered = true; /* implicit */ if (opt.ifname) - fatal("--input incompatable with --pty option"); - if (opt.ofname) - fatal("--output incompatable with --pty option"); - if (opt.efname) - fatal("--error incompatable with --pty option"); + tmp_str = "--input"; + else if (opt.ofname) + tmp_str = "--output"; + else if (opt.efname) + tmp_str = "--error"; + else + tmp_str = NULL; + if (tmp_str) { + error("%s incompatable with --pty option", + tmp_str); + exit(error_exit); + } #else - error("--pty not currently supported on this system type"); + error("--pty not currently supported on this system " + "type"); #endif break; case LONG_OPT_CHECKPOINT: @@ -1368,13 +1399,14 @@ static void set_options(const int argc, char **argv) case LONG_OPT_SIGNAL: if (get_signal_opts(optarg, &opt.warn_signal, &opt.warn_time)) { - fatal("Invalid signal specification: %s", + error("Invalid signal specification: %s", optarg); + exit(error_exit); } break; default: if (spank_process_option (opt_char, optarg) < 0) { - exit (1); + exit(error_exit); } } } @@ -1394,17 +1426,17 @@ static void _load_multi(int *argc, char **argv) if ((config_fd = open(argv[0], O_RDONLY)) == -1) { error("Could not open multi_prog config file %s", argv[0]); - exit(1); + exit(error_exit); } if (fstat(config_fd, &stat_buf) == -1) { error("Could not stat multi_prog config file %s", argv[0]); - exit(1); + exit(error_exit); } if (stat_buf.st_size > 60000) { error("Multi_prog config file %s is too large", argv[0]); - exit(1); + exit(error_exit); } data_buf = xmalloc(stat_buf.st_size + 1); while ((i = read(config_fd, &data_buf[data_read], stat_buf.st_size @@ -1412,7 +1444,7 @@ static void _load_multi(int *argc, char **argv) if (i < 0) { error("Error reading multi_prog config file %s", argv[0]); - exit(1); + exit(error_exit); } else data_read += i; } @@ -1461,7 +1493,7 @@ static void _opt_args(int argc, char **argv) #endif error("Too few processes for the requested " "{plane,node} distribution"); - exit(1); + exit(error_exit); } } } @@ -1500,7 +1532,7 @@ static void _opt_args(int argc, char **argv) if (opt.multi_prog) { if (opt.argc < 1) { error("configuration file not specified"); - exit(1); + exit(error_exit); } _load_multi(&opt.argc, opt.argv); } @@ -1514,7 +1546,7 @@ static void _opt_args(int argc, char **argv) } if (opt.multi_prog && verify_multi_name(opt.argv[0], opt.nprocs)) - exit(1); + exit(error_exit); } /* @@ -1586,7 +1618,7 @@ static bool _opt_verify(void) if (!_valid_node_list(&opt.nodelist)) { error("Failure getting NodeNames from " "hostfile"); - exit(1); + exit(error_exit); } else { debug("loaded nodes (%s) from hostfile", opt.nodelist); @@ -1594,7 +1626,7 @@ static bool _opt_verify(void) } } else { if (!_valid_node_list(&opt.nodelist)) - exit(1); + exit(error_exit); } /* set up the proc and node counts based on the arbitrary list @@ -1729,8 +1761,10 @@ static bool _opt_verify(void) /* massage the numbers */ if (opt.nodelist) { hl = hostlist_create(opt.nodelist); - if (!hl) - fatal("memory allocation failure"); + if (!hl) { + error("memory allocation failure"); + exit(error_exit); + } hostlist_uniq(hl); hl_cnt = hostlist_count(hl); if (opt.nodes_set) @@ -1764,8 +1798,10 @@ static bool _opt_verify(void) /* massage the numbers */ if (opt.nodelist) { hl = hostlist_create(opt.nodelist); - if (!hl) - fatal("memory allocation failure"); + if (!hl) { + error("memory allocation failure"); + exit(error_exit); + } if(opt.distribution == SLURM_DIST_ARBITRARY && !opt.nprocs_set) { opt.nprocs = hostlist_count(hl); @@ -1836,7 +1872,7 @@ static bool _opt_verify(void) if (opt.labelio && opt.unbuffered) { error("Do not specify both -l (--label) and " "-u (--unbuffered)"); - exit(1); + exit(error_exit); } /* @@ -1849,7 +1885,7 @@ static bool _opt_verify(void) opt.time_limit = time_str2mins(opt.time_limit_str); if ((opt.time_limit < 0) && (opt.time_limit != INFINITE)) { error("Invalid time limit specification"); - exit(1); + exit(error_exit); } if (opt.time_limit == 0) opt.time_limit = INFINITE; @@ -1860,7 +1896,7 @@ static bool _opt_verify(void) if ((opt.ckpt_interval < 0) && (opt.ckpt_interval != INFINITE)) { error("Invalid checkpoint interval specification"); - exit(1); + exit(error_exit); } } @@ -1875,7 +1911,7 @@ static bool _opt_verify(void) if (slurm_verify_cpu_bind(NULL, &opt.cpu_bind, &opt.cpu_bind_type)) - exit(1); + exit(error_exit); return verified; } diff --git a/src/srun/opt.h b/src/srun/opt.h index e4827c882cd7e52e8ff65f16ddffcf94cda627be..55fddb45666335567813ed3b99e413068d9c90e6 100644 --- a/src/srun/opt.h +++ b/src/srun/opt.h @@ -230,6 +230,8 @@ typedef struct srun_options { extern opt_t opt; +extern int error_exit; + /* return whether any constraints were specified by the user * (if new constraints are added above, might want to add them to this * macro or move this to a function if it gets a little complicated) diff --git a/src/srun/srun.c b/src/srun/srun.c index d9211358d318dbb075c0b40018c530d7f4553c28..b0719a46a6d50f6e60643e25da2ab9e4b275af37 100644 --- a/src/srun/srun.c +++ b/src/srun/srun.c @@ -143,6 +143,7 @@ static void _run_srun_prolog (srun_job_t *job); static void _run_srun_epilog (srun_job_t *job); static int _run_srun_script (srun_job_t *job, char *script); static void _set_cpu_env_var(resource_allocation_response_msg_t *resp); +static void _set_exit_code(void); static int _setup_signals(); static void _step_opt_exclusive(void); static void _set_stdio_fds(srun_job_t *job, slurm_step_io_fds_t *cio_fds); @@ -196,6 +197,7 @@ int srun(int ac, char **av) debug_level = _slurm_debug_env_val(); logopt.stderr_level += debug_level; log_init(xbasename(av[0]), logopt, 0, NULL); + _set_exit_code(); /* xsignal(SIGQUIT, _ignore_signal); */ /* xsignal(SIGPIPE, _ignore_signal); */ @@ -205,7 +207,8 @@ int srun(int ac, char **av) /* Initialize plugin stack, read options from plugins, etc. */ if (spank_init(NULL) < 0) { - fatal("Plug-in initialization failed"); + error("Plug-in initialization failed"); + exit(error_exit); _define_symbols(); } @@ -223,9 +226,11 @@ int srun(int ac, char **av) } record_ppid(); - if (spank_init_post_opt() < 0) - fatal("Plugin stack post-option processing failed."); - + if (spank_init_post_opt() < 0) { + error("Plugin stack post-option processing failed."); + exit(error_exit); + } + /* reinit log with new verbosity (if changed by command line) */ if (_verbose || opt.quiet) { @@ -262,7 +267,7 @@ int srun(int ac, char **av) info("do not allocate resources"); job = job_create_noalloc(); if (create_job_step(job, false) < 0) { - exit(1); + exit(error_exit); } } else if ((resp = existing_allocation())) { @@ -280,7 +285,7 @@ int srun(int ac, char **av) " are already allocated."); if (!job || create_job_step(job, false) < 0) - exit(1); + exit(error_exit); } else { /* Combined job allocation and job step launch */ #ifdef HAVE_FRONT_END @@ -288,12 +293,12 @@ int srun(int ac, char **av) if ((my_uid != 0) && (my_uid != slurm_get_slurm_user_id())) { error("srun task launch not supported on this system"); - exit(1); + exit(error_exit); } #endif if ( !(resp = allocate_nodes()) ) - exit(1); + exit(error_exit); got_alloc = 1; _print_job_information(resp); _set_cpu_env_var(resp); @@ -306,7 +311,7 @@ int srun(int ac, char **av) } if (!job || create_job_step(job, true) < 0) { slurm_complete_job(resp->job_id, 1); - exit(1); + exit(error_exit); } slurm_free_resource_allocation_response_msg(resp); @@ -430,7 +435,7 @@ int srun(int ac, char **av) if (_call_spank_local_user (job) < 0) { error("Failure in local plugin stack"); slurm_step_launch_abort(job->step_ctx); - exit(1); + exit(error_exit); } update_job_state(job, SRUN_JOB_LAUNCHING); @@ -467,10 +472,10 @@ int srun(int ac, char **av) slurm_step_ctx_destroy(job->step_ctx); if (got_alloc) { if (create_job_step(job, true) < 0) - exit(1); + exit(error_exit); } else { if (create_job_step(job, false) < 0) - exit(1); + exit(error_exit); } task_state_destroy(task_state); goto re_launch; @@ -659,6 +664,20 @@ static void _set_prio_process_env(void) debug ("propagating SLURM_PRIO_PROCESS=%d", retval); } +static void _set_exit_code(void) +{ + int i; + char *val = getenv("SLURM_ERROR_EXIT"); + + if (val) { + i = atoi(val); + if (i == 0) + error("SLURM_ERROR_EXIT has zero value"); + else + error_exit = i; + } +} + static void _set_cpu_env_var(resource_allocation_response_msg_t *resp) { char *tmp; @@ -686,8 +705,10 @@ static int _set_rlimit_env(void) slurm_rlimits_info_t *rli; /* Modify limits with any command-line options */ - if (opt.propagate && parse_rlimits( opt.propagate, PROPAGATE_RLIMITS)) - fatal( "--propagate=%s is not valid.", opt.propagate ); + if (opt.propagate && parse_rlimits( opt.propagate, PROPAGATE_RLIMITS)){ + error( "--propagate=%s is not valid.", opt.propagate ); + exit(error_exit); + } for (rli = get_slurm_rlimits_info(); rli->name != NULL; rli++ ) { @@ -856,12 +877,15 @@ _set_stdio_fds(srun_job_t *job, slurm_step_io_fds_t *cio_fds) * create stdin file descriptor */ if (_is_local_file(job->ifname)) { - if ((job->ifname->name == NULL) || (job->ifname->taskid != -1)) { + if ((job->ifname->name == NULL) || + (job->ifname->taskid != -1)) { cio_fds->in.fd = STDIN_FILENO; } else { cio_fds->in.fd = open(job->ifname->name, O_RDONLY); - if (cio_fds->in.fd == -1) - fatal("Could not open stdin file: %m"); + if (cio_fds->in.fd == -1) { + error("Could not open stdin file: %m"); + exit(error_exit); + } } if (job->ifname->type == IO_ONE) { cio_fds->in.taskid = job->ifname->taskid; @@ -875,13 +899,16 @@ _set_stdio_fds(srun_job_t *job, slurm_step_io_fds_t *cio_fds) * create stdout file descriptor */ if (_is_local_file(job->ofname)) { - if ((job->ofname->name == NULL) || (job->ofname->taskid != -1)) { + if ((job->ofname->name == NULL) || + (job->ofname->taskid != -1)) { cio_fds->out.fd = STDOUT_FILENO; } else { cio_fds->out.fd = open(job->ofname->name, file_flags, 0644); - if (cio_fds->out.fd == -1) - fatal("Could not open stdout file: %m"); + if (cio_fds->out.fd == -1) { + error("Could not open stdout file: %m"); + exit(error_exit); + } } if (job->ofname->name != NULL && job->efname->name != NULL @@ -899,13 +926,16 @@ _set_stdio_fds(srun_job_t *job, slurm_step_io_fds_t *cio_fds) cio_fds->err.fd = cio_fds->out.fd; cio_fds->err.taskid = cio_fds->out.taskid; } else if (_is_local_file(job->efname)) { - if ((job->efname->name == NULL) || (job->efname->taskid != -1)) { + if ((job->efname->name == NULL) || + (job->efname->taskid != -1)) { cio_fds->err.fd = STDERR_FILENO; } else { cio_fds->err.fd = open(job->efname->name, file_flags, 0644); - if (cio_fds->err.fd == -1) - fatal("Could not open stderr file: %m"); + if (cio_fds->err.fd == -1) { + error("Could not open stderr file: %m"); + exit(error_exit); + } } } } @@ -938,12 +968,18 @@ static void _step_opt_exclusive(void) opt.min_nodes = 1; opt.max_nodes = 0; } - if (!opt.nprocs_set) - fatal("--ntasks must be set with --exclusive"); - if (opt.relative_set) - fatal("--relative disabled, incompatible with --exclusive"); - if (opt.exc_nodes) - fatal("--exclude is incompatible with --exclusive"); + if (!opt.nprocs_set) { + error("--ntasks must be set with --exclusive"); + exit(error_exit); + } + if (opt.relative_set) { + error("--relative disabled, incompatible with --exclusive"); + exit(error_exit); + } + if (opt.exc_nodes) { + error("--exclude is incompatible with --exclusive"); + exit(error_exit); + } } static void @@ -1055,8 +1091,10 @@ _task_array_to_string(int ntasks, uint32_t taskids[]) int i; tasks_bitmap = bit_alloc(job->ntasks); - if (!tasks_bitmap) - fatal("bit_alloc: memory allocation failure"); + if (!tasks_bitmap) { + error("bit_alloc: memory allocation failure"); + exit(error_exit); + } for (i=0; i<ntasks; i++) bit_set(tasks_bitmap, taskids[i]); str = xmalloc(2048);