diff --git a/NEWS b/NEWS index 6b364ba51b285287e6fd3810dc6e36cd25c0682f..cf475475db943421f5b8bfb6572eea81827ca19c 100644 --- a/NEWS +++ b/NEWS @@ -72,8 +72,10 @@ documents those changes that are of interest to users and admins. -- Fix for checking for a non-existant job when querying steps -- For job steps with the --exclusive option, base initial time in exponential back-off be partly based upon the process ID for better - performance with many job steps started at the same time. + performance with many job steps started at the same time. -- Fix for correct step ordering in sview. + -- Support optional argument to srun and salloc --immediate option. Specify + timeout value in seconds for job or step to be allocated resources. * Changes in SLURM 2.0.2 ======================== diff --git a/doc/man/man1/salloc.1 b/doc/man/man1/salloc.1 index 04388acc37804372cca7dace566005fa1f4dab8b..51088bf9dd3c0b0efa74442719819c364164cd9e 100644 --- a/doc/man/man1/salloc.1 +++ b/doc/man/man1/salloc.1 @@ -326,10 +326,13 @@ show this help message .RE .TP -\fB\-I\fR, \fB\-\-immediate\fR -Grab the requested resources immediately, or abort if the resources -are not currently available. The \fIcommand\fR parameter will not be -run if the resources are not available. +\fB\-I\fR, \fB\-\-immediate\fR[=<\fIseconds\fR>] +Exit if resources are not available within the +time period specified. +If no argument is given, resources must be available immediately +for the request to succeed. +By default, \fB\-\-immediate\fR is off, and the command +will block until resources become available. .TP \fB\-J\fR, \fB\-\-job\-name\fR=<\fIjobname\fR> diff --git a/doc/man/man1/srun.1 b/doc/man/man1/srun.1 index 940d10144ee61c60f73dd955bd5022b205c46745..3b00b8e71b2d9a2e9ef89d2441cb6de113c01ef9 100644 --- a/doc/man/man1/srun.1 +++ b/doc/man/man1/srun.1 @@ -372,10 +372,12 @@ show this help message .RE .TP -\fB\-I\fR, \fB\-\-immediate\fR -exit if resources are not immediately available. 
By default, -\fB\-\-immediate\fR is off, and -.B srun +\fB\-I\fR, \fB\-\-immediate\fR[=<\fIseconds\fR>] +exit if resources are not available within the +time period specified. +If no argument is given, resources must be available immediately +for the request to succeed. +By default, \fB\-\-immediate\fR is off, and the command will block until resources become available. .TP diff --git a/src/api/allocate.c b/src/api/allocate.c index 1c351538551e75d6f5a8677cdf6472916f19d99f..4292e694a3b6346d245af5b53c088bee47fc455d 100644 --- a/src/api/allocate.c +++ b/src/api/allocate.c @@ -252,7 +252,7 @@ slurm_allocate_resources_blocking (const job_desc_msg_t *user_req, /* yes, allocation has been granted */ errno = SLURM_PROTOCOL_SUCCESS; } else if (!req->immediate) { - if(resp->error_code != SLURM_SUCCESS) + if (resp->error_code != SLURM_SUCCESS) info("%s", slurm_strerror(resp->error_code)); /* no, we need to wait for a response */ job_id = resp->job_id; @@ -263,7 +263,7 @@ slurm_allocate_resources_blocking (const job_desc_msg_t *user_req, timeout); /* If NULL, we didn't get the allocation in the time desired, so just free the job id */ - if (resp == NULL && errno != ESLURM_ALREADY_DONE) { + if ((resp == NULL) && (errno != ESLURM_ALREADY_DONE)) { errnum = errno; slurm_complete_job(job_id, -1); } diff --git a/src/salloc/opt.c b/src/salloc/opt.c index b36b821a2a4d70c67223e18d63ef6f9826e76320..62ca9a98e7ceb2eb04f1668f3c1d195c9b4c303f 100644 --- a/src/salloc/opt.c +++ b/src/salloc/opt.c @@ -98,6 +98,7 @@ #define OPT_ACCTG_FREQ 0x0f #define OPT_CPU_BIND 0x10 #define OPT_MEM_BIND 0x11 +#define OPT_IMMEDIATE 0x12 #define OPT_WCKEY 0x15 /* generic getopt_long flags, integers and *not* valid characters */ @@ -272,7 +273,7 @@ static void _opt_default() opt.kill_command_signal = SIGTERM; opt.kill_command_signal_set = false; - opt.immediate = false; + opt.immediate = 0; opt.overcommit = false; opt.max_wait = 0; @@ -331,25 +332,25 @@ struct env_vars { }; env_vars_t env_vars[] = { - 
{"SALLOC_ACCOUNT", OPT_STRING, &opt.account, NULL }, - {"SALLOC_ACCTG_FREQ", OPT_INT, &opt.acctg_freq, NULL }, - {"SALLOC_BELL", OPT_BELL, NULL, NULL }, - {"SALLOC_CONN_TYPE", OPT_CONN_TYPE, NULL, NULL }, - {"SALLOC_CPU_BIND", OPT_CPU_BIND, NULL, NULL }, - {"SALLOC_DEBUG", OPT_DEBUG, NULL, NULL }, - {"SALLOC_EXCLUSIVE", OPT_EXCLUSIVE, NULL, NULL }, - {"SALLOC_GEOMETRY", OPT_GEOMETRY, NULL, NULL }, - {"SALLOC_IMMEDIATE", OPT_BOOL, &opt.immediate, NULL }, - {"SALLOC_JOBID", OPT_JOBID, NULL, NULL }, - {"SALLOC_MEM_BIND", OPT_MEM_BIND, NULL, NULL }, - {"SALLOC_NETWORK", OPT_STRING , &opt.network, NULL }, - {"SALLOC_NO_BELL", OPT_NO_BELL, NULL, NULL }, - {"SALLOC_NO_ROTATE", OPT_NO_ROTATE, NULL, NULL }, - {"SALLOC_OVERCOMMIT", OPT_OVERCOMMIT, NULL, NULL }, - {"SALLOC_PARTITION", OPT_STRING, &opt.partition, NULL }, - {"SALLOC_TIMELIMIT", OPT_STRING, &opt.time_limit_str,NULL }, - {"SALLOC_WAIT", OPT_INT, &opt.max_wait, NULL }, - {"SALLOC_WCKEY", OPT_STRING, &opt.wckey, NULL }, + {"SALLOC_ACCOUNT", OPT_STRING, &opt.account, NULL }, + {"SALLOC_ACCTG_FREQ", OPT_INT, &opt.acctg_freq, NULL }, + {"SALLOC_BELL", OPT_BELL, NULL, NULL }, + {"SALLOC_CONN_TYPE", OPT_CONN_TYPE, NULL, NULL }, + {"SALLOC_CPU_BIND", OPT_CPU_BIND, NULL, NULL }, + {"SALLOC_DEBUG", OPT_DEBUG, NULL, NULL }, + {"SALLOC_EXCLUSIVE", OPT_EXCLUSIVE, NULL, NULL }, + {"SALLOC_GEOMETRY", OPT_GEOMETRY, NULL, NULL }, + {"SALLOC_IMMEDIATE", OPT_IMMEDIATE, NULL, NULL }, + {"SALLOC_JOBID", OPT_JOBID, NULL, NULL }, + {"SALLOC_MEM_BIND", OPT_MEM_BIND, NULL, NULL }, + {"SALLOC_NETWORK", OPT_STRING , &opt.network, NULL }, + {"SALLOC_NO_BELL", OPT_NO_BELL, NULL, NULL }, + {"SALLOC_NO_ROTATE", OPT_NO_ROTATE, NULL, NULL }, + {"SALLOC_OVERCOMMIT", OPT_OVERCOMMIT, NULL, NULL }, + {"SALLOC_PARTITION", OPT_STRING, &opt.partition, NULL }, + {"SALLOC_TIMELIMIT", OPT_STRING, &opt.time_limit_str,NULL }, + {"SALLOC_WAIT", OPT_INT, &opt.max_wait, NULL }, + {"SALLOC_WCKEY", OPT_STRING, &opt.wckey, NULL }, {NULL, 0, NULL, NULL} }; @@ 
-390,8 +391,10 @@ _process_env_var(env_vars_t *e, const char *val) case OPT_INT: if (val != NULL) { *((int *) e->arg) = (int) strtol(val, &end, 10); - if (!(end && *end == '\0')) - error("%s=%s invalid. ignoring...", e->var, val); + if (!(end && *end == '\0')) { + error("%s=%s invalid. ignoring...", + e->var, val); + } } break; @@ -444,6 +447,14 @@ _process_env_var(env_vars_t *e, const char *val) e->var, val); } break; + + case OPT_IMMEDIATE: + if (val) + opt.immediate = strtol(val, NULL, 10); + else + opt.immediate = DEFAULT_IMMEDIATE; + break; + case OPT_BELL: opt.bell = BELL_ALWAYS; break; @@ -518,7 +529,7 @@ void set_options(const int argc, char **argv) {"geometry", required_argument, 0, 'g'}, {"help", no_argument, 0, 'h'}, {"hold", no_argument, 0, 'H'}, - {"immediate", no_argument, 0, 'I'}, + {"immediate", optional_argument, 0, 'I'}, {"job-name", required_argument, 0, 'J'}, {"no-kill", no_argument, 0, 'k'}, {"kill-command", optional_argument, 0, 'K'}, @@ -658,7 +669,10 @@ void set_options(const int argc, char **argv) opt.hold = true; break; case 'I': - opt.immediate = true; + if (optarg) + opt.immediate = strtol(optarg, NULL, 10); + else + opt.immediate = DEFAULT_IMMEDIATE; break; case 'J': xfree(opt.job_name); @@ -1265,16 +1279,6 @@ static bool _opt_verify(void) opt.time_limit = INFINITE; } - if (opt.immediate) { - char *sched_name = slurm_get_sched_type(); - if (strcmp(sched_name, "sched/wiki") == 0) { - info("WARNING: Ignoring the -I/--immediate option " - "(not supported by Maui)"); - opt.immediate = false; - } - xfree(sched_name); - } - #ifdef HAVE_AIX if (opt.network == NULL) opt.network = "us,sn_all,bulk_xfer"; @@ -1309,6 +1313,11 @@ static bool _opt_verify(void) opt.ntasks_per_node); } + if (opt.max_wait) { + /* FIXME: Eliminate max_wait in slurm v2.1 */ + opt.immediate = MAX(opt.immediate, opt.max_wait); + } + return verified; } @@ -1536,7 +1545,10 @@ static void _opt_list() if(opt.distribution == SLURM_DIST_PLANE) info("plane size : %u", 
opt.plane_size); info("verbose : %d", opt.verbose); - info("immediate : %s", tf_(opt.immediate)); + if (opt.immediate <= 1) + info("immediate : %s", tf_(opt.immediate)); + else + info("immediate : %d secs", (opt.immediate - 1)); info("overcommit : %s", tf_(opt.overcommit)); if (opt.time_limit == INFINITE) info("time_limit : INFINITE"); @@ -1610,7 +1622,7 @@ static void _usage(void) printf( "Usage: salloc [-N numnodes|[min nodes]-[max nodes]] [-n num-processors]\n" " [[-c cpus-per-node] [-r n] [-p partition] [--hold] [-t minutes]\n" -" [--immediate] [--no-kill] [--overcommit] [-D path]\n" +" [--immediate[=secs]] [--no-kill] [--overcommit] [-D path]\n" " [--share] [-J jobname] [--jobid=id]\n" " [--verbose] [--gid=group] [--uid=user] [--licenses=names]\n" " [-W sec] [--minsockets=n] [--mincores=n] [--minthreads=n]\n" @@ -1650,7 +1662,7 @@ static void _help(void) " --get-user-env used by Moab. See srun man page.\n" " --gid=group_id group ID to run job as (user root only)\n" " -H, --hold submit job in held state\n" -" -I, --immediate exit if resources are not immediately available\n" +" -I, --immediate[=secs] exit if resources not available in \"secs\"\n" " --jobid=id specify jobid to use\n" " -J, --job-name=jobname name of job\n" " -k, --no-kill do not kill job on node failure\n" diff --git a/src/salloc/opt.h b/src/salloc/opt.h index ffddb92c3f095e873acc6ab370e2c7f7a6f39227..3175f0d48423de9f489ba2619532cdb4341fcacc 100644 --- a/src/salloc/opt.h +++ b/src/salloc/opt.h @@ -34,15 +34,16 @@ # include "config.h" #endif -#include <time.h> #include <sys/types.h> +#include <time.h> #include <unistd.h> -#include "src/common/macros.h" /* true and false */ #include "src/common/env.h" +#include "src/common/macros.h" /* true and false */ -#define MAX_USERNAME 9 -#define DEFAULT_BELL_DELAY 10 +#define DEFAULT_IMMEDIATE 1 +#define MAX_USERNAME 9 +#define DEFAULT_BELL_DELAY 10 typedef enum {BELL_NEVER, BELL_AFTER_DELAY, BELL_ALWAYS} bell_flag_t; @@ -92,7 +93,7 @@ typedef struct 
salloc_options { char *account; /* --account, -U acct_name */ char *comment; /* --comment */ - int immediate; /* -i, --immediate */ + int immediate; /* -I, --immediate[=secs] */ bool hold; /* --hold, -H */ bool no_kill; /* --no-kill, -k */ diff --git a/src/salloc/salloc.c b/src/salloc/salloc.c index 6ecce218e3d8199d6f0ec6d528a2abfb398f6db7..68fee32342973c78fdb59219ea67643e52396491 100644 --- a/src/salloc/salloc.c +++ b/src/salloc/salloc.c @@ -193,7 +193,8 @@ int main(int argc, char *argv[]) callbacks.user_msg = _user_msg_handler; callbacks.node_fail = _node_fail_handler; /* create message thread to handle pings and such from slurmctld */ - msg_thr = slurm_allocation_msg_thr_create(&desc.other_port, &callbacks); + msg_thr = slurm_allocation_msg_thr_create(&desc.other_port, + &callbacks); xsignal(SIGHUP, _signal_while_allocating); xsignal(SIGINT, _signal_while_allocating); @@ -204,7 +205,7 @@ int main(int argc, char *argv[]) xsignal(SIGUSR2, _signal_while_allocating); before = time(NULL); - while ((alloc = slurm_allocate_resources_blocking(&desc, opt.max_wait, + while ((alloc = slurm_allocate_resources_blocking(&desc, opt.immediate, _pending_callback)) == NULL) { if ((errno != ESLURM_ERROR_ON_DESC_TO_RECORD_COPY) || (retries >= MAX_RETRIES)) @@ -222,12 +223,15 @@ int main(int argc, char *argv[]) } else if (errno == EINTR) { error("Interrupted by signal." " Allocation request rescinded."); + } else if ((errno == ETIMEDOUT) && opt.immediate) { + error("Unable to allocate resources: %s", + slurm_strerror(ESLURM_NODES_BUSY)); } else { error("Failed to allocate resources: %m"); } slurm_allocation_msg_thr_destroy(msg_thr); exit(1); - } else if(!allocation_interrupted) { + } else if (!allocation_interrupted) { /* * Allocation granted! */ @@ -382,7 +386,8 @@ static int _fill_job_desc_from_opts(job_desc_msg_t *desc) { desc->contiguous = opt.contiguous ? 1 : 0; desc->features = opt.constraints; - desc->immediate = opt.immediate ? 
1 : 0; + if (opt.immediate == 1) + desc->immediate = 1; desc->name = xstrdup(opt.job_name); desc->reservation = xstrdup(opt.reservation); desc->wckey = xstrdup(opt.wckey); diff --git a/src/srun/allocate.c b/src/srun/allocate.c index f596f9961c8583a4fdb770fa80a0971739add788..d41dd84fea1babe44b8142eb33942d1c0b7fe7b5 100644 --- a/src/srun/allocate.c +++ b/src/srun/allocate.c @@ -177,7 +177,7 @@ static void _node_fail_handler(srun_node_fail_msg_t *msg) -static bool _retry() +static bool _retry(void) { static int retries = 0; static char *msg = "Slurm controller not responding, " @@ -194,10 +194,16 @@ static bool _retry() } else if (errno == EINTR) { /* srun may be interrupted by the BLCR checkpoint signal */ /* - * XXX: this will cause the old job cancelled and a new job allocated + * XXX: this will cause the old job cancelled and a new + * job allocated */ - debug("Syscall interrupted while allocating resources, retrying."); + debug("Syscall interrupted while allocating resources, " + "retrying."); return true; + } else if ((errno == ETIMEDOUT) && opt.immediate) { + error("Unable to allocate resources: %s", + slurm_strerror(ESLURM_NODES_BUSY)); + return false; } else { error("Unable to allocate resources: %m"); return false; @@ -350,14 +356,14 @@ allocate_nodes(void) job_desc_msg_t *j = job_desc_msg_create_from_opts(); slurm_allocation_callbacks_t callbacks; - if(!j) + if (!j) return NULL; /* Do not re-use existing job id when submitting new job * from within a running job */ if ((j->job_id != NO_VAL) && !opt.jobid_set) { info("WARNING: Creating SLURM job allocation from within " - "another allocation"); + "another allocation"); info("WARNING: You are attempting to initiate a second job"); if (!opt.jobid_set) /* Let slurmctld set jobid */ j->job_id = NO_VAL; @@ -380,7 +386,7 @@ allocate_nodes(void) xsignal(SIGUSR2, _signal_while_allocating); while (!resp) { - resp = slurm_allocate_resources_blocking(j, 0, + resp = slurm_allocate_resources_blocking(j, opt.immediate, 
_set_pending_job_id); if (destroy_job) { /* cancelled by signal */ @@ -390,7 +396,7 @@ allocate_nodes(void) } } - if(resp && !destroy_job) { + if (resp && !destroy_job) { /* * Allocation granted! */ @@ -518,7 +524,8 @@ job_desc_msg_create_from_opts () j->contiguous = opt.contiguous; j->features = opt.constraints; - j->immediate = opt.immediate; + if (opt.immediate == 1) + j->immediate = opt.immediate; if (opt.job_name) j->name = xstrdup(opt.job_name); else @@ -529,7 +536,7 @@ job_desc_msg_create_from_opts () j->req_nodes = xstrdup(opt.nodelist); /* simplify the job allocation nodelist, - not laying out tasks until step */ + * not laying out tasks until step */ if(j->req_nodes) { hl = hostlist_create(j->req_nodes); hostlist_ranged_string(hl, sizeof(buf), buf); @@ -696,6 +703,7 @@ create_job_step(srun_job_t *job, bool use_all_cpus) int i, rc; SigFunc *oquitf = NULL, *ointf = NULL, *otermf = NULL; unsigned long my_sleep = 0; + time_t begin_time; slurm_step_ctx_params_t_init(&job->ctx_params); job->ctx_params.job_id = job->jobid; @@ -721,7 +729,8 @@ create_job_step(srun_job_t *job, bool use_all_cpus) job->ctx_params.ckpt_interval = (uint16_t)opt.ckpt_interval; job->ctx_params.ckpt_dir = opt.ckpt_dir; job->ctx_params.exclusive = (uint16_t)opt.exclusive; - job->ctx_params.immediate = (uint16_t)opt.immediate; + if (opt.immediate == 1) + job->ctx_params.immediate = (uint16_t)opt.immediate; if (opt.time_limit != NO_VAL) job->ctx_params.time_limit = (uint32_t)opt.time_limit; job->ctx_params.verbose_level = (uint16_t)_verbose; @@ -767,9 +776,10 @@ create_job_step(srun_job_t *job, bool use_all_cpus) debug("cpus %u, tasks %u, name %s, relative %u", job->ctx_params.cpu_count, job->ctx_params.task_count, job->ctx_params.name, job->ctx_params.relative); + begin_time = time(NULL); for (i=0; (!destroy_job); i++) { - if(opt.no_alloc) { + if (opt.no_alloc) { job->step_ctx = slurm_step_ctx_create_no_alloc( &job->ctx_params, job->stepid); } else @@ -783,7 +793,8 @@ 
create_job_step(srun_job_t *job, bool use_all_cpus) } rc = slurm_get_errno(); - if (opt.immediate || + if (((opt.immediate != 0) && + (difftime(time(NULL), begin_time) > opt.immediate)) || ((rc != ESLURM_NODES_BUSY) && (rc != ESLURM_PORTS_BUSY) && (rc != ESLURM_PROLOG_RUNNING) && (rc != ESLURM_DISABLED))) { diff --git a/src/srun/opt.c b/src/srun/opt.c index 72941235468751cbbfa90166a151569a99ea10f4..e0c92c3a094b4f2b6cf6cfeef73b9143d1ee8c39 100644 --- a/src/srun/opt.c +++ b/src/srun/opt.c @@ -98,6 +98,7 @@ #define OPT_NONE 0x00 #define OPT_INT 0x01 #define OPT_STRING 0x02 +#define OPT_IMMEDIATE 0x03 #define OPT_DISTRIB 0x04 #define OPT_NODES 0x05 #define OPT_OVERCOMMIT 0x06 @@ -380,7 +381,7 @@ static void _opt_default() opt.no_kill = false; opt.kill_bad_exit = false; - opt.immediate = false; + opt.immediate = 0; opt.join = false; opt.max_wait = slurm_get_wait_time(); @@ -481,7 +482,7 @@ env_vars_t env_vars[] = { {"SLURM_DEPENDENCY", OPT_STRING, &opt.dependency, NULL }, {"SLURM_DISTRIBUTION", OPT_DISTRIB, NULL, NULL }, {"SLURM_GEOMETRY", OPT_GEOMETRY, NULL, NULL }, -{"SLURM_IMMEDIATE", OPT_INT, &opt.immediate, NULL }, +{"SLURM_IMMEDIATE", OPT_IMMEDIATE, NULL, NULL }, {"SLURM_JOB_NAME", OPT_STRING, &opt.job_name, &opt.job_name_set_env}, {"SLURM_JOB_ID", OPT_INT, &opt.jobid, NULL }, @@ -652,6 +653,13 @@ _process_env_var(env_vars_t *e, const char *val) } break; + case OPT_IMMEDIATE: + if (val) + opt.immediate = strtol(val, NULL, 10); + else + opt.immediate = DEFAULT_IMMEDIATE; + break; + case OPT_MPI: if (mpi_hook_client_init((char *)val) == SLURM_ERROR) { fatal("\"%s=%s\" -- invalid MPI type, " @@ -710,7 +718,7 @@ static void set_options(const int argc, char **argv) {"geometry", required_argument, 0, 'g'}, {"hold", no_argument, 0, 'H'}, {"input", required_argument, 0, 'i'}, - {"immediate", no_argument, 0, 'I'}, + {"immediate", optional_argument, 0, 'I'}, {"join", no_argument, 0, 'j'}, {"job-name", required_argument, 0, 'J'}, {"no-kill", no_argument, 0, 'k'}, @@ -876,8 
+884,10 @@ static void set_options(const int argc, char **argv) opt.cwd = xstrdup(optarg); break; case (int)'e': - if (opt.pty) - fatal("--error incompatable with --pty option"); + if (opt.pty) { + fatal("--error incompatable with --pty " + "option"); + } xfree(opt.efname); if (strncasecmp(optarg, "none", (size_t) 4) == 0) opt.efname = xstrdup("/dev/null"); @@ -895,8 +905,10 @@ static void set_options(const int argc, char **argv) opt.hold = true; break; case (int)'i': - if (opt.pty) - fatal("--input incompatable with --pty option"); + if (opt.pty) { + fatal("--input incompatable with " + "--pty option"); + } xfree(opt.ifname); if (strncasecmp(optarg, "none", (size_t) 4) == 0) opt.ifname = xstrdup("/dev/null"); @@ -904,7 +916,10 @@ static void set_options(const int argc, char **argv) opt.ifname = xstrdup(optarg); break; case (int)'I': - opt.immediate = true; + if (optarg) + opt.immediate = strtol(optarg, NULL, 10); + else + opt.immediate = DEFAULT_IMMEDIATE; break; case (int)'j': opt.join = true; @@ -1814,7 +1829,8 @@ static bool _opt_verify(void) if (opt.ckpt_interval_str) { opt.ckpt_interval = time_str2mins(opt.ckpt_interval_str); - if ((opt.ckpt_interval < 0) && (opt.ckpt_interval != INFINITE)) { + if ((opt.ckpt_interval < 0) && + (opt.ckpt_interval != INFINITE)) { error("Invalid checkpoint interval specification"); exit(1); } @@ -1829,16 +1845,6 @@ static bool _opt_verify(void) if ((opt.egid != (gid_t) -1) && (opt.egid != opt.gid)) opt.gid = opt.egid; - if (opt.immediate) { - char *sched_name = slurm_get_sched_type(); - if (strcmp(sched_name, "sched/wiki") == 0) { - info("WARNING: Ignoring the -I/--immediate option " - "(not supported by Maui)"); - opt.immediate = false; - } - xfree(sched_name); - } - if (slurm_verify_cpu_bind(NULL, &opt.cpu_bind, &opt.cpu_bind_type)) exit(1); @@ -2022,7 +2028,10 @@ static void _opt_list() info("core format : %s", core_format_name (opt.core_type)); info("verbose : %d", _verbose); info("slurmd_debug : %d", opt.slurmd_debug); - 
info("immediate : %s", tf_(opt.immediate)); + if (opt.immediate <= 1) + info("immediate : %s", tf_(opt.immediate)); + else + info("immediate : %d secs", (opt.immediate - 1)); info("label output : %s", tf_(opt.labelio)); info("unbuffered IO : %s", tf_(opt.unbuffered)); info("overcommit : %s", tf_(opt.overcommit)); @@ -2122,7 +2131,7 @@ static void _usage(void) printf( "Usage: srun [-N nnodes] [-n ntasks] [-i in] [-o out] [-e err]\n" " [-c ncpus] [-r n] [-p partition] [--hold] [-t minutes]\n" -" [-D path] [--immediate] [--overcommit] [--no-kill]\n" +" [-D path] [--immediate[=secs]] [--overcommit] [--no-kill]\n" " [--share] [--label] [--unbuffered] [-m dist] [-J jobname]\n" " [--jobid=id] [--verbose] [--slurmd_debug=#]\n" " [--core=type] [-T threads] [-W sec] [--checkpoint=time]\n" @@ -2177,7 +2186,7 @@ static void _help(void) " --get-user-env used by Moab. See srun man page.\n" " -H, --hold submit job in held state\n" " -i, --input=in location of stdin redirection\n" -" -I, --immediate exit if resources are not immediately available\n" +" -I, --immediate[=secs] exit if resources not available in \"secs\"\n" " --jobid=id run under already allocated job\n" " -J, --job-name=jobname name of job\n" " -k, --no-kill do not kill job on node failure\n" diff --git a/src/srun/opt.h b/src/srun/opt.h index fc12cb42d3f37e62255d42cf169246817cf73917..adabce9233eb5db382cd5d5f38f08d2be7f78db3 100644 --- a/src/srun/opt.h +++ b/src/srun/opt.h @@ -54,8 +54,9 @@ #include "src/common/env.h" #include "src/srun/fname.h" -#define MAX_THREADS 60 -#define MAX_USERNAME 9 +#define DEFAULT_IMMEDIATE 1 +#define MAX_THREADS 60 +#define MAX_USERNAME 9 #define INT_UNASSIGNED ((int)-1) @@ -64,14 +65,15 @@ extern int _verbose; extern enum modes mode; -#define format_task_dist_states(t) (t == SLURM_DIST_BLOCK) ? "block" : \ - (t == SLURM_DIST_CYCLIC) ? "cyclic" : \ - (t == SLURM_DIST_PLANE) ? "plane" : \ - (t == SLURM_DIST_CYCLIC_CYCLIC) ? "cyclic:cyclic" : \ - (t == SLURM_DIST_CYCLIC_BLOCK) ? 
"cyclic:block" : \ - (t == SLURM_DIST_BLOCK_CYCLIC) ? "block:cyclic" : \ - (t == SLURM_DIST_BLOCK_BLOCK) ? "block:block" : \ - (t == SLURM_DIST_ARBITRARY) ? "arbitrary" : \ +#define format_task_dist_states(t) \ + (t == SLURM_DIST_BLOCK) ? "block" : \ + (t == SLURM_DIST_CYCLIC) ? "cyclic" : \ + (t == SLURM_DIST_PLANE) ? "plane" : \ + (t == SLURM_DIST_CYCLIC_CYCLIC) ? "cyclic:cyclic" : \ + (t == SLURM_DIST_CYCLIC_BLOCK) ? "cyclic:block" : \ + (t == SLURM_DIST_BLOCK_CYCLIC) ? "block:cyclic" : \ + (t == SLURM_DIST_BLOCK_BLOCK) ? "block:block" : \ + (t == SLURM_DIST_ARBITRARY) ? "arbitrary" : \ "unknown" typedef struct srun_options { @@ -146,7 +148,7 @@ typedef struct srun_options { /*int verbose;*/ /* -v, --verbose */ /*int debug;*/ /* -d, --debug */ - int immediate; /* -i, --immediate */ + int immediate; /* -I, --immediate=secs */ bool hold; /* --hold, -H */ bool labelio; /* --label-output, -l */