diff --git a/NEWS b/NEWS index 1242bab7be26aacccd03fb80a4d4b82193d94c7a..19a63415554914ee8519b75de26ee5635342292b 100644 --- a/NEWS +++ b/NEWS @@ -161,6 +161,11 @@ documents those changes that are of interest to users and administrators. and administrators. -- Add salloc/sbatch/srun option --use-min-nodes to prefer smaller node counts when a range of node counts is specified (e.g. "-N 2-4"). + -- Validate salloc/sbatch --wait-all-nodes argument. + -- Add "sbatch_wait_nodes" to SchedulerParameters to control default sbatch + behaviour with respect to waiting for all allocated nodes to be ready for + use. Jobs can override the configuration option using the --wait-all-nodes=# + option. * Changes in Slurm 16.05.4 ========================== diff --git a/doc/man/man5/slurm.conf.5 b/doc/man/man5/slurm.conf.5 index 06c7c678068a39ba445c1f0376a43d786547f49b..0083f1848d1c716d36f4668b816bd51c80d30ca7 100644 --- a/doc/man/man5/slurm.conf.5 +++ b/doc/man/man5/slurm.conf.5 @@ -2730,6 +2730,13 @@ If defined, the salloc command will wait until all allocated nodes are ready for use (i.e. booted) before the command returns. By default, salloc will return as soon as the resource allocation has been made. .TP +\fBsbatch_wait_nodes\fR +If defined, the sbatch script will wait until all allocated nodes are ready for +use (i.e. booted) before being initiated. By default, the sbatch script will be +initiated as soon as the first node in the job allocation is ready. The sbatch +command can use the \-\-wait\-all\-nodes option to override this configuration +parameter. +.TP \fBsched_interval=#\fR How frequently, in seconds, the main scheduling loop will execute and test all pending jobs. 
diff --git a/src/salloc/opt.c b/src/salloc/opt.c index c5826ed491c2846854aacac32d6559e9c16f6b3b..5630b65f6fc72f66fe1188cf502258c4d340d55b 100644 --- a/src/salloc/opt.c +++ b/src/salloc/opt.c @@ -1296,6 +1296,11 @@ void set_options(const int argc, char **argv) case LONG_OPT_WAIT_ALL_NODES: if (!optarg) /* CLANG Fix */ break; + if ((optarg[0] < '0') || (optarg[0] > '9')) { /* NOTE(review): only the first character is tested; strtol() below is called without an end pointer, so input such as "1x" is still accepted */ + error("Invalid --wait-all-nodes argument: %s", + optarg); + exit(1); + } opt.wait_all_nodes = strtol(optarg, NULL, 10); break; case LONG_OPT_CPU_FREQ: diff --git a/src/sbatch/opt.c b/src/sbatch/opt.c index eff2d8a3406e6d987235251fa143ba869d1e707a..c6b5e5e4b8cd38682062e33e0c1bf55bbe00116f 100644 --- a/src/sbatch/opt.c +++ b/src/sbatch/opt.c @@ -1891,6 +1891,11 @@ static void _set_options(int argc, char **argv) case LONG_OPT_WAIT_ALL_NODES: if (!optarg) /* CLANG Fix */ break; + if ((optarg[0] < '0') || (optarg[0] > '9')) { /* NOTE(review): only the first character is tested; strtol() below is called without an end pointer, so input such as "1x" is still accepted */ + error("Invalid --wait-all-nodes argument: %s", + optarg); + exit(1); + } opt.wait_all_nodes = strtol(optarg, NULL, 10); break; case LONG_OPT_EXPORT: diff --git a/src/slurmctld/job_mgr.c b/src/slurmctld/job_mgr.c index 44a28115881889e21e3c5cdf2357bc084c430f69..95379075f59a8c91325af8e00c17f273672364f1 100644 --- a/src/slurmctld/job_mgr.c +++ b/src/slurmctld/job_mgr.c @@ -7088,6 +7088,31 @@ _set_multi_core_data(job_desc_msg_t * job_desc) return mc_ptr; } +/* Return default "wait_all_nodes" option for a new job: 0 if job_desc has no script, otherwise 1 when the string returned by slurm_get_sched_params() contains "sbatch_wait_nodes" and 0 when it does not. The result is cached in static variables and recomputed whenever slurmctld_conf.last_update differs from the value seen at the last computation. */ +static uint16_t _default_wait_all_nodes(job_desc_msg_t *job_desc) +{ + static uint16_t default_batch_wait = (uint16_t) NO_VAL; + static time_t sched_update = 0; + char *sched_params; + + if (!job_desc->script) /* no script in the request: the default is always 0 */ + return 0; + + if ((default_batch_wait != (uint16_t) NO_VAL) && + (sched_update == slurmctld_conf.last_update)) /* cached value was computed for the current last_update */ + return default_batch_wait; + + sched_params = slurm_get_sched_params(); + if (sched_params && strstr(sched_params, "sbatch_wait_nodes")) /* plain substring match */ + default_batch_wait = 1; + else + default_batch_wait = 0; + xfree(sched_params); + sched_update = 
slurmctld_conf.last_update; /* remember which configuration state the cached value belongs to */ + + return default_batch_wait; +} + /* _copy_job_desc_to_job_record - copy the job descriptor from the RPC * structure into the actual slurmctld job record */ static int @@ -7233,7 +7258,9 @@ _copy_job_desc_to_job_record(job_desc_msg_t * job_desc, job_desc->spank_job_env_size = 0; /* nothing left to free */ job_ptr->mcs_label = xstrdup(job_desc->mcs_label); - if (job_desc->wait_all_nodes != (uint16_t) NO_VAL) + if (job_desc->wait_all_nodes == (uint16_t) NO_VAL) /* NO_VAL presumably means "not specified by the request" (confirm): fall back to the computed default */ + job_ptr->wait_all_nodes = _default_wait_all_nodes(job_desc); + else job_ptr->wait_all_nodes = job_desc->wait_all_nodes; job_ptr->warn_flags = job_desc->warn_flags; job_ptr->warn_signal = job_desc->warn_signal;