diff --git a/NEWS b/NEWS index 7b1fbf19b0398eefe43eba40fe46800dcd40558c..0f2ad26d034e3d5909e8a90aa355e3d781ae78b3 100644 --- a/NEWS +++ b/NEWS @@ -46,6 +46,10 @@ documents those changes that are of interest to users and admins. Moved existing digital signature logic into new plugin: crypto/openssl. Added new support for crypto/munge (available with GPL license). +* Changes in SLURM 1.2.16 +========================= + -- Add --overcommit option to the salloc command. + * Changes in SLURM 1.2.15 ========================= -- In sched/wiki2, fix bug processing hostlist expressions where hosts diff --git a/doc/man/man1/salloc.1 b/doc/man/man1/salloc.1 index 0d004adac52f41b21be805e8e7671e08eca5fbd6..ba2869dcd81b55fc52d5bb92632a774302cb099d 100644 --- a/doc/man/man1/salloc.1 +++ b/doc/man/man1/salloc.1 @@ -1,4 +1,4 @@ -.TH "salloc" "1" "SLURM 1.2" "December 2006" "SLURM Commands" +.TH "salloc" "1" "SLURM 1.2" "August 2007" "SLURM Commands" .SH "NAME" .LP salloc \- Obtain a SLURM job allocation (a set of nodes), execute a command, and then release the allocation when the command is finished. @@ -222,6 +222,13 @@ is presently ignored if SchedulerType=sched/maui. \fB\-\-no\-bell\fR Silence salloc's use of the terminal bell. Also see the option \fB\-\-bell\fR. +.TP +\fB\-O\fR, \fB\-\-overcommit\fR +Overcommit resources. Normally, \fBsalloc\fR will allocate one cpu per +task to be executed. By specifying \fB\-\-overcommit\fR you are explicitly +allowing more than one process per cpu. However no more than +\fBMAX_TASKS_PER_NODE\fR tasks are permitted to execute per node. + .TP \fB\-p\fR, \fB\-\-partition\fR[=]<\fIpartition name\fR> Request a specific partition for the resource allocation. If not specified, @@ -376,6 +383,9 @@ Same as \fB\-\-no\-bell\fR. \fBSALLOC_NO_ROTATE\fR Same as \fB\-R\fR or \fB\-\-no\-rotate\fR. .TP +\fBSALLOC_OVERCOMMIT\fR +Same as \fB\-O, \-\-overcommit\fR +.TP \fBSALLOC_PARTITION\fR Same as \fB\-p\fR or \fB\-\-partition\fR. 
.TP diff --git a/doc/man/man1/sbatch.1 b/doc/man/man1/sbatch.1 index 5ee2c5617a9fec033e40c58a629352057fe460ba..76c2c266acf6b4f7a5fa31629f1ac4180a0745ba 100644 --- a/doc/man/man1/sbatch.1 +++ b/doc/man/man1/sbatch.1 @@ -271,6 +271,7 @@ When a job is requeued, the batch script is initiated from its beginning. Specify the number of tasks to be launched per node. Equivalent to \fB\-\-tasks\-per\-node\fR. +.TP \fB\-O\fR, \fB\-\-overcommit\fR Overcommit resources. Normally, \fBsbatch\fR will allocate one cpu per task to be executed. By specifying \fB\-\-overcommit\fR you are explicitly diff --git a/src/plugins/sched/wiki2/start_job.c b/src/plugins/sched/wiki2/start_job.c index 6c578bdf17e69504bd0951f4ddb794d4f47f7406..224f05da8f097bafd1ad17e75b1bfee53f2ab59b 100644 --- a/src/plugins/sched/wiki2/start_job.c +++ b/src/plugins/sched/wiki2/start_job.c @@ -141,7 +141,6 @@ static int _start_job(uint32_t jobid, int task_cnt, char *hostlist, size_t node_name_len; static uint32_t cr_test = 0, cr_enabled = 0; -info("task_cnt=%d, hostlist=%s, tasklist=%s", task_cnt, hostlist, tasklist); if (cr_test == 0) { select_g_get_info_from_plugin(SELECT_CR_PLUGIN, &cr_enabled); diff --git a/src/salloc/opt.c b/src/salloc/opt.c index 245fc959d343865eed6e166609c94d5084394e3d..82e37115133f02ba259e75b78ea05c9dff33a0f8 100644 --- a/src/salloc/opt.c +++ b/src/salloc/opt.c @@ -85,6 +85,7 @@ #define OPT_NO_BELL 0x10 #define OPT_JOBID 0x11 #define OPT_EXCLUSIVE 0x12 +#define OPT_OVERCOMMIT 0x13 /* generic getopt_long flags, integers and *not* valid characters */ #define LONG_OPT_JOBID 0x105 @@ -441,6 +442,7 @@ static void _opt_default() opt.kill_command_signal_set = false; opt.immediate = false; + opt.overcommit = false; opt.max_wait = 0; opt.quiet = 0; @@ -504,6 +506,7 @@ env_vars_t env_vars[] = { {"SALLOC_BELL", OPT_BELL, NULL, NULL }, {"SALLOC_NO_BELL", OPT_NO_BELL, NULL, NULL }, {"SALLOC_EXCLUSIVE", OPT_EXCLUSIVE, NULL, NULL }, + {"SALLOC_OVERCOMMIT", OPT_OVERCOMMIT, NULL, NULL }, {NULL, 0, NULL, 
NULL} }; @@ -612,6 +615,9 @@ _process_env_var(env_vars_t *e, const char *val) case OPT_EXCLUSIVE: opt.shared = 0; break; + case OPT_OVERCOMMIT: + opt.overcommit = true; + break; default: /* do nothing */ break; @@ -660,6 +666,7 @@ void set_options(const int argc, char **argv) {"kill-command", optional_argument, 0, 'K'}, {"tasks", required_argument, 0, 'n'}, {"nodes", required_argument, 0, 'N'}, + {"overcommit", no_argument, 0, 'O'}, {"partition", required_argument, 0, 'p'}, {"quiet", no_argument, 0, 'q'}, {"no-rotate", no_argument, 0, 'R'}, @@ -694,7 +701,7 @@ void set_options(const int argc, char **argv) {"reboot", no_argument, 0, LONG_OPT_REBOOT}, {NULL, 0, 0, 0} }; - char *opt_string = "+a:c:C:d:F:g:hHIJ:kK::n:N:p:qR:st:uU:vVw:W:x:"; + char *opt_string = "+a:c:C:d:F:g:hHIJ:kK::n:N:Op:qR:st:uU:vVw:W:x:"; opt.progname = xbasename(argv[0]); optind = 0; @@ -772,6 +779,9 @@ void set_options(const int argc, char **argv) exit(1); } break; + case 'O': + opt.overcommit = true; + break; case 'p': xfree(opt.partition); opt.partition = xstrdup(optarg); @@ -1247,6 +1257,7 @@ static void _opt_list() info("jobid : %u", opt.jobid); info("verbose : %d", opt.verbose); info("immediate : %s", tf_(opt.immediate)); + info("overcommit : %s", tf_(opt.overcommit)); if (opt.time_limit == INFINITE) info("time_limit : INFINITE"); else if (opt.time_limit != NO_VAL) @@ -1288,7 +1299,7 @@ static void _usage(void) printf( "Usage: salloc [-N numnodes|[min nodes]-[max nodes]] [-n num-processors]\n" " [[-c cpus-per-node] [-r n] [-p partition] [--hold] [-t minutes]\n" -" [--immediate] [--no-kill]\n" +" [--immediate] [--no-kill] [--overcommit]\n" " [--share] [-J jobname] [--jobid=id]\n" " [--verbose] [--gid=group] [--uid=user]\n" " [-W sec] [--minsockets=n] [--mincores=n] [--minthreads=n]\n" @@ -1318,6 +1329,7 @@ static void _help(void) " -I, --immediate exit if resources are not immediately available\n" " -k, --no-kill do not kill job on node failure\n" " -K, --kill-command[=signal] signal to send 
terminating job\n" +" -O, --overcommit overcommit resources\n" " -s, --share share nodes with other jobs\n" " -J, --job-name=jobname name of job\n" " --jobid=id specify jobid to use\n" diff --git a/src/salloc/opt.h b/src/salloc/opt.h index 53a68871fd6f84061fbec2100f912110a177e4e4..c101b0bcb4d774d17d65d5b21d737b09c973276f 100644 --- a/src/salloc/opt.h +++ b/src/salloc/opt.h @@ -78,6 +78,7 @@ typedef struct salloc_options { bool hold; /* --hold, -H */ bool no_kill; /* --no-kill, -k */ + bool overcommit; /* --overcommit -O */ int kill_command_signal;/* --kill-command, -K */ bool kill_command_signal_set; uint16_t shared; /* --share, -s */ diff --git a/src/salloc/salloc.c b/src/salloc/salloc.c index 1a359484f89eab7cda7c14b82e01bf9ac98b8ff9..06da0579f8665bf283306d13f783587621bfbb3d 100644 --- a/src/salloc/salloc.c +++ b/src/salloc/salloc.c @@ -164,6 +164,10 @@ int main(int argc, char *argv[]) /* Add default task count for srun, if not already set */ if (opt.nprocs_set) env_array_append_fmt(&env, "SLURM_NPROCS", "%d", opt.nprocs); + if (opt.overcommit) { + env_array_append_fmt(&env, "SLURM_OVERCOMMIT", "%d", + opt.overcommit); + } env_array_set_environment(env); env_array_free(env); pthread_mutex_lock(&allocation_state_lock); @@ -284,7 +288,11 @@ static int fill_job_desc_from_opts(job_desc_msg_t *desc) desc->job_min_memory = opt.realmem; if (opt.tmpdisk > -1) desc->job_min_tmp_disk = opt.tmpdisk; - desc->num_procs = opt.nprocs * opt.cpus_per_task; + if (opt.overcommit) { + desc->num_procs = opt.min_nodes; + desc->overcommit = opt.overcommit; + } else + desc->num_procs = opt.nprocs * opt.cpus_per_task; if (opt.nprocs_set) desc->num_tasks = opt.nprocs; if (opt.cpus_set) diff --git a/testsuite/expect/Makefile.am b/testsuite/expect/Makefile.am index 98309e73bb66e3b245ba88b0faa9885f2e62f0e5..bd9d6e5b33b1ae05794e387e6800eb29851b8f87 100644 --- a/testsuite/expect/Makefile.am +++ b/testsuite/expect/Makefile.am @@ -228,6 +228,7 @@ EXTRA_DIST = \ test15.21 \ test15.22 \ test15.23 \ 
+ test15.24 \ test16.1 \ test16.2 \ test16.3 \ diff --git a/testsuite/expect/Makefile.in b/testsuite/expect/Makefile.in index de8bbbc2da3fffa53642f5ca8f10d5ffdced3e71..866bb0d7831a77ee8a51c958f4e30df3941249ad 100644 --- a/testsuite/expect/Makefile.in +++ b/testsuite/expect/Makefile.in @@ -454,6 +454,7 @@ EXTRA_DIST = \ test15.21 \ test15.22 \ test15.23 \ + test15.24 \ test16.1 \ test16.2 \ test16.3 \ diff --git a/testsuite/expect/README b/testsuite/expect/README index fd5de3c8e4d2e40ba62b378e92c441b03a0f3a65..809cc878ca2240998101a5a882fcb05922368a68 100644 --- a/testsuite/expect/README +++ b/testsuite/expect/README @@ -381,6 +381,7 @@ test15.22 Test of partition specification on job submission (--partition option). test15.23 Test of environment variables that control salloc actions: SALLOC_ACCOUNT, SALLOC_DEBUG and SALLOC_TIMELIMIT +test15.24 Test of --overcommit option. test16.# Testing of sattach options. diff --git a/testsuite/expect/test17.32 b/testsuite/expect/test17.32 index 032c1dba2a0f571b7a072af1ba7fc0586e4bdaeb..b26af694904dab4fbdf46cd997d6cde1b9ddd5ae 100755 --- a/testsuite/expect/test17.32 +++ b/testsuite/expect/test17.32 @@ -41,7 +41,7 @@ set task_cnt 10 print_header $test_id # -# Submit a slurm job that will execute 'id' on 4 tasks (or try anyway) +# Submit a slurm job that will print slurm env vars and execute 'id' # file delete $file_in $file_out make_bash_script $file_in " @@ -110,11 +110,6 @@ if {$task_cnt != $tasks} { set exit_code 1 } -if {$task_cnt != $tasks} { - send_user "\nFAILURE: Did not get proper number of tasks: " - send_user "$task_cnt, $tasks\n" - set exit_code 1 -} if {$exit_code == 0} { file delete $file_in $file_out send_user "\nSUCCESS\n"