Skip to content
Snippets Groups Projects
Commit 33733a32 authored by Moe Jette's avatar Moe Jette
Browse files
parent 397e0e3b
No related branches found
No related tags found
No related merge requests found
...@@ -46,6 +46,10 @@ documents those changes that are of interest to users and admins. ...@@ -46,6 +46,10 @@ documents those changes that are of interest to users and admins.
Moved existing digital signature logic into new plugin: crypto/openssl. Moved existing digital signature logic into new plugin: crypto/openssl.
Added new support for crypto/munge (available with GPL license). Added new support for crypto/munge (available with GPL license).
* Changes in SLURM 1.2.16
=========================
-- Add --overcommit option to the salloc command.
* Changes in SLURM 1.2.15 * Changes in SLURM 1.2.15
========================= =========================
-- In sched/wiki2, fix bug processing hostlist expressions where hosts -- In sched/wiki2, fix bug processing hostlist expressions where hosts
......
.TH "salloc" "1" "SLURM 1.2" "December 2006" "SLURM Commands" .TH "salloc" "1" "SLURM 1.2" "August 2007" "SLURM Commands"
.SH "NAME" .SH "NAME"
.LP .LP
salloc \- Obtain a SLURM job allocation (a set of nodes), execute a command, and then release the allocation when the command is finished. salloc \- Obtain a SLURM job allocation (a set of nodes), execute a command, and then release the allocation when the command is finished.
...@@ -222,6 +222,13 @@ is presently ignored if SchedulerType=sched/maui. ...@@ -222,6 +222,13 @@ is presently ignored if SchedulerType=sched/maui.
\fB\-\-no\-bell\fR \fB\-\-no\-bell\fR
Silence salloc's use of the terminal bell. Also see the option \fB\-\-bell\fR. Silence salloc's use of the terminal bell. Also see the option \fB\-\-bell\fR.
.TP
\fB\-O\fR, \fB\-\-overcommit\fR
Overcommit resources. Normally, \fBsalloc\fR will allocate one cpu per
task to be executed. By specifying \fB\-\-overcommit\fR you are explicitly
allowing more than one process per cpu. However no more than
\fBMAX_TASKS_PER_NODE\fR tasks are permitted to execute per node.
.TP .TP
\fB\-p\fR, \fB\-\-partition\fR[=]<\fIpartition name\fR> \fB\-p\fR, \fB\-\-partition\fR[=]<\fIpartition name\fR>
Request a specific partition for the resource allocation. If not specified, Request a specific partition for the resource allocation. If not specified,
...@@ -376,6 +383,9 @@ Same as \fB\-\-no\-bell\fR. ...@@ -376,6 +383,9 @@ Same as \fB\-\-no\-bell\fR.
\fBSALLOC_NO_ROTATE\fR \fBSALLOC_NO_ROTATE\fR
Same as \fB\-R\fR or \fB\-\-no\-rotate\fR. Same as \fB\-R\fR or \fB\-\-no\-rotate\fR.
.TP .TP
\fBSALLOC_OVERCOMMIT\fR
Same as \fB\-O\fR or \fB\-\-overcommit\fR.
.TP
\fBSALLOC_PARTITION\fR \fBSALLOC_PARTITION\fR
Same as \fB\-p\fR or \fB\-\-partition\fR. Same as \fB\-p\fR or \fB\-\-partition\fR.
.TP .TP
......
...@@ -271,6 +271,7 @@ When a job is requeued, the batch script is initiated from its beginning. ...@@ -271,6 +271,7 @@ When a job is requeued, the batch script is initiated from its beginning.
Specify the number of tasks to be launched per node. Specify the number of tasks to be launched per node.
Equivalent to \fB\-\-tasks\-per\-node\fR. Equivalent to \fB\-\-tasks\-per\-node\fR.
.TP
\fB\-O\fR, \fB\-\-overcommit\fR \fB\-O\fR, \fB\-\-overcommit\fR
Overcommit resources. Normally, \fBsbatch\fR will allocate one cpu per Overcommit resources. Normally, \fBsbatch\fR will allocate one cpu per
task to be executed. By specifying \fB\-\-overcommit\fR you are explicitly task to be executed. By specifying \fB\-\-overcommit\fR you are explicitly
......
...@@ -141,7 +141,6 @@ static int _start_job(uint32_t jobid, int task_cnt, char *hostlist, ...@@ -141,7 +141,6 @@ static int _start_job(uint32_t jobid, int task_cnt, char *hostlist,
size_t node_name_len; size_t node_name_len;
static uint32_t cr_test = 0, cr_enabled = 0; static uint32_t cr_test = 0, cr_enabled = 0;
info("task_cnt=%d, hostlist=%s, tasklist=%s", task_cnt, hostlist, tasklist);
if (cr_test == 0) { if (cr_test == 0) {
select_g_get_info_from_plugin(SELECT_CR_PLUGIN, select_g_get_info_from_plugin(SELECT_CR_PLUGIN,
&cr_enabled); &cr_enabled);
......
...@@ -85,6 +85,7 @@ ...@@ -85,6 +85,7 @@
#define OPT_NO_BELL 0x10 #define OPT_NO_BELL 0x10
#define OPT_JOBID 0x11 #define OPT_JOBID 0x11
#define OPT_EXCLUSIVE 0x12 #define OPT_EXCLUSIVE 0x12
#define OPT_OVERCOMMIT 0x13
/* generic getopt_long flags, integers and *not* valid characters */ /* generic getopt_long flags, integers and *not* valid characters */
#define LONG_OPT_JOBID 0x105 #define LONG_OPT_JOBID 0x105
...@@ -441,6 +442,7 @@ static void _opt_default() ...@@ -441,6 +442,7 @@ static void _opt_default()
opt.kill_command_signal_set = false; opt.kill_command_signal_set = false;
opt.immediate = false; opt.immediate = false;
opt.overcommit = false;
opt.max_wait = 0; opt.max_wait = 0;
opt.quiet = 0; opt.quiet = 0;
...@@ -504,6 +506,7 @@ env_vars_t env_vars[] = { ...@@ -504,6 +506,7 @@ env_vars_t env_vars[] = {
{"SALLOC_BELL", OPT_BELL, NULL, NULL }, {"SALLOC_BELL", OPT_BELL, NULL, NULL },
{"SALLOC_NO_BELL", OPT_NO_BELL, NULL, NULL }, {"SALLOC_NO_BELL", OPT_NO_BELL, NULL, NULL },
{"SALLOC_EXCLUSIVE", OPT_EXCLUSIVE, NULL, NULL }, {"SALLOC_EXCLUSIVE", OPT_EXCLUSIVE, NULL, NULL },
{"SALLOC_OVERCOMMIT", OPT_OVERCOMMIT, NULL, NULL },
{NULL, 0, NULL, NULL} {NULL, 0, NULL, NULL}
}; };
...@@ -612,6 +615,9 @@ _process_env_var(env_vars_t *e, const char *val) ...@@ -612,6 +615,9 @@ _process_env_var(env_vars_t *e, const char *val)
case OPT_EXCLUSIVE: case OPT_EXCLUSIVE:
opt.shared = 0; opt.shared = 0;
break; break;
case OPT_OVERCOMMIT:
opt.overcommit = true;
break;
default: default:
/* do nothing */ /* do nothing */
break; break;
...@@ -660,6 +666,7 @@ void set_options(const int argc, char **argv) ...@@ -660,6 +666,7 @@ void set_options(const int argc, char **argv)
{"kill-command", optional_argument, 0, 'K'}, {"kill-command", optional_argument, 0, 'K'},
{"tasks", required_argument, 0, 'n'}, {"tasks", required_argument, 0, 'n'},
{"nodes", required_argument, 0, 'N'}, {"nodes", required_argument, 0, 'N'},
{"overcommit", no_argument, 0, 'O'},
{"partition", required_argument, 0, 'p'}, {"partition", required_argument, 0, 'p'},
{"quiet", no_argument, 0, 'q'}, {"quiet", no_argument, 0, 'q'},
{"no-rotate", no_argument, 0, 'R'}, {"no-rotate", no_argument, 0, 'R'},
...@@ -694,7 +701,7 @@ void set_options(const int argc, char **argv) ...@@ -694,7 +701,7 @@ void set_options(const int argc, char **argv)
{"reboot", no_argument, 0, LONG_OPT_REBOOT}, {"reboot", no_argument, 0, LONG_OPT_REBOOT},
{NULL, 0, 0, 0} {NULL, 0, 0, 0}
}; };
char *opt_string = "+a:c:C:d:F:g:hHIJ:kK::n:N:p:qR:st:uU:vVw:W:x:"; char *opt_string = "+a:c:C:d:F:g:hHIJ:kK::n:N:Op:qR:st:uU:vVw:W:x:";
opt.progname = xbasename(argv[0]); opt.progname = xbasename(argv[0]);
optind = 0; optind = 0;
...@@ -772,6 +779,9 @@ void set_options(const int argc, char **argv) ...@@ -772,6 +779,9 @@ void set_options(const int argc, char **argv)
exit(1); exit(1);
} }
break; break;
case 'O':
opt.overcommit = true;
break;
case 'p': case 'p':
xfree(opt.partition); xfree(opt.partition);
opt.partition = xstrdup(optarg); opt.partition = xstrdup(optarg);
...@@ -1247,6 +1257,7 @@ static void _opt_list() ...@@ -1247,6 +1257,7 @@ static void _opt_list()
info("jobid : %u", opt.jobid); info("jobid : %u", opt.jobid);
info("verbose : %d", opt.verbose); info("verbose : %d", opt.verbose);
info("immediate : %s", tf_(opt.immediate)); info("immediate : %s", tf_(opt.immediate));
info("overcommit : %s", tf_(opt.overcommit));
if (opt.time_limit == INFINITE) if (opt.time_limit == INFINITE)
info("time_limit : INFINITE"); info("time_limit : INFINITE");
else if (opt.time_limit != NO_VAL) else if (opt.time_limit != NO_VAL)
...@@ -1288,7 +1299,7 @@ static void _usage(void) ...@@ -1288,7 +1299,7 @@ static void _usage(void)
printf( printf(
"Usage: salloc [-N numnodes|[min nodes]-[max nodes]] [-n num-processors]\n" "Usage: salloc [-N numnodes|[min nodes]-[max nodes]] [-n num-processors]\n"
" [[-c cpus-per-node] [-r n] [-p partition] [--hold] [-t minutes]\n" " [[-c cpus-per-node] [-r n] [-p partition] [--hold] [-t minutes]\n"
" [--immediate] [--no-kill]\n" " [--immediate] [--no-kill] [--overcommit]\n"
" [--share] [-J jobname] [--jobid=id]\n" " [--share] [-J jobname] [--jobid=id]\n"
" [--verbose] [--gid=group] [--uid=user]\n" " [--verbose] [--gid=group] [--uid=user]\n"
" [-W sec] [--minsockets=n] [--mincores=n] [--minthreads=n]\n" " [-W sec] [--minsockets=n] [--mincores=n] [--minthreads=n]\n"
...@@ -1318,6 +1329,7 @@ static void _help(void) ...@@ -1318,6 +1329,7 @@ static void _help(void)
" -I, --immediate exit if resources are not immediately available\n" " -I, --immediate exit if resources are not immediately available\n"
" -k, --no-kill do not kill job on node failure\n" " -k, --no-kill do not kill job on node failure\n"
" -K, --kill-command[=signal] signal to send terminating job\n" " -K, --kill-command[=signal] signal to send terminating job\n"
" -O, --overcommit overcommit resources\n"
" -s, --share share nodes with other jobs\n" " -s, --share share nodes with other jobs\n"
" -J, --job-name=jobname name of job\n" " -J, --job-name=jobname name of job\n"
" --jobid=id specify jobid to use\n" " --jobid=id specify jobid to use\n"
......
...@@ -78,6 +78,7 @@ typedef struct salloc_options { ...@@ -78,6 +78,7 @@ typedef struct salloc_options {
bool hold; /* --hold, -H */ bool hold; /* --hold, -H */
bool no_kill; /* --no-kill, -k */ bool no_kill; /* --no-kill, -k */
bool overcommit; /* --overcommit -O */
int kill_command_signal;/* --kill-command, -K */ int kill_command_signal;/* --kill-command, -K */
bool kill_command_signal_set; bool kill_command_signal_set;
uint16_t shared; /* --share, -s */ uint16_t shared; /* --share, -s */
......
...@@ -164,6 +164,10 @@ int main(int argc, char *argv[]) ...@@ -164,6 +164,10 @@ int main(int argc, char *argv[])
/* Add default task count for srun, if not already set */ /* Add default task count for srun, if not already set */
if (opt.nprocs_set) if (opt.nprocs_set)
env_array_append_fmt(&env, "SLURM_NPROCS", "%d", opt.nprocs); env_array_append_fmt(&env, "SLURM_NPROCS", "%d", opt.nprocs);
if (opt.overcommit) {
env_array_append_fmt(&env, "SLURM_OVERCOMMIT", "%d",
opt.overcommit);
}
env_array_set_environment(env); env_array_set_environment(env);
env_array_free(env); env_array_free(env);
pthread_mutex_lock(&allocation_state_lock); pthread_mutex_lock(&allocation_state_lock);
...@@ -284,7 +288,11 @@ static int fill_job_desc_from_opts(job_desc_msg_t *desc) ...@@ -284,7 +288,11 @@ static int fill_job_desc_from_opts(job_desc_msg_t *desc)
desc->job_min_memory = opt.realmem; desc->job_min_memory = opt.realmem;
if (opt.tmpdisk > -1) if (opt.tmpdisk > -1)
desc->job_min_tmp_disk = opt.tmpdisk; desc->job_min_tmp_disk = opt.tmpdisk;
desc->num_procs = opt.nprocs * opt.cpus_per_task; if (opt.overcommit) {
desc->num_procs = opt.min_nodes;
desc->overcommit = opt.overcommit;
} else
desc->num_procs = opt.nprocs * opt.cpus_per_task;
if (opt.nprocs_set) if (opt.nprocs_set)
desc->num_tasks = opt.nprocs; desc->num_tasks = opt.nprocs;
if (opt.cpus_set) if (opt.cpus_set)
......
...@@ -228,6 +228,7 @@ EXTRA_DIST = \ ...@@ -228,6 +228,7 @@ EXTRA_DIST = \
test15.21 \ test15.21 \
test15.22 \ test15.22 \
test15.23 \ test15.23 \
test15.24 \
test16.1 \ test16.1 \
test16.2 \ test16.2 \
test16.3 \ test16.3 \
......
...@@ -454,6 +454,7 @@ EXTRA_DIST = \ ...@@ -454,6 +454,7 @@ EXTRA_DIST = \
test15.21 \ test15.21 \
test15.22 \ test15.22 \
test15.23 \ test15.23 \
test15.24 \
test16.1 \ test16.1 \
test16.2 \ test16.2 \
test16.3 \ test16.3 \
......
...@@ -381,6 +381,7 @@ test15.22 Test of partition specification on job submission (--partition ...@@ -381,6 +381,7 @@ test15.22 Test of partition specification on job submission (--partition
option). option).
test15.23 Test of environment variables that control salloc actions: test15.23 Test of environment variables that control salloc actions:
SALLOC_ACCOUNT, SALLOC_DEBUG and SALLOC_TIMELIMIT SALLOC_ACCOUNT, SALLOC_DEBUG and SALLOC_TIMELIMIT
test15.24 Test of --overcommit option.
test16.# Testing of sattach options. test16.# Testing of sattach options.
......
...@@ -41,7 +41,7 @@ set task_cnt 10 ...@@ -41,7 +41,7 @@ set task_cnt 10
print_header $test_id print_header $test_id
# #
# Submit a slurm job that will execute 'id' on 4 tasks (or try anyway) # Submit a slurm job that will print slurm env vars and execute 'id'
# #
file delete $file_in $file_out file delete $file_in $file_out
make_bash_script $file_in " make_bash_script $file_in "
...@@ -110,11 +110,6 @@ if {$task_cnt != $tasks} { ...@@ -110,11 +110,6 @@ if {$task_cnt != $tasks} {
set exit_code 1 set exit_code 1
} }
if {$task_cnt != $tasks} {
send_user "\nFAILURE: Did not get proper number of tasks: "
send_user "$task_cnt, $tasks\n"
set exit_code 1
}
if {$exit_code == 0} { if {$exit_code == 0} {
file delete $file_in $file_out file delete $file_in $file_out
send_user "\nSUCCESS\n" send_user "\nSUCCESS\n"
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment