diff --git a/doc/man/man1/salloc.1 b/doc/man/man1/salloc.1 index c048d2fd8da972ca557cfb54728ba0f606aa64ca..be61083b0da8a933d43c93d4a1a6ed0dae29be47 100644 --- a/doc/man/man1/salloc.1 +++ b/doc/man/man1/salloc.1 @@ -14,15 +14,20 @@ The command may be any program the user wishes. Some typical commands are xterm .SH "OPTIONS" .LP .TP -\fB\-\-jobid\fR <\fIJOBID\fP> +\fB\-\-jobid\fR[=]<\fIJOBID\fP> The job allocation under which the parallel application should be launched. .TP -\fB\-n\fR, \fB\-\-ntasks\fR=<\fInumber\fR> +\fB\-n\fR, \fB\-\-ntasks\fR[=]<\fInumber\fR> Specify the number of processes to launch. The default is one process per node, but note that the \-c parameter will change this default. .TP -\fB\-N\fR, \fB\-\-nodes\fR=<\fInumber|[min]\-[max]\fR> +\fB\-N\fR, \fB\-\-nodes\fR[=]<\fInumber|[min]\-[max]\fR> Specify the number of nodes to be used by this job step. This option accepts either a single number, or a range of possible node counts. If a single number is used, such as "\-N 4", then the allocation is asking for four and ONLY four nodes. If a range is specified, such as "\-N 2\-6", SLURM controller may grant salloc anywhere from 2 to 6 nodes. When using a range, either of the min or max options may be omitted. For instance, "\-N 10\-" means "no fewer than 10 nodes", and "\-N \-20" means "no more than 20 nodes". The default value of this option is one node, but other options may require more than one node to be allocated. - +.TP +\fB\-I\fR, \fB\-\-immediate\fR +Do not wait for the resources needed to grant this allocation. Normally salloc will wait for the resources necessary to satisfy the requested job allocation, but when \-\-immediate is specified it will exit immediately without running the \fIcommand\fR parameter. +.TP +\fB\-W\fR, \fB\-\-wait\fR[=]<\fIseconds\fR> +If the resources needed to satisfy a job allocation are not immediately available, the job allocation is enqueued and is said to be PENDING. 
This option tells salloc how long (in seconds) to wait for the allocation to be granted before giving up. When the wait limit has been reached, salloc will exit without running the \fIcommand\fR parameter. By default, salloc will wait indefinitely. (The \-\-immediate option makes \-\-wait moot.) .TP \fB\-c\fR, \fB\-\-cpus\-per\-task\fR=<\fIncpus\fR> diff --git a/slurm/slurm.h.in b/slurm/slurm.h.in index 8249b089d491c4e788c047aecc86eb97e6e8ab7d..87f79562c89a79d8183dcf0679ceb9e132cccd7a 100644 --- a/slurm/slurm.h.in +++ b/slurm/slurm.h.in @@ -761,7 +761,8 @@ extern int slurm_allocate_resources PARAMS(( * A timeout of zero will wait indefinitely. * * RET allocation structure on success, NULL on error set errno to - * indicate the error + * indicate the error (errno will be ETIMEDOUT if the timeout is reached + * with no allocation granted) * NOTE: free the allocation structure using * slurm_free_resource_allocation_response_msg */ diff --git a/src/api/allocate.c b/src/api/allocate.c index b59051a09d70ca082929899ebe22b905479660b5..d602c52de8d309aa5da526b5aa3c1d6d717d9159 100644 --- a/src/api/allocate.c +++ b/src/api/allocate.c @@ -146,12 +146,14 @@ slurm_allocate_resources (job_desc_msg_t *req, * A timeout of zero will wait indefinitely. 
* * RET allocation structure on success, NULL on error set errno to - * indicate the error + * indicate the error (errno will be ETIMEDOUT if the timeout is reached + * with no allocation granted) * NOTE: free the allocation structure using * slurm_free_resource_allocation_response_msg */ resource_allocation_response_msg_t * -slurm_allocate_resources_blocking (const job_desc_msg_t *user_req, time_t timeout) +slurm_allocate_resources_blocking (const job_desc_msg_t *user_req, + time_t timeout) { int rc; slurm_msg_t req_msg; @@ -236,14 +238,14 @@ slurm_allocate_resources_blocking (const job_desc_msg_t *user_req, time_t timeou /* no, we need to wait for a response */ job_id = resp->job_id; slurm_free_resource_allocation_response_msg(resp); - verbose("Pending job allocation %u", job_id); + info("Pending job allocation %u", job_id); resp = _wait_for_allocation_response(job_id, listen, timeout); /* If NULL, we didn't get the allocation in the time desired, so just free the job id */ if (resp == NULL) { + errnum = errno; slurm_complete_job(job_id, -1); - errnum = -1; } } break; @@ -687,11 +689,12 @@ _wait_for_alloc_rpc(const listen_t *listen, int sleep_time, resource_allocation_response_msg_t **resp) { struct pollfd fds[1]; + int rc; fds[0].fd = listen->fd; fds[0].events = POLLIN; - while (poll (fds, 1, (sleep_time * 1000)) < 0) { + while ((rc = poll(fds, 1, (sleep_time * 1000))) < 0) { switch (errno) { case EAGAIN: case EINTR: @@ -708,20 +711,25 @@ _wait_for_alloc_rpc(const listen_t *listen, int sleep_time, } } - if (fds[0].revents & POLLIN) + if (rc == 0) { /* poll timed out */ + errno = ETIMEDOUT; + } else if (fds[0].revents & POLLIN) { return (_accept_msg_connection(listen->fd, resp)); + } - return (0); + return 0; } static resource_allocation_response_msg_t * _wait_for_allocation_response(uint32_t job_id, const listen_t *listen, int timeout) { - resource_allocation_response_msg_t *resp; + resource_allocation_response_msg_t *resp = NULL; + int errnum; - debug ("job %u 
queued and waiting for resources", job_id); + debug("job %u queued and waiting for resources", job_id); if (_wait_for_alloc_rpc(listen, timeout, &resp) <= 0) { + errnum = errno; /* Maybe the resource allocation response RPC got lost * in the mail; surely it should have arrived by now. * Let's see if the controller thinks that the allocation @@ -730,10 +738,12 @@ _wait_for_allocation_response(uint32_t job_id, const listen_t *listen, if (slurm_allocation_lookup(job_id, &resp) >= 0) return resp; - if (slurm_get_errno() == ESLURM_JOB_PENDING) - debug3 ("Still waiting for allocation"); - else { - debug3 ("Unable to confirm allocation for job %u: %m", + if (slurm_get_errno() == ESLURM_JOB_PENDING) { + debug3("Still waiting for allocation"); + errno = errnum; + return NULL; + } else { + debug3("Unable to confirm allocation for job %u: %m", job_id); return NULL; } diff --git a/src/salloc/opt.c b/src/salloc/opt.c index acfda5da9076ad44bf0c4eb6fcee58cd3dba014a..a5c4eff1155f8de33061dab5b54abe716c16cc9f 100644 --- a/src/salloc/opt.c +++ b/src/salloc/opt.c @@ -84,7 +84,6 @@ #define OPT_CONN_TYPE 0x08 #define OPT_NO_ROTATE 0x0a #define OPT_GEOMETRY 0x0b -#define OPT_MPI 0x0c #define OPT_CPU_BIND 0x0d #define OPT_MEM_BIND 0x0e @@ -100,7 +99,6 @@ #define LONG_OPT_CONT 0x109 #define LONG_OPT_UID 0x10a #define LONG_OPT_GID 0x10b -#define LONG_OPT_MPI 0x10c #define LONG_OPT_CORE 0x10e #define LONG_OPT_NOSHELL 0x10f #define LONG_OPT_DEBUG_TS 0x110 @@ -742,10 +740,9 @@ static void _opt_default() opt.no_requeue = false; opt.noshell = false; - opt.max_wait = slurm_get_wait_time(); + opt.max_wait = 0; opt.quit_on_intr = false; - opt.disable_status = false; opt.test_only = false; opt.quiet = 0; @@ -815,8 +812,6 @@ env_vars_t env_vars[] = { {"SLURM_PARTITION", OPT_STRING, &opt.partition, NULL }, {"SLURM_TIMELIMIT", OPT_INT, &opt.time_limit, NULL }, {"SLURM_WAIT", OPT_INT, &opt.max_wait, NULL }, - {"SLURM_DISABLE_STATUS",OPT_INT, &opt.disable_status,NULL }, - {"SLURM_MPI_TYPE", OPT_MPI, 
NULL, NULL }, {NULL, 0, NULL, NULL} }; @@ -919,15 +914,6 @@ _process_env_var(env_vars_t *e, const char *val) e->var, val); } break; - - case OPT_MPI: - if (srun_mpi_init((char *)val) == SLURM_ERROR) { - fatal("\"%s=%s\" -- invalid MPI type, " - "--mpi=list for acceptable types.", - e->var, val); - } - break; - default: /* do nothing */ break; @@ -962,7 +948,6 @@ void set_options(const int argc, char **argv, int first) { int opt_char, option_index = 0; static bool set_name=false; - struct utsname name; static struct option long_options[] = { {"cpus-per-task", required_argument, 0, 'c'}, {"constraint", required_argument, 0, 'C'}, @@ -988,15 +973,12 @@ void set_options(const int argc, char **argv, int first) {"nodelist", required_argument, 0, 'w'}, {"wait", required_argument, 0, 'W'}, {"exclude", required_argument, 0, 'x'}, - {"disable-status", no_argument, 0, 'X'}, - {"no-allocate", no_argument, 0, 'Z'}, {"contiguous", no_argument, 0, LONG_OPT_CONT}, {"exclusive", no_argument, 0, LONG_OPT_EXCLUSIVE}, {"cpu_bind", required_argument, 0, LONG_OPT_CPU_BIND}, {"mem_bind", required_argument, 0, LONG_OPT_MEM_BIND}, {"mincpus", required_argument, 0, LONG_OPT_MINCPU}, {"mem", required_argument, 0, LONG_OPT_MEM}, - {"mpi", required_argument, 0, LONG_OPT_MPI}, {"no-shell", no_argument, 0, LONG_OPT_NOSHELL}, {"tmp", required_argument, 0, LONG_OPT_TMP}, {"msg-timeout", required_argument, 0, LONG_OPT_TIMEO}, @@ -1021,7 +1003,7 @@ void set_options(const int argc, char **argv, int first) {NULL, 0, 0, 0} }; char *opt_string = "+a:c:C:g:HIJ:km:N:" - "Op:P:qQR:st:U:vVw:W:x:XZ"; + "Op:P:qQR:st:U:vVw:W:x:"; if(opt.progname == NULL) opt.progname = xbasename(argv[0]); @@ -1185,15 +1167,6 @@ void set_options(const int argc, char **argv, int first) if (!_valid_node_list(&opt.exc_nodes)) exit(1); break; - case (int)'X': - opt.disable_status = true; - break; - case (int)'Z': - opt.no_alloc = true; - uname(&name); - if (strcasecmp(name.sysname, "AIX") == 0) - opt.network = xstrdup("ip"); - 
break; case LONG_OPT_CONT: opt.contiguous = true; break; @@ -1221,13 +1194,6 @@ void set_options(const int argc, char **argv, int first) exit(1); } break; - case LONG_OPT_MPI: - if (srun_mpi_init((char *)optarg) == SLURM_ERROR) { - fatal("\"--mpi=%s\" -- long invalid MPI type, " - "--mpi=list for acceptable types.", - optarg); - } - break; case LONG_OPT_NOSHELL: opt.noshell = true; break; @@ -1381,16 +1347,6 @@ static bool _opt_verify(void) verified = false; } - if (opt.no_alloc && !opt.nodelist) { - error("must specify a node list with -Z, --no-allocate."); - verified = false; - } - - if (opt.no_alloc && opt.exc_nodes) { - error("can not specify --exclude list with -Z, --no-allocate."); - verified = false; - } - if (opt.mincpus < opt.cpus_per_task) opt.mincpus = opt.cpus_per_task; @@ -1685,10 +1641,9 @@ static void _help(void) " (type = block|cyclic|hostfile)\n" " -J, --job-name=jobname name of job\n" " --mpi=type type of MPI being used\n" -" -W, --wait=sec seconds to wait after first task exits\n" -" before killing job\n" +" -W, --wait=sec seconds to wait for allocation if not\n" +" immediately available\n" " -q, --quit-on-interrupt quit on single Ctrl-C\n" -" -X, --disable-status Disable Ctrl-C status feature\n" " -v, --verbose verbose mode (multiple -v's increase verbosity)\n" " -Q, --quiet quiet mode (suppress informational messages)\n" " -d, --slurmd-debug=level slurmd debug level\n" diff --git a/src/salloc/opt.h b/src/salloc/opt.h index c81e3ea98d40269f7a0c52e523a1c72b6081dce7..776591993088929751e7c03bcb6907309184663f 100644 --- a/src/salloc/opt.h +++ b/src/salloc/opt.h @@ -81,7 +81,6 @@ typedef struct salloc_options { enum task_dist_states distribution; /* --distribution=, -m dist */ char *job_name; /* --job-name=, -J name */ - char *mpi_type; /* --mpi=type */ unsigned int dependency;/* --dependency, -P jobid */ int nice; /* --nice */ char *account; /* --account, -U acct_name */ @@ -96,7 +95,6 @@ typedef struct salloc_options { bool share; /* --share, -s */ 
int max_wait; /* --wait, -W */ bool quit_on_intr; /* --quit-on-interrupt, -q */ - bool disable_status; /* --disable-status, -X */ int quiet; bool test_only; /* --test-only */ char *propagate; /* --propagate[=RLIMIT_CORE,...]*/ @@ -109,7 +107,6 @@ typedef struct salloc_options { bool contiguous; /* --contiguous */ char *nodelist; /* --nodelist=node1,node2,... */ char *exc_nodes; /* --exclude=node1,node2,... -x */ - bool no_alloc; /* --no-allocate, -Z */ int max_launch_time; /* Undocumented */ int max_exit_timeout; /* Undocumented */ int msg_timeout; /* Undocumented */ diff --git a/src/salloc/salloc.c b/src/salloc/salloc.c index 749cb70d51bbdf025411690c9d1b5ba29dfa667c..bd60e94a9270965b1f8b22f7ebf07e774d84065e 100644 --- a/src/salloc/salloc.c +++ b/src/salloc/salloc.c @@ -42,6 +42,7 @@ #include "src/common/xstring.h" #include "src/common/xmalloc.h" +#include "src/common/xsignal.h" #include "src/common/read_config.h" #include "src/salloc/opt.h" @@ -49,7 +50,9 @@ static int fill_job_desc_from_opts(job_desc_msg_t *desc); static void ring_terminal_bell(void); -static void run_command(void); +static int run_command(char **command); + +static int signals_to_block[] = {SIGINT, SIGTERM, SIGQUIT, 0}; int main(int argc, char *argv[]) { @@ -58,6 +61,7 @@ int main(int argc, char *argv[]) resource_allocation_response_msg_t *alloc; time_t before, after; salloc_msg_thread_t *msg_thr; + int rc; log_init(xbasename(argv[0]), logopt, 0, NULL); if (initialize_and_process_args(argc, argv) < 0) { @@ -71,6 +75,8 @@ int main(int argc, char *argv[]) log_alter(logopt, 0, NULL); } + xsignal_block(signals_to_block); + /* * Request a job allocation */ @@ -86,7 +92,7 @@ int main(int argc, char *argv[]) verbose("other_hostname = %s", desc.other_hostname); before = time(NULL); - alloc = slurm_allocate_resources_blocking(&desc, 0); + alloc = slurm_allocate_resources_blocking(&desc, opt.max_wait); if (alloc == NULL) fatal("Failed to allocate resources: %m"); after = time(NULL); @@ -106,7 +112,7 @@ 
int main(int argc, char *argv[]) */ setenvfs("SLURM_JOBID=%d", alloc->job_id); setenvfs("SLURM_NNODES=%d", alloc->node_cnt); - run_command(); + rc = run_command(command_argv); /* * Relinquish the job allocation. @@ -119,7 +125,7 @@ int main(int argc, char *argv[]) slurm_free_resource_allocation_response_msg(alloc); msg_thr_destroy(msg_thr); - return 0; + return rc; } @@ -128,7 +134,7 @@ static int fill_job_desc_from_opts(job_desc_msg_t *desc) { desc->contiguous = opt.contiguous ? 1 : 0; desc->features = opt.constraints; - desc->immediate = opt.immediate; + desc->immediate = opt.immediate ? 1 : 0; desc->name = opt.job_name; desc->req_nodes = opt.nodelist; if (desc->req_nodes == NULL) { @@ -214,25 +220,36 @@ static void ring_terminal_bell(void) fflush(stdout); } -static void run_command(void) +/* returns the exit status of the command */ +static int run_command(char **command) { pid_t pid; int status; - int rc; + int rc = 0; pid = fork(); if (pid < 0) { error("fork failed: %m"); } else if (pid > 0) { /* parent */ - while ((rc = waitpid(pid, &status, 0)) == -1) { - if (errno != EINTR) { - error("waitpid failed: %m"); - break; - } + while ((rc = waitpid(pid, &status, 0)) == -1 + && errno == EINTR) { + /* just keep spinning */ + } + if (rc == -1) { + error("waitpid for %s failed: %m", command[0]); + rc = 1; + } else { + if (WIFEXITED(status)) + rc = WEXITSTATUS(status); + else + rc = 1; } } else { /* child */ - execvp(command_argv[0], command_argv); + xsignal_unblock(signals_to_block); + execvp(command[0], command); } + + return rc; }