diff --git a/NEWS b/NEWS index ae37afed73f1d9018a9bee447ea8cd2abf9c5935..64aa1f52d5f826b826ca691f752f7b9b9f01a181 100644 --- a/NEWS +++ b/NEWS @@ -1,6 +1,10 @@ This file describes changes in recent versions of SLURM. It primarily documents those changes that are of interest to users and admins. +* Changes in SLURM 0.7.0-pre1 +============================= + -- Support deferred initiation of job (e.g. srun --begin=11:30 ...). + * Changes in SLURM 0.6.0-pre6 ============================= -- Added logic to return scheduled nodes to Maui scheduler (David diff --git a/doc/man/man1/scontrol.1 b/doc/man/man1/scontrol.1 index 37d4d9712ba1901802b6d082b31de147f0afa60a..9f31857435705a81c61da5246c53371098a6be61 100644 --- a/doc/man/man1/scontrol.1 +++ b/doc/man/man1/scontrol.1 @@ -1,4 +1,4 @@ -.TH SCONTROL "1" "August 2005" "scontrol 0.6" "Slurm components" +.TH SCONTROL "1" "September 2005" "scontrol 0.7" "Slurm components" .SH "NAME" scontrol \- Used view and modify Slurm configuration and state. @@ -222,6 +222,24 @@ Set the job's count of required processors to the specified value. Set the job's ability to share nodes with other jobs. Possible values are "YES" and "NO". .TP +\fIStartTime\fP=<time_spec> +Set the job's earliest initiation time. +It accepts times of the form \fIHH:MM:SS\fR to run a job at +a specific time of day (seconds are optional). +(If that time is already past, the next day is assumed.) +You may also specify \fImidnight\fR, \fInoon\fR, or +\fIteatime\fR (4pm) and you can have a time-of-day suffixed +with \fIAM\fR or \fIPM\fR for running in the morning or the evening. +You can also say what day the job will be run, by giving +a date in the form \fImonth-name\fR day with an optional year, +or giving a date of the form \fIMMDDYY\fR or \fIMM/DD/YY\fR +or \fIDD.MM.YY\fR. 
You can also +give times like \fInow + count time-units\fR, where the time-units +can be \fIminutes\fR, \fIhours\fR, \fIdays\fR, or \fIweeks\fR +and you can tell SLURM to run the job today with the keyword +\fItoday\fR and to run the job tomorrow with the keyword +\fItomorrow\fR. +.TP \fITimeLimit\fP=<minutes> Set the job's time limit to the specified value. .TP diff --git a/doc/man/man1/srun.1 b/doc/man/man1/srun.1 index 908e5d627f3bf30960193bdddcbdea0426a1f807..ac98c5c5e43a7c088707c7676d055fd064c1e268 100644 --- a/doc/man/man1/srun.1 +++ b/doc/man/man1/srun.1 @@ -1,6 +1,6 @@ \." $Id$ .\" -.TH SRUN "1" "August 2005" "srun 0.6" "slurm components" +.TH SRUN "1" "September 2005" "srun 0.7" "slurm components" .SH "NAME" srun \- run parallel jobs .SH SYNOPSIS @@ -75,6 +75,26 @@ dependency and these jobs may belong to different users. The value may be changed after job submission using the \fBscontrol\fR command. .TP +\fB\-\-begin\fR=\fItime\fR +Defer initiation of this job until the specified time. +It accepts times of the form \fIHH:MM:SS\fR to run a job at +a specific time of day (seconds are optional). +(If that time is already past, the next day is assumed.) +You may also specify \fImidnight\fR, \fInoon\fR, or +\fIteatime\fR (4pm) and you can have a time-of-day suffixed +with \fIAM\fR or \fIPM\fR for running in the morning or the evening. +You can also say what day the job will be run, by giving +a date in the form \fImonth-name\fR day with an optional year, +or giving a date of the form \fIMMDDYY\fR or \fIMM/DD/YY\fR +or \fIDD.MM.YY\fR. You can also +give times like \fInow + count time-units\fR, where the time-units +can be \fIminutes\fR, \fIhours\fR, \fIdays\fR, or \fIweeks\fR +and you can tell SLURM to run the job today with the keyword +\fItoday\fR and to run the job tomorrow with the keyword +\fItomorrow\fR. +The value may be changed after job submission using the +\fBscontrol\fR command. 
+.TP \fB\-U\fR, \fB\-\-account\fR=\fIaccount\fR Change resource use by this job to specified account. The \fIaccount\fR is an arbitrary string. The may diff --git a/slurm/slurm.h.in b/slurm/slurm.h.in index 2e084df5130784d3dccf76132b542ba45aabee19..c7c030a0471f7c758d1cf10cf383cb4f2587ee68 100644 --- a/slurm/slurm.h.in +++ b/slurm/slurm.h.in @@ -154,7 +154,8 @@ enum job_wait_reason { WAIT_PART_NODE_LIMIT, /* request exceeds partition node limit */ WAIT_PART_TIME_LIMIT, /* request exceeds partition time limit */ WAIT_PART_STATE, /* requested partition is down */ - WAIT_HELD /* job is held, priority==0 */ + WAIT_HELD, /* job is held, priority==0 */ + WAIT_TIME /* job waiting for specific begin time */ }; #ifdef HAVE_BGL @@ -306,6 +307,7 @@ typedef struct job_descriptor { /* For submit, allocate, and update requests */ * 0 otherwise, default=0. Only useful * when Consumable Resources are * enabled */ + time_t begin_time; /* delay initiation until this time */ /* * The following parameters are only meaningful on a Blue Gene * system at present. Some will be of value on other system. 
diff --git a/src/api/init_msg.c b/src/api/init_msg.c index 88100790f36892a0c0fd40074965899e9dc2b128..33302e10a2167daf13544e371612968d0b7ceb61 100644 --- a/src/api/init_msg.c +++ b/src/api/init_msg.c @@ -80,6 +80,7 @@ void slurm_init_job_desc_msg(job_desc_msg_t * job_desc_msg) job_desc_msg->work_dir = NULL; job_desc_msg->host = NULL; job_desc_msg->port = 0; + job_desc_msg->begin_time = 0; #if SYSTEM_DIMENSIONS { int i; diff --git a/src/common/slurm_protocol_defs.c b/src/common/slurm_protocol_defs.c index 212c79f3eb39cc827d07347f89169b9e63db54f4..c9476c7cde8c34ba3233c976942864cf9240b23d 100644 --- a/src/common/slurm_protocol_defs.c +++ b/src/common/slurm_protocol_defs.c @@ -462,6 +462,8 @@ extern char *job_reason_string(enum job_wait_reason inx) return "PartitionDown"; case WAIT_HELD: return "JobHeld"; + case WAIT_TIME: + return "BeginTime"; default: return "?"; } diff --git a/src/common/slurm_protocol_pack.c b/src/common/slurm_protocol_pack.c index 48dfdbca9a10499115129bc36f6246d1772b70da..c2a8165917bf4b9c38a39582a153eaf647ecca88 100644 --- a/src/common/slurm_protocol_pack.c +++ b/src/common/slurm_protocol_pack.c @@ -2043,6 +2043,7 @@ _pack_job_desc_msg(job_desc_msg_t * job_desc_ptr, Buf buffer) pack16(job_desc_ptr->port, buffer); packstr(job_desc_ptr->host, buffer); packstr(job_desc_ptr->network, buffer); + pack_time(job_desc_ptr->begin_time, buffer); if (select_g_alloc_jobinfo (&jobinfo) == SLURM_SUCCESS) { #if SYSTEM_DIMENSIONS @@ -2120,6 +2121,7 @@ _unpack_job_desc_msg(job_desc_msg_t ** job_desc_buffer_ptr, Buf buffer) safe_unpack16(&job_desc_ptr->port, buffer); safe_unpackstr_xmalloc(&job_desc_ptr->host, &uint16_tmp, buffer); safe_unpackstr_xmalloc(&job_desc_ptr->network, &uint16_tmp, buffer); + safe_unpack_time(&job_desc_ptr->begin_time, buffer); if (select_g_alloc_jobinfo (&job_desc_ptr->select_jobinfo) || select_g_unpack_jobinfo(job_desc_ptr->select_jobinfo, buffer)) diff --git a/src/scontrol/scontrol.c b/src/scontrol/scontrol.c index 
7ceadaf0862fee37d9a716e0b658a6107dd6bc16..fa68c7dd8b122e0cab2d92d651674320f7ed59ea 100644 --- a/src/scontrol/scontrol.c +++ b/src/scontrol/scontrol.c @@ -67,6 +67,7 @@ #include "src/common/hostlist.h" #include "src/common/log.h" #include "src/common/parse_spec.h" +#include "src/common/parse_time.h" #include "src/common/read_config.h" #include "src/common/slurm_protocol_api.h" #include "src/common/xmalloc.h" @@ -1495,6 +1496,9 @@ _update_job (int argc, char *argv[]) (uint16_t) strtol(&argv[i][11], (char **) NULL, 10); } + else if (strncasecmp(argv[i], "StartTime=", 10) == 0) { + job_msg.begin_time = parse_time(&argv[i][10]); + } else { exit_code = 1; fprintf (stderr, "Invalid input: %s\n", argv[i]); diff --git a/src/slurmctld/job_mgr.c b/src/slurmctld/job_mgr.c index 550603118779dc24adce98e02a1b6594745a4e70..b73ccd5c0ba35ba449b3bf71d182b5e21cd955c6 100644 --- a/src/slurmctld/job_mgr.c +++ b/src/slurmctld/job_mgr.c @@ -114,6 +114,7 @@ static int _list_find_job_old(void *job_entry, void *key); static int _load_job_details(struct job_record *job_ptr, Buf buffer); static int _load_job_state(Buf buffer); static int _load_step_state(struct job_record *job_ptr, Buf buffer); +static void _make_time_str (time_t *time, char *string); static void _pack_job_details(struct job_details *detail_ptr, Buf buffer); static int _purge_job_record(uint32_t job_id); static void _purge_lost_batch_jobs(int node_inx, time_t now); @@ -693,6 +694,7 @@ void _dump_job_details(struct job_details *detail_ptr, Buf buffer) pack32((uint32_t) detail_ptr->min_procs, buffer); pack32((uint32_t) detail_ptr->min_memory, buffer); pack32((uint32_t) detail_ptr->min_tmp_disk, buffer); + pack_time(detail_ptr->begin_time, buffer); pack_time(detail_ptr->submit_time, buffer); packstr(detail_ptr->req_nodes, buffer); @@ -716,7 +718,7 @@ static int _load_job_details(struct job_record *job_ptr, Buf buffer) uint32_t min_nodes, max_nodes, min_procs; uint16_t argc = 0, req_tasks, shared, contiguous, name_len; uint32_t 
min_memory, min_tmp_disk, total_procs; - time_t submit_time; + time_t begin_time, submit_time; int i; /* unpack the job's details from the buffer */ @@ -731,6 +733,7 @@ static int _load_job_details(struct job_record *job_ptr, Buf buffer) safe_unpack32(&min_procs, buffer); safe_unpack32(&min_memory, buffer); safe_unpack32(&min_tmp_disk, buffer); + safe_unpack_time(&begin_time, buffer); safe_unpack_time(&submit_time, buffer); safe_unpackstr_xmalloc(&req_nodes, &name_len, buffer); @@ -774,6 +777,7 @@ static int _load_job_details(struct job_record *job_ptr, Buf buffer) job_ptr->details->min_procs = min_procs; job_ptr->details->min_memory = min_memory; job_ptr->details->min_tmp_disk = min_tmp_disk; + job_ptr->details->begin_time = begin_time; job_ptr->details->submit_time = submit_time; job_ptr->details->req_nodes = req_nodes; job_ptr->details->exc_nodes = exc_nodes; @@ -1198,10 +1202,11 @@ void dump_job_desc(job_desc_msg_t * job_specs) dependency = (job_specs->dependency != NO_VAL) ? (long) job_specs->dependency : -1L; debug3(" host=%s port=%u dependency=%ld account=%s", - job_specs->host, job_specs->port, - dependency, job_specs->account); + job_specs->host, job_specs->port, + dependency, job_specs->account); - debug3(" network=%s", job_specs->network); + _make_time_str(&job_specs->begin_time, buf); + debug3(" network=%s begin=%s", job_specs->network, buf); select_g_sprint_jobinfo(job_specs->select_jobinfo, buf, sizeof(buf), SELECT_PRINT_MIXED); @@ -1209,6 +1214,21 @@ void dump_job_desc(job_desc_msg_t * job_specs) debug3(" %s", buf); } +static void _make_time_str (time_t *time, char *string) +{ + struct tm time_tm; + + localtime_r (time, &time_tm); + if ( *time == (time_t) 0 ) { + sprintf( string, "N/A" ); + } else { + sprintf ( string, "%2.2u/%2.2u-%2.2u:%2.2u:%2.2u", + (time_tm.tm_mon+1), time_tm.tm_mday, + time_tm.tm_hour, time_tm.tm_min, time_tm.tm_sec); + } +} + + /* * init_job_conf - initialize the job configuration tables and values. 
@@ -2221,6 +2241,7 @@ _copy_job_desc_to_job_record(job_desc_msg_t * job_desc, detail_ptr->out = xstrdup(job_desc->out); if (job_desc->work_dir) detail_ptr->work_dir = xstrdup(job_desc->work_dir); + detail_ptr->begin_time = job_desc->begin_time; if (select_g_alloc_jobinfo(&job_ptr->select_jobinfo)) return SLURM_ERROR; @@ -2613,7 +2634,14 @@ void pack_job(struct job_record *dump_job_ptr, Buf buffer) pack32(dump_job_ptr->alloc_sid, buffer); pack32(dump_job_ptr->time_limit, buffer); - pack_time(dump_job_ptr->start_time, buffer); + if (IS_JOB_PENDING(dump_job_ptr)) { + if (dump_job_ptr->details) + pack_time(dump_job_ptr->details->begin_time, + buffer); + else + pack_time((time_t) 0, buffer); + } else + pack_time(dump_job_ptr->start_time, buffer); pack_time(dump_job_ptr->end_time, buffer); pack32(dump_job_ptr->priority, buffer); @@ -3215,6 +3243,13 @@ int update_job(job_desc_msg_t * job_specs, uid_t uid) } } + if (job_specs->begin_time) { + if (IS_JOB_PENDING(job_ptr) && detail_ptr) + detail_ptr->begin_time = job_specs->begin_time; + else + error_code = ESLURM_DISABLED; + } + #if SYSTEM_DIMENSIONS if (job_specs->geometry[0] != (uint16_t) NO_VAL) { int i, tot = 1; @@ -3682,7 +3717,8 @@ extern void job_completion_logger(struct job_record *job_ptr) } /* - * job_independent - determine if this job has a depenentent job pending + * job_independent - determine if this job has a dependent job pending + * or if the job's scheduled begin time is in the future * IN job_ptr - pointer to job being tested * RET - true if job no longer must be defered for another job */ @@ -3691,6 +3727,11 @@ extern bool job_independent(struct job_record *job_ptr) struct job_record *dep_ptr; struct job_details *detail_ptr = job_ptr->details; + if (detail_ptr && (detail_ptr->begin_time > time(NULL))) { + detail_ptr->wait_reason = WAIT_TIME; + return false; /* not yet time */ + } + if (job_ptr->dependency == 0) return true; diff --git a/src/slurmctld/slurmctld.h b/src/slurmctld/slurmctld.h index 
14307de760a760522950a93f1115eb7fc4110adf..672fd1ed3b599970c99312d72110a91e8df43db6 100644 --- a/src/slurmctld/slurmctld.h +++ b/src/slurmctld/slurmctld.h @@ -261,6 +261,7 @@ struct job_details { uint32_t total_procs; /* number of allocated processors, for accounting */ time_t submit_time; /* time of submission */ + time_t begin_time; /* start after this time */ char *work_dir; /* pathname of working directory */ char **argv; /* arguments for a batch job script */ uint16_t argc; /* count of argv elements */ @@ -739,6 +740,7 @@ extern int job_complete (uint32_t job_id, uid_t uid, bool requeue, /* * job_independent - determine if this job has a depenentent job pending + * or if the job's scheduled begin time is in the future * IN job_ptr - pointer to job being tested * RET - true if job no longer must be defered for another job */ diff --git a/src/srun/allocate.c b/src/srun/allocate.c index 6579b34a4b8d2f4bca82838f607bf2107ef0fec0..b66a75a831cf9650e61c8d17a156bebfcf0807f0 100644 --- a/src/srun/allocate.c +++ b/src/srun/allocate.c @@ -373,6 +373,8 @@ job_desc_msg_create_from_opts (char *script) j->dependency = opt.dependency; j->exclusive = opt.exclusive; j->group_id = opt.gid; + if (opt.begin) + j->begin_time = opt.begin; if (opt.network) j->network = xstrdup(opt.network); if (opt.account) diff --git a/src/srun/opt.c b/src/srun/opt.c index cb56f1fc979c456a35f2c3987ac85b8651844dc4..f1ccb39da9e0bfc3476b1f8f45424e7ea9ca207a 100644 --- a/src/srun/opt.c +++ b/src/srun/opt.c @@ -57,6 +57,7 @@ #include "src/common/list.h" #include "src/common/log.h" +#include "src/common/parse_time.h" #include "src/common/slurm_protocol_api.h" #include "src/common/uid.h" #include "src/common/xmalloc.h" @@ -105,6 +106,7 @@ #define LONG_OPT_PROPAGATE 0x116 #define LONG_OPT_PROLOG 0x117 #define LONG_OPT_EPILOG 0x118 +#define LONG_OPT_BEGIN 0x119 /*---- forward declarations of static functions ----*/ @@ -771,6 +773,7 @@ void set_options(const int argc, char **argv, int first) {"propagate", 
optional_argument, 0, LONG_OPT_PROPAGATE}, {"prolog", required_argument, 0, LONG_OPT_PROLOG}, {"epilog", required_argument, 0, LONG_OPT_EPILOG}, + {"begin", required_argument, 0, LONG_OPT_BEGIN}, {NULL, 0, 0, 0} }; char *opt_string = "+a:Abc:C:d:D:e:g:Hi:IjJ:kKlm:n:N:" @@ -1158,6 +1161,9 @@ void set_options(const int argc, char **argv, int first) xfree(opt.epilog); opt.epilog = xstrdup(optarg); break; + case LONG_OPT_BEGIN: + opt.begin = parse_time(optarg); + break; } } } @@ -1572,6 +1578,10 @@ static void _opt_list() info("network : %s", opt.network); info("propagate : %s", opt.propagate == NULL ? "NONE" : opt.propagate); + if (opt.begin) { + info("begin : %s", + asctime(localtime(&opt.begin))); + } str = print_commandline(); info("remote command : `%s'", str); xfree(str); @@ -1650,6 +1660,7 @@ static void _help(void) " --mpi=type specifies version of MPI to use\n" " --prolog=program run \"program\" before launching job step\n" " --epilog=program run \"program\" after launching job step\n" +" --begin=time defer job until HH:MM DD/MM/YY\n" "\n" "Allocate only:\n" " -A, --allocate allocate resources and spawn a shell\n" diff --git a/src/srun/opt.h b/src/srun/opt.h index d4c07f63f9f4f4e19aa7d963417f5b0cccba35a7..52dc34b917253e3caed1d3070d7efc2365a4ccb9 100644 --- a/src/srun/opt.h +++ b/src/srun/opt.h @@ -1,5 +1,6 @@ /*****************************************************************************\ * opt.h - definitions for srun option processing + * $Id$ ***************************************************************************** * Copyright (C) 2002 The Regents of the University of California. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). @@ -31,6 +32,7 @@ # include "config.h" #endif +#include <time.h> #include <sys/types.h> #include <unistd.h> @@ -159,6 +161,7 @@ typedef struct srun_options { int conn_type; /* --conn-type */ char *prolog; /* --prolog */ char *epilog; /* --epilog */ + time_t begin; /* --begin */ } opt_t; opt_t opt;