Skip to content
Snippets Groups Projects
Commit 42e75cb1 authored by Moe Jette's avatar Moe Jette
Browse files

Support deferred initiation of job (e.g. srun --begin=11:30 ...).

parent 644de3ff
No related branches found
No related tags found
No related merge requests found
This file describes changes in recent versions of SLURM. It primarily
documents those changes that are of interest to users and admins.
* Changes in SLURM 0.7.0-pre1
=============================
-- Support deferred initiation of job (e.g. srun --begin=11:30 ...).
* Changes in SLURM 0.6.0-pre6
=============================
-- Added logic to return scheduled nodes to Maui scheduler (David
......
.TH SCONTROL "1" "August 2005" "scontrol 0.6" "Slurm components"
.TH SCONTROL "1" "September 2005" "scontrol 0.7" "Slurm components"
.SH "NAME"
scontrol \- Used to view and modify Slurm configuration and state.
......@@ -222,6 +222,24 @@ Set the job's count of required processors to the specified value.
Set the job's ability to share nodes with other jobs. Possible values are
"YES" and "NO".
.TP
\fIStartTime\fP=<time_spec>
Set the job's earliest initiation time.
It accepts times of the form \fIHH:MM:SS\fR to run a job at
a specific time of day (seconds are optional).
(If that time is already past, the next day is assumed.)
You may also specify \fImidnight\fR, \fInoon\fR, or
\fIteatime\fR (4pm) and you can have a time-of-day suffixed
with \fIAM\fR or \fIPM\fR for running in the morning or the evening.
You can also say what day the job will be run, by giving
a date in the form \fImonth-name\fR day with an optional year,
or giving a date of the form \fIMMDDYY\fR or \fIMM/DD/YY\fR
or \fIDD.MM.YY\fR. You can also
give times like \fInow + count time-units\fR, where the time-units
can be \fIminutes\fR, \fIhours\fR, \fIdays\fR, or \fIweeks\fR
and you can tell SLURM to run the job today with the keyword
\fItoday\fR and to run the job tomorrow with the keyword
\fItomorrow\fR.
.TP
\fITimeLimit\fP=<minutes>
Set the job's time limit to the specified value.
.TP
......
\." $Id$
.\"
.TH SRUN "1" "August 2005" "srun 0.6" "slurm components"
.TH SRUN "1" "September 2005" "srun 0.7" "slurm components"
.SH "NAME"
srun \- run parallel jobs
.SH SYNOPSIS
......@@ -75,6 +75,26 @@ dependency and these jobs may belong to different users.
The value may be changed after job submission using the
\fBscontrol\fR command.
.TP
\fB\-\-begin\fR=\fItime\fR
Defer initiation of this job until the specified time.
It accepts times of the form \fIHH:MM:SS\fR to run a job at
a specific time of day (seconds are optional).
(If that time is already past, the next day is assumed.)
You may also specify \fImidnight\fR, \fInoon\fR, or
\fIteatime\fR (4pm) and you can have a time-of-day suffixed
with \fIAM\fR or \fIPM\fR for running in the morning or the evening.
You can also say what day the job will be run, by giving
a date in the form \fImonth-name\fR day with an optional year,
or giving a date of the form \fIMMDDYY\fR or \fIMM/DD/YY\fR
or \fIDD.MM.YY\fR. You can also
give times like \fInow + count time-units\fR, where the time-units
can be \fIminutes\fR, \fIhours\fR, \fIdays\fR, or \fIweeks\fR
and you can tell SLURM to run the job today with the keyword
\fItoday\fR and to run the job tomorrow with the keyword
\fItomorrow\fR.
The value may be changed after job submission using the
\fBscontrol\fR command.
.TP
\fB\-U\fR, \fB\-\-account\fR=\fIaccount\fR
Change resource use by this job to specified account.
The \fIaccount\fR is an arbitrary string. The may
......
......@@ -154,7 +154,8 @@ enum job_wait_reason {
WAIT_PART_NODE_LIMIT, /* request exceeds partition node limit */
WAIT_PART_TIME_LIMIT, /* request exceeds partition time limit */
WAIT_PART_STATE, /* requested partition is down */
WAIT_HELD /* job is held, priority==0 */
WAIT_HELD, /* job is held, priority==0 */
WAIT_TIME /* job waiting for specific begin time */
};
#ifdef HAVE_BGL
......@@ -306,6 +307,7 @@ typedef struct job_descriptor { /* For submit, allocate, and update requests */
* 0 otherwise, default=0. Only useful
* when Consumable Resources are
* enabled */
time_t begin_time; /* delay initiation until this time */
/*
* The following parameters are only meaningful on a Blue Gene
* system at present. Some will be of value on other system.
......
......@@ -80,6 +80,7 @@ void slurm_init_job_desc_msg(job_desc_msg_t * job_desc_msg)
job_desc_msg->work_dir = NULL;
job_desc_msg->host = NULL;
job_desc_msg->port = 0;
job_desc_msg->begin_time = 0;
#if SYSTEM_DIMENSIONS
{
int i;
......
......@@ -462,6 +462,8 @@ extern char *job_reason_string(enum job_wait_reason inx)
return "PartitionDown";
case WAIT_HELD:
return "JobHeld";
case WAIT_TIME:
return "BeginTime";
default:
return "?";
}
......
......@@ -2043,6 +2043,7 @@ _pack_job_desc_msg(job_desc_msg_t * job_desc_ptr, Buf buffer)
pack16(job_desc_ptr->port, buffer);
packstr(job_desc_ptr->host, buffer);
packstr(job_desc_ptr->network, buffer);
pack_time(job_desc_ptr->begin_time, buffer);
if (select_g_alloc_jobinfo (&jobinfo) == SLURM_SUCCESS) {
#if SYSTEM_DIMENSIONS
......@@ -2120,6 +2121,7 @@ _unpack_job_desc_msg(job_desc_msg_t ** job_desc_buffer_ptr, Buf buffer)
safe_unpack16(&job_desc_ptr->port, buffer);
safe_unpackstr_xmalloc(&job_desc_ptr->host, &uint16_tmp, buffer);
safe_unpackstr_xmalloc(&job_desc_ptr->network, &uint16_tmp, buffer);
safe_unpack_time(&job_desc_ptr->begin_time, buffer);
if (select_g_alloc_jobinfo (&job_desc_ptr->select_jobinfo)
|| select_g_unpack_jobinfo(job_desc_ptr->select_jobinfo, buffer))
......
......@@ -67,6 +67,7 @@
#include "src/common/hostlist.h"
#include "src/common/log.h"
#include "src/common/parse_spec.h"
#include "src/common/parse_time.h"
#include "src/common/read_config.h"
#include "src/common/slurm_protocol_api.h"
#include "src/common/xmalloc.h"
......@@ -1495,6 +1496,9 @@ _update_job (int argc, char *argv[])
(uint16_t) strtol(&argv[i][11],
(char **) NULL, 10);
}
else if (strncasecmp(argv[i], "StartTime=", 10) == 0) {
job_msg.begin_time = parse_time(&argv[i][10]);
}
else {
exit_code = 1;
fprintf (stderr, "Invalid input: %s\n", argv[i]);
......
......@@ -114,6 +114,7 @@ static int _list_find_job_old(void *job_entry, void *key);
static int _load_job_details(struct job_record *job_ptr, Buf buffer);
static int _load_job_state(Buf buffer);
static int _load_step_state(struct job_record *job_ptr, Buf buffer);
static void _make_time_str (time_t *time, char *string);
static void _pack_job_details(struct job_details *detail_ptr, Buf buffer);
static int _purge_job_record(uint32_t job_id);
static void _purge_lost_batch_jobs(int node_inx, time_t now);
......@@ -693,6 +694,7 @@ void _dump_job_details(struct job_details *detail_ptr, Buf buffer)
pack32((uint32_t) detail_ptr->min_procs, buffer);
pack32((uint32_t) detail_ptr->min_memory, buffer);
pack32((uint32_t) detail_ptr->min_tmp_disk, buffer);
pack_time(detail_ptr->begin_time, buffer);
pack_time(detail_ptr->submit_time, buffer);
packstr(detail_ptr->req_nodes, buffer);
......@@ -716,7 +718,7 @@ static int _load_job_details(struct job_record *job_ptr, Buf buffer)
uint32_t min_nodes, max_nodes, min_procs;
uint16_t argc = 0, req_tasks, shared, contiguous, name_len;
uint32_t min_memory, min_tmp_disk, total_procs;
time_t submit_time;
time_t begin_time, submit_time;
int i;
/* unpack the job's details from the buffer */
......@@ -731,6 +733,7 @@ static int _load_job_details(struct job_record *job_ptr, Buf buffer)
safe_unpack32(&min_procs, buffer);
safe_unpack32(&min_memory, buffer);
safe_unpack32(&min_tmp_disk, buffer);
safe_unpack_time(&begin_time, buffer);
safe_unpack_time(&submit_time, buffer);
safe_unpackstr_xmalloc(&req_nodes, &name_len, buffer);
......@@ -774,6 +777,7 @@ static int _load_job_details(struct job_record *job_ptr, Buf buffer)
job_ptr->details->min_procs = min_procs;
job_ptr->details->min_memory = min_memory;
job_ptr->details->min_tmp_disk = min_tmp_disk;
job_ptr->details->begin_time = begin_time;
job_ptr->details->submit_time = submit_time;
job_ptr->details->req_nodes = req_nodes;
job_ptr->details->exc_nodes = exc_nodes;
......@@ -1198,10 +1202,11 @@ void dump_job_desc(job_desc_msg_t * job_specs)
dependency = (job_specs->dependency != NO_VAL) ?
(long) job_specs->dependency : -1L;
debug3(" host=%s port=%u dependency=%ld account=%s",
job_specs->host, job_specs->port,
dependency, job_specs->account);
job_specs->host, job_specs->port,
dependency, job_specs->account);
debug3(" network=%s", job_specs->network);
_make_time_str(&job_specs->begin_time, buf);
debug3(" network=%s begin=%s", job_specs->network, buf);
select_g_sprint_jobinfo(job_specs->select_jobinfo,
buf, sizeof(buf), SELECT_PRINT_MIXED);
......@@ -1209,6 +1214,21 @@ void dump_job_desc(job_desc_msg_t * job_specs)
debug3(" %s", buf);
}
/*
 * _make_time_str - format a time stamp as "MM/DD-HH:MM:SS" in local time
 * IN time - pointer to the time value to format
 * OUT string - buffer the text is written to; no length is passed, so the
 *	caller must supply room for the longest result (14 characters plus
 *	the terminating NUL for a valid broken-down time)
 * NOTE: a zero time, or one that localtime_r() cannot convert, is
 *	reported as "N/A"
 */
static void _make_time_str (time_t *time, char *string)
{
	struct tm time_tm;

	/* Zero means "no time set"; also avoid formatting an unset
	 * struct tm if the conversion fails */
	if ((*time == (time_t) 0) || (localtime_r(time, &time_tm) == NULL)) {
		sprintf(string, "N/A");
		return;
	}

	/* struct tm fields are int, so use a signed conversion */
	sprintf(string, "%2.2d/%2.2d-%2.2d:%2.2d:%2.2d",
		(time_tm.tm_mon + 1), time_tm.tm_mday,
		time_tm.tm_hour, time_tm.tm_min, time_tm.tm_sec);
}
/*
* init_job_conf - initialize the job configuration tables and values.
......@@ -2221,6 +2241,7 @@ _copy_job_desc_to_job_record(job_desc_msg_t * job_desc,
detail_ptr->out = xstrdup(job_desc->out);
if (job_desc->work_dir)
detail_ptr->work_dir = xstrdup(job_desc->work_dir);
detail_ptr->begin_time = job_desc->begin_time;
if (select_g_alloc_jobinfo(&job_ptr->select_jobinfo))
return SLURM_ERROR;
......@@ -2613,7 +2634,14 @@ void pack_job(struct job_record *dump_job_ptr, Buf buffer)
pack32(dump_job_ptr->alloc_sid, buffer);
pack32(dump_job_ptr->time_limit, buffer);
pack_time(dump_job_ptr->start_time, buffer);
if (IS_JOB_PENDING(dump_job_ptr)) {
if (dump_job_ptr->details)
pack_time(dump_job_ptr->details->begin_time,
buffer);
else
pack_time((time_t) 0, buffer);
} else
pack_time(dump_job_ptr->start_time, buffer);
pack_time(dump_job_ptr->end_time, buffer);
pack32(dump_job_ptr->priority, buffer);
......@@ -3215,6 +3243,13 @@ int update_job(job_desc_msg_t * job_specs, uid_t uid)
}
}
if (job_specs->begin_time) {
if (IS_JOB_PENDING(job_ptr) && detail_ptr)
detail_ptr->begin_time = job_specs->begin_time;
else
error_code = ESLURM_DISABLED;
}
#if SYSTEM_DIMENSIONS
if (job_specs->geometry[0] != (uint16_t) NO_VAL) {
int i, tot = 1;
......@@ -3682,7 +3717,8 @@ extern void job_completion_logger(struct job_record *job_ptr)
}
/*
* job_independent - determine if this job has a depenentent job pending
* job_independent - determine if this job has a dependent job pending
* or if the job's scheduled begin time is in the future
* IN job_ptr - pointer to job being tested
* RET - true if job no longer must be deferred for another job or begin time
*/
......@@ -3691,6 +3727,11 @@ extern bool job_independent(struct job_record *job_ptr)
struct job_record *dep_ptr;
struct job_details *detail_ptr = job_ptr->details;
if (detail_ptr && (detail_ptr->begin_time > time(NULL))) {
detail_ptr->wait_reason = WAIT_TIME;
return false; /* not yet time */
}
if (job_ptr->dependency == 0)
return true;
......
......@@ -261,6 +261,7 @@ struct job_details {
uint32_t total_procs; /* number of allocated processors,
for accounting */
time_t submit_time; /* time of submission */
time_t begin_time; /* start after this time */
char *work_dir; /* pathname of working directory */
char **argv; /* arguments for a batch job script */
uint16_t argc; /* count of argv elements */
......@@ -739,6 +740,7 @@ extern int job_complete (uint32_t job_id, uid_t uid, bool requeue,
/*
* job_independent - determine if this job has a dependent job pending
* or if the job's scheduled begin time is in the future
* IN job_ptr - pointer to job being tested
* RET - true if job no longer must be deferred for another job or begin time
*/
......
......@@ -373,6 +373,8 @@ job_desc_msg_create_from_opts (char *script)
j->dependency = opt.dependency;
j->exclusive = opt.exclusive;
j->group_id = opt.gid;
if (opt.begin)
j->begin_time = opt.begin;
if (opt.network)
j->network = xstrdup(opt.network);
if (opt.account)
......
......@@ -57,6 +57,7 @@
#include "src/common/list.h"
#include "src/common/log.h"
#include "src/common/parse_time.h"
#include "src/common/slurm_protocol_api.h"
#include "src/common/uid.h"
#include "src/common/xmalloc.h"
......@@ -105,6 +106,7 @@
#define LONG_OPT_PROPAGATE 0x116
#define LONG_OPT_PROLOG 0x117
#define LONG_OPT_EPILOG 0x118
#define LONG_OPT_BEGIN 0x119
/*---- forward declarations of static functions ----*/
......@@ -771,6 +773,7 @@ void set_options(const int argc, char **argv, int first)
{"propagate", optional_argument, 0, LONG_OPT_PROPAGATE},
{"prolog", required_argument, 0, LONG_OPT_PROLOG},
{"epilog", required_argument, 0, LONG_OPT_EPILOG},
{"begin", required_argument, 0, LONG_OPT_BEGIN},
{NULL, 0, 0, 0}
};
char *opt_string = "+a:Abc:C:d:D:e:g:Hi:IjJ:kKlm:n:N:"
......@@ -1158,6 +1161,9 @@ void set_options(const int argc, char **argv, int first)
xfree(opt.epilog);
opt.epilog = xstrdup(optarg);
break;
case LONG_OPT_BEGIN:
opt.begin = parse_time(optarg);
break;
}
}
}
......@@ -1572,6 +1578,10 @@ static void _opt_list()
info("network : %s", opt.network);
info("propagate : %s",
opt.propagate == NULL ? "NONE" : opt.propagate);
if (opt.begin) {
info("begin : %s",
asctime(localtime(&opt.begin)));
}
str = print_commandline();
info("remote command : `%s'", str);
xfree(str);
......@@ -1650,6 +1660,7 @@ static void _help(void)
" --mpi=type specifies version of MPI to use\n"
" --prolog=program run \"program\" before launching job step\n"
" --epilog=program run \"program\" after launching job step\n"
" --begin=time defer job until HH:MM DD/MM/YY\n"
"\n"
"Allocate only:\n"
" -A, --allocate allocate resources and spawn a shell\n"
......
/*****************************************************************************\
* opt.h - definitions for srun option processing
* $Id$
*****************************************************************************
* Copyright (C) 2002 The Regents of the University of California.
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
......@@ -31,6 +32,7 @@
# include "config.h"
#endif
#include <time.h>
#include <sys/types.h>
#include <unistd.h>
......@@ -159,6 +161,7 @@ typedef struct srun_options {
int conn_type; /* --conn-type */
char *prolog; /* --prolog */
char *epilog; /* --epilog */
time_t begin; /* --begin */
} opt_t;
opt_t opt;
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment