diff --git a/doc/man/man1/sacct.1 b/doc/man/man1/sacct.1 index 5566ba04023617ddf8ce02ab2b49d3de969afb76..1306a0dc379feb98bb6fbe5dda4e4f18aeeb7bc7 100644 --- a/doc/man/man1/sacct.1 +++ b/doc/man/man1/sacct.1 @@ -333,6 +333,9 @@ Job has been allocated resources, but are waiting for them to become ready for u \f3CG COMPLETING\fP Job is in the process of completing. Some processes on some nodes may still be active. .TP +\f3DL DEADLINE\fP +Job missed its deadline. +.TP \f3F FAILED\fP Job terminated with non\-zero exit code or other failure condition. .TP @@ -773,7 +776,7 @@ Initiation time of the job in the same format as \f3End\fP. Displays the job status, or state. Output can be RUNNING, RESIZING, SUSPENDED, COMPLETED, CANCELLED, FAILED, -TIMEOUT, PREEMPTED, BOOT_FAIL or NODE_FAIL. +TIMEOUT, PREEMPTED, BOOT_FAIL, DEADLINE or NODE_FAIL. If more information is available on the job state than will fit into the current field width (for example, the uid that CANCELLED a job) the state will be followed by a "+". You can increase the size of diff --git a/doc/man/man1/salloc.1 b/doc/man/man1/salloc.1 index eb37fd82847810ca0174910ce3f9f2bf29e5a49b..16e8a494e4d97962f1c0c3c2926aa3fe2058adf3 100644 --- a/doc/man/man1/salloc.1 +++ b/doc/man/man1/salloc.1 @@ -358,6 +358,20 @@ the \-\-cpus\-per\-task=3 options, the controller knows that each task requires 3 processors on the same node, and the controller will grant an allocation of 4 nodes, one for each of the 4 tasks. +.TP +\fB\-\-deadline\fR=<\fIOPT\fR> +remove the job if no ending is possible before +this deadline (start > (deadline \- time[\-min])). +Default is no deadline. 
Valid time formats are: +.br +HH:MM[:SS] [AM|PM] +.br +MMDD[YY] or MM/DD[/YY] or MM.DD[.YY] +.br +MM/DD[/YY]\-HH:MM[:SS] +.br +YYYY\-MM\-DD[THH:MM[:SS]] + .TP \fB\-d\fR, \fB\-\-dependency\fR=<\fIdependency_list\fR> Defer the start of this job until the specified dependencies have been diff --git a/doc/man/man1/sbatch.1 b/doc/man/man1/sbatch.1 index 258d2be36b2fe598dd14b2da3a92439d3e08d42d..158d179a7e7f01626fe80b75188daf60d1cf346c 100644 --- a/doc/man/man1/sbatch.1 +++ b/doc/man/man1/sbatch.1 @@ -385,6 +385,20 @@ the \-\-cpus\-per\-task=3 options, the controller knows that each task requires 3 processors on the same node, and the controller will grant an allocation of 4 nodes, one for each of the 4 tasks. +.TP +\fB\-\-deadline\fR=<\fIOPT\fR> +remove the job if no ending is possible before +this deadline (start > (deadline \- time[\-min])). +Default is no deadline. Valid time formats are: +.br +HH:MM[:SS] [AM|PM] +.br +MMDD[YY] or MM/DD[/YY] or MM.DD[.YY] +.br +MM/DD[/YY]\-HH:MM[:SS] +.br +YYYY\-MM\-DD[THH:MM[:SS]] + .TP \fB\-d\fR, \fB\-\-dependency\fR=<\fIdependency_list\fR> Defer the start of this job until the specified dependencies have been diff --git a/doc/man/man1/scontrol.1 b/doc/man/man1/scontrol.1 index b2a60fe11c4122e4f1f03f23f8daba0610cd753c..266832f741a3be52c2576e8bbd59edc386d422c2 100644 --- a/doc/man/man1/scontrol.1 +++ b/doc/man/man1/scontrol.1 @@ -314,7 +314,7 @@ Resuming a previously suspended job may result in multiple jobs being allocated the same CPUs, which could trigger gang scheduling with some configurations or severe degradation in performance with other configurations. Use of the scancel command to send SIGSTOP and SIGCONT signals would stop a -job without releasing its CPUs for allocaiton to other jobs and would be a +job without releasing its CPUs for allocation to other jobs and would be a preferable mechanism in many cases. Use with caution. 
@@ -512,6 +512,23 @@ Set the job's requirement for contiguous (consecutive) nodes to be allocated. Possible values are "YES" and "NO". Only the Slurm administrator or root can change this parameter. .TP +\fIDeadline\fP=<time_spec> +It accepts times of the form \fIHH:MM:SS\fR to specify a deadline for a job at +a specific time of day (seconds are optional). +You may also specify \fImidnight\fR, \fInoon\fR, \fIfika\fR (3 PM) or +\fIteatime\fR (4 PM) and you can have a time\-of\-day suffixed +with \fIAM\fR or \fIPM\fR for a deadline in the morning or the evening. +You can specify a deadline for the job with +a date of the form \fIMMDDYY\fR or \fIMM/DD/YY\fR or \fIMM.DD.YY\fR, +or a date and time as \fIYYYY\-MM\-DD[THH:MM[:SS]]\fR. You can also +give times like \fInow + count time\-units\fR, where the time\-units +can be \fIminutes\fR, \fIhours\fR, \fIdays\fR, or \fIweeks\fR +and you can tell Slurm to put a deadline for tomorrow with the keyword +\fItomorrow\fR. +The specified deadline must be later than the current time. +Only pending jobs can have the deadline updated. +Only the Slurm administrator or root can change this parameter. +.TP \fIDependency\fP=<dependency_list> Defer job's initiation until specified job dependency specification is satisfied. diff --git a/doc/man/man1/squeue.1 b/doc/man/man1/squeue.1 index ad6c4f156af606ac300d8e475b650bbd2e72c69f..38f2049b084d4580c98268a4c6da028a4380b94a 100644 --- a/doc/man/man1/squeue.1 +++ b/doc/man/man1/squeue.1 @@ -532,6 +532,10 @@ Prints the frequency of the allocated CPUs. Prints the number of CPUs per tasks allocated to the job. (Valid for jobs only) .TP +\fBdeadline\fR +Prints the deadline assigned to the job. +(Valid for jobs only) +.TP \fBdependency\fR Job dependencies remaining. This job will not begin execution until these dependent jobs complete. In the case of a job that can not run due to job @@ -788,7 +792,7 @@ When \-\-sockets\-per\-node has not been set, "*" is displayed. 
Number of sockets per board allocated to the job. (Valid for jobs only) .TP -\fBstartime\fR +\fBstarttime\fR Actual or expected start time of the job or job step. (Valid for jobs and job steps) .TP diff --git a/doc/man/man1/srun.1 b/doc/man/man1/srun.1 index 3e4d418934962f0654fa60ac0fe1c3cb7a16bb03..d4cc1f5bf6300949d1959e6086fc889eac77624d 100644 --- a/doc/man/man1/srun.1 +++ b/doc/man/man1/srun.1 @@ -557,6 +557,20 @@ threads per CPU for a total of two tasks. there are configurations and options which can result in inconsistent allocations when \-c has a value greater than \-c on salloc or sbatch. +.TP +\fB\-\-deadline\fR=<\fIOPT\fR> +remove the job if no ending is possible before +this deadline (start > (deadline \- time[\-min])). +Default is no deadline. Valid time formats are: +.br +HH:MM[:SS] [AM|PM] +.br +MMDD[YY] or MM/DD[/YY] or MM.DD[.YY] +.br +MM/DD[/YY]\-HH:MM[:SS] +.br +YYYY\-MM\-DD[THH:MM[:SS]] + .TP \fB\-d\fR, \fB\-\-dependency\fR=<\fIdependency_list\fR> Defer the start of this job until the specified dependencies have been diff --git a/slurm/slurm.h.in b/slurm/slurm.h.in index 33d1ed12988424c6da8eaaf8fb0fa5d7e5885a55..2b105cf647bd037f7b3b6ccb8d83a4955b1b855a 100644 --- a/slurm/slurm.h.in +++ b/slurm/slurm.h.in @@ -274,6 +274,7 @@ enum job_states { JOB_NODE_FAIL, /* terminated on node failure */ JOB_PREEMPTED, /* terminated due to preemption */ JOB_BOOT_FAIL, /* terminated due to node boot failure */ + JOB_DEADLINE, /* terminated on deadline */ JOB_END /* not a real state, last entry in table */ }; #define JOB_STATE_BASE 0x000000ff /* Used for job_states above */ @@ -536,6 +537,7 @@ enum job_state_reason { * (burst buffer) */ WAIT_QOS_MIN_BB, /* QOS MinTRESPerJob not reached * (burst buffer) */ + FAIL_DEADLINE, /* reached deadline */ }; enum job_acct_types { @@ -1407,6 +1409,7 @@ typedef struct job_descriptor { /* For submit, allocate, and update requests */ uint32_t cpu_freq_min; /* Minimum cpu frequency */ uint32_t cpu_freq_max; /* Maximum cpu 
frequency */ uint32_t cpu_freq_gov; /* cpu frequency governor */ + time_t deadline; /* deadline */ char *dependency; /* synchronize job execution with other jobs */ time_t end_time; /* time by which job must complete, used for * job update only now, possible deadline @@ -1570,6 +1573,7 @@ typedef struct job_info { uint32_t cpu_freq_min; /* Minimum cpu frequency */ uint32_t cpu_freq_max; /* Maximum cpu frequency */ uint32_t cpu_freq_gov; /* cpu frequency governor */ + time_t deadline; /* deadline */ char *dependency; /* synchronize job execution with other jobs */ uint32_t derived_ec; /* highest exit code of all job steps */ time_t eligible_time; /* time job is eligible for running */ diff --git a/src/api/job_info.c b/src/api/job_info.c index ef777079cb8c93b9b2b78af042e69804c962f61f..7e96b53075456342bbc5f522b4679b78a3ac0b7d 100644 --- a/src/api/job_info.c +++ b/src/api/job_info.c @@ -562,11 +562,21 @@ line6: xstrcat(out, tmp_line); if ((job_ptr->time_limit == INFINITE) && (job_ptr->end_time > time(NULL))) - sprintf(tmp_line, "Unknown"); + sprintf(tmp_line, "Unknown "); else { slurm_make_time_str ((time_t *)&job_ptr->end_time, time_str, sizeof(time_str)); + sprintf(tmp_line, "%s ", time_str); + } + xstrcat(out, tmp_line); + snprintf(tmp_line, sizeof(tmp_line), "Deadline="); + xstrcat(out, tmp_line); + if (job_ptr->deadline) { + slurm_make_time_str((time_t *)&job_ptr->deadline, time_str, + sizeof(time_str)); sprintf(tmp_line, "%s", time_str); + } else { + sprintf(tmp_line, "N/A"); } xstrcat(out, tmp_line); if (one_liner) diff --git a/src/common/slurm_protocol_defs.c b/src/common/slurm_protocol_defs.c index 1681dec3344f2d48c5e46865b6af281270c96d9e..5b5628bfc4095900be012a2c2a4b3df511a63ffd 100644 --- a/src/common/slurm_protocol_defs.c +++ b/src/common/slurm_protocol_defs.c @@ -1591,6 +1591,8 @@ extern char *job_reason_string(enum job_state_reason inx) return "AssocMaxBBMinutesPerJob"; case WAIT_QOS_MIN_BB: return "QOSMinBB"; + case FAIL_DEADLINE: + return "DeadLine"; 
default: snprintf(val, sizeof(val), "%d", inx); return val; @@ -1830,6 +1832,8 @@ extern char *job_state_string(uint32_t inx) return "PREEMPTED"; case JOB_BOOT_FAIL: return "BOOT_FAIL"; + case JOB_DEADLINE: + return "DEADLINE"; default: return "?"; } @@ -1875,6 +1879,8 @@ extern char *job_state_string_compact(uint32_t inx) return "PR"; case JOB_BOOT_FAIL: return "BF"; + case JOB_DEADLINE: + return "DL"; default: return "?"; } diff --git a/src/common/slurm_protocol_defs.h b/src/common/slurm_protocol_defs.h index ec39bc478ab5bc755a6ca85e2f1e37b6eaee074e..5b25ae532ee569d95c50bc2d1cba9bfc0e2bdda0 100644 --- a/src/common/slurm_protocol_defs.h +++ b/src/common/slurm_protocol_defs.h @@ -89,6 +89,8 @@ ((_X->job_state & JOB_STATE_BASE) == JOB_TIMEOUT) #define IS_JOB_NODE_FAILED(_X) \ ((_X->job_state & JOB_STATE_BASE) == JOB_NODE_FAIL) +#define IS_JOB_DEADLINE(_X) \ + ((_X->job_state & JOB_STATE_BASE) == JOB_DEADLINE) /* Derived job states */ #define IS_JOB_COMPLETING(_X) \ diff --git a/src/common/slurm_protocol_pack.c b/src/common/slurm_protocol_pack.c index 0820885ad902e52f4932979f2fe3a6abaf4cc135..173d729fe9dc25cb850e6a3697a935fa83c65f9b 100644 --- a/src/common/slurm_protocol_pack.c +++ b/src/common/slurm_protocol_pack.c @@ -5876,6 +5876,7 @@ _unpack_job_info_members(job_info_t * job, Buf buffer, safe_unpack8 (&job->reboot, buffer); safe_unpack16(&job->restart_cnt, buffer); safe_unpack16(&job->show_flags, buffer); + safe_unpack_time(&job->deadline, buffer); safe_unpack32(&job->alloc_sid, buffer); safe_unpack32(&job->time_limit, buffer); @@ -7750,6 +7751,7 @@ _pack_job_desc_msg(job_desc_msg_t * job_desc_ptr, Buf buffer, packstr(job_desc_ptr->network, buffer); pack_time(job_desc_ptr->begin_time, buffer); pack_time(job_desc_ptr->end_time, buffer); + pack_time(job_desc_ptr->deadline, buffer); packstr(job_desc_ptr->licenses, buffer); pack16(job_desc_ptr->mail_type, buffer); @@ -8279,6 +8281,7 @@ _unpack_job_desc_msg(job_desc_msg_t ** job_desc_buffer_ptr, Buf buffer, 
&uint32_tmp, buffer); safe_unpack_time(&job_desc_ptr->begin_time, buffer); safe_unpack_time(&job_desc_ptr->end_time, buffer); + safe_unpack_time(&job_desc_ptr->deadline, buffer); safe_unpackstr_xmalloc(&job_desc_ptr->licenses, &uint32_tmp, buffer); diff --git a/src/plugins/accounting_storage/mysql/as_mysql_jobacct_process.c b/src/plugins/accounting_storage/mysql/as_mysql_jobacct_process.c index cbce5a510f1b2f44b0f2b62f0788b13bfe3ceb8d..a617d11a4e573bdc0f3f93ed903c2afae4f81f1b 100644 --- a/src/plugins/accounting_storage/mysql/as_mysql_jobacct_process.c +++ b/src/plugins/accounting_storage/mysql/as_mysql_jobacct_process.c @@ -330,6 +330,7 @@ static void _state_time_string(char **extra, char *cluster_name, uint32_t state, case JOB_TIMEOUT: case JOB_NODE_FAIL: case JOB_PREEMPTED: + case JOB_DEADLINE: default: xstrfmtcat(*extra, "(t1.state='%u' && (t1.time_end && ", state); if (start) { diff --git a/src/plugins/sched/backfill/backfill.c b/src/plugins/sched/backfill/backfill.c index 048099c83bae0fc032d349f714f674e058f9d643..b6144bba7073ff5b5c2aba2ffeff51dca3277b49 100644 --- a/src/plugins/sched/backfill/backfill.c +++ b/src/plugins/sched/backfill/backfill.c @@ -804,7 +804,7 @@ static int _attempt_backfill(void) int bb, i, j, node_space_recs, mcs_select = 0; struct job_record *job_ptr; struct part_record *part_ptr, **bf_part_ptr = NULL; - uint32_t end_time, end_reserve; + uint32_t end_time, end_reserve, deadline_time_limit; uint32_t time_limit, comp_time_limit, orig_time_limit, part_time_limit; uint32_t min_nodes, max_nodes, req_nodes; bitstr_t *avail_bitmap = NULL, *resv_bitmap = NULL; @@ -1188,6 +1188,16 @@ next_task: continue; } + /* test of deadline */ + now = time(NULL); + deadline_time_limit = 0; + if ((job_ptr->deadline) && (job_ptr->deadline != NO_VAL)) { + if (!deadline_ok(job_ptr, "backfill")) + continue; + + deadline_time_limit = (job_ptr->deadline - now) / 60; + } + /* Determine job's expected completion time */ if (part_ptr->max_time == INFINITE) 
part_time_limit = YEAR_MINUTES; @@ -1204,7 +1214,10 @@ next_task: time_limit = MIN(job_ptr->time_limit, part_time_limit); } - comp_time_limit = time_limit; + if (deadline_time_limit) + comp_time_limit = MIN(time_limit, deadline_time_limit); + else + comp_time_limit = time_limit; qos_ptr = job_ptr->qos_ptr; if (qos_ptr && (qos_ptr->flags & QOS_FLAG_NO_RESERVE) && slurm_get_preempt_mode()) diff --git a/src/sacct/options.c b/src/sacct/options.c index 3d4ffd099385cfceb44c361377de0388dd977d3d..5d6bca50fa89c2e264021ee3cf36a76133907c9b 100644 --- a/src/sacct/options.c +++ b/src/sacct/options.c @@ -373,8 +373,8 @@ sacct [<OPTION>] \n \ -s, --state: \n\ Select jobs based on their current state or the state \n\ they were in during the time period given: running (r), \n\ - completed (cd), failed (f), timeout (to), resizing (rs) \n\ - and node_fail (nf). \n\ + completed (cd), failed (f), timeout (to), resizing (rs), \n\ + deadline (dl) and node_fail (nf). \n\ -S, --starttime: \n\ Select jobs eligible after this time. 
Default is \n\ 00:00:00 of the current day, unless '-s' is set then \n\ diff --git a/src/salloc/opt.c b/src/salloc/opt.c index 9a77df27291a39108ca762accd678aa5eae4ded5..216f69df264252f9186b25973fd76a9f1bf816ed 100644 --- a/src/salloc/opt.c +++ b/src/salloc/opt.c @@ -180,6 +180,8 @@ #define LONG_OPT_POWER 0x162 #define LONG_OPT_THREAD_SPEC 0x163 #define LONG_OPT_MCS_LABEL 0x165 +#define LONG_OPT_DEADLINE 0x166 + /*---- global variables, defined in opt.h ----*/ opt_t opt; @@ -682,6 +684,7 @@ void set_options(const int argc, char **argv) {"contiguous", no_argument, 0, LONG_OPT_CONT}, {"cores-per-socket", required_argument, 0, LONG_OPT_CORESPERSOCKET}, {"cpu-freq", required_argument, 0, LONG_OPT_CPU_FREQ}, + {"deadline", required_argument, 0, LONG_OPT_DEADLINE}, {"exclusive", optional_argument, 0, LONG_OPT_EXCLUSIVE}, {"get-user-env", optional_argument, 0, LONG_OPT_GET_USER_ENV}, {"gid", required_argument, 0, LONG_OPT_GID}, @@ -926,6 +929,14 @@ void set_options(const int argc, char **argv) case LONG_OPT_CONT: opt.contiguous = true; break; + case LONG_OPT_DEADLINE: + opt.deadline = parse_time(optarg, 0); + if (errno == ESLURM_INVALID_TIME_VALUE) { + error("Invalid deadline specification %s", + optarg); + exit(error_exit); + } + break; case LONG_OPT_EXCLUSIVE: if (optarg == NULL) { opt.shared = JOB_SHARED_NONE; @@ -1634,6 +1645,10 @@ static bool _opt_verify(void) if (opt.time_min == 0) opt.time_min = INFINITE; } + if ((opt.deadline) && (opt.begin) && (opt.deadline < opt.begin)) { + error("Incompatible begin and deadline time specification"); + exit(error_exit); + } #ifdef HAVE_AIX if (opt.network == NULL) @@ -1927,6 +1942,11 @@ static void _opt_list(void) slurm_make_time_str(&opt.begin, time_str, sizeof(time_str)); info("begin : %s", time_str); } + if (opt.deadline) { + char time_str[32]; + slurm_make_time_str(&opt.deadline, time_str, sizeof(time_str)); + info("deadline : %s", time_str); + } info("mail_type : %s", print_mail_type(opt.mail_type)); info("mail_user : %s", 
opt.mail_user); info("sockets-per-node : %d", opt.sockets_per_node); @@ -2014,6 +2034,8 @@ static void _help(void) " --comment=name arbitrary comment\n" " --cpu-freq=min[-max[:gov]] requested cpu frequency (and governor)\n" " -d, --dependency=type:jobid defer job until condition on jobid is satisfied\n" +" --deadline=time remove the job if no ending possible before\n" +" this deadline (start > (deadline - time[-min]))\n" " -D, --chdir=path change working directory\n" " --get-user-env used by Moab. See srun man page.\n" " --gid=group_id group ID to run job as (user root only)\n" diff --git a/src/salloc/opt.h b/src/salloc/opt.h index 8fc4ac6c6962e46fd1a244857ae95c948a7da862..9e3179281c72011ccc40d10f902d5cbba95bb892 100644 --- a/src/salloc/opt.h +++ b/src/salloc/opt.h @@ -169,6 +169,7 @@ typedef struct salloc_options { uint32_t cpu_freq_gov; /* cpu frequency governor */ uint8_t power_flags; /* Power management options */ char *mcs_label; /* mcs label if mcs plugin in use */ + time_t deadline; /* --deadline */ } opt_t; extern opt_t opt; diff --git a/src/salloc/salloc.c b/src/salloc/salloc.c index a17f6960a3c1311e69831dd14d672ced9dfbc243..7a1cd063840bd247e51932d47e3e2c058fde5a86 100644 --- a/src/salloc/salloc.c +++ b/src/salloc/salloc.c @@ -684,6 +684,8 @@ static int _fill_job_desc_from_opts(job_desc_msg_t *desc) desc->mail_user = xstrdup(opt.mail_user); if (opt.begin) desc->begin_time = opt.begin; + if (opt.deadline) + desc->deadline = opt.deadline; if (opt.burst_buffer) desc->burst_buffer = opt.burst_buffer; if (opt.account) diff --git a/src/sbatch/opt.c b/src/sbatch/opt.c index cbb29a628dd078fcd12e21d0388409a8b800af85..21236283f534fcb8ac3a811dd5d00861485f6d13 100644 --- a/src/sbatch/opt.c +++ b/src/sbatch/opt.c @@ -192,6 +192,7 @@ #define LONG_OPT_PRIORITY 0x160 #define LONG_OPT_KILL_INV_DEP 0x161 #define LONG_OPT_MCS_LABEL 0x165 +#define LONG_OPT_DEADLINE 0x166 /*---- global variables, defined in opt.h ----*/ opt_t opt; @@ -743,6 +744,7 @@ static struct option 
long_options[] = { {"contiguous", no_argument, 0, LONG_OPT_CONT}, {"cores-per-socket", required_argument, 0, LONG_OPT_CORESPERSOCKET}, {"cpu-freq", required_argument, 0, LONG_OPT_CPU_FREQ}, + {"deadline", required_argument, 0, LONG_OPT_DEADLINE}, {"exclusive", optional_argument, 0, LONG_OPT_EXCLUSIVE}, {"export", required_argument, 0, LONG_OPT_EXPORT}, {"export-file", required_argument, 0, LONG_OPT_EXPORT_FILE}, @@ -1391,6 +1393,14 @@ static void _set_options(int argc, char **argv) case LONG_OPT_CONT: opt.contiguous = true; break; + case LONG_OPT_DEADLINE: + opt.deadline = parse_time(optarg, 0); + if (errno == ESLURM_INVALID_TIME_VALUE) { + error("Invalid deadline specification %s", + optarg); + exit(error_exit); + } + break; case LONG_OPT_EXCLUSIVE: if (optarg == NULL) { opt.shared = JOB_SHARED_NONE; @@ -2607,6 +2617,10 @@ static bool _opt_verify(void) if (opt.time_min == 0) opt.time_min = INFINITE; } + if ((opt.deadline) && (opt.begin) && (opt.deadline < opt.begin)) { + error("Incompatible begin and deadline time specification"); + exit(error_exit); + } if (opt.ckpt_interval_str) { opt.ckpt_interval = time_str2mins(opt.ckpt_interval_str); @@ -2956,6 +2970,11 @@ static void _opt_list(void) slurm_make_time_str(&opt.begin, time_str, sizeof(time_str)); info("begin : %s", time_str); } + if (opt.deadline) { + char time_str[32]; + slurm_make_time_str(&opt.deadline, time_str, sizeof(time_str)); + info("deadline : %s", time_str); + } info("array : %s", opt.array_inx == NULL ? 
"N/A" : opt.array_inx); info("cpu_freq_min : %u", opt.cpu_freq_min); @@ -3053,6 +3072,8 @@ static void _help(void) " -c, --cpus-per-task=ncpus number of cpus required per task\n" " -d, --dependency=type:jobid defer job until condition on jobid is satisfied\n" +" --deadline=time remove the job if no ending possible before\n" +" this deadline (start > (deadline - time[-min]))\n" " -D, --workdir=directory set working directory for batch script\n" " -e, --error=err file for batch script's standard error\n" " --export[=names] specify environment variables to export\n" diff --git a/src/sbatch/opt.h b/src/sbatch/opt.h index 8202c86cb5e93425ca1a1c22f96fa76221ab69fd..8c7f91571fe50289662c7e4937c8047459412e1c 100644 --- a/src/sbatch/opt.h +++ b/src/sbatch/opt.h @@ -188,6 +188,7 @@ typedef struct sbatch_options { char *burst_buffer; /* -bb */ uint8_t power_flags; /* Power management options */ char *mcs_label; /* mcs label if mcs plugin in use */ + time_t deadline; /* ---deadline */ } opt_t; extern opt_t opt; diff --git a/src/sbatch/sbatch.c b/src/sbatch/sbatch.c index 36d9d2b970e8ae6c1d4855ce2c956d44ce25620a..e77530d7f57413d441b6dc10ce9cc25d73f264cf 100644 --- a/src/sbatch/sbatch.c +++ b/src/sbatch/sbatch.c @@ -456,6 +456,8 @@ static int _fill_job_desc_from_opts(job_desc_msg_t *desc) desc->burst_buffer = opt.burst_buffer; if (opt.begin) desc->begin_time = opt.begin; + if (opt.deadline) + desc->deadline = opt.deadline; if (opt.account) desc->account = xstrdup(opt.account); if (opt.comment) diff --git a/src/scontrol/update_job.c b/src/scontrol/update_job.c index a5ffd45960a01e9fa423007b3db60711ac28626c..d77c7ed19b3eea0c9ff170d2fb4ae7fa90d4114f 100644 --- a/src/scontrol/update_job.c +++ b/src/scontrol/update_job.c @@ -1165,6 +1165,11 @@ scontrol_update_job (int argc, char *argv[]) } job_uid = (uint32_t) user_id; } + else if (!strncasecmp(tag, "Deadline", MAX(taglen, 3))) { + if ((job_msg.deadline = parse_time(val, 0))) { + update_cnt++; + } + } else { exit_code = 1; fprintf 
(stderr, "Update of this parameter is not " diff --git a/src/slurmctld/job_mgr.c b/src/slurmctld/job_mgr.c index 055e434a2bef11f4c5052ed46de4192fbdc6c1c1..2f338dec659f79290fc59cece778a8e2e061f002 100644 --- a/src/slurmctld/job_mgr.c +++ b/src/slurmctld/job_mgr.c @@ -1154,6 +1154,7 @@ static void _dump_job_state(struct job_record *dump_job_ptr, Buf buffer) pack_time(dump_job_ptr->pre_sus_time, buffer); pack_time(dump_job_ptr->resize_time, buffer); pack_time(dump_job_ptr->tot_sus_time, buffer); + pack_time(dump_job_ptr->deadline, buffer); pack16(dump_job_ptr->direct_set_prio, buffer); pack32(dump_job_ptr->job_state, buffer); @@ -1260,7 +1261,7 @@ static int _load_job_state(Buf buffer, uint16_t protocol_version) uint32_t profile = ACCT_GATHER_PROFILE_NOT_SET; uint32_t job_state; time_t start_time, end_time, suspend_time, pre_sus_time, tot_sus_time; - time_t preempt_time = 0; + time_t preempt_time = 0, deadline = 0; time_t resize_time = 0, now = time(NULL); uint8_t reboot = 0, power_flags = 0; uint8_t uint8_tmp = 0; @@ -1369,6 +1370,7 @@ static int _load_job_state(Buf buffer, uint16_t protocol_version) safe_unpack_time(&pre_sus_time, buffer); safe_unpack_time(&resize_time, buffer); safe_unpack_time(&tot_sus_time, buffer); + safe_unpack_time(&deadline, buffer); safe_unpack16(&direct_set_prio, buffer); safe_unpack32(&job_state, buffer); @@ -2014,6 +2016,7 @@ static int _load_job_state(Buf buffer, uint16_t protocol_version) job_ptr->state_desc = state_desc; state_desc = NULL; /* reused, nothing left to free */ job_ptr->suspend_time = suspend_time; + job_ptr->deadline = deadline; if (task_id_size != NO_VAL) { if (!job_ptr->array_recs) job_ptr->array_recs=xmalloc(sizeof(job_array_struct_t)); @@ -3612,6 +3615,8 @@ void dump_job_desc(job_desc_msg_t * job_specs) debug3(" array_inx=%s", job_specs->array_inx); debug3(" burst_buffer=%s", job_specs->burst_buffer); debug3(" mcs_label=%s", job_specs->mcs_label); + slurm_make_time_str(&job_specs->deadline, buf, sizeof(buf)); + 
debug3(" deadline=%s", buf); select_g_select_jobinfo_sprint(job_specs->select_jobinfo, buf, sizeof(buf), SELECT_PRINT_MIXED); @@ -3816,6 +3821,7 @@ extern struct job_record *job_array_split(struct job_record *job_ptr) job_ptr_pend->tres_fmt_alloc_str = NULL; job_ptr_pend->wckey = xstrdup(job_ptr->wckey); + job_ptr_pend->deadline = job_ptr->deadline; job_details = job_ptr->details; details_new = job_ptr_pend->details; @@ -3988,6 +3994,15 @@ static int _select_nodes_parts(struct job_record *job_ptr, bool test_only, return rc; } +static inline bool _has_deadline(struct job_record *job_ptr) +{ + if ((job_ptr->deadline) && (job_ptr->deadline != NO_VAL)) { + queue_job_scheduler(); + return true; + } + return false; +} + /* * job_allocate - create job_records for the supplied job specification and * allocate nodes for it. @@ -4139,7 +4154,7 @@ extern int job_allocate(job_desc_msg_t * job_specs, int immediate, test_only = will_run || (allocate == 0); - no_alloc = test_only || too_fragmented || + no_alloc = test_only || too_fragmented || _has_deadline(job_ptr) || (!top_prio) || (!independent) || !avail_front_end(job_ptr); no_alloc = no_alloc || (bb_g_job_test_stage_in(job_ptr, no_alloc) != 1); @@ -5398,6 +5413,38 @@ static int _valid_job_part(job_desc_msg_t * job_desc, rc = ESLURM_INVALID_TIME_LIMIT; goto fini; } + if ((job_desc->deadline) && (job_desc->deadline != NO_VAL)) { + char time_str_now[32]; + char time_str_deadline[32]; + time_t now = time(NULL); + slurm_make_time_str(&job_desc->deadline, time_str_deadline, + sizeof(time_str_deadline)); + slurm_make_time_str(&now, time_str_now, sizeof(time_str_now)); + if (job_desc->deadline < now) { + info("_valid_job_part: job's deadline smaller than now " + "(%s < %s)", + time_str_deadline, time_str_now); + rc = ESLURM_INVALID_TIME_LIMIT; + goto fini; + } + if ((job_desc->time_min) && (job_desc->time_min != NO_VAL) && + (job_desc->deadline < (now + job_desc->time_min * 60))) { + info("_valid_job_part: job's min_time greater 
than " + "deadline (%u > %s)", + job_desc->time_min, time_str_deadline); + rc = ESLURM_INVALID_TIME_LIMIT; + goto fini; + } + if ((job_desc->time_min == 0) && (job_desc->time_limit) && + (job_desc->time_limit != NO_VAL) && + (job_desc->deadline < (now + job_desc->time_limit * 60))) { + info("_valid_job_part: job's time_limit greater than " + "deadline (%u > %s)", + job_desc->time_limit, time_str_deadline); + rc = ESLURM_INVALID_TIME_LIMIT; + goto fini; + } + } fini: return rc; @@ -6844,6 +6891,7 @@ _copy_job_desc_to_job_record(job_desc_msg_t * job_desc, job_ptr->group_id = (gid_t) job_desc->group_id; job_ptr->job_state = JOB_PENDING; job_ptr->time_limit = job_desc->time_limit; + job_ptr->deadline = job_desc->deadline; if (job_desc->time_min != NO_VAL) job_ptr->time_min = job_desc->time_min; job_ptr->alloc_sid = job_desc->alloc_sid; @@ -8109,6 +8157,7 @@ void pack_job(struct job_record *dump_job_ptr, uint16_t show_flags, Buf buffer, pack8(dump_job_ptr->reboot, buffer); pack16(dump_job_ptr->restart_cnt, buffer); pack16(show_flags, buffer); + pack_time(dump_job_ptr->deadline, buffer); pack32(dump_job_ptr->alloc_sid, buffer); if ((dump_job_ptr->time_limit == NO_VAL) @@ -10610,6 +10659,29 @@ static int _update_job(struct job_record *job_ptr, job_desc_msg_t * job_specs, error_code = ESLURM_ACCESS_DENIED; } } + + if ((job_specs->deadline) && (!IS_JOB_RUNNING(job_ptr))) { + char time_str[32]; + slurm_make_time_str(&job_ptr->deadline,time_str, + sizeof(time_str)); + if (job_specs->deadline < now) { + error_code = ESLURM_INVALID_TIME_VALUE; + } else if (authorized) { + /* update deadline */ + job_ptr->deadline = job_specs->deadline; + info("sched: update_job: setting deadline to %s for " + "job_id %u", time_str, + job_specs->job_id); + /* Always use the acct_policy_limit_set.* + * since if set by a super user it be set correctly */ + job_ptr->limit_set.time = acct_policy_limit_set.time; + update_accounting = true; + } else { + info("sched: Attempt to extend end time for job 
%u", + job_specs->job_id); + error_code = ESLURM_ACCESS_DENIED; + } + } if (error_code != SLURM_SUCCESS) goto fini; @@ -14654,6 +14726,7 @@ _copy_job_record_to_job_desc(struct job_record *job_ptr) job_desc->cpu_freq_min = details->cpu_freq_min; job_desc->cpu_freq_max = details->cpu_freq_max; job_desc->cpu_freq_gov = details->cpu_freq_gov; + job_desc->deadline = job_ptr->deadline; job_desc->dependency = xstrdup(details->dependency); job_desc->end_time = 0; /* Unused today */ job_desc->environment = get_job_env(job_ptr, diff --git a/src/slurmctld/job_scheduler.c b/src/slurmctld/job_scheduler.c index 9c1469870b3098dfa4fc0681e1433f507bfc2b5f..0327c11ca45a974fe7947d640bd616edbacaf9a7 100644 --- a/src/slurmctld/job_scheduler.c +++ b/src/slurmctld/job_scheduler.c @@ -64,6 +64,7 @@ #include "src/common/power.h" #include "src/common/slurm_accounting_storage.h" #include "src/common/slurm_acct_gather.h" +#include "src/common/parse_time.h" #include "src/common/timers.h" #include "src/common/uid.h" #include "src/common/xassert.h" @@ -970,6 +971,60 @@ static void *_sched_agent(void *args) return NULL; } +/* Determine if job's deadline specification is still valid, kill job if not + * job_ptr IN - Job to test + * func IN - function name used for logging, "sched" or "backfill" + * RET - true if valid, false if invalid and job cancelled + */ +extern bool deadline_ok(struct job_record *job_ptr, char *func) +{ + time_t now; + char time_str_deadline[32]; + bool fail_job = false; + time_t inter; + + now = time(NULL); + if ((job_ptr->time_min) && (job_ptr->time_min != NO_VAL)) { + inter = now + job_ptr->time_min * 60; + if (job_ptr->deadline < inter) { + slurm_make_time_str(&job_ptr->deadline, + time_str_deadline, + sizeof(time_str_deadline)); + info("%s: JobId %u with time_min %u exceeded deadline " + "%s and cancelled ", + func, job_ptr->job_id, job_ptr->time_min, + time_str_deadline); + fail_job = true; + } + } else if ((job_ptr->time_limit != NO_VAL) && + (job_ptr->time_limit != 
INFINITE)) { + inter = now + job_ptr->time_limit * 60; + if (job_ptr->deadline < inter) { + slurm_make_time_str(&job_ptr->deadline, + time_str_deadline, + sizeof(time_str_deadline)); + info("%s: JobId %u with time_limit %u exceeded " + "deadline %s and cancelled ", + func, job_ptr->job_id, job_ptr->time_limit, + time_str_deadline); + fail_job = true; + } + } + if (fail_job) { + last_job_update = now; + job_ptr->job_state = JOB_DEADLINE; + job_ptr->exit_code = 1; + job_ptr->state_reason = FAIL_DEADLINE; + xfree(job_ptr->state_desc); + job_ptr->start_time = now; + job_ptr->end_time = now; + srun_allocate_abort(job_ptr); + job_completion_logger(job_ptr, false); + return false; + } + return true; +} + static int _schedule(uint32_t job_limit) { ListIterator job_iterator = NULL, part_iterator = NULL; @@ -1011,6 +1066,7 @@ static int _schedule(uint32_t job_limit) char job_id_buf[32]; char *unavail_node_str = NULL; bool fail_by_part; + uint32_t deadline_time_limit, save_time_limit; #if HAVE_SYS_PRCTL_H char get_name[16]; #endif @@ -1484,6 +1540,32 @@ next_task: } } + deadline_time_limit = 0; + if ((job_ptr->deadline) && (job_ptr->deadline != NO_VAL)) { + if (!deadline_ok(job_ptr, "sched")) + continue; + + deadline_time_limit = job_ptr->deadline - now; + deadline_time_limit /= 60; + if ((job_ptr->time_limit != NO_VAL) && + (job_ptr->time_limit != INFINITE)) { + deadline_time_limit = MIN(job_ptr->time_limit, + deadline_time_limit); + } else { + if ((job_ptr->part_ptr->default_time != NO_VAL) && + (job_ptr->part_ptr->default_time != INFINITE)){ + deadline_time_limit = MIN( + job_ptr->part_ptr->default_time, + deadline_time_limit); + } else if ((job_ptr->part_ptr->max_time != NO_VAL) && + (job_ptr->part_ptr->max_time != INFINITE)){ + deadline_time_limit = MIN( + job_ptr->part_ptr->max_time, + deadline_time_limit); + } + } + } + if (!acct_policy_job_runnable_state(job_ptr) && !acct_policy_job_runnable_pre_select(job_ptr)) continue; @@ -1569,9 +1651,15 @@ next_task: continue; } 
+ if (deadline_time_limit) { + save_time_limit = job_ptr->time_limit; + job_ptr->time_limit = deadline_time_limit; + } error_code = select_nodes(job_ptr, false, NULL, unavail_node_str, NULL); fail_by_part = false; + if ((error_code != SLURM_SUCCESS) && deadline_time_limit) + job_ptr->time_limit = save_time_limit; if ((error_code == ESLURM_NODES_BUSY) || (error_code == ESLURM_POWER_NOT_AVAIL) || (error_code == ESLURM_POWER_RESERVED)) { diff --git a/src/slurmctld/job_scheduler.h b/src/slurmctld/job_scheduler.h index 6913e9eb664b6ab3ecaf7928eeffae67db42da5f..70842010af0a763345cb9d0f367004c247a5034d 100644 --- a/src/slurmctld/job_scheduler.h +++ b/src/slurmctld/job_scheduler.h @@ -73,7 +73,15 @@ extern List build_job_queue(bool clear_start, bool backfill); /* Given a scheduled job, return a pointer to it batch_job_launch_msg_t data */ extern batch_job_launch_msg_t *build_launch_job_msg( struct job_record *job_ptr, - uint16_t protocol_versin); + uint16_t protocol_version); + +/* Determine if job's deadline specification is still valid, kill job if not + * job_ptr IN - Job to test + * func IN - function name used for logging, "sched" or "backfill" + * RET - true if valid, false if invalid and job cancelled + */ +extern bool deadline_ok(struct job_record *job_ptr, char *func); + /* * epilog_slurmctld - execute the prolog_slurmctld for a job that has just * terminated. 
diff --git a/src/slurmctld/slurmctld.h b/src/slurmctld/slurmctld.h index 619e6fdeb7f3112664c1790744557a0fd0681b24..878e10d4903c5d8e830d560ef4d0bb2940d422f9 100644 --- a/src/slurmctld/slurmctld.h +++ b/src/slurmctld/slurmctld.h @@ -611,6 +611,7 @@ struct job_record { * bluegene and the linear plugins * 0 if cr is NOT enabled, * 1 if cr is enabled */ + time_t deadline; /* deadline */ uint32_t db_index; /* used only for database * plugins */ uint32_t derived_ec; /* highest exit code of all job steps */ diff --git a/src/squeue/opts.c b/src/squeue/opts.c index a90fbc531d44931eff3397c1c5dd1d74ff0612c3..76193a1ecafe264221dbb9266921d377654231a9 100644 --- a/src/squeue/opts.c +++ b/src/squeue/opts.c @@ -1475,7 +1475,12 @@ extern int parse_long_format( char* format_long ) right_justify, suffix ); else if (!strcasecmp(token, "mcslabel")) - job_format_add_mcs_label( params.format_list, + job_format_add_mcs_label(params.format_list, + field_size, + right_justify, + suffix ); + else if (!strcasecmp(token, "deadline")) + job_format_add_deadline(params.format_list, field_size, right_justify, suffix ); diff --git a/src/squeue/print.c b/src/squeue/print.c index 8c3cb9b684e8898054b3224a97d1c0db6d1e1cc7..f9a6839d54785e6e78646cfaa7b0f86d6631d018 100644 --- a/src/squeue/print.c +++ b/src/squeue/print.c @@ -855,7 +855,16 @@ int _print_job_time_start(job_info_t * job, int width, bool right, printf("%s", suffix); return SLURM_SUCCESS; } - +int _print_job_deadline(job_info_t * job, int width, bool right, char* suffix) +{ + if (job == NULL) /* Print the Header instead */ + _print_str("DEADLINE", width, right, true); + else + _print_time(job->deadline, 0, width, right); + if (suffix) + printf("%s", suffix); + return SLURM_SUCCESS; +} int _print_job_time_end(job_info_t * job, int width, bool right, char* suffix) { if (job == NULL) /* Print the Header instead */ @@ -958,6 +967,7 @@ int _print_job_reason_list(job_info_t * job, int width, bool right, } else if (!IS_JOB_COMPLETING(job) && 
(IS_JOB_PENDING(job) || IS_JOB_TIMEOUT(job) + || IS_JOB_DEADLINE(job) || IS_JOB_FAILED(job))) { char *reason_fmt = NULL, *reason = NULL; if (job->state_desc) diff --git a/src/squeue/print.h b/src/squeue/print.h index 47fa169fa0d254905262e8f7bc1e393414609873..8fe95efd2e3861e0f2e114f862b3e1145b25c20d 100644 --- a/src/squeue/print.h +++ b/src/squeue/print.h @@ -141,6 +141,8 @@ int job_format_add_function(List list, int width, bool right_justify, job_format_add_function(list,wid,right,suffix,_print_job_time_submit) #define job_format_add_time_start(list,wid,right,suffix) \ job_format_add_function(list,wid,right,suffix,_print_job_time_start) +#define job_format_add_deadline(list,wid,right,suffix) \ + job_format_add_function(list,wid,right,suffix,_print_job_deadline) #define job_format_add_time_end(list,wid,right,suffix) \ job_format_add_function(list,wid,right,suffix,_print_job_time_end) #define job_format_add_priority(list,wid,right,suffix) \ @@ -334,6 +336,8 @@ int _print_job_time_start(job_info_t * job, int width, bool right_justify, char* suffix); int _print_job_time_end(job_info_t * job, int width, bool right_justify, char* suffix); +int _print_job_deadline(job_info_t * job, int width, bool right_justify, + char* suffix); int _print_job_priority(job_info_t * job, int width, bool right_justify, char* suffix); int _print_job_priority_long(job_info_t * job, int width, bool right_justify, diff --git a/src/srun/libsrun/allocate.c b/src/srun/libsrun/allocate.c index 3cefb86a13f001f23e680f29bcaa159446559dd1..52ed3d26b576ba84607add2bffb42eb106ee6a32 100644 --- a/src/srun/libsrun/allocate.c +++ b/src/srun/libsrun/allocate.c @@ -752,6 +752,8 @@ job_desc_msg_create_from_opts (void) j->burst_buffer = opt.burst_buffer; if (opt.begin) j->begin_time = opt.begin; + if (opt.deadline) + j->deadline = opt.deadline; if (opt.licenses) j->licenses = opt.licenses; if (opt.network) diff --git a/src/srun/libsrun/opt.c b/src/srun/libsrun/opt.c index 
242d263bed95dc9a3c167ceb8c2fb1dcf5fa1413..927d0aeecc394efd7eaae7cfaf4811cf6dcd0bf0 100644 --- a/src/srun/libsrun/opt.c +++ b/src/srun/libsrun/opt.c @@ -209,6 +209,7 @@ #define LONG_OPT_PRIORITY 0x160 #define LONG_OPT_ACCEL_BIND 0x161 #define LONG_OPT_MCS_LABEL 0x165 +#define LONG_OPT_DEADLINE 0x166 extern char **environ; @@ -935,6 +936,7 @@ static void _set_options(const int argc, char **argv) {"cores-per-socket", required_argument, 0, LONG_OPT_CORESPERSOCKET}, {"cpu_bind", required_argument, 0, LONG_OPT_CPU_BIND}, {"cpu-freq", required_argument, 0, LONG_OPT_CPU_FREQ}, + {"deadline", required_argument, 0, LONG_OPT_DEADLINE}, {"debugger-test", no_argument, 0, LONG_OPT_DEBUG_TS}, {"epilog", required_argument, 0, LONG_OPT_EPILOG}, {"exclusive", optional_argument, 0, LONG_OPT_EXCLUSIVE}, @@ -1257,6 +1259,14 @@ static void _set_options(const int argc, char **argv) case LONG_OPT_CONT: opt.contiguous = true; break; + case LONG_OPT_DEADLINE: + opt.deadline = parse_time(optarg, 0); + if (errno == ESLURM_INVALID_TIME_VALUE) { + error("Invalid deadline specification %s", + optarg); + exit(error_exit); + } + break; case LONG_OPT_EXCLUSIVE: if (optarg == NULL) { opt.exclusive = true; @@ -2244,7 +2254,10 @@ static bool _opt_verify(void) if (opt.time_min == 0) opt.time_min = INFINITE; } - + if ((opt.deadline) && (opt.begin) && (opt.deadline < opt.begin)) { + error("Incompatible begin and deadline time specification"); + exit(error_exit); + } if (opt.ckpt_interval_str) { opt.ckpt_interval = time_str2mins(opt.ckpt_interval_str); if ((opt.ckpt_interval < 0) && @@ -2555,6 +2568,11 @@ static void _opt_list(void) slurm_make_time_str(&opt.begin, time_str, sizeof(time_str)); info("begin : %s", time_str); } + if (opt.deadline) { + char time_str[32]; + slurm_make_time_str(&opt.deadline, time_str, sizeof(time_str)); + info("deadline : %s", time_str); + } info("prolog : %s", opt.prolog); info("epilog : %s", opt.epilog); info("mail_type : %s", print_mail_type(opt.mail_type)); @@ -2705,6 
+2723,8 @@ static void _help(void) " --comment=name arbitrary comment\n" " --cpu-freq=min[-max[:gov]] requested cpu frequency (and governor)\n" " -d, --dependency=type:jobid defer job until condition on jobid is satisfied\n" +" --deadline=time remove the job if no ending possible before\n" +" this deadline (start > (deadline - time[-min]))\n" " -D, --chdir=path change remote current working directory\n" " --export=env_vars|NONE environment variables passed to launcher with\n" " optional values or NONE (pass no variables)\n" diff --git a/src/srun/libsrun/opt.h b/src/srun/libsrun/opt.h index d3f32cbaa66540bba6057c605d2132ef0b963179..f771b8734d1ccb3a5137194583af24af26ef69b9 100644 --- a/src/srun/libsrun/opt.h +++ b/src/srun/libsrun/opt.h @@ -239,6 +239,7 @@ typedef struct srun_options { uint32_t cpu_freq_gov; /* cpu frequency governor */ uint8_t power_flags; /* Power management options */ char *mcs_label; /* mcs label if mcs plugin in use */ + time_t deadline; /* --deadline */ } opt_t; extern opt_t opt; diff --git a/src/sstat/options.c b/src/sstat/options.c index 39c2035f7468324c7cfaece1f31ee6719ada1651..7a04e7eed41440260d4ff34aa672c604d3c58e62 100644 --- a/src/sstat/options.c +++ b/src/sstat/options.c @@ -334,6 +334,8 @@ int decode_state_char(char *state) return JOB_NODE_FAIL; else if (!strcasecmp(state, "pr")) return JOB_PREEMPTED; + else if (!strcasecmp(state, "dl")) + return JOB_DEADLINE; else return -1; // unknown } diff --git a/src/sview/job_info.c b/src/sview/job_info.c index d537b924627c85669cdf97556e496bedaff150ca..9c9af53d2a78a67e2701e5075627fe7f9aeffc51 100644 --- a/src/sview/job_info.c +++ b/src/sview/job_info.c @@ -127,6 +127,7 @@ enum { SORTID_CPU_MAX, SORTID_CPU_MIN, SORTID_CPUS_PER_TASK, + SORTID_DEADLINE, SORTID_DEPENDENCY, SORTID_DERIVED_EC, SORTID_EXIT_CODE, @@ -318,6 +319,8 @@ static display_data_t display_data_job[] = { EDIT_TEXTBOX, refresh_job, create_model_job, admin_edit_job}, {G_TYPE_STRING, SORTID_TIME_END, "Time End", FALSE, EDIT_NONE, 
refresh_job, create_model_job, admin_edit_job}, + {G_TYPE_STRING, SORTID_DEADLINE, "Deadline", FALSE, + EDIT_TEXTBOX, refresh_job, create_model_job, admin_edit_job}, {G_TYPE_STRING, SORTID_TIME_SUSPEND, "Time Suspended", FALSE, EDIT_NONE, refresh_job, create_model_job, admin_edit_job}, {G_TYPE_STRING, SORTID_TIMELIMIT, "Time Limit", FALSE, @@ -1172,6 +1175,14 @@ static const char *_set_job_msg(job_desc_msg_t *job_msg, const char *new_text, if (job_msg->begin_time < time(NULL)) job_msg->begin_time = time(NULL); break; + case SORTID_DEADLINE: + type = "deadline"; + job_msg->deadline = parse_time((char *)new_text, 0); + if (!job_msg->deadline) + goto return_error; + if (job_msg->deadline < time(NULL)) + goto return_error; + break; case SORTID_STD_OUT: type = "StdOut"; job_msg->std_out = xstrdup(new_text); @@ -1549,6 +1560,15 @@ static void _layout_job_record(GtkTreeView *treeview, find_col_name(display_data_job, SORTID_CPUS_PER_TASK), tmp_char); + if (job_ptr->deadline) + slurm_make_time_str((time_t *)&job_ptr->deadline, tmp_char, + sizeof(tmp_char)); + else + sprintf(tmp_char, "N/A"); + add_display_treestore_line(update, treestore, &iter, + find_col_name(display_data_job, + SORTID_DEADLINE), + tmp_char); add_display_treestore_line(update, treestore, &iter, find_col_name(display_data_job, @@ -2020,7 +2040,7 @@ static void _update_job_record(sview_job_info_t *sview_job_info_ptr, char tmp_prio[40], tmp_nice[40], tmp_preempt_time[40]; char tmp_rqswitch[40], tmp_core_spec[40], tmp_job_id[400]; char tmp_std_err[128], tmp_std_in[128], tmp_std_out[128]; - char tmp_thread_spec[40]; + char tmp_thread_spec[40], tmp_time_deadline[40]; char *tmp_batch, *tmp_cont, *tmp_shared, *tmp_requeue, *tmp_uname; char *tmp_reboot, *tmp_reason, *tmp_nodes; char time_buf[32]; @@ -2294,6 +2314,12 @@ static void _update_job_record(sview_job_info_t *sview_job_info_ptr, slurm_make_time_str((time_t *)&job_ptr->submit_time, tmp_time_submit, sizeof(tmp_time_submit)); + if (job_ptr->deadline) + 
slurm_make_time_str((time_t *)&job_ptr->deadline, tmp_time_deadline, + sizeof(tmp_time_deadline)); + else + sprintf(tmp_time_deadline, "N/A"); + slurm_get_job_stderr(tmp_std_err, sizeof(tmp_std_err), job_ptr); slurm_get_job_stdin(tmp_std_in, sizeof(tmp_std_in), job_ptr); @@ -2371,6 +2397,7 @@ static void _update_job_record(sview_job_info_t *sview_job_info_ptr, SORTID_CPU_MIN, tmp_cpu_cnt, SORTID_CPUS_PER_TASK,tmp_cpus_per_task, SORTID_CPU_REQ, tmp_cpu_req, + SORTID_DEADLINE, tmp_time_deadline, SORTID_DEPENDENCY, job_ptr->dependency, SORTID_DERIVED_EC, tmp_derived_ec, SORTID_EXIT_CODE, tmp_exit, diff --git a/src/sview/popups.c b/src/sview/popups.c index ef48764b44f1da42f5dfefa8084ad6c90b63c039..b298a38e3fc269daa8fc89197e73d84ee370e41d 100644 --- a/src/sview/popups.c +++ b/src/sview/popups.c @@ -690,6 +690,7 @@ extern void create_search_popup(GtkAction *action, gpointer user_data) {G_TYPE_NONE, JOB_NODE_FAIL, "Node Failure", TRUE, -1}, {G_TYPE_NONE, JOB_PREEMPTED, "Preempted", TRUE, -1}, {G_TYPE_NONE, JOB_BOOT_FAIL, "Boot Failure", TRUE, -1}, + {G_TYPE_NONE, JOB_DEADLINE, "Deadline", TRUE, -1}, {G_TYPE_NONE, -1, NULL, FALSE, -1} }; diff --git a/testsuite/expect/Makefile.am b/testsuite/expect/Makefile.am index 9e3a01c9ea7b273336798e238f9834ff17f14a79..048678cc3e81c75e9fd642061e5b3c8493861ddb 100644 --- a/testsuite/expect/Makefile.am +++ b/testsuite/expect/Makefile.am @@ -126,6 +126,17 @@ EXTRA_DIST = \ test1.99 \ test1.100 \ test1.101 \ + test1.102 \ + test1.103 \ + test1.104 \ + test1.105 \ + test1.106 \ + test1.107 \ + test1.108 \ + test1.109 \ + test1.110 \ + test1.111 \ + test1.112 \ test2.1 \ test2.2 \ test2.3 \ @@ -371,6 +382,15 @@ EXTRA_DIST = \ test15.25 \ test15.26 \ test15.27 \ + test15.30 \ + test15.31 \ + test15.32 \ + test15.33 \ + test15.34 \ + test15.35 \ + test15.36 \ + test15.37 \ + test15.38 \ test16.1 \ test16.2 \ test16.3 \ @@ -420,6 +440,15 @@ EXTRA_DIST = \ test17.42 \ test17.51 \ test17.52 \ + test17.53 \ + test17.54 \ + test17.55 \ + test17.56 
\ + test17.57 \ + test17.58 \ + test17.59 \ + test17.60 \ + test17.61 \ test19.1 \ test19.2 \ test19.3 \ diff --git a/testsuite/expect/Makefile.in b/testsuite/expect/Makefile.in index 4f2e1dfbe8461eed8fbe38c4cb6c3cbc22da4259..c40970b1669ff139703465723208ed7777baeb38 100644 --- a/testsuite/expect/Makefile.in +++ b/testsuite/expect/Makefile.in @@ -527,6 +527,17 @@ EXTRA_DIST = \ test1.99 \ test1.100 \ test1.101 \ + test1.102 \ + test1.103 \ + test1.104 \ + test1.105 \ + test1.106 \ + test1.107 \ + test1.108 \ + test1.109 \ + test1.110 \ + test1.111 \ + test1.112 \ test2.1 \ test2.2 \ test2.3 \ @@ -772,6 +783,15 @@ EXTRA_DIST = \ test15.25 \ test15.26 \ test15.27 \ + test15.30 \ + test15.31 \ + test15.32 \ + test15.33 \ + test15.34 \ + test15.35 \ + test15.36 \ + test15.37 \ + test15.38 \ test16.1 \ test16.2 \ test16.3 \ @@ -821,6 +841,15 @@ EXTRA_DIST = \ test17.42 \ test17.51 \ test17.52 \ + test17.53 \ + test17.54 \ + test17.55 \ + test17.56 \ + test17.57 \ + test17.58 \ + test17.59 \ + test17.60 \ + test17.61 \ test19.1 \ test19.2 \ test19.3 \ diff --git a/testsuite/expect/README b/testsuite/expect/README index 8f0569972e51c1226b2b38ae6e365fc16868b966..1438846185371df964f108fd00bc12e4d7551c85 100644 --- a/testsuite/expect/README +++ b/testsuite/expect/README @@ -180,10 +180,6 @@ test1.75 Test that --cpu-freq is enforced when using non-numeric values test1.76 Test that --cpu-freq sets min-max:gov test1.77 Test that job name specified at the command line overwrites the environment variable SLURM_JOB_NAME - -**NOTE** The following tests attempt to utilize multiple CPUs or partitions, - The test will print "WARNING" and terminate with an exit code of - zero if the cluster configuration does not permit proper testing. test1.80 Confirm that a job executes with the proper task distribution (--nodes and --distribution options). test1.81 Confirm that a job executes with the proper node count @@ -213,7 +209,17 @@ test1.98 AVAILBLE... 
test1.99 Validate that SrunPortRange is enforced when using srun test1.100 Test of pack/nopack task distribution. test1.101 Test of --bcast option. -**NOTE** The above tests for multiple processor/partition systems only +test1.102 Test of --deadline option too late +test1.103 Test of --deadline option too short +test1.104 Test of --deadline option and time limit partition +test1.105 Test of --deadline option and no time +test1.106 Test of --deadline option and --time-min too long +test1.107 Test of --deadline option and --time-min and time too long +test1.108 Test of --deadline option and --begin option too late +test1.109 Test of --deadline and --begin option and --time-min too long +test1.110 Test of --deadline and --begin option and --time-min OK +test1.111 Test of --deadline and --begin option and time to be changed +test1.112 Test of --deadline and --begin option and time not changed test2.# Testing of scontrol options (to be run as unprivileged user). ======================================================================== @@ -506,6 +512,15 @@ test15.24 Test of --overcommit option. test15.25 Test of job account(--account option). test15.26 Test interruption of salloc while waiting for resource allocation. test15.27 Test salloc --exclusive allocates all cpus on node. +test15.30 Test of --deadline option too late +test15.31 Test of --deadline option too short +test15.32 Test of --deadline option and time limit partition +test15.33 Test of --deadline option and no time +test15.34 Test of --deadline option and --time-min too long +test15.35 Test of --deadline option and --time-min and time too long +test15.36 Test of --deadline option and --begin option too late +test15.37 Test of --deadline and --begin option and --time-min too long +test15.38 Test of --deadline and --begin option and --time-min OK test16.# Testing of sattach options. 
====================================== @@ -577,6 +592,16 @@ test17.41 Validate that sbatch, srun, salloc return partition error message. test17.42 Test of sbatch --wait option. test17.51 Validate that the mcs plugin (mcs/user) is OK with sbatch test17.52 Validate that the mcs plugin (mcs/group) is OK with sbatch +test17.53 Test of --deadline option too late +test17.54 Test of --deadline option too short +test17.55 Test of --deadline option and time limit partition +test17.56 Test of --deadline option and no time +test17.57 Test of --deadline option and time_min too long +test17.58 Test of --deadline option and time_min and time too long +test17.59 Test of --deadline option and --begin option too late +test17.60 Test of --deadline and --begin option and --time_min too long +test17.61 Test of --deadline and --begin option and --time_min OK + test19.# Testing of strigger options. ======================================= diff --git a/testsuite/expect/globals b/testsuite/expect/globals index a9241fa1c87b834322ed5ab3b45763a3548c578d..fe01fa96bbbf50d479b7a3c07e4a734485c42189 100755 --- a/testsuite/expect/globals +++ b/testsuite/expect/globals @@ -598,6 +598,7 @@ proc wait_for_job { job_id desired_state } { switch $state { "NOT_FOUND" - "CANCELLED" - + "DEADLINE" - "FAILED" - "TIMEOUT" - "NODE_FAIL" - diff --git a/testsuite/expect/test1.102 b/testsuite/expect/test1.102 new file mode 100755 index 0000000000000000000000000000000000000000..ad358477f78118e2926979aaa552acfd4c4afc1a --- /dev/null +++ b/testsuite/expect/test1.102 @@ -0,0 +1,71 @@ +#!/usr/bin/expect +############################################################################ +# Purpose: Test of SLURM functionality +# Test of --deadline option too late +# +# Output: "TEST: #.#" followed by "SUCCESS" if test was successful, OR +# "FAILURE: ..." otherwise with an explanation of the failure, OR +# anything else indicates a failure mode that must be investigated. 
+############################################################################ +# Copyright (C) 2015 CEA/DAM/DIF +# Written by Aline Roy <aline.roy@cea.fr> +# +# This file is part of SLURM, a resource management program. +# For details, see <http://slurm.schedmd.com/>. +# Please also read the included file: DISCLAIMER. +# +# SLURM is free software; you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free +# Software Foundation; either version 2 of the License, or (at your option) +# any later version. +# +# SLURM is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +# details. +# +# You should have received a copy of the GNU General Public License along +# with SLURM; if not, write to the Free Software Foundation, Inc., +# 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. 
+############################################################################ +source ./globals + +set test_id "1.102" +set exit_code 0 +set job_id 0 + +print_header $test_id + +# +# Spawn a job whose deadline is a fixed date in the past; srun must not complete it +# +set timeout $max_job_delay +set srun_pid 0 +set srun_pid [spawn $srun -v --deadline 2015-03-27T04:30:00 -t1 $bin_sleep 1] +expect { + -re "Requested time limit is invalid" { + set exit_code 0 + exp_continue + } + -re "unrecognized option '--deadline'" { + set exit_code 1 + exp_continue + } + -re "Completed" { + send_user "\nFAILURE: srun is OK with a deadline too old\n" + set exit_code 1 + exit 1 + } + timeout { + send_user "\nFAILURE: srun not responding\n" + set exit_code 1 + } + eof { + wait + } +} + +if {$exit_code == 0} { + send_user "\nSUCCESS\n" +} +exit $exit_code diff --git a/testsuite/expect/test1.103 b/testsuite/expect/test1.103 new file mode 100755 index 0000000000000000000000000000000000000000..68d4b038c15c5a0772fb5f7b701b71da465b9023 --- /dev/null +++ b/testsuite/expect/test1.103 @@ -0,0 +1,91 @@ +#!/usr/bin/expect +############################################################################ +# Purpose: Test of SLURM functionality +# Test of --deadline option too short +# +# Output: "TEST: #.#" followed by "SUCCESS" if test was successful, OR +# "FAILURE: ..." otherwise with an explanation of the failure, OR +# anything else indicates a failure mode that must be investigated. +############################################################################ +# Copyright (C) 2015 CEA/DAM/DIF +# Written by Aline Roy <aline.roy@cea.fr> +# +# This file is part of SLURM, a resource management program. +# For details, see <http://slurm.schedmd.com/>. +# Please also read the included file: DISCLAIMER. 
+# +# SLURM is free software; you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free +# Software Foundation; either version 2 of the License, or (at your option) +# any later version. +# +# SLURM is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +# details. +# +# You should have received a copy of the GNU General Public License along +# with SLURM; if not, write to the Free Software Foundation, Inc., +# 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. +############################################################################ +source ./globals + +set test_id "1.103" +set job_id 0 +set exit_code 0 + +print_header $test_id + +# +# Spawn a job that deadline is too short +# +set timeout $max_job_delay +set srun_pid [spawn $srun -v -N1 --deadline now+1hour -t120 --time-min=1 $bin_sleep 1] +expect { + -re "Requested time limit is invalid" { + set exit_code 1 + exp_continue + } + -re "launching ($number).($number) on host" { + set job_id $expect_out(1,string) + set exit_code 0 + exp_continue + } + timeout { + send_user "\nFAILURE: srun not responding\n" + set exit_code 1 + } + eof { + wait + } +} +if {$job_id == 0} { + send_user "\nFAILURE: Job launch failed\n" + set exit_code 1 +} else { + spawn $scontrol show job $job_id + expect { + -re "TimeLimit=01:00:00" { + send_user "\nTimelimit OK\n" + } + -re "TimeLimit=00:59:" { + send_user "\nTimelimit OK\n" + } + -re "TimeLimit=" { + send_user "\nFAILURE: Timelimit not OK with deadline\n" + set exit_code 1 + } + timeout { + send_user "\nFAILURE: scontrol not responding\n" + set exit_code 1 + } + eof { + wait + } + } +} + +if {$exit_code == 0} { + send_user "\nSUCCESS\n" +} +exit $exit_code diff --git a/testsuite/expect/test1.104 b/testsuite/expect/test1.104 new file mode 100755 index 
0000000000000000000000000000000000000000..61e5f8ea43a0d371effb33f6c1216e4121bdaec0 --- /dev/null +++ b/testsuite/expect/test1.104 @@ -0,0 +1,250 @@ +#!/usr/bin/expect +############################################################################ +# Purpose: Test of SLURM functionality +# Test of --deadline option and time limit partition +# +# Output: "TEST: #.#" followed by "SUCCESS" if test was successful, OR +# "FAILURE: ..." otherwise with an explanation of the failure, OR +# anything else indicates a failure mode that must be investigated. +############################################################################ +# Copyright (C) 2015 CEA/DAM/DIF +# Written by Aline Roy <aline.roy@cea.fr> +# +# This file is part of SLURM, a resource management program. +# For details, see <http://slurm.schedmd.com/>. +# Please also read the included file: DISCLAIMER. +# +# SLURM is free software; you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free +# Software Foundation; either version 2 of the License, or (at your option) +# any later version. +# +# SLURM is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +# details. +# +# You should have received a copy of the GNU General Public License along +# with SLURM; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+############################################################################ +source ./globals + +set test_id "1.104" +set file_in "test$test_id.bash" +set test_srun "test$test_id.test_srun" +set job_id 0 +set authorized 1 +set exit_code 0 +set part_name "QA_TEST" +set test_script "./test$test_id.bash" + + +print_header $test_id + +if {[test_super_user] == 0} { + send_user "\nWARNING: can not test more unless SlurmUser or root\n" + exit $exit_code +} + +# +# Confirm the partition name does not already exist +# +set found -1 +spawn $scontrol -a show part $part_name +expect { + -re "not found" { + send_user "This error was expected, no worries\n" + set found 0 + exp_continue + } + -re "PartitionName" { + set found 1 + exp_continue + } + timeout { + send_user "\nFAILURE: scontrol not responding\n" + set exit_code 1 + } + eof { + wait + } +} +if {$found == -1} { + send_user "\nFAILURE: scontrol output format error\n" + exit 1 +} +if {$found == 1} { + send_user "\nFAILURE: partition $part_name already exists\n" + exit 1 +} + +# +# Identify usable nodes in default partition +# +set def_node [get_idle_node_in_part [default_partition]] +if {[string compare $def_node ""] == 0} { + send_user "\nFAILURE: default partition seems to have no idle nodes\n" + exit 1 +} + +# +# Create a new partition +# +spawn $scontrol create PartitionName=$part_name Nodes=$def_node MaxTime=30 +expect { + -re "error" { + send_user "\nFAILURE: something bad happened on partition create\n" + set exit_code 1 + exp_continue + } + timeout { + send_user "\nFAILURE: scontrol not responding\n" + set exit_code 1 + } + eof { + wait + } +} + +# +# Confirm the partition now exists +# +set allow 0 +set found -1 +spawn $scontrol show part $part_name +expect { + -re "not found" { + set found 0 + exp_continue + } + -re "PartitionName" { + set found 1 + exp_continue + } + timeout { + send_user "\nFAILURE: scontrol not responding\n" + set exit_code 1 + } + eof { + wait + } +} +if {$found != 1} { + send_user 
"\nFAILURE: partition not created\n" + exit 1 +} + +set found 0 + +# +# Run a job in this new partition and validate the time limit +# +make_bash_script $test_srun " +set -m +$srun -N1 --deadline now+1hour -p $part_name -t100 -v $bin_sleep 1 & +" + +make_bash_script $file_in " +bash -i ./$test_srun +" + +set timeout $max_job_delay +spawn bash -i $file_in +expect { + -re "job ($number) queued and waiting for resources" { + set job_id $expect_out(1,string) + set exit_code 0 + } + timeout { + send_user "\nFAILURE: srun not responding\n" + set exit_code 1 + } + eof { + wait + } +} +# +# verify time limit. Add a temporization to wait for update reason +# +if {$job_id == 0} { + send_user "\nFAILURE: job_id = 0 : salloc not OK\n" + set exit_code 1 +} else { + set cycle_count 8 + for {set inx 0} {$inx < $cycle_count} {incr inx} { + spawn $scontrol show job $job_id + expect { + -re "Reason=PartitionTimeLimit" { + set exit_code 0 + break + } + -re "Reason=" { + sleep 5 + set exit_code 1 + exp_continue + } + timeout { + send_user "\nFAILURE: scontrol not responding\n" + set exit_code 1 + } + eof { + wait + } + } + } + if {$exit_code == 1} { + if {$cycle_count == 8} { + send_user "\nFAILURE: partition time limit not used \n" + } + } + cancel_job $job_id +} + +# +# Now delete the partition +# +spawn $scontrol delete PartitionName=$part_name +expect { + timeout { + send_user "\nFAILURE: scontrol not responding\n" + set exit_code 1 + } + eof { + wait + } +} + +# +# Confirm the partition is now gone +# +set found -1 +spawn $scontrol show part $part_name +expect { + -re "not found" { + send_user "This error was expected, no worries\n" + set found 0 + exp_continue + } + -re "PartitionName" { + set found 1 + exp_continue + } + timeout { + send_user "\nFAILURE: scontrol not responding\n" + set exit_code 1 + } + eof { + wait + } +} +if {$found != 0} { + send_user "\nFAILURE: partition not deleted\n" + exit 1 +} + +if {$exit_code == 0} { + exec $bin_rm $test_srun $file_in + send_user 
"\nSUCCESS\n" +} +exit $exit_code diff --git a/testsuite/expect/test1.105 b/testsuite/expect/test1.105 new file mode 100755 index 0000000000000000000000000000000000000000..20b05997f38b494dae7c79f3fc934009d24128c3 --- /dev/null +++ b/testsuite/expect/test1.105 @@ -0,0 +1,91 @@ +#!/usr/bin/expect +############################################################################ +# Purpose: Test of SLURM functionality +# Test of --deadline option and no time +# +# Output: "TEST: #.#" followed by "SUCCESS" if test was successful, OR +# "FAILURE: ..." otherwise with an explanation of the failure, OR +# anything else indicates a failure mode that must be investigated. +############################################################################ +# Copyright (C) 2015 CEA/DAM/DIF +# Written by Aline Roy <aline.roy@cea.fr> +# +# This file is part of SLURM, a resource management program. +# For details, see <http://slurm.schedmd.com/>. +# Please also read the included file: DISCLAIMER. +# +# SLURM is free software; you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free +# Software Foundation; either version 2 of the License, or (at your option) +# any later version. +# +# SLURM is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +# details. +# +# You should have received a copy of the GNU General Public License along +# with SLURM; if not, write to the Free Software Foundation, Inc., +# 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. 
+############################################################################ +source ./globals + +set test_id "1.105" +set exit_code 0 +set job_id 0 + +print_header $test_id + +# +# Spawn a job with a deadline and no time limit +# +set timeout $max_job_delay +set srun_pid [spawn $srun --verbose -N1 --deadline now+600 $bin_sleep 1] +expect { + -re "Requested time limit is invalid" { + set exit_code 1 + exp_continue + } + -re "launching ($number).($number) on host" { + set job_id $expect_out(1,string) + set exit_code 0 + exp_continue + } + timeout { + send_user "\nFAILURE: srun not responding\n" + set exit_code 1 + } + eof { + wait + } +} +if {$job_id == 0} { + send_user "\nFAILURE: job not submitted\n" + exit 1 +} else { + spawn $scontrol show job $job_id + expect { + -re "TimeLimit=00:10:" { + send_user "\nTimelimit OK\n" + } + -re "TimeLimit=00:09:" { + send_user "\nTimelimit OK\n" + } + -re "TimeLimit=" { + send_user "\nFAILURE : Timelimit not OK with deadline\n" + set exit_code 1 + } + timeout { + send_user "\nFAILURE: scontrol not responding\n" + set exit_code 1 + } + eof { + wait + } + } +} + +if {$exit_code == 0} { + send_user "\nSUCCESS\n" +} +exit $exit_code diff --git a/testsuite/expect/test1.106 b/testsuite/expect/test1.106 new file mode 100755 index 0000000000000000000000000000000000000000..4d2b18127348ddd5da4850b6ed9bd9cc223079c2 --- /dev/null +++ b/testsuite/expect/test1.106 @@ -0,0 +1,78 @@ +#!/usr/bin/expect +############################################################################ +# Purpose: Test of SLURM functionality +# Test of --deadline option and time_min too long +# +# Output: "TEST: #.#" followed by "SUCCESS" if test was successful, OR +# "FAILURE: ..." otherwise with an explanation of the failure, OR +# anything else indicates a failure mode that must be investigated. 
+############################################################################ +# Copyright (C) 2015 CEA/DAM/DIF +# Written by Aline Roy <aline.roy@cea.fr> +# +# This file is part of SLURM, a resource management program. +# For details, see <http://slurm.schedmd.com/>. +# Please also read the included file: DISCLAIMER. +# +# SLURM is free software; you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free +# Software Foundation; either version 2 of the License, or (at your option) +# any later version. +# +# SLURM is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +# details. +# +# You should have received a copy of the GNU General Public License along +# with SLURM; if not, write to the Free Software Foundation, Inc., +# 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. 
+############################################################################
+source ./globals
+
+set test_id     "1.106"
+set exit_code   0
+set job_id      0
+
+print_header $test_id
+
+#
+# Spawn a job with a deadline and a time-min longer than the deadline allows
+#
+set timeout $max_job_delay
+set time_error 0
+set srun_pid [spawn $srun -v -N1 --deadline now+1hour --time-min 120 $bin_sleep 1]
+expect {
+	-re "Requested time limit is invalid" {
+		set time_error 1
+		exp_continue
+	}
+	-re "launching ($number).($number) on host" {
+		set job_id $expect_out(1,string)
+		set exit_code 1
+		exp_continue
+	}
+	timeout {
+		send_user "\nFAILURE: srun not responding\n"
+		set exit_code 1
+	}
+	eof {
+		wait
+	}
+}
+if {$job_id != 0} {
+	send_user "\nFAILURE: job submitted despite the deadline\n"
+	cancel_job $job_id
+	exit 1
+}
+if {$time_error == 0} {
+	send_user "\nFAILURE: No time limit error\n"
+	set exit_code 1
+} else {
+	send_user "\nTime limit error expected, no worries\n"
+}
+
+if {$exit_code == 0} {
+	send_user "\nSUCCESS\n"
+}
+exit $exit_code
diff --git a/testsuite/expect/test1.107 b/testsuite/expect/test1.107
new file mode 100755
index 0000000000000000000000000000000000000000..bfb5805e17daf26b605d58778d2da032943e50b6
--- /dev/null
+++ b/testsuite/expect/test1.107
@@ -0,0 +1,95 @@
+#!/usr/bin/expect
+############################################################################
+# Purpose: Test of SLURM functionality
+#          Test of --deadline option and time_min and time too long
+#
+# Output:  "TEST: #.#" followed by "SUCCESS" if test was successful, OR
+#          "FAILURE: ..." otherwise with an explanation of the failure, OR
+#          anything else indicates a failure mode that must be investigated.
+############################################################################
+# Copyright (C) 2015 CEA/DAM/DIF
+# Written by Aline Roy <aline.roy@cea.fr>
+#
+# This file is part of SLURM, a resource management program.
+# For details, see <http://slurm.schedmd.com/>.
+# Please also read the included file: DISCLAIMER.
+#
+# SLURM is free software; you can redistribute it and/or modify it under
+# the terms of the GNU General Public License as published by the Free
+# Software Foundation; either version 2 of the License, or (at your option)
+# any later version.
+#
+# SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
+# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
+# details.
+#
+# You should have received a copy of the GNU General Public License along
+# with SLURM; if not, write to the Free Software Foundation, Inc.,
+# 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
+############################################################################
+source ./globals
+
+set test_id     "1.107"
+set exit_code   0
+set job_id      0
+
+print_header $test_id
+
+#
+# Spawn a job whose time limit (-t180) exceeds the deadline (now+1hour)
+#
+set timeout $max_job_delay
+set srun_pid [spawn $srun -v -N1 --deadline now+1hour --time-min 20 -t180 $bin_sleep 1]
+expect {
+	-re "Requested time limit is invalid" {
+		send_user "\nFAILURE: time limit error\n"
+		set exit_code 1
+		exp_continue
+	}
+	-re "launching ($number).0 on host" {
+		set job_id $expect_out(1,string)
+		exp_continue
+	}
+	timeout {
+		send_user "\nFAILURE: srun not responding\n"
+		set exit_code 1
+	}
+	eof {
+		wait
+	}
+}
+if {$job_id == 0} {
+	send_user "\nFAILURE: job not submitted\n"
+	exit 1
+}
+
+#
+# Verify the job's time limit matches the deadline (01:00:00 or 00:59:xx)
+#
+spawn $scontrol show job $job_id
+expect {
+	-re "TimeLimit=01:00:00" {
+		exp_continue
+	}
+	-re "TimeLimit=00:59:" {
+		exp_continue
+	}
+	-re "TimeLimit=" {
+		send_user "\nFAILURE: time limit is different from deadline\n"
+		set exit_code 1
+		exp_continue
+	}
+	timeout {
+		send_user "\nFAILURE: scontrol not responding\n"
+		set exit_code 1
+	}
+	eof {
+		wait
+	}
+}
+
+if {$exit_code == 0} {
+	send_user "\nSUCCESS\n"
+}
+exit $exit_code
diff --git a/testsuite/expect/test1.108 b/testsuite/expect/test1.108
new file mode
100755 index 0000000000000000000000000000000000000000..cb611d0b2bb167dc7e91013acc453a54d42e478a --- /dev/null +++ b/testsuite/expect/test1.108 @@ -0,0 +1,76 @@ +#!/usr/bin/expect +############################################################################ +# Purpose: Test of SLURM functionality +# Test of --deadline option and --begin option too late +# +# Output: "TEST: #.#" followed by "SUCCESS" if test was successful, OR +# "FAILURE: ..." otherwise with an explanation of the failure, OR +# anything else indicates a failure mode that must be investigated. +############################################################################ +# Copyright (C) 2015 CEA/DAM/DIF +# Written by Aline Roy <aline.roy@cea.fr> +# +# This file is part of SLURM, a resource management program. +# For details, see <http://slurm.schedmd.com/>. +# Please also read the included file: DISCLAIMER. +# +# SLURM is free software; you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free +# Software Foundation; either version 2 of the License, or (at your option) +# any later version. +# +# SLURM is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +# details. +# +# You should have received a copy of the GNU General Public License along +# with SLURM; if not, write to the Free Software Foundation, Inc., +# 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. 
+############################################################################
+source ./globals
+
+set test_id     "1.108"
+set exit_code   0
+set job_id      0
+
+print_header $test_id
+
+#
+# Spawn a job whose begin time (now+2hour) is later than its deadline (now+1hour)
+#
+set timeout $max_job_delay
+set srun_pid [spawn $srun -v -N1 --begin now+2hour --deadline now+1hour $bin_sleep 1]
+expect {
+	-re "Requested time limit is invalid" {
+		set exit_code 1
+		exp_continue
+	}
+	-re "Incompatible begin and deadline time specification" {
+		set exit_code 0
+		exp_continue
+	}
+	-re "launching ($number).($number) on host" {
+		set job_id $expect_out(1,string)
+		set exit_code 1
+		exp_continue
+	}
+
+	timeout {
+		send_user "\nFAILURE: srun not responding\n"
+		set exit_code 1
+	}
+	eof {
+		wait
+	}
+}
+
+if {$job_id != 0} {
+	send_user "\nFAILURE: job submitted with a deadline too short\n"
+	exit 1
+}
+
+if {$exit_code == 0} {
+	send_user "\nSUCCESS\n"
+}
+exit $exit_code
diff --git a/testsuite/expect/test1.109 b/testsuite/expect/test1.109
new file mode 100755
index 0000000000000000000000000000000000000000..e4110fa7e9a44ed6a8943693ae0b5c69d70c8d82
--- /dev/null
+++ b/testsuite/expect/test1.109
@@ -0,0 +1,80 @@
+#!/usr/bin/expect
+############################################################################
+# Purpose: Test of SLURM functionality
+#          Test of --deadline and --begin option and --time_min too long
+#
+# Output:  "TEST: #.#" followed by "SUCCESS" if test was successful, OR
+#          "FAILURE: ..." otherwise with an explanation of the failure, OR
+#          anything else indicates a failure mode that must be investigated.
+############################################################################
+# Copyright (C) 2015 CEA/DAM/DIF
+# Written by Aline Roy <aline.roy@cea.fr>
+#
+# This file is part of SLURM, a resource management program.
+# For details, see <http://slurm.schedmd.com/>.
+# Please also read the included file: DISCLAIMER.
+#
+# SLURM is free software; you can redistribute it and/or modify it under
+# the terms of the GNU General Public License as published by the Free
+# Software Foundation; either version 2 of the License, or (at your option)
+# any later version.
+#
+# SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
+# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
+# details.
+#
+# You should have received a copy of the GNU General Public License along
+# with SLURM; if not, write to the Free Software Foundation, Inc.,
+# 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
+############################################################################
+source ./globals
+
+set test_id     "1.109"
+set exit_code   0
+set job_id      0
+
+print_header $test_id
+
+#
+# Spawn a job with a deadline and no time limit and a time-min too long
+#
+set timeout [expr {$max_job_delay + 1}]
+set invalid_tl 0
+set srun_pid [spawn $srun -v --begin now+5 --deadline now+600 --time-min 11 $bin_sleep 1]
+expect {
+	-re "Requested time limit is invalid" {
+		set invalid_tl 1
+		exp_continue
+	}
+	-re "Force Terminated job ($number)" {
+		exp_continue
+	}
+	-re "job ($number) queued and waiting for resources" {
+		set job_id $expect_out(1,string)
+		exp_continue
+	}
+	timeout {
+		send_user "\nFAILURE: srun not responding\n"
+		set exit_code 1
+	}
+	eof {
+		wait
+	}
+}
+if {$job_id != 0} {
+	send_user "\nFAILURE: job submitted with a deadline too short\n"
+	cancel_job $job_id
+	exit 1
+}
+if {$invalid_tl == 0} {
+	send_user "\nFAILURE: failed to generate deadline error\n"
+	exit 1
+}
+
+if {$exit_code == 0} {
+	send_user "\nSUCCESS\n"
+} else {
+	send_user "\nFAILURE: job not with DEADLINE status.\n"
+}
+exit $exit_code
diff --git a/testsuite/expect/test1.110 b/testsuite/expect/test1.110
new file mode 100755
index 0000000000000000000000000000000000000000..385ba984f75ebdd6241eaeef418b07240c1f6b7a
--- /dev/null +++ b/testsuite/expect/test1.110 @@ -0,0 +1,87 @@ +#!/usr/bin/expect +############################################################################ +# Purpose: Test of SLURM functionality +# Test of --deadline and -- begin option and --time_min OK +# +# Output: "TEST: #.#" followed by "SUCCESS" if test was successful, OR +# "FAILURE: ..." otherwise with an explanation of the failure, OR +# anything else indicates a failure mode that must be investigated. +############################################################################ +# Copyright (C) 2015 CEA/DAM/DIF +# Written by Aline Roy <aline.roy@cea.fr> +# +# This file is part of SLURM, a resource management program. +# For details, see <http://slurm.schedmd.com/>. +# Please also read the included file: DISCLAIMER. +# +# SLURM is free software; you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free +# Software Foundation; either version 2 of the License, or (at your option) +# any later version. +# +# SLURM is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +# details. +# +# You should have received a copy of the GNU General Public License along +# with SLURM; if not, write to the Free Software Foundation, Inc., +# 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. 
+############################################################################
+source ./globals
+
+set test_id     "1.110"
+set exit_code   0
+set job_id      0
+
+print_header $test_id
+
+#
+# Spawn a job with a begin time, a deadline and a time-min that fits
+#
+set timeout $max_job_delay
+set srun_pid [spawn $srun -v --begin now+5 --deadline now+600 --time-min 1 $bin_sleep 1]
+expect {
+	-re "Requested time limit is invalid" {
+		set exit_code 1
+		exp_continue
+	}
+	-re "Incompatible begin and deadline time specification" {
+		set exit_code 1
+		exp_continue
+	}
+	-re "Force Terminated job ($number)" {
+		set exit_code 1
+	}
+	-re "job ($number) queued and waiting for resources" {
+		set job_id $expect_out(1,string)
+		set exit_code 0
+		exp_continue
+	}
+	-re "launching ($number).0 on host" {
+		set job_id $expect_out(1,string)
+		set exit_code 0
+		exp_continue
+	}
+	timeout {
+		send_user "\nFAILURE: srun not responding\n"
+		set exit_code 1
+	}
+	eof {
+		wait
+	}
+}
+if {$job_id == 0} {
+	send_user "\nFAILURE: job not submitted with a deadline and a time-min OK\n"
+	exit 1
+}
+
+if {[wait_for_job $job_id DONE] != 0} {
+	send_user "\nFAILURE: job failed to run to completion\n"
+	exit 1
+}
+
+if {$exit_code == 0} {
+	send_user "\nSUCCESS\n"
+}
+exit $exit_code
diff --git a/testsuite/expect/test1.111 b/testsuite/expect/test1.111
new file mode 100755
index 0000000000000000000000000000000000000000..8bae6baf1eba0bcd6dad1b8a0b6d5c4f1907c457
--- /dev/null
+++ b/testsuite/expect/test1.111
@@ -0,0 +1,101 @@
+#!/usr/bin/expect
+############################################################################
+# Purpose: Test of SLURM functionality
+#          Test of --deadline and --begin option and time to be changed
+#
+# Output:  "TEST: #.#" followed by "SUCCESS" if test was successful, OR
+#          "FAILURE: ..." otherwise with an explanation of the failure, OR
+#          anything else indicates a failure mode that must be investigated.
+############################################################################ +# Copyright (C) 2015 CEA/DAM/DIF +# Written by Aline Roy <aline.roy@cea.fr> +# +# This file is part of SLURM, a resource management program. +# For details, see <http://slurm.schedmd.com/>. +# Please also read the included file: DISCLAIMER. +# +# SLURM is free software; you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free +# Software Foundation; either version 2 of the License, or (at your option) +# any later version. +# +# SLURM is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +# details. +# +# You should have received a copy of the GNU General Public License along +# with SLURM; if not, write to the Free Software Foundation, Inc., +# 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. 
+############################################################################
+source ./globals
+
+set test_id     "1.111"
+set exit_code   0
+set job_id      0
+
+print_header $test_id
+
+#
+# Spawn a job whose time limit (100) is too long for the deadline but whose time-min (9) fits
+#
+set timeout $max_job_delay
+set srun_pid [spawn $srun -v --begin=now+5 --deadline=now+600 --time=100 --time-min=9 $bin_sleep 1]
+expect {
+	-re "Requested time limit is invalid" {
+		set exit_code 1
+		exp_continue
+	}
+	-re "Incompatible begin and deadline time specification" {
+		set exit_code 1
+		exp_continue
+	}
+	-re "job ($number) queued and waiting for resources" {
+		set job_id $expect_out(1,string)
+		exp_continue
+	}
+	-re "job ($number) has been allocated resources" {
+		set job_id $expect_out(1,string)
+		exp_continue
+	}
+	timeout {
+		send_user "\nFAILURE: srun not responding\n"
+		set exit_code 1
+	}
+	eof {
+		wait
+	}
+}
+if {$job_id == 0} {
+	send_user "\nFAILURE: batch not submitted\n"
+	exit 1
+}
+
+#
+# Check that the time limit was reduced to fit the deadline (00:09:xx or 00:10:xx)
+#
+spawn $scontrol show job $job_id
+expect {
+	-re "TimeLimit=00:09:" {
+		exp_continue
+	}
+	-re "TimeLimit=00:10:" {
+		exp_continue
+	}
+	-re "TimeLimit=" {
+		send_user "\nFAILURE: job submitted with a time limit different than calculated (00:09:00)\n"
+		set exit_code 1
+	}
+	timeout {
+		send_user "\nFAILURE: scontrol not responding\n"
+		set exit_code 1
+	}
+	eof {
+		wait
+	}
+}
+
+if {$exit_code == 0} {
+	send_user "\nSUCCESS\n"
+}
+exit $exit_code
diff --git a/testsuite/expect/test1.112 b/testsuite/expect/test1.112
new file mode 100755
index 0000000000000000000000000000000000000000..8f0779d6921869764394fdedfe22063c2ca7c48a
--- /dev/null
+++ b/testsuite/expect/test1.112
@@ -0,0 +1,100 @@
+#!/usr/bin/expect
+############################################################################
+# Purpose: Test of SLURM functionality
+#          Test of --deadline and --begin option and time not changed
+#
+# Output:  "TEST: #.#" followed by "SUCCESS" if test was successful, OR
+#          "FAILURE: ..."
otherwise with an explanation of the failure, OR +# anything else indicates a failure mode that must be investigated. +############################################################################ +# Copyright (C) 2015 CEA/DAM/DIF +# Written by Aline Roy <aline.roy@cea.fr> +# +# This file is part of SLURM, a resource management program. +# For details, see <http://slurm.schedmd.com/>. +# Please also read the included file: DISCLAIMER. +# +# SLURM is free software; you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free +# Software Foundation; either version 2 of the License, or (at your option) +# any later version. +# +# SLURM is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +# details. +# +# You should have received a copy of the GNU General Public License along +# with SLURM; if not, write to the Free Software Foundation, Inc., +# 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. 
+############################################################################
+source ./globals
+
+set test_id     "1.112"
+set exit_code   0
+set job_id      0
+
+print_header $test_id
+
+#
+# Spawn a job with a deadline and no time-min
+#
+set timeout $max_job_delay
+set srun_pid [spawn $srun -v --begin now+10 --deadline now+1hour -t 10 $bin_sleep 1 ]
+expect {
+	-re "Requested time limit is invalid" {
+		set exit_code 1
+		exp_continue
+	}
+	-re "Incompatible begin and deadline time specification" {
+		set exit_code 1
+		exp_continue
+	}
+	-re "job ($number) queued and waiting for resources" {
+		set job_id $expect_out(1,string)
+		set exit_code 0
+		exp_continue
+	}
+	-re "job ($number) has been allocated resources" {
+		set job_id $expect_out(1,string)
+		set exit_code 0
+		exp_continue
+	}
+	timeout {
+		send_user "\nFAILURE: srun not responding\n"
+		set exit_code 1
+	}
+	eof {
+		wait
+	}
+}
+if {$job_id == 0} {
+	send_user "\nFAILURE: job not submitted\n"
+	exit 1
+}
+
+#
+# Check that the requested 10 minute time limit was left unchanged
+#
+spawn $scontrol show job $job_id
+expect {
+	-re "TimeLimit=00:10:" {
+		exp_continue
+	}
+	-re "TimeLimit=" {
+		send_user "\nFAILURE: job submitted with a time limit different than calculated (00:10:00)\n"
+		set exit_code 1
+	}
+	timeout {
+		send_user "\nFAILURE: scontrol not responding\n"
+		set exit_code 1
+	}
+	eof {
+		wait
+	}
+}
+
+if {$exit_code == 0} {
+	send_user "\nSUCCESS\n"
+}
+exit $exit_code
diff --git a/testsuite/expect/test15.30 b/testsuite/expect/test15.30
new file mode 100755
index 0000000000000000000000000000000000000000..43d0647ebc614d9a4c761b4abb0386f33fe48bf3
--- /dev/null
+++ b/testsuite/expect/test15.30
@@ -0,0 +1,68 @@
+#!/usr/bin/expect
+############################################################################
+# Purpose: Test of SLURM functionality
+#          Test of --deadline option too late
+#
+# Output:  "TEST: #.#" followed by "SUCCESS" if test was successful, OR
+#          "FAILURE: ..."
otherwise with an explanation of the failure, OR +# anything else indicates a failure mode that must be investigated. +############################################################################ +# Copyright (C) 2015 CEA/DAM/DIF +# Written by Aline Roy <aline.roy@cea.fr> +# +# This file is part of SLURM, a resource management program. +# For details, see <http://slurm.schedmd.com/>. +# Please also read the included file: DISCLAIMER. +# +# SLURM is free software; you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free +# Software Foundation; either version 2 of the License, or (at your option) +# any later version. +# +# SLURM is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +# details. +# +# You should have received a copy of the GNU General Public License along +# with SLURM; if not, write to the Free Software Foundation, Inc., +# 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. 
+############################################################################
+source ./globals
+
+set test_id     "15.30"
+set exit_code   0
+set job_id      0
+
+print_header $test_id
+
+#
+# Spawn a job whose deadline is already in the past
+#
+set timeout $max_job_delay
+set salloc_pid 0
+set salloc_pid [spawn $salloc -N1 --deadline 2015-03-27T04:30:00 -t1 $bin_sleep 1]
+expect {
+	-re "Requested time limit is invalid" {
+		set exit_code 0
+		exp_continue
+	}
+	-re "Granted job allocation " {
+		send_user "\nFAILURE: salloc is OK with a deadline too old\n"
+		set exit_code 1
+		exit 1
+	}
+	timeout {
+		send_user "\nFAILURE: salloc not responding\n"
+		set exit_code 1
+		slow_kill $salloc_pid
+	}
+	eof {
+		wait
+	}
+}
+
+if {$exit_code == 0} {
+	send_user "\nSUCCESS\n"
+}
+exit $exit_code
diff --git a/testsuite/expect/test15.31 b/testsuite/expect/test15.31
new file mode 100755
index 0000000000000000000000000000000000000000..e84e2d2abbd955e157d70357e7d5bad6cd4d8fe4
--- /dev/null
+++ b/testsuite/expect/test15.31
@@ -0,0 +1,88 @@
+#!/usr/bin/expect
+############################################################################
+# Purpose: Test of SLURM functionality
+#          Test of --deadline option too short
+#
+# Output:  "TEST: #.#" followed by "SUCCESS" if test was successful, OR
+#          "FAILURE: ..." otherwise with an explanation of the failure, OR
+#          anything else indicates a failure mode that must be investigated.
+############################################################################
+# Copyright (C) 2015 CEA/DAM/DIF
+# Written by Aline Roy <aline.roy@cea.fr>
+#
+# This file is part of SLURM, a resource management program.
+# For details, see <http://slurm.schedmd.com/>.
+# Please also read the included file: DISCLAIMER.
+#
+# SLURM is free software; you can redistribute it and/or modify it under
+# the terms of the GNU General Public License as published by the Free
+# Software Foundation; either version 2 of the License, or (at your option)
+# any later version.
+#
+# SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
+# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
+# details.
+#
+# You should have received a copy of the GNU General Public License along
+# with SLURM; if not, write to the Free Software Foundation, Inc.,
+# 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
+############################################################################
+source ./globals
+
+set test_id     "15.31"
+set job_id      0
+set exit_code   0
+
+print_header $test_id
+
+#
+# Spawn a job whose time limit (-t120) is too long for the deadline (now+1hour)
+#
+set timeout $max_job_delay
+set salloc_pid [spawn $salloc -N1 --deadline now+1hour -t120 $bin_sleep 1]
+expect {
+	-re "Requested time limit is invalid" {
+		set exit_code 1
+		exp_continue
+	}
+	-re "Granted job allocation ($number)" {
+		set job_id $expect_out(1,string)
+		set exit_code 0
+		exp_continue
+	}
+	timeout {
+		send_user "\nFAILURE: salloc not responding\n"
+		set exit_code 1
+		slow_kill $salloc_pid
+	}
+	eof {
+		wait
+	}
+}
+
+if {$job_id != 0} {
+	spawn $scontrol show job $job_id
+	expect {
+		-re "TimeLimit=00:59" {
+			send_user "\nTimelimit OK\n"
+		}
+		-re "TimeLimit=01:00:00" {
+			send_user "\nTimelimit OK\n"
+		}
+		-re "TimeLimit=" {
+			send_user "\nFAILURE : Timelimit not OK with deadline\n"
+			set exit_code 1
+		}
+		timeout {
+			send_user "\nFAILURE: scontrol not responding\n"
+			set exit_code 1
+			exp_continue
+		}
+	}
+}
+
+if {$exit_code == 0} {
+	send_user "\nSUCCESS\n"
+}
+exit $exit_code
diff --git a/testsuite/expect/test15.32 b/testsuite/expect/test15.32
new file mode 100755
index 0000000000000000000000000000000000000000..4a8d71beca89c505561f9a566a7056608767c6b1
--- /dev/null
+++ b/testsuite/expect/test15.32
@@ -0,0 +1,252 @@
+#!/usr/bin/expect
+############################################################################
+# Purpose: Test of SLURM functionality
+#          Test of --deadline option and time limit
partition +# +# Output: "TEST: #.#" followed by "SUCCESS" if test was successful, OR +# "FAILURE: ..." otherwise with an explanation of the failure, OR +# anything else indicates a failure mode that must be investigated. +############################################################################ +# Copyright (C) 2015 CEA/DAM/DIF +# Written by Aline Roy <aline.roy@cea.fr> +# +# This file is part of SLURM, a resource management program. +# For details, see <http://slurm.schedmd.com/>. +# Please also read the included file: DISCLAIMER. +# +# SLURM is free software; you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free +# Software Foundation; either version 2 of the License, or (at your option) +# any later version. +# +# SLURM is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +# details. +# +# You should have received a copy of the GNU General Public License along +# with SLURM; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+############################################################################
+source ./globals
+
+set test_id     "15.32"
+set file_in     "test$test_id.bash"
+set test_salloc "test$test_id.test_salloc"
+set job_id      0
+set authorized  1
+set exit_code   0
+set part_name   "QA_TEST"
+
+
+print_header $test_id
+
+if {[test_super_user] == 0} {
+	send_user "\nWARNING: can not test more unless SlurmUser or root\n"
+	exit $exit_code
+}
+
+#
+# Confirm the partition name does not already exist
+#
+set found -1
+spawn $scontrol -a show part $part_name
+expect {
+	-re "not found" {
+		send_user "This error was expected, no worries\n"
+		set found 0
+		exp_continue
+	}
+	-re "PartitionName" {
+		set found 1
+		exp_continue
+	}
+	timeout {
+		send_user "\nFAILURE: scontrol not responding\n"
+		set exit_code 1
+	}
+	eof {
+		wait
+	}
+}
+if {$found == -1} {
+	send_user "\nFAILURE: scontrol output format error\n"
+	exit 1
+}
+if {$found == 1} {
+	send_user "\nFAILURE: partition $part_name already exists\n"
+	exit 1
+}
+
+#
+# Pick an idle node from the default partition to host the test partition
+#
+set def_node [get_idle_node_in_part [default_partition]]
+if {[string compare $def_node ""] == 0} {
+	send_user "\nFAILURE: default partition seems to have no nodes\n"
+	exit 1
+}
+
+#
+# Create the test partition on that node with MaxTime=30
+#
+spawn $scontrol create PartitionName=$part_name Nodes=$def_node MaxTime=30
+expect {
+	-re "error" {
+		send_user "\nFAILURE: something bad happened on partition create\n"
+		set exit_code 1
+		exp_continue
+	}
+	timeout {
+		send_user "\nFAILURE: scontrol not responding\n"
+		set exit_code 1
+	}
+	eof {
+		wait
+	}
+}
+
+#
+# Confirm the partition now exists
+#
+set allow 0
+set found -1
+spawn $scontrol show part $part_name
+expect {
+	-re "not found" {
+		set found 0
+		exp_continue
+	}
+	-re "PartitionName" {
+		set found 1
+		exp_continue
+	}
+	timeout {
+		send_user "\nFAILURE: scontrol not responding\n"
+		set exit_code 1
+	}
+	eof {
+		wait
+	}
+}
+if {$found != 1} {
+	send_user "\nFAILURE: partition not created\n"
+	exit 1
+}
+
+set found 0
+
+#
+# Submit a job to the new partition (-t100, --deadline now+1hour) in the background
+#
+make_bash_script $test_salloc "
+set -m
+$salloc --no-shell -N1 --deadline now+1hour -p $part_name -t100 $bin_sleep 1 &
+"
+
+make_bash_script $file_in "
+bash -i ./$test_salloc
+"
+
+set timeout $max_job_delay
+spawn bash -i $file_in
+expect {
+	-re "Pending job allocation ($number)" {
+		set job_id $expect_out(1,string)
+		set exit_code 0
+	}
+
+	timeout {
+		send_user "\nFAILURE: salloc not responding\n"
+		set exit_code 1
+		exp_continue
+	}
+	eof {
+		wait
+	}
+}
+
+#
+# Verify the pending reason is PartitionTimeLimit. Poll up to 8 times, waiting for the reason to be updated
+#
+if {$job_id == 0} {
+	send_user "\nFAILURE: job_id = 0 : salloc not OK \n"
+	set exit_code 1
+} else {
+	set cycle_count 8
+	for {set inx 0} {$inx < $cycle_count} {incr inx} {
+		spawn $scontrol show job $job_id
+		expect {
+			-re "Reason=PartitionTimeLimit" {
+				set exit_code 0
+				break
+			}
+			-re "Reason=" {
+				set exit_code 1
+				sleep 5
+				exp_continue
+			}
+			timeout {
+				send_user "\nFAILURE: scontrol not responding\n"
+				set exit_code 1
+				exp_continue
+			}
+			eof {
+				wait
+			}
+		}
+	}
+	if {$exit_code == 1} {
+		send_user "\nFAILURE: partition time limit not used\n"
+		set exit_code 1
+	}
+	cancel_job $job_id
+}
+
+#
+# Now delete the partition
+#
+spawn $scontrol delete PartitionName=$part_name
+expect {
+	timeout {
+		send_user "\nFAILURE: scontrol not responding\n"
+		set exit_code 1
+	}
+	eof {
+		wait
+	}
+}
+
+#
+# Confirm the partition is now gone
+#
+set found -1
+spawn $scontrol show part $part_name
+expect {
+	-re "not found" {
+		send_user "This error was expected, no worries\n"
+		set found 0
+		exp_continue
+	}
+	-re "PartitionName" {
+		set found 1
+		exp_continue
+	}
+	timeout {
+		send_user "\nFAILURE: scontrol not responding\n"
+		set exit_code 1
+	}
+	eof {
+		wait
+	}
+}
+if {$found != 0} {
+	send_user "\nFAILURE: partition not deleted\n"
+	exit 1
+}
+
+if {$exit_code == 0} {
+	exec $bin_rm $test_salloc $file_in
+	send_user "\nSUCCESS\n"
+}
+exit
$exit_code diff --git a/testsuite/expect/test15.33 b/testsuite/expect/test15.33 new file mode 100755 index 0000000000000000000000000000000000000000..ea144698f5dfee937e33d395b1b1765dc2796a39 --- /dev/null +++ b/testsuite/expect/test15.33 @@ -0,0 +1,97 @@ +#!/usr/bin/expect +############################################################################ +# Purpose: Test of SLURM functionality +# Test of --deadline option and no time +# +# Output: "TEST: #.#" followed by "SUCCESS" if test was successful, OR +# "FAILURE: ..." otherwise with an explanation of the failure, OR +# anything else indicates a failure mode that must be investigated. +############################################################################ +# Copyright (C) 2015 CEA/DAM/DIF +# Written by Aline Roy <aline.roy@cea.fr> +# +# This file is part of SLURM, a resource management program. +# For details, see <http://slurm.schedmd.com/>. +# Please also read the included file: DISCLAIMER. +# +# SLURM is free software; you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free +# Software Foundation; either version 2 of the License, or (at your option) +# any later version. +# +# SLURM is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +# details. +# +# You should have received a copy of the GNU General Public License along +# with SLURM; if not, write to the Free Software Foundation, Inc., +# 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. 
+############################################################################
+source ./globals
+
+set test_id     "15.33"
+set exit_code   0
+set job_id      0
+
+print_header $test_id
+
+#
+# Spawn a job with a deadline and no time limit
+#
+set timeout $max_job_delay
+set salloc_pid [spawn $salloc -N1 --deadline now+600 $bin_sleep 1]
+expect {
+	-re "Requested time limit is invalid" {
+		set exit_code 1
+		exp_continue
+	}
+	-re "Granted job allocation ($number)" {
+		set job_id $expect_out(1,string)
+		set exit_code 0
+		exp_continue
+	}
+
+	timeout {
+		send_user "\nFAILURE: salloc not responding\n"
+		set exit_code 1
+		slow_kill $salloc_pid
+	}
+	eof {
+		wait
+	}
+}
+
+if {$job_id == 0} {
+	send_user "\nFAILURE: batch not submitted\n"
+	exit 1
+}
+
+if {$job_id != 0} {
+	spawn $scontrol show job $job_id
+	expect {
+		-re "TimeLimit=00:10:00" {
+			send_user "\nTimelimit OK\n"
+		}
+		-re "TimeLimit=00:09:" {
+			send_user "\nTimelimit OK\n"
+		}
+		-re "TimeLimit=" {
+			send_user "\nFAILURE : Timelimit not OK with deadline\n"
+			set exit_code 1
+		}
+		timeout {
+			send_user "\nFAILURE: scontrol not responding\n"
+			set exit_code 1
+			exp_continue
+		}
+		eof {
+			wait
+		}
+	}
+}
+
+if {$exit_code == 0} {
+	send_user "\nSUCCESS\n"
+}
+exit $exit_code
diff --git a/testsuite/expect/test15.34 b/testsuite/expect/test15.34
new file mode 100755
index 0000000000000000000000000000000000000000..e07cf42775f0af6108acbc6ad5e3eaa30e8bb6ed
--- /dev/null
+++ b/testsuite/expect/test15.34
@@ -0,0 +1,72 @@
+#!/usr/bin/expect
+############################################################################
+# Purpose: Test of SLURM functionality
+#          Test of --deadline option and time_min too long
+#
+# Output:  "TEST: #.#" followed by "SUCCESS" if test was successful, OR
+#          "FAILURE: ..." otherwise with an explanation of the failure, OR
+#          anything else indicates a failure mode that must be investigated.
+############################################################################ +# Copyright (C) 2015 CEA/DAM/DIF +# Written by Aline Roy <aline.roy@cea.fr> +# +# This file is part of SLURM, a resource management program. +# For details, see <http://slurm.schedmd.com/>. +# Please also read the included file: DISCLAIMER. +# +# SLURM is free software; you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free +# Software Foundation; either version 2 of the License, or (at your option) +# any later version. +# +# SLURM is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +# details. +# +# You should have received a copy of the GNU General Public License along +# with SLURM; if not, write to the Free Software Foundation, Inc., +# 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. 
+############################################################################ +source ./globals + +set test_id "15.34" +set exit_code 0 +set job_id 0 + +print_header $test_id + +# +# Spawn an salloc job whose --time-min (120) does not fit before the +# deadline (now+1hour); the request is expected to be rejected +set timeout $max_job_delay +set salloc_pid [spawn $salloc -N1 --deadline now+1hour --time-min 120 $bin_sleep 1] +expect { + -re "Requested time limit is invalid" { + set exit_code 0 + exp_continue + } + -re "Granted job allocation ($number)" { + set job_id $expect_out(1,string) + set exit_code 1 + exp_continue + } + timeout { + send_user "\nFAILURE: salloc not responding\n" + set exit_code 1 + exp_continue + } + eof { + wait + } +} + +if {$job_id != 0} { + send_user "\nFAILURE: batch submitted despite the deadline\n" + exit 1 +} + +if {$exit_code == 0} { + send_user "\nSUCCESS\n" +} +exit $exit_code diff --git a/testsuite/expect/test15.35 b/testsuite/expect/test15.35 new file mode 100755 index 0000000000000000000000000000000000000000..2674eccab809495a1be63454873543af773b9488 --- /dev/null +++ b/testsuite/expect/test15.35 @@ -0,0 +1,96 @@ +#!/usr/bin/expect +############################################################################ +# Purpose: Test of SLURM functionality +# Test of --deadline option and time_min and time too long +# +# Output: "TEST: #.#" followed by "SUCCESS" if test was successful, OR +# "FAILURE: ..." otherwise with an explanation of the failure, OR +# anything else indicates a failure mode that must be investigated. +############################################################################ +# Copyright (C) 2015 CEA/DAM/DIF +# Written by Aline Roy <aline.roy@cea.fr> +# +# This file is part of SLURM, a resource management program. +# For details, see <http://slurm.schedmd.com/>. +# Please also read the included file: DISCLAIMER.
+# +# SLURM is free software; you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free +# Software Foundation; either version 2 of the License, or (at your option) +# any later version. +# +# SLURM is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +# details. +# +# You should have received a copy of the GNU General Public License along +# with SLURM; if not, write to the Free Software Foundation, Inc., +# 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. +############################################################################ +source ./globals + +set test_id "15.35" +set exit_code 0 +set job_id 0 + +print_header $test_id + +# +# Spawn an salloc job with --deadline now+1hour, --time-min 20 and -t180; +# the allocation is expected to be granted (a rejection is a failure) +set timeout $max_job_delay +set salloc_pid [spawn $salloc -N1 --deadline now+1hour --time-min 20 -t180 $bin_sleep 1] +expect { + -re "Requested time limit is invalid" { + set exit_code 1 + exp_continue + } + -re "Granted job allocation ($number)" { + set job_id $expect_out(1,string) + exp_continue + } + timeout { + send_user "\nFAILURE: salloc not responding\n" + set exit_code 1 + exp_continue + } + eof { + wait + } +} +if {$job_id == 0} { + send_user "\nFAILURE: job not submitted\n" + exit 1 +} + +# +# verify the time limit matches the time left before the deadline +# (01:00:00, or 00:59:xx); any other TimeLimit value is a failure +spawn $scontrol show job $job_id +expect { + -re "TimeLimit=01:00:00" { + exp_continue + } + -re "TimeLimit=00:59:" { + exp_continue + } + -re "TimeLimit=" { + send_user "\nFAILURE: time limit is different from deadline\n" + set exit_code 1 + exp_continue + } + timeout { + send_user "\nFAILURE: scontrol not responding\n" + set exit_code 1 + exp_continue + } + eof { + wait + } +} + +if {$exit_code == 0} { + send_user "\nSUCCESS\n" +} +exit $exit_code diff --git a/testsuite/expect/test15.36 b/testsuite/expect/test15.36 new file mode
100755 index 0000000000000000000000000000000000000000..0eeb5c1b3774592f032f04fe345c9f5739cc7027 --- /dev/null +++ b/testsuite/expect/test15.36 @@ -0,0 +1,77 @@ +#!/usr/bin/expect +############################################################################ +# Purpose: Test of SLURM functionality +# Test of --deadline option and --begin option too late +# +# Output: "TEST: #.#" followed by "SUCCESS" if test was successful, OR +# "FAILURE: ..." otherwise with an explanation of the failure, OR +# anything else indicates a failure mode that must be investigated. +############################################################################ +# Copyright (C) 2015 CEA/DAM/DIF +# Written by Aline Roy <aline.roy@cea.fr> +# +# This file is part of SLURM, a resource management program. +# For details, see <http://slurm.schedmd.com/>. +# Please also read the included file: DISCLAIMER. +# +# SLURM is free software; you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free +# Software Foundation; either version 2 of the License, or (at your option) +# any later version. +# +# SLURM is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +# details. +# +# You should have received a copy of the GNU General Public License along +# with SLURM; if not, write to the Free Software Foundation, Inc., +# 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. 
+############################################################################ +source ./globals + +set test_id "15.36" +set exit_code 0 +set job_id 0 + +print_header $test_id + +# +# Spawn an salloc job whose --begin (now+2hour) is later than its +# --deadline (now+1hour); the request is expected to be rejected +set timeout $max_job_delay +set salloc_pid [spawn $salloc -N1 --begin now+2hour --deadline now+1hour $bin_sleep 1] +expect { + -re "Requested time limit is invalid" { + set exit_code 1 + exp_continue + } + -re "Incompatible begin and deadline time specification" { + set exit_code 0 + exp_continue + } + -re "Granted job allocation ($number)" { + set job_id $expect_out(1,string) + set exit_code 1 + exp_continue + } + timeout { + send_user "\nFAILURE: salloc not responding\n" + set exit_code 1 + exp_continue + } + eof { + wait + } +} + +if {$job_id != 0} { + send_user "\nFAILURE: job submitted with a deadline too short\n" + cancel_job $job_id + exit 1 +} + +if {$exit_code == 0} { + send_user "\nSUCCESS\n" +} +exit $exit_code diff --git a/testsuite/expect/test15.37 b/testsuite/expect/test15.37 new file mode 100755 index 0000000000000000000000000000000000000000..03a34f8942f03dcb08e55348a955018f36e76a2a --- /dev/null +++ b/testsuite/expect/test15.37 @@ -0,0 +1,157 @@ +#!/usr/bin/expect +############################################################################ +# Purpose: Test of SLURM functionality +# Test of --deadline and --begin option and --time_min too long +# +# Output: "TEST: #.#" followed by "SUCCESS" if test was successful, OR +# "FAILURE: ..." otherwise with an explanation of the failure, OR +# anything else indicates a failure mode that must be investigated. +############################################################################ +# Copyright (C) 2015 CEA/DAM/DIF +# Written by Aline Roy <aline.roy@cea.fr> +# +# This file is part of SLURM, a resource management program. +# For details, see <http://slurm.schedmd.com/>. +# Please also read the included file: DISCLAIMER.
+# +# SLURM is free software; you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free +# Software Foundation; either version 2 of the License, or (at your option) +# any later version. +# +# SLURM is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +# details. +# +# You should have received a copy of the GNU General Public License along +# with SLURM; if not, write to the Free Software Foundation, Inc., +# 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. +############################################################################ +source ./globals + +set test_id "15.37" +set exit_code 0 +set job_id 0 + +print_header $test_id + +# +# Spawn an salloc job with --begin now+60, --deadline now+600 and +# --time-min 10; the job is expected to end in state DEADLINE +set timeout [expr $max_job_delay + 60] +set salloc_pid [spawn $salloc --begin now+60 --deadline now+600 --time-min 10 $bin_sleep 1 ] +expect { + -re "Requested time limit is invalid" { + set exit_code 1 + exp_continue + } + -re "Job allocation ($number) has been revoked" { + set job_id $expect_out(1,string) + set exit_code 0 + exp_continue + } + -re "job ($number) queued and waiting for resources" { + set job_id $expect_out(1,string) + set exit_code 0 + exp_continue + } + -re "Pending job allocation ($number)" { + set job_id $expect_out(1,string) + set exit_code 0 + exp_continue + } + timeout { + send_user "\nFAILURE: salloc not responding\n" + set exit_code 1 + exp_continue + } + eof { + wait + } +} +if {$job_id == 0} { + send_user "\nFAILURE: batch not submitted with a deadline too short\n" + exit 1 +} + +if {[wait_for_job $job_id DONE] != 0} { + send_user "\nFAILURE: job failed to run to completion\n" + exit 1 +} + +# +# test job status in scontrol and sacct +# +spawn $scontrol show job $job_id +expect { + -re "JobState=DEADLINE" { + exp_continue + } +
-re "JobState=" { + send_user "\nFAILURE: invalid job state\n" + set exit_code 1 + exp_continue + } + timeout { + send_user "\nFAILURE: scontrol not responding\n" + set exit_code 1 + exp_continue + } + eof { + wait + } +} + +set cycle_count 8 +set not_support 0 +for {set inx 0} {$inx < $cycle_count} {incr inx} { + spawn $sacct -n -P -X -j $job_id -o State + expect { + -re "SLURM accounting storage is disabled" { + set not_support 1 + break + } + -re "DEADLINE" { + set exit_code 0 + break + } + -re "COMPLETED" { + send_user "\nFAILURE: batch submitted with a deadline too short\n" + set exit_code 1 + break + } + -re "PENDING" { + set exit_code 1 + exp_continue + } + -re "CANCELLED by ($number)" { + send_user "\nFAILURE: batch killed by timeout before start\n" + set exit_code 1 + break + } + timeout { + send_user "\nFAILURE: sacct not responding\n" + set exit_code 1 + exp_continue + } + eof { + wait + } + } + sleep 5 +} + +if {$not_support != 0} { + send_user "\nWARNING: can not test without accounting enabled\n" + exit 0 +} +if {$exit_code == 1} { + send_user "\nFAILURE: batch submitted with a deadline too short\n" + set exit_code 1 +} + +if {$exit_code == 0} { + send_user "\nSUCCESS\n" +} +exit $exit_code diff --git a/testsuite/expect/test15.38 b/testsuite/expect/test15.38 new file mode 100755 index 0000000000000000000000000000000000000000..ce6abf8666873c93aaeffa6966d67dbf3afaefe9 --- /dev/null +++ b/testsuite/expect/test15.38 @@ -0,0 +1,135 @@ +#!/usr/bin/expect +############################################################################ +# Purpose: Test of SLURM functionality +# Test of --deadline and --begin option and --time_min OK +# +# Output: "TEST: #.#" followed by "SUCCESS" if test was successful, OR +# "FAILURE: ..." otherwise with an explanation of the failure, OR +# anything else indicates a failure mode that must be investigated.
+############################################################################ +# Copyright (C) 2015 CEA/DAM/DIF +# Written by Aline Roy <aline.roy@cea.fr> +# +# This file is part of SLURM, a resource management program. +# For details, see <http://slurm.schedmd.com/>. +# Please also read the included file: DISCLAIMER. +# +# SLURM is free software; you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free +# Software Foundation; either version 2 of the License, or (at your option) +# any later version. +# +# SLURM is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +# details. +# +# You should have received a copy of the GNU General Public License along +# with SLURM; if not, write to the Free Software Foundation, Inc., +# 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. 
+############################################################################ +source ./globals + +set test_id "15.38" +set exit_code 0 +set job_id 0 + +print_header $test_id + +# +# Spawn an salloc job with --begin now+1, --deadline now+600 and +# --time-min 5; the job is expected to run and end in state COMPLETED +set timeout $max_job_delay +set salloc_pid [spawn $salloc --begin now+1 --deadline now+600 --time-min 5 $bin_sleep 1 ] +expect { + -re "Requested time limit is invalid" { + set exit_code 1 + exp_continue + } + -re "Incompatible begin and deadline time specification" { + set exit_code 1 + exp_continue + } + -re "Pending job allocation ($number)" { + set job_id $expect_out(1,string) + set exit_code 0 + exp_continue + } + -re "Granted job allocation ($number)" { + set job_id $expect_out(1,string) + set exit_code 0 + exp_continue + } + timeout { + send_user "\nFAILURE: salloc not responding\n" + set exit_code 1 + exp_continue + } + eof { + wait + } +} +if {$job_id == 0} { + send_user "\nFAILURE: job not submitted with a deadline and a time-min OK\n" + exit 1 +} + +if {[wait_for_job $job_id DONE] != 0} { + send_user "\nFAILURE: job failed to run to completion\n" + exit 1 +} + +# +# test job status in scontrol and sacct +# +spawn $scontrol show job $job_id +expect { + -re "JobState=COMPLETED" { + exp_continue + } + -re "JobState=" { + send_user "\nFAILURE: invalid job state\n" + set exit_code 1 + exp_continue + } + timeout { + send_user "\nFAILURE: scontrol not responding\n" + set exit_code 1 + exp_continue + } + eof { + wait + } +} + +set not_support 0 +spawn $sacct -n -P -X -j $job_id -o State +expect { + -re "SLURM accounting storage is disabled" { + set not_support 1 + exp_continue + } + -re "DEADLINE" { + send_user "\nFAILURE: job not submitted with a deadline and a time-min OK\n" + set exit_code 1 + exp_continue + } + timeout { + send_user "\nFAILURE: sacct not responding\n" + set exit_code 1 + exp_continue + } + eof { + wait + } +} + +if {$not_support != 0} { + send_user "\nWARNING: can not test without accounting enabled\n" + exit 0
+} + +if {$exit_code == 0} { + send_user "\nSUCCESS\n" +} +exit $exit_code diff --git a/testsuite/expect/test17.53 b/testsuite/expect/test17.53 new file mode 100755 index 0000000000000000000000000000000000000000..a91fd05b5b133e51f56aa3ef197210fb2a81b923 --- /dev/null +++ b/testsuite/expect/test17.53 @@ -0,0 +1,80 @@ +#!/usr/bin/expect +############################################################################ +# Purpose: Test of SLURM functionality +# Test of --deadline option too late +# +# Output: "TEST: #.#" followed by "SUCCESS" if test was successful, OR +# "FAILURE: ..." otherwise with an explanation of the failure, OR +# anything else indicates a failure mode that must be investigated. +############################################################################ +# Copyright (C) 2015 CEA/DAM/DIF +# Written by Aline Roy <aline.roy@cea.fr> +# +# This file is part of SLURM, a resource management program. +# For details, see <http://slurm.schedmd.com/>. +# Please also read the included file: DISCLAIMER. +# +# SLURM is free software; you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free +# Software Foundation; either version 2 of the License, or (at your option) +# any later version. +# +# SLURM is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +# details. +# +# You should have received a copy of the GNU General Public License along +# with SLURM; if not, write to the Free Software Foundation, Inc., +# 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
+############################################################################ +source ./globals + +set test_id "17.53" +set file_in "test$test_id.input" +set exit_code 0 +set job_id 0 + +print_header $test_id + +# +# Build input script file +# +exec $bin_rm -f $file_in +make_bash_script $file_in "$bin_sleep 5" + +# +# Spawn a batch job whose deadline (a fixed 2015 date) is too old; +# the submission is expected to be rejected +set timeout $max_job_delay +spawn $sbatch --output=/dev/null --error=/dev/null --deadline 2015-03-27T04:30:00 -t1 $file_in +expect { + -re "Requested time limit is invalid" { + set exit_code 0 + exp_continue + } + -re "Submitted batch job ($number)" { + set job_id $expect_out(1,string) + set exit_code 1 + exp_continue + } + timeout { + send_user "\nFAILURE: sbatch not responding\n" + set exit_code 1 + exp_continue + } + eof { + wait + } +} +if {$job_id != 0} { + send_user "\nFAILURE: batch submit with a deadline too old\n" + cancel_job $job_id + exit 1 +} + +if {$exit_code == 0} { + exec $bin_rm -f $file_in + send_user "\nSUCCESS\n" +} +exit $exit_code diff --git a/testsuite/expect/test17.54 b/testsuite/expect/test17.54 new file mode 100755 index 0000000000000000000000000000000000000000..f17df2ccf0deac8da4d252ac8d2c7d43676f68ee --- /dev/null +++ b/testsuite/expect/test17.54 @@ -0,0 +1,109 @@ +#!/usr/bin/expect +############################################################################ +# Purpose: Test of SLURM functionality +# Test of --deadline option too short +# +# Output: "TEST: #.#" followed by "SUCCESS" if test was successful, OR +# "FAILURE: ..." otherwise with an explanation of the failure, OR +# anything else indicates a failure mode that must be investigated. +############################################################################ +# Copyright (C) 2015 CEA/DAM/DIF +# Written by Aline Roy <aline.roy@cea.fr> +# +# This file is part of SLURM, a resource management program. +# For details, see <http://slurm.schedmd.com/>. +# Please also read the included file: DISCLAIMER.
+# +# SLURM is free software; you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free +# Software Foundation; either version 2 of the License, or (at your option) +# any later version. +# +# SLURM is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +# details. +# +# You should have received a copy of the GNU General Public License along +# with SLURM; if not, write to the Free Software Foundation, Inc., +# 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. +############################################################################ +source ./globals + +set test_id "17.54" +set file_in "test$test_id.input" +set exit_code 0 +set job_id 0 + +print_header $test_id + +# +# Build input script file +# +exec $bin_rm -f $file_in +make_bash_script $file_in "$bin_sleep 1" + +# +# Spawn a batch job with a deadline (now+1hour) and no time limit; +# the submission is expected to be accepted +set timeout $max_job_delay +spawn $sbatch --output=/dev/null --error=/dev/null --deadline now+1hour $file_in +expect { + -re "Requested time limit is invalid" { + set exit_code 1 + exp_continue + } + -re "Submitted batch job ($number)" { + set job_id $expect_out(1,string) + exp_continue + } + timeout { + send_user "\nFAILURE: sbatch not responding\n" + set exit_code 1 + exp_continue + } + eof { + wait + } +} +if {$job_id == 0} { + send_user "\nFAILURE: job submission failed\n" + exit 1 +} + +if {[wait_for_job $job_id DONE] != 0} { + send_user "\nFAILURE: job failed to run to completion\n" + exit 1 +} + +# +# verify the time limit matches the time left before the deadline +# (00:59:xx or 01:00:00); any other TimeLimit value is a failure +spawn $scontrol show job $job_id +expect { + -re "TimeLimit=00:59:" { + exp_continue + } + -re "TimeLimit=01:00:00" { + exp_continue + } + -re "TimeLimit=" { + send_user "\nFAILURE: time limit is different from deadline\n" + set exit_code 1 + exp_continue + } + timeout { + send_user "\nFAILURE:
scontrol not responding\n" + set exit_code 1 + exp_continue + } + eof { + wait + } +} + +if {$exit_code == 0} { + exec $bin_rm -f $file_in + send_user "\nSUCCESS\n" +} +exit $exit_code diff --git a/testsuite/expect/test17.55 b/testsuite/expect/test17.55 new file mode 100755 index 0000000000000000000000000000000000000000..515279f579ce27bcd773c61f7a15af288999eb38 --- /dev/null +++ b/testsuite/expect/test17.55 @@ -0,0 +1,233 @@ +#!/usr/bin/expect +############################################################################ +# Purpose: Test of SLURM functionality +# Test of --deadline option and time limit partition +# +# Output: "TEST: #.#" followed by "SUCCESS" if test was successful, OR +# "FAILURE: ..." otherwise with an explanation of the failure, OR +# anything else indicates a failure mode that must be investigated. +############################################################################ +# Copyright (C) 2015 CEA/DAM/DIF +# Written by Aline Roy <aline.roy@cea.fr> +# +# This file is part of SLURM, a resource management program. +# For details, see <http://slurm.schedmd.com/>. +# Please also read the included file: DISCLAIMER. +# +# SLURM is free software; you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free +# Software Foundation; either version 2 of the License, or (at your option) +# any later version. +# +# SLURM is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +# details. +# +# You should have received a copy of the GNU General Public License along +# with SLURM; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+############################################################################ +source ./globals + +set test_id "17.55" +set job_id 0 +set authorized 1 +set exit_code 0 +set part_name "QA_TEST" +set file_in "test$test_id.input" + +print_header $test_id + +if {[test_super_user] == 0} { + send_user "\nWARNING: can not test more unless SlurmUser or root\n" + exit $exit_code +} + +# +# Confirm the partition name does not already exist +# +set found -1 +spawn $scontrol -a show part $part_name +expect { + -re "not found" { + send_user "This error was expected, no worries\n" + set found 0 + exp_continue + } + -re "PartitionName" { + set found 1 + exp_continue + } + timeout { + send_user "\nFAILURE: scontrol not responding\n" + set exit_code 1 + } + eof { + wait + } +} +if {$found == -1} { + send_user "\nFAILURE: scontrol output format error\n" + exit 1 +} +if {$found == 1} { + send_user "\nFAILURE: partition $part_name already exists\n" + exit 1 +} + +# +# Identify usable nodes in default partition +# +set def_node [get_idle_node_in_part [default_partition]] +if {[string compare $def_node ""] == 0} { + send_user "\nFAILURE:default partition seems to have no nodes\n" + exit 1 +} + +# +# Create a new partition +# +spawn $scontrol create PartitionName=$part_name Nodes=$def_node MaxTime=30 +expect { + -re "error" { + send_user "\nFAILURE: something bad happened on partition create\n" + set exit_code 1 + exp_continue + } + timeout { + send_user "\nFAILURE: scontrol not responding\n" + set exit_code 1 + } + eof { + wait + } +} + +# +# Confirm the partition now exists +# +set allow 0 +set found -1 +spawn $scontrol show part $part_name +expect { + -re "not found" { + set found 0 + exp_continue + } + -re "PartitionName" { + set found 1 + exp_continue + } + timeout { + send_user "\nFAILURE: scontrol not responding\n" + set exit_code 1 + } + eof { + wait + } +} +if {$found != 1} { + send_user "\nFAILURE: partition not created\n" + exit 1 +} + +set found 0 + +# +# Run a job in this
new partition and validate the time limit +# +exec $bin_rm -f $file_in +make_bash_script $file_in "$bin_sleep 5" + +set srun_pid [spawn $sbatch --output=/dev/null --error=/dev/null -t100 --deadline now+3hour -p $part_name $file_in] +expect { + -re "Submitted batch job ($number)" { + set job_id $expect_out(1,string) + set exit_code 0 + exp_continue + } + timeout { + send_user "\nFAILURE: sbatch not responding\n" + set exit_code 1 + exp_continue + } + eof { + wait + } +} + +# +# verify that scontrol reports Reason=PartitionTimeLimit for the job +# (it asked for -t100 but the partition was created with MaxTime=30) +sleep 5 +set matches 0 +spawn $scontrol show job $job_id +expect { + -re "Reason=PartitionTimeLimit" { + incr matches + exp_continue + } + timeout { + send_user "\nFAILURE: scontrol not responding\n" + set exit_code 1 + exp_continue + } + eof { + wait + } +} +if {$matches != 1} { + send_user "\nFAILURE: partition time limit not used \n" + set exit_code 1 +} + +cancel_job $job_id + +# +# Now delete the partition +# +spawn $scontrol delete PartitionName=$part_name +expect { + timeout { + send_user "\nFAILURE: scontrol not responding\n" + set exit_code 1 + } + eof { + wait + } +} + +# +# Confirm the partition is now gone +# +set found -1 +spawn $scontrol show part $part_name +expect { + -re "not found" { + send_user "This error was expected, no worries\n" + set found 0 + exp_continue + } + -re "PartitionName" { + set found 1 + exp_continue + } + timeout { + send_user "\nFAILURE: scontrol not responding\n" + set exit_code 1 + } + eof { + wait + } +} +if {$found != 0} { + send_user "\nFAILURE: partition not deleted\n" + exit 1 +} + +if {$exit_code == 0} { + exec $bin_rm -f $file_in + send_user "\nSUCCESS\n" +} +exit $exit_code diff --git a/testsuite/expect/test17.56 b/testsuite/expect/test17.56 new file mode 100755 index 0000000000000000000000000000000000000000..b0e2ff78f90948bd867db36c2be7612925e9a112 --- /dev/null +++ b/testsuite/expect/test17.56 @@ -0,0 +1,106 @@ +#!/usr/bin/expect +############################################################################ +# Purpose:
Test of SLURM functionality +# Test of --deadline option and no time +# +# Output: "TEST: #.#" followed by "SUCCESS" if test was successful, OR +# "FAILURE: ..." otherwise with an explanation of the failure, OR +# anything else indicates a failure mode that must be investigated. +############################################################################ +# Copyright (C) 2015 CEA/DAM/DIF +# Written by Aline Roy <aline.roy@cea.fr> +# +# This file is part of SLURM, a resource management program. +# For details, see <http://slurm.schedmd.com/>. +# Please also read the included file: DISCLAIMER. +# +# SLURM is free software; you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free +# Software Foundation; either version 2 of the License, or (at your option) +# any later version. +# +# SLURM is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +# details. +# +# You should have received a copy of the GNU General Public License along +# with SLURM; if not, write to the Free Software Foundation, Inc., +# 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. 
+############################################################################ +source ./globals + +set test_id "17.56" +set file_in "test$test_id.input" +set exit_code 0 +set job_id 0 + +print_header $test_id + +# +# Build input script file +# +exec $bin_rm -f $file_in +make_bash_script $file_in "$bin_sleep 5" + +# +# Spawn a batch job with a deadline (now+600) and no time limit; the +# TimeLimit shown by scontrol must then be 00:10:00 or 00:09:xx +set timeout $max_job_delay +spawn $sbatch --output=/dev/null --error=/dev/null --deadline now+600 $file_in +expect { + -re "Requested time limit is invalid" { + set exit_code 1 + exp_continue + } + -re "Submitted batch job ($number)" { + set job_id $expect_out(1,string) + set exit_code 0 + exp_continue + } + timeout { + send_user "\nFAILURE: sbatch not responding\n" + set exit_code 1 + exp_continue + } + eof { + wait + } +} + +if {$job_id == 0} { + send_user "\nFAILURE: batch job not submitted\n" + exit 1 +} + +if {[wait_for_job $job_id DONE] != 0} { + send_user "\nFAILURE: job failed to run to completion\n" + exit 1 +} + +spawn $scontrol show job $job_id +expect { + -re "TimeLimit=00:10:00" { + send_user "\nTimelimit OK\n" + } + -re "TimeLimit=00:09:" { + send_user "\nTimelimit OK\n" + } + -re "TimeLimit=" { + send_user "\nFAILURE : Timelimit not OK with deadline\n" + set exit_code 1 + } + timeout { + send_user "\nFAILURE: scontrol not responding\n" + set exit_code 1 + } + eof { + wait + } +} + +if {$exit_code == 0} { + exec $bin_rm -f $file_in + send_user "\nSUCCESS\n" +} +exit $exit_code diff --git a/testsuite/expect/test17.57 b/testsuite/expect/test17.57 new file mode 100755 index 0000000000000000000000000000000000000000..421092660591a9c2064e5671e250ae1491884860 --- /dev/null +++ b/testsuite/expect/test17.57 @@ -0,0 +1,80 @@ +#!/usr/bin/expect +############################################################################ +# Purpose: Test of SLURM functionality +# Test of --deadline option and time_min too long +# +# Output: "TEST: #.#" followed by "SUCCESS" if test was successful, OR +# "FAILURE:
..." otherwise with an explanation of the failure, OR +# anything else indicates a failure mode that must be investigated. +############################################################################ +# Copyright (C) 2015 CEA/DAM/DIF +# Written by Aline Roy <aline.roy@cea.fr> +# +# This file is part of SLURM, a resource management program. +# For details, see <http://slurm.schedmd.com/>. +# Please also read the included file: DISCLAIMER. +# +# SLURM is free software; you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free +# Software Foundation; either version 2 of the License, or (at your option) +# any later version. +# +# SLURM is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +# details. +# +# You should have received a copy of the GNU General Public License along +# with SLURM; if not, write to the Free Software Foundation, Inc., +# 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. 
+############################################################################ +source ./globals + +set test_id "17.57" +set file_in "test$test_id.input" +set exit_code 0 +set job_id 0 + +print_header $test_id + +# +# Build input script file +# +exec $bin_rm -f $file_in +make_bash_script $file_in "$bin_sleep 5" + +# +# Spawn a batch job whose --time-min (120) does not fit before the +# deadline (now+1hour); the submission is expected to be rejected +set timeout $max_job_delay +spawn $sbatch --output=/dev/null --error=/dev/null --deadline now+1hour --time-min 120 $file_in +expect { + -re "Requested time limit is invalid" { + set exit_code 0 + exp_continue + } + -re "Submitted batch job ($number)" { + set job_id $expect_out(1,string) + set exit_code 1 + exp_continue + } + timeout { + send_user "\nFAILURE: sbatch not responding\n" + set exit_code 1 + exp_continue + } + eof { + wait + } +} +if {$job_id != 0} { + send_user "\nFAILURE: batch submitted despite the deadline\n" + cancel_job $job_id + exit 1 +} + +if {$exit_code == 0} { + exec $bin_rm -f $file_in + send_user "\nSUCCESS\n" +} +exit $exit_code diff --git a/testsuite/expect/test17.58 b/testsuite/expect/test17.58 new file mode 100755 index 0000000000000000000000000000000000000000..30658c961ada8a08e7b5d1a97f0c50d851ba7346 --- /dev/null +++ b/testsuite/expect/test17.58 @@ -0,0 +1,111 @@ +#!/usr/bin/expect +############################################################################ +# Purpose: Test of SLURM functionality +# Test of --deadline option and time_min and time too long +# +# Output: "TEST: #.#" followed by "SUCCESS" if test was successful, OR +# "FAILURE: ..." otherwise with an explanation of the failure, OR +# anything else indicates a failure mode that must be investigated. +############################################################################ +# Copyright (C) 2015 CEA/DAM/DIF +# Written by Aline Roy <aline.roy@cea.fr> + +# +# This file is part of SLURM, a resource management program. +# For details, see <http://slurm.schedmd.com/>.
+# Please also read the included file: DISCLAIMER. +# +# SLURM is free software; you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free +# Software Foundation; either version 2 of the License, or (at your option) +# any later version. +# +# SLURM is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +# details. +# +# You should have received a copy of the GNU General Public License along +# with SLURM; if not, write to the Free Software Foundation, Inc., +# 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. +############################################################################ +source ./globals + +set test_id "17.58" +set file_in "test$test_id.input" +set exit_code 0 +set job_id 0 + +print_header $test_id + +# +# Build input script file +# +exec $bin_rm -f $file_in +make_bash_script $file_in "$bin_sleep 5" + +# +# Spawn a batch job with a deadline and no time +# +set timeout $max_job_delay +spawn $sbatch --output=/dev/null --error=/dev/null --deadline now+1hour --time-min 20 -t 180 $file_in +expect { + -re "Requested time limit is invalid (missing or exceeds some limit)" { + set exit_code 1 + exp_continue + } + -re "Submitted batch job ($number)" { + set job_id $expect_out(1,string) + set exit_code 1 + exp_continue + } + timeout { + send_user "\nFAILURE: sbatch not responding\n" + set exit_code 1 + } + eof { + wait + } +} + +if {$job_id == 0} { + send_user "\nFAILURE: batch not submitted\n" + exit 1 +} + +# +# verify time limit +# +if {[wait_for_job $job_id DONE] != 0} { + send_user "\nFAILURE: job failed to run to completion\n" + exit 1 +} +spawn $scontrol show job $job_id +expect { + -re "TimeLimit=01:00:00" { + set exit_code 0 + exp_continue + } + -re "TimeLimit=00:59:" { + set exit_code 0 + exp_continue + } + -re "TimeLimit=" { 
+ send_user "\nFAILURE: time limit is different from deadline\n" + set exit_code 1 + exp_continue + } + timeout { + send_user "\nFAILURE: scontrol not responding\n" + set exit_code 1 + } + eof { + wait + } +} + +if {$exit_code == 0} { + exec $bin_rm -f $file_in + send_user "\nSUCCESS\n" +} +exit $exit_code diff --git a/testsuite/expect/test17.59 b/testsuite/expect/test17.59 new file mode 100755 index 0000000000000000000000000000000000000000..b62c288ce5703b549708024e317b71f8fd363862 --- /dev/null +++ b/testsuite/expect/test17.59 @@ -0,0 +1,84 @@ +#!/usr/bin/expect +############################################################################ +# Purpose: Test of SLURM functionality +# Test of --deadline option and --begin option too late +# +# Output: "TEST: #.#" followed by "SUCCESS" if test was successful, OR +# "FAILURE: ..." otherwise with an explanation of the failure, OR +# anything else indicates a failure mode that must be investigated. +############################################################################ +# Copyright (C) 2015 CEA/DAM/DIF +# Written by Aline Roy <aline.roy@cea.fr> + +# +# This file is part of SLURM, a resource management program. +# For details, see <http://slurm.schedmd.com/>. +# Please also read the included file: DISCLAIMER. +# +# SLURM is free software; you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free +# Software Foundation; either version 2 of the License, or (at your option) +# any later version. +# +# SLURM is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +# details. +# +# You should have received a copy of the GNU General Public License along +# with SLURM; if not, write to the Free Software Foundation, Inc., +# 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. 
+############################################################################ +source ./globals + +set test_id "17.59" +set file_in "test$test_id.input" +set exit_code 0 +set job_id 0 + +print_header $test_id + +# +# Build input script file +# +exec $bin_rm -f $file_in +make_bash_script $file_in "$bin_sleep 5" + +# +# Spawn a batch job with a deadline and no time +# +set timeout $max_job_delay +spawn $sbatch --output=/dev/null --error=/dev/null --begin now+2hour --deadline now+1hour $file_in +expect { + -re "Requested time limit is invalid" { + set exit_code 1 + exp_continue + } + -re "Incompatible begin and deadline time specification" { + set exit_code 0 + exp_continue + } + -re "Submitted batch job ($number)" { + set job_id $expect_out(1,string) + set exit_code 1 + exp_continue + } + timeout { + send_user "\nFAILURE: sbatch not responding\n" + set exit_code 1 + } + eof { + wait + } +} +if {$job_id != 0} { + send_user "\nFAILURE: batch submitted with a deadline too short\n" + cancel_job $job_id + exit 1 +} + +if {$exit_code == 0} { + exec $bin_rm -f $file_in + send_user "\nSUCCESS\n" +} +exit $exit_code diff --git a/testsuite/expect/test17.60 b/testsuite/expect/test17.60 new file mode 100755 index 0000000000000000000000000000000000000000..f40858f2d25f4d9a029f19d7e70b116a31aad4d9 --- /dev/null +++ b/testsuite/expect/test17.60 @@ -0,0 +1,143 @@ +#!/usr/bin/expect +############################################################################ +# Purpose: Test of SLURM functionality +# Test of --deadline and --begin option and --time_min too long +# +# Output: "TEST: #.#" followed by "SUCCESS" if test was successful, OR +# "FAILURE: ..." otherwise with an explanation of the failure, OR +# anything else indicates a failure mode that must be investigated. +############################################################################ +# Copyright (C) 2015 CEA/DAM/DIF +# Written by Aline Roy <aline.roy@cea.fr> +# +# This file is part of SLURM, a resource management program. 
+# For details, see <http://slurm.schedmd.com/>. +# Please also read the included file: DISCLAIMER. +# +# SLURM is free software; you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free +# Software Foundation; either version 2 of the License, or (at your option) +# any later version. +# +# SLURM is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +# details. +# +# You should have received a copy of the GNU General Public License along +# with SLURM; if not, write to the Free Software Foundation, Inc., +# 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. +############################################################################ +source ./globals + +set test_id "17.60" +set file_in "test$test_id.input" +set exit_code 0 +set job_id 0 + +print_header $test_id + +# +# Build input script file +# +exec $bin_rm -f $file_in +make_bash_script $file_in "$bin_sleep 5" + +# +# Spawn a batch job with a deadline and no time +# +set timeout [expr $max_job_delay + 60] +spawn $sbatch --output=/dev/null --error=/dev/null --begin now+60 --deadline now+600 --time-min 10 $file_in +expect { + -re "Requested time limit is invalid (missing or exceeds some limit)" { + set exit_code 1 + exp_continue + } + -re "Incompatible begin and deadline time specification" { + set exit_code 1 + exp_continue + } + -re "Submitted batch job ($number)" { + set job_id $expect_out(1,string) + set exit_code 0 + exp_continue + } + timeout { + send_user "\nFAILURE: sbatch not responding\n" + set exit_code 1 + } + eof { + wait + } +} +if {$job_id == 0} { + send_user "\nFAILURE: batch not submitted with a deadline too short\n" + exit 1 +} + +# +# test status deadline in scontrol and sacct +# +if {[wait_for_job $job_id DONE] != 0} { + send_user "\nFAILURE: job failed to run to 
completion\n" + exit 1 +} + +spawn $scontrol show job $job_id +expect { + -re "JobState=DEADLINE" { + exp_continue + } + -re "JobState=" { + send_user "\nFAILURE: invalid job state\n" + set exit_code 1 + exp_continue + } + timeout { + send_user "\nFAILURE: scontrol not responding\n" + set exit_code 1 + exp_continue + } + eof { + wait + } +} + +set cycle_count 8 +set not_support 0 +for {set inx 0} {$inx < $cycle_count} {incr inx} { + spawn $sacct -n -P -X -j $job_id -o State + expect { + -re "SLURM accounting storage is disabled" { + set not_support 1 + exp_continue + } + -re "DEADLINE" { + break + } + -re "COMPLETED" { + set exit_code 1 + break + } + timeout { + send_user "\nFAILURE: sacct not responding\n" + set exit_code 1 + } + eof { + wait + } + } + sleep 10 +} +if {$not_support != 0} { + send_user "\nWARNING: can not test without accounting enabled\n" + exit 0 +} + +if {$exit_code == 1} { + send_user "\nFAILURE: batch submitted with a deadline too short\n" +} else { + exec $bin_rm -f $file_in + send_user "\nSUCCESS\n" +} +exit $exit_code diff --git a/testsuite/expect/test17.61 b/testsuite/expect/test17.61 new file mode 100755 index 0000000000000000000000000000000000000000..c0fef3b52bbe81b9397332df8b188dab964e5042 --- /dev/null +++ b/testsuite/expect/test17.61 @@ -0,0 +1,136 @@ +#!/usr/bin/expect +############################################################################ +# Purpose: Test of SLURM functionality +# Test of --deadline and --begin option and --time_min OK +# +# Output: "TEST: #.#" followed by "SUCCESS" if test was successful, OR +# "FAILURE: ..." otherwise with an explanation of the failure, OR +# anything else indicates a failure mode that must be investigated. +############################################################################ +# Copyright (C) 2015 CEA/DAM/DIF +# Written by Aline Roy <aline.roy@cea.fr> +# +# This file is part of SLURM, a resource management program. +# For details, see <http://slurm.schedmd.com/>. 
+# Please also read the included file: DISCLAIMER. +# +# SLURM is free software; you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free +# Software Foundation; either version 2 of the License, or (at your option) +# any later version. +# +# SLURM is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +# details. +# +# You should have received a copy of the GNU General Public License along +# with SLURM; if not, write to the Free Software Foundation, Inc., +# 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. +############################################################################ +source ./globals + +set test_id "17.61" +set file_in "test$test_id.input" +set exit_code 0 +set job_id 0 + +print_header $test_id + +# +# Build input script file +# +exec $bin_rm -f $file_in +make_bash_script $file_in "$bin_sleep 5" + +# +# Spawn a batch job with a deadline and no time +# +set timeout $max_job_delay +spawn $sbatch --output=/dev/null --error=/dev/null --begin now+1 --deadline now+600 --time-min 5 $file_in +expect { + -re "Requested time limit is invalid (missing or exceeds some limit)" { + set exit_code 1 + exp_continue + } + -re "Incompatible begin and deadline time specification" { + set exit_code 1 + exp_continue + } + -re "Submitted batch job ($number)" { + set job_id $expect_out(1,string) + set exit_code 0 + exp_continue + } + timeout { + send_user "\nFAILURE: sbatch not responding\n" + set exit_code 1 + } + eof { + wait + } +} + +if {$job_id == 0} { + send_user "\nFAILURE: batch not submitted with a deadline too short\n" + exit 1 +} + +# +# test status deadline in scontrol and sacct +# +if {[wait_for_job $job_id DONE] != 0} { + send_user "\nFAILURE: job failed to run to completion\n" + exit 1 +} + +spawn $scontrol show job $job_id 
+expect { + -re "JobState=COMPLETED" { + exp_continue + } + -re "JobState=" { + send_user "\nFAILURE: invalid job state\n" + set exit_code 1 + exp_continue + } + timeout { + send_user "\nFAILURE: scontrol not responding\n" + set exit_code 1 + exp_continue + } + eof { + wait + } +} + +set not_support 0 +spawn $sacct -n -P -X -j $job_id -o State +expect { + -re "SLURM accounting storage is disabled" { + set not_support 1 + exp_continue + } + -re "DEADLINE" { + send_user "\nFAILURE: batch not submitted with a deadline and a time-min OK\n" + set exit_code 1 + exp_continue + } + timeout { + send_user "\nFAILURE: sacct not responding\n" + set exit_code 1 + exp_continue + } + eof { + wait + } +} +if {$not_support != 0} { + send_user "\nWARNING: can not fully test without accounting enabled\n" +} + +if {$exit_code == 0} { + exec $bin_rm -f $file_in + send_user "\nSUCCESS\n" +} +exit $exit_code