Skip to content
Snippets Groups Projects
Commit ac326bf5 authored by Joseph P. Donaghy's avatar Joseph P. Donaghy
Browse files

Debut entry of srun job step time limit test.

parent 08a08dd4
No related branches found
No related tags found
No related merge requests found
...@@ -75,6 +75,7 @@ EXTRA_DIST = \ ...@@ -75,6 +75,7 @@ EXTRA_DIST = \
test1.58 \ test1.58 \
test1.59 \ test1.59 \
test1.60 \ test1.60 \
test1.61 \
test1.80 \ test1.80 \
test1.81 \ test1.81 \
test1.82 \ test1.82 \
......
...@@ -319,6 +319,7 @@ EXTRA_DIST = \ ...@@ -319,6 +319,7 @@ EXTRA_DIST = \
test1.58 \ test1.58 \
test1.59 \ test1.59 \
test1.60 \ test1.60 \
test1.61 \
test1.80 \ test1.80 \
test1.81 \ test1.81 \
test1.82 \ test1.82 \
......
...@@ -158,6 +158,7 @@ test1.57 Test of srun --jobid for a new job allocation (used by Moab) ...@@ -158,6 +158,7 @@ test1.57 Test of srun --jobid for a new job allocation (used by Moab)
test1.58 Test of srun --jobid for an existing job allocation test1.58 Test of srun --jobid for an existing job allocation
test1.59 Test of hostfile logic for job steps test1.59 Test of hostfile logic for job steps
test1.60 Test of labelling output when writing a file per task or per node test1.60 Test of labelling output when writing a file per task or per node
test1.61 Test of srun job step time limit
**NOTE** The following tests attempt to utilize multiple CPUs or partitions, **NOTE** The following tests attempt to utilize multiple CPUs or partitions,
The test will print "WARNING" and terminate with an exit code of The test will print "WARNING" and terminate with an exit code of
......
#!/usr/bin/expect
############################################################################
# Purpose: Test of SLURM functionality
# test to verify job step time limit
#
# Output: "TEST: #.#" followed by "SUCCESS" if test was successful, OR
# "FAILURE: ..." otherwise with an explanation of the failure, OR
# anything else indicates a failure mode that must be investigated.
############################################################################
# Copyright (C) 2009 Lawrence Livermore National Security.
# Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
# Written by Joseph Donaghy <donaghy1@llnl.gov>
# CODE-OCEC-09-009. All rights reserved.
#
# This file is part of SLURM, a resource management program.
# For details, see <https://computing.llnl.gov/linux/slurm/>.
# Please also read the included file: DISCLAIMER.
#
# SLURM is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 2 of the License, or (at your option)
# any later version.
#
# SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
# details.
#
# You should have received a copy of the GNU General Public License along
# with SLURM; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
############################################################################
# Pull in the shared test-suite definitions referenced below (for example
# $bin_rm and print_header); they are defined outside this file.
source ./globals

set test_id     "1.61"
set exit_code   0
set job_id      0
set file_in     "test.$test_id.input"
set file_out    "test.$test_id.output"
set file_err    "test.$test_id.error"
# NOTE(review): test_acct is not referenced anywhere else in this script.
set test_acct   "test_acct"
# Expect's timeout, in seconds, applied to every expect block below.
set timeout     60

print_header $test_id

# Remove work files left over from a previous run. All three names must be
# variable references: stale output/error files would otherwise be parsed
# by the verification steps later in the test.
exec $bin_rm -f $file_in $file_out $file_err
#
# Build a batch script that launches three job steps, each with a one-minute
# step time limit (-t1), and lists the active steps (squeue -s) in between:
#   step 0 - prints the step records via "scontrol show step" (background)
#   step 1 - sleeps far longer than its limit (foreground)
#   step 2 - sleeps far longer than its limit (background)
#
make_bash_script $file_in "
$squeue -s
$srun -t1 $scontrol show step \$SLURM_JOB_ID &
$squeue -s
$srun -t1 sleep 200
$squeue -s
$srun -t1 sleep 200 &
$squeue -s
"

#
# Submit the script with sbatch and capture the job id it reports.
# stdout and stderr of the batch job go to $file_out and $file_err.
#
set job_id 0
spawn $sbatch -t5 --output=$file_out --error=$file_err $file_in
expect {
	-re "Submitted batch job ($number)" {
		set job_id $expect_out(1,string)
		exp_continue
	}
	timeout {
		send_user "\nFAILURE: sbatch not responding\n"
		set exit_code 1
	}
	eof {
		wait
	}
}

# Without a job id there is no step to wait for and no output to inspect,
# so stop here instead of running the remaining checks against job 0.
if {$job_id == 0} {
	send_user "\nFAILURE: did not get sbatch job_id\n"
	exit 1
}
#
# Wait for the third step (<job_id>.2) and, when wait_for_step returns 0,
# cancel the whole job.
# NOTE(review): 0 is presumably the helper's "step found" status, which
# would mean step 1 has already ended -- confirm against ./globals.
#
set step_rc [wait_for_step "${job_id}.2"]
if {$step_rc == 0} {
	cancel_job $job_id
}
#
# Verify that the job's output file contains a step record showing the
# one-minute limit requested with "srun -t1".
#
# "cat" normally finishes with eof, never with timeout, so a missing record
# has to be detected by counting matches rather than by the timeout branch.
#
set limit_matches 0
spawn cat $file_out
expect {
	-re " TimeLimit=00:01:00" {
		incr limit_matches
		exp_continue
	}
	timeout {
		send_user "\nFAILURE: timed out reading $file_out\n"
		set exit_code 1
	}
	eof {
		wait
	}
}
if {$limit_matches == 0} {
	send_user "\nFAILURE: srun time limit not set\n"
	set exit_code 1
}
#
# Verify that the job's error file reports step <job_id>.1 as cancelled
# because it exceeded its time limit.
#
# "cat" normally finishes with eof, never with timeout, so a missing message
# has to be detected by counting matches rather than by the timeout branch.
#
set cancel_matches 0
spawn cat $file_err
expect {
	-re " STEP $job_id.1 CANCELLED AT.*DUE TO TIME LIMIT " {
		incr cancel_matches
		exp_continue
	}
	timeout {
		send_user "\nFAILURE: timed out reading $file_err\n"
		set exit_code 1
	}
	eof {
		wait
	}
}
if {$cancel_matches == 0} {
	send_user "\nFAILURE: srun not cancelled\n"
	set exit_code 1
}
#
# On any failure, exit at once and leave the work files behind for
# inspection; otherwise remove them and report success.
#
if {$exit_code != 0} {
	exit $exit_code
}
exec $bin_rm -f $file_in $file_out $file_err
send_user "\nSUCCESS\n"
exit $exit_code
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment