From 6000393222a0f61a647bc4e0e3ef106b031f1d2a Mon Sep 17 00:00:00 2001 From: Nathan Yee <nyee32@schedmd.com> Date: Mon, 11 Feb 2013 13:35:50 -0800 Subject: [PATCH] Add job array test --- testsuite/expect/Makefile.am | 1 + testsuite/expect/Makefile.in | 1 + testsuite/expect/README | 7 +- testsuite/expect/globals | 96 ++++++++++------ testsuite/expect/test28.1 | 210 +++++++++++++++++++++++++++++++++++ 5 files changed, 280 insertions(+), 35 deletions(-) create mode 100755 testsuite/expect/test28.1 diff --git a/testsuite/expect/Makefile.am b/testsuite/expect/Makefile.am index 6e47cec3512..70f80bde20c 100644 --- a/testsuite/expect/Makefile.am +++ b/testsuite/expect/Makefile.am @@ -400,6 +400,7 @@ EXTRA_DIST = \ test27.3 \ test27.4 \ test27.5 \ + test28.1 \ usleep distclean-local: diff --git a/testsuite/expect/Makefile.in b/testsuite/expect/Makefile.in index 8f3c725360e..c45437c5feb 100644 --- a/testsuite/expect/Makefile.in +++ b/testsuite/expect/Makefile.in @@ -718,6 +718,7 @@ EXTRA_DIST = \ test27.3 \ test27.4 \ test27.5 \ + test28.1 \ usleep all: all-am diff --git a/testsuite/expect/README b/testsuite/expect/README index ad8ba4ea088..0b3fde5dcd0 100644 --- a/testsuite/expect/README +++ b/testsuite/expect/README @@ -1,7 +1,7 @@ ############################################################################ # Copyright (C) 2002-2007 The Regents of the University of California. # Copyright (C) 2008-2011 Lawrence Livermore National Security. -# Copyright (C) 2010-2011 SchedMD LLC +# Copyright (C) 2010-2013 SchedMD LLC # Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). # Written by Morris Jette <jette1@llnl.gov> # Additionals by Joseph Donaghy <donaghy1@llnl.gov> @@ -618,3 +618,8 @@ test27.2 sdiag --help test27.3 sdiag --version test27.4 sdiag --all (default output) test27.5 sdiag --reset + + +test28.# Testing of job array options. +======================================== +test28.1 Confirms sbatch --array and scancel of the job arrays. 
diff --git a/testsuite/expect/globals b/testsuite/expect/globals index 7e9dcf3e6b5..b796eefc4bc 100755 --- a/testsuite/expect/globals +++ b/testsuite/expect/globals @@ -928,23 +928,23 @@ proc test_topology { } { # ################################################################ proc test_track_wckey { } { - global scontrol number + global scontrol number - log_user 0 - set track_wckey 0 - spawn $scontrol show config - expect { - -re "TrackWCKey *= ($number)" { - set track_wckey $expect_out(1,string) - exp_continue - } - eof { - wait - } - } + log_user 0 + set track_wckey 0 + spawn $scontrol show config + expect { + -re "TrackWCKey *= ($number)" { + set track_wckey $expect_out(1,string) + exp_continue + } + eof { + wait + } + } - log_user 1 - return $track_wckey + log_user 1 + return $track_wckey } ################################################################ @@ -1193,23 +1193,23 @@ proc test_front_end { } { ################################################################ proc test_multiple_slurmd { } { - global scontrol + global scontrol - log_user 0 - set multiple_slurmd 0 - spawn $scontrol show config - expect { - "MULTIPLE_SLURMD" { - set multiple_slurmd 1 - exp_continue - } - eof { - wait - } - } - log_user 1 + log_user 0 + set multiple_slurmd 0 + spawn $scontrol show config + expect { + "MULTIPLE_SLURMD" { + set multiple_slurmd 1 + exp_continue + } + eof { + wait + } + } + log_user 1 - return $multiple_slurmd + return $multiple_slurmd } @@ -1890,9 +1890,9 @@ proc is_super_user { } { # ################################################################ proc check_acct_associations { } { - global sacctmgr number alpha_numeric_under + global sacctmgr number alpha_numeric_under - set rc 1 + set rc 1 log_user 0 send_user "Testing Associations\n" # @@ -1943,7 +1943,7 @@ proc check_acct_associations { } { eof { wait } - } + } foreach cluster [array names c_min] { # Here we are checking for holes in the list from above @@ -2008,7 +2008,7 @@ proc 
check_accounting_admin_level { } { eof { wait } - } + } log_user 1 return $admin_level @@ -2423,3 +2423,31 @@ proc change_subbp_state { node ionodes state } { return $exit_code } +################################################################ +# +# Proc: get_array_config +# +# Purpose: Use scontrol to determine the MaxArraySize +# +# Returns: MaxArraySize value +# +################################################################ + +proc get_array_config { } { + global scontrol number + + log_user 0 + set array_size 1 + spawn $scontrol show config + expect { + -re "MaxArraySize *= ($number)" { + set array_size $expect_out(1,string) + exp_continue + } + eof { + wait + } + } + log_user 1 + return $array_size +} diff --git a/testsuite/expect/test28.1 b/testsuite/expect/test28.1 new file mode 100755 index 00000000000..e8d4dac2efc --- /dev/null +++ b/testsuite/expect/test28.1 @@ -0,0 +1,210 @@ +#!/usr/bin/expect +############################################################################ +# Purpose: Test of SLURM functionality +# Confirms that the sbatch --array option is submitted and +# scancel cancels the job array. +# +# +# Output: "TEST: #.#" followed by "SUCCESS" if test was successful, OR +# "FAILURE: ..." otherwise with an explanation of the failure, OR +# anything else indicates a failure mode that must be investigated. +############################################################################ +# Copyright (C) 2011-2013 SchedMD LLC +# Written by Nathan Yee <nyee32@schedmd.com> +# +# This file is part of SLURM, a resource management program. +# For details, see <http://www.schedmd.com/slurmdocs/>. +# Please also read the included file: DISCLAIMER. +# +# SLURM is free software; you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free +# Software Foundation; either version 2 of the License, or (at your option) +# any later version. 
+#
+# SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
+# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+# details.
+#
+# You should have received a copy of the GNU General Public License along
+# with SLURM; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+############################################################################
+source ./globals
+
+set test_id "28.1"
+set exit_code 0
+set file_script "test$test_id.sh"
+set file_in "test$test_id.input"
+set file_out "test$test_id.output"
+set array_id ""
+set job_id ""
+set array_size 4
+
+print_header $test_id
+
+if {[get_array_config] < [expr $array_size + 1]} {
+	send_user "\nWARNING: MaxArraySize is too small\n"
+	exit 0
+}
+
+# Flag a failure for every JobState that scontrol reports for job_id other than CANCELLED
+proc scontrol_check { job_id } {
+	global scontrol alpha_numeric_under array_id exit_code
+
+	spawn $scontrol show job $job_id
+	expect {
+		-re "JobState=($alpha_numeric_under)" {
+			set tmp $expect_out(1,string)
+			if {[string compare $tmp "CANCELLED"]} {
+				send_user "\nFAILURE: Job was not cancelled\n"
+				set exit_code 1
+			}
+			exp_continue
+		}
+		timeout {
+			send_user "\nFAILURE: scontrol not responding\n"
+			set exit_code 1
+		}
+		eof {
+			wait
+		}
+	}
+}
+
+# Count the job_id_N entries printed by "squeue -r", expecting N = 0, 1, 2, ...; returns the count
+proc multi_squeue_check {job_id} {
+	global squeue number array_id exit_code
+
+	set array_in 0
+	spawn $squeue -r
+	expect {
+		-re "$job_id\_($number\)" {
+			set array_id $expect_out(1,string)
+			if {$array_id != $array_in} {
+				send_user "\nFAILURE: array IDs do not match $array_id != $array_in\n"
+				set exit_code 1
+			}
+			incr array_in
+			exp_continue
+		}
+		timeout {
+			send_user "\nFAILURE: squeue is not responding\n"
+			set exit_code 1
+		}
+		eof {
+			wait
+		}
+	}
+	return $array_in
+}
+
+# Count the ArrayTaskId values printed by scontrol for job_id, expecting 0, 1, 2, ...; returns the count
+proc multi_scontrol_check { job_id } {
+	global scontrol number array_id array_cnt exit_code
+
+	set array_cnt 0
+	spawn $scontrol show job $job_id
+	expect {
+		-re "ArrayTaskId=($number)" {
+			set array_id $expect_out(1,string)
+			if {$array_id != $array_cnt} {
+				send_user "\nFAILURE: incorrect number of jobs $array_id != $array_cnt\n"
+				set exit_code 1
+			}
+			incr array_cnt
+			exp_continue
+		}
+		timeout {
+			send_user "\nFAILURE: scontrol not responding\n"
+			set exit_code 1
+		}
+		eof {
+			wait
+		}
+	}
+	return $array_cnt
+}
+
+############Test Starts Here###########
+make_bash_script $file_script "sleep 10"
+
+# submit a batch job with an array of $array_size
+spawn $sbatch -N1 --array=0-[expr $array_size - 1] --begin=midnight --input=$file_in --output=$file_out $file_script
+expect {
+	-re "Submitted batch job ($number)" {
+		set job_id $expect_out(1,string)
+		send_user "\njob $job_id was submitted\n"
+	}
+	-re "error" {
+		send_user "\nFAILURE: sbatch did not submit jobs\n"
+		exit 1
+	}
+	timeout {
+		send_user "\nFAILURE: sbatch not responding\n"
+		exit 1
+	}
+	eof {
+		wait
+	}
+}
+
+# checks all the job array indexes
+set job_cnt [multi_scontrol_check $job_id]
+if {$job_cnt != $array_size} {
+	send_user "\nFAILURE: job count found by scontrol bad ($job_cnt != $array_size)\n"
+	set exit_code 1
+}
+
+# uses squeue to check for the jobs
+set job_cnt [multi_squeue_check $job_id]
+if {$job_cnt != $array_size} {
+	send_user "\nFAILURE: job count found by squeue bad ($job_cnt != $array_size)\n"
+	set exit_code 1
+}
+
+# cancel a job with a specific job array index
+set idmatch 0
+spawn $scancel -v $job_id\_$array_id
+expect {
+	-re "Terminating job" {
+		incr idmatch
+		exp_continue
+	}
+	timeout {
+		send_user "\nFAILURE: scancel not responding\n"
+		set exit_code 1
+	}
+	eof {
+		wait
+	}
+}
+
+# checks to see the job was cancelled
+scontrol_check $job_id\_$array_id
+
+# cancels the entire job array
+set idmatch 0
+spawn $scancel -v $job_id
+expect {
+	-re "Terminating job" {
+		incr idmatch
+		exp_continue
+	}
+	timeout {
+		send_user "\nFAILURE: scancel not responding\n"
+		set exit_code 1
+	}
+	eof {
+		wait
+	}
+}
+
+# checks that all the job indexes were cancelled
+scontrol_check $job_id
+
+if {$exit_code == 0} {
+	file delete $file_script $file_in $file_out
+	send_user "\nSUCCESS\n"
+}
+exit $exit_code
-- 
GitLab