Skip to content
Snippets Groups Projects
Commit 7582ccdd authored by Nathan Yee's avatar Nathan Yee Committed by Morris Jette
Browse files

Add more job requeue tests

parent eb3fc11d
No related branches found
No related tags found
No related merge requests found
...@@ -140,6 +140,9 @@ EXTRA_DIST = \ ...@@ -140,6 +140,9 @@ EXTRA_DIST = \
test2.18 \ test2.18 \
test2.19 \ test2.19 \
test2.20 \ test2.20 \
test2.21 \
test2.22 \
test2.23 \
test3.1 \ test3.1 \
test3.2 \ test3.2 \
test3.3 \ test3.3 \
......
...@@ -524,6 +524,9 @@ EXTRA_DIST = \ ...@@ -524,6 +524,9 @@ EXTRA_DIST = \
test2.18 \ test2.18 \
test2.19 \ test2.19 \
test2.20 \ test2.20 \
test2.21 \
test2.22 \
test2.23 \
test3.1 \ test3.1 \
test3.2 \ test3.2 \
test3.3 \ test3.3 \
......
...@@ -234,6 +234,9 @@ test2.17 Validate scontrol displays and updates Allow/Deny Qos. ...@@ -234,6 +234,9 @@ test2.17 Validate scontrol displays and updates Allow/Deny Qos.
test2.18 Validate that Allow/Deny accounts are enforced. test2.18 Validate that Allow/Deny accounts are enforced.
test2.19 Validate that Allow/Deny Qos are enforced. test2.19 Validate that Allow/Deny Qos are enforced.
test2.20 Validate scontrol show hostnames. test2.20 Validate scontrol show hostnames.
test2.21 Validate scontrol requeue of failed or completed job.
test2.22 Validate scontrol requeuehold requeues job to held pending state.
test2.23 Validate scontrol requeuehold State=SpecialExit.
test3.# Testing of scontrol options (best run as SlurmUser or root). test3.# Testing of scontrol options (best run as SlurmUser or root).
......
#!/usr/bin/expect
############################################################################
# Purpose: Test of SLURM functionality
# Validate scontrol requeue of failed or completed job.
#
#
# Output: "TEST: #.#" followed by "SUCCESS" if test was successful, OR
# "FAILURE: ..." otherwise with an explanation of the failure, OR
# anything else indicates a failure mode that must be investigated.
############################################################################
# Copyright (C) 2013 SchedMD LLC
# Written by Nathan Yee <nyee32@schedmd.com>
#
# This file is part of SLURM, a resource management program.
# For details, see <http://slurm.schedmd.com/>.
# Please also read the included file: DISCLAIMER.
#
# SLURM is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 2 of the License, or (at your option)
# any later version.
#
# SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
# details.
#
# You should have received a copy of the GNU General Public License along
# with SLURM; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
############################################################################
source ./globals
set test_id "2.21"
set complete_script "test$test_id\.bash"
set fail_script "test$test_id\.fail"
set job_id 0
set exit_code 0
print_header $test_id
# Remove any vestigial scripts
exec $bin_rm -f $complete_script $fail_script
make_bash_script $complete_script "$bin_sleep 20"
make_bash_script $fail_script "BadCommand"
proc check_state { id } {
global scontrol exit_code
set job_state 0
spawn $scontrol show job $id
expect {
-re "JobState=PENDING" {
set job_state 1
exp_continue
}
timeout {
send_user "\nFAILURE: scontrol is not responding\n"
set exit_code 0
}
eof {
wait
}
}
if {$job_state != 1} {
send_user "\nFAILURE: Job $id state was not PENDING after it was requeued\n"
set exit_code 1
}
}
proc requeue_job { id } {
global scontrol exit_code
spawn $scontrol requeue $id
expect {
timeout {
send_user "\nFAILURE: scontrol is not responding\n"
set exit_code 1
}
eof {
wait
}
}
}
#
# Run a job that will complete
#
spawn $sbatch -N1 -o /dev/null -e /dev/null -t1 $complete_script
expect {
-re "Submitted batch job ($number)" {
set job_id $expect_out(1,string)
exp_continue
}
timeout {
send_user "\nFAILURE: sbatch is not responding\n"
set exit_code 1
}
eof {
wait
}
}
if { $job_id == 0 } {
send_user "\nFAILURE: sbatch did not submit job\n"
exit 1
}
# Wait for the job to be in the complete state
wait_for_job $job_id DONE
# Requeue the job when it is complete
requeue_job $job_id
# Check to see if the job state is PENDING after the requeue
check_state $job_id
cancel_job $job_id
#
# Run a job that will fail
#
set job_id 0
spawn $sbatch -N1 -o /dev/null -e /dev/null -t 1 $fail_script
expect {
-re "Submitted batch job ($number)" {
set job_id $expect_out(1,string)
exp_continue
}
timeout {
send_user "\nFAILURE: sbatch is not responding\n"
set exit_code 1
}
eof {
wait
}
}
if { $job_id == 0 } {
send_user "\nFAILURE: sbatch did not submit job\n"
exit
}
# Wait for the job to be in the complete state
wait_for_job $job_id DONE
# Requeue the job when it is complete
requeue_job $job_id
# Check to see if the job state is PENDING after the requeue
check_state $job_id
cancel_job $job_id
if {$exit_code == 0} {
send_user "\nSUCCESS\n"
exec $bin_rm $complete_script $fail_script
}
exit $exit_code
#!/usr/bin/expect
############################################################################
# Purpose: Test of SLURM functionality
# Validate scontrol requeuehold requeues job to held pending state.
#
#
# Output: "TEST: #.#" followed by "SUCCESS" if test was successful, OR
# "FAILURE: ..." otherwise with an explanation of the failure, OR
# anything else indicates a failure mode that must be investigated.
############################################################################
# Copyright (C) 2013 SchedMD LLC
# Written by Nathan Yee <nyee32@schedmd.com>
#
# This file is part of SLURM, a resource management program.
# For details, see <http://slurm.schedmd.com/>.
# Please also read the included file: DISCLAIMER.
#
# SLURM is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 2 of the License, or (at your option)
# any later version.
#
# SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
# details.
#
# You should have received a copy of the GNU General Public License along
# with SLURM; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
############################################################################
source ./globals
set test_id "2.22"
set script "test$test_id.bash"
set job_id 0
set exit_code 0
print_header $test_id
# Remove any vestigial scripts
exec $bin_rm -f $script
make_bash_script $script "$bin_sleep 20"
proc check_hold { job } {
global scontrol exit_code
set hold 0
spawn $scontrol show job $job
expect {
-re "Priority=0" {
set hold 1
exp_continue
}
timeout {
send_user "\nFAILURE scontrol is not responding\n"
set exit_code 1
}
eof {
wait
}
}
if { $hold != 1 } {
send_user "\nFAILURE scontrol did not hold job after it was requeued\n"
set exit_code 1
}
}
proc check_release { job } {
global scontrol number exit_code
set priority 0
spawn $scontrol show job $job
expect {
-re "Priority=($number)" {
set priority $expect_out(1,string)
exp_continue
}
timeout {
send_user "\nFAILURE: scontrol is not responding\n"
set exit_code 1
}
eof {
wait
}
}
if { $priority == 0 } {
send_user "\nFAILURE: priority was not set to a non zero value after it was released\n"
set exit_code 1
}
}
proc check_state { job } {
global scontrol exit_code
set job_state 0
spawn $scontrol show job $job
expect {
-re "JobState=PENDING" {
set job_state 1
exp_continue
}
timeout {
send_user "\nFAILURE: scontrol is not responding\n"
set exit_code 0
}
eof {
wait
}
}
if {$job_state != 1} {
send_user "\nFAILURE: Job $job state was not PENDING after it was requeued\n"
set exit_code 1
}
}
spawn $sbatch -N1 -t1 -o /dev/null -e /dev/null $script
expect {
-re "Submitted batch job ($number)" {
set job_id $expect_out(1,string)
exp_continue
}
timeout {
send_user "\nFAILURE sbatch is not responding\n"
set exit_code 1
}
eof {
wait
}
}
if {$job_id == 0} {
send_user "\nFAILURE: sbatch did not submit job\n"
exit 1
}
wait_for_job $job_id DONE
spawn $scontrol requeuehold $job_id
expect {
timeout {
send_user "\nFAILURE scontrol is not responding\n"
set exit_code 1
}
eof {
wait
}
}
# Check if the job is in hold
check_hold $job_id
# Check that the job state is pending after released
check_state $job_id
spawn $scontrol release $job_id
expect {
timeout {
send_user "\nFAILURE: scontrol is not responding\n"
set exit_code 1
}
eof {
wait
}
}
check_release $job_id
cancel_job $job_id
if {$exit_code == 0} {
send_user "\nSUCCESS\n"
exec $bin_rm $script
}
exit $exit_code
#!/usr/bin/expect
############################################################################
# Purpose: Test of SLURM functionality
# Validate scontrol requeuehold State=SpecialExit.
#
# Output: "TEST: #.#" followed by "SUCCESS" if test was successful, OR
# "FAILURE: ..." otherwise with an explanation of the failure, OR
# anything else indicates a failure mode that must be investigated.
############################################################################
# Copyright (C) 2013 SchedMD LLC
# Written by Nathan Yee <nyee32@schedmd.com>
#
# This file is part of SLURM, a resource management program.
# For details, see <http://slurm.schedmd.com/>.
# Please also read the included file: DISCLAIMER.
#
# SLURM is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 2 of the License, or (at your option)
# any later version.
#
# SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
# details.
#
# You should have received a copy of the GNU General Public License along
# with SLURM; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
############################################################################
source ./globals
set test_id "2.23"
set script "test$test_id\.bash"
set job_id 0
set exit_code 0
print_header $test_id
# Remove any vestigial files
exec $bin_rm -f $script
make_bash_script $script "$bin_sleep 20"
proc check_state { id } {
global scontrol exit_code
set chk_state 0
spawn $scontrol show job $id
expect {
-re "JobState=SPECIAL_EXIT" {
set chk_state 1
exp_continue
}
timeout {
send_user "\nFAILURE: scontrol is nor responding\n"
set exit_code 1
}
eof {
wait
}
}
if { $chk_state != 1 } {
send_user "\nFAILURE: Job $id state was not set to SPECIAL_EXIT\n"
set exit_code
}
}
proc check_release { job } {
global scontrol number exit_code
set priority 0
spawn $scontrol show job $job
expect {
-re "Priority=($number)" {
set priority $expect_out(1,string)
exp_continue
}
timeout {
send_user "\nFAILURE: scontrol is not responding\n"
set exit_code 1
}
eof {
wait
}
}
if { $priority == 0 } {
send_user "\nFAILURE: priority was not set to a non zero value after it was released\n"
set exit_code 1
}
}
spawn $sbatch -N1 -t1 -o /dev/null -e /dev/null $script
expect {
-re "Submitted batch job ($number)" {
set job_id $expect_out(1,string)
exp_continue
}
timeout {
send_user "\nFAILURE: sbatch is not responding\n"
set exit_code 1
}
eof {
wait
}
}
if { $job_id == 0 } {
send_user "\nFAILURE: sbatch did not submit job\n"
exit 1
}
wait_for_job $job_id DONE
spawn $scontrol requeuehold State=SpecialExit $job_id
expect {
timeout {
send_user "\nFAILURE: scontrol is not responding\n"
set exit_code 1
}
eof {
wait
}
}
check_state $job_id
spawn $scontrol release $job_id
expect {
timeout {
send_user "\nFAILURE: scontrol is not responding\n"
set exit_code 1
}
eof {
wait
}
}
check_release $job_id
cancel_job $job_id
if {$exit_code == 0} {
send_user "\nSUCCESS\n"
exec $bin_rm $script
}
exit $exit_code
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment