Skip to content
Snippets Groups Projects
Commit 77d5016a authored by Moe Jette
Browse files

Remove more kill_srun with slow_kill pid

parent 67e94143
No related branches found
No related tags found
No related merge requests found
Showing
with 61 additions and 100 deletions
......@@ -42,7 +42,7 @@ print_header $test_id
# Submit a slurm allocate job
#
set timeout $max_job_delay
spawn $srun --allocate -t1 --no-shell
set srun_pid [spawn $srun --allocate -t1 --no-shell]
expect {
-re "SLURM_JOBID=($number).*" {
set job_id $expect_out(1,string)
......@@ -50,9 +50,8 @@ expect {
}
timeout {
send_user "\nFAILURE: srun not responding\n"
kill_srun
slow_kill $srun_pid
set exit_code 1
exp_continue
}
eof {
wait
......@@ -95,7 +94,7 @@ exec $scancel --quiet $job_id
# Create a job allocation as some other user, namely root
#
set job_id 0
spawn $srun --allocate -t1 --no-shell --uid=0
set srun_pid [spawn $srun --allocate -t1 --no-shell --uid=0]
expect {
-re "SLURM_JOBID=($number).*" {
set job_id $expect_out(1,string)
......@@ -107,9 +106,8 @@ expect {
}
timeout {
send_user "\nFAILURE: srun not responding\n"
kill_srun
slow_kill $srun_pid
set exit_code 1
exp_continue
}
eof {
wait
......@@ -145,10 +143,7 @@ if { $job_id == -1 } {
send_user "\nFAILURE: job $job_id not in run state\n"
exit 1
}
#
# Kill the job
#
exec $scancel --quiet $job_id
cancel_job $job_id
}
if { $exit_code == 0 } {
......
......@@ -59,7 +59,7 @@ if { [test_bluegene] } {
}
}
spawn $srun -N$node_cnt -n$task_cnt --overcommit --debugger-test -t1 $bin_id
set srun_pid [spawn $srun -N$node_cnt -n$task_cnt --overcommit --debugger-test -t1 $bin_id]
expect {
-re "uid=" {
send_user "\nFAILURE: task not stopped\n"
......@@ -76,9 +76,8 @@ expect {
}
timeout {
send_user "\nFAILURE: srun not responding\n"
kill_srun
slow_kill $srun_pid
set exit_code 1
exp_continue
}
eof {
wait
......
......@@ -7,10 +7,6 @@
# Output: "TEST: #.#" followed by "SUCCESS" if test was successful, OR
# "FAILURE: ..." otherwise with an explanation of the failure, OR
# anything else indicates a failure mode that must be investigated.
#
# Note: This script generates and then deletes files in the working directory
# named test1.42.input
# Note: This test will fail for SLURM versions <0.4
############################################################################
# Copyright (C) 2004 The Regents of the University of California.
# Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
......@@ -56,7 +52,7 @@ make_bash_script $file_in "$bin_sleep 5"
# Spawn a srun batch job that just sleeps for a while
#
set timeout $max_job_delay
spawn $srun --batch --output=/dev/null --error=/dev/null --account=MY_ACCT -t1 $file_in
set srun_pid [spawn $srun --batch --output=/dev/null --error=/dev/null --account=MY_ACCT -t1 $file_in]
expect {
-re "jobid ($number) submitted" {
set job_id1 $expect_out(1,string)
......@@ -64,9 +60,8 @@ expect {
}
timeout {
send_user "\nFAILURE: srun not responding\n"
kill_srun
slow_kill $srun_pid
set exit_code 1
exp_continue
}
eof {
wait
......@@ -82,7 +77,7 @@ if {$job_id1 == 0} {
#
set match_acct 0
set match_state 0
spawn $srun -v --dependency=$job_id1 $scontrol show job $job_id1
set srun_pid [spawn $srun -v --dependency=$job_id1 $scontrol show job $job_id1]
expect {
-re "launching ($number).0" {
set job_id2 $expect_out(1,string)
......@@ -98,9 +93,8 @@ expect {
}
timeout {
send_user "\nFAILURE: srun not responding\n"
kill_srun
slow_kill $srun_pid
set exit_code 1
exp_continue
}
eof {
wait
......@@ -152,7 +146,7 @@ if {$match_jobid != $job_id1} {
# Submit a job to run at noon tomorrow
#
set job_id1 0
spawn $srun --batch --output=/dev/null --error=/dev/null --begin=noon-tomorrow $file_in
set srun_pid [spawn $srun --batch --output=/dev/null --error=/dev/null --begin=noon-tomorrow $file_in]
expect {
-re "jobid ($number) submitted" {
set job_id1 $expect_out(1,string)
......@@ -160,9 +154,8 @@ expect {
}
timeout {
send_user "\nFAILURE: srun not responding\n"
kill_srun
slow_kill $srun_pid
set exit_code 1
exp_continue
}
eof {
wait
......@@ -239,7 +232,7 @@ while { $delayed < $max_job_delay } {
}
if {$is_done == 0} {
send_user "\nFAILURE: unexpected JobState\n"
exec $scancel --quiet $job_id1
cancel_job $job_id1
set exit_code 1
}
......
......@@ -41,7 +41,7 @@ print_header $test_id
#
set timeout 60
for {set node_cnt 1} {$node_cnt > 0} {set node_cnt [expr $node_cnt * 2]} {
spawn $srun --test-only -N$node_cnt -t1 $bin_hostname
set srun_pid [spawn $srun --test-only -N$node_cnt -t1 $bin_hostname]
expect {
-re "allocation success" {
exp_continue
......@@ -52,14 +52,13 @@ for {set node_cnt 1} {$node_cnt > 0} {set node_cnt [expr $node_cnt * 2]} {
}
-re "error" {
set node_cnt 0
set exit_code 1
set exit_code 1
exp_continue
}
timeout {
send_user "\nFAILURE: srun not responding\n"
set exit_code 1
kill_srun
exp_continue
slow_kill $srun_pid
set exit_code 1
}
eof {
wait
......
......@@ -7,9 +7,6 @@
# Output: "TEST: #.#" followed by "SUCCESS" if test was successful, OR
# "FAILURE: ..." otherwise with an explanation of the failure, OR
# anything else indicates a failure mode that must be investigated.
#
# Note: This script generates and then deletes files in the working directory
# named test1.44.output
############################################################################
# Copyright (C) 2005 The Regents of the University of California.
# Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
......
......@@ -60,7 +60,7 @@ if { [test_bluegene] } {
}
}
spawn $srun -N$node_cnt -A -v -t1
set srun_pid [spawn $srun -N$node_cnt -A -v -t1]
expect {
-re "jobid ($number):" {
set job_id_1 $expect_out(1,string)
......@@ -81,11 +81,12 @@ expect {
send_user "\nFAILURE: srun not responding\n"
if {$job_id_1 != 0} {
cancel_job $job_id_1
} else {
kill_srun
}
if {$job_id_2 != 0} {
cancel_job $job_id_2
}
slow_kill $srun_pid
set exit_code 1
exp_continue
}
eof {
wait
......
......@@ -6,9 +6,6 @@
# Output: "TEST: #.#" followed by "SUCCESS" if test was successful, OR
# "FAILURE: ..." otherwise with an explanation of the failure, OR
# anything else indicates a failure mode that must be investigated.
#
# Note: This script generates and then deletes files in the working directory
# named test1.46.input, test1.46.output, and test1.46.error
############################################################################
# Copyright (C) 2002 The Regents of the University of California.
# Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
......@@ -76,7 +73,7 @@ exec $bin_chmod 700 $file_in
# Spawn a shell via srun and send exit command to task 1 only
#
set timeout $max_job_delay
spawn $srun -n10 -N1 -O --kill-on-bad-exit -t1 $file_in
set srun_pid [spawn $srun -n10 -N1 -O --kill-on-bad-exit -t1 $file_in]
expect {
-re "exit code 2" {
send_user "This error is expected, no worries\n"
......@@ -89,9 +86,8 @@ expect {
}
timeout {
send_user "\nFAILURE: srun not responding\n"
kill_srun
slow_kill $srun_pid
set exit_code 1
exp_continue
}
eof {
wait
......
......@@ -49,7 +49,7 @@ make_bash_script $file_in "
set timeout $max_job_delay
set job_id 0
spawn $srun -o $file_out -b $file_in
set srun_pid [spawn $srun -o $file_out -b $file_in]
expect {
-re "jobid ($number) submitted" {
set job_id $expect_out(1,string)
......@@ -57,7 +57,7 @@ expect {
}
timeout {
send_user "\nFAILURE: srun not responding\n"
kill_srun
slow_kill $srun_pid
set exit_code 1
exp_continue
}
......@@ -106,7 +106,7 @@ make_bash_script $file_in "
"
set matches 0
spawn $srun -o $file_out -b $file_in
set srun_pid [spawn $srun -o $file_out -b $file_in]
expect {
-re "More .* requested than permitted" {
send_user "This error was expected, no worries\n\n"
......@@ -115,9 +115,8 @@ expect {
}
timeout {
send_user "\nFAILURE: srun not responding\n"
kill_srun
slow_kill $srun_pid
set exit_code 1
exp_continue
}
eof {
wait
......@@ -133,7 +132,7 @@ make_bash_script $file_in "
$bin_sleep $delay
"
spawn $srun -N1 -o $file_out -b $file_in
set srun_pid [spawn $srun -N1 -o $file_out -b $file_in]
expect {
-re "More nodes requested than permitted" {
send_user "\nFAILURE: srun read from the batch file options"
......@@ -143,9 +142,8 @@ expect {
}
timeout {
send_user "\nFAILURE: srun not responding\n"
kill_srun
slow_kill $srun_pid
set exit_code 1
exp_continue
}
eof {
wait
......
......@@ -68,7 +68,7 @@ if {$got_login_grps == 0} {
# Submit a slurm job that will execute 'id'
#
set timeout $max_job_delay
spawn $srun -N1 -t1 --job-name=test$test_id --mail-type=all --mail-user=$login_grp_info $bin_id -un
set srun_pid [spawn $srun -N1 -t1 --job-name=test$test_id --mail-type=all --mail-user=$login_grp_info $bin_id -un]
expect {
-re "error" {
send_user "\nFAILURE: Error running srun\n"
......@@ -81,9 +81,8 @@ expect {
}
timeout {
send_user "\nFAILURE: srun not responding\n"
kill_srun
slow_kill $srun_pid
set exit_code 1
exp_continue
}
eof {
wait
......
......@@ -81,7 +81,7 @@ if { [test_bluegene] } {
set node_cnt 1-1
}
spawn $srun -N$node_cnt -n$tasks -O -t1 --task-prolog=$task_prolog --task-epilog=$task_epilog $file_in
set srun_pid [spawn $srun -N$node_cnt -n$tasks -O -t1 --task-prolog=$task_prolog --task-epilog=$task_epilog $file_in]
expect {
-re "TEST==prolog_qa" {
incr matches
......@@ -94,9 +94,8 @@ expect {
}
timeout {
send_user "\nFAILURE: srun not responding\n"
kill_srun
slow_kill $srun_pid
set exit_code 1
exp_continue
}
eof {
wait
......
......@@ -46,7 +46,7 @@ if {[test_front_end] != 0 && [test_super_user] == 0} {
#
set matches 0
set timeout $max_job_delay
spawn $srun -N1 -t1 /bad/bad/bad
set srun_pid [spawn $srun -N1 -t1 /bad/bad/bad]
expect {
-re "No such file" {
send_user "\nNo worries, this error is expected\n"
......@@ -65,9 +65,8 @@ expect {
}
timeout {
send_user "\nFAILURE: srun not responding\n"
kill_srun
slow_kill $srun_pid
set exit_code 1
exp_continue
}
eof {
wait
......
......@@ -103,7 +103,7 @@ for {set i 0} {$i<2} {incr i} {
set job_id 0
set node0 ""
set node1 ""
spawn $srun -N2 -l $bin_printenv SLURMD_NODENAME
set srun_pid [spawn $srun -N2 -l $bin_printenv SLURMD_NODENAME]
expect {
-re "SwitchType does not permit arbitrary task distribution" {
set no_hostfile 1
......@@ -124,11 +124,7 @@ for {set i 0} {$i<2} {incr i} {
}
timeout {
send_user "\nFAILURE: srun not responding\n"
if {$job_id == 0} {
kill_srun
} else {
cancel_job $job_id
}
slow_kill $srun_pid
set exit_code 1
}
eof {
......
......@@ -51,7 +51,7 @@ make_bash_script $file_in "$bin_sleep 60"
#
# Submit three jobs with differing nice values
#
spawn $srun --batch --output=/dev/null --error=/dev/null -t2 $file_in
set srun_pid [spawn $srun --batch --output=/dev/null --error=/dev/null -t2 $file_in]
expect {
-re "jobid ($number) submitted" {
set job_id1 $expect_out(1,string)
......@@ -59,7 +59,7 @@ expect {
}
timeout {
send_user "\nFAILURE: srun not responding\n"
kill_srun
slow_kill $srun_pid
exit 1
}
eof {
......@@ -68,11 +68,10 @@ expect {
}
if {$job_id1 == 0} {
send_user "\nFAILURE: srun submit failed\n"
kill_srun
exit 1
}
spawn $srun --batch --output=/dev/null --error=/dev/null -t2 --nice $file_in
set srun_pid [spawn $srun --batch --output=/dev/null --error=/dev/null -t2 --nice $file_in]
expect {
-re "jobid ($number) submitted" {
set job_id2 $expect_out(1,string)
......@@ -81,7 +80,7 @@ expect {
timeout {
send_user "\nFAILURE: srun not responding\n"
cancel_job $job_id1
kill_srun
slow_kill $srun_pid
exit 1
}
eof {
......@@ -91,10 +90,9 @@ expect {
if {$job_id2 == 0} {
send_user "\nFAILURE: srun submit failed\n"
cancel_job $job_id1
kill_srun
exit 1
}
spawn $srun --batch --output=/dev/null --error=/dev/null -t2 --nice=200 $file_in
set srun_pid [spawn $srun --batch --output=/dev/null --error=/dev/null -t2 --nice=200 $file_in]
expect {
-re "jobid ($number) submitted" {
set job_id3 $expect_out(1,string)
......@@ -104,7 +102,7 @@ expect {
send_user "\nFAILURE: srun not responding\n"
cancel_job $job_id1
cancel_job $job_id2
kill_srun
slow_kill $srun_pid
exit 1
}
eof {
......
......@@ -60,7 +60,7 @@ exec $bin_chmod 700 $file_in
#
set matches 0
set timeout $max_job_delay
spawn $srun -N1 -n4 --overcommit -l -t1 --multi-prog $file_in
set srun_pid [spawn $srun -N1 -n4 --overcommit -l -t1 --multi-prog ./$file_in]
expect {
-re "($number): task:($number):offset:($number)" {
set label $expect_out(1,string)
......@@ -82,9 +82,8 @@ expect {
}
timeout {
send_user "\nFAILURE: srun not responding\n"
kill_srun
slow_kill $srun_pid
set exit_code 1
exp_continue
}
eof {
wait
......@@ -118,8 +117,7 @@ close $file
exec $bin_chmod 700 $file_in
set matches 0
set timed_out 0
spawn $srun -N1 -n4 --overcommit -l -t1 --multi-prog --debugger-test $file_in
set srun_pid [spawn $srun -N1 -n4 --overcommit -l -t1 --multi-prog --debugger-test ./$file_in]
expect {
-re "executable:(/bin/)($alpha)" {
if {[string compare $expect_out(2,string) "date"] != 0} {
......@@ -128,28 +126,27 @@ expect {
if {[string compare $expect_out(2,string) "hostname"] != 0} {
incr matches
}
if {$matches == 4} {
slow_kill $srun_pid
}
exp_continue
}
timeout {
send_user "\nFAILURE: srun not responding\n"
kill_srun
slow_kill $srun_pid
set exit_code 1
exp_continue
}
eof {
wait
}
}
if {$timed_out == 1} {
send_user "\nEarly termination is expected, no worries.\n"
}
if {$matches != 4} {
send_user "\nFAILURE: did not generate full list of executables.\n"
set exit_code 1
}
if {$exit_code == 0} {
exec $bin_rm $file_in
exec $bin_rm -f $file_in
send_user "\nSUCCESS\n"
}
exit $exit_code
......@@ -54,11 +54,10 @@ make_bash_script $test_script {
#
# Launch the test script
#
set srun_pid [spawn $srun -u -v -n1 $test_script]
set timeout $max_job_delay
set jobid 0
set stepid 0
set srun_pid [spawn $srun -u -v -n1 $test_script]
expect {
-re "launching (($number)\.0)" {
set stepid $expect_out(1,string)
......@@ -68,7 +67,7 @@ expect {
"Running" {}
timeout {
send_user "\nFAILURE: Failed to launch test program through srun\n"
catch {exec $scancel $jobid}
slow_kill $srun_pid
exit 1
}
}
......
......@@ -58,9 +58,8 @@ close $file
#
# Launch the test script
#
spawn $srun -v -n1 --input=$file_in --output=$file_out --error=- cat
set timeout $max_job_delay
set srun_pid [spawn $srun -v -n1 --input=$file_in --output=$file_out --error=- cat]
expect {
-re {launching ($number)\.($number)} {
set jobid $expect_out(1,string)
......@@ -68,7 +67,7 @@ expect {
}
timeout {
send_user "\nFAILURE: srun launch failed\n"
catch {exec $scancel $jobid}
slow_kill $srun_pid
exit 1
}
eof {
......
......@@ -41,7 +41,7 @@ set super_user [test_super_user]
# Submit a slurm job that will execute 'id'
#
set timeout $max_job_delay
spawn $srun -N1 --jobid=66000 -t1 $bin_id
set srun_pid [spawn $srun -N1 --jobid=66000 -t1 $bin_id]
expect {
-re "Invalid job id specified" {
if {[test_super_user] != 0} {
......@@ -66,9 +66,8 @@ expect {
}
timeout {
send_user "\nFAILURE: srun not responding\n"
kill_srun
slow_kill $srun_pid
set exit_code 1
exp_continue
}
eof {
wait
......
......@@ -50,7 +50,7 @@ expect {
timeout {
send_user "\nFAILURE: srun failed to grab an allocation "
send_user "in a timely manner.\n"
exec kill $srun_alloc_pid
slow_kill $srun_alloc_pid
exit 1
}
}
......@@ -72,9 +72,7 @@ expect {
exp_continue
}
timeout {
exec kill $srun_launch_pid
exec kill $srun_launch_pid
exec kill -9 $srun_launch_pid
slow_kill $srun_launch_pid
}
eof {
wait
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment