Skip to content
Snippets Groups Projects
Commit 67e94143 authored by Moe Jette
Browse files

Replace a bunch of kill_srun calls with slow_kill $pid calls.

parent 19d0aaba
No related branches found
No related tags found
No related merge requests found
Showing changes with 59 additions and 93 deletions
...@@ -81,7 +81,6 @@ expect { ...@@ -81,7 +81,6 @@ expect {
send_user "\nFAILURE: srun not responding\n" send_user "\nFAILURE: srun not responding\n"
slow_kill $srun_pid slow_kill $srun_pid
set exit_code 1 set exit_code 1
exp_continue
} }
eof { eof {
wait wait
......
...@@ -54,7 +54,6 @@ expect { ...@@ -54,7 +54,6 @@ expect {
send_user "\nFAILURE: srun not responding\n" send_user "\nFAILURE: srun not responding\n"
slow_kill $srun_pid slow_kill $srun_pid
set exit_code 1 set exit_code 1
exp_continue
} }
} }
......
...@@ -78,7 +78,6 @@ expect { ...@@ -78,7 +78,6 @@ expect {
send_user "\nFAILURE: srun not responding\n" send_user "\nFAILURE: srun not responding\n"
slow_kill $srun_pid slow_kill $srun_pid
set exit_code 1 set exit_code 1
exp_continue
} }
eof { eof {
wait wait
...@@ -189,7 +188,6 @@ expect { ...@@ -189,7 +188,6 @@ expect {
send_user "\nFAILURE: srun not responding\n" send_user "\nFAILURE: srun not responding\n"
slow_kill $srun_pid slow_kill $srun_pid
set exit_code 1 set exit_code 1
exp_continue
} }
eof { eof {
wait wait
......
...@@ -68,7 +68,6 @@ expect { ...@@ -68,7 +68,6 @@ expect {
send_user "\nFAILURE: srun not responding\n" send_user "\nFAILURE: srun not responding\n"
slow_kill $srun_pid slow_kill $srun_pid
set exit_code 1 set exit_code 1
exp_continue
} }
eof { eof {
wait wait
......
...@@ -53,7 +53,6 @@ expect { ...@@ -53,7 +53,6 @@ expect {
send_user "\nFAILURE: srun not responding\n" send_user "\nFAILURE: srun not responding\n"
slow_kill $srun_pid slow_kill $srun_pid
set exit_code 1 set exit_code 1
exp_continue
} }
eof { eof {
wait wait
...@@ -85,7 +84,6 @@ expect { ...@@ -85,7 +84,6 @@ expect {
send_user "\nFAILURE: srun not responding\n" send_user "\nFAILURE: srun not responding\n"
slow_kill $srun_pid slow_kill $srun_pid
set exit_code 1 set exit_code 1
exp_continue
} }
eof { eof {
wait wait
......
...@@ -52,7 +52,6 @@ expect { ...@@ -52,7 +52,6 @@ expect {
send_user "\nFAILURE: srun not responding\n" send_user "\nFAILURE: srun not responding\n"
slow_kill $srun_pid slow_kill $srun_pid
set exit_code 1 set exit_code 1
exp_continue
} }
eof { eof {
wait wait
......
...@@ -69,7 +69,6 @@ expect { ...@@ -69,7 +69,6 @@ expect {
send_user "\nFAILURE: srun not responding\n" send_user "\nFAILURE: srun not responding\n"
slow_kill $srun_pid slow_kill $srun_pid
set exit_code 1 set exit_code 1
exp_continue
} }
eof { eof {
wait wait
......
...@@ -49,7 +49,7 @@ exec $bin_touch $scratch_file ...@@ -49,7 +49,7 @@ exec $bin_touch $scratch_file
# The --unbuffered option will send the message which lacks a '\n' # The --unbuffered option will send the message which lacks a '\n'
# #
set timeout $max_job_delay set timeout $max_job_delay
spawn $srun --unbuffered --verbose -t1 $bin_rm -i $scratch_file set srun_pid [spawn $srun --unbuffered --verbose -t1 $bin_rm -i $scratch_file]
expect { expect {
-re "launching ($number).0 on host" { -re "launching ($number).0 on host" {
set job_id $expect_out(1,string) set job_id $expect_out(1,string)
...@@ -62,9 +62,8 @@ expect { ...@@ -62,9 +62,8 @@ expect {
} }
timeout { timeout {
send_user "\nFAILURE: srun not responding\n" send_user "\nFAILURE: srun not responding\n"
kill_srun slow_kill $srun_pid
set exit_code 1 set exit_code 1
exp_continue
} }
eof { eof {
wait wait
......
...@@ -44,7 +44,7 @@ print_header $test_id ...@@ -44,7 +44,7 @@ print_header $test_id
# #
set timeout $max_job_delay set timeout $max_job_delay
set match 0 set match 0
spawn $srun --allocate --verbose -t1 set srun_pid [spawn $srun --allocate --verbose -t1]
expect { expect {
-re "jobid ($number).*" { -re "jobid ($number).*" {
set job_id $expect_out(1,string) set job_id $expect_out(1,string)
...@@ -68,9 +68,8 @@ expect { ...@@ -68,9 +68,8 @@ expect {
} }
timeout { timeout {
send_user "\nFAILURE: srun not responding\n" send_user "\nFAILURE: srun not responding\n"
kill_srun slow_kill $srun_pid
set exit_code 1 set exit_code 1
exp_continue
} }
eof { eof {
wait wait
......
...@@ -53,7 +53,7 @@ exec $bin_chmod 700 $file_prog ...@@ -53,7 +53,7 @@ exec $bin_chmod 700 $file_prog
# Spawn initial program via srun # Spawn initial program via srun
# #
set timeout $max_job_delay set timeout $max_job_delay
spawn $srun -N1-4 -v -t5 $file_prog set srun_pid [spawn $srun -N1-4 -v -t5 -l $file_prog]
set init_id $spawn_id set init_id $spawn_id
expect { expect {
-i $init_id -i $init_id
...@@ -66,6 +66,7 @@ expect { ...@@ -66,6 +66,7 @@ expect {
} }
timeout { timeout {
send_user "\nFAILURE: srun (launch) not responding\n" send_user "\nFAILURE: srun (launch) not responding\n"
slow_kill $srun_pid
set exit_code 1 set exit_code 1
} }
eof { eof {
...@@ -86,7 +87,7 @@ if {$matches == 0} { ...@@ -86,7 +87,7 @@ if {$matches == 0} {
# #
set matches 0 set matches 0
set timeout 10 set timeout 10
spawn $srun -vv --attach=$job_id --join set attach_pid [spawn $srun -vv -l --attach=$job_id --join]
set attach_id $spawn_id set attach_id $spawn_id
expect { expect {
-i $attach_id -i $attach_id
...@@ -96,9 +97,8 @@ expect { ...@@ -96,9 +97,8 @@ expect {
} }
timeout { timeout {
send_user "\nFAILURE: srun (attach) not responding\n" send_user "\nFAILURE: srun (attach) not responding\n"
kill_srun slow_kill $attach_pid
set exit_code 1 set exit_code 1
exp_continue
} }
eof { eof {
wait wait
......
...@@ -54,7 +54,7 @@ for {set task_id 0} {$task_id < $task_cnt} {incr task_id} { ...@@ -54,7 +54,7 @@ for {set task_id 0} {$task_id < $task_cnt} {incr task_id} {
exec $bin_rm -f $file_out_t_glob exec $bin_rm -f $file_out_t_glob
} }
set timeout $max_job_delay set timeout $max_job_delay
spawn $srun --output=$file_out_t -N1 -n$task_cnt -O -v -t1 $bin_id set srun_pid [spawn $srun --output=$file_out_t -N1 -n$task_cnt -O -v -t1 $bin_id]
expect { expect {
-re "jobid ($number).*" { -re "jobid ($number).*" {
set job_id $expect_out(1,string) set job_id $expect_out(1,string)
...@@ -62,7 +62,7 @@ expect { ...@@ -62,7 +62,7 @@ expect {
} }
timeout { timeout {
send_user "\nFAILURE: srun not responding\n" send_user "\nFAILURE: srun not responding\n"
kill_srun slow_kill $srun_pid
exit 1 exit 1
} }
eof { eof {
...@@ -94,7 +94,7 @@ if {$file_cnt != $task_cnt} { ...@@ -94,7 +94,7 @@ if {$file_cnt != $task_cnt} {
# #
set job_id 0 set job_id 0
set srun_exit 0 set srun_exit 0
spawn $srun --error=$file_err_j --output=/dev/null -N1 -n$task_cnt -O -v -t1 $bin_sleep aaa set srun_pid [spawn $srun --error=$file_err_j --output=/dev/null -N1 -n$task_cnt -O -v -t1 $bin_sleep aaa]
expect { expect {
-re "jobid ($number).*" { -re "jobid ($number).*" {
set job_id $expect_out(1,string) set job_id $expect_out(1,string)
...@@ -107,7 +107,7 @@ expect { ...@@ -107,7 +107,7 @@ expect {
} }
timeout { timeout {
send_user "\nFAILURE: srun not responding\n" send_user "\nFAILURE: srun not responding\n"
kill_srun slow_kill $srun_pid
exit 1 exit 1
} }
eof { eof {
...@@ -136,7 +136,7 @@ if {[wait_for_file $file_err_j_glob] == 0} { ...@@ -136,7 +136,7 @@ if {[wait_for_file $file_err_j_glob] == 0} {
# file name and confirm it is created # file name and confirm it is created
# #
set job_id 0 set job_id 0
spawn $srun --output=$file_out_J -N1 -v -t1 $bin_hostname set srun_pid [spawn $srun --output=$file_out_J -N1 -v -t1 $bin_hostname]
expect { expect {
-re "jobid ($number).*" { -re "jobid ($number).*" {
set job_id $expect_out(1,string) set job_id $expect_out(1,string)
...@@ -144,7 +144,7 @@ expect { ...@@ -144,7 +144,7 @@ expect {
} }
timeout { timeout {
send_user "\nFAILURE: srun not responding\n" send_user "\nFAILURE: srun not responding\n"
kill_srun slow_kill $srun_pid
exit 1 exit 1
} }
eof { eof {
...@@ -173,7 +173,7 @@ set file_out_n_glob "test$test_id.n.$node_id.output" ...@@ -173,7 +173,7 @@ set file_out_n_glob "test$test_id.n.$node_id.output"
exec $bin_rm -f $file_out_n_glob exec $bin_rm -f $file_out_n_glob
set job_id 0 set job_id 0
spawn $srun --output=$file_out_n -N1 -n2 -O -v -t1 $bin_hostname set srun_pid [spawn $srun --output=$file_out_n -N1 -n2 -O -v -t1 $bin_hostname]
expect { expect {
-re "jobid ($number).*" { -re "jobid ($number).*" {
set job_id $expect_out(1,string) set job_id $expect_out(1,string)
...@@ -181,7 +181,7 @@ expect { ...@@ -181,7 +181,7 @@ expect {
} }
timeout { timeout {
send_user "\nFAILURE: srun not responding\n" send_user "\nFAILURE: srun not responding\n"
kill_srun slow_kill $srun_pid
exit 1 exit 1
} }
eof { eof {
...@@ -241,7 +241,7 @@ if { [test_bluegene] } { ...@@ -241,7 +241,7 @@ if { [test_bluegene] } {
} }
set job_id 0 set job_id 0
spawn $srun --batch --output=/dev/null -N$node_cnt -n$task_cnt -O -t1 $file_in set srun_pid [spawn $srun --batch --output=/dev/null -N$node_cnt -n$task_cnt -O -t1 $file_in]
expect { expect {
-re "jobid ($number) submitted" { -re "jobid ($number) submitted" {
set job_id $expect_out(1,string) set job_id $expect_out(1,string)
...@@ -249,7 +249,7 @@ expect { ...@@ -249,7 +249,7 @@ expect {
} }
timeout { timeout {
send_user "\nFAILURE: srun not responding\n" send_user "\nFAILURE: srun not responding\n"
kill_srun slow_kill $srun_pid
exit 1 exit 1
} }
eof { eof {
......
...@@ -53,7 +53,6 @@ expect { ...@@ -53,7 +53,6 @@ expect {
send_user "\nFAILURE: srun not responding\n" send_user "\nFAILURE: srun not responding\n"
slow_kill $srun_pid slow_kill $srun_pid
set exit_code 1 set exit_code 1
exp_continue
} }
eof { eof {
wait wait
......
...@@ -41,7 +41,7 @@ print_header $test_id ...@@ -41,7 +41,7 @@ print_header $test_id
# Spawn a program via srun with stdout forwarding disabled # Spawn a program via srun with stdout forwarding disabled
# #
set timeout $max_job_delay set timeout $max_job_delay
spawn $srun --output=none -t1 $bin_id set srun_pid [spawn $srun --output=none -t1 $bin_id]
expect { expect {
-re "uid=" { -re "uid=" {
send_user "\nFAILURE: srun improperly forwarded stdout\n" send_user "\nFAILURE: srun improperly forwarded stdout\n"
...@@ -55,9 +55,8 @@ expect { ...@@ -55,9 +55,8 @@ expect {
} }
timeout { timeout {
send_user "\nFAILURE: srun not responding\n" send_user "\nFAILURE: srun not responding\n"
kill_srun slow_kill $srun_pid
set exit_code 1 set exit_code 1
exp_continue
} }
eof { eof {
wait wait
...@@ -68,7 +67,7 @@ expect { ...@@ -68,7 +67,7 @@ expect {
# Spawn a program via srun with stderr forwarding disabled # Spawn a program via srun with stderr forwarding disabled
# #
set matches 0 set matches 0
spawn $srun --error=none -t1 $bin_sleep aaa set srun_pid [spawn $srun --error=none -t1 $bin_sleep aaa]
expect { expect {
-re "invalid" { -re "invalid" {
send_user "\nFAILURE: srun improperly forwarded stderr\n" send_user "\nFAILURE: srun improperly forwarded stderr\n"
...@@ -82,9 +81,8 @@ expect { ...@@ -82,9 +81,8 @@ expect {
} }
timeout { timeout {
send_user "\nFAILURE: srun not responding\n" send_user "\nFAILURE: srun not responding\n"
kill_srun slow_kill $srun_pid
set exit_code 1 set exit_code 1
exp_continue
} }
eof { eof {
wait wait
......
...@@ -44,7 +44,7 @@ print_header $test_id ...@@ -44,7 +44,7 @@ print_header $test_id
# Spawn a shell via srun with stdout forwarding disabled # Spawn a shell via srun with stdout forwarding disabled
# #
set timeout $max_job_delay set timeout $max_job_delay
spawn $srun --input=$task_id -N1 -n10 -O -v --wait=2 -t1 $bin_bash set srun_pid [spawn $srun --input=$task_id -N1 -n10 -O -v --wait=2 -t1 $bin_bash]
expect { expect {
-re "launching ($number).0 on host" { -re "launching ($number).0 on host" {
set job_id $expect_out(1,string) set job_id $expect_out(1,string)
...@@ -68,9 +68,8 @@ expect { ...@@ -68,9 +68,8 @@ expect {
} }
timeout { timeout {
send_user "\nFAILURE: srun not responding\n" send_user "\nFAILURE: srun not responding\n"
kill_srun slow_kill $srun_pid
set exit_code 1 set exit_code 1
exp_continue
} }
eof { eof {
wait wait
......
...@@ -53,7 +53,7 @@ if { [test_bluegene] } { ...@@ -53,7 +53,7 @@ if { [test_bluegene] } {
} }
} }
spawn $srun -N$node_cnt -l --threads=1 -t1 $bin_hostname set srun_pid [spawn $srun -N$node_cnt -l --threads=1 -t1 $bin_hostname]
expect { expect {
-re "0: ($alpha_numeric)" { -re "0: ($alpha_numeric)" {
set host_0 $expect_out(1,string) set host_0 $expect_out(1,string)
...@@ -61,9 +61,8 @@ expect { ...@@ -61,9 +61,8 @@ expect {
} }
timeout { timeout {
send_user "\nFAILURE: srun not responding\n" send_user "\nFAILURE: srun not responding\n"
kill_srun slow_kill $srun_pid
set exit_code 1 set exit_code 1
exp_continue
} }
eof { eof {
wait wait
...@@ -83,7 +82,7 @@ if {[string compare $host_0 ""] == 0} { ...@@ -83,7 +82,7 @@ if {[string compare $host_0 ""] == 0} {
# #
set host_0 "" set host_0 ""
set timeout $max_job_delay set timeout $max_job_delay
spawn $srun -N$node_cnt -n32 -O -l --threads=32 -t1 $bin_hostname set srun_pid [spawn $srun -N$node_cnt -n32 -O -l --threads=32 -t1 $bin_hostname]
expect { expect {
-re "0: ($alpha_numeric)" { -re "0: ($alpha_numeric)" {
set host_0 $expect_out(1,string) set host_0 $expect_out(1,string)
...@@ -91,9 +90,8 @@ expect { ...@@ -91,9 +90,8 @@ expect {
} }
timeout { timeout {
send_user "\nFAILURE: srun not responding\n" send_user "\nFAILURE: srun not responding\n"
kill_srun slow_kill $srun_pid
set exit_code 1 set exit_code 1
exp_continue
} }
eof { eof {
wait wait
......
...@@ -44,7 +44,7 @@ print_header $test_id ...@@ -44,7 +44,7 @@ print_header $test_id
set err_msg 0 set err_msg 0
set host_0 "" set host_0 ""
set timeout $max_job_delay set timeout $max_job_delay
spawn $srun -N1 -l --mincpus=999999 -t1 $bin_hostname set srun_pid [spawn $srun -N1 -l --mincpus=999999 -t1 $bin_hostname]
expect { expect {
-re "configuration is not available" { -re "configuration is not available" {
send_user "This error is expected, no worries\n" send_user "This error is expected, no worries\n"
...@@ -57,9 +57,8 @@ expect { ...@@ -57,9 +57,8 @@ expect {
} }
timeout { timeout {
send_user "\nFAILURE: srun not responding\n" send_user "\nFAILURE: srun not responding\n"
kill_srun slow_kill $srun_pid
set exit_code 1 set exit_code 1
exp_continue
} }
eof { eof {
wait wait
...@@ -80,7 +79,7 @@ if {$err_msg != 1} { ...@@ -80,7 +79,7 @@ if {$err_msg != 1} {
# #
set host_0 "" set host_0 ""
set timeout $max_job_delay set timeout $max_job_delay
spawn $srun -N1 -l --mincpus=1 -t1 $bin_hostname set srun_pid [spawn $srun -N1 -l --mincpus=1 -t1 $bin_hostname]
expect { expect {
-re "0: ($alpha_numeric)" { -re "0: ($alpha_numeric)" {
set host_0 $expect_out(1,string) set host_0 $expect_out(1,string)
...@@ -88,9 +87,8 @@ expect { ...@@ -88,9 +87,8 @@ expect {
} }
timeout { timeout {
send_user "\nFAILURE: srun not responding\n" send_user "\nFAILURE: srun not responding\n"
kill_srun slow_kill $srun_pid
set exit_code 1 set exit_code 1
exp_continue
} }
eof { eof {
wait wait
...@@ -108,7 +106,7 @@ if {[string compare $host_0 ""] == 0} { ...@@ -108,7 +106,7 @@ if {[string compare $host_0 ""] == 0} {
set err_msg 0 set err_msg 0
set host_0 "" set host_0 ""
set timeout $max_job_delay set timeout $max_job_delay
spawn $srun -N1 -l --mem=999999 -t1 $bin_hostname set srun_pid [spawn $srun -N1 -l --mem=999999 -t1 $bin_hostname]
expect { expect {
-re "configuration is not available" { -re "configuration is not available" {
send_user "This error is expected, no worries\n" send_user "This error is expected, no worries\n"
...@@ -121,9 +119,8 @@ expect { ...@@ -121,9 +119,8 @@ expect {
} }
timeout { timeout {
send_user "\nFAILURE: srun not responding\n" send_user "\nFAILURE: srun not responding\n"
kill_srun slow_kill $srun_pid
set exit_code 1 set exit_code 1
exp_continue
} }
eof { eof {
wait wait
...@@ -144,7 +141,7 @@ if {$err_msg != 1} { ...@@ -144,7 +141,7 @@ if {$err_msg != 1} {
# #
set host_0 "" set host_0 ""
set timeout $max_job_delay set timeout $max_job_delay
spawn $srun -N1 -l --mem=1 -t1 $bin_hostname set srun_pid [spawn $srun -N1 -l --mem=1 -t1 $bin_hostname]
expect { expect {
-re "0: ($alpha_numeric)" { -re "0: ($alpha_numeric)" {
set host_0 $expect_out(1,string) set host_0 $expect_out(1,string)
...@@ -152,9 +149,8 @@ expect { ...@@ -152,9 +149,8 @@ expect {
} }
timeout { timeout {
send_user "\nFAILURE: srun not responding\n" send_user "\nFAILURE: srun not responding\n"
kill_srun slow_kill $srun_pid
set exit_code 1 set exit_code 1
exp_continue
} }
eof { eof {
wait wait
...@@ -172,7 +168,7 @@ if {[string compare $host_0 ""] == 0} { ...@@ -172,7 +168,7 @@ if {[string compare $host_0 ""] == 0} {
set err_msg 0 set err_msg 0
set host_0 "" set host_0 ""
set timeout $max_job_delay set timeout $max_job_delay
spawn $srun -N1 -l --tmp=999999999 -t1 $bin_hostname set srun_pid [spawn $srun -N1 -l --tmp=999999999 -t1 $bin_hostname]
expect { expect {
-re "configuration is not available" { -re "configuration is not available" {
send_user "This error is expected, no worries\n" send_user "This error is expected, no worries\n"
...@@ -185,9 +181,8 @@ expect { ...@@ -185,9 +181,8 @@ expect {
} }
timeout { timeout {
send_user "\nFAILURE: srun not responding\n" send_user "\nFAILURE: srun not responding\n"
kill_srun slow_kill $srun_pid
set exit_code 1 set exit_code 1
exp_continue
} }
eof { eof {
wait wait
...@@ -208,7 +203,7 @@ if {$err_msg != 1} { ...@@ -208,7 +203,7 @@ if {$err_msg != 1} {
# #
set host_0 "" set host_0 ""
set timeout $max_job_delay set timeout $max_job_delay
spawn $srun -N1 -l --tmp=1 -t1 $bin_hostname set srun_pid [spawn $srun -N1 -l --tmp=1 -t1 $bin_hostname]
expect { expect {
-re "0: ($alpha_numeric)" { -re "0: ($alpha_numeric)" {
set host_0 $expect_out(1,string) set host_0 $expect_out(1,string)
...@@ -216,9 +211,8 @@ expect { ...@@ -216,9 +211,8 @@ expect {
} }
timeout { timeout {
send_user "\nFAILURE: srun not responding\n" send_user "\nFAILURE: srun not responding\n"
kill_srun slow_kill $srun_pid
set exit_code 1 set exit_code 1
exp_continue
} }
eof { eof {
wait wait
......
...@@ -43,7 +43,7 @@ print_header $test_id ...@@ -43,7 +43,7 @@ print_header $test_id
set err_msg 0 set err_msg 0
set host_0 "" set host_0 ""
set timeout $max_job_delay set timeout $max_job_delay
spawn $srun -N1 -l --constraint=invalid,constraint -t1 $bin_hostname set srun_pid [spawn $srun -N1 -l --constraint=invalid,constraint -t1 $bin_hostname]
expect { expect {
-re "configuration is not available" { -re "configuration is not available" {
send_user "This error is expected, no worries\n" send_user "This error is expected, no worries\n"
...@@ -56,9 +56,8 @@ expect { ...@@ -56,9 +56,8 @@ expect {
} }
timeout { timeout {
send_user "\nFAILURE: srun not responding\n" send_user "\nFAILURE: srun not responding\n"
kill_srun slow_kill $srun_pid
set exit_code 1 set exit_code 1
exp_continue
} }
eof { eof {
wait wait
......
...@@ -54,7 +54,7 @@ if { [test_bluegene] } { ...@@ -54,7 +54,7 @@ if { [test_bluegene] } {
} }
} }
spawn $srun -N$node_cnt --no-kill -t1 $bin_sleep $sleep_time set srun_pid [spawn $srun -N$node_cnt --no-kill -t1 $bin_sleep $sleep_time]
expect { expect {
-re "error" { -re "error" {
send_user "\nFAILURE: some error occurred\n" send_user "\nFAILURE: some error occurred\n"
...@@ -63,9 +63,8 @@ expect { ...@@ -63,9 +63,8 @@ expect {
} }
timeout { timeout {
send_user "\nFAILURE: srun not responding\n" send_user "\nFAILURE: srun not responding\n"
kill_srun slow_kill $srun_pid
set exit_code 1 set exit_code 1
exp_continue
} }
eof { eof {
wait wait
......
...@@ -54,7 +54,7 @@ if {[is_super_user] == 0} { ...@@ -54,7 +54,7 @@ if {[is_super_user] == 0} {
set host_0 "" set host_0 ""
set nodelist_name "" set nodelist_name ""
set timeout $max_job_delay set timeout $max_job_delay
spawn $srun -v -N1 -l $bin_printenv SLURMD_NODENAME set srun_pid [spawn $srun -v -N1 -l $bin_printenv SLURMD_NODENAME]
expect { expect {
-re "on host ($alpha_numeric)," { -re "on host ($alpha_numeric)," {
set nodelist_name $expect_out(1,string) set nodelist_name $expect_out(1,string)
...@@ -70,7 +70,7 @@ expect { ...@@ -70,7 +70,7 @@ expect {
} }
timeout { timeout {
send_user "\nFAILURE: srun not responding\n" send_user "\nFAILURE: srun not responding\n"
kill_srun slow_kill $srun_pid
set exit_code 1 set exit_code 1
} }
eof { eof {
...@@ -89,7 +89,7 @@ if {[string compare $nodelist_name ""] == 0} { ...@@ -89,7 +89,7 @@ if {[string compare $nodelist_name ""] == 0} {
send_user "\nFAILURE: Did not get nodelist_name of task 0\n" send_user "\nFAILURE: Did not get nodelist_name of task 0\n"
exit 1 exit 1
} }
set include_node $nodelist_name set include_node $host_0
# #
# Submit a job directly to that node # Submit a job directly to that node
...@@ -97,7 +97,7 @@ set include_node $nodelist_name ...@@ -97,7 +97,7 @@ set include_node $nodelist_name
set host_1 "" set host_1 ""
set slurm_user 1 set slurm_user 1
set timeout 10 set timeout 10
spawn $srun -N1 -l --nodelist=$include_node --no-allocate -t1 $bin_printenv SLURMD_NODENAME set srun_pid [spawn $srun -N1 -l --nodelist=$include_node --no-allocate -t1 $bin_printenv SLURMD_NODENAME]
expect { expect {
-re "Invalid job credential" { -re "Invalid job credential" {
send_user "\nWARNING: Not SlurmUser or root.\n" send_user "\nWARNING: Not SlurmUser or root.\n"
...@@ -124,6 +124,7 @@ expect { ...@@ -124,6 +124,7 @@ expect {
timeout { timeout {
send_user "\nWARNING: srun not responding, " send_user "\nWARNING: srun not responding, "
send_user "expected if not SlurmUser or root.\n" send_user "expected if not SlurmUser or root.\n"
slow_kill $srun_pid
set slurm_user 0 set slurm_user 0
} }
eof { eof {
...@@ -133,23 +134,11 @@ expect { ...@@ -133,23 +134,11 @@ expect {
if {$slurm_user == 0} { if {$slurm_user == 0} {
exit 0; exit 0;
} }
if {[string compare $host_0 $host_1]} { if {[string compare $host_1 $include_node]} {
send_user "\nFAILURE: host name value wrong $host_0 $host_1\n" send_user "\nFAILURE: Allocation lacked an included node\n"
set exit_code 1 set exit_code 1
} }
#
# Verify node count and name
#
if {[string compare $host_0 ""] == 0} {
send_user "\nFAILURE: Did not get hostname of task 0\n"
set exit_code 1
}
#if {[string compare $host_0 $include_node] != 0} {
# send_user "\nFAILURE: Allocation lacked an included node\n"
# set exit_code 1
#}
# #
# Run three tasks at a time on some node and do so repeatedly # Run three tasks at a time on some node and do so repeatedly
# This checks for slurmd race conditions # This checks for slurmd race conditions
...@@ -164,13 +153,13 @@ for {set inx 0} {$inx < $interations} {incr inx} { ...@@ -164,13 +153,13 @@ for {set inx 0} {$inx < $interations} {incr inx} {
exec $bin_usleep 250000 exec $bin_usleep 250000
set failures 0 set failures 0
spawn $srun -N1 --nodelist=$include_node -t1 -l $bin_printenv SLURMD_NODENAME set srun_pid [spawn $srun -N1 --nodelist=$include_node -t1 -l $bin_printenv SLURMD_NODENAME]
set alloc $spawn_id set alloc $spawn_id
spawn $srun -N1 --nodelist=$include_node -Z $bin_usleep 500000 set srun_pid1 [spawn $srun -N1 --nodelist=$include_node -Z $bin_usleep 500000]
set noalloc1 $spawn_id set noalloc1 $spawn_id
spawn $srun -N1 --nodelist=$include_node -Z $bin_usleep 250000 set srun_pid2 [spawn $srun -N1 --nodelist=$include_node -Z $bin_usleep 250000]
set noalloc2 $spawn_id set noalloc2 $spawn_id
set timeout 20 set timeout 20
...@@ -203,6 +192,7 @@ for {set inx 0} {$inx < $interations} {incr inx} { ...@@ -203,6 +192,7 @@ for {set inx 0} {$inx < $interations} {incr inx} {
} }
timeout { timeout {
send_user "\nFAILURE: srun not responding\n" send_user "\nFAILURE: srun not responding\n"
slow_kill $srun_pid2
set failures 1 set failures 1
} }
eof { eof {
...@@ -239,6 +229,7 @@ for {set inx 0} {$inx < $interations} {incr inx} { ...@@ -239,6 +229,7 @@ for {set inx 0} {$inx < $interations} {incr inx} {
} }
timeout { timeout {
send_user "\nFAILURE: srun not responding\n" send_user "\nFAILURE: srun not responding\n"
slow_kill $srun_pid1
set failures 1 set failures 1
} }
eof { eof {
...@@ -278,6 +269,7 @@ for {set inx 0} {$inx < $interations} {incr inx} { ...@@ -278,6 +269,7 @@ for {set inx 0} {$inx < $interations} {incr inx} {
} }
timeout { timeout {
send_user "\nFAILURE: srun not responding\n" send_user "\nFAILURE: srun not responding\n"
slow_kill $srun_pid
set failures 1 set failures 1
} }
eof { eof {
......
...@@ -63,7 +63,7 @@ array set good_vars { ...@@ -63,7 +63,7 @@ array set good_vars {
# Spawn a job via srun to print environment variables # Spawn a job via srun to print environment variables
# #
set timeout $max_job_delay set timeout $max_job_delay
spawn $srun -N1 -n1 -t1 $bin_env set srun_pid [spawn $srun -N1 -n1 -t1 $bin_env]
expect { expect {
-re "(SLURM_$alpha_under)=($alpha_numeric)" { -re "(SLURM_$alpha_under)=($alpha_numeric)" {
set found_vars($expect_out(1,string)) "$expect_out(2,string)" set found_vars($expect_out(1,string)) "$expect_out(2,string)"
...@@ -71,9 +71,8 @@ expect { ...@@ -71,9 +71,8 @@ expect {
} }
timeout { timeout {
send_user "\nFAILURE: srun not responding\n" send_user "\nFAILURE: srun not responding\n"
kill_srun slow_kill $srun_pid
set exit_code 1 set exit_code 1
exp_continue
} }
eof { eof {
wait wait
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment