From 67e941432daff9ede8dc8246334d26bea649e658 Mon Sep 17 00:00:00 2001 From: Moe Jette <jette1@llnl.gov> Date: Tue, 19 Sep 2006 18:56:42 +0000 Subject: [PATCH] Replace a bunch of kill_srun calls with slow_kill $pid calls. --- testsuite/expect/test1.1 | 1 - testsuite/expect/test1.10 | 1 - testsuite/expect/test1.11 | 2 -- testsuite/expect/test1.12 | 1 - testsuite/expect/test1.13 | 2 -- testsuite/expect/test1.14 | 1 - testsuite/expect/test1.15 | 1 - testsuite/expect/test1.16 | 5 ++--- testsuite/expect/test1.17 | 5 ++--- testsuite/expect/test1.18 | 8 ++++---- testsuite/expect/test1.19 | 20 ++++++++++---------- testsuite/expect/test1.2 | 1 - testsuite/expect/test1.20 | 10 ++++------ testsuite/expect/test1.21 | 5 ++--- testsuite/expect/test1.22 | 10 ++++------ testsuite/expect/test1.23 | 30 ++++++++++++------------------ testsuite/expect/test1.24 | 5 ++--- testsuite/expect/test1.25 | 5 ++--- testsuite/expect/test1.26 | 34 +++++++++++++--------------------- testsuite/expect/test1.27 | 5 ++--- testsuite/expect/test1.28 | 5 ++--- testsuite/expect/test1.29 | 5 ++--- testsuite/expect/test1.3 | 1 - testsuite/expect/test1.30 | 7 +++---- testsuite/expect/test1.31 | 5 ++--- testsuite/expect/test1.32 | 8 ++------ testsuite/expect/test1.33 | 8 ++------ testsuite/expect/test1.34 | 6 +++--- testsuite/expect/test1.35 | 9 +++------ testsuite/expect/test1.36 | 5 ++--- testsuite/expect/test1.37 | 13 ++++++------- testsuite/expect/test1.38 | 14 ++++---------- testsuite/expect/test1.39 | 12 ++++++++---- testsuite/expect/test1.4 | 1 - testsuite/expect/test1.5 | 1 - testsuite/expect/test1.6 | 1 - testsuite/expect/test1.7 | 20 ++++++++------------ testsuite/expect/test1.8 | 10 ++++------ testsuite/expect/test1.9 | 5 ++--- testsuite/expect/test15.10 | 6 ++---- 40 files changed, 114 insertions(+), 180 deletions(-) diff --git a/testsuite/expect/test1.1 b/testsuite/expect/test1.1 index c4dc9727633..b71a9ae45fe 100755 --- a/testsuite/expect/test1.1 +++ b/testsuite/expect/test1.1 @@ -81,7 +81,6 @@ expect { send_user "\nFAILURE: srun not responding\n" slow_kill $srun_pid set exit_code 1 - exp_continue } eof { wait diff --git a/testsuite/expect/test1.10 b/testsuite/expect/test1.10 index 2eb02babb4e..1e7cfb4c47b 100755 --- a/testsuite/expect/test1.10 +++ b/testsuite/expect/test1.10 @@ -54,7 +54,6 @@ expect { send_user "\nFAILURE: srun not responding\n" slow_kill $srun_pid set exit_code 1 - exp_continue } } diff --git a/testsuite/expect/test1.11 b/testsuite/expect/test1.11 index 5449706987d..5bc44158016 100755 --- a/testsuite/expect/test1.11 +++ b/testsuite/expect/test1.11 @@ -78,7 +78,6 @@ expect { send_user "\nFAILURE: srun not responding\n" slow_kill $srun_pid set exit_code 1 - exp_continue } eof { wait @@ -189,7 +188,6 @@ expect { send_user "\nFAILURE: srun not responding\n" slow_kill $srun_pid set exit_code 1 - exp_continue } eof { wait diff --git a/testsuite/expect/test1.12 b/testsuite/expect/test1.12 index efe66e7bab1..200c1827209 100755 --- a/testsuite/expect/test1.12 +++ b/testsuite/expect/test1.12 @@ -68,7 +68,6 @@ expect { send_user "\nFAILURE: srun not responding\n" slow_kill $srun_pid set exit_code 1 - exp_continue } eof { wait diff --git a/testsuite/expect/test1.13 b/testsuite/expect/test1.13 index 9bf489e21a7..ee38994e831 100755 --- a/testsuite/expect/test1.13 +++ b/testsuite/expect/test1.13 @@ -53,7 +53,6 @@ expect { send_user "\nFAILURE: srun not responding\n" slow_kill $srun_pid set exit_code 1 - exp_continue } eof { wait @@ -85,7 +84,6 @@ expect { send_user "\nFAILURE: srun not responding\n" slow_kill $srun_pid set exit_code 1 - exp_continue } eof { wait diff --git a/testsuite/expect/test1.14 b/testsuite/expect/test1.14 index 56f6a86a6ef..fa81dbdeaee 100755 --- a/testsuite/expect/test1.14 +++ b/testsuite/expect/test1.14 @@ -52,7 +52,6 @@ expect { send_user "\nFAILURE: srun not responding\n" slow_kill $srun_pid set exit_code 1 - exp_continue } eof { wait diff --git a/testsuite/expect/test1.15 b/testsuite/expect/test1.15 index c49e9292663..6257c34b691 100755 --- a/testsuite/expect/test1.15 +++ b/testsuite/expect/test1.15 @@ -69,7 +69,6 @@ expect { send_user "\nFAILURE: srun not responding\n" slow_kill $srun_pid set exit_code 1 - exp_continue } eof { wait diff --git a/testsuite/expect/test1.16 b/testsuite/expect/test1.16 index fb5f06b4a45..e706b9d89d9 100755 --- a/testsuite/expect/test1.16 +++ b/testsuite/expect/test1.16 @@ -49,7 +49,7 @@ exec $bin_touch $scratch_file # The --unbuffered option will send the message which lacks a '\n' # set timeout $max_job_delay -spawn $srun --unbuffered --verbose -t1 $bin_rm -i $scratch_file +set srun_pid [spawn $srun --unbuffered --verbose -t1 $bin_rm -i $scratch_file] expect { -re "launching ($number).0 on host" { set job_id $expect_out(1,string) @@ -62,9 +62,8 @@ expect { } timeout { send_user "\nFAILURE: srun not responding\n" - kill_srun + slow_kill $srun_pid set exit_code 1 - exp_continue } eof { wait diff --git a/testsuite/expect/test1.17 b/testsuite/expect/test1.17 index ab8d10783a0..b5689a0bdeb 100755 --- a/testsuite/expect/test1.17 +++ b/testsuite/expect/test1.17 @@ -44,7 +44,7 @@ print_header $test_id # set timeout $max_job_delay set match 0 -spawn $srun --allocate --verbose -t1 +set srun_pid [spawn $srun --allocate --verbose -t1] expect { -re "jobid ($number).*" { set job_id $expect_out(1,string) @@ -68,9 +68,8 @@ expect { } timeout { send_user "\nFAILURE: srun not responding\n" - kill_srun + slow_kill $srun_pid set exit_code 1 - exp_continue } eof { wait diff --git a/testsuite/expect/test1.18 b/testsuite/expect/test1.18 index ce22cf2a27e..bb769e4b4c9 100755 --- a/testsuite/expect/test1.18 +++ b/testsuite/expect/test1.18 @@ -53,7 +53,7 @@ exec $bin_chmod 700 $file_prog # Spawn initial program via srun # set timeout $max_job_delay -spawn $srun -N1-4 -v -t5 $file_prog +set srun_pid [spawn $srun -N1-4 -v -t5 -l $file_prog] set init_id $spawn_id expect { -i $init_id @@ -66,6 +66,7 @@ expect { } timeout { send_user "\nFAILURE: srun (launch) not responding\n" + slow_kill $srun_pid set exit_code 1 } eof { @@ -86,7 +87,7 @@ if {$matches == 0} { # set matches 0 set timeout 10 -spawn $srun -vv --attach=$job_id --join +set attach_pid [spawn $srun -vv -l --attach=$job_id --join] set attach_id $spawn_id expect { -i $attach_id @@ -96,9 +97,8 @@ expect { } timeout { send_user "\nFAILURE: srun (attach) not responding\n" - kill_srun + slow_kill $attach_pid set exit_code 1 - exp_continue } eof { wait diff --git a/testsuite/expect/test1.19 b/testsuite/expect/test1.19 index cb74d07fc16..7e396042987 100755 --- a/testsuite/expect/test1.19 +++ b/testsuite/expect/test1.19 @@ -54,7 +54,7 @@ for {set task_id 0} {$task_id < $task_cnt} {incr task_id} { exec $bin_rm -f $file_out_t_glob } set timeout $max_job_delay -spawn $srun --output=$file_out_t -N1 -n$task_cnt -O -v -t1 $bin_id +set srun_pid [spawn $srun --output=$file_out_t -N1 -n$task_cnt -O -v -t1 $bin_id] expect { -re "jobid ($number).*" { set job_id $expect_out(1,string) @@ -62,7 +62,7 @@ expect { } timeout { send_user "\nFAILURE: srun not responding\n" - kill_srun + slow_kill $srun_pid exit 1 } eof { @@ -94,7 +94,7 @@ if {$file_cnt != $task_cnt} { # set job_id 0 set srun_exit 0 -spawn $srun --error=$file_err_j --output=/dev/null -N1 -n$task_cnt -O -v -t1 $bin_sleep aaa +set srun_pid [spawn $srun --error=$file_err_j --output=/dev/null -N1 -n$task_cnt -O -v -t1 $bin_sleep aaa] expect { -re "jobid ($number).*" { set job_id $expect_out(1,string) @@ -107,7 +107,7 @@ expect { } timeout { send_user "\nFAILURE: srun not responding\n" - kill_srun + slow_kill $srun_pid exit 1 } eof { @@ -136,7 +136,7 @@ if {[wait_for_file $file_err_j_glob] == 0} { # file name and confirm it is created # set job_id 0 -spawn $srun --output=$file_out_J -N1 -v -t1 $bin_hostname +set srun_pid [spawn $srun --output=$file_out_J -N1 -v -t1 $bin_hostname] expect { -re "jobid ($number).*" { set job_id $expect_out(1,string) @@ -144,7 +144,7 @@ expect { } timeout { send_user "\nFAILURE: srun not responding\n" - kill_srun + slow_kill $srun_pid exit 1 } eof { @@ -173,7 +173,7 @@ set file_out_n_glob "test$test_id.n.$node_id.output" exec $bin_rm -f $file_out_n_glob set job_id 0 -spawn $srun --output=$file_out_n -N1 -n2 -O -v -t1 $bin_hostname +set srun_pid [spawn $srun --output=$file_out_n -N1 -n2 -O -v -t1 $bin_hostname] expect { -re "jobid ($number).*" { set job_id $expect_out(1,string) @@ -181,7 +181,7 @@ expect { } timeout { send_user "\nFAILURE: srun not responding\n" - kill_srun + slow_kill $srun_pid exit 1 } eof { @@ -241,7 +241,7 @@ if { [test_bluegene] } { } set job_id 0 -spawn $srun --batch --output=/dev/null -N$node_cnt -n$task_cnt -O -t1 $file_in +set srun_pid [spawn $srun --batch --output=/dev/null -N$node_cnt -n$task_cnt -O -t1 $file_in] expect { -re "jobid ($number) submitted" { set job_id $expect_out(1,string) @@ -249,7 +249,7 @@ expect { } timeout { send_user "\nFAILURE: srun not responding\n" - kill_srun + slow_kill $srun_pid exit 1 } eof { diff --git a/testsuite/expect/test1.2 b/testsuite/expect/test1.2 index d6b90a0a487..2bde95c6349 100755 --- a/testsuite/expect/test1.2 +++ b/testsuite/expect/test1.2 @@ -53,7 +53,6 @@ expect { send_user "\nFAILURE: srun not responding\n" slow_kill $srun_pid set exit_code 1 - exp_continue } eof { wait diff --git a/testsuite/expect/test1.20 b/testsuite/expect/test1.20 index 661875ce48b..f04116e20a4 100755 --- a/testsuite/expect/test1.20 +++ b/testsuite/expect/test1.20 @@ -41,7 +41,7 @@ print_header $test_id # Spawn a program via srun with stdout forwarding disabled # set timeout $max_job_delay -spawn $srun --output=none -t1 $bin_id +set srun_pid [spawn $srun --output=none -t1 $bin_id] expect { -re "uid=" { send_user "\nFAILURE: srun improperly forwarded stdout\n" @@ -55,9 +55,8 @@ expect { } timeout { send_user "\nFAILURE: srun not responding\n" - kill_srun + slow_kill $srun_pid set exit_code 1 - exp_continue } eof { wait @@ -68,7 +67,7 @@ expect { # Spawn a program via srun with stderr forwarding disabled # set matches 0 -spawn $srun --error=none -t1 $bin_sleep aaa +set srun_pid [spawn $srun --error=none -t1 $bin_sleep aaa] expect { -re "invalid" { send_user "\nFAILURE: srun improperly forwarded stderr\n" @@ -82,9 +81,8 @@ expect { } timeout { send_user "\nFAILURE: srun not responding\n" - kill_srun + slow_kill $srun_pid set exit_code 1 - exp_continue } eof { wait diff --git a/testsuite/expect/test1.21 b/testsuite/expect/test1.21 index 794b7268dda..401d2f0719c 100755 --- a/testsuite/expect/test1.21 +++ b/testsuite/expect/test1.21 @@ -44,7 +44,7 @@ print_header $test_id # Spawn a shell via srun with stdout forwarding disabled # set timeout $max_job_delay -spawn $srun --input=$task_id -N1 -n10 -O -v --wait=2 -t1 $bin_bash +set srun_pid [spawn $srun --input=$task_id -N1 -n10 -O -v --wait=2 -t1 $bin_bash] expect { -re "launching ($number).0 on host" { set job_id $expect_out(1,string) @@ -68,9 +68,8 @@ expect { } timeout { send_user "\nFAILURE: srun not responding\n" - kill_srun + slow_kill $srun_pid set exit_code 1 - exp_continue } eof { wait diff --git a/testsuite/expect/test1.22 b/testsuite/expect/test1.22 index 9ab76e5903a..2cf22b19a2d 100755 --- a/testsuite/expect/test1.22 +++ b/testsuite/expect/test1.22 @@ -53,7 +53,7 @@ if { [test_bluegene] } { } } -spawn $srun -N$node_cnt -l --threads=1 -t1 $bin_hostname +set srun_pid [spawn $srun -N$node_cnt -l --threads=1 -t1 $bin_hostname] expect { -re "0: ($alpha_numeric)" { set host_0 $expect_out(1,string) @@ -61,9 +61,8 @@ expect { } timeout { send_user "\nFAILURE: srun not responding\n" - kill_srun + slow_kill $srun_pid set exit_code 1 - exp_continue } eof { wait @@ -83,7 +82,7 @@ if {[string compare $host_0 ""] == 0} { # set host_0 "" set timeout $max_job_delay -spawn $srun -N$node_cnt -n32 -O -l --threads=32 -t1 $bin_hostname +set srun_pid [spawn $srun -N$node_cnt -n32 -O -l --threads=32 -t1 $bin_hostname] expect { -re "0: ($alpha_numeric)" { set host_0 $expect_out(1,string) @@ -91,9 +90,8 @@ expect { } timeout { send_user "\nFAILURE: srun not responding\n" - kill_srun + slow_kill $srun_pid set exit_code 1 - exp_continue } eof { wait diff --git a/testsuite/expect/test1.23 b/testsuite/expect/test1.23 index ea1f5588a5e..0eefd473748 100755 --- a/testsuite/expect/test1.23 +++ b/testsuite/expect/test1.23 @@ -44,7 +44,7 @@ print_header $test_id set err_msg 0 set host_0 "" set timeout $max_job_delay -spawn $srun -N1 -l --mincpus=999999 -t1 $bin_hostname +set srun_pid [spawn $srun -N1 -l --mincpus=999999 -t1 $bin_hostname] expect { -re "configuration is not available" { send_user "This error is expected, no worries\n" @@ -57,9 +57,8 @@ expect { } timeout { send_user "\nFAILURE: srun not responding\n" - kill_srun + slow_kill $srun_pid set exit_code 1 - exp_continue } eof { wait @@ -80,7 +79,7 @@ if {$err_msg != 1} { # set host_0 "" set timeout $max_job_delay -spawn $srun -N1 -l --mincpus=1 -t1 $bin_hostname +set srun_pid [spawn $srun -N1 -l --mincpus=1 -t1 $bin_hostname] expect { -re "0: ($alpha_numeric)" { set host_0 $expect_out(1,string) @@ -88,9 +87,8 @@ expect { } timeout { send_user "\nFAILURE: srun not responding\n" - kill_srun + slow_kill $srun_pid set exit_code 1 - exp_continue } eof { wait @@ -108,7 +106,7 @@ if {[string compare $host_0 ""] == 0} { set err_msg 0 set host_0 "" set timeout $max_job_delay -spawn $srun -N1 -l --mem=999999 -t1 $bin_hostname +set srun_pid [spawn $srun -N1 -l --mem=999999 -t1 $bin_hostname] expect { -re "configuration is not available" { send_user "This error is expected, no worries\n" @@ -121,9 +119,8 @@ expect { } timeout { send_user "\nFAILURE: srun not responding\n" - kill_srun + slow_kill $srun_pid set exit_code 1 - exp_continue } eof { wait @@ -144,7 +141,7 @@ if {$err_msg != 1} { # set host_0 "" set timeout $max_job_delay -spawn $srun -N1 -l --mem=1 -t1 $bin_hostname +set srun_pid [spawn $srun -N1 -l --mem=1 -t1 $bin_hostname] expect { -re "0: ($alpha_numeric)" { set host_0 $expect_out(1,string) @@ -152,9 +149,8 @@ expect { } timeout { send_user "\nFAILURE: srun not responding\n" - kill_srun + slow_kill $srun_pid set exit_code 1 - exp_continue } eof { wait @@ -172,7 +168,7 @@ if {[string compare $host_0 ""] == 0} { set err_msg 0 set host_0 "" set timeout $max_job_delay -spawn $srun -N1 -l --tmp=999999999 -t1 $bin_hostname +set srun_pid [spawn $srun -N1 -l --tmp=999999999 -t1 $bin_hostname] expect { -re "configuration is not available" { send_user "This error is expected, no worries\n" @@ -185,9 +181,8 @@ expect { } timeout { send_user "\nFAILURE: srun not responding\n" - kill_srun + slow_kill $srun_pid set exit_code 1 - exp_continue } eof { wait @@ -208,7 +203,7 @@ if {$err_msg != 1} { # set host_0 "" set timeout $max_job_delay -spawn $srun -N1 -l --tmp=1 -t1 $bin_hostname +set srun_pid [spawn $srun -N1 -l --tmp=1 -t1 $bin_hostname] expect { -re "0: ($alpha_numeric)" { set host_0 $expect_out(1,string) @@ -216,9 +211,8 @@ expect { } timeout { send_user "\nFAILURE: srun not responding\n" - kill_srun + slow_kill $srun_pid set exit_code 1 - exp_continue } eof { wait diff --git a/testsuite/expect/test1.24 b/testsuite/expect/test1.24 index 03a3783546c..829cf84e80e 100755 --- a/testsuite/expect/test1.24 +++ b/testsuite/expect/test1.24 @@ -43,7 +43,7 @@ print_header $test_id set err_msg 0 set host_0 "" set timeout $max_job_delay -spawn $srun -N1 -l --constraint=invalid,constraint -t1 $bin_hostname +set srun_pid [spawn $srun -N1 -l --constraint=invalid,constraint -t1 $bin_hostname] expect { -re "configuration is not available" { send_user "This error is expected, no worries\n" @@ -56,9 +56,8 @@ expect { } timeout { send_user "\nFAILURE: srun not responding\n" - kill_srun + slow_kill $srun_pid set exit_code 1 - exp_continue } eof { wait diff --git a/testsuite/expect/test1.25 b/testsuite/expect/test1.25 index 7c6d06f5892..c590dbe5308 100755 --- a/testsuite/expect/test1.25 +++ b/testsuite/expect/test1.25 @@ -54,7 +54,7 @@ if { [test_bluegene] } { } } -spawn $srun -N$node_cnt --no-kill -t1 $bin_sleep $sleep_time +set srun_pid [spawn $srun -N$node_cnt --no-kill -t1 $bin_sleep $sleep_time] expect { -re "error" { send_user "\nFAILURE: some error occurred\n" @@ -63,9 +63,8 @@ expect { } timeout { send_user "\nFAILURE: srun not responding\n" - kill_srun + slow_kill $srun_pid set exit_code 1 - exp_continue } eof { wait diff --git a/testsuite/expect/test1.26 b/testsuite/expect/test1.26 index bf90382ddfc..9e3d5c53db2 100755 --- a/testsuite/expect/test1.26 +++ b/testsuite/expect/test1.26 @@ -54,7 +54,7 @@ if {[is_super_user] == 0} { set host_0 "" set nodelist_name "" set timeout $max_job_delay -spawn $srun -v -N1 -l $bin_printenv SLURMD_NODENAME +set srun_pid [spawn $srun -v -N1 -l $bin_printenv SLURMD_NODENAME] expect { -re "on host ($alpha_numeric)," { set nodelist_name $expect_out(1,string) @@ -70,7 +70,7 @@ expect { } timeout { send_user "\nFAILURE: srun not responding\n" - kill_srun + slow_kill $srun_pid set exit_code 1 } eof { @@ -89,7 +89,7 @@ if {[string compare $nodelist_name ""] == 0} { send_user "\nFAILURE: Did not get nodelist_name of task 0\n" exit 1 } -set include_node $nodelist_name +set include_node $host_0 # # Submit a job directly to that node @@ -97,7 +97,7 @@ set include_node $nodelist_name set host_1 "" set slurm_user 1 set timeout 10 -spawn $srun -N1 -l --nodelist=$include_node --no-allocate -t1 $bin_printenv SLURMD_NODENAME +set srun_pid [spawn $srun -N1 -l --nodelist=$include_node --no-allocate -t1 $bin_printenv SLURMD_NODENAME] expect { -re "Invalid job credential" { send_user "\nWARNING: Not SlurmUser or root.\n" @@ -124,6 +124,7 @@ expect { timeout { send_user "\nWARNING: srun not responding, " send_user "expected if not SlurmUser or root.\n" + slow_kill $srun_pid set slurm_user 0 } eof { @@ -133,23 +134,11 @@ expect { if {$slurm_user == 0} { exit 0; } -if {[string compare $host_0 $host_1]} { - send_user "\nFAILURE: host name value wrong $host_0 $host_1\n" +if {[string compare $host_1 $include_node]} { + send_user "\nFAILURE: Allocation lacked an included node\n" set exit_code 1 } -# -# Verify node count and name -# -if {[string compare $host_0 ""] == 0} { - send_user "\nFAILURE: Did not get hostname of task 0\n" - set exit_code 1 -} -#if {[string compare $host_0 $include_node] != 0} { -# send_user "\nFAILURE: Allocation lacked an included node\n" -# set exit_code 1 -#} - # # Run three tasks at a time on some node and do so repeatedly # This checks for slurmd race conditions @@ -164,13 +153,13 @@ for {set inx 0} {$inx < $interations} {incr inx} { exec $bin_usleep 250000 set failures 0 - spawn $srun -N1 --nodelist=$include_node -t1 -l $bin_printenv SLURMD_NODENAME + set srun_pid [spawn $srun -N1 --nodelist=$include_node -t1 -l $bin_printenv SLURMD_NODENAME] set alloc $spawn_id - spawn $srun -N1 --nodelist=$include_node -Z $bin_usleep 500000 + set srun_pid1 [spawn $srun -N1 --nodelist=$include_node -Z $bin_usleep 500000] set noalloc1 $spawn_id - spawn $srun -N1 --nodelist=$include_node -Z $bin_usleep 250000 + set srun_pid2 [spawn $srun -N1 --nodelist=$include_node -Z $bin_usleep 250000] set noalloc2 $spawn_id set timeout 20 @@ -203,6 +192,7 @@ for {set inx 0} {$inx < $interations} {incr inx} { } timeout { send_user "\nFAILURE: srun not responding\n" + slow_kill $srun_pid2 set failures 1 } eof { @@ -239,6 +229,7 @@ for {set inx 0} {$inx < $interations} {incr inx} { } timeout { send_user "\nFAILURE: srun not responding\n" + slow_kill $srun_pid1 set failures 1 } eof { @@ -278,6 +269,7 @@ for {set inx 0} {$inx < $interations} {incr inx} { } timeout { send_user "\nFAILURE: srun not responding\n" + slow_kill $srun_pid set failures 1 } eof { diff --git a/testsuite/expect/test1.27 b/testsuite/expect/test1.27 index 6d8fe3a0b5b..40df8849375 100755 --- a/testsuite/expect/test1.27 +++ b/testsuite/expect/test1.27 @@ -63,7 +63,7 @@ array set good_vars { # Spawn a job via srun to print environment variables # set timeout $max_job_delay -spawn $srun -N1 -n1 -t1 $bin_env +set srun_pid [spawn $srun -N1 -n1 -t1 $bin_env] expect { -re "(SLURM_$alpha_under)=($alpha_numeric)" { set found_vars($expect_out(1,string)) "$expect_out(2,string)" @@ -71,9 +71,8 @@ expect { } timeout { send_user "\nFAILURE: srun not responding\n" - kill_srun + slow_kill $srun_pid set exit_code 1 - exp_continue } eof { wait diff --git a/testsuite/expect/test1.28 b/testsuite/expect/test1.28 index 2aae4a492bc..3bd362a7110 100755 --- a/testsuite/expect/test1.28 +++ b/testsuite/expect/test1.28 @@ -47,7 +47,7 @@ print_header $test_id # set timeout $max_job_delay set env($test_env_name) $test_env_val -spawn $srun -N1 -t1 $bin_env +set srun_pid [spawn $srun -N1 -t1 $bin_env] expect { -re "$test_env_name=($number)" { if {$expect_out(1,string) == $test_env_val} { @@ -57,9 +57,8 @@ expect { } timeout { send_user "\nFAILURE: srun not responding\n" - kill_srun + slow_kill $srun_pid set exit_code 1 - exp_continue } eof { wait diff --git a/testsuite/expect/test1.29 b/testsuite/expect/test1.29 index 5ed500f6efe..44730f107f3 100755 --- a/testsuite/expect/test1.29 +++ b/testsuite/expect/test1.29 @@ -133,7 +133,7 @@ make_bash_script $file_in " set timeout $max_job_delay -spawn $srun --batch --output=$file_out --error=$file_err -t1 ./$file_in +set srun_pid [spawn $srun --batch --output=$file_out --error=$file_err -t1 ./$file_in] expect { -re "jobid ($number) submitted" { set job_id $expect_out(1,string) @@ -141,9 +141,8 @@ expect { } timeout { send_user "\nFAILURE: srun not responding\n" - kill_srun + slow_kill $srun_pid set exit_code 1 - exp_continue } eof { wait diff --git a/testsuite/expect/test1.3 b/testsuite/expect/test1.3 index 23838281a66..57ec23bb0cb 100755 --- a/testsuite/expect/test1.3 +++ b/testsuite/expect/test1.3 @@ -50,7 +50,6 @@ expect { send_user "\nFAILURE: srun not responding\n" slow_kill $srun_pid set exit_code 1 - exp_continue } eof { wait diff --git a/testsuite/expect/test1.30 b/testsuite/expect/test1.30 index bb149952d1d..30337c0c675 100755 --- a/testsuite/expect/test1.30 +++ b/testsuite/expect/test1.30 @@ -55,7 +55,7 @@ if { [test_bluegene] } { # set timeout $max_job_delay for {set node_cnt 1} {$node_cnt > 0} {set node_cnt [expr $node_cnt * 2]} { - spawn $srun --immediate -N$node_cnt $srun_opts -t2 $bin_hostname + set srun_pid [spawn $srun --immediate -N$node_cnt $srun_opts -t2 $bin_hostname] expect { -re "Immediate execution impossible" { send_user "This error is expected, no worries\n" @@ -85,9 +85,8 @@ for {set node_cnt 1} {$node_cnt > 0} {set node_cnt [expr $node_cnt * 2]} { } timeout { send_user "\nFAILURE: srun not responding\n" - set exit_code 1 - kill_srun - exp_continue + slow_kill $srun_pid + set exit_code 1 } eof { wait diff --git a/testsuite/expect/test1.31 b/testsuite/expect/test1.31 index fca0bfaaa39..d4693de681b 100755 --- a/testsuite/expect/test1.31 +++ b/testsuite/expect/test1.31 @@ -77,7 +77,7 @@ exec $bin_rm -f $env_valu_stdout # # Spawn a job via srun using these environment variables # -spawn $srun -t1 $bin_env +set srun_pid [spawn $srun -t1 $bin_env] expect { -re "jobid ($number)" { set job_id $expect_out(1,string) @@ -86,9 +86,8 @@ expect { } timeout { send_user "\nFAILURE: srun not responding\n" - kill_srun + slow_kill $srun_pid set exit_code 1 - exp_continue } eof { wait diff --git a/testsuite/expect/test1.32 b/testsuite/expect/test1.32 index 6fd60f4feed..197a05ccca9 100755 --- a/testsuite/expect/test1.32 +++ b/testsuite/expect/test1.32 @@ -79,13 +79,10 @@ expect { # Expect have difficulties handling unbuffered srun output # set timeout $max_job_delay -spawn $srun -N1 -t1 --unbuffered $file_prog -set srun_pid [exp_pid] +set srun_pid [spawn $srun -N1 -t1 --unbuffered $file_prog] exec $bin_sleep 5 exec $bin_kill -USR1 $srun_pid -#exec $bin_pkill -USR1 -n -u $uid srun exec $bin_kill -USR2 $srun_pid -#exec $bin_pkill -USR2 -n -u $uid srun expect { -re "WAITING" { incr matches @@ -105,9 +102,8 @@ expect { } timeout { send_user "\nFAILURE: srun not responding\n" - kill_srun + slow_kill $srun_pid set exit_code 1 - exp_continue } eof { send_user "\nEOF\n" diff --git a/testsuite/expect/test1.33 b/testsuite/expect/test1.33 index 63f6d778c52..a42c1bfb4a8 100755 --- a/testsuite/expect/test1.33 +++ b/testsuite/expect/test1.33 @@ -47,7 +47,6 @@ make_bash_script $exit_script " make_bash_script $test_script " $srun -N1 -n2 -O $exit_script -# RC=\$? echo srun_exit_code_\$? " @@ -56,7 +55,7 @@ make_bash_script $test_script " # set sum 0 set timeout $max_job_delay -spawn $srun -N1 -n2 -O $exit_script +set srun_pid [spawn $srun -N1 -n2 -O $exit_script] expect { -re "exit code ($number)" { send_user "This error is expected, no worries\n" @@ -65,9 +64,8 @@ expect { } timeout { send_user "\nFAILURE: srun not responding\n" - kill_srun + slow_kill $srun_pid set exit_code 1 - exp_continue } eof { wait @@ -104,7 +102,5 @@ if {$matches != 1} { if {$exit_code == 0} { exec $bin_rm -f $exit_script $test_script send_user "\nSUCCESS\n" -} else { - kill_srun } exit $exit_code diff --git a/testsuite/expect/test1.34 b/testsuite/expect/test1.34 index b97deefd891..13f9dee3632 100755 --- a/testsuite/expect/test1.34 +++ b/testsuite/expect/test1.34 @@ -58,7 +58,7 @@ make_bash_script $file_in "$bin_echo \$1,\$2" # Spawn a srun batch job with arguments # set timeout $max_job_delay -spawn $srun --batch --output=$file_out --error=$file_err -t1 $file_in $arg1 $arg2 +set srun_pid [spawn $srun --batch --output=$file_out --error=$file_err -t1 $file_in $arg1 $arg2] expect { -re "jobid ($number) submitted" { set job_id $expect_out(1,string) @@ -66,9 +66,8 @@ expect { } timeout { send_user "\nFAILURE: srun not responding\n" - kill_srun + slow_kill $srun_pid set exit_code 1 - exp_continue } eof { wait @@ -85,6 +84,7 @@ if {$job_id == 0} { # if {[wait_for_job $job_id "DONE"] != 0} { send_user "\nFAILURE: waiting for job to complete\n" + cancel_job $job_id set exit_code 1 } diff --git a/testsuite/expect/test1.35 b/testsuite/expect/test1.35 index ba277667aba..6d76bfb9de4 100755 --- a/testsuite/expect/test1.35 +++ b/testsuite/expect/test1.35 @@ -6,9 +6,6 @@ # Output: "TEST: #.#" followed by "SUCCESS" if test was successful, OR # "FAILURE: ..." otherwise with an explanation of the failure, OR # anything else indicates a failure mode that must be investigated. -# -# Note: This script generates and then deletes files in the working directory -# named test1.35.input, test1.35.output, and test1.35.error ############################################################################ # Copyright (C) 2002-2006 The Regents of the University of California. # Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). @@ -73,7 +70,7 @@ if { [test_bluegene] } { } } -spawn $srun --batch -N$node_cnt --output=$file_out --error=$file_err -t1 $file_in +set srun_pid [spawn $srun --batch -N$node_cnt --output=$file_out --error=$file_err -t1 $file_in] expect { -re "jobid ($number) submitted" { set job_id $expect_out(1,string) @@ -81,9 +78,8 @@ expect { } timeout { send_user "\nFAILURE: srun not responding\n" - kill_srun + slow_kill $srun_pid set exit_code 1 - exp_continue } eof { wait @@ -100,6 +96,7 @@ if {$job_id == 0} { # if {[wait_for_job $job_id "DONE"] != 0} { send_user "\nFAILURE: waiting for job to complete\n" + cancel_job $job_id set exit_code 1 } diff --git a/testsuite/expect/test1.36 b/testsuite/expect/test1.36 index e1cd00443ed..3e3632a1772 100755 --- a/testsuite/expect/test1.36 +++ b/testsuite/expect/test1.36 @@ -58,7 +58,7 @@ if { [test_bluegene] } { } } -spawn $srun -N$node_cnt -n$task_cnt -O -t1 $srun -l -n$mult -O $bin_id +set srun_pid [spawn $srun -N$node_cnt -n$task_cnt -O -t1 $srun -l -n$mult -O $bin_id] expect { -re "($number): uid=" { incr task_output @@ -66,9 +66,8 @@ expect { } timeout { send_user "\nFAILURE: srun not responding\n" - kill_srun + slow_kill $srun_pid set exit_code 1 - exp_continue } eof { wait diff --git a/testsuite/expect/test1.37 b/testsuite/expect/test1.37 index bf8e57c10af..5915a40baf3 100755 --- a/testsuite/expect/test1.37 +++ b/testsuite/expect/test1.37 @@ -48,7 +48,7 @@ print_header $test_id # Submit a job and get the node's NodeName from the nodelist # set timeout $max_job_delay -spawn $srun -v -N1 -l -t1 $bin_hostname +set srun_pid [spawn $srun -v -N1 -l -t1 $bin_hostname] expect { -re "on host ($alpha_numeric)," { set nodelist_name $expect_out(1,string) @@ -60,9 +60,8 @@ expect { } timeout { send_user "\nFAILURE: srun not responding\n" - kill_srun + slow_kill $srun_pid set exit_code 1 - exp_continue } eof { wait @@ -89,7 +88,7 @@ make_bash_script $file_in "$srun $bin_sleep 5" # with sharing permitted. Insure the first job completes before the # second job is started. # -spawn $srun --batch -N1 --exclusive --nodelist=$nodelist_name -t1 --output=$file_out --error=$file_err $file_in +set srun_pid [spawn $srun --batch -N1 --exclusive --nodelist=$nodelist_name -t1 --output=$file_out --error=$file_err $file_in] expect { -re "jobid ($number) submitted" { set job_id1 $expect_out(1,string) @@ -97,7 +96,7 @@ expect { } timeout { send_user "\nFAILURE: srun not responding\n" - kill_srun + slow_kill $srun_pid exit 1 } eof { @@ -113,7 +112,7 @@ if {$job_id1 == 0} { set partition "dummy" set waited 1 set timeout [expr $timeout + 5] -spawn $srun -N1 --nodelist=$nodelist_name -t1 --share $scontrol -o show job $job_id1 +set srun_pid [spawn $srun -N1 --nodelist=$nodelist_name -t1 --share $scontrol -o show job $job_id1] expect { -re "Partition=($alpha_numeric)" { set partition $expect_out(1,string) @@ -125,7 +124,7 @@ expect { } timeout { send_user "\nFAILURE: srun not responding\n" - kill_srun + slow_kill $srun_pid exit 1 } eof { diff --git a/testsuite/expect/test1.38 b/testsuite/expect/test1.38 index 07a093b1ce3..e3084acf3f2 100755 --- a/testsuite/expect/test1.38 +++ b/testsuite/expect/test1.38 @@ -74,8 +74,7 @@ expect { set timeout $max_job_delay set matches 0 set job_id 0 -spawn $srun -v -N1 -t1 --unbuffered $file_in -set srun_pid [exp_pid] +set srun_pid [spawn $srun -v -N1 -t1 --unbuffered $file_in] expect { -re "launching ($number).0" { set job_id $expect_out(1,string) @@ -84,7 +83,6 @@ expect { -re "WAITING" { incr matches exec $bin_kill -INT $srun_pid -# exec $bin_pkill -INT -n -u $uid srun exp_continue } -re "srun: interrupt" { @@ -98,9 +96,8 @@ expect { } timeout { send_user "\nFAILURE: srun not responding\n" - kill_srun + slow_kill $srun_pid set exit_code 1 - exp_continue } eof { wait @@ -118,8 +115,7 @@ if {$matches != 3} { # set matches 0 set job_id 0 -spawn $srun -v -N1 -t1 --unbuffered --quit-on-interrupt $file_in -set srun_pid [exp_pid] +set srun_pid [spawn $srun -v -N1 -t1 --unbuffered --quit-on-interrupt $file_in] expect { -re "launching ($number).0" { set job_id $expect_out(1,string) @@ -128,7 +124,6 @@ expect { -re "WAITING" { incr matches exec $bin_kill -INT $srun_pid -# exec $bin_pkill -INT -n -u $uid srun exp_continue } -re "srun: interrupt" { @@ -145,9 +140,8 @@ expect { } timeout { send_user "\nFAILURE: srun not responding\n" - kill_srun + slow_kill $srun_pid set exit_code 1 - exp_continue } eof { wait diff --git a/testsuite/expect/test1.39 b/testsuite/expect/test1.39 index 34885f0eda6..4d8cfd1351d 100755 --- a/testsuite/expect/test1.39 +++ b/testsuite/expect/test1.39 @@ -44,12 +44,17 @@ print_header $test_id # Test if "light" core file supported # set matches 0 -spawn $srun --core=list +set srun_pid [spawn $srun --core=list] expect { -re " light " { set matches 1 exp_continue } + timeout { + send_user "\nFAILURE: srun not responding\n" + slow_kill $srun_pid + exit 1 + } eof { wait } @@ -77,7 +82,7 @@ foreach filename [glob -nocomplain *core*] { # Spawn initial program via srun # set timeout $max_job_delay -spawn $srun -N1 -t1 --core=light $file_prog +set srun_pid [spawn $srun -N1 -t1 --core=light $file_prog] expect { -re "Segmentation fault" { send_user "\nNo worries, this error is expected.\n" @@ -85,9 +90,8 @@ expect { } timeout { send_user "\nFAILURE: srun not responding\n" - kill_srun + slow_kill $srun_pid set exit_code 1 - exp_continue } eof { wait diff --git a/testsuite/expect/test1.4 b/testsuite/expect/test1.4 index 177f1edb896..cd70baea80e 100755 --- a/testsuite/expect/test1.4 +++ b/testsuite/expect/test1.4 @@ -50,7 +50,6 @@ expect { send_user "\nFAILURE: srun not responding\n" slow_kill $srun_pid set exit_code 1 - exp_continue } eof { wait diff --git a/testsuite/expect/test1.5 b/testsuite/expect/test1.5 index 179da4a4245..fae47de2425 100755 --- a/testsuite/expect/test1.5 +++ b/testsuite/expect/test1.5 @@ -58,7 +58,6 @@ expect { send_user "\nFAILURE: srun not responding\n" slow_kill $srun_pid set exit_code 1 - exp_continue } eof { wait diff --git a/testsuite/expect/test1.6 b/testsuite/expect/test1.6 index 2863c290832..f4311e80ff2 100755 --- a/testsuite/expect/test1.6 +++ b/testsuite/expect/test1.6 @@ -59,7 +59,6 @@ expect { send_user "\nFAILURE: srun not responding\n" slow_kill $srun_pid set exit_code 1 - exp_continue } eof { wait diff --git a/testsuite/expect/test1.7 b/testsuite/expect/test1.7 index 3e1c12230ed..ed0515be2b9 100755 --- a/testsuite/expect/test1.7 +++ b/testsuite/expect/test1.7 @@ -6,9 +6,6 @@ # Output: "TEST: #.#" followed by "SUCCESS" if test was successful, OR # "FAILURE: ..." otherwise with an explanation of the failure, OR # anything else indicates a failure mode that must be investigated. -# -# Note: This script generates and then deletes files in the working directory -# named test1.7.input, test1.7.output, and test1.7.error ############################################################################ # Copyright (C) 2002-2006 The Regents of the University of California. # Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). @@ -110,7 +107,7 @@ make_bash_script $file_in " # set timeout [expr $max_job_delay + $sleep_time] set timed_out 0 -spawn $srun -t1 $bin_sleep $sleep_time +set srun_pid [spawn $srun -t1 $bin_sleep $sleep_time] expect { -re "job exceeded timelimit" { set timed_out 1 @@ -126,9 +123,8 @@ expect { } timeout { send_user "\nFAILURE: srun not responding\n" - kill_srun + slow_kill $srun_pid set exit_code 1 - exp_continue } eof { wait @@ -142,7 +138,7 @@ if {$timed_out == 1} { } set completions 0 -spawn $srun -t4 $bin_sleep $sleep_time +set srun_pid [spawn $srun -t4 $bin_sleep $sleep_time] expect { -re "job exceeded timelimit" { set completions -1 @@ -162,9 +158,8 @@ expect { } timeout { send_user "\nFAILURE: srun not responding\n" - kill_srun + slow_kill $srun_pid set exit_code 1 - exp_continue } eof { wait @@ -180,7 +175,7 @@ if {$completions != 1} { # Spawn a srun batch job with arguments # set timeout $max_job_delay -spawn $srun --batch --output=$file_out --error=$file_err -t4 $file_in +set srun_pid [spawn $srun --batch --output=$file_out --error=$file_err -t4 ./$file_in] expect { -re "jobid ($number) submitted" { set job_id $expect_out(1,string) @@ -188,7 +183,7 @@ expect { } timeout { send_user "\nFAILURE: srun not responding\n" - kill_srun + slow_kill $srun_pid set exit_code 1 } eof { @@ -224,7 +219,8 @@ if {[wait_for_file $file_out] == 0} { } if {$output_fini == 0} { - send_user "\nFAILURE: Unexpected batch job output, possible premature job termination\n" + send_user "\nFAILURE: Unexpected batch job output, " + send_user "possible premature job termination\n" set exit_code 1 } diff --git a/testsuite/expect/test1.8 b/testsuite/expect/test1.8 index 29b9d88c676..ee9303b4906 100755 --- a/testsuite/expect/test1.8 +++ b/testsuite/expect/test1.8 @@ -63,7 +63,7 @@ exec $bin_chmod 700 $file_in # Spawn a shell via srun that uses stdin/out/err and confirm their contents # set timeout $max_job_delay -spawn $srun --input=$file_in --output=$file_out --error=$file_err -t1 $bin_bash +set srun_pid [spawn $srun --input=$file_in --output=$file_out --error=$file_err -t1 $bin_bash] expect { -re "Unable to contact" { send_user "\nFAILURE: slurm appears to be down\n" @@ -71,9 +71,8 @@ expect { } timeout { send_user "\nFAILURE: srun not responding\n" - kill_srun + slow_kill $srun_pid set exit_code 1 - exp_continue } eof { wait @@ -150,7 +149,7 @@ if {$got_sleep_err == 0} { # # Spawn a program to run for a while with no input, output or error # -spawn $srun --input=none --output=none --error=none -t1 -N1 od -c $srun +set srun_pid [spawn $srun --input=none --output=none --error=none -t1 -N1 od -c $srun] expect { -re "Unable to contact" { send_user "\nFAILURE: slurm appears to be down\n" @@ -168,9 +167,8 @@ expect { } timeout { send_user "\nFAILURE: srun not responding\n" - kill_srun + slow_kill $srun_pid set exit_code 1 - exp_continue } eof { wait diff --git a/testsuite/expect/test1.9 b/testsuite/expect/test1.9 index e91341879e9..22be915a3e7 100755 --- a/testsuite/expect/test1.9 +++ b/testsuite/expect/test1.9 @@ -43,7 +43,7 @@ print_header $test_id # Submit a slurm job that will execute 'id' on 1 node and over task_cnt tasks # set timeout $max_job_delay -spawn $srun -N1 -n$task_cnt -O -v -t1 $bin_id +set srun_pid [spawn $srun -N1 -n$task_cnt -O -v -t1 $bin_id] expect { -re "jobid" { set verbosity 1 @@ -59,9 +59,8 @@ expect { } timeout { send_user "\nFAILURE: srun not responding\n" - kill_srun + slow_kill $srun_pid set exit_code 1 - exp_continue } eof { wait diff --git a/testsuite/expect/test15.10 b/testsuite/expect/test15.10 index 27c715d21ad..fc880c863ae 100755 --- a/testsuite/expect/test15.10 +++ b/testsuite/expect/test15.10 @@ -59,13 +59,11 @@ for {set inx 1} {$inx < 4} {set inx [expr $inx * 2]} { } timeout { send_user "\nFAILURE: salloc not responding\n" - if {$job_id == 0} { - slow_kill $salloc_pid - } else { + if {$job_id != 0} { cancel_job $job_id } + slow_kill $salloc_pid set exit_code 1 - exp_continue } eof { if {$tasks_get < $tasks_set} { -- GitLab