diff --git a/testsuite/expect/globals b/testsuite/expect/globals index 21b2c94a10eba043f6d9ee2fae6d4bb9c95edebd..8a63ce365313294df54457be12623650d4d8305d 100755 --- a/testsuite/expect/globals +++ b/testsuite/expect/globals @@ -638,13 +638,13 @@ proc wait_for_job { job_id desired_state } { "RUNNING" {} "SUSPENDED" {} default { - send_user "FAILURE: wait_for_job with invalid state: $desired_state\n" + send_user "WARNING: wait_for_job with invalid state: $desired_state\n" return 1 } } if {$job_id == 0} { - send_user "FAILURE: wait_for_job with invalid job ID: $job_id\n" + send_user "WARNING: wait_for_job with invalid job ID: $job_id\n" return 1 } @@ -713,7 +713,7 @@ proc wait_for_job { job_id desired_state } { } if { $my_delay > $max_job_state_delay } { - send_user "FAILURE: Timeout waiting for job state $desired_state\n" + send_user "WARNING: Timeout waiting for job state $desired_state\n" return 1 } diff --git a/testsuite/expect/test1.114 b/testsuite/expect/test1.114 index f49f0a2c33af11d500b08389711d917e02bc7232..19b0ecdd4fa2caeea7cf57730f399d459c5d8d6f 100755 --- a/testsuite/expect/test1.114 +++ b/testsuite/expect/test1.114 @@ -60,7 +60,12 @@ proc run_spread_job { task_cnt } { exit 1 } - wait_for_job $job_id "DONE" + if {[wait_for_job $job_id "DONE"] != 0} { + send_user "\nFAILURE: error waiting for job $job_id to complete\n" + cancel_job $job_id + set exit_code 1 + } + set timeout 10 set num_nodes 0 spawn $scontrol show job $job_id diff --git a/testsuite/expect/test1.75 b/testsuite/expect/test1.75 index e14117aa3cf0b9e093ffc228a0b9b8c7cf26bce8..79236603286bb8bde0812d265817a9d435df6cac 100755 --- a/testsuite/expect/test1.75 +++ b/testsuite/expect/test1.75 @@ -118,7 +118,11 @@ proc sub_job { freq } { exit 1 } - wait_for_job $job_id DONE + if {[wait_for_job $job_id "DONE"] != 0} { + send_user "\nFAILURE: error waiting for job $job_id to complete\n" + cancel_job $job_id + set exit_code 1 + } spawn $sacct -j$job_id -oavecpufreq --noheader expect { diff --git 
a/testsuite/expect/test1.97 b/testsuite/expect/test1.97 index f81d77c226eac7d68d95b97490008bca79180417..d88838d56ef4dbc3cd511b6f19743bb6e2df03ae 100755 --- a/testsuite/expect/test1.97 +++ b/testsuite/expect/test1.97 @@ -319,7 +319,11 @@ expect { } } -wait_for_job $tmp_id DONE +if {[wait_for_job $tmp_id "DONE"] != 0} { + send_user "\nFAILURE: error waiting for job $tmp_id to complete\n" + cancel_job $tmp_id + set exit_code 1 +} spawn $scontrol show job $tmp_id expect { diff --git a/testsuite/expect/test1.99 b/testsuite/expect/test1.99 index 74814200caaad24f85073fb8641f2ffab2d9fcac..c973329e9957b7c6ba1d68d1a122d793f44f7533 100755 --- a/testsuite/expect/test1.99 +++ b/testsuite/expect/test1.99 @@ -238,7 +238,10 @@ if {$tmp_id == 0} { exit 1 } -wait_for_job $tmp_id RUNNING +if {[wait_for_job $tmp_id "RUNNING"] != 0} { + send_user "\nFAILURE: error waiting for job $tmp_id to start\n" + set exit_code 1 +} set match 0 spawn $srun -t1 sleep 10 expect { diff --git a/testsuite/expect/test12.7 b/testsuite/expect/test12.7 index 256b0c7986656dfce83f4496136388e1cedb5e03..f6ad128caf938a76d0e2ced02a610454985d9fbe 100755 --- a/testsuite/expect/test12.7 +++ b/testsuite/expect/test12.7 @@ -177,7 +177,10 @@ if {$job_id == 0} { exit 1 } -wait_for_job $job_id RUNNING +if {[wait_for_job $job_id "RUNNING"] != 0} { + send_user "\nFAILURE: error waiting for job $job_id to start\n" + set exit_code 1 +} set found 0 spawn $scontrol show job $job_id @@ -225,7 +228,10 @@ if {$job_id == 0} { exit 1 } -wait_for_job $job_id RUNNING +if {[wait_for_job $job_id "RUNNING"] != 0} { + send_user "\nFAILURE: error waiting for job $job_id to start\n" + set exit_code 1 +} # Wait for batch script to start (after message delays, prologs, etc.) 
sleep 5 @@ -241,7 +247,10 @@ mod_state "resume" "test$test_id" # Check the job state send_user "\n\nTest 2\n" -wait_for_job $job_id PENDING +if {[wait_for_job $job_id "PENDING"] != 0} { + send_user "\nFAILURE: error waiting for job $job_id to pend\n" + set exit_code 1 +} set fail_count [check_sacct_states "NODE_FAIL" 1] if {$fail_count != 1} { endit 1 "FAILURE: Bad NODE_FAIL count ($fail_count != 1)" @@ -252,7 +261,10 @@ if {$pend_count != 1} { } send_user "So far, so good\n\n" -wait_for_job $job_id RUNNING +if {[wait_for_job $job_id "RUNNING"] != 0} { + send_user "\nFAILURE: error waiting for job $job_id to start\n" + set exit_code 1 +} # Wait for batch script to start (after message delays, prologs, etc.) sleep 5 @@ -300,7 +312,10 @@ if {$pend_count != 1} { } send_user "So far, so good\n\n" -wait_for_job $job_id RUNNING +if {[wait_for_job $job_id "RUNNING"] != 0} { + send_user "\nFAILURE: error waiting for job $job_id to start\n" + set exit_code 1 +} # Wait for batch script to start (after message delays, prologs, etc.) 
sleep 5 @@ -327,7 +342,11 @@ if {$run_count != 1} { } send_user "So far, so good\n\n" -wait_for_job $job_id DONE +if {[wait_for_job $job_id "DONE"] != 0} { + send_user "\nFAILURE: error waiting for job $job_id to complete\n" + cancel_job $job_id + set exit_code 1 +} # Check steps after job has completed check_step 2 diff --git a/testsuite/expect/test12.8 b/testsuite/expect/test12.8 index 41e690af50aaff98a8abf3f685cc3d3e6dfa5eeb..2fbea21e983aecd9ae19d7752fecebae072a1302 100755 --- a/testsuite/expect/test12.8 +++ b/testsuite/expect/test12.8 @@ -73,7 +73,11 @@ if {$job_id == 0} { exit 1 } -wait_for_job $job_id DONE +if {[wait_for_job $job_id "DONE"] != 0} { + send_user "\nFAILURE: error waiting for job $job_id to complete\n" + cancel_job $job_id + set exit_code 1 +} set failed_cnt 0 set timeout_cnt 0 diff --git a/testsuite/expect/test17.12 b/testsuite/expect/test17.12 index 089b7b5f84f94679b50b55dcfe27781ffe939448..dd5e547497f0fc7dc0086b7e7d22667036c57feb 100755 --- a/testsuite/expect/test17.12 +++ b/testsuite/expect/test17.12 @@ -54,7 +54,11 @@ proc check_job_nodes {test_job node_name find_node} { set nodelist "" - wait_for_job $test_job "RUNNING" + if {[wait_for_job $test_job "RUNNING"] != 0} { + send_user "\nFAILURE: error waiting for job $test_job to start\n" + cancel_job $test_job + set exit_code 1 + } # Check that job that the job used the correct nodes log_user 0 @@ -103,7 +107,11 @@ proc check_job {nnode test_job} { set nodelist "" - wait_for_job $test_job "RUNNING" + if {[wait_for_job $test_job "RUNNING"] != 0} { + send_user "\nFAILURE: error waiting for job $test_job to start\n" + cancel_job $test_job + set exit_code 1 + } # Check that job that the job used the correct nodes spawn $scontrol show job $test_job diff --git a/testsuite/expect/test17.36 b/testsuite/expect/test17.36 index deeac2a9bb90d8a99653158d95756cf93511b4ea..7349531040160a8767ac0b1d506d54cc1c517763 100755 --- a/testsuite/expect/test17.36 +++ b/testsuite/expect/test17.36 @@ -232,7 +232,11 @@ 
proc check_job { exp_num_jobs } { global squeue job_id num_jobs file_in exit_code # Wait a bit for the job to start - wait_for_job ${job_id}_0 RUNNING + if {[wait_for_job ${job_id}_0 "RUNNING"] != 0} { + send_user "\nFAILURE: error waiting for job ${job_id}_0 to start\n" + cancel_job ${job_id}_0 + set exit_code 1 + } set job_cnt 0 # If gang scheduling is configured, some jobs will be suspended @@ -319,7 +323,11 @@ if {$job_id == 0} { } # Wait a bit for job to start -wait_for_job $job_id RUNNING +if {[wait_for_job $job_id "RUNNING"] != 0} { + send_user "\nFAILURE: error waiting for job $job_id to start\n" + cancel_job $job_id + set exit_code 1 +} # Identify node to use for testing set got_node 0 diff --git a/testsuite/expect/test17.37 b/testsuite/expect/test17.37 index be528dc57a31d0eb64f4eb0e6fd4e2af5e10c540..cdb5c08e415fc8dbb67899f253cd26ab411b5378 100755 --- a/testsuite/expect/test17.37 +++ b/testsuite/expect/test17.37 @@ -60,7 +60,11 @@ if { $job_id1 == 0 } { exit 1 } -wait_for_job $job_id1 RUNNING +if {[wait_for_job $job_id1 "RUNNING"] != 0} { + send_user "\nFAILURE: error waiting for job $job_id1 to start\n" + cancel_job $job_id1 + set exit_code 1 +} # Submit a job that depends on job above spawn $sbatch -t1 -dafternotok:$job_id1 -o/dev/null $script @@ -82,7 +86,11 @@ if { $job_id2 == 0 } { exit 1 } -wait_for_job $job_id1 DONE +if {[wait_for_job $job_id1 "DONE"] != 0} { + send_user "\nFAILURE: error waiting for job $job_id1 to complete\n" + cancel_job $job_id1 + set exit_code 1 +} # Check exit code of the first job set match 0 diff --git a/testsuite/expect/test17.38 b/testsuite/expect/test17.38 index e0a95ab42a601761ae46a89facf23b0e6fc36ba6..fec662e2f4465883a39e9443bdc5c915407da428 100755 --- a/testsuite/expect/test17.38 +++ b/testsuite/expect/test17.38 @@ -95,7 +95,11 @@ if {$job_id == 0} { exit 1 } -wait_for_job $job_id DONE +if {[wait_for_job $job_id "DONE"] != 0} { + send_user "\nFAILURE: error waiting for job $job_id to complete\n" + set exit_code 1 + 
cancel_job $job_id +} set sig 0 spawn $bin_cat $file_out @@ -175,7 +179,11 @@ if {$job_id == 0} { exit 1 } -wait_for_job $job_id DONE +if {[wait_for_job $job_id "DONE"] != 0} { + send_user "\nFAILURE: error waiting for job $job_id to complete\n" + set exit_code 1 + cancel_job $job_id +} set sig 0 spawn $bin_cat $file_out diff --git a/testsuite/expect/test17.39 b/testsuite/expect/test17.39 index d092970cb576fda1284cfd77be2eed27239ce639..b55e51bf73e625e24a27f4d5d0920455669cecf5 100755 --- a/testsuite/expect/test17.39 +++ b/testsuite/expect/test17.39 @@ -141,7 +141,11 @@ if {$match == 0} { } # Wait for the fast job to finish after submitting dependent job -wait_for_job $fast_id DONE +if {[wait_for_job $fast_id "DONE"] != 0} { + send_user "\nFAILURE: error waiting for job $fast_id to complete\n" + set exit_code 1 + cancel_job $fast_id +} # Wait for dependency job to start once the fast job is complete if {[wait_for_job $dep_id RUNNING]} { diff --git a/testsuite/expect/test17.62 b/testsuite/expect/test17.62 index 99058b430b745fb0f5a1d70497f3261d1db986ce..6711c1e359b84d528e5e5871eb4ff1f95de4d9c2 100755 --- a/testsuite/expect/test17.62 +++ b/testsuite/expect/test17.62 @@ -89,7 +89,12 @@ if {$job_id == 0} { } # Wait for the job to finish -wait_for_job $job_id DONE +if {[wait_for_job $job_id "DONE"] != 0} { + send_user "\nFAILURE: error waiting for job $job_id to complete\n" + cancel_job $job_id + set exit_code 1 +} + foreach option [array names check_list] { #send_user "\n$option=$check_list($option)\n" diff --git a/testsuite/expect/test2.21 b/testsuite/expect/test2.21 index a08da70afc54562a138b365f82f4ca55f0d85f84..e6a693b325e89938bedb6eb45b2540da8cc28c43 100755 --- a/testsuite/expect/test2.21 +++ b/testsuite/expect/test2.21 @@ -92,7 +92,11 @@ if { $job_id == 0 } { } # Wait for the job to be in the complete state -wait_for_job $job_id DONE +if {[wait_for_job $job_id "DONE"] != 0} { + send_user "\nFAILURE: error waiting for job $job_id to complete\n" + cancel_job $job_id + 
set exit_code 1 +} # Requeue the job when it is complete requeue_job $job_id @@ -127,7 +131,11 @@ if { $job_id == 0 } { } # Wait for the job to be in the complete state -wait_for_job $job_id DONE +if {[wait_for_job $job_id "DONE"] != 0} { + send_user "\nFAILURE: error waiting for job $job_id to complete\n" + cancel_job $job_id + set exit_code 1 +} # Requeue the job when it is complete requeue_job $job_id diff --git a/testsuite/expect/test2.22 b/testsuite/expect/test2.22 index f10d31448a8b455f1cce9d8767cd3b57fa9b3e06..82b28b72312221bbe72d8e4fad6deef796fceeb6 100755 --- a/testsuite/expect/test2.22 +++ b/testsuite/expect/test2.22 @@ -121,7 +121,11 @@ if {$job_id == 0} { exit 1 } -wait_for_job $job_id DONE +if {[wait_for_job $job_id "DONE"] != 0} { + send_user "\nFAILURE: error waiting for job $job_id to complete\n" + cancel_job $job_id + set exit_code 1 +} spawn $scontrol requeuehold $job_id expect { diff --git a/testsuite/expect/test2.23 b/testsuite/expect/test2.23 index 0ed59a71a212f4357fb77ac993641a919f7085cd..1c4e5d6b706eb953686c367b2fbfa4f2a10e2327 100755 --- a/testsuite/expect/test2.23 +++ b/testsuite/expect/test2.23 @@ -95,7 +95,11 @@ if { $job_id == 0 } { exit 1 } -wait_for_job $job_id DONE +if {[wait_for_job $job_id "DONE"] != 0} { + send_user "\nFAILURE: error waiting for job $job_id to complete\n" + cancel_job $job_id + set exit_code 1 +} spawn $scontrol requeuehold State=SpecialExit $job_id expect { diff --git a/testsuite/expect/test31.2 b/testsuite/expect/test31.2 index c6c0a9023bed150dbc2e487282e0eb6e85e713f5..6920e33120c57b2d5af163eb21a6d635a96b123c 100755 --- a/testsuite/expect/test31.2 +++ b/testsuite/expect/test31.2 @@ -149,7 +149,10 @@ if {$exit_code} { exit $exit_code } -wait_for_job $job_id "RUNNING" +if {[wait_for_job $job_id "RUNNING"] != 0} { + send_user "\nFAILURE: error waiting for job $job_id to start\n" + set exit_code 1 +} # Check that the job was submitted with no error set match 0 diff --git a/testsuite/expect/test32.10 
b/testsuite/expect/test32.10 index dccaf98ae88e59fd8ac39ad4eca0239a7cbf562e..64827edd1f1e0c79c467225ce2d3e81f194ab716 100755 --- a/testsuite/expect/test32.10 +++ b/testsuite/expect/test32.10 @@ -100,7 +100,11 @@ if {$job_id == 0} { exit 1 } -wait_for_job $job_id "DONE" +if {[wait_for_job $job_id "DONE"] != 0} { + send_user "\nFAILURE: error waiting for job $job_id to complete\n" + cancel_job $job_id + set exit_code 1 +} set number_1 -1 set number_2 -1 diff --git a/testsuite/expect/test32.11 b/testsuite/expect/test32.11 index 053e1927c598e45fca69bcf0aa383a5c7c214990..c212cc3afe236cdbc2b10f348c1a251748fcf483 100755 --- a/testsuite/expect/test32.11 +++ b/testsuite/expect/test32.11 @@ -98,7 +98,11 @@ if {$job_id == 0} { exit 1 } -wait_for_job $job_id "DONE" +if {[wait_for_job $job_id "DONE"] != 0} { + send_user "\nFAILURE: error waiting for job $job_id to complete\n" + cancel_job $job_id + set exit_code 1 +} set verbose 0 set number_1 -1 diff --git a/testsuite/expect/test32.12 b/testsuite/expect/test32.12 index f44c039f6746125500a0ebdd0c9fe61a03ffe122..580b746f19281bdfbcc36f1d510fdfba5c2e1d86 100755 --- a/testsuite/expect/test32.12 +++ b/testsuite/expect/test32.12 @@ -98,7 +98,11 @@ if {$job_id == 0} { exit 1 } -wait_for_job $job_id "DONE" +if {[wait_for_job $job_id "DONE"] != 0} { + send_user "\nFAILURE: error waiting for job $job_id to complete\n" + cancel_job $job_id + set exit_code 1 +} set number_1 -1 set number_2 -1 diff --git a/testsuite/expect/test32.4 b/testsuite/expect/test32.4 index 3ba47bb6f6d5b3ab20db5f9d77a00a8f4922b180..74835521bb0b1299a2252f87ee9d52db325fc6f7 100755 --- a/testsuite/expect/test32.4 +++ b/testsuite/expect/test32.4 @@ -98,7 +98,11 @@ if {$job_id == 0} { exit 1 } -wait_for_job $job_id "DONE" +if {[wait_for_job $job_id "DONE"] != 0} { + send_user "\nFAILURE: error waiting for job $job_id to complete\n" + cancel_job $job_id + set exit_code 1 +} set number_1 -1 set number_2 -1 diff --git a/testsuite/expect/test32.5 b/testsuite/expect/test32.5 
index 24f5c46fc85dd4bf4b3012dbe349068e4dd31b78..97f5c896bc75f31891f8a23c2d9ea9e918204cff 100755 --- a/testsuite/expect/test32.5 +++ b/testsuite/expect/test32.5 @@ -98,7 +98,11 @@ if {$job_id == 0} { exit 1 } -wait_for_job $job_id "DONE" +if {[wait_for_job $job_id "DONE"] != 0} { + send_user "\nFAILURE: error waiting for job $job_id to complete\n" + cancel_job $job_id + set exit_code 1 +} set number_1 -1 set number_2 -1 diff --git a/testsuite/expect/test32.6 b/testsuite/expect/test32.6 index 6c8dd07639f388961d7d55afa97e0161a089e43b..04ff944606b0c446101ad1bfdef80ac77bf7f098 100755 --- a/testsuite/expect/test32.6 +++ b/testsuite/expect/test32.6 @@ -98,7 +98,11 @@ if {$job_id == 0} { exit 1 } -wait_for_job $job_id "DONE" +if {[wait_for_job $job_id "DONE"] != 0} { + send_user "\nFAILURE: error waiting for job $job_id to complete\n" + cancel_job $job_id + set exit_code 1 +} set number_1 -1 set number_2 -1 diff --git a/testsuite/expect/test32.7 b/testsuite/expect/test32.7 index 517e0756d503220f1ce78a77774960c15bb15688..43ed7ee2c97291982c4db1649fa314683219ca18 100755 --- a/testsuite/expect/test32.7 +++ b/testsuite/expect/test32.7 @@ -98,7 +98,11 @@ if {$job_id == 0} { exit 1 } -wait_for_job $job_id "DONE" +if {[wait_for_job $job_id "DONE"] != 0} { + send_user "\nFAILURE: error waiting for job $job_id to complete\n" + cancel_job $job_id + set exit_code 1 +} set matches 0 if {[wait_for_file $file_out] == 0} { diff --git a/testsuite/expect/test32.8 b/testsuite/expect/test32.8 index b0e15d363f1d4f13501d68742883375c73d253b1..655c0a8d7aa814b27003eca27839338ac01224a8 100755 --- a/testsuite/expect/test32.8 +++ b/testsuite/expect/test32.8 @@ -98,7 +98,11 @@ if {$job_id == 0} { exit 1 } -wait_for_job $job_id "DONE" +if {[wait_for_job $job_id "DONE"] != 0} { + send_user "\nFAILURE: error waiting for job $job_id to complete\n" + cancel_job $job_id + set exit_code 1 +} set number_1 -1 set number_2 -1 diff --git a/testsuite/expect/test32.9 b/testsuite/expect/test32.9 index 
748541cc3fa1ab2f552273b8657f29ac0785936d..672e6f6556d4dfeda406447d05d2e08c5bd0c784 100755 --- a/testsuite/expect/test32.9 +++ b/testsuite/expect/test32.9 @@ -100,7 +100,11 @@ if {$job_id == 0} { exit 1 } -wait_for_job $job_id "DONE" +if {[wait_for_job $job_id "DONE"] != 0} { + send_user "\nFAILURE: error waiting for job $job_id to complete\n" + cancel_job $job_id + set exit_code 1 +} set matches 0 set node_cnt 99 diff --git a/testsuite/expect/test34.1 b/testsuite/expect/test34.1 index 34ff0c439b7a401d668ec9c4eed909f472b8f98d..0f5e5ee0134883599f063e12ed796fcb3707f69e 100755 --- a/testsuite/expect/test34.1 +++ b/testsuite/expect/test34.1 @@ -247,7 +247,7 @@ if {$job_id == 0} { exit 1 } -if { [wait_for_job $job_id DONE] != 0} { +if { [wait_for_job $job_id "DONE"] != 0} { cancel_job $job_id exit 1 } @@ -283,13 +283,13 @@ create_part $part_name_1 $nodes 1 cancel create_part $part_name_2 $nodes 2 off set part_id_1 [sub_job $part_name_1 $job1] -if { [wait_for_job $part_id_1 RUNNING] != 0} { +if { [wait_for_job $part_id_1 "RUNNING"] != 0} { send_user "\nFAILURE: job $part_id_1 failed to start\n" clean_up 1 } set part_id_2 [sub_job $part_name_2 $job2] -if { [wait_for_job $part_id_2 RUNNING] != 0} { +if { [wait_for_job $part_id_2 "RUNNING"] != 0} { send_user "\nFAILURE: job $part_id_1 failed to start\n" clean_up 1 } @@ -305,14 +305,14 @@ if { [test_gang] == 1 } { mod_part $part_name_1 suspend set part_id_1 [sub_job $part_name_1 $job1] - if { [wait_for_job $part_id_1 RUNNING] != 0} { + if { [wait_for_job $part_id_1 "RUNNING"] != 0} { send_user "\nFAILURE: job $part_id_1 failed to start\n" clean_up 1 } set part_id_2 [sub_job $part_name_2 $job2] # Wait for the job to start - if { [wait_for_job $part_id_2 RUNNING] != 0} { + if { [wait_for_job $part_id_2 "RUNNING"] != 0} { send_user "\nFAILURE: job $part_id_2 failed to start\n" clean_up 1 } @@ -320,11 +320,11 @@ if { [test_gang] == 1 } { sleep 5 check_job_state $part_id_1 SUSPENDED - if { [wait_for_job $part_id_2 DONE] != 0} { + 
if { [wait_for_job $part_id_2 "DONE"] != 0} { send_user "\nFAILURE: job $part_id_2 failed to complete\n" clean_up 1 } - if { [wait_for_job $part_id_1 RUNNING] != 0} { + if { [wait_for_job $part_id_1 "RUNNING"] != 0} { send_user "\nFAILURE: job $part_id_1 failed to start\n" clean_up 1 } @@ -338,13 +338,13 @@ send_user "\n*** TEST PREEMPT REQUEUE ***\n" mod_part $part_name_1 requeue set part_id_1 [sub_job $part_name_1 $job1] -if { [wait_for_job $part_id_1 RUNNING] != 0} { +if { [wait_for_job $part_id_1 "RUNNING"] != 0} { send_user "\nFAILURE: job $part_id_1 failed to start\n" clean_up 1 } set part_id_2 [sub_job $part_name_2 $job2] -if { [wait_for_job $part_id_2 RUNNING] != 0} { +if { [wait_for_job $part_id_2 "RUNNING"] != 0} { send_user "\nFAILURE: job $part_id_2 failed to start\n" clean_up 1 } @@ -353,8 +353,14 @@ sleep 5 check_job_state $part_id_1 PENDING # Wait for job 1 to be requeued -wait_for_job $part_id_2 DONE -wait_for_job $part_id_1 RUNNING +if { [wait_for_job $part_id_2 "DONE"] != 0} { + send_user "\nFAILURE: job $part_id_2 failed to complete\n" + clean_up 1 +} +if { [wait_for_job $part_id_1 "RUNNING"] != 0} { + send_user "\nFAILURE: job $part_id_1 failed to start\n" + clean_up 1 +} sleep 5 check_job_state $part_id_1 RUNNING diff --git a/testsuite/expect/test34.2 b/testsuite/expect/test34.2 index 20992cac4191680508228f070fbfd892b8ca9910..96bcc7e24178c48e435304646547929f982e9796 100755 --- a/testsuite/expect/test34.2 +++ b/testsuite/expect/test34.2 @@ -238,13 +238,13 @@ acct_setup $acct_1 $qos_1 "" cancel acct_setup $acct_2 $qos_2 $qos_1 cluster set qos_1_id [sub_job $acct_1] -if { [wait_for_job $qos_1_id RUNNING] != 0} { +if { [wait_for_job $qos_1_id "RUNNING"] != 0} { send_user "\nFAILURE: job $qos_id_1 failed to start\n" clean_up 1 } set qos_2_id [sub_job $acct_2] -if { [wait_for_job $qos_2_id RUNNING] != 0} { +if { [wait_for_job $qos_2_id "RUNNING"] != 0} { send_user "\nFAILURE: job $qos_id_2 failed to start\n" clean_up 1 } @@ -261,13 +261,13 @@ 
send_user "\n*** TEST PREEMPT REQUEUE ***\n" mod_qos $qos_1 requeue set qos_1_id [sub_job $acct_1] -if { [wait_for_job $qos_1_id RUNNING] != 0} { +if { [wait_for_job $qos_1_id "RUNNING"] != 0} { send_user "\nFAILURE: job $qos_id_1 failed to start\n" clean_up 1 } set qos_2_id [sub_job $acct_2] -if { [wait_for_job $qos_2_id RUNNING] != 0} { +if { [wait_for_job $qos_2_id "RUNNING"] != 0} { send_user "\nFAILURE: job $qos_id_2 failed to start\n" clean_up 1 } @@ -276,11 +276,17 @@ sleep 2 check_job_state $qos_1_id PENDING # Wait for requeued job to restart -wait_for_job $qos_2_id DONE -wait_for_job $qos_1_id RUNNING +if { [wait_for_job $qos_2_id "DONE"] != 0} { + send_user "\nFAILURE: job $qos_2_id failed to complete\n" + clean_up 1 +} +if { [wait_for_job $qos_1_id "RUNNING"] != 0} { + send_user "\nFAILURE: job $qos_1_id failed to start\n" + clean_up 1 +} sleep 2 -check_job_state $qos_1_id RUNNING +check_job_state $qos_1_id "RUNNING" cancel_job $qos_1_id cancel_job $qos_2_id diff --git a/testsuite/expect/test4.5 b/testsuite/expect/test4.5 index 4024bc46099350eea55da3bb7cc21b3e897db7a3..45a36885fc0eef6b509444349bf3c9abed1a385b 100755 --- a/testsuite/expect/test4.5 +++ b/testsuite/expect/test4.5 @@ -168,7 +168,11 @@ if {$tmp_id == 0} { set exit_code 1 } -wait_for_job $tmp_id RUNNING +if {[wait_for_job $tmp_id "RUNNING"] != 0} { + send_user "\nFAILURE: error waiting for job $tmp_id to start\n" + cancel_job $tmp_id + set exit_code 1 +} spawn $sinfo --Node --node=$node_list(0),$node_list(1) --long --exact --state=allocated -p$partition expect { diff --git a/testsuite/expect/test5.6 b/testsuite/expect/test5.6 index b1d162fb9c34fbd1c4712e5396cb3f3a8d384e9b..92fc8d865b206fa44810af20b73317ea5a65face 100755 --- a/testsuite/expect/test5.6 +++ b/testsuite/expect/test5.6 @@ -311,7 +311,11 @@ expect { # Wait long enough to make sure the job step is running and has # started its job steps. 
Check the squeue job step filter option # -wait_for_job $job_id1 "RUNNING" +if {[wait_for_job $job_id1 "RUNNING"] != 0} { + send_user "\nFAILURE: error waiting for job $job_id1 to start\n" + cancel_job $job_id1 + set exit_code 1 +} exec $bin_sleep 10 set job_found 0 spawn $squeue --format=%i --steps $job_id1.$step_id diff --git a/testsuite/expect/test5.9 b/testsuite/expect/test5.9 index bff66c0182731cc88ad820b27fa4a4f1324188d6..b8ebe077213a63d0ea0817a9017ef3859078a349 100755 --- a/testsuite/expect/test5.9 +++ b/testsuite/expect/test5.9 @@ -134,7 +134,11 @@ if {$tmp_job == 0} { exit 0 } -wait_for_job $tmp_job RUNNING +if {[wait_for_job $tmp_job "RUNNING"] != 0} { + send_user "\nFAILURE: error waiting for job $tmp_job to start\n" + cancel_job $tmp_job + set exit_code 1 +} set got_node 0 spawn $scontrol show job $tmp_job @@ -269,8 +273,9 @@ expect { } } -if {[wait_for_job $job_id RUNNING] != 0} { +if {[wait_for_job $job_id "RUNNING"] != 0} { send_user "\nFAILURE: error waiting for job $job_id to start\n" + cancel_job $job_id set exit_code 1 } # Wait for steps to start too @@ -444,7 +449,10 @@ if {$match != $cnt} { } cancel_job $job_id -wait_for_job $job_id DONE +if {[wait_for_job $job_id "DONE"] != 0} { + send_user "\nFAILURE: error waiting for job $job_id to complete\n" + set exit_code 1 +} remove_acct "" $test_acct