Skip to content
Snippets Groups Projects
Commit 4713523c authored by Morris Jette
Browse files

Make sgather tests more robust for POE

parent ac750a40
No related branches found
No related tags found
No related merge requests found
...@@ -129,10 +129,18 @@ if {[wait_for_file $file_out] == 0} { ...@@ -129,10 +129,18 @@ if {[wait_for_file $file_out] == 0} {
} }
if {$file_cnt != [expr $node_cnt * 2]} { if {$file_cnt != [expr $node_cnt * 2]} {
send_user "\nFAILURE: Failed to gather files from all allocated nodes ($file_cnt != [expr $node_cnt * 2])\n" send_user "\nFAILURE: Failed to gather files from all allocated nodes ($file_cnt != [expr $node_cnt * 2])\n"
exit 1 set exit_code 1
} }
if {$exit_code == 0} { if {$exit_code == 0} {
exec $bin_rm -f $file_in $file_out exec $bin_rm -f $file_in $file_out
send_user "\nSUCCESS\n" send_user "\nSUCCESS\n"
} elseif {[test_launch_poe]} {
send_user "\nWARNING: Sporatic failures with POE are expected. Please retest.\n"
# POE BUG: If the application exits immediately then pmdv12 hangs until
# slurm kills it with a timeout, thus the sleep below is required:
# F S UID PID PPID C PRI NI ADDR SZ WCHAN TTY TIME CMD
# 4 R 1001 17141 17137 99 80 0 - 33410 ? ? 00:00:02 pmdv12
# 0 Z 1001 17152 17141 0 80 0 - 0 exit ? 00:00:00 test1.33.exit.b <defunct>
} }
exit $exit_code
...@@ -138,18 +138,26 @@ if {[wait_for_file $file_out] == 0} { ...@@ -138,18 +138,26 @@ if {[wait_for_file $file_out] == 0} {
} }
if {$verbose != 1} { if {$verbose != 1} {
send_user "\nFAILURE: Failed to honor verbose option ($verbose != 1)\n" send_user "\nFAILURE: Failed to honor verbose option ($verbose != 1)\n"
exit 1 set exit_code 1
} }
if {$file_cnt != $node_cnt} { if {$file_cnt != $node_cnt} {
send_user "\nFAILURE: Failed to gather files from all allocated nodes ($file_cnt != $node_cnt)\n" send_user "\nFAILURE: Failed to gather files from all allocated nodes ($file_cnt != $node_cnt)\n"
exit 1 set exit_code 1
} }
if {$rm_cnt != $node_cnt} { if {$rm_cnt != $node_cnt} {
send_user "\nFAILURE: Failed to remove gathered files from all allocated nodes ($rm_cnt != $node_cnt)\n" send_user "\nFAILURE: Failed to remove gathered files from all allocated nodes ($rm_cnt != $node_cnt)\n"
exit 1 set exit_code 1
} }
if {$exit_code == 0} { if {$exit_code == 0} {
exec $bin_rm -f $file_in $file_out exec $bin_rm -f $file_in $file_out
send_user "\nSUCCESS\n" send_user "\nSUCCESS\n"
} elseif {[test_launch_poe]} {
send_user "\nWARNING: Sporatic failures with POE are expected. Please retest.\n"
# POE BUG: If the application exits immediately then pmdv12 hangs until
# slurm kills it with a timeout, thus the sleep below is required:
# F S UID PID PPID C PRI NI ADDR SZ WCHAN TTY TIME CMD
# 4 R 1001 17141 17137 99 80 0 - 33410 ? ? 00:00:02 pmdv12
# 0 Z 1001 17152 17141 0 80 0 - 0 exit ? 00:00:00 test1.33.exit.b <defunct>
} }
exit $exit_code
...@@ -133,14 +133,22 @@ if {[wait_for_file $file_out] == 0} { ...@@ -133,14 +133,22 @@ if {[wait_for_file $file_out] == 0} {
} }
if {$file_cnt != $node_cnt} { if {$file_cnt != $node_cnt} {
send_user "\nFAILURE: Failed to gather files from all allocated nodes ($file_cnt != $node_cnt)\n" send_user "\nFAILURE: Failed to gather files from all allocated nodes ($file_cnt != $node_cnt)\n"
exit 1 set exit_code 1
} }
if {$rm_cnt != $node_cnt} { if {$rm_cnt != $node_cnt} {
send_user "\nFAILURE: Failed to remove gathered files from all allocated nodes ($rm_cnt != $node_cnt)\n" send_user "\nFAILURE: Failed to remove gathered files from all allocated nodes ($rm_cnt != $node_cnt)\n"
exit 1 set exit_code 1
} }
if {$exit_code == 0} { if {$exit_code == 0} {
exec $bin_rm -f $file_in $file_out exec $bin_rm -f $file_in $file_out
send_user "\nSUCCESS\n" send_user "\nSUCCESS\n"
} elseif {[test_launch_poe]} {
send_user "\nWARNING: Sporatic failures with POE are expected. Please retest.\n"
# POE BUG: If the application exits immediately then pmdv12 hangs until
# slurm kills it with a timeout, thus the sleep below is required:
# F S UID PID PPID C PRI NI ADDR SZ WCHAN TTY TIME CMD
# 4 R 1001 17141 17137 99 80 0 - 33410 ? ? 00:00:02 pmdv12
# 0 Z 1001 17152 17141 0 80 0 - 0 exit ? 00:00:00 test1.33.exit.b <defunct>
} }
exit $exit_code
...@@ -132,14 +132,22 @@ if {[wait_for_file $file_out] == 0} { ...@@ -132,14 +132,22 @@ if {[wait_for_file $file_out] == 0} {
} }
if {$file_cnt != $node_cnt} { if {$file_cnt != $node_cnt} {
send_user "\nFAILURE: Failed to gather files from all allocated nodes ($file_cnt != $node_cnt)\n" send_user "\nFAILURE: Failed to gather files from all allocated nodes ($file_cnt != $node_cnt)\n"
exit 1 set exit_code 1
} }
if {$rm_cnt != $node_cnt} { if {$rm_cnt != $node_cnt} {
send_user "\nFAILURE: Failed to remove gathered files from all allocated nodes ($rm_cnt != $node_cnt)\n" send_user "\nFAILURE: Failed to remove gathered files from all allocated nodes ($rm_cnt != $node_cnt)\n"
exit 1 set exit_code 1
} }
if {$exit_code == 0} { if {$exit_code == 0} {
exec $bin_rm -f $file_in $file_out exec $bin_rm -f $file_in $file_out
send_user "\nSUCCESS\n" send_user "\nSUCCESS\n"
} elseif {[test_launch_poe]} {
send_user "\nWARNING: Sporatic failures with POE are expected. Please retest.\n"
# POE BUG: If the application exits immediately then pmdv12 hangs until
# slurm kills it with a timeout, thus the sleep below is required:
# F S UID PID PPID C PRI NI ADDR SZ WCHAN TTY TIME CMD
# 4 R 1001 17141 17137 99 80 0 - 33410 ? ? 00:00:02 pmdv12
# 0 Z 1001 17152 17141 0 80 0 - 0 exit ? 00:00:00 test1.33.exit.b <defunct>
} }
exit $exit_code
...@@ -132,10 +132,18 @@ if {[wait_for_file $file_out] == 0} { ...@@ -132,10 +132,18 @@ if {[wait_for_file $file_out] == 0} {
} }
if {$file_cnt != $node_cnt} { if {$file_cnt != $node_cnt} {
send_user "\nFAILURE: Failed to gather files from all allocated nodes ($file_cnt != $node_cnt)\n" send_user "\nFAILURE: Failed to gather files from all allocated nodes ($file_cnt != $node_cnt)\n"
exit 1 set exit_code 1
} }
if {$exit_code == 0} { if {$exit_code == 0} {
exec $bin_rm -f $file_in $file_out exec $bin_rm -f $file_in $file_out
send_user "\nSUCCESS\n" send_user "\nSUCCESS\n"
} elseif {[test_launch_poe]} {
send_user "\nWARNING: Sporatic failures with POE are expected. Please retest.\n"
# POE BUG: If the application exits immediately then pmdv12 hangs until
# slurm kills it with a timeout, thus the sleep below is required:
# F S UID PID PPID C PRI NI ADDR SZ WCHAN TTY TIME CMD
# 4 R 1001 17141 17137 99 80 0 - 33410 ? ? 00:00:02 pmdv12
# 0 Z 1001 17152 17141 0 80 0 - 0 exit ? 00:00:00 test1.33.exit.b <defunct>
} }
exit $exit_code
...@@ -133,14 +133,22 @@ if {[wait_for_file $file_out] == 0} { ...@@ -133,14 +133,22 @@ if {[wait_for_file $file_out] == 0} {
} }
if {$file_cnt != $node_cnt} { if {$file_cnt != $node_cnt} {
send_user "\nFAILURE: Failed to gather files from all allocated nodes ($file_cnt != $node_cnt)\n" send_user "\nFAILURE: Failed to gather files from all allocated nodes ($file_cnt != $node_cnt)\n"
exit 1 set exit_code 1
} }
if {$rm_cnt == 0} { if {$rm_cnt == 0} {
send_user "\nFAILURE: Failed to remove gathered files from all allocated nodes ($rm_cnt == 0)\n" send_user "\nFAILURE: Failed to remove gathered files from all allocated nodes ($rm_cnt == 0)\n"
exit 1 set exit_code 1
} }
if {$exit_code == 0} { if {$exit_code == 0} {
exec $bin_rm -f $file_in $file_out exec $bin_rm -f $file_in $file_out
send_user "\nSUCCESS\n" send_user "\nSUCCESS\n"
} elseif {[test_launch_poe]} {
send_user "\nWARNING: Sporatic failures with POE are expected. Please retest.\n"
# POE BUG: If the application exits immediately then pmdv12 hangs until
# slurm kills it with a timeout, thus the sleep below is required:
# F S UID PID PPID C PRI NI ADDR SZ WCHAN TTY TIME CMD
# 4 R 1001 17141 17137 99 80 0 - 33410 ? ? 00:00:02 pmdv12
# 0 Z 1001 17152 17141 0 80 0 - 0 exit ? 00:00:00 test1.33.exit.b <defunct>
} }
exit $exit_code
...@@ -120,10 +120,18 @@ if {[wait_for_file $file_out] == 0} { ...@@ -120,10 +120,18 @@ if {[wait_for_file $file_out] == 0} {
} }
if {$matches != 2} { if {$matches != 2} {
send_user "\nFAILURE: Failed to ignore missing files ($matches != 2)\n" send_user "\nFAILURE: Failed to ignore missing files ($matches != 2)\n"
exit 1 set exit_code 1
} }
if {$exit_code == 0} { if {$exit_code == 0} {
exec $bin_rm -f $file_in $file_out exec $bin_rm -f $file_in $file_out
send_user "\nSUCCESS\n" send_user "\nSUCCESS\n"
} elseif {[test_launch_poe]} {
send_user "\nWARNING: Sporatic failures with POE are expected. Please retest.\n"
# POE BUG: If the application exits immediately then pmdv12 hangs until
# slurm kills it with a timeout, thus the sleep below is required:
# F S UID PID PPID C PRI NI ADDR SZ WCHAN TTY TIME CMD
# 4 R 1001 17141 17137 99 80 0 - 33410 ? ? 00:00:02 pmdv12
# 0 Z 1001 17152 17141 0 80 0 - 0 exit ? 00:00:00 test1.33.exit.b <defunct>
} }
exit $exit_code
...@@ -133,14 +133,22 @@ if {[wait_for_file $file_out] == 0} { ...@@ -133,14 +133,22 @@ if {[wait_for_file $file_out] == 0} {
} }
if {$file_cnt != $node_cnt} { if {$file_cnt != $node_cnt} {
send_user "\nFAILURE: Failed to gather files from all allocated nodes ($file_cnt != $node_cnt)\n" send_user "\nFAILURE: Failed to gather files from all allocated nodes ($file_cnt != $node_cnt)\n"
exit 1 set exit_code 1
} }
if {$rm_cnt != $node_cnt} { if {$rm_cnt != $node_cnt} {
send_user "\nFAILURE: Failed to remove gathered files from all allocated nodes ($rm_cnt != $node_cnt)\n" send_user "\nFAILURE: Failed to remove gathered files from all allocated nodes ($rm_cnt != $node_cnt)\n"
exit 1 set exit_code 1
} }
if {$exit_code == 0} { if {$exit_code == 0} {
exec $bin_rm -f $file_in $file_out exec $bin_rm -f $file_in $file_out
send_user "\nSUCCESS\n" send_user "\nSUCCESS\n"
} elseif {[test_launch_poe]} {
send_user "\nWARNING: Sporatic failures with POE are expected. Please retest.\n"
# POE BUG: If the application exits immediately then pmdv12 hangs until
# slurm kills it with a timeout, thus the sleep below is required:
# F S UID PID PPID C PRI NI ADDR SZ WCHAN TTY TIME CMD
# 4 R 1001 17141 17137 99 80 0 - 33410 ? ? 00:00:02 pmdv12
# 0 Z 1001 17152 17141 0 80 0 - 0 exit ? 00:00:00 test1.33.exit.b <defunct>
} }
exit $exit_code
...@@ -127,10 +127,18 @@ if {[wait_for_file $file_out] == 0} { ...@@ -127,10 +127,18 @@ if {[wait_for_file $file_out] == 0} {
} }
if {$matches != [expr $node_cnt * 2]} { if {$matches != [expr $node_cnt * 2]} {
send_user "\nFAILURE: Failed to preserve file time ($matches != [expr $node_cnt * 2])\n" send_user "\nFAILURE: Failed to preserve file time ($matches != [expr $node_cnt * 2])\n"
exit 1 set exit_code 1
} }
if {$exit_code == 0} { if {$exit_code == 0} {
exec $bin_rm -f $file_in $file_out exec $bin_rm -f $file_in $file_out
send_user "\nSUCCESS\n" send_user "\nSUCCESS\n"
} elseif {[test_launch_poe]} {
send_user "\nWARNING: Sporatic failures with POE are expected. Please retest.\n"
# POE BUG: If the application exits immediately then pmdv12 hangs until
# slurm kills it with a timeout, thus the sleep below is required:
# F S UID PID PPID C PRI NI ADDR SZ WCHAN TTY TIME CMD
# 4 R 1001 17141 17137 99 80 0 - 33410 ? ? 00:00:02 pmdv12
# 0 Z 1001 17152 17141 0 80 0 - 0 exit ? 00:00:00 test1.33.exit.b <defunct>
} }
exit $exit_code
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment