Skip to content
Snippets Groups Projects
Commit d0dece74 authored by Moe Jette's avatar Moe Jette
Browse files

modify some tests to work properly if task/CPU layout is uneven (e.g. 3 CPUs on node

zero and 1 CPU on node one instead of 2 and 2)
parent ac8f178b
No related branches found
No related tags found
No related merge requests found
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
# "FAILURE: ..." otherwise with an explanation of the failure, OR # "FAILURE: ..." otherwise with an explanation of the failure, OR
# anything else indicates a failure mode that must be investigated. # anything else indicates a failure mode that must be investigated.
############################################################################ ############################################################################
# Copyright (C) 2009 Lawrence Livermore National Security. # Copyright (C) 2009-2010 Lawrence Livermore National Security.
# Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). # Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
# Written by Dave Bremer <dbremer@llnl.gov> # Written by Dave Bremer <dbremer@llnl.gov>
# CODE-OCEC-09-009. All rights reserved. # CODE-OCEC-09-009. All rights reserved.
...@@ -137,16 +137,20 @@ if {$file_cnt != $node_count} { ...@@ -137,16 +137,20 @@ if {$file_cnt != $node_count} {
if {$exit_code != 0} { if {$exit_code != 0} {
exit $exit_code exit $exit_code
} }
#
# Spawn a program that generates "node_id" (%n) in stdout file names
# and confirm they are created
#
for {set node_id 0} {$node_id < $node_count} {incr node_id} { for {set node_id 0} {$node_id < $node_count} {incr node_id} {
set file_out_n_glob "test$test_id.n.$node_id.output" set file_out_n_glob "test$test_id.n.$node_id.output"
exec $bin_rm -f $file_out_n_glob exec $bin_rm -f $file_out_n_glob
} }
if {[test_front_end] != 0} {
send_user "\nWARNING: Additional tests are incompatible with front-end systems\n"
exit $exit_code
}
#
# Spawn a program that generates "node_id" (%n) in stdout file names
# and confirm they are created
#
set task_count [expr $node_count * 2] set task_count [expr $node_count * 2]
set timeout $max_job_delay set timeout $max_job_delay
set srun_pid [spawn $srun -l --output=$file_out_n -N $node_count -n $task_count -O -v -t1 $bin_echo hello] set srun_pid [spawn $srun -l --output=$file_out_n -N $node_count -n $task_count -O -v -t1 $bin_echo hello]
...@@ -168,11 +172,10 @@ if {$job_id == 0} { ...@@ -168,11 +172,10 @@ if {$job_id == 0} {
send_user "\nFAILURE: job initiation failed\n" send_user "\nFAILURE: job initiation failed\n"
exit 1 exit 1
} }
if {[test_front_end] != 0} {
send_user "\nWARNING: Additional tests are incompatible with front-end systems\n"
exit $exit_code
}
# We only test that some task ran on every node, we can't check the task ID
# since task distribution is dependent upon resource allocation which may
# not be homogeneous across the nodes.
for {set node_id 0} {$node_id < $node_count} {incr node_id} { for {set node_id 0} {$node_id < $node_count} {incr node_id} {
set file_out_n_glob "test$test_id.n.$node_id.output" set file_out_n_glob "test$test_id.n.$node_id.output"
if {[wait_for_file $file_out_n_glob] != 0} { if {[wait_for_file $file_out_n_glob] != 0} {
...@@ -182,26 +185,19 @@ for {set node_id 0} {$node_id < $node_count} {incr node_id} { ...@@ -182,26 +185,19 @@ for {set node_id 0} {$node_id < $node_count} {incr node_id} {
set exit_code 1 set exit_code 1
break break
} else { } else {
set t0found false set task_found false
set t1found false
spawn $bin_cat $file_out_n_glob spawn $bin_cat $file_out_n_glob
expect { expect {
-re "($number): *hello" { -re "($number): *hello" {
set test_task_id $expect_out(1,string) set task_found true
if {$test_task_id == $node_id * 2} {
set t0found true
}
if {$test_task_id == $node_id * 2 + 1} {
set t1found true
}
exp_continue exp_continue
} }
eof { eof {
wait wait
} }
} }
if {!$t0found || !$t1found} { if {!$task_found} {
send_user "\nFAILURE: file $file_out_n_glob was not labelled correctly\n" send_user "\nFAILURE: file $file_out_n_glob was not labelled correctly\n"
set exit_code 1 set exit_code 1
} else { } else {
......
...@@ -10,7 +10,7 @@ ...@@ -10,7 +10,7 @@
# anything else indicates a failure mode that must be investigated. # anything else indicates a failure mode that must be investigated.
############################################################################ ############################################################################
# Copyright (C) 2002-2007 The Regents of the University of California. # Copyright (C) 2002-2007 The Regents of the University of California.
# Copyright (C) 2008 Lawrence Livermore National Security. # Copyright (C) 2008-2010 Lawrence Livermore National Security.
# Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). # Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
# Written by Morris Jette <jette1@llnl.gov> # Written by Morris Jette <jette1@llnl.gov>
# CODE-OCEC-09-009. All rights reserved. # CODE-OCEC-09-009. All rights reserved.
...@@ -48,9 +48,16 @@ if { [test_front_end] } { ...@@ -48,9 +48,16 @@ if { [test_front_end] } {
# #
# Submit a two node job with block distribution # Submit a two node job with block distribution
# #
set timeout $max_job_delay # Task layout depends upon how many tasks/CPUs are allocated per node
set expected_layout [list 0 0 1 1] # Three tasks on node zero, layout: 0 0 0 1
# Two tasks on node zero, layout: 0 0 1 1 << Optimal
# One task on node zero, layout: 0 1 1 1
#
set expected_layout1 [list 0 0 0 1]
set expected_layout2 [list 0 0 1 1]
set expected_layout3 [list 0 1 1 1]
set tested_layout [list -1 -1 -1 -1] set tested_layout [list -1 -1 -1 -1]
set timeout $max_job_delay
set srun_pid [spawn $srun -O -N2 -n4 -l --distribution=block -t1 $bin_printenv SLURM_NODEID] set srun_pid [spawn $srun -O -N2 -n4 -l --distribution=block -t1 $bin_printenv SLURM_NODEID]
expect { expect {
-re "More processors requested than permitted" { -re "More processors requested than permitted" {
...@@ -80,9 +87,12 @@ expect { ...@@ -80,9 +87,12 @@ expect {
# #
# Verify block distribution of tasks # Verify block distribution of tasks
# #
send_user "Expected layout was: $expected_layout\n"
send_user "Tested layout was : $tested_layout\n" send_user "Optimal task layout was: $expected_layout2\n"
if {[string compare $expected_layout $tested_layout] != 0} { send_user "Actual task layout was : $tested_layout\n"
if {[string compare $tested_layout $expected_layout1] &&
[string compare $tested_layout $expected_layout2] &&
[string compare $tested_layout $expected_layout3]} {
send_user "\FAILURE: failed to distribute tasks in block fashion\n" send_user "\FAILURE: failed to distribute tasks in block fashion\n"
set exit_code 1 set exit_code 1
} }
...@@ -95,8 +105,14 @@ if {[test_front_end] != 0} { ...@@ -95,8 +105,14 @@ if {[test_front_end] != 0} {
# #
# Submit a two node job with cyclic distribution # Submit a two node job with cyclic distribution
# #
set timeout $max_job_delay # Task layout depends upon how many tasks/CPUs are allocated per node
set expected_layout [list 0 1 0 1] # One task on node zero, layout: 0 1 1 1
# Two tasks on node zero, layout: 0 1 0 1 << Optimal
# Three tasks on node zero, layout: 0 1 0 0
#
set expected_layout1 [list 0 1 1 1]
set expected_layout2 [list 0 1 0 1]
set expected_layout3 [list 0 1 0 0]
set tested_layout [list -1 -1 -1 -1] set tested_layout [list -1 -1 -1 -1]
set srun_pid [spawn $srun -O -N2 -n4 -l --distribution=cyclic -t1 $bin_printenv SLURM_NODEID] set srun_pid [spawn $srun -O -N2 -n4 -l --distribution=cyclic -t1 $bin_printenv SLURM_NODEID]
expect { expect {
...@@ -127,9 +143,11 @@ expect { ...@@ -127,9 +143,11 @@ expect {
# #
# Verify cyclic distribution of tasks # Verify cyclic distribution of tasks
# #
send_user "Expected layout was: $expected_layout\n" send_user "Optimal task layout was: $expected_layout2\n"
send_user "Tested layout was : $tested_layout\n" send_user "Actual task layout was : $tested_layout\n"
if {[string compare $expected_layout $tested_layout] != 0} { if {[string compare $tested_layout $expected_layout1] &&
[string compare $tested_layout $expected_layout2] &&
[string compare $tested_layout $expected_layout3]} {
send_user "\FAILURE: failed to distribute tasks in cyclic fashion\n" send_user "\FAILURE: failed to distribute tasks in cyclic fashion\n"
set exit_code 1 set exit_code 1
} }
......
...@@ -55,7 +55,7 @@ exit 0 ...@@ -55,7 +55,7 @@ exit 0
# Create an allocation # Create an allocation
# #
set timeout $max_job_delay set timeout $max_job_delay
set salloc_pid [spawn $salloc -N2 -n4 --verbose -t2 $bin_bash] set salloc_pid [spawn $salloc -N2 --ntasks-per-node=2 --verbose -t2 $bin_bash]
expect { expect {
-re "salloc: Granted job allocation ($number)" { -re "salloc: Granted job allocation ($number)" {
set job_id $expect_out(1,string) set job_id $expect_out(1,string)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment