Modify some tests to work properly if the task/CPU layout is uneven (e.g. 3 CPUs on node zero and 1 CPU on node one instead of 2 and 2)

modify some tests to work properly if task/CPU layout is un-even (e.g. 3 CPUs on nodes
d0dece74 · Moe Jette · ac8f178b · d0dece74 · d0dece74 · d0dece74
Commit d0dece74 authored 14 years ago by Moe Jette
--- a/testsuite/expect/test1.60
+++ b/testsuite/expect/test1.60
@@ -8,7 +8,7 @@
 #          "FAILURE: ..." otherwise with an explanation of the failure, OR
 #          anything else indicates a failure mode that must be investigated.
 ############################################################################
-# Copyright (C) 2009 Lawrence Livermore National Security.
+# Copyright (C) 2009-2010 Lawrence Livermore National Security.
 # Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
 # Written by Dave Bremer <dbremer@llnl.gov>
 # CODE-OCEC-09-009. All rights reserved.
@@ -137,16 +137,20 @@ if {$file_cnt != $node_count} {
 if {$exit_code != 0} {
 	exit $exit_code
 }
-
-#
-# Spawn a program that generates "node_id" (%n) in stdout file names
-# and confirm they are created
-#
 for {set node_id 0} {$node_id < $node_count} {incr node_id} {
 	set file_out_n_glob  "test$test_id.n.$node_id.output"
 	exec $bin_rm -f $file_out_n_glob
 }

+if {[test_front_end] != 0} {
+	send_user "\nWARNING: Additional tests are incompatible with front-end systems\n"
+	exit $exit_code
+}
+
+#
+# Spawn a program that generates "node_id" (%n) in stdout file names
+# and confirm they are created
+#
 set task_count [expr $node_count * 2]
 set timeout $max_job_delay
 set srun_pid [spawn $srun -l --output=$file_out_n -N $node_count -n $task_count -O -v -t1 $bin_echo hello]
@@ -168,11 +172,10 @@ if {$job_id == 0} {
 	send_user "\nFAILURE: job initiation failed\n"
 	exit 1
 }
-if {[test_front_end] != 0} {
-	send_user "\nWARNING: Additional tests are incompatible with front-end systems\n"
-	exit $exit_code
-}

+# We only test that some task ran on every node; we can't check the task ID
+# because task distribution depends upon resource allocation, which may
+# not be homogeneous across the nodes.
 for {set node_id 0} {$node_id < $node_count} {incr node_id} {
 	set file_out_n_glob  "test$test_id.n.$node_id.output"
 	if {[wait_for_file $file_out_n_glob] != 0} {
@@ -182,26 +185,19 @@ for {set node_id 0} {$node_id < $node_count} {incr node_id} {
 		set exit_code 1
 		break
 	} else {
-		set t0found false
-		set t1found false
+		set task_found false

 		spawn $bin_cat $file_out_n_glob
 		expect {
 			-re "($number): *hello" {
-				set test_task_id $expect_out(1,string)
-				if {$test_task_id == $node_id * 2} {
-					set t0found true
-				}
-				if {$test_task_id == $node_id * 2 + 1} {
-					set t1found true
-				}
+				set task_found true
 				exp_continue
 			}
 			eof {
 				wait
 			}
 		}
-		if {!$t0found || !$t1found} {
+		if {!$task_found} {
 			send_user "\nFAILURE: file $file_out_n_glob was not labelled correctly\n"
 			set exit_code 1
 		} else {

--- a/testsuite/expect/test1.80
+++ b/testsuite/expect/test1.80
@@ -10,7 +10,7 @@
 #          anything else indicates a failure mode that must be investigated.
 ############################################################################
 # Copyright (C) 2002-2007 The Regents of the University of California.
-# Copyright (C) 2008 Lawrence Livermore National Security.
+# Copyright (C) 2008-2010 Lawrence Livermore National Security.
 # Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
 # Written by Morris Jette <jette1@llnl.gov>
 # CODE-OCEC-09-009. All rights reserved.
@@ -48,9 +48,16 @@ if { [test_front_end] } {
 #
 # Submit a two node job with block distribution
 #
-set timeout $max_job_delay
-set expected_layout [list 0 0 1 1]
+# Task layout depends upon how many tasks/CPUs are allocated per node
+# One task on node zero,    layout: 0 1 1 1
+# Two tasks on node zero,   layout: 0 0 1 1	<< Optimal
+# Three tasks on node zero, layout: 0 0 0 1
+#
+set expected_layout1 [list 0 0 0 1]
+set expected_layout2 [list 0 0 1 1]
+set expected_layout3 [list 0 1 1 1]
 set tested_layout [list -1 -1 -1 -1]
+set timeout $max_job_delay
 set srun_pid [spawn $srun -O -N2 -n4 -l --distribution=block -t1 $bin_printenv SLURM_NODEID]
 expect {
 	-re "More processors requested than permitted" {
@@ -80,9 +87,12 @@ expect {
 #
 # Verify block distribution of tasks
 #
-send_user "Expected layout was: $expected_layout\n"
-send_user "Tested layout was  : $tested_layout\n"
-if {[string compare $expected_layout $tested_layout] != 0} {
+
+send_user "Optimal task layout was: $expected_layout2\n"
+send_user "Actual task layout was : $tested_layout\n"
+if {[string compare $tested_layout $expected_layout1] &&
+    [string compare $tested_layout $expected_layout2] &&
+    [string compare $tested_layout $expected_layout3]} {
 	send_user "\FAILURE: failed to distribute tasks in block fashion\n"
 	set exit_code 1
 }
@@ -95,8 +105,14 @@ if {[test_front_end] != 0} {
 #
 # Submit a two node job with cyclic distribution
 #
-set timeout $max_job_delay
-set expected_layout [list 0 1 0 1]
+# Task layout depends upon how many tasks/CPUs are allocated per node
+# One task on node zero,    layout: 0 1 1 1
+# Two tasks on node zero,   layout: 0 1 0 1	<< Optimal
+# Three tasks on node zero, layout: 0 1 0 0
+#
+set expected_layout1 [list 0 1 1 1]
+set expected_layout2 [list 0 1 0 1]
+set expected_layout3 [list 0 1 0 0]
 set tested_layout [list -1 -1 -1 -1]
 set srun_pid [spawn $srun -O -N2 -n4 -l --distribution=cyclic -t1 $bin_printenv SLURM_NODEID]
 expect {
@@ -127,9 +143,11 @@ expect {
 #
 # Verify cyclic distribution of tasks
 #
-send_user "Expected layout was: $expected_layout\n"
-send_user "Tested layout was  : $tested_layout\n"
-if {[string compare $expected_layout $tested_layout] != 0} {
+send_user "Optimal task layout was: $expected_layout2\n"
+send_user "Actual task layout was : $tested_layout\n"
+if {[string compare $tested_layout $expected_layout1] &&
+    [string compare $tested_layout $expected_layout2] &&
+    [string compare $tested_layout $expected_layout3]} {
 	send_user "\FAILURE: failed to distribute tasks in cyclic fashion\n"
 	set exit_code 1
 }

--- a/testsuite/expect/test1.92
+++ b/testsuite/expect/test1.92
@@ -55,7 +55,7 @@ exit 0
 # Create an allocation
 #
 set timeout $max_job_delay
-set salloc_pid [spawn $salloc -N2 -n4 --verbose -t2 $bin_bash]
+set salloc_pid [spawn $salloc -N2 --ntasks-per-node=2 --verbose -t2 $bin_bash]
 expect {
 	-re "salloc: Granted job allocation ($number)" {
 		set job_id $expect_out(1,string)