From 9d6f193ecec8651157291c378fc1e06d9e66c176 Mon Sep 17 00:00:00 2001
From: Danny Auble <da@llnl.gov>
Date: Thu, 17 Nov 2005 18:17:53 +0000
Subject: [PATCH] hostfile tests for srun and poe

---
 testsuite/expect/README   |   2 +
 testsuite/expect/test1.52 | 131 ++++++++++++++++++++++++++++++++++++
 testsuite/expect/test11.7 | 136 ++++++++++++++++++++++++++++++++++++++
 3 files changed, 269 insertions(+)
 create mode 100755 testsuite/expect/test1.52
 create mode 100755 testsuite/expect/test11.7

diff --git a/testsuite/expect/README b/testsuite/expect/README
index 195cbbce120..8253124050a 100644
--- a/testsuite/expect/README
+++ b/testsuite/expect/README
@@ -125,6 +125,7 @@ test1.48   Test of srun mail options (--mail-type and --mail-user options).
 test1.49   Test of srun task-prolog and task-epilog options.
 test1.50   Test of running non-existant job, confirm timely termination.
 test1.51   Test propagation of umask to spawned tasks.
+test1.52   Test of hostfile logic
 
 **NOTE**   The following tests attempt to utilize multiple CPUs or partitions,
            The test will print "WARNING" and terminate with an exit code of 
@@ -282,6 +283,7 @@ test11.3   Test running of Network protocol option (-msg_api)
 test11.4   Test mpi jobs (must run make in mpi-testscripts dir)
 test11.5   Test of checkpoint logic (direct with srun)
 test11.6   Test of checkpoint logic (with poe)
+test11.7   Test of hostfile logic (with poe)
 
 
 test12.#   Testing of sacct command and options
diff --git a/testsuite/expect/test1.52 b/testsuite/expect/test1.52
new file mode 100755
index 00000000000..b343a1a94c5
--- /dev/null
+++ b/testsuite/expect/test1.52
@@ -0,0 +1,131 @@
+#!/usr/bin/expect
+############################################################################
+# Purpose: Test of srun functionality
+#          Test of hostfile logic (MP_HOSTFILE environment variable).
+#          
+#
+# Output:  "TEST: #.#" followed by "SUCCESS" if test was successful, OR
+#          "FAILURE: ..." otherwise with an explanation of the failure, OR
+#          anything else indicates a failure mode that must be investigated.
+#
+############################################################################
+# Copyright (C) 2002 The Regents of the University of California.
+# Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+# Written by Danny Auble <da@llnl.gov>
+# UCRL-CODE-2002-040.
+# 
+# This file is part of SLURM, a resource management program.
+# For details, see <http://www.llnl.gov/linux/slurm/>.
+#  
+# SLURM is free software; you can redistribute it and/or modify it under
+# the terms of the GNU General Public License as published by the Free
+# Software Foundation; either version 2 of the License, or (at your option)
+# any later version.
+# 
+# SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
+# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
+# details.
+# 
+# You should have received a copy of the GNU General Public License along
+# with SLURM; if not, write to the Free Software Foundation, Inc.,
+# 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA.
+############################################################################
+source ./globals
+
+set test_id     "1.52"
+set exit_code   0
+set num_nodes   2
+set node_count  0
+set job_id      0
+set hostfile    "test$test_id.hostfile"
+
+print_header $test_id
+
+# Remove any hostfile left over from an earlier run.
+exec $bin_rm -f $hostfile
+
+# Find out if we have enough nodes to test functionality.  Every partition
+# that scontrol reports is checked, so a single small one skips the test.
+spawn $scontrol show partition
+expect {
+	-re "TotalNodes=($number)" {
+		set node_count $expect_out(1,string)
+		if { $node_count < $num_nodes } {
+			send_user "WARNING: system must have at least\
+				   $num_nodes nodes to run this test on. \
+				   This system only has $node_count.\n"
+			exit $exit_code
+		}
+		exp_continue
+	}
+	timeout {
+		send_user "\nFAILURE: scontrol not responding\n"
+		exit 1
+	}
+	eof {
+	}
+}
+
+# Name captured for task 0 / task 1 in the latest srun run (0 = not seen yet).
+set node0 0
+set node1 0
+
+# Pass 0 runs srun without a hostfile to learn which node each task lands on.
+# Pass 1 lists those two nodes in reverse order in the hostfile and re-runs.
+for {set i 0} {$i<2} {incr i} {
+	if { $i==1 } {
+		if { [string equal $node0 0] || [string equal $node1 0] } {
+			send_user "\nFAILURE: node names not set from\
+				previous srun run\n"
+			exit 1
+		}
+		set env(MP_HOSTFILE) $hostfile
+		set prev_node0 $node0
+		set prev_node1 $node1
+		set file [open $hostfile "w"]
+		puts $file "$node1\n$node0"
+		close $file
+	}
+	#
+	# execute srun with a specific node count
+	#
+	spawn $srun -N$num_nodes -l $bin_hostname
+	expect {
+		-re "0: ($alpha_numeric)" {
+			set node0 $expect_out(1,string)
+			exp_continue
+		}
+		-re "1: ($alpha_numeric)" {
+			set node1 $expect_out(1,string)
+			exp_continue
+		}
+		-re "slurm job ($number)" {
+			set job_id $expect_out(1,string)
+			exp_continue
+		}
+		timeout {
+			send_user "\nFAILURE: srun not responding\n"
+			# job_id is 0 unless srun printed it; a failing
+			# scancel must not abort the script with a Tcl error
+			if { $job_id != 0 } {
+				catch {exec $scancel --quiet $job_id}
+			}
+			set exit_code 1
+		}
+		eof {
+		}
+	}
+}
+
+# With the reversed hostfile, task 0 must run where task 1 ran and vice versa.
+if { [string compare $node0 $prev_node1] ||
+     [string compare $node1 $prev_node0] } {
+	send_user "\nFAILURE: tasks not distributed by hostfile\n"
+	set exit_code 1
+}
+if {$exit_code == 0} {
+	exec $bin_rm -f $hostfile
+	send_user "\nSUCCESS\n"
+}
+exit $exit_code
diff --git a/testsuite/expect/test11.7 b/testsuite/expect/test11.7
new file mode 100755
index 00000000000..36dde5b7b8f
--- /dev/null
+++ b/testsuite/expect/test11.7
@@ -0,0 +1,136 @@
+#!/usr/bin/expect
+############################################################################
+# Purpose: Test of POE functionality
+#          Test of hostfile option (-hostfile).
+#          
+#
+# Output:  "TEST: #.#" followed by "SUCCESS" if test was successful, OR
+#          "FAILURE: ..." otherwise with an explanation of the failure, OR
+#          anything else indicates a failure mode that must be investigated.
+#
+############################################################################
+# Copyright (C) 2002 The Regents of the University of California.
+# Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+# Written by Danny Auble <da@llnl.gov>
+# UCRL-CODE-2002-040.
+# 
+# This file is part of SLURM, a resource management program.
+# For details, see <http://www.llnl.gov/linux/slurm/>.
+#  
+# SLURM is free software; you can redistribute it and/or modify it under
+# the terms of the GNU General Public License as published by the Free
+# Software Foundation; either version 2 of the License, or (at your option)
+# any later version.
+# 
+# SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
+# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
+# details.
+# 
+# You should have received a copy of the GNU General Public License along
+# with SLURM; if not, write to the Free Software Foundation, Inc.,
+# 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA.
+############################################################################
+source ./globals
+
+set test_id     "11.7"
+set partition   "debug"
+set exit_code   0
+set env(SLURM_LL_API_DEBUG) "2"
+set num_nodes   2
+set num_tasks   2
+set node_count  0
+set job_id      0
+set hostfile    "test$test_id.hostfile"
+
+print_header $test_id
+
+exec $bin_rm -f $hostfile
+
+if { ![file exists $poe] } {
+	send_user "WARNING: poe must be installed on the\
+	    system to run this test.\n"
+	exit $exit_code
+}
+
+# Find out if we have enough nodes to test functionality.  Every partition
+# that scontrol reports is checked, so a single small one skips the test.
+spawn $scontrol show partition
+expect {
+	-re "TotalNodes=($number)" {
+		set node_count $expect_out(1,string)
+		if { $node_count < $num_nodes } {
+			send_user "WARNING: system must have at least\
+				   $num_nodes nodes to run this test on. \
+				   This system only has $node_count.\n"
+			exit $exit_code
+		}
+		exp_continue
+	}
+	timeout {
+		send_user "\nFAILURE: scontrol not responding\n"
+		exit 1
+	}
+	eof {
+	}
+}
+set node0 0
+set node1 0
+
+# Both passes run the same poe command; pass 0 learns which node each task
+# lands on, pass 1 adds a hostfile listing those nodes in reverse order.
+set poe_cmd [list $poe $bin_hostname -resd yes -rmpool $partition \
+		-procs $num_tasks -nodes $num_nodes -retry wait]
+for {set i 0} {$i<2} {incr i} {
+	if { $i==1 } {
+		if { [string equal $node0 0] || [string equal $node1 0] } {
+			send_user "\nFAILURE: node names not set from\
+					previous poe run\n"
+			exit 1
+		}
+		set prev_node0 $node0
+		set prev_node1 $node1
+		set file [open $hostfile "w"]
+		puts $file "$node1\n$node0"
+		close $file
+		lappend poe_cmd -hostfile $hostfile
+	}
+	# execute poe with a specific node count (poe_cmd is a proper Tcl list)
+	eval spawn $poe_cmd
+	expect {
+		-re "0:($alpha_numeric)" {
+			set node0 $expect_out(1,string)
+			exp_continue
+		}
+		-re "1:($alpha_numeric)" {
+			set node1 $expect_out(1,string)
+			exp_continue
+		}
+		-re "slurm job ($number)" {
+			set job_id $expect_out(1,string)
+			exp_continue
+		}
+		timeout {
+			send_user "\nFAILURE: poe not responding\n"
+			# skip an unknown job id; tolerate a failing scancel
+			if { $job_id != 0 } {
+				catch {exec $scancel --quiet $job_id}
+			}
+			set exit_code 1
+		}
+		eof {
+		}
+	}
+}
+
+# With the reversed hostfile, task 0 must run where task 1 ran and vice versa.
+if { [string compare $node0 $prev_node1] ||
+     [string compare $node1 $prev_node0] } {
+	send_user "\nFAILURE: tasks not distributed by hostfile\n"
+	set exit_code 1
+}
+if {$exit_code == 0} {
+	exec $bin_rm -f $hostfile
+	send_user "\nSUCCESS\n"
+}
+exit $exit_code
-- 
GitLab