From 9d6f193ecec8651157291c378fc1e06d9e66c176 Mon Sep 17 00:00:00 2001 From: Danny Auble <da@llnl.gov> Date: Thu, 17 Nov 2005 18:17:53 +0000 Subject: [PATCH] hostfile tests for srun and poe --- testsuite/expect/README | 2 + testsuite/expect/test1.52 | 131 ++++++++++++++++++++++++++++++++++++ testsuite/expect/test11.7 | 136 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 269 insertions(+) create mode 100755 testsuite/expect/test1.52 create mode 100755 testsuite/expect/test11.7 diff --git a/testsuite/expect/README b/testsuite/expect/README index 195cbbce120..8253124050a 100644 --- a/testsuite/expect/README +++ b/testsuite/expect/README @@ -125,6 +125,7 @@ test1.48 Test of srun mail options (--mail-type and --mail-user options). test1.49 Test of srun task-prolog and task-epilog options. test1.50 Test of running non-existant job, confirm timely termination. test1.51 Test propagation of umask to spawned tasks. +test1.52 Test of hostfile logic **NOTE** The following tests attempt to utilize multiple CPUs or partitions, The test will print "WARNING" and terminate with an exit code of @@ -282,6 +283,7 @@ test11.3 Test running of Network protocol option (-msg_api) test11.4 Test mpi jobs (must run make in mpi-testscripts dir) test11.5 Test of checkpoint logic (direct with srun) test11.6 Test of checkpoint logic (with poe) +test11.7 Test of hostfile logic (with poe) test12.# Testing of sacct command and options diff --git a/testsuite/expect/test1.52 b/testsuite/expect/test1.52 new file mode 100755 index 00000000000..b343a1a94c5 --- /dev/null +++ b/testsuite/expect/test1.52 @@ -0,0 +1,131 @@ +#!/usr/bin/expect +############################################################################ +# Purpose: Test of srun functionality +# Test of hostfile logic (MP_HOSTFILE environment variable). +# +# +# Output: "TEST: #.#" followed by "SUCCESS" if test was successful, OR +# "FAILURE: ..."
otherwise with an explanation of the failure, OR +# anything else indicates a failure mode that must be investigated. +# +############################################################################ +# Copyright (C) 2002 The Regents of the University of California. +# Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). +# Written by Danny Auble <da@llnl.gov> +# UCRL-CODE-2002-040. +# +# This file is part of SLURM, a resource management program. +# For details, see <http://www.llnl.gov/linux/slurm/>. +# +# SLURM is free software; you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free +# Software Foundation; either version 2 of the License, or (at your option) +# any later version. +# +# SLURM is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +# details. +# +# You should have received a copy of the GNU General Public License along +# with SLURM; if not, write to the Free Software Foundation, Inc., +# 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. +############################################################################ +source ./globals + +set test_id "1.52" +set partition "debug" +set exit_code 0 +set num_nodes 2 +set num_tasks 2 +set node_count 0 +set task_count 0 +set job_id 0 +set hostfile "test$test_id.hostfile" + +print_header $test_id + +exec $bin_rm -f $hostfile + +if { ![file exists $poe] } { + send_user "WARNING: poe must be installed on the\ + system to run this test.\n" + exit $exit_code +} + +#find out if we have enough nodes to test functionality +spawn $scontrol show partition +expect { + -re "TotalNodes=($number)" { + set node_count $expect_out(1,string) + if { $node_count < 2 } { + send_user "WARNING: system must have at least 2 \ + nodes to run this test on.
This system \ + only has $node_count.\n" + exit $exit_code + } + exp_continue + } + timeout { + send_user "\nFAILURE: scontrol not responding\n" + exit 1 + } + eof { + } +} +set node0 0 +set node1 0 + +for {set i 0} {$i<2} {incr i} { + + if { $i==1 } { + if { $node0 == 0 || $node1 == 0 } { + send_user "\nFAILURE: node names not set from \ + previous srun run\n" + exit 1 + } + set env(MP_HOSTFILE) $hostfile + set 1node0 $node0 + set 1node1 $node1 + set file [open $hostfile "w"] + puts $file "$node1\n$node0" + close $file + } + # + # execute srun with a specific node count + # + spawn $srun -N2 -l $bin_hostname + expect { + -re "0: ($alpha_numeric)" { + set node0 $expect_out(1,string) + exp_continue + } + -re "1: ($alpha_numeric)" { + set node1 $expect_out(1,string) + exp_continue + } + -re "slurm job ($number)" { + set job_id $expect_out(1,string) + exp_continue + } + timeout { + send_user "\nFAILURE: srun not responding\n" + catch {exec $scancel --quiet $job_id} + set exit_code 1 + } + eof { + } + } +} +if { [string compare $node0 $1node1] } { + send_user "\nFAILURE: tasks not distributed by hostfile\n" + set exit_code 1 +} +if { [string compare $node1 $1node0] } { + send_user "\nFAILURE: tasks not distributed by hostfile\n" + set exit_code 1 +} +if {$exit_code == 0} { + send_user "\nSUCCESS\n" +} +exit $exit_code diff --git a/testsuite/expect/test11.7 b/testsuite/expect/test11.7 new file mode 100755 index 00000000000..36dde5b7b8f --- /dev/null +++ b/testsuite/expect/test11.7 @@ -0,0 +1,136 @@ +#!/usr/bin/expect +############################################################################ +# Purpose: Test of POE functionality +# Test of hostfile option (-hostfile). +# +# +# Output: "TEST: #.#" followed by "SUCCESS" if test was successful, OR +# "FAILURE: ..." otherwise with an explanation of the failure, OR +# anything else indicates a failure mode that must be investigated.
+# +############################################################################ +# Copyright (C) 2002 The Regents of the University of California. +# Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). +# Written by Danny Auble <da@llnl.gov> +# UCRL-CODE-2002-040. +# +# This file is part of SLURM, a resource management program. +# For details, see <http://www.llnl.gov/linux/slurm/>. +# +# SLURM is free software; you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free +# Software Foundation; either version 2 of the License, or (at your option) +# any later version. +# +# SLURM is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +# details. +# +# You should have received a copy of the GNU General Public License along +# with SLURM; if not, write to the Free Software Foundation, Inc., +# 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. +############################################################################ +source ./globals + +set test_id "11.7" +set partition "debug" +set exit_code 0 +set env(SLURM_LL_API_DEBUG) "2" +set num_nodes 2 +set num_tasks 2 +set node_count 0 +set task_count 0 +set job_id 0 +set hostfile "test$test_id.hostfile" + +print_header $test_id + +exec $bin_rm -f $hostfile + +if { ![file exists $poe] } { + send_user "WARNING: poe must be installed on the\ + system to run this test.\n" + exit $exit_code +} + +#find out if we have enough nodes to test functionality +spawn $scontrol show partition +expect { + -re "TotalNodes=($number)" { + set node_count $expect_out(1,string) + if { $node_count < 2 } { + send_user "WARNING: system must have at least 2 \ + nodes to run this test on.
This system \ + only has $node_count.\n" + exit $exit_code + } + exp_continue + } + timeout { + send_user "\nFAILURE: scontrol not responding\n" + exit 1 + } + eof { + } +} +set node0 0 +set node1 0 + +for {set i 0} {$i<2} {incr i} { + + if { $i==1 } { + if { $node0 == 0 || $node1 == 0 } { + send_user "\nFAILURE: node names not set from \ + previous poe run\n" + exit 1 + } + set 1node0 $node0 + set 1node1 $node1 + set file [open $hostfile "w"] + puts $file "$node1\n$node0" + close $file + spawn $poe $bin_hostname -resd yes -rmpool $partition -procs \ + $num_tasks -nodes $num_nodes -retry wait \ + -hostfile $hostfile + } else { + # + # execute poe with a specific node count + # + spawn $poe $bin_hostname -resd yes -rmpool $partition -procs \ + $num_tasks -nodes $num_nodes -retry wait + } + expect { + -re "0:($alpha_numeric)" { + set node0 $expect_out(1,string) + exp_continue + } + -re "1:($alpha_numeric)" { + set node1 $expect_out(1,string) + exp_continue + } + -re "slurm job ($number)" { + set job_id $expect_out(1,string) + exp_continue + } + timeout { + send_user "\nFAILURE: poe not responding\n" + catch {exec $scancel --quiet $job_id} + set exit_code 1 + } + eof { + } + } +} +if { [string compare $node0 $1node1] } { + send_user "\nFAILURE: tasks not distributed by hostfile\n" + set exit_code 1 +} +if { [string compare $node1 $1node0] } { + send_user "\nFAILURE: tasks not distributed by hostfile\n" + set exit_code 1 +} +if {$exit_code == 0} { + send_user "\nSUCCESS\n" +} +exit $exit_code -- GitLab