diff --git a/testsuite/expect/README b/testsuite/expect/README index 6cf9295681a580e20be7d9ef016f6ddb57c5f596..1e27cbccc8d300aedc665977d7bb59e9cbe97aab 100644 --- a/testsuite/expect/README +++ b/testsuite/expect/README @@ -179,6 +179,7 @@ test1.89 Test of CPU affinity support. test1.90 Test of memory affinity support for NUMA systems. test1.91 Test of CPU affinity for multi-core systems. test1.92 Test of task distribution support on multi-core systems. +test1.93 Test of LAM-MPI functionality **NOTE** The above tests for mutliple processor/partition systems only test2.# Testing of scontrol options (to be run as unprivileged user). diff --git a/testsuite/expect/test1.86 b/testsuite/expect/test1.86 index 92ad51b3aa2cc40e271985097a371d819de4e841..380e80997e4846e18d8e3af5fcbaf2faba812aab 100755 --- a/testsuite/expect/test1.86 +++ b/testsuite/expect/test1.86 @@ -62,7 +62,7 @@ make_bash_script $file_in " # Submit a 2 node job # set timeout $max_job_delay -set salloc_pid [spawn $salloc -N2 ./$file_in] +set salloc_pid [spawn $salloc -N2 -t1 ./$file_in] expect { -re "More ($alpha) requested than permitted" { send_user "\nWARNING: can't test srun task distribution\n" diff --git a/testsuite/expect/test1.93 b/testsuite/expect/test1.93 new file mode 100755 index 0000000000000000000000000000000000000000..6ff305d4cde70002947dd8e6ee68e262bb146e6e --- /dev/null +++ b/testsuite/expect/test1.93 @@ -0,0 +1,156 @@ +#!/usr/bin/expect +############################################################################ +# Purpose: Test of SLURM functionality +# Test of LAM-MPI functionality +# +# Output: "TEST: #.#" followed by "SUCCESS" if test was successful, OR +# "WARNING: ..." with an explanation of why the test can't be made, OR +# "FAILURE: ..." otherwise with an explanation of the failure, OR +# anything else indicates a failure mode that must be investigated. 
+############################################################################ +# Copyright (C) 2008 Lawrence Livermore National Security. +# Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). +# Written by Morris Jette <jette1@llnl.gov> +# LLNL-CODE-402394. +# +# This file is part of SLURM, a resource management program. +# For details, see <http://www.llnl.gov/linux/slurm/>. +# +# SLURM is free software; you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free +# Software Foundation; either version 2 of the License, or (at your option) +# any later version. +# +# SLURM is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +# details. +# +# You should have received a copy of the GNU General Public License along +# with SLURM; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+############################################################################
+source ./globals
+
+set test_id "1.93"
+set exit_code 0
+set file_in "test$test_id.input"
+set prompt "PROMPT: "
+
+print_header $test_id
+
+if {[test_front_end] != 0} {
+	send_user "\nWARNING: This test is incompatable with front-end systems\n"
+	exit 0
+}
+
+# Build the script that salloc runs: an interactive shell with a known
+# prompt, which lets expect detect when the job has started and when each
+# command sent to the shell has completed
+make_bash_script $file_in "
+  export PS1=\"$prompt\"
+  $bin_bash --norc
+"
+
+# Submit a 2 task job with a one minute time limit. The script file is
+# removed on every path out of this block, so that a failed run does not
+# leave it behind
+set timeout $max_job_delay
+set salloc_pid [spawn $salloc -t1 -n2 ./$file_in]
+expect {
+	-re "More ($alpha) requested than permitted" {
+		send_user "\nWARNING: can't test srun task distribution\n"
+		exec $bin_rm -f $file_in
+		exit $exit_code
+	}
+	-re "Unable to contact" {
+		send_user "\nFAILURE: slurm appears to be down\n"
+		exec $bin_rm -f $file_in
+		exit 1
+	}
+	"$prompt" {
+		send_user "Job initiated\n"
+	}
+	timeout {
+		send_user "\nFAILURE: srun not responding\n"
+		slow_kill $salloc_pid
+		exec $bin_rm -f $file_in
+		exit 1
+	}
+	eof {
+		wait
+		send_user "\nFAILURE: srun terminated\n"
+		exec $bin_rm -f $file_in
+		exit 1
+	}
+}
+exec $bin_rm -f $file_in
+
+# Get node names. "srun -l" labels each line of output with its task
+# number, so the names reported by tasks 0 and 1 can be told apart. Any
+# other task number is ignored
+set host_0 ""
+set host_1 ""
+send "$srun -l --mpi=lam $bin_hostname\n"
+expect {
+	-re "($number): ($alpha_numeric)" {
+		set host_inx $expect_out(1,string)
+		if {$host_inx == 0} {
+			set host_0 $expect_out(2,string)
+		} elseif {$host_inx == 1} {
+			set host_1 $expect_out(2,string)
+		}
+		exp_continue
+	}
+	-re "Unable to contact" {
+		send_user "\nFAILURE: slurm appears to be down\n"
+		exit 1
+	}
+	"$prompt" {
+		send_user "srun completed\n\n"
+	}
+	timeout {
+		send_user "\nFAILURE: srun not responding\n"
+		slow_kill $salloc_pid
+		exit 1
+	}
+	eof {
+		send_user "\nFAILURE: srun EOF\n"
+		exit 1
+	}
+}
+
+# Verify task placement: with --mpi=lam no two tasks may share a node.
+# The comparison is skipped when task zero gave no answer, as two empty
+# names would otherwise be reported as a second, misleading failure
+if {[string compare $host_0 ""] == 0} {
+	send_user "\nFAILURE: no response from task zero\n"
+	set exit_code 1
+} elseif {[string compare $host_0 $host_1] == 0} {
+	send_user "\nFAILURE: mulitple tasks ran on a single node with --mpi=lam option\n"
+	set exit_code 1
+}
+
+# Post-processing: exit the shell, then wait for salloc to terminate.
+# Any "error" text seen on the way out fails the test
+send "exit\n"
+expect {
+	-re "error" {
+		send_user "\nFAILURE: Some error occured\n"
+		set exit_code 1
+		exp_continue
+	}
+	timeout {
+		send_user "\nFAILURE: srun not responding\n"
+		slow_kill $salloc_pid
+		exit 1
+	}
+	eof {
+		wait
+	}
+}
+
+if {$exit_code == 0} {
+	send_user "\nSUCCESS\n"
+}
+exit $exit_code