diff --git a/testsuite/expect/test1.86 b/testsuite/expect/test1.86
new file mode 100755
index 0000000000000000000000000000000000000000..08e37a5c68621e4697f5793aba345c5bcaaea788
--- /dev/null
+++ b/testsuite/expect/test1.86
@@ -0,0 +1,468 @@
+#!/usr/bin/expect
+############################################################################
+# Purpose: Test of SLURM functionality
+#          Confirm node selection from within a job step on existing allocation
+#          (--nodelist, --exclude, --nodes and --nprocs options).
+#
+# Output:  "TEST: #.#" followed by "SUCCESS" if test was successful, OR
+#          "WARNING: ..." with an explanation of why the test can't be made, OR
+#          "FAILURE: ..." otherwise with an explanation of the failure, OR
+#          anything else indicates a failure mode that must be investigated.
+############################################################################
+# Copyright (C) 2002 The Regents of the University of California.
+# Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+# Written by Morris Jette <jette1@llnl.gov>
+# UCRL-CODE-2002-040.
+#
+# This file is part of SLURM, a resource management program.
+# For details, see <http://www.llnl.gov/linux/slurm/>.
+#
+# SLURM is free software; you can redistribute it and/or modify it under
+# the terms of the GNU General Public License as published by the Free
+# Software Foundation; either version 2 of the License, or (at your option)
+# any later version.
+#
+# SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
+# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
+# details.
+#
+# You should have received a copy of the GNU General Public License along
+# with SLURM; if not, write to the Free Software Foundation, Inc.,
+# 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA.
+############################################################################
+source ./globals
+
+set test_id     "1.86"
+set exit_code   0
+set file_in     "test$test_id.input"
+set prompt      "SLURM_QA_PROMPT: "
+
+############################################################################
+# Helper procedures
+#
+# Every helper sends one command line to the interactive shell that is
+# spawned further down (srun -A) and then reads output until the shell
+# prompt comes back.  None of them sets spawn_id or timeout locally, so
+# Expect falls back to the global values established by the top-level
+# "spawn" and "set timeout" below.
+#
+# Common behaviour of all helpers:
+#   - "srun not responding" (timeout): kill srun and exit 1.
+#   - shell/srun gone (eof): report it and exit 1.  Carrying on would only
+#     make the next "send" fail on a closed spawn id.
+############################################################################
+
+#
+# Run "cmd" and collect every "<task>: <host>" line printed by srun -l.
+#
+# cmd      complete shell command line to send (without trailing newline)
+# Returns  flat list {task host task host ...}, one pair per matching
+#          output line, in arrival order
+#
+proc collect_task_hosts { cmd } {
+	global prompt number alpha_numeric
+
+	set pairs {}
+	send "$cmd\n"
+	expect {
+		-re "($number): ($alpha_numeric)" {
+			lappend pairs $expect_out(1,string) $expect_out(2,string)
+			exp_continue
+		}
+		-re "Unable to contact" {
+			send_user "\nFAILURE: slurm appears to be down\n"
+			exit 1
+		}
+		"$prompt" {
+			send_user "srun completed\n"
+		}
+		timeout {
+			send_user "\nFAILURE: srun not responding\n"
+			kill_srun
+			exit 1
+		}
+		eof {
+			wait
+			send_user "\nFAILURE: srun terminated\n"
+			exit 1
+		}
+	}
+	return $pairs
+}
+
+#
+# Look up the host reported for task number "task" in a list produced by
+# collect_task_hosts.  If the task was reported more than once the last
+# report wins; if it was never reported an empty string is returned.
+#
+proc host_of_task { pairs task } {
+	set host ""
+	foreach {inx name} $pairs {
+		if {$inx == $task} {
+			set host $name
+		}
+	}
+	return $host
+}
+
+#
+# Run a single-task job step with "cmd" and verify that task 0 reports
+# "expected_host".  Sets the global exit_code to 1 if a different host
+# answers or if no answer from task 0 is seen at all.
+#
+# cmd            complete shell command line to send
+# expected_host  host name that task 0 must print
+#
+proc check_step_host { cmd expected_host } {
+	global prompt alpha_numeric exit_code
+
+	set matches 0
+	send "$cmd\n"
+	expect {
+		-re "0: ($alpha_numeric)" {
+			if {[string compare $expect_out(1,string) $expected_host] == 0} {
+				incr matches
+			} else {
+				send_user "\nFAILURE: wrong node responded\n"
+				set exit_code 1
+			}
+			exp_continue
+		}
+		-re "Unable to contact" {
+			send_user "\nFAILURE: slurm appears to be down\n"
+			exit 1
+		}
+		"$prompt" {
+			send_user "srun completed\n"
+		}
+		timeout {
+			send_user "\nFAILURE: srun not responding\n"
+			kill_srun
+			exit 1
+		}
+		eof {
+			wait
+			send_user "\nFAILURE: srun terminated\n"
+			exit 1
+		}
+	}
+	if {$matches == 0} {
+		send_user "\nFAILURE: required node failed to respond\n"
+		set exit_code 1
+	}
+}
+
+#
+# Run "cmd", which is expected to be rejected by srun.  Any output from
+# task 0 means the step ran and is a failure; at least one "error:" line
+# must be seen.  Sets the global exit_code to 1 on either problem.
+#
+# cmd            complete shell command line to send
+# what           description used in the "No error for ..." failure message
+# announce_each  1 (default): reassure the reader after every "error:" line
+#                0: reassure once, after the command has completed
+#
+proc check_step_error { cmd what {announce_each 1} } {
+	global prompt alpha_numeric exit_code
+
+	set matches 0
+	send "$cmd\n"
+	expect {
+		-re "0: ($alpha_numeric)" {
+			send_user "\nFAILURE: wrong node responded\n"
+			set exit_code 1
+			exp_continue
+		}
+		"error:" {
+			if {$announce_each} {
+				send_user "This error is expected, no worries\n"
+			}
+			incr matches
+			exp_continue
+		}
+		"$prompt" {
+			send_user "srun completed\n"
+		}
+		timeout {
+			send_user "\nFAILURE: srun not responding\n"
+			kill_srun
+			exit 1
+		}
+		eof {
+			wait
+			send_user "\nFAILURE: srun terminated\n"
+			exit 1
+		}
+	}
+	if {$matches == 0} {
+		send_user "\nFAILURE: No error for $what\n"
+		set exit_code 1
+	} elseif {!$announce_each} {
+		send_user "This error is expected, no worries\n"
+	}
+}
+
+############################################################################
+# Test body
+############################################################################
+print_header $test_id
+
+if {[test_front_end] != 0} {
+	send_user "\nWARNING: This test is incompatible with front-end systems\n"
+	exit 0
+}
+
+#
+# Delete left-over input script file
+# Build input script file: it sets a recognizable prompt and starts an
+# interactive bash without reading rc files, so that the prompt is the one
+# the expect patterns below wait for.
+#
+exec $bin_rm -f $file_in
+exec echo "#!$bin_bash" >$file_in
+exec echo "export PS1=\"$prompt\" " >>$file_in
+exec echo "$bin_bash -norc" >>$file_in
+exec $bin_chmod 700 $file_in
+
+#
+# Submit a 2 node job
+# Every exit path of this block removes the input script again.
+#
+set timeout $max_job_delay
+spawn $srun -N2 -A $file_in
+expect {
+	-re "More ($alpha) requested than permitted" {
+		send_user "\nWARNING: can't test srun task distribution\n"
+		exec $bin_rm -f $file_in
+		exit $exit_code
+	}
+	-re "Unable to contact" {
+		send_user "\nFAILURE: slurm appears to be down\n"
+		exec $bin_rm -f $file_in
+		exit 1
+	}
+	"$prompt" {
+		send_user "Job initiated\n"
+	}
+	timeout {
+		send_user "\nFAILURE: srun not responding\n"
+		kill_srun
+		exec $bin_rm -f $file_in
+		exit 1
+	}
+	eof {
+		wait
+		send_user "\nFAILURE: srun terminated\n"
+		kill_srun
+		exec $bin_rm -f $file_in
+		exit 1
+	}
+}
+exec $bin_rm -f $file_in
+
+#
+# Get node names: the host that runs task 0 and the host that runs task 1
+#
+set task_hosts [collect_task_hosts "$srun -l $bin_hostname"]
+set host_0 [host_of_task $task_hosts 0]
+set host_1 [host_of_task $task_hosts 1]
+
+#
+# Verify node count: without both host names none of the following
+# include/exclude checks can be evaluated, so give up right away.
+#
+if {[string compare $host_0 ""] == 0} {
+	send_user "\nFAILURE: Did not get hostname of task 0\n"
+	set exit_code 1
+}
+if {[string compare $host_1 ""] == 0} {
+	send_user "\nFAILURE: Did not get hostname of task 1\n"
+	set exit_code 1
+}
+if {$exit_code != 0} {
+	exit $exit_code
+}
+
+#
+# Exclude specific node: the remaining node of the allocation must respond
+#
+check_step_host "$srun -l -N1 -n1 --exclude=$host_0 $bin_hostname" $host_1
+check_step_host "$srun -l -N1 -n1 --exclude=$host_1 $bin_hostname" $host_0
+
+#
+# Include specific node: exactly the requested node must respond
+#
+check_step_host "$srun -l -N1 -n1 --nodelist=$host_0 $bin_hostname" $host_0
+check_step_host "$srun -l -N1 -n1 --nodelist=$host_1 $bin_hostname" $host_1
+
+#
+# Error test: Overlapping include/exclude node list
+#
+check_step_error \
+	"$srun -l -N1 -n1 --nodelist=$host_0 --exclude=$host_0 $bin_hostname" \
+	"overlapping include/exclude node list"
+
+#
+# Error test: Exceed node count (3 nodes requested, 2 allocated)
+#
+check_step_error "$srun -l -N3 -n3 -O $bin_hostname" \
+	"exceeding node count"
+
+#
+# Run with fewer nodes: two tasks restricted to one node must both report
+# the same host.  Both host names must actually have been seen, otherwise
+# two empty strings would compare equal and hide a step that never ran.
+#
+set task_hosts [collect_task_hosts "$srun -l -N1-1 -n2 -O $bin_hostname"]
+set test_0 [host_of_task $task_hosts 0]
+set test_1 [host_of_task $task_hosts 1]
+if {[string compare $test_0 ""] == 0 || [string compare $test_1 ""] == 0} {
+	send_user "\nFAILURE: Did not get hostnames of both tasks\n"
+	set exit_code 1
+} elseif {[string compare $test_0 $test_1] != 0} {
+	send_user "\nFAILURE: Multiple nodes responded, should be only one\n"
+	set exit_code 1
+}
+
+#
+# Error test: Exceed task count, first get the processor count then exceed it
+# One output line is counted per task; starting from 1 makes the request
+# below one larger than the number of tasks that were just started.
+#
+set task_hosts [collect_task_hosts "$srun -l -c1 $bin_hostname"]
+set processors [expr {[llength $task_hosts] / 2 + 1}]
+check_step_error "$srun -l -n $processors $bin_hostname" \
+	"exceeding processor count" 0
+
+#
+# Post-processing: leave the allocation shell.  Here eof is the normal,
+# expected way for the dialogue to end.
+# NOTE(review): the "error ... Exit 1" report presumably stems from the
+# last step in the shell having failed on purpose -- confirm.
+#
+send "exit\n"
+expect {
+	-re "error.*Exit 1" {
+		send_user "This error is expected, no worries\n"
+		exp_continue
+	}
+	timeout {
+		send_user "\nFAILURE: srun not responding\n"
+		kill_srun
+		exit 1
+	}
+	eof {
+		wait
+	}
+}
+
+if {$exit_code == 0} {
+	send_user "\nSUCCESS\n"
+}
+exit $exit_code