diff --git a/testsuite/expect/test8.21 b/testsuite/expect/test8.21
new file mode 100755
index 0000000000000000000000000000000000000000..014e133a2d0fdcb4744bf24bb5228cb71b6931ba
--- /dev/null
+++ b/testsuite/expect/test8.21
@@ -0,0 +1,241 @@
+#!/usr/bin/expect
+############################################################################
+# Purpose: Test of SLURM functionality
+#          Bluegene/Q only: Test that multiple job step allocations are
+#          properly packed within the job's allocation
+#
+# Output:  "TEST: #.#" followed by "SUCCESS" if test was successful, OR
+#          "FAILURE: ..." otherwise with an explanation of the failure, OR
+#          anything else indicates a failure mode that must be investigated.
+############################################################################
+# Copyright (C) 2011 SchedMD LLC
+# Written by Morris Jette <jette@schedmd.gov>
+#
+# This file is part of SLURM, a resource management program.
+# For details, see <http://www.schedmd.com/slurmdocs/>.
+# Please also read the included file: DISCLAIMER.
+#
+# SLURM is free software; you can redistribute it and/or modify it under
+# the terms of the GNU General Public License as published by the Free
+# Software Foundation; either version 2 of the License, or (at your option)
+# any later version.
+#
+# SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
+# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+# details.
+#
+# You should have received a copy of the GNU General Public License along
+# with SLURM; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+############################################################################
+source ./globals
+
+set test_id "8.21"
+set exit_code 0
+set file_prog "test$test_id.bash"
+set job_id 0
+set job_size 32
+
+
+print_header $test_id
+
+if {([test_bluegene] == 0) || [string compare [get_bluegene_type] "Q"]} {
+	send_user "\nWARNING: This test is only compatible with Bluegene/Q systems\n"
+	exit $exit_code
+}
+
+#
+# Spawn a job via salloc
+#
+set matches 0
+set timeout $max_job_delay
+set salloc_pid [spawn $salloc -N$job_size -t1 $bin_bash]
+expect {
+	-re "Granted job allocation ($number)" {
+		set job_id $expect_out(1,string)
+		exp_continue
+	}
+	-re $prompt {
+		#send_user "Job initiated\n"
+	}
+	timeout {
+		send_user "\nFAILURE: salloc not responding\n"
+		if {$job_id != 0} {
+			cancel_job $job_id
+		}
+		slow_kill [expr 0 - $salloc_pid]
+		exit 1
+	}
+}
+
+if {$job_id == 0} {
+	send_user "\nFAILURE: did not get job_id\n"
+	exit 1
+}
+
+#
+# Determine the job's allocation dimensions
+#
+set timeout 5
+set job_start1 -1
+send "$scontrol show job $job_id\r"
+expect {
+	-re "BP_List=($alpha_numeric).($digit)($digit)($digit)($digit)($digit)x($digit)($digit)($digit)($digit)($digit)" {
+		# Captures: prefix, then five start and five end coordinates
+		set job_prefix $expect_out(1,string)
+		set job_start1 $expect_out(2,string)
+		set job_start2 $expect_out(3,string)
+		set job_start3 $expect_out(4,string)
+		set job_start4 $expect_out(5,string)
+		set job_start5 $expect_out(6,string)
+		set job_fini1 $expect_out(7,string)
+		set job_fini2 $expect_out(8,string)
+		set job_fini3 $expect_out(9,string)
+		set job_fini4 $expect_out(10,string)
+		set job_fini5 $expect_out(11,string)
+		exp_continue
+	}
+	-re $prompt {
+		#break
+	}
+	timeout {
+		send_user "\nFAILURE: scontrol not responding\n"
+		set exit_code 1
+	}
+}
+if {$job_start1 == -1} {
+	send_user "\nFAILURE: did not get job dimensions\n"
+	cancel_job $job_id
+	exit 1
+}
+send_user "\nJob allocation\n"
+send_user "prefix: $job_prefix\n"
+send_user "dim 1: $job_start1 to $job_fini1 "
+send_user "dim 2: $job_start2 to $job_fini2 "
+send_user "dim 3: $job_start3 to $job_fini3 "
+send_user "dim 4: $job_start4 to $job_fini4 "
+send_user "dim 5: $job_start5 to $job_fini5\n"
+
+set job_dim1 [expr $job_fini1 - $job_start1 + 1]
+set job_dim2 [expr $job_fini2 - $job_start2 + 1]
+set job_dim3 [expr $job_fini3 - $job_start3 + 1]
+set job_dim4 [expr $job_fini4 - $job_start4 + 1]
+set job_dim5 [expr $job_fini5 - $job_start5 + 1]
+set actual_job_size [expr $job_dim1 * $job_dim2 * $job_dim3 * $job_dim4 * $job_dim5]
+send_user "size: $actual_job_size c-nodes\n"
+if {$actual_job_size < $job_size} {
+	send_user "\nFAILURE: job allocation too small ($actual_job_size < $job_size)\n"
+	cancel_job $job_id
+	exit 1
+}
+if {$actual_job_size != $job_size} {
+# This is a legitimate condition. A request for 5 c-nodes requires
+# at least 6 c-nodes (3x2x1x1x1).
+	send_user "\nWARNING: job allocation too large ($actual_job_size != $job_size)\n"
+}
+
+#
+# Build an array to count the job's c-nodes which have been allocated to steps
+#
+for {set dim1 $job_start1} {$dim1 <= $job_fini1} {incr dim1} {
+	for {set dim2 $job_start2} {$dim2 <= $job_fini2} {incr dim2} {
+		for {set dim3 $job_start3} {$dim3 <= $job_fini3} {incr dim3} {
+			for {set dim4 $job_start4} {$dim4 <= $job_fini4} {incr dim4} {
+				for {set dim5 $job_start5} {$dim5 <= $job_fini5} {incr dim5} {
+					set use_cnt($dim1,$dim2,$dim3,$dim4,$dim5) 0
+				}
+			}
+		}
+	}
+}
+
+set timeout 30
+send "./$file_prog $srun $squeue $job_id $actual_job_size\r"
+expect {
+	-re "BP_List=($alpha_numeric).($digit)($digit)($digit)($digit)($digit)x($digit)($digit)($digit)($digit)($digit)" {
+		set step_prefix $expect_out(1,string)
+		set step_start1 $expect_out(2,string)
+		set step_start2 $expect_out(3,string)
+		set step_start3 $expect_out(4,string)
+		set step_start4 $expect_out(5,string)
+		set step_start5 $expect_out(6,string)
+		set step_fini1 $expect_out(7,string)
+		set step_fini2 $expect_out(8,string)
+		set step_fini3 $expect_out(9,string)
+		set step_fini4 $expect_out(10,string)
+		set step_fini5 $expect_out(11,string)
+		# Count one use for every c-node inside the step's coordinate range
+		for {set dim1 $step_start1} {$dim1 <= $step_fini1} {incr dim1} {
+			for {set dim2 $step_start2} {$dim2 <= $step_fini2} {incr dim2} {
+				for {set dim3 $step_start3} {$dim3 <= $step_fini3} {incr dim3} {
+					for {set dim4 $step_start4} {$dim4 <= $step_fini4} {incr dim4} {
+						for {set dim5 $step_start5} {$dim5 <= $step_fini5} {incr dim5} {
+							if [info exists use_cnt($dim1,$dim2,$dim3,$dim4,$dim5)] {
+								incr use_cnt($dim1,$dim2,$dim3,$dim4,$dim5)
+							} else {
+								send_user "\nFAILURE: invalid step cnode allocation at "
+								send_user "\[$dim1,$dim2,$dim3,$dim4,$dim5\]\n"
+								set exit_code 1
+							}
+						}
+					}
+				}
+			}
+		}
+		exp_continue
+	}
+	-re "BP_List=($alpha_numeric).($digit)($digit)($digit)($digit)($digit)" {
+		# No "x<end>" range was reported: count the single coordinate given
+		set step_prefix $expect_out(1,string)
+		set dim1 $expect_out(2,string)
+		set dim2 $expect_out(3,string)
+		set dim3 $expect_out(4,string)
+		set dim4 $expect_out(5,string)
+		set dim5 $expect_out(6,string)
+		if [info exists use_cnt($dim1,$dim2,$dim3,$dim4,$dim5)] {
+			incr use_cnt($dim1,$dim2,$dim3,$dim4,$dim5)
+		} else {
+			send_user "\nFAILURE: invalid step cnode allocation at "
+			send_user "\[$dim1,$dim2,$dim3,$dim4,$dim5\]\n"
+			set exit_code 1
+		}
+		exp_continue
+	}
+	-re $prompt {
+		# Back at the shell prompt: exit the shell spawned under salloc
+		send "exit\r"
+		exp_continue
+	}
+	timeout {
+		send_user "\nFAILURE: job not responding\n"
+		set exit_code 1
+	}
+}
+
+#
+# Test that each of the job's c-nodes have been allocated once to some step
+#
+for {set dim1 $job_start1} {$dim1 <= $job_fini1} {incr dim1} {
+	for {set dim2 $job_start2} {$dim2 <= $job_fini2} {incr dim2} {
+		for {set dim3 $job_start3} {$dim3 <= $job_fini3} {incr dim3} {
+			for {set dim4 $job_start4} {$dim4 <= $job_fini4} {incr dim4} {
+				for {set dim5 $job_start5} {$dim5 <= $job_fini5} {incr dim5} {
+					if {$use_cnt($dim1,$dim2,$dim3,$dim4,$dim5) != 1} {
+						send_user "\nFAILURE: cnode at \[$dim1,$dim2,$dim3,$dim4,$dim5\] "
+						send_user "allocated $use_cnt($dim1,$dim2,$dim3,$dim4,$dim5) times\n"
+						set exit_code 1
+					}
+				}
+			}
+		}
+	}
+}
+
+if {$exit_code == 0} {
+	send_user "\nSUCCESS\n"
+} else {
+	cancel_job $job_id
+}
+
+exit $exit_code
diff --git a/testsuite/expect/test8.21.bash b/testsuite/expect/test8.21.bash
new file mode 100755
index 0000000000000000000000000000000000000000..ff47de5004c279cc599b9fd9b466162a911d7649
--- /dev/null
+++ b/testsuite/expect/test8.21.bash
@@ -0,0 +1,31 @@
+#!/bin/bash
+# Usage: test8.21.bash <srun_path> <squeue_path> <job_id> <job_size>
+# Issues srun requests of decreasing node count, then prints the
+# Step_ID and BP_List of each step of <job_id> using squeue.
+# NOTE(review): every srun here passes --test-only; confirm that job steps
+# are still created in this mode, since the caller parses the squeue output.
+
+if [ $# -ne 4 ]; then
+	echo "test8.21.bash <srun_path> <squeue_path> <job_id> <job_size>"
+	exit 1
+fi
+srun=$1
+squeue=$2
+job_id=$3
+job_size=$4
+
+$srun -N1 --test-only /bin/true
+sleep 5
+
+# Halve the requested node count on each pass: N/2, N/4, ..., 1
+while [ $job_size -ge 2 ]
+do
+	job_size=`expr $job_size / 2`
+	$srun -N$job_size --test-only sleep 50 &
+	sleep 1
+done
+# The halved sizes sum to job_size - 1 when job_size is a power of two;
+# one more single-node request accounts for the remaining node.
+$srun -N1 --test-only sleep 50 &
+sleep 5
+$squeue --jobs=$job_id --steps --noheader --format='Step_ID=%i BP_List=%N'