diff --git a/testsuite/expect/test1.91 b/testsuite/expect/test1.91
new file mode 100755
index 0000000000000000000000000000000000000000..f091f175e2c09d0d63bb84843ce647fc85156f01
--- /dev/null
+++ b/testsuite/expect/test1.91
@@ -0,0 +1,304 @@
+#!/usr/bin/expect
+############################################################################
+# Purpose: Test of SLURM functionality
+#          Test of CPU affinity support for multi-core systems.
+#
+# Output:  "TEST: #.#" followed by "SUCCESS" if test was successful, OR
+#          "WARNING: ..." with an explanation of why the test can't be made, OR
+#          "FAILURE: ..." otherwise with an explanation of the failure, OR
+#          anything else indicates a failure mode that must be investigated.
+############################################################################
+# Copyright (C) 2005 The Regents of the University of California.
+# Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+# Written by Morris Jette <jette1@llnl.gov>
+# UCRL-CODE-226842.
+#
+# This file is part of SLURM, a resource management program.
+# For details, see <http://www.llnl.gov/linux/slurm/>.
+#
+# SLURM is free software; you can redistribute it and/or modify it under
+# the terms of the GNU General Public License as published by the Free
+# Software Foundation; either version 2 of the License, or (at your option)
+# any later version.
+#
+# SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
+# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+# details.
+#
+# You should have received a copy of the GNU General Public License along
+# with SLURM; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+############################################################################
+source ./globals
+
+set test_id "1.91"
+set exit_code 0
+set file_prog "test$test_id.prog"
+
+print_header $test_id
+
+#
+# Return the number of set bits in the non-negative integer mask.
+#
+proc count_set_bits { mask } {
+	set bits 0
+	while {$mask > 0} {
+		if {$mask & 1} {
+			incr bits
+		}
+		set mask [expr {$mask >> 1}]
+	}
+	return $bits
+}
+
+#
+# Send "$srun <srun_args> $file_prog" to the shell running inside the
+# allocation and consume output until the next shell prompt.
+#
+# Every "TASK_ID:<n>,MASK:<n>" line is tallied. The word "error" in the
+# output, or a timeout, sets the global exit_code to 1.
+#
+# Returns a key/value list suitable for "array set":
+#	tasks     - number of TASK_ID lines seen
+#	mask_sum  - arithmetic sum of the reported masks
+#	bits      - number of set bits summed over the reported masks
+#	timed_out - 1 if the prompt was not seen before the timeout, else 0
+#
+proc run_affinity_step { srun_args } {
+	global spawn_id srun file_prog number prompt exit_code
+
+	set tasks 0
+	set mask_sum 0
+	set bits 0
+	set timed_out 0
+	send "$srun $srun_args $file_prog\n"
+	expect {
+		-re "TASK_ID:($number),MASK:($number)" {
+			set this_mask $expect_out(2,string)
+			incr tasks
+			incr mask_sum $this_mask
+			incr bits [count_set_bits $this_mask]
+			exp_continue
+		}
+		-re "error" {
+			send_user "\nFAILURE: some error occurred\n"
+			set exit_code 1
+			exp_continue
+		}
+		timeout {
+			send_user "\nFAILURE: salloc not responding "
+			send_user "or failure to recognize prompt\n"
+			set exit_code 1
+			set timed_out 1
+		}
+		-re $prompt
+	}
+	return [list tasks $tasks mask_sum $mask_sum bits $bits timed_out $timed_out]
+}
+
+#
+# Run one job step and fail the test unless it reports exactly
+# expected_tasks tasks whose masks contain expected_bits set bits in total.
+#
+proc verify_step_counts { srun_args expected_tasks expected_bits } {
+	global exit_code
+
+	array set step [run_affinity_step $srun_args]
+	if {$step(tasks) != $expected_tasks} {
+		send_user "\nFAILURE: number of tasks inconsistent ($step(tasks),$expected_tasks)\n"
+		set exit_code 1
+	}
+	if {$step(bits) != $expected_bits} {
+		send_user "\nFAILURE: number of set bits inconsistent ($step(bits),$expected_bits)\n"
+		set exit_code 1
+	}
+}
+
+#
+# Run one job step and fail the test unless the sum of the reported masks
+# equals expected_mask.
+#
+proc verify_step_mask { srun_args expected_mask } {
+	global exit_code
+
+	array set step [run_affinity_step $srun_args]
+	if {$step(mask_sum) != $expected_mask} {
+		send_user "\nFAILURE: affinity mask inconsistency ($step(mask_sum),$expected_mask)\n"
+		set exit_code 1
+	}
+}
+
+#
+# Test if task affinity support is supported.
+#
+set affinity 0
+log_user 0
+spawn $scontrol show config
+expect {
+	-re "task/affinity" {
+		set affinity 1
+		exp_continue
+	}
+	eof {
+		wait
+	}
+}
+log_user 1
+if {$affinity == 0} {
+	send_user "\nWARNING: task affinity not supported on this system\n"
+	exit 0
+}
+send_user "\ntask affinity plugin installed\n"
+
+#
+# Read the Sockets/Cores/Threads counts from "scontrol show node".
+# Each match overwrites the previous one, so the last value printed wins.
+#
+set num_sockets 0
+set num_cores 0
+set num_threads 0
+log_user 0
+spawn $scontrol show node
+expect {
+	-re "Sockets=($number)" {
+		set num_sockets $expect_out(1,string)
+		exp_continue
+	}
+	-re "Cores=($number)" {
+		set num_cores $expect_out(1,string)
+		exp_continue
+	}
+	-re "Threads=($number)" {
+		set num_threads $expect_out(1,string)
+		exp_continue
+	}
+	eof {
+		wait
+	}
+}
+log_user 1
+if {$num_sockets == 0 || $num_cores == 0 || $num_threads == 0} {
+	send_user "\nWARNING: Could not determine number of Sockets:Cores:Threads (saw $num_sockets:$num_cores:$num_threads)\n"
+	exit 0
+}
+send_user "Node config: Sockets=$num_sockets Cores=$num_cores Threads=$num_threads\n\n"
+
+#
+# Build a test program to report affinity by task
+#
+exec $bin_rm -f $file_prog
+exec $bin_cc -I$build_dir $file_prog.c -o $file_prog
+exec $bin_chmod 700 $file_prog
+
+#
+# Create an allocation
+#
+set env(SLURM_CPU_BIND) "verbose"
+set salloc_pid [spawn $salloc -N1 --exclusive --verbose -t2 $bin_bash]
+
+#############################################################################
+#
+# Run a job step to get allocated processor count and affinity
+#
+# The bare prompt wait has no timeout branch of its own; a shell that never
+# answers is reported by the timeout handling of the job step that follows.
+#
+expect -re $prompt
+array set first_step [run_affinity_step "-c1"]
+if {$first_step(timed_out)} {
+	slow_kill $salloc_pid
+	exit 1
+}
+set task_cnt $first_step(tasks)
+if {$task_cnt == 0} {
+	send_user "\nFAILURE: initial job step reported no tasks\n"
+	slow_kill $salloc_pid
+	exit 1
+}
+
+#############################################################################
+#
+# Run a job step with affinity to verify unique masks with min -B 1:1:1
+#
+# NOTE(review): (1 << $task_cnt) presumably overflows on Tcl builds with
+# 32-bit integers once task_cnt reaches 32 -- confirm on large nodes.
+#
+set expected_mask [expr {(1 << $task_cnt) - 1}]
+verify_step_mask "-c1 -n $task_cnt -B 1:1:1" $expected_mask
+
+#############################################################################
+#
+# Run varying number of sockets, verify task count and number of set bits
+#
+for {set this_cnt 1} {$this_cnt <= $num_sockets} {incr this_cnt} {
+	set expected_tasks [expr {$this_cnt * $num_cores * $num_threads}]
+	verify_step_counts "-B $this_cnt-$this_cnt:$num_cores:$num_threads" \
+		$expected_tasks $expected_tasks
+}
+
+#############################################################################
+#
+# Run varying number of cores, verify task count and number of set bits
+#
+for {set this_cnt 1} {$this_cnt <= $num_cores} {incr this_cnt} {
+	set expected_tasks [expr {$num_sockets * $this_cnt * $num_threads}]
+	verify_step_counts "-B $num_sockets:$this_cnt-$this_cnt:$num_threads" \
+		$expected_tasks $expected_tasks
+}
+
+#############################################################################
+#
+# Run varying number of threads, verify task count and number of set bits
+#
+for {set this_cnt 1} {$this_cnt <= $num_threads} {incr this_cnt} {
+	set expected_tasks [expr {$num_sockets * $num_cores * $this_cnt}]
+	verify_step_counts "-B $num_sockets:$num_cores:$this_cnt-$this_cnt" \
+		$expected_tasks $expected_tasks
+}
+
+#############################################################################
+#
+# Run varying cpus per task, verify task count and number of set bits
+#
+for {set this_cnt 1} {$this_cnt <= $task_cnt} {incr this_cnt} {
+	verify_step_counts "-c$this_cnt -B 1:1:1" 1 $this_cnt
+}
+
+#############################################################################
+#
+# Run a job step with plane distribution to exercise option
+#
+# The expected sum is the full task_cnt-bit mask multiplied by task_cnt,
+# i.e. the total obtained when every task reports all of those bits set.
+#
+set expected_mask [expr {((1 << $task_cnt) - 1) * $task_cnt}]
+verify_step_mask "-n $task_cnt -m plane=4" $expected_mask
+
+#############################################################################
+#
+# Terminate the job, free the allocation
+#
+send "exit\n"
+expect {
+	-re "error" {
+		send_user "\nFAILURE: some error occurred\n"
+		set exit_code 1
+		exp_continue
+	}
+	timeout {
+		send_user "\nFAILURE: salloc not responding "
+		send_user "or failure to recognize prompt\n"
+		slow_kill $salloc_pid
+		set exit_code 1
+	}
+	eof {
+		wait
+	}
+}
+
+if {$exit_code == 0} {
+	exec $bin_rm -f $file_prog
+	send_user "\nSUCCESS\n"
+}
+exit $exit_code