Skip to content
Snippets Groups Projects
Commit e4a6ad47 authored by Moe Jette's avatar Moe Jette
Browse files

Restore test1.91 with modes=700

parent f8f703f5
No related branches found
No related tags found
No related merge requests found
#!/usr/bin/expect
############################################################################
# Purpose: Test of SLURM functionality
# Test of CPU affinity support for multi-core systems.
#
# Output: "TEST: #.#" followed by "SUCCESS" if test was successful, OR
# "WARNING: ..." with an explanation of why the test can't be run, OR
# "FAILURE: ..." otherwise with an explanation of the failure, OR
# anything else indicates a failure mode that must be investigated.
############################################################################
# Copyright (C) 2005 The Regents of the University of California.
# Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
# Written by Morris Jette <jette1@llnl.gov>
# UCRL-CODE-226842.
#
# This file is part of SLURM, a resource management program.
# For details, see <http://www.llnl.gov/linux/slurm/>.
#
# SLURM is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 2 of the License, or (at your option)
# any later version.
#
# SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
# details.
#
# You should have received a copy of the GNU General Public License along
# with SLURM; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
############################################################################
# Load the shared test-suite definitions (presumably where $scontrol, $srun,
# $number, $prompt and the $bin_* paths used below come from -- confirm in
# ./globals), then record this test's identity and initial result.
source ./globals

set test_id   "1.91"
set file_prog "test$test_id.prog"
set exit_code 0

print_header $test_id
#
# Look for the string "task/affinity" in the output of "scontrol show config".
# When it is absent the test is skipped with a WARNING and a zero exit status.
#
set affinity 0
log_user 0
spawn $scontrol show config
expect {
	-re "task/affinity" {
		set affinity 1
		exp_continue
	}
	eof {
		wait
	}
}
log_user 1

if {$affinity != 0} {
	send_user "\ntask affinity plugin installed\n"
} else {
	send_user "\nWARNING: task affinity not supported on this system\n"
	exit 0
}
# Read the Sockets/Cores/Threads counts from "scontrol show node".  Each
# match overwrites the previous value, so the last one seen is kept.  If any
# of the three is still zero afterwards the test is skipped with a WARNING.
foreach counter {num_sockets num_cores num_threads} {
	set $counter 0
}

log_user 0
spawn $scontrol show node
expect {
	-re "Sockets=($number)" {
		set num_sockets $expect_out(1,string)
		exp_continue
	}
	-re "Cores=($number)" {
		set num_cores $expect_out(1,string)
		exp_continue
	}
	-re "Threads=($number)" {
		set num_threads $expect_out(1,string)
		exp_continue
	}
	eof {
		wait
	}
}
log_user 1

if {$num_sockets != 0 && $num_cores != 0 && $num_threads != 0} {
	send_user "Node config: Sockets=$num_sockets Cores=$num_cores Threads=$num_threads\n\n"
} else {
	send_user "\nWARNING: Could not determine number of Sockets:Cores:Threads (saw $num_sockets:$num_cores:$num_threads)\n"
	exit 0
}
#
# Compile the helper program that reports affinity by task.  Any previous
# binary is removed first and the new one is given mode 700.
#
set prog_source "$file_prog.c"
exec $bin_rm -f $file_prog
exec $bin_cc "-I$build_dir" $prog_source -o $file_prog
exec $bin_chmod 700 $file_prog
#
# Create an allocation
#
# SLURM_CPU_BIND is set to "verbose" in the environment that salloc inherits
# (presumably so that each srun reports the binding it applies -- confirm
# against the srun documentation).  The spawned salloc runs an interactive
# shell; every later job step is typed into that shell.
#
global env
set env(SLURM_CPU_BIND) "verbose"
set salloc_pid [spawn $salloc -N1 --exclusive --verbose -t2 $bin_bash]

#############################################################################
#
# Run a job step to get allocated processor count and affinity
#
# task_cnt becomes the number of TASK_ID lines printed by this step; the
# later sections use it as the task/CPU count of the allocation.  A timeout
# here is fatal because nothing else can be tested without that count.
#
expect -re $prompt
set mask 0
set task_cnt 0
send "$srun -c1 $file_prog\n"
expect {
	-re "TASK_ID:($number),MASK:($number)" {
		incr task_cnt
		set mask $expect_out(2,string)
		exp_continue
	}
	-re "error" {
		send_user "\nFAILURE: some error occurred\n"
		set exit_code 1
		exp_continue
	}
	timeout {
		send_user "\nFAILURE: salloc not responding "
		send_user "or failure to recognize prompt\n"
		slow_kill $salloc_pid
		exit 1
	}
	-re $prompt
}

#
# A zero task count must not be carried forward: the expected mask computed
# from it would be 0, which matches an empty set of task reports, and the
# cpus-per-task loop would not execute at all, so the test could report
# SUCCESS without verifying anything.
#
if {$task_cnt == 0} {
	send_user "\nFAILURE: no task affinity reports received\n"
	slow_kill $salloc_pid
	exit 1
}
#############################################################################
#
# Run a job step with affinity to verify unique masks with min -B 1:1:1
#
# One task is launched per previously counted task.  The reported masks are
# summed and the total must equal a value with the low task_cnt bits set.
#
set want_mask [expr {(1 << $task_cnt) - 1}]
set mask_sum 0
send "$srun -c1 -n $task_cnt -B 1:1:1 $file_prog\n"
expect {
	-re "TASK_ID:($number),MASK:($number)" {
		incr mask_sum $expect_out(2,string)
		exp_continue
	}
	-re "error" {
		send_user "\nFAILURE: some error occurred\n"
		set exit_code 1
		exp_continue
	}
	timeout {
		send_user "\nFAILURE: salloc not responding "
		send_user "or failure to recognize prompt\n"
		set exit_code 1
	}
	-re $prompt
}

if {$mask_sum != $want_mask} {
	send_user "\nFAILURE: affinity mask inconsistency ($mask_sum,$want_mask)\n"
	set exit_code 1
}
#############################################################################
#
# Run varying number of sockets, verify task count and number of set bits
#
# For every socket count from 1 to num_sockets, both the number of TASK_ID
# lines and the total number of set bits across all reported masks must
# equal this_cnt * num_cores * num_threads.
#
for {set this_cnt 1} {$this_cnt <= $num_sockets} {incr this_cnt} {
	set expected_tasks [expr {$this_cnt * $num_cores * $num_threads}]
	set num_tasks 0
	set num_bits 0
	set task_mask 0
	send "$srun -B $this_cnt-$this_cnt:$num_cores:$num_threads $file_prog\n"
	expect {
		-re "TASK_ID:($number),MASK:($number)" {
			set this_mask $expect_out(2,string)
			incr task_mask $this_mask
			incr num_tasks
			# Add the low bit to the tally, then shift it out, until
			# no set bits remain.
			for {} {$this_mask > 0} {set this_mask [expr {$this_mask >> 1}]} {
				incr num_bits [expr {$this_mask & 1}]
			}
			exp_continue
		}
		-re "error" {
			send_user "\nFAILURE: some error occurred\n"
			set exit_code 1
			exp_continue
		}
		timeout {
			send_user "\nFAILURE: salloc not responding "
			send_user "or failure to recognize prompt\n"
			set exit_code 1
		}
		-re $prompt
	}

	if {$num_tasks != $expected_tasks} {
		send_user "\nFAILURE: number of tasks inconsistent ($num_tasks,$expected_tasks)\n"
		set exit_code 1
	}
	if {$num_bits != $expected_tasks} {
		send_user "\nFAILURE: number of set bits inconsistent ($num_bits,$expected_tasks)\n"
		set exit_code 1
	}
}
#############################################################################
#
# Run varying number of cores, verify task count and number of set bits
#
# For every core count from 1 to num_cores, both the number of TASK_ID
# lines and the total number of set bits across all reported masks must
# equal num_sockets * this_cnt * num_threads.
#
for {set this_cnt 1} {$this_cnt <= $num_cores} {incr this_cnt} {
	set expected_tasks [expr {$num_sockets * $this_cnt * $num_threads}]
	set num_tasks 0
	set num_bits 0
	set task_mask 0
	send "$srun -B $num_sockets:$this_cnt-$this_cnt:$num_threads $file_prog\n"
	expect {
		-re "TASK_ID:($number),MASK:($number)" {
			set this_mask $expect_out(2,string)
			incr task_mask $this_mask
			incr num_tasks
			# Add the low bit to the tally, then shift it out, until
			# no set bits remain.
			for {} {$this_mask > 0} {set this_mask [expr {$this_mask >> 1}]} {
				incr num_bits [expr {$this_mask & 1}]
			}
			exp_continue
		}
		-re "error" {
			send_user "\nFAILURE: some error occurred\n"
			set exit_code 1
			exp_continue
		}
		timeout {
			send_user "\nFAILURE: salloc not responding "
			send_user "or failure to recognize prompt\n"
			set exit_code 1
		}
		-re $prompt
	}

	if {$num_tasks != $expected_tasks} {
		send_user "\nFAILURE: number of tasks inconsistent ($num_tasks,$expected_tasks)\n"
		set exit_code 1
	}
	if {$num_bits != $expected_tasks} {
		send_user "\nFAILURE: number of set bits inconsistent ($num_bits,$expected_tasks)\n"
		set exit_code 1
	}
}
#############################################################################
#
# Run varying number of threads, verify task count and number of set bits
#
# For every thread count from 1 to num_threads, both the number of TASK_ID
# lines and the total number of set bits across all reported masks must
# equal num_sockets * num_cores * this_cnt.
#
for {set this_cnt 1} {$this_cnt <= $num_threads} {incr this_cnt} {
	set expected_tasks [expr {$num_sockets * $num_cores * $this_cnt}]
	set num_tasks 0
	set num_bits 0
	set task_mask 0
	send "$srun -B $num_sockets:$num_cores:$this_cnt-$this_cnt $file_prog\n"
	expect {
		-re "TASK_ID:($number),MASK:($number)" {
			set this_mask $expect_out(2,string)
			incr task_mask $this_mask
			incr num_tasks
			# Add the low bit to the tally, then shift it out, until
			# no set bits remain.
			for {} {$this_mask > 0} {set this_mask [expr {$this_mask >> 1}]} {
				incr num_bits [expr {$this_mask & 1}]
			}
			exp_continue
		}
		-re "error" {
			send_user "\nFAILURE: some error occurred\n"
			set exit_code 1
			exp_continue
		}
		timeout {
			send_user "\nFAILURE: salloc not responding "
			send_user "or failure to recognize prompt\n"
			set exit_code 1
		}
		-re $prompt
	}

	if {$num_tasks != $expected_tasks} {
		send_user "\nFAILURE: number of tasks inconsistent ($num_tasks,$expected_tasks)\n"
		set exit_code 1
	}
	if {$num_bits != $expected_tasks} {
		send_user "\nFAILURE: number of set bits inconsistent ($num_bits,$expected_tasks)\n"
		set exit_code 1
	}
}
#############################################################################
#
# Run varying cpus per task, verify task count and number of set bits
#
# For every cpus-per-task value from 1 to task_cnt, exactly one TASK_ID line
# is expected and its mask must contain this_cnt set bits.
#
for {set this_cnt 1} {$this_cnt <= $task_cnt} {incr this_cnt} {
	set expected_tasks 1
	set num_tasks 0
	set num_bits 0
	set task_mask 0
	send "$srun -c$this_cnt -B 1:1:1 $file_prog\n"
	expect {
		-re "TASK_ID:($number),MASK:($number)" {
			set this_mask $expect_out(2,string)
			incr task_mask $this_mask
			incr num_tasks
			# Add the low bit to the tally, then shift it out, until
			# no set bits remain.
			for {} {$this_mask > 0} {set this_mask [expr {$this_mask >> 1}]} {
				incr num_bits [expr {$this_mask & 1}]
			}
			exp_continue
		}
		-re "error" {
			send_user "\nFAILURE: some error occurred\n"
			set exit_code 1
			exp_continue
		}
		timeout {
			send_user "\nFAILURE: salloc not responding "
			send_user "or failure to recognize prompt\n"
			set exit_code 1
		}
		-re $prompt
	}

	if {$num_tasks != $expected_tasks} {
		send_user "\nFAILURE: number of tasks inconsistent ($num_tasks,$expected_tasks)\n"
		set exit_code 1
	}
	if {$num_bits != $this_cnt} {
		send_user "\nFAILURE: number of set bits inconsistent ($num_bits,$this_cnt)\n"
		set exit_code 1
	}
}
#############################################################################
#
# Run a job step with plane distribution to exercise option
#
# The masks of all tasks are summed.  The expected total is task_cnt times a
# value with the low task_cnt bits set (presumably because each task reports
# the full allocation mask when no -B/-c binding is requested -- confirm).
#
set expected_mask [expr {((1 << $task_cnt) - 1) * $task_cnt}]
set task_mask 0
send "$srun -n $task_cnt -m plane=4 $file_prog\n"
expect {
	-re "TASK_ID:($number),MASK:($number)" {
		incr task_mask $expect_out(2,string)
		exp_continue
	}
	-re "error" {
		send_user "\nFAILURE: some error occurred\n"
		set exit_code 1
		exp_continue
	}
	timeout {
		send_user "\nFAILURE: salloc not responding "
		send_user "or failure to recognize prompt\n"
		set exit_code 1
	}
	-re $prompt
}

if {$task_mask != $expected_mask} {
	send_user "\nFAILURE: affinity mask inconsistency ($task_mask,$expected_mask)\n"
	set exit_code 1
}
#############################################################################
#
# Terminate the job, free the allocation
#
# The shell inside salloc is told to exit; end-of-file on the spawned
# process is the normal outcome.  On timeout salloc is killed instead.
#
send "exit\n"
expect {
	-re "error" {
		send_user "\nFAILURE: some error occurred\n"
		set exit_code 1
	}
	timeout {
		send_user "\nFAILURE: salloc not responding "
		send_user "or failure to recognize prompt\n"
		slow_kill $salloc_pid
		set exit_code 1
	}
	eof {
		wait
	}
}

# The compiled helper is only removed when the test succeeded.
if {$exit_code == 0} {
	exec $bin_rm -f $file_prog
	send_user "\nSUCCESS\n"
}

exit $exit_code
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment