diff --git a/testsuite/expect/Makefile.am b/testsuite/expect/Makefile.am index 0f3d48d84a378f4f3ab8e12a9582ef561863ba5d..25d6ecfc54dbf4a89c89060d2b002c09378fb85d 100644 --- a/testsuite/expect/Makefile.am +++ b/testsuite/expect/Makefile.am @@ -76,6 +76,8 @@ EXTRA_DIST = \ test1.59 \ test1.60 \ test1.61 \ + test1.62 \ + test1.62.bash \ test1.80 \ test1.81 \ test1.82 \ diff --git a/testsuite/expect/Makefile.in b/testsuite/expect/Makefile.in index 5682668cb5e23965b2c5e11739ef4f476d13ddd3..49075078c533a7d948dc19a9cd3a5cf73a1283b5 100644 --- a/testsuite/expect/Makefile.in +++ b/testsuite/expect/Makefile.in @@ -346,6 +346,8 @@ EXTRA_DIST = \ test1.59 \ test1.60 \ test1.61 \ + test1.62 \ + test1.62.bash \ test1.80 \ test1.81 \ test1.82 \ diff --git a/testsuite/expect/README b/testsuite/expect/README index 9e86497f70dc1f63fe56b4d019b974bd89ee58c9..57ff95bf1a74f946e1371aaf98f034c24630fe84 100644 --- a/testsuite/expect/README +++ b/testsuite/expect/README @@ -164,6 +164,7 @@ test1.58 Test of srun --jobid for an existing job allocation test1.59 Test of hostfile logic for job steps test1.60 Test of labelling output when writing a file per task or per node test1.61 Test of srun job step time limit +test1.62 Test of gres/gpu plugin (if configured). **NOTE** The following tests attempt to utilize multiple CPUs or partitions, The test will print "WARNING" and terminate with an exit code of diff --git a/testsuite/expect/test1.62 b/testsuite/expect/test1.62 new file mode 100755 index 0000000000000000000000000000000000000000..ee2ba1e9fa5dfafe733880f8999eb89f91f47204 --- /dev/null +++ b/testsuite/expect/test1.62 @@ -0,0 +1,134 @@ +#!/usr/bin/expect +############################################################################ +# Purpose: Test of SLURM functionality +# Test of gres/gpu plugin (if configured). +# +# Output: "TEST: #.#" followed by "SUCCESS" if test was successful, OR +# "FAILURE: ..." 
otherwise with an explanation of the failure, OR +# anything else indicates a failure mode that must be investigated. +############################################################################ +# Copyright (C) 2010 Lawrence Livermore National Security +# Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). +# Written by Morris Jette <jette1@llnl.gov> +# CODE-OCEC-09-009. All rights reserved. +# +# This file is part of SLURM, a resource management program. +# For details, see <https://computing.llnl.gov/linux/slurm/>. +# Please also read the included file: DISCLAIMER. +# +# SLURM is free software; you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free +# Software Foundation; either version 2 of the License, or (at your option) +# any later version. +# +# SLURM is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +# details. +# +# You should have received a copy of the GNU General Public License along +# with SLURM; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+############################################################################
+source ./globals
+
+set test_id     "1.62"
+set exit_code   0
+set file_in     "test$test_id.bash"
+
+#
+# run_gpu_test - run $file_in via srun with --gres=gpu:<gres_cnt> and check
+# that the CUDA_VISIBLE_DEVICES it echoes lists gres_cnt distinct devices.
+#
+# gres_cnt: number of GPUs to request
+# Returns 0 on success or if the request can not be allocated (WARNING
+# only), 1 if device numbers repeat or their count is not gres_cnt.
+# Sets the global exit_code to 1 if srun stops responding.
+#
+proc run_gpu_test { gres_cnt } {
+	global exit_code max_job_delay srun number file_in
+
+	# expect reads "timeout" from the local scope first
+	set timeout $max_job_delay
+	set bad_format 0
+	set devices 0
+	set invalid 0
+	set srun_pid [spawn $srun -N1 -n1 --gres=gpu:$gres_cnt -t1 $file_in]
+	expect {
+		-re "Unable to allocate" {
+			incr invalid
+			exp_continue
+		}
+		-re "CUDA_VISIBLE_DEVICES=(${number}(,${number})*)" {
+			# Accept a comma separated list of any length; a
+			# repeated entry shrinks the -unique sorted list
+			set dev_list [split $expect_out(1,string) ","]
+			set uniq_cnt [llength [lsort -unique $dev_list]]
+			if {$uniq_cnt != [llength $dev_list]} {
+				incr bad_format
+			}
+			incr devices [llength $dev_list]
+			exp_continue
+		}
+		timeout {
+			send_user "\nFAILURE: srun not responding\n"
+			slow_kill $srun_pid
+			# Global, so a hang fails the test whatever was counted
+			set exit_code 1
+		}
+		eof {
+			wait
+		}
+	}
+	# A refused allocation is reported but is not a test failure
+	if {$invalid != 0} {
+		send_user "\nWARNING: Insufficient resources to test $gres_cnt GPUs\n"
+		return 0
+	} elseif {$bad_format != 0} {
+		send_user "\nFAILURE: Duplicated device number in GRES allocation\n"
+		return 1
+	} elseif {$devices != $gres_cnt} {
+		send_user "\nFAILURE: Expected $gres_cnt GPUs, but was allocated $devices\n"
+		return 1
+	}
+	return 0
+}
+
+print_header $test_id
+
+#
+# Test if gres/gpu is configured
+#
+log_user 0
+set gres_gpu 0
+spawn $scontrol show config
+expect {
+	-re "GresTypes *= \[a-zA-Z0-9_\,\-\]*gpu" {
+		set gres_gpu 1
+		exp_continue
+	}
+	eof {
+		wait
+	}
+}
+log_user 1
+if {$gres_gpu != 1} {
+	send_user "\nWARNING: This test can not be run without gres/gpu configured\n"
+	exit $exit_code
+}
+
+#
+# Spawn a job via srun to print environment variables and
+# check the count of GPU devices allocated; stop at the first failure
+#
+foreach gres_cnt {1 2 3} {
+	if {[run_gpu_test $gres_cnt] != 0} {
+		incr exit_code
+		break
+	}
+}
+
+if {$exit_code == 0} {
+	send_user "\nSUCCESS\n"
+}
+exit $exit_code
diff --git a/testsuite/expect/test1.62.bash b/testsuite/expect/test1.62.bash
new file mode 100755
index 0000000000000000000000000000000000000000..50e9ee88a1848bf9ca588355fe04bfea6698043c
--- /dev/null
+++ b/testsuite/expect/test1.62.bash
@@ -0,0 +1,2 @@
+#!/bin/bash
+echo CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES