diff --git a/testsuite/expect/test18.29 b/testsuite/expect/test18.29 index 7de8986afa298c255a5bbffb35e6adbedd7b5ca0..d756f7c60e0703c4dde8c063b54302270f1d8495 100755 --- a/testsuite/expect/test18.29 +++ b/testsuite/expect/test18.29 @@ -1,17 +1,12 @@ #!/usr/bin/expect ############################################################################ # Purpose: Test of SLURM functionality -# Test of cpus-per-task option on a single node (--cpus-per-task -# option). +# Test of slaunch --cpus-per-task option. # # Output: "TEST: #.#" followed by "SUCCESS" if test was successful, OR # "WARNING: ..." with an explanation of why the test can't be made, OR # "FAILURE: ..." otherwise with an explanation of the failure, OR # anything else indicates a failure mode that must be investigated. -# -# NOTE: This assumes node names are of the form <alpha><number>, where -# the value of <number> indicates the nodes relative location. -# Change tha node name parsing logic as needed for other formats. ############################################################################ # Copyright (C) 2002-2006 The Regents of the University of California. # Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). @@ -56,115 +51,143 @@ make_bash_script $file_in " env | grep SLURM_JOB_CPUS_PER_NODE $bin_hostname" -# -# Submit a 1 node job to determine the node's CPU count -# set cpu_cnt 0 -set fat_nodes 0 -set host "" set job_id 0 set timeout $max_job_delay -set task_cnt 0 +set node_cnt 2 + +set available [available_nodes [default_partition]] +if {$available < 2} { + send_user "\nWARNING: not enough nodes currently available" + send_user " ($available avail, 2)\n" + exit $exit_code +} -spawn $salloc -N1 -t1 $slaunch --cpus-per-task=1 -l $file_in +# Script will print SLURM_JOB_CPUS_PER_NODE, then hold the allocation +# for a long time. +set script_name test18.28.sh +make_bash_script $script_name { + printenv SLURM_JOB_CPUS_PER_NODE + sleep 600 +} +set salloc_pid [spawn $salloc -N $node_cnt $slaunch -n 1 $script_name] +set salloc_spawn_id $spawn_id expect { - -re "Granted job allocation ($number)" { - set job_id $expect_out(1,string) - exp_continue - } - -re "SLURM_JOB_CPUS_PER_NODE=($number)" { - set cpu_cnt $expect_out(1,string) - exp_continue - } - -re "($number): ($alpha_numeric)" { - set task $expect_out(1,string) - set host $expect_out(2,string) - incr task_cnt - exp_continue - } - -re "Task count specification invalid" { - send_user "\nWARNING: nodes have too many CPUs for test\n" - set fat_nodes 1 - exp_continue - } - timeout { - send_user "\nFAILURE: salloc not responding\n" - if {$job_id == 0} { - kill_salloc - } else { - cancel_job $job_id - } - set exit_code 1 - exp_continue - } - eof { - wait - } -} -# On Blue Gene/L each node has 1024 CPUs, more than slurmd can launch -if {$fat_nodes == 1} { - exit 0 + -re "Granted job allocation ($number)" { + set jobid $expect_out(1,string) + } + timeout { + slow_kill $salloc_pid + set exit_code 1 + } } +expect { + -re "($number)" { + set cpu_cnt $expect_out(1,string) + } + timeout { + slow_kill $salloc_pid + set exit_code 1 + } +} +file delete $script_name + +send_user "cpu count is $cpu_cnt\n" -# # Check CPU count +if {$cpu_cnt < 2} { + send_user "\nWARNING: The node only has one CPU\n" + exec $scancel $jobid + exit $exit_code +} + +# +# Now start the real testing of --cpus-per-task. # -if {[string compare $host ""] == 0} { - send_user "\nFAILURE: Did not get hostname\n" - set exit_code 1 + +# +# Test A +# Should run: --cpus-per-task=1, -n=(node_count*cpu count) +# +set task_cnt 0 +set pid [spawn $slaunch --jobid $jobid -l --cpus-per-task 1 -n [expr $node_cnt * $cpu_cnt] $bin_printenv SLURMD_NODENAME] +expect { + -re "($number): ($alpha_numeric)" { + incr task_cnt + exp_continue + } + eof { + wait + } } -if {$cpu_cnt != $task_cnt} { - send_user "FAILURE: should have run $cpu_cnt tasks (one per CPU) " - send_user "instead of $task_cnt tasks\n" - set exit_code 1 +set expected_task_cnt [expr $node_cnt * $cpu_cnt] +if {$task_cnt < $expected_task_cnt} { + send_user "\nFAILURE Test A: Fewer tasks ($task_cnt) then expected ($expected_task_cnt)\n" + set exit_code 1 } -if {$cpu_cnt < 2} { - send_user "\nWARNING: The node only has one CPU\n" - exit $exit_code +if {$task_cnt > $expected_task_cnt} { + send_user "\nFAILURE Test A: More tasks ($task_cnt) then expected ($expected_task_cnt)\n" + set exit_code 1 } # -# Check task count when using 2 cpus per task +# Test B +# Should NOT run: --cpus-per-task=2, -n=(2*cpu count) # -set job_id 0 -set task_cnt 0 -spawn $salloc -N1 -t1 --nodelist=$host $slaunch --cpus-per-task=2 -l $bin_hostname +set task_cnt 0 +set pid [spawn $slaunch --jobid $jobid -l --cpus-per-task $cpu_cnt -n [expr $node_cnt * $cpu_cnt] $bin_printenv SLURMD_NODENAME] expect { - -re "Granted job allocation ($number)" { - set job_id $expect_out(1,string) - exp_continue - } - -re "Invalid node name specified" { - send_user "\nWARNING: Appears you are using " - send_user "multiple slurmd testing.\n" - send_user "This test won't work in that fashion.\n" - exit 0 - } - -re "$number: ($alpha_numeric)" { - incr task_cnt - exp_continue - } - timeout { - send_user "\nFAILURE: srun not responding\n" - if {$job_id == 0} { - kill_salloc - } else { - cancel_job $job_id - } - set exit_code 1 - exp_continue - } - eof { - wait - } + -re "($number): ($alpha_numeric)" { + incr task_cnt + exp_continue + } + "error: Failed creating job step context" { + send_user "This error was expected!\n" + } + eof { + wait + } +} +if {$task_cnt > 0} { + send_user "FAILURE Test B: Test should NOT have run.\n" + set exit_code 1 } + # -# Check CPU count +# Test C +# Should run: --cpus-per-task=2, -n=(cpu count) # -if {$task_cnt != [expr $cpu_cnt / 2]} { - send_user "\nFAILURE: Improper task count for given cpus-per-task\n" - set exit_code 1 +set task_cnt 0 +set pid [spawn $slaunch --jobid $jobid -l --cpus-per-task $cpu_cnt -n $node_cnt $bin_printenv SLURMD_NODENAME] +expect { + -re "($number): ($alpha_numeric)" { + incr task_cnt + exp_continue + } + eof { + wait + } +} +set expected_task_cnt $node_cnt +if {$task_cnt < $expected_task_cnt} { + send_user "\nFAILURE Test C: Fewer tasks ($task_cnt) then expected ($expected_task_cnt)\n" + set exit_code 1 +} +if {$task_cnt > $expected_task_cnt} { + send_user "\nFAILURE Test C: More tasks ($task_cnt) then expected ($expected_task_cnt)\n" + set exit_code 1 +} + +# +# Clean up the job allocation. +# +exec $scancel $jobid +set spawn_id $salloc_spawn_id +expect { + eof { + wait + } }