Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
S
Slurm
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package Registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Service Desk
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Terms and privacy
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
tud-zih-energy
Slurm
Commits
9d6f193e
Commit
9d6f193e
authored
19 years ago
by
Danny Auble
Browse files
Options
Downloads
Patches
Plain Diff
hostfile tests for srun and poe
parent
f9825ef5
No related branches found
Branches containing commit
No related tags found
No related merge requests found
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
testsuite/expect/README
+2
-0
2 additions, 0 deletions
testsuite/expect/README
testsuite/expect/test1.52
+131
-0
131 additions, 0 deletions
testsuite/expect/test1.52
testsuite/expect/test11.7
+136
-0
136 additions, 0 deletions
testsuite/expect/test11.7
with
269 additions
and
0 deletions
testsuite/expect/README
+
2
−
0
View file @
9d6f193e
...
@@ -125,6 +125,7 @@ test1.48 Test of srun mail options (--mail-type and --mail-user options).
...
@@ -125,6 +125,7 @@ test1.48 Test of srun mail options (--mail-type and --mail-user options).
test1.49 Test of srun task-prolog and task-epilog options.
test1.49 Test of srun task-prolog and task-epilog options.
test1.50 Test of running non-existent job, confirm timely termination.
test1.50 Test of running non-existent job, confirm timely termination.
test1.51 Test propagation of umask to spawned tasks.
test1.51 Test propagation of umask to spawned tasks.
test1.52 Test of hostfile logic
**NOTE** The following tests attempt to utilize multiple CPUs or partitions,
**NOTE** The following tests attempt to utilize multiple CPUs or partitions,
The test will print "WARNING" and terminate with an exit code of
The test will print "WARNING" and terminate with an exit code of
...
@@ -282,6 +283,7 @@ test11.3 Test running of Network protocol option (-msg_api)
...
@@ -282,6 +283,7 @@ test11.3 Test running of Network protocol option (-msg_api)
test11.4 Test mpi jobs (must run make in mpi-testscripts dir)
test11.4 Test mpi jobs (must run make in mpi-testscripts dir)
test11.5 Test of checkpoint logic (direct with srun)
test11.5 Test of checkpoint logic (direct with srun)
test11.6 Test of checkpoint logic (with poe)
test11.6 Test of checkpoint logic (with poe)
test11.7 Test of hostfile logic (with poe)
test12.# Testing of sacct command and options
test12.# Testing of sacct command and options
...
...
This diff is collapsed.
Click to expand it.
testsuite/expect/test1.52
0 → 100755
+
131
−
0
View file @
9d6f193e
#!/usr/bin/expect
############################################################################
# Purpose: Test of srun functionality
# Test of hostfile logic (hostfile passed via the MP_HOSTFILE environment variable).
#
#
# Output: "TEST: #.#" followed by "SUCCESS" if test was successful, OR
# "FAILURE: ..." otherwise with an explanation of the failure, OR
# anything else indicates a failure mode that must be investigated.
#
############################################################################
# Copyright (C) 2002 The Regents of the University of California.
# Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
# Written by Danny Auble <da@llnl.gov>
# UCRL-CODE-2002-040.
#
# This file is part of SLURM, a resource management program.
# For details, see <http://www.llnl.gov/linux/slurm/>.
#
# SLURM is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 2 of the License, or (at your option)
# any later version.
#
# SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
# details.
#
# You should have received a copy of the GNU General Public License along
# with SLURM; if not, write to the Free Software Foundation, Inc.,
# 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
############################################################################
source ./globals

# Identification and shared state for the srun hostfile test.
set test_id     "1.52"
set partition   "debug"
set exit_code   0
set num_nodes   2
set num_tasks   2
set node_count  0
set task_count  0
set job_id      0
# Scratch hostfile; it is written just before the second srun run.
set hostfile    "test$test_id.hostfile"

print_header $test_id

# Remove any hostfile left behind by an earlier run.
exec $bin_rm -f $hostfile

# The only commands this test spawns are scontrol and srun, so there is
# deliberately no "poe must be installed" guard here.
#
# Find out if we have enough nodes to test functionality.
# Every "TotalNodes=" value printed by scontrol is checked; the test is
# skipped (WARNING, exit with the current exit_code) if one is below 2.
#
spawn $scontrol show partition
expect {
	-re "TotalNodes=($number)" {
		set node_count $expect_out(1,string)
		if { $node_count < 2 } {
			# Report the node count actually found, not a fixed number.
			send_user "WARNING: system must have at least 2\
				nodes to run this test on. This system\
				only has $node_count.\n"
			exit $exit_code
		}
		exp_continue
	}
	timeout {
		send_user "\nFAILURE: scontrol not responding\n"
		exit 1
	}
	eof {
	}
}
# node0/node1 hold the host name reported by task 0 / task 1 of the most
# recent srun run; 0 means "not seen yet".
set node0 0
set node1 0

#
# Pass 0: run srun without a hostfile and record where each task ran.
# Pass 1: write those two nodes to the hostfile in reverse order, point
#         MP_HOSTFILE at it and run srun again.
#
for {set i 0} {$i<2} {incr i} {
	if { $i==1 } {
		if { $node0 == 0 || $node1 == 0 } {
			send_user "\nFAILURE: node names not set from\
				previous srun run\n"
			exit 1
		}
		# NOTE(review): MP_HOSTFILE is the variable name used here for
		# srun; confirm that srun honors it.
		set env(MP_HOSTFILE) $hostfile
		# Remember the placement of the first run for the final check.
		set 1node0 $node0
		set 1node1 $node1
		set file [open $hostfile "w"]
		puts $file "$node1\n$node0"
		close $file
	}

	#
	# execute srun on two nodes with task-labelled output
	#
	spawn $srun -N2 -l $bin_hostname
	expect {
		-re "0: ($alpha_numeric)" {
			set node0 $expect_out(1,string)
			exp_continue
		}
		-re "1: ($alpha_numeric)" {
			set node1 $expect_out(1,string)
			exp_continue
		}
		-re "slurm job ($number)" {
			set job_id $expect_out(1,string)
			exp_continue
		}
		timeout {
			send_user "\nFAILURE: srun not responding\n"
			# Only cancel when a job id was actually captured, and do
			# not let a failing scancel abort the test script itself.
			if { $job_id != 0 } {
				catch {exec $scancel --quiet $job_id}
			}
			set exit_code 1
		}
		eof {
		}
	}
}
#
# The hostfile listed the nodes in reverse order, so each task must now
# report the node that the other task reported in the first run.
#
foreach {seen wanted} [list $node0 $1node1 $node1 $1node0] {
	if { ![string equal $seen $wanted] } {
		send_user "\nFAILURE: tasks not distributed by hostfile\n"
		set exit_code 1
	}
}

if { $exit_code == 0 } {
	send_user "\nSUCCESS\n"
}
exit $exit_code
This diff is collapsed.
Click to expand it.
testsuite/expect/test11.7
0 → 100755
+
136
−
0
View file @
9d6f193e
#!/usr/bin/expect
############################################################################
# Purpose: Test of POE functionality
# Test of hostfile option (-hostfile).
#
#
# Output: "TEST: #.#" followed by "SUCCESS" if test was successful, OR
# "FAILURE: ..." otherwise with an explanation of the failure, OR
# anything else indicates a failure mode that must be investigated.
#
############################################################################
# Copyright (C) 2002 The Regents of the University of California.
# Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
# Written by Danny Auble <da@llnl.gov>
# UCRL-CODE-2002-040.
#
# This file is part of SLURM, a resource management program.
# For details, see <http://www.llnl.gov/linux/slurm/>.
#
# SLURM is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 2 of the License, or (at your option)
# any later version.
#
# SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
# details.
#
# You should have received a copy of the GNU General Public License along
# with SLURM; if not, write to the Free Software Foundation, Inc.,
# 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
############################################################################
source ./globals

# Test identity and bookkeeping.
set test_id     "11.7"
set hostfile    "test$test_id.hostfile"
set exit_code   0
set job_id      0
set node_count  0
set task_count  0

# Parameters of the poe request.
set partition   "debug"
set num_tasks   2
set num_nodes   2

# Placed in the environment before any process is spawned.
set env(SLURM_LL_API_DEBUG) "2"

print_header $test_id

# Start from a clean slate: drop a hostfile left by a previous run.
exec $bin_rm -f $hostfile

# Without a poe executable there is nothing to test; warn and stop.
if { [file exists $poe] == 0 } {
	send_user "WARNING: poe must be installed on the\
		system to run this test.\n"
	exit $exit_code
}
#
# Find out if we have enough nodes to test functionality.
# Every "TotalNodes=" value printed by scontrol is checked; the test is
# skipped (WARNING, exit with the current exit_code) if one is below 2.
#
spawn $scontrol show partition
expect {
	-re "TotalNodes=($number)" {
		set node_count $expect_out(1,string)
		if { $node_count < 2 } {
			# Report the node count actually found, not a fixed number.
			send_user "WARNING: system must have at least 2\
				nodes to run this test on. This system\
				only has $node_count.\n"
			exit $exit_code
		}
		exp_continue
	}
	timeout {
		send_user "\nFAILURE: scontrol not responding\n"
		exit 1
	}
	eof {
	}
}
# node0/node1 hold the host name reported by task 0 / task 1 of the most
# recent poe run; 0 means "not seen yet".
set node0 0
set node1 0

#
# Pass 0: run poe without a hostfile and record where each task ran.
# Pass 1: write those two nodes to the hostfile in reverse order and run
#         poe again with -hostfile.
#
for {set i 0} {$i<2} {incr i} {
	#
	# poe command line shared by both passes (specific node count);
	# built as a list so each argument stays a single word.
	#
	set poe_cmd [list $poe $bin_hostname -resd yes -rmpool $partition \
		-procs $num_tasks -nodes $num_nodes -retry wait]

	if { $i==1 } {
		if { $node0 == 0 || $node1 == 0 } {
			send_user "\nFAILURE: node names not set from \
				previous poe run\n"
			exit 1
		}
		# Remember the placement of the first run for the final check.
		set 1node0 $node0
		set 1node1 $node1
		set file [open $hostfile "w"]
		puts $file "$node1\n$node0"
		close $file
		lappend poe_cmd -hostfile $hostfile
	}

	# eval expands the list into separate arguments for spawn.
	eval spawn $poe_cmd
	expect {
		-re "0:($alpha_numeric)" {
			set node0 $expect_out(1,string)
			exp_continue
		}
		-re "1:($alpha_numeric)" {
			set node1 $expect_out(1,string)
			exp_continue
		}
		-re "slurm job ($number)" {
			set job_id $expect_out(1,string)
			exp_continue
		}
		timeout {
			send_user "\nFAILURE: poe not responding\n"
			# Only cancel when a job id was actually captured, and do
			# not let a failing scancel abort the test script itself.
			if { $job_id != 0 } {
				catch {exec $scancel --quiet $job_id}
			}
			set exit_code 1
		}
		eof {
		}
	}
}
#
# With the reversed hostfile, task 0 has to land on the node task 1 used
# in the first run, and task 1 on the node task 0 used.
#
set task0_swapped [string equal $node0 $1node1]
set task1_swapped [string equal $node1 $1node0]

if { !$task0_swapped } {
	send_user "\nFAILURE: tasks not distributed by hostfile\n"
	set exit_code 1
}
if { !$task1_swapped } {
	send_user "\nFAILURE: tasks not distributed by hostfile\n"
	set exit_code 1
}

if { $exit_code == 0 } {
	send_user "\nSUCCESS\n"
}
exit $exit_code
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment