Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
S
Slurm
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package Registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Service Desk
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Terms and privacy
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
tud-zih-energy
Slurm
Commits
31f447eb
Commit
31f447eb
authored
13 years ago
by
Morris Jette
Browse files
Options
Downloads
Patches
Plain Diff
Bluegene/Q - Add test for how job steps are packed onto job's cnodes
parent
37026b24
No related branches found
No related tags found
No related merge requests found
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
testsuite/expect/test8.21
+237
-0
237 additions, 0 deletions
testsuite/expect/test8.21
testsuite/expect/test8.21.bash
+23
-0
23 additions, 0 deletions
testsuite/expect/test8.21.bash
with
260 additions
and
0 deletions
testsuite/expect/test8.21
0 → 100755
+
237
−
0
View file @
31f447eb
#!/usr/bin/expect
############################################################################
# Purpose: Test of SLURM functionality
# Bluegene/Q only: Test that multple job step allocations are
# properly packed within the job's allocation
#
# Output: "TEST: #.#" followed by "SUCCESS" if test was successful, OR
# "FAILURE: ..." otherwise with an explanation of the failure, OR
# anything else indicates a failure mode that must be investigated.
############################################################################
# Copyright (C) 2011 SchedMD LLC
# Written by Morris Jette <jette@schedmd.gov>
#
# This file is part of SLURM, a resource management program.
# For details, see <http://www.schedmd.com/slurmdocs/>.
# Please also read the included file: DISCLAIMER.
#
# SLURM is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 2 of the License, or (at your option)
# any later version.
#
# SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
# details.
#
# You should have received a copy of the GNU General Public License along
# with SLURM; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
############################################################################
source ./globals
set test_id "8.21"
set exit_code 0
set file_prog "test$test_id.bash"
set job_id 0
set job_size 32
print_header $test_id
if {([test_bluegene] == 0) || [string compare [get_bluegene_type] "Q"]} {
send_user "\nWARNING: This test is only compatable with Bluegene/Q systems\n"
exit $exit_code
}
#
# Spawn a job via salloc
#
set matches 0
set timeout $max_job_delay
set salloc_pid [spawn $salloc -N$job_size -t1 $bin_bash]
expect {
-re "Granted job allocation ($number)" {
set job_id $expect_out(1,string)
exp_continue
}
-re $prompt {
#send_user "Job initiated\n"
}
timeout {
send_user "\nFAILURE: salloc not responding\n"
if {$job_id != 0} {
cancel_job $job_id
}
slow_kill [expr 0 - $salloc_pid]
exit 1
}
}
if {$job_id == 0} {
send_user "\nFAILURE: did not get job_id\n"
exit 1
}
#
# Determine the job's allocation dimensions
#
set timeout 5
set job_start1 -1
send "$scontrol show job $job_id\r"
expect {
-re "BP_List=($alpha_numeric).($digit)($digit)($digit)($digit)($digit)x($digit)($digit)($digit)($digit)($digit)" {
set job_prefix $expect_out(1,string)
set job_start1 $expect_out(2,string)
set job_start2 $expect_out(3,string)
set job_start3 $expect_out(4,string)
set job_start4 $expect_out(5,string)
set job_start5 $expect_out(6,string)
set job_fini1 $expect_out(7,string)
set job_fini2 $expect_out(8,string)
set job_fini3 $expect_out(9,string)
set job_fini4 $expect_out(10,string)
set job_fini5 $expect_out(11,string)
exp_continue
}
-re $prompt {
#break
}
timeout {
send_user "\nFAILURE: scontrol not responding\n"
set exit_code 1
}
}
if {$job_start1 == -1} {
send_user "\nFAILURE: did not get job dimensions\n"
cancel_job $job_id
exit 1
}
send_user "\nJob allocation\n"
send_user "prefix: $job_prefix\n"
send_user "dim 1: $job_start1 to $job_fini1 "
send_user "dim 2: $job_start2 to $job_fini2 "
send_user "dim 3: $job_start3 to $job_fini3 "
send_user "dim 4: $job_start4 to $job_fini4 "
send_user "dim 5: $job_start5 to $job_fini5\n"
set job_dim1 [expr $job_fini1 - $job_start1 + 1]
set job_dim2 [expr $job_fini2 - $job_start2 + 1]
set job_dim3 [expr $job_fini3 - $job_start3 + 1]
set job_dim4 [expr $job_fini4 - $job_start4 + 1]
set job_dim5 [expr $job_fini5 - $job_start5 + 1]
set actual_job_size [expr $job_dim1 * $job_dim2 * $job_dim3 * $job_dim4 * $job_dim5]
send_user "size: $actual_job_size c-nodes\n"
if {$actual_job_size < $job_size} {
send_user "\nFAILURE: job allocation too small ($actual_job_size < $job_size)\n"
cancel_job $job_id
exit 1
}
if {$actual_job_size != $job_size} {
# This is a legitimate condition. A request for 5 c-nodes requires
# at least 6 c-nodes (3x2x1x1x1).
send_user "\nWARNING: job allocation too large ($actual_job_size != $job_size)\n"
}
#
# Build an array to count the job's c-nodes which have been allocated to steps
#
for {set dim1 $job_start1} {$dim1 <= $job_fini1} {incr dim1} {
for {set dim2 $job_start2} {$dim2 <= $job_fini2} {incr dim2} {
for {set dim3 $job_start3} {$dim3 <= $job_fini3} {incr dim3} {
for {set dim4 $job_start4} {$dim4 <= $job_fini4} {incr dim4} {
for {set dim5 $job_start5} {$dim5 <= $job_fini5} {incr dim5} {
set use_cnt($dim1,$dim2,$dim3,$dim4,$dim5) 0
}
}
}
}
}
set timeout 30
send "./$file_prog $srun $squeue $job_id $actual_job_size\r"
expect {
-re "BP_List=($alpha_numeric).($digit)($digit)($digit)($digit)($digit)x($digit)($digit)($digit)($digit)($digit)" {
set step_prefix $expect_out(1,string)
set step_start1 $expect_out(2,string)
set step_start2 $expect_out(3,string)
set step_start3 $expect_out(4,string)
set step_start4 $expect_out(5,string)
set step_start5 $expect_out(6,string)
set step_fini1 $expect_out(7,string)
set step_fini2 $expect_out(8,string)
set step_fini3 $expect_out(9,string)
set step_fini4 $expect_out(10,string)
set step_fini5 $expect_out(11,string)
for {set dim1 $step_start1} {$dim1 <= $step_fini1} {incr dim1} {
for {set dim2 $step_start2} {$dim2 <= $step_fini2} {incr dim2} {
for {set dim3 $step_start3} {$dim3 <= $step_fini3} {incr dim3} {
for {set dim4 $step_start4} {$dim4 <= $step_fini4} {incr dim4} {
for {set dim5 $step_start5} {$dim5 <= $step_fini5} {incr dim5} {
if [info exists use_cnt($dim1,$dim2,$dim3,$dim4,$dim5)] {
incr use_cnt($dim1,$dim2,$dim3,$dim4,$dim5)
} else {
send_user "\nFAILURE: invalid step cnode allocation at "
send_user "\[$dim1,$dim2,$dim3,$dim4,$dim5\]/"
set exit_code 1
}
}
}
}
}
}
exp_continue
}
-re "BP_List=($alpha_numeric).($digit)($digit)($digit)($digit)($digit)" {
set step_prefix $expect_out(1,string)
set dim1 $expect_out(2,string)
set dim2 $expect_out(3,string)
set dim3 $expect_out(4,string)
set dim4 $expect_out(5,string)
set dim5 $expect_out(6,string)
if [info exists use_cnt($dim1,$dim2,$dim3,$dim4,$dim5)] {
incr use_cnt($dim1,$dim2,$dim3,$dim4,$dim5)
} else {
send_user "\nFAILURE: invalid step cnode allocation at "
send_user "\[$dim1,$dim2,$dim3,$dim4,$dim5\]\n"
set exit_code 1
}
exp_continue
}
-re $prompt {
send "exit\r"
exp_continue
}
timeout {
send_user "\nFAILURE: job not responding\n"
set exit_code 1
}
}
#
# Test that each of the job's c-nodes have been allocated once to some step
#
for {set dim1 $job_start1} {$dim1 <= $job_fini1} {incr dim1} {
for {set dim2 $job_start2} {$dim2 <= $job_fini2} {incr dim2} {
for {set dim3 $job_start3} {$dim3 <= $job_fini3} {incr dim3} {
for {set dim4 $job_start4} {$dim4 <= $job_fini4} {incr dim4} {
for {set dim5 $job_start5} {$dim5 <= $job_fini5} {incr dim5} {
if {$use_cnt($dim1,$dim2,$dim3,$dim4,$dim5) != 1} {
send_user "\nFAILURE: cnode at \[$dim1,$dim2,$dim3,$dim4,$dim5\] "
send_user "allocated $use_cnt($dim1,$dim2,$dim3,$dim4,$dim5) times\n"
set exit_code 1
}
}
}
}
}
}
if {$exit_code == 0} {
send_user "\nSUCCESS\n"
} else {
cancel_job $job_id
}
exit $exit_code
This diff is collapsed.
Click to expand it.
testsuite/expect/test8.21.bash
0 → 100755
+
23
−
0
View file @
31f447eb
#!/bin/bash
if
[
$#
-ne
4
]
;
then
echo
"test8.21.bash <srun_path> <squeue_path> <job_id> <job_size>"
exit
1
fi
srun
=
$1
squeue
=
$2
job_id
=
$3
job_size
=
$4
$srun
-N1
--test-only
/bin/true
sleep
5
while
[
$job_size
-ge
2
]
do
job_size
=
`
expr
$job_size
/ 2
`
$srun
-N
$job_size
--test-only
sleep
50 &
sleep
1
done
$srun
-N1
--test-only
sleep
50 &
sleep
5
$squeue
--jobs
=
$job_id
--steps
--noheader
--format
=
'Step_ID=%i BP_List=%N'
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment