Skip to content
Snippets Groups Projects
Commit 179d2502 authored by Morris Jette's avatar Morris Jette
Browse files

Refactor CPU frequency test

Prevent test failure if the compute node does not permit user
  control over CPU frequency (no "userspace" governor).
parent 7118060a
No related branches found
No related tags found
No related merge requests found
......@@ -31,7 +31,9 @@
source ./globals
set test_id 1.75
set file_in "test$test_id\_sc"
set file_id "test$test_id\_id.bash"
set file_in "test$test_id\.bash"
set file_out "test$test_id\.out"
set node ""
set threads 0
set job_id 0
......@@ -75,17 +77,17 @@ if {[test_cpu_affinity_or_cgroup] == 0} {
proc sub_job { freq } {
global srun sacct node threads job_id number wait_for_job float timeout exit_code
global alpha_numeric_under
global alpha_numeric_under file_id
set timeout 120
array set this_freq $freq
foreach option [array names this_freq] {
send_user "\n======= TEST FREQUENCY $option =======\n"
set job_id 0
set srun_pid [spawn $srun -v -t1 --cpu-freq=$option -n$threads -w$node sleep 5]
set srun_pid [spawn $srun -t1 --cpu-freq=$option -n$threads -w$node $file_id]
expect {
-re "launching ($number).0" {
-re "SLURM_JOB_ID=($number)" {
set job_id $expect_out(1,string)
exp_continue
}
......@@ -97,7 +99,6 @@ proc sub_job { freq } {
wait
}
}
if {$job_id == 0} {
send_user "\nFAILURE: srun did not submit job\b"
exit 1
......@@ -132,10 +133,11 @@ proc sub_job { freq } {
return [array get this_freq]
}
make_bash_script $file_in "sleep 2"
make_bash_script $file_id "echo SLURM_JOB_ID=\$SLURM_JOB_ID"
make_bash_script $file_in "cat /sys/devices/system/cpu/cpu0/cpufreq/scaling_available_governors"
# Get a node that we can use
spawn $sbatch -N1 -t1 -o/dev/null --exclusive $file_in
# Identify a node that we can use and available governors
spawn $sbatch -N1 -t1 -o/dev/null --exclusive -o $file_out $file_in
expect {
-re "Submitted batch job ($number)" {
set job_id $expect_out(1,string)
......@@ -149,13 +151,34 @@ expect {
wait
}
}
if {$job_id == 0} {
send_user "\nFAILURE: sbatch did not submit job\n"
exit 1
}
wait_for_job $job_id RUNNING
if {[wait_for_job $job_id "DONE"] != 0} {
send_user "\nFAILURE: waiting for job to complete\n"
cancel_job $job_id
exit 1
}
#
# Decide whether the node permits direct user control over CPU frequency.
# $file_out is the output of the batch job submitted above, which is expected
# to contain the node's list of available cpufreq governors. Explicit
# frequency requests need the "userspace" governor, so look for that name
# specifically; the presence of other governors (e.g. "ondemand") does not
# imply that a user-selected frequency can be applied.
#
set userspace_governor 0
if {[wait_for_file $file_out] == 0} {
	spawn $bin_cat $file_out
	expect {
		-re "userspace" {
			set userspace_governor 1
			exp_continue
		}
		eof {
			wait
		}
	}
}
# Not a test failure: the node simply cannot run this test, so end cleanly.
if {$userspace_governor == 0} {
	send_user "\nWARNING: Node configuration prevents direct control over CPU frequency\n"
	exit 0
}
set match 0
spawn $scontrol show job $job_id
......@@ -198,13 +221,15 @@ cancel_job $job_id
array set freq_lvl_1 [sub_job [array get freq_lvl_1]]
send_user "\n======= Reported frequencies =======\n"
foreach name [array names freq_lvl_1] {
send_user "$name is $freq_lvl_1($name) GHz\n"
}
if { (($freq_lvl_1(low) > $freq_lvl_1(medium)) ||
($freq_lvl_1(medium) > $freq_lvl_1(high)) ||
($freq_lvl_1(highm1) > $freq_lvl_1(high)))} {
send_user "\nFAILURE: CPU frequency values are not valid\n"
foreach name [array names freq_lvl_1] {
send_user "$name is $freq_lvl_1($name)\n"
}
exit 1
}
......@@ -213,14 +238,11 @@ array set freq_lvl_2 [sub_job [array get freq_lvl_2]]
if {($freq_lvl_2(conservative) == 0) || ($freq_lvl_2(ondemand) == 0)
|| ($freq_lvl_2(performance) == 0) || ($freq_lvl_2(powersave) == 0)} {
send_user "\nFAILURE: CPU frequency values are invalid\n"
foreach name [array names freq_lvl_2] {
send_user "$name is $freq_lvl_2($name)\n"
}
set exit_code 1
}
if {$exit_code == 0} {
exec $bin_rm -f $file_in
exec $bin_rm -f $file_id $file_in $file_out
send_user "\nSUCCESS\n"
}
exit $exit_code
......@@ -148,6 +148,7 @@ if {$no_start != 0} {
#
if {[wait_for_job $job_id "DONE"] != 0} {
send_user "\nFAILURE: waiting for job to complete\n"
cancel_job $job_id
set exit_code 1
}
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment