Skip to content
Snippets Groups Projects
Commit 518affde authored by Danny Auble's avatar Danny Auble
Browse files

Merge remote-tracking branch 'origin/slurm-14.03'

parents 05acf571 7b91a031
No related branches found
No related tags found
No related merge requests found
...@@ -38,28 +38,151 @@ set exit_code 0 ...@@ -38,28 +38,151 @@ set exit_code 0
set matches 0 set matches 0
set name_string "\[a-zA-Z0-9\]\[^ ]*" set name_string "\[a-zA-Z0-9\]\[^ ]*"
set node_state "" set node_state ""
set node_name "" set num_nodes 0
set num_cpus 0
set mismatches 0 set mismatches 0
# We start the node position of the array at index 2 (the third node) because
# the first 2 nodes will be used for the allocated state, since scontrol cannot
# change a node's state to allocated
set node_pos 2
array set node_list {}
# change_state - ask scontrol to move a set of nodes into a new state.
#
#   state - value passed to scontrol as state=<state>
#   nodes - value passed to scontrol as nodename=<nodes>
#
# The update is always submitted with reason=test. If scontrol times out,
# a failure message is printed and the global exit_code is set to 1.
proc change_state { state nodes } {
	global scontrol exit_code

	set node_arg  "nodename=$nodes"
	set state_arg "state=$state"

	spawn $scontrol update $node_arg $state_arg reason=test
	expect {
		eof {
			wait
		}
		timeout {
			send_user "\nFAILURE: scontrol is not responding\n"
			set exit_code 1
		}
	}
}
# reset_state - return every node recorded in node_list to the idle state.
#
# Issues one "scontrol update nodename=<node> state=idle" per entry of the
# global node_list array. A scontrol timeout is reported and recorded in the
# global exit_code, but the remaining nodes are still processed.
proc reset_state { } {
	global scontrol node_list exit_code

	foreach num [array names node_list] {
		spawn $scontrol update nodename=$node_list($num) state=idle
		expect {
			timeout {
				send_user "\nFAILURE: scontrol is not responding\n"
				set exit_code 1
			}
			eof {
				wait
			}
		}
	}
}
print_header $test_id print_header $test_id
if (![string compare $partition ""]) { if (![string compare $partition ""]) {
set partition [default_partition] set partition [default_partition]
} }
# #
# Check the sinfo long format looking for filtering options # Get some values to test against
# #
# Ask sinfo for two numeric columns (format %30D%30c, no header) for the
# partition; they are captured below as num_nodes and num_cpus.
spawn $sinfo -o%30D%30c --noheader -p$partition
expect {
-re "($number) *($number)" {
# Each matching line overwrites the previous values, so the values from
# the last matching line of sinfo output are the ones that are kept.
set num_nodes $expect_out(1,string)
set num_cpus $expect_out(2,string)
exp_continue
}
timeout {
send_user "\nFAILURE: sinfo is not responding\n"
set exit_code 1
}
eof {
wait
}
}
# The rest of the test uses 8 distinct nodes. Too few nodes is reported as a
# warning and the script exits with status 0.
if {$num_nodes < 8} {
send_user "\nWARNING: This test requires 8 or more nodes but only "
send_user "found $num_nodes available\n"
exit 0
}
spawn $sinfo --Node --long --exact -p$partition if {$num_cpus == 0} {
send_user "\nFAILURE: was not able to get number of "
send_user "cpus (num_cpus = $num_cpus)\n"
exit 0
}
#
# Get a list of nodes that we can use
#
set i 0
spawn $sinfo -o%n --noheader -p$partition
expect { expect {
-re "($end_of_line)($name_string) *($number_with_suffix) *($name_string) *($alpha)" { -re "($alpha_numeric_under)" {
if (![string compare $node_name ""]) { if {$i<8} {
set node_name $expect_out(2,string) set node_list($i) $expect_out(1,string)
incr i 1
exp_continue
} }
if (![string compare $node_state ""]) { }
set node_state $expect_out(5,string) timeout {
send_user "\nFAILURE: sinfo is not responding\n"
set exit_code 1
}
eof {
wait
}
}
# All 8 node names (node_list(0) .. node_list(7)) are used by the state tests
# that follow. Collecting fewer is reported as a FAILURE, so leave with a
# non-zero status instead of signalling success.
if {$i != 8} {
	send_user "\nFAILURE: unable to get all the required nodes\n"
	exit 1
}
#
# Submit a job to filter for allocated job state
#
# Put node_list(0) and node_list(1) into the allocated state by running a
# batch job on them, since scontrol cannot set a node to allocated.
send_user "\nTesting ALLOCATED state\n"
set tmp_id 0
set tmp_sc "test$test_id\_tmp_sc"
make_bash_script $tmp_sc "sleep 20"
# Request 2 * num_cpus tasks on the two named nodes; job output goes to
# /dev/null.
spawn $sbatch -w$node_list(0),$node_list(1) -o/dev/null -n[expr 2 * $num_cpus] $tmp_sc
expect {
-re "Submitted batch job ($number)" {
# Remember the job id; it is waited on below and cancelled later.
set tmp_id $expect_out(1,string)
exp_continue
}
timeout {
send_user "\nFAILURE: sbatch is not responding\n"
set exit_code 1
}
eof {
wait
}
}
# tmp_id keeps its initial value of 0 when no job id was seen in the sbatch
# output.
if {$tmp_id == 0} {
send_user "\nFAILURE: sbatch did not submit job\n"
set exit_code 1
}
# NOTE(review): this wait is also reached when tmp_id is 0 (submission
# failed) - confirm that wait_for_job handles that case.
wait_for_job $tmp_id RUNNING
spawn $sinfo --Node --node=$node_list(0),$node_list(1) --long --exact --state=allocated -p$partition
expect {
-re "($end_of_line)($name_string) *($number_with_suffix) *($name_string) *($alpha)" {
if ([string compare $expect_out(5,string) "allocated"]) {
incr mismatches
} }
exp_continue exp_continue
} }
-re "error:" {
send_user "\nFAILURE: Unexpected error from sinfo\n"
set exit_code 1
}
-re "Unable to contact" { -re "Unable to contact" {
send_user "\nFAILURE: slurm appears to be down\n" send_user "\nFAILURE: slurm appears to be down\n"
exit 1 exit 1
...@@ -73,14 +196,22 @@ expect { ...@@ -73,14 +196,22 @@ expect {
} }
} }
# The allocated-state check is finished; cancel the job submitted for it.
cancel_job $tmp_id
# Change nodes to different states and see if they made it to that state.
# Nodes are handled in pairs: 2-3 -> idle, 4-5 -> down, 6-7 -> drain.
change_state idle $node_list(2),$node_list(3)
change_state down $node_list(4),$node_list(5)
change_state drain $node_list(6),$node_list(7)
# #
# Use sinfo state filter # Change node state to idle and use state filter to filter node
# #
send_user "\nTesting IDLE state\n"
spawn $sinfo --Node --long --exact --state=$node_state -p$partition spawn $sinfo --Node --node=$node_list(2),$node_list(3) --long --exact --state=idle -p$partition
expect { expect {
-re "($end_of_line)($name_string) *($number_with_suffix) *($name_string) *($alpha)" { -re "($end_of_line)($name_string) *($number_with_suffix) *($name_string) *($alpha)" {
if ([string compare $expect_out(5,string) $node_state]) { if ([string compare $expect_out(5,string) "idle"]) {
incr mismatches incr mismatches
} }
exp_continue exp_continue
...@@ -103,13 +234,42 @@ expect { ...@@ -103,13 +234,42 @@ expect {
} }
# #
# Use sinfo node name filter # Change node state to down and use state filter to filter node
# #
# Restrict sinfo to node_list(4) and node_list(5) with --state=down; every
# node line it prints is expected to report the state "down".
send_user "\nTesting DOWN state\n"
spawn $sinfo --Node --node=$node_list(4),$node_list(5) --long --exact --state=down -p$partition
expect {
-re "($end_of_line)($name_string) *($number_with_suffix) *($name_string) *($alpha)" {
# expect_out(5,string) is compared with the requested state; string
# compare returns non-zero when they differ, which counts as a mismatch.
if ([string compare $expect_out(5,string) "down"]) {
incr mismatches
}
exp_continue
}
-re "error:" {
send_user "\nFAILURE: Unexpected error from sinfo\n"
set exit_code 1
}
-re "Unable to contact" {
# Abort the whole test immediately with a failing status.
send_user "\nFAILURE: slurm appears to be down\n"
exit 1
}
timeout {
send_user "\nFAILURE: sinfo not responding\n"
set exit_code 1
}
eof {
wait
}
}
spawn $sinfo --Node --long --exact --nodes=$node_name -p$partition #
# Change node state to drain and use state filter to filter node
#
send_user "\nTesting DRAIN state\n"
spawn $sinfo --Node --node=$node_list(6),$node_list(7) --long --exact --state=drain -p$partition
expect { expect {
-re "($end_of_line)($name_string) *($number_with_suffix) *($name_string) *($alpha)" { -re "($end_of_line)($name_string) *($number_with_suffix) *($name_string) *($alpha)" {
if ([string compare $expect_out(2,string) $node_name]) { if ([string compare $expect_out(5,string) "drained"]) {
incr mismatches incr mismatches
} }
exp_continue exp_continue
...@@ -131,6 +291,11 @@ expect { ...@@ -131,6 +291,11 @@ expect {
} }
} }
#
# Reset node states back to idle
#
reset_state
if {$mismatches != 0} { if {$mismatches != 0} {
send_user "\nFAILURE: sinfo node filtering error\n" send_user "\nFAILURE: sinfo node filtering error\n"
set exit_code 1 set exit_code 1
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment