Skip to content
Snippets Groups Projects
Commit d3d931e7 authored by Michael Hinton's avatar Michael Hinton Committed by Albert Gil
Browse files

Testsuite - Fix false positive in test39.17 with cgroups

Each step has its own cgroup, which applies to GRES GPU devices if
devices are constrained. So a repeated index in CUDA_VISIBLE_DEVICES is
expected, since that index is a local ID for the devices in the step's
cgroup. Update the test to reflect this.

Bug 7094
parent 1d455438
No related branches found
No related tags found
No related merge requests found
......@@ -66,6 +66,13 @@ if {$gpu_cnt < 2} {
exit 0
}
set constrain_devices [test_constrain_devices]
if {$constrain_devices} {
log_info "Devices files are constrained by cgroups\n"
} else {
log_info "Devices files are NOT constrained by cgroups\n"
}
get_node_config
set cpus_per_node [expr $sockets_per_node * $cpus_per_socket]
......@@ -229,9 +236,15 @@ expect {
exp_continue
}
-re "STEP_ID:($number) CUDA_VISIBLE_DEVICES:($number)" {
if {$last_cuda_val == -1} {
set last_cuda_val $expect_out(2,string)
} elseif {$last_cuda_val == $expect_out(2,string)} {
set val $expect_out(2,string)
if {$constrain_devices} {
if {$val != 0} {
log_error "Expected CUDA_VISIBLE_DEVICES:0 with cgroup-constrained devices"
set exit_code 1
}
} elseif {$last_cuda_val == -1} {
set last_cuda_val $val
} elseif {$last_cuda_val == $val} {
incr bad_cuda_val
}
exp_continue
......@@ -314,9 +327,15 @@ expect {
exp_continue
}
-re "STEP_ID:($number) CUDA_VISIBLE_DEVICES:($number)" {
if {$last_cuda_val == -1} {
set last_cuda_val $expect_out(2,string)
} elseif {$last_cuda_val == $expect_out(2,string)} {
set val $expect_out(2,string)
if {$constrain_devices} {
if {$val != 0} {
log_error "Expected CUDA_VISIBLE_DEVICES:0 with cgroup-constrained devices"
set exit_code 1
}
} elseif {$last_cuda_val == -1} {
set last_cuda_val $val
} elseif {$last_cuda_val == $val} {
incr bad_cuda_val
}
exp_continue
......@@ -400,17 +419,30 @@ if {$gpu_cnt > 2} {
exp_continue
}
-re "STEP_ID:($number) CUDA_VISIBLE_DEVICES:($number),($number)" {
if {$last_cuda_val == -1} {
set last_cuda_val $expect_out(2,string)
} elseif {$last_cuda_val == $expect_out(2,string)} {
set val $expect_out(2,string)
set val2 $expect_out(3,string)
if {$constrain_devices} {
if {$val != 0 || $val2 != 1} {
log_error "Expected CUDA_VISIBLE_DEVICES:0,1 with cgroup-constrained devices"
set exit_code 1
}
} elseif {$last_cuda_val == -1} {
set last_cuda_val $val
} elseif {$last_cuda_val == $val} {
incr bad_cuda_val
}
exp_continue
}
-re "STEP_ID:($number) CUDA_VISIBLE_DEVICES:($number)" {
if {$last_cuda_val == -1} {
set last_cuda_val $expect_out(2,string)
} elseif {$last_cuda_val == $expect_out(2,string)} {
set val $expect_out(2,string)
if {$constrain_devices} {
if {$val != 0} {
log_error "Expected CUDA_VISIBLE_DEVICES:0 with cgroup-constrained devices"
set exit_code 1
}
} elseif {$last_cuda_val == -1} {
set last_cuda_val $val
} elseif {$last_cuda_val == $val} {
incr bad_cuda_val
}
exp_continue
......@@ -637,7 +669,12 @@ if {$gpu_cnt >= 2 && $nb_nodes >= 2 && $cpus_per_node >= 3} {
set exit_code 1
}
if {$exit_code == 0} {
if {$cuda_val(0) == $cuda_val(1) && $cuda_val(1) == $cuda_val(2)} {
if {$constrain_devices} {
if {$cuda_val(0) != 0 || $cuda_val(1) != 0 || $cuda_val(2) != 0} {
log_error "Expected all steps with NODE_ID=0 to have CUDA_VISIBLE_DEVICES:0 with cgroup-constrained devices"
set exit_code 1
}
} elseif {$cuda_val(0) == $cuda_val(1) && $cuda_val(1) == $cuda_val(2)} {
send_user "\nFAILURE: Duplicated CUDA values for all 3 steps on node 0 of allocation\n"
set exit_code 1
}
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment