Skip to content
Snippets Groups Projects
Commit fd8e7c6b authored by Christopher J. Morrone's avatar Christopher J. Morrone
Browse files

Make wait_for_job a quieter procedure.

parent c9af48ca
No related branches found
No related tags found
No related merge requests found
......@@ -305,83 +305,74 @@ proc wait_for_file { file_name } {
#
# Input: job_id -- The SLURM job id of a job we want to
# wait for.
# desired_state -- The state you want the job to attain before
# returning. Currently supports:
# DONE any terminated state
# RUNNING job is running
#
# NOTE: We sleep for two seconds before replying that a job is
# done to give time for I/O completion (stdout/stderr files)
#
################################################################
proc wait_for_job { job_id desired_state } {
    # Poll "scontrol -o show job <job_id>" until the job reaches
    # desired_state.
    #
    # Arguments:
    #   job_id        -- id of the job to poll
    #   desired_state -- "DONE" (any terminated state) or "RUNNING"
    #
    # Returns:
    #   0 -- the job reached desired_state
    #   1 -- desired_state is unsupported, or the job terminated while
    #        the caller was waiting for RUNNING
    #
    # Polling starts at a one second interval and doubles after every
    # attempt, capped at ten seconds.
    global scontrol

    # Reject unsupported target states before polling; no branch below
    # would ever return for them, so the loop would never end.
    # "--" keeps a value that begins with "-" from being parsed as a
    # switch option.
    switch -- $desired_state {
        "DONE" -
        "RUNNING" {}
        default {
            send_user "Unsupported desired state: $desired_state\n"
            return 1
        }
    }

    set sleep_time 1
    while {1} {
        # Only the first output line is inspected.
        set line ""
        set fd [open "|$scontrol -o show job $job_id"]
        gets $fd line
        # Closing a command pipeline raises a Tcl error when the child
        # exits non-zero or writes to stderr.  That must not abort the
        # caller: a failed query is handled below as "NOT_FOUND".
        catch {close $fd}

        if {![regexp {JobState\s*=\s*(\w+)} $line -> state]} {
            set state "NOT_FOUND"
        }

        switch -- $state {
            "NOT_FOUND" -
            "CANCELLED" -
            "FAILED" -
            "TIMEOUT" -
            "NODE_FAIL" -
            "COMPLETED" {
                # Terminal states: success for DONE, failure otherwise.
                if {$desired_state eq "DONE"} {
                    send_user "Job $job_id is DONE\n"
                    # Give stdout/stderr files time to be flushed.
                    sleep 2
                    return 0
                }
                # desired_state was validated above, so it is RUNNING
                # here, and a terminated job can no longer reach it.
                send_user "Job $job_id is $state, but we wanted RUNNING\n"
                return 1
            }
            "RUNNING" {
                if {$desired_state eq "RUNNING"} {
                    send_user "Job $job_id is RUNNING\n"
                    return 0
                }
                send_user "Job $job_id is in state $state, desire $desired_state\n"
            }
            default {
                send_user "Job $job_id is in state $state, desire $desired_state\n"
            }
        }

        # Exponential back-off between polls, capped at ten seconds.
        sleep $sleep_time
        set sleep_time [expr {$sleep_time * 2}]
        if {$sleep_time > 10} {
            set sleep_time 10
        }
    }
}
################################################################
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment