From acbf625dc998e0280b3996472b4675214a5d8b31 Mon Sep 17 00:00:00 2001
From: Marshall Garey <marshall@schedmd.com>
Date: Thu, 13 Feb 2020 12:57:46 -0700
Subject: [PATCH] Testsuite - Add new test to test dependencies

Bug 6068
---
 testsuite/expect/Makefile.am |   1 +
 testsuite/expect/Makefile.in |   1 +
 testsuite/expect/README      |   1 +
 testsuite/expect/test37.17   | 963 +++++++++++++++++++++++++++++++++++
 4 files changed, 966 insertions(+)
 create mode 100755 testsuite/expect/test37.17

diff --git a/testsuite/expect/Makefile.am b/testsuite/expect/Makefile.am
index 251fc14e154..d35f4ae7e41 100644
--- a/testsuite/expect/Makefile.am
+++ b/testsuite/expect/Makefile.am
@@ -616,6 +616,7 @@ EXTRA_DIST = \
 	test37.14			\
 	test37.15			\
 	test37.16			\
+	test37.17			\
 	test38.1                        \
 	test38.2                        \
 	test38.3                        \
diff --git a/testsuite/expect/Makefile.in b/testsuite/expect/Makefile.in
index 8274f6fdc1c..d5c543c6d2c 100644
--- a/testsuite/expect/Makefile.in
+++ b/testsuite/expect/Makefile.in
@@ -1037,6 +1037,7 @@ EXTRA_DIST = \
 	test37.14			\
 	test37.15			\
 	test37.16			\
+	test37.17			\
 	test38.1                        \
 	test38.2                        \
 	test38.3                        \
diff --git a/testsuite/expect/README b/testsuite/expect/README
index 7d86bdcf4cf..d669f69e600 100644
--- a/testsuite/expect/README
+++ b/testsuite/expect/README
@@ -762,6 +762,7 @@ test37.13  Validate federated arrays
 test37.14  Validate federated scontrol notify
 test37.15  Validate federated scontrol suspend
 test37.16  Validate job cleanup when clusters are removed federation
+test37.17  Test local and remote job dependencies
 
 test38.#   Testing of heterogeneous jobs.
 =========================================
diff --git a/testsuite/expect/test37.17 b/testsuite/expect/test37.17
new file mode 100755
index 00000000000..6b432a769ec
--- /dev/null
+++ b/testsuite/expect/test37.17
@@ -0,0 +1,963 @@
+#!/usr/bin/env expect
+############################################################################
+# Purpose: Test local and remote job dependencies
+#
+# Reqs:    1. slurmdbd is the accounting storage type and is up
+#          2. fed_slurm_base is defined in globals.local - set to the directory
+#          that holds each federation cluster's install (fedc1, fedc2, fedc3).
+#          Eg.
+#          fedr/slurm/ (src)
+#          fedr/fed1/bin
+#          fedr/fed1/sbin
+#          fedr/fed1/etc
+#          fedr/fed1/...
+#          fedr/fed2/...
+#          fedr/fed3/...
+#          3. controllers are up and running.
+#
+# Output:  "TEST: #.#" followed by "SUCCESS" if test was successful, OR
+#          "FAILURE: ..." otherwise with an explanation of the failure, OR
+#          anything else indicates a failure mode that must be investigated.
+############################################################################
+# Copyright (C) 2020 SchedMD LLC.
+# Written by Marshall Garey <marshall@schedmd.com>
+#
+# This file is part of Slurm, a resource management program.
+# For details, see <https://slurm.schedmd.com/>.
+# Please also read the included file: DISCLAIMER.
+#
+# Slurm is free software; you can redistribute it and/or modify it under
+# the terms of the GNU General Public License as published by the Free
+# Software Foundation; either version 2 of the License, or (at your option)
+# any later version.
+#
+# Slurm is distributed in the hope that it will be useful, but WITHOUT ANY
+# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
+# details.
+#
+# You should have received a copy of the GNU General Public License along
+# with Slurm; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA.
+############################################################################
+
+source ./globals
+source ./globals_accounting
+source ./globals_federation
+
+# Test identity, the federation it creates, and the user whose jobs it owns.
+set test_id "37.17"
+set fed_name "fed_test${test_id}"
+set user_name [get_my_user_name]
+# Short cluster aliases; scancel and scontrol always come from c1's install.
+lassign [list $fedc1 $fedc2 $fedc3] c1 c2 c3
+set my_scancel [join [list $fed_slurm_base $c1 bin scancel] /]
+set my_scontrol [join [list $fed_slurm_base $c1 bin scontrol] /]
+# Names of the batch scripts written later by make_bash_script.
+set file_in_long "test${test_id}_long.in"
+set file_in_short "test${test_id}_short.in"
+# Globals that get_job_dependency overwrites on every call.
+set reason ""; set dependency ""
+# Not referenced by any proc in this file; kept as originally initialised.
+set exit_code 0; set job_id1 0; set job_id2 0
+
+###############################################################################
+# Functions
+###############################################################################
+
+# Cancel every job owned by the test user on all three federation clusters.
+proc cancel_all_jobs { } {
+	global user_name c1 c2 c3 my_scancel
+
+	set cluster_list "$c1,$c2,$c3"
+	spawn $my_scancel -M$cluster_list -u $user_name
+	# Drain scancel's output and reap the process before continuing.
+	expect eof { wait }
+	# Fixed pause; presumably lets the controllers process the cancel.
+	sleep 5
+}
+
+# Cancel job_id with c1's scancel, then block until wait_for_fed_job reports
+# the job as DONE on the given cluster(s).
+proc cancel_job { job_id clusters } {
+	global my_scancel
+
+	spawn $my_scancel $job_id
+	# Let scancel run to completion and reap it.
+	expect eof { wait }
+
+	wait_for_fed_job $job_id "DONE" $clusters
+}
+
+# Tear down the test: drop the federation, cancel jobs, remove the generated
+# scripts, print the result and exit with rc.
+proc cleanup { rc } {
+	global bin_rm file_in_long file_in_short fed_name test_id
+
+	delete_federations $fed_name
+	cancel_all_jobs
+	foreach script [list $file_in_long $file_in_short] {
+		exec $bin_rm -f $script
+	}
+	# Only a positive rc is reported as a failure.
+	if { $rc <= 0 } { print_success $test_id } else { print_failure $test_id }
+	exit $rc
+}
+
+# Submit file_in with cluster cdir's sbatch (plus the extra sbatch options) as
+# a one-minute batch job. Returns the job id; on timeout or a missing job id
+# it logs the failing command and calls cleanup 1.
+proc submit_job { options cdir file_in } {
+	global number fed_slurm_base test_id
+
+	set job_id 0
+	set my_sbatch "${fed_slurm_base}/$cdir/bin/sbatch"
+	set command "$my_sbatch --job-name=test$test_id\_job -t1 \
+		$options --output=/dev/null $file_in"
+	set sbatch_pid [spawn {*}$command]
+	expect {
+		-re "Submitted batch job ($number)" {
+			set job_id $expect_out(1,string)
+			exp_continue
+		}
+		timeout {
+			log_error "sbatch not responding: $command"
+			slow_kill $sbatch_pid
+			cleanup 1
+		}
+		eof { wait }
+	}
+	if { $job_id == 0 } {
+		log_error "Failed to submit job: $command"
+		cleanup 1
+	}
+	return $job_id
+}
+
+# Run "scontrol show job" for job_id and store its Reason and Dependency
+# fields in the globals "reason" and "dependency" (both stay empty when the
+# fields are not matched). Returns the dependency string.
+proc get_job_dependency { job_id } {
+	global my_scontrol reason dependency alpha
+
+	set reason ""
+	set dependency ""
+	set reason_match "\[a-zA-Z_\]+"
+	# Possible dependency syntax:
+	# <type>:<jobid>+<time>(state)
+	# <type>:<jobid>(state)
+	# singleton(unfulfilled)
+	# Notes:
+	# * Multiple dependencies are separated by a comma or a question mark
+	# * The state for singleton will only ever be "unfulfilled"
+	# * The state is either failed or unfulfilled. Fulfilled dependencies
+	#   are cleared from the list
+	# * When there are no dependencies, it will be this string: "(null)"
+
+	# The pieces below are joined into this regex, which also accepts the
+	# comma or question mark that separates multiple dependencies:
+	# ([a-zA-Z_]+:[0-9_*+]+\($alpha\)\,*\?*|singleton\(unfulfilled\),*\?*)+|\(null\)
+	# NOTE: $state ends in a backslash, which escapes the comma starting $delim.
+	set type "\[a-zA-Z_\]+"
+	set jobid_time "\[0-9_*+\]+"
+	set state "\\($alpha\\)\\"
+	set delim ",*\\?*"
+	set depend_regex "\($type:$jobid_time$state$delim\|singleton\\(unfulfilled\\)$delim\)+"
+	set no_depend "\\(null\\)"
+	set depend_match "$depend_regex\|$no_depend"
+
+	log_user 0
+	spawn $my_scontrol show job $job_id
+	expect {
+		-re "Reason=($reason_match) Dependency=($depend_match)" {
+			set reason $expect_out(1,string)
+			set dependency $expect_out(2,string)
+			exp_continue
+		}
+		timeout {
+			log_error "scontrol not responding"
+			cleanup 1
+		}
+		eof {
+			wait
+		}
+	}
+	log_user 1
+	log_info "job $job_id; actual   reason: \"$reason\"; dependency: \"$dependency\""
+	return $dependency
+}
+
+# Refresh the job's reason/dependency globals and compare them with the
+# expected values. Returns 0 when both match exactly, 1 otherwise.
+proc check_depend { job_id expected_reason expected_dependency } {
+	global reason dependency
+	get_job_dependency $job_id
+	set reason_ok [string equal $reason $expected_reason]
+	set depend_ok [string equal $dependency $expected_dependency]
+	if { $reason_ok && $depend_ok } {
+		return 0
+	}
+	return 1
+}
+
+# Poll job_id every poll_interval seconds until check_depend reports that its
+# reason and dependency equal the expected values, then return 0. Calls
+# cleanup 1 on timeout or on an unexpected DependencyNeverSatisfied.
+proc wait_for_depend { job_id expected_reason expected_dependency } {
+	global reason dependency
+
+	set error 0
+	set my_delay 0
+	# max_delay 30 seconds because by default we test remote dependencies
+	# every 30 seconds, so we might have to wait that long for a result.
+	# Make this interval shorter by decreasing MinJobAge in slurm.conf
+	# because dependencies also get tested every MinJobAge seconds.
+	set max_delay 30
+	set poll_interval 3
+
+	if { [string compare $expected_reason "DependencyNeverSatisfied"] } {
+		set want_never_satisfied 0
+	} else {
+		set want_never_satisfied 1
+	}
+
+	log_info "job $job_id; expected reason: \"$expected_reason\"; dependency: \"$expected_dependency\""
+
+	while 1 {
+		if { ![check_depend $job_id $expected_reason \
+			$expected_dependency] } {
+			return 0
+		}
+		if { (!$want_never_satisfied) && \
+			!([string compare $reason \
+			  "DependencyNeverSatisfied"]) } {
+			log_error "Job dependency failed, but it shouldn't have."
+			set error 1
+		}
+		if { $my_delay >= $max_delay } {
+			log_info "delay $my_delay max $max_delay"
+			log_error "Timeout waiting for dependency to change."
+			set error 1
+		}
+		if { $error } {
+			log_error "Job $job_id actual: reason=\"$reason\"; dependency=\"$dependency\"; expected: reason=\"$expected_reason\"; dependency=\"$expected_dependency\""
+			cleanup 1
+		}
+		exec sleep $poll_interval
+		incr my_delay $poll_interval
+	}
+}
+
+# Wait for job_id to reach state; abort the test unless wait_for_fed_job
+# returns exactly the requested cluster.
+proc my_wait_for_fed_job { job_id state cluster } {
+	set found_on [wait_for_fed_job $job_id $state $cluster]
+	if { ![string equal $cluster $found_on] } { cleanup 1 }
+}
+
+# Return 1 if "scontrol --local show job" on cluster reports job_id, else 0.
+# The answer is returned only after scontrol hits eof and has been reaped, so
+# output matching nothing (previously a non-boolean result) counts as 0.
+proc is_job_on_cluster { job_id cluster } {
+	global my_scontrol
+
+	set found 0
+	log_user 0
+	spawn $my_scontrol -M$cluster --local -o show job $job_id
+	expect {
+		-re "JobId=$job_id" {
+			set found 1
+			exp_continue
+		}
+		timeout {
+			log_user 1
+			log_error "scontrol not responding"
+			cleanup 1
+		}
+		eof { wait }
+	}
+	log_user 1
+	set verdict [expr {$found ? "Found" : "Did not find"}]
+	log_info "$verdict job $job_id in cluster $cluster"
+	return $found
+}
+
+# Exercise --depend=after: locally, across clusters, and with a +time offset.
+proc test_after { } {
+	global c1 c2 file_in_long bin_sleep
+	send_user "
+#############################################################################
+# Test after
+#############################################################################
+	\n\n"
+	# Local: job 2 must lose its dependency once job 1 is running.
+	log_info "after: test that local dependency succeeds:"
+	set job_id1 [submit_job "-M$c1 --begin=now+5" $c1 $file_in_long]
+	set job_id2 [submit_job "--depend=after:$job_id1 -M$c1" $c1 \
+		$file_in_long]
+
+	wait_for_depend $job_id2 "Dependency" "after:$job_id1\(unfulfilled\)"
+	my_wait_for_fed_job $job_id1 "RUNNING" $c1
+	wait_for_depend $job_id2 "None" "(null)"
+	my_wait_for_fed_job $job_id2 "RUNNING" $c1
+	cancel_job $job_id1 $c1
+	cancel_job $job_id2 $c1
+
+	# Remote: same check with job 1 submitted to c2 and job 2 to c1.
+	log_info "after: test that remote dependency succeeds:"
+	set job_id1 [submit_job "-M$c2 --begin=now+5" $c2 $file_in_long]
+	set job_id2 [submit_job "--depend=after:$job_id1 -M$c1" $c1 \
+		$file_in_long]
+
+	wait_for_depend $job_id2 "Dependency" "after:$job_id1\(unfulfilled\)"
+	my_wait_for_fed_job $job_id1 "RUNNING" $c2
+	wait_for_depend $job_id2 "None" "(null)"
+	my_wait_for_fed_job $job_id2 "RUNNING" $c1
+	cancel_job $job_id1 $c2
+	cancel_job $job_id2 $c1
+
+	# Test after with a time attached. file_in_long sleeps for 60 seconds.
+	log_info "after: test that a after+time works:"
+	set job_id1 [submit_job "-M$c1" $c1 $file_in_long]
+	set job_id2 [submit_job "--depend=after:$job_id1+1 -M$c1" $c1 \
+		$file_in_long]
+
+	my_wait_for_fed_job $job_id1 "RUNNING" $c1
+	my_wait_for_fed_job $job_id2 "PENDING" $c1
+	wait_for_depend $job_id2 "Dependency" "after:$job_id1+1(unfulfilled)"
+	log_info "Check that job $job_id2 is still dependent after 45 seconds"
+	exec $bin_sleep 45
+	my_wait_for_fed_job $job_id2 "PENDING" $c1
+	wait_for_depend $job_id2 "Dependency" "after:$job_id1+1(unfulfilled)"
+	log_info "Wait for job $job_id2 dependency to be fulfilled"
+	wait_for_depend $job_id2 "None" "(null)"
+	my_wait_for_fed_job $job_id2 "RUNNING" $c1
+	cancel_job $job_id2 $c1
+
+	# No failure case is tested here: an after dependency never fails.
+}
+
+# Exercise --depend=afterany, including the old bare "jobid,jobid" syntax.
+proc test_afterany { } {
+	global c1 c2 file_in_long
+
+	send_user "
+#############################################################################
+# Test afterany
+#############################################################################
+	\n\n"
+	# Local: cancelling job 1 must clear job 2's dependency.
+	log_info "afterany: test that local dependency succeeds:"
+	set job_id1 [submit_job "-M$c1" $c1 $file_in_long]
+	set job_id2 [submit_job "--depend=afterany:$job_id1 -M$c1" $c1 \
+		$file_in_long]
+
+	wait_for_depend $job_id2 "Dependency" "afterany:$job_id1\(unfulfilled\)"
+	my_wait_for_fed_job $job_id1 "RUNNING" $c1
+	cancel_job $job_id1 $c1
+	wait_for_depend $job_id2 "None" "(null)"
+	my_wait_for_fed_job $job_id2 "RUNNING" $c1
+	cancel_job $job_id2 $c1
+
+	# Remote: same check with job 1 submitted to c2 and job 2 to c1.
+	log_info "afterany: test that remote dependency succeeds:"
+	set job_id1 [submit_job "-M$c2" $c2 $file_in_long]
+	set job_id2 [submit_job "--depend=afterany:$job_id1 -M$c1" $c1 \
+		$file_in_long]
+
+	wait_for_depend $job_id2 "Dependency" "afterany:$job_id1\(unfulfilled\)"
+	my_wait_for_fed_job $job_id1 "RUNNING" $c2
+	cancel_job $job_id1 $c2
+	wait_for_depend $job_id2 "None" "(null)"
+	my_wait_for_fed_job $job_id2 "RUNNING" $c1
+	cancel_job $job_id2 $c1
+
+	# Old syntax --depend=jobid,jobid is expected to be reported as afterany.
+	log_info "afterany: test old syntax: --depend=jobid\[,jobid,jobid...\]"
+	set job_id1 [submit_job "-M$c1" $c1 $file_in_long]
+	set job_id2 [submit_job "--depend=$job_id1 -M$c1" $c1 \
+		$file_in_long]
+	set job_id3 [submit_job "--depend=$job_id1,$job_id2 -M$c1" $c1 \
+		$file_in_long]
+
+	my_wait_for_fed_job $job_id1 "RUNNING" $c1
+	my_wait_for_fed_job $job_id2 "PENDING" $c1
+	my_wait_for_fed_job $job_id3 "PENDING" $c1
+	wait_for_depend $job_id2 "Dependency" "afterany:$job_id1\(unfulfilled\)"
+	wait_for_depend $job_id3 "Dependency" \
+		"afterany:$job_id1\(unfulfilled\),afterany:$job_id2\(unfulfilled\)"
+	cancel_all_jobs
+}
+
+# Exercise --depend=aftercorr between two-task job arrays, local and remote.
+proc test_aftercorr { } {
+	global c1 c2 kill_invalid_depend file_in_long file_in_short
+
+	send_user "
+#############################################################################
+# Test aftercorr
+#############################################################################
+	\n\n"
+	# Local success: both tasks of the short array finish on their own.
+	log_info "aftercorr: test that local dependency succeeds:"
+	set job_array1 [submit_job "-M$c1 --array=1-2" $c1 $file_in_short]
+	set job_array2 [submit_job "--depend=aftercorr:$job_array1 -M$c1 \
+		--array=1-2" $c1 $file_in_long]
+
+	my_wait_for_fed_job "$job_array1\_1" "RUNNING" $c1
+	my_wait_for_fed_job "$job_array1\_2" "RUNNING" $c1
+	my_wait_for_fed_job "$job_array2\_1" "PENDING" $c1
+	my_wait_for_fed_job "$job_array2\_2" "PENDING" $c1
+	wait_for_depend "$job_array2\_1" "Dependency" \
+		"aftercorr:$job_array1\_*(unfulfilled)"
+	wait_for_depend "$job_array2\_2" "Dependency" \
+		"aftercorr:$job_array1\_*(unfulfilled)"
+
+	my_wait_for_fed_job "$job_array1\_1" "DONE" $c1
+	my_wait_for_fed_job "$job_array1\_2" "DONE" $c1
+	wait_for_depend "$job_array2\_1" "None" "(null)"
+	wait_for_depend "$job_array2\_2" "None" "(null)"
+	my_wait_for_fed_job "$job_array2\_1" "RUNNING" $c1
+	my_wait_for_fed_job "$job_array2\_2" "RUNNING" $c1
+	cancel_job $job_array2 $c1
+
+	# Local failure: each cancelled task fails the matching dependent task.
+	log_info "aftercorr: test that local dependency fails:"
+	set job_array1 [submit_job "-M$c1 --array=1-2" $c1 $file_in_long]
+	set job_array2 [submit_job "--depend=aftercorr:$job_array1 -M$c1 \
+		--array=1-2" $c1 $file_in_long]
+
+	my_wait_for_fed_job "$job_array1\_1" "RUNNING" $c1
+	my_wait_for_fed_job "$job_array1\_2" "RUNNING" $c1
+	my_wait_for_fed_job "$job_array2\_1" "PENDING" $c1
+	my_wait_for_fed_job "$job_array2\_2" "PENDING" $c1
+	wait_for_depend "$job_array2\_1" "Dependency" \
+		"aftercorr:$job_array1\_*(unfulfilled)"
+	wait_for_depend "$job_array2\_2" "Dependency" \
+		"aftercorr:$job_array1\_*(unfulfilled)"
+
+	cancel_job "$job_array1\_1" $c1
+	wait_for_depend "$job_array2\_1" "DependencyNeverSatisfied" \
+		"aftercorr:$job_array1\_*(failed)"
+	cancel_job "$job_array1\_2" $c1
+	wait_for_depend "$job_array2\_2" "DependencyNeverSatisfied" \
+		"aftercorr:$job_array1\_*(failed)"
+	if { !$kill_invalid_depend } {
+		cancel_job "$job_array2" $c1
+	}
+
+	# Remote success: array 1 runs on c2, the dependent array on c1.
+	log_info "aftercorr: test that remote dependency succeeds:"
+	set job_array1 [submit_job "-M$c2 --array=1-2" $c2 $file_in_short]
+	set job_array2 [submit_job "--depend=aftercorr:$job_array1 -M$c1 \
+		--array=1-2" $c1 $file_in_long]
+
+	my_wait_for_fed_job "$job_array1\_1" "RUNNING" $c2
+	my_wait_for_fed_job "$job_array1\_2" "RUNNING" $c2
+	my_wait_for_fed_job "$job_array2\_1" "PENDING" $c1
+	my_wait_for_fed_job "$job_array2\_2" "PENDING" $c1
+	# The dependency on the remote side has _*, but the dependency locally
+	# doesn't because it couldn't find the remote job.
+	wait_for_depend "$job_array2\_1" "Dependency" \
+		"aftercorr:$job_array1\(unfulfilled)"
+	wait_for_depend "$job_array2\_2" "Dependency" \
+		"aftercorr:$job_array1\(unfulfilled)"
+
+	my_wait_for_fed_job "$job_array1\_1" "DONE" $c2
+	my_wait_for_fed_job "$job_array1\_2" "DONE" $c2
+	wait_for_depend "$job_array2\_1" "None" "(null)"
+	wait_for_depend "$job_array2\_2" "None" "(null)"
+	my_wait_for_fed_job "$job_array2\_1" "RUNNING" $c1
+	my_wait_for_fed_job "$job_array2\_2" "RUNNING" $c1
+	cancel_job $job_array2 $c1
+
+	# Remote failure: cancel the c2 tasks one at a time.
+	log_info "aftercorr: test that remote dependency fails:"
+	set job_array1 [submit_job "-M$c2 --array=1-2" $c2 $file_in_long]
+	set job_array2 [submit_job "--depend=aftercorr:$job_array1 -M$c1 \
+		--array=1-2" $c1 $file_in_long]
+
+	my_wait_for_fed_job "$job_array1\_1" "RUNNING" $c2
+	my_wait_for_fed_job "$job_array1\_2" "RUNNING" $c2
+	my_wait_for_fed_job "$job_array2\_1" "PENDING" $c1
+	my_wait_for_fed_job "$job_array2\_2" "PENDING" $c1
+	wait_for_depend "$job_array2\_1" "Dependency" \
+		"aftercorr:$job_array1\(unfulfilled)"
+	wait_for_depend "$job_array2\_2" "Dependency" \
+		"aftercorr:$job_array1\(unfulfilled)"
+
+	cancel_job "$job_array1\_1" $c2
+	wait_for_depend "$job_array2\_1" "DependencyNeverSatisfied" \
+		"aftercorr:$job_array1\(failed)"
+	cancel_job "$job_array1\_2" $c2
+	wait_for_depend "$job_array2\_2" "DependencyNeverSatisfied" \
+		"aftercorr:$job_array1\(failed)"
+	if { !$kill_invalid_depend } {
+		cancel_job "$job_array2" $c1
+	}
+}
+
+# Exercise --depend=afterok: success and failure, local and remote.
+proc test_afterok { } {
+	global c1 c2 kill_invalid_depend file_in_long file_in_short
+
+	send_user "
+#############################################################################
+# Test afterok
+#############################################################################
+	\n\n"
+	# Local success: the short job 1 is left to finish on its own.
+	log_info "afterok: test that local dependency succeeds:"
+	set job_id1 [submit_job "-M$c1" $c1 $file_in_short]
+	set job_id2 [submit_job "--depend=afterok:$job_id1 -M$c1" $c1 \
+		$file_in_long]
+
+	my_wait_for_fed_job $job_id1 "RUNNING" $c1
+	my_wait_for_fed_job $job_id2 "PENDING" $c1
+	wait_for_depend $job_id2 "Dependency" "afterok:$job_id1\(unfulfilled\)"
+	my_wait_for_fed_job $job_id1 "DONE" $c1
+	wait_for_depend $job_id2 "None" "(null)"
+	my_wait_for_fed_job $job_id2 "RUNNING" $c1
+	cancel_job $job_id2 $c1
+
+	# Local failure: cancelling job 1 must mark the dependency as failed.
+	log_info "afterok: test that local dependency fails:"
+	set job_id1 [submit_job "-M$c1" $c1 $file_in_long]
+	set job_id2 [submit_job "--depend=afterok:$job_id1 -M$c1" $c1 \
+		$file_in_long]
+
+	my_wait_for_fed_job $job_id1 "RUNNING" $c1
+	my_wait_for_fed_job $job_id2 "PENDING" $c1
+	wait_for_depend $job_id2 "Dependency" "afterok:$job_id1\(unfulfilled\)"
+	cancel_job $job_id1 $c1
+	wait_for_depend $job_id2 "DependencyNeverSatisfied" \
+		"afterok:$job_id1\(failed\)"
+	if { !$kill_invalid_depend } {
+		cancel_job $job_id2 $c1
+	}
+
+	# Remote success: job 1 runs on c2, the dependent job 2 on c1.
+	log_info "afterok: test that remote dependency succeeds:"
+	set job_id1 [submit_job "-M$c2" $c2 $file_in_short]
+	set job_id2 [submit_job "--depend=afterok:$job_id1 -M$c1" $c1 \
+		$file_in_long]
+
+	my_wait_for_fed_job $job_id1 "RUNNING" $c2
+	my_wait_for_fed_job $job_id2 "PENDING" $c1
+	wait_for_depend $job_id2 "Dependency" "afterok:$job_id1\(unfulfilled\)"
+	my_wait_for_fed_job $job_id1 "DONE" $c2
+	wait_for_depend $job_id2 "None" "(null)"
+	my_wait_for_fed_job $job_id2 "RUNNING" $c1
+	cancel_job $job_id2 $c1
+
+	# Remote failure: cancel job 1 on c2.
+	log_info "afterok: test that remote dependency fails"
+	set job_id1 [submit_job "-M$c2" $c2 $file_in_long]
+	set job_id2 [submit_job "--depend=afterok:$job_id1 -M$c1" $c1 \
+		$file_in_long]
+
+	my_wait_for_fed_job $job_id1 "RUNNING" $c2
+	my_wait_for_fed_job $job_id2 "PENDING" $c1
+	wait_for_depend $job_id2 "Dependency" "afterok:$job_id1\(unfulfilled\)"
+	cancel_job $job_id1 $c2
+	wait_for_depend $job_id2 "DependencyNeverSatisfied" \
+		"afterok:$job_id1\(failed\)"
+	if { !$kill_invalid_depend } {
+		cancel_job $job_id2 $c1
+	}
+}
+
+# Exercise --depend=afternotok: success and failure, local and remote.
+proc test_afternotok { } {
+	global c1 c2 kill_invalid_depend file_in_long file_in_short
+
+	send_user "
+#############################################################################
+# Test afternotok
+#############################################################################
+	\n\n"
+	# Local success: cancelling job 1 must clear the dependency.
+	log_info "afternotok: test that local dependency succeeds:"
+	set job_id1 [submit_job "-M$c1" $c1 $file_in_long]
+	set job_id2 [submit_job "--depend=afternotok:$job_id1 -M$c1" $c1 \
+		$file_in_long]
+
+	my_wait_for_fed_job $job_id1 "RUNNING" $c1
+	my_wait_for_fed_job $job_id2 "PENDING" $c1
+	wait_for_depend $job_id2 "Dependency" \
+		"afternotok:$job_id1\(unfulfilled\)"
+	cancel_job $job_id1 $c1
+	wait_for_depend $job_id2 "None" "(null)"
+	my_wait_for_fed_job $job_id2 "RUNNING" $c1
+	cancel_job $job_id2 $c1
+
+	# Local failure: the short job 1 finishing on its own fails it.
+	log_info "afternotok: test that local dependency fails:"
+	set job_id1 [submit_job "-M$c1" $c1 $file_in_short]
+	set job_id2 [submit_job "--depend=afternotok:$job_id1 -M$c1" $c1 \
+		$file_in_long]
+
+	my_wait_for_fed_job $job_id1 "RUNNING" $c1
+	my_wait_for_fed_job $job_id2 "PENDING" $c1
+	wait_for_depend $job_id2 "Dependency" \
+		"afternotok:$job_id1\(unfulfilled\)"
+	my_wait_for_fed_job $job_id1 "DONE" $c1
+	wait_for_depend $job_id2 "DependencyNeverSatisfied" \
+		"afternotok:$job_id1\(failed\)"
+	if { !$kill_invalid_depend } {
+		cancel_job $job_id2 $c1
+	}
+
+	# Remote success: cancel job 1 on c2.
+	log_info "afternotok: test that remote dependency succeeds:"
+	set job_id1 [submit_job "-M$c2" $c2 $file_in_long]
+	set job_id2 [submit_job "--depend=afternotok:$job_id1 -M$c1" $c1 \
+		$file_in_long]
+
+	my_wait_for_fed_job $job_id1 "RUNNING" $c2
+	my_wait_for_fed_job $job_id2 "PENDING" $c1
+	wait_for_depend $job_id2 "Dependency" \
+		"afternotok:$job_id1\(unfulfilled\)"
+	cancel_job $job_id1 $c2
+	wait_for_depend $job_id2 "None" "(null)"
+	my_wait_for_fed_job $job_id2 "RUNNING" $c1
+	cancel_job $job_id2 $c1
+
+	# Remote failure: the short job 1 on c2 finishes on its own.
+	log_info "afternotok: test that remote dependency fails"
+	set job_id1 [submit_job "-M$c2" $c2 $file_in_short]
+	set job_id2 [submit_job "--depend=afternotok:$job_id1 -M$c1" $c1 \
+		$file_in_long]
+
+	my_wait_for_fed_job $job_id1 "RUNNING" $c2
+	my_wait_for_fed_job $job_id2 "PENDING" $c1
+	wait_for_depend $job_id2 "Dependency" \
+		"afternotok:$job_id1\(unfulfilled\)"
+	my_wait_for_fed_job $job_id1 "DONE" $c2
+	wait_for_depend $job_id2 "DependencyNeverSatisfied" \
+		"afternotok:$job_id1\(failed\)"
+	if { !$kill_invalid_depend } {
+		cancel_job $job_id2 $c1
+	}
+}
+
+# Exercise --depend=singleton on one cluster and across the federation.
+proc test_singleton { } {
+	global c1 c2 c3 disable_remote_singleton file_in_long
+
+	send_user "
+#############################################################################
+# Test singleton
+#############################################################################
+	\n\n"
+	# One cluster: job 2 stays pending until job 1 is cancelled.
+	log_info "singleton: test on one cluster"
+	set job_id1 [submit_job "-M$c1" $c1 $file_in_long]
+	set job_id2 [submit_job "--depend=singleton -M$c1" $c1 $file_in_long]
+
+	my_wait_for_fed_job $job_id1 "RUNNING" $c1
+	my_wait_for_fed_job $job_id2 "PENDING" $c1
+	wait_for_depend $job_id2 "Dependency" "singleton(unfulfilled)"
+	cancel_job $job_id1 $c1
+	wait_for_depend $job_id2 "None" "(null)"
+	my_wait_for_fed_job $job_id2 "RUNNING" $c1
+	cancel_job $job_id2 $c1
+
+	# Multiple clusters: which scenario runs depends on disable_remote_singleton.
+	if { $disable_remote_singleton } {
+		# Test that remote jobs don't affect the singleton dependency
+		log_info "singleton: test that disable_remote_singleton works"
+		set job_id1 [submit_job "-M$c1" $c1 $file_in_long]
+		set job_id2 [submit_job "-M$c2" $c2 $file_in_long]
+		set job_id3 [submit_job "--depend=singleton -M$c1" $c1 \
+			$file_in_long]
+
+		my_wait_for_fed_job $job_id1 "RUNNING" $c1
+		my_wait_for_fed_job $job_id2 "RUNNING" $c2
+		my_wait_for_fed_job $job_id3 "PENDING" $c1
+		wait_for_depend $job_id3 "Dependency" "singleton(unfulfilled)"
+		# Cancel job 1 - job 3 should start running even though job 2 is
+		# running on another cluster
+		cancel_job $job_id1 $c1
+		wait_for_depend $job_id3 "None" "(null)"
+		my_wait_for_fed_job $job_id3 "RUNNING" $c1
+		cancel_job $job_id2 $c2
+		cancel_job $job_id3 $c1
+	} else {
+		# Test that singleton doesn't get cleared until jobs on all
+		# clusters are done
+		log_info "singleton: test with jobs on all clusters"
+		set job_id1 [submit_job "-M$c1" $c1 $file_in_long]
+		set job_id2 [submit_job "-M$c2" $c2 $file_in_long]
+		set job_id3 [submit_job "-M$c3" $c3 $file_in_long]
+		set job_id4 [submit_job "--depend=singleton -M$c1" $c1 \
+			$file_in_long]
+
+		my_wait_for_fed_job $job_id1 "RUNNING" $c1
+		my_wait_for_fed_job $job_id2 "RUNNING" $c2
+		my_wait_for_fed_job $job_id3 "RUNNING" $c3
+		my_wait_for_fed_job $job_id4 "PENDING" $c1
+		wait_for_depend $job_id4 "Dependency" "singleton(unfulfilled)"
+
+		# Job 4 shouldn't start until jobs 1, 2, and 3 are done.
+		# Test that it starts when a remote job is finished last.
+		cancel_job $job_id1 $c1
+		# Should still have the same dependency
+		wait_for_depend $job_id4 "Dependency" "singleton(unfulfilled)"
+		cancel_job $job_id2 $c2
+		cancel_job $job_id3 $c3
+		# Now the dependency should be cleared
+		wait_for_depend $job_id4 "None" "(null)"
+		my_wait_for_fed_job $job_id4 "RUNNING" $c1
+		cancel_job $job_id4 $c1
+	}
+}
+
+# Check dependency handling when c3 is removed from and re-added to the federation.
+proc test_add_remove_clusters { } {
+	global c1 c2 c3 fed_name file_in_long \
+		disable_remote_singleton kill_invalid_depend
+
+	# Test adding/removing clusters from the federation
+	# Removing a cluster from a federation should cause dependencies on
+	# jobs on that cluster to fail.
+	# Adding a cluster to a federation means that any singleton dependencies
+	# have to be fulfilled on that cluster.
+	send_user "
+#############################################################################
+# Test adding/removing a cluster from the federation.
+#############################################################################
+	\n\n"
+	set job_id1 [submit_job "-M$c1" $c1 $file_in_long]
+	set job_id2 [submit_job "-M$c2" $c2 $file_in_long]
+	set job_id3 [submit_job "-M$c3" $c3 $file_in_long]
+	set job_id4 [submit_job "--depend=afterok:$job_id3 -M$c1" $c1 \
+		$file_in_long]
+	set job_id5 [submit_job "--depend=singleton -M$c1" $c1 $file_in_long]
+	my_wait_for_fed_job $job_id1 "RUNNING" $c1
+	my_wait_for_fed_job $job_id2 "RUNNING" $c2
+	my_wait_for_fed_job $job_id3 "RUNNING" $c3
+	my_wait_for_fed_job $job_id4 "PENDING" $c1
+	wait_for_depend $job_id4 "Dependency" "afterok:$job_id3\(unfulfilled\)"
+	wait_for_depend $job_id5 "Dependency" "singleton(unfulfilled)"
+
+	log_info "Test that removing cluster $c3 from fed $fed_name makes dependencies on jobs on $c3 fail"
+	if { [remove_cluster_from_fed $c3 $fed_name] } {
+		cleanup 1
+	}
+	wait_for_depend $job_id4 "DependencyNeverSatisfied" \
+		"afterok:$job_id3\(failed\)"
+	if { !$kill_invalid_depend } {
+		cancel_job $job_id4 $c1
+	}
+
+	if { $disable_remote_singleton } {
+		cancel_job $job_id1 $c1
+		cancel_job $job_id2 $c2
+		cancel_job $job_id3 $c3
+		cancel_job $job_id5 $c1
+		return
+	}
+
+	log_info "Test that the singleton dependency was resent back to cluster $c3 when it was added back to the federation."
+	if { [add_cluster_to_fed $c3 $fed_name] } {
+		cleanup 1
+	}
+	cancel_job $job_id1 $c1
+	cancel_job $job_id2 $c2
+	cancel_job $job_id3 $c3
+	wait_for_depend $job_id5 "None" "(null)"
+	my_wait_for_fed_job $job_id5 "RUNNING" $c1
+	cancel_job $job_id5 $c1
+}
+
+# Check that a dependent job reaches its sibling clusters only once released.
+proc test_submit_to_all_clusters { } {
+	global c1 c2 c3 file_in_long
+
+	send_user "
+#############################################################################
+# Test submitting a dependent job to all clusters.
+#############################################################################
+	\n\n"
+	log_info "Test that a dependent job is only on its origin cluster while dependent and that it is submitted to all clusters when its dependency is cleared."
+	set job_id1 [submit_job "-M$c2" $c2 $file_in_long]
+	set job_id2 [submit_job "--depend=afternotok:$job_id1 -M$c1,$c2,$c3 \
+		--begin=now+60" $c1 $file_in_long]
+
+	my_wait_for_fed_job $job_id1 "RUNNING" $c2
+	my_wait_for_fed_job $job_id2 "PENDING" $c1
+	wait_for_depend $job_id2 "Dependency" \
+		"afternotok:$job_id1\(unfulfilled\)"
+
+	log_info "Test that job $job_id2 is not on clusters $c2 or $c3."
+	if { [is_job_on_cluster $job_id2 $c2] || \
+		[is_job_on_cluster $job_id2 $c3] } {
+			log_error "Job $job_id2 is in cluster $c2 and/or $c3 when it shouldn't be."
+			cleanup 1
+	}
+
+	log_info "Test that job $job_id2 is submitted to all sibling clusters $c2 and $c3 when its dependency is fulfilled."
+	cancel_job $job_id1 $c2
+	wait_for_depend $job_id2 "BeginTime" "(null)"
+	my_wait_for_fed_job $job_id2 "PENDING" "$c1"
+	my_wait_for_fed_job $job_id2 "PENDING" "$c2"
+	my_wait_for_fed_job $job_id2 "PENDING" "$c3"
+	cancel_job $job_id2 "$c1,$c2,$c3"
+}
+
+# Exercise OR ("?") dependency lists that mix a local and a remote job.
+proc test_or_dependencies { } {
+	global c1 c2 file_in_long kill_invalid_depend
+
+	send_user "
+#############################################################################
+# Test OR dependencies.
+#############################################################################
+	\n\n"
+	log_info "OR dependencies: Test that one fulfilled dependency makes the whole dependency fulfilled:"
+	set job_id1 [submit_job "-M$c1" $c1 $file_in_long]
+	set job_id2 [submit_job "-M$c2" $c2 $file_in_long]
+	set job_id3 [submit_job \
+		"--depend=afternotok:$job_id1?afternotok:$job_id2 -M$c1" \
+		$c1 $file_in_long]
+
+	my_wait_for_fed_job $job_id1 "RUNNING" $c1
+	my_wait_for_fed_job $job_id2 "RUNNING" $c2
+	my_wait_for_fed_job $job_id3 "PENDING" $c1
+	wait_for_depend $job_id3 "Dependency" \
+		"afternotok:$job_id1\(unfulfilled\)?afternotok:$job_id2\(unfulfilled\)"
+
+	cancel_job $job_id2 $c2
+	wait_for_depend $job_id3 "None" "(null)"
+	my_wait_for_fed_job $job_id3 "RUNNING" $c1
+	cancel_job $job_id1 $c1
+	cancel_job $job_id3 $c1
+
+	log_info "OR dependencies: Test that the dependency doesn't fail until all dependencies have failed:"
+	set job_id1 [submit_job "-M$c1" $c1 $file_in_long]
+	set job_id2 [submit_job "-M$c2" $c2 $file_in_long]
+	set job_id3 [submit_job "--depend=afterok:$job_id1?afterok:$job_id2 \
+		-M$c1" $c1 $file_in_long]
+
+	my_wait_for_fed_job $job_id1 "RUNNING" $c1
+	my_wait_for_fed_job $job_id2 "RUNNING" $c2
+	my_wait_for_fed_job $job_id3 "PENDING" $c1
+	wait_for_depend $job_id3 "Dependency" \
+		"afterok:$job_id1\(unfulfilled\)?afterok:$job_id2\(unfulfilled\)"
+
+	cancel_job $job_id1 $c1
+	wait_for_depend $job_id3 "Dependency" \
+		"afterok:$job_id1\(failed\)?afterok:$job_id2\(unfulfilled\)"
+	cancel_job $job_id2 $c2
+	wait_for_depend $job_id3 "DependencyNeverSatisfied" \
+		"afterok:$job_id1\(failed\)?afterok:$job_id2\(failed\)"
+	if { !$kill_invalid_depend } {
+		cancel_job $job_id3 $c1
+	}
+}
+
+# Exercise AND (",") dependency lists that mix a local and a remote job.
+proc test_and_dependencies { } {
+	global c1 c2 file_in_long kill_invalid_depend
+
+	send_user "
+#############################################################################
+# Test AND dependencies.
+#############################################################################
+	\n\n"
+	log_info "AND dependencies: Test that the dependency isn't fulfilled until all dependencies are fulfilled:"
+	set job_id1 [submit_job "-M$c1" $c1 $file_in_long]
+	set job_id2 [submit_job "-M$c2" $c2 $file_in_long]
+	set job_id3 [submit_job \
+		"--depend=afternotok:$job_id1,afternotok:$job_id2 -M$c1" \
+		$c1 $file_in_long]
+
+	my_wait_for_fed_job $job_id1 "RUNNING" $c1
+	my_wait_for_fed_job $job_id2 "RUNNING" $c2
+	my_wait_for_fed_job $job_id3 "PENDING" $c1
+	wait_for_depend $job_id3 "Dependency" \
+		"afternotok:$job_id1\(unfulfilled\),afternotok:$job_id2\(unfulfilled\)"
+
+	cancel_job $job_id1 $c1
+	wait_for_depend $job_id3 "Dependency" \
+		"afternotok:$job_id2\(unfulfilled\)"
+	cancel_job $job_id2 $c2
+	wait_for_depend $job_id3 "None" "(null)"
+	my_wait_for_fed_job $job_id3 "RUNNING" $c1
+	cancel_job $job_id3 $c1
+
+	log_info "AND dependencies: Test that the whole dependency fails when a single dependency fails:"
+	set job_id1 [submit_job "-M$c1" $c1 $file_in_long]
+	set job_id2 [submit_job "-M$c2" $c2 $file_in_long]
+	set job_id3 [submit_job "--depend=afterok:$job_id1,afterok:$job_id2 \
+		-M$c1" $c1 $file_in_long]
+
+	my_wait_for_fed_job $job_id1 "RUNNING" $c1
+	my_wait_for_fed_job $job_id2 "RUNNING" $c2
+	my_wait_for_fed_job $job_id3 "PENDING" $c1
+	wait_for_depend $job_id3 "Dependency" \
+		"afterok:$job_id1\(unfulfilled\),afterok:$job_id2\(unfulfilled\)"
+
+	cancel_job $job_id2 $c2
+	wait_for_depend $job_id3 "DependencyNeverSatisfied" \
+		"afterok:$job_id1\(unfulfilled\),afterok:$job_id2\(failed\)"
+	if { !$kill_invalid_depend } {
+		cancel_job $job_id3 $c1
+	}
+	cancel_job $job_id1 $c1
+}
+
+###############################################################################
+# Begin test
+###############################################################################
+
+print_header $test_id
+# Prerequisite checks: each failed check ends the script with exit status 0.
+if { [test_account_storage] == 0 } {
+	log_warn "This test can't be run without a usable AccountStorageType"
+	exit 0
+}
+
+if {[test_federation_setup]} {
+	log_warn "WARNING: This test can't be run without fed_slurm_base, fedc1, fedc2, fedc3 setup in globals.local."
+	exit 0
+}
+
+if {[test_all_up]} {
+	exit 0
+}
+# Recreate the federation from scratch; a setup failure aborts via cleanup 1.
+delete_federations $fed_name
+if { [setup_federation $fed_name] } {
+	cleanup 1
+}
+
+# Use file_in_short when we have to wait for the job to end.
+# Use file_in_long everywhere else.
+make_bash_script $file_in_long "$bin_sleep 60"
+make_bash_script $file_in_short "$bin_sleep 5"
+# permit_job_expansion is only logged; nothing else in this file reads it.
+set permit_job_expansion [test_scheduler_params "permit_job_expansion"]
+log_info "permit_job_expansion: $permit_job_expansion"
+# These two settings are read as globals by the test procs to pick branches.
+set kill_invalid_depend [test_dependency_params "kill_invalid_depend"]
+set disable_remote_singleton [test_dependency_params "disable_remote_singleton"]
+log_info "kill_invalid_depend: $kill_invalid_depend; disable_remote_singleton: $disable_remote_singleton\n"
+# Cancel this user's jobs on all three clusters before the first scenario.
+cancel_all_jobs
+
+test_after
+test_afterany
+# --depend=afterburstbuffer is tested in test35.6
+test_aftercorr
+test_afterok
+test_afternotok
+test_singleton
+# test --depend=expand in another test.
+test_add_remove_clusters
+test_submit_to_all_clusters
+test_or_dependencies
+test_and_dependencies
+# Reaching this point means no scenario aborted; cleanup 0 prints success and exits.
+cleanup 0
-- 
GitLab