From 4823a6488230ef6ca7b0c47cf5ff91374fcca28b Mon Sep 17 00:00:00 2001
From: Morris Jette <jette@schedmd.com>
Date: Fri, 27 Oct 2017 09:41:51 -0600
Subject: [PATCH] Harden pack step SPANK test

---
 testsuite/expect/test38.6        | 13 ++++++++-----
 testsuite/expect/test38.6.prog.c | 30 +++++++++++++++++++++++-------
 2 files changed, 31 insertions(+), 12 deletions(-)

diff --git a/testsuite/expect/test38.6 b/testsuite/expect/test38.6
index b84a2515ccc..802c70771d6 100755
--- a/testsuite/expect/test38.6
+++ b/testsuite/expect/test38.6
@@ -71,6 +71,7 @@ proc end_it { exit_code } {
 	} else {
 		send_user "\nFAILURE: This may be due to NFS sychronization problems: "
 		send_user "Multiple processes on differrent nodes writing to the same file\n"
+		send_user "The SlurmdLogFile on each node should include SPANK logs for each step\n"
 	}
 	exit $exit_code
 }
@@ -176,10 +177,10 @@ expect {
 #	slurmd/slurmstepd on the compute node will not get the updated
 #	configuration.
 #
-$bin_sleep 30
+exec $bin_sleep 20
 send_user "\n\nTest locally logged messages..........\n\n"
 make_bash_script $file_in "
-  $srun --mpi=none --test_suite_srun=5 --pack-group=0,1 $bin_uname
+  $srun --mpi=none --test_suite_srun=5 --pack-group=0,1 $bin_hostname
 "
 
 set job_id 0
@@ -290,7 +291,8 @@ if {[wait_for_file $spank_out] == 0} {
 			set sbatch_arg $expect_out(2,string)
 			set srun_arg   $expect_out(3,string)
 
-			if {$spank_type eq "exit"} {
+			if {($spank_type eq "exit") && ($sbatch_arg eq "4")} {
+#				Count only exit records logged with sbatch_arg 4; this skips (possible) external job containers
 				incr matches_spank_exit
 			} elseif {$spank_type eq "task_init"} {
 				incr matches_spank_init
@@ -315,6 +317,7 @@ if {[wait_for_file $spank_out] == 0} {
 			wait
 		}
 	}
+
 	if {$matches_spank_exit != 3} {
 		send_user "\nFAILURE: remote (slurmd - spank_exit) spank plugin failure ($matches_spank_exit != 3)\n"
 		end_it 1
@@ -331,11 +334,11 @@ if {[wait_for_file $spank_out] == 0} {
 		send_user "\nFAILURE: remote (slurmd) spank plugin failure ($matches != 6)\n"
 		end_it 1
 	} else {
-		send_user "\n remote (slurmd) spank plugin success\n"
+		send_user "\nRemote (slurmd) spank plugin success\n"
 	}
 } else {
 	send_user "\nWARNING: This can be caused by the plugstack.conf file not\n"
-	send_user "      being propagated to the compute node or not being in a\n"
+	send_user "      being propagated to the compute nodes or not being in a\n"
 	send_user "      shared file system.\n"
 	end_it 1
 }
diff --git a/testsuite/expect/test38.6.prog.c b/testsuite/expect/test38.6.prog.c
index 78125097bad..08c29b27eaa 100644
--- a/testsuite/expect/test38.6.prog.c
+++ b/testsuite/expect/test38.6.prog.c
@@ -144,15 +144,19 @@ int slurm_spank_task_init(spank_t sp, int ac, char **av)
 	uid_t my_uid;
 	int argc, i;
 	char **argv;
+	char hostname[64] = "";
 
+	gethostname(hostname, sizeof(hostname));
 	if (opt_out_file) {
 		FILE *fp = NULL;
 		for (i = 0; (i < 10) && !fp; i++)
 			fp = fopen(opt_out_file, "a");
 		if (!fp)
 			return -1;
-		fprintf(fp, "%s: opt_arg_sbatch=%d opt_arg_srun=%d\n",
-			__func__, opt_arg_sbatch, opt_arg_srun);
+		if (opt_arg_sbatch || opt_arg_srun)
+			usleep(getpid() % 500000);   /* Reduce NFS collisions */
+		fprintf(fp, "%s: opt_arg_sbatch=%d opt_arg_srun=%d hostname=%s\n",
+			__func__, opt_arg_sbatch, opt_arg_srun, hostname);
 		if (spank_get_item(sp, S_JOB_UID, &my_uid) == ESPANK_SUCCESS)
 			fprintf(fp, "spank_get_item: my_uid=%d\n", my_uid);
                 if (spank_get_item(sp, S_JOB_ARGV, &argc, &argv) ==
@@ -164,6 +168,11 @@ int slurm_spank_task_init(spank_t sp, int ac, char **av)
 		}
 		fclose(fp);
 	}
+
+	slurm_info("%s: opt_arg_sbatch=%d opt_arg_srun=%d hostname=%s out_file=%s",
+		   __func__, opt_arg_sbatch, opt_arg_srun, hostname,
+		   opt_out_file);
+
 	return 0;
 }
 
@@ -176,19 +185,26 @@ int slurm_spank_task_exit(spank_t sp, int ac, char **av) */
 /* Called from both srun and slurmd */
 int slurm_spank_exit(spank_t sp, int ac, char **av)
 {
+	char hostname[64] = "";
 	int i;
 
+	gethostname(hostname, sizeof(hostname));
 	if (opt_out_file) {
 		FILE *fp = NULL;
 		for (i = 0; (i < 10) && !fp; i++)
 			fp = fopen(opt_out_file, "a");
 		if (!fp)
 			return -1;
-		fprintf(fp, "%s: opt_arg_sbatch=%d opt_arg_srun=%d\n",
-			__func__, opt_arg_sbatch, opt_arg_srun);
+		if (opt_arg_sbatch || opt_arg_srun)
+			usleep(getpid() % 500000);   /* Reduce NFS collisions */
+		fprintf(fp, "%s: opt_arg_sbatch=%d opt_arg_srun=%d hostname=%s\n",
+			__func__, opt_arg_sbatch, opt_arg_srun, hostname);
 		fclose(fp);
-	} else
-		slurm_info("%s: opt_arg_sbatch=%d opt_arg_srun=%d",
-			   __func__, opt_arg_sbatch, opt_arg_srun);
+	}
+
+	slurm_info("%s: opt_arg_sbatch=%d opt_arg_srun=%d hostname=%s out_file=%s",
+		   __func__, opt_arg_sbatch, opt_arg_srun, hostname,
+		   opt_out_file);
+
 	return 0;
 }
-- 
GitLab