From 56c47518d25bafc98f272b39f654020f512fd582 Mon Sep 17 00:00:00 2001
From: Moe Jette <jette1@llnl.gov>
Date: Wed, 21 Mar 2007 18:02:03 +0000
Subject: [PATCH] Add special message for user root to restart slurmctld
 daemons as needed. I keep finding the backup slurmctld down on production
 clusters and hope this helps.

---
 src/scontrol/scontrol.c | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

diff --git a/src/scontrol/scontrol.c b/src/scontrol/scontrol.c
index 990be525474..4727a1867e3 100644
--- a/src/scontrol/scontrol.c
+++ b/src/scontrol/scontrol.c
@@ -339,6 +339,7 @@ _ping_slurmctld(char *control_machine, char *backup_controller)
 {
 	static char *state[2] = { "UP", "DOWN" };
 	int primary = 1, secondary = 1;
+	int down_msg = 0;
 
 	if (slurm_ping(1) == SLURM_SUCCESS)
 		primary = 0;
@@ -347,17 +348,27 @@ _ping_slurmctld(char *control_machine, char *backup_controller)
 	fprintf(stdout, "Slurmctld(primary/backup) ");
 	if (control_machine || backup_controller) {
 		fprintf(stdout, "at ");
-		if (control_machine)
+		if (control_machine) {
 			fprintf(stdout, "%s/", control_machine);
-		else
+			if (primary)
+				down_msg = 1;
+		} else
 			fprintf(stdout, "(NULL)/");
-		if (backup_controller)
+		if (backup_controller) {
 			fprintf(stdout, "%s ", backup_controller);
-		else
+			if (secondary)
+				down_msg = 1;
+		} else
 			fprintf(stdout, "(NULL) ");
 	}
 	fprintf(stdout, "are %s/%s\n", 
 		state[primary], state[secondary]);
+
+	if (down_msg && ((getuid() == 0) || (geteuid() == 0))) {
+		fprintf(stdout, "*****************************************\n");
+		fprintf(stdout, "** RESTORE SLURMCTLD DAEMON TO SERVICE **\n");
+		fprintf(stdout, "*****************************************\n");
+	}
 }
 
 /*
-- 
GitLab