From c8aa32797c5cf2ef88c6b679b7c3749520ac8e04 Mon Sep 17 00:00:00 2001
From: Mark Grondona <mgrondona@llnl.gov>
Date: Mon, 13 Jan 2003 23:10:07 +0000
Subject: [PATCH]  o read_pidfile() added to daemonize.c  o slurmd tries to
 kill any old slurmd's on startup  o create_pidfile() locks pid file and keeps
 it open, so that    we can test pidfile for lock to definitively know whether
 old    slurmd is still running.

---
 src/common/Makefile.am |  3 ++-
 src/common/daemonize.c | 56 +++++++++++++++++++++++++++++++++++++++---
 src/common/daemonize.h |  6 +++++
 src/slurmd/slurmd.c    | 18 ++++++++++++++
 4 files changed, 78 insertions(+), 5 deletions(-)

diff --git a/src/common/Makefile.am b/src/common/Makefile.am
index 4e61d70b9f6..ec5c0eced59 100644
--- a/src/common/Makefile.am
+++ b/src/common/Makefile.am
@@ -79,7 +79,8 @@ noinst_HEADERS = 	\
 
 libdaemonize_la_SOURCES =  \
 	daemonize.c        \
-	daemonize.h
+	daemonize.h        \
+        fd.c fd.h
 
 libcred_la_SOURCES =       \
         credential_utils.c \
diff --git a/src/common/daemonize.c b/src/common/daemonize.c
index 45affcef112..08b07ade2db 100644
--- a/src/common/daemonize.c
+++ b/src/common/daemonize.c
@@ -33,6 +33,7 @@
 
 #include "src/common/macros.h"
 #include "src/common/log.h"
+#include "src/common/fd.h"
 #include "src/common/xassert.h"
 
 /* closeall FDs >= a specified value */
@@ -82,9 +83,45 @@ daemon(int nochdir, int noclose)
 
 }
 
+/* Return the pid that fd_is_read_lock_blocked() reports for `pidfile', or 0
+ * if the file cannot be opened or parsed, or no lock is reported. The stream
+ * (and its fd) is closed before returning so that nothing is leaked. */
+pid_t
+read_pidfile(const char *pidfile)
+{
+	FILE *fp;
+	unsigned long pid;
+	pid_t lpid = (pid_t) 0;
+	int fd = open(pidfile, O_RDONLY);
+
+	if (fd < 0) {
+		debug ("unable to open old pid file: %m");
+		return ((pid_t) 0);
+	}
+	if (!(fp = fdopen(fd, "r"))) {
+		if (errno != ENOENT)
+			fatal ("Unable to access old pidfile at `%s': %m", pidfile);
+		close(fd);	/* no stream to read from; never use a NULL fp */
+		return ((pid_t) 0);
+	}
+	if (fscanf(fp, "%lu", &pid) < 1)
+		error ("Possible corrupt pidfile `%s'", pidfile);
+	else if ((lpid = fd_is_read_lock_blocked(fd)) == (pid_t) 0)
+		verbose ("pidfile not locked, assuming no running slurmd");
+	else if (lpid != (pid_t) pid)
+		fatal ("pidfile locked by %ld but contains pid=%lu",
+				(long) lpid, pid);
+
+	fclose(fp);	/* closes fd too; done only after the lock test */
+	return (lpid);
+}
+
+
+
 int
 create_pidfile(const char *pidfile)
 {
+	int fd;
 	FILE *fp;
 
 	xassert(pidfile != NULL);
@@ -94,15 +131,26 @@ create_pidfile(const char *pidfile)
 		error("Unable to open pidfile `%s': %m", pidfile);
 		return -1;
 	}
-	if (fprintf(fp, "%d\n", (int) getpid()) == EOF) {
-		error("Unable to write to pidfile `%s': %m", pidfile);
+
+	if (fd_get_write_lock(fileno(fp)) < 0) {
+		error ("Unable to lock pidfile `%s': %m", pidfile);
 		goto error;
 	}
-	if (fclose(fp) == EOF) {
-		error("Unable to close pidfile `%s': %m", pidfile);
+
+	if (fprintf(fp, "%d\n", (int) getpid()) == EOF) {
+		error("Unable to write to pidfile `%s': %m", pidfile);
 		goto error;
 	}
 
+	fflush(fp);
+
+	
+	/* fp is deliberately left open so the pidfile lock taken above stays held:
+	 * if (fclose(fp) == EOF) {
+	 *	error("Unable to close pidfile `%s': %m", pidfile);
+	 *	goto error;
+	 * }
+	 */
 	return 0;
 
   error:
diff --git a/src/common/daemonize.h b/src/common/daemonize.h
index 69897498057..a4f382ee7d2 100644
--- a/src/common/daemonize.h
+++ b/src/common/daemonize.h
@@ -41,4 +41,10 @@ int daemon(int nochdir, int noclose);
  */
 int create_pidfile(char *pidfilename);
 
+/*
+ * Read the old pid from `pidfilename'. Returns 0 if the file cannot be
+ * opened or parsed, or is not locked (no running process).
+ */
+pid_t read_pidfile(const char *pidfilename);
+
 #endif /* !_HAVE_DAEMONIZE_H */
diff --git a/src/slurmd/slurmd.c b/src/slurmd/slurmd.c
index de29dae79f9..ddc8450a9fc 100644
--- a/src/slurmd/slurmd.c
+++ b/src/slurmd/slurmd.c
@@ -91,6 +91,7 @@ static void       _create_conf();
 static void       _init_conf();
 static void       _print_conf();
 static void       _read_config();
+static void 	  _kill_old_slurmd();
 static void       _reconfigure();
 static void       _wait_for_all_threads();
 static void       _set_slurmd_spooldir(void);
@@ -124,8 +125,15 @@ main (int argc, char *argv[])
 	conf->argv = &argv;
 	conf->argc = &argc;
 
+
+	log_init(argv[0], conf->log_opts, LOG_DAEMON, conf->logfile);
+	_print_conf();
+
+	_kill_old_slurmd();
 	create_pidfile(conf->pidfile);
+
 	info("%s started on %T", xbasename(argv[0]));
+
 	_create_msg_socket();
 	conf->pid = getpid();
 
@@ -575,6 +583,16 @@ _set_slurmd_spooldir(void)
 		fatal("chdir(%s): %m", conf->spooldir);
 }
 
+static void _kill_old_slurmd(void)
+{
+	/* read_pidfile() yields 0 when there is no old slurmd to signal */
+	pid_t prev = read_pidfile(conf->pidfile);
+	if (prev == (pid_t) 0)
+		return;
+	info ("killing old slurmd[%ld]", (long) prev);
+	kill(prev, SIGTERM);
+	sleep(2);	/* fixed 2s pause; presumably lets the old daemon exit */
+}
 
 /* Reset slurmctld logging based upon configuration parameters */
 static void _update_logging(void) 
-- 
GitLab