From c8aa32797c5cf2ef88c6b679b7c3749520ac8e04 Mon Sep 17 00:00:00 2001
From: Mark Grondona <mgrondona@llnl.gov>
Date: Mon, 13 Jan 2003 23:10:07 +0000
Subject: [PATCH] o read_pidfile() added to daemonize.c o slurmd tries to kill
 any old slurmd's on startup o create_pidfile() locks pid file and keeps it
 open, so that we can test pidfile for lock to definitively know whether old
 slurmd is still running.

---
 src/common/Makefile.am |  3 +-
 src/common/daemonize.c | 72 +++++++++++++++++++++++++++++++++++++++---
 src/common/daemonize.h |  7 ++++
 src/slurmd/slurmd.c    | 28 ++++++++++++++++
 4 files changed, 105 insertions(+), 5 deletions(-)

diff --git a/src/common/Makefile.am b/src/common/Makefile.am
index 4e61d70b9f6..ec5c0eced59 100644
--- a/src/common/Makefile.am
+++ b/src/common/Makefile.am
@@ -79,7 +79,8 @@ noinst_HEADERS = \
 
 libdaemonize_la_SOURCES = \
 	daemonize.c \
-	daemonize.h
+	daemonize.h \
+	fd.c fd.h
 
 libcred_la_SOURCES = \
 	credential_utils.c \
diff --git a/src/common/daemonize.c b/src/common/daemonize.c
index 45affcef112..08b07ade2db 100644
--- a/src/common/daemonize.c
+++ b/src/common/daemonize.c
@@ -33,6 +33,7 @@
 
 #include "src/common/macros.h"
 #include "src/common/log.h"
+#include "src/common/fd.h"
 #include "src/common/xassert.h"
 
 /* closeall FDs >= a specified value */
@@ -82,9 +83,61 @@ daemon(int nochdir, int noclose)
 }
 
 
+/*
+ * Read the pid stored in `pidfile' and check whether the file is still
+ * locked, i.e. whether the daemon that wrote it is still running.
+ *
+ * Returns the pid of the lock holder, or 0 if the pidfile cannot be
+ * opened or parsed, or if nobody holds a lock on it. It is fatal for
+ * the lock holder to differ from the pid recorded in the file.
+ *
+ * The pidfile is closed again before returning. NOTE(review): assuming
+ * fd.c takes fcntl() record locks, closing it would also drop a lock
+ * held by the caller, so call this before create_pidfile() only.
+ */
+pid_t
+read_pidfile(const char *pidfile)
+{
+	int fd;
+	FILE *fp;
+	unsigned long pid;
+	pid_t lpid;
+
+	if ((fd = open(pidfile, O_RDONLY)) < 0) {
+		debug ("unable to open old pid file: %m");
+		return ((pid_t) 0);
+	}
+
+	if (!(fp = fdopen(fd, "r"))) {
+		error ("Unable to access old pidfile at `%s': %m", pidfile);
+		(void) close(fd);
+		return ((pid_t) 0);
+	}
+
+	if (fscanf(fp, "%lu", &pid) < 1) {
+		error ("Possible corrupt pidfile `%s'", pidfile);
+		(void) fclose(fp);
+		return ((pid_t) 0);
+	}
+
+	if ((lpid = fd_is_read_lock_blocked(fd)) == (pid_t) 0) {
+		verbose ("pidfile not locked, assuming no running slurmd");
+		(void) fclose(fp);
+		return (lpid);
+	}
+
+	if (lpid != (pid_t) pid)
+		fatal ("pidfile locked by %ld but contains pid=%lu",
+		       (long) lpid, pid);
+
+	(void) fclose(fp);	/* also closes fd */
+	return (lpid);
+}
+
+
 int
 create_pidfile(const char *pidfile)
 {
 	FILE *fp;
 
 	xassert(pidfile != NULL);
@@ -94,15 +147,26 @@ create_pidfile(const char *pidfile)
 		error("Unable to open pidfile `%s': %m", pidfile);
 		return -1;
 	}
-	if (fprintf(fp, "%d\n", (int) getpid()) == EOF) {
-		error("Unable to write to pidfile `%s': %m", pidfile);
+
+	if (fd_get_write_lock(fileno(fp)) < 0) {
+		error ("Unable to lock pidfile `%s': %m", pidfile);
 		goto error;
 	}
-	if (fclose(fp) == EOF) {
-		error("Unable to close pidfile `%s': %m", pidfile);
+
+	if (fprintf(fp, "%d\n", (int) getpid()) == EOF) {
+		error("Unable to write to pidfile `%s': %m", pidfile);
 		goto error;
 	}
 
+	if (fflush(fp) == EOF) {
+		error("Unable to flush pidfile `%s': %m", pidfile);
+		goto error;
+	}
+
+	/*
+	 * Do not fclose(fp): the stream is kept open so the write lock
+	 * stays held and read_pidfile() can detect a running daemon.
+	 */
 	return 0;
 
   error:
diff --git a/src/common/daemonize.h b/src/common/daemonize.h
index 69897498057..a4f382ee7d2 100644
--- a/src/common/daemonize.h
+++ b/src/common/daemonize.h
@@ -41,4 +41,11 @@ int daemon(int nochdir, int noclose);
  */
 int create_pidfile(char *pidfilename);
 
+/*
+ * Attempt to read an old pid from the configured pidfile.
+ * Returns the pid of the process holding the pidfile lock, or 0 if the
+ * pidfile is missing, unreadable or not locked (no running process).
+ */
+pid_t read_pidfile(const char *pidfilename);
+
 #endif /* !_HAVE_DAEMONIZE_H */
diff --git a/src/slurmd/slurmd.c b/src/slurmd/slurmd.c
index de29dae79f9..ddc8450a9fc 100644
--- a/src/slurmd/slurmd.c
+++ b/src/slurmd/slurmd.c
@@ -91,6 +91,7 @@ static void _create_conf();
 static void _init_conf();
 static void _print_conf();
 static void _read_config();
+static void _kill_old_slurmd(void);
 static void _reconfigure();
 static void _wait_for_all_threads();
 static void _set_slurmd_spooldir(void);
@@ -124,8 +125,15 @@ main (int argc, char *argv[])
 
 	conf->argv = &argv;
 	conf->argc = &argc;
 
+
+	log_init(argv[0], conf->log_opts, LOG_DAEMON, conf->logfile);
+	_print_conf();
+
+	_kill_old_slurmd();
 	create_pidfile(conf->pidfile);
+	info("%s started on %T", xbasename(argv[0]));
+
 	_create_msg_socket();
 
 	conf->pid = getpid();
@@ -575,6 +583,26 @@ _set_slurmd_spooldir(void)
 		fatal("chdir(%s): %m", conf->spooldir);
 }
 
+/*
+ * Send SIGTERM to the slurmd that read_pidfile() reports as holding
+ * the pidfile lock, if any, before this instance takes over.
+ */
+static void
+_kill_old_slurmd(void)
+{
+	pid_t oldpid = read_pidfile(conf->pidfile);
+	if (oldpid != (pid_t) 0) {
+		info ("killing old slurmd[%ld]", (long) oldpid);
+		if (kill(oldpid, SIGTERM) < 0) {
+			error ("unable to signal old slurmd[%ld]: %m",
+			       (long) oldpid);
+		} else {
+			/* crude grace period for the old daemon to exit */
+			sleep(2);
+		}
+	}
+}
+
 /* Reset slurmctld logging based upon configuration parameters */
 static void _update_logging(void)
-- 
GitLab