diff --git a/src/common/Makefile.am b/src/common/Makefile.am
index 4e61d70b9f66cebce35dbfbffa82a7d20b471571..ec5c0eced59b78029d19601ab56c3cc2a47d3577 100644
--- a/src/common/Makefile.am
+++ b/src/common/Makefile.am
@@ -79,7 +79,8 @@ noinst_HEADERS = \
 
 libdaemonize_la_SOURCES = \
 	daemonize.c \
-	daemonize.h
+	daemonize.h \
+	fd.c fd.h
 
 libcred_la_SOURCES = \
 	credential_utils.c \
diff --git a/src/common/daemonize.c b/src/common/daemonize.c
index 45affcef1123076dfb8a33ed58d9c27c098f1c3c..08b07ade2db59222a288ebf83c20c7ec233b1a47 100644
--- a/src/common/daemonize.c
+++ b/src/common/daemonize.c
@@ -33,6 +33,7 @@
 
 #include "src/common/macros.h"
 #include "src/common/log.h"
+#include "src/common/fd.h"
 #include "src/common/xassert.h"
 
 /* closeall FDs >= a specified value */
@@ -82,9 +83,45 @@ daemon(int nochdir, int noclose)
 
 }
 
+pid_t
+read_pidfile(const char *pidfile)
+{
+	int fd;
+	FILE *fp = NULL;
+	unsigned long pid;
+	pid_t lpid;
+
+	if ((fd = open(pidfile, O_RDONLY)) < 0) {
+		debug ("unable to open old pid file: %m");
+		return ((pid_t) 0);
+	}
+
+	if (!(fp = fdopen(fd, "r")) && (errno != ENOENT))
+		fatal ("Unable to access old pidfile at `%s': %m", pidfile);
+
+	if (fscanf(fp, "%lu", &pid) < 1) {
+		error ("Possible corrupt pidfile `%s'", pidfile);
+		return ((pid_t) 0);
+	}
+
+	if ((lpid = fd_is_read_lock_blocked(fd)) == (pid_t) 0) {
+		verbose ("pidfile not locked, assuming no running slurmd");
+		return (lpid);
+	}
+
+	if (lpid != (pid_t) pid)
+		fatal ("pidfile locked by %ld but contains pid=%ld",
+		       (long) lpid, pid);
+
+	return (lpid);
+}
+
+
+
 int
 create_pidfile(const char *pidfile)
 {
+	int fd;
 	FILE *fp;
 
 	xassert(pidfile != NULL);
@@ -94,15 +131,26 @@ create_pidfile(const char *pidfile)
 		error("Unable to open pidfile `%s': %m", pidfile);
 		return -1;
 	}
-	if (fprintf(fp, "%d\n", (int) getpid()) == EOF) {
-		error("Unable to write to pidfile `%s': %m", pidfile);
+
+	if (fd_get_write_lock(fileno(fp)) < 0) {
+		error ("Unable to lock pidfile `%s': %m", pidfile);
 		goto error;
 	}
-	if (fclose(fp) == EOF) {
-		error("Unable to close pidfile `%s': %m", pidfile);
+
+	if (fprintf(fp, "%d\n", (int) getpid()) == EOF) {
+		error("Unable to write to pidfile `%s': %m", pidfile);
 		goto error;
 	}
 
+	fflush(fp);
+
+
+	/*
+	 * if (fclose(fp) == EOF) {
+	 *	error("Unable to close pidfile `%s': %m", pidfile);
+	 *	goto error;
+	 *}
+	 */
 	return 0;
 
   error:
diff --git a/src/common/daemonize.h b/src/common/daemonize.h
index 698974980574564f68d8d72df627e031388bba4a..a4f382ee7d2cf577723ba45c47734592c6afde68 100644
--- a/src/common/daemonize.h
+++ b/src/common/daemonize.h
@@ -41,4 +41,10 @@ int daemon(int nochdir, int noclose);
  */
 int create_pidfile(char *pidfilename);
 
+/*
+ * Attempt to read an old pid from the configured pidfile
+ * Returns 0 if no pidfile exists (No running process)
+ */
+pid_t read_pidfile(char *pidfilename);
+
 #endif /* !_HAVE_DAEMONIZE_H */
diff --git a/src/slurmd/slurmd.c b/src/slurmd/slurmd.c
index de29dae79f93fd148a00616e10763df0b6907f67..ddc8450a9fcb135211b310f89a37f0b178b05af6 100644
--- a/src/slurmd/slurmd.c
+++ b/src/slurmd/slurmd.c
@@ -91,6 +91,7 @@ static void _create_conf();
 static void _init_conf();
 static void _print_conf();
 static void _read_config();
+static void _kill_old_slurmd();
 static void _reconfigure();
 static void _wait_for_all_threads();
 static void _set_slurmd_spooldir(void);
@@ -124,8 +125,15 @@ main (int argc, char *argv[])
 	conf->argv = &argv;
 	conf->argc = &argc;
 
+
+	log_init(argv[0], conf->log_opts, LOG_DAEMON, conf->logfile);
+	_print_conf();
+
+	_kill_old_slurmd();
 	create_pidfile(conf->pidfile);
 
+	info("%s started on %T", xbasename(argv[0]));
+
 	_create_msg_socket();
 
 	conf->pid = getpid();
@@ -575,6 +583,16 @@ _set_slurmd_spooldir(void)
 		fatal("chdir(%s): %m", conf->spooldir);
 }
 
+static void
+_kill_old_slurmd(void)
+{
+	pid_t oldpid = read_pidfile(conf->pidfile);
+	if (oldpid != (pid_t) 0) {
+		info ("killing old slurmd[%ld]", (long) oldpid);
+		kill(oldpid, SIGTERM);
+		sleep(2);
+	}
+}
 
 /* Reset slurmctld logging based upon configuration parameters */
 static void _update_logging(void)