From 685b46c7a90137fe11b0d8cc8c7e093879f86904 Mon Sep 17 00:00:00 2001
From: Mark Grondona <mgrondona@llnl.gov>
Date: Thu, 20 Mar 2003 00:08:36 +0000
Subject: [PATCH]  o fix bug in slurmd shared memory initialization when
 lockfile exists but SysV semaphore doesn't

 o changes to some of the logic in slurm_cred_t validity checks
---
 src/slurmd/req.c       | 32 ++++++++++++++++++------
 src/slurmd/semaphore.c |  4 +--
 src/slurmd/shm.c       | 55 ++++++++++++++++++++++++++++++++----------
 3 files changed, 68 insertions(+), 23 deletions(-)

diff --git a/src/slurmd/req.c b/src/slurmd/req.c
index f20e8676846..65a363db0ac 100644
--- a/src/slurmd/req.c
+++ b/src/slurmd/req.c
@@ -53,6 +53,7 @@
 #define MAXHOSTNAMELEN	64
 #endif
 
+
 static bool _job_still_running(uint32_t job_id);
 static int  _kill_all_active_steps(uint32_t jobid, int sig);
 static int  _launch_tasks(launch_tasks_request_msg_t *, slurm_addr *);
@@ -71,6 +72,7 @@ static int  _run_prolog(uint32_t jobid, uid_t uid);
 static int  _run_epilog(uint32_t jobid, uid_t uid);
 static void _wait_for_procs(uint32_t job_id, uid_t job_uid);
 
+
 void
 slurmd_req(slurm_msg_t *msg, slurm_addr *cli)
 {
@@ -144,7 +146,6 @@ _launch_batch_job(batch_job_launch_msg_t *req, slurm_addr *cli)
 {	
 	pid_t pid;
 	int rc;
-
 	
 	switch ((pid = fork())) {
 		case -1:
@@ -202,9 +203,21 @@ _check_job_credential(slurm_cred_t cred, uint32_t jobid,
 	slurm_cred_arg_t arg;
 	hostset_t        hset = NULL;
 
-	if (slurm_cred_verify(conf->vctx, cred, &arg) < 0)
+	/*
+	 * First call slurm_cred_verify() so that all valid
+	 * credentials are checked
+	 */
+	if ( (slurm_cred_verify(conf->vctx, cred, &arg) < 0)
+	   && (uid != conf->slurm_user_id) )
 		return SLURM_ERROR;
 
+	/*
+	 * If the requesting user is the slurm user, do not perform
+	 * any more validity checks
+	 */
+	if (uid == conf->slurm_user_id)
+		return SLURM_SUCCESS;
+
 	if ((arg.jobid != jobid) || (arg.stepid != stepid)) {
 		error("job credential for %d.%d, expected %d.%d",
 		      arg.jobid, arg.stepid, jobid, stepid); 
@@ -232,6 +245,9 @@ _check_job_credential(slurm_cred_t cred, uint32_t jobid,
 		goto fail;
 	}
 
+	hostset_destroy(hset);
+	xfree(arg.hostlist);
+
 	return SLURM_SUCCESS;
 
     fail:
@@ -273,14 +289,14 @@ _rpc_launch_tasks(slurm_msg_t *msg, slurm_addr *cli)
 	if (!slurm_cred_jobid_cached(conf->vctx, req->job_id)) 
 		run_prolog = true;
 
-
-	if ( (_check_job_credential(req->cred, jobid, stepid, req_uid) < 0) 
-	    && (super_user == false) ) {
+	if (_check_job_credential(req->cred, jobid, stepid, req_uid) < 0) {
 		retval = errno;
-		error("Invalid credential from %ld@%s: %m", req_uid, host);
+		error("Invalid job credential from %ld@%s: %m", req_uid, host);
 		goto done;
 	}
 
+	xassert(slurm_cred_jobid_cached(conf->vctx, req->job_id));
+
 	/* Run job prolog if necessary */
 	if (run_prolog && (_run_prolog(req->job_id, req->uid) != 0)) {
 		error("[job %d] prolog failed", req->job_id);
@@ -439,7 +455,6 @@ _kill_running_session_mgrs(uint32_t jobid, int signum)
 	List         steps = shm_get_steps();
 	ListIterator i     = list_iterator_create(steps);
 	job_step_t  *s     = NULL; 
-	int step_cnt       = 0;  
 
 	while ((s = list_next(i))) {
 		if (s->jobid == jobid) {
@@ -448,7 +463,7 @@ _kill_running_session_mgrs(uint32_t jobid, int signum)
 	}
 	list_destroy(steps);
 
-	return step_cnt;
+	return;
 }
 
 /* For the specified job_id: Send SIGXCPU, reply to slurmctld, 
@@ -771,3 +786,4 @@ _run_epilog(uint32_t jobid, uid_t uid)
 	return error_code;
 }
 
+
diff --git a/src/slurmd/semaphore.c b/src/slurmd/semaphore.c
index 29794104b92..80eeb9385be 100644
--- a/src/slurmd/semaphore.c
+++ b/src/slurmd/semaphore.c
@@ -46,9 +46,9 @@
 #include <sys/ipc.h>
 #include <sys/sem.h>
 #include <stdio.h>
-#include "semaphore.h"
 
-#include "../common/log.h"
+#include "src/slurmd/semaphore.h"
+#include "src/common/log.h"
 
 
 #define MAX_TRIES 3
diff --git a/src/slurmd/shm.c b/src/slurmd/shm.c
index 99b8968f111..05fbcee25e4 100644
--- a/src/slurmd/shm.c
+++ b/src/slurmd/shm.c
@@ -192,9 +192,9 @@ shm_cleanup(void)
 	key_t key;
 	int id = -1;
 
+	info("request to destroy shm lock [%s]", SHM_LOCKNAME);
 	if ((s = _create_ipc_name(SHM_LOCKNAME))) {
 		key = ftok(s, 1);
-		info("request to destroy shm lock `%s'", s);
 		if (sem_unlink(s) < 0)
 			error("sem_unlink: %m");
 		xfree(s);
@@ -923,24 +923,52 @@ static int
 _shm_reopen()
 {
 	int retval = SLURM_SUCCESS;
+	int oflags = O_EXCL;        /* Try to reopen semaphore first */
+
+	debug2("going to reopen slurmd shared memory");
+
+	shm_lock = _sem_open(SHM_LOCKNAME, oflags, 0600, 0);
+	/*
+	 * If open of shm lock failed, we could be in one of two
+	 * situations:  
+	 *
+	 * 1. The lockfile associated with the semaphore exists,
+	 *    but the semaphore does not exist (errno == ENOENT) 
+	 *    or
+	 * 2. system failure trying to attach to semaphore.
+	 *
+	 *  For 1, we can cleanup the shm lock, then initialize
+	 *  a new shared memory region, but for 2, we need to
+	 *  exit with a failure
+	 */
 
-	if ((shm_lock = _sem_open(SHM_LOCKNAME, O_CREAT|O_EXCL, 0600, 0)) 
-	     == SEM_FAILED) {
+	if ((shm_lock == SEM_FAILED)) {
 		if (errno != ENOENT) {
 			error("Unable to initialize semaphore: %m");
 			return SLURM_FAILURE;
 		}
-		debug("Lockfile found but semaphore deleted: "
-		      "creating new shm segment");
-		shm_cleanup();
-		if ((shm_lock = _sem_open(SHM_LOCKNAME,O_CREAT|O_EXCL, 
-		     0600, 0)) == SEM_FAILED) {
-			error("Unable to initialize semaphore: %m");
-			return SLURM_FAILURE;
-		}
+
+		debug2( "lockfile exists, but semaphore was deleted: "
+		        "reinitializing shm"                          );
+
+		/*
+		 * Unlink old lockfile, reopen semaphore with create flag,
+		 * and create new shared memory area
+		 */
+		sem_unlink(lockname);
+		shm_lock = _sem_open(SHM_LOCKNAME, oflags|O_CREAT, 0600, 0);
+		return _shm_new();
+	}
+	
+	if (shm_lock == SEM_FAILED) {
+		error("Unable to initialize semaphore: %m");
+		return SLURM_FAILURE;
 	}
 
-	/* Attach to shared memory region */
+	/* 
+	 * Attach to shared memory region 
+	 * If attach fails, try to create a new shm segment
+	 */
 	if ((_shm_attach() < 0) && (_shm_create() < 0)) {
 		error("shm_create(): %m");
 		return SLURM_FAILURE;
@@ -983,7 +1011,8 @@ _shm_lock_and_initialize()
 	}
 
 	shm_lock = _sem_open(SHM_LOCKNAME, O_CREAT|O_EXCL, 0600, 0);
-	debug3("lockname is `%s'", lockname);
+	debug3("slurmd lockfile is `%s': %m", lockname);
+
 	if (shm_lock != SEM_FAILED) /* lock didn't exist. Create shmem      */
 		return _shm_new();
 	else                        /* lock exists. Attach to shared memory */
-- 
GitLab