From add1f9d350811aa5f0e75bb6a3663f5b1b1185e3 Mon Sep 17 00:00:00 2001
From: Brian Christiansen <brian@schedmd.com>
Date: Mon, 3 Apr 2017 16:55:53 -0600
Subject: [PATCH] Fix double locking issue

The issue occurs when fed_mgr_is_active() is called with a federation lock
already held. It does its own locking, so only call it outside of any locks.
---
 src/slurmctld/fed_mgr.c  | 5 ++++-
 src/slurmctld/job_mgr.c  | 4 ++--
 src/slurmctld/proc_req.c | 2 +-
 3 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/src/slurmctld/fed_mgr.c b/src/slurmctld/fed_mgr.c
index 1d117718b07..b074cdcf5af 100644
--- a/src/slurmctld/fed_mgr.c
+++ b/src/slurmctld/fed_mgr.c
@@ -1661,6 +1661,9 @@ unpack_error:
 
 /*
  * Returns true if the cluster is part of a federation.
+ *
+ * Does its own locking. Don't use this if the FED_[READ|WRITE] lock is already
+ * set.
  */
 extern bool fed_mgr_is_active(void)
 {
@@ -3317,7 +3320,7 @@ extern int fed_mgr_update_job_cluster_features(struct job_record *job_ptr,
 	} else if ((!IS_JOB_PENDING(job_ptr)) ||
 		   job_ptr->fed_details->cluster_lock) {
 		rc = ESLURM_JOB_NOT_PENDING;
-	} else if (!fed_mgr_is_active()) {
+	} else if (!fed_mgr_fed_rec) {
 		info("sched: update_job: setting ClusterFeatures on a non-active federated cluster for job %u",
 		     job_ptr->job_id);
 		rc = ESLURM_JOB_NOT_FEDERATED;
diff --git a/src/slurmctld/job_mgr.c b/src/slurmctld/job_mgr.c
index beef8424d5b..8291abd20b5 100644
--- a/src/slurmctld/job_mgr.c
+++ b/src/slurmctld/job_mgr.c
@@ -6614,7 +6614,7 @@ extern int validate_job_create_req(job_desc_msg_t * job_desc, uid_t submit_uid,
 	if (rc != SLURM_SUCCESS)
 		return rc;
 
-	if (job_desc->array_inx && fed_mgr_is_active())
+	if (job_desc->array_inx && fed_mgr_fed_rec)
 		return ESLURM_NOT_SUPPORTED;
 
 	if (!_valid_array_inx(job_desc))
@@ -8182,7 +8182,7 @@ static int _validate_job_desc(job_desc_msg_t * job_desc_msg, int allocate,
 
 	if (job_desc_msg->job_id != NO_VAL) {
 		struct job_record *dup_job_ptr;
-		if (!fed_mgr_is_active() &&
+		if (!fed_mgr_fed_rec &&
 		    (submit_uid != 0) &&
 		    (submit_uid != slurmctld_conf.slurm_user_id)) {
 			info("attempt by uid %u to set job_id to %u",
diff --git a/src/slurmctld/proc_req.c b/src/slurmctld/proc_req.c
index 920eb8da712..62f0468b9dd 100644
--- a/src/slurmctld/proc_req.c
+++ b/src/slurmctld/proc_req.c
@@ -5507,7 +5507,7 @@ _slurm_rpc_kill_job2(slurm_msg_t *msg)
 	 * know about the job and it owns the job, the this cluster will cancel
 	 * the job and it will report the cancel back to the origin. */
 	lock_slurmctld(fed_job_read_lock);
-	if (fed_mgr_is_active()) {
+	if (fed_mgr_fed_rec) {
 		uint32_t job_id, origin_id;
 		struct job_record *job_ptr;
 
-- 
GitLab