From add1f9d350811aa5f0e75bb6a3663f5b1b1185e3 Mon Sep 17 00:00:00 2001 From: Brian Christiansen <brian@schedmd.com> Date: Mon, 3 Apr 2017 16:55:53 -0600 Subject: [PATCH] Fix double locking issue when using fed_mgr_is_active(). It should only be used outside of any locks. --- src/slurmctld/fed_mgr.c | 5 ++++- src/slurmctld/job_mgr.c | 4 ++-- src/slurmctld/proc_req.c | 2 +- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/src/slurmctld/fed_mgr.c b/src/slurmctld/fed_mgr.c index 1d117718b07..b074cdcf5af 100644 --- a/src/slurmctld/fed_mgr.c +++ b/src/slurmctld/fed_mgr.c @@ -1661,6 +1661,9 @@ unpack_error: /* * Returns true if the cluster is part of a federation. + * + * Does its own locking. Don't use this if the FED_[READ|WRITE] lock is already + * set. */ extern bool fed_mgr_is_active(void) { @@ -3317,7 +3320,7 @@ extern int fed_mgr_update_job_cluster_features(struct job_record *job_ptr, } else if ((!IS_JOB_PENDING(job_ptr)) || job_ptr->fed_details->cluster_lock) { rc = ESLURM_JOB_NOT_PENDING; - } else if (!fed_mgr_is_active()) { + } else if (!fed_mgr_fed_rec) { info("sched: update_job: setting ClusterFeatures on a non-active federated cluster for job %u", job_ptr->job_id); rc = ESLURM_JOB_NOT_FEDERATED; diff --git a/src/slurmctld/job_mgr.c b/src/slurmctld/job_mgr.c index beef8424d5b..8291abd20b5 100644 --- a/src/slurmctld/job_mgr.c +++ b/src/slurmctld/job_mgr.c @@ -6614,7 +6614,7 @@ extern int validate_job_create_req(job_desc_msg_t * job_desc, uid_t submit_uid, if (rc != SLURM_SUCCESS) return rc; - if (job_desc->array_inx && fed_mgr_is_active()) + if (job_desc->array_inx && fed_mgr_fed_rec) return ESLURM_NOT_SUPPORTED; if (!_valid_array_inx(job_desc)) @@ -8182,7 +8182,7 @@ static int _validate_job_desc(job_desc_msg_t * job_desc_msg, int allocate, if (job_desc_msg->job_id != NO_VAL) { struct job_record *dup_job_ptr; - if (!fed_mgr_is_active() && + if (!fed_mgr_fed_rec && (submit_uid != 0) && (submit_uid != slurmctld_conf.slurm_user_id)) { info("attempt 
by uid %u to set job_id to %u", diff --git a/src/slurmctld/proc_req.c b/src/slurmctld/proc_req.c index 920eb8da712..62f0468b9dd 100644 --- a/src/slurmctld/proc_req.c +++ b/src/slurmctld/proc_req.c @@ -5507,7 +5507,7 @@ _slurm_rpc_kill_job2(slurm_msg_t *msg) * know about the job and it owns the job, the this cluster will cancel * the job and it will report the cancel back to the origin. */ lock_slurmctld(fed_job_read_lock); - if (fed_mgr_is_active()) { + if (fed_mgr_fed_rec) { uint32_t job_id, origin_id; struct job_record *job_ptr; -- GitLab