From 15ce1cbff73e70ac3f29bb7a38c1129d40ddf002 Mon Sep 17 00:00:00 2001
From: Brian Christiansen <brian@schedmd.com>
Date: Mon, 16 Jan 2017 09:40:37 -0700
Subject: [PATCH] Enable canceling fed jobs from origin cluster

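Export fed_mgr_cluster_rec and add fed_mgr_job_cancel(), which walks a
job's sibling bitmap and sends a cancel request to each remote sibling
cluster over the persistent federation connection. _job_signal() calls
it when the origin cluster handles the signal: if the job is running on
a remote sibling, the origin waits for that cluster to report the job
complete; if the job is still pending, the siblings are canceled and
the local job is signalled as usual. Several lock sets in proc_req.c,
the slurmctld background thread and the gang scheduler now also take a
federation read lock.

The origin-side call pattern is roughly (illustrative sketch mirroring
the _job_signal() hunk below):

	if (job_ptr->fed_details && fed_mgr_is_origin_job(job_ptr) &&
	    (job_ptr->fed_details->cluster_lock !=
	     fed_mgr_cluster_rec->fed.id))
		fed_mgr_job_cancel(job_ptr, signal, flags, uid);

so that, for example, an scancel sent to the origin cluster should also
cancel the job on every sibling cluster.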
---
 src/slurmctld/controller.c |  2 +-
 src/slurmctld/fed_mgr.c    | 56 ++++++++++++++++++++++++++++++++++++--
 src/slurmctld/fed_mgr.h    |  3 ++
 src/slurmctld/gang.c       |  2 +-
 src/slurmctld/job_mgr.c    | 14 ++++++++++
 src/slurmctld/proc_req.c   |  6 ++--
 6 files changed, 76 insertions(+), 7 deletions(-)

diff --git a/src/slurmctld/controller.c b/src/slurmctld/controller.c
index f03d4e1d166..a104a664874 100644
--- a/src/slurmctld/controller.c
+++ b/src/slurmctld/controller.c
@@ -1657,7 +1657,7 @@ static void *_slurmctld_background(void *no_data)
 		READ_LOCK, READ_LOCK, NO_LOCK, NO_LOCK, NO_LOCK };
 	/* Locks: Read config, write job, write node, read partition */
 	slurmctld_lock_t job_write_lock = {
-		READ_LOCK, WRITE_LOCK, WRITE_LOCK, READ_LOCK, NO_LOCK };
+		READ_LOCK, WRITE_LOCK, WRITE_LOCK, READ_LOCK, READ_LOCK };
 	/* Locks: Write job */
 	slurmctld_lock_t job_write_lock2 = {
 		NO_LOCK, WRITE_LOCK, NO_LOCK, NO_LOCK, NO_LOCK };
diff --git a/src/slurmctld/fed_mgr.c b/src/slurmctld/fed_mgr.c
index b708c904a25..a8e0d8baee6 100644
--- a/src/slurmctld/fed_mgr.c
+++ b/src/slurmctld/fed_mgr.c
@@ -60,8 +60,8 @@
 
 #define FED_SIBLING_BIT(x) ((uint64_t)1 << (x - 1))
 
-slurmdb_federation_rec_t     *fed_mgr_fed_rec      = NULL;
-static slurmdb_cluster_rec_t *fed_mgr_cluster_rec  = NULL;
+slurmdb_federation_rec_t *fed_mgr_fed_rec     = NULL;
+slurmdb_cluster_rec_t    *fed_mgr_cluster_rec = NULL;
 
 static pthread_t ping_thread  = 0;
 static bool      stop_pinging = false, inited = false;
@@ -2758,6 +2758,58 @@ extern int fed_mgr_job_requeue(struct job_record *job_ptr)
 	return rc;
 }
 
+/* Cancel sibling jobs by sending a cancel request to each remote sibling */
+static int _cancel_sibling_jobs(struct job_record *job_ptr, uint16_t signal,
+				uint16_t flags, uid_t uid)
+{
+	int id = 1;
+	uint64_t tmp_sibs = job_ptr->fed_details->siblings;
+	while (tmp_sibs) {
+		if ((tmp_sibs & 1) &&
+		    (id != fed_mgr_cluster_rec->fed.id)) {
+			slurmdb_cluster_rec_t *cluster = _get_cluster_by_id(id);
+			if (!cluster) {
+				error("couldn't find cluster rec by id %d", id);
+				goto next_job;
+			}
+
+			_persist_fed_job_cancel(cluster, job_ptr->job_id,
+						signal, flags, uid);
+		}
+
+next_job:
+		tmp_sibs >>= 1;
+		id++;
+	}
+
+	return SLURM_SUCCESS;
+}
+
+/* Cancel sibling jobs of a federated job
+ *
+ * IN job_ptr - job to cancel
+ * IN signal  - signal to send to job
+ * IN flags   - KILL_.* flags
+ * IN uid     - uid making request
+ */
+extern int fed_mgr_job_cancel(struct job_record *job_ptr, uint16_t signal,
+			      uint16_t flags, uid_t uid)
+{
+	uint32_t origin_id;
+
+	xassert(job_ptr);
+
+	if (!_is_fed_job(job_ptr, &origin_id))
+		return SLURM_SUCCESS;
+
+	if (slurmctld_conf.debug_flags & DEBUG_FLAG_FEDR)
+		info("cancel fed job %u by origin", job_ptr->job_id);
+
+	_cancel_sibling_jobs(job_ptr, signal, flags, uid);
+
+	return SLURM_SUCCESS;
+}
+
 extern int fed_mgr_is_origin_job(struct job_record *job_ptr)
 {
 	uint32_t origin_id;
diff --git a/src/slurmctld/fed_mgr.h b/src/slurmctld/fed_mgr.h
index 981bed556b6..fdbaacb8ff6 100644
--- a/src/slurmctld/fed_mgr.h
+++ b/src/slurmctld/fed_mgr.h
@@ -41,6 +41,7 @@
 #include "slurm/slurmdb.h"
 
 extern slurmdb_federation_rec_t *fed_mgr_fed_rec;
+extern slurmdb_cluster_rec_t    *fed_mgr_cluster_rec;
 
 extern int       fed_mgr_add_sibling_conn(slurm_persist_conn_t *persist_conn,
 					  char **out_buffer);
@@ -59,6 +60,8 @@ extern int       fed_mgr_job_allocate(slurm_msg_t *msg,
 				      uid_t uid, uint16_t protocol_version,
 				      uint32_t *job_id_ptr, int *alloc_code,
 				      char **err_msg);
+extern int       fed_mgr_job_cancel(struct job_record *job_ptr, uint16_t signal,
+				    uint16_t flags, uid_t uid);
 extern int       fed_mgr_job_complete(struct job_record *job_ptr,
 				      uint32_t return_code, time_t start_time);
 extern int       fed_mgr_job_lock(struct job_record *job_ptr,
diff --git a/src/slurmctld/gang.c b/src/slurmctld/gang.c
index 6386830c955..974ee09e998 100644
--- a/src/slurmctld/gang.c
+++ b/src/slurmctld/gang.c
@@ -1623,7 +1623,7 @@ static void *_timeslicer_thread(void *arg)
 {
 	/* Write locks on job and read lock on nodes */
 	slurmctld_lock_t job_write_lock = {
-		NO_LOCK, WRITE_LOCK, READ_LOCK, NO_LOCK, NO_LOCK };
+		NO_LOCK, WRITE_LOCK, READ_LOCK, NO_LOCK, READ_LOCK };
 	ListIterator part_iterator;
 	struct gs_part *p_ptr;
 
diff --git a/src/slurmctld/job_mgr.c b/src/slurmctld/job_mgr.c
index 18b182ebb2e..b2787a1b740 100644
--- a/src/slurmctld/job_mgr.c
+++ b/src/slurmctld/job_mgr.c
@@ -4535,6 +4535,20 @@ static int _job_signal(struct job_record *job_ptr, uint16_t signal,
 	if (IS_JOB_FINISHED(job_ptr))
 		return ESLURM_ALREADY_DONE;
 
+	/* If this is the origin job then cancel the sibling jobs, if any.
+	 * The origin handles this because it knows where the siblings are.
+	 * If the job is running locally then just do the normal signalling. */
+	if (job_ptr->fed_details &&
+	    fed_mgr_is_origin_job(job_ptr) &&
+	    job_ptr->fed_details->cluster_lock != fed_mgr_cluster_rec->fed.id) {
+		int rc = fed_mgr_job_cancel(job_ptr, signal, flags, uid);
+		/* If the job is running on a remote cluster then wait for the
+		 * job to report back that it's completed, otherwise just signal
+		 * the pending siblings and itself (by not returning). */
+		if (job_ptr->fed_details->cluster_lock)
+			return rc;
+	}
+
 	/* let node select plugin do any state-dependent signalling actions */
 	select_g_job_signal(job_ptr, signal);
 	last_job_update = now;
diff --git a/src/slurmctld/proc_req.c b/src/slurmctld/proc_req.c
index 61296e2b2c9..1ceabe5af08 100644
--- a/src/slurmctld/proc_req.c
+++ b/src/slurmctld/proc_req.c
@@ -1890,7 +1890,7 @@ static void _slurm_rpc_job_step_kill(uint32_t uid, slurm_msg_t * msg)
 		(job_step_kill_msg_t *) msg->data;
 	/* Locks: Read config, write job, write node */
 	slurmctld_lock_t job_write_lock = {
-		READ_LOCK, WRITE_LOCK, WRITE_LOCK, NO_LOCK, NO_LOCK };
+		READ_LOCK, WRITE_LOCK, WRITE_LOCK, NO_LOCK, READ_LOCK };
 	struct job_record *job_ptr;
 
 	START_TIMER;
@@ -5220,7 +5220,7 @@ inline static void  _slurm_rpc_set_debug_flags(slurm_msg_t *msg)
 	uid_t uid = g_slurm_auth_get_uid(msg->auth_cred,
 					 slurmctld_config.auth_info);
 	slurmctld_lock_t config_write_lock =
-		{ WRITE_LOCK, NO_LOCK, NO_LOCK, NO_LOCK, NO_LOCK };
+		{ WRITE_LOCK, NO_LOCK, NO_LOCK, NO_LOCK, READ_LOCK };
 	set_debug_flags_msg_t *request_msg =
 		(set_debug_flags_msg_t *) msg->data;
 	uint64_t debug_flags;
@@ -5793,7 +5793,7 @@ _slurm_rpc_kill_job2(slurm_msg_t *msg)
 	DEF_TIMERS;
 	job_step_kill_msg_t *kill;
 	slurmctld_lock_t lock = {READ_LOCK, WRITE_LOCK,
-				 WRITE_LOCK, NO_LOCK, NO_LOCK };
+				 WRITE_LOCK, NO_LOCK, READ_LOCK };
 	uid_t uid;
 	int cc;
 
-- 
GitLab