From cb68cce85aa466cb72a74eab36439f038d470542 Mon Sep 17 00:00:00 2001 From: Moe Jette <jette1@llnl.gov> Date: Sat, 26 Mar 2011 02:57:23 +0000 Subject: [PATCH] Patch #21: Prevents nesting of ALPS sessions within salloc. Please see separate email correspondence with Ben - to me this now looks very similar to the way the SGI pagg IDs are used on Cray: * upper part = node_id (corresponding to alloc_node) * lower part = 32 bit job ID (corresponding to alloc_sid) --- src/slurmctld/job_mgr.c | 36 ++++++++++++++++++++++++++++++++++++ src/slurmctld/proc_req.c | 12 ++++++++++++ src/slurmctld/slurmctld.h | 7 +++++++ 3 files changed, 55 insertions(+) diff --git a/src/slurmctld/job_mgr.c b/src/slurmctld/job_mgr.c index d1f22d1605b..ea05b8b825c 100644 --- a/src/slurmctld/job_mgr.c +++ b/src/slurmctld/job_mgr.c @@ -2166,6 +2166,42 @@ extern bool partition_in_use(char *part_name) return false; } +/* + * allocated_session_in_use - check if an interactive session is already running + * IN new_alloc - allocation (alloc_node:alloc_sid) to test for + * Returns true if an interactive session of the same node:sid is already in use + * by a RUNNING, PENDING, or SUSPENDED job. Provides its own locking. 
+ */ +extern bool allocated_session_in_use(job_desc_msg_t *new_alloc) +{ + ListIterator job_iter; + struct job_record *job_ptr; + /* Locks: Read job */ + slurmctld_lock_t job_read_lock = { + NO_LOCK, READ_LOCK, NO_LOCK, NO_LOCK }; + + if ((new_alloc->script != NULL) || (new_alloc->alloc_node == NULL)) + return false; /* requests with a script or without alloc_node are never reported as in use */ + + lock_slurmctld(job_read_lock); + job_iter = list_iterator_create(job_list); + if (job_iter == NULL) + fatal("list_iterator_create: malloc failure"); + + while ((job_ptr = (struct job_record *)list_next(job_iter))) { + if (job_ptr->batch_flag || IS_JOB_FINISHED(job_ptr)) + continue; /* skip batch jobs and finished jobs */ + if (job_ptr->alloc_node && + (strcmp(job_ptr->alloc_node, new_alloc->alloc_node) == 0) && + (job_ptr->alloc_sid == new_alloc->alloc_sid)) + break; /* same alloc_node and alloc_sid: leave job_ptr non-NULL to signal the match */ + } + list_iterator_destroy(job_iter); + unlock_slurmctld(job_read_lock); + + return job_ptr != NULL; /* NULL only when the list was exhausted without a match; the pointer is compared, not dereferenced, after the unlock */ +} + /* * kill_running_job_by_node_name - Given a node name, deallocate RUNNING * or COMPLETING jobs from the node or kill them diff --git a/src/slurmctld/proc_req.c b/src/slurmctld/proc_req.c index a98a3b0d52c..42d8a376481 100644 --- a/src/slurmctld/proc_req.c +++ b/src/slurmctld/proc_req.c @@ -769,6 +769,18 @@ static void _slurm_rpc_allocate_resources(slurm_msg_t * msg) error("REQUEST_RESOURCE_ALLOCATE lacks alloc_node from uid=%d", uid); } +#if HAVE_CRAY + /* + * We are using the alloc_sid as unique identifier to confirm the ALPS + * reservation. ALPS will refuse any attempt to create a second session + * with the same identifier, hence sessions may not be nested. 
+ */ + if (allocated_session_in_use(job_desc_msg)) { + error_code = ESLURM_RESERVATION_BUSY; + error("attempt to nest ALPS allocation on %s:%d by uid=%d", + job_desc_msg->alloc_node, job_desc_msg->alloc_sid, uid); + } +#endif slurm_get_peer_addr(msg->conn_fd, &resp_addr); job_desc_msg->resp_host = xmalloc(16); slurm_get_ip_str(&resp_addr, &port, job_desc_msg->resp_host, 16); diff --git a/src/slurmctld/slurmctld.h b/src/slurmctld/slurmctld.h index 9c012ef9eec..462f4ca0272 100644 --- a/src/slurmctld/slurmctld.h +++ b/src/slurmctld/slurmctld.h @@ -875,6 +875,13 @@ extern bool is_node_down (char *name); */ extern bool is_node_resp (char *name); +/* + * allocated_session_in_use - check if an interactive session is already running + * IN new_alloc - allocation (alloc_node:alloc_sid) to test for + * Returns true if an interactive session of the same node:sid already exists. Provides its own locking (job read lock). + */ +extern bool allocated_session_in_use(job_desc_msg_t *new_alloc); + /* * job_alloc_info - get details about an existing job allocation * IN uid - job issuing the code -- GitLab