diff --git a/src/salloc/salloc.c b/src/salloc/salloc.c index 8b5645b729818a0c068adaf9faa259e4a177906c..68510b4002456a4e4905727771ff22a5848fce7c 100644 --- a/src/salloc/salloc.c +++ b/src/salloc/salloc.c @@ -70,10 +70,14 @@ #ifdef HAVE_BG #include "src/common/node_select.h" #include "src/plugins/select/bluegene/bg_enums.h" -#endif - -#ifndef __USE_XOPEN_EXTENDED -extern pid_t getsid(pid_t pid); /* missing from <unistd.h> */ +#elif defined(HAVE_CRAY) +#include "src/common/node_select.h" +/* + * On Cray installations, the libjob headers are not automatically installed + * by default, while libjob.so always is, and kernels are > 2.6. Hence it is + * simpler to just duplicate the single declaration here. + */ +extern uint64_t job_getjid(pid_t pid); #endif #define MAX_RETRIES 10 @@ -543,6 +547,25 @@ static void _set_submit_dir_env(void) /* Returns 0 on success, -1 on failure */ static int _fill_job_desc_from_opts(job_desc_msg_t *desc) { +#ifdef HAVE_CRAY + uint64_t pagg_id = job_getjid(getpid()); + /* + * Interactive sessions require pam_job.so in /etc/pam.d/common-session + * since creating sgi_job containers requires root permissions. This is + * the only exception where we allow the fallback of using the SID to + * confirm the reservation (caught later, in do_basil_confirm). + */ + if (pagg_id == (uint64_t)-1) { + error("No SGI job container ID detected - please enable the " + "Cray job service via /etc/init.d/job"); + } else { + if (!desc->select_jobinfo) + desc->select_jobinfo = select_g_select_jobinfo_alloc(); + + select_g_select_jobinfo_set(desc->select_jobinfo, + SELECT_JOBDATA_PAGG_ID, &pagg_id); + } +#endif desc->contiguous = opt.contiguous ? 
1 : 0; desc->features = opt.constraints; desc->gres = opt.gres; diff --git a/src/slurmctld/proc_req.c b/src/slurmctld/proc_req.c index 80722779576527201c8a8f37f9ab3ed840e594cf..44d0c65138cc076000f52afcbc8a6f0c9cbbce14 100644 --- a/src/slurmctld/proc_req.c +++ b/src/slurmctld/proc_req.c @@ -777,9 +777,9 @@ static void _slurm_rpc_allocate_resources(slurm_msg_t * msg) } #if HAVE_CRAY /* - * We are using the alloc_sid as unique identifier to confirm the ALPS - * reservation. ALPS will refuse any attempt to create a second session - * with the same identifier, hence sessions may not be nested. + * Catch attempts to nest salloc sessions. It is not possible to use an + * ALPS session which has the same alloc_sid; it fails even if PAGG + * container IDs are used. */ if (allocated_session_in_use(job_desc_msg)) { error_code = ESLURM_RESERVATION_BUSY;