diff --git a/NEWS b/NEWS index a3fde504e9315694cffae6a9600c50f9c6bbc7ee..f31b910c96f6ab31bd44208a8c1d168a84d00b18 100644 --- a/NEWS +++ b/NEWS @@ -78,6 +78,8 @@ documents those changes that are of interest to users and admins. -- CRAY - Dynamically create BASIL XML buffer to resize as needed. -- Fix checking if QOS limit MaxCPUMinsPJ is set along with DenyOnLimit to deny the job instead of holding it. + -- Make sure on systems that use a different launcher than launch/slurm not + to attempt to signal tasks on the frontend node. * Changes in Slurm 2.5.4 ======================== diff --git a/src/slurmctld/job_mgr.c b/src/slurmctld/job_mgr.c index b9f21108199dfed0f6760ef32ab05c138e253206..4eba1e3b440989537948b333703623b9bf3b1c6c 100644 --- a/src/slurmctld/job_mgr.c +++ b/src/slurmctld/job_mgr.c @@ -8724,9 +8724,6 @@ static void _signal_job(struct job_record *job_ptr, int signal) #endif agent_arg_t *agent_args = NULL; signal_job_msg_t *signal_job_msg = NULL; -#if defined HAVE_BG_FILES && !defined HAVE_BG_L_P - static int notify_srun = 1; -#else static int notify_srun_static = -1; int notify_srun = 0; @@ -8739,6 +8736,12 @@ static void _signal_job(struct job_record *job_ptr, int signal) notify_srun_static = 0; xfree(launch_type); } + +#ifdef HAVE_FRONT_END + /* On a front end system always notify_srun instead of slurmd */ + if (notify_srun_static) + notify_srun = 1; +#else /* For launch/poe all signals are forwarded by srun to poe to tasks * except SIGSTOP/SIGCONT, which are used for job preemption. In that * case the slurmd must directly suspend tasks and switch resources. 
*/ diff --git a/src/slurmctld/step_mgr.c b/src/slurmctld/step_mgr.c index fa31ddbbf008402120a818c9cc1f164d6f3e0b9b..3f1b814fe996d3265c6fa73aa35a4378bd5285f2 100644 --- a/src/slurmctld/step_mgr.c +++ b/src/slurmctld/step_mgr.c @@ -345,6 +345,7 @@ int job_step_signal(uint32_t job_id, uint32_t step_id, int rc = SLURM_SUCCESS; static bool notify_slurmd = true; static int notify_srun = -1; + static bool front_end = false; if (notify_srun == -1) { char *launch_type = slurm_get_launch_type(); @@ -424,7 +425,14 @@ int job_step_signal(uint32_t job_id, uint32_t step_id, step_ptr->requid = uid; srun_step_complete(step_ptr); } - if ((signal == SIGKILL) || notify_slurmd) + +#ifdef HAVE_FRONT_END + front_end = true; +#endif + /* Never signal tasks on a front_end system if we aren't + * supposed to notify the slurmd (i.e. BGQ and Cray) */ + if (front_end && !notify_slurmd) { + } else if ((signal == SIGKILL) || notify_slurmd) signal_step_tasks(step_ptr, signal, REQUEST_SIGNAL_TASKS); return SLURM_SUCCESS;