diff --git a/NEWS b/NEWS index f153af7c4db9c9e354f6c64505e642b1c25d8182..5cc313b68a60a7dd885248c2425d06bb85cc03d3 100644 --- a/NEWS +++ b/NEWS @@ -17,6 +17,8 @@ documents those changes that are of interest to users and administrators. that had a partition in them. -- Don't return the extern step from sstat by default. -- In sstat print 'extern' instead of 4294967295 for the extern step. + -- Lock slurmstepd in memory to avoid possible SIGBUS if the daemon is paged + out at the time of a Slurm upgrade (changing plugins). * Changes in Slurm 16.05.3 ========================== diff --git a/src/slurmd/slurmstepd/slurmstepd.c b/src/slurmd/slurmstepd/slurmstepd.c index ae13ff0f1d6adcef99b5ab39609888c355a350c1..4ee439718abc545c395141dfd8a041706a3ffd51 100644 --- a/src/slurmd/slurmstepd/slurmstepd.c +++ b/src/slurmd/slurmstepd/slurmstepd.c @@ -157,6 +157,17 @@ main (int argc, char *argv[]) * on STDERR_FILENO for us. */ dup2(STDERR_FILENO, STDOUT_FILENO); + /* slurmstepd is the only daemon that should survive an upgrade. If it + * was swapped out before the upgrade happened, it could receive SIGBUS + * at any time after the upgrade. Avoid that by locking all of its + * current and future pages in memory. */ +#ifdef _POSIX_MEMLOCK + if (mlockall(MCL_FUTURE | MCL_CURRENT) < 0) + info("failed to mlock() slurmstepd pages: %m"); +#else + info("mlockall() system call does not appear to be available"); +#endif + /* This does most of the stdio setup, then launches all the tasks, * and blocks until the step is complete */ rc = job_manager(job);