From c58a84c7907b3e860c3db1412e215536057bade0 Mon Sep 17 00:00:00 2001
From: Marshall Garey <marshall@schedmd.com>
Date: Tue, 12 Dec 2017 21:01:27 -0800
Subject: [PATCH] Add documentation for pam_slurm_adopt.

Based off of Ryan Cox's original contribs/pam_slurm_adopt/README.

Bug 3567.
---
 doc/html/Makefile.am           |   1 +
 doc/html/Makefile.in           |   1 +
 doc/html/documentation.shtml   |   3 +-
 doc/html/faq.shtml             |   8 +-
 doc/html/pam_slurm_adopt.shtml | 236 +++++++++++++++++++++++++++++++++
 5 files changed, 246 insertions(+), 3 deletions(-)
 create mode 100644 doc/html/pam_slurm_adopt.shtml

diff --git a/doc/html/Makefile.am b/doc/html/Makefile.am
index 6d199e02205..e99e75a2d6c 100644
--- a/doc/html/Makefile.am
+++ b/doc/html/Makefile.am
@@ -64,6 +64,7 @@ generated_html = \
 	news.html \
 	node_features_plugins.html \
 	overview.html \
+	pam_slurm_adopt.html \
 	platforms.html \
 	plugins.html \
 	power_mgmt.html \
diff --git a/doc/html/Makefile.in b/doc/html/Makefile.in
index a7fd30d3a7e..d2d85b2ca7c 100644
--- a/doc/html/Makefile.in
+++ b/doc/html/Makefile.in
@@ -522,6 +522,7 @@ generated_html = \
 	news.html \
 	node_features_plugins.html \
 	overview.html \
+	pam_slurm_adopt.html \
 	platforms.html \
 	plugins.html \
 	power_mgmt.html \
diff --git a/doc/html/documentation.shtml b/doc/html/documentation.shtml
index 2d342551891..73c09b538ac 100644
--- a/doc/html/documentation.shtml
+++ b/doc/html/documentation.shtml
@@ -48,6 +48,7 @@ may be found in the <a href="https://slurm.schedmd.com/archive/">archive</a>.
 <li><a href="cpu_management.html">CPU Management User and Administrator Guide</a></li>
 <li><a href="nonstop.html">Failure Management Support</a></li>
 <li><a href="federation.html">Federated Scheduling Guide</a></li>
+<li><a href="pam_slurm_adopt.html">Job Containment with pam_slurm_adopt</a></li>
 <li><a href="big_sys.html">Large Cluster Administration Guide</a></li>
 <li><a href="licenses.html">License Management</a></li>
 <li><a href="mcs.html">Multi-Category Security (MCS) Guide</a></li>
@@ -140,6 +141,6 @@ may be found in the <a href="https://slurm.schedmd.com/archive/">archive</a>.
 </li>
 </ul>
 
-<p style="text-align:center;">Last modified 18 May 2017</p>
+<p style="text-align:center;">Last modified 12 December 2017</p>
 
 <!--#include virtual="footer.txt"-->
diff --git a/doc/html/faq.shtml b/doc/html/faq.shtml
index 13e3cdde51d..c4748311559 100644
--- a/doc/html/faq.shtml
+++ b/doc/html/faq.shtml
@@ -1237,6 +1237,10 @@ Create a resource reservation as described b. Slurm's
 
 <p><a name="pam"><b>9. How can PAM be used to control a user's limits on
 or access to compute nodes?</b></a><br>
+The pam_slurm_adopt PAM module is highly recommended for most installations,
+and is documented in its <a href="pam_slurm_adopt.shtml">own guide</a>. The
+following info applies to the older less-functional pam_slurm plugin instead.
+
 You will need to build and install Slurm including it's PAM module
 (a <i>slurm_pam</i> package is provided, the code is located in the
 <i>contribs/pam</i> directory).
@@ -1266,7 +1270,7 @@ limit from being propagated:<i>PropagateResourceLimitsExcept=MEMLOCK</i>.</p>
 
 <p>We also have a PAM module for Slurm that prevents users from
 logging into nodes that they have not been allocated (except for user
-root, which can always login.
+root, which can always login).
 This pam_slurm module is included with the Slurm distribution.
 The module is built by default, but can be disabled using the
 .rpmmacros option "%_without_pam 1" or by entering the command line
@@ -2269,6 +2273,6 @@ $ mysql -u&lt;user&gt; -p&lt;user&gt; &lt;/tmp/slurm_db_backup.sql
 
 <p class="footer"><a href="#top">top</a></p>
 
-<p style="text-align:center;">Last modified 29 November 2017</p>
+<p style="text-align:center;">Last modified 12 December 2017</p>
 
 <!--#include virtual="footer.txt"-->
diff --git a/doc/html/pam_slurm_adopt.shtml b/doc/html/pam_slurm_adopt.shtml
new file mode 100644
index 00000000000..2810930e781
--- /dev/null
+++ b/doc/html/pam_slurm_adopt.shtml
@@ -0,0 +1,236 @@
+<!--#include virtual="header.txt"-->
+
+<h1>pam_slurm_adopt</h1>
+
+<p>The purpose of this module is to prevent users from sshing into nodes that
+they do not have a running job on, and to track the ssh connection and any
+other spawned processes for accounting and to ensure complete job cleanup when
+the job is completed. This module does this by determining the job which
+originated the ssh connection. The user's connection is "adopted" into the
+"external" step of the job.</p>
+
+<h2><a name="INSTALLATION">Installation</a></h2>
+
+<h3>Source:</h3>
+
+<p>In your Slurm build directory, navigate to slurm/contribs/pam_slurm_adopt/
+and run
+
+<pre>make &amp;&amp; make install</pre>
+
+as root. This will place pam_slurm_adopt.a,
+pam_slurm_adopt.la, and pam_slurm_adopt.so in /lib/security/ (on Debian
+systems) or /lib64/security/ (on RedHat/SuSE systems).</p>
+
+<h3>RPM:</h3>
+
+<p>The included slurm.spec will build a slurm-pam_slurm RPM which will install
+pam_slurm_adopt. Refer to the
+<a href="https://slurm.schedmd.com/quickstart_admin.html">Quick Start
+Administrator Guide</a> for instructions on managing an RPM-based install.</p>
+
+<h2><a name="PAM_CONFIG">PAM Configuration</a></h2>
+
+<p>Add the following line to the appropriate file in /etc/pam.d, such as
+system-auth or sshd (you may use either the "required" or "sufficient" PAM
+control flag):</p>
+
+<pre>
+account    sufficient    pam_slurm_adopt.so
+</pre>
+
+<p>The last PAM module in the account stack should be pam_slurm_adopt.so. For
+example, you might have the following account stack in sshd:</p>
+
+<pre>
+account    required      pam_nologin.so
+account    include       password-auth
+account    sufficient    pam_slurm_adopt.so
+</pre>
+
+<p>pam_slurm_adopt must be used with the task/cgroup plugin.
+The pam_systemd module will conflict with pam_slurm_adopt, so you need to
+disable it. In addition, make sure a different PAM module isn't short-circuiting
+the account stack before it gets to pam_slurm_adopt.so. For the example above,
+the following two lines have been commented out in password-auth:</p>
+
+<pre>
+#account    sufficient    pam_localuser.so
+#-session   optional      pam_systemd.so
+</pre>
+
+<p>Note: This may involve editing a file that is auto-generated (password-auth
+in this example). Do not run the config script that generates the file or your
+changes will be erased.</p>
+
+<p>If you always want to allow access for an administrative group (e.g., wheel),
+stack the pam_access module after pam_slurm_adopt. A success with
+pam_slurm_adopt is sufficient to allow access, but the pam_access module can
+allow others, such as administrative staff, access even without jobs on that
+node:</p>
+
+<pre>
+account    sufficient    pam_slurm_adopt.so
+account    required      pam_access.so
+</pre>
+
+<p>Then edit the pam_access configuration file (/etc/security/access.conf):</p>
+
+<pre>
++:wheel:ALL
+-:ALL:ALL
+</pre>
+
+<p>When access is denied, the user will receive a relevant error message.</p>
+
+<h2><a name="OPTIONS">pam_slurm_adopt Module Options</a></h2>
+
+<p>
+This module is configurable. Add these options to the end of the pam_slurm_adopt
+line in the appropriate file in /etc/pam.d/ (e.g., sshd or system-auth):
+</p>
+
+<pre>
+account sufficient pam_slurm_adopt.so optionname=optionvalue
+</pre>
+
+<p>This module has the following options:</p>
+
+<dl compact>
+
+<dt><b>action_no_jobs</b></dt>
+<dd>
+The action to perform if the user has no jobs on the node. Configurable
+values are:
+
+<dl compact>
+<dt></dt>
+<dd>
+<dl compact>
+  <dt><b>ignore</b></dt>
+<dd>Do nothing. Fall through to the next pam module.</dd>
+<dt><b>deny</b> (default)</dt>
+<dd>Deny the connection.</dd>
+</dl>
+</dd>
+</dl>
+
+</dd>
+
+<dt><b>action_unknown</b></dt>
+<dd>
+The action to perform when the user has multiple jobs on the node <b>and</b>
+the RPC does not locate the source job. If the RPC mechanism works properly in
+your environment, this option will likely be relevant <b>only</b> when
+connecting from a login node. Configurable values are:
+
+<dl compact>
+<dt></dt>
+<dd>
+<dl compact>
+<dt><b>newest</b> (default)</dt>
+<dd>Pick the newest job on the node. The "newest" job is chosen based
+on the mtime of the job's step_extern cgroup; asking Slurm would
+require an RPC to the controller. Thus, the memory cgroup must be in
+use so that the code can check mtimes of cgroup directories. The user
+can ssh in but may be adopted into a job that exits earlier than the
+job they intended to check on. The ssh connection will at least be
+subject to appropriate limits and the user can be informed of better
+ways to accomplish their objectives if this becomes a problem.
+</dd>
+<dt><b>allow</b></dt>
+<dd>Let the connection through without adoption.</dd>
+<dt><b>deny</b></dt>
+<dd>Deny the connection.</dd>
+</dl>
+</dd>
+</dl>
+
+</dd>
+
+<dt><b>action_adopt_failure</b></dt>
+<dd>The action to perform if the process is unable to be adopted into any
+job for whatever reason. If the process cannot be adopted into the job
+identified by the callerid RPC, it will fall through to the action_unknown
+code and try to adopt there. A failure at that point or if there is only
+one job will result in this action being taken. Configurable values are:
+
+<dl compact>
+<dt></dt>
+<dd>
+<dl compact>
+<dt><b>allow</b> (default)</dt>
+<dd>Let the connection through without adoption.</dd>
+<dt><b>deny</b></dt>
+<dd>Deny the connection.</dd>
+</dl>
+</dd>
+</dl>
+
+</dd>
+
+<dt><b>action_generic_failure</b></dt>
+<dd>The action to perform if there are certain failures such as the
+inability to talk to the local <i>slurmd</i> or if the kernel doesn't offer
+the correct facilities. Configurable values are:
+
+<dl compact>
+<dt></dt>
+<dd>
+<dl compact>
+<dt><b>ignore</b> (default)</dt>
+<dd>Do nothing. Fall through to the next pam module.</dd>
+<dt><b>allow</b></dt>
+<dd>Let the connection through without adoption.</dd>
+<dt><b>deny</b></dt>
+<dd>Deny the connection.</dd>
+</dl>
+</dd>
+</dl>
+
+</dd>
+
+<dt><b>log_level</b></dt>
+<dd>See <a href="https://slurm.schedmd.com/slurm.conf.html#OPT_SlurmdDebug">
+SlurmdDebug</a> in slurm.conf for available options.
+The default log_level is <b>info</b>.
+</dd>
+
+</dl>
+
+<h2><a name="SLURM_CONFIG">Slurm Configuration</a></h2>
+
+<p><b>PrologFlags=contain</b> must be set in the slurm.conf. This sets up the
+"extern" step into which ssh-launched processes will be adopted. You must also
+enable the task/cgroup plugin in slurm.conf. See the <a
+href="https://slurm.schedmd.com/cgroups.html">Slurm cgroups guide.</a></p>
+
+<h3><b>Important</b></h3>
+
+<p><b>PrologFlags=contain</b> must be in place <i>before</i> using this module.
+The module bases its checks on local steps that have already been launched. If
+the user has no steps on the node, such as the extern step, the module will
+assume that the user has no jobs allocated to the node. Depending on your
+configuration of the PAM module, you might accidentally deny all user ssh
+attempts without <b>PrologFlags=contain.</b></p>
+
+<h2><a name="OTHER_CONFIG">Other Configuration</a></h2>
+
+<p>Verify that <b>UsePAM</b> is set to <b>On</b> in /etc/ssh/sshd_config (it
+should be on by default).</p>
+
+<p>The <b>UsePAM</b> option in slurm.conf is not related to this module, and
+usually should not be set on your cluster.</p>
+
+<h3>Firewalls, IP Addresses, etc.</h3>
+
+<p><i>slurmd</i> should be accessible on any IP address from which a user might
+launch ssh. The RPC to determine the source job must be able to reach the
+<i>slurmd</i> port on that particular IP address. If there is no <i>slurmd</i>
+on the source node, such as on a login node, it is better to have the RPC be
+rejected rather than silently dropped. This will allow better responsiveness to
+the RPC initiator.</p>
+
+<p style="text-align:center;">Last modified 12 December 2017</p>
+
+<!--#include virtual="footer.txt"-->
-- 
GitLab