From 71eb412d2a4714261c5067b4dc544898e1e42442 Mon Sep 17 00:00:00 2001
From: Moe Jette <jette1@llnl.gov>
Date: Fri, 13 Jul 2007 23:37:53 +0000
Subject: [PATCH] Make OpenSSL be an optional digital signature plugin
 (crypto/openssl) with Munge an alternative.

---
 NEWS                            |  4 +-
 doc/html/download.shtml         |  4 +-
 doc/html/quickstart.shtml       |  2 +-
 doc/html/quickstart_admin.shtml | 75 +++++++++++++--------------------
 doc/man/man1/srun.1             | 26 ++++++++----
 src/slurmd/slurmd/slurmd.c      |  9 +++-
 src/srun/srun.c                 |  3 +-
 src/srun/srun_job.c             | 64 +++-------------------------
 src/srun/srun_job.h             |  3 +-
 9 files changed, 69 insertions(+), 121 deletions(-)

diff --git a/NEWS b/NEWS
index 1c8bd1ae1bf..4c3f58b6b62 100644
--- a/NEWS
+++ b/NEWS
@@ -45,7 +45,9 @@ documents those changes that are of interest to users and admins.
     is identical to Moab's (from Ernest Artiaga and Asier Roa, BSC).
  -- Added "scontrol show slurmd" command to status local slurmd daemon.
  -- Set node DOWN if prolog fails on node zero of batch job launch.
-
+ -- Properly handle "srun --cpus-per-task" within a job allocation when 
+    SLURM_TASKS_PER_NODE environment variable is not set.
+ 
 * Changes in SLURM 1.2.11
 =========================
  -- Updated "etc/mpich1.slurm.patch" for direct srun launch of MPICH1_P4
diff --git a/doc/html/download.shtml b/doc/html/download.shtml
index 2b5684af8f4..2b4c2f59c79 100644
--- a/doc/html/download.shtml
+++ b/doc/html/download.shtml
@@ -40,7 +40,7 @@ The latest stable release is version 1.4.</p>
 <h1>Related Software</h1>
 <ul>
 
-<li><b>OpenSSL</b> is required for secure communications between SLURM 
+<li><b>OpenSSL</b> is recommended for secure communications between SLURM 
 components. Download it from 
 <a href="http://www.openssl.org/">http://www.openssl.org/</a>.
 </li>
@@ -98,6 +98,6 @@ Portable Linux Processor Affinity (PLPA)</a></li>
 
 </ul>
 
-<p style="text-align:center;">Last modified 15 June 2007</p>
+<p style="text-align:center;">Last modified 13 July 2007</p>
 
 <!--#include virtual="footer.txt"-->
diff --git a/doc/html/quickstart.shtml b/doc/html/quickstart.shtml
index 82538cacbac..9b41bca1997 100644
--- a/doc/html/quickstart.shtml
+++ b/doc/html/quickstart.shtml
@@ -363,7 +363,7 @@ with the <i>--mpi=mpich1_shmem</i> option.</p>
 $ srun -n16 --mpi=mpich1_shmem a.out
 </pre>
 
-<p>If you are using MPICH P4 (<i>DEFAULT_DEVICE=ch_shmem</i> in 
+<p>If you are using MPICH P4 (<i>DEFAULT_DEVICE=ch_p4</i> in 
 the mpirun script) and SLURM version 1.2.11 or newer, 
 then it is recommended that you apply the patch in the SLURM 
 distribution's file <i>etc/mpich1.slurm.patch</i>. 
diff --git a/doc/html/quickstart_admin.shtml b/doc/html/quickstart_admin.shtml
index a92790f22d4..877c51f9e80 100644
--- a/doc/html/quickstart_admin.shtml
+++ b/doc/html/quickstart_admin.shtml
@@ -200,40 +200,8 @@ auth/munge plugin for communication, the clocks on all nodes will need to
 be synchronized. </p>
 
 <h3>MPI support</h3>
-<p>Quadrics MPI works directly with SLURM on systems having Quadrics 
-interconnects and is the preferred version of MPI for those systems.
-Set the <b>MpiDefault=none</b> configuration parameter in slurm.conf.</p>
-
-<p>For <a href="http://www.myricom.com/">Myrinet</a> systems, MPICH-GM
-is preferred. In order to use MPICH-GM, set <b>MpiDefault=mpichgm</b> and 
-<b>ProctrackType=proctrack/linuxproc</b> configuration parameters in 
-slurm.conf.</p>
-
-<p>HP customers would be well served by using 
-<a href="http://www.hp.com/go/mpi">HP-MPI</a>.</p>
-
-<p>A good open-source MPI for use with SLURM is 
-<a href="http://www.lam-mpi.org/">LAM MPI</a>. LAM MPI uses the command 
-<i>lamboot</i> to initiate job-specific daemons on each node using SLURM's 
-<span class="commandline">srun</span> 
-command. This places all MPI processes in a process-tree under the control of 
-the <b>slurmd</b> daemon. LAM/MPI version 7.1 or higher contains support for 
-SLURM. 
-Set the <b>MpiDefault=none</b> configuration parameters in slurm.conf.
-LAM MPI will explicitly set the mpi plugin type to "lam" on the  
-<span class="commandline">srun</span> execute line as needed.</p>
-
-<p>Another good open-source MPI for use with SLURM is
-<a href="http://www.open-mpi.org/">Open MPI</a>. Open MPI initiates its 
-processes using SLURM's <span class="commandline">srun</span> 
-command. 
-Set the <b>MpiDefault=none</b> configuration parameters in slurm.conf.
-Open MPI will explicitly set the mpi plugin type to "lam" on the
-<span class="commandline">srun</span> execute line as needed.</p>
-
-<p>Note that the ordering of tasks within an job's allocation matches that of 
-nodes in the slurm.conf configuration file. SLURM presently lacks the ability 
-to arbitrarily order tasks across nodes.</p> 
+<p>SLURM supports many different MPI implementations. 
+For more information, see <a href="quickstart.html#mpi">MPI</a>.</p>
 
 <h3>Scheduler support</h3>
 <p>The scheduler used by SLURM is controlled by the <b>SchedType</b> configuration 
@@ -411,10 +379,23 @@ NodeName=mcr[0-1151] NodeAddr=emcr[0-1151]
 PartitionName=DEFAULT State=UP    
 PartitionName=pdebug Nodes=mcr[0-191] MaxTime=30 MaxNodes=32 Default=YES
 PartitionName=pbatch Nodes=mcr[192-1151]
-</pre> 
+</pre>
+ 
 <h2>Security</h2>
-<p>You will should create unique job credential keys for your site
-using the program <a href="http://www.openssl.org/">openssl</a>. 
+<p>The use of <a href="http://www.openssl.org/">OpenSSL</a> is 
+recommended to provide a digital signature on job step credentials.
+<a href="http://home.gna.org/munge/">Munge</a> can alternately
+be used with somewhat slower performance.
+This signature is used by <i>slurmctld</i> to construct a job step
+credential, which is sent to <i>srun</i> and then forwarded to
+<i>slurmd</i> to initiate job steps.
+This design offers improved performance by removing much of the 
+job step initiation overhead from the <i> slurmctld </i> daemon.
+The mechanism to be used is controlled through the <b>CryptoType</b>
+configuration parameter.</p>
+
+<p>If using OpenSSL, you should create unique job credential 
+keys for your site using the program openssl. 
 <b>You must use openssl and not ssh-genkey to construct these keys.</b>
 An example of how to do this is shown below. Specify file names that 
 match the values of <b>JobCredentialPrivateKey</b> and 
@@ -423,10 +404,11 @@ The <b>JobCredentialPrivateKey</b> file must be readable only by <b>SlurmUser</b
 The <b>JobCredentialPublicCertificate</b> file must be readable by all users.
 Note that you should build the key files one one node and then distribute 
 them to all nodes in the cluster.
-This insures that all nodes have a consistent set of encryption keys.
-These keys are used by <i>slurmctld</i> to construct a job credential, 
-which is sent to <i>srun</i> and then forwarded to <i>slurmd</i> to 
-initiate job steps.</p>
+This ensures that all nodes have a consistent set of digital signature 
+keys.
+These keys are used by <i>slurmctld</i> to construct a job step 
+credential, which is sent to <i>srun</i> and then forwarded to 
+<i>slurmd</i> to initiate job steps.</p>
 
 <p class="commandline" style="margin-left:.2in">
 <i>openssl genrsa -out &lt;sysconfdir&gt;/slurm.key 1024</i><br>
@@ -434,10 +416,13 @@ initiate job steps.</p>
 </p>
 <p>Authentication of communications from SLURM commands to the daemons 
 or between the daemons uses a different security mechanism that is configurable.
-You must specify one &quot;auth&quot; plugin for this purpose. 
-Currently, only three 
-authentication plugins are supported: <b>auth/none</b>, <b>auth/authd</b>, and 
-<b>auth/munge</b>. The auth/none plugin is built and used by default, but either 
+Authentication means that SLURM can positively identify the user who originated 
+the communication.
+You must specify a plugin for this purpose using the <b>AuthType</b> configuration 
+parameter. 
+Currently, only three authentication plugins are supported: 
+<b>auth/none</b>, <b>auth/authd</b>, and <b>auth/munge</b>. 
+The auth/none plugin is built and used by default, but either 
 Brent Chun's <a href="http://www.theether.org/authd/">authd</a>, or Chris Dunlap's 
 <a href="http://home.gna.org/munge/">munge</a> should be installed in order to 
 get properly authenticated communications. 
diff --git a/doc/man/man1/srun.1 b/doc/man/man1/srun.1
index 8a2186a094b..dc49d3e19e6 100644
--- a/doc/man/man1/srun.1
+++ b/doc/man/man1/srun.1
@@ -548,6 +548,23 @@ comma\-separated and case insensitive types are recongnized:
 IBM systems see \fIpoe\fR documenation on the environment variables 
 \fBMP_EUIDEVICE\fR and \fBMP_USE_BULK_XFER\fR.
 
+.TP
+\fB\-\-nice\fR[=\fIadjustment\fR]
+Run the job with an adjusted scheduling priority. 
+With no adjustment value the scheduling priority is decreased 
+by 100. The adjustment range is from \-10000 (highest priority)
+to 10000 (lowest priority). Only privileged users can specify 
+a negative adjustment. NOTE: This option is presently 
+ignored if \fISchedulerType=sched/maui\fR.
+
+.TP
+\fB\-\-no\-requeue\fR
+Specifies that the batch job should not be requeued. 
+Setting this option will prevent system administrators from being able 
+to restart the job (for example, after a scheduled downtime).
+When a job is requeued, the batch script is initiated from its beginning.
+This option is only applicable to batch job submission (see \fB\-\-batch\fR).
+
 .TP
 \fB\-\-ntasks\-per\-core\fR=\fIntasks\fR
 Request that no more than \fIntasks\fR be invoked on each core.
@@ -568,15 +585,6 @@ is specified.
 NOTE: This option is not supported unless \fISelectType=CR_Socket\fR 
 or \fISelectType=CR_Socket_Memory\fR is configured.
 
-.TP
-\fB\-\-nice\fR[=\fIadjustment]\fR
-Run the job with an adjusted scheduling priority. 
-With no adjustment value the scheduling priority is decreased 
-by 100. The adjustment range is from \-10000 (highest priority)
-to 10000 (lowest priority). Only privileged users can specify 
-a negative adjustment. NOTE: This option is presently 
-ignored if \fISchedulerType=sched/maui\fR.
-
 .TP
 \fB\-\-ntasks\-per\-node\fR=\fIntasks\fR
 Request that no more than \fIntasks\fR be invoked on each node.
diff --git a/src/slurmd/slurmd/slurmd.c b/src/slurmd/slurmd/slurmd.c
index 497d4371852..ace05497f87 100644
--- a/src/slurmd/slurmd/slurmd.c
+++ b/src/slurmd/slurmd/slurmd.c
@@ -147,9 +147,16 @@ static void      _install_fork_handlers(void);
 int 
 main (int argc, char *argv[])
 {
-	int pidfd;
+	int i, pidfd;
 	int blocked_signals[] = {SIGPIPE, 0};
 
+	/*
+	 * Make sure we have no extra open files which 
+	 * would be propagated to spawned tasks.
+	 */
+	for (i=3; i<256; i++)
+		(void) close(i);
+
 	/*
 	 * Create and set default values for the slurmd global
 	 * config variable "conf"
diff --git a/src/srun/srun.c b/src/srun/srun.c
index 36bf7150343..d89bb77a3bf 100644
--- a/src/srun/srun.c
+++ b/src/srun/srun.c
@@ -204,10 +204,9 @@ int srun(int ac, char **av)
 		if (opt.alloc_nodelist == NULL)
                        opt.alloc_nodelist = xstrdup(resp->node_list);
 
+		job = job_step_create_allocation(resp);
 		slurm_free_resource_allocation_response_msg(resp);
 
-		job = job_step_create_allocation(job_id);
-
 		if(!job)
 			exit(1);
 		
diff --git a/src/srun/srun_job.c b/src/srun/srun_job.c
index a286902229c..2fb76311a54 100644
--- a/src/srun/srun_job.c
+++ b/src/srun/srun_job.c
@@ -157,15 +157,15 @@ job_create_noalloc(void)
  * (i.e. inside an allocation)
  */
 srun_job_t *
-job_step_create_allocation(uint32_t job_id)
+job_step_create_allocation(resource_allocation_response_msg_t *resp)
 {
+	uint32_t job_id = resp->job_id;
 	srun_job_t *job = NULL;
 	allocation_info_t *ai = xmalloc(sizeof(*ai));
 	hostlist_t hl = NULL;
 	char buf[8192];
 	int count = 0;
 	uint32_t alloc_count = 0;
-	char *tasks_per_node = xstrdup(getenv("SLURM_TASKS_PER_NODE"));
 	
 	ai->jobid          = job_id;
 	ai->stepid         = NO_VAL;
@@ -336,63 +336,9 @@ job_step_create_allocation(uint32_t job_id)
 		goto error;
 	}
 
-	if(tasks_per_node) {
-		int i = 0;
-		
-		ai->num_cpu_groups = 0;
-		ai->cpus_per_node = xmalloc(sizeof(uint32_t) * alloc_count);
-		ai->cpu_count_reps = xmalloc(sizeof(uint32_t) * alloc_count);
-		
-		while(tasks_per_node[i]
-		      && (ai->num_cpu_groups < alloc_count)) {
-			if(tasks_per_node[i] >= '0' 
-			   && tasks_per_node[i] <= '9')
-				ai->cpus_per_node[ai->num_cpu_groups] =
-					atoi(&tasks_per_node[i]);
-			else {
-				error("problem with tasks_per_node %s", 
-				      tasks_per_node);
-				goto error;
-			}
-			while(tasks_per_node[i]!='x' 
-			      && tasks_per_node[i]!=',' 
-			      && tasks_per_node[i])
-				i++;
-
-			if(tasks_per_node[i] == ',' || !tasks_per_node[i]) {
-				if(tasks_per_node[i])
-					i++;	
-				ai->cpu_count_reps[ai->num_cpu_groups] = 1;
-				ai->num_cpu_groups++;
-				continue;
-			}
-
-			i++;
-			if(tasks_per_node[i] >= '0' 
-			   && tasks_per_node[i] <= '9')
-				ai->cpu_count_reps[ai->num_cpu_groups] = 
-					atoi(&tasks_per_node[i]);
-			else {
-				error("1 problem with tasks_per_node %s", 
-				      tasks_per_node);
-				goto error;
-			}
-				
-			while(tasks_per_node[i]!=',' && tasks_per_node[i])
-				i++;
-			if(tasks_per_node[i] == ',') {
-				i++;	
-			}
-			ai->num_cpu_groups++;
-		}
-		xfree(tasks_per_node);
-	} else {
-		uint32_t cpn = (opt.nprocs + alloc_count - 1) / alloc_count;
-		debug("SLURM_TASKS_PER_NODE not set! "
-		      "Guessing %d cpus per node", cpn);
-		ai->cpus_per_node  = &cpn;
-		ai->cpu_count_reps = &alloc_count;
-	}
+	ai->num_cpu_groups = resp->num_cpu_groups;
+	ai->cpus_per_node  = resp->cpus_per_node;
+	ai->cpu_count_reps = resp->cpu_count_reps;
 
 /* 	info("looking for %d nodes out of %s with a must list of %s", */
 /* 	     ai->nnodes, ai->nodelist, opt.nodelist); */
diff --git a/src/srun/srun_job.h b/src/srun/srun_job.h
index 5b677101667..44a3d6016a7 100644
--- a/src/srun/srun_job.h
+++ b/src/srun/srun_job.h
@@ -148,7 +148,8 @@ void    job_force_termination(srun_job_t *job);
 srun_job_state_t job_state(srun_job_t *job);
 
 extern srun_job_t * job_create_noalloc(void);
-extern srun_job_t *job_step_create_allocation(uint32_t job_id);
+extern srun_job_t *job_step_create_allocation(
+	resource_allocation_response_msg_t *resp);
 extern srun_job_t * job_create_allocation(
 	resource_allocation_response_msg_t *resp);
 extern srun_job_t * job_create_structure(
-- 
GitLab