From 71eb412d2a4714261c5067b4dc544898e1e42442 Mon Sep 17 00:00:00 2001 From: Moe Jette <jette1@llnl.gov> Date: Fri, 13 Jul 2007 23:37:53 +0000 Subject: [PATCH] Make OpenSSL be an optional digital signature plugin (crypto/openssl) with Munge as an alternative. --- NEWS | 4 +- doc/html/download.shtml | 4 +- doc/html/quickstart.shtml | 2 +- doc/html/quickstart_admin.shtml | 75 +++++++++++++-------------------- doc/man/man1/srun.1 | 26 ++++++++---- src/slurmd/slurmd/slurmd.c | 9 +++- src/srun/srun.c | 3 +- src/srun/srun_job.c | 64 +++------------------------- src/srun/srun_job.h | 3 +- 9 files changed, 69 insertions(+), 121 deletions(-) diff --git a/NEWS b/NEWS index 1c8bd1ae1bf..4c3f58b6b62 100644 --- a/NEWS +++ b/NEWS @@ -45,7 +45,9 @@ documents those changes that are of interest to users and admins. is identical to Moab's (from Ernest Artiaga and Asier Roa, BSC). -- Added "scontrol show slurmd" command to status local slurmd daemon. -- Set node DOWN if prolog fails on node zero of batch job launch. - + -- Properly handle "srun --cpus-per-task" within a job allocation when + SLURM_TASKS_PER_NODE environment variable is not set. + * Changes in SLURM 1.2.11 ========================= -- Updated "etc/mpich1.slurm.patch" for direct srun launch of MPICH1_P4 diff --git a/doc/html/download.shtml b/doc/html/download.shtml index 2b5684af8f4..2b4c2f59c79 100644 --- a/doc/html/download.shtml +++ b/doc/html/download.shtml @@ -40,7 +40,7 @@ The latest stable release is version 1.4.</p> <h1>Related Software</h1> <ul> -<li><b>OpenSSL</b> is required for secure communications between SLURM +<li><b>OpenSSL</b> is recommended for secure communications between SLURM components. Download it from <a href="http://www.openssl.org/">http://www.openssl.org/</a>. 
</li> @@ -98,6 +98,6 @@ Portable Linux Processor Affinity (PLPA)</a></li> </ul> -<p style="text-align:center;">Last modified 15 June 2007</p> +<p style="text-align:center;">Last modified 13 July 2007</p> <!--#include virtual="footer.txt"--> diff --git a/doc/html/quickstart.shtml b/doc/html/quickstart.shtml index 82538cacbac..9b41bca1997 100644 --- a/doc/html/quickstart.shtml +++ b/doc/html/quickstart.shtml @@ -363,7 +363,7 @@ with the <i>--mpi=mpich1_shmem</i> option.</p> $ srun -n16 --mpi=mpich1_shmem a.out </pre> -<p>If you are using MPICH P4 (<i>DEFAULT_DEVICE=ch_shmem</i> in +<p>If you are using MPICH P4 (<i>DEFAULT_DEVICE=ch_p4</i> in the mpirun script) and SLURM version 1.2.11 or newer, then it is recommended that you apply the patch in the SLURM distribution's file <i>etc/mpich1.slurm.patch</i>. diff --git a/doc/html/quickstart_admin.shtml b/doc/html/quickstart_admin.shtml index a92790f22d4..877c51f9e80 100644 --- a/doc/html/quickstart_admin.shtml +++ b/doc/html/quickstart_admin.shtml @@ -200,40 +200,8 @@ auth/munge plugin for communication, the clocks on all nodes will need to be synchronized. </p> <h3>MPI support</h3> -<p>Quadrics MPI works directly with SLURM on systems having Quadrics -interconnects and is the preferred version of MPI for those systems. -Set the <b>MpiDefault=none</b> configuration parameter in slurm.conf.</p> - -<p>For <a href="http://www.myricom.com/">Myrinet</a> systems, MPICH-GM -is preferred. In order to use MPICH-GM, set <b>MpiDefault=mpichgm</b> and -<b>ProctrackType=proctrack/linuxproc</b> configuration parameters in -slurm.conf.</p> - -<p>HP customers would be well served by using -<a href="http://www.hp.com/go/mpi">HP-MPI</a>.</p> - -<p>A good open-source MPI for use with SLURM is -<a href="http://www.lam-mpi.org/">LAM MPI</a>. LAM MPI uses the command -<i>lamboot</i> to initiate job-specific daemons on each node using SLURM's -<span class="commandline">srun</span> -command. 
This places all MPI processes in a process-tree under the control of -the <b>slurmd</b> daemon. LAM/MPI version 7.1 or higher contains support for -SLURM. -Set the <b>MpiDefault=none</b> configuration parameters in slurm.conf. -LAM MPI will explicitly set the mpi plugin type to "lam" on the -<span class="commandline">srun</span> execute line as needed.</p> - -<p>Another good open-source MPI for use with SLURM is -<a href="http://www.open-mpi.org/">Open MPI</a>. Open MPI initiates its -processes using SLURM's <span class="commandline">srun</span> -command. -Set the <b>MpiDefault=none</b> configuration parameters in slurm.conf. -Open MPI will explicitly set the mpi plugin type to "lam" on the -<span class="commandline">srun</span> execute line as needed.</p> - -<p>Note that the ordering of tasks within an job's allocation matches that of -nodes in the slurm.conf configuration file. SLURM presently lacks the ability -to arbitrarily order tasks across nodes.</p> +<p>SLURM supports many different MPI implementations. +For more information, see <a href="quickstart.html#mpi">MPI</a>.</p> <h3>Scheduler support</h3> <p>The scheduler used by SLURM is controlled by the <b>SchedType</b> configuration @@ -411,10 +379,23 @@ NodeName=mcr[0-1151] NodeAddr=emcr[0-1151] PartitionName=DEFAULT State=UP PartitionName=pdebug Nodes=mcr[0-191] MaxTime=30 MaxNodes=32 Default=YES PartitionName=pbatch Nodes=mcr[192-1151] -</pre> +</pre> + <h2>Security</h2> -<p>You will should create unique job credential keys for your site -using the program <a href="http://www.openssl.org/">openssl</a>. +<p>The use of <a href="http://www.openssl.org/">OpenSSL</a> is +recommended to provide a digital signature on job step credentials. +<a href="http://home.gna.org/munge/">Munge</a> can alternatively +be used with somewhat slower performance. +This signature is used by <i>slurmctld</i> to construct a job step +credential, which is sent to <i>srun</i> and then forwarded to +<i>slurmd</i> to initiate job steps. 
+This design offers improved performance by removing much of the +job step initiation overhead from the <i>slurmctld</i> daemon. +The mechanism to be used is controlled through the <b>CryptoType</b> +configuration parameter.</p> + +<p>If using OpenSSL, you should create unique job credential +keys for your site using the program openssl. <b>You must use openssl and not ssh-genkey to construct these keys.</b> An example of how to do this is shown below. Specify file names that match the values of <b>JobCredentialPrivateKey</b> and @@ -423,10 +404,11 @@ The <b>JobCredentialPrivateKey</b> file must be readable only by <b>SlurmUser</b The <b>JobCredentialPublicCertificate</b> file must be readable by all users. Note that you should build the key files one one node and then distribute them to all nodes in the cluster. -This insures that all nodes have a consistent set of encryption keys. -These keys are used by <i>slurmctld</i> to construct a job credential, -which is sent to <i>srun</i> and then forwarded to <i>slurmd</i> to -initiate job steps.</p> +This ensures that all nodes have a consistent set of digital signature +keys. +These keys are used by <i>slurmctld</i> to construct a job step +credential, which is sent to <i>srun</i> and then forwarded to +<i>slurmd</i> to initiate job steps.</p> <p class="commandline" style="margin-left:.2in"> <i>openssl genrsa -out <sysconfdir>/slurm.key 1024</i><br> @@ -434,10 +416,13 @@ initiate job steps.</p> </p> <p>Authentication of communications from SLURM commands to the daemons or between the daemons uses a different security mechanism that is configurable. -You must specify one "auth" plugin for this purpose. -Currently, only three -authentication plugins are supported: <b>auth/none</b>, <b>auth/authd</b>, and -<b>auth/munge</b>. The auth/none plugin is built and used by default, but either +Authentication means that SLURM can positively identify the user who originated +the communication. 
+You must specify a plugin for this purpose using the <b>AuthType</b> configuration +parameter. +Currently, only three authentication plugins are supported: +<b>auth/none</b>, <b>auth/authd</b>, and <b>auth/munge</b>. +The auth/none plugin is built and used by default, but either Brent Chun's <a href="http://www.theether.org/authd/">authd</a>, or Chris Dunlap's <a href="http://home.gna.org/munge/">munge</a> should be installed in order to get properly authenticated communications. diff --git a/doc/man/man1/srun.1 b/doc/man/man1/srun.1 index 8a2186a094b..dc49d3e19e6 100644 --- a/doc/man/man1/srun.1 +++ b/doc/man/man1/srun.1 @@ -548,6 +548,23 @@ comma\-separated and case insensitive types are recongnized: IBM systems see \fIpoe\fR documenation on the environment variables \fBMP_EUIDEVICE\fR and \fBMP_USE_BULK_XFER\fR. +.TP +\fB\-\-nice\fR[=\fIadjustment\fR] +Run the job with an adjusted scheduling priority. +With no adjustment value the scheduling priority is decreased +by 100. The adjustment range is from \-10000 (highest priority) +to 10000 (lowest priority). Only privileged users can specify +a negative adjustment. NOTE: This option is presently +ignored if \fISchedulerType=sched/maui\fR. + +.TP +\fB\-\-no\-requeue\fR +Specifies that the batch job should not be requeued. +Setting this option will prevent system administrators from being able +to restart the job (for example, after a scheduled downtime). +When a job is requeued, the batch script is initiated from its beginning. +This option is only applicable to batch job submission (see \fB\-\-batch\fR). + .TP \fB\-\-ntasks\-per\-core\fR=\fIntasks\fR Request that no more than \fIntasks\fR be invoked on each core. @@ -568,15 +585,6 @@ is specified. NOTE: This option is not supported unless \fISelectType=CR_Socket\fR or \fISelectType=CR_Socket_Memory\fR is configured. -.TP -\fB\-\-nice\fR[=\fIadjustment]\fR -Run the job with an adjusted scheduling priority. 
-With no adjustment value the scheduling priority is decreased -by 100. The adjustment range is from \-10000 (highest priority) -to 10000 (lowest priority). Only privileged users can specify -a negative adjustment. NOTE: This option is presently -ignored if \fISchedulerType=sched/maui\fR. - .TP \fB\-\-ntasks\-per\-node\fR=\fIntasks\fR Request that no more than \fIntasks\fR be invoked on each node. diff --git a/src/slurmd/slurmd/slurmd.c b/src/slurmd/slurmd/slurmd.c index 497d4371852..ace05497f87 100644 --- a/src/slurmd/slurmd/slurmd.c +++ b/src/slurmd/slurmd/slurmd.c @@ -147,9 +147,16 @@ static void _install_fork_handlers(void); int main (int argc, char *argv[]) { - int pidfd; + int i, pidfd; int blocked_signals[] = {SIGPIPE, 0}; + /* + * Make sure we have no extra open files which + * would be propagated to spawned tasks. + */ + for (i=3; i<256; i++) + (void) close(i); + /* * Create and set default values for the slurmd global * config variable "conf" diff --git a/src/srun/srun.c b/src/srun/srun.c index 36bf7150343..d89bb77a3bf 100644 --- a/src/srun/srun.c +++ b/src/srun/srun.c @@ -204,10 +204,9 @@ int srun(int ac, char **av) if (opt.alloc_nodelist == NULL) opt.alloc_nodelist = xstrdup(resp->node_list); + job = job_step_create_allocation(resp); slurm_free_resource_allocation_response_msg(resp); - job = job_step_create_allocation(job_id); - if(!job) exit(1); diff --git a/src/srun/srun_job.c b/src/srun/srun_job.c index a286902229c..2fb76311a54 100644 --- a/src/srun/srun_job.c +++ b/src/srun/srun_job.c @@ -157,15 +157,15 @@ job_create_noalloc(void) * (i.e. 
inside an allocation) */ srun_job_t * -job_step_create_allocation(uint32_t job_id) +job_step_create_allocation(resource_allocation_response_msg_t *resp) { + uint32_t job_id = resp->job_id; srun_job_t *job = NULL; allocation_info_t *ai = xmalloc(sizeof(*ai)); hostlist_t hl = NULL; char buf[8192]; int count = 0; uint32_t alloc_count = 0; - char *tasks_per_node = xstrdup(getenv("SLURM_TASKS_PER_NODE")); ai->jobid = job_id; ai->stepid = NO_VAL; @@ -336,63 +336,9 @@ job_step_create_allocation(uint32_t job_id) goto error; } - if(tasks_per_node) { - int i = 0; - - ai->num_cpu_groups = 0; - ai->cpus_per_node = xmalloc(sizeof(uint32_t) * alloc_count); - ai->cpu_count_reps = xmalloc(sizeof(uint32_t) * alloc_count); - - while(tasks_per_node[i] - && (ai->num_cpu_groups < alloc_count)) { - if(tasks_per_node[i] >= '0' - && tasks_per_node[i] <= '9') - ai->cpus_per_node[ai->num_cpu_groups] = - atoi(&tasks_per_node[i]); - else { - error("problem with tasks_per_node %s", - tasks_per_node); - goto error; - } - while(tasks_per_node[i]!='x' - && tasks_per_node[i]!=',' - && tasks_per_node[i]) - i++; - - if(tasks_per_node[i] == ',' || !tasks_per_node[i]) { - if(tasks_per_node[i]) - i++; - ai->cpu_count_reps[ai->num_cpu_groups] = 1; - ai->num_cpu_groups++; - continue; - } - - i++; - if(tasks_per_node[i] >= '0' - && tasks_per_node[i] <= '9') - ai->cpu_count_reps[ai->num_cpu_groups] = - atoi(&tasks_per_node[i]); - else { - error("1 problem with tasks_per_node %s", - tasks_per_node); - goto error; - } - - while(tasks_per_node[i]!=',' && tasks_per_node[i]) - i++; - if(tasks_per_node[i] == ',') { - i++; - } - ai->num_cpu_groups++; - } - xfree(tasks_per_node); - } else { - uint32_t cpn = (opt.nprocs + alloc_count - 1) / alloc_count; - debug("SLURM_TASKS_PER_NODE not set! 
" - "Guessing %d cpus per node", cpn); - ai->cpus_per_node = &cpn; - ai->cpu_count_reps = &alloc_count; - } + ai->num_cpu_groups = resp->num_cpu_groups; + ai->cpus_per_node = resp->cpus_per_node; + ai->cpu_count_reps = resp->cpu_count_reps; /* info("looking for %d nodes out of %s with a must list of %s", */ /* ai->nnodes, ai->nodelist, opt.nodelist); */ diff --git a/src/srun/srun_job.h b/src/srun/srun_job.h index 5b677101667..44a3d6016a7 100644 --- a/src/srun/srun_job.h +++ b/src/srun/srun_job.h @@ -148,7 +148,8 @@ void job_force_termination(srun_job_t *job); srun_job_state_t job_state(srun_job_t *job); extern srun_job_t * job_create_noalloc(void); -extern srun_job_t *job_step_create_allocation(uint32_t job_id); +extern srun_job_t *job_step_create_allocation( + resource_allocation_response_msg_t *resp); extern srun_job_t * job_create_allocation( resource_allocation_response_msg_t *resp); extern srun_job_t * job_create_structure( -- GitLab