diff --git a/NEWS b/NEWS index 8e081cf2a0943558d7a294440e593d41e825a674..221564da4df0d8000148408ab461e29c160302e5 100644 --- a/NEWS +++ b/NEWS @@ -214,6 +214,21 @@ documents those changes that are of interest to users and administrators. basis should reset it's "nice" value. -- Print buffer sizes as unsigned values when failed to pack messages. -- Fix race condition where sprio would print factors without weights applied. + -- Document the sacct option JobIDRaw which for arrays prints the jobid instead + of the arrayTaskId. + -- Allow users to modify MinCPUsNode, MinMemoryNode and MinTmpDiskNode of + their own jobs. + -- Increase the jobid print field in SQUEUE_FORMAT in + opt_modulefiles_slurm.in. + -- Enable compiling without optimizations and with debugging symbols by + default. Disable this by configuring with --disable-debug. + -- job_submit/lua plugin: Add mail_type and mail_user fields. + -- Correct output message from sshare. + -- Use standard statvfs(2) syscall if available, in preference to + non-standard statfs. + -- Add a new option -U/--Users to sshare to display only users + information, parent and ancestors are not printed. + -- Purge 50000 records at a time so that locks can be released periodically. 
* Changes in Slurm 14.11.5 ========================== diff --git a/auxdir/x_ac_debug.m4 b/auxdir/x_ac_debug.m4 index 2dc010d4e7411dbe0f3f48feaf9df4dcdd52e563..50f67289a2cc2751d8f1ae9f80d85ffa9453c5b7 100644 --- a/auxdir/x_ac_debug.m4 +++ b/auxdir/x_ac_debug.m4 @@ -24,7 +24,7 @@ AC_DEFUN([X_AC_DEBUG], [ AC_MSG_CHECKING([whether or not developer options are enabled]) AC_ARG_ENABLE( [developer], - AS_HELP_STRING(--enable-developer,enable developer options (-Werror - also sets --enable-debug as well)), + AS_HELP_STRING(--enable-developer,enable developer options (asserts, -Werror - also sets --enable-debug as well)), [ case "$enableval" in yes) x_ac_developer=yes ;; no) x_ac_developer=no ;; @@ -34,34 +34,35 @@ AC_DEFUN([X_AC_DEBUG], [ ] ) if test "$x_ac_developer" = yes; then - test "$GCC" = yes && CFLAGS="$CFLAGS -Werror" - test "$GXX" = yes && CXXFLAGS="$CXXFLAGS -Werror" - # automatically turn on --enable-debug if being a developer - x_ac_debug=yes + test "$GCC" = yes && CFLAGS="$CFLAGS -Werror" + test "$GXX" = yes && CXXFLAGS="$CXXFLAGS -Werror" + # automatically turn on --enable-debug if being a developer + x_ac_debug=yes + else + AC_DEFINE([NDEBUG], [1], + [Define to 1 if you are building a production release.] + ) fi AC_MSG_RESULT([${x_ac_developer=no}]) AC_MSG_CHECKING([whether debugging is enabled]) AC_ARG_ENABLE( [debug], - AS_HELP_STRING(--enable-debug,enable debugging code for development (automatically set if --enable-developer is used)), + AS_HELP_STRING(--disable-debug,disable debugging symbols and compile with optimizations), [ case "$enableval" in yes) x_ac_debug=yes ;; no) x_ac_debug=no ;; *) AC_MSG_RESULT([doh!]) AC_MSG_ERROR([bad value "$enableval" for --enable-debug]) ;; esac - ] + ], + [x_ac_debug=yes] ) if test "$x_ac_debug" = yes; then # you will most likely get a -O2 in you compile line, but the last option # is the only one that is looked at. 
test "$GCC" = yes && CFLAGS="$CFLAGS -Wall -g -O0 -fno-strict-aliasing" test "$GXX" = yes && CXXFLAGS="$CXXFLAGS -Wall -g -O0 -fno-strict-aliasing" - else - AC_DEFINE([NDEBUG], [1], - [Define to 1 if you are building a production release.] - ) fi AC_MSG_RESULT([${x_ac_debug=no}]) diff --git a/config.h.in b/config.h.in index 6fa6246892c8e012dd3741a8090a4c475a931bcc..2bfae76c60be7e2b20474173ee0df73ed2949d43 100644 --- a/config.h.in +++ b/config.h.in @@ -287,6 +287,12 @@ /* Define to 1 if you have the <socket.h> header file. */ #undef HAVE_SOCKET_H +/* Define to 1 if you have the `statfs' function. */ +#undef HAVE_STATFS + +/* Define to 1 if you have the `statvfs' function. */ +#undef HAVE_STATVFS + /* Define to 1 if you have the <stdbool.h> header file. */ #undef HAVE_STDBOOL_H @@ -350,6 +356,9 @@ /* Define to 1 if you have the <sys/statfs.h> header file. */ #undef HAVE_SYS_STATFS_H +/* Define to 1 if you have the <sys/statvfs.h> header file. */ +#undef HAVE_SYS_STATVFS_H + /* Define to 1 if you have the <sys/stat.h> header file. 
*/ #undef HAVE_SYS_STAT_H diff --git a/configure b/configure index 447c48b889649afb457973502b5a6a3c193d13a4..7a00ebe7d87fab3a3cb3ef88cbd0c2a06508fbcb 100755 --- a/configure +++ b/configure @@ -1698,10 +1698,10 @@ Optional Features: Run SLURM in an emulated Cray mode --enable-native-cray Run SLURM natively on a Cray without ALPS --enable-cray-network Run SLURM on a non-Cray system with a Cray network - --enable-developer enable developer options (-Werror - also sets - --enable-debug as well) - --enable-debug enable debugging code for development (automatically - set if --enable-developer is used) + --enable-developer enable developer options (asserts, -Werror - also + sets --enable-debug as well) + --disable-debug disable debugging symbols and compile with + optimizations --enable-memory-leak-debug enable memory leak debugging code for development --enable-front-end enable slurmd operation on a front-end @@ -18258,7 +18258,7 @@ for ac_header in mcheck.h values.h socket.h sys/socket.h \ pty.h utmp.h \ sys/syslog.h linux/sched.h \ kstat.h paths.h limits.h sys/statfs.h sys/ptrace.h \ - sys/termios.h float.h + sys/termios.h float.h sys/statvfs.h do : as_ac_Header=`$as_echo "ac_cv_header_$ac_header" | $as_tr_sh` @@ -19603,6 +19603,8 @@ for ac_func in \ get_current_dir_name \ faccessat \ eaccess \ + statvfs \ + statfs \ do : as_ac_var=`$as_echo "ac_cv_func_$ac_func" | $as_tr_sh` @@ -22602,10 +22604,14 @@ $as_echo "doh!" 
>&6; } fi if test "$x_ac_developer" = yes; then - test "$GCC" = yes && CFLAGS="$CFLAGS -Werror" - test "$GXX" = yes && CXXFLAGS="$CXXFLAGS -Werror" - # automatically turn on --enable-debug if being a developer - x_ac_debug=yes + test "$GCC" = yes && CFLAGS="$CFLAGS -Werror" + test "$GXX" = yes && CXXFLAGS="$CXXFLAGS -Werror" + # automatically turn on --enable-debug if being a developer + x_ac_debug=yes + else + +$as_echo "#define NDEBUG 1" >>confdefs.h + fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: ${x_ac_developer=no}" >&5 $as_echo "${x_ac_developer=no}" >&6; } @@ -22622,6 +22628,8 @@ $as_echo "doh!" >&6; } as_fn_error $? "bad value \"$enableval\" for --enable-debug" "$LINENO" 5 ;; esac +else + x_ac_debug=yes fi @@ -22630,10 +22638,6 @@ fi # is the only one that is looked at. test "$GCC" = yes && CFLAGS="$CFLAGS -Wall -g -O0 -fno-strict-aliasing" test "$GXX" = yes && CXXFLAGS="$CXXFLAGS -Wall -g -O0 -fno-strict-aliasing" - else - -$as_echo "#define NDEBUG 1" >>confdefs.h - fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: ${x_ac_debug=no}" >&5 $as_echo "${x_ac_debug=no}" >&6; } diff --git a/configure.ac b/configure.ac index 9b381924df0aba43e258a9261e913da7e1a9e205..abbb4bcbeb17c4985994b873c9bc45fda92d3ba1 100644 --- a/configure.ac +++ b/configure.ac @@ -129,7 +129,7 @@ AC_CHECK_HEADERS(mcheck.h values.h socket.h sys/socket.h \ pty.h utmp.h \ sys/syslog.h linux/sched.h \ kstat.h paths.h limits.h sys/statfs.h sys/ptrace.h \ - sys/termios.h float.h + sys/termios.h float.h sys/statvfs.h ) AC_HEADER_SYS_WAIT AC_HEADER_TIME @@ -206,6 +206,8 @@ AC_CHECK_FUNCS( \ get_current_dir_name \ faccessat \ eaccess \ + statvfs \ + statfs \ ) AC_CHECK_DECLS([hstrerror, strsignal, sys_siglist]) diff --git a/contribs/cray/opt_modulefiles_slurm.in b/contribs/cray/opt_modulefiles_slurm.in index cc309a8a8d7b57409ba7c2c80acb0915795ecd92..b8c039633202bc4b9415d128ca372a28b62ae9c6 100644 --- a/contribs/cray/opt_modulefiles_slurm.in +++ b/contribs/cray/opt_modulefiles_slurm.in @@ 
-36,7 +36,7 @@ prepend-path PERL5LIB "$slurmdir/$perldir" # other useful environment variables setenv SINFO_FORMAT {%9P %5a %8s %.10l %.6c %.6z %.7D %10T %N} -setenv SQUEUE_FORMAT {%.6i %.8u %.7a %.14j %.3t %9r %19S %.10M %.10L %.5D %.4C} +setenv SQUEUE_FORMAT {%.8i %.8u %.7a %.14j %.3t %9r %19S %.10M %.10L %.5D %.4C} setenv SQUEUE_ALL {yes} ;# show hidden partitions, too setenv SQUEUE_SORT {-t,e,S} diff --git a/doc/html/mc_support.shtml b/doc/html/mc_support.shtml index 6566457eba8bc291df25f383f3f0ce272ffbb570..6549aceb8f1034c3035ef76f10ba9cf0b19caa9b 100644 --- a/doc/html/mc_support.shtml +++ b/doc/html/mc_support.shtml @@ -5,7 +5,7 @@ <h2>Contents</h2> <UL> <LI> <a href=#defs>Definitions</a> -<LI> <a href=#flags>Overview of new srun flags</a> +<LI> <a href=#flags>Overview of srun flags</a> <LI> <a href=#motivation>Motivation behind high-level srun flags</a> <LI> <a href=#utilities>Extensions to sinfo/squeue/scontrol</a> <LI> <a href=#config>Configuration settings in slurm.conf</a> @@ -14,9 +14,17 @@ <a name=defs> <h2>Definitions</h2></a> -<P> <b>Socket/Core/Thread</b> - Figure 1 illustrates the notion of -Socket, Core and Thread as it is defined in Slurm's multi-core/multi-thread -support documentation.</p> +<dl> +<dt><b>BaseBoard</b> +<dd>Also called motherboard. +<dt><b>LDom</b> +<dd>Locality domain or NUMA domain. May be equivalent to BaseBoard or Socket. +<dt><b>Socket/Core/Thread</b> +<dd>Figure 1 illustrates the notion of Socket, Core and Thread as it is defined +in Slurm's multi-core/multi-thread support documentation. +<dt><b>CPU</b> +<dd>Depending upon system configuration, this can be either a core or a thread. +</dl> <center> <img src="mc_support.gif"> @@ -41,12 +49,12 @@ allowing a process to run on more than one logical processor. 
</dl> <a name=flags> -<h2>Overview of new srun flags</h2></a> +<h2>Overview of srun flags</h2></a> -<p> Several flags have been defined to allow users to +<p>Many flags have been defined to allow users to better take advantage of this architecture by explicitly specifying the number of sockets, cores, and threads required -by their application. Table 1 summarizes the new multi-core flags. +by their application. Table 1 summarizes these options. <P> <table border=1 cellspacing=0 cellpadding=4> @@ -77,13 +85,13 @@ by their application. Table 1 summarizes the new multi-core flags. <td> Combined shortcut option for --sockets-per-node, --cores-per_cpu, --threads-per_core </td></tr> <tr><td colspan=2> -<b><a href="#srun_dist">New Distributions</b> +<b><a href="#srun_dist">Task Distribution Options</b> </td></tr> <tr> <td> -m / --distribution </td> - <td> Distributions of: block | cyclic | hostfile + <td> Distributions of: arbitrary | block | cyclic | <a href="dist_plane.html"><u>plane=<i>x</i></u></a> - | <u>[block|cyclic]:[block|cyclic]</u> + | <u>[block|cyclic]:[block|cyclic|fcyclic]</u> </td></tr> <tr><td colspan=2> <b><a href="#srun_consres">Memory as a consumable resource</a></b> @@ -100,6 +108,9 @@ by their application. Table 1 summarizes the new multi-core flags. <b><a href="#srun_ntasks">Task invocation control</a></b> </td></tr> <tr> + <td> --cpus-per-task=<i>CPUs</i></td> + <td> number of CPUs required per task +</td></tr> <td> --ntasks-per-node=<i>ntasks</i></td> <td> number of tasks to invoke on each node </td></tr> @@ -108,6 +119,9 @@ by their application. Table 1 summarizes the new multi-core flags. </td></tr> <td> --ntasks-per-core=<i>ntasks</i></td> <td> number of tasks to invoke on each core +</td></tr> + <td> --overcommit</td> + <td> Permit more than one task per CPU </td></tr> <tr><td colspan=2> <b><a href="#srun_hints">Application hints</a></b> @@ -122,15 +136,26 @@ by their application. Table 1 summarizes the new multi-core flags. 
<td> --hint=[no]multithread</td> <td> [don't] use extra threads with in-core multi-threading </td></tr> +<tr><td colspan=2> +<b><a href="#srun_hints">Resources reserved for system use</a></b> +</td></tr> +<tr> + <td> --core-spec=<i>cores</i></td> + <td> Count of cores to reserve for system use +</td></tr> + <td> --thread-spec=<i>threads</i></td> + <td> Count of threads to reserve for system use (future) +</td></tr> </table> <p> <center> -Table 1: New srun flags to support the multi-core/multi-threaded environment +Table 1: srun flags to support the multi-core/multi-threaded environment </center> <p>It is important to note that many of these flags are only meaningful if the processes' have some affinity to specific CPUs and (optionally) memory. +Inconsistent options generally result in errors. Task affinity is configured using the TaskPlugin parameter in the slurm.conf file. Several options exist for the TaskPlugin depending upon system architecture and available software, any of them except "task/none" will find tasks to CPUs. 
@@ -155,6 +180,20 @@ See the "Task Launch" section if generating slurm.conf via mask_cpu:<i><list></i> specify a CPU ID binding mask for each task where <i><list></i> is <i><mask1>,<mask2>,...<maskN></i> + rank_ldom bind task by rank to CPUs in a NUMA + locality domain + map_ldom:<i><list></i> specify a NUMA locality domain ID + for each task where <i><list></i> is + <i><ldom1>,<ldom2>,...<ldomN></i> + rank_ldom bind task by rank to CPUs in a NUMA + locality domain where <i><list></i> is + <i><ldom1>,<ldom2>,...<ldomN></i> + mask_ldom:<i><list></i> specify a NUMA locality domain ID mask + for each task where <i><list></i> is + <i><ldom1>,<ldom2>,...<ldomN></i> + boards auto-generated masks bind to boards + ldoms auto-generated masks bind to NUMA locality + domains sockets auto-generated masks bind to sockets cores auto-generated masks bind to cores threads auto-generated masks bind to threads @@ -164,8 +203,8 @@ See the "Task Launch" section if generating slurm.conf via <p> The affinity can be either set to either a specific logical processor (socket, core, threads) or at a coarser granularity than the lowest level of logical processor (core or thread). -In the later case the processes are allowed to roam within a specific -socket or core. +In the later case the processes are allowed to utilize multiple processors +within a specific socket or core. 
<p>Examples:</p> @@ -188,9 +227,9 @@ of nodes, sockets, cores, and threads:</p> <PRE> -B --extra-node-info=<i>S[:C[:T]]</i> Expands to: - --sockets-per-node=<i>S</i> number of sockets per node to allocate - --cores-per-socket=<i>C</i> number of cores per socket to allocate - --threads-per-core=<i>T</i> number of threads per core to allocate + --sockets-per-node=<i>S</i> number of sockets per node to allocate + --cores-per-socket=<i>C</i> number of cores per socket to allocate + --threads-per-core=<i>T</i> number of threads per core to allocate each field can be 'min' or wildcard '*' <font face="serif">Total cpus requested = (<i>Nodes</i>) x (<i>S</i> x <i>C</i> x <i>T</i>)</font> @@ -231,32 +270,37 @@ note: compare the above with the previous corresponding --cpu_bind=... examples <p>See also 'srun --help' and 'man srun'</p> <a name="srun_dist"> -<h3>New distributions: Extensions to -m / --distribution</h3></a> +<h3>Task distribution options: Extensions to -m / --distribution</h3></a> <p>The -m / --distribution option for distributing processes across nodes has been extended to also describe the distribution within the lowest level of logical processors. Available distributions include: <br> -block | cyclic | hostfile | <u>plane=<i>x</i></u> | <u>[block|cyclic]:[block|cyclic]</u>) +arbitrary | block | cyclic | <u>plane=<i>x</i></u> | <u>[block|cyclic]:[block|cyclic|fcyclic]</u> </p> -<p>The new <A HREF="dist_plane.html">plane distribution</A> (plane=<i>x</i>) -results in a block cyclic distribution of blocksize equal to <i>x</i>. +<p>The <A HREF="dist_plane.html">plane distribution</A> (plane=<i>x</i>) +results in a block:cyclic distribution of blocksize equal to <i>x</i>. In the following we use "lowest level of logical processors" to describe sockets, cores or threads depending of the architecture. 
-The new distribution divides +The distribution divides the cluster into planes (including a number of the lowest level of logical processors on each node) and then schedule first within each plane and then across planes.</p> -<p>For the two dimensional distributions ([block|cyclic]:[block|cyclic]), +<p>For the two dimensional distributions ([block|cyclic]:[block|cyclic|fcyclic]), the second distribution (after ":") allows users to specify a distribution method for processes within a node and applies to the lowest level of logical -processors (sockets, core or thread depending on the architecture).</p> +processors (sockets, core or thread depending on the architecture). +When a task requires more than one CPU, the <i>cyclic</i> will allocate all +of those CPUs as a group (i.e. within the same socket if possible) while +<i>fcyclic</i> would distribute each of those CPUs in a cyclic fashion +across sockets.</p> -<p>The binding is enabled automatically when high level flags are used as long as the task/affinity plug-in -is enabled. To disable binding at the job level use --cpu_bind=no.</p> +<p>The binding is enabled automatically when high level flags are used as long +as the task/affinity plug-in is enabled. To disable binding at the job level +use --cpu_bind=no.</p> <p>The distribution flags can be combined with the other switches: @@ -429,7 +473,7 @@ flags much easier to use.</li> <p>Also as illustrated in the example below it is much simpler to specify a different layout using the high-level flags since users do not have to -recalculate mask or CPU IDs. The new approach is very effortless compared to +recalculate mask or CPU IDs. This approach is much simpler than rearranging the mask or map.</p> <p>Given a 32-process MPI job and a four dual-socket dual-core node @@ -437,11 +481,11 @@ cluster, we want to use a block distribution across the four nodes and then a cyclic distribution within the node across the physical processors. 
We have had several requests from users that they would like this distribution to be the default distribution on multi-core clusters. Below we show how to obtain the -wanted layout using 1) the new high-level flags and 2) --cpubind</p> +wanted layout using 1) the high-level flags and 2) --cpubind</p> <h3>High-Level flags</h3> -<p>Using Slurm's new high-level flag, users can obtain the above layout with:</p> +<p>Using Slurm's high-level flag, users can obtain the above layout with:</p> <DL> <DL> @@ -605,7 +649,7 @@ numbering.)</p> <h3>Block map_cpu on a system with cyclic core numbering</h3> <p>If users do not check their system's core numbering before specifying -the map_cpu list and thereby do not realize that the new system has cyclic core +the map_cpu list and thereby do not realize that the system has cyclic core numbering instead of block numbering then they will not get the expected layout.. For example, if they decide to re-use their mpirun command from above:</p> @@ -854,8 +898,9 @@ TaskPlugin=task/affinity # enable task affinity # "TaskPlugin" : Define a task launch plugin. This may be used to # provide resource management within a node (e.g. pinning # tasks to specific processors). Permissible values are: -# "task/none" : no task launch actions, the default. # "task/affinity" : CPU affinity support +# "task/cgroup" : bind tasks to resources using Linux cgroup +# "task/none" : no task launch actions, the default # # Example: # @@ -874,82 +919,10 @@ FastSchedule=1 NodeName=dualcore[01-16] CPUs=4 CoresPerSocket=2 ThreadsPerCore=1 </PRE> -<p>Below is a more complete description of the configuration possible -using NodeName: - -<PRE> -# -# o Node configuration -# -# The configuration information of nodes (or machines) to be managed -# by Slurm is described here. The only required value in this section -# of the config file is the "NodeName" field, which specifies the -# hostnames of the node or nodes to manage. 
It is recommended, however, -# that baseline values for the node configuration be established -# using the following parameters (see slurm.config(5) for more info): -# -# "NodeName" : The only required node configuration parameter, NodeName -# specifies a node or set of nodes to be managed by SLURM. -# The special NodeName of "DEFAULT" may be used to establish -# default node configuration parameters for subsequent node -# records. Typically this would be the string that -# `/bin/hostname -s` would return on the node. However -# NodeName may be an arbitrary string if NodeHostname is -# used (see below). -# -# "Feature" : comma separated list of "features" for the given node(s) -# -# "NodeAddr" : preferred address for contacting the node. This may be -# either a name or IP address. -# -# "NodeHostname" -# : the string that `/bin/hostname -s` would return on the -# node. In other words, NodeName may be the name other than -# the real hostname. -# -# "RealMemory" : Amount of real memory (in Megabytes) -# -# "CPUs" : Number of logical processors on the node. -# If CPUs is omitted, it will be inferred from: -# Sockets, CoresPerSocket, and ThreadsPerCore. -# -# "Sockets" : Number of physical processor sockets/chips on the node. -# If Sockets is omitted, it will be inferred from: -# CPUs, CoresPerSocket, and ThreadsPerCore. -# -# "CoresPerSocket" -# : Number of cores in a single physical processor socket -# The CoresPerSocket value describes physical cores, not -# the logical number of processors per socket. -# The default value is 1. -# -# "ThreadsPerCore" -# : Number of logical threads in a single physical core. -# The default value is 1. -# -# "State" : Initial state (IDLE, DOWN, etc.) 
-# -# "TmpDisk" : Temporary disk space available on node -# -# "Weight" : Priority of node for scheduling purposes -# -# If any of the above values are set for a node or group of nodes, and -# that node checks in to the slurm controller with less than the -# configured resources, the node's state will be set to DOWN, in order -# to avoid scheduling any jobs on a possibly misconfigured machine. -# -# Example Node configuration: -# -# NodeName=DEFAULT CPUs=2 TmpDisk=64000 State=UNKNOWN -# NodeName=host[0-25] NodeAddr=ehost[0-25] Weight=16 -# NodeName=host26 NodeAddr=ehost26 Weight=32 Feature=graphics_card -# NodeName=dualcore01 CPUs=4 CoresPerSocket=2 ThreadsPerCore=1 -# NodeName=dualcore02 CPUs=4o Sockets=2 CoresPerSocket=2 ThreadsPerCore=1 -# NodeName=multicore03 CPUs=64 Sockets=8 CoresPerSocket=4 ThreadsPerCore=2 -</PRE> - +<p>For a more complete description of the various node configuration options +see the slurm.conf man page.</p> <!--------------------------------------------------------------------------> -<p style="text-align:center;">Last modified 21 April 2014</p> +<p style="text-align:center;">Last modified 1 April 2015</p> <!--#include virtual="footer.txt"--> diff --git a/doc/html/team.shtml b/doc/html/team.shtml index 1ebf592a70a095c8fe2793e35a1301c01a7e0583..981cbd748ef0c2e4295e1bd59b5078b593e6e70b 100644 --- a/doc/html/team.shtml +++ b/doc/html/team.shtml @@ -192,6 +192,7 @@ Lead Slurm developers are: <li>Uwe Sauter (High Performance Computing Center Stuttgart, Germany)</li> <li>Chris Scheller (University of Michigan)</li> <li>Rod Schultz (Bull)</li> +<li>Samuel Senoner (Vienna University of Technology, Austria)</li> <li>David Singleton</li> <li>Filip Skalski (University of Warsaw, Poland)</li> <li>Jason Sollom (Cray)</li> diff --git a/doc/html/troubleshoot.shtml b/doc/html/troubleshoot.shtml index 265b4dae36bb9d200b93c5bf6160b63792c4fb41..b4414a54be85fbb576d755f43eec9bab2dbb6b9c 100644 --- a/doc/html/troubleshoot.shtml +++ 
b/doc/html/troubleshoot.shtml @@ -130,7 +130,11 @@ and "<i>scontrol update NodeName=<node> State=resume</i>"). This permits other jobs to use the node, but leaves the non-killable process in place. If the process should ever complete the I/O, the pending SIGKILL -should terminate it immediately.</li> +should terminate it immediately. <b>-OR-</b></li> +<li>Use the <b>UnkillableStepProgram</b> and <b>UnkillableStepTimeout</b> +configuration parameters to automatically respond to processes which can not +be killed, by sending email or rebooting the node. For more information, +see the <i>slurm.conf</i> documentation.</li> </ol> <p class="footer"><a href="#top">top</a></p> @@ -301,6 +305,6 @@ partition 000. </ol> <p class="footer"><a href="#top">top</a></p> -<p style="text-align:center;">Last modified 6 January 20124</p> +<p style="text-align:center;">Last modified 2 April 2015</p> <!--#include virtual="footer.txt"--> diff --git a/doc/man/man1/sacct.1 b/doc/man/man1/sacct.1 index 7e5b1c8e5a2d13cd21e4f4cbe2d1a0d9ca296969..d6a36766fc7f72c88e93e0a9bbf6977a302a32ce 100644 --- a/doc/man/man1/sacct.1 +++ b/doc/man/man1/sacct.1 @@ -126,25 +126,25 @@ Print a list of fields that can be specified with the \f3\-\-format\fP option. 
.ft 3 Fields available: -AllocCPUS Account AssocID AveCPU -AveCPUFreq AveDiskRead AveDiskWrite AvePages -AveRSS AveVMSize BlockID Cluster -Comment ConsumedEnergy CPUTime CPUTimeRAW -DerivedExitCode Elapsed Eligible End -ExitCode GID Group JobID -JobName Layout MaxDiskRead MaxDiskReadNode -MaxDiskReadTask MaxDiskWrite MaxDiskWriteNode MaxDiskWriteTask -MaxPages MaxPagesNode MaxPagesTask MaxRSS -MaxRSSNode MaxRSSTask MaxVMSize MaxVMSizeNode -MaxVMSizeTask MinCPU MinCPUNode MinCPUTask -NCPUS NNodes NodeList NTasks -Priority Partition QOSRAW ReqCPUFreqMin -ReqCPUFreqMax ReqCPUFreqGov ReqCPUs ReqMem -Reservation ReservationId Reserved ResvCPU -ResvCPURAW Start State Submit -Suspended SystemCPU Timelimit TotalCPU -UID User UserCPU WCKey -WCKeyID +AllocCPUS Account AssocID AveCPU +AveCPUFreq AveDiskRead AveDiskWrite AvePages +AveRSS AveVMSize BlockID Cluster +Comment ConsumedEnergy CPUTime CPUTimeRAW +DerivedExitCode Elapsed Eligible End +ExitCode GID Group JobID +JobIDRaw JobName Layout MaxDiskRead +MaxDiskReadNode MaxDiskReadTask MaxDiskWrite MaxDiskWriteNode +MaxDiskWriteTask MaxPages MaxPagesNode MaxPagesTask +MaxRSS MaxRSSNode MaxRSSTask MaxVMSize +MaxVMSizeNode MaxVMSizeTask MinCPU MinCPUNode +MinCPUTask NCPUS NNodes NodeList +NTasks Priority Partition QOSRAW +ReqCPUFreqMin ReqCPUFreqMax ReqCPUFreqGov ReqCPUs +ReqMem Reservation ReservationId Reserved +ResvCPU ResvCPURAW Start State +Submit Suspended SystemCPU Timelimit +TotalCPU UID User UserCPU +WCKey WCKeyID .ft 1 .fi @@ -563,6 +563,12 @@ It is in the form: \f2job.jobstep\fP\c \&. +.TP +\f3JobIDRaw\fP +In case of job array print the jobId instead of the ArrayJobId. +For non job arrays the output is the jobId in the format \f2job.jobstep\fP\c +\&. + .TP \f3JobName\fP The name of the job or job step. 
The \f3slurm_accounting.log\fP file diff --git a/doc/man/man1/salloc.1 b/doc/man/man1/salloc.1 index 053377b336e972859d37f0057a8286256c0d9572..dea30c5ef6104ef19676a7960ae37ee93c7d0039 100644 --- a/doc/man/man1/salloc.1 +++ b/doc/man/man1/salloc.1 @@ -556,7 +556,7 @@ Multiple license names should be comma separated (e.g. .TP \fB\-m\fR, \fB\-\-distribution\fR= -<\fIblock\fR|\fIcyclic\fR|\fIarbitrary\fR|\fIplane=<options>\fR[:\fIblock\fR|\fIcyclic\fR]> +\fIarbitrary\fR|<\fIblock\fR|\fIcyclic\fR|\fIplane=<options>\fR[:\fIblock\fR|\fIcyclic\fR|\fIfcyclic\fR]> Specify alternate distribution methods for remote processes. In salloc, this only sets environment variables that will be used by @@ -640,6 +640,15 @@ that consecutive tasks share a socket. The cyclic distribution method will distribute tasks to sockets such that consecutive tasks are distributed over consecutive sockets (in a round\-robin fashion). +Tasks requiring more than one CPU will have all of those CPUs allocated on a +single socket if possible. +.TP +.B fcyclic +The fcyclic distribution method will distribute tasks to sockets such +that consecutive tasks are distributed over consecutive sockets (in a +round\-robin fashion). +Tasks requiring more than one CPU will have each CPU allocated in a cyclic +fashion across sockets. .RE .TP @@ -933,7 +942,7 @@ Create a IP version 4 connection for LAPI communications on one switch network for each task. .TP \fBInstances=2,US,LAPI,MPI\fR -Create two user space connections each for LAPI and MPI communcations on every +Create two user space connections each for LAPI and MPI communications on every switch network for each task. Note that SN_ALL is the default option so every switch network is used. Also note that Instances=2 specifies that two connections are established for each protocol (LAPI and MPI) and each task. 
diff --git a/doc/man/man1/sbatch.1 b/doc/man/man1/sbatch.1 index 73c51ca9b8431242899e6109fbe74e701e6b245f..b3ae1f0a6589130faa498f4b3812a0d833c174ef 100644 --- a/doc/man/man1/sbatch.1 +++ b/doc/man/man1/sbatch.1 @@ -652,7 +652,7 @@ between clusters. .TP \fB\-m\fR, \fB\-\-distribution\fR= -<\fIblock\fR|\fIcyclic\fR|\fIarbitrary\fR|\fIplane=<options>\fR[:\fIblock\fR|\fIcyclic\fR]> +\fIarbitrary\fR|<\fIblock\fR|\fIcyclic\fR|\fIplane=<options>\fR[:\fIblock\fR|\fIcyclic\fR|\fIfcyclic\fR]> Specify alternate distribution methods for remote processes. In sbatch, this only sets environment variables that will be used by @@ -736,6 +736,15 @@ that consecutive tasks share a socket. The cyclic distribution method will distribute tasks to sockets such that consecutive tasks are distributed over consecutive sockets (in a round\-robin fashion). +Tasks requiring more than one CPU will have all of those CPUs allocated on a +single socket if possible. +.TP +.B fcyclic +The fcyclic distribution method will distribute tasks to sockets such +that consecutive tasks are distributed over consecutive sockets (in a +round\-robin fashion). +Tasks requiring more than one CPU will have each CPU allocated in a cyclic +fashion across sockets. .RE .TP diff --git a/doc/man/man1/scontrol.1 b/doc/man/man1/scontrol.1 index f50597d692c24bcee652899ad7de6121bcc5c85a..f32aa4da3f3d4d3c3e7dc3f284bfa44e4b9ab1ed 100644 --- a/doc/man/man1/scontrol.1 +++ b/doc/man/man1/scontrol.1 @@ -584,17 +584,14 @@ of the cluster) as described in salloc/sbatch/srun man pages. .TP \fIMinCPUsNode\fP=<count> Set the job's minimum number of CPUs per node to the specified value. -Only the Slurm administrator or root can change this parameter. .TP \fIMinMemoryCPU\fP=<megabytes> Set the job's minimum real memory required per allocated CPU to the specified -value. Only the Slurm administrator or root can change this parameter. -Either \fIMinMemoryCPU\fP or \fIMinMemoryNode\fP may be set, but not both. +value. 
Either \fIMinMemoryCPU\fP or \fIMinMemoryNode\fP may be set, but not both. .TP \fIMinMemoryNode\fP=<megabytes> Set the job's minimum real memory required per node to the specified value. Either \fIMinMemoryCPU\fP or \fIMinMemoryNode\fP may be set, but not both. -Only the Slurm administrator or root can change this parameter. .TP \fIMinTmpDiskNode\fP=<megabytes> Set the job's minimum temporary disk space required per node to the specified value. @@ -1259,7 +1256,7 @@ but may use these reserved resources plus any which are generally available. .TP \fICoreCnt\fP=<num> -This option is only suported when SelectType=select/cons_res. Identify number of +This option is only supported when SelectType=select/cons_res. Identify number of cores to be reserved. If NodeCnt is used, this is the total number of cores to reserve where cores per node is CoreCnt/NodeCnt. If a nodelist is used, this should be an array of core numbers by node: Nodes=node[1\-5] CoreCnt=2,2,3,3,4 diff --git a/doc/man/man1/sinfo.1 b/doc/man/man1/sinfo.1 index c87dcedc14a71b324c7b3d8a5baa47824c633324..61224c082bbee28e57550dc84beb056227b18834 100644 --- a/doc/man/man1/sinfo.1 +++ b/doc/man/man1/sinfo.1 @@ -111,7 +111,7 @@ when running with various options are In the above format strings, the use of "#" represents the maximum length of any partition name or node list to be printed. A pass is made over the records to be printed to establish the size in order -to allign the sinfo output, then a second pass is made over the records to +to align the sinfo output, then a second pass is made over the records to print them. Note that the literal character "#" itself is not a valid field length specification, but is only used to document this behaviour. @@ -254,7 +254,7 @@ size of field .TP \fB\-p <partition>\fR, \fB\-\-partition=<partition>\fR -Print information only about the specified partition(s). Mutliple partitions +Print information only about the specified partition(s). 
Multiple partitions are separated by commas. .TP @@ -418,7 +418,7 @@ Size of temporary disk space in megabytes on these nodes. .SH "NODE STATE CODES" .PP Node state codes are shortened as required for the field size. -These node states may be followed by a special characater to identify +These node states may be followed by a special character to identify state flags associated with the node. The following node sufficies and states are used: .TP 4 diff --git a/doc/man/man1/smap.1 b/doc/man/man1/smap.1 index e439655dbb05ee849952a4c40ff02b6b4d667dc5..f8aef9b6f3370d9cf4ae0619daaf38fb5e385a56 100644 --- a/doc/man/man1/smap.1 +++ b/doc/man/man1/smap.1 @@ -371,7 +371,7 @@ Clear all partitions created. .SH "NODE STATE CODES" .PP Node state codes are shortened as required for the field size. -These node states may be followed by a special characater to identify +These node states may be followed by a special character to identify state flags associated with the node. The following node sufficies and states are used: .TP 4 diff --git a/doc/man/man1/squeue.1 b/doc/man/man1/squeue.1 index 8887965e752e961cab86033110328ade5f469a3f..2493205971cd1ff6668acf0fc97242c2a387f290 100644 --- a/doc/man/man1/squeue.1 +++ b/doc/man/man1/squeue.1 @@ -991,7 +991,7 @@ The job's constraints can not be satisfied. The job's earliest start time has not yet been reached. .TP \fBBlockFreeAction\fR -An IBM BlueGene block is being freedand can not allow more jobs to start. +An IBM BlueGene block is being freed and can not allow more jobs to start. .TP \fBBlockMaxError\fR An IBM BlueGene block has too many cnodes in error state to allow more jobs to start. diff --git a/doc/man/man1/srun.1 b/doc/man/man1/srun.1 index 8775f9463a3495ac61f009186a4fd2d97827c37a..0235856fdb7e8f0f4d60e650c5200e2140785d18 100644 --- a/doc/man/man1/srun.1 +++ b/doc/man/man1/srun.1 @@ -630,7 +630,7 @@ See \fBEXAMPLE\fR below. 
.TP \fB\-\-export\fR=<\fIenvironment variables | NONE\fR> -Identify which environment variables are propagated to the laucnhed application. +Identify which environment variables are propagated to the launched application. Multiple environment variable names should be comma separated. Environment variable names may be specified to propagate the current value of those variables (e.g. "\-\-export=EDITOR") or specific values @@ -787,7 +787,7 @@ Multiple license names should be comma separated (e.g. .TP \fB\-m\fR, \fB\-\-distribution\fR= -<\fIblock\fR|\fIcyclic\fR|\fIarbitrary\fR|\fIplane=<options>\fR[:\fIblock\fR|\fIcyclic\fR]>[,\fIPack\fR|\fINoPack\fR] +\fIarbitrary\fR|<\fIblock\fR|\fIcyclic\fR|\fIplane=<options>\fR[:\fIblock\fR|\fIcyclic\fR|\fIfcyclic\fR]>[,\fIPack\fR|\fINoPack\fR] Specify alternate distribution methods for remote processes. This option controls the assignment of tasks to the nodes on which @@ -796,6 +796,7 @@ to tasks for binding (task affinity). The first distribution method (before the ":") controls the distribution of resources across nodes. The optional second distribution method (after the ":") controls the distribution of resources across sockets within a node. + Note that with select/cons_res, the number of cpus allocated on each socket and node may be different. Refer to http://slurm.schedmd.com/mc_support.html @@ -878,6 +879,15 @@ that consecutive tasks share a socket. The cyclic distribution method will distribute tasks to sockets such that consecutive tasks are distributed over consecutive sockets (in a round\-robin fashion). +Tasks requiring more than one CPU will have all of those CPUs allocated on a +single socket if possible. +.TP +.B fcyclic +The fcyclic distribution method will distribute tasks to sockets such +that consecutive tasks are distributed over consecutive sockets (in a +round\-robin fashion). +Tasks requiring more than one CPU will have each CPU allocated in a cyclic +fashion across sockets. 
.RE .TP @@ -1683,10 +1693,10 @@ Specify a minimum amount of temporary disk space. .TP \fB\-u\fR, \fB\-\-unbuffered\fR -By default the connection between slurmdstepd and the user launched application +By default the connection between slurmstepd and the user launched application is over a pipe. The stdio output written by the application is buffered by the glibc until it is flushed or the output is set as unbuffered. -See setbuf(3).If this option is specified the tasks are executed with +See setbuf(3). If this option is specified the tasks are executed with a pseudo terminal so that the application output is unbuffered. .TP \fB\-\-usage\fR @@ -1736,7 +1746,7 @@ If you specify a minimum node or processor count larger than can be satisfied by the supplied host list, additional resources will be allocated on other nodes as needed. Rather than repeating a host name multiple times, an asterisk and -a repitition count may be appended to a host name. For example +a repetition count may be appended to a host name. For example "host1,host1" and "host1*2" are equivalent. .TP diff --git a/doc/man/man1/sshare.1 b/doc/man/man1/sshare.1 index e3a493dc03a8e2e4a6ee70686f18f9da52b4ef4b..5e1586271dfebc0e4808870757318345969ac23d 100644 --- a/doc/man/man1/sshare.1 +++ b/doc/man/man1/sshare.1 @@ -52,6 +52,11 @@ Output will be '|' delimited without a '|' at the end. \fB\-u\fR, \fB\-\-users=\fR Display information for specific users (comma separated list). +.TP +\fB\-U\fR, \fB\-\-Users\fR +If specified only the users information are printed, the parent +and ancestors are not displayed. + .TP \fB\-v\fR, \fB\-\-verbose\fR Display more information about the specified options. diff --git a/doc/man/man1/strigger.1 b/doc/man/man1/strigger.1 index 4f6b0984338cde376020e0476803ac984f4f3a48..e4532079bc41e9bdd07a60f200031acab198ede1 100644 --- a/doc/man/man1/strigger.1 +++ b/doc/man/man1/strigger.1 @@ -173,7 +173,7 @@ with the \fB\-\-jobid\fR option. 
When the \fB\-\-jobid\fR option is used in conjunction with the \fB\-\-up\fR, \fB\-\-down\fR or \fB\-\-drained\fR option, all nodes allocated to that job will considered the nodes used as a -trigger event.Since this option's argument is optional, for proper +trigger event. Since this option's argument is optional, for proper parsing the single letter option must be followed immediately with the value and not include a space between them. For example "\-ntux" and not "\-n tux". diff --git a/doc/man/man3/slurm_allocate_resources.3 b/doc/man/man3/slurm_allocate_resources.3 index f01beb122eda15463e42b572c9c03dc063efa637..7da0b82024a6e4131f73be3765abd460a03a15f5 100644 --- a/doc/man/man3/slurm_allocate_resources.3 +++ b/doc/man/man3/slurm_allocate_resources.3 @@ -141,7 +141,7 @@ Specifies the pointer to the structure to be created and filled in by the functi \fIslurm_job_will_run\fP. .TP \fIslurm_alloc_msg_thr_ptr\fP -Specigies the pointer to the structure created and returned by the +Specifies the pointer to the structure created and returned by the function \fIslurm_allocation_msg_thr_create\fP. Must be destroyed with function \fIslurm_allocation_msg_thr_destroy\fP. .TP diff --git a/doc/man/man3/slurm_step_launch.3 b/doc/man/man3/slurm_step_launch.3 index 351db72211002de3b3356671d42de232f339f7fc..42dafe4a1823df026d87aedcdf517a107f52aae7 100644 --- a/doc/man/man3/slurm_step_launch.3 +++ b/doc/man/man3/slurm_step_launch.3 @@ -69,7 +69,7 @@ the job step to be launched. .SH "DESCRIPTION" .LP -\fBslurm_step_launch_params_t_init\fR Iinitialize a user-allocated +\fBslurm_step_launch_params_t_init\fR initialize a user-allocated slurm_step_launch_params_t structure with default values. default values. This function will NOT allocate any new memory. 
.LP diff --git a/doc/man/man5/nonstop.conf.5 b/doc/man/man5/nonstop.conf.5 index a9b36c467bc7c4c14e8086661c5c0c075695f4ea..403750280141e16e36e9abc22435efc1c5d8ff6e 100644 --- a/doc/man/man5/nonstop.conf.5 +++ b/doc/man/man5/nonstop.conf.5 @@ -69,7 +69,7 @@ If a job requires replacement resources and none are immediately available, then permit a job to extend its time limit by the length of time required to secure replacement resources up to the number of minutes specified by \fBTimeLimitDelay\fR. -This option will only take effect if no hot spare resouces are available at +This option will only take effect if no hot spare resources are available at the time replacement resources are requested. This time limit extension is in addition to the value calculated using the \fBTimeLimitExtend\fR. diff --git a/doc/man/man5/slurm.conf.5 b/doc/man/man5/slurm.conf.5 index 4426a0f9e840f63d7cb0dfe42f6d4ec45746d2ea..99c07e0d91edda705a60bd0ac97e43e6dca381cf 100644 --- a/doc/man/man5/slurm.conf.5 +++ b/doc/man/man5/slurm.conf.5 @@ -998,7 +998,7 @@ or "jobacct_gather/cgroup" must be configured. \fBNOTE:\fR Changing this configuration parameter changes the contents of the messages between Slurm daemons. Any previously running job steps are managed by a slurmstepd daemon that will persist through the lifetime of -that job step and not change it's communication prototol. Only change this +that job step and not change its communication protocol. Only change this configuration parameter when there are no running job steps. .TP @@ -1368,7 +1368,7 @@ May not exceed 65533. \fBMemLimitEnforce\fR If set to "no" then Slurm will not terminate the job or the job step if they exceeds the value requested using the --mem-per-cpu option of -salloc/sbatch/srun. This is usefull if jobs need to specify --mem-per-cpu +salloc/sbatch/srun. This is useful if jobs need to specify --mem-per-cpu for scheduling but they should not be terminate if they exceed the estimated value. 
The default value is 'yes', terminate the job/step if exceed the requested memory. @@ -3279,7 +3279,7 @@ memory limit. For example, if a job's real memory limit is 500MB and VSizeFactor is set to 101 then the job will be killed if its real memory exceeds 500MB or its virtual memory exceeds 505MB (101 percent of the real memory limit). -The default valus is 0, which disables enforcement of virtual memory limits. +The default value is 0, which disables enforcement of virtual memory limits. The value may not exceed 65533 percent. .TP diff --git a/doc/man/man5/slurmdbd.conf.5 b/doc/man/man5/slurmdbd.conf.5 index 720af06302d9dacbf4bff8e2da31822af2c576b0..fad815b9711206fa636c798481ad4bb98e79bd9f 100644 --- a/doc/man/man5/slurmdbd.conf.5 +++ b/doc/man/man5/slurmdbd.conf.5 @@ -54,7 +54,7 @@ reservation data, no otherwise. Default is no. This script can be executed every time a rollup happens (every hour, day and month), depending on the Purge*After options. This script is used to transfer accounting records out of the database into an archive. It is -used in place of the internal process used to acrhive objects. +used in place of the internal process used to archive objects. The script is executed with a no arguments, The following environment variables are set. .RS @@ -207,7 +207,7 @@ SQL statements/queries when dealing with wckeys in the database. .TP \fBDebugLevel\fR The level of detail to provide the Slurm Database Daemon's logs. -The default valus is \fBinfo\fR. +The default value is \fBinfo\fR. 
.RS .TP 10 \fBquiet\fR diff --git a/slurm.spec b/slurm.spec index 9849389442f29b7319f8786e6d2122346414ce1b..6ec6af37a775966aea85c71a113ea82f3a299693 100644 --- a/slurm.spec +++ b/slurm.spec @@ -16,7 +16,7 @@ # --with cray %_with_cray 1 build for a Cray system without ALPS # --with cray_alps %_with_cray_alps 1 build for a Cray system with ALPS # --with cray_network %_with_cray_network 1 build for a non-Cray system with a Cray network -# --with debug %_with_debug 1 enable extra debugging within Slurm +# --without debug %_without_debug 1 don't compile with debugging symbols # --with lua %_with_lua 1 build Slurm lua bindings (proctrack only for now) # --without munge %_without_munge 1 don't build auth-munge RPM # --with mysql %_with_mysql 1 require mysql support @@ -45,7 +45,6 @@ %slurm_without_opt cray %slurm_without_opt cray_alps %slurm_without_opt cray_network -%slurm_without_opt debug %slurm_without_opt sun_const %slurm_without_opt salloc_background %slurm_without_opt multiple_slurmd @@ -65,6 +64,9 @@ # Use readline by default on all systems %slurm_with_opt readline +# Use debug by default on all systems +%slurm_with_opt debug + # Build with PAM by default on linux %ifos linux %slurm_with_opt pam @@ -414,7 +416,7 @@ Gives the ability for Slurm to use Berkeley Lab Checkpoint/Restart %build %configure \ - %{?slurm_with_debug:--enable-debug} \ + %{!?slurm_with_debug:--disable-debug} \ %{?slurm_with_partial_attach:--enable-partial-attach} \ %{?slurm_with_sun_const:--enable-sun-const} \ %{?with_db2_dir:--with-db2-dir=%{?with_db2_dir}} \ diff --git a/src/database/mysql_common.c b/src/database/mysql_common.c index 4cb13b8ca464110dcfcc049b6f9f62f61e1d5d0a..bcdc12e56517d93b1dcf4626b04e443fddc2b1fd 100644 --- a/src/database/mysql_common.c +++ b/src/database/mysql_common.c @@ -762,6 +762,25 @@ extern int mysql_db_query(mysql_conn_t *mysql_conn, char *query) return rc; } +/* + * Executes a single delete sql query. + * Returns the number of deleted rows, <0 for failure. 
+ */ +extern int mysql_db_delete_affected_rows(mysql_conn_t *mysql_conn, char *query) +{ + int rc = SLURM_SUCCESS; + + if (!mysql_conn || !mysql_conn->db_conn) { + fatal("You haven't inited this storage yet."); + return 0; /* For CLANG false positive */ + } + slurm_mutex_lock(&mysql_conn->lock); + if (!(rc = _mysql_query_internal(mysql_conn->db_conn, query))) + rc = mysql_affected_rows(mysql_conn->db_conn); + slurm_mutex_unlock(&mysql_conn->lock); + return rc; +} + extern int mysql_db_ping(mysql_conn_t *mysql_conn) { int rc; diff --git a/src/database/mysql_common.h b/src/database/mysql_common.h index 054bfd21a5bb92bf543269d939377877196c7fa7..e3adb3ccffe439736eb5bdb8de137c2d3b59a7fe 100644 --- a/src/database/mysql_common.h +++ b/src/database/mysql_common.h @@ -104,6 +104,7 @@ extern int mysql_db_get_db_connection(mysql_conn_t *mysql_conn, char *db_name, extern int mysql_db_close_db_connection(mysql_conn_t *mysql_conn); extern int mysql_db_cleanup(); extern int mysql_db_query(mysql_conn_t *mysql_conn, char *query); +extern int mysql_db_delete_affected_rows(mysql_conn_t *mysql_conn, char *query); extern int mysql_db_ping(mysql_conn_t *mysql_conn); extern int mysql_db_commit(mysql_conn_t *mysql_conn); extern int mysql_db_rollback(mysql_conn_t *mysql_conn); diff --git a/src/plugins/accounting_storage/mysql/as_mysql_archive.c b/src/plugins/accounting_storage/mysql/as_mysql_archive.c index a973435a776d5441770302b1f4517bb11a95820e..a25a918246c47327cf9ef4caeedc04341709b184 100644 --- a/src/plugins/accounting_storage/mysql/as_mysql_archive.c +++ b/src/plugins/accounting_storage/mysql/as_mysql_archive.c @@ -49,6 +49,9 @@ #define SLURMDBD_2_6_VERSION 12 /* slurm version 2.6 */ #define SLURMDBD_2_5_VERSION 11 /* slurm version 2.5 */ +#define MAX_PURGE_LIMIT 50000 /* Number of records that are purged at a time + so that locks can be periodically released. 
*/ + typedef struct { char *cluster_nodes; char *cpu_count; @@ -2198,11 +2201,16 @@ static int _execute_archive(mysql_conn_t *mysql_conn, return rc; } query = xstrdup_printf("delete from \"%s_%s\" where " - "time_start <= %ld && time_end != 0", - cluster_name, event_table, curr_end); + "time_start <= %ld && time_end != 0 " + "LIMIT %d", + cluster_name, event_table, curr_end, + MAX_PURGE_LIMIT); if (debug_flags & DEBUG_FLAG_DB_USAGE) DB_DEBUG(mysql_conn->conn, "query\n%s", query); - rc = mysql_db_query(mysql_conn, query); + + while ((rc = mysql_db_delete_affected_rows( + mysql_conn, query)) > 0); + xfree(query); if (rc != SLURM_SUCCESS) { error("Couldn't remove old event data"); @@ -2235,11 +2243,15 @@ exit_events: return rc; } query = xstrdup_printf("delete from \"%s_%s\" where " - "time_start <= %ld && time_end != 0", - cluster_name, suspend_table, curr_end); + "time_start <= %ld && time_end != 0 " + "LIMIT %d", + cluster_name, suspend_table, curr_end, + MAX_PURGE_LIMIT); if (debug_flags & DEBUG_FLAG_DB_USAGE) DB_DEBUG(mysql_conn->conn, "query\n%s", query); - rc = mysql_db_query(mysql_conn, query); + + while ((rc = mysql_db_delete_affected_rows( + mysql_conn, query)) > 0); xfree(query); if (rc != SLURM_SUCCESS) { error("Couldn't remove old suspend data"); @@ -2273,11 +2285,16 @@ exit_suspend: } query = xstrdup_printf("delete from \"%s_%s\" where " - "time_start <= %ld && time_end != 0", - cluster_name, step_table, curr_end); + "time_start <= %ld && time_end != 0 " + "LIMIT %d", + cluster_name, step_table, curr_end, + MAX_PURGE_LIMIT); if (debug_flags & DEBUG_FLAG_DB_USAGE) DB_DEBUG(mysql_conn->conn, "query\n%s", query); - rc = mysql_db_query(mysql_conn, query); + + while ((rc = mysql_db_delete_affected_rows( + mysql_conn, query)) > 0); + xfree(query); if (rc != SLURM_SUCCESS) { error("Couldn't remove old step data"); @@ -2311,11 +2328,15 @@ exit_steps: query = xstrdup_printf("delete from \"%s_%s\" " "where time_submit <= %ld " - "&& time_end != 0", - cluster_name, 
job_table, curr_end); + "&& time_end != 0 LIMIT %d", + cluster_name, job_table, curr_end, + MAX_PURGE_LIMIT); if (debug_flags & DEBUG_FLAG_DB_USAGE) DB_DEBUG(mysql_conn->conn, "query\n%s", query); - rc = mysql_db_query(mysql_conn, query); + + while ((rc = mysql_db_delete_affected_rows( + mysql_conn, query)) > 0); + xfree(query); if (rc != SLURM_SUCCESS) { error("Couldn't remove old job data"); @@ -2349,11 +2370,15 @@ exit_jobs: query = xstrdup_printf("delete from \"%s_%s\" " "where time_start <= %ld " - "&& time_end != 0", - cluster_name, resv_table, curr_end); + "&& time_end != 0 LIMIT %d", + cluster_name, resv_table, curr_end, + MAX_PURGE_LIMIT); if (debug_flags & DEBUG_FLAG_DB_USAGE) DB_DEBUG(mysql_conn->conn, "query\n%s", query); - rc = mysql_db_query(mysql_conn, query); + + while ((rc = mysql_db_delete_affected_rows( + mysql_conn, query)) > 0); + xfree(query); if (rc != SLURM_SUCCESS) { error("Couldn't remove old resv data"); diff --git a/src/plugins/job_submit/lua/job_submit_lua.c b/src/plugins/job_submit/lua/job_submit_lua.c index cc3b3d960477c2d6ffb22dbc44924003a9b796ee..b7b096fd649d3ae48eb0f015c13716a3ee993a04 100644 --- a/src/plugins/job_submit/lua/job_submit_lua.c +++ b/src/plugins/job_submit/lua/job_submit_lua.c @@ -522,6 +522,10 @@ static int _get_job_req_field(const struct job_descriptor *job_desc, lua_pushnumber (L, job_desc->group_id); } else if (!strcmp(name, "licenses")) { lua_pushstring (L, job_desc->licenses); + } else if (!strcmp(name, "mail_type")) { + lua_pushnumber (L, job_desc->mail_type); + } else if (!strcmp(name, "mail_user")) { + lua_pushstring (L, job_desc->mail_user); } else if (!strcmp(name, "max_cpus")) { lua_pushnumber (L, job_desc->max_cpus); } else if (!strcmp(name, "max_nodes")) { diff --git a/src/sacct/sacct.h b/src/sacct/sacct.h index 7f8ac544c2807c04955d70b502d0c46ca087f952..2daedbfbb04ffd71221d9e51b7f034e39a0b3a9f 100644 --- a/src/sacct/sacct.h +++ b/src/sacct/sacct.h @@ -69,7 +69,7 @@ #define BRIEF_COMP_FIELDS 
"jobid,uid,state" #define DEFAULT_FIELDS "jobid,jobname,partition,account,alloccpus,state,exitcode" #define DEFAULT_COMP_FIELDS "jobid,uid,jobname,partition,nnodes,nodelist,state,end" -#define LONG_FIELDS "jobid,jobname,partition,maxvmsize,maxvmsizenode,maxvmsizetask,avevmsize,maxrss,maxrssnode,maxrsstask,averss,maxpages,maxpagesnode,maxpagestask,avepages,mincpu,mincpunode,mincputask,avecpu,ntasks,alloccpus,elapsed,state,exitcode,avecpufreq,reqcpufreqmin,reqcpufreqmax,reqcpufreqgov,reqmem,consumedenergy,maxdiskread,maxdiskreadnode,maxdiskreadtask,avediskread,maxdiskwrite,maxdiskwritenode,maxdiskwritetask,avediskwrite,allocgres,reqgres" +#define LONG_FIELDS "jobid,jobidraw,jobname,partition,maxvmsize,maxvmsizenode,maxvmsizetask,avevmsize,maxrss,maxrssnode,maxrsstask,averss,maxpages,maxpagesnode,maxpagestask,avepages,mincpu,mincpunode,mincputask,avecpu,ntasks,alloccpus,elapsed,state,exitcode,avecpufreq,reqcpufreqmin,reqcpufreqmax,reqcpufreqgov,reqmem,consumedenergy,maxdiskread,maxdiskreadnode,maxdiskreadtask,avediskread,maxdiskwrite,maxdiskwritenode,maxdiskwritetask,avediskwrite,allocgres,reqgres" #define LONG_COMP_FIELDS "jobid,uid,jobname,partition,nnodes,nodelist,state,start,end,timelimit" diff --git a/src/slurmctld/job_mgr.c b/src/slurmctld/job_mgr.c index 134674e7729f968c7275ffd03dc172127dab2030..482a21e8d0c89494fd1298effd1fb10b49f95451 100644 --- a/src/slurmctld/job_mgr.c +++ b/src/slurmctld/job_mgr.c @@ -9734,19 +9734,14 @@ static int _update_job(struct job_record *job_ptr, job_desc_msg_t * job_specs, if ((job_specs->pn_min_cpus != (uint16_t) NO_VAL) && (job_specs->pn_min_cpus != 0)) { - if ((!IS_JOB_PENDING(job_ptr)) || (detail_ptr == NULL)) + + if ((!IS_JOB_PENDING(job_ptr)) || (detail_ptr == NULL)) { error_code = ESLURM_JOB_NOT_PENDING; - else if (authorized - || (detail_ptr->pn_min_cpus - > job_specs->pn_min_cpus)) { + }else { detail_ptr->pn_min_cpus = job_specs->pn_min_cpus; info("update_job: setting pn_min_cpus to %u for " "job_id %u", 
job_specs->pn_min_cpus, job_ptr->job_id); - } else { - error("Attempt to increase pn_min_cpus for job %u", - job_ptr->job_id); - error_code = ESLURM_ACCESS_DENIED; } } if (error_code != SLURM_SUCCESS) @@ -10133,13 +10128,14 @@ static int _update_job(struct job_record *job_ptr, job_desc_msg_t * job_specs, goto fini; if (job_specs->pn_min_memory != NO_VAL) { - if ((!IS_JOB_PENDING(job_ptr)) || (detail_ptr == NULL)) + + if ((!IS_JOB_PENDING(job_ptr)) || (detail_ptr == NULL)) { error_code = ESLURM_JOB_NOT_PENDING; - else if (job_specs->pn_min_memory - == detail_ptr->pn_min_memory) + } else if (job_specs->pn_min_memory + == detail_ptr->pn_min_memory) { debug("sched: update_job: new memory limit identical " "to old limit for job %u", job_ptr->job_id); - else if (authorized) { + } else { char *entity; if (job_specs->pn_min_memory & MEM_PER_CPU) entity = "cpu"; @@ -10155,32 +10151,21 @@ static int _update_job(struct job_record *job_ptr, job_desc_msg_t * job_specs, * since if set by a super user it be set correctly */ job_ptr->limit_set_pn_min_memory = acct_policy_limit_set.pn_min_memory; - } else { - error("sched: Attempt to modify pn_min_memory for " - "job %u", job_ptr->job_id); - error_code = ESLURM_ACCESS_DENIED; } } if (error_code != SLURM_SUCCESS) goto fini; if (job_specs->pn_min_tmp_disk != NO_VAL) { - if ((!IS_JOB_PENDING(job_ptr)) || (detail_ptr == NULL)) + + if ((!IS_JOB_PENDING(job_ptr)) || (detail_ptr == NULL)) { error_code = ESLURM_JOB_NOT_PENDING; - else if (authorized - || (detail_ptr->pn_min_tmp_disk - > job_specs->pn_min_tmp_disk)) { + } else { detail_ptr->pn_min_tmp_disk = job_specs->pn_min_tmp_disk; info("sched: update_job: setting job_min_tmp_disk to " "%u for job_id %u", job_specs->pn_min_tmp_disk, job_ptr->job_id); - } else { - - error("sched: Attempt to modify pn_min_tmp_disk " - "for job %u", - job_ptr->job_id); - error_code = ESLURM_ACCESS_DENIED; } } if (error_code != SLURM_SUCCESS) diff --git a/src/slurmd/slurmd/get_mach_stat.c 
b/src/slurmd/slurmd/get_mach_stat.c index 942940136de896b27be0ae3cb458d702b61accc7..d7c5eb1a06199cfb70c34aef0439b0ae6d663701 100644 --- a/src/slurmd/slurmd/get_mach_stat.c +++ b/src/slurmd/slurmd/get_mach_stat.c @@ -81,6 +81,10 @@ #include <sys/utsname.h> +#ifdef HAVE_SYS_STATVFS_H +# include <sys/statvfs.h> +#endif + #ifdef HAVE_SYS_STATFS_H # include <sys/statfs.h> #else @@ -212,7 +216,29 @@ extern int get_tmp_disk(uint32_t *tmp_disk, char *tmp_fs) { int error_code = 0; -#ifdef HAVE_SYS_VFS_H + +#if defined(HAVE_STATVFS) + struct statvfs stat_buf; + uint64_t total_size = 0; + char *tmp_fs_name = tmp_fs; + + *tmp_disk = 0; + total_size = 0; + + if (tmp_fs_name == NULL) + tmp_fs_name = "/tmp"; + if (statvfs(tmp_fs_name, &stat_buf) == 0) { + total_size = stat_buf.f_blocks * stat_buf.f_frsize; + total_size /= 1024 * 1024; + } + else if (errno != ENOENT) { + error_code = errno; + error ("get_tmp_disk: error %d executing statvfs on %s", + errno, tmp_fs_name); + } + *tmp_disk += (uint32_t)total_size; + +#elif defined(HAVE_STATFS) struct statfs stat_buf; long total_size; float page_size; diff --git a/src/sshare/process.c b/src/sshare/process.c index 7ba7746d48ab77c6dc306c4c8702c7a306250d52..3f6e86663c2ca3796bce5a5ae36cbb291c965dd4 100644 --- a/src/sshare/process.c +++ b/src/sshare/process.c @@ -42,7 +42,7 @@ extern int long_flag; -extern int process(shares_response_msg_t *resp) +extern int process(shares_response_msg_t *resp, uint16_t options) { uint32_t flags = slurmctld_conf.priority_flags; int rc = SLURM_SUCCESS; @@ -213,6 +213,9 @@ extern int process(shares_response_msg_t *resp) char *tmp_char = NULL; char *local_acct = NULL; + if ((options & PRINT_USERS_ONLY) && share->user == 0) + continue; + while ((field = list_next(itr2))) { switch(field->type) { case PRINT_ACCOUNT: diff --git a/src/sshare/sshare.c b/src/sshare/sshare.c index 8e95aeea228d8fa3b8d482e023eb02e7cdde6735..5315bf6bf002569f33fd7041391d2cf29667e74f 100644 --- a/src/sshare/sshare.c +++ 
b/src/sshare/sshare.c @@ -70,6 +70,7 @@ main (int argc, char *argv[]) char *temp = NULL; int option_index; bool all_users = 0; + uint16_t options = 0; static struct option long_options[] = { {"accounts", 1, 0, 'A'}, @@ -81,6 +82,7 @@ main (int argc, char *argv[]) {"parsable", 0, 0, 'p'}, {"parsable2",0, 0, 'P'}, {"users", 1, 0, 'u'}, + {"Users", 0, 0, 'U'}, {"verbose", 0, 0, 'v'}, {"version", 0, 0, 'V'}, {"help", 0, 0, OPT_LONG_HELP}, @@ -96,7 +98,7 @@ main (int argc, char *argv[]) slurm_conf_init(NULL); log_init("sshare", opts, SYSLOG_FACILITY_DAEMON, NULL); - while((opt_char = getopt_long(argc, argv, "aA:hlM:npPqu:t:vV", + while((opt_char = getopt_long(argc, argv, "aA:hlM:npPqUu:t:vV", long_options, &option_index)) != -1) { switch (opt_char) { case (int)'?': @@ -153,6 +155,9 @@ main (int argc, char *argv[]) list_create(slurm_destroy_char); _addto_name_char_list(req_msg.user_list, optarg, 0); break; + case 'U': + options |= PRINT_USERS_ONLY; + break; case 'v': quiet_flag = -1; verbosity++; @@ -207,11 +212,13 @@ main (int argc, char *argv[]) } if (req_msg.acct_list && list_count(req_msg.acct_list)) { - fprintf(stderr, "Accounts requested:\n"); - ListIterator itr = list_iterator_create(req_msg.acct_list); - while((temp = list_next(itr))) - fprintf(stderr, "\t: %s\n", temp); - list_iterator_destroy(itr); + if (verbosity) { + fprintf(stderr, "Accounts requested:\n"); + ListIterator itr = list_iterator_create(req_msg.acct_list); + while((temp = list_next(itr))) + fprintf(stderr, "\t: %s\n", temp); + list_iterator_destroy(itr); + } } else { if (req_msg.acct_list && list_count(req_msg.acct_list)) { @@ -236,7 +243,7 @@ main (int argc, char *argv[]) } /* do stuff with it */ - process(resp_msg); + process(resp_msg, options); slurm_free_shares_response_msg(resp_msg); diff --git a/src/sshare/sshare.h b/src/sshare/sshare.h index 324050251fca1b54ecfe558b40a92c5df8ffbdf8..28731ed950052a51a23b21442b22ca7388f25cdd 100644 --- a/src/sshare/sshare.h +++ b/src/sshare/sshare.h @@ -87,6
+87,10 @@ #define CKPT_WAIT 10 #define MAX_INPUT_FIELDS 128 +/* Print only the users and not the hierarchy. + */ +#define PRINT_USERS_ONLY 0x01 + typedef enum { SSHARE_TIME_SECS, SSHARE_TIME_MINS, @@ -100,6 +104,6 @@ extern sshare_time_format_t time_format; extern char *time_format_string; extern List clusters; -extern int process(shares_response_msg_t *msg); +extern int process(shares_response_msg_t *msg, uint16_t); #endif