diff --git a/NEWS b/NEWS
index 17c68195dee467d328bda943c0eaf97e53bcd3a3..262ff7da95aad7ce98bcf7fe2bbb3b432c7d007e 100644
--- a/NEWS
+++ b/NEWS
@@ -7,6 +7,12 @@ documents those changes that are of interest to users and admins.
     getting information for all jobs. Improved performance of some commands. 
     NOTE: Change in RPC means all nodes in the cluster should be updated 
     at the same time.
+ -- In salloc, sbatch, and srun, replace the --task-mem option with --mem-per-cpu
+    (--task-mem will continue to be accepted for now, but is not documented).
+    Replace DefMemPerTask and MaxMemPerTask with DefMemPerCPU, DefMemPerNode,
+    MaxMemPerCPU and MaxMemPerNode in slurm.conf. Allocate a job's memory
+    at the same time that processors are allocated, based upon the --mem or
+    --mem-per-cpu option, rather than when job steps are initiated.
 
 * Changes in SLURM 1.3.5
 ========================
diff --git a/doc/html/configurator.html.in b/doc/html/configurator.html.in
index 2b61e0922a9f68addf9f1bd8fc92f14701a3b418..7fca6c422b7f54e381578e7dd8fc0cfac6ba2c30 100644
--- a/doc/html/configurator.html.in
+++ b/doc/html/configurator.html.in
@@ -206,9 +206,9 @@ function displayfile()
    "# <br>" +
    "# <br>" +
    "# SCHEDULING <br>" +
-   "#DefMemPerTask=0 <br>" +
+   "#DefMemPerCPU=0 <br>" +
    "FastSchedule=" + get_radio_value(document.config.fast_schedule) + "<br>" +
-   "#MaxMemPerTask=0 <br>" +
+   "#MaxMemPerCPU=0 <br>" +
    "#SchedulerRootFilter=1 <br>" +
    "#SchedulerTimeSlice=30 <br>" +
    "SchedulerType=sched/" + get_radio_value(document.config.sched_type) + "<br>" +
diff --git a/doc/html/cons_res.shtml b/doc/html/cons_res.shtml
index ae4f0229330198d13b00876d6f270a0dad6f8747..368810a9ebc241aab3849330d9073e34b8b017e7 100644
--- a/doc/html/cons_res.shtml
+++ b/doc/html/cons_res.shtml
@@ -28,15 +28,15 @@ this plug-in is described below.
     slurm.conf (e.g. <i>SelectType=select/cons_res</i>).</li>
 <pre>
 #
-# "SelectType"			: node selection logic for scheduling.
-#	"select/bluegene"	: the default on BlueGene systems, aware of
-#				  system topology, manages bglblocks, etc.
-#	"select/cons_res"	: allocate individual consumable resources
-#				  (i.e. processors, memory, etc.)
-#	"select/linear"		: the default on non-BlueGene systems,
-#				  no topology awareness, oriented toward
-#				  allocating nodes to jobs rather than
-#				  resources within a node (e.g. CPUs)
+# "SelectType"         : node selection logic for scheduling.
+#    "select/bluegene" : the default on BlueGene systems, aware of
+#                        system topology, manages bglblocks, etc.
+#    "select/cons_res" : allocate individual consumable resources
+#                        (i.e. processors, memory, etc.)
+#    "select/linear"   : the default on non-BlueGene systems,
+#                        no topology awareness, oriented toward
+#                        allocating nodes to jobs rather than
+#                        resources within a node (e.g. CPUs)
 #
 # SelectType=select/linear
 SelectType=select/cons_res
@@ -98,15 +98,15 @@ SelectType=select/cons_res
       SelectTypeParameter in the slurm.conf.</li>
 <pre>
 #
-# "SelectType"			: node selection logic for scheduling.
-#	"select/bluegene"	: the default on BlueGene systems, aware of
-#				  system topology, manages bglblocks, etc.
-#	"select/cons_res"	: allocate individual consumable resources
-#				  (i.e. processors, memory, etc.)
-#	"select/linear"		: the default on non-BlueGene systems,
-#				  no topology awareness, oriented toward
-#				  allocating nodes to jobs rather than
-#				  resources within a node (e.g. CPUs)
+# "SelectType"         : node selection logic for scheduling.
+#    "select/bluegene" : the default on BlueGene systems, aware of
+#                        system topology, manages bglblocks, etc.
+#    "select/cons_res" : allocate individual consumable resources
+#                        (i.e. processors, memory, etc.)
+#    "select/linear"   : the default on non-BlueGene systems,
+#                        no topology awareness, oriented toward
+#                        allocating nodes to jobs rather than
+#                        resources within a node (e.g. CPUs)
 #
 # SelectType=select/linear
 SelectType=select/cons_res
@@ -115,34 +115,33 @@ SelectType=select/cons_res
 #    - select/bluegene - this parameter is currently ignored
 #    - select/linear   - this parameter is currently ignored
 #    - select/cons_res - the parameters available are
-#          - CR_CPU     (1) - CPUs as consumable resources. 
-#                      	No notion of sockets, cores, or threads. 
-#                      	On a multi-core system CPUs will be cores
-#                      	On a multi-core/hyperthread system CPUs will 
-#                      		       be threads
-#                      	On a single-core systems CPUs are CPUs. ;-)
-#          - CR_Socket (2) - Sockets as a consumable resource.
-#          - CR_Core   (3) - Cores as a consumable resource. 
-#				(Not yet implemented)
-#          - CR_Memory (4) - Memory as a consumable resource. 
-#				Note! CR_Memory assumes Shared=Yes
-#          - CR_Socket_Memory (5) - Socket and Memory as consumable 
-#				resources.
-#          - CR_Core_Memory (6) - Core and Memory as consumable 
-#				resources. (Not yet implemented)	
-#          - CR_CPU_Memory (7) - CPU and Memory as consumable 
-#				resources.
+#      - CR_CPU  (1)   - CPUs as consumable resources.
+#                        No notion of sockets, cores, or threads. 
+#                        On a multi-core system CPUs will be cores
+#                        On a multi-core/hyperthread system CPUs
+#                        will be threads
+#                        On a single-core system CPUs are CPUs.
+#      - CR_Socket (2) - Sockets as a consumable resource.
+#      - CR_Core   (3) - Cores as a consumable resource. 
+#      - CR_Memory (4) - Memory as a consumable resource. 
+#                        Note! CR_Memory assumes Shared=Yes
+#      - CR_Socket_Memory (5) - Socket and Memory as consumable 
+#                               resources.
+#      - CR_Core_Memory (6)   - Core and Memory as consumable 
+#                               resources. (Not yet implemented)
+#      - CR_CPU_Memory (7)    - CPU and Memory as consumable 
+#                               resources.
 #
 # (#) refer to the output of "scontrol show config"
 #
-# NB!:	The -E extension for sockets, cores, and threads 
-#	are ignored within the node allocation mechanism 
-#	when CR_CPU or CR_CPU_MEMORY is selected. 
-#	They are considered to compute the total number of 
-#	tasks when -n is not specified
+# NB!:   The -E extension for sockets, cores, and threads
+#        is ignored within the node allocation mechanism
+#        when CR_CPU or CR_CPU_MEMORY is selected.
+#        Its values are used to compute the total number
+#        of tasks when -n is not specified.
 #
 # NB! All CR_s assume Shared=No or Shared=Force EXCEPT for 
-#	CR_MEMORY which assumes Shared=Yes
+#        CR_MEMORY which assumes Shared=Yes
 #
 #SelectTypeParameters=CR_CPU (default)
 </pre>
@@ -169,7 +168,7 @@ SelectType=select/cons_res
     way as when using the default node selection scheme.</li>
   <li>The <i>--exclusive</i> srun switch allows users to request nodes in 
     exclusive mode even when consumable resources is enabled. see "man srun" 
-    for details. </li>	
+    for details. </li>        
   <li>srun's <i>-s</i> or <i>--share</i> is incompatible with the consumable resource 
     environment and will therefore not be honored. Since in this environment nodes 
     are shared by default, <i>--exclusive</i> allows users to obtain dedicated nodes.</li>
@@ -213,19 +212,18 @@ Please send comments and requests about the consumable resources to
 # srun sleep 100 &
 # srun sleep 100 &
 # squeue
-  JOBID PARTITION     NAME     USER  ST       TIME  NODES NODELIST(REASON)
-   1132  allNodes    sleep   sballe   R       0:05      1 hydra12
-   1133  allNodes    sleep   sballe   R       0:04      1 hydra12
-   1134  allNodes    sleep   sballe   R       0:02      1 hydra12
+JOBID PARTITION   NAME     USER  ST   TIME  NODES NODELIST(REASON)
+ 1132  allNodes  sleep   sballe   R   0:05      1 hydra12
+ 1133  allNodes  sleep   sballe   R   0:04      1 hydra12
+ 1134  allNodes  sleep   sballe   R   0:02      1 hydra12
 # srun -N 2-2 -E 2:2 sleep 100 &
 srun: job 1135 queued and waiting for resources
 #squeue
-  JOBID PARTITION     NAME     USER  ST       TIME  NODES NODELIST(REASON)
-   1135  allNodes    sleep   sballe  PD       0:00      2 (Resources)
-   1132  allNodes    sleep   sballe   R       0:24      1 hydra12
-   1133  allNodes    sleep   sballe   R       0:23      1 hydra12
-   1134  allNodes    sleep   sballe   R       0:21      1 hydra12
-#
+JOBID PARTITION   NAME     USER  ST   TIME  NODES NODELIST(REASON)
+ 1135  allNodes  sleep   sballe  PD   0:00      2 (Resources)
+ 1132  allNodes  sleep   sballe   R   0:24      1 hydra12
+ 1133  allNodes  sleep   sballe   R   0:23      1 hydra12
+ 1134  allNodes  sleep   sballe   R   0:21      1 hydra12
 </pre>
     <li><b>Proposed solution:</b> Enhance the selection mechanism to go through {node,socket,core,thread}-tuplets to find available match for specific request (bounded knapsack problem). </li>
     </ul>
@@ -248,7 +246,7 @@ srun: job 1135 queued and waiting for resources
 <h2>Examples of CR_Memory, CR_Socket_Memory, and CR_CPU_Memory type consumable resources</h2> 
 
 <pre>
-sinfo -lNe
+# sinfo -lNe
 NODELIST     NODES PARTITION  STATE  CPUS  S:C:T MEMORY 
 hydra[12-16]     5 allNodes*  ...       4  2:2:1   2007 
 </pre>
@@ -256,59 +254,59 @@ hydra[12-16]     5 allNodes*  ...       4  2:2:1   2007
 <p>Using select/cons_res plug-in with CR_Memory</p>
 <pre>
 Example:
-srun -N 5 -n 20 --job-mem=1000 sleep 100 &  <-- running
-srun -N 5 -n 20 --job-mem=10 sleep 100 &    <-- running 
-srun -N 5 -n 10 --job-mem=1000 sleep 100 &  <-- queued and waiting for resources
-
-squeue
-  JOBID PARTITION     NAME     USER  ST       TIME  NODES NODELIST(REASON)
-   1820  allNodes    sleep   sballe  PD       0:00      5 (Resources)
-   1818  allNodes    sleep   sballe   R       0:17      5 hydra[12-16]
-   1819  allNodes    sleep   sballe   R       0:11      5 hydra[12-16]
+# srun -N 5 -n 20 --job-mem=1000 sleep 100 &  <-- running
+# srun -N 5 -n 20 --job-mem=10 sleep 100 &    <-- running 
+# srun -N 5 -n 10 --job-mem=1000 sleep 100 &  <-- queued and waiting for resources
+
+# squeue
+JOBID PARTITION   NAME     USER  ST   TIME  NODES NODELIST(REASON)
+ 1820  allNodes  sleep   sballe  PD   0:00      5 (Resources)
+ 1818  allNodes  sleep   sballe   R   0:17      5 hydra[12-16]
+ 1819  allNodes  sleep   sballe   R   0:11      5 hydra[12-16]
 </pre>
 
 <p>Using select/cons_res plug-in with CR_Socket_Memory (2 sockets/node)</p>
 <pre>
 Example 1:
-srun -N 5 -n 5 --job-mem=1000 sleep 100 &        <-- running
-srun -n 1 -w hydra12 --job-mem=2000 sleep 100 &  <-- queued and waiting for resources
+# srun -N 5 -n 5 --job-mem=1000 sleep 100 &        <-- running
+# srun -n 1 -w hydra12 --job-mem=2000 sleep 100 &  <-- queued and waiting for resources
 
-squeue
-  JOBID PARTITION     NAME     USER  ST       TIME  NODES NODELIST(REASON)
-   1890  allNodes    sleep   sballe  PD       0:00      1 (Resources)
-   1889  allNodes    sleep   sballe   R       0:08      5 hydra[12-16]
+# squeue
+JOBID PARTITION   NAME     USER  ST   TIME  NODES NODELIST(REASON)
+ 1890  allNodes  sleep   sballe  PD   0:00      1 (Resources)
+ 1889  allNodes  sleep   sballe   R   0:08      5 hydra[12-16]
 
 Example 2:
-srun -N 5 -n 10 --job-mem=10 sleep 100 & <-- running 
-srun -n 1 --job-mem=10 sleep 100 & <-- queued and waiting for resourcessqueue
+# srun -N 5 -n 10 --job-mem=10 sleep 100 & <-- running 
+# srun -n 1 --job-mem=10 sleep 100 & <-- queued and waiting for resources
 
-squeue
-  JOBID PARTITION     NAME     USER  ST       TIME  NODES NODELIST(REASON)
-   1831  allNodes    sleep   sballe  PD       0:00      1 (Resources)
-   1830  allNodes    sleep   sballe   R       0:07      5 hydra[12-16]
+# squeue
+JOBID PARTITION   NAME     USER  ST   TIME  NODES NODELIST(REASON)
+ 1831  allNodes  sleep   sballe  PD   0:00      1 (Resources)
+ 1830  allNodes  sleep   sballe   R   0:07      5 hydra[12-16]
 </pre>
 
 <p>Using select/cons_res plug-in with CR_CPU_Memory (4 CPUs/node)</p>
 <pre>
 Example 1:
-srun -N 5 -n 5 --job-mem=1000 sleep 100 &  <-- running 
-srun -N 5 -n 5 --job-mem=10 sleep 100 &    <-- running
-srun -N 5 -n 5 --job-mem=1000 sleep 100 &  <-- queued and waiting for resources
+# srun -N 5 -n 5 --job-mem=1000 sleep 100 &  <-- running 
+# srun -N 5 -n 5 --job-mem=10 sleep 100 &    <-- running
+# srun -N 5 -n 5 --job-mem=1000 sleep 100 &  <-- queued and waiting for resources
 
-squeue
-  JOBID PARTITION     NAME     USER  ST       TIME  NODES NODELIST(REASON)
-   1835  allNodes    sleep   sballe  PD       0:00      5 (Resources)
-   1833  allNodes    sleep   sballe   R       0:10      5 hydra[12-16]
-   1834  allNodes    sleep   sballe   R       0:07      5 hydra[12-16]
+# squeue
+JOBID PARTITION   NAME     USER  ST   TIME  NODES NODELIST(REASON)
+ 1835  allNodes  sleep   sballe  PD   0:00      5 (Resources)
+ 1833  allNodes  sleep   sballe   R   0:10      5 hydra[12-16]
+ 1834  allNodes  sleep   sballe   R   0:07      5 hydra[12-16]
 
 Example 2:
-srun -N 5 -n 20 --job-mem=10 sleep 100 & <-- running 
-srun -n 1 --job-mem=10 sleep 100 &       <-- queued and waiting for resources
+# srun -N 5 -n 20 --job-mem=10 sleep 100 & <-- running 
+# srun -n 1 --job-mem=10 sleep 100 &       <-- queued and waiting for resources
 
-squeue
-  JOBID PARTITION     NAME     USER  ST       TIME  NODES NODELIST(REASON)
-   1837  allNodes    sleep   sballe  PD       0:00      1 (Resources)
-   1836  allNodes    sleep   sballe   R       0:11      5 hydra[12-16]
+# squeue
+JOBID PARTITION   NAME     USER  ST   TIME  NODES NODELIST(REASON)
+ 1837  allNodes  sleep   sballe  PD   0:00      1 (Resources)
+ 1836  allNodes  sleep   sballe   R   0:11      5 hydra[12-16]
 </pre>
 
 <p class="footer"><a href="#top">top</a></p>
@@ -365,11 +363,11 @@ have one idle cpu and linux04 has 3 idle cpus.</p>
 
 <pre>
 # squeue
-  JOBID PARTITION     NAME     USER  ST       TIME  NODES NODELIST(REASON)
-      3       lsf    sleep     root  PD       0:00      3 (Resources)
-      4       lsf    sleep     root  PD       0:00      1 (Resources)
-      5       lsf    sleep     root  PD       0:00      1 (Resources)
-      2       lsf    sleep     root   R       0:14      4 xc14n[13-16]
+JOBID PARTITION   NAME   USER  ST   TIME  NODES NODELIST(REASON)
+    3       lsf  sleep   root  PD   0:00      3 (Resources)
+    4       lsf  sleep   root  PD   0:00      1 (Resources)
+    5       lsf  sleep   root  PD   0:00      1 (Resources)
+    2       lsf  sleep   root   R   0:14      4 xc14n[13-16]
 </pre>
 
 <p>Once Job 2 is finished, Job 3 is scheduled and runs on
@@ -381,10 +379,10 @@ and Job 4 can run concurrently on the cluster.</p>
 
 <pre>
 # squeue
-  JOBID PARTITION     NAME     USER  ST       TIME  NODES NODELIST(REASON)
-      5       lsf    sleep     root  PD       0:00      1 (Resources)
-      3       lsf    sleep     root   R       0:11      3 xc14n[13-15]
-      4       lsf    sleep     root   R       0:11      1 xc14n16
+JOBID PARTITION   NAME   USER  ST   TIME  NODES NODELIST(REASON)
+    5       lsf  sleep   root  PD   0:00      1 (Resources)
+    3       lsf  sleep   root   R   0:11      3 xc14n[13-15]
+    4       lsf  sleep   root   R   0:11      1 xc14n16
 </pre>
 
 <p>Once Job 3 finishes, Job 5 is allocated resources and can run.</p>
@@ -426,16 +424,16 @@ nodes) and Job 4 is scheduled onto one of the remaining idle cpus on Linux04.</p
 <pre>
 
 # squeue
-  JOBID PARTITION     NAME     USER  ST       TIME  NODES NODELIST(REASON)
-      5       lsf    sleep     root  PD       0:00      1 (Resources)
-      2       lsf    sleep     root   R       0:13      4 linux[01-04]
-      3       lsf    sleep     root   R       0:09      3 linux[01-03]
-      4       lsf    sleep     root   R       0:05      1 linux04
+JOBID PARTITION   NAME   USER  ST   TIME  NODES NODELIST(REASON)
+    5       lsf  sleep   root  PD   0:00      1 (Resources)
+    2       lsf  sleep   root   R   0:13      4 linux[01-04]
+    3       lsf  sleep   root   R   0:09      3 linux[01-03]
+    4       lsf  sleep   root   R   0:05      1 linux04
 
 # sinfo -lNe
 NODELIST     NODES PARTITION       STATE CPUS MEMORY TMP_DISK WEIGHT FEATURES REASON
-linux[01-03]    3      lsf*   allocated    2   2981        1      1   (null) none
-linux04         1      lsf*   allocated    4   3813        1      1   (null) none
+linux[01-03]     3      lsf*   allocated    2   2981        1      1   (null) none
+linux04          1      lsf*   allocated    4   3813        1      1   (null) none
 </pre>
 
 <p>Once Job 2 finishes, Job 5, which was pending, is allocated available resources and is then
@@ -443,10 +441,10 @@ running as illustrated below:</p>
 
 <pre>
 # squeue
-  JOBID PARTITION     NAME     USER  ST       TIME  NODES NODELIST(REASON)
-      3       lsf    sleep     root   R       1:58      3 linux[01-03]
-      4       lsf    sleep     root   R       1:54      1 linux04
-      5       lsf    sleep     root   R       0:02      3 linux[01-03]
+JOBID PARTITION   NAME   USER  ST   TIME  NODES NODELIST(REASON)
+   3       lsf   sleep   root   R   1:58      3 linux[01-03]
+   4       lsf   sleep   root   R   1:54      1 linux04
+   5       lsf   sleep   root   R   0:02      3 linux[01-03]
 # sinfo -lNe
 NODELIST     NODES PARTITION       STATE CPUS MEMORY TMP_DISK WEIGHT FEATURES REASON
 linux[01-03]     3      lsf*   allocated    2   2981        1      1   (null) none
@@ -457,8 +455,8 @@ linux04          1      lsf*        idle    4   3813        1      1   (null) no
 
 <pre>
 # squeue
-  JOBID PARTITION     NAME     USER  ST       TIME  NODES NODELIST(REASON)
-      5       lsf    sleep     root   R       1:52      3 linux[01-03]
+JOBID PARTITION   NAME   USER  ST   TIME  NODES NODELIST(REASON)
+    5       lsf  sleep   root   R   1:52      3 linux[01-03]
 </pre>
 
-<p>Job 3 and Job 4 have finshed and Job 5 is still running on nodes linux[01-03].</p>
+<p>Job 3 and Job 4 have finished and Job 5 is still running on nodes linux[01-03].</p>
@@ -480,6 +478,6 @@ one mpi process per node.</p>
 
 <p class="footer"><a href="#top">top</a></p>
 
-<p style="text-align:center;">Last modified 25 September 2006</p>
+<p style="text-align:center;">Last modified 8 July 2008</p>
 
 <!--#include virtual="footer.txt"-->
diff --git a/doc/html/cons_res_share.shtml b/doc/html/cons_res_share.shtml
index 66715e41d32a2979600373c4077ec757818e4976..b007fdf70abf2d9430e920717b2baf4d9a0f42cc 100644
--- a/doc/html/cons_res_share.shtml
+++ b/doc/html/cons_res_share.shtml
@@ -173,7 +173,9 @@ to begin running "on top of" the existing jobs. This happens with the
 
 <H3>Memory Management</H3>
 <P>
-The management of memory as a consumable resource remains unchanged:
+The management of memory as a consumable resource remains unchanged and
+can be used to prevent oversubscription of memory, which would cause
+memory pages to be swapped out and severely degrade performance.
 </P>
 <TABLE CELLPADDING=3 CELLSPACING=1 BORDER=1>
 <TR><TH>Selection Setting</TH>
@@ -202,21 +204,26 @@ available memory to meet the job's memory requirement will not be allocated to
 the job.</TD>
 </TR>
 </TABLE>
-<P>Note that the <CODE>srun --mem=&lt;num&gt;</CODE> option is only used to
-request nodes that have &lt;num&gt; amount of real memory. This option does not
-compute memory that is currently available.
-</P><P>
-The <CODE>srun --job-mem=&lt;num&gt;</CODE> option is used with the
-<CODE>select/cons_res</CODE> plugin to request available memory from each node.
-</P><P>
-The <CODE>select/cons_res</CODE> plugin tracks memory usage by each job on each
-node regardless of the number partitions a node may be assigned to. The primary
-purpose of tracking memory as a consumable resource is to protect jobs from
-having their memory pages swapped out because the memory has been overcommitted.
-</P>
+<P>Users can specify their job's memory requirements in one of two ways.
+<CODE>--mem=&lt;num&gt;</CODE> can be used to specify the job's memory 
+requirement on a per allocated node basis. This option is probably best 
+suited for use with the <CODE>select/linear</CODE> plugin, which allocates 
+whole nodes to jobs. 
+<CODE>--mem-per-cpu=&lt;num&gt;</CODE> can be used to specify the job's 
+memory requirement on a per allocated CPU basis. This is probably best
+suited for use with the <CODE>select/cons_res</CODE> plugin, which can
+allocate individual CPUs to jobs.</P>
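+
+<P>For example (the memory sizes and node/task counts below are arbitrary
+values chosen only for illustration), a job could request 1000 MB of memory
+on each allocated node, or 100 MB for each allocated CPU:</P>
+<PRE>
+# srun -N 5 --mem=1000 sleep 100 &          <-- 1000 MB per allocated node
+# srun -n 10 --mem-per-cpu=100 sleep 100 &  <-- 100 MB per allocated CPU
+</PRE>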
+
+<P>Default and maximum values for memory on a per node or per CPU basis can 
+be configured using the following options: <CODE>DefMemPerCPU</CODE>,
+<CODE>DefMemPerNode</CODE>, <CODE>MaxMemPerCPU</CODE> and <CODE>MaxMemPerNode</CODE>.
+Enforcement of a job's memory allocation is performed by the accounting 
+plugin, which periodically gathers data about running jobs. Set 
+<CODE>JobAcctGatherType</CODE> and <CODE>JobAcctGatherFrequency</CODE> to
+values suitable for your system.</P>
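+
+<P>As a rough sketch (the values below, and the <CODE>jobacct_gather/linux</CODE>
+plugin name, are examples only rather than recommendations), the relevant
+slurm.conf settings might look like the following:</P>
+<PRE>
+SelectType=select/cons_res
+SelectTypeParameters=CR_Core_Memory
+# Default and maximum real memory per allocated CPU, in megabytes
+DefMemPerCPU=512
+MaxMemPerCPU=2048
+# Gather accounting data every 30 seconds so memory limits can be enforced
+JobAcctGatherType=jobacct_gather/linux
+JobAcctGatherFrequency=30
+</PRE>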
 
 <p class="footer"><a href="#top">top</a></p>
 
-<p style="text-align:center;">Last modified 27 May 2008</p>
+<p style="text-align:center;">Last modified 8 July 2008</p>
 
 <!--#include virtual="footer.txt"-->
diff --git a/doc/html/gang_scheduling.shtml b/doc/html/gang_scheduling.shtml
index 66c0b7cf690a86d02f163c293690400f21297874..b249cb6eb7fa853931eac8db33f13c4d385c9b58 100644
--- a/doc/html/gang_scheduling.shtml
+++ b/doc/html/gang_scheduling.shtml
@@ -8,29 +8,30 @@ to jobs.
 Beginning in SLURM version 1.3, gang scheduling is supported. 
 Gang scheduling is when two or more jobs are allocated to the same resources 
 and these jobs are alternately suspended to let all of the tasks of each 
-job have full access to the shared resources for a period of time.
+job have full access to the shared resources for a period of time.
 </P>
-<P>
+<P>
-A resource manager that supports timeslicing can improve it's responsiveness
+A resource manager that supports timeslicing can improve its responsiveness
 and utilization by allowing more jobs to begin running sooner. Shorter-running 
 jobs no longer have to wait in a queue behind longer-running jobs. Instead they 
 can be run "in parallel" with the longer-running jobs, which will allow them 
 to finish quicker. Throughput is also improved because overcommitting the 
 resources provides opportunities for "local backfilling" to occur (see example 
-below).
+below).
 </P>
-<P>
+<P>
-The SLURM 1.3.0 the <I>sched/gang</I> plugin provides timeslicing. When enabled, 
+In SLURM 1.3.0 the <I>sched/gang</I> plugin provides timeslicing. When enabled, 
-it monitors each of the partitions in SLURM. If a new job has been allocated to
-resources in a partition that have already been allocated to an existing job,
-then the plugin will suspend the new job until the configured
-<I>SchedulerTimeslice</I> interval has elapsed. Then it will suspend the
-running job and let the new job make use of the resources for a 
-<I>SchedulerTimeslice</I> interval. This will continue until one of the
-jobs terminates.
+it monitors each of the partitions in SLURM. If a new job has been allocated to
+resources in a partition that have already been allocated to an existing job,
+then the plugin will suspend the new job until the configured
+<I>SchedulerTimeSlice</I> interval has elapsed. Then it will suspend the
+running job and let the new job make use of the resources for a 
+<I>SchedulerTimeSlice</I> interval. This will continue until one of the
+jobs terminates.
 </P>
 
 <H2>Configuration</H2>
+
 <P>
 There are several important configuration parameters relating to 
 gang scheduling:
@@ -46,15 +47,18 @@ allocated by the <I>select/cons_res</I> plugin.
 with jobs, the resource selection plugin should be configured to track the 
 amount of memory used by each job to ensure that memory page swapping does 
 not occur. When <I>select/linear</I> is chosen, we recommend setting 
-<I>SelectTypeParameter=CR_Memory</I>. When <I>select/cons_res</I> is
-chosen, we recommend including Memory as a resource (ex.
+<I>SelectTypeParameter=CR_Memory</I>. When <I>select/cons_res</I> is
+chosen, we recommend including Memory as a resource (ex.
 <I>SelectTypeParameter=CR_Core_Memory</I>).
 </LI>
 <LI>
-<B>DefMemPerTask</B>: Since job requests may not explicitly specify 
-a memory requirement, we also recommend configuring <I>DefMemPerTask</I> 
-(default memory per task). It may also be desirable to configure 
-<I>MaxMemPerTask</I> (maximum memory per task) in <I>slurm.conf</I>.
+<B>DefMemPerCPU</B>: Since job requests may not explicitly specify 
+a memory requirement, we also recommend configuring
+<I>DefMemPerCPU</I> (default memory per allocated CPU) or
+<I>DefMemPerNode</I> (default memory per allocated node).
+It may also be desirable to configure
+<I>MaxMemPerCPU</I> (maximum memory per allocated CPU) or
+<I>MaxMemPerNode</I> (maximum memory per allocated node) in <I>slurm.conf</I>.
 </LI>
 <LI>
 <B>JobAcctGatherType and JobAcctGatherFrequency</B>:
@@ -63,9 +67,9 @@ using the <I>JobAcctGatherType</I> and <I>JobAcctGatherFrequency</I>
 parameters. If accounting is enabled and a job exceeds its configured
 memory limits, it will be canceled in order to prevent it from 
-adversely effecting other jobs sharing the same resources.
+adversely affecting other jobs sharing the same resources.
-</LI>
+</LI>
 <LI>
-<B>SchedulerType</B>: Configure the <I>sched/gang</I> plugin by setting
+<B>SchedulerType</B>: Configure the <I>sched/gang</I> plugin by setting
 <I>SchedulerType=sched/gang</I> in <I>slurm.conf</I>.
 </LI>
 <LI>
@@ -88,7 +92,7 @@ allocated to a common resource, set <I>Shared=FORCE:6</I>.
 In order to enable gang scheduling after making the configuration changes 
 described above, restart SLURM if it is already running. Any change to the 
 plugin settings in SLURM requires a full restart of the daemons. If you 
-just change the partition <I>Shared</I> setting, this can be updated with
+just change the partition <I>Shared</I> setting, this can be updated with
 <I>scontrol reconfig</I>.
 </P>
 <P>
@@ -96,377 +100,412 @@ For an advanced topic discussion on the potential use of swap space,
 see "Making use of swap space" in the "Future Work" section below.
 </P>
 
-<H2>Timeslicer Design and Operation</H2>
+<H2>Timeslicer Design and Operation</H2>
 
 <P>
-When enabled, the <I>sched/gang</I> plugin keeps track of the resources
-allocated to all jobs. For each partition an "active bitmap" is maintained that
-tracks all concurrently running jobs in the SLURM cluster. Each time a new
-job is allocated to resources in a partition, the <I>sched/gang</I> plugin
-compares these newly allocated resources with the resources already maintained
-in the "active bitmap". If these two sets of resources are disjoint then the new
-job is added to the "active bitmap". If these two sets of resources overlap then
-the new job is suspended. All jobs are tracked in a per-partition job queue
+When enabled, the <I>sched/gang</I> plugin keeps track of the resources
+allocated to all jobs. For each partition an "active bitmap" is maintained that
+tracks all concurrently running jobs in the SLURM cluster. Each time a new
+job is allocated to resources in a partition, the <I>sched/gang</I> plugin
+compares these newly allocated resources with the resources already maintained
+in the "active bitmap". If these two sets of resources are disjoint then the new
+job is added to the "active bitmap". If these two sets of resources overlap then
+the new job is suspended. All jobs are tracked in a per-partition job queue
 within the <I>sched/gang</I> plugin.
 </P>
 <P>
-A separate <I>timeslicer thread</I> is spawned by the <I>sched/gang</I> plugin
-on startup. This thread sleeps for the configured <I>SchedulerTimeSlice</I>
-interval. When it wakes up, it checks each partition for suspended jobs. If
-suspended jobs are found then the <I>timeslicer thread</I> moves all running
-jobs to the end of the job queue. It then reconstructs the "active bitmap" for
-this partition beginning with the suspended job that has waited the longest to
-run (this will be the first suspended job in the run queue). Each following job
-is then compared with the new "active bitmap", and if the job can be run
-concurrently with the other "active" jobs then the job is added. Once this is
-complete then the <I>timeslicer thread</I> suspends any currently running jobs
-that are no longer part of the "active bitmap", and resumes jobs that are new to
+A separate <I>timeslicer thread</I> is spawned by the <I>sched/gang</I> plugin
+on startup. This thread sleeps for the configured <I>SchedulerTimeSlice</I>
+interval. When it wakes up, it checks each partition for suspended jobs. If
+suspended jobs are found then the <I>timeslicer thread</I> moves all running
+jobs to the end of the job queue. It then reconstructs the "active bitmap" for
+this partition beginning with the suspended job that has waited the longest to
+run (this will be the first suspended job in the run queue). Each following job
+is then compared with the new "active bitmap", and if the job can be run
+concurrently with the other "active" jobs then the job is added. Once this is
+complete then the <I>timeslicer thread</I> suspends any currently running jobs
+that are no longer part of the "active bitmap", and resumes jobs that are new to
 the "active bitmap".
 </P>
 <P>
-This <I>timeslicer thread</I> algorithm for rotating jobs is designed to prevent
-jobs from starving (remaining in the suspended state indefinitly) and to be as
-fair as possible in the distribution of runtime while still keeping all of the
+This <I>timeslicer thread</I> algorithm for rotating jobs is designed to prevent
+jobs from starving (remaining in the suspended state indefinitely) and to be as
+fair as possible in the distribution of runtime while still keeping all of the
 resources as busy as possible.
 </P>
 <P>
-The <I>sched/gang</I> plugin suspends jobs via the same internal functions that
-support <I>scontrol suspend</I> and <I>scontrol resume</I>. A good way to
-observe the operation of the timeslicer is by running <I>watch squeue</I> in a
+The <I>sched/gang</I> plugin suspends jobs via the same internal functions that
+support <I>scontrol suspend</I> and <I>scontrol resume</I>. A good way to
+observe the operation of the timeslicer is by running <I>watch squeue</I> in a
 terminal window.
 </P>
 
-<H2>A Simple Example</H2>
+<H2>A Simple Example</H2>
 
 <P>
-The following example is configured with <I>select/linear</I>,
-<I>sched/gang</I>, and <I>Shared=FORCE</I>. This example takes place on a small
+The following example is configured with <I>select/linear</I>,
+<I>sched/gang</I>, and <I>Shared=FORCE</I>. This example takes place on a small
 cluster of 5 nodes:
-</P>
-<PRE>
-[user@n16 load]$ <B>sinfo</B>
-PARTITION AVAIL  TIMELIMIT NODES  STATE NODELIST
-active*      up   infinite     5   idle n[12-16]
+</P>
+<PRE>
+[user@n16 load]$ <B>sinfo</B>
+PARTITION AVAIL  TIMELIMIT NODES  STATE NODELIST
+active*      up   infinite     5   idle n[12-16]
 </PRE>
 <P>
 Here are the Scheduler settings (the last two settings are the relevant ones):
 </P>
-<PRE>
-[user@n16 load]$ <B>scontrol show config | grep Sched</B>
-FastSchedule            = 1
-SchedulerPort           = 7321
-SchedulerRootFilter     = 1
-SchedulerTimeSlice      = 30
-SchedulerType           = sched/gang
-[user@n16 load]$
-</PRE>
-<P>
-The <I>myload</I> script launches a simple load-generating app that runs
+<PRE>
+[user@n16 load]$ <B>scontrol show config | grep Sched</B>
+FastSchedule            = 1
+SchedulerPort           = 7321
+SchedulerRootFilter     = 1
+SchedulerTimeSlice      = 30
+SchedulerType           = sched/gang
+</PRE>
+<P>
+The <I>myload</I> script launches a simple load-generating app that runs
 for the given number of seconds. Submit <I>myload</I> to run on all nodes:
 </P>
-<PRE>
-[user@n16 load]$ <B>sbatch -N5 ./myload 300</B>
-sbatch: Submitted batch job 3
-[user@n16 load]$ <B>squeue</B>
-JOBID PARTITION    NAME  USER ST  TIME NODES NODELIST
-    3    active  myload  user     0:05     5 n[12-16]
+<PRE>
+[user@n16 load]$ <B>sbatch -N5 ./myload 300</B>
+sbatch: Submitted batch job 3
+
+[user@n16 load]$ <B>squeue</B>
+JOBID PARTITION    NAME  USER ST  TIME NODES NODELIST
+    3    active  myload  user     0:05     5 n[12-16]
 </PRE>
 <P>
 Submit it again and watch the <I>sched/gang</I> plugin suspend it:
 </P>
-<PRE>
-[user@n16 load]$ <B>sbatch -N5 ./myload 300</B>
-sbatch: Submitted batch job 4
-[user@n16 load]$ <B>squeue</B>
-JOBID PARTITION    NAME  USER ST  TIME NODES NODELIST
-    3    active  myload  user  R  0:13     5 n[12-16]
-    4    active  myload  user  S  0:00     5 n[12-16]
+<PRE>
+[user@n16 load]$ <B>sbatch -N5 ./myload 300</B>
+sbatch: Submitted batch job 4
+
+[user@n16 load]$ <B>squeue</B>
+JOBID PARTITION    NAME  USER ST  TIME NODES NODELIST
+    3    active  myload  user  R  0:13     5 n[12-16]
+    4    active  myload  user  S  0:00     5 n[12-16]
 </PRE>
 <P>
-After 30 seconds the <I>sched/gang</I> plugin swaps jobs, and now job 4 is the
+After 30 seconds the <I>sched/gang</I> plugin swaps jobs, and now job 4 is the
 active one:
 </P>
-<PRE>
-[user@n16 load]$ <B>squeue</B>
-JOBID PARTITION    NAME  USER ST  TIME NODES NODELIST
-    4    active  myload  user  R  0:08     5 n[12-16]
-    3    active  myload  user  S  0:41     5 n[12-16]
-[user@n16 load]$ <B>squeue</B>
-JOBID PARTITION    NAME  USER ST  TIME NODES NODELIST
-    4    active  myload  user  R  0:21     5 n[12-16]
-    3    active  myload  user  S  0:41     5 n[12-16]
+<PRE>
+[user@n16 load]$ <B>squeue</B>
+JOBID PARTITION    NAME  USER ST  TIME NODES NODELIST
+    4    active  myload  user  R  0:08     5 n[12-16]
+    3    active  myload  user  S  0:41     5 n[12-16]
+
+[user@n16 load]$ <B>squeue</B>
+JOBID PARTITION    NAME  USER ST  TIME NODES NODELIST
+    4    active  myload  user  R  0:21     5 n[12-16]
+    3    active  myload  user  S  0:41     5 n[12-16]
+</PRE>
+<P>
+After another 30 seconds the <I>sched/gang</I> plugin sets job 3 running again:
+</P>
+<PRE>
+[user@n16 load]$ <B>squeue</B>
+JOBID PARTITION    NAME  USER ST  TIME NODES NODELIST
+    3    active  myload  user  R  0:50     5 n[12-16]
+    4    active  myload  user  S  0:30     5 n[12-16]
+</PRE>
+
+<P>
+<B>A possible side effect of timeslicing</B>: Note that jobs that are
+immediately suspended may cause their srun commands to produce the following
+output:
+</P>
+<PRE>
+[user@n16 load]$ <B>cat slurm-4.out</B>
+srun: Job step creation temporarily disabled, retrying
+srun: Job step creation still disabled, retrying
+srun: Job step creation still disabled, retrying
+srun: Job step creation still disabled, retrying
+srun: Job step created
+</PRE>
+<P>
+This occurs because <I>srun</I> is attempting to launch a jobstep in an
+allocation that has been suspended. The <I>srun</I> process will continue in a
+retry loop to launch the jobstep until the allocation has been resumed and the
+jobstep can be launched.
+</P>
+<P>
+When the <I>sched/gang</I> plugin is enabled, this type of output in the user
+jobs should be considered benign.
+</P>
+
+<H2>More examples</H2>
+
+<P>
+The following example shows how the timeslicer algorithm keeps the resources
+busy. Job 10 runs continuously, while jobs 9 and 11 are timesliced:
+</P>
+
+<PRE>
+[user@n16 load]$ <B>sbatch -N3 ./myload 300</B>
+sbatch: Submitted batch job 9
+
+[user@n16 load]$ <B>sbatch -N2 ./myload 300</B>
+sbatch: Submitted batch job 10
+
+[user@n16 load]$ <B>sbatch -N3 ./myload 300</B>
+sbatch: Submitted batch job 11
+
+[user@n16 load]$ <B>squeue</B>
+JOBID PARTITION    NAME  USER ST  TIME NODES NODELIST
+    9    active  myload  user  R  0:11     3 n[12-14]
+   10    active  myload  user  R  0:08     2 n[15-16]
+   11    active  myload  user  S  0:00     3 n[12-14]
+
+[user@n16 load]$ <B>squeue</B>
+JOBID PARTITION    NAME  USER ST  TIME NODES NODELIST
+   10    active  myload  user  R  0:50     2 n[15-16]
+   11    active  myload  user  R  0:12     3 n[12-14]
+    9    active  myload  user  S  0:41     3 n[12-14]
+
+[user@n16 load]$ <B>squeue</B>
+JOBID PARTITION    NAME  USER ST  TIME NODES NODELIST
+   10    active  myload  user  R  1:04     2 n[15-16]
+   11    active  myload  user  R  0:26     3 n[12-14]
+    9    active  myload  user  S  0:41     3 n[12-14]
+
+[user@n16 load]$ <B>squeue</B>
+JOBID PARTITION    NAME  USER ST  TIME NODES NODELIST
+    9    active  myload  user  R  0:46     3 n[12-14]
+   10    active  myload  user  R  1:13     2 n[15-16]
+   11    active  myload  user  S  0:30     3 n[12-14]
 </PRE>
-<P>
-After another 30 seconds the <I>sched/gang</I> plugin sets job 3 running again:
-</P>
-<PRE>
-[user@n16 load]$ <B>squeue</B>
-JOBID PARTITION    NAME  USER ST  TIME NODES NODELIST
-    3    active  myload  user  R  0:50     5 n[12-16]
-    4    active  myload  user  S  0:30     5 n[12-16]
-</PRE>
-<P>
-<B>A possible side effect of timeslicing</B>: Note that jobs that are
-immediately suspended may cause their srun commands to produce the following
-output:
-</P>
-<PRE>
-[user@n16 load]$ <B>cat slurm-4.out</B>
-srun: Job step creation temporarily disabled, retrying
-srun: Job step creation still disabled, retrying
-srun: Job step creation still disabled, retrying
-srun: Job step creation still disabled, retrying
-srun: Job step created
+<P>
+The next example displays "local backfilling":
+</P>
+<PRE>
+[user@n16 load]$ <B>sbatch -N3 ./myload 300</B>
+sbatch: Submitted batch job 12
+
+[user@n16 load]$ <B>sbatch -N5 ./myload 300</B>
+sbatch: Submitted batch job 13
+
+[user@n16 load]$ <B>sbatch -N2 ./myload 300</B>
+sbatch: Submitted batch job 14
+
+[user@n16 load]$ <B>squeue</B>
+JOBID PARTITION    NAME  USER ST  TIME NODES NODELIST
+   12    active  myload  user  R  0:14     3 n[12-14]
+   14    active  myload  user  R  0:06     2 n[15-16]
+   13    active  myload  user  S  0:00     5 n[12-16]
 </PRE>
-<P>
-This occurs because <I>srun</I> is attempting to launch a jobstep in an
-allocation that has been suspended. The <I>srun</I> process will continue in a
-retry loop to launch the jobstep until the allocation has been resumed and the
-jobstep can be launched.
-</P>
-<P>
-When the <I>sched/gang</I> plugin is enabled, this type of output in the user
-jobs should be considered benign.
-</P>
-
-<H2>More examples</H2>
-<P>
-The following example shows how the timeslicer algorithm keeps the resources
-busy. Job 10 runs continually, while jobs 9 and 11 are timesliced:
-</P>
-<PRE>
-[user@n16 load]$ <B>sbatch -N3 ./myload 300</B>
-sbatch: Submitted batch job 9
-[user@n16 load]$ <B>sbatch -N2 ./myload 300</B>
-sbatch: Submitted batch job 10
-[user@n16 load]$ <B>sbatch -N3 ./myload 300</B>
-sbatch: Submitted batch job 11
-[user@n16 load]$ <B>squeue</B>
-JOBID PARTITION    NAME  USER ST  TIME NODES NODELIST
-    9    active  myload  user  R  0:11     3 n[12-14]
-   10    active  myload  user  R  0:08     2 n[15-16]
-   11    active  myload  user  S  0:00     3 n[12-14]
-[user@n16 load]$ <B>squeue</B>
-JOBID PARTITION    NAME  USER ST  TIME NODES NODELIST
-   10    active  myload  user  R  0:50     2 n[15-16]
-   11    active  myload  user  R  0:12     3 n[12-14]
-    9    active  myload  user  S  0:41     3 n[12-14]
-[user@n16 load]$ <B>squeue</B>
-JOBID PARTITION    NAME  USER ST  TIME NODES NODELIST
-   10    active  myload  user  R  1:04     2 n[15-16]
-   11    active  myload  user  R  0:26     3 n[12-14]
-    9    active  myload  user  S  0:41     3 n[12-14]
-[user@n16 load]$ <B>squeue</B>
-JOBID PARTITION    NAME  USER ST  TIME NODES NODELIST
-    9    active  myload  user  R  0:46     3 n[12-14]
-   10    active  myload  user  R  1:13     2 n[15-16]
-   11    active  myload  user  S  0:30     3 n[12-14]
-[user@n16 load]$
-</PRE>
-</P>
-<P>
-The next example displays "local backfilling":
-</P>
-<PRE>
-[user@n16 load]$ <B>sbatch -N3 ./myload 300</B>
-sbatch: Submitted batch job 12
-[user@n16 load]$ <B>sbatch -N5 ./myload 300</B>
-sbatch: Submitted batch job 13
-[user@n16 load]$ <B>sbatch -N2 ./myload 300</B>
-sbatch: Submitted batch job 14
-[user@n16 load]$ <B>squeue</B>
-JOBID PARTITION    NAME  USER ST  TIME NODES NODELIST
-   12    active  myload  user  R  0:14     3 n[12-14]
-   14    active  myload  user  R  0:06     2 n[15-16]
-   13    active  myload  user  S  0:00     5 n[12-16]
-[user@n16 load]$
-</PRE>
-<P>
-Without timeslicing and without the backfill scheduler enabled, job 14 has to
-wait for job 13 to finish.
-</P><P>
-This is called "local" backfilling because the backfilling only occurs with jobs
-close enough in the queue to get allocated by the scheduler as part of
-oversubscribing the resources. Recall that the number of jobs that can
-overcommit a resource is controlled by the <I>Shared=FORCE:max_share</I> value,
-so this value effectively controls the scope of "local backfilling".
-</P><P>
-Normal backfill algorithms check <U>all</U> jobs in the wait queue.
-</P>
-
-<H2>Consumable Resource Examples</H2>
-<P>
-The following two examples illustrate the primary difference between
-<I>CR_CPU</I> and <I>CR_Core</I> when consumable resource selection is enabled
-(<I>select/cons_res</I>).
-</P>
-<P>
-When <I>CR_CPU</I> (or <I>CR_CPU_Memory</I>) is configured then the selector
-treats the CPUs as simple, <I>interchangeable</I> computing resources. However
-when <I>CR_Core</I> (or <I>CR_Core_Memory</I>) is enabled the selector treats
-the CPUs as individual resources that are <U>specifically</U> allocated to jobs.
-This subtle difference is highlighted when timeslicing is enabled.
-</P>
-<P>
-In both examples 6 jobs are submitted. Each job requests 2 CPUs per node, and
-all of the nodes contain two quad-core processors. The timeslicer will initially
-let the first 4 jobs run and suspend the last 2 jobs. The manner in which these
-jobs are timesliced depends upon the configured <I>SelectTypeParameter</I>.
-</P>
-<P>
-In the first example <I>CR_Core_Memory</I> is configured. Note that jobs 46 and
-47 don't <U>ever</U> get suspended. This is because they are not sharing their
-cores with any other job. Jobs 48 and 49 were allocated to the same cores as
-jobs 45 and 46. The timeslicer recognizes this and timeslices only those jobs: 
-</P>
-<PRE>
-[user@n16 load]$ <B>sinfo</B>
-PARTITION AVAIL  TIMELIMIT NODES  STATE NODELIST
-active*      up   infinite     5   idle n[12-16]
-[user@n16 load]$ <B>scontrol show config | grep Select</B>
-SelectType              = select/cons_res
-SelectTypeParameters    = CR_CORE_MEMORY
-[user@n16 load]$ <B>sinfo -o "%20N %5D %5c %5z"</B>
-NODELIST             NODES CPUS  S:C:T
-n[12-16]             5     8     2:4:1
-[user@n16 load]$
-[user@n16 load]$
-[user@n16 load]$ <B>sbatch -n10 -N5 ./myload 300</B>
-sbatch: Submitted batch job 44
-[user@n16 load]$ <B>sbatch -n10 -N5 ./myload 300</B>
-sbatch: Submitted batch job 45
-[user@n16 load]$ <B>sbatch -n10 -N5 ./myload 300</B>
-sbatch: Submitted batch job 46
-[user@n16 load]$ <B>sbatch -n10 -N5 ./myload 300</B>
-sbatch: Submitted batch job 47
-[user@n16 load]$ <B>sbatch -n10 -N5 ./myload 300</B>
-sbatch: Submitted batch job 48
-[user@n16 load]$ <B>sbatch -n10 -N5 ./myload 300</B>
-sbatch: Submitted batch job 49
-[user@n16 load]$ <B>squeue</B>
-JOBID PARTITION    NAME  USER ST  TIME NODES NODELIST
-   44    active  myload  user  R  0:09     5 n[12-16]
-   45    active  myload  user  R  0:08     5 n[12-16]
-   46    active  myload  user  R  0:08     5 n[12-16]
-   47    active  myload  user  R  0:07     5 n[12-16]
-   48    active  myload  user  S  0:00     5 n[12-16]
-   49    active  myload  user  S  0:00     5 n[12-16]
-[user@n16 load]$ <B>squeue</B>
-JOBID PARTITION    NAME  USER ST  TIME NODES NODELIST
-   46    active  myload  user  R  0:49     5 n[12-16]
-   47    active  myload  user  R  0:48     5 n[12-16]
-   48    active  myload  user  R  0:06     5 n[12-16]
-   49    active  myload  user  R  0:06     5 n[12-16]
-   44    active  myload  user  S  0:44     5 n[12-16]
-   45    active  myload  user  S  0:43     5 n[12-16]
-[user@n16 load]$ <B>squeue</B>
-JOBID PARTITION    NAME  USER ST  TIME NODES NODELIST
-   44    active  myload  user  R  1:23     5 n[12-16]
-   45    active  myload  user  R  1:22     5 n[12-16]
-   46    active  myload  user  R  2:22     5 n[12-16]
-   47    active  myload  user  R  2:21     5 n[12-16]
-   48    active  myload  user  S  1:00     5 n[12-16]
-   49    active  myload  user  S  1:00     5 n[12-16]
-[user@n16 load]$
-</PRE>
-<P>
-Note the runtime of all 6 jobs in the output of the last <I>squeue</I> command.
-Jobs 46 and 47 have been running continuously, while jobs 45 and 46 are
-splitting their runtime with jobs 48 and 49.
-</P><P>
-The next example has <I>CR_CPU_Memory</I> configured and the same 6 jobs are
-submitted. Here the selector and the timeslicer treat the CPUs as countable
-resources which results in all 6 jobs sharing time on the CPUs:
-</P>
-<PRE>
-[user@n16 load]$ <B>sinfo</B>
-PARTITION AVAIL  TIMELIMIT NODES  STATE NODELIST
-active*      up   infinite     5   idle n[12-16]
-[user@n16 load]$ <B>scontrol show config | grep Select</B>
-SelectType              = select/cons_res
-SelectTypeParameters    = CR_CPU_MEMORY
-[user@n16 load]$ <B>sinfo -o "%20N %5D %5c %5z"</B>
-NODELIST             NODES CPUS  S:C:T
-n[12-16]             5     8     2:4:1
-[user@n16 load]$
-[user@n16 load]$
-[user@n16 load]$ <B>sbatch -n10 -N5 ./myload 300</B>
-sbatch: Submitted batch job 51
-[user@n16 load]$ <B>sbatch -n10 -N5 ./myload 300</B>
-sbatch: Submitted batch job 52
-[user@n16 load]$ <B>sbatch -n10 -N5 ./myload 300</B>
-sbatch: Submitted batch job 53
-[user@n16 load]$ <B>sbatch -n10 -N5 ./myload 300</B>
-sbatch: Submitted batch job 54
-[user@n16 load]$ <B>sbatch -n10 -N5 ./myload 300</B>
-sbatch: Submitted batch job 55
-[user@n16 load]$ <B>sbatch -n10 -N5 ./myload 300</B>
-sbatch: Submitted batch job 56
-[user@n16 load]$ <B>squeue</B>
-JOBID PARTITION    NAME  USER ST  TIME NODES NODELIST
-   51    active  myload  user  R  0:11     5 n[12-16]
-   52    active  myload  user  R  0:11     5 n[12-16]
-   53    active  myload  user  R  0:10     5 n[12-16]
-   54    active  myload  user  R  0:09     5 n[12-16]
-   55    active  myload  user  S  0:00     5 n[12-16]
-   56    active  myload  user  S  0:00     5 n[12-16]
-[user@n16 load]$ <B>squeue</B>
-JOBID PARTITION    NAME  USER ST  TIME NODES NODELIST
-   51    active  myload  user  R  1:09     5 n[12-16]
-   52    active  myload  user  R  1:09     5 n[12-16]
-   55    active  myload  user  R  0:23     5 n[12-16]
-   56    active  myload  user  R  0:23     5 n[12-16]
-   53    active  myload  user  S  0:45     5 n[12-16]
-   54    active  myload  user  S  0:44     5 n[12-16]
-[user@n16 load]$ <B>squeue</B>
-JOBID PARTITION    NAME  USER ST  TIME NODES NODELIST
-   53    active  myload  user  R  0:55     5 n[12-16]
-   54    active  myload  user  R  0:54     5 n[12-16]
-   55    active  myload  user  R  0:40     5 n[12-16]
-   56    active  myload  user  R  0:40     5 n[12-16]
-   51    active  myload  user  S  1:16     5 n[12-16]
-   52    active  myload  user  S  1:16     5 n[12-16]
-[user@n16 load]$ <B>squeue</B>
-JOBID PARTITION    NAME  USER ST  TIME NODES NODELIST
-   51    active  myload  user  R  3:18     5 n[12-16]
-   52    active  myload  user  R  3:18     5 n[12-16]
-   53    active  myload  user  R  3:17     5 n[12-16]
-   54    active  myload  user  R  3:16     5 n[12-16]
-   55    active  myload  user  S  3:00     5 n[12-16]
-   56    active  myload  user  S  3:00     5 n[12-16]
-[user@n16 load]$
-</PRE>
-<P>
-Note that the runtime of all 6 jobs is roughly equal. Jobs 51-54 ran first so
-they're slightly ahead, but so far all jobs have run for at least 3 minutes.
-</P><P>
-At the core level this means that SLURM relies on the linux kernel to move jobs
-around on the cores to maximize performance. This is different than when
-<I>CR_Core_Memory</I> was configured and the jobs would effectively remain
-"pinned" to their specific cores for the duration of the job. Note that
-<I>CR_Core_Memory</I> supports CPU binding, while <I>CR_CPU_Memory</I> does not.
+<P>
+Without timeslicing and without the backfill scheduler enabled, job 14 has to
+wait for job 13 to finish.
+</P>
+<P>
+This is called "local" backfilling because the backfilling only occurs with jobs
+close enough in the queue to get allocated by the scheduler as part of
+oversubscribing the resources. Recall that the number of jobs that can
+overcommit a resource is controlled by the <I>Shared=FORCE:max_share</I> value,
+so this value effectively controls the scope of "local backfilling".
+</P>
+<P>
+Normal backfill algorithms check <U>all</U> jobs in the wait queue.
+</P>
+
+<H2>Consumable Resource Examples</H2>
+
+<P>
+The following two examples illustrate the primary difference between
+<I>CR_CPU</I> and <I>CR_Core</I> when consumable resource selection is enabled
+(<I>select/cons_res</I>).
+</P>
+<P>
+When <I>CR_CPU</I> (or <I>CR_CPU_Memory</I>) is configured then the selector
+treats the CPUs as simple, <I>interchangeable</I> computing resources. However
+when <I>CR_Core</I> (or <I>CR_Core_Memory</I>) is enabled the selector treats
+the CPUs as individual resources that are <U>specifically</U> allocated to jobs.
+This subtle difference is highlighted when timeslicing is enabled.
+</P>
+<P>
+In both examples 6 jobs are submitted. Each job requests 2 CPUs per node, and
+all of the nodes contain two quad-core processors. The timeslicer will initially
+let the first 4 jobs run and suspend the last 2 jobs. The manner in which these
+jobs are timesliced depends upon the configured <I>SelectTypeParameter</I>.
+</P>
+<P>
+In the first example <I>CR_Core_Memory</I> is configured. Note that jobs 46 and
+47 don't <U>ever</U> get suspended. This is because they are not sharing their
+cores with any other job. Jobs 48 and 49 were allocated to the same cores as
+jobs 44 and 45. The timeslicer recognizes this and timeslices only those jobs:
+</P>
+<PRE>
+[user@n16 load]$ <B>sinfo</B>
+PARTITION AVAIL  TIMELIMIT NODES  STATE NODELIST
+active*      up   infinite     5   idle n[12-16]
+
+[user@n16 load]$ <B>scontrol show config | grep Select</B>
+SelectType              = select/cons_res
+SelectTypeParameters    = CR_CORE_MEMORY
+
+[user@n16 load]$ <B>sinfo -o "%20N %5D %5c %5z"</B>
+NODELIST             NODES CPUS  S:C:T
+n[12-16]             5     8     2:4:1
+
+[user@n16 load]$ <B>sbatch -n10 -N5 ./myload 300</B>
+sbatch: Submitted batch job 44
+
+[user@n16 load]$ <B>sbatch -n10 -N5 ./myload 300</B>
+sbatch: Submitted batch job 45
+
+[user@n16 load]$ <B>sbatch -n10 -N5 ./myload 300</B>
+sbatch: Submitted batch job 46
+
+[user@n16 load]$ <B>sbatch -n10 -N5 ./myload 300</B>
+sbatch: Submitted batch job 47
+
+[user@n16 load]$ <B>sbatch -n10 -N5 ./myload 300</B>
+sbatch: Submitted batch job 48
+
+[user@n16 load]$ <B>sbatch -n10 -N5 ./myload 300</B>
+sbatch: Submitted batch job 49
+
+[user@n16 load]$ <B>squeue</B>
+JOBID PARTITION    NAME  USER ST  TIME NODES NODELIST
+   44    active  myload  user  R  0:09     5 n[12-16]
+   45    active  myload  user  R  0:08     5 n[12-16]
+   46    active  myload  user  R  0:08     5 n[12-16]
+   47    active  myload  user  R  0:07     5 n[12-16]
+   48    active  myload  user  S  0:00     5 n[12-16]
+   49    active  myload  user  S  0:00     5 n[12-16]
+
+[user@n16 load]$ <B>squeue</B>
+JOBID PARTITION    NAME  USER ST  TIME NODES NODELIST
+   46    active  myload  user  R  0:49     5 n[12-16]
+   47    active  myload  user  R  0:48     5 n[12-16]
+   48    active  myload  user  R  0:06     5 n[12-16]
+   49    active  myload  user  R  0:06     5 n[12-16]
+   44    active  myload  user  S  0:44     5 n[12-16]
+   45    active  myload  user  S  0:43     5 n[12-16]
+
+[user@n16 load]$ <B>squeue</B>
+JOBID PARTITION    NAME  USER ST  TIME NODES NODELIST
+   44    active  myload  user  R  1:23     5 n[12-16]
+   45    active  myload  user  R  1:22     5 n[12-16]
+   46    active  myload  user  R  2:22     5 n[12-16]
+   47    active  myload  user  R  2:21     5 n[12-16]
+   48    active  myload  user  S  1:00     5 n[12-16]
+   49    active  myload  user  S  1:00     5 n[12-16]
+</PRE>
+<P>
+Note the runtime of all 6 jobs in the output of the last <I>squeue</I> command.
+Jobs 46 and 47 have been running continuously, while jobs 44 and 45 are
+splitting their runtime with jobs 48 and 49.
+</P>
+<P>
+The next example has <I>CR_CPU_Memory</I> configured and the same 6 jobs are
+submitted. Here the selector and the timeslicer treat the CPUs as countable
+resources which results in all 6 jobs sharing time on the CPUs:
+</P>
+<PRE>
+[user@n16 load]$ <B>sinfo</B>
+PARTITION AVAIL  TIMELIMIT NODES  STATE NODELIST
+active*      up   infinite     5   idle n[12-16]
+
+[user@n16 load]$ <B>scontrol show config | grep Select</B>
+SelectType              = select/cons_res
+SelectTypeParameters    = CR_CPU_MEMORY
+
+[user@n16 load]$ <B>sinfo -o "%20N %5D %5c %5z"</B>
+NODELIST             NODES CPUS  S:C:T
+n[12-16]             5     8     2:4:1
+
+[user@n16 load]$ <B>sbatch -n10 -N5 ./myload 300</B>
+sbatch: Submitted batch job 51
+
+[user@n16 load]$ <B>sbatch -n10 -N5 ./myload 300</B>
+sbatch: Submitted batch job 52
+
+[user@n16 load]$ <B>sbatch -n10 -N5 ./myload 300</B>
+sbatch: Submitted batch job 53
+
+[user@n16 load]$ <B>sbatch -n10 -N5 ./myload 300</B>
+sbatch: Submitted batch job 54
+
+[user@n16 load]$ <B>sbatch -n10 -N5 ./myload 300</B>
+sbatch: Submitted batch job 55
+
+[user@n16 load]$ <B>sbatch -n10 -N5 ./myload 300</B>
+sbatch: Submitted batch job 56
+
+[user@n16 load]$ <B>squeue</B>
+JOBID PARTITION    NAME  USER ST  TIME NODES NODELIST
+   51    active  myload  user  R  0:11     5 n[12-16]
+   52    active  myload  user  R  0:11     5 n[12-16]
+   53    active  myload  user  R  0:10     5 n[12-16]
+   54    active  myload  user  R  0:09     5 n[12-16]
+   55    active  myload  user  S  0:00     5 n[12-16]
+   56    active  myload  user  S  0:00     5 n[12-16]
+
+[user@n16 load]$ <B>squeue</B>
+JOBID PARTITION    NAME  USER ST  TIME NODES NODELIST
+   51    active  myload  user  R  1:09     5 n[12-16]
+   52    active  myload  user  R  1:09     5 n[12-16]
+   55    active  myload  user  R  0:23     5 n[12-16]
+   56    active  myload  user  R  0:23     5 n[12-16]
+   53    active  myload  user  S  0:45     5 n[12-16]
+   54    active  myload  user  S  0:44     5 n[12-16]
+
+[user@n16 load]$ <B>squeue</B>
+JOBID PARTITION    NAME  USER ST  TIME NODES NODELIST
+   53    active  myload  user  R  0:55     5 n[12-16]
+   54    active  myload  user  R  0:54     5 n[12-16]
+   55    active  myload  user  R  0:40     5 n[12-16]
+   56    active  myload  user  R  0:40     5 n[12-16]
+   51    active  myload  user  S  1:16     5 n[12-16]
+   52    active  myload  user  S  1:16     5 n[12-16]
+
+[user@n16 load]$ <B>squeue</B>
+JOBID PARTITION    NAME  USER ST  TIME NODES NODELIST
+   51    active  myload  user  R  3:18     5 n[12-16]
+   52    active  myload  user  R  3:18     5 n[12-16]
+   53    active  myload  user  R  3:17     5 n[12-16]
+   54    active  myload  user  R  3:16     5 n[12-16]
+   55    active  myload  user  S  3:00     5 n[12-16]
+   56    active  myload  user  S  3:00     5 n[12-16]
+</PRE>
+<P>
+Note that the runtime of all 6 jobs is roughly equal. Jobs 51-54 ran first so
+they're slightly ahead, but so far all jobs have run for at least 3 minutes.
+</P>
+<P>
+At the core level this means that SLURM relies on the Linux kernel to move jobs
+around on the cores to maximize performance. This is different than when
+<I>CR_Core_Memory</I> was configured and the jobs would effectively remain
+"pinned" to their specific cores for the duration of the job. Note that
+<I>CR_Core_Memory</I> supports CPU binding, while <I>CR_CPU_Memory</I> does not.
 </P>
 
 <H2>Future Work</H2>
-
-<P>
-Priority scheduling and preemptive scheduling are other forms of gang
-scheduling that are currently under development for SLURM.
-</P>
-<P>
-<B>Making use of swap space</B>: (note that this topic is not currently
-scheduled for development, unless someone would like to pursue this) It should
-be noted that timeslicing does provide an interesting mechanism for high
-performance jobs to make use of swap space. The optimal scenario is one in which
-suspended jobs are "swapped out" and active jobs are "swapped in". The swapping
-activity would only occur once every  <I>SchedulerTimeslice</I> interval.
-</P>
-<P>
-However, SLURM should first be modified to include support for scheduling jobs
-into swap space and to provide controls to prevent overcommitting swap space.
-For now this idea could be experimented with by disabling memory support in the
-selector and submitting appropriately sized jobs.
-</P>
-
-<p style="text-align:center;">Last modified 17 March 2008</p>
+
+<P>
+Priority scheduling and preemptive scheduling are other forms of gang
+scheduling that are currently under development for SLURM.
+</P>
+<P>
+<B>Making use of swap space</B>: (this topic is not currently scheduled for
+development unless someone would like to pursue it) Timeslicing provides an
+interesting mechanism for high-performance jobs to make use of swap space.
+The optimal scenario is one in which suspended jobs are "swapped out" and
+active jobs are "swapped in". The swapping activity would only occur once
+every <I>SchedulerTimeslice</I> interval.
+</P>
+<P>
+However, SLURM should first be modified to include support for scheduling jobs
+into swap space and to provide controls to prevent overcommitting swap space.
+For now this idea could be experimented with by disabling memory support in the
+selector and submitting appropriately sized jobs.
+</P>
+
+<p style="text-align:center;">Last modified 7 July 2008</p>
 
 <!--#include virtual="footer.txt"-->
diff --git a/doc/html/preempt.shtml b/doc/html/preempt.shtml
index d58acf0032483a69c2fc9033c28b17dcb7546a58..f9fd8c0b9db9ee7c2a6955d000c826ebe817e2d1 100644
--- a/doc/html/preempt.shtml
+++ b/doc/html/preempt.shtml
@@ -44,10 +44,13 @@ chosen, we recommend setting <I>SelectTypeParameter=CR_Memory</I>. When
 (ex. <I>SelectTypeParameter=CR_Core_Memory</I>).
 </LI>
 <LI>
-<B>DefMemPerTask</B>: Since job requests may not explicitly specify 
-a memory requirement, we also recommend configuring <I>DefMemPerTask</I> 
-(default memory per task). It may also be desirable to configure 
-<I>MaxMemPerTask</I> (maximum memory per task) in <I>slurm.conf</I>.
+<B>DefMemPerCPU</B>: Since job requests may not explicitly specify 
+a memory requirement, we also recommend configuring 
+<I>DefMemPerCPU</I> (default memory per allocated CPU) or 
+<I>DefMemPerNode</I> (default memory per allocated node). 
+It may also be desirable to configure 
+<I>MaxMemPerCPU</I> (maximum memory per allocated CPU) or 
+<I>MaxMemPerNode</I> (maximum memory per allocated node) in <I>slurm.conf</I>.
 </LI>
 <LI>
 <B>JobAcctGatherType and JobAcctGatherFrequency</B>:
@@ -242,6 +245,6 @@ again. This will be investigated at some point in the future. Requeuing a
 preempted job may make the most sense with <I>Shared=NO</I> partitions.
 </P>
 
-<p style="text-align:center;">Last modified 11 April 2008</p>
+<p style="text-align:center;">Last modified 7 July 2008</p>
 
 <!--#include virtual="footer.txt"-->
diff --git a/doc/man/man1/salloc.1 b/doc/man/man1/salloc.1
index 72a8bcdb59faa95ac8eb3d090dc780317ff03b39..356f916d006bd7c2accba7bb63bd135b23458205 100644
--- a/doc/man/man1/salloc.1
+++ b/doc/man/man1/salloc.1
@@ -1,4 +1,4 @@
-.TH "salloc" "1" "SLURM 1.3" "May 2008" "SLURM Commands"
+.TH "salloc" "1" "SLURM 1.3" "July 2008" "SLURM Commands"
 .SH "NAME"
 .LP 
 salloc \- Obtain a SLURM job allocation (a set of nodes), execute a command, and then release the allocation when the command is finished.
@@ -306,12 +306,24 @@ The default value is the username of the submitting user.
 .TP
 \fB\-\-mem\fR[=]<\fIMB\fR>
 Specify the real memory required per node in MegaBytes.
-If a value is specified, that quantity of memory will be 
-reserved for this job. 
-If no value is specified and real memory is exhausted on 
-any allocated node then the job is subject to cancellation.
-Also see \fB\-\-task\-mem\fR.
-
+Default value is \fBDefMemPerNode\fR and the maximum value is
+\fBMaxMemPerNode\fR. If configured, both parameters can be
+seen using the \fBscontrol show config\fR command.
+This parameter would generally be used if whole nodes
+are allocated to jobs (\fBSelectType=select/linear\fR).
+Also see \fB\-\-mem\-per\-cpu\fR.
+\fB\-\-mem\fR and \fB\-\-mem\-per\-cpu\fR are mutually exclusive.
+
+.TP
+\fB\-\-mem\-per\-cpu\fR[=]<\fIMB\fR>
+Minimum memory required per allocated CPU in MegaBytes.
+Default value is \fBDefMemPerCPU\fR and the maximum value is
+\fBMaxMemPerCPU\fR. If configured, both parameters can be
+seen using the \fBscontrol show config\fR command.
+This parameter would generally be used if individual processors
+are allocated to jobs (\fBSelectType=select/cons_res\fR).
+Also see \fB\-\-mem\fR.
+\fB\-\-mem\fR and \fB\-\-mem\-per\-cpu\fR are mutually exclusive.
 
 .TP
 \fB\-\-mincores\fR[=]<\fIn\fR>
@@ -495,13 +507,6 @@ Acceptable time formats include "minutes", "minutes:seconds",
 "hours:minutes:seconds", "days\-hours", "days\-hours:minutes" and 
 "days\-hours:minutes:seconds".
 
-.TP
-\fB\-\-task\-mem\fR[=]<\fIMB\fR>
-Mimimum memory available per task in MegaBytes.
-Default value is \fBDefMemPerTask\fR and the maximum value is
-\fBMaxMemPerTask\fR, both of which can be seen using the
-\fBscontrol show config\fR command.
-
 .TP
 \fB\-\-tmp\fR[=]<\fIMB\fR>
 Specify a minimum amount of temporary disk space.
@@ -709,6 +714,7 @@ salloc \-N5 srun \-n10 myprogram
 
 .SH "COPYING"
 Copyright (C) 2006\-2007 The Regents of the University of California.
+Copyright (C) 2008 Lawrence Livermore National Security.
 Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
 LLNL\-CODE\-402394.
 .LP
diff --git a/doc/man/man1/sbatch.1 b/doc/man/man1/sbatch.1
index 03accb92bfec2f88a4b977225ab4a960d1835ae9..918f7843130e5d4f46cb0c461947af6ff4f40339 100644
--- a/doc/man/man1/sbatch.1
+++ b/doc/man/man1/sbatch.1
@@ -1,4 +1,4 @@
-.TH "sbatch" "1" "SLURM 1.3" "May 2008" "SLURM Commands"
+.TH "sbatch" "1" "SLURM 1.3" "July 2008" "SLURM Commands"
 .SH "NAME"
 .LP 
 sbatch \- Submit a batch script to SLURM.
@@ -330,11 +330,24 @@ The default value is the username of the submitting user.
 .TP
 \fB\-\-mem\fR[=]<\fIMB\fR>
 Specify the real memory required per node in MegaBytes.
-If a value is specified, that quantity of memory will be 
-reserved for this job. 
-If no value is specified and real memory is exhausted on 
-any allocated node then the job is subject to cancellation.
-Also see \fB\-\-task\-mem\fR.
+Default value is \fBDefMemPerNode\fR and the maximum value is
+\fBMaxMemPerNode\fR. If configured, both parameters can be
+seen using the \fBscontrol show config\fR command.
+This parameter would generally be used if whole nodes
+are allocated to jobs (\fBSelectType=select/linear\fR).
+Also see \fB\-\-mem\-per\-cpu\fR.
+\fB\-\-mem\fR and \fB\-\-mem\-per\-cpu\fR are mutually exclusive.
+
+.TP
+\fB\-\-mem\-per\-cpu\fR[=]<\fIMB\fR>
+Minimum memory required per allocated CPU in MegaBytes.
+Default value is \fBDefMemPerCPU\fR and the maximum value is
+\fBMaxMemPerCPU\fR. If configured, both parameters can be
+seen using the \fBscontrol show config\fR command.
+This parameter would generally be used if individual processors
+are allocated to jobs (\fBSelectType=select/cons_res\fR).
+Also see \fB\-\-mem\fR.
+\fB\-\-mem\fR and \fB\-\-mem\-per\-cpu\fR are mutually exclusive.
 
 .TP
 \fB\-\-mincores\fR[=]<\fIn\fR>
@@ -582,13 +595,6 @@ Acceptable time formats include "minutes", "minutes:seconds",
 "hours:minutes:seconds", "days\-hours", "days\-hours:minutes" and 
 "days\-hours:minutes:seconds".
 
-.TP
-\fB\-\-task\-mem\fR[=]<\fIMB\fR>
-Mimimum memory available per task in MegaBytes.
-Default value is \fBDefMemPerTask\fR and the maximum value is
-\fBMaxMemPerTask\fR, both of which can be seen using the
-\fBscontrol show config\fR command.
-
 .TP
 \fB\-\-tasks\-per\-node\fR[=]<\fIn\fR>
 Specify the number of tasks to be launched per node.
@@ -867,6 +873,7 @@ host4
 
 .SH "COPYING"
 Copyright (C) 2006\-2007 The Regents of the University of California.
+Copyright (C) 2008 Lawrence Livermore National Security.
 Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
 LLNL\-CODE\-402394.
 .LP
diff --git a/doc/man/man1/srun.1 b/doc/man/man1/srun.1
index e7c755e885d0ef875ab65ca6498d40ba4a1e40d9..5aca020e0194eff6dcf6d90eb4b7e9943694515f 100644
--- a/doc/man/man1/srun.1
+++ b/doc/man/man1/srun.1
@@ -1,6 +1,4 @@
-.\" $Id$
-.\"
-.TH SRUN "1" "May 2008" "srun 1.3" "slurm components"
+.TH SRUN "1" "July 2008" "srun 1.3" "slurm components"
 
 .SH "NAME"
 srun \- run parallel jobs
@@ -425,11 +423,24 @@ The default value is the submitting user.
 .TP
 \fB\-\-mem\fR[=]<\fIMB\fR>
 Specify the real memory required per node in MegaBytes.
-If a value is specified, that quantity of memory will be 
-reserved for this job. 
-If no value is specified and real memory is exhausted on 
-any allocated node then the job is subject to cancellation.
-Also see \fB\-\-task\-mem\fR.
+Default value is \fBDefMemPerNode\fR and the maximum value is
+\fBMaxMemPerNode\fR. If configured, both parameters can be
+seen using the \fBscontrol show config\fR command.
+This parameter would generally be used if whole nodes
+are allocated to jobs (\fBSelectType=select/linear\fR).
+Also see \fB\-\-mem\-per\-cpu\fR.
+\fB\-\-mem\fR and \fB\-\-mem\-per\-cpu\fR are mutually exclusive.
+
+.TP
+\fB\-\-mem\-per\-cpu\fR[=]<\fIMB\fR>
+Minimum memory required per allocated CPU in MegaBytes.
+Default value is \fBDefMemPerCPU\fR and the maximum value is
+\fBMaxMemPerCPU\fR. If configured, both parameters can be
+seen using the \fBscontrol show config\fR command.
+This parameter would generally be used if individual processors
+are allocated to jobs (\fBSelectType=select/cons_res\fR).
+Also see \fB\-\-mem\fR.
+\fB\-\-mem\fR and \fB\-\-mem\-per\-cpu\fR are mutually exclusive.
 
 .TP
 \fB\-\-mem_bind\fR=[{\fIquiet,verbose\fR},]\fItype\fR
@@ -843,13 +854,6 @@ in slurm.conf is executed. This is meant to be a very short\-lived
 program. If it fails to terminate within a few seconds, it will 
 be killed along with any descendant processes.
 
-.TP
-\fB\-\-task\-mem\fR[=]<\fIMB\fR>
-Mimimum memory available per task in MegaBytes.
-Default value is \fBDefMemPerTask\fR and the maximum value is
-\fBMaxMemPerTask\fR, both of which can be seen using the
-\fBscontrol show config\fR command.
-
 .TP
 \fB\-\-task\-prolog\fR=\fIexecutable\fR
 The \fBslurmd\fR daemon will run \fIexecutable\fR just before launching 
@@ -1624,6 +1628,7 @@ wait
 
 .SH "COPYING"
 Copyright (C) 2006\-2007 The Regents of the University of California.
+Copyright (C) 2008 Lawrence Livermore National Security.
 Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
 LLNL\-CODE\-402394.
 .LP
diff --git a/doc/man/man5/slurm.conf.5 b/doc/man/man5/slurm.conf.5
index 1d908839d3617961e8332bfce4b5b0734f8ddb19..6e625fa804f6207bbeb816106d8077eb94970cb2 100644
--- a/doc/man/man5/slurm.conf.5
+++ b/doc/man/man5/slurm.conf.5
@@ -1,4 +1,4 @@
-.TH "slurm.conf" "5" "June 2008" "slurm.conf 1.3" "Slurm configuration file"
+.TH "slurm.conf" "5" "July 2008" "slurm.conf 1.3" "Slurm configuration file"
 
 .SH "NAME"
 slurm.conf \- Slurm configuration file 
@@ -208,11 +208,25 @@ License (GPL).
 The default value is "crypto/openssl".
 
 .TP
-\fBDefMemPerTask\fR
-Default real memory size available per task in MegaBytes. 
+\fBDefMemPerCPU\fR
+Default real memory size available per allocated CPU in MegaBytes. 
 Used to avoid over\-subscribing memory and causing paging.
-Also see \fBMaxMemPerTask\fR.
+\fBDefMemPerCPU\fR would generally be used if individual processors
+are allocated to jobs (\fBSelectType=select/cons_res\fR). 
 The default value is 0 (unlimited).
+Also see \fBDefMemPerNode\fR and \fBMaxMemPerCPU\fR.
+\fBDefMemPerCPU\fR and \fBDefMemPerNode\fR are mutually exclusive.
+
+.TP
+\fBDefMemPerNode\fR
+Default real memory size available per allocated node in MegaBytes.
+Used to avoid over\-subscribing memory and causing paging.
+\fBDefMemPerNode\fR would generally be used if whole nodes
+are allocated to jobs (\fBSelectType=select/linear\fR) and 
+resources are shared (\fBShared=yes\fR or \fBShared=force\fR).
+The default value is 0 (unlimited).
+Also see \fBDefMemPerCPU\fR and \fBMaxMemPerNode\fR.
+\fBDefMemPerCPU\fR and \fBDefMemPerNode\fR are mutually exclusive.
 
 .TP
 \fBDefaultStorageHost\fR
@@ -525,11 +539,25 @@ of the slurmctld daemon.
 May not exceed 65533.
 
 .TP
-\fBMaxMemPerTask\fR
-Maximum real memory size available per task in MegaBytes. 
+\fBMaxMemPerCPU\fR
+Maximum real memory size available per allocated CPU in MegaBytes. 
+Used to avoid over\-subscribing memory and causing paging.
+\fBMaxMemPerCPU\fR would generally be used if individual processors
+are allocated to jobs (\fBSelectType=select/cons_res\fR).
+The default value is 0 (unlimited).
+Also see \fBDefMemPerCPU\fR and \fBMaxMemPerNode\fR.
+\fBMaxMemPerCPU\fR and \fBMaxMemPerNode\fR are mutually exclusive.
+
+.TP
+\fBMaxMemPerNode\fR
+Maximum real memory size available per allocated node in MegaBytes.
 Used to avoid over\-subscribing memory and causing paging.
-Also see \fBDefMemPerTask\fR.
+\fBMaxMemPerNode\fR would generally be used if whole nodes
+are allocated to jobs (\fBSelectType=select/linear\fR) and
+resources are shared (\fBShared=yes\fR or \fBShared=force\fR).
 The default value is 0 (unlimited).
+Also see \fBDefMemPerNode\fR and \fBMaxMemPerCPU\fR.
+\fBMaxMemPerCPU\fR and \fBMaxMemPerNode\fR are mutually exclusive.
 
 .TP
 \fBMessageTimeout\fR
@@ -835,22 +863,26 @@ On single\-core systems, each CPUs will be considered a CPU.
 .TP
 \fBCR_CPU_Memory\fR
 CPUs and memory are consumable resources.
+Setting a value for \fBDefMemPerCPU\fR is strongly recommended.
 .TP
 \fBCR_Core\fR
 Cores are consumable resources.
 .TP
 \fBCR_Core_Memory\fR
 Cores and memory are consumable resources.
+Setting a value for \fBDefMemPerCPU\fR is strongly recommended.
 .TP
 \fBCR_Socket\fR
 Sockets are consumable resources.
 .TP
 \fBCR_Socket_Memory\fR
 Memory and CPUs are consumable resources.
+Setting a value for \fBDefMemPerCPU\fR is strongly recommended.
 .TP
 \fBCR_Memory\fR
 Memory is a consumable resource.
 NOTE: This implies \fIShared=YES\fR or \fIShared=FORCE\fR for all partitions.
+Setting a value for \fBDefMemPerCPU\fR is strongly recommended.
 .RE
 
 .TP
diff --git a/slurm/slurm.h.in b/slurm/slurm.h.in
index 775a449d4691cfc5cf9073f5481622a993c28601..ba543b016cfa6754b99626b8611c2f7e0dc47062 100644
--- a/slurm/slurm.h.in
+++ b/slurm/slurm.h.in
@@ -448,6 +448,7 @@ typedef enum select_type_plugin_info {
 #define TASK_PARAM_CPUSETS 0x0001
 #define TASK_PARAM_SCHED   0x0002
 
+#define MEM_PER_CPU  0x80000000
 #define SHARED_FORCE 0x8000
 
 /*****************************************************************************\
@@ -528,7 +529,9 @@ typedef struct job_descriptor {	/* For submit, allocate, and update requests */
 	uint16_t job_min_sockets;  /* minimum sockets per node, default=0 */
 	uint16_t job_min_cores;    /* minimum cores per processor, default=0 */
 	uint16_t job_min_threads;  /* minimum threads per core, default=0 */
-	uint32_t job_min_memory;   /* minimum real memory per node, default=0 */
+	uint32_t job_min_memory;   /* minimum real memory per node OR 
+				    * real memory per CPU | MEM_PER_CPU,
+				    * default=0 (no limit) */
 	uint32_t job_min_tmp_disk; /* minimum tmp disk per node, default=0 */
 	uint32_t num_procs;	/* total count of processors required, 
 				 * default=0 */
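
The MEM_PER_CPU flag introduced above overloads the high bit of the 32-bit
job_min_memory field: when the bit is set, the remaining bits hold a per-CPU
memory limit in MegaBytes; otherwise they hold a per-node limit. A minimal
standalone sketch of that encoding follows; print_mem_limit() is only an
illustration, not a SLURM function.

#include <stdint.h>
#include <stdio.h>

#define MEM_PER_CPU 0x80000000

/* Decode job_min_memory the same way the patch does in job_info.c and
 * config_info.c: test the flag bit, then mask it off to get MegaBytes. */
static void print_mem_limit(uint32_t job_min_memory)
{
	if (job_min_memory & MEM_PER_CPU)
		printf("MinMemoryCPU=%u\n", job_min_memory & ~MEM_PER_CPU);
	else
		printf("MinMemoryNode=%u\n", job_min_memory);
}

int main(void)
{
	print_mem_limit(512);			/* per-node limit of 512 MB */
	print_mem_limit(512 | MEM_PER_CPU);	/* per-CPU limit of 512 MB  */
	return 0;
}
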
diff --git a/src/api/config_info.c b/src/api/config_info.c
index db17884c6c42774433f094316ec80a200d6f8236..f1867c87ae177d597f69e9ddae95d781ae685d74 100644
--- a/src/api/config_info.c
+++ b/src/api/config_info.c
@@ -153,11 +153,15 @@ void slurm_print_ctl_conf ( FILE* out,
 		slurm_ctl_conf_ptr->control_machine);
 	fprintf(out, "CryptoType              = %s\n",
 		slurm_ctl_conf_ptr->crypto_type);
-	if (slurm_ctl_conf_ptr->def_mem_per_task) {
-		fprintf(out, "DefMemPerTask           = %u\n",
+	if (slurm_ctl_conf_ptr->def_mem_per_task & MEM_PER_CPU) {
+		fprintf(out, "DefMemPerCPU            = %u\n",
+			slurm_ctl_conf_ptr->def_mem_per_task &
+			(~MEM_PER_CPU));
+	} else if (slurm_ctl_conf_ptr->def_mem_per_task) {
+		fprintf(out, "DefMemPerNode           = %u\n",
 			slurm_ctl_conf_ptr->def_mem_per_task);
 	} else
-		fprintf(out, "DefMemPerTask           = UNLIMITED\n");
+		fprintf(out, "DefMemPerCPU            = UNLIMITED\n");
 	if (slurm_ctl_conf_ptr->disable_root_jobs)
 		fprintf(out, "DisableRootJobs         = YES\n");
 	else
@@ -220,11 +224,15 @@ void slurm_print_ctl_conf ( FILE* out,
 		slurm_ctl_conf_ptr->mail_prog);
 	fprintf(out, "MaxJobCount             = %u\n", 
 		slurm_ctl_conf_ptr->max_job_cnt);
-	if (slurm_ctl_conf_ptr->max_mem_per_task) {
-		fprintf(out, "MaxMemPerTask           = %u\n",
+	if (slurm_ctl_conf_ptr->max_mem_per_task & MEM_PER_CPU) {
+		fprintf(out, "MaxMemPerCPU            = %u\n",
+			slurm_ctl_conf_ptr->max_mem_per_task &
+			(~MEM_PER_CPU));
+	} else if (slurm_ctl_conf_ptr->max_mem_per_task) {
+		fprintf(out, "MaxMemPerNode           = %u\n",
 			slurm_ctl_conf_ptr->max_mem_per_task);
 	} else
-		fprintf(out, "MaxMemPerTask           = UNLIMITED\n");
+		fprintf(out, "MaxMemPerCPU            = UNLIMITED\n");
 	fprintf(out, "MessageTimeout          = %u\n",
 		slurm_ctl_conf_ptr->msg_timeout);
 	fprintf(out, "MinJobAge               = %u\n", 
diff --git a/src/api/init_msg.c b/src/api/init_msg.c
index abb3d973d0ffd2e3e4e8d10b2c98513abcacdbcb..333752b31d3f1bcb9320351057a08e97a16b22fa 100644
--- a/src/api/init_msg.c
+++ b/src/api/init_msg.c
@@ -1,8 +1,8 @@
 /*****************************************************************************\
  *  init_msg.c - initialize RPC messages contents
- *  $Id$
  *****************************************************************************
- *  Copyright (C) 2002-2006 The Regents of the University of California.
+ *  Copyright (C) 2002-2007 The Regents of the University of California.
+ *  Copyright (C) 2008 Lawrence Livermore National Security.
  *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
  *  Written by Morris Jette <jette1@llnl.gov>.
  *  LLNL-CODE-402394.
@@ -55,71 +55,25 @@
  */
 void slurm_init_job_desc_msg(job_desc_msg_t * job_desc_msg)
 {
-	job_desc_msg->account     = NULL;
-	job_desc_msg->acctg_freq  = (uint16_t) NO_VAL;
-	job_desc_msg->alloc_node  = NULL;
-	job_desc_msg->alloc_sid   = NO_VAL;
-	job_desc_msg->comment     = NULL;
-	job_desc_msg->contiguous  = (uint16_t) NO_VAL;
-	job_desc_msg->cpus_per_task = (uint16_t) NO_VAL;
-	job_desc_msg->ntasks_per_node   = (uint16_t) NO_VAL;
-	job_desc_msg->ntasks_per_socket = (uint16_t) NO_VAL;
-	job_desc_msg->ntasks_per_core   = (uint16_t) NO_VAL;
-	job_desc_msg->dependency  = NULL;
-	job_desc_msg->environment = ((char **) NULL);
-	job_desc_msg->env_size    = 0;
-	job_desc_msg->features    = NULL;
-	job_desc_msg->immediate   = 0;
-	job_desc_msg->job_id      = NO_VAL;
-	job_desc_msg->job_min_cores   = (uint16_t) NO_VAL;
-	job_desc_msg->job_min_procs   = (uint16_t) NO_VAL;
-	job_desc_msg->job_min_sockets = (uint16_t) NO_VAL;
-	job_desc_msg->job_min_threads = (uint16_t) NO_VAL;
-	job_desc_msg->job_min_memory  = NO_VAL;
-	job_desc_msg->job_min_tmp_disk= NO_VAL;
-	job_desc_msg->kill_on_node_fail = (uint16_t) NO_VAL;
-	job_desc_msg->licenses    = NULL;
-	job_desc_msg->name        = NULL;
-	job_desc_msg->network     = NULL;
-	job_desc_msg->nice        = NICE_OFFSET;
-	job_desc_msg->ntasks_per_core   = (uint16_t) NO_VAL;
-	job_desc_msg->ntasks_per_node   = (uint16_t) NO_VAL;
-	job_desc_msg->ntasks_per_socket = (uint16_t) NO_VAL;
-	job_desc_msg->num_tasks   = NO_VAL;
-	job_desc_msg->open_mode   = 0;	/* system default */
-	job_desc_msg->overcommit  = (uint8_t) NO_VAL;
-	job_desc_msg->partition   = NULL;
-	job_desc_msg->plane_size  = (uint16_t) NO_VAL;
-	job_desc_msg->priority    = NO_VAL;
-	job_desc_msg->req_nodes   = NULL;
-	job_desc_msg->exc_nodes   = NULL;
-	job_desc_msg->script      = NULL;
-	job_desc_msg->argv        = ((char **) NULL);
-	job_desc_msg->argc        = 0;
-	job_desc_msg->shared      = (uint16_t) NO_VAL;
-	job_desc_msg->task_dist   = (uint16_t) NO_VAL;
-	job_desc_msg->time_limit  = NO_VAL;
-	job_desc_msg->num_procs   = NO_VAL;
-	job_desc_msg->max_nodes   = NO_VAL;
-	job_desc_msg->min_nodes   = NO_VAL;
-	job_desc_msg->max_sockets = (uint16_t) NO_VAL;
-	job_desc_msg->min_sockets = (uint16_t) NO_VAL;
-	job_desc_msg->max_cores   = (uint16_t) NO_VAL;
-	job_desc_msg->min_cores   = (uint16_t) NO_VAL;
-	job_desc_msg->max_threads = (uint16_t) NO_VAL;
-	job_desc_msg->min_threads = (uint16_t) NO_VAL;
-	job_desc_msg->err         = NULL;
-	job_desc_msg->in          = NULL;
-	job_desc_msg->out         = NULL;
-	job_desc_msg->user_id     = NO_VAL;
-	job_desc_msg->group_id    = NO_VAL;
-	job_desc_msg->work_dir    = NULL;
-	job_desc_msg->alloc_resp_port = 0;
-	job_desc_msg->other_port  = 0;
-	job_desc_msg->mail_type   = 0;
-	job_desc_msg->mail_user   = NULL;
-	job_desc_msg->begin_time  = 0;
-	job_desc_msg->requeue  = (uint16_t) NO_VAL;
+	job_desc_msg->account		= NULL;
+	job_desc_msg->acctg_freq	= (uint16_t) NO_VAL;
+	job_desc_msg->alloc_node	= NULL;
+	job_desc_msg->alloc_resp_port	= 0;
+	job_desc_msg->alloc_sid		= NO_VAL;
+	job_desc_msg->argc		= 0;
+	job_desc_msg->argv		= ((char **) NULL);
+	job_desc_msg->begin_time	= 0;
+	job_desc_msg->blrtsimage	= NULL;
+	job_desc_msg->comment		= NULL;
+	job_desc_msg->conn_type		= (uint16_t) NO_VAL;
+	job_desc_msg->contiguous	= (uint16_t) NO_VAL;
+	job_desc_msg->cpus_per_task	= (uint16_t) NO_VAL;
+	job_desc_msg->dependency	= NULL;
+	job_desc_msg->environment	= ((char **) NULL);
+	job_desc_msg->env_size		= 0;
+	job_desc_msg->err		= NULL;
+	job_desc_msg->exc_nodes		= NULL;
+	job_desc_msg->features		= NULL;
 #if SYSTEM_DIMENSIONS
 {
 	int i;
@@ -127,14 +81,58 @@ void slurm_init_job_desc_msg(job_desc_msg_t * job_desc_msg)
 		job_desc_msg->geometry[i] = (uint16_t) NO_VAL;
 }
 #endif
-	job_desc_msg->conn_type   = (uint16_t) NO_VAL;
-	job_desc_msg->reboot      = (uint16_t) NO_VAL;
-	job_desc_msg->rotate      = (uint16_t) NO_VAL;
-	job_desc_msg->blrtsimage = NULL;
-	job_desc_msg->linuximage = NULL;
-	job_desc_msg->mloaderimage = NULL;
-	job_desc_msg->ramdiskimage = NULL;
-	job_desc_msg->select_jobinfo = NULL;
+	job_desc_msg->group_id		= NO_VAL;
+	job_desc_msg->immediate		= 0;
+	job_desc_msg->in		= NULL;
+	job_desc_msg->job_id		= NO_VAL;
+	job_desc_msg->job_min_cores	= (uint16_t) NO_VAL;
+	job_desc_msg->job_min_procs	= (uint16_t) NO_VAL;
+	job_desc_msg->job_min_sockets	= (uint16_t) NO_VAL;
+	job_desc_msg->job_min_threads	= (uint16_t) NO_VAL;
+	job_desc_msg->job_min_memory    = NO_VAL;
+	job_desc_msg->job_min_tmp_disk  = NO_VAL;
+	job_desc_msg->kill_on_node_fail = (uint16_t) NO_VAL;
+	job_desc_msg->licenses		= NULL;
+	job_desc_msg->linuximage	= NULL;
+	job_desc_msg->mail_type		= 0;
+	job_desc_msg->mail_user		= NULL;
+	job_desc_msg->max_cores		= (uint16_t) NO_VAL;
+	job_desc_msg->max_nodes		= NO_VAL;
+	job_desc_msg->max_sockets	= (uint16_t) NO_VAL;
+	job_desc_msg->max_threads	= (uint16_t) NO_VAL;
+	job_desc_msg->min_cores		= (uint16_t) NO_VAL;
+	job_desc_msg->min_nodes		= NO_VAL;
+	job_desc_msg->min_sockets	= (uint16_t) NO_VAL;
+	job_desc_msg->min_threads	= (uint16_t) NO_VAL;
+	job_desc_msg->mloaderimage	= NULL;
+	job_desc_msg->name		= NULL;
+	job_desc_msg->network		= NULL;
+	job_desc_msg->nice		= NICE_OFFSET;
+	job_desc_msg->ntasks_per_core	= (uint16_t) NO_VAL;
+	job_desc_msg->ntasks_per_node	= (uint16_t) NO_VAL;
+	job_desc_msg->ntasks_per_socket	= (uint16_t) NO_VAL;
+	job_desc_msg->num_procs		= NO_VAL;
+	job_desc_msg->num_tasks		= NO_VAL;
+	job_desc_msg->open_mode		= 0;	/* system default */
+	job_desc_msg->other_port	= 0;
+	job_desc_msg->out		= NULL;
+	job_desc_msg->overcommit	= (uint8_t) NO_VAL;
+	job_desc_msg->partition		= NULL;
+	job_desc_msg->plane_size	= (uint16_t) NO_VAL;
+	job_desc_msg->priority		= NO_VAL;
+	job_desc_msg->ramdiskimage	= NULL;
+	job_desc_msg->reboot		= (uint16_t) NO_VAL;
+	job_desc_msg->resp_host		= NULL;
+	job_desc_msg->req_nodes		= NULL;
+	job_desc_msg->requeue		= (uint16_t) NO_VAL;
+	job_desc_msg->rotate		= (uint16_t) NO_VAL;
+	job_desc_msg->script		= NULL;
+	job_desc_msg->select_jobinfo	= NULL;
+	job_desc_msg->shared		= (uint16_t) NO_VAL;
+	job_desc_msg->task_dist		= (uint16_t) NO_VAL;
+	job_desc_msg->time_limit	= NO_VAL;
+	job_desc_msg->user_id		= NO_VAL;
+	job_desc_msg->work_dir		= NULL;
 }
 
 /*
diff --git a/src/api/job_info.c b/src/api/job_info.c
index 6a53047bd4e99ee45a58853ddec4d315665b9a26..f44ed7db99757edbee9bce412f3c995e8a0feecb 100644
--- a/src/api/job_info.c
+++ b/src/api/job_info.c
@@ -371,13 +371,18 @@ slurm_sprint_job_info ( job_info_t * job_ptr, int one_liner )
 		xstrcat(out, "\n   ");
 
 	/****** Line 10 ******/
+	if (job_ptr->job_min_memory & MEM_PER_CPU) {
+		job_ptr->job_min_memory &= (~MEM_PER_CPU);
+		tmp3_ptr = "CPU";
+	} else
+		tmp3_ptr = "Node";
 	convert_num_unit((float)job_ptr->job_min_memory, tmp1, sizeof(tmp1),
 			 UNIT_NONE);
 	convert_num_unit((float)job_ptr->job_min_tmp_disk, tmp2, sizeof(tmp2),
 			 UNIT_NONE);
 	snprintf(tmp_line, sizeof(tmp_line), 
-		"MinMemory=%s MinTmpDisk=%s Features=%s",
-		tmp1, tmp2, job_ptr->features);
+		"MinMemory%s=%s MinTmpDisk=%s Features=%s",
+		tmp3_ptr, tmp1, tmp2, job_ptr->features);
 	xstrcat(out, tmp_line);
 	if (one_liner)
 		xstrcat(out, " ");
diff --git a/src/api/step_ctx.c b/src/api/step_ctx.c
index cfbafbf636200e0d426e5d02d74999a34bf04030..4bbcd2da4afe2bb87fcd1985910c424c4186714f 100644
--- a/src/api/step_ctx.c
+++ b/src/api/step_ctx.c
@@ -58,12 +58,14 @@ static void
 _job_fake_cred(struct slurm_step_ctx_struct *ctx)
 {
 	slurm_cred_arg_t arg;
-	arg.jobid    = ctx->job_id;
-	arg.stepid   = ctx->step_resp->job_step_id;
-	arg.uid      = ctx->user_id;
-	arg.hostlist = ctx->step_req->node_list;
-        arg.alloc_lps_cnt = 0;    
-        arg.alloc_lps     =  NULL; 
+	arg.alloc_lps_cnt = 0;
+	arg.alloc_lps     = NULL;
+	arg.hostlist      = ctx->step_req->node_list;
+	arg.job_mem       = 0;
+	arg.jobid         = ctx->job_id;
+	arg.stepid        = ctx->step_resp->job_step_id;
+	arg.task_mem      = 0;
+	arg.uid           = ctx->user_id;
 	ctx->step_resp->cred = slurm_cred_faker(&arg);
 }
 
diff --git a/src/common/read_config.c b/src/common/read_config.c
index 724caa3e10bc88c9a36ef2ece883395a6efc2a77..dfcf0ffd08d3f0e9aa19e01889dcceebfe137506 100644
--- a/src/common/read_config.c
+++ b/src/common/read_config.c
@@ -146,7 +146,9 @@ s_p_options_t slurm_conf_options[] = {
 	{"DefaultStoragePort", S_P_UINT32},
 	{"DefaultStorageType", S_P_STRING},
 	{"DefaultStorageUser", S_P_STRING},
-	{"DefMemPerTask", S_P_UINT32},
+	{"DefMemPerCPU", S_P_UINT32},
+	{"DefMemPerNode", S_P_UINT32},
+	{"DefMemPerTask", S_P_UINT32},	/* defunct */
 	{"DisableRootJobs", S_P_BOOLEAN},
 	{"EnforcePartLimits", S_P_BOOLEAN},
 	{"Epilog", S_P_STRING},
@@ -179,7 +181,9 @@ s_p_options_t slurm_conf_options[] = {
 	{"Licenses", S_P_STRING},
 	{"MailProg", S_P_STRING},
 	{"MaxJobCount", S_P_UINT16},
-	{"MaxMemPerTask", S_P_UINT32},
+	{"MaxMemPerCPU", S_P_UINT32},
+	{"MaxMemPerNode", S_P_UINT32},
+	{"MaxMemPerTask", S_P_UINT32},	/* defunct */
 	{"MessageTimeout", S_P_UINT16},
 	{"MinJobAge", S_P_UINT16},
 	{"MpichGmDirectSupport", S_P_LONG, defunct_option},
@@ -1551,7 +1555,7 @@ validate_and_set_defaults(slurm_ctl_conf_t *conf, s_p_hashtbl_t *hashtbl)
 	s_p_get_string(&default_storage_host, "DefaultStorageHost", hashtbl);
 	s_p_get_string(&default_storage_user, "DefaultStorageUser", hashtbl);
 	s_p_get_string(&default_storage_pass, "DefaultStoragePass", hashtbl);
-	s_p_get_string(&default_storage_loc, "DefaultStorageLoc", hashtbl);
+	s_p_get_string(&default_storage_loc,  "DefaultStorageLoc", hashtbl);
 	s_p_get_uint32(&default_storage_port, "DefaultStoragePort", hashtbl);
 
 	if (!s_p_get_string(&conf->job_credential_private_key,
@@ -1577,8 +1581,11 @@ validate_and_set_defaults(slurm_ctl_conf_t *conf, s_p_hashtbl_t *hashtbl)
 	if (!s_p_get_string(&conf->crypto_type, "CryptoType", hashtbl))
 		 conf->crypto_type = xstrdup(DEFAULT_CRYPTO_TYPE);
 
-	if (!s_p_get_uint32(&conf->def_mem_per_task, "DefMemPerTask", hashtbl))
-		conf->def_mem_per_task = DEFAULT_MEM_PER_TASK;
+	if ((s_p_get_uint32(&conf->def_mem_per_task, "DefMemPerCPU", hashtbl)) ||
+	    (s_p_get_uint32(&conf->def_mem_per_task, "DefMemPerTask", hashtbl)))
+		conf->def_mem_per_task |= MEM_PER_CPU;
+	else if (!s_p_get_uint32(&conf->def_mem_per_task, "DefMemPerNode", hashtbl))
+		conf->def_mem_per_task = DEFAULT_MEM_PER_CPU;
 
 	if (!s_p_get_boolean((bool *) &conf->disable_root_jobs, 
 			     "DisableRootJobs", hashtbl))
@@ -1708,8 +1715,11 @@ validate_and_set_defaults(slurm_ctl_conf_t *conf, s_p_hashtbl_t *hashtbl)
 	if (!s_p_get_uint16(&conf->max_job_cnt, "MaxJobCount", hashtbl))
 		conf->max_job_cnt = DEFAULT_MAX_JOB_COUNT;
 
-	if (!s_p_get_uint32(&conf->max_mem_per_task, "MaxMemPerTask", hashtbl))
-		conf->max_mem_per_task = DEFAULT_MAX_MEM_PER_TASK;
+	if ((s_p_get_uint32(&conf->max_mem_per_task, "MaxMemPerCPU", hashtbl)) ||
+	    (s_p_get_uint32(&conf->max_mem_per_task, "MaxMemPerTask", hashtbl)))
+		conf->max_mem_per_task |= MEM_PER_CPU;
+	else if (!s_p_get_uint32(&conf->max_mem_per_task, "MaxMemPerNode", hashtbl))
+		conf->max_mem_per_task = DEFAULT_MAX_MEM_PER_CPU;
 
 	if (!s_p_get_uint16(&conf->msg_timeout, "MessageTimeout", hashtbl))
 		conf->msg_timeout = DEFAULT_MSG_TIMEOUT;
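
The parsing above also defines the precedence of the new slurm.conf keywords
and the legacy one: DefMemPerCPU (or the old DefMemPerTask spelling) is stored
with the MEM_PER_CPU flag set, otherwise DefMemPerNode is stored unflagged,
otherwise the value defaults to 0 (unlimited); MaxMemPerCPU/MaxMemPerNode
follow the same pattern. A simplified sketch, not SLURM code; the real parser
keys off whether the keyword is present at all rather than off a zero value.

#include <stdint.h>

#define MEM_PER_CPU 0x80000000

/* Combine the two possible slurm.conf settings into the single field
 * used internally; a zero argument stands in for "keyword not set". */
static uint32_t def_mem_from_conf(uint32_t per_cpu_mb, uint32_t per_node_mb)
{
	if (per_cpu_mb)			/* DefMemPerCPU or DefMemPerTask */
		return per_cpu_mb | MEM_PER_CPU;
	if (per_node_mb)		/* DefMemPerNode */
		return per_node_mb;
	return 0;			/* unlimited */
}
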
diff --git a/src/common/read_config.h b/src/common/read_config.h
index cddc90068df4f3ff9030a6e939b2398279da16d7..c64361b4ccc6fcee48786a46883c865e225b4286 100644
--- a/src/common/read_config.h
+++ b/src/common/read_config.h
@@ -73,8 +73,8 @@ extern char *default_plugstack;
 #define DEFAULT_KILL_WAIT           30
 #define DEFAULT_MAIL_PROG           "/bin/mail"
 #define DEFAULT_MAX_JOB_COUNT       5000
-#define DEFAULT_MEM_PER_TASK        0
-#define DEFAULT_MAX_MEM_PER_TASK    0
+#define DEFAULT_MEM_PER_CPU         0
+#define DEFAULT_MAX_MEM_PER_CPU     0
 #define DEFAULT_MIN_JOB_AGE         300
 #define DEFAULT_MPI_DEFAULT         "none"
 #define DEFAULT_MSG_TIMEOUT         10
diff --git a/src/plugins/select/cons_res/select_cons_res.c b/src/plugins/select/cons_res/select_cons_res.c
index d73c11b623c63431c353d72e1bad32ecffc0c2c2..26be1838eeb68e944bc9092b475ef53fd0c6f30a 100644
--- a/src/plugins/select/cons_res/select_cons_res.c
+++ b/src/plugins/select/cons_res/select_cons_res.c
@@ -599,6 +599,17 @@ static uint16_t _get_task_count(struct node_cr_record *select_node_ptr,
 					 &threads, alloc_cores, 
 					 cr_type, job_ptr->job_id,
 					 this_node->node_ptr->name);
+
+	if (job_ptr->details->job_min_memory & MEM_PER_CPU) {
+		uint32_t free_mem, mem_per_cpu;
+		int max_cpus;
+		mem_per_cpu = job_ptr->details->job_min_memory & (~MEM_PER_CPU);
+		free_mem = this_node->real_memory - this_node->alloc_memory;
+		max_cpus = free_mem / mem_per_cpu;
+		/* info("cpus avail:%d  mem for %d", numtasks, max_cpus); */
+		numtasks = MIN(numtasks, max_cpus);
+	}
+
 #if (CR_DEBUG)
 	info("cons_res: _get_task_count computed a_tasks %d s %d c %d "
 		"t %d on %s for job %d",
@@ -1992,8 +2003,9 @@ static int _verify_node_state(struct node_cr_record *select_node_ptr,
 			      enum node_cr_state job_node_req)
 {
 	int i;
-	uint32_t free_mem;
+	uint32_t free_mem, min_mem;
 
+	min_mem = job_ptr->details->job_min_memory & (~MEM_PER_CPU);
 	for (i = 0; i < select_node_cnt; i++) {
 		if (!bit_test(bitmap, i))
 			continue;
@@ -2003,7 +2015,7 @@ static int _verify_node_state(struct node_cr_record *select_node_ptr,
 		     (cr_type == CR_MEMORY) || (cr_type == CR_SOCKET_MEMORY))) {
 			free_mem = select_node_ptr[i].real_memory;
 			free_mem -= select_node_ptr[i].alloc_memory;
-			if (free_mem < job_ptr->details->job_min_memory)
+			if (free_mem < min_mem)
 				goto clear_bit;
 		}
 
@@ -2589,9 +2601,6 @@ static int _job_test(struct job_record *job_ptr, bitstr_t *bitmap,
 			job->cpus[j] = 0;
 		}
 		job->alloc_cpus[j] = 0;
-		if ((cr_type == CR_CORE_MEMORY) || (cr_type == CR_CPU_MEMORY) ||
-		    (cr_type == CR_MEMORY) || (cr_type == CR_SOCKET_MEMORY))
-			job->alloc_memory[j] = job_ptr->details->job_min_memory; 
 		if ((cr_type == CR_CORE) || (cr_type == CR_CORE_MEMORY)||
 		    (cr_type == CR_SOCKET) || (cr_type == CR_SOCKET_MEMORY)) {
 			_chk_resize_job(job, j, job->num_sockets[j]);
@@ -2652,6 +2661,26 @@ static int _job_test(struct job_record *job_ptr, bitstr_t *bitmap,
 		return error_code;
 	}
 
+	if (job_ptr->details->job_min_memory &&
+	    ((cr_type == CR_CORE_MEMORY) || (cr_type == CR_CPU_MEMORY) ||
+	     (cr_type == CR_MEMORY) || (cr_type == CR_SOCKET_MEMORY))) {
+		j = 0;
+		for (i = 0; i < node_record_count; i++) {
+			if (bit_test(bitmap, i) == 0)
+				continue;
+			if (job_ptr->details->job_min_memory & MEM_PER_CPU) {
+				job->alloc_memory[j] = job_ptr->details->
+						       job_min_memory &
+						       (~MEM_PER_CPU);
+				job->alloc_memory[j] *= job->alloc_cpus[j];
+			} else {
+				job->alloc_memory[j] = job_ptr->details->
+						       job_min_memory;
+			}
+			j++;
+		}
+	}
+
 	_append_to_job_list(job);
 	last_cr_update_time = time(NULL);
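
The cons_res changes above use a per-CPU memory request in two places: first to
cap how many CPUs of a node a job may be given (free memory divided by the
per-CPU request), then to charge memory to each node in proportion to the CPUs
actually allocated there. A standalone sketch with assumed numbers (a 2 GB node,
8 free CPUs, --mem-per-cpu=512):

#include <stdint.h>
#include <stdio.h>

#define MEM_PER_CPU 0x80000000
#define MIN(a,b) (((a) < (b)) ? (a) : (b))

int main(void)
{
	uint32_t job_min_memory = 512 | MEM_PER_CPU;	/* --mem-per-cpu=512 */
	uint32_t mem_per_cpu    = job_min_memory & ~MEM_PER_CPU;

	uint32_t real_memory  = 2048;	/* node memory in MB (assumed)  */
	uint32_t alloc_memory = 0;	/* nothing allocated there yet  */
	uint32_t avail_cpus   = 8;	/* CPUs the selector found free */

	/* free memory limits the usable CPU count: 2048/512 = 4 CPUs */
	uint32_t free_mem    = real_memory - alloc_memory;
	uint32_t usable_cpus = MIN(avail_cpus, free_mem / mem_per_cpu);

	/* memory charged to the node scales with the allocated CPUs */
	uint32_t charged = mem_per_cpu * usable_cpus;

	printf("usable CPUs=%u, memory charged=%u MB\n", usable_cpus, charged);
	return 0;
}
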
 
diff --git a/src/plugins/select/linear/select_linear.c b/src/plugins/select/linear/select_linear.c
index f210650025593d1c4a84a18d2b3c1d902b59c678..787580d21aedceffd8d0a5bef83773dcb62dd304 100644
--- a/src/plugins/select/linear/select_linear.c
+++ b/src/plugins/select/linear/select_linear.c
@@ -2,8 +2,6 @@
  *  select_linear.c - node selection plugin for simple one-dimensional 
  *  address space. Selects nodes for a job so as to minimize the number 
  *  of sets of consecutive nodes using a best-fit algorithm.
- *
- *  $Id$
  *****************************************************************************
  *  Copyright (C) 2004-2007 The Regents of the University of California.
  *  Copyright (C) 2008 Lawrence Livermore National Security.
@@ -559,7 +557,7 @@ static int _job_count_bitmap(struct node_cr_record *node_cr_ptr,
 {
 	int i, count = 0, total_jobs, total_run_jobs;
 	struct part_cr_record *part_cr_ptr;
-	uint32_t job_memory = 0;
+	uint32_t job_memory_cpu = 0, job_memory_node = 0;
 	bool exclusive;
 
 	xassert(node_cr_ptr);
@@ -572,24 +570,42 @@ static int _job_count_bitmap(struct node_cr_record *node_cr_ptr,
 	else
 		exclusive = true;
 
-	if (job_ptr->details->job_min_memory  && (cr_type == CR_MEMORY))
-		job_memory = job_ptr->details->job_min_memory;
+	if (job_ptr->details->job_min_memory  && (cr_type == CR_MEMORY)) {
+		if (job_ptr->details->job_min_memory & MEM_PER_CPU) {
+			job_memory_cpu = job_ptr->details->job_min_memory &
+					 (~MEM_PER_CPU);
+		} else
+			job_memory_node = job_ptr->details->job_min_memory;
+	}
 
 	for (i = 0; i < node_record_count; i++) {
 		if (!bit_test(bitmap, i)) {
 			bit_clear(jobmap, i);
 			continue;
 		}
-
-		if (select_fast_schedule) {
-			if ((node_cr_ptr[i].alloc_memory + job_memory) >
-			     node_record_table_ptr[i].config_ptr->real_memory) {
-				bit_clear(jobmap, i);
-				continue;
+		if (job_memory_cpu || job_memory_node) {
+			uint32_t alloc_mem, job_mem, avail_mem;
+			alloc_mem = node_cr_ptr[i].alloc_memory;
+			if (select_fast_schedule) {
+				avail_mem = node_record_table_ptr[i].
+					    config_ptr->real_memory;
+				if (job_memory_cpu) {
+					job_mem = job_memory_cpu *
+						  node_record_table_ptr[i].
+						  config_ptr->cpus;
+				} else
+					job_mem = job_memory_node;
+			} else {
+				avail_mem = node_record_table_ptr[i].
+					    real_memory;
+				if (job_memory_cpu) {
+					job_mem = job_memory_cpu *
+						  node_record_table_ptr[i].
+						  cpus;
+				} else
+					job_mem = job_memory_node;
 			}
-		} else {
-			if ((node_cr_ptr[i].alloc_memory + job_memory) >
-			     node_record_table_ptr[i].real_memory) {
+			if ((alloc_mem + job_mem) > avail_mem) {
 				bit_clear(jobmap, i);
 				continue;
 			}
@@ -1132,7 +1148,7 @@ static int _rm_job_from_nodes(struct node_cr_record *node_cr_ptr,
 {
 	int i, rc = SLURM_SUCCESS;
 	struct part_cr_record *part_cr_ptr;
-	uint32_t job_memory = 0;
+	uint32_t job_memory, job_memory_cpu = 0, job_memory_node = 0;
 
 	if (node_cr_ptr == NULL) {
 		error("%s: node_cr_ptr not initialized", pre_err);
@@ -1140,12 +1156,27 @@ static int _rm_job_from_nodes(struct node_cr_record *node_cr_ptr,
 	}
 
 	if (remove_all && job_ptr->details && 
-	    job_ptr->details->job_min_memory && (cr_type == CR_MEMORY))
-		job_memory = job_ptr->details->job_min_memory;
+	    job_ptr->details->job_min_memory && (cr_type == CR_MEMORY)) {
+		if (job_ptr->details->job_min_memory & MEM_PER_CPU) {
+			job_memory_cpu = job_ptr->details->job_min_memory &
+					 (~MEM_PER_CPU);
+		} else
+			job_memory_node = job_ptr->details->job_min_memory;
+	}
 
 	for (i = 0; i < select_node_cnt; i++) {
 		if (bit_test(job_ptr->node_bitmap, i) == 0)
 			continue;
+		if (job_memory_cpu == 0)
+			job_memory = job_memory_node;
+		else if (select_fast_schedule) {
+			job_memory = job_memory_cpu *
+				     node_record_table_ptr[i].
+				     config_ptr->cpus;
+		} else {
+			job_memory = job_memory_cpu *
+				     node_record_table_ptr[i].cpus;
+		}
 		if (node_cr_ptr[i].alloc_memory >= job_memory)
 			node_cr_ptr[i].alloc_memory -= job_memory;
 		else {
@@ -1208,7 +1239,7 @@ static int _add_job_to_nodes(struct node_cr_record *node_cr_ptr,
 {
 	int i, rc = SLURM_SUCCESS, exclusive = 0;
 	struct part_cr_record *part_cr_ptr;
-	uint32_t job_memory = 0;
+	uint32_t job_memory_cpu = 0, job_memory_node = 0;
 
 	if (node_cr_ptr == NULL) {
 		error("%s: node_cr_ptr not initialized", pre_err);
@@ -1216,15 +1247,32 @@ static int _add_job_to_nodes(struct node_cr_record *node_cr_ptr,
 	}
 
 	if (alloc_all && job_ptr->details && 
-	    job_ptr->details->job_min_memory && (cr_type == CR_MEMORY))
-		job_memory = job_ptr->details->job_min_memory;
+	    job_ptr->details->job_min_memory && (cr_type == CR_MEMORY)) {
+		if (job_ptr->details->job_min_memory & MEM_PER_CPU) {
+			job_memory_cpu = job_ptr->details->job_min_memory &
+					 (~MEM_PER_CPU);
+		} else
+			job_memory_node = job_ptr->details->job_min_memory;
+	}
+
 	if (job_ptr->details->shared == 0)
 		exclusive = 1;
 
 	for (i = 0; i < select_node_cnt; i++) {
 		if (bit_test(job_ptr->node_bitmap, i) == 0)
 			continue;
-		node_cr_ptr[i].alloc_memory += job_memory;
+		if (job_memory_cpu == 0)
+			node_cr_ptr[i].alloc_memory += job_memory_node;
+		else if (select_fast_schedule) {
+			node_cr_ptr[i].alloc_memory += 
+					job_memory_cpu *
+					node_record_table_ptr[i].
+					config_ptr->cpus;
+		} else {
+			node_cr_ptr[i].alloc_memory += 
+					job_memory_cpu *
+					node_record_table_ptr[i].cpus;
+		}
 		if (exclusive) {
 			if (node_cr_ptr[i].exclusive_jobid) {
 				error("select/linear: conflicting exclusive "
@@ -1341,7 +1389,7 @@ static void _init_node_cr(void)
 	ListIterator part_iterator;
 	struct job_record *job_ptr;
 	ListIterator job_iterator;
-	uint32_t job_memory, step_mem;
+	uint32_t job_memory_cpu, job_memory_node, step_mem = 0;
 	int exclusive, i, node_inx;
 	ListIterator step_iterator;
 	struct step_record *step_ptr;
@@ -1375,11 +1423,17 @@ static void _init_node_cr(void)
 		    (job_ptr->job_state != JOB_SUSPENDED))
 			continue;
 
+		job_memory_cpu  = 0;
+		job_memory_node = 0;
 		if (job_ptr->details && 
-		    job_ptr->details->job_min_memory && (cr_type == CR_MEMORY))
-			job_memory = job_ptr->details->job_min_memory;
-		else
-			job_memory = 0;
+		    job_ptr->details->job_min_memory && (cr_type == CR_MEMORY)) {
+			if (job_ptr->details->job_min_memory & MEM_PER_CPU) {
+				job_memory_cpu = job_ptr->details->job_min_memory &
+						 (~MEM_PER_CPU);
+			} else {
+				job_memory_node = job_ptr->details->job_min_memory;
+			}
+		}
 		if (job_ptr->details->shared == 0)
 			exclusive = 1;
 		else
@@ -1400,7 +1454,18 @@ static void _init_node_cr(void)
 				}
 				node_cr_ptr[i].exclusive_jobid = job_ptr->job_id;
 			}
-			node_cr_ptr[i].alloc_memory += job_memory;
+			if (job_memory_cpu == 0)
+				node_cr_ptr[i].alloc_memory += job_memory_node;
+			else if (select_fast_schedule) {
+				node_cr_ptr[i].alloc_memory += 
+						job_memory_cpu *
+						node_record_table_ptr[i].
+						config_ptr->cpus;
+			} else {
+				node_cr_ptr[i].alloc_memory += 
+						job_memory_cpu *
+						node_record_table_ptr[i].cpus;
+			}
 			part_cr_ptr = node_cr_ptr[i].parts;
 			while (part_cr_ptr) {
 				if (part_cr_ptr->part_ptr != job_ptr->part_ptr) {
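
The select/linear changes above handle the same flag, but since whole nodes are
allocated the per-CPU request is converted to a per-node charge by multiplying
by the node's CPU count (taken from the node's configuration when FastSchedule
is set, otherwise from the CPU count the node reported). A sketch of that
conversion; node_memory_charge() is illustrative only, not a function in the
plugin.

#include <stdint.h>

#define MEM_PER_CPU 0x80000000

/* Per-node memory footprint of a job on a node with the given CPU count. */
static uint32_t node_memory_charge(uint32_t job_min_memory, uint16_t node_cpus)
{
	if (job_min_memory & MEM_PER_CPU)
		return (job_min_memory & ~MEM_PER_CPU) * node_cpus;
	return job_min_memory;		/* already a per-node value */
}

/* e.g. node_memory_charge(1024 | MEM_PER_CPU, 16) == 16384 MB,
 *      node_memory_charge(8192, 16)              ==  8192 MB   */
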
diff --git a/src/salloc/opt.c b/src/salloc/opt.c
index 9e17c746bd650d9d4f610e30b5851f024561f2f5..a46937536c3dc7c76a419671900db89ccf3500ed 100644
--- a/src/salloc/opt.c
+++ b/src/salloc/opt.c
@@ -125,7 +125,7 @@
 #define LONG_OPT_NTASKSPERNODE   0x136
 #define LONG_OPT_NTASKSPERSOCKET 0x137
 #define LONG_OPT_NTASKSPERCORE   0x138
-#define LONG_OPT_TASK_MEM        0x13a
+#define LONG_OPT_MEM_PER_CPU     0x13a
 #define LONG_OPT_HINT            0x13b
 #define LONG_OPT_ACCTG_FREQ      0x13c
 
@@ -267,7 +267,7 @@ static void _opt_default()
 	opt.minsockets      = -1;
 	opt.mincores        = -1;
 	opt.minthreads      = -1;
-	opt.task_mem	    = -1;
+	opt.mem_per_cpu	    = -1;
 	opt.realmem	    = -1;
 	opt.tmpdisk	    = -1;
 
@@ -512,8 +512,9 @@ void set_options(const int argc, char **argv)
 		{"mincores",      required_argument, 0, LONG_OPT_MINCORES},
 		{"minthreads",    required_argument, 0, LONG_OPT_MINTHREADS},
 		{"mem",           required_argument, 0, LONG_OPT_MEM},
-		{"job-mem",       required_argument, 0, LONG_OPT_TASK_MEM},
-		{"task-mem",      required_argument, 0, LONG_OPT_TASK_MEM},
+		{"job-mem",       required_argument, 0, LONG_OPT_MEM_PER_CPU},
+		{"task-mem",      required_argument, 0, LONG_OPT_MEM_PER_CPU},
+		{"mem-per-cpu",   required_argument, 0, LONG_OPT_MEM_PER_CPU},
 		{"hint",          required_argument, 0, LONG_OPT_HINT},
 		{"sockets-per-node", required_argument, 0, LONG_OPT_SOCKETSPERNODE},
 		{"cores-per-socket", required_argument, 0, LONG_OPT_CORESPERSOCKET},
@@ -761,9 +762,9 @@ void set_options(const int argc, char **argv)
 				exit(1);
 			}
 			break;
-		case LONG_OPT_TASK_MEM:
-			opt.task_mem = (int) str_to_bytes(optarg);
-			if (opt.task_mem < 0) {
+		case LONG_OPT_MEM_PER_CPU:
+			opt.mem_per_cpu = (int) str_to_bytes(optarg);
+			if (opt.mem_per_cpu < 0) {
 				error("invalid memory constraint %s", 
 				      optarg);
 				exit(1);
@@ -1015,15 +1016,11 @@ static bool _opt_verify(void)
 		verified = false;
 	}
 
-        /* When CR with memory as a CR is enabled we need to assign
-	 * adequate value or check the value to opt.mem */
-	if ((opt.realmem >= -1) && (opt.task_mem > 0)) {
-		if (opt.realmem == -1) {
-			opt.realmem = opt.task_mem;
-		} else if (opt.realmem < opt.task_mem) {
-			info("mem < task-mem - resizing mem to be equal "
-			     "to task-mem");
-			opt.realmem = opt.task_mem;
+	if ((opt.realmem > -1) && (opt.mem_per_cpu > -1)) {
+		if (opt.realmem < opt.mem_per_cpu) {
+			info("mem < mem-per-cpu - resizing mem to be equal "
+			     "to mem-per-cpu");
+			opt.realmem = opt.mem_per_cpu;
 		}
 	}
 	
@@ -1173,8 +1170,8 @@ static char *print_constraints()
 	if (opt.realmem > 0)
 		xstrfmtcat(buf, "mem=%dM ", opt.realmem);
 
-	if (opt.task_mem > 0)
-		xstrfmtcat(buf, "task-mem=%dM ", opt.task_mem);
+	if (opt.mem_per_cpu > 0)
+		xstrfmtcat(buf, "mem-per-cpu=%dM ", opt.mem_per_cpu);
 
 	if (opt.tmpdisk > 0)
 		xstrfmtcat(buf, "tmp=%ld ", opt.tmpdisk);
@@ -1353,7 +1350,7 @@ static void _usage(void)
 "              [--mail-type=type] [--mail-user=user][--nice[=value]]\n"
 "              [--bell] [--no-bell] [--kill-command[=signal]]\n"
 "              [--nodefile=file] [--nodelist=hosts] [--exclude=hosts]\n"
-"              [--network=type]\n"
+"              [--network=type] [--mem-per-cpu=MB]\n"
 "              executable [args...]\n");
 }
 
@@ -1416,8 +1413,8 @@ static void _help(void)
 "Consumable resources related options:\n" 
 "      --exclusive             allocate nodes in exclusive mode when\n" 
 "                              cpu consumable resource is enabled\n"
-"      --task-mem=MB           maximum amount of real memory per task\n"
-"                              required by the job.\n" 
+"      --mem-per-cpu=MB        maximum amount of real memory per allocated\n"
+"                              CPU required by the job.\n" 
 "                              --mem >= --job-mem if --mem is specified.\n" 
 "\n"
 "Affinity/Multi-core options: (when the task/affinity plugin is enabled)\n" 
diff --git a/src/salloc/opt.h b/src/salloc/opt.h
index 972444517d95c9382d75ee8cdd176ef8cfb6a8fb..2ca869cc76af50fdb7291e96888da3ee8141e74b 100644
--- a/src/salloc/opt.h
+++ b/src/salloc/opt.h
@@ -107,7 +107,7 @@ typedef struct salloc_options {
 	int minsockets;		/* --minsockets=n		*/
 	int mincores;		/* --mincores=n			*/
 	int minthreads;		/* --minthreads=n		*/
-	int task_mem;		/* --task-mem=n			*/
+	int mem_per_cpu;	/* --mem-per-cpu=n		*/
 	int realmem;		/* --mem=n			*/
 	long tmpdisk;		/* --tmp=n			*/
 	char *constraints;	/* --constraints=, -C constraint*/
diff --git a/src/salloc/salloc.c b/src/salloc/salloc.c
index edb65cb94d70fce8145eaffe3367791a47129b67..79c5616b3c2a02b988f90101d0e600dd75ee208b 100644
--- a/src/salloc/salloc.c
+++ b/src/salloc/salloc.c
@@ -217,10 +217,6 @@ int main(int argc, char *argv[])
 		env_array_append_fmt(&env, "SLURM_ACCTG_FREQ", "%d",
 			opt.acctg_freq);
 	}
-	if (opt.task_mem >= 0) {
-		env_array_append_fmt(&env, "SLURM_TASK_MEM", "%d",
-			opt.task_mem);
-	}
 	if (opt.network)
 		env_array_append_fmt(&env, "SLURM_NETWORK", "%s", opt.network);
 	env_array_set_environment(env);
@@ -360,6 +356,8 @@ static int fill_job_desc_from_opts(job_desc_msg_t *desc)
 		desc->job_min_threads = opt.minthreads;
 	if (opt.realmem > -1)
 		desc->job_min_memory = opt.realmem;
+	else if (opt.mem_per_cpu > -1)
+		desc->job_min_memory = opt.mem_per_cpu | MEM_PER_CPU;
 	if (opt.tmpdisk > -1)
 		desc->job_min_tmp_disk = opt.tmpdisk;
 	if (opt.overcommit) {
diff --git a/src/sbatch/opt.c b/src/sbatch/opt.c
index 365782b68d917a3c395e87e1549c1ee79d4d9a49..263b70c204ac4b640d48e1c5f6b8ff135f2664ad 100644
--- a/src/sbatch/opt.c
+++ b/src/sbatch/opt.c
@@ -122,7 +122,7 @@
 #define LONG_OPT_NTASKSPERNODE   0x136
 #define LONG_OPT_NTASKSPERSOCKET 0x137
 #define LONG_OPT_NTASKSPERCORE   0x138
-#define LONG_OPT_TASK_MEM        0x13a
+#define LONG_OPT_MEM_PER_CPU     0x13a
 #define LONG_OPT_HINT            0x13b
 #define LONG_OPT_BLRTS_IMAGE     0x140
 #define LONG_OPT_LINUX_IMAGE     0x141
@@ -269,7 +269,7 @@ static void _opt_default()
 	opt.minsockets      = -1;
 	opt.mincores        = -1;
 	opt.minthreads      = -1;
-	opt.task_mem	    = -1;
+	opt.mem_per_cpu	    = -1;
 	opt.realmem	    = -1;
 	opt.tmpdisk	    = -1;
 
@@ -523,8 +523,9 @@ static struct option long_options[] = {
 	{"mincores",      required_argument, 0, LONG_OPT_MINCORES},
 	{"minthreads",    required_argument, 0, LONG_OPT_MINTHREADS},
 	{"mem",           required_argument, 0, LONG_OPT_MEM},
-	{"job-mem",       required_argument, 0, LONG_OPT_TASK_MEM},
-	{"task-mem",      required_argument, 0, LONG_OPT_TASK_MEM},
+	{"job-mem",       required_argument, 0, LONG_OPT_MEM_PER_CPU},
+	{"task-mem",      required_argument, 0, LONG_OPT_MEM_PER_CPU},
+	{"mem-per-cpu",   required_argument, 0, LONG_OPT_MEM_PER_CPU},
 	{"hint",          required_argument, 0, LONG_OPT_HINT},
 	{"tmp",           required_argument, 0, LONG_OPT_TMP},
 	{"jobid",         required_argument, 0, LONG_OPT_JOBID},
@@ -1150,14 +1151,13 @@ static void _set_options(int argc, char **argv)
 				exit(1);
 			}
 			break;
-		case LONG_OPT_TASK_MEM:
-			opt.task_mem = (int) str_to_bytes(optarg);
-			if (opt.task_mem < 0) {
+		case LONG_OPT_MEM_PER_CPU:
+			opt.mem_per_cpu = (int) str_to_bytes(optarg);
+			if (opt.mem_per_cpu < 0) {
 				error("invalid memory constraint %s", 
 				      optarg);
 				exit(1);
 			}
-			setenvf(NULL, "SLURM_TASK_MEM", "%d", opt.task_mem);
 			break;
 		case LONG_OPT_TMP:
 			opt.tmpdisk = str_to_bytes(optarg);
@@ -1773,15 +1773,11 @@ static bool _opt_verify(void)
 		verified = false;
 	}
 
-        /* When CR with memory as a CR is enabled we need to assign
-	 * adequate value or check the value to opt.mem */
-	if ((opt.realmem >= -1) && (opt.task_mem > 0)) {
-		if (opt.realmem == -1) {
-			opt.realmem = opt.task_mem;
-		} else if (opt.realmem < opt.task_mem) {
-			info("mem < task-mem - resizing mem to be equal "
-			     "to task-mem");
-			opt.realmem = opt.task_mem;
+	if ((opt.realmem > -1) && (opt.mem_per_cpu > -1)) {
+		if (opt.realmem < opt.mem_per_cpu) {
+			info("mem < mem-per-cpu - resizing mem to be equal "
+			     "to mem-per-cpu");
+			opt.realmem = opt.mem_per_cpu;
 		}
 	}
 	
@@ -1979,8 +1975,8 @@ static char *print_constraints()
 	if (opt.realmem > 0)
 		xstrfmtcat(buf, "mem=%dM ", opt.realmem);
 
-	if (opt.task_mem > 0)
-		xstrfmtcat(buf, "task-mem=%dM ", opt.task_mem);
+	if (opt.mem_per_cpu > 0)
+		xstrfmtcat(buf, "mem-per-cpu=%dM ", opt.mem_per_cpu);
 
 	if (opt.tmpdisk > 0)
 		xstrfmtcat(buf, "tmp=%ld ", opt.tmpdisk);
@@ -2154,7 +2150,7 @@ static void _usage(void)
 "              [--mail-type=type] [--mail-user=user][--nice[=value]]\n"
 "              [--requeue] [--no-requeue] [--ntasks-per-node=n] [--propagate]\n"
 "              [--nodefile=file] [--nodelist=hosts] [--exclude=hosts]\n"
-"              [--network=type]\n"
+"              [--network=type] [--mem-per-cpu=MB]\n"
 "              executable [args...]\n");
 }
 
@@ -2219,9 +2215,8 @@ static void _help(void)
 "Consumable resources related options:\n" 
 "      --exclusive             allocate nodes in exclusive mode when\n" 
 "                              cpu consumable resource is enabled\n"
-"      --task-mem=MB           maximum amount of real memory per task\n"
-"                              required by the job.\n" 
-"                              --mem >= --job-mem if --mem is specified.\n" 
+"      --mem-per-cpu=MB        maximum amount of real memory per CPU\n"
+"                              allocated to the job.\n" 
 "\n"
 "Affinity/Multi-core options: (when the task/affinity plugin is enabled)\n" 
 "  -B --extra-node-info=S[:C[:T]]            Expands to:\n"
diff --git a/src/sbatch/opt.h b/src/sbatch/opt.h
index e2403dac574e1b2216a244a63dbf3e0885a0434b..7d83a3c3cc79a4fe71f4ed8978667b709b59d530 100644
--- a/src/sbatch/opt.h
+++ b/src/sbatch/opt.h
@@ -113,7 +113,7 @@ typedef struct sbatch_options {
 	int minsockets;		/* --minsockets=n		*/
 	int mincores;		/* --mincores=n			*/
 	int minthreads;		/* --minthreads=n		*/
-	int task_mem;		/* --task-mem=n			*/
+	int mem_per_cpu;	/* --mem-per-cpu=n		*/
 	int realmem;		/* --mem=n			*/
 	long tmpdisk;		/* --tmp=n			*/
 	char *constraints;	/* --constraints=, -C constraint*/
diff --git a/src/sbatch/sbatch.c b/src/sbatch/sbatch.c
index 03d0377895b4effcb5a8acab5937ca0d2f35fe32..92d42e91df75dd86c6ee2613d9dfcbbf9f66b434 100644
--- a/src/sbatch/sbatch.c
+++ b/src/sbatch/sbatch.c
@@ -227,6 +227,8 @@ static int fill_job_desc_from_opts(job_desc_msg_t *desc)
 		desc->job_min_threads = opt.minthreads;
 	if (opt.realmem > -1)
 		desc->job_min_memory = opt.realmem;
+	else if (opt.mem_per_cpu > -1)
+		desc->job_min_memory = opt.mem_per_cpu | MEM_PER_CPU;
 	if (opt.tmpdisk > -1)
 		desc->job_min_tmp_disk = opt.tmpdisk;
 	if (opt.overcommit) {
diff --git a/src/scontrol/update_job.c b/src/scontrol/update_job.c
index 49ae2df8ab6a1baaf52ce4c9e21d80307275241b..b981d761dfd0bec2cd6f4aa7cd81de2166035499 100644
--- a/src/scontrol/update_job.c
+++ b/src/scontrol/update_job.c
@@ -324,12 +324,19 @@ scontrol_update_job (int argc, char *argv[])
 						(char **) NULL, 10);
 			update_cnt++;
 		}
-		else if (strncasecmp(argv[i], "MinMemory=", 10) == 0) {
+		else if (strncasecmp(argv[i], "MinMemoryNode=", 14) == 0) {
 			job_msg.job_min_memory = 
-				(uint32_t) strtol(&argv[i][10], 
+				(uint32_t) strtol(&argv[i][14], 
 						(char **) NULL, 10);
 			update_cnt++;
 		}
+		else if (strncasecmp(argv[i], "MinMemoryCPU=", 13) == 0) {
+			job_msg.job_min_memory =
+				(uint32_t) strtol(&argv[i][13],
+						(char **) NULL, 10);
+			job_msg.job_min_memory |= MEM_PER_CPU;
+			update_cnt++;
+		}
 		else if (strncasecmp(argv[i], "MinTmpDisk=", 11) == 0) {
 			job_msg.job_min_tmp_disk = 
 				(uint32_t) strtol(&argv[i][11], 
diff --git a/src/slurmctld/job_mgr.c b/src/slurmctld/job_mgr.c
index 12da6e651c4b0a432d1b930c2a4ac2ea33d6bcd4..fb07c2300c85536a9bb4754045ed0a8de1adda8e 100644
--- a/src/slurmctld/job_mgr.c
+++ b/src/slurmctld/job_mgr.c
@@ -2,8 +2,6 @@
  *  job_mgr.c - manage the job information of slurm
  *	Note: there is a global job list (job_list), time stamp 
  *	(last_job_update), and hash table (job_hash)
- *
- *  $Id$
  *****************************************************************************
  *  Copyright (C) 2002-2007 The Regents of the University of California.
  *  Copyright (C) 2008 Lawrence Livermore National Security.
@@ -1228,7 +1226,7 @@ void dump_job_desc(job_desc_msg_t * job_specs)
 	long kill_on_node_fail, shared, immediate;
 	long cpus_per_task, requeue, num_tasks, overcommit;
 	long ntasks_per_node, ntasks_per_socket, ntasks_per_core;
-	char buf[100];
+	char *mem_type, buf[100];
 
 	if (job_specs == NULL)
 		return;
@@ -1262,12 +1260,21 @@ void dump_job_desc(job_desc_msg_t * job_specs)
 	debug3("   job_min_cores=%ld job_min_threads=%ld",
 	       job_min_cores, job_min_threads);
 
-	job_min_memory   = (job_specs->job_min_memory != NO_VAL) ? 
-		(long) job_specs->job_min_memory : -1L;
+	if (job_specs->job_min_memory == NO_VAL) {
+		job_min_memory = -1L;
+		mem_type = "job";
+	} else if (job_specs->job_min_memory & MEM_PER_CPU) {
+		job_min_memory = (long) (job_specs->job_min_memory &
+					 (~MEM_PER_CPU));
+		mem_type = "cpu";
+	} else {
+		job_min_memory = (long) job_specs->job_min_memory;
+		mem_type = "job";
+	}
 	job_min_tmp_disk = (job_specs->job_min_tmp_disk != NO_VAL) ? 
 		(long) job_specs->job_min_tmp_disk : -1L;
-	debug3("   job_min_memory=%ld job_min_tmp_disk=%ld",
-	       job_min_memory, job_min_tmp_disk);
+	debug3("   min_memory_%s=%ld job_min_tmp_disk=%ld",
+	       mem_type, job_min_memory, job_min_tmp_disk);
 	immediate = (job_specs->immediate == 0) ? 0L : 1L;
 	debug3("   immediate=%ld features=%s",
 	       immediate, job_specs->features);
@@ -2854,6 +2861,53 @@ static char *_copy_nodelist_no_dup(char *node_list)
 	return xstrdup(buf);
 }
 
+static bool _valid_job_min_mem(job_desc_msg_t * job_desc_msg)
+{
+	uint32_t base_size = job_desc_msg->job_min_memory;
+	uint32_t size_limit = slurmctld_conf.max_mem_per_task;
+	uint16_t cpus_per_node;
+
+	if (size_limit == 0)
+		return true;
+
+	if ((base_size  & MEM_PER_CPU) && (size_limit & MEM_PER_CPU)) {
+		base_size  &= (~MEM_PER_CPU);
+		size_limit &= (~MEM_PER_CPU);
+		if (base_size <= size_limit)
+			return true;
+		return false;
+	}
+
+	if (((base_size  & MEM_PER_CPU) == 0) &&
+	    ((size_limit & MEM_PER_CPU) == 0)) {
+		if (base_size <= size_limit)
+			return true;
+		return false;
+	}
+
+	/* Our size is per CPU and the limit is per node, or vice versa.
+	 * The CPU count may vary by node, but we don't have a good
+	 * way to identify the specific nodes for the job at this
+	 * point, so just pick the first node as a basis for
+	 * enforcing MaxMemPerCPU. */
+	if (slurmctld_conf.fast_schedule)
+		cpus_per_node = node_record_table_ptr[0].config_ptr->cpus;
+	else
+		cpus_per_node = node_record_table_ptr[0].cpus;
+	if (job_desc_msg->num_procs != NO_VAL)
+		cpus_per_node = MIN(cpus_per_node, job_desc_msg->num_procs);
+	if (base_size & MEM_PER_CPU) {
+		base_size &= (~MEM_PER_CPU);
+		base_size *= cpus_per_node;
+	} else {
+		size_limit &= (~MEM_PER_CPU);
+		size_limit *= cpus_per_node;
+	}
+	if (base_size <= size_limit)
+		return true;
+	return false;
+}
+
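
_valid_job_min_mem() above has to compare a request and a configured limit that
may not be in the same units. When they differ, the per-CPU figure is scaled by
a CPU count (the first node's, as the comment notes) before comparing. A compact
sketch of that check with assumed values; mem_within_limit() is illustrative and
omits the num_procs clamp the real function applies.

#include <stdbool.h>
#include <stdint.h>

#define MEM_PER_CPU 0x80000000

static bool mem_within_limit(uint32_t request, uint32_t limit,
			     uint16_t cpus_per_node)
{
	/* Scale whichever side is per-CPU up to a per-node figure
	 * when the two sides are expressed in different units. */
	if ((request & MEM_PER_CPU) && !(limit & MEM_PER_CPU))
		request = (request & ~MEM_PER_CPU) * cpus_per_node;
	else if (!(request & MEM_PER_CPU) && (limit & MEM_PER_CPU))
		limit = (limit & ~MEM_PER_CPU) * cpus_per_node;
	return (request & ~MEM_PER_CPU) <= (limit & ~MEM_PER_CPU);
}

/* e.g. --mem=8000 against MaxMemPerCPU=1000 on an 8-CPU node:
 *      mem_within_limit(8000, 1000 | MEM_PER_CPU, 8) == true   */
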
 /* 
  * job_time_limit - terminate jobs which have exceeded their time limit
  * global: job_list - pointer global job list
@@ -3010,6 +3064,12 @@ static int _validate_job_desc(job_desc_msg_t * job_desc_msg, int allocate,
 			job_desc_msg->nice = NICE_OFFSET;
 	}
 
+	if (job_desc_msg->job_min_memory == NO_VAL) {
+		/* Default memory limit is DefMemPerCPU or DefMemPerNode,
+		 * if configured, otherwise no limit */
+		job_desc_msg->job_min_memory = slurmctld_conf.def_mem_per_task;
+	} else if (!_valid_job_min_mem(job_desc_msg))
+		return ESLURM_INVALID_TASK_MEMORY;
+
 	if (job_desc_msg->min_sockets == (uint16_t) NO_VAL)
 		job_desc_msg->min_sockets = 1;	/* default socket count of 1 */
 	if (job_desc_msg->min_cores == (uint16_t) NO_VAL)
@@ -3035,8 +3095,6 @@ static int _validate_job_desc(job_desc_msg_t * job_desc_msg, int allocate,
 		job_desc_msg->job_min_cores = 1;   /* default 1 core per socket */
 	if (job_desc_msg->job_min_threads == (uint16_t) NO_VAL)
 		job_desc_msg->job_min_threads = 1; /* default 1 thread per core */
-	if (job_desc_msg->job_min_memory == NO_VAL)
-		job_desc_msg->job_min_memory = 0;  /* default no memory limit */
 	if (job_desc_msg->job_min_tmp_disk == NO_VAL)
 		job_desc_msg->job_min_tmp_disk = 0;/* default 0MB disk per node */
 
@@ -3900,12 +3958,16 @@ int update_job(job_desc_msg_t * job_specs, uid_t uid)
 	if (job_specs->job_min_memory != NO_VAL) {
 		if ((!IS_JOB_PENDING(job_ptr)) || (detail_ptr == NULL))
 			error_code = ESLURM_DISABLED;
-		else if (super_user
-			 || (detail_ptr->job_min_memory
-			     > job_specs->job_min_memory)) {
+		else if (super_user) {
+			char *entity;
+			if (job_specs->job_min_memory & MEM_PER_CPU)
+				entity = "cpu";
+			else
+				entity = "job";
 			detail_ptr->job_min_memory = job_specs->job_min_memory;
-			info("update_job: setting job_min_memory to %u for "
-			     "job_id %u", job_specs->job_min_memory, 
+			info("update_job: setting min_memory_%s to %u for "
+			     "job_id %u", entity, 
+			     (job_specs->job_min_memory & (~MEM_PER_CPU)), 
 			     job_specs->job_id);
 		} else {
 			error("Attempt to increase job_min_memory for job %u",
diff --git a/src/slurmctld/node_scheduler.c b/src/slurmctld/node_scheduler.c
index 0bc50f8e961967d620ce6f64b5b0ca3acd23f8bc..c8870005075f37573758074caa52e584a18fa832 100644
--- a/src/slurmctld/node_scheduler.c
+++ b/src/slurmctld/node_scheduler.c
@@ -1196,7 +1196,8 @@ extern int job_req_node_filter(struct job_record *job_ptr,
 		FREE_NULL_BITMAP(feature_bitmap);
 		if (slurmctld_conf.fast_schedule) {
 			if ((detail_ptr->job_min_procs    > config_ptr->cpus       )
-			||  (detail_ptr->job_min_memory   > config_ptr->real_memory) 
+			||  ((detail_ptr->job_min_memory & (~MEM_PER_CPU)) > 
+			      config_ptr->real_memory) 
 			||  (detail_ptr->job_min_tmp_disk > config_ptr->tmp_disk)) {
 				bit_clear(avail_bitmap, i);
 				continue;
@@ -1213,7 +1214,8 @@ extern int job_req_node_filter(struct job_record *job_ptr,
 			}
 		} else {
 			if ((detail_ptr->job_min_procs    > node_ptr->cpus       )
-			||  (detail_ptr->job_min_memory   > node_ptr->real_memory) 
+			||  ((detail_ptr->job_min_memory & (~MEM_PER_CPU)) >
+			      node_ptr->real_memory) 
 			||  (detail_ptr->job_min_tmp_disk > node_ptr->tmp_disk)) {
 				bit_clear(avail_bitmap, i);
 				continue;
@@ -1284,7 +1286,8 @@ static int _build_node_list(struct job_record *job_ptr,
 
 		config_filter = 0;
 		if ((detail_ptr->job_min_procs    > config_ptr->cpus       )
-		||  (detail_ptr->job_min_memory   > config_ptr->real_memory) 
+		||  ((detail_ptr->job_min_memory & (~MEM_PER_CPU)) > 
+		      config_ptr->real_memory) 
 		||  (detail_ptr->job_min_tmp_disk > config_ptr->tmp_disk))
 			config_filter = 1;
 		if (mc_ptr
@@ -1391,7 +1394,8 @@ static void _filter_nodes_in_set(struct node_set *node_set_ptr,
 
 			node_con = node_record_table_ptr[i].config_ptr;
 			if ((job_con->job_min_procs    <= node_con->cpus)
-			&&  (job_con->job_min_memory   <= node_con->real_memory)
+			&&  ((job_con->job_min_memory & (~MEM_PER_CPU)) <= 
+			      node_con->real_memory)
 			&&  (job_con->job_min_tmp_disk <= node_con->tmp_disk))
 				job_ok = 1;
 			if (mc_ptr
@@ -1419,7 +1423,8 @@ static void _filter_nodes_in_set(struct node_set *node_set_ptr,
 
 			node_ptr = &node_record_table_ptr[i];
 			if ((job_con->job_min_procs    <= node_ptr->cpus)
-			&&  (job_con->job_min_memory   <= node_ptr->real_memory)
+			&&  ((job_con->job_min_memory & (~MEM_PER_CPU)) <= 
+			      node_ptr->real_memory)
 			&&  (job_con->job_min_tmp_disk <= node_ptr->tmp_disk))
 				job_ok = 1;
 			if (mc_ptr
diff --git a/src/slurmctld/slurmctld.h b/src/slurmctld/slurmctld.h
index 2fc834f8d8219b09cb60b963e3aef1d994e9a24b..d156b1b8a949e42548ee47acc4beb11886461faa 100644
--- a/src/slurmctld/slurmctld.h
+++ b/src/slurmctld/slurmctld.h
@@ -331,7 +331,8 @@ struct job_details {
 	uint16_t ntasks_per_node;	/* number of tasks on each node */
 	/* job constraints: */
 	uint32_t job_min_procs;		/* minimum processors per node */
-	uint32_t job_min_memory;	/* minimum memory per node, MB */
+	uint32_t job_min_memory;	/* minimum memory per node (MB) OR
+					 * memory per allocated CPU | MEM_PER_CPU */
 	uint32_t job_min_tmp_disk;	/* minimum tempdisk per node, MB */
 	char *err;			/* pathname of job's stderr file */
 	char *in;			/* pathname of job's stdin file */
diff --git a/src/slurmd/slurmd/req.c b/src/slurmd/slurmd/req.c
index dab7c08eebe584255b48a26ca40771625c0fc7a6..a7cdb943379b9dedb6fc7e953824d5f1968efd29 100644
--- a/src/slurmd/slurmd/req.c
+++ b/src/slurmd/slurmd/req.c
@@ -616,7 +616,8 @@ _check_job_credential(launch_tasks_request_msg_t *req, uid_t uid,
 {
 	slurm_cred_arg_t arg;
 	hostset_t        hset    = NULL;
-	bool             user_ok = _slurm_authorized_user(uid); 
+	bool             user_ok = _slurm_authorized_user(uid);
+	bool             verified = true;
 	int              host_index = -1;
 	int              rc;
 	slurm_cred_t     cred = req->cred;
@@ -628,24 +629,18 @@ _check_job_credential(launch_tasks_request_msg_t *req, uid_t uid,
 	 * credentials are checked
 	 */
 	if ((rc = slurm_cred_verify(conf->vctx, cred, &arg)) < 0) {
-		if (!user_ok) {
+		verified = false;
+		if (!user_ok)
 			return SLURM_ERROR;
-		} else {
+		else {
 			debug("_check_job_credential slurm_cred_verify failed:"
 			      " %m, but continuing anyway.");
 		}
 	}
 
-	/* Overwrite any memory limits in the RPC with 
-	 * contents of the credential */
-	req->job_mem  = arg.job_mem;
-	req->task_mem = arg.task_mem;
-
-	/*
-	 * If uid is the slurm user id or root, do not bother
-	 * performing validity check of the credential
-	 */
-	if (user_ok) {
+	/* If the uid is the SlurmUser or root and the credential is
+	 * bad, then do not attempt to validate the credential */
+	if (!verified) {
 		*step_hset = NULL;
 		if (rc >= 0) {
 			if ((hset = hostset_create(arg.hostlist)))
@@ -684,12 +679,11 @@ _check_job_credential(launch_tasks_request_msg_t *req, uid_t uid,
 	}
 
         if ((arg.alloc_lps_cnt > 0) && (tasks_to_launch > 0)) {
-
                 host_index = hostset_find(hset, conf->node_name);
 
                 /* Left in here for debugging purposes */
 #if(0)
-                if(host_index >= 0)
+                if (host_index >= 0)
                   info(" cons_res %u alloc_lps_cnt %u "
 			"task[%d] = %u = task_to_launch %d host %s ", 
 			arg.jobid, arg.alloc_lps_cnt, host_index, 
@@ -714,6 +708,20 @@ _check_job_credential(launch_tasks_request_msg_t *req, uid_t uid,
 		}
         }
 
+	/* Overwrite any memory limits in the RPC with 
+	 * contents of the credential */
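+	/* A per-CPU memory limit is converted to a per-node value
+	 * using the number of CPUs allocated to the job on this node */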
+	if (arg.job_mem & MEM_PER_CPU) {
+		req->job_mem = arg.job_mem & (~MEM_PER_CPU);
+		if (host_index >= 0)
+			req->job_mem *= arg.alloc_lps[host_index];
+	} else
+		req->job_mem = arg.job_mem;
+	req->task_mem = arg.task_mem;	/* Defunct */
+#if 0
+	info("mem orig:%u cpus:%u limit:%u", 
+	     arg.job_mem, arg.alloc_lps[host_index], req->job_mem);
+#endif
+
 	*step_hset = hset;
 	xfree(arg.hostlist);
 	arg.alloc_lps_cnt = 0;
diff --git a/src/squeue/print.c b/src/squeue/print.c
index 0985f5ed2d17d249bce68e559d6e7f1d082d771d..74b010636836d4032e4a630370bd9b0757df5594 100644
--- a/src/squeue/print.c
+++ b/src/squeue/print.c
@@ -892,6 +892,7 @@ int _print_job_min_memory(job_info_t * job, int width, bool right_justify,
 		_print_str("MIN_MEMORY", width, right_justify, true);
 	else {
 	    	tmp_char[0] = '\0';
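+		/* Strip the MEM_PER_CPU flag before printing the value */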
+		job->job_min_memory &= (~MEM_PER_CPU);
 		convert_num_unit((float)job->job_min_memory, min_mem, 
 				 sizeof(min_mem), UNIT_NONE);
 		strcat(tmp_char, min_mem);
diff --git a/src/squeue/sort.c b/src/squeue/sort.c
index 05c42f4f7e3b4eea4a1239a84348997e0d7cc7a0..2a0bce6371b830330e7fd0b2966a9e4fe8262828 100644
--- a/src/squeue/sort.c
+++ b/src/squeue/sort.c
@@ -455,6 +455,8 @@ static int _sort_job_by_min_memory(void *void1, void *void2)
 	job_info_t *job1 = (job_info_t *) void1;
 	job_info_t *job2 = (job_info_t *) void2;
 
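+	/* Strip the MEM_PER_CPU flag so the values compare numerically */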
+	job1->job_min_memory &= (~MEM_PER_CPU);
+	job2->job_min_memory &= (~MEM_PER_CPU);
 	diff = job1->job_min_memory - job2->job_min_memory;
 
 	if (reverse_order)
diff --git a/src/srun/allocate.c b/src/srun/allocate.c
index 8f7cb8715c0bcbfffc34b09e19141e72be756218..c06508cd172c97c6b938b5fbbe55094440ee3f0b 100644
--- a/src/srun/allocate.c
+++ b/src/srun/allocate.c
@@ -450,7 +450,9 @@ job_desc_msg_create_from_opts ()
 	if (opt.job_min_threads != NO_VAL)
 		j->job_min_threads  = opt.job_min_threads;
 	if (opt.job_min_memory != NO_VAL)
-		j->job_min_memory   = opt.job_min_memory;
+		j->job_min_memory = opt.job_min_memory;
+	else if (opt.mem_per_cpu != NO_VAL)
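+		/* Flag the value as memory per allocated CPU */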
+		j->job_min_memory = opt.mem_per_cpu | MEM_PER_CPU;
 	if (opt.job_min_tmp_disk != NO_VAL)
 		j->job_min_tmp_disk = opt.job_min_tmp_disk;
 	if (opt.overcommit) {
@@ -511,8 +513,6 @@ create_job_step(srun_job_t *job)
 		: (opt.nprocs*opt.cpus_per_task);
 	
 	job->ctx_params.relative = (uint16_t)opt.relative;
-	if (opt.task_mem != NO_VAL)
-		job->ctx_params.mem_per_task = (uint16_t)opt.task_mem;
 	job->ctx_params.ckpt_interval = (uint16_t)opt.ckpt_interval;
 	job->ctx_params.ckpt_path = opt.ckpt_path;
 	job->ctx_params.exclusive = (uint16_t)opt.exclusive;
diff --git a/src/srun/opt.c b/src/srun/opt.c
index 97d161f93a45eb57c90ae7154efd60f6e0490d7e..94bfff45fc2b4022f6ac3b8ae4f0fd245773d87c 100644
--- a/src/srun/opt.c
+++ b/src/srun/opt.c
@@ -1,8 +1,8 @@
 /*****************************************************************************\
  *  opt.c - options processing for srun
- *  $Id$
  *****************************************************************************
- *  Copyright (C) 2002-2006 The Regents of the University of California.
+ *  Copyright (C) 2002-2007 The Regents of the University of California.
+ *  Copyright (C) 2008 Lawrence Livermore National Security.
  *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
  *  Written by Mark Grondona <grondona1@llnl.gov>, et. al.
  *  LLNL-CODE-402394.
@@ -153,7 +153,7 @@
 #define LONG_OPT_NTASKSPERNODE	 0x136
 #define LONG_OPT_NTASKSPERSOCKET 0x137
 #define LONG_OPT_NTASKSPERCORE	 0x138
-#define LONG_OPT_TASK_MEM        0x13a
+#define LONG_OPT_MEM_PER_CPU     0x13a
 #define LONG_OPT_HINT	         0x13b
 #define LONG_OPT_BLRTS_IMAGE     0x140
 #define LONG_OPT_LINUX_IMAGE     0x141
@@ -656,7 +656,7 @@ static void _opt_default()
 	opt.job_min_cores   = NO_VAL;
 	opt.job_min_threads = NO_VAL;
 	opt.job_min_memory  = NO_VAL;
-	opt.task_mem        = NO_VAL;
+	opt.mem_per_cpu     = NO_VAL;
 	opt.job_min_tmp_disk= NO_VAL;
 
 	opt.hold	    = false;
@@ -777,7 +777,6 @@ env_vars_t env_vars[] = {
 {"SLURM_EXCLUSIVE",     OPT_EXCLUSIVE,  NULL,               NULL             },
 {"SLURM_OPEN_MODE",     OPT_OPEN_MODE,  NULL,               NULL             },
 {"SLURM_ACCTG_FREQ",    OPT_INT,        &opt.acctg_freq,    NULL             },
-{"SLURM_TASK_MEM",      OPT_INT,        &opt.task_mem,      NULL             },
 {"SLURM_NETWORK",       OPT_STRING,     &opt.network,       NULL             },
 {NULL, 0, NULL, NULL}
 };
@@ -991,8 +990,9 @@ static void set_options(const int argc, char **argv)
 		{"mincores",         required_argument, 0, LONG_OPT_MINCORES},
 		{"minthreads",       required_argument, 0, LONG_OPT_MINTHREADS},
 		{"mem",              required_argument, 0, LONG_OPT_MEM},
-		{"job-mem",          required_argument, 0, LONG_OPT_TASK_MEM},
-		{"task-mem",         required_argument, 0, LONG_OPT_TASK_MEM},
+		{"job-mem",          required_argument, 0, LONG_OPT_MEM_PER_CPU},
+		{"task-mem",         required_argument, 0, LONG_OPT_MEM_PER_CPU},
+		{"mem-per-cpu",      required_argument, 0, LONG_OPT_MEM_PER_CPU},
 		{"hint",             required_argument, 0, LONG_OPT_HINT},
 		{"mpi",              required_argument, 0, LONG_OPT_MPI},
 		{"tmp",              required_argument, 0, LONG_OPT_TMP},
@@ -1314,9 +1314,9 @@ static void set_options(const int argc, char **argv)
 				exit(1);
 			}
 			break;
-		case LONG_OPT_TASK_MEM:
-			opt.task_mem = (int) str_to_bytes(optarg);
-			if (opt.task_mem < 0) {
+		case LONG_OPT_MEM_PER_CPU:
+			opt.mem_per_cpu = (int) str_to_bytes(optarg);
+			if (opt.mem_per_cpu < 0) {
 				error("invalid memory constraint %s", 
 				      optarg);
 				exit(1);
@@ -1626,15 +1626,11 @@ static void _opt_args(int argc, char **argv)
 
 	set_options(argc, argv);
 
-        /* When CR with memory as a CR is enabled we need to assign
-	 * adequate value or check the value to opt.mem */
-	if ((opt.job_min_memory >= -1) && (opt.task_mem > 0)) {
-		if (opt.job_min_memory == -1) {
-			opt.job_min_memory = opt.task_mem;
-		} else if (opt.job_min_memory < opt.task_mem) {
-			info("mem < task-mem - resizing mem to be equal "
-			     "to task-mem");
-			opt.job_min_memory = opt.task_mem;
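+	/* If both --mem and --mem-per-cpu are specified, ensure that
+	 * --mem is at least as large as --mem-per-cpu */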
+	if ((opt.job_min_memory > -1) && (opt.mem_per_cpu > -1)) {
+		if (opt.job_min_memory < opt.mem_per_cpu) {
+			info("mem < mem-per-cpu - resizing mem to be equal "
+			     "to mem-per-cpu");
+			opt.job_min_memory = opt.mem_per_cpu;
 		}
 	}
 
@@ -2030,19 +2026,6 @@ static bool _opt_verify(void)
 		xfree(sched_name);
 	}
 
-	if (opt.task_mem > 0) {
-		uint32_t max_mem = slurm_get_max_mem_per_task();
-		if (max_mem && (opt.task_mem > max_mem)) {
-			info("WARNING: Reducing --task-mem to system maximum "
-			     "of %u MB", max_mem);
-			opt.task_mem = max_mem;
-		}	
-	} else {
-		uint32_t max_mem = slurm_get_def_mem_per_task();
-		if (max_mem)
-			opt.task_mem = max_mem;
-	}
-
 	return verified;
 }
 
@@ -2069,8 +2052,8 @@ static char *print_constraints()
 	if (opt.job_min_memory > 0)
 		xstrfmtcat(buf, "mem=%dM ", opt.job_min_memory);
 
-	if (opt.task_mem > 0)
-		xstrfmtcat(buf, "task-mem=%dM ", opt.task_mem);
+	if (opt.mem_per_cpu > 0)
+		xstrfmtcat(buf, "mem-per-cpu=%dM ", opt.mem_per_cpu);
 
 	if (opt.job_min_tmp_disk > 0)
 		xstrfmtcat(buf, "tmp=%ld ", opt.job_min_tmp_disk);
@@ -2223,7 +2206,7 @@ static void _usage(void)
 "            [--kill-on-bad-exit] [--propagate[=rlimits] [--comment=name]\n"
 "            [--cpu_bind=...] [--mem_bind=...] [--network=type]\n"
 "            [--ntasks-per-node=n] [--ntasks-per-socket=n]\n"
-"            [--ntasks-per-core=n]\n"
+"            [--ntasks-per-core=n] [--mem-per-cpu=MB]\n"
 #ifdef HAVE_BG		/* Blue gene specific options */
 "            [--geometry=XxYxZ] [--conn-type=type] [--no-rotate] [--reboot]\n"
 "            [--blrts-image=path] [--linux-image=path]\n"
@@ -2321,8 +2304,8 @@ static void _help(void)
 "      --exclusive             allocate nodes in exclusive mode when\n" 
 "                              cpu consumable resource is enabled\n"
 "                              or don't share CPUs for job steps\n"
-"      --task-mem=MB           maximum amount of real memory per task\n"
-"                              required by the job.\n" 
+"      --mem-per-cpu=MB        maximum amount of real memory per allocated\n"
+"                              CPU required by the job.\n" 
 "                              --mem >= --job-mem if --mem is specified.\n" 
 "\n"
 "Affinity/Multi-core options: (when the task/affinity plugin is enabled)\n" 
diff --git a/src/srun/opt.h b/src/srun/opt.h
index f4668e37e2ee650a91fed53dacfcb9890a94a5c1..394051e9e76eedaa2cf18ba96920e02abaae640a 100644
--- a/src/srun/opt.h
+++ b/src/srun/opt.h
@@ -170,7 +170,7 @@ typedef struct srun_options {
 	int32_t job_min_cores;	/* --mincores=n			*/
 	int32_t job_min_threads;/* --minthreads=n		*/
 	int32_t job_min_memory;	/* --mem=n			*/
-	int32_t task_mem;	/* --task-mem=n			*/
+	int32_t mem_per_cpu;	/* --mem-per-cpu=n		*/
 	long job_min_tmp_disk;	/* --tmp=n			*/
 	char *constraints;	/* --constraints=, -C constraint*/
 	bool contiguous;	/* --contiguous			*/
diff --git a/testsuite/expect/test1.23 b/testsuite/expect/test1.23
index ab47df6699f5e4a9ebed3bf66321758dc9863a95..2eb71e58cdb768065c5a98b364aad4b203e23991 100755
--- a/testsuite/expect/test1.23
+++ b/testsuite/expect/test1.23
@@ -108,7 +108,7 @@ set host_0      ""
 set timeout $max_job_delay
 set srun_pid [spawn $srun -N1 -l --mem=999999 -t1 $bin_hostname]
 expect {
-	-re "configuration is not available" {
+	-re "not available" {
 		send_user "This error is expected, no worries\n"
 		set err_msg 1
 		exp_continue
diff --git a/testsuite/expect/test15.7 b/testsuite/expect/test15.7
index ee9a6e126d01f5f8b8fe40d6b8ed3b32b4e930be..f928c5ba58f829dfe32cdd49cded983d14074f77 100755
--- a/testsuite/expect/test15.7
+++ b/testsuite/expect/test15.7
@@ -101,7 +101,7 @@ expect {
 		}
 		exp_continue
 	}
-	-re "MinMemory=($number)" {
+	-re "MinMemoryNode=($number)" {
 		set read_mem $expect_out(1,string)
 		if {$read_mem == $mem_size} {
 			incr matches
diff --git a/testsuite/expect/test17.10 b/testsuite/expect/test17.10
index e50a538e22ab4b5b5a9d031ef56d4fc8562a1ccc..d2b11f3a26fff6991a28361beff0d9cdc9f2f539 100755
--- a/testsuite/expect/test17.10
+++ b/testsuite/expect/test17.10
@@ -104,7 +104,7 @@ expect {
 		}
 		exp_continue
 	}
-	-re "MinMemory=($number)" {
+	-re "MinMemoryNode=($number)" {
 		set read_mem $expect_out(1,string)
 		if {$read_mem == $mem_size} {
 			incr matches