diff --git a/NEWS b/NEWS
index 489f9ccc784028aa27fb93953007833186f7536c..74c286026f6d814ca5ba60e9bd8cef2b098db708 100644
--- a/NEWS
+++ b/NEWS
@@ -14,7 +14,8 @@ documents those changes that are of interest to users and admins.
 -- New srun task distribution options to -m: plane
 -- Multi-core support in sinfo, squeue, and scontrol.
 -- Memory can be treated as a consumable resource.
- -- New srun options --ntasks_per_[node|socket|core].
+ -- New srun options --ntasks-per-[node|socket|core].
+ -- CR_CORE consumable resource and srun --hint support (HP patch patch.1.2.0.pre4.061017.crcore_hints).

 * Changes in SLURM 1.2.0-pre3
 =============================
diff --git a/doc/html/mc_support.shtml b/doc/html/mc_support.shtml
index 8c838d2fb60e8de00a91c8a4648053781f10dbd6..3618f1ad8229e1204fefd4a71d3143b2d3ae43ee 100644
--- a/doc/html/mc_support.shtml
+++ b/doc/html/mc_support.shtml
@@ -113,7 +113,7 @@ to dedicate to a job (minimum or range)
 <td> maximum amount of real memory per node required by the job.
 </td></tr>
 <tr><td colspan=2>
-<b><a href="#srun_ntasks">Task invocation as a function of logical processors</a></b>
+<b><a href="#srun_ntasks">Task invocation control</a></b>
 </td></tr>
 <tr>
   <td> --ntasks-per-node=<i>ntasks</i></td>
@@ -125,6 +125,19 @@ to dedicate to a job (minimum or range)
   <td> --ntasks-per-core=<i>ntasks</i></td>
   <td> number of tasks to invoke on each core
 </td></tr>
+<tr><td colspan=2>
+<b><a href="#srun_hints">Application hints</a></b>
+</td></tr>
+<tr>
+  <td> --hint=compute_bound</td>
+  <td> use all cores in each physical CPU
+</td></tr>
+<tr>
+  <td> --hint=memory_bound</td>
+  <td> use only one core in each physical CPU
+</td></tr>
+<tr>
+  <td> --hint=[no]multithread</td>
+  <td> [don't] use extra threads with in-core multi-threading
+</td></tr>
 </table>

 <p>
@@ -394,6 +407,54 @@ behavior of these flags:

 <p>See also 'srun --help' and 'man srun'</p>

+<a name="srun_hints">
+<h3>Application hints</h3></a>
+
+Different applications have different resource requirements. Some
+applications are computationally intensive but require little or no
+inter-process communication. Some applications are memory bound,
+saturating the memory bandwidth of a processor before exhausting its
+computational capabilities. Other applications are highly
+communication intensive, causing processes to block while awaiting
+messages from other processes. Applications with these different
+properties all tend to run well on a multi-core system, given the
+right task-to-processor mapping.
+
+For computationally intensive applications, all cores in a multi-core
+system would normally be used. For memory bound applications, using
+only a single core on each CPU yields the highest per-core memory
+bandwidth. For communication intensive applications, using in-core
+multi-threading (e.g. hyperthreading, SMT, or TMT) may also improve
+performance.
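<p>
Internally each hint collapses to nothing more than min/max bounds on
the sockets, cores, and threads requested per node, plus a CPU binding
preference. The fragment below is an editorial condensation of the
hint handling this patch adds to srun's option parser (error handling
and the help text omitted):
</p>

<pre>
    /* condensed sketch of srun's --hint token handling */
    if (strcasecmp(tok, "compute_bound") == 0) {
            min_sockets = 1;  max_sockets = INT_MAX;  /* all sockets      */
            min_cores   = 1;  max_cores   = INT_MAX;  /* all cores        */
            cpu_bind_type |= CPU_BIND_TO_CORES;
    } else if (strcasecmp(tok, "memory_bound") == 0) {
            min_cores = 1;    max_cores = 1;          /* one core per CPU */
            cpu_bind_type |= CPU_BIND_TO_CORES;
    } else if (strcasecmp(tok, "multithread") == 0) {
            min_threads = 1;  max_threads = INT_MAX;  /* use SMT threads  */
            cpu_bind_type |= CPU_BIND_TO_THREADS;
    } else if (strcasecmp(tok, "nomultithread") == 0) {
            min_threads = 1;  max_threads = 1;        /* skip SMT threads */
            cpu_bind_type |= CPU_BIND_TO_THREADS;
    }
</pre>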
+The following command line flags can be used to communicate these
+types of application hints to the SLURM multi-core support:
+
+<PRE>
+    --hint=             Bind tasks according to application hints
+        compute_bound   use all cores in each physical CPU
+        memory_bound    use only one core in each physical CPU
+        [no]multithread [don't] use extra threads with in-core multi-threading
+        help            show this help message
+</PRE>
+
+For example, given a cluster with nodes containing two sockets,
+each containing two cores, the following commands illustrate the
+behavior of these flags:
+<pre>
+    % srun -n 4 --hint=compute_bound --cpu_bind=verbose sleep 1
+    setting affinity of task 0 pid 15425 on host hydra12 to mask 0x1
+    setting affinity of task 2 pid 15427 on host hydra12 to mask 0x2
+    setting affinity of task 1 pid 15426 on host hydra12 to mask 0x4
+    setting affinity of task 3 pid 15428 on host hydra12 to mask 0x8
+
+    % srun -n 4 --hint=memory_bound --cpu_bind=verbose sleep 1
+    setting affinity of task 1 pid 15551 on host hydra12 to mask 0x4
+    setting affinity of task 0 pid 15550 on host hydra12 to mask 0x1
+    setting affinity of task 2 pid 14974 on host <b>hydra13</b> to mask 0x1
+    setting affinity of task 3 pid 14975 on host <b>hydra13</b> to mask 0x4
+</pre>
+
+<p>See also 'srun --hint=help' and 'man srun'</p>
 <!-------------------------------------------------------------------------->
 <a name=motivation>
 <h2>Motivation behind high-level srun flags</h2></a>
@@ -987,4 +1048,3 @@ using NodeName:

 <!--#include virtual="footer.txt"-->
-
diff --git a/doc/man/man1/srun.1 b/doc/man/man1/srun.1
index 9c552c32406aa9330d430d39970295012739290a..ed9063fb8e63cc3fd08c4ce1f75a1622c6ec56d2 100644
--- a/doc/man/man1/srun.1
+++ b/doc/man/man1/srun.1
@@ -565,14 +565,33 @@ instead of the node level.
 Masks will automatically be generated to bind the tasks to specific
 core unless \fB\-\-cpu_bind=none\fR is specified.
 .TP
+\fB\-\-hint\fR=\fItype\fR
+Bind tasks according to application hints
+.RS
+.TP
+.B compute_bound
+Select settings for compute bound applications:
+use all cores in each physical CPU
+.TP
+.B memory_bound
+Select settings for memory bound applications:
+use only one core in each physical CPU
+.TP
+.B [no]multithread
+[don't] use extra threads with in-core multi-threading,
+which can benefit communication intensive applications
+.TP
+.B help
+show this help message
+.RE
+.TP
 \fB\-\-cpu_bind\fR=[{\fIquiet,verbose\fR},]\fItype\fR
 Bind tasks to CPUs
 .RS
 .TP
-.B q[uiet],
+.B q[uiet]
 quietly bind before task runs (default)
 .TP
-.B v[erbose],
+.B v[erbose]
 verbosely report binding before task runs
 .TP
 .B no[ne]
@@ -590,8 +609,9 @@ with '0x' in which case they interpreted as hexadecimal values.
 .B mask_cpu:<list>
 bind by setting CPU masks on tasks as specified where <list> is
 <mask1>,<mask2>,...<maskN>.
-CPU masks are \fBalways\fR interpreted as hexadecimal values but can be
-preceded with an optional '0x'.
+CPU masks are \fBalways\fR interpreted as hexadecimal values.
+Note that masks must be preceded with a '0x' if they don't begin
+with [0-9] so they are seen as numerical values by srun.
 .TP
 .B sockets
 auto\-generated masks bind to sockets
@@ -655,10 +675,10 @@ options "\-\-cpu_bind=verbose,none \-\-mem_bind=verbose,none" to determine
 the specific configuration.
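.\" Editorial aside: the '0x' note above (and its \-\-mem_bind twin
.\" below) follows from the parser treating a token as a numeric mask
.\" only when it starts with a digit. A hedged sketch of such a parse,
.\" illustrative rather than the verbatim srun source:
.\"
.\"     if (s[0] == '0' && (s[1] == 'x' || s[1] == 'X'))
.\"             s += 2;                      /* optional 0x prefix */
.\"     mask = strtoul(s, NULL, 16);         /* always base 16     */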
.RS .TP -.B q[uiet], +.B q[uiet] quietly bind before task runs (default) .TP -.B v[erbose], +.B v[erbose] verbosely report binding before task runs .TP .B no[ne] @@ -680,8 +700,9 @@ with '0x' in which case they interpreted as hexadecimal values .B mask_mem:<list> bind by setting memory masks on tasks as specified where <list> is <mask1>,<mask2>,...<maskN>. -memory masks are \fBalways\fR interpreted as hexadecimal values but can be -preceded with an optional '0x' (not recommended) +memory masks are \fBalways\fR interpreted as hexadecimal values. +Note that masks must be preceded with a '0x' if they don't begin +with [0-9] so they are seen as numerical values by srun. .TP .B help show this help message diff --git a/slurm/slurm.h.in b/slurm/slurm.h.in index 3aba2e85a278e73baae6b0c16cd9d999f44a5788..d3599f1bed426573ed005e0b0926a91b41282e7f 100644 --- a/slurm/slurm.h.in +++ b/slurm/slurm.h.in @@ -467,7 +467,7 @@ typedef struct job_descriptor { /* For submit, allocate, and update requests */ char *account; /* charge to specified account */ char *network; /* network use spec */ char *comment; /* arbitrary comment (used by Moab scheduler) */ - uint16_t task_dist; /* see enum task_dist_state */ + uint16_t task_dist; /* see enum task_dist_state */ uint32_t plane_size; /* plane size when task_dist = SLURM_DIST_PLANE */ time_t begin_time; /* delay initiation until this time */ diff --git a/src/api/init_msg.c b/src/api/init_msg.c index c534e9c56716c59825507db269bd28414bb5c825..89c1bbc01155141d2da47df6af0e2538a9a52f1f 100644 --- a/src/api/init_msg.c +++ b/src/api/init_msg.c @@ -61,6 +61,11 @@ void slurm_init_job_desc_msg(job_desc_msg_t * job_desc_msg) job_desc_msg->comment = NULL; job_desc_msg->contiguous = (uint16_t) NO_VAL; job_desc_msg->cpus_per_task = (uint16_t) NO_VAL; + job_desc_msg->ntasks_per_node = (uint16_t) NO_VAL; + job_desc_msg->ntasks_per_socket = (uint16_t) NO_VAL; + job_desc_msg->ntasks_per_core = (uint16_t) NO_VAL; + job_desc_msg->task_dist = SLURM_DIST_CYCLIC; + job_desc_msg->plane_size = NO_VAL; job_desc_msg->dependency = NO_VAL; job_desc_msg->environment = ((char **) NULL); job_desc_msg->env_size = 0; diff --git a/src/common/slurm_resource_info.c b/src/common/slurm_resource_info.c index 9a95843391bf0bdeeb7a9eccf263af2cdbe07c84..3b6e654073e8d28367b22dd172ad9ea3cd8f1142 100644 --- a/src/common/slurm_resource_info.c +++ b/src/common/slurm_resource_info.c @@ -70,25 +70,28 @@ * Note: used in both the select/{linear,cons_res} plugins. 
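 * Editorial example (illustrative numbers): with CR_CORE on a
 * 2-socket x 2-core x 2-thread node where one core of socket 0 is
 * already allocated (alloc_cores = {1,0}, alloc_lps = 2), the CR_CORE
 * branch below first deducts the allocated lps (*cpus: 8 -> 6), then
 * caps availability at ((2-1) + (2-0)) free cores * 2 threads = 6.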
*/ int slurm_get_avail_procs(const int mxsockets, - const int mxcores, - const int mxthreads, - const int cpuspertask, - const int ntaskspernode, - const int ntaskspersocket, - const int ntaskspercore, - int *cpus, - int *sockets, - int *cores, - int *threads, - const int alloc_sockets, - const int alloc_lps, - const select_type_plugin_info_t cr_type) + const int mxcores, + const int mxthreads, + const int cpuspertask, + const int ntaskspernode, + const int ntaskspersocket, + const int ntaskspercore, + int *cpus, + int *sockets, + int *cores, + int *threads, + const int alloc_sockets, + const int *alloc_cores, + const int alloc_lps, + const select_type_plugin_info_t cr_type) { int avail_cpus = 0, max_cpus = 0; + int max_avail_cpus = INT_MAX; /* for alloc_* accounting */ int max_sockets = mxsockets; int max_cores = mxcores; int max_threads = mxthreads; int cpus_per_task = cpuspertask; + int i; /* pick defaults for any unspecified items */ if (cpus_per_task <= 0) @@ -113,15 +116,19 @@ int slurm_get_avail_procs(const int mxsockets, *sockets, *cores, *threads); info("get_avail_procs Ntask node %d sockets %d core %d ", ntaskspernode, ntaskspersocket, ntaskspercore); - info("get_avail_procs cr_type %d Allocated sockets %d lps %d ", - cr_type, alloc_sockets, alloc_lps); + info("get_avail_procs cr_type %d cpus %d Allocated sockets %d lps %d ", + cr_type, *cpus, alloc_sockets, alloc_lps); + if ((cr_type == CR_CORE) || (cr_type == CR_CORE_MEMORY)) { + for (i = 0; i < *sockets; i++) + info("get_avail_procs alloc_cores[%d] = %d", i, alloc_cores[i]); + } #endif if ((*threads <= 0) || (*cores <= 0) || (*sockets <= 0)) fatal(" ((threads <= 0) || (cores <= 0) || (sockets <= 0))"); - switch(cr_type) { - /* Nodes have no notion of socket, core, threads. Only one level - of logical processors */ + switch(cr_type) { + /* For the following CR types, nodes have no notion of socket, core, + and thread. 
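(Editorial: in the CR_CPU, CR_CPU_MEMORY and CR_MEMORY branch the cap
appears to come only from cpus_per_task and --ntasks-per-node; the
socket/core/thread maxima matter only for the second group.)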
Only one level of logical processors */ case CR_CPU: case CR_CPU_MEMORY: case CR_MEMORY: @@ -142,7 +149,7 @@ int slurm_get_avail_procs(const int mxsockets, max_cpus = MIN(max_cpus, ntaskspernode); } break; - /* Nodes contain sockets, cores, threads */ + /* For all other types, nodes contain sockets, cores, and threads */ case CR_SOCKET: case CR_SOCKET_MEMORY: case CR_CORE: @@ -161,7 +168,16 @@ int slurm_get_avail_procs(const int mxsockets, break; case CR_CORE: case CR_CORE_MEMORY: - /* Not yet implemented */ + *cpus -= alloc_lps; + if (*cpus < 0) + error(" cons_res: *cpus < 0"); + + if (alloc_lps > 0) { + max_avail_cpus = 0; + for (i=0; i<*sockets; i++) + max_avail_cpus += *cores - alloc_cores[i]; + max_avail_cpus *= *threads; + } break; default: break; @@ -175,16 +191,19 @@ int slurm_get_avail_procs(const int mxsockets, /*** compute an overall maximum cpu count honoring ntasks* ***/ max_cpus = *threads; if (ntaskspercore > 0) { - max_threads = MIN(max_cpus, ntaskspercore); + max_cpus = MIN(max_cpus, ntaskspercore); } max_cpus *= *cores; if (ntaskspersocket > 0) { - max_cpus = MIN(max_cpus, ntaskspersocket); + max_cpus = MIN(max_cpus, ntaskspersocket); } max_cpus *= *sockets; if (ntaskspernode > 0) { max_cpus = MIN(max_cpus, ntaskspernode); } + + /*** honor any availability maximum ***/ + max_cpus = MIN(max_cpus, max_avail_cpus); break; } @@ -196,6 +215,11 @@ int slurm_get_avail_procs(const int mxsockets, avail_cpus = MIN(avail_cpus, max_cpus); +#if(0) + info("get_avail_procs return cpus %d sockets %d cores %d threads %d ", + *cpus, *sockets, *cores, *threads); + info("get_avail_procs avail_cpus %d", avail_cpus); +#endif return(avail_cpus); } @@ -208,7 +232,7 @@ int slurm_get_avail_procs(const int mxsockets, */ void slurm_sprint_cpu_bind_type(char *str, cpu_bind_type_t cpu_bind_type) { - if (!str) + if (!str) return; str[0] = '\0'; @@ -246,7 +270,7 @@ void slurm_sprint_cpu_bind_type(char *str, cpu_bind_type_t cpu_bind_type) */ void slurm_sprint_mem_bind_type(char *str, mem_bind_type_t mem_bind_type) { - if (!str) + if (!str) return; str[0] = '\0'; diff --git a/src/common/slurm_resource_info.h b/src/common/slurm_resource_info.h index b9533f67e21807b5c389236b2f750e8dca367480..f74e0aadfde282495496dce754a571f656cce6b8 100644 --- a/src/common/slurm_resource_info.h +++ b/src/common/slurm_resource_info.h @@ -61,6 +61,7 @@ int slurm_get_avail_procs(const int mxsockets, int *cores, int *threads, const int alloc_sockets, + const int *alloc_cores, const int alloc_lps, const select_type_plugin_info_t cr_type); diff --git a/src/common/slurm_selecttype_info.c b/src/common/slurm_selecttype_info.c index 153cd4ec8e4a86134d564282d39a04a8c549d999..0c3fcd530d445c13e8205a7feaa32c405f2acb6c 100644 --- a/src/common/slurm_selecttype_info.c +++ b/src/common/slurm_selecttype_info.c @@ -46,7 +46,7 @@ * * Return SLURM_SUCCESS on success, or SLURM_ERROR otherwise */ -extern int parse_select_type_param(char *select_type_parameters, +int parse_select_type_param(char *select_type_parameters, select_type_plugin_info_t *param) { int rc = SLURM_SUCCESS; @@ -56,7 +56,7 @@ extern int parse_select_type_param(char *select_type_parameters, char *st_str = xstrdup(select_type_parameters); if ((str_parameters = strtok(st_str,",")) != NULL) { do { - if (strcasecmp(str_parameters, "CR_SOCKET") == 0) { + if (strcasecmp(str_parameters, "CR_Socket") == 0) { *param = CR_SOCKET; } else if (strcasecmp(str_parameters, "CR_Socket_Memory") == 0) { *param = CR_SOCKET_MEMORY; diff --git a/src/common/slurm_selecttype_info.h 
b/src/common/slurm_selecttype_info.h index d07efd7c6d356fd42346414072f939c063b0b5fd..e8156d78d2b00589d0a053e1fa3814bf18f5b4aa 100644 --- a/src/common/slurm_selecttype_info.h +++ b/src/common/slurm_selecttype_info.h @@ -43,7 +43,7 @@ #include <string.h> #include <slurm/slurm.h> -extern int parse_select_type_param(char *select_type_parameters, +int parse_select_type_param(char *select_type_parameters, select_type_plugin_info_t *param); #endif /*__SLURM_SELECTTYPE_INFO_H__*/ diff --git a/src/plugins/select/cons_res/select_cons_res.c b/src/plugins/select/cons_res/select_cons_res.c index 25276c7b02f9135b3dc7654138b4c3efc0f8cf35..600778407903a5461d341342159b6a7e8eac8aff 100644 --- a/src/plugins/select/cons_res/select_cons_res.c +++ b/src/plugins/select/cons_res/select_cons_res.c @@ -55,8 +55,6 @@ * The advantage of the consumable resource scheduling policy is that * the job throughput can increase dramatically. * - * $Id$ - * ***************************************************************************** * Copyright (C) 2005-2006 Hewlett-Packard Development Company, L.P. * Written by Susanne M. Balle <susanne.balle@hp.com>, who borrowed heavily @@ -115,6 +113,10 @@ #include "src/common/slurm_resource_info.h" #include "src/slurmctld/slurmctld.h" +#if 0 +#define CR_DEBUG 1 +#endif + /* * These variables are required by the generic plugin interface. If they * are not found in the plugin, the plugin loader will ignore it. @@ -155,7 +157,8 @@ struct node_cr_record { struct node_record *node_ptr; /* ptr to the node that own these resources */ uint32_t alloc_lps; /* cpu count reserved by already scheduled jobs */ uint32_t alloc_sockets; /* socket count reserved by already scheduled jobs */ - uint32_t *alloc_cores; /* core count reserved by already scheduled jobs */ + uint32_t *alloc_cores; /* core count per socket reserved by + * already scheduled jobs */ uint32_t alloc_memory; /* real memory reserved by already scheduled jobs */ struct node_cr_record *node_next;/* next entry with same hash index */ }; @@ -169,10 +172,14 @@ struct select_cr_job { int nhosts; /* number of hosts allocated to job */ char **host; /* hostname vector */ int *cpus; /* number of processors on each host */ - int *alloc_lps; /* number of allocated threads/lps on each host */ - int *alloc_sockets; /* number of allocated sockets on each host */ - int **alloc_cores; /* Allocated cores per socket on each host */ - int *alloc_memory; /* number of allocated MB of real memory on each host */ + int *alloc_lps; /* number of allocated threads/lps on + * each host */ + int *alloc_sockets; /* number of allocated sockets on each + * host */ + int **alloc_cores; /* number of allocated cores on each + * host */ + int *alloc_memory; /* number of allocated MB of real + * memory on each host */ int max_sockets; int max_cores; int max_threads; @@ -349,14 +356,13 @@ static void _get_resources_this_node(int *cpus, } *alloc_sockets = this_cr_node->alloc_sockets; *alloc_lps = this_cr_node->alloc_lps; -#if(0) - info("cons_res %d _get_resources host %s HW_ cpus %d sockets %d cores %d threads %d ", + + debug3("cons_res %d _get_resources host %s HW_ cpus %d sockets %d cores %d threads %d ", *jobid, this_cr_node->node_ptr->name, *cpus, *sockets, *cores, *threads); - info("cons_res %d _get_resources host %s Alloc_ sockets %d lps %d ", + debug3("cons_res %d _get_resources host %s Alloc_ sockets %d lps %d ", *jobid, this_cr_node->node_ptr->name, *alloc_sockets, *alloc_lps); -#endif } /* @@ -405,7 +411,7 @@ static int _get_avail_lps(struct job_record *job_ptr, 
int max_sockets = 0, max_cores = 0, max_threads = 0; int ntasks_per_node = 0, ntasks_per_socket = 0, ntasks_per_core = 0; int cpus, sockets, cores, threads; - int alloc_sockets = 0, alloc_lps = 0; + int alloc_sockets = 0, alloc_lps = 0; struct node_cr_record *this_cr_node; if (job_ptr->details && job_ptr->details->cpus_per_task) @@ -416,6 +422,12 @@ static int _get_avail_lps(struct job_record *job_ptr, max_cores = job_ptr->details->max_cores; if (job_ptr->details && job_ptr->details->max_threads) max_threads = job_ptr->details->max_threads; + if (job_ptr->details && job_ptr->details->ntasks_per_node) + ntasks_per_node = job_ptr->details->ntasks_per_node; + if (job_ptr->details && job_ptr->details->ntasks_per_socket) + ntasks_per_socket = job_ptr->details->ntasks_per_socket; + if (job_ptr->details && job_ptr->details->ntasks_per_core) + ntasks_per_core = job_ptr->details->ntasks_per_core; this_cr_node = _find_cr_node_record (select_node_ptr[index].node_ptr->name); if (this_cr_node == NULL) { @@ -425,7 +437,7 @@ static int _get_avail_lps(struct job_record *job_ptr, return avail_cpus; } _get_resources_this_node(&cpus, &sockets, &cores, &threads, - this_cr_node, &alloc_sockets, + this_cr_node, &alloc_sockets, &alloc_lps, &job_ptr->job_id); if (all_available) { alloc_sockets = 0; @@ -441,6 +453,7 @@ static int _get_avail_lps(struct job_record *job_ptr, ntasks_per_core, &cpus, &sockets, &cores, &threads, alloc_sockets, + (int *)this_cr_node->alloc_cores, alloc_lps, cr_type); return(avail_cpus); } @@ -500,29 +513,59 @@ static int _compute_c_b_task_dist(struct select_cr_job *job) int alloc_lps = 0; _get_resources_this_node(&cpus, &sockets, &cores, &threads, - this_node, &alloc_sockets, - &alloc_lps, &job->job_id); - - avail_cpus = slurm_get_avail_procs(job->max_sockets, - job->max_cores, - job->max_threads, - job->cpus_per_task, - job->ntasks_per_node, - job->ntasks_per_socket, - job->ntasks_per_core, - &cpus, - &sockets, - &cores, - &threads, - alloc_sockets, - alloc_lps, - cr_type); + this_node, + &alloc_sockets, + &alloc_lps, + &job->job_id); + + avail_cpus = slurm_get_avail_procs( + job->max_sockets, + job->max_cores, + job->max_threads, + job->cpus_per_task, + job->ntasks_per_node, + job->ntasks_per_socket, + job->ntasks_per_core, + &cpus, + &sockets, + &cores, + &threads, + alloc_sockets, + (int *)this_node->alloc_cores, + alloc_lps, + cr_type); break; } case CR_CORE: case CR_CORE_MEMORY: - /* Not implemented yet */ + { + int alloc_sockets = 0; + int alloc_lps = 0; + _get_resources_this_node(&cpus, &sockets, + &cores, &threads, + this_node, + &alloc_sockets, + &alloc_lps, + &job->job_id); + + avail_cpus = slurm_get_avail_procs( + job->max_sockets, + job->max_cores, + job->max_threads, + job->cpus_per_task, + job->ntasks_per_node, + job->ntasks_per_socket, + job->ntasks_per_core, + &cpus, + &sockets, + &cores, + &threads, + alloc_sockets, + (int *)this_node->alloc_cores, + alloc_lps, + cr_type); break; + } default: /* We should never get in here. If we do it is a bug */ @@ -569,12 +612,15 @@ static int _compute_c_b_task_dist(struct select_cr_job *job) * avoiding interference between co-allocated running jobs. * * In the consumable resources environment we need to determine the - * CPU or core layout schema within slurmctld. + * layout schema within slurmctld. 
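 * Editorial gloss: "block" fills every usable core of one socket
 * before moving to the next, while "cyclic" deals tasks out across
 * the sockets in turn; for CR_CORE and CR_CORE_MEMORY both paths now
 * also tally, per socket, how many cores the job takes so that
 * _clear_select_jobinfo() can return them when the job finishes.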
*/ static int _cr_dist(struct select_cr_job *job, const int cyclic) { - int rc = SLURM_SUCCESS; - int taskcount = 0; +#if(CR_DEBUG) + int i; +#endif + int j, rc = SLURM_SUCCESS; + int taskcount = 0; int maxtasks = job->nprocs; int host_index; int usable_cpus = 0; @@ -604,7 +650,7 @@ static int _cr_dist(struct select_cr_job *job, const int cyclic) if (bit_test(job->node_bitmap, host_index) == 0) continue; job_index++; - + this_cr_node = _find_cr_node_record( node_record_table_ptr[host_index].name); if (this_cr_node == NULL) { @@ -613,7 +659,7 @@ static int _cr_dist(struct select_cr_job *job, const int cyclic) return SLURM_ERROR; } - _get_resources_this_node(&usable_cpus, &usable_sockets, + _get_resources_this_node(&usable_cpus, &usable_sockets, &usable_cores, &usable_threads, this_cr_node, &alloc_sockets, &alloc_lps, &job->job_id); @@ -630,13 +676,22 @@ static int _cr_dist(struct select_cr_job *job, const int cyclic) &usable_cores, &usable_threads, alloc_sockets, + (int *)this_cr_node-> + alloc_cores, alloc_lps, cr_type); -#if(0) - info("_cr_dist %u avail_s %d _c %d _t %d alloc_s %d alloc_lps %d ", + +#if(CR_DEBUG) + info("cons_res: _cr_dist %u avail_s %d _c %d _t %d " + "alloc_s %d lps %d ", job->job_id, usable_sockets, usable_cores, usable_threads, alloc_sockets, alloc_lps); -#endif + if ((cr_type == CR_CORE) || (cr_type == CR_CORE_MEMORY)) + for(i=0; i<usable_sockets;i++) + info("cons_res: _cr_dist alloc_cores %d = %d", + i, this_cr_node->alloc_cores[i]); +#endif + if (avail_cpus == 0) { error(" cons_res: no available cpus on node %s", node_record_table_ptr[host_index].name); @@ -644,10 +699,19 @@ static int _cr_dist(struct select_cr_job *job, const int cyclic) maxtasks = job->alloc_lps[job_index]; taskcount = 0; job->alloc_sockets[job_index] = 0; + if ((cr_type == CR_CORE) || (cr_type == CR_CORE_MEMORY)) { + for (j = 0; + j < node_record_table_ptr[host_index].cores; + j++) + job->alloc_cores[job_index][j] = 0; + } if (cyclic == 0) { /* block lllp distribution */ int s, c, t; + int c_ok = 0; last_socket_index = -1; + int socket_cnt = -1; + int core_cnt = 0; for (s=0; s < usable_sockets; s++) { last_core_index = -1; @@ -662,21 +726,40 @@ static int _cr_dist(struct select_cr_job *job, const int cyclic) if (maxtasks <= taskcount) continue; if (last_socket_index != s) { - job->alloc_sockets[job_index]++; -#if(0) - info("block jid %u s %d c %d t %d tc %d", - job->job_id, s, c, t, - taskcount); -#endif + job->alloc_sockets + [job_index]++; last_socket_index = s; } + if (((cr_type == CR_CORE) + || (cr_type + == CR_CORE_MEMORY)) + && (c_ok == 0)) { + if (socket_cnt >= + node_record_table_ptr[host_index].sockets) + continue; + core_cnt = job->alloc_cores[job_index][socket_cnt] + + this_cr_node->alloc_cores[socket_cnt]; + if (core_cnt >= + node_record_table_ptr[host_index].cores) { + socket_cnt++; + } + job->alloc_cores[job_index][socket_cnt]++; + info("cons_res %u BLOCK job->alloc_cores[%d][%d] = %d", + job->job_id, job_index, socket_cnt, + job->alloc_cores[job_index][socket_cnt]); + if (c == (usable_cores-1)) + c_ok = 1; + } taskcount++; } } + socket_cnt++; } } else if (cyclic == 1) { /* cyclic lllp distribution */ int s, c, t; int max_s = 0; + int socket_cnt = -1; + int core_cnt = 0; for (t=0; t < usable_threads; t++) { if (maxtasks <= taskcount) @@ -694,20 +777,40 @@ static int _cr_dist(struct select_cr_job *job, const int cyclic) if(s == (usable_sockets-1)) max_s = 1; } + if (((cr_type == CR_CORE) + || (cr_type == CR_CORE_MEMORY)) + && (t == 0)) { + socket_cnt++; + if (socket_cnt >= + 
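/* editorial: once socket_cnt has walked past the node's last
 * socket there is nothing left to account against, so skip */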
node_record_table_ptr[host_index].sockets) + continue; + core_cnt = job->alloc_cores[job_index][socket_cnt] + + this_cr_node->alloc_cores[socket_cnt]; + if (core_cnt >= + node_record_table_ptr[host_index].cores) { + socket_cnt++; + } + job->alloc_cores[job_index][socket_cnt]++; + info("cons_res %u CYCLIC job->alloc_cores[%d][%d] = %d", + job->job_id, job_index, socket_cnt, + job->alloc_cores[job_index][socket_cnt]); + } + taskcount++; -#if(0) - info("cyclic jid %u s %d c %d t %d tc %d", - job->job_id, s, c, t, taskcount); -#endif } } } } -#if(0) +#if(CR_DEBUG) info("cons_res _cr_dist %u cyclic %d host %d %s alloc_ " "sockets %d lps %d ", job->job_id, cyclic, host_index, this_cr_node->node_ptr->name, job->alloc_sockets[job_index], job->alloc_lps[job_index]); + if ((cr_type == CR_CORE) || (cr_type == CR_CORE_MEMORY)) + for(i=0; i<usable_sockets;i++) + info("cons_res _cr_dist: %u alloc_cores[%d][%d] = %d", + job->job_id, i, job_index, + job->alloc_cores[job_index][i]); #endif } return rc; @@ -720,7 +823,7 @@ static int _cr_dist(struct select_cr_job *job, const int cyclic) */ static int _cr_exclusive_dist(struct select_cr_job *job) { - int i; + int i, j; int host_index = 0; for (i = 0; i < node_record_count; i++) { @@ -729,6 +832,11 @@ static int _cr_exclusive_dist(struct select_cr_job *job) job->alloc_lps[host_index] = node_record_table_ptr[i].cpus; job->alloc_sockets[host_index] = node_record_table_ptr[i].sockets; + if ((cr_type == CR_CORE) || (cr_type == CR_CORE_MEMORY)) { + for (j = 0; j < node_record_table_ptr[i].sockets; j++) + job->alloc_cores[host_index][j] = + node_record_table_ptr[i].cores; + } host_index++; } return SLURM_SUCCESS; @@ -744,9 +852,12 @@ static int _cr_plane_dist(struct select_cr_job *job, const int plane_size) int i, j, k, l, m, host_index; int usable_cpus, usable_sockets, usable_cores, usable_threads; int taskcount=0, last_socket_index; - int socket_index, core_index, thread_index; + int socket_index, core_index, thread_index; + int c_ok = 0; int job_index = -1; - + int socket_cnt = 0; + int core_cnt = 0; + debug3("cons_res _cr_plane_dist plane_size %d ", plane_size); debug3("cons_res _cr_plane_dist maxtasks %d num_hosts %d", maxtasks, num_hosts); @@ -770,12 +881,13 @@ static int _cr_plane_dist(struct select_cr_job *job, const int plane_size) } } } -#if(0) +#if(CR_DEBUG) for (i = 0; i < job->nhosts; i++) { info("cons_res _cr_plane_dist %u host %s alloc_ lps %d ", job->job_id, job->host[i], job->alloc_lps[i]); } #endif + for (host_index = 0; ((host_index < node_record_count) && (taskcount < job->nprocs)); host_index++) { @@ -801,27 +913,40 @@ static int _cr_plane_dist(struct select_cr_job *job, const int plane_size) &alloc_lps, &job->job_id); avail_cpus = slurm_get_avail_procs(job->max_sockets, - job->max_cores, - job->max_threads, - job->cpus_per_task, - job->ntasks_per_node, - job->ntasks_per_socket, - job->ntasks_per_core, - &usable_cpus, - &usable_sockets, - &usable_cores, - &usable_threads, - alloc_sockets, - alloc_lps, - cr_type); + job->max_cores, + job->max_threads, + job->cpus_per_task, + job->ntasks_per_node, + job->ntasks_per_socket, + job->ntasks_per_core, + &usable_cpus, + &usable_sockets, + &usable_cores, + &usable_threads, + alloc_sockets, + (int *)this_cr_node-> + alloc_cores, + alloc_lps, + cr_type); + if (avail_cpus == 0) { error(" cons_res: no available cpus on node %s", node_record_table_ptr[host_index].name); } + job->alloc_sockets[job_index] = 0; + if ((cr_type == CR_CORE) || (cr_type == CR_CORE_MEMORY)) { + for (j = 0; + j < 
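/* review note (editorial): this loop bound is the node's core count,
 * yet alloc_cores[] is xmalloc'd with one entry per socket in
 * select_p_job_test() below; if cores > sockets the reset overruns
 * the array, so a per-socket bound looks like the intent */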
node_record_table_ptr[host_index].cores; + j++) + job->alloc_cores[job_index][j] = 0; + } + maxtasks = job->alloc_lps[job_index]; last_socket_index = -1; next = 0; + socket_cnt = 0; + core_cnt = 0; for (j=0; next<maxtasks; j++) { for (socket_index=0; ((socket_index<usable_sockets) @@ -852,16 +977,43 @@ static int _cr_plane_dist(struct select_cr_job *job, const int plane_size) last_socket_index = socket_index; } + if (((cr_type == CR_CORE) || + (cr_type == CR_CORE_MEMORY)) + && (c_ok == 0)) { + if (socket_cnt >= + node_record_table_ptr[host_index].sockets) + continue; + core_cnt = job->alloc_cores[job_index][socket_cnt] + + this_cr_node->alloc_cores[socket_cnt]; + if (core_cnt >= + node_record_table_ptr[host_index].cores) { + socket_cnt++; + } + job->alloc_cores[job_index][socket_cnt]++; + info("cons_res %u PLANE job->alloc_cores[%d][%d] = %d", + job->job_id, job_index, socket_cnt, + job->alloc_cores[job_index][socket_cnt]); + if (m == (usable_cores-1)) + c_ok = 1; + } next++; } } + socket_cnt++; } } -#if(0) +#if(CR_DEBUG) info("cons_res _cr_plane_dist %u host %d %s alloc_ " - "sockets %d lps %d ", + "s %d c %d lps %d ", job->job_id, host_index, this_cr_node->node_ptr->name, job->alloc_sockets[job_index], job->alloc_lps[job_index]); + int i = 0; + if ((cr_type == CR_CORE) || (cr_type == CR_CORE_MEMORY)) { + for (i = 0; i < this_cr_node->node_ptr->sockets; i++) + info("cons_res _cr_plane_dist %u host %d %s alloc_cores %d", + job->job_id, host_index, this_cr_node->node_ptr->name, + job->alloc_cores[job_index][i]); + } #endif } return SLURM_SUCCESS; @@ -870,11 +1022,18 @@ static int _cr_plane_dist(struct select_cr_job *job, const int plane_size) /* xfree a select_cr_job job */ static void _xfree_select_cr_job(struct select_cr_job *job) { + int i; + xfree(job->host); xfree(job->cpus); - xfree(job->alloc_lps); + xfree(job->alloc_lps); xfree(job->alloc_sockets); xfree(job->alloc_memory); + if ((cr_type == CR_CORE) || (cr_type == CR_CORE_MEMORY)) { + for (i = 0; i < job->nhosts; i++) + xfree(job->alloc_cores[i]); + xfree(job->alloc_cores); + } FREE_NULL_BITMAP(job->node_bitmap); xfree(job); } @@ -960,7 +1119,20 @@ static void _count_cpus(unsigned *bitmap, int sum) case CR_CORE: case CR_CORE_MEMORY: { - /* FIXME */ + int core_cnt = 0; + for (i = 0; i < this_node->node_ptr->sockets; i++) + core_cnt += this_node->alloc_cores[i]; + if (slurmctld_conf.fast_schedule) { + sum += ((node_record_table_ptr[i].config_ptr->sockets + * node_record_table_ptr[i].config_ptr->cores) + - core_cnt) + * node_record_table_ptr[i].config_ptr->threads; + } else { + sum += ((node_record_table_ptr[i].sockets + * node_record_table_ptr[i].cores) + - core_cnt) + * node_record_table_ptr[i].threads; + } break; } case CR_MEMORY: @@ -1030,7 +1202,7 @@ static int _synchronize_bitmaps(bitstr_t ** partially_idle_bitmap) static int _clear_select_jobinfo(struct job_record *job_ptr) { - int rc = SLURM_SUCCESS, i, nodes, job_id; + int rc = SLURM_SUCCESS, i, j, nodes, job_id; struct select_cr_job *job = NULL; ListIterator iterator; @@ -1059,13 +1231,11 @@ static int _clear_select_jobinfo(struct job_record *job_ptr) goto out; } + /* Updating this node allocated resources */ switch(cr_type) { case CR_SOCKET: case CR_SOCKET_MEMORY: - case CR_CORE: - case CR_CORE_MEMORY: - /* Updating this node allocated resources */ - this_node->alloc_lps -= job->alloc_lps[i]; + this_node->alloc_lps -= job->alloc_lps[i]; this_node->alloc_sockets -= job->alloc_sockets[i]; if ((this_node->alloc_lps < 0) || (this_node->alloc_sockets < 0)) { error(" alloc_lps < 
0 %d on %s", @@ -1076,8 +1246,44 @@ static int _clear_select_jobinfo(struct job_record *job_ptr) rc = SLURM_ERROR; goto out; } - if ((cr_type == CR_SOCKET) || (cr_type == CR_CORE)) - break; + this_node->alloc_memory -= job->alloc_memory[i]; + if (this_node->alloc_memory < 0) { + error(" alloc_memory < 0 %d on %s", + this_node->alloc_memory, + this_node->node_ptr->name); + this_node->alloc_memory = 0; + rc = SLURM_ERROR; + goto out; + } + break; + case CR_CORE: + case CR_CORE_MEMORY: + this_node->alloc_lps -= job->alloc_lps[i]; + for (j =0; j < this_node->node_ptr->sockets; j++) + this_node->alloc_cores[j] -= job->alloc_cores[i][j]; + for (j =0; j < this_node->node_ptr->sockets; j++) { + if ((this_node->alloc_lps >= 0) || (this_node->alloc_cores[j] >= 0)) { + continue; + } else { + error(" alloc_lps < 0 %d on %s", + this_node->alloc_lps, + this_node->node_ptr->name); + this_node->alloc_lps = 0; + this_node->alloc_cores = 0; + rc = SLURM_ERROR; + goto out; + } + } + this_node->alloc_memory -= job->alloc_memory[i]; + if (this_node->alloc_memory < 0) { + error(" alloc_memory < 0 %d on %s", + this_node->alloc_memory, + this_node->node_ptr->name); + this_node->alloc_memory = 0; + rc = SLURM_ERROR; + goto out; + } + break; case CR_MEMORY: this_node->alloc_memory -= job->alloc_memory[i]; if (this_node->alloc_memory < 0) { @@ -1116,10 +1322,16 @@ static int _clear_select_jobinfo(struct job_record *job_ptr) default: break; } -#if(1) - info("cons_res %u _clear_select_jobinfo (-) node %s alloc_ lps %d sockets %d ", - job->job_id, this_node->node_ptr->name, this_node->alloc_lps, - this_node->alloc_sockets); +#if(CR_DEBUG) + info("cons_res %u _clear_select_jobinfo (-) node %s alloc_ s %d lps %d", + job->job_id, this_node->node_ptr->name, + this_node->alloc_sockets, + this_node->alloc_lps); + if ((cr_type == CR_CORE) || (cr_type == CR_CORE_MEMORY)) + for (j =0; j < this_node->node_ptr->sockets; j++) + info("cons_res %u _clear_select_jobinfo (-) node %s alloc_ c %d", + job->job_id, this_node->node_ptr->name, + this_node->alloc_cores[j]); #endif } out: @@ -1209,7 +1421,7 @@ extern int select_p_job_init(List job_list) extern int select_p_node_init(struct node_record *node_ptr, int node_cnt) { - int i; + int i, j; if (node_ptr == NULL) { error("select_g_node_init: node_ptr == NULL"); @@ -1228,9 +1440,16 @@ extern int select_p_node_init(struct node_record *node_ptr, int node_cnt) for (i = 0; i < select_node_cnt; i++) { select_node_ptr[i].node_ptr = &node_ptr[i]; - select_node_ptr[i].alloc_lps = 0; - select_node_ptr[i].alloc_sockets = 0; - select_node_ptr[i].alloc_memory = 0; + select_node_ptr[i].alloc_lps = 0; + select_node_ptr[i].alloc_sockets = 0; + select_node_ptr[i].alloc_memory = 0; + if ((cr_type == CR_CORE) || (cr_type == CR_CORE_MEMORY)) { + select_node_ptr[i].alloc_cores = + xmalloc(sizeof(int) * select_node_ptr->node_ptr->sockets); + for (j = 0; j < select_node_ptr->node_ptr->sockets; j++) { + select_node_ptr[i].alloc_cores[j] = 0; + } + } } select_fast_schedule = slurm_get_fast_schedule(); @@ -1488,7 +1707,7 @@ extern int select_p_job_test(struct job_record *job_ptr, bitstr_t * bitmap, goto cleanup; if (!test_only) { - int jobid, job_nodecnt, j; + int jobid, job_nodecnt, j, k; bitoff_t size; static struct select_cr_job *job; job = xmalloc(sizeof(struct select_cr_job)); @@ -1515,13 +1734,17 @@ extern int select_p_job_test(struct job_record *job_ptr, bitstr_t * bitmap, bit_set(job->node_bitmap, i); } - job->host = (char **) xmalloc(job->nhosts * sizeof(char *)); - job->cpus = (int *) 
xmalloc(job->nhosts * sizeof(int)); - - /* Build number of needed lps for each hosts for this job */ - job->alloc_lps = (int *) xmalloc(job->nhosts * sizeof(int)); + job->host = (char **) xmalloc(job->nhosts * sizeof(char *)); + job->cpus = (int *) xmalloc(job->nhosts * sizeof(int)); + job->alloc_lps = (int *) xmalloc(job->nhosts * sizeof(int)); job->alloc_sockets = (int *) xmalloc(job->nhosts * sizeof(int)); job->alloc_memory = (int *) xmalloc(job->nhosts * sizeof(int)); + if ((cr_type == CR_CORE) || (cr_type == CR_CORE_MEMORY)) { + job->alloc_cores = (int **) xmalloc(job->nhosts * sizeof(int *)); + for (i = 0; i < job->nhosts; i++) + job->alloc_cores[i] = (int *) xmalloc( + node_record_table_ptr[i].sockets * sizeof(int)); + } j = 0; for (i = 0; i < node_record_count; i++) { @@ -1529,13 +1752,16 @@ extern int select_p_job_test(struct job_record *job_ptr, bitstr_t * bitmap, continue; job->host[j] = node_record_table_ptr[i].name; job->cpus[j] = node_record_table_ptr[i].cpus; - job->alloc_memory[j] = job_ptr->details->job_max_memory; - job->alloc_lps[j] = 0; + job->alloc_lps[j] = 0; job->alloc_sockets[j] = 0; + job->alloc_memory[j] = job_ptr->details->job_max_memory; + if ((cr_type == CR_CORE) || (cr_type == CR_CORE_MEMORY)) { + for (k = 0; k < node_record_table_ptr[i].sockets; k++) + job->alloc_cores[j][k] = 0; + } j++; } - - /* check for error SMB Fixme */ + debug3("cons_res %u task_dist %d", job_ptr->job_id, job_ptr->details->task_dist); if (job_ptr->details->shared == 0) { /* Nodes need to be allocated in dedicated @@ -1576,14 +1802,6 @@ extern int select_p_job_test(struct job_record *job_ptr, bitstr_t * bitmap, } if (error_code != SLURM_SUCCESS) goto cleanup; -#if(0) - /* debugging only Remove */ - for (i = 0; i < job->nhosts; i++) { - debug3("cons_res: job: %u after _cr_dist host %s cpus %u alloc_lps %d alloc_sockets %d", - job->job_id, job->host[i], job->cpus[i], - job->alloc_lps[i], job->alloc_sockets[i]); - } -#endif _append_to_job_list(job); } @@ -1751,6 +1969,7 @@ extern int select_p_get_extra_jobinfo(struct node_record *node_ptr, { int rc = SLURM_SUCCESS, i, avail = 0; uint32_t *tmp_32 = (uint32_t *) data; + *tmp_32 = 0; xassert(job_ptr); xassert(job_ptr->magic == JOB_MAGIC); @@ -1816,22 +2035,13 @@ extern int select_p_get_extra_jobinfo(struct node_record *node_ptr, * given job for a specific node --> based * on the output from _cr_dist */ switch(cr_type) { + case CR_MEMORY: + *tmp_32 = node_ptr->cpus; + break; case CR_SOCKET: case CR_SOCKET_MEMORY: - /* Number of hardware resources allocated - for this job. 
This might be more than - what the job requires since we - only allocated whole sockets at - this level */ - *tmp_32 = job->alloc_lps[i]; - break; case CR_CORE: case CR_CORE_MEMORY: - /* Not yet implemented */ - break; - case CR_MEMORY: - *tmp_32 = node_ptr->cpus; - break; case CR_CPU: case CR_CPU_MEMORY: default: @@ -1845,6 +2055,7 @@ extern int select_p_get_extra_jobinfo(struct node_record *node_ptr, } if (!job) { debug3("cons_res: job %d not active", job_ptr->job_id); + *tmp_32 = 0; } cleanup: list_iterator_destroy(iterator); @@ -1863,7 +2074,7 @@ extern int select_p_get_select_nodeinfo(struct node_record *node_ptr, enum select_data_info dinfo, void *data) { - int rc = SLURM_SUCCESS; + int rc = SLURM_SUCCESS, i; struct node_cr_record *this_cr_node; xassert(node_ptr); @@ -1917,7 +2128,10 @@ extern int select_p_get_select_nodeinfo(struct node_record *node_ptr, break; case CR_CORE: case CR_CORE_MEMORY: - /* FIXME */ + *tmp_32 = 0; + for (i = 0; i < this_cr_node->node_ptr->sockets; i++) + *tmp_32 += this_cr_node->alloc_cores[i] * + node_ptr->threads; break; case CR_MEMORY: *tmp_32 = 0; @@ -1939,7 +2153,7 @@ extern int select_p_get_select_nodeinfo(struct node_record *node_ptr, extern int select_p_update_nodeinfo(struct job_record *job_ptr) { - int rc = SLURM_SUCCESS, i, job_id, nodes; + int rc = SLURM_SUCCESS, i, j, job_id, nodes; struct select_cr_job *job = NULL; ListIterator iterator; @@ -1965,29 +2179,39 @@ extern int select_p_update_nodeinfo(struct job_record *job_ptr) rc = SLURM_ERROR; goto cleanup; } + /* Updating this node's allocated resources */ switch (cr_type) { - case CR_SOCKET: - case CR_CORE: case CR_SOCKET_MEMORY: - case CR_CORE_MEMORY: - /* Updating this node's allocated resources */ + this_node->alloc_memory += job->alloc_memory[i]; + case CR_SOCKET: this_node->alloc_lps += job->alloc_lps[i]; this_node->alloc_sockets += job->alloc_sockets[i]; if (this_node->alloc_sockets > this_node->node_ptr->sockets) error("Job %u Host %s too many allocated sockets %d", job->job_id, this_node->node_ptr->name, this_node->alloc_sockets); - if ((cr_type == CR_SOCKET) - || (cr_type == CR_CORE)) - break; - case CR_MEMORY: this_node->alloc_memory += job->alloc_memory[i]; break; - case CR_CPU: + case CR_CORE_MEMORY: + this_node->alloc_memory += job->alloc_memory[i]; + case CR_CORE: + this_node->alloc_lps += job->alloc_lps[i]; + for (j = 0; j < this_node->node_ptr->sockets; j++) + this_node->alloc_cores[j] += job->alloc_cores[i][j]; + for (j = 0; j < this_node->node_ptr->sockets; j++) + if (this_node->alloc_cores[j] <= this_node->node_ptr->cores) + continue; + else + error("Job %u Host %s too many allocated cores %d for socket %d ", + job->job_id, this_node->node_ptr->name, + this_node->alloc_cores[j], j); + break; case CR_CPU_MEMORY: + this_node->alloc_memory += job->alloc_memory[i]; + case CR_CPU: this_node->alloc_lps += job->alloc_lps[i]; - if (cr_type == CR_CPU) - break; + break; + case CR_MEMORY: this_node->alloc_memory += job->alloc_memory[i]; break; default: @@ -1995,11 +2219,16 @@ extern int select_p_update_nodeinfo(struct job_record *job_ptr) rc = SLURM_ERROR; break; } -#if(1) +#if(CR_DEBUG) /* Remove debug only */ info("cons_res %u update_nodeinfo (+) node %s alloc_ lps %d sockets %d mem %d ", job->job_id, this_node->node_ptr->name, this_node->alloc_lps, this_node->alloc_sockets, this_node->alloc_memory); + if ((cr_type == CR_CORE) || (cr_type == CR_CORE_MEMORY)) + for (j = 0; j < this_node->node_ptr->sockets; j++) + info("cons_res %u update_nodeinfo (+) node %s alloc_ cores %d ", + 
job->job_id, this_node->node_ptr->name, + this_node->alloc_cores[j]); #endif } } diff --git a/src/plugins/select/linear/select_linear.c b/src/plugins/select/linear/select_linear.c index 2721fd01f5817113d65bd0c1e429654889df1e7f..8ddc29b6b924c027cff3972e58601569c30ec074 100644 --- a/src/plugins/select/linear/select_linear.c +++ b/src/plugins/select/linear/select_linear.c @@ -355,7 +355,7 @@ int get_avail_cpus(struct job_record *job_ptr, int index) max_sockets, max_cores, max_threads, cpus_per_task, ntasks_per_node, ntasks_per_socket, ntasks_per_core, &cpus, &sockets, &cores, &threads, - 0, 0, SELECT_TYPE_INFO_NONE); + 0, NULL, 0, SELECT_TYPE_INFO_NONE); #if 0 debug3("avail_cpus index %d = %d (out of %d %d %d %d)", diff --git a/src/plugins/task/affinity/Makefile.in b/src/plugins/task/affinity/Makefile.in index 48aed618c9fdf61f1c7ab74da57835eec74adabd..0f924784eaaa37d7ef8b5f1ed14e73dc5a9e935e 100644 --- a/src/plugins/task/affinity/Makefile.in +++ b/src/plugins/task/affinity/Makefile.in @@ -306,6 +306,7 @@ INCLUDES = -I$(top_srcdir) -I$(top_srcdir)/src/common @HAVE_SCHED_SETAFFINITY_FALSE@ schedutils.c \ @HAVE_SCHED_SETAFFINITY_FALSE@ task_affinity.c +task_affinity_la_LIBADD = $(top_builddir)/src/common/libcommon.la all: all-am .SUFFIXES: diff --git a/src/plugins/task/affinity/dist_tasks.c b/src/plugins/task/affinity/dist_tasks.c index c4dbf4036c32fb9c2e790e67ce609aa3da9762b3..ec38c81730fcac0ed03bbe118bc3e209e606940c 100644 --- a/src/plugins/task/affinity/dist_tasks.c +++ b/src/plugins/task/affinity/dist_tasks.c @@ -89,7 +89,8 @@ static void _get_resources_this_node(int *cpus, int *sockets, int *cores, int *threads, - int *alloc_sockets, + int *alloc_sockets, + int *alloc_cores, int *alloc_lps, unsigned int *jobid); static void _cr_update_reservation(int reserve, uint32_t *reserved, @@ -145,8 +146,7 @@ void lllp_distribution(launch_tasks_request_msg_t *req, uint32_t node_id) return; } - /* SMB. We are still thinking about this. Does this make - sense? + /* We are still thinking about this. Does this make sense? 
if (req->task_dist == SLURM_DIST_ARBITRARY) { req->cpu_bind_type >= CPU_BIND_NONE; info("lllp_distribution jobid [%u] -m hostfile - auto binding off ", @@ -533,11 +533,11 @@ static int _task_layout_lllp_init(launch_tasks_request_msg_t *req, int *hw_sockets, int *hw_cores, int *hw_threads, - int *alloc_sockets, - int *alloc_lps, int *avail_cpus) { int i; + int alloc_sockets = 0, alloc_lps = 0; + int alloc_cores[conf->sockets]; if (req->cpu_bind_type & CPU_BIND_TO_THREADS) { /* Default: in here in case we decide to change the @@ -552,11 +552,9 @@ static int _task_layout_lllp_init(launch_tasks_request_msg_t *req, info ("task_layout cpu_bind_type CPU_BIND_TO_SOCKETS"); } - *alloc_sockets = 0; - *alloc_lps = 0; _get_resources_this_node(usable_cpus, usable_sockets, usable_cores, - usable_threads, alloc_sockets, alloc_lps, - &req->job_id); + usable_threads, &alloc_sockets, alloc_cores, + &alloc_lps, &req->job_id); *hw_sockets = *usable_sockets; *hw_cores = *usable_cores; @@ -569,8 +567,8 @@ static int _task_layout_lllp_init(launch_tasks_request_msg_t *req, req->ntasks_per_core, usable_cpus, usable_sockets, usable_cores, usable_threads, - *alloc_sockets, *alloc_lps, conf->cr_type); - + alloc_sockets, alloc_cores, + alloc_lps, conf->cr_type); /* Allocate masks array */ *masks_p = xmalloc(maxtasks * sizeof(bitstr_t*)); for (i = 0; i < maxtasks; i++) { @@ -593,13 +591,14 @@ static void _get_resources_this_node(int *cpus, int *sockets, int *cores, int *threads, - int *alloc_sockets, + int *alloc_sockets, + int *alloc_cores, int *alloc_lps, unsigned int *jobid) { int bit_index = 0; int i, j , k; - int this_socket = 0; + int this_socket = 0, cr_core_enabled = 0; /* FIX for heterogeneous socket/core/thread count per system * in future releases */ @@ -609,10 +608,13 @@ static void _get_resources_this_node(int *cpus, *threads = conf->threads; switch(conf->cr_type) { - case CR_SOCKET: - case CR_SOCKET_MEMORY: case CR_CORE: case CR_CORE_MEMORY: + for(i = 0; i < *sockets; i++) + alloc_cores[i] = 0; + cr_core_enabled = 1; + case CR_SOCKET: + case CR_SOCKET_MEMORY: case CR_CPU: case CR_CPU_MEMORY: for(i = 0; i < *sockets; i++) { @@ -623,6 +625,9 @@ static void _get_resources_this_node(int *cpus, *jobid, bit_index, lllp_reserved[bit_index]); if(lllp_reserved[bit_index] > 0) { *alloc_lps += 1; + if ((k == 0) && (cr_core_enabled)) { + alloc_cores[i]++; + } this_socket++; } bit_index++; @@ -639,8 +644,14 @@ static void _get_resources_this_node(int *cpus, break; } +#if(0) info("_get_resources jobid %d hostname %s alloc_sockets %d alloc_lps %d ", - jobid, conf->hostname, *alloc_sockets, *alloc_lps); + *jobid, conf->hostname, *alloc_sockets, *alloc_lps); + if (cr_core_enabled) + for (i = 0; i < *sockets; i++) + info("_get_resources %d hostname %s socket id %d cores %d ", + *jobid, conf->hostname, i, alloc_cores[i]); +#endif } /* @@ -676,7 +687,6 @@ static int _task_layout_lllp_cyclic(launch_tasks_request_msg_t *req, int hw_sockets = 0, hw_cores = 0, hw_threads = 0; int usable_cpus = 0, avail_cpus = 0; int usable_sockets = 0, usable_cores = 0, usable_threads = 0; - int alloc_sockets = 0, alloc_lps = 0; bitstr_t **masks = NULL; bool bind_to_exact_socket = true; @@ -696,8 +706,6 @@ static int _task_layout_lllp_cyclic(launch_tasks_request_msg_t *req, &hw_sockets, &hw_cores, &hw_threads, - &alloc_sockets, - &alloc_lps, &avail_cpus); if (retval != SLURM_SUCCESS) { return retval; @@ -782,7 +790,6 @@ static int _task_layout_lllp_block(launch_tasks_request_msg_t *req, int hw_sockets = 0, hw_cores = 0, hw_threads = 0; int 
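/* editorial: the alloc_sockets/alloc_lps locals disappear from the
 * three lllp layout routines because _task_layout_lllp_init() now
 * derives them (plus the per-socket alloc_cores array) internally
 * via _get_resources_this_node() before calling
 * slurm_get_avail_procs() */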
usable_cpus = 0, avail_cpus = 0; int usable_sockets = 0, usable_cores = 0, usable_threads = 0; - int alloc_sockets = 0, alloc_lps = 0; bitstr_t **masks = NULL; bool bind_to_exact_socket = true; @@ -802,8 +809,6 @@ static int _task_layout_lllp_block(launch_tasks_request_msg_t *req, &hw_sockets, &hw_cores, &hw_threads, - &alloc_sockets, - &alloc_lps, &avail_cpus); if (retval != SLURM_SUCCESS) { return retval; @@ -906,9 +911,6 @@ static int _task_layout_lllp_block(launch_tasks_request_msg_t *req, * in srun. The second distribution "plane|block|cyclic" is computed * locally by each slurmd. * - * Restriction: Any restrictions? what about plane:cyclic or - * plane:block? Only plane:plane? FIXME!!! SMB!!! - * * The input to the lllp distribution algorithms is the gids * (tasksids) generated for the local node. * @@ -928,7 +930,6 @@ static int _task_layout_lllp_plane(launch_tasks_request_msg_t *req, int usable_sockets = 0, usable_cores = 0, usable_threads = 0; int plane_size = req->plane_size; int max_plane_size = 0; - int alloc_sockets = 0, alloc_lps = 0; bitstr_t **masks = NULL; bool bind_to_exact_socket = true; @@ -948,8 +949,6 @@ static int _task_layout_lllp_plane(launch_tasks_request_msg_t *req, &hw_sockets, &hw_cores, &hw_threads, - &alloc_sockets, - &alloc_lps, &avail_cpus); if (retval != SLURM_SUCCESS) { return retval; diff --git a/src/slaunch/opt.c b/src/slaunch/opt.c index 66ae47d6c91fbef1e4139a55aa4d78a2c602ad6d..1cd525586abd3101553148f23da8538e27a3ff4d 100644 --- a/src/slaunch/opt.c +++ b/src/slaunch/opt.c @@ -256,20 +256,22 @@ static int _verify_cpu_bind(const char *arg, char **cpu_bind, p = buf; while ((tok = strsep(&p, ";"))) { - if (!strcasecmp(tok, "help")) { - printf("CPU bind options:\n" - "\tq[uiet], quietly bind before task runs (default)\n" - "\tv[erbose], verbosely report binding before task runs\n" - "\tno[ne] don't bind tasks to CPUs (default)\n" - "\trank bind by task rank\n" - "\tmap_cpu:<list> specify a CPU ID binding for each task\n" - "\t where <list> is <cpuid1>,<cpuid2>,...<cpuidN>\n" - "\tmask_cpu:<list> specify a CPU ID binding mask for each task\n" - "\t where <list> is <mask1>,<mask2>,...<maskN>\n" - "\tsockets auto-generated masks bind to sockets\n" - "\tcores auto-generated masks bind to cores\n" - "\tthreads auto-generated masks bind to threads\n" - "\thelp show this help message\n"); + if (strcasecmp(tok, "help") == 0) { + printf( +"CPU bind options:\n" +" --cpu_bind= Bind tasks to CPUs\n" +" q[uiet] quietly bind before task runs (default)\n" +" v[erbose] verbosely report binding before task runs\n" +" no[ne] don't bind tasks to CPUs (default)\n" +" rank bind by task rank\n" +" map_cpu:<list> specify a CPU ID binding for each task\n" +" where <list> is <cpuid1>,<cpuid2>,...<cpuidN>\n" +" mask_cpu:<list> specify a CPU ID binding mask for each task\n" +" where <list> is <mask1>,<mask2>,...<maskN>\n" +" sockets auto-generated masks bind to sockets\n" +" cores auto-generated masks bind to cores\n" +" threads auto-generated masks bind to threads\n" +" help show this help message\n"); return 1; } else if ((strcasecmp(tok, "q") == 0) || (strcasecmp(tok, "quiet") == 0)) { @@ -381,19 +383,21 @@ static int _verify_mem_bind(const char *arg, char **mem_bind, } p = buf; - while((tok = strsep(&p, ";"))) { - if(!strcasecmp(tok, "help")) { - printf("Memory bind options:\n" - "\tq[uiet], quietly bind before task runs (default)\n" - "\tv[erbose], verbosely report binding before task runs\n" - "\tno[ne] don't bind tasks to memory (default)\n" - "\trank bind by task rank\n" - 
"\tlocal bind to memory local to processor\n" - "\tmap_mem:<list> specify a memory binding for each task\n" - "\t where <list> is <cpuid1>,<cpuid2>,...<cpuidN>\n" - "\tmask_mem:<list> specify a memory binding mask for each tasks\n" - "\t where <list> is <mask1>,<mask2>,...<maskN>\n" - "\thelp show this help message\n"); + while ((tok = strsep(&p, ";"))) { + if (strcasecmp(tok, "help") == 0) { + printf( +"Memory bind options:\n" +" --mem_bind= Bind memory to locality domains (ldom)\n" +" q[uiet] quietly bind before task runs (default)\n" +" v[erbose] verbosely report binding before task runs\n" +" no[ne] don't bind tasks to memory (default)\n" +" rank bind by task rank\n" +" local bind to memory local to processor\n" +" map_mem:<list> specify a memory binding for each task\n" +" where <list> is <cpuid1>,<cpuid2>,...<cpuidN>\n" +" mask_mem:<list> specify a memory binding mask for each tasks\n" +" where <list> is <mask1>,<mask2>,...<maskN>\n" +" help show this help message\n"); return 1; } else if ((strcasecmp(tok, "q") == 0) || diff --git a/src/slurmctld/job_mgr.c b/src/slurmctld/job_mgr.c index 785fa0228cee3f37256f7470a5f08d7202f1494c..f0485e490eaad192b3efb93687dea78b1ebcb57b 100644 --- a/src/slurmctld/job_mgr.c +++ b/src/slurmctld/job_mgr.c @@ -1037,8 +1037,12 @@ static int _load_step_state(struct job_record *job_ptr, Buf buffer) xfree(bit_fmt); } - switch_g_job_step_allocated(switch_tmp, + if (step_ptr->step_layout && step_ptr->step_layout->node_list) { + switch_g_job_step_allocated(switch_tmp, step_ptr->step_layout->node_list); + } else { + switch_g_job_step_allocated(switch_tmp, NULL); + } info("recovered job step %u.%u", job_ptr->job_id, step_id); return SLURM_SUCCESS; @@ -2693,6 +2697,13 @@ static int _validate_job_desc(job_desc_msg_t * job_desc_msg, int allocate, job_desc_msg->min_threads = 1; /* default thread count of 1 */ if (job_desc_msg->min_nodes == NO_VAL) job_desc_msg->min_nodes = 1; /* default node count of 1 */ + if (job_desc_msg->min_sockets == NO_VAL) + job_desc_msg->min_sockets = 1; /* default socket count of 1 */ + if (job_desc_msg->min_cores == NO_VAL) + job_desc_msg->min_cores = 1; /* default core count of 1 */ + if (job_desc_msg->min_threads == NO_VAL) + job_desc_msg->min_threads = 1; /* default thread count of 1 */ + if (job_desc_msg->job_min_procs == NO_VAL) job_desc_msg->job_min_procs = 1; /* default 1 cpu per node */ if (job_desc_msg->job_min_sockets == NO_VAL) @@ -3454,7 +3465,6 @@ int update_job(job_desc_msg_t * job_specs, uid_t uid) } } - if (job_specs->job_min_threads != NO_VAL && detail_ptr) { if (super_user || (detail_ptr->job_min_threads > job_specs->job_min_threads)) { diff --git a/src/slurmctld/node_scheduler.c b/src/slurmctld/node_scheduler.c index 73982d420cf9b96ba5c9b5a45042b3a7f7fe26b5..3c3891dc408b48d2bbe13f8a5421cf7a5caad86a 100644 --- a/src/slurmctld/node_scheduler.c +++ b/src/slurmctld/node_scheduler.c @@ -1386,9 +1386,12 @@ extern void build_node_details(struct job_record *job_ptr) error_code = select_g_get_extra_jobinfo( node_ptr, job_ptr, SELECT_AVAIL_CPUS, &usable_lps); - if (cr_enabled) - job_ptr->alloc_lps[cr_count++] = usable_lps; - if(error_code != SLURM_SUCCESS) { + if (error_code == SLURM_SUCCESS) { + if (cr_enabled && job_ptr->alloc_lps) { + job_ptr->alloc_lps[cr_count++] = + usable_lps; + } + } else { if (cr_enabled) { xfree(job_ptr->alloc_lps); job_ptr->alloc_lps = NULL; diff --git a/src/srun/opt.c b/src/srun/opt.c index ddc346f0407b458eeedf8a87299745de152c99a1..ce62ca63b330e3beefcfdde28c977a8ad0126ed0 100644 --- a/src/srun/opt.c +++ 
b/src/srun/opt.c @@ -153,6 +153,7 @@ #define LONG_OPT_NTASKSPERCORE 0x138 #define LONG_OPT_PRINTREQ 0x139 #define LONG_OPT_JOBMEM 0x13a +#define LONG_OPT_HINT 0x13b /*---- global variables, defined in opt.h ----*/ char **remote_argv; @@ -211,7 +212,12 @@ static bool _under_parallel_debugger(void); static void _usage(void); static bool _valid_node_list(char **node_list_pptr); static task_dist_states_t _verify_dist_type(const char *arg, uint32_t *psize); -static bool _verify_cpu_core_thread_count(const char *arg, +static bool _verify_socket_core_thread_count(const char *arg, + int *min_sockets, int *max_sockets, + int *min_cores, int *max_cores, + int *min_threads, int *max_threads, + cpu_bind_type_t *cpu_bind_type); +static bool _verify_hint(const char *arg, int *min_sockets, int *max_sockets, int *min_cores, int *max_cores, int *min_threads, int *max_threads, @@ -447,19 +453,21 @@ static int _verify_cpu_bind(const char *arg, char **cpu_bind, p = buf; while ((tok = strsep(&p, ";"))) { if (strcasecmp(tok, "help") == 0) { - printf("CPU bind options:\n" - "\tq[uiet], quietly bind before task runs (default)\n" - "\tv[erbose], verbosely report binding before task runs\n" - "\tno[ne] don't bind tasks to CPUs (default)\n" - "\trank bind by task rank\n" - "\tmap_cpu:<list> specify a CPU ID binding for each task\n" - "\t where <list> is <cpuid1>,<cpuid2>,...<cpuidN>\n" - "\tmask_cpu:<list> specify a CPU ID binding mask for each task\n" - "\t where <list> is <mask1>,<mask2>,...<maskN>\n" - "\tsockets auto-generated masks bind to sockets\n" - "\tcores auto-generated masks bind to cores\n" - "\tthreads auto-generated masks bind to threads\n" - "\thelp show this help message\n"); + printf( +"CPU bind options:\n" +" --cpu_bind= Bind tasks to CPUs\n" +" q[uiet] quietly bind before task runs (default)\n" +" v[erbose] verbosely report binding before task runs\n" +" no[ne] don't bind tasks to CPUs (default)\n" +" rank bind by task rank\n" +" map_cpu:<list> specify a CPU ID binding for each task\n" +" where <list> is <cpuid1>,<cpuid2>,...<cpuidN>\n" +" mask_cpu:<list> specify a CPU ID binding mask for each task\n" +" where <list> is <mask1>,<mask2>,...<maskN>\n" +" sockets auto-generated masks bind to sockets\n" +" cores auto-generated masks bind to cores\n" +" threads auto-generated masks bind to threads\n" +" help show this help message\n"); return 1; } else if ((strcasecmp(tok, "q") == 0) || (strcasecmp(tok, "quiet") == 0)) { @@ -573,17 +581,19 @@ static int _verify_mem_bind(const char *arg, char **mem_bind, p = buf; while ((tok = strsep(&p, ";"))) { if (strcasecmp(tok, "help") == 0) { - printf("Memory bind options:\n" - "\tq[uiet], quietly bind before task runs (default)\n" - "\tv[erbose], verbosely report binding before task runs\n" - "\tno[ne] don't bind tasks to memory (default)\n" - "\trank bind by task rank\n" - "\tlocal bind to memory local to processor\n" - "\tmap_mem:<list> specify a memory binding for each task\n" - "\t where <list> is <cpuid1>,<cpuid2>,...<cpuidN>\n" - "\tmask_mem:<list> specify a memory binding mask for each tasks\n" - "\t where <list> is <mask1>,<mask2>,...<maskN>\n" - "\thelp show this help message\n"); + printf( +"Memory bind options:\n" +" --mem_bind= Bind memory to locality domains (ldom)\n" +" q[uiet] quietly bind before task runs (default)\n" +" v[erbose] verbosely report binding before task runs\n" +" no[ne] don't bind tasks to memory (default)\n" +" rank bind by task rank\n" +" local bind to memory local to processor\n" +" map_mem:<list> specify a memory binding for 
each task\n" +" where <list> is <cpuid1>,<cpuid2>,...<cpuidN>\n" +" mask_mem:<list> specify a memory binding mask for each tasks\n" +" where <list> is <mask1>,<mask2>,...<maskN>\n" +" help show this help message\n"); return 1; } else if ((strcasecmp(tok, "q") == 0) || @@ -667,7 +677,7 @@ static int _verify_mem_bind(const char *arg, char **mem_bind, * RET true if valid */ static bool -_verify_cpu_core_thread_count(const char *start_ptr, +_verify_socket_core_thread_count(const char *arg, int *min_sockets, int *max_sockets, int *min_cores, int *max_cores, int *min_threads, int *max_threads, @@ -675,7 +685,7 @@ _verify_cpu_core_thread_count(const char *start_ptr, { bool tmp_val,ret_val; int i,j; - const char *cur_ptr = start_ptr; + const char *cur_ptr = arg; char buf[3][48]; /* each can hold INT64_MAX - INT64_MAX */ buf[0][0] = '\0'; buf[1][0] = '\0'; @@ -721,6 +731,74 @@ _verify_cpu_core_thread_count(const char *start_ptr, return ret_val; } +/* + * verify that a hint is valid and convert it into the implied settings + * RET true if valid + */ +static bool +_verify_hint(const char *arg, + int *min_sockets, int *max_sockets, + int *min_cores, int *max_cores, + int *min_threads, int *max_threads, + cpu_bind_type_t *cpu_bind_type) +{ + char *buf, *p, *tok; + if (!arg) { + return true; + } + + buf = xstrdup(arg); + p = buf; + /* change all ',' delimiters not followed by a digit to ';' */ + /* simplifies parsing tokens while keeping map/mask together */ + while (*p) { + if (*p == ',') { + if (!isdigit(*(p+1))) + *p = ';'; + } + *p++; + } + + p = buf; + while ((tok = strsep(&p, ";"))) { + if (strcasecmp(tok, "help") == 0) { + printf( +"Application hint options:\n" +" --hint= Bind tasks according to application hints\n" +" compute_bound use all cores in each physical CPU\n" +" memory_bound use only one core in each physical CPU\n" +" [no]multithread [don't] use extra threads with in-core multi-threading\n" +" help show this help message\n"); + return 1; + } else if (strcasecmp(tok, "compute_bound") == 0) { + *min_sockets = 1; + *max_sockets = INT_MAX; + *min_cores = 1; + *max_cores = INT_MAX; + *cpu_bind_type |= CPU_BIND_TO_CORES; + } else if (strcasecmp(tok, "memory_bound") == 0) { + *min_cores = 1; + *max_cores = 1; + *cpu_bind_type |= CPU_BIND_TO_CORES; + } else if (strcasecmp(tok, "multithread") == 0) { + *min_threads = 1; + *max_threads = INT_MAX; + *cpu_bind_type |= CPU_BIND_TO_THREADS; + } else if (strcasecmp(tok, "nomultithread") == 0) { + *min_threads = 1; + *max_threads = 1; + *cpu_bind_type |= CPU_BIND_TO_THREADS; + } else { + error("unrecognized --hint argument \"%s\", see --hint=help", tok); + xfree(buf); + return 1; + } + } + + xfree(buf); + return 0; +} + /* return command name from its full path name */ static char * _base_name(char* command) { @@ -843,16 +921,15 @@ static void _opt_default() opt.cpus_set = false; opt.min_nodes = 1; opt.max_nodes = 0; - opt.min_sockets_per_node = 0; /* request, not constraint (mincpus) */ - opt.max_sockets_per_node = 0; - opt.min_cores_per_socket = 0; /* request, not constraint (mincores) */ - opt.max_cores_per_socket = 0; - opt.min_threads_per_core = 0; /* request, not constraint - * (minthreads */ - opt.max_threads_per_core = 0; - opt.ntasks_per_node = 0; - opt.ntasks_per_socket = 0; - opt.ntasks_per_core = 0; + opt.min_sockets_per_node = NO_VAL; /* requested min/maxsockets */ + opt.max_sockets_per_node = NO_VAL; + opt.min_cores_per_socket = NO_VAL; /* requested min/maxcores */ + opt.max_cores_per_socket = NO_VAL; + opt.min_threads_per_core = 
+	opt.max_threads_per_core = NO_VAL;
+	opt.ntasks_per_node = NO_VAL; /* ntask max limits */
+	opt.ntasks_per_socket = NO_VAL;
+	opt.ntasks_per_core = NO_VAL;
 	opt.nodes_set = false;
 	opt.cpu_bind_type = 0;
 	opt.cpu_bind = NULL;
@@ -1281,6 +1358,7 @@ void set_options(const int argc, char **argv, int first)
 		{"minthreads",       required_argument, 0, LONG_OPT_MINTHREADS},
 		{"mem",              required_argument, 0, LONG_OPT_MEM},
 		{"job-mem",          required_argument, 0, LONG_OPT_JOBMEM},
+		{"hint",             required_argument, 0, LONG_OPT_HINT},
 		{"mpi",              required_argument, 0, LONG_OPT_MPI},
 		{"no-shell",         no_argument,       0, LONG_OPT_NOSHELL},
 		{"tmp",              required_argument, 0, LONG_OPT_TMP},
@@ -1392,7 +1470,7 @@ void set_options(const int argc, char **argv, int first)
 			if(!first && opt.extra_set)
 				break;
 
-			opt.extra_set = _verify_cpu_core_thread_count(
+			opt.extra_set = _verify_socket_core_thread_count(
 						optarg,
 						&opt.min_sockets_per_node,
 						&opt.max_sockets_per_node,
@@ -1840,6 +1918,18 @@ void set_options(const int argc, char **argv, int first)
 					&opt.min_threads_per_core,
 					&opt.max_threads_per_core, true );
 			break;
+		case LONG_OPT_HINT:
+			if (_verify_hint(optarg,
+					&opt.min_sockets_per_node,
+					&opt.max_sockets_per_node,
+					&opt.min_cores_per_socket,
+					&opt.max_cores_per_socket,
+					&opt.min_threads_per_core,
+					&opt.max_threads_per_core,
+					&opt.cpu_bind_type)) {
+				exit(1);
+			}
+			break;
 		case LONG_OPT_NTASKSPERNODE:
 			opt.ntasks_per_node = _get_int(optarg,
 				"ntasks-per-node", true);
@@ -2615,6 +2705,8 @@ static void _help(void)
 	if (conf->task_plugin != NULL
 	    && strcasecmp(conf->task_plugin, "task/affinity") == 0) {
 		printf(
+"      --hint=             Bind tasks according to application hints\n"
+"                          (see \"--hint=help\" for options)\n"
 "      --cpu_bind=         Bind tasks to CPUs\n"
 "                          (see \"--cpu_bind=help\" for options)\n"
 "      --mem_bind=         Bind memory to locality domains (ldom)\n"
diff --git a/testsuite/expect/globals b/testsuite/expect/globals
index 34f7e35fa908edb9f407f5e0b56039feb5f9ef20..1eadf38a4ffe913497532741686d2eba39dba9f9 100755
--- a/testsuite/expect/globals
+++ b/testsuite/expect/globals
@@ -86,7 +86,7 @@ cset mpirun "mpirun"
 cset totalviewcli "/usr/local/bin/totalviewcli"
 
 # Pattern to match your shell prompt
-cset prompt {(%|#|\$) *$}
+cset prompt {(%|#|\$|\]) *$}
 
 #
 # Specify locations of other executable files used
diff --git a/testsuite/expect/test1.89 b/testsuite/expect/test1.89
index 22fc50ca5118c61ab00fb7cd26e13433677542ba..73750e5acf05152467d8ccd9b0b6399d63d2d77c 100755
--- a/testsuite/expect/test1.89
+++ b/testsuite/expect/test1.89
@@ -102,6 +102,7 @@ expect {
 #
 # Run a job step with affinity
 #
+set expected_mask [ expr ((1 << $task_cnt) - 1) ]
 set task_mask 0
 send "$srun -c1 --cpu_bind=rank $file_prog\n"
 expect {
@@ -121,8 +122,8 @@ expect {
 	}
 	-re $prompt
 }
-if {$task_mask != $mask} {
-	send_user "\nFAILURE: affinity mask inconsistency ($task_mask,$mask)\n"
+if {$task_mask != $expected_mask} {
+	send_user "\nFAILURE: affinity mask inconsistency ($task_mask,$expected_mask)\n"
 	set exit_code 1
 }
 
diff --git a/testsuite/expect/test18.36 b/testsuite/expect/test18.36
index f9f652d5c583d7f6cbc50b57e0409501ec6e6768..50bd11a20db2e44d6d8d7c3e74a8deb946537400 100755
--- a/testsuite/expect/test18.36
+++ b/testsuite/expect/test18.36
@@ -121,6 +121,7 @@ expect {
 #
 # Run a job step with affinity
 #
+set expected_mask [ expr ((1 << $task_cnt) - 1) ]
 set task_mask 0
 send "$slaunch -n $available_cpus --cpu_bind=rank $file_prog\n"
 expect {
@@ -139,7 +140,7 @@ expect {
 	}
 	-re $prompt
 }
-if {$task_mask != $mask} {
-	send_user "\nFAILURE: affinity mask inconsistency ($task_mask,$mask)\n"
send_user "\nFAILURE: affinity mask inconsistency ($task_mask,$mask)\n" set exit_code 1 }