diff --git a/src/common/env.c b/src/common/env.c
index 2674a8e4d9fd2e3738896bb6310a68e42bdaa299..994c4419e9a7ec41e3b4cbe6c545df09ed94814b 100644
--- a/src/common/env.c
+++ b/src/common/env.c
@@ -877,21 +877,16 @@ extern void
 env_array_for_batch_job(char ***dest, const batch_job_launch_msg_t *batch,
 			const char *node_name)
 {
-	char *tmp = getenvp(batch->environment, "SLURM_CPUS_PER_TASK");
+	char *tmp;
 	uint32_t num_nodes = 0;
 	uint32_t num_cpus = 0;
 	int i;
 	slurm_step_layout_t *step_layout = NULL;
-	int cpus_per_task = 1;
 	uint32_t num_tasks = batch->nprocs;
+	uint16_t cpus_per_task;
 
-	if(tmp) 
-		cpus_per_task = atoi(tmp);
-	
 	/* There is no explicit node count in the batch structure,
-	 * so we need to calculate the node count. We also need to
-	 * figure out the explicit cpu count so we can figure out the
-	 * cpus_per_task. */
+	 * so we need to calculate the node count. */
 	for (i = 0; i < batch->num_cpu_groups; i++) {
 		num_nodes += batch->cpu_count_reps[i];
 		num_cpus += batch->cpu_count_reps[i] * batch->cpus_per_node[i];
@@ -921,17 +916,28 @@ env_array_for_batch_job(char ***dest, const batch_job_launch_msg_t *batch,
 	if(num_tasks) 
 		env_array_overwrite_fmt(dest, "SLURM_NPROCS", "%u", 
 					num_tasks);
-	else 
-		num_tasks = num_cpus / cpus_per_task;
+
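+	/* cpus_per_task is taken from the launch message rather than
+	 * the job's environment; 0 or NO_VAL means it was not set */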
+	if((batch->cpus_per_task != 0) &&
+	   (batch->cpus_per_task != (uint16_t) NO_VAL))
+		cpus_per_task = batch->cpus_per_task;
+	else
+		cpus_per_task = 1;	/* default value */
+	if (cpus_per_task > 1) {
+		env_array_overwrite_fmt(dest, "SLURM_CPUS_PER_TASK", "%u",
+					cpus_per_task);
+	}
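+	/* recompute the task count from the allocated CPUs for the layout */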
+	num_tasks = num_cpus / cpus_per_task;
 	
 	step_layout = slurm_step_layout_create(batch->nodes,
 					       batch->cpus_per_node,
 					       batch->cpu_count_reps,
 					       num_nodes,
 					       num_tasks,
-					       (uint16_t)cpus_per_task,
-					       (uint16_t)
-					       SLURM_DIST_BLOCK,
+					       cpus_per_task,
+					       (uint16_t)SLURM_DIST_BLOCK,
 					       (uint16_t)NO_VAL);
 	tmp = _uint16_array_to_str(step_layout->node_cnt,
 				   step_layout->tasks);
diff --git a/src/common/slurm_protocol_defs.h b/src/common/slurm_protocol_defs.h
index 21e3355636086ba6f67caefacb3f8a0795bf4304..fe3d6e8977de3737e38f1c22d116254a579ba57f 100644
--- a/src/common/slurm_protocol_defs.h
+++ b/src/common/slurm_protocol_defs.h
@@ -605,6 +605,7 @@ typedef struct batch_job_launch_msg {
 	uint32_t num_cpu_groups;/* elements in below cpu arrays */
 	uint16_t *cpus_per_node;/* cpus per node */
 	uint32_t *cpu_count_reps;/* how many nodes have same cpu count */
+	uint16_t cpus_per_task;	/* number of CPUs requested per task */
 	char *nodes;		/* list of nodes allocated to job_step */
 	char *script;		/* the actual job script, default NONE */
 	char *err;		/* pathname of stderr */
diff --git a/src/common/slurm_protocol_pack.c b/src/common/slurm_protocol_pack.c
index 26e60854d59b5167d5a23f3dddbd0b9ce8b32ead..b9a76daabf7448bffed0e4e210b366f29d197480 100644
--- a/src/common/slurm_protocol_pack.c
+++ b/src/common/slurm_protocol_pack.c
@@ -4078,6 +4078,8 @@ _pack_batch_job_launch_msg(batch_job_launch_msg_t * msg, Buf buffer)
 	pack8(msg->overcommit, buffer);
 
 	pack16(msg->acctg_freq,     buffer);
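+	/* NOTE: field order must match _unpack_batch_job_launch_msg() */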
+	pack16(msg->cpus_per_task,  buffer);
 
 	pack32(msg->num_cpu_groups, buffer);
 	if (msg->num_cpu_groups) {
@@ -4126,6 +4128,7 @@ _unpack_batch_job_launch_msg(batch_job_launch_msg_t ** msg, Buf buffer)
 	safe_unpack8(&launch_msg_ptr->overcommit, buffer);
 
 	safe_unpack16(&launch_msg_ptr->acctg_freq,     buffer);
+	safe_unpack16(&launch_msg_ptr->cpus_per_task,  buffer);
 
 	safe_unpack32(&launch_msg_ptr->num_cpu_groups, buffer);
 	if (launch_msg_ptr->num_cpu_groups) {
diff --git a/src/sbatch/opt.c b/src/sbatch/opt.c
index 6ca85bcfd56d1ba6aa3e8799a68b65f1164b591d..33398f00c606ce51a93221f89140b627c6217509 100644
--- a/src/sbatch/opt.c
+++ b/src/sbatch/opt.c
@@ -1923,8 +1923,6 @@ static bool _opt_verify(void)
 		else
 			setenvf(NULL, "SLURM_OPEN_MODE", "t");
 	}
-	if (opt.cpus_per_task > 1)
-		setenvfs("SLURM_CPUS_PER_TASK=%d", opt.cpus_per_task); 
 	if (opt.dependency)
 		setenvfs("SLURM_JOB_DEPENDENCY=%s", opt.dependency);
 
diff --git a/src/slurmctld/job_scheduler.c b/src/slurmctld/job_scheduler.c
index 8e33269e84146db00a7fa6c2fc4b929f37f07e3f..ba200211ad776421611552178f607f6ecc37b066 100644
--- a/src/slurmctld/job_scheduler.c
+++ b/src/slurmctld/job_scheduler.c
@@ -526,6 +526,7 @@ extern void launch_job(struct job_record *job_ptr)
 	launch_msg_ptr->overcommit = job_ptr->details->overcommit;
 	launch_msg_ptr->open_mode  = job_ptr->details->open_mode;
 	launch_msg_ptr->acctg_freq = job_ptr->details->acctg_freq;
+	launch_msg_ptr->cpus_per_task = job_ptr->details->cpus_per_task;
 
 	if (make_batch_job_cred(launch_msg_ptr, job_ptr)) {
 		error("aborting batch job %u", job_ptr->job_id);
diff --git a/src/slurmctld/proc_req.c b/src/slurmctld/proc_req.c
index ea1af5c1ae89d82f7605cd8134a24efe5b4efa7b..b03d1463e1e15e04c9e65a8fe22ee0713c271ba3 100644
--- a/src/slurmctld/proc_req.c
+++ b/src/slurmctld/proc_req.c
@@ -2802,6 +2802,7 @@ int _launch_batch_step(job_desc_msg_t *job_desc_msg, uid_t uid,
 						 job_desc_msg->environment);
 	launch_msg_ptr->envc = job_desc_msg->env_size;
 	launch_msg_ptr->job_mem = job_desc_msg->job_min_memory;
+	launch_msg_ptr->cpus_per_task = job_desc_msg->cpus_per_task;
 
 	/* _max_nprocs() represents the total number of CPUs available
 	 * for this step (overcommit not supported yet). If job_desc_msg