diff --git a/NEWS b/NEWS
index 8c73dd78dfaeb921509d3a3fcab48a062cb76c5a..970d14441a31419c7cd63ce51d9b89e7f0462908 100644
--- a/NEWS
+++ b/NEWS
@@ -13,6 +13,9 @@ documents those changes that are of interest to users and administrators.
  -- Fix issue where if no clusters were added but yet a QOS needed to be
     deleted make it possible.
  -- SlurmDBD - change all timestamps to bigint from int to solve Y2038 problem.
+ -- Add salloc/sbatch/srun --spread-job to distribute tasks over as many nodes
+    as possible. This also treats the --ntasks-per-node option as a maximum
+    value.
 
 * Changes in Slurm 16.05.2
 ==========================
diff --git a/doc/man/man1/salloc.1 b/doc/man/man1/salloc.1
index 1ee90a10d9493550a0a41ee4e3f9f64ba94ad71f..9037a7f12bfda351b49aa41ab59ea095795e8478 100644
--- a/doc/man/man1/salloc.1
+++ b/doc/man/man1/salloc.1
@@ -1203,6 +1203,11 @@ Restrict node selection to nodes with at least the specified number of
 sockets.  See additional information under \fB\-B\fR option above when
 task/affinity plugin is enabled.
 
+.TP
+\fB\-\-spread\-job\fR
+Spread the job allocation over as many nodes as possible and attempt to
+evenly distribute tasks across the allocated nodes.
+
 .TP
 \fB\-\-switches\fR=<\fIcount\fR>[@<\fImax\-time\fR>]
 When a tree topology is used, this defines the maximum count of switches
@@ -1482,6 +1487,9 @@ Same as \fB\-\-reservation\fR
 \fBSALLOC_SIGNAL\fR
 Same as \fB\-\-signal\fR
 .TP
+\fBSALLOC_SPREAD_JOB\fR
+Same as \fB\-\-spread\-job\fR
+.TP
 \fBSALLOC_THREAD_SPEC\fR
 Same as \fB\-\-thread\-spec\fR
 .TP
diff --git a/doc/man/man1/sbatch.1 b/doc/man/man1/sbatch.1
index 0f5e0d088c9f4f26de4d2467e60cd577612087ba..97b5bda99239b6d1b6960a1ba33cefb92c1cedd5 100644
--- a/doc/man/man1/sbatch.1
+++ b/doc/man/man1/sbatch.1
@@ -1387,6 +1387,11 @@ Restrict node selection to nodes with at least the specified number of
 sockets.  See additional information under \fB\-B\fR option above when
 task/affinity plugin is enabled.
 
+.TP
+\fB\-\-spread\-job\fR
+Spread the job allocation over as many nodes as possible and attempt to
+evenly distribute tasks across the allocated nodes.
+
 .TP
 \fB\-\-switches\fR=<\fIcount\fR>[@<\fImax\-time\fR>]
 When a tree topology is used, this defines the maximum count of switches
@@ -1738,6 +1743,9 @@ Same as \fB\-\-requeue\fR
 \fBSBATCH_SIGNAL\fR
 Same as \fB\-\-signal\fR
 .TP
+\fBSBATCH_SPREAD_JOB\fR
+Same as \fB\-\-spread\-job\fR
+.TP
 \fBSBATCH_THREAD_SPEC\fR
 Same as \fB\-\-thread\-spec\fR
 .TP
diff --git a/doc/man/man1/srun.1 b/doc/man/man1/srun.1
index d2dd29beba4fe0959fcc7a9e7f959ded260b0d74..a0feb08049d28d98e1b8b93a6532f6a73cdc4b70 100644
--- a/doc/man/man1/srun.1
+++ b/doc/man/man1/srun.1
@@ -1811,6 +1811,11 @@ Restrict node selection to nodes with at least the specified number of
 sockets.  See additional information under \fB\-B\fR option above when
 task/affinity plugin is enabled. This option applies to job allocations.
 
+.TP
+\fB\-\-spread\-job\fR
+Spread the job allocation over as many nodes as possible and attempt to
+evenly distribute tasks across the allocated nodes.
+
 .TP
 \fB\-\-switches\fR=<\fIcount\fR>[@<\fImax\-time\fR>]
 When a tree topology is used, this defines the maximum count of switches
@@ -2498,6 +2503,9 @@ Same as \fB\-e, \-\-error\fR
 \fBSLURM_STDINMODE\fR
 Same as \fB\-i, \-\-input\fR
 .TP
+\fBSLURM_SPREAD_JOB\fR
+Same as \fB\-\-spread\-job\fR
+.TP
 \fBSLURM_SRUN_REDUCE_TASK_EXIT_MSG\fR
 if set and non-zero, successive task exit messages with the same exit code will
 be printed only once.
diff --git a/slurm/slurm.h.in b/slurm/slurm.h.in
index 8e8f76243d069574d1d726cfb918889709109e9d..262c293ab127a500e39740d4584fe4e1e2389107 100644
--- a/slurm/slurm.h.in
+++ b/slurm/slurm.h.in
@@ -977,7 +977,7 @@ enum ctx_keys {
 #define PRIORITY_FLAGS_FAIR_TREE	0x0020	/* Prioritize by level in
 						 * account hierarchy. */
 
-/* These bits are set in the flags field of job_desc_msg_t */
+/* These bits are set in the bitflags field of job_desc_msg_t */
 #define KILL_INV_DEP       0x00000001	/* Kill job on invalid dependency */
 #define NO_KILL_INV_DEP    0x00000002	/* Don't kill job on invalid dependency */
 #define HAS_STATE_DIR      0x00000004	/* Used by slurmctld to track state dir */
@@ -986,6 +986,7 @@ enum ctx_keys {
 #define TEST_NOW_ONLY      0x00000020	/* Test for immediately start only */
 #define NODE_MEM_CALC      0x00000040	/* Per-node memory limit calculated */
 #define NODE_REBOOT        0x00000080	/* Waiting for node reboot */
+#define SPREAD_JOB         0x00000100	/* Spread job across max node count */
 
 /*****************************************************************************\
  *      SLURM HOSTLIST FUNCTIONS
diff --git a/src/api/job_info.c b/src/api/job_info.c
index 2007c7e1b359f98758ad7240b880e29103877afd..21f22701552613a3dc65a67a3303ee452443cfdc 100644
--- a/src/api/job_info.c
+++ b/src/api/job_info.c
@@ -955,6 +955,8 @@ slurm_sprint_job_info ( job_info_t * job_ptr, int one_liner )
 			xstrcat(out, "KillOInInvalidDependent=Yes");
 		if (job_ptr->bitflags & NO_KILL_INV_DEP)
 			xstrcat(out, "KillOInInvalidDependent=No");
+		if (job_ptr->bitflags & SPREAD_JOB)
+			xstrcat(out, "SpreadJob=Yes");
 	}
 
 	/****** END OF JOB RECORD ******/
diff --git a/src/plugins/select/cons_res/job_test.c b/src/plugins/select/cons_res/job_test.c
index 3a1333f5add0ab321cec87be91dd22d42569e6cd..d4a1e4da592a995b26c0365229c4a48b2950cf10 100644
--- a/src/plugins/select/cons_res/job_test.c
+++ b/src/plugins/select/cons_res/job_test.c
@@ -202,7 +202,7 @@ static uint16_t _allocate_sc(struct job_record *job_ptr, bitstr_t *core_map,
 			      bool entire_sockets_only)
 {
 	uint16_t cpu_count = 0, cpu_cnt = 0;
-	uint16_t si, cps, avail_cpus = 0, num_tasks = 0;
+	uint16_t si, cps, avail_cpus = 0, max_cpus, num_tasks = 0;
 	uint32_t core_begin    = cr_get_coremap_offset(node_i);
 	uint32_t core_end      = cr_get_coremap_offset(node_i+1);
 	uint32_t c;
@@ -432,6 +432,16 @@ static uint16_t _allocate_sc(struct job_record *job_ptr, bitstr_t *core_map,
 		if (job_ptr->details->ntasks_per_node)
 			avail_cpus = num_tasks * cpus_per_task;
 	}
+
+	if ((job_ptr->bit_flags & SPREAD_JOB) &&
+	    (job_ptr->details->ntasks_per_node !=  0)) {
+		/* Treat ntasks_per_node as maximum */
+		max_cpus = job_ptr->details->ntasks_per_node;
+		if (cpus_per_task > 1)
+			max_cpus *= cpus_per_task;
+		avail_cpus = MIN(avail_cpus, max_cpus);
+	}
+
 	if ((job_ptr->details->ntasks_per_node &&
 	     (num_tasks < job_ptr->details->ntasks_per_node) &&
 	     (job_ptr->details->overcommit == 0)) ||
diff --git a/src/salloc/opt.c b/src/salloc/opt.c
index 9efc8ae4b39cf56a3a90577aba1220560a4659fd..6725842c6bcec0c111370fb8a3923c94e239a516 100644
--- a/src/salloc/opt.c
+++ b/src/salloc/opt.c
@@ -105,6 +105,7 @@
 #define OPT_HINT	0x1a
 #define OPT_CPU_FREQ    0x1b
 #define OPT_THREAD_SPEC 0x1c
+#define OPT_SPREAD_JOB  0x1d
 
 /* generic getopt_long flags, integers and *not* valid characters */
 
@@ -159,6 +160,7 @@
 #define LONG_OPT_PROFILE         0x144
 #define LONG_OPT_CPU_FREQ        0x145
 #define LONG_OPT_GRES_FLAGS      0x146
+#define LONG_OPT_SPREAD_JOB      0x147
 #define LONG_OPT_PRIORITY        0x160
 #define LONG_OPT_POWER           0x162
 #define LONG_OPT_THREAD_SPEC     0x163
@@ -416,16 +418,17 @@ env_vars_t env_vars[] = {
   {"SALLOC_PARTITION",     OPT_STRING,     &opt.partition,     NULL          },
   {"SALLOC_POWER",         OPT_POWER,      NULL,               NULL          },
   {"SALLOC_PROFILE",       OPT_PROFILE,    NULL,               NULL          },
+  {"SALLOC_REQ_SWITCH",    OPT_INT,        &opt.req_switch,    NULL          },
   {"SALLOC_QOS",           OPT_STRING,     &opt.qos,           NULL          },
   {"SALLOC_RESERVATION",   OPT_STRING,     &opt.reservation,   NULL          },
   {"SALLOC_SIGNAL",        OPT_SIGNAL,     NULL,               NULL          },
+  {"SALLOC_SPREAD_JOB",    OPT_SPREAD_JOB, NULL,               NULL          },
   {"SALLOC_THREAD_SPEC",   OPT_THREAD_SPEC,NULL,               NULL          },
   {"SALLOC_TIMELIMIT",     OPT_STRING,     &opt.time_limit_str,NULL          },
   {"SALLOC_WAIT",          OPT_IMMEDIATE,  NULL,               NULL          },
   {"SALLOC_WAIT_ALL_NODES",OPT_INT,        &opt.wait_all_nodes,NULL          },
-  {"SALLOC_WCKEY",         OPT_STRING,     &opt.wckey,         NULL          },
-  {"SALLOC_REQ_SWITCH",    OPT_INT,        &opt.req_switch,    NULL          },
   {"SALLOC_WAIT4SWITCH",   OPT_TIME_VAL,   NULL,               NULL          },
+  {"SALLOC_WCKEY",         OPT_STRING,     &opt.wckey,         NULL          },
   {NULL, 0, NULL, NULL}
 };
 
@@ -622,6 +625,9 @@ _process_env_var(env_vars_t *e, const char *val)
 		opt.core_spec = parse_int("thread_spec", val, true) |
 					 CORE_SPEC_THREAD;
 		break;
+	case OPT_SPREAD_JOB:
+		opt.job_flags |= SPREAD_JOB;
+		break;
 	default:
 		/* do nothing */
 		break;
@@ -715,6 +721,7 @@ void set_options(const int argc, char **argv)
 		{"reservation",   required_argument, 0, LONG_OPT_RESERVATION},
 		{"signal",        required_argument, 0, LONG_OPT_SIGNAL},
 		{"sockets-per-node", required_argument, 0, LONG_OPT_SOCKETSPERNODE},
+		{"spread-job",    no_argument,       0, LONG_OPT_SPREAD_JOB},
 		{"switches",      required_argument, 0, LONG_OPT_REQ_SWITCH},
 		{"tasks-per-node",  required_argument, 0, LONG_OPT_NTASKSPERNODE},
 		{"thread-spec",   required_argument, 0, LONG_OPT_THREAD_SPEC},
@@ -739,7 +746,7 @@ void set_options(const int argc, char **argv)
 
 	opt.progname = xbasename(argv[0]);
 	optind = 0;
-	while((opt_char = getopt_long(argc, argv, opt_string,
+	while ((opt_char = getopt_long(argc, argv, opt_string,
 				      optz, &option_index)) != -1) {
 		switch (opt_char) {
 
@@ -1276,6 +1283,9 @@ void set_options(const int argc, char **argv)
 			opt.core_spec = parse_int("thread_spec", optarg, true) |
 				CORE_SPEC_THREAD;
 			break;
+		case LONG_OPT_SPREAD_JOB:
+			opt.job_flags |= SPREAD_JOB;
+			break;
 		default:
 			if (spank_process_option(opt_char, optarg) < 0) {
 				error("Unrecognized command line parameter %c",
@@ -2006,7 +2016,7 @@ static void _usage(void)
 #endif
 #endif
 "              [--mail-type=type] [--mail-user=user] [--nice[=value]]\n"
-"              [--bell] [--no-bell] [--kill-command[=signal]]\n"
+"              [--bell] [--no-bell] [--kill-command[=signal]] [--spread-job]\n"
 "              [--nodefile=file] [--nodelist=hosts] [--exclude=hosts]\n"
 "              [--network=type] [--mem-per-cpu=MB] [--qos=qos]\n"
 "              [--mem_bind=...] [--reservation=name] [--mcs-label=mcs]\n"
@@ -2072,6 +2082,7 @@ static void _help(void)
 "      --reboot                reboot compute nodes before starting job\n"
 "  -s, --oversubscribe         oversubscribe resources with other jobs\n"
 "      --signal=[B:]num[@time] send signal when time limit within time seconds\n"
+"      --spread-job            spread job across as many nodes as possible\n"
 "      --switches=max-switches{@max-time-to-wait}\n"
 "                              Optimum switches and max time to wait for optimum\n"
 "  -S, --core-spec=cores       count of reserved cores\n"
diff --git a/src/sbatch/opt.c b/src/sbatch/opt.c
index 6eeabc359d543d65213981fcd7f7625a9a8d1863..267d3dc2b79e6b8916f6d007bf5c70056689b1e7 100644
--- a/src/sbatch/opt.c
+++ b/src/sbatch/opt.c
@@ -111,6 +111,7 @@ enum wrappers {
 #define OPT_CORE_SPEC     0x1a
 #define OPT_CPU_FREQ      0x1b
 #define OPT_POWER         0x1d
+#define OPT_SPREAD_JOB    0x1e
 #define OPT_ARRAY_INX     0x20
 #define OPT_PROFILE       0x21
 #define OPT_HINT	  0x22
@@ -180,6 +181,7 @@ enum wrappers {
 #define LONG_OPT_GRES_FLAGS      0x15a
 #define LONG_OPT_PRIORITY        0x160
 #define LONG_OPT_KILL_INV_DEP    0x161
+#define LONG_OPT_SPREAD_JOB      0x162
 #define LONG_OPT_MCS_LABEL       0x165
 #define LONG_OPT_DEADLINE        0x166
 
@@ -473,6 +475,7 @@ env_vars_t env_vars[] = {
   {"SBATCH_REQUEUE",       OPT_REQUEUE,    NULL,               NULL          },
   {"SBATCH_RESERVATION",   OPT_STRING,     &opt.reservation,   NULL          },
   {"SBATCH_SIGNAL",        OPT_SIGNAL,     NULL,               NULL          },
+  {"SBATCH_SPREAD_JOB",    OPT_SPREAD_JOB, NULL,               NULL          },
   {"SBATCH_THREAD_SPEC",   OPT_THREAD_SPEC,NULL,               NULL          },
   {"SBATCH_TIMELIMIT",     OPT_STRING,     &opt.time_limit_str,NULL          },
   {"SBATCH_WAIT",          OPT_BOOL,       &opt.wait,          NULL          },
@@ -661,6 +664,9 @@ _process_env_var(env_vars_t *e, const char *val)
 			exit(error_exit);
 		}
 		break;
+	case OPT_SPREAD_JOB:
+		opt.job_flags |= SPREAD_JOB;
+		break;
 	case OPT_GET_USER_ENV:
 		if (val)
 			_proc_get_user_env((char *)val);
@@ -792,6 +798,7 @@ static struct option long_options[] = {
 	{"reservation",   required_argument, 0, LONG_OPT_RESERVATION},
 	{"signal",        required_argument, 0, LONG_OPT_SIGNAL},
 	{"sockets-per-node", required_argument, 0, LONG_OPT_SOCKETSPERNODE},
+	{"spread-job",    no_argument,       0, LONG_OPT_SPREAD_JOB},
 	{"switches",      required_argument, 0, LONG_OPT_REQ_SWITCH},
 	{"tasks-per-node",required_argument, 0, LONG_OPT_NTASKSPERNODE},
 	{"test-only",     no_argument,       0, LONG_OPT_TEST_ONLY},
@@ -1858,6 +1865,9 @@ static void _set_options(int argc, char **argv)
 			if (xstrcasecmp(optarg, "no") == 0)
 				opt.job_flags |= NO_KILL_INV_DEP;
 			break;
+		case LONG_OPT_SPREAD_JOB:
+			opt.job_flags |= SPREAD_JOB;
+			break;
 		default:
 			if (spank_process_option (opt_char, optarg) < 0) {
 				error("Unrecognized command line parameter %c",
@@ -3214,7 +3224,7 @@ static void _usage(void)
 "              [--cpu-freq=min[-max[:gov]] [--power=flags] [--gres-flags=opts]\n"
 "              [--switches=max-switches{@max-time-to-wait}] [--reboot]\n"
 "              [--core-spec=cores] [--thread-spec=threads] [--bb=burst_buffer_spec]\n"
-"              [--array=index_values] [--profile=...] [--ignore-pbs]\n"
+"              [--array=index_values] [--profile=...] [--ignore-pbs] [--spread-job]\n"
 "              [--export[=names]] [--export-file=file|fd] executable [args...]\n");
 }
 
@@ -3287,6 +3297,7 @@ static void _help(void)
 "  -s, --oversubscribe         over subscribe resources with other jobs\n"
 "  -S, --core-spec=cores       count of reserved cores\n"
 "      --signal=[B:]num[@time] send signal when time limit within time seconds\n"
+"      --spread-job            spread job across as many nodes as possible\n"
 "      --switches=max-switches{@max-time-to-wait}\n"
 "                              Optimum switches and max time to wait for optimum\n"
 "      --thread-spec=threads   count of reserved threads\n"
diff --git a/src/slurmctld/job_mgr.c b/src/slurmctld/job_mgr.c
index c0ac413181c10bc12d91b199aa963828dc19c2cc..f0f0e4a068de5d38f94dbc0e4f297f164fbd22a2 100644
--- a/src/slurmctld/job_mgr.c
+++ b/src/slurmctld/job_mgr.c
@@ -6961,6 +6961,14 @@ _copy_job_desc_to_job_record(job_desc_msg_t * job_desc,
 	 */
 	detail_ptr->mc_ptr = _set_multi_core_data(job_desc);
 
+	if ((job_ptr->bit_flags & SPREAD_JOB) && (detail_ptr->max_nodes == 0) &&
+	    (detail_ptr->num_tasks != 0)) {
+		if (detail_ptr->min_nodes == 0)
+			detail_ptr->min_nodes = 1;
+		detail_ptr->max_nodes =
+			MIN(node_record_count, detail_ptr->num_tasks);
+	}
+
 	return SLURM_SUCCESS;
 }
 
diff --git a/src/srun/libsrun/opt.c b/src/srun/libsrun/opt.c
index 61ac0f49578fee81e42c1bf384316d95b2d64353..867c5feeeb14ebc90a458b4b7e1836b2994b707a 100644
--- a/src/srun/libsrun/opt.c
+++ b/src/srun/libsrun/opt.c
@@ -116,6 +116,7 @@
 #define OPT_PROFILE     0x20
 #define OPT_EXPORT	0x21
 #define OPT_HINT	0x22
+#define OPT_SPREAD_JOB  0x23
 
 /* generic getopt_long flags, integers and *not* valid characters */
 #define LONG_OPT_HELP        0x100
@@ -192,6 +193,7 @@
 #define LONG_OPT_LAUNCH_CMD      0x156
 #define LONG_OPT_PROFILE         0x157
 #define LONG_OPT_EXPORT          0x158
+#define LONG_OPT_SPREAD_JOB      0x159
 #define LONG_OPT_PRIORITY        0x160
 #define LONG_OPT_ACCEL_BIND      0x161
 #define LONG_OPT_MCS_LABEL       0x165
@@ -621,6 +623,7 @@ env_vars_t env_vars[] = {
 {"SLURM_RESERVATION",   OPT_STRING,     &opt.reservation,   NULL             },
 {"SLURM_RESTART_DIR",   OPT_STRING,     &opt.restart_dir ,  NULL             },
 {"SLURM_RESV_PORTS",    OPT_RESV_PORTS, NULL,               NULL             },
+{"SLURM_SPREAD_JOB",    OPT_SPREAD_JOB, NULL,               NULL             },
 {"SLURM_SIGNAL",        OPT_SIGNAL,     NULL,               NULL             },
 {"SLURM_SRUN_MULTI",    OPT_MULTI,      NULL,               NULL             },
 {"SLURM_STDERRMODE",    OPT_STRING,     &opt.efname,        NULL             },
@@ -859,6 +862,9 @@ _process_env_var(env_vars_t *e, const char *val)
 		opt.core_spec = _get_int(val, "thread_spec", true) |
 					 CORE_SPEC_THREAD;
 		break;
+	case OPT_SPREAD_JOB:
+		opt.job_flags |= SPREAD_JOB;
+		break;
 	default:
 		/* do nothing */
 		break;
@@ -994,6 +1000,7 @@ static void _set_options(const int argc, char **argv)
 		{"signal",	     required_argument, 0, LONG_OPT_SIGNAL},
 		{"slurmd-debug",     required_argument, 0, LONG_OPT_DEBUG_SLURMD},
 		{"sockets-per-node", required_argument, 0, LONG_OPT_SOCKETSPERNODE},
+		{"spread-job",       no_argument,       0, LONG_OPT_SPREAD_JOB},
 		{"switches",         required_argument, 0, LONG_OPT_REQ_SWITCH},
 		{"task-epilog",      required_argument, 0, LONG_OPT_TASK_EPILOG},
 		{"task-prolog",      required_argument, 0, LONG_OPT_TASK_PROLOG},
@@ -1752,6 +1759,9 @@ static void _set_options(const int argc, char **argv)
 		case LONG_OPT_COMPRESS:
 			opt.compress = parse_compress_type(optarg);
 			break;
+		case LONG_OPT_SPREAD_JOB:
+			opt.job_flags |= SPREAD_JOB;
+			break;
 		default:
 			if (spank_process_option (opt_char, optarg) < 0) {
 				exit(error_exit);
@@ -2707,7 +2717,7 @@ static void _usage(void)
 "            [--prolog=fname] [--epilog=fname]\n"
 "            [--task-prolog=fname] [--task-epilog=fname]\n"
 "            [--ctrl-comm-ifhn=addr] [--multi-prog] [--mcs-label=mcs]\n"
-"            [--cpu-freq=min[-max[:gov]] [--power=flags]\n"
+"            [--cpu-freq=min[-max[:gov]] [--power=flags] [--spread-job]\n"
 "            [--switches=max-switches{@max-time-to-wait}] [--reboot]\n"
 "            [--core-spec=cores] [--thread-spec=threads]\n"
 "            [--bb=burst_buffer_spec] [--bbf=burst_buffer_file]\n"
@@ -2804,6 +2814,7 @@ static void _help(void)
 "  -S, --core-spec=cores       count of reserved cores\n"
 "      --signal=[B:]num[@time] send signal when time limit within time seconds\n"
 "      --slurmd-debug=level    slurmd debug level\n"
+"      --spread-job            spread job across as many nodes as possible\n"
 "      --switches=max-switches{@max-time-to-wait}\n"
 "                              Optimum switches and max time to wait for optimum\n"
 "      --task-epilog=program   run \"program\" after launching task\n"