diff --git a/NEWS b/NEWS
index a00fcfa9fbbf2fa6de637fc1efd574ba606282aa..3d14c4bce15c6cd15f5d0be9794b17f18a9ff55c 100644
--- a/NEWS
+++ b/NEWS
@@ -245,6 +245,14 @@ documents those changes that are of interest to users and administrators.
     variable "PMI_CRAY_NO_SMP_ENV=1"
  -- Fix invalid memory reference in SlurmDBD when putting a node up.
  -- Allow opening of plugstack.conf even when a symlink.
+ -- Fix scontrol reboot so that rebooted nodes are not set down with reason
+    'Node xyz unexpectedly rebooted' but are correctly returned to service.
+ -- CRAY - Throttle the post-NHC operations so as not to hog the job write
+    lock when many steps/jobs finish at once.
+ -- Disable changes to GRES count while jobs are running on the node.
+ -- CRAY - Fix issue with scontrol reconfig by caching the system topology
+    info rather than querying it again on each reconfiguration.
+ -- slurmd: Remove erroneous reporting of "Error reading step ... memory
+    limits". The logic was treating success as an error.
 
 * Changes in Slurm 14.11.5
 ==========================
diff --git a/src/common/gres.c b/src/common/gres.c
index b4348d2529a98bc14ff1d0c6020bbda8c9772b19..da3ab68ebe406a632680bcd7a3154506b619c6bc 100644
--- a/src/common/gres.c
+++ b/src/common/gres.c
@@ -1631,11 +1631,23 @@ extern int _node_config_validate(char *node_name, char *orig_config,
 			     context_ptr->gres_type, node_name,
 			     gres_data->gres_cnt_found, gres_cnt);
 		}
-		gres_data->gres_cnt_found = gres_cnt;
-		updated_config = true;
+		if ((gres_data->gres_cnt_found != NO_VAL) &&
+		    (gres_data->gres_cnt_alloc != 0)) {
+			if (reason_down && (*reason_down == NULL)) {
+				xstrfmtcat(*reason_down,
+					   "%s count changed and jobs are "
+					   "using them (%u != %u)",
+					   context_ptr->gres_type,
+					   gres_data->gres_cnt_found, gres_cnt);
+			}
+			rc = EINVAL;
+		} else {
+			gres_data->gres_cnt_found = gres_cnt;
+			updated_config = true;
+		}
 	}
 	if (updated_config == false)
-		return SLURM_SUCCESS;
+		return rc;
 
 	if ((set_cnt == 0) && (set_cnt != gres_data->topo_cnt)) {
 		/* Need to clear topology info */
diff --git a/src/common/slurm_protocol_defs.h b/src/common/slurm_protocol_defs.h
index f13e9f4783a4d13dc0b3450f17eead6682b76ddf..bc0de744fd27a6352283a5664bda6cb5b1bda49a 100644
--- a/src/common/slurm_protocol_defs.h
+++ b/src/common/slurm_protocol_defs.h
@@ -1351,9 +1351,11 @@ extern char *rpc_num2string(uint16_t opcode);
 		int rc;							\
 		while (remaining > 0) {					\
 			rc = read(fd, ptr, remaining);			\
-			if ((rc == 0) && (remaining == size))		\
+			if ((rc == 0) && (remaining == size)) {		\
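+				/* EOF before any data was read */	\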
+				debug("%s:%d: %s: safe_read EOF",	\
+				      __FILE__, __LINE__, __CURRENT_FUNC__); \
 				goto rwfail;				\
-			else if (rc == 0) {				\
+			} else if (rc == 0) {				\
 				debug("%s:%d: %s: safe_read (%d of %d) EOF", \
 				      __FILE__, __LINE__, __CURRENT_FUNC__, \
 				      remaining, (int)size);		\
diff --git a/src/plugins/select/cray/select_cray.c b/src/plugins/select/cray/select_cray.c
index c9a3c620e4d4a9eaa7000b5207defeaea59ab4f3..f7fe589d1df14e58df92f1c5c97ba063421d1307 100644
--- a/src/plugins/select/cray/select_cray.c
+++ b/src/plugins/select/cray/select_cray.c
@@ -151,6 +151,13 @@ static uint32_t blade_cnt = 0;
 static pthread_mutex_t blade_mutex = PTHREAD_MUTEX_INITIALIZER;
 static time_t last_npc_update;
 
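+/* System topology info, retrieved once in select_p_node_init(), cached
+ * across calls (e.g. on scontrol reconfig) and freed in fini() */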
+static alpsc_topology_t *topology = NULL;
+static size_t topology_num_nodes = 0;
+
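+/* Throttle state limiting post-NHC cleanup to one thread at a time
+ * (see _throttle_start() below) */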
+static int active_post_nhc_cnt = 0;
+static pthread_mutex_t throttle_mutex = PTHREAD_MUTEX_INITIALIZER;
+static pthread_cond_t throttle_cond = PTHREAD_COND_INITIALIZER;
+
 #ifdef HAVE_NATIVE_CRAY
 
 
@@ -865,6 +872,32 @@ static void _set_job_running_restore(select_jobinfo_t *jobinfo)
 		last_npc_update = time(NULL);
 }
 
+/* These functions prevent the job and step fini handlers from keeping
+ * the slurmctld write locks constantly set after NHC has run, which
+ * can prevent other RPCs and system functions from being processed.
+ * For example, a steady stream of step or job completions can prevent
+ * squeue from responding or jobs from being scheduled. */
+static void _throttle_start(void)
+{
+	slurm_mutex_lock(&throttle_mutex);
+	while (1) {
+		if (active_post_nhc_cnt == 0) {
+			active_post_nhc_cnt++;
+			break;
+		}
+		pthread_cond_wait(&throttle_cond, &throttle_mutex);
+	}
+	slurm_mutex_unlock(&throttle_mutex);
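+	/* Briefly yield so other pending RPCs can acquire the slurmctld
+	 * locks before this thread takes the job write lock */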
+	usleep(100);
+}
+
+static void _throttle_fini(void)
+{
+	slurm_mutex_lock(&throttle_mutex);
+	active_post_nhc_cnt--;
+	pthread_cond_broadcast(&throttle_cond);
+	slurm_mutex_unlock(&throttle_mutex);
+}
+
 static void *_job_fini(void *args)
 {
 	struct job_record *job_ptr = (struct job_record *)args;
@@ -896,6 +929,7 @@ static void *_job_fini(void *args)
 	/***********/
 	xfree(nhc_info.nodelist);
 
+	_throttle_start();
 	lock_slurmctld(job_write_lock);
 	if (job_ptr->magic == JOB_MAGIC) {
 		select_jobinfo_t *jobinfo = NULL;
@@ -911,6 +945,7 @@ static void *_job_fini(void *args)
 		      "this should never happen", nhc_info.jobid);
 
 	unlock_slurmctld(job_write_lock);
+	_throttle_fini();
 
 	return NULL;
 }
@@ -957,6 +992,7 @@ static void *_step_fini(void *args)
 
 	xfree(nhc_info.nodelist);
 
+	_throttle_start();
 	lock_slurmctld(job_write_lock);
 	if (!step_ptr->job_ptr) {
 		error("For some reason we don't have a job_ptr for "
@@ -988,6 +1024,7 @@ static void *_step_fini(void *args)
 		post_job_step(step_ptr);
 	}
 	unlock_slurmctld(job_write_lock);
+	_throttle_fini();
 
 	return NULL;
 }
@@ -1114,6 +1151,9 @@ extern int fini ( void )
 		_free_blade(&blade_array[i]);
 	xfree(blade_array);
 
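+	/* Release the topology info cached by select_p_node_init() */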
+	if (topology)
+		free(topology);
+
 	slurm_mutex_unlock(&blade_mutex);
 
 	return other_select_fini();
@@ -1440,23 +1480,27 @@ extern int select_p_node_init(struct node_record *node_ptr, int node_cnt)
 #if defined(HAVE_NATIVE_CRAY_GA) && !defined(HAVE_CRAY_NETWORK)
 	int nn, end_nn, last_nn = 0;
 	bool found = 0;
-	alpsc_topology_t *topology = NULL;
-	size_t num_nodes;
 	char *err_msg = NULL;
 
-	if (alpsc_get_topology(&err_msg, &topology, &num_nodes)) {
-		if (err_msg) {
-			error("(%s: %d: %s) Could not get system "
-			      "topology info: %s",
-			      THIS_FILE, __LINE__, __FUNCTION__, err_msg);
-			free(err_msg);
-		} else {
-			error("(%s: %d: %s) Could not get system "
-			      "topology info: No error message present.",
-			      THIS_FILE, __LINE__, __FUNCTION__);
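+	/* Get the topology info only once; the cached copy is reused if
+	 * this function is called again (e.g. on scontrol reconfig) */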
+	if (!topology) {
+		if (alpsc_get_topology(&err_msg, &topology,
+				       &topology_num_nodes)) {
+			if (err_msg) {
+				error("(%s: %d: %s) Could not get system "
+				      "topology info: %s",
+				      THIS_FILE, __LINE__,
+				      __FUNCTION__, err_msg);
+				free(err_msg);
+			} else {
+				error("(%s: %d: %s) Could not get system "
+				      "topology info: No error "
+				      "message present.",
+				      THIS_FILE, __LINE__, __FUNCTION__);
+			}
+			return SLURM_ERROR;
 		}
-		return SLURM_ERROR;
 	}
+
 #endif
 
 	slurm_mutex_lock(&blade_mutex);
@@ -1490,7 +1534,7 @@ extern int select_p_node_init(struct node_record *node_ptr, int node_cnt)
 		}
 
 #if defined(HAVE_NATIVE_CRAY_GA) && !defined(HAVE_CRAY_NETWORK)
-		end_nn = num_nodes;
+		end_nn = topology_num_nodes;
 
 	start_again:
 
@@ -1506,7 +1550,7 @@ extern int select_p_node_init(struct node_record *node_ptr, int node_cnt)
 				break;
 			}
 		}
-		if (end_nn != num_nodes) {
+		if (end_nn != topology_num_nodes) {
 			/* already looped */
 			fatal("Node %s(%d) isn't found on the system",
 			      node_ptr->name, nodeinfo->nid);
@@ -1545,10 +1589,6 @@ extern int select_p_node_init(struct node_record *node_ptr, int node_cnt)
 	/* give back the memory */
 	xrealloc(blade_array, sizeof(blade_info_t) * blade_cnt);
 
-#if defined(HAVE_NATIVE_CRAY_GA) && !defined(HAVE_CRAY_NETWORK)
-	free(topology);
-#endif
-
 	slurm_mutex_unlock(&blade_mutex);
 
 	return other_node_init(node_ptr, node_cnt);
diff --git a/src/slurmctld/controller.c b/src/slurmctld/controller.c
index 9e368a126df8f91b88986cd9a9a7c2718b50d76e..33e944c8600bede0227c1c30c314f2a3fbc859f4 100644
--- a/src/slurmctld/controller.c
+++ b/src/slurmctld/controller.c
@@ -1350,7 +1350,7 @@ static void _queue_reboot_msg(void)
 		node_ptr->reason = xstrdup("Scheduled reboot");
 		bit_clear(avail_node_bitmap, i);
 		bit_clear(idle_node_bitmap, i);
-		node_ptr->last_response = now;
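+		/* Defer the expected response time past the resume timeout
+		 * so the reboot is not treated as unexpected when the node
+		 * registers again */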
+		node_ptr->last_response = now + slurm_get_resume_timeout();
 	}
 	if (reboot_agent_args != NULL) {
 		hostlist_uniq(reboot_agent_args->hostlist);
diff --git a/src/slurmctld/node_mgr.c b/src/slurmctld/node_mgr.c
index 078d66c757e96d2d6a324040ba86bfd002e04c9b..3060f8b298343ea099f9d0c83ae132c9bcba05bc 100644
--- a/src/slurmctld/node_mgr.c
+++ b/src/slurmctld/node_mgr.c
@@ -2325,8 +2325,11 @@ extern int validate_node_specs(slurm_node_registration_status_msg_t *reg_msg,
 				node_ptr->reason = xstrdup(
 					"Node unexpectedly rebooted");
 			}
-			info("Node %s unexpectedly rebooted",
-			     reg_msg->node_name);
+			info("%s: Node %s unexpectedly rebooted, "
+			     "boot_time=%d, last_response=%d",
+			     __func__, reg_msg->node_name,
+			     (int)node_ptr->boot_time,
+			     (int)node_ptr->last_response);
 			_make_node_down(node_ptr, now);
 			kill_running_job_by_node_name(reg_msg->node_name);
 			last_node_update = now;
diff --git a/src/slurmd/slurmd/req.c b/src/slurmd/slurmd/req.c
index a075ce01c95168a09399b5fb42cafa6d35d8861e..a4136edf6c6ad4b8aa08f1d8fef0605a5493f2ee 100644
--- a/src/slurmd/slurmd/req.c
+++ b/src/slurmd/slurmd/req.c
@@ -1990,9 +1990,10 @@ _load_job_limits(void)
 		if (fd == -1)
 			continue;	/* step completed */
 
-		if (!stepd_get_mem_limits(fd, stepd->protocol_version,
-					  &stepd_mem_info)) {
-			error("Error reading step %u.%u memory limits",
+		if (stepd_get_mem_limits(fd, stepd->protocol_version,
+					  &stepd_mem_info) != SLURM_SUCCESS) {
+			error("Error reading step %u.%u memory limits from "
+			      "slurmstepd",
 			      stepd->jobid, stepd->stepid);
 			close(fd);
 			continue;
diff --git a/src/slurmd/slurmd/slurmd.c b/src/slurmd/slurmd/slurmd.c
index ea5c9e68798056b8e444c872a7bdc606306e06a5..f652d842f4825a9b215f5f2459c2eb0b74848df9 100644
--- a/src/slurmd/slurmd/slurmd.c
+++ b/src/slurmd/slurmd/slurmd.c
@@ -2051,8 +2051,8 @@ static int _memory_spec_init(void)
 	pid_t pid;
 
 	if (conf->mem_spec_limit == 0) {
-		info ("Resource spec: system memory limit not configured "
-		      "for this node");
+		info ("Resource spec: Reserved system memory limit not "
+		      "configured for this node");
 		return SLURM_SUCCESS;
 	}
 	if (init_system_memory_cgroup() != SLURM_SUCCESS) {