diff --git a/contribs/cray/capmc_resume.c b/contribs/cray/capmc_resume.c
index bbc8e79b2e6bb7f2e411a3153fdf6e338d924a49..e2c7fe432bb888e557158e0176047ef01a00306a 100644
--- a/contribs/cray/capmc_resume.c
+++ b/contribs/cray/capmc_resume.c
@@ -74,6 +74,18 @@
 #define DEFAULT_CAPMC_TIMEOUT 10000	/* 10 seconds */
 #define MIN_CAPMC_TIMEOUT 1000		/* 1 second */
 
+/* Number of times to try performing "node_off" operation */
+#define NODE_OFF_RETRIES 10
+
+/* Number of times to try performing "node_on" operation */
+#define NODE_ON_RETRIES 10
+
+/* Number of times to try a "set_mcdram_cfg" or "set_numa_cfg" operation */
+#define NODE_STATE_RETRIES 10
+
+/* How long to wait for a node to enter "off" state, in seconds */
+#define NODE_OFF_STATE_WAIT (30 * 60)
+
 /* Static variables */
 static char *capmc_path = NULL;
 static uint32_t capmc_poll_freq = 45;
@@ -215,6 +227,9 @@ static char *_run_script(char **script_argv, int *status)
 		close(pfd[1]);
 		error("%s: fork(): %s", prog_name,
 		      slurm_strerror(slurm_get_errno()));
+		*status = 127;
+		resp = xstrdup("System error");
+		return resp;
 	} else {
 		struct pollfd fds;
 		struct timeval tstart;
@@ -287,6 +302,8 @@ static bool _check_node_state(int nid, char *nid_str, char *state)
 	if (status != 0) {
 		error("%s: capmc(%s,%s,%s): %d %s", prog_name,
 			argv[1], argv[2], argv[3], status, resp_msg);
+		xfree(resp_msg);
+		return node_state_ok;
 	}
 	j = json_tokener_parse(resp_msg);
 	if (j == NULL) {
@@ -316,6 +333,8 @@ static void *_node_update(void *args)
 	char *argv[10], nid_str[32], *resp_msg;
 	int i, nid = -1, status = 0;
 	bool node_state_ok;
+	bool node_off_sent = false, node_on_sent = false, node_state_sent;
+	time_t poll_start;
 
 	for (i = 0; node_name[i]; i++) {
 		if ((node_name[i] >= '0') && (node_name[i] <= '9')) {
@@ -340,13 +359,24 @@ static void *_node_update(void *args)
 		argv[4] = "-n";
 		argv[5] = nid_str;
 		argv[6] = NULL;
-		resp_msg = _run_script(argv, &status);
-		if (status != 0) {
-			error("%s: capmc(%s,%s,%s,%s,%s): %d %s", prog_name,
-			      argv[1], argv[2], argv[3], argv[4], argv[5],
-			      status, resp_msg);
+		node_state_sent = false;
+		for (i = 0; ((i < NODE_STATE_RETRIES) && !node_state_sent);
+		     i++) {
+			resp_msg = _run_script(argv, &status);
+			if ((status != 0) ||
+			    (resp_msg &&
+			     (strcasestr(resp_msg, "Success") == NULL))) {
+				error("%s: capmc(%s,%s,%s,%s,%s): %d %s",
+				      prog_name, argv[1], argv[2], argv[3],
+				      argv[4], argv[5], status, resp_msg);
+				sleep(1);
+			} else {
+				debug("%s: set_mcdram_cfg sent to %s",
+				      prog_name, nid_str);
+				node_state_sent = true;
+			}
+			xfree(resp_msg);
 		}
-		xfree(resp_msg);
 	}
 
 	if (numa_mode) {
@@ -359,13 +389,24 @@ static void *_node_update(void *args)
 		argv[4] = "-n";
 		argv[5] = nid_str;
 		argv[6] = NULL;
-		resp_msg = _run_script(argv, &status);
-		if (status != 0) {
-			error("%s: capmc(%s,%s,%s,%s,%s): %d %s", prog_name,
-			      argv[1], argv[2], argv[3], argv[4], argv[5],
-			      status, resp_msg);
+		node_state_sent = false;
+		for (i = 0; ((i < NODE_STATE_RETRIES) && !node_state_sent);
+		     i++) {
+			resp_msg = _run_script(argv, &status);
+			if ((status != 0) ||
+			    (resp_msg &&
+			     (strcasestr(resp_msg, "Success") == NULL))) {
+				error("%s: capmc(%s,%s,%s,%s,%s): %d %s",
+				      prog_name, argv[1], argv[2], argv[3],
+				      argv[4], argv[5], status, resp_msg);
+				sleep(1);
+			} else {
+				debug("%s: set_numa_cfg sent to %s",
+				      prog_name, nid_str);
+				node_state_sent = true;
+			}
+			xfree(resp_msg);
 		}
-		xfree(resp_msg);
 	}
 
 	/* Test if already in "off" state */
@@ -379,20 +420,33 @@ static void *_node_update(void *args)
 		argv[2] = "-n";
 		argv[3] = nid_str;
 		argv[4] = NULL;
-		resp_msg = _run_script(argv, &status);
-		if (status != 0) {
-			error("%s: capmc(%s,%s,%s): %d %s", prog_name,
-			      argv[1], argv[2], argv[3], status, resp_msg);
+		for (i = 0; ((i < NODE_OFF_RETRIES) && !node_off_sent); i++) {
+			resp_msg = _run_script(argv, &status);
+			if ((status != 0) ||
+			    (resp_msg &&
+			     (strcasestr(resp_msg, "Success") == NULL))) {
+				error("%s: capmc(%s,%s,%s): %d %s", prog_name,
+				      argv[1], argv[2], argv[3], status,
+				      resp_msg);
+				sleep(1);
+			} else {
+				debug("%s: node_off sent to %s",
+				      prog_name, nid_str);
+				node_off_sent = true;
+			}
+			xfree(resp_msg);
 		}
-		xfree(resp_msg);
 	}
 
 	/* Wait for node in "off" state */
-	while (!node_state_ok) {
+	poll_start = time(NULL);
+	while (!node_state_ok &&
+	      (difftime(time(NULL), poll_start) < NODE_OFF_STATE_WAIT)) {
 		sleep(capmc_poll_freq);
 		node_state_ok = _check_node_state(nid, nid_str, "off");
 	}
 
+
 	/* Request node power up.
 	 * Example: "capmc node_on –n 43" */
 	argv[0] = "capmc";
@@ -400,12 +454,19 @@ static void *_node_update(void *args)
 	argv[2] = "-n";
 	argv[3] = nid_str;
 	argv[4] = NULL;
-	resp_msg = _run_script(argv, &status);
-	if (status != 0) {
-		error("%s: capmc(%s,%s,%s): %d %s", prog_name,
-			argv[1], argv[2], argv[3], status, resp_msg);
+	for (i = 0; ((i < NODE_ON_RETRIES) && !node_on_sent); i++) {
+		resp_msg = _run_script(argv, &status);
+		if ((status != 0) ||
+		    (resp_msg && (strcasestr(resp_msg, "Success") == NULL))) {
+			error("%s: capmc(%s,%s,%s): %d %s", prog_name,
+			      argv[1], argv[2], argv[3], status, resp_msg);
+			sleep(1);
+		} else {
+			debug("%s: node_on sent to %s", prog_name, nid_str);
+			node_on_sent = true;
+		}
+		xfree(resp_msg);
 	}
-	xfree(resp_msg);
 
 fini:	slurm_mutex_lock(&thread_cnt_mutex);
 	thread_cnt--;
diff --git a/contribs/cray/capmc_suspend.c b/contribs/cray/capmc_suspend.c
index 772a5d5aa45fb17f9e5e16139650edd2aad4dafb..fbeebc0ccf81a734f75c9556c6f1cfdb3c4be4cc 100644
--- a/contribs/cray/capmc_suspend.c
+++ b/contribs/cray/capmc_suspend.c
@@ -73,6 +73,12 @@
 #define DEFAULT_CAPMC_TIMEOUT 10000	/* 10 seconds */
 #define MIN_CAPMC_TIMEOUT 1000		/* 1 second */
 
+/* Number of times to try performing "node_off" operation */
+#define NODE_OFF_RETRIES 10
+
+/* How long to wait for a node to enter "off" state, in seconds */
+#define NODE_OFF_STATE_WAIT (30 * 60)
+
 /* Static variables */
 static char *capmc_path = NULL;
 static uint32_t capmc_poll_freq = 45;   /* capmc state polling frequency */
@@ -210,6 +216,9 @@ static char *_run_script(char **script_argv, int *status)
 		close(pfd[1]);
 		error("%s: fork(): %s", prog_name,
 		      slurm_strerror(slurm_get_errno()));
+		*status = 127;
+		resp = xstrdup("System error");
+		return resp;
 	} else {
 		struct pollfd fds;
 		struct timeval tstart;
@@ -315,6 +324,8 @@ static bool _check_node_state(int nid, char *nid_str, char *state)
 	if (status != 0) {
 		error("%s: capmc(%s,%s,%s): %d %s", prog_name,
 			argv[1], argv[2], argv[3], status, resp_msg);
+		xfree(resp_msg);
+		return node_state_ok;
 	}
 	j = json_tokener_parse(resp_msg);
 	if (j == NULL) {
@@ -343,7 +354,8 @@ static void *_node_update(void *args)
 	char *node_name = (char *) args;
 	char *argv[10], nid_str[32], *resp_msg;
 	int i, nid = -1, status = 0;
-	bool node_state_ok;
+	bool node_state_ok, node_off_sent = false;
+	time_t poll_start;
 
 	for (i = 0; node_name[i]; i++) {
 		if ((node_name[i] >= '0') && (node_name[i] <= '9')) {
@@ -364,15 +376,24 @@ static void *_node_update(void *args)
 	argv[2] = "-n";
 	argv[3] = nid_str;
 	argv[4] = NULL;
-	resp_msg = _run_script(argv, &status);
-	if (status != 0) {
-		error("%s: capmc(%s,%s,%s): %d %s", prog_name,
-		      argv[1], argv[2], argv[3], status, resp_msg);
+	for (i = 0; ((i < NODE_OFF_RETRIES) && !node_off_sent); i++) {
+		resp_msg = _run_script(argv, &status);
+		if ((status != 0) ||
+		    (resp_msg && (strcasestr(resp_msg, "Success") == NULL))) {
+			error("%s: capmc(%s,%s,%s): %d %s", prog_name,
+			      argv[1], argv[2], argv[3], status, resp_msg);
+			sleep(1);
+		} else {
+			debug("%s: node_off sent to %s", prog_name, nid_str);
+			node_off_sent = true;
+		}
+		xfree(resp_msg);
 	}
-	xfree(resp_msg);
 
 	/* Wait for node in "off" state */
-	while (!node_state_ok) {
+	poll_start = time(NULL);
+	while (!node_state_ok &&
+	      (difftime(time(NULL), poll_start) < NODE_OFF_STATE_WAIT)) {
 		sleep(capmc_poll_freq);
 		node_state_ok = _check_node_state(nid, nid_str, "off");
 	}