diff --git a/NEWS b/NEWS
index dbc8ca12bc256f0915571ece6158bd641df49aff..631b475450cca5e9709e8e19be4357a6884d65da 100644
--- a/NEWS
+++ b/NEWS
@@ -5,6 +5,8 @@ documents those changes that are of interest to users and admins.
 =============================
  -- BlueGene srun --geometry was not getting propogated properly.
  -- Fix race condition with multiple simultaneous epilogs.
+ -- Modify slurmd to resend the job completion RPC to slurmctld when
+    slurmctld is not responding.
 
 * Changes in SLURM 0.5.0-pre7
 =============================
diff --git a/src/slurmd/mgr.c b/src/slurmd/mgr.c
index f1d840fce73102d1f11f7bbeb056add326e12e9f..c63050da771663befc86c53f840d265d8c746009 100644
--- a/src/slurmd/mgr.c
+++ b/src/slurmd/mgr.c
@@ -93,6 +93,9 @@ static int exit_errno[] =
 
 #define MAX_SMGR_EXIT_STATUS 6
 
+#define RETRY_DELAY 15		/* seconds slept after each failed RPC attempt */
+#define MAX_RETRY   240		/* retries after the first attempt (~1 hour of sleeping) */
+
 /*
  *  List of signals to block in this process
  */
@@ -1131,7 +1134,7 @@ _send_launch_resp(slurmd_job_t *job, int rc)
 static int
 _complete_job(uint32_t jobid, uint32_t stepid, int err, int status)
 {
-	int                      rc;
+	int                      rc, i;
 	slurm_msg_t              req_msg;
 	complete_job_step_msg_t  req;
 
@@ -1143,12 +1146,22 @@ _complete_job(uint32_t jobid, uint32_t stepid, int err, int status)
 	req_msg.msg_type= REQUEST_COMPLETE_JOB_STEP;
 	req_msg.data	= &req;	
 
-	if (slurm_send_recv_controller_rc_msg(&req_msg, &rc) < 0) {
+	/* Note: these log messages don't go to slurmd.log from here */
+	for (i=0; i<=MAX_RETRY; i++) {
+		if (slurm_send_recv_controller_rc_msg(&req_msg, &rc) >= 0)
+			break;
+		info("Retrying job complete RPC for %u.%u", jobid, stepid);
+		sleep(RETRY_DELAY);
+	}
+	if (i > MAX_RETRY) {
 		error("Unable to send job complete message: %m");
 		return SLURM_ERROR;
 	}
 
-	if (rc) slurm_seterrno_ret(rc);
+	if ((rc == ESLURM_ALREADY_DONE) || (rc == ESLURM_INVALID_JOB_ID))
+		rc = SLURM_SUCCESS;
+	if (rc)
+		slurm_seterrno_ret(rc);
 
 	return SLURM_SUCCESS;
 }