diff --git a/NEWS b/NEWS
index d0f49194fe3b40f63952a7a4c0810a58d2347c01..660dccbd1537c263257c5b4930b062d3e7bf71a7 100644
--- a/NEWS
+++ b/NEWS
@@ -149,6 +149,8 @@ documents those changes that are of interest to users and admins.
     comes last.
  -- When attempting to requeue a job distinguish the case in which the job is
     JOB_COMPLETING or already pending.
+ -- When reconfiguring the controller, don't restart the slurmctld epilog if it
+    is already running.
 
 * Changes in Slurm 14.03.3-2
 ============================
diff --git a/src/slurmctld/job_mgr.c b/src/slurmctld/job_mgr.c
index cf90d4274723517de52f2e52dc684ac2c069ca3f..80196e9cba060c6163f06c074cf1c6ba5f07eeae 100644
--- a/src/slurmctld/job_mgr.c
+++ b/src/slurmctld/job_mgr.c
@@ -3174,7 +3174,10 @@ struct job_record *_job_rec_copy(struct job_record *job_ptr)
 	details_new->std_in = xstrdup(job_details->std_in);
 	details_new->std_out = xstrdup(job_details->std_out);
 	details_new->work_dir = xstrdup(job_details->work_dir);
-	_copy_job_desc_files(job_ptr->job_id, job_ptr_new->job_id);
+	if (_copy_job_desc_files(job_ptr->job_id, job_ptr_new->job_id)) {
+		_list_delete_job((void *) job_ptr_new);
+		return NULL;
+	}
 
 	return job_ptr_new;
 }
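
For context, a minimal sketch of what a caller of _job_rec_copy() now has to do, given the new NULL return when the batch script/environment files cannot be copied. The function name and surrounding logic here are hypothetical and not part of this patch; only the NULL check reflects the change above.

/* Sketch only, not Slurm source: a hypothetical caller of
 * _job_rec_copy().  On a NULL return the copied job record has
 * already been removed via _list_delete_job(), so the caller only
 * needs to report the failure and bail out. */
static int _requeue_via_copy(struct job_record *job_ptr)
{
	struct job_record *job_ptr_new = _job_rec_copy(job_ptr);

	if (job_ptr_new == NULL) {
		error("Unable to copy state files for job %u",
		      job_ptr->job_id);
		return SLURM_ERROR;
	}
	/* ... continue working with job_ptr_new ... */
	return SLURM_SUCCESS;
}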
@@ -5034,10 +5037,9 @@ _copy_job_desc_to_file(job_desc_msg_t * job_desc, uint32_t job_id)
 	xstrcat(dir_name, job_dir);
 	if (mkdir(dir_name, 0700)) {
 		if (!slurmctld_primary && (errno == EEXIST)) {
-			fatal("Apparent duplicate job ID %u. Two primary "
-			      "slurmctld daemons may currently be active. "
-			      "Shutting down this daemon to avoid inconsistent "
-			      "state due to split brain.", job_id);
+			error("Apparent duplicate job ID %u. Two primary "
+			      "slurmctld daemons might currently be active",
+			      job_id);
 		}
 		error("mkdir(%s) error %m", dir_name);
 		xfree(dir_name);
@@ -5065,11 +5067,37 @@ _copy_job_desc_to_file(job_desc_msg_t * job_desc, uint32_t job_id)
 	return error_code;
 }
 
+/* Return true if the specified job ID already has a batch directory, so
+ * that a different job ID can be used. This helps limit the damage from
+ * split-brain, where two slurmctld daemons are running as primary. */
+static bool _dup_job_file_test(uint32_t job_id)
+{
+	char *dir_name_src, job_dir[40];
+	struct stat buf;
+	int rc, hash;
+
+	dir_name_src  = slurm_get_state_save_location();
+	hash = job_id % 10;
+	sprintf(job_dir, "/hash.%d", hash);
+	xstrcat(dir_name_src, job_dir);
+	sprintf(job_dir, "/job.%u", job_id);
+	xstrcat(dir_name_src, job_dir);
+	rc = stat(dir_name_src, &buf);
+	xfree(dir_name_src);
+	if (rc == 0) {
+		error("Vestigial state files for job %u, but no job record. "
+		      "This may be the result of two slurmctld daemons "
+		      "running in primary mode", job_id);
+		return true;
+	}
+	return false;
+}
+
 /* _copy_job_desc_files - create copies of a job script and environment files */
 static int
 _copy_job_desc_files(uint32_t job_id_src, uint32_t job_id_dest)
 {
-	int error_code = 0, hash;
+	int error_code = SLURM_SUCCESS, hash;
 	char *dir_name_src, *dir_name_dest, job_dir[40];
 	char *file_name_src, *file_name_dest;
 
@@ -5090,10 +5118,9 @@ _copy_job_desc_files(uint32_t job_id_src, uint32_t job_id_dest)
 	xstrcat(dir_name_dest, job_dir);
 	if (mkdir(dir_name_dest, 0700)) {
 		if (!slurmctld_primary && (errno == EEXIST)) {
-			fatal("Apparent duplicate job ID %u. Two primary "
-			      "slurmctld daemons may currently be active. "
-			      "Shutting down this daemon to avoid inconsistent "
-			      "state due to split brain.", job_id_dest);
+			error("Apparent duplicate job ID %u. Two primary "
+			      "slurmctld daemons might currently be active",
+			      job_id_dest);
 		}
 		error("mkdir(%s) error %m", dir_name_dest);
 		xfree(dir_name_src);
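
The probe added above assumes the batch state for job N lives under <StateSaveLocation>/hash.<N % 10>/job.<N>. A self-contained sketch of the same stat()-based existence test outside of slurmctld; the path /var/spool/slurmctld and the helper name are illustrative only, and the real location comes from slurm_get_state_save_location().

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/stat.h>

/* Standalone illustration of the check performed by _dup_job_file_test():
 * a job ID is treated as unusable if its batch directory already exists
 * under the state save location. */
static bool job_dir_exists(const char *state_save_loc, uint32_t job_id)
{
	char path[4096];
	struct stat buf;

	snprintf(path, sizeof(path), "%s/hash.%u/job.%u",
		 state_save_loc, job_id % 10, job_id);
	return (stat(path, &buf) == 0);
}

int main(void)
{
	if (job_dir_exists("/var/spool/slurmctld", 1234))
		printf("job 1234 appears to have leftover state files\n");
	return 0;
}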
@@ -7464,10 +7491,12 @@ static int _set_job_id(struct job_record *job_ptr)
 		if (++job_id_sequence >= slurmctld_conf.max_job_id)
 			job_id_sequence = slurmctld_conf.first_job_id;
 		new_id = job_id_sequence;
-		if (find_job_record(new_id) == NULL) {
-			job_ptr->job_id = new_id;
-			return SLURM_SUCCESS;
-		}
+		if (find_job_record(new_id))
+			continue;
+		if (_dup_job_file_test(new_id))
+			continue;
+		job_ptr->job_id = new_id;
+		return SLURM_SUCCESS;
 	}
 	error("We have exhausted our supply of valid job id values. "
 	      "FirstJobId=%u MaxJobId=%u", slurmctld_conf.first_job_id,
diff --git a/src/slurmctld/read_config.c b/src/slurmctld/read_config.c
index bca68f8ad804202acc607bd545d0342d09e15c35..718b93f91a7f4613e698f07d3ced6cd437064078 100644
--- a/src/slurmctld/read_config.c
+++ b/src/slurmctld/read_config.c
@@ -1742,6 +1742,17 @@ static int _sync_nodes_to_comp_job(void)
 	job_iterator = list_iterator_create(job_list);
 	while ((job_ptr = (struct job_record *) list_next(job_iterator))) {
 		if ((job_ptr->node_bitmap) && IS_JOB_COMPLETING(job_ptr)) {
+
+			/* If the controller is reconfiguring and the
+			 * job is in completing state while the
+			 * slurmctld epilog is already running, then
+			 * deallocate_nodes() has already been called.
+			 * Do not invoke it again and do not start
+			 * another epilog.
+			 */
+			if (job_ptr->epilog_running == true)
+				continue;
+
 			update_cnt++;
 			/* This needs to be set up for the priority
 			   plugin and this happens before it is
@@ -1750,7 +1761,8 @@ static int _sync_nodes_to_comp_job(void)
 			if (!cluster_cpus)
 				set_cluster_cpus();
 
-			info("Job %u in completing state", job_ptr->job_id);
+			info("%s: Job %u in completing state",
+			     __func__, job_ptr->job_id);
 			if (!job_ptr->node_bitmap_cg)
 				build_cg_bitmap(job_ptr);
 			deallocate_nodes(job_ptr, false, false, false);
@@ -1761,7 +1773,7 @@ static int _sync_nodes_to_comp_job(void)
 	}
 	list_iterator_destroy(job_iterator);
 	if (update_cnt)
-		info("_sync_nodes_to_comp_job completing %d jobs", update_cnt);
+		info("%s: completing %d jobs", __func__, update_cnt);
 	return update_cnt;
 }
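
The guard added in _sync_nodes_to_comp_job() relies on job_ptr->epilog_running being set while the slurmctld epilog for that job is in flight and cleared once it finishes. A schematic sketch of that flag pattern, with simplified names and not taken from the Slurm sources:

#include <stdbool.h>
#include <stdint.h>

/* Schematic flag-based guard.  The idea matches the change above: once
 * the per-job epilog has been launched, a later reconfigure walking the
 * job list must not launch it again. */
struct job_state {
	uint32_t job_id;
	bool epilog_running;	/* true while the epilog is in flight */
};

static void launch_epilog(struct job_state *job)
{
	if (job->epilog_running)
		return;			/* already running: do nothing */
	job->epilog_running = true;
	/* ... spawn the epilog script for job->job_id here ... */
}

static void epilog_done(struct job_state *job)
{
	job->epilog_running = false;
	/* ... finish deallocating the job's nodes ... */
}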