diff --git a/src/plugins/select/bluegene/plugin/bg_job_run.c b/src/plugins/select/bluegene/plugin/bg_job_run.c
index 9a417d3db6c1bfa1b67f7158935473f793bb0637..d1232b0ba828ccaa4616656faa1217695bdb35a6 100644
--- a/src/plugins/select/bluegene/plugin/bg_job_run.c
+++ b/src/plugins/select/bluegene/plugin/bg_job_run.c
@@ -830,6 +830,11 @@ static void _start_agent(bg_action_t *bg_action_ptr)
 		slurm_mutex_unlock(&block_state_mutex);
 		if((rc = boot_block(bg_record)) != SLURM_SUCCESS) {
 			slurm_mutex_lock(&block_state_mutex);
+			if(!_make_sure_block_still_exists(bg_action_ptr,
+							  bg_record)) {
+				slurm_mutex_unlock(&block_state_mutex);
+				return;
+			}
 			_reset_block(bg_record);
 			slurm_mutex_unlock(&block_state_mutex);
 			bg_requeue_job(bg_action_ptr->job_ptr->job_id, 1);
diff --git a/src/plugins/select/bluegene/plugin/bg_record_functions.c b/src/plugins/select/bluegene/plugin/bg_record_functions.c
index a5dceec9bfcdb9789ec5d95e17370409aefd9212..4dceae290ba1664d627609113504bd7edee7e10f 100644
--- a/src/plugins/select/bluegene/plugin/bg_record_functions.c
+++ b/src/plugins/select/bluegene/plugin/bg_record_functions.c
@@ -638,79 +638,50 @@ extern int update_block_user(bg_record_t *bg_record, int set)
 }
 
 /* If any nodes in node_list are drained, draining, or down,
- *   then just return
- *   else drain all of the nodes
- * This function lets us drain an entire bgblock only if
- * we have not already identified a specific node as bad. */
+ * put block in an error state and drain the nodes which aren't
+ * already in the bad state.
+ * block_state_mutex must be unlocked before calling this.
+ */
 extern void drain_as_needed(bg_record_t *bg_record, char *reason)
 {
-	bool needed = true;
-	hostlist_t hl;
-	char *host = NULL;
-
-	if(bg_record->job_running > NO_JOB_RUNNING) {
-		bg_requeue_job(bg_record->job_running, 0);
-		slurm_mutex_lock(&block_state_mutex);
-		if(remove_from_bg_list(bg_lists->job_running, bg_record)
-		   == SLURM_SUCCESS) {
-			num_unused_cpus += bg_record->cpu_cnt;
-		}
+	slurm_mutex_lock(&block_state_mutex);
+	if(!block_ptr_exist_in_list(bg_lists->main, bg_record)) {
 		slurm_mutex_unlock(&block_state_mutex);
+		error("drain_as_needed: block disappeared");
+		return;
 	}
 
-	/* small blocks */
-	if(bg_record->cpu_cnt < bg_conf->cpus_per_bp) {
-		debug2("small block");
-		goto end_it;
-	}
+	if(bg_record->job_running > NO_JOB_RUNNING)
+		bg_requeue_job(bg_record->job_running, 0);
 
-	/* at least one base partition */
-	hl = hostlist_create(bg_record->nodes);
-	if (!hl) {
-		slurm_drain_nodes(bg_record->nodes, reason,
-				  slurm_get_slurm_user_id());
-		return;
-	}
-	while ((host = hostlist_shift(hl))) {
-		if (node_already_down(host)) {
-			needed = false;
+ 	if(bg_record->cpu_cnt >= bg_conf->cpus_per_bp) {
+		/* at least one base partition */
+		hostlist_t hl = hostlist_create(bg_record->nodes);
+		char *host = NULL;
+
+		if (!hl) {
+			slurm_drain_nodes(bg_record->nodes, reason,
+					  slurm_get_slurm_user_id());
+			return;
+		}
+		while ((host = hostlist_shift(hl))) {
+			if (!node_already_down(host))
+				slurm_drain_nodes(host, reason,
+						  slurm_get_slurm_user_id());
 			free(host);
-			break;
 		}
-		free(host);
-	}
-	hostlist_destroy(hl);
-
-	if (needed) {
+		hostlist_destroy(hl);
+	} else
 		slurm_drain_nodes(bg_record->nodes, reason,
 				  slurm_get_slurm_user_id());
-	}
-end_it:
-	while(bg_record->job_running > NO_JOB_RUNNING) {
-		debug2("block %s is still running job %d",
-		       bg_record->bg_block_id, bg_record->job_running);
-		sleep(1);
-	}
+
+	slurm_mutex_unlock(&block_state_mutex);
 
 	put_block_in_error_state(bg_record, BLOCK_ERROR_STATE, reason);
 	return;
 }
 
-extern int set_ionodes(bg_record_t *bg_record, int io_start, int io_nodes)
-{
-	char bitstring[BITSIZE];
-
-	if(!bg_record)
-		return SLURM_ERROR;
-
-	bg_record->ionode_bitmap = bit_alloc(bg_conf->numpsets);
-	/* Set the correct ionodes being used in this block */
-	bit_nset(bg_record->ionode_bitmap, io_start, io_start+io_nodes);
-	bit_fmt(bitstring, BITSIZE, bg_record->ionode_bitmap);
-	bg_record->ionodes = xstrdup(bitstring);
-	return SLURM_SUCCESS;
-}
-
+/* block_state_mutex must be locked before calling this. */
 extern int add_bg_record(List records, List used_nodes, blockreq_t *blockreq,
 			 bool no_check, bitoff_t io_start)
 {
@@ -1448,6 +1419,7 @@ extern int up_nodecard(char *bp_name, bitstr_t *ionode_bitmap)
 	return SLURM_SUCCESS;
 }
 
+/* block_state_mutex must be unlocked before calling this. */
 extern int put_block_in_error_state(bg_record_t *bg_record,
 				    int state, char *reason)
 {
@@ -1463,8 +1435,17 @@ extern int put_block_in_error_state(bg_record_t *bg_record,
 		   to wait for the job to be removed.  We don't really
 		   need to free the block though since we may just
 		   want it to be in an error state for some reason. */
-		while(bg_record->job_running > NO_JOB_RUNNING)
+		while(bg_record->job_running > NO_JOB_RUNNING) {
+			if(bg_record->magic != BLOCK_MAGIC) {
+				error("While putting block %s in a error "
+				      "state it was destroyed",
+				      bg_record->bg_block_id);
+				return SLURM_ERROR;
+			}
+			debug2("block %s is still running job %d",
+			       bg_record->bg_block_id, bg_record->job_running);
 			sleep(1);
+		}
 	}
 
 	slurm_mutex_lock(&block_state_mutex);
diff --git a/src/plugins/select/bluegene/plugin/bg_record_functions.h b/src/plugins/select/bluegene/plugin/bg_record_functions.h
index 00c8177ea353b97a193d9c1b8b26b2c1244c7a59..3a9ecf9c1bd066ce62b07afbd7590cb5b4b73935 100644
--- a/src/plugins/select/bluegene/plugin/bg_record_functions.h
+++ b/src/plugins/select/bluegene/plugin/bg_record_functions.h
@@ -137,8 +137,6 @@ extern bg_record_t *find_bg_record_in_list(List my_list, char *bg_block_id);
 extern int update_block_user(bg_record_t *bg_block_id, int set);
 extern void drain_as_needed(bg_record_t *bg_record, char *reason);
 
-extern int set_ionodes(bg_record_t *bg_record, int io_start, int io_nodes);
-
 extern int add_bg_record(List records, List used_nodes, blockreq_t *blockreq,
 			 bool no_check, bitoff_t io_start);
 extern int handle_small_record_request(List records, blockreq_t *blockreq,
diff --git a/src/plugins/select/bluegene/plugin/block_sys.c b/src/plugins/select/bluegene/plugin/block_sys.c
index aaf31dc93d01232eaa394ee939340b673636ce95..d02cecc3e30495749c0c2eb78d9f8fbeb9df9c55 100755
--- a/src/plugins/select/bluegene/plugin/block_sys.c
+++ b/src/plugins/select/bluegene/plugin/block_sys.c
@@ -266,6 +266,23 @@ static int _post_allocate(bg_record_t *bg_record)
 }
 
 #ifdef HAVE_BG_FILES
+
+/* Set bg_record's ionode_bitmap and ionodes string; SLURM_ERROR on NULL. */
+static int _set_ionodes(bg_record_t *bg_record, int io_start, int io_nodes)
+{
+	char ionode_str[BITSIZE];
+
+	if (bg_record == NULL)
+		return SLURM_ERROR;
+	bg_record->ionode_bitmap = bit_alloc(bg_conf->numpsets);
+	/* ionodes used by this block: io_start .. io_start+io_nodes */
+	bit_nset(bg_record->ionode_bitmap, io_start, io_start + io_nodes);
+	bit_fmt(ionode_str, BITSIZE, bg_record->ionode_bitmap);
+	bg_record->ionodes = xstrdup(ionode_str);
+	return SLURM_SUCCESS;
+}
+
+
 #ifdef HAVE_BGL
 extern int find_nodecard_num(rm_partition_t *block_ptr, rm_nodecard_t *ncard,
 			     int *nc_id)
@@ -659,7 +676,7 @@ int read_bg_blocks(List curr_block_list)
 				io_cnt = 0;
 			}
 #endif
-			if(set_ionodes(bg_record, io_start, io_cnt)
+			if(_set_ionodes(bg_record, io_start, io_cnt)
 			   == SLURM_ERROR)
 				error("couldn't create ionode_bitmap "
 				      "for ionodes %d to %d",
diff --git a/src/plugins/select/bluegene/plugin/bluegene.c b/src/plugins/select/bluegene/plugin/bluegene.c
index c4222bd914c217f8f939bfbcdf03eab758cecec9..2b823ae4687b01cbed5a2a67b6995387cca06a16 100644
--- a/src/plugins/select/bluegene/plugin/bluegene.c
+++ b/src/plugins/select/bluegene/plugin/bluegene.c
@@ -84,7 +84,7 @@ static int  _validate_config_nodes(List curr_block_list,
 				   List found_block_list, char *dir);
 static int _delete_old_blocks(List curr_block_list,
 			      List found_block_list);
-static void *_mult_free_block(void *args);
+static void *_clear_block(void *args);
 static char *_get_bg_conf(void);
 static int  _reopen_bridge_log(void);
 static void _destroy_bitmap(void *object);
@@ -627,19 +627,21 @@ extern int free_block_list(List track_list, bool destroy, bool wait)
 	while ((bg_record = list_next(itr))) {
 		bg_free_t *bg_free = xmalloc(sizeof(bg_free_t));
 
+		/* just in case it wasn't already done. */
+		bg_record->magic = 0;
+
 		bg_free->free_cond = &free_cond;
 		bg_free->free_cnt = &free_cnt;
 		bg_free->free_mutex = &free_mutex;
 		bg_free->bg_record = bg_record;
 		bg_free->wait = wait;
-		info("going to free %s", bg_record->bg_block_id);
 		slurm_attr_init(&attr_agent);
 		if (pthread_attr_setdetachstate(
 			    &attr_agent, PTHREAD_CREATE_DETACHED))
 			error("pthread_attr_setdetachstate error %m");
 		retries = 0;
 		while (pthread_create(&thread_agent, &attr_agent,
-				      _mult_free_block, bg_free)) {
+				      _clear_block, bg_free)) {
 			error("pthread_create error %m");
 			if (++retries > MAX_PTHREAD_RETRIES)
 				fatal("Can't create "
@@ -649,7 +651,7 @@ extern int free_block_list(List track_list, bool destroy, bool wait)
 		}
 	}
 
-	/* _mult_free_block should handle cleanup so just return */
+	/* _clear_block should handle cleanup so just return */
 	if (!wait) {
 		list_iterator_destroy(itr);
 		return SLURM_SUCCESS;
@@ -1567,7 +1569,7 @@ static int _delete_old_blocks(List curr_block_list, List found_block_list)
 
 /* Free multiple blocks in parallel no locks should be needed here
  * except for destroying */
-static void *_mult_free_block(void *args)
+static void *_clear_block(void *args)
 {
 	bg_free_t *bg_free = (bg_free_t *)args;
 	bg_record_t *bg_record = bg_free->bg_record;
@@ -1576,22 +1578,20 @@ static void *_mult_free_block(void *args)
 		goto end_it;
 
 	if (bg_record->job_ptr) {
-		info("_mult_free_block: We are freeing a block (%s) that "
-		     "has job %u(%u), This should never happen.",
+		info("_clear_block: We are freeing a block (%s) that "
+		     "has job %u(%u).",
 		     bg_record->bg_block_id,
 		     bg_record->job_ptr->job_id,
 		     bg_record->job_running);
 		bg_requeue_job(bg_record->job_ptr->job_id, 0);
-		bg_record->job_ptr = NULL;
-		bg_record->job_running = NO_JOB_RUNNING;
 	}
 
 	if(bg_conf->slurm_debug_flags & DEBUG_FLAG_SELECT_TYPE)
-		info("_mult_free_block: freeing the block %s.",
+		info("_clear_block: freeing the block %s.",
 		     bg_record->bg_block_id);
 	bg_free_block(bg_record, 1, 0);
 	if (bg_conf->slurm_debug_flags & DEBUG_FLAG_SELECT_TYPE)
-		info("_mult_free_block: done %s", bg_record->bg_block_id);
+		info("_clear_block: done %s", bg_record->bg_block_id);
 
 	if (!bg_free->wait) {
 		slurm_mutex_lock(&block_state_mutex);
@@ -1608,28 +1608,28 @@ static void *_mult_free_block(void *args)
 
 #ifdef HAVE_BG_FILES
 		if(bg_conf->slurm_debug_flags & DEBUG_FLAG_SELECT_TYPE)
-			info("_mult_free_block: removing %s from database",
+			info("_clear_block: removing %s from database",
 			     bg_record->bg_block_id);
 
 		rc = bridge_remove_block(bg_record->bg_block_id);
 		if (rc != STATUS_OK) {
 			if(rc == PARTITION_NOT_FOUND) {
-				debug("_mult_free_block: block %s is not found",
+				debug("_clear_block: block %s is not found",
 				      bg_record->bg_block_id);
 			} else {
-				error("_mult_free_block: "
+				error("_clear_block: "
 				      "rm_remove_partition(%s): %s",
 				      bg_record->bg_block_id,
 				      bg_err_str(rc));
 			}
 		} else
 			if(bg_conf->slurm_debug_flags & DEBUG_FLAG_SELECT_TYPE)
-				info("_mult_free_block: done %s",
+				info("_clear_block: done %s",
 				     bg_record->bg_block_id);
 #endif
 		destroy_bg_record(bg_record);
 		if(bg_conf->slurm_debug_flags & DEBUG_FLAG_SELECT_TYPE)
-			info("_mult_free_block: destroyed");
+			info("_clear_block: destroyed");
 
 		slurm_mutex_unlock(&block_state_mutex);
 	}
diff --git a/src/plugins/select/bluegene/plugin/select_bluegene.c b/src/plugins/select/bluegene/plugin/select_bluegene.c
index 786a52c7b022be78a93f01de2e3f90d26f5c1c15..b4d1bb55767f9e6550651d935a10bc2e40bd57e4 100644
--- a/src/plugins/select/bluegene/plugin/select_bluegene.c
+++ b/src/plugins/select/bluegene/plugin/select_bluegene.c
@@ -808,7 +808,15 @@ extern int select_p_update_block(update_block_msg_t *block_desc_ptr)
 
 	/* First fail any job running on this block */
 	if(bg_record->job_running > NO_JOB_RUNNING) {
-		slurm_fail_job(bg_record->job_running);
+		slurm_mutex_unlock(&block_state_mutex);
+		bg_requeue_job(bg_record->job_running, 0);
+		slurm_mutex_lock(&block_state_mutex);
+		if(!block_ptr_exist_in_list(bg_lists->main, bg_record)) {
+			slurm_mutex_unlock(&block_state_mutex);
+			error("while trying to put block in "
+			      "error state it disappeared");
+			return SLURM_ERROR;
+		}
 		/* need to set the job_ptr to NULL
 		   here or we will get error message
 		   about us trying to free this block
@@ -846,16 +854,10 @@ extern int select_p_update_block(update_block_msg_t *block_desc_ptr)
 				     found_record->job_running,
 				     found_record->bg_block_id,
 				     bg_record->bg_block_id);
-				/* We need to fail this job first to
-				   get the correct result even though
-				   we are freeing the block later */
-				slurm_fail_job(found_record->job_running);
-				/* need to set the job_ptr to NULL
-				   here or we will get error message
-				   about us trying to free this block
-				   with a job in it.
+				/* This job will be requeued in the
+				   free_block_list code below, just
+				   make note of it here.
 				*/
-				found_record->job_ptr = NULL;
 			} else {
 				debug2("block %s is part of errored %s "
 				       "but no running job",
@@ -920,16 +922,10 @@ extern int select_p_update_block(update_block_msg_t *block_desc_ptr)
 				     found_record->job_running,
 				     found_record->bg_block_id,
 				     bg_record->bg_block_id);
-				/* We need to fail this job first to
-				   get the correct result even though
-				   we are freeing the block later */
-				slurm_fail_job(found_record->job_running);
-				/* need to set the job_ptr to NULL
-				   here or we will get error message
-				   about us trying to free this block
-				   with a job in it.
+				/* This job will be requeued in the
+				   free_block_list code below, just
+				   make note of it here.
 				*/
-				found_record->job_ptr = NULL;
 			} else {
 				debug2("block %s is part of to be freed %s "
 				       "but no running job",