From ac971600078c19d1c59ebe24f8d5e23265443c21 Mon Sep 17 00:00:00 2001
From: Danny Auble <da@schedmd.com>
Date: Tue, 13 Dec 2011 11:44:27 -0800
Subject: [PATCH] BGQ - handle the case where we try to boot a block that is
 already booting.  This should happen very rarely.

---
 .../select/bluegene/bl_bgq/bridge_helper.cc   | 40 ++++++++++++++++++-
 1 file changed, 38 insertions(+), 2 deletions(-)

diff --git a/src/plugins/select/bluegene/bl_bgq/bridge_helper.cc b/src/plugins/select/bluegene/bl_bgq/bridge_helper.cc
index 09345356cb7..73ed59eeae6 100644
--- a/src/plugins/select/bluegene/bl_bgq/bridge_helper.cc
+++ b/src/plugins/select/bluegene/bl_bgq/bridge_helper.cc
@@ -227,10 +227,46 @@ extern int bridge_handle_runtime_errors(const char *function,
 
 	switch (err) {
 	case bgsched::RuntimeErrors::BlockBootError:
-		error("%s: Error booting block %s.", function,
-		      bg_record->bg_block_id);
+	{
+		BlockFilter filter;
+		Block::Ptrs vec;
+
 		rc = BG_ERROR_BOOT_ERROR;
+
+		if ((bg_record->magic != BLOCK_MAGIC)
+		    || !bg_record->bg_block_id) {
+			error("%s: bad block given to booting.", function);
+			break;
+		}
+
+		filter.setName(string(bg_record->bg_block_id));
+
+		vec = bridge_get_blocks(filter);
+		if (vec.empty()) {
+			debug("%s: block %s not found, removing "
+			      "from slurm", function, bg_record->bg_block_id);
+			break;
+		}
+		const Block::Ptr &block_ptr = *(vec.begin());
+		uint16_t state = bridge_translate_status(
+			block_ptr->getStatus().toValue());
+		if (state == BG_BLOCK_FREE) {
+			error("%s: Block %s was free but we got an error "
+			      "while trying to boot it. (system=%s) (us=%s)",
+			      function, bg_record->bg_block_id,
+			      bg_block_state_string(state),
+			      bg_block_state_string(bg_record->state));
+		} else {
+			debug("%s: tring to boot a block %s that wasn't "
+			      "free (system=%s) (us=%s), no real error.",
+			      function, bg_record->bg_block_id,
+			      bg_block_state_string(state),
+			      bg_block_state_string(bg_record->state));
+			rc = SLURM_SUCCESS;
+		}
+
 		break;
+	}
 	case bgsched::RuntimeErrors::BlockFreeError:
 		/* not a real error */
 		rc = BG_ERROR_INVALID_STATE;
-- 
GitLab