From ac971600078c19d1c59ebe24f8d5e23265443c21 Mon Sep 17 00:00:00 2001 From: Danny Auble <da@schedmd.com> Date: Tue, 13 Dec 2011 11:44:27 -0800 Subject: [PATCH] BGQ - handle the case where we try to boot a block that is already booting. This should happen very rarely. --- .../select/bluegene/bl_bgq/bridge_helper.cc | 40 ++++++++++++++++++- 1 file changed, 38 insertions(+), 2 deletions(-) diff --git a/src/plugins/select/bluegene/bl_bgq/bridge_helper.cc b/src/plugins/select/bluegene/bl_bgq/bridge_helper.cc index 09345356cb7..73ed59eeae6 100644 --- a/src/plugins/select/bluegene/bl_bgq/bridge_helper.cc +++ b/src/plugins/select/bluegene/bl_bgq/bridge_helper.cc @@ -227,10 +227,46 @@ extern int bridge_handle_runtime_errors(const char *function, switch (err) { case bgsched::RuntimeErrors::BlockBootError: - error("%s: Error booting block %s.", function, - bg_record->bg_block_id); + { + BlockFilter filter; + Block::Ptrs vec; + rc = BG_ERROR_BOOT_ERROR; + + if ((bg_record->magic != BLOCK_MAGIC) + || !bg_record->bg_block_id) { + error("%s: bad block given to booting.", function); + break; + } + + filter.setName(string(bg_record->bg_block_id)); + + vec = bridge_get_blocks(filter); + if (vec.empty()) { + debug("%s: block %s not found, removing " + "from slurm", function, bg_record->bg_block_id); + break; + } + const Block::Ptr &block_ptr = *(vec.begin()); + uint16_t state = bridge_translate_status( + block_ptr->getStatus().toValue()); + if (state == BG_BLOCK_FREE) { + error("%s: Block %s was free but we got an error " + "while trying to boot it. 
(system=%s) (us=%s)", + function, bg_record->bg_block_id, + bg_block_state_string(state), + bg_block_state_string(bg_record->state)); + } else { + debug("%s: tring to boot a block %s that wasn't " + "free (system=%s) (us=%s), no real error.", + function, bg_record->bg_block_id, + bg_block_state_string(state), + bg_block_state_string(bg_record->state)); + rc = SLURM_SUCCESS; + } + break; + } case bgsched::RuntimeErrors::BlockFreeError: /* not a real error */ rc = BG_ERROR_INVALID_STATE; -- GitLab