From 0c00d359f27c8c88bf7a8af9c5f5fee8c6d9fb15 Mon Sep 17 00:00:00 2001
From: Danny Auble <da@schedmd.com>
Date: Wed, 18 Dec 2013 11:48:04 -0800
Subject: [PATCH] BGQ - when multiple steps fail at the same time in a single
 block that spans multiple midplanes, make sure the cnodes that are in
 error are accounted for correctly.

---
 src/plugins/select/bluegene/select_bluegene.c | 22 +++++++++++++++----
 1 file changed, 18 insertions(+), 4 deletions(-)

diff --git a/src/plugins/select/bluegene/select_bluegene.c b/src/plugins/select/bluegene/select_bluegene.c
index a4a4b84a470..69bbb0adba6 100644
--- a/src/plugins/select/bluegene/select_bluegene.c
+++ b/src/plugins/select/bluegene/select_bluegene.c
@@ -2899,18 +2899,32 @@ extern int select_p_fail_cnode(struct step_record *step_ptr)
 		itr2 = list_iterator_create(bg_record->ba_mp_list);
 		while ((found_ba_mp = (ba_mp_t *)list_next(itr2))) {
 
-			if (!found_ba_mp->used
-			    || !bit_test(step_ptr->step_node_bitmap,
-					 found_ba_mp->index))
+			if (!found_ba_mp->used)
 				continue;
 
+			if (!bit_test(step_ptr->step_node_bitmap,
+				      found_ba_mp->index)) {
+				/* Make sure we get the count of this midplane
+				   even if it isn't in this particular step.
+				*/
+				bg_record->cnode_err_cnt += bit_set_count(
+					found_ba_mp->cnode_err_bitmap);
+				continue;
+			}
+
 			/* perhaps this block isn't involved in this
 			   error */
 			if (jobinfo->units_avail
 			    && found_ba_mp->cnode_usable_bitmap
 			    && bit_overlap(found_ba_mp->cnode_usable_bitmap,
-					   ba_mp->cnode_err_bitmap))
+					   ba_mp->cnode_err_bitmap)) {
+				/* This midplane is skipped for this error, but
+				   the cnodes it already has in error must
+				   still be added to the block's count. */
+				bg_record->cnode_err_cnt += bit_set_count(
+					found_ba_mp->cnode_err_bitmap);
 				continue;
+			}
 
 			if (!found_ba_mp->cnode_err_bitmap)
 				found_ba_mp->cnode_err_bitmap =
-- 
GitLab