From 0c00d359f27c8c88bf7a8af9c5f5fee8c6d9fb15 Mon Sep 17 00:00:00 2001 From: Danny Auble <da@schedmd.com> Date: Wed, 18 Dec 2013 11:48:04 -0800 Subject: [PATCH] BGQ - when multiple steps fail at the same time in a single block that spans multiple midplanes, make sure the cnodes that are in error are accounted for correctly. --- src/plugins/select/bluegene/select_bluegene.c | 22 +++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/src/plugins/select/bluegene/select_bluegene.c b/src/plugins/select/bluegene/select_bluegene.c index a4a4b84a470..69bbb0adba6 100644 --- a/src/plugins/select/bluegene/select_bluegene.c +++ b/src/plugins/select/bluegene/select_bluegene.c @@ -2899,18 +2899,32 @@ extern int select_p_fail_cnode(struct step_record *step_ptr) itr2 = list_iterator_create(bg_record->ba_mp_list); while ((found_ba_mp = (ba_mp_t *)list_next(itr2))) { - if (!found_ba_mp->used - || !bit_test(step_ptr->step_node_bitmap, - found_ba_mp->index)) + if (!found_ba_mp->used) continue; + if (!bit_test(step_ptr->step_node_bitmap, + found_ba_mp->index)) { + /* Make sure we get the count of this midplane + even if it isn't in this particular step. + */ + bg_record->cnode_err_cnt += bit_set_count( + found_ba_mp->cnode_err_bitmap); + continue; + } + /* perhaps this block isn't involved in this error */ if (jobinfo->units_avail && found_ba_mp->cnode_usable_bitmap && bit_overlap(found_ba_mp->cnode_usable_bitmap, - ba_mp->cnode_err_bitmap)) + ba_mp->cnode_err_bitmap)) { + /* Make sure we get the count of this midplane + even if it isn't in this particular step. + */ + bg_record->cnode_err_cnt += bit_set_count( + found_ba_mp->cnode_err_bitmap); continue; + } if (!found_ba_mp->cnode_err_bitmap) found_ba_mp->cnode_err_bitmap = -- GitLab