From b1c864ef9a8403935574d8945a5b6a9a827b0f73 Mon Sep 17 00:00:00 2001 From: Danny Auble <da@schedmd.com> Date: Mon, 30 Jan 2012 13:02:34 -0800 Subject: [PATCH] BGQ - fix issue with potential deadlock on very busy system --- src/plugins/select/bluegene/bg_core.c | 3 +- .../select/bluegene/bl_bgq/bridge_status.cc | 101 ++++++++++++------ 2 files changed, 69 insertions(+), 35 deletions(-) diff --git a/src/plugins/select/bluegene/bg_core.c b/src/plugins/select/bluegene/bg_core.c index 4f43b268225..a7cc0be388d 100644 --- a/src/plugins/select/bluegene/bg_core.c +++ b/src/plugins/select/bluegene/bg_core.c @@ -121,7 +121,8 @@ static int _post_block_free(bg_record_t *bg_record, bool restore) */ remove_from_bg_list(bg_lists->booted, bg_record); - if (restore) + /* If we don't have any mp_counts force block removal */ + if (restore && bg_record->mp_count) return SLURM_SUCCESS; if (remove_from_bg_list(bg_lists->main, bg_record) != SLURM_SUCCESS) { diff --git a/src/plugins/select/bluegene/bl_bgq/bridge_status.cc b/src/plugins/select/bluegene/bl_bgq/bridge_status.cc index 43a746e80c1..59130f7d04c 100644 --- a/src/plugins/select/bluegene/bl_bgq/bridge_status.cc +++ b/src/plugins/select/bluegene/bl_bgq/bridge_status.cc @@ -254,7 +254,8 @@ static void _handle_bad_nodeboard(const char *nb_name, const char* mp_coords, } static void _handle_node_change(ba_mp_t *ba_mp, const std::string& cnode_loc, - EnumWrapper<Hardware::State> state) + EnumWrapper<Hardware::State> state, + List *delete_list) { Coordinates ibm_cnode_coords = getNodeMidplaneCoordinates(cnode_loc); uint16_t cnode_coords[Dimension::NodeDims]; @@ -263,7 +264,6 @@ static void _handle_node_change(ba_mp_t *ba_mp, const std::string& cnode_loc, bg_record_t *bg_record; ba_mp_t *found_ba_mp; ListIterator itr, itr2; - List delete_list = NULL; select_nodeinfo_t *nodeinfo; struct node_record *node_ptr = NULL; @@ -359,7 +359,6 @@ static void _handle_node_change(ba_mp_t *ba_mp, const std::string& cnode_loc, xfree(nodeinfo->failed_cnodes); nodeinfo->failed_cnodes = ba_node_map_ranged_hostlist( ba_mp->cnode_err_bitmap, ba_mp_geo_system); - slurm_mutex_lock(&block_state_mutex); itr = list_iterator_create(bg_lists->main); while ((bg_record = (bg_record_t *)list_next(itr))) { if (bg_record->free_cnt) @@ -385,12 +384,15 @@ static void _handle_node_change(ba_mp_t *ba_mp, const std::string& cnode_loc, if (bg_conf->sub_mp_sys && (state == Hardware::Missing)) { - if (!delete_list) - delete_list = list_create(NULL); + if (!*delete_list) + *delete_list = list_create(NULL); debug("Removing block %s, " "it has missing cnodes", bg_record->bg_block_id); - list_push(delete_list, bg_record); + /* If we don't have any mp_counts + * force block removal */ + bg_record->mp_count = 0; + list_push(*delete_list, bg_record); break; } @@ -454,18 +456,11 @@ static void _handle_node_change(ba_mp_t *ba_mp, const std::string& cnode_loc, list_iterator_destroy(itr2); } list_iterator_destroy(itr); - slurm_mutex_unlock(&block_state_mutex); - - bg_status_process_kill_job_list(kill_job_list); - - if (delete_list) { - free_block_list(NO_VAL, delete_list, 1, 0); - list_destroy(delete_list); - } } static void _handle_cable_change(int dim, ba_mp_t *ba_mp, - EnumWrapper<Hardware::State> state) + EnumWrapper<Hardware::State> state, + List *delete_list) { select_nodeinfo_t *nodeinfo; struct node_record *node_ptr = NULL; @@ -503,8 +498,6 @@ static void _handle_cable_change(int dim, ba_mp_t *ba_mp, } else if (!(ba_mp->axis_switch[dim].usage & BG_SWITCH_CABLE_ERROR)) { bg_record_t *bg_record = NULL; ListIterator itr; - List delete_list = NULL; - bool delete_it = 0; next_ba_mp = ba_mp->next_mp[dim]; @@ -530,11 +523,6 @@ static void _handle_cable_change(int dim, ba_mp_t *ba_mp, nodeinfo->extra_info = xstrdup(reason); /* Now handle potential overlapping blocks. */ - if (bg_conf->layout_mode == LAYOUT_DYNAMIC) - delete_it = 1; - - slurm_mutex_lock(&block_state_mutex); - delete_list = list_create(NULL); itr = list_iterator_create(bg_lists->main); while ((bg_record = (bg_record_t *)list_next(itr))) { if (bg_record->free_cnt) @@ -545,13 +533,11 @@ static void _handle_cable_change(int dim, ba_mp_t *ba_mp, continue; if (!bit_test(bg_record->mp_bitmap, next_ba_mp->index)) continue; - list_push(delete_list, bg_record); + if (!*delete_list) + *delete_list = list_create(NULL); + list_push(*delete_list, bg_record); } list_iterator_destroy(itr); - slurm_mutex_unlock(&block_state_mutex); - - free_block_list(NO_VAL, delete_list, delete_it, 0); - list_destroy(delete_list); } last_bg_update = time(NULL); } @@ -713,7 +699,7 @@ static void _do_block_poll(void) } static void _handle_midplane_update(ComputeHardware::ConstPtr bgq, - ba_mp_t *ba_mp) + ba_mp_t *ba_mp, List *delete_list) { Midplane::ConstPtr mp_ptr = bridge_get_midplane(bgq, ba_mp); int i; @@ -735,7 +721,8 @@ static void _handle_midplane_update(ComputeHardware::ConstPtr bgq, BOOST_FOREACH(const Node::ConstPtr& cnode_ptr, vec) { _handle_node_change(ba_mp, cnode_ptr->getLocation(), - cnode_ptr->getState()); + cnode_ptr->getState(), + delete_list); } } } @@ -772,7 +759,8 @@ static void _handle_midplane_update(ComputeHardware::ConstPtr bgq, if (my_cable) _handle_cable_change( dim, ba_mp, - my_cable->getState()); + my_cable->getState(), + delete_list); } } } @@ -782,6 +770,7 @@ static void _do_hardware_poll(int level, uint16_t *coords, ComputeHardware::ConstPtr bgqsys) { ba_mp_t *ba_mp; + List delete_list = NULL; if (!bgqsys) { error("_do_hardware_poll: No ComputeHardware ptr"); @@ -800,10 +789,24 @@ static void _do_hardware_poll(int level, uint16_t *coords, } return; } + /* block_state_mutex may be needed in some of these functions, + * so lock it first to avoid dead lock */ + slurm_mutex_lock(&block_state_mutex); slurm_mutex_lock(&ba_system_mutex); if ((ba_mp = coord2ba_mp(coords))) - _handle_midplane_update(bgqsys, ba_mp); + _handle_midplane_update(bgqsys, ba_mp, &delete_list); slurm_mutex_unlock(&ba_system_mutex); + slurm_mutex_unlock(&block_state_mutex); + + bg_status_process_kill_job_list(kill_job_list); + + if (delete_list) { + bool delete_it = 0; + if (bg_conf->layout_mode == LAYOUT_DYNAMIC) + delete_it = 1; + free_block_list(NO_VAL, delete_list, delete_it, 0); + list_destroy(delete_list); + } } static void *_poll(void *no_data) @@ -1050,10 +1053,14 @@ void event_handler::handleNodeStateChangedRealtimeEvent( uint16_t coords[SYSTEM_DIMENSIONS]; int dim; ba_mp_t *ba_mp; + List delete_list = NULL; for (dim = 0; dim < SYSTEM_DIMENSIONS; dim++) coords[dim] = ibm_coords[dim]; + /* block_state_mutex may be needed in _handle_node_change, + * so lock it first to avoid dead lock */ + slurm_mutex_lock(&block_state_mutex); slurm_mutex_lock(&ba_system_mutex); ba_mp = coord2ba_mp(coords); @@ -1065,6 +1072,7 @@ void event_handler::handleNodeStateChangedRealtimeEvent( bridge_hardware_state_string(event.getPreviousState()), bridge_hardware_state_string(event.getState())); slurm_mutex_unlock(&ba_system_mutex); + slurm_mutex_unlock(&block_state_mutex); return; } @@ -1073,8 +1081,21 @@ void event_handler::handleNodeStateChangedRealtimeEvent( bridge_hardware_state_string(event.getPreviousState()), bridge_hardware_state_string(event.getState())); - _handle_node_change(ba_mp, event.getLocation(), event.getState()); + _handle_node_change(ba_mp, event.getLocation(), event.getState(), + &delete_list); slurm_mutex_unlock(&ba_system_mutex); + slurm_mutex_unlock(&block_state_mutex); + + bg_status_process_kill_job_list(kill_job_list); + + if (delete_list) { + /* The only reason blocks are added to this list is if + there are missing cnodes on the block so remove + them from the mix. + */ + free_block_list(NO_VAL, delete_list, 1, 0); + list_destroy(delete_list); + } return; } @@ -1086,13 +1107,16 @@ void event_handler::handleTorusCableStateChangedRealtimeEvent( uint16_t coords[SYSTEM_DIMENSIONS]; int dim; ba_mp_t *from_ba_mp; - + List delete_list = NULL; for (dim = 0; dim < SYSTEM_DIMENSIONS; dim++) coords[dim] = ibm_coords[dim]; dim = event.getDimension(); + /* block_state_mutex may be needed in _handle_cable_change, + * so lock it first to avoid dead lock */ + slurm_mutex_lock(&block_state_mutex); slurm_mutex_lock(&ba_system_mutex); from_ba_mp = coord2ba_mp(coords); if (!from_ba_mp) { @@ -1102,13 +1126,22 @@ void event_handler::handleTorusCableStateChangedRealtimeEvent( bridge_hardware_state_string(event.getPreviousState()), bridge_hardware_state_string(event.getState())); slurm_mutex_unlock(&ba_system_mutex); + slurm_mutex_unlock(&block_state_mutex); return; } /* Else mark the midplane down */ - _handle_cable_change(dim, from_ba_mp, event.getState()); + _handle_cable_change(dim, from_ba_mp, event.getState(), &delete_list); slurm_mutex_unlock(&ba_system_mutex); + slurm_mutex_unlock(&block_state_mutex); + if (delete_list) { + bool delete_it = 0; + if (bg_conf->layout_mode == LAYOUT_DYNAMIC) + delete_it = 1; + free_block_list(NO_VAL, delete_list, delete_it, 0); + list_destroy(delete_list); + } return; } -- GitLab