From 261ee46c292e35f5eded1e7032eee8d8fab5635f Mon Sep 17 00:00:00 2001 From: Danny Auble <da@llnl.gov> Date: Fri, 4 Mar 2011 22:51:25 +0000 Subject: [PATCH] fix for handling extremely overloaded system --- .../select/bluegene/ba/block_allocator.c | 69 +++---------------- .../select/bluegene/ba_bgq/block_allocator.c | 4 +- .../select/bluegene/bg_dynamic_block.c | 4 +- src/plugins/select/bluegene/bg_job_place.c | 13 ++-- .../select/bluegene/bg_record_functions.c | 15 +--- .../select/bluegene/bl/bridge_linker.c | 12 ++-- .../bluegene/bl/bridge_switch_connections.c | 3 + src/slurmctld/proc_req.c | 3 +- 8 files changed, 33 insertions(+), 90 deletions(-) diff --git a/src/plugins/select/bluegene/ba/block_allocator.c b/src/plugins/select/bluegene/ba/block_allocator.c index d29c6d723ca..116b892b674 100644 --- a/src/plugins/select/bluegene/ba/block_allocator.c +++ b/src/plugins/select/bluegene/ba/block_allocator.c @@ -781,8 +781,9 @@ extern int remove_block(List nodes, bool is_small) ba_node = &ba_main_grid[curr_ba_node->coord[X]] [curr_ba_node->coord[Y]] [curr_ba_node->coord[Z]]; + if (curr_ba_node->used) + ba_node->used &= (~BA_MP_USED_TRUE); - ba_node->used = false; /* Small blocks don't use wires, and only have 1 node, so just break. */ if (is_small) @@ -859,7 +860,12 @@ extern int check_and_set_mp_list(List nodes) } if (ba_node->used) - curr_ba_node->used = true; + curr_ba_node->used = ba_node->used; + if (ba_debug_flags & DEBUG_FLAG_BG_ALGO_DEEP) + info("check_and_set_mp_list: " + "%s is used ?= %d %d", + curr_ba_node->coord_str, + curr_ba_node->used, ba_node->used); for(i=0; i<cluster_dims; i++) { ba_switch = &ba_node->axis_switch[i]; curr_ba_switch = &curr_ba_node->axis_switch[i]; @@ -1058,65 +1064,6 @@ extern void reset_ba_system(bool track_down_nodes) } } -/* - * IN: hostlist of midplanes we want to be able to use, mark all - * others as used. - * RET: SLURM_SUCCESS on success, or SLURM_ERROR on error - * - * Need to call reset_all_removed_mps before starting another - * allocation attempt if possible use removable_set_mps since it is - * faster. It does basically the opposite of this function. If you - * have to come up with this list though it is faster to use this - * function than if you have to call bitmap2node_name since that is slow. - */ -extern int set_all_mps_except(char *mps) -{ - int x, y, z; - hostlist_t hl = hostlist_create(mps); - char *host = NULL, *numeric = NULL; - int number, coords[HIGHEST_DIMENSIONS]; - - memset(coords, 0, sizeof(coords)); - - while ((host = hostlist_shift(hl))){ - numeric = host; - number = 0; - while (numeric) { - if (numeric[0] < '0' || numeric[0] > 'Z' - || (numeric[0] > '9' - && numeric[0] < 'A')) { - numeric++; - continue; - } - number = xstrntol(numeric, &p, cluster_dims, - cluster_base); - break; - } - hostlist_parse_int_to_array( - number, coords, cluster_dims, cluster_base); - ba_main_grid[coords[X]][coords[Y]][coords[Z]].state - |= NODE_RESUME; - free(host); - } - hostlist_destroy(hl); - - for (x = 0; x < DIM_SIZE[X]; x++) { - for (y = 0; y < DIM_SIZE[Y]; y++) - for (z = 0; z < DIM_SIZE[Z]; z++) { - if (ba_main_grid[x][y][z].state - & NODE_RESUME) { - /* clear the bit and mark as unused */ - ba_main_grid[x][y][z].state - &= ~NODE_RESUME; - } else - ba_main_grid[x][y][z].used - |= BA_MP_USED_TEMP; - } - } - - return SLURM_SUCCESS; -} - /* * set values of every grid point (used in smap) */ diff --git a/src/plugins/select/bluegene/ba_bgq/block_allocator.c b/src/plugins/select/bluegene/ba_bgq/block_allocator.c index 3bf5e3e757a..6ee3afe5285 100644 --- a/src/plugins/select/bluegene/ba_bgq/block_allocator.c +++ b/src/plugins/select/bluegene/ba_bgq/block_allocator.c @@ -295,8 +295,8 @@ extern int remove_block(List mps, bool is_small) [curr_ba_mp->coord[X]] [curr_ba_mp->coord[Y]] [curr_ba_mp->coord[Z]]; - - ba_mp->used &= (~BA_MP_USED_TRUE); + if (curr_ba_mp->used) + ba_mp->used &= (~BA_MP_USED_TRUE); ba_mp->used &= (~BA_MP_USED_ALTERED_PASS); /* Small blocks don't use wires, and only have 1 mp, diff --git a/src/plugins/select/bluegene/bg_dynamic_block.c b/src/plugins/select/bluegene/bg_dynamic_block.c index a22c0ac4d20..48926b27931 100644 --- a/src/plugins/select/bluegene/bg_dynamic_block.c +++ b/src/plugins/select/bluegene/bg_dynamic_block.c @@ -477,7 +477,7 @@ extern bg_record_t *create_small_record(bg_record_t *bg_record, } else { new_ba_mp = ba_copy_mp(ba_mp); ba_setup_mp(new_ba_mp, false); - + new_ba_mp->used = BA_MP_USED_TRUE; list_append(found_record->ba_mp_list, new_ba_mp); found_record->mp_count = 1; found_record->nodes = xstrdup_printf( @@ -750,7 +750,7 @@ static int _breakup_blocks(List block_list, List new_blocks, while ((bg_record = list_next(itr))) { if (bg_record->free_cnt) { if (bg_conf->slurm_debug_flags & DEBUG_FLAG_BG_PICK) - info("%s being free for other job(s), skipping", + info("%s being freed by other job(s), skipping", bg_record->bg_block_id); continue; } diff --git a/src/plugins/select/bluegene/bg_job_place.c b/src/plugins/select/bluegene/bg_job_place.c index 66029df0fb8..cc3c25fbee5 100644 --- a/src/plugins/select/bluegene/bg_job_place.c +++ b/src/plugins/select/bluegene/bg_job_place.c @@ -592,14 +592,15 @@ static int _check_for_booted_overlapping_blocks( } } - if (!SELECT_IS_CHECK_FULL_SET(query_mode) + if (((bg_conf->layout_mode == LAYOUT_DYNAMIC) + || ((!SELECT_IS_CHECK_FULL_SET(query_mode) + || SELECT_IS_MODE_RUN_NOW(query_mode)) + && (bg_conf->layout_mode != LAYOUT_DYNAMIC))) && ((found_record->job_running != NO_JOB_RUNNING) - || (found_record->state - == BG_BLOCK_ERROR))) { + || (found_record->state == BG_BLOCK_ERROR))) { if ((found_record->job_running == BLOCK_ERROR_STATE) - || (found_record->state - == BG_BLOCK_ERROR)) + || (found_record->state == BG_BLOCK_ERROR)) error("can't use %s, " "overlapping block %s " "is in an error state.", @@ -1178,7 +1179,7 @@ static int _find_best_block_match(List block_list, new_blocks = create_dynamic_block( block_list, &request, job_list, track_down_nodes); - /* this gets altered in + /* this could get altered in * create_dynamic_block so we reset it */ memcpy(request.geometry, req_geometry, sizeof(req_geometry)); diff --git a/src/plugins/select/bluegene/bg_record_functions.c b/src/plugins/select/bluegene/bg_record_functions.c index 3010a163901..61356c9f765 100644 --- a/src/plugins/select/bluegene/bg_record_functions.c +++ b/src/plugins/select/bluegene/bg_record_functions.c @@ -281,19 +281,6 @@ extern void process_nodes(bg_record_t *bg_record, bool startup) } else if (bg_record->node_cnt == bg_conf->mp_node_cnt) bg_record->full_block = 1; - if(!bg_record->nodes) { - info("block %s doesn't appear to have any nodes in it", - bg_record->bg_block_id); - itr = list_iterator_create(bg_record->ba_mp_list); - while ((ba_mp = list_next(itr))) { - if (!ba_mp->used) - continue; - info("process_nodes: %s is included in this block", - ba_mp->coord_str); - } - list_iterator_destroy(itr); - xassert(0); - } if (node_name2bitmap(bg_record->nodes, false, &bg_record->bitmap)) { @@ -734,7 +721,7 @@ extern int add_bg_record(List records, List *used_nodes, bg_record->mloaderimage = xstrdup(bg_conf->default_mloaderimage); - if (bg_record->conn_type[0] != SELECT_SMALL) { + if (bg_record->conn_type[0] < SELECT_SMALL) { /* this needs to be an append so we keep things in the order we got them, they will be sorted later */ list_append(records, bg_record); diff --git a/src/plugins/select/bluegene/bl/bridge_linker.c b/src/plugins/select/bluegene/bl/bridge_linker.c index f9c1fc7e61c..a73fe4df334 100644 --- a/src/plugins/select/bluegene/bl/bridge_linker.c +++ b/src/plugins/select/bluegene/bl/bridge_linker.c @@ -962,18 +962,22 @@ extern int bridge_setup_system() extern int bridge_block_create(bg_record_t *bg_record) { + int rc = SLURM_SUCCESS; + #if defined HAVE_BG_FILES _new_block((rm_partition_t **)&bg_record->bg_block); #endif _pre_allocate(bg_record); if (bg_record->cpu_cnt < bg_conf->cpus_per_mp) - configure_small_block(bg_record); + rc = configure_small_block(bg_record); else - configure_block_switches(bg_record); + rc = configure_block_switches(bg_record); - _post_allocate(bg_record); - return SLURM_SUCCESS; + if (rc == SLURM_SUCCESS) + rc = _post_allocate(bg_record); + + return rc; } extern int bridge_block_boot(bg_record_t *bg_record) diff --git a/src/plugins/select/bluegene/bl/bridge_switch_connections.c b/src/plugins/select/bluegene/bl/bridge_switch_connections.c index f150666656b..f33c3123fab 100644 --- a/src/plugins/select/bluegene/bl/bridge_switch_connections.c +++ b/src/plugins/select/bluegene/bl/bridge_switch_connections.c @@ -639,9 +639,12 @@ cleanup: error("bridge_free_nodecard_list(): %s", bridge_err_str(rc)); return SLURM_ERROR; } + #endif if (bg_conf->slurm_debug_flags & DEBUG_FLAG_BG_WIRES) info("making the small block"); + if (rc != SLURM_ERROR) + rc = SLURM_SUCCESS; return rc; } diff --git a/src/slurmctld/proc_req.c b/src/slurmctld/proc_req.c index 19c6a80e963..3482cef99b9 100644 --- a/src/slurmctld/proc_req.c +++ b/src/slurmctld/proc_req.c @@ -1470,7 +1470,8 @@ static void _slurm_rpc_complete_batch_script(slurm_msg_t * msg) } #ifdef HAVE_FRONT_END - job_ptr = find_job_record(comp_msg->job_id); + if (!job_ptr) + job_ptr = find_job_record(comp_msg->job_id); if (job_ptr && job_ptr->front_end_ptr) nodes = job_ptr->front_end_ptr->name; msg_title = "front_end"; -- GitLab